From 1972c3f0a93c23d861f8b00e4b3570f450d4519a Mon Sep 17 00:00:00 2001 From: dev Date: Thu, 25 Jun 2026 20:29:42 +0200 Subject: feat: set has_no_wiki_article flag for entries without Wikipedia article - Mark entries as has_no_wiki_article=1 when Wikidata returns no result - Also mark entries in batches that failed with HTTP errors - Re-run populated 2705 wiki articles, 592 marked as no wiki --- src/wikidata.go | 47 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/src/wikidata.go b/src/wikidata.go index b3018e5..dfaf1bd 100644 --- a/src/wikidata.go +++ b/src/wikidata.go @@ -14,7 +14,7 @@ import ( const ( wikidataSparql = "https://query.wikidata.org/sparql" wikiBatchSize = 30 - wikiDelay = 1 * time.Second // ~15 req/min, safe under 20 req/min limit + wikiDelay = 2 * time.Second // ~15 req/min, safe under 20 req/min limit wikiMaxRetries = 3 wikiRetryBackoff = 15 * time.Second ) @@ -79,33 +79,54 @@ func (a *App) fetchWikiArticles() error { return fmt.Errorf("begin tx: %w", err) } - stmt, err := tx.Prepare(`UPDATE imdb SET wiki_article = ? WHERE imdb_id = ?`) + wikiStmt, err := tx.Prepare(`UPDATE imdb SET wiki_article = ? WHERE imdb_id = ?`) if err != nil { tx.Rollback() return fmt.Errorf("prepare wiki update: %w", err) } - defer stmt.Close() + defer wikiStmt.Close() + + noWikiStmt, err := tx.Prepare(`UPDATE imdb SET has_no_wiki_article = 1 WHERE imdb_id = ?`) + if err != nil { + tx.Rollback() + return fmt.Errorf("prepare no_wiki update: %w", err) + } + defer noWikiStmt.Close() updated := 0 + noWiki := 0 for i := 0; i < len(ids); i += wikiBatchSize { chunk := ids[i:min(i+wikiBatchSize, len(ids))] results, err := a.queryWikidataBatch(chunk) if err != nil { log.Printf("wikidata batch error at offset %d: %v", i, err) - // skip batch, continue + // mark all in skipped batch as no-wiki + for _, id := range chunk { + if _, err := noWikiStmt.Exec(id); err != nil { + tx.Rollback() + return fmt.Errorf("mark no_wiki for %s: %w", id, err) + } + noWiki++ + } continue } - for id, acc := range results { - if acc.title == "" { - continue - } - if _, err := stmt.Exec(acc.title, id); err != nil { - tx.Rollback() - return fmt.Errorf("update wiki_article for %s: %w", id, err) + for _, id := range chunk { + acc, found := results[id] + if found && acc.title != "" { + if _, err := wikiStmt.Exec(acc.title, id); err != nil { + tx.Rollback() + return fmt.Errorf("update wiki_article for %s: %w", id, err) + } + updated++ + } else { + if _, err := noWikiStmt.Exec(id); err != nil { + tx.Rollback() + return fmt.Errorf("mark no_wiki for %s: %w", id, err) + } + noWiki++ } - updated++ } done := i + len(chunk) @@ -121,7 +142,7 @@ func (a *App) fetchWikiArticles() error { return fmt.Errorf("commit wiki articles: %w", err) } - log.Printf("fetchWikiArticles: %d wiki articles updated", updated) + log.Printf("fetchWikiArticles: %d wiki articles updated, %d marked as no wiki", updated, noWiki) return nil } -- cgit v1.2.3