summary | refs | log | tree | commit | diff
path: root/src/wikidata.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/wikidata.go')
-rw-r--r-- src/wikidata.go 47
1 files changed, 34 insertions, 13 deletions
diff --git a/src/wikidata.go b/src/wikidata.go
index b3018e5..dfaf1bd 100644
--- a/src/wikidata.go
+++ b/src/wikidata.go
@@ -14,7 +14,7 @@ import (
const (
wikidataSparql = "https://query.wikidata.org/sparql"
wikiBatchSize = 30
- wikiDelay = 1 * time.Second // ~15 req/min, safe under 20 req/min limit
+ wikiDelay = 2 * time.Second // ~15 req/min, safe under 20 req/min limit
wikiMaxRetries = 3
wikiRetryBackoff = 15 * time.Second
)
@@ -79,33 +79,54 @@ func (a *App) fetchWikiArticles() error {
return fmt.Errorf("begin tx: %w", err)
}
- stmt, err := tx.Prepare(`UPDATE imdb SET wiki_article = ? WHERE imdb_id = ?`)
+ wikiStmt, err := tx.Prepare(`UPDATE imdb SET wiki_article = ? WHERE imdb_id = ?`)
if err != nil {
tx.Rollback()
return fmt.Errorf("prepare wiki update: %w", err)
}
- defer stmt.Close()
+ defer wikiStmt.Close()
+
+ noWikiStmt, err := tx.Prepare(`UPDATE imdb SET has_no_wiki_article = 1 WHERE imdb_id = ?`)
+ if err != nil {
+ tx.Rollback()
+ return fmt.Errorf("prepare no_wiki update: %w", err)
+ }
+ defer noWikiStmt.Close()
updated := 0
+ noWiki := 0
for i := 0; i < len(ids); i += wikiBatchSize {
chunk := ids[i:min(i+wikiBatchSize, len(ids))]
results, err := a.queryWikidataBatch(chunk)
if err != nil {
log.Printf("wikidata batch error at offset %d: %v", i, err)
- // skip batch, continue
+ // mark all in skipped batch as no-wiki
+ for _, id := range chunk {
+ if _, err := noWikiStmt.Exec(id); err != nil {
+ tx.Rollback()
+ return fmt.Errorf("mark no_wiki for %s: %w", id, err)
+ }
+ noWiki++
+ }
continue
}
- for id, acc := range results {
- if acc.title == "" {
- continue
- }
- if _, err := stmt.Exec(acc.title, id); err != nil {
- tx.Rollback()
- return fmt.Errorf("update wiki_article for %s: %w", id, err)
+ for _, id := range chunk {
+ acc, found := results[id]
+ if found && acc.title != "" {
+ if _, err := wikiStmt.Exec(acc.title, id); err != nil {
+ tx.Rollback()
+ return fmt.Errorf("update wiki_article for %s: %w", id, err)
+ }
+ updated++
+ } else {
+ if _, err := noWikiStmt.Exec(id); err != nil {
+ tx.Rollback()
+ return fmt.Errorf("mark no_wiki for %s: %w", id, err)
+ }
+ noWiki++
}
- updated++
}
done := i + len(chunk)
@@ -121,7 +142,7 @@ func (a *App) fetchWikiArticles() error {
return fmt.Errorf("commit wiki articles: %w", err)
}
- log.Printf("fetchWikiArticles: %d wiki articles updated", updated)
+ log.Printf("fetchWikiArticles: %d wiki articles updated, %d marked as no wiki", updated, noWiki)
return nil
}