summaryrefslogtreecommitdiff
path: root/src/wikidata.go
diff options
context:
space:
mode:
author dev 2026-06-26 03:37:51 +0200
committer dev 2026-06-26 03:37:51 +0200
commit 15d06c9802d08037283aa218ccc2f92a9236fcc9 (patch)
tree 1cd3628b5680212c723fd00c694b15fb0bad1f08 /src/wikidata.go
parent 8e2d742e59b3923852e1ef6e7a5e2ee1de14ce45 (diff)
download hnimdbbot-15d06c9802d08037283aa218ccc2f92a9236fcc9.tar.gz
feat: add -wiki-only flag to rerun only wiki data extraction
- fetchWikiArticlesData is standalone again (re-extracted from the consumer)
- The -wiki-only flag skips the SPARQL pipeline and runs only the wiki data fetch
- Default behavior: full pipeline (SPARQL + wiki data in parallel)
Diffstat (limited to 'src/wikidata.go')
-rw-r--r-- src/wikidata.go 25
1 files changed, 25 insertions, 0 deletions
diff --git a/src/wikidata.go b/src/wikidata.go
index 9dacb7b..5d1b594 100644
--- a/src/wikidata.go
+++ b/src/wikidata.go
@@ -274,6 +274,31 @@ func (a *App) wikiDataConsumer(artCh <-chan wikiArticleFetch, done chan<- struct
log.Printf("fetchWikiArticlesData: %d updated, %d skipped (non-200)", updated, skipped)
}
+// fetchWikiArticlesData is the standalone entry point for wiki data extraction:
+// every entry returned by getExistingWikiArticles is queued for wikiDataConsumer,
+// and the call blocks until that consumer signals completion.
+func (a *App) fetchWikiArticlesData() error {
+	pending, err := a.getExistingWikiArticles()
+	switch {
+	case err != nil:
+		return err
+	case len(pending) == 0:
+		log.Println("fetchWikiArticlesData: all entries complete, skipping")
+		return nil
+	}
+	log.Printf("fetchWikiArticlesData: %d entries need wiki data", len(pending))
+
+	// The queue is sized to hold every entry, so the sends below cannot block.
+	finished := make(chan struct{})
+	queue := make(chan wikiArticleFetch, len(pending))
+	go a.wikiDataConsumer(queue, finished)
+	for _, entry := range pending {
+		queue <- wikiArticleFetch{imdbID: entry.imdbID, name: entry.wikiArticle}
+	}
+	close(queue) // tells the consumer that no further entries will arrive
+	<-finished   // block until wikiDataConsumer signals on its done channel
+	return nil
+}
// queryWikidataBatch sends a SPARQL query for the given IDs and returns a map of id -> wikiAcc.
func (a *App) queryWikidataBatch(ids []string) (map[string]wikiAcc, error) {