diff options
| author | dev | 2026-06-26 03:37:51 +0200 |
|---|---|---|
| committer | dev | 2026-06-26 03:37:51 +0200 |
| commit | 15d06c9802d08037283aa218ccc2f92a9236fcc9 (patch) | |
| tree | 1cd3628b5680212c723fd00c694b15fb0bad1f08 /src/wikidata.go | |
| parent | 8e2d742e59b3923852e1ef6e7a5e2ee1de14ce45 (diff) | |
| download | hnimdbbot-15d06c9802d08037283aa218ccc2f92a9236fcc9.tar.gz | |
feat: add -wiki-only flag to rerun only wiki data extraction
- fetchWikiArticlesData is standalone again (re-extracted from consumer)
- -wiki-only flag skips SPARQL pipeline, runs only wiki data fetch
- Default behavior: full pipeline (SPARQL + wiki data in parallel)
Diffstat (limited to 'src/wikidata.go')
| -rw-r--r-- | src/wikidata.go | 25 |
1 file changed, 25 insertions, 0 deletions
```diff
diff --git a/src/wikidata.go b/src/wikidata.go
index 9dacb7b..5d1b594 100644
--- a/src/wikidata.go
+++ b/src/wikidata.go
@@ -274,6 +274,31 @@ func (a *App) wikiDataConsumer(artCh <-chan wikiArticleFetch, done chan<- struct
 
 	log.Printf("fetchWikiArticlesData: %d updated, %d skipped (non-200)", updated, skipped)
 }
+// fetchWikiArticlesData fetches wiki article data from the custom server for all
+// entries that have a wiki_article but need data extraction. Callable independently.
+func (a *App) fetchWikiArticlesData() error {
+	existing, err := a.getExistingWikiArticles()
+	if err != nil {
+		return err
+	}
+	if len(existing) == 0 {
+		log.Println("fetchWikiArticlesData: all entries complete, skipping")
+		return nil
+	}
+	log.Printf("fetchWikiArticlesData: %d entries need wiki data", len(existing))
+
+	artCh := make(chan wikiArticleFetch, len(existing))
+	consumerDone := make(chan struct{})
+	go a.wikiDataConsumer(artCh, consumerDone)
+
+	for _, e := range existing {
+		artCh <- wikiArticleFetch{imdbID: e.imdbID, name: e.wikiArticle}
+	}
+	close(artCh)
+	<-consumerDone
+
+	return nil
+}
 
 // queryWikidataBatch sends a SPARQL query for the given IDs and returns a map of id -> wikiAcc.
 func (a *App) queryWikidataBatch(ids []string) (map[string]wikiAcc, error) {
```
