diff options
| author | dev | 2026-06-26 14:14:52 +0200 |
|---|---|---|
| committer | dev | 2026-06-26 14:14:52 +0200 |
| commit | 06536f57b1fdc76212da6b85fbc9287cc4f0de70 (patch) | |
| tree | 755e49a396091f953c428cb59a4cf56f03463267 /src/wikidata.go | |
| parent | 13992fedaa0beaf93f6214993c95e685d249638f (diff) | |
| download | hnimdbbot-06536f57b1fdc76212da6b85fbc9287cc4f0de70.tar.gz | |
feat: add three-level logging with per-request debug output
- New --log-level flag with three levels: debug, info (default), silent
  - debug: every API request logged (method, URL, status, duration)
  - info: normal events (batch progress, entry counts, summaries)
  - silent: only warnings and fatal errors
- Replaced all log.Printf/Fatalf calls with level-gated helpers
- API request timing added to queryWikiArticle, queryWikidataBatch, downloadFile
- Retries and backoff logged in debug mode
Diffstat (limited to 'src/wikidata.go')
| -rw-r--r-- | src/wikidata.go | 34 |
1 file changed, 18 insertions, 16 deletions
diff --git a/src/wikidata.go b/src/wikidata.go index 59c8188..3101673 100644 --- a/src/wikidata.go +++ b/src/wikidata.go @@ -4,7 +4,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "net/url" "strings" @@ -77,7 +76,7 @@ func (a *App) fetchWikiArticles() error { return err } if len(ids) == 0 { - log.Println("fetchWikiArticles: all entries have wiki_article, skipping") + logInfo("fetchWikiArticles: all entries have wiki_article, skipping") return nil } @@ -110,7 +109,7 @@ func (a *App) fetchWikiArticles() error { close(artCh) <-consumerDone - log.Printf("fetchWikiArticles: pipeline complete") + logInfo("fetchWikiArticles: pipeline complete") return nil } @@ -151,7 +150,7 @@ func (a *App) getExistingWikiArticles() ([]existingWikiArticle, error) { func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done chan<- struct{}) { defer close(done) - log.Printf("fetchWikiArticles: %d entries missing wiki_article", len(ids)) + logInfo("fetchWikiArticles: %d entries missing wiki_article", len(ids)) batchNum := 0 totalResolved := 0 totalNoWiki := 0 @@ -162,7 +161,7 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c results, err := a.queryWikidataBatch(chunk) if err != nil { - log.Printf("sparql batch %d error: %v, marking all as no-wiki", batchNum, err) + logWarn("sparql batch %d error: %v, marking all as no-wiki", batchNum, err) // Mark all in failed batch as no-wiki tx, _ := a.DB.Begin() stmt, _ := tx.Prepare(`UPDATE imdb SET has_no_wiki_article = 1 WHERE imdb_id = ?`) @@ -178,14 +177,14 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c // Commit batch to DB tx, err := a.DB.Begin() if err != nil { - log.Printf("sparql batch %d begin tx error: %v", batchNum, err) + logWarn("sparql batch %d begin tx error: %v", batchNum, err) continue } wikiStmt, err := tx.Prepare(`UPDATE imdb SET wiki_article = ? 
WHERE imdb_id = ?`) if err != nil { tx.Rollback() - log.Printf("sparql batch %d prepare error: %v", batchNum, err) + logWarn("sparql batch %d prepare error: %v", batchNum, err) continue } @@ -193,7 +192,7 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c if err != nil { wikiStmt.Close() tx.Rollback() - log.Printf("sparql batch %d prepare no_wiki error: %v", batchNum, err) + logWarn("sparql batch %d prepare no_wiki error: %v", batchNum, err) continue } @@ -217,7 +216,7 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c tx.Commit() - log.Printf("fetchWikiArticles: sparql batch %d/%d - %d/%d resolved", + logInfo("fetchWikiArticles: sparql batch %d/%d - %d/%d resolved", batchNum, (len(ids)+wikiBatchSize-1)/wikiBatchSize, resolved, len(chunk)) // Rate limit between SPARQL requests @@ -226,7 +225,7 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c } } - log.Printf("fetchWikiArticles: SPARQL done - %d resolved, %d no-wiki", totalResolved, totalNoWiki) + logInfo("fetchWikiArticles: SPARQL done - %d resolved, %d no-wiki", totalResolved, totalNoWiki) } // wikiDataConsumer fetches wiki article data from the custom server. 
@@ -242,7 +241,7 @@ func (a *App) wikiDataConsumer(artCh <-chan wikiArticleFetch, done chan<- struct entry, statusCode, err := a.queryWikiArticle(art.name) if err != nil { - log.Printf("wiki error %s (%s): HTTP %d - %v", art.imdbID, art.name, statusCode, err) + logWarn("wiki error %s (%s): HTTP %d - %v", art.imdbID, art.name, statusCode, err) skipped++ } @@ -265,13 +264,13 @@ func (a *App) wikiDataConsumer(artCh <-chan wikiArticleFetch, done chan<- struct // Insert people (actors, directors, screenwriters) if statusCode == 200 && len(entry.People) > 0 { if err := a.insertWikiPeople(art.imdbID, entry.People); err != nil { - log.Printf("insert people error %s: %v", art.imdbID, err) + logWarn("insert people error %s: %v", art.imdbID, err) } a.DB.Exec(`UPDATE imdb SET has_people = 1 WHERE imdb_id = ?`, art.imdbID) } } - log.Printf("fetchWikiArticlesData: %d updated, %d skipped (non-200)", updated, skipped) + logInfo("fetchWikiArticlesData: %d updated, %d skipped (non-200)", updated, skipped) } // insertWikiPeople upserts people into people/who tables. 
@@ -312,10 +311,10 @@ func (a *App) fetchWikiArticlesData() error { return err } if len(existing) == 0 { - log.Println("fetchWikiArticlesData: all entries complete, skipping") + logInfo("fetchWikiArticlesData: all entries complete, skipping") return nil } - log.Printf("fetchWikiArticlesData: %d entries need wiki data", len(existing)) + logInfo("fetchWikiArticlesData: %d entries need wiki data", len(existing)) artCh := make(chan wikiArticleFetch, len(existing)) consumerDone := make(chan struct{}) @@ -340,10 +339,13 @@ func (a *App) queryWikidataBatch(ids []string) (map[string]wikiAcc, error) { "format": {"json"}, }.Encode() + start := time.Now() raw, err := doGETWithRetry(endpoint, a.Config.UserAgent) if err != nil { + logHTTPRequest("GET", endpoint, 0, time.Since(start).Seconds()) return nil, fmt.Errorf("SPARQL request: %w", err) } + logHTTPRequest("GET", endpoint, 200, time.Since(start).Seconds()) var data sparqlResponse if err := json.Unmarshal(raw, &data); err != nil { @@ -419,7 +421,7 @@ func doGETWithRetry(uri, userAgent string) ([]byte, error) { for attempt := 0; attempt < wikiMaxRetries; attempt++ { if attempt > 0 { backoff := wikiRetryBackoff * time.Duration(1<<(attempt-1)) - log.Printf(" retry %d/%d after %v", attempt+1, wikiMaxRetries, backoff) + logDebug(" retry %d/%d after %v", attempt+1, wikiMaxRetries, backoff) time.Sleep(backoff) } raw, err := doGET(uri, userAgent) |
