summary | refs | log | tree | commit | diff
path: root/src/wikidata.go
diff options
context:
space:
mode:
author: dev, 2026-06-26 14:14:52 +0200
committer: dev, 2026-06-26 14:14:52 +0200
commit: 06536f57b1fdc76212da6b85fbc9287cc4f0de70 (patch)
tree: 755e49a396091f953c428cb59a4cf56f03463267 /src/wikidata.go
parent: 13992fedaa0beaf93f6214993c95e685d249638f (diff)
download: hnimdbbot-06536f57b1fdc76212da6b85fbc9287cc4f0de70.tar.gz
feat: add three-level logging with per-request debug output
- New --log-level flag with three levels: debug, info (default), silent
    debug: every API request logged (method, URL, status, duration)
    info: normal events (batch progress, entry counts, summaries)
    silent: only warnings and fatal errors
- Replaced all log.Printf/Fatalf calls with level-gated helpers
- API request timing added to queryWikiArticle, queryWikidataBatch, downloadFile
- Retries and backoff logged in debug mode
Diffstat (limited to 'src/wikidata.go')
-rw-r--r-- src/wikidata.go 34
1 files changed, 18 insertions, 16 deletions
diff --git a/src/wikidata.go b/src/wikidata.go
index 59c8188..3101673 100644
--- a/src/wikidata.go
+++ b/src/wikidata.go
@@ -4,7 +4,6 @@ import (
"encoding/json"
"fmt"
"io"
- "log"
"net/http"
"net/url"
"strings"
@@ -77,7 +76,7 @@ func (a *App) fetchWikiArticles() error {
return err
}
if len(ids) == 0 {
- log.Println("fetchWikiArticles: all entries have wiki_article, skipping")
+ logInfo("fetchWikiArticles: all entries have wiki_article, skipping")
return nil
}
@@ -110,7 +109,7 @@ func (a *App) fetchWikiArticles() error {
close(artCh)
<-consumerDone
- log.Printf("fetchWikiArticles: pipeline complete")
+ logInfo("fetchWikiArticles: pipeline complete")
return nil
}
@@ -151,7 +150,7 @@ func (a *App) getExistingWikiArticles() ([]existingWikiArticle, error) {
func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done chan<- struct{}) {
defer close(done)
- log.Printf("fetchWikiArticles: %d entries missing wiki_article", len(ids))
+ logInfo("fetchWikiArticles: %d entries missing wiki_article", len(ids))
batchNum := 0
totalResolved := 0
totalNoWiki := 0
@@ -162,7 +161,7 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c
results, err := a.queryWikidataBatch(chunk)
if err != nil {
- log.Printf("sparql batch %d error: %v, marking all as no-wiki", batchNum, err)
+ logWarn("sparql batch %d error: %v, marking all as no-wiki", batchNum, err)
// Mark all in failed batch as no-wiki
tx, _ := a.DB.Begin()
stmt, _ := tx.Prepare(`UPDATE imdb SET has_no_wiki_article = 1 WHERE imdb_id = ?`)
@@ -178,14 +177,14 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c
// Commit batch to DB
tx, err := a.DB.Begin()
if err != nil {
- log.Printf("sparql batch %d begin tx error: %v", batchNum, err)
+ logWarn("sparql batch %d begin tx error: %v", batchNum, err)
continue
}
wikiStmt, err := tx.Prepare(`UPDATE imdb SET wiki_article = ? WHERE imdb_id = ?`)
if err != nil {
tx.Rollback()
- log.Printf("sparql batch %d prepare error: %v", batchNum, err)
+ logWarn("sparql batch %d prepare error: %v", batchNum, err)
continue
}
@@ -193,7 +192,7 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c
if err != nil {
wikiStmt.Close()
tx.Rollback()
- log.Printf("sparql batch %d prepare no_wiki error: %v", batchNum, err)
+ logWarn("sparql batch %d prepare no_wiki error: %v", batchNum, err)
continue
}
@@ -217,7 +216,7 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c
tx.Commit()
- log.Printf("fetchWikiArticles: sparql batch %d/%d - %d/%d resolved",
+ logInfo("fetchWikiArticles: sparql batch %d/%d - %d/%d resolved",
batchNum, (len(ids)+wikiBatchSize-1)/wikiBatchSize, resolved, len(chunk))
// Rate limit between SPARQL requests
@@ -226,7 +225,7 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c
}
}
- log.Printf("fetchWikiArticles: SPARQL done - %d resolved, %d no-wiki", totalResolved, totalNoWiki)
+ logInfo("fetchWikiArticles: SPARQL done - %d resolved, %d no-wiki", totalResolved, totalNoWiki)
}
// wikiDataConsumer fetches wiki article data from the custom server.
@@ -242,7 +241,7 @@ func (a *App) wikiDataConsumer(artCh <-chan wikiArticleFetch, done chan<- struct
entry, statusCode, err := a.queryWikiArticle(art.name)
if err != nil {
- log.Printf("wiki error %s (%s): HTTP %d - %v", art.imdbID, art.name, statusCode, err)
+ logWarn("wiki error %s (%s): HTTP %d - %v", art.imdbID, art.name, statusCode, err)
skipped++
}
@@ -265,13 +264,13 @@ func (a *App) wikiDataConsumer(artCh <-chan wikiArticleFetch, done chan<- struct
// Insert people (actors, directors, screenwriters)
if statusCode == 200 && len(entry.People) > 0 {
if err := a.insertWikiPeople(art.imdbID, entry.People); err != nil {
- log.Printf("insert people error %s: %v", art.imdbID, err)
+ logWarn("insert people error %s: %v", art.imdbID, err)
}
a.DB.Exec(`UPDATE imdb SET has_people = 1 WHERE imdb_id = ?`, art.imdbID)
}
}
- log.Printf("fetchWikiArticlesData: %d updated, %d skipped (non-200)", updated, skipped)
+ logInfo("fetchWikiArticlesData: %d updated, %d skipped (non-200)", updated, skipped)
}
// insertWikiPeople upserts people into people/who tables.
@@ -312,10 +311,10 @@ func (a *App) fetchWikiArticlesData() error {
return err
}
if len(existing) == 0 {
- log.Println("fetchWikiArticlesData: all entries complete, skipping")
+ logInfo("fetchWikiArticlesData: all entries complete, skipping")
return nil
}
- log.Printf("fetchWikiArticlesData: %d entries need wiki data", len(existing))
+ logInfo("fetchWikiArticlesData: %d entries need wiki data", len(existing))
artCh := make(chan wikiArticleFetch, len(existing))
consumerDone := make(chan struct{})
@@ -340,10 +339,13 @@ func (a *App) queryWikidataBatch(ids []string) (map[string]wikiAcc, error) {
"format": {"json"},
}.Encode()
+ start := time.Now()
raw, err := doGETWithRetry(endpoint, a.Config.UserAgent)
if err != nil {
+ logHTTPRequest("GET", endpoint, 0, time.Since(start).Seconds())
return nil, fmt.Errorf("SPARQL request: %w", err)
}
+ logHTTPRequest("GET", endpoint, 200, time.Since(start).Seconds())
var data sparqlResponse
if err := json.Unmarshal(raw, &data); err != nil {
@@ -419,7 +421,7 @@ func doGETWithRetry(uri, userAgent string) ([]byte, error) {
for attempt := 0; attempt < wikiMaxRetries; attempt++ {
if attempt > 0 {
backoff := wikiRetryBackoff * time.Duration(1<<(attempt-1))
- log.Printf(" retry %d/%d after %v", attempt+1, wikiMaxRetries, backoff)
+ logDebug(" retry %d/%d after %v", attempt+1, wikiMaxRetries, backoff)
time.Sleep(backoff)
}
raw, err := doGET(uri, userAgent)