summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordev2026-06-26 14:14:52 +0200
committerdev2026-06-26 14:14:52 +0200
commit06536f57b1fdc76212da6b85fbc9287cc4f0de70 (patch)
tree755e49a396091f953c428cb59a4cf56f03463267
parent13992fedaa0beaf93f6214993c95e685d249638f (diff)
downloadhnimdbbot-06536f57b1fdc76212da6b85fbc9287cc4f0de70.tar.gz
feat: add three-level logging with per-request debug output
- New --log-level flag: debug (default info), info, silent debug: every API request logged (method, URL, status, duration) info: normal events (batch progress, entry counts, summaries) silent: only warnings and fatal errors - Replaced all log.Printf/Fatalf calls with level-gated helpers - API request timing added to queryWikiArticle, queryWikidataBatch, downloadFile - Retries and backoff logged in debug mode
-rw-r--r--src/imdbdata.go32
-rw-r--r--src/loglevel.go78
-rw-r--r--src/main.go31
-rw-r--r--src/wikiarticle.go7
-rw-r--r--src/wikidata.go34
5 files changed, 135 insertions, 47 deletions
diff --git a/src/imdbdata.go b/src/imdbdata.go
index 8313188..7a2890d 100644
--- a/src/imdbdata.go
+++ b/src/imdbdata.go
@@ -5,11 +5,11 @@ import (
"compress/gzip"
"fmt"
"io"
- "log"
"net/http"
"os"
"path/filepath"
"strconv"
+ "time"
"strings"
)
@@ -26,16 +26,19 @@ func dataPath(name string) string {
// downloadFile fetches url and writes to dst, overwriting if present.
func downloadFile(dst, url string) error {
return nil
+
resp, err := http.Get(url)
if err != nil {
- return fmt.Errorf("http get: %w", err)
+ return fmt.Errorf("request %s: %w", url, err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
- return fmt.Errorf("http %d from %s", resp.StatusCode, url)
+ body, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
+ return fmt.Errorf("HTTP %d: %s", resp.StatusCode, body)
}
+ start := time.Now()
f, err := os.Create(dst)
if err != nil {
return fmt.Errorf("create %s: %w", dst, err)
@@ -46,7 +49,8 @@ func downloadFile(dst, url string) error {
if err != nil {
return fmt.Errorf("write %s: %w", dst, err)
}
- log.Printf("downloaded %s", url)
+ logHTTPRequest("GET", url, resp.StatusCode, time.Since(start).Seconds())
+ logInfo("downloaded %s", url)
return nil
}
@@ -97,7 +101,7 @@ func (a *App) downloadImdbDatasets() error {
for _, p := range pairs {
// skip if TSV already exists (download may be stubbed out)
if _, err := os.Stat(p.tsv); err == nil {
- log.Printf("reusing existing %s", p.tsv)
+ logInfo("reusing existing %s", p.tsv)
continue
}
@@ -110,7 +114,7 @@ func (a *App) downloadImdbDatasets() error {
if err := os.Remove(p.gz); err != nil {
return fmt.Errorf("remove %s: %w", p.gz, err)
}
- log.Printf("extracted %s", p.tsv)
+ logInfo("extracted %s", p.tsv)
}
return nil
}
@@ -327,7 +331,7 @@ func (a *App) applyImdbUpdates(ratings map[string]ratingEntry, basics map[string
return fmt.Errorf("commit: %w", err)
}
- log.Printf("applyImdbUpdates: %d ratings, %d basics, %d genres updated", rCount, bCount, gCount)
+ logInfo("applyImdbUpdates: %d ratings, %d basics, %d genres updated", rCount, bCount, gCount)
return nil
}
@@ -366,10 +370,10 @@ func (a *App) fetchAndUpdateImdbData() error {
return fmt.Errorf("count genres: %w", err)
}
if missingRatings == 0 && missingGenres == 0 {
- log.Println("fetchAndUpdateImdbData: all entries complete, skipping")
+ logInfo("fetchAndUpdateImdbData: all entries complete, skipping")
return nil
}
- log.Printf("fetchAndUpdateImdbData: %d missing ratings, %d missing genres", missingRatings, missingGenres)
+ logInfo("fetchAndUpdateImdbData: %d missing ratings, %d missing genres", missingRatings, missingGenres)
// Download / refresh datasets
if err := a.downloadImdbDatasets(); err != nil {
return err
@@ -381,30 +385,30 @@ func (a *App) fetchAndUpdateImdbData() error {
return err
}
if len(ratingIDs) == 0 {
- log.Println("fetchAndUpdateImdbData: no entries need updating")
+ logInfo("fetchAndUpdateImdbData: no entries need updating")
return nil
}
- log.Printf("fetchAndUpdateImdbData: %d entries need rating update", len(ratingIDs))
+ logInfo("fetchAndUpdateImdbData: %d entries need rating update", len(ratingIDs))
// Gather all imdb_ids for basics update
allIDs, err := a.getAllImdbIDs()
if err != nil {
return err
}
- log.Printf("fetchAndUpdateImdbData: %d entries total for basics update", len(allIDs))
+ logInfo("fetchAndUpdateImdbData: %d entries total for basics update", len(allIDs))
// Parse datasets
ratings, err := a.parseTitleRatings(ratingIDs)
if err != nil {
return fmt.Errorf("parseTitleRatings: %w", err)
}
- log.Printf("fetchAndUpdateImdbData: found ratings for %d entries", len(ratings))
+ logInfo("fetchAndUpdateImdbData: found ratings for %d entries", len(ratings))
basics, err := a.parseTitleBasics(allIDs)
if err != nil {
return fmt.Errorf("parseTitleBasics: %w", err)
}
- log.Printf("fetchAndUpdateImdbData: found basics for %d entries", len(basics))
+ logInfo("fetchAndUpdateImdbData: found basics for %d entries", len(basics))
// Write to DB
if err := a.applyImdbUpdates(ratings, basics); err != nil {
diff --git a/src/loglevel.go b/src/loglevel.go
new file mode 100644
index 0000000..6f0e8aa
--- /dev/null
+++ b/src/loglevel.go
@@ -0,0 +1,78 @@
+package main
+
+import (
+ "fmt"
+ "log"
+ "os"
+)
+
+// LogLevel controls verbosity.
+type LogLevel int
+
+const (
+ LogDebug LogLevel = iota
+ LogInfo
+ LogSilent
+)
+
+// logLevel is set in main() from the --log-level flag.
+var logLevel LogLevel = LogInfo
+
+// setLogLevel parses the flag value.
+func setLogLevel(val string) {
+ switch val {
+ case "debug":
+ logLevel = LogDebug
+ case "info":
+ logLevel = LogInfo
+ case "silent":
+ logLevel = LogSilent
+ default:
+ log.Fatalf("invalid --log-level %q (want debug, info, or silent)", val)
+ }
+}
+
+func logDebug(format string, v ...interface{}) {
+ if logLevel == LogDebug {
+ log.Printf(format, v...)
+ }
+}
+
+func logInfo(format string, v ...interface{}) {
+ if logLevel <= LogInfo {
+ log.Printf(format, v...)
+ }
+}
+
+func logWarn(format string, v ...interface{}) {
+ log.Printf(format, v...)
+}
+
+func logFatal(format string, v ...interface{}) {
+ log.Fatalf(format, v...)
+}
+
+// logHTTPRequest logs an HTTP request in debug mode.
+func logHTTPRequest(method, url string, statusCode int, duration float64) {
+ if logLevel == LogDebug {
+ log.Printf("HTTP %s %s -> %d (%.2fs)", method, url, statusCode, duration)
+ }
+}
+
+// logHTTPRetry logs a retry attempt in debug mode.
+func logHTTPRetry(attempt, maxAttempts int, backoffSecs int, target string) {
+ if logLevel == LogDebug {
+ log.Printf("retry %d/%d for %s after %ds backoff", attempt, maxAttempts, target, backoffSecs)
+ }
+}
+
+// logFatalErr is like logFatal but takes a message and error separately.
+func logFatalErr(msg string, err error) {
+ log.Fatalf("%s: %v", msg, err)
+}
+
+// Fatal with two args like the original log.Fatal(err, msg) pattern.
+func logFatalTwoArgs(err error, msg string) {
+ fmt.Fprintln(os.Stderr, err)
+ log.Fatalf("%s", msg)
+}
diff --git a/src/main.go b/src/main.go
index cf1c74b..2436976 100644
--- a/src/main.go
+++ b/src/main.go
@@ -3,7 +3,6 @@ package main
import (
"flag"
"fmt"
- "log"
"regexp"
_ "github.com/go-sql-driver/mysql"
@@ -56,7 +55,7 @@ func (a *App) populateImdbTable() error {
}
if !imdbIDRe.MatchString(param) {
- log.Printf("populateImdbTable: invalid param %q, skipping", param)
+ logWarn("populateImdbTable: invalid param %q, skipping", param)
continue
}
@@ -82,7 +81,7 @@ func (a *App) populateImdbTable() error {
return fmt.Errorf("commit: %w", err)
}
- log.Printf("populateImdbTable: inserted %d, skipped %d (already existed)", inserted, skipped)
+ logInfo("populateImdbTable: inserted %d, skipped %d (already existed)", inserted, skipped)
return nil
}
@@ -105,7 +104,7 @@ func (a *App) getExistingImdbIDs() (map[string]bool, error) {
return nil, fmt.Errorf("rows iteration: %w", err)
}
- log.Printf("getExistingImdbIDs: %d existing records", len(existing))
+ logInfo("getExistingImdbIDs: %d existing records", len(existing))
return existing, nil
}
@@ -139,7 +138,7 @@ func (a *App) extractImdbIDs() error {
match := imdbTitleRe.FindStringSubmatch(link.URL)
if len(match) < 2 {
- log.Printf("no IMDb ID found in URL: %s", link.URL)
+ logWarn("no IMDb ID found in URL: %s", link.URL)
continue
}
@@ -160,52 +159,54 @@ func (a *App) extractImdbIDs() error {
return fmt.Errorf("commit: %w", err)
}
- log.Printf("extractImdbIDs: scanned %d rows, updated %d", count, updated)
+ logInfo("extractImdbIDs: scanned %d rows, updated %d", count, updated)
return nil
}
func main() {
wikiOnly := flag.Bool("wiki-only", false, "only fetch wiki article data, skip SPARQL")
+ logLevelFlag := flag.String("log-level", "info", "logging level: debug, info, silent")
flag.Parse()
+ setLogLevel(*logLevelFlag)
cfg, err := LoadConfig("config.json")
if err != nil {
- log.Fatalf("failed to load config: %v", err)
+ logFatalErr("failed to load config", err)
}
app := App{Config: cfg}
- log.Printf(`Connecting to "%s" database "%s" as user "%s" on host "%s:%s" with extra options "%s".`, cfg.DBDriver, cfg.DBDBName, cfg.DBUser, cfg.DBHost, cfg.DBPort, cfg.DBOptions)
+ logInfo(`Connecting to "%s" database "%s" as user "%s" on host "%s:%s" with extra options "%s".`, cfg.DBDriver, cfg.DBDBName, cfg.DBUser, cfg.DBHost, cfg.DBPort, cfg.DBOptions)
app.DB, err = sqlx.Connect(cfg.DBDriver, cfg.DBUser+":"+cfg.DBPassword+"@tcp("+cfg.DBHost+":"+cfg.DBPort+")/"+cfg.DBDBName+"?"+cfg.DBOptions)
if err != nil {
- log.Fatal(err, "Cannot connect to database")
+ logFatalTwoArgs(err, "Cannot connect to database")
}
if err = app.DB.Ping(); err != nil {
- log.Fatal(err, "No connection to database")
+ logFatalTwoArgs(err, "No connection to database")
}
defer app.DB.Close()
if !*wikiOnly {
if err = app.extractImdbIDs(); err != nil {
- log.Fatalf("extractImdbIDs: %v", err)
+ logFatalErr("extractImdbIDs", err)
}
if err = app.populateImdbTable(); err != nil {
- log.Fatalf("populateImdbTable: %v", err)
+ logFatalErr("populateImdbTable", err)
}
if err = app.fetchAndUpdateImdbData(); err != nil {
- log.Fatalf("fetchAndUpdateImdbData: %v", err)
+ logFatalErr("fetchAndUpdateImdbData", err)
}
if err = app.fetchWikiArticles(); err != nil {
- log.Fatalf("fetchWikiArticles: %v", err)
+ logFatalErr("fetchWikiArticles", err)
}
} else {
if err = app.fetchWikiArticlesData(); err != nil {
- log.Fatalf("fetchWikiArticlesData: %v", err)
+ logFatalErr("fetchWikiArticlesData", err)
}
}
}
diff --git a/src/wikiarticle.go b/src/wikiarticle.go
index c1ce486..5f7aaa1 100644
--- a/src/wikiarticle.go
+++ b/src/wikiarticle.go
@@ -4,7 +4,6 @@ import (
"encoding/json"
"fmt"
"io"
- "log"
"net/http"
"net/url"
"strconv"
@@ -39,10 +38,11 @@ func (a *App) queryWikiArticle(name string) (wikiArticleEntry, int, error) {
var resp *http.Response
var err error
+ start := time.Now()
for attempt := 0; attempt < 5; attempt++ {
if attempt > 0 {
backoff := 1 << attempt
- log.Printf("retry %d/%d for %s after %ds backoff", attempt, 4, name, backoff)
+ logHTTPRetry(attempt, 4, backoff, name)
time.Sleep(time.Duration(backoff) * time.Second)
}
@@ -59,15 +59,18 @@ func (a *App) queryWikiArticle(name string) (wikiArticleEntry, int, error) {
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
resp.Body.Close()
+ logHTTPRequest("GET", reqURL, resp.StatusCode, time.Since(start).Seconds())
return wikiArticleEntry{}, resp.StatusCode, fmt.Errorf("HTTP %d: %s", resp.StatusCode, body)
}
break
}
if err != nil {
+ logHTTPRequest("GET", reqURL, 0, time.Since(start).Seconds())
return wikiArticleEntry{}, 0, fmt.Errorf("http get: %w", err)
}
defer resp.Body.Close()
+ logHTTPRequest("GET", reqURL, resp.StatusCode, time.Since(start).Seconds())
var articles []map[string]interface{}
if err := json.NewDecoder(resp.Body).Decode(&articles); err != nil {
diff --git a/src/wikidata.go b/src/wikidata.go
index 59c8188..3101673 100644
--- a/src/wikidata.go
+++ b/src/wikidata.go
@@ -4,7 +4,6 @@ import (
"encoding/json"
"fmt"
"io"
- "log"
"net/http"
"net/url"
"strings"
@@ -77,7 +76,7 @@ func (a *App) fetchWikiArticles() error {
return err
}
if len(ids) == 0 {
- log.Println("fetchWikiArticles: all entries have wiki_article, skipping")
+ logInfo("fetchWikiArticles: all entries have wiki_article, skipping")
return nil
}
@@ -110,7 +109,7 @@ func (a *App) fetchWikiArticles() error {
close(artCh)
<-consumerDone
- log.Printf("fetchWikiArticles: pipeline complete")
+ logInfo("fetchWikiArticles: pipeline complete")
return nil
}
@@ -151,7 +150,7 @@ func (a *App) getExistingWikiArticles() ([]existingWikiArticle, error) {
func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done chan<- struct{}) {
defer close(done)
- log.Printf("fetchWikiArticles: %d entries missing wiki_article", len(ids))
+ logInfo("fetchWikiArticles: %d entries missing wiki_article", len(ids))
batchNum := 0
totalResolved := 0
totalNoWiki := 0
@@ -162,7 +161,7 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c
results, err := a.queryWikidataBatch(chunk)
if err != nil {
- log.Printf("sparql batch %d error: %v, marking all as no-wiki", batchNum, err)
+ logWarn("sparql batch %d error: %v, marking all as no-wiki", batchNum, err)
// Mark all in failed batch as no-wiki
tx, _ := a.DB.Begin()
stmt, _ := tx.Prepare(`UPDATE imdb SET has_no_wiki_article = 1 WHERE imdb_id = ?`)
@@ -178,14 +177,14 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c
// Commit batch to DB
tx, err := a.DB.Begin()
if err != nil {
- log.Printf("sparql batch %d begin tx error: %v", batchNum, err)
+ logWarn("sparql batch %d begin tx error: %v", batchNum, err)
continue
}
wikiStmt, err := tx.Prepare(`UPDATE imdb SET wiki_article = ? WHERE imdb_id = ?`)
if err != nil {
tx.Rollback()
- log.Printf("sparql batch %d prepare error: %v", batchNum, err)
+ logWarn("sparql batch %d prepare error: %v", batchNum, err)
continue
}
@@ -193,7 +192,7 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c
if err != nil {
wikiStmt.Close()
tx.Rollback()
- log.Printf("sparql batch %d prepare no_wiki error: %v", batchNum, err)
+ logWarn("sparql batch %d prepare no_wiki error: %v", batchNum, err)
continue
}
@@ -217,7 +216,7 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c
tx.Commit()
- log.Printf("fetchWikiArticles: sparql batch %d/%d - %d/%d resolved",
+ logInfo("fetchWikiArticles: sparql batch %d/%d - %d/%d resolved",
batchNum, (len(ids)+wikiBatchSize-1)/wikiBatchSize, resolved, len(chunk))
// Rate limit between SPARQL requests
@@ -226,7 +225,7 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c
}
}
- log.Printf("fetchWikiArticles: SPARQL done - %d resolved, %d no-wiki", totalResolved, totalNoWiki)
+ logInfo("fetchWikiArticles: SPARQL done - %d resolved, %d no-wiki", totalResolved, totalNoWiki)
}
// wikiDataConsumer fetches wiki article data from the custom server.
@@ -242,7 +241,7 @@ func (a *App) wikiDataConsumer(artCh <-chan wikiArticleFetch, done chan<- struct
entry, statusCode, err := a.queryWikiArticle(art.name)
if err != nil {
- log.Printf("wiki error %s (%s): HTTP %d - %v", art.imdbID, art.name, statusCode, err)
+ logWarn("wiki error %s (%s): HTTP %d - %v", art.imdbID, art.name, statusCode, err)
skipped++
}
@@ -265,13 +264,13 @@ func (a *App) wikiDataConsumer(artCh <-chan wikiArticleFetch, done chan<- struct
// Insert people (actors, directors, screenwriters)
if statusCode == 200 && len(entry.People) > 0 {
if err := a.insertWikiPeople(art.imdbID, entry.People); err != nil {
- log.Printf("insert people error %s: %v", art.imdbID, err)
+ logWarn("insert people error %s: %v", art.imdbID, err)
}
a.DB.Exec(`UPDATE imdb SET has_people = 1 WHERE imdb_id = ?`, art.imdbID)
}
}
- log.Printf("fetchWikiArticlesData: %d updated, %d skipped (non-200)", updated, skipped)
+ logInfo("fetchWikiArticlesData: %d updated, %d skipped (non-200)", updated, skipped)
}
// insertWikiPeople upserts people into people/who tables.
@@ -312,10 +311,10 @@ func (a *App) fetchWikiArticlesData() error {
return err
}
if len(existing) == 0 {
- log.Println("fetchWikiArticlesData: all entries complete, skipping")
+ logInfo("fetchWikiArticlesData: all entries complete, skipping")
return nil
}
- log.Printf("fetchWikiArticlesData: %d entries need wiki data", len(existing))
+ logInfo("fetchWikiArticlesData: %d entries need wiki data", len(existing))
artCh := make(chan wikiArticleFetch, len(existing))
consumerDone := make(chan struct{})
@@ -340,10 +339,13 @@ func (a *App) queryWikidataBatch(ids []string) (map[string]wikiAcc, error) {
"format": {"json"},
}.Encode()
+ start := time.Now()
raw, err := doGETWithRetry(endpoint, a.Config.UserAgent)
if err != nil {
+ logHTTPRequest("GET", endpoint, 0, time.Since(start).Seconds())
return nil, fmt.Errorf("SPARQL request: %w", err)
}
+ logHTTPRequest("GET", endpoint, 200, time.Since(start).Seconds())
var data sparqlResponse
if err := json.Unmarshal(raw, &data); err != nil {
@@ -419,7 +421,7 @@ func doGETWithRetry(uri, userAgent string) ([]byte, error) {
for attempt := 0; attempt < wikiMaxRetries; attempt++ {
if attempt > 0 {
backoff := wikiRetryBackoff * time.Duration(1<<(attempt-1))
- log.Printf(" retry %d/%d after %v", attempt+1, wikiMaxRetries, backoff)
+ logDebug(" retry %d/%d after %v", attempt+1, wikiMaxRetries, backoff)
time.Sleep(backoff)
}
raw, err := doGET(uri, userAgent)