diff options
| author | dev | 2026-06-26 14:14:52 +0200 |
|---|---|---|
| committer | dev | 2026-06-26 14:14:52 +0200 |
| commit | 06536f57b1fdc76212da6b85fbc9287cc4f0de70 (patch) | |
| tree | 755e49a396091f953c428cb59a4cf56f03463267 | |
| parent | 13992fedaa0beaf93f6214993c95e685d249638f (diff) | |
| download | hnimdbbot-06536f57b1fdc76212da6b85fbc9287cc4f0de70.tar.gz | |
feat: add three-level logging with per-request debug output
- New --log-level flag: debug (default info), info, silent
debug: every API request logged (method, URL, status, duration)
info: normal events (batch progress, entry counts, summaries)
silent: only warnings and fatal errors
- Replaced all log.Printf/Fatalf calls with level-gated helpers
- API request timing added to queryWikiArticle, queryWikidataBatch, downloadFile
- Retries and backoff logged in debug mode
| -rw-r--r-- | src/imdbdata.go | 32 | ||||
| -rw-r--r-- | src/loglevel.go | 78 | ||||
| -rw-r--r-- | src/main.go | 31 | ||||
| -rw-r--r-- | src/wikiarticle.go | 7 | ||||
| -rw-r--r-- | src/wikidata.go | 34 |
5 files changed, 135 insertions, 47 deletions
diff --git a/src/imdbdata.go b/src/imdbdata.go index 8313188..7a2890d 100644 --- a/src/imdbdata.go +++ b/src/imdbdata.go @@ -5,11 +5,11 @@ import ( "compress/gzip" "fmt" "io" - "log" "net/http" "os" "path/filepath" "strconv" + "time" "strings" ) @@ -26,16 +26,19 @@ func dataPath(name string) string { // downloadFile fetches url and writes to dst, overwriting if present. func downloadFile(dst, url string) error { return nil + resp, err := http.Get(url) if err != nil { - return fmt.Errorf("http get: %w", err) + return fmt.Errorf("request %s: %w", url, err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - return fmt.Errorf("http %d from %s", resp.StatusCode, url) + body, _ := io.ReadAll(io.LimitReader(resp.Body, 2048)) + return fmt.Errorf("HTTP %d: %s", resp.StatusCode, body) } + start := time.Now() f, err := os.Create(dst) if err != nil { return fmt.Errorf("create %s: %w", dst, err) @@ -46,7 +49,8 @@ func downloadFile(dst, url string) error { if err != nil { return fmt.Errorf("write %s: %w", dst, err) } - log.Printf("downloaded %s", url) + logHTTPRequest("GET", url, resp.StatusCode, time.Since(start).Seconds()) + logInfo("downloaded %s", url) return nil } @@ -97,7 +101,7 @@ func (a *App) downloadImdbDatasets() error { for _, p := range pairs { // skip if TSV already exists (download may be stubbed out) if _, err := os.Stat(p.tsv); err == nil { - log.Printf("reusing existing %s", p.tsv) + logInfo("reusing existing %s", p.tsv) continue } @@ -110,7 +114,7 @@ func (a *App) downloadImdbDatasets() error { if err := os.Remove(p.gz); err != nil { return fmt.Errorf("remove %s: %w", p.gz, err) } - log.Printf("extracted %s", p.tsv) + logInfo("extracted %s", p.tsv) } return nil } @@ -327,7 +331,7 @@ func (a *App) applyImdbUpdates(ratings map[string]ratingEntry, basics map[string return fmt.Errorf("commit: %w", err) } - log.Printf("applyImdbUpdates: %d ratings, %d basics, %d genres updated", rCount, bCount, gCount) + logInfo("applyImdbUpdates: %d ratings, %d basics, %d genres updated", rCount, bCount, gCount) return nil } @@ -366,10 +370,10 @@ func (a *App) fetchAndUpdateImdbData() error { return fmt.Errorf("count genres: %w", err) } if missingRatings == 0 && missingGenres == 0 { - log.Println("fetchAndUpdateImdbData: all entries complete, skipping") + logInfo("fetchAndUpdateImdbData: all entries complete, skipping") return nil } - log.Printf("fetchAndUpdateImdbData: %d missing ratings, %d missing genres", missingRatings, missingGenres) + logInfo("fetchAndUpdateImdbData: %d missing ratings, %d missing genres", missingRatings, missingGenres) // Download / refresh datasets if err := a.downloadImdbDatasets(); err != nil { return err @@ -381,30 +385,30 @@ func (a *App) fetchAndUpdateImdbData() error { return err } if len(ratingIDs) == 0 { - log.Println("fetchAndUpdateImdbData: no entries need updating") + logInfo("fetchAndUpdateImdbData: no entries need updating") return nil } - log.Printf("fetchAndUpdateImdbData: %d entries need rating update", len(ratingIDs)) + logInfo("fetchAndUpdateImdbData: %d entries need rating update", len(ratingIDs)) // Gather all imdb_ids for basics update allIDs, err := a.getAllImdbIDs() if err != nil { return err } - log.Printf("fetchAndUpdateImdbData: %d entries total for basics update", len(allIDs)) + logInfo("fetchAndUpdateImdbData: %d entries total for basics update", len(allIDs)) // Parse datasets ratings, err := a.parseTitleRatings(ratingIDs) if err != nil { return fmt.Errorf("parseTitleRatings: %w", err) } - log.Printf("fetchAndUpdateImdbData: found ratings for %d entries", len(ratings)) + logInfo("fetchAndUpdateImdbData: found ratings for %d entries", len(ratings)) basics, err := a.parseTitleBasics(allIDs) if err != nil { return fmt.Errorf("parseTitleBasics: %w", err) } - log.Printf("fetchAndUpdateImdbData: found basics for %d entries", len(basics)) + logInfo("fetchAndUpdateImdbData: found basics for %d entries", len(basics)) // Write to DB if err := a.applyImdbUpdates(ratings, basics); err != nil { diff --git a/src/loglevel.go b/src/loglevel.go new file mode 100644 index 0000000..6f0e8aa --- /dev/null +++ b/src/loglevel.go @@ -0,0 +1,78 @@ +package main + +import ( + "fmt" + "log" + "os" +) + +// LogLevel controls verbosity. +type LogLevel int + +const ( + LogDebug LogLevel = iota + LogInfo + LogSilent +) + +// logLevel is set in main() from the --log-level flag. +var logLevel LogLevel = LogInfo + +// setLogLevel parses the flag value. +func setLogLevel(val string) { + switch val { + case "debug": + logLevel = LogDebug + case "info": + logLevel = LogInfo + case "silent": + logLevel = LogSilent + default: + log.Fatalf("invalid --log-level %q (want debug, info, or silent)", val) + } +} + +func logDebug(format string, v ...interface{}) { + if logLevel == LogDebug { + log.Printf(format, v...) + } +} + +func logInfo(format string, v ...interface{}) { + if logLevel <= LogInfo { + log.Printf(format, v...) + } +} + +func logWarn(format string, v ...interface{}) { + log.Printf(format, v...) +} + +func logFatal(format string, v ...interface{}) { + log.Fatalf(format, v...) +} + +// logHTTPRequest logs an HTTP request in debug mode. +func logHTTPRequest(method, url string, statusCode int, duration float64) { + if logLevel == LogDebug { + log.Printf("HTTP %s %s -> %d (%.2fs)", method, url, statusCode, duration) + } +} + +// logHTTPRetry logs a retry attempt in debug mode. +func logHTTPRetry(attempt, maxAttempts int, backoffSecs int, target string) { + if logLevel == LogDebug { + log.Printf("retry %d/%d for %s after %ds backoff", attempt, maxAttempts, target, backoffSecs) + } +} + +// logFatalErr is like logFatal but takes a message and error separately. +func logFatalErr(msg string, err error) { + log.Fatalf("%s: %v", msg, err) +} + +// Fatal with two args like the original log.Fatal(err, msg) pattern. +func logFatalTwoArgs(err error, msg string) { + fmt.Fprintln(os.Stderr, err) + log.Fatalf("%s", msg) +} diff --git a/src/main.go b/src/main.go index cf1c74b..2436976 100644 --- a/src/main.go +++ b/src/main.go @@ -3,7 +3,6 @@ package main import ( "flag" "fmt" - "log" "regexp" _ "github.com/go-sql-driver/mysql" @@ -56,7 +55,7 @@ func (a *App) populateImdbTable() error { } if !imdbIDRe.MatchString(param) { - log.Printf("populateImdbTable: invalid param %q, skipping", param) + logWarn("populateImdbTable: invalid param %q, skipping", param) continue } @@ -82,7 +81,7 @@ func (a *App) populateImdbTable() error { return fmt.Errorf("commit: %w", err) } - log.Printf("populateImdbTable: inserted %d, skipped %d (already existed)", inserted, skipped) + logInfo("populateImdbTable: inserted %d, skipped %d (already existed)", inserted, skipped) return nil } @@ -105,7 +104,7 @@ func (a *App) getExistingImdbIDs() (map[string]bool, error) { return nil, fmt.Errorf("rows iteration: %w", err) } - log.Printf("getExistingImdbIDs: %d existing records", len(existing)) + logInfo("getExistingImdbIDs: %d existing records", len(existing)) return existing, nil } @@ -139,7 +138,7 @@ func (a *App) extractImdbIDs() error { match := imdbTitleRe.FindStringSubmatch(link.URL) if len(match) < 2 { - log.Printf("no IMDb ID found in URL: %s", link.URL) + logWarn("no IMDb ID found in URL: %s", link.URL) continue } @@ -160,52 +159,54 @@ func (a *App) extractImdbIDs() error { return fmt.Errorf("commit: %w", err) } - log.Printf("extractImdbIDs: scanned %d rows, updated %d", count, updated) + logInfo("extractImdbIDs: scanned %d rows, updated %d", count, updated) return nil } func main() { wikiOnly := flag.Bool("wiki-only", false, "only fetch wiki article data, skip SPARQL") + logLevelFlag := flag.String("log-level", "info", "logging level: debug, info, silent") flag.Parse() + setLogLevel(*logLevelFlag) cfg, err := LoadConfig("config.json") if err != nil { - log.Fatalf("failed to load config: %v", err) + logFatalErr("failed to load config", err) } app := App{Config: cfg} - log.Printf(`Connecting to "%s" database "%s" as user "%s" on host "%s:%s" with extra options "%s".`, cfg.DBDriver, cfg.DBDBName, cfg.DBUser, cfg.DBHost, cfg.DBPort, cfg.DBOptions) + logInfo(`Connecting to "%s" database "%s" as user "%s" on host "%s:%s" with extra options "%s".`, cfg.DBDriver, cfg.DBDBName, cfg.DBUser, cfg.DBHost, cfg.DBPort, cfg.DBOptions) app.DB, err = sqlx.Connect(cfg.DBDriver, cfg.DBUser+":"+cfg.DBPassword+"@tcp("+cfg.DBHost+":"+cfg.DBPort+")/"+cfg.DBDBName+"?"+cfg.DBOptions) if err != nil { - log.Fatal(err, "Cannot connect to database") + logFatalTwoArgs(err, "Cannot connect to database") } if err = app.DB.Ping(); err != nil { - log.Fatal(err, "No connection to database") + logFatalTwoArgs(err, "No connection to database") } defer app.DB.Close() if !*wikiOnly { if err = app.extractImdbIDs(); err != nil { - log.Fatalf("extractImdbIDs: %v", err) + logFatalErr("extractImdbIDs", err) } if err = app.populateImdbTable(); err != nil { - log.Fatalf("populateImdbTable: %v", err) + logFatalErr("populateImdbTable", err) } if err = app.fetchAndUpdateImdbData(); err != nil { - log.Fatalf("fetchAndUpdateImdbData: %v", err) + logFatalErr("fetchAndUpdateImdbData", err) } if err = app.fetchWikiArticles(); err != nil { - log.Fatalf("fetchWikiArticles: %v", err) + logFatalErr("fetchWikiArticles", err) } } else { if err = app.fetchWikiArticlesData(); err != nil { - log.Fatalf("fetchWikiArticlesData: %v", err) + logFatalErr("fetchWikiArticlesData", err) } } } diff --git a/src/wikiarticle.go b/src/wikiarticle.go index c1ce486..5f7aaa1 100644 --- a/src/wikiarticle.go +++ b/src/wikiarticle.go @@ -4,7 +4,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "net/url" "strconv" @@ -39,10 +38,11 @@ func (a *App) queryWikiArticle(name string) (wikiArticleEntry, int, error) { var resp *http.Response var err error + start := time.Now() for attempt := 0; attempt < 5; attempt++ { if attempt > 0 { backoff := 1 << attempt - log.Printf("retry %d/%d for %s after %ds backoff", attempt, 4, name, backoff) + logHTTPRetry(attempt, 4, backoff, name) time.Sleep(time.Duration(backoff) * time.Second) } @@ -59,15 +59,18 @@ func (a *App) queryWikiArticle(name string) (wikiArticleEntry, int, error) { if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(io.LimitReader(resp.Body, 2048)) resp.Body.Close() + logHTTPRequest("GET", reqURL, resp.StatusCode, time.Since(start).Seconds()) return wikiArticleEntry{}, resp.StatusCode, fmt.Errorf("HTTP %d: %s", resp.StatusCode, body) } break } if err != nil { + logHTTPRequest("GET", reqURL, 0, time.Since(start).Seconds()) return wikiArticleEntry{}, 0, fmt.Errorf("http get: %w", err) } defer resp.Body.Close() + logHTTPRequest("GET", reqURL, resp.StatusCode, time.Since(start).Seconds()) var articles []map[string]interface{} if err := json.NewDecoder(resp.Body).Decode(&articles); err != nil { diff --git a/src/wikidata.go b/src/wikidata.go index 59c8188..3101673 100644 --- a/src/wikidata.go +++ b/src/wikidata.go @@ -4,7 +4,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "net/url" "strings" @@ -77,7 +76,7 @@ func (a *App) fetchWikiArticles() error { return err } if len(ids) == 0 { - log.Println("fetchWikiArticles: all entries have wiki_article, skipping") + logInfo("fetchWikiArticles: all entries have wiki_article, skipping") return nil } @@ -110,7 +109,7 @@ func (a *App) fetchWikiArticles() error { close(artCh) <-consumerDone - log.Printf("fetchWikiArticles: pipeline complete") + logInfo("fetchWikiArticles: pipeline complete") return nil } @@ -151,7 +150,7 @@ func (a *App) getExistingWikiArticles() ([]existingWikiArticle, error) { func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done chan<- struct{}) { defer close(done) - log.Printf("fetchWikiArticles: %d entries missing wiki_article", len(ids)) + logInfo("fetchWikiArticles: %d entries missing wiki_article", len(ids)) batchNum := 0 totalResolved := 0 totalNoWiki := 0 @@ -162,7 +161,7 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c results, err := a.queryWikidataBatch(chunk) if err != nil { - log.Printf("sparql batch %d error: %v, marking all as no-wiki", batchNum, err) + logWarn("sparql batch %d error: %v, marking all as no-wiki", batchNum, err) // Mark all in failed batch as no-wiki tx, _ := a.DB.Begin() stmt, _ := tx.Prepare(`UPDATE imdb SET has_no_wiki_article = 1 WHERE imdb_id = ?`) @@ -178,14 +177,14 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c // Commit batch to DB tx, err := a.DB.Begin() if err != nil { - log.Printf("sparql batch %d begin tx error: %v", batchNum, err) + logWarn("sparql batch %d begin tx error: %v", batchNum, err) continue } wikiStmt, err := tx.Prepare(`UPDATE imdb SET wiki_article = ? WHERE imdb_id = ?`) if err != nil { tx.Rollback() - log.Printf("sparql batch %d prepare error: %v", batchNum, err) + logWarn("sparql batch %d prepare error: %v", batchNum, err) continue } @@ -193,7 +192,7 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c if err != nil { wikiStmt.Close() tx.Rollback() - log.Printf("sparql batch %d prepare no_wiki error: %v", batchNum, err) + logWarn("sparql batch %d prepare no_wiki error: %v", batchNum, err) continue } @@ -217,7 +216,7 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c tx.Commit() - log.Printf("fetchWikiArticles: sparql batch %d/%d - %d/%d resolved", + logInfo("fetchWikiArticles: sparql batch %d/%d - %d/%d resolved", batchNum, (len(ids)+wikiBatchSize-1)/wikiBatchSize, resolved, len(chunk)) // Rate limit between SPARQL requests @@ -226,7 +225,7 @@ func (a *App) sparqlPipeline(ids []string, artCh chan<- wikiArticleFetch, done c } } - log.Printf("fetchWikiArticles: SPARQL done - %d resolved, %d no-wiki", totalResolved, totalNoWiki) + logInfo("fetchWikiArticles: SPARQL done - %d resolved, %d no-wiki", totalResolved, totalNoWiki) } // wikiDataConsumer fetches wiki article data from the custom server. @@ -242,7 +241,7 @@ func (a *App) wikiDataConsumer(artCh <-chan wikiArticleFetch, done chan<- struct entry, statusCode, err := a.queryWikiArticle(art.name) if err != nil { - log.Printf("wiki error %s (%s): HTTP %d - %v", art.imdbID, art.name, statusCode, err) + logWarn("wiki error %s (%s): HTTP %d - %v", art.imdbID, art.name, statusCode, err) skipped++ } @@ -265,13 +264,13 @@ func (a *App) wikiDataConsumer(artCh <-chan wikiArticleFetch, done chan<- struct // Insert people (actors, directors, screenwriters) if statusCode == 200 && len(entry.People) > 0 { if err := a.insertWikiPeople(art.imdbID, entry.People); err != nil { - log.Printf("insert people error %s: %v", art.imdbID, err) + logWarn("insert people error %s: %v", art.imdbID, err) } a.DB.Exec(`UPDATE imdb SET has_people = 1 WHERE imdb_id = ?`, art.imdbID) } } - log.Printf("fetchWikiArticlesData: %d updated, %d skipped (non-200)", updated, skipped) + logInfo("fetchWikiArticlesData: %d updated, %d skipped (non-200)", updated, skipped) } // insertWikiPeople upserts people into people/who tables. @@ -312,10 +311,10 @@ func (a *App) fetchWikiArticlesData() error { return err } if len(existing) == 0 { - log.Println("fetchWikiArticlesData: all entries complete, skipping") + logInfo("fetchWikiArticlesData: all entries complete, skipping") return nil } - log.Printf("fetchWikiArticlesData: %d entries need wiki data", len(existing)) + logInfo("fetchWikiArticlesData: %d entries need wiki data", len(existing)) artCh := make(chan wikiArticleFetch, len(existing)) consumerDone := make(chan struct{}) @@ -340,10 +339,13 @@ func (a *App) queryWikidataBatch(ids []string) (map[string]wikiAcc, error) { "format": {"json"}, }.Encode() + start := time.Now() raw, err := doGETWithRetry(endpoint, a.Config.UserAgent) if err != nil { + logHTTPRequest("GET", endpoint, 0, time.Since(start).Seconds()) return nil, fmt.Errorf("SPARQL request: %w", err) } + logHTTPRequest("GET", endpoint, 200, time.Since(start).Seconds()) var data sparqlResponse if err := json.Unmarshal(raw, &data); err != nil { @@ -419,7 +421,7 @@ func doGETWithRetry(uri, userAgent string) ([]byte, error) { for attempt := 0; attempt < wikiMaxRetries; attempt++ { if attempt > 0 { backoff := wikiRetryBackoff * time.Duration(1<<(attempt-1)) - log.Printf(" retry %d/%d after %v", attempt+1, wikiMaxRetries, backoff) + logDebug(" retry %d/%d after %v", attempt+1, wikiMaxRetries, backoff) time.Sleep(backoff) } raw, err := doGET(uri, userAgent) |
