diff options
| -rw-r--r-- | src/wikiarticle.go | 70 |
1 file changed, 47 insertions, 23 deletions
diff --git a/src/wikiarticle.go b/src/wikiarticle.go index 4ff72b6..984eac3 100644 --- a/src/wikiarticle.go +++ b/src/wikiarticle.go @@ -31,6 +31,7 @@ func (a *App) fetchWikiArticlesData() error { rows, err := a.DB.Query(` SELECT id, imdb_id, wiki_article FROM imdb WHERE wiki_article IS NOT NULL + AND wiki_status_code != 404 AND (synopsis IS NULL OR description IS NULL OR year IS NULL OR poster_url IS NULL OR license IS NULL OR license_url IS NULL OR num_accolades IS NULL) `) @@ -79,52 +80,75 @@ func (a *App) fetchWikiArticlesData() error { } defer stmt.Close() + statusStmt, err := tx.Prepare(` + UPDATE imdb SET wiki_status_code = ? WHERE id = ? + `) + if err != nil { + tx.Rollback() + return fmt.Errorf("prepare wiki status update: %w", err) + } + defer statusStmt.Close() + type result struct { - id int - entry wikiArticleEntry + id int + entry wikiArticleEntry + statusCode int } ch := make(chan result, 1) - // Serial processing with 1 req/s rate limit + // Serial processing with 2s between requests go func() { for i, item := range entries { if i > 0 { time.Sleep(2 * time.Second) } - entry, err := a.queryWikiArticle(item.wikiArticle) + entry, statusCode, err := a.queryWikiArticle(item.wikiArticle) + ch <- result{id: item.id, entry: entry, statusCode: statusCode} if err != nil { - log.Printf("wiki error for %s/%d (%s): %v", item.imdbID, i, item.wikiArticle, err) - continue + log.Printf("wiki error %d/%d %s (%s): HTTP %d - %v", i+1, len(entries), item.imdbID, item.wikiArticle, statusCode, err) } - ch <- result{id: item.id, entry: entry} } close(ch) }() updated := 0 + skipped := 0 for r := range ch { - e := r.entry - _, err := stmt.Exec( - e.Synopsis, e.Description, e.Year, e.PosterURL, - e.License, e.LicenseURL, e.NumAccolades, r.id, - ) - if err != nil { - tx.Rollback() - return fmt.Errorf("update wiki data for id %d: %w", r.id, err) + // Always record status code + if r.statusCode > 0 { + if _, err := statusStmt.Exec(r.statusCode, r.id); err != nil { + 
tx.Rollback() + return fmt.Errorf("update wiki_status_code for id %d: %w", r.id, err) + } + } + + // Only update data fields on success + if r.statusCode == 200 { + e := r.entry + _, err := stmt.Exec( + e.Synopsis, e.Description, e.Year, e.PosterURL, + e.License, e.LicenseURL, e.NumAccolades, r.id, + ) + if err != nil { + tx.Rollback() + return fmt.Errorf("update wiki data for id %d: %w", r.id, err) + } + updated++ + } else { + skipped++ } - updated++ } if err := tx.Commit(); err != nil { return fmt.Errorf("commit wiki data: %w", err) } - log.Printf("fetchWikiArticlesData: %d entries updated", updated) + log.Printf("fetchWikiArticlesData: %d updated, %d skipped (non-200)", updated, skipped) return nil } // queryWikiArticle fetches and parses a single wiki article from the custom server. -func (a *App) queryWikiArticle(name string) (wikiArticleEntry, error) { +func (a *App) queryWikiArticle(name string) (wikiArticleEntry, int, error) { reqURL := a.Config.WikiServer + "?" + url.Values{ "username": {a.Config.WikiUsername}, "name": {name}, @@ -152,22 +176,22 @@ func (a *App) queryWikiArticle(name string) (wikiArticleEntry, error) { if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(io.LimitReader(resp.Body, 2048)) resp.Body.Close() - return wikiArticleEntry{}, fmt.Errorf("HTTP %d: %s", resp.StatusCode, body) + return wikiArticleEntry{}, resp.StatusCode, fmt.Errorf("HTTP %d: %s", resp.StatusCode, body) } break } if err != nil { - return wikiArticleEntry{}, fmt.Errorf("http get: %w", err) + return wikiArticleEntry{}, 0, fmt.Errorf("http get: %w", err) } defer resp.Body.Close() var articles []map[string]interface{} if err := json.NewDecoder(resp.Body).Decode(&articles); err != nil { - return wikiArticleEntry{}, fmt.Errorf("json decode: %w", err) + return wikiArticleEntry{}, 200, fmt.Errorf("json decode: %w", err) } if len(articles) == 0 { - return wikiArticleEntry{}, fmt.Errorf("no articles returned") + return wikiArticleEntry{}, 200, fmt.Errorf("no articles 
returned") } article := articles[0] @@ -197,7 +221,7 @@ func (a *App) queryWikiArticle(name string) (wikiArticleEntry, error) { // num_accolades from tables entry.NumAccolades = extractAccolades(article) - return entry, nil + return entry, 200, nil } func extractSynopsis(article map[string]interface{}) string { |
