diff options
Diffstat (limited to 'src/wikiarticle.go')
| -rw-r--r-- | src/wikiarticle.go | 122 |
1 files changed, 0 insertions, 122 deletions
diff --git a/src/wikiarticle.go b/src/wikiarticle.go index 85bf28a..5b891b6 100644 --- a/src/wikiarticle.go +++ b/src/wikiarticle.go @@ -25,128 +25,6 @@ type wikiArticleEntry struct { NumAccolades int } -// fetchWikiArticlesData queries the custom wiki server for all entries -// that have a wiki_article and updates the imdb table with extracted fields. -func (a *App) fetchWikiArticlesData() error { - rows, err := a.DB.Query(` - SELECT id, imdb_id, wiki_article FROM imdb - WHERE wiki_article IS NOT NULL - AND wiki_status_code != 404 - AND (synopsis IS NULL OR description IS NULL OR year IS NULL - OR poster_url IS NULL OR license IS NULL OR license_url IS NULL OR num_accolades IS NULL) - `) - if err != nil { - return fmt.Errorf("query wiki articles: %w", err) - } - defer rows.Close() - - type dbRow struct { - id int - imdbID string - wikiArticle string - } - var entries []dbRow - for rows.Next() { - var r dbRow - if err := rows.Scan(&r.id, &r.imdbID, &r.wikiArticle); err != nil { - return fmt.Errorf("scan row: %w", err) - } - entries = append(entries, r) - } - if err := rows.Err(); err != nil { - return fmt.Errorf("rows iteration: %w", err) - } - - if len(entries) == 0 { - log.Println("fetchWikiArticlesData: all entries complete, skipping") - return nil - } - log.Printf("fetchWikiArticlesData: %d entries need wiki data", len(entries)) - - tx, err := a.DB.Begin() - if err != nil { - return fmt.Errorf("begin tx: %w", err) - } - - stmt, err := tx.Prepare(` - UPDATE imdb SET - synopsis = ?, description = ?, year = ?, poster_url = ?, - license = ?, license_url = ?, num_accolades = ? - WHERE id = ? - `) - if err != nil { - tx.Rollback() - return fmt.Errorf("prepare wiki update: %w", err) - } - defer stmt.Close() - - statusStmt, err := tx.Prepare(` - UPDATE imdb SET wiki_status_code = ? WHERE id = ? - `) - if err != nil { - tx.Rollback() - return fmt.Errorf("prepare wiki status update: %w", err) - } - defer statusStmt.Close() - - type result struct { - id int - entry wikiArticleEntry - statusCode int - } - ch := make(chan result, 1) - - // Serial processing with 2s between requests - go func() { - for i, item := range entries { - if i > 0 { - time.Sleep(2 * time.Second) - } - entry, statusCode, err := a.queryWikiArticle(item.wikiArticle) - ch <- result{id: item.id, entry: entry, statusCode: statusCode} - if err != nil { - log.Printf("wiki error %d/%d %s (%s): HTTP %d - %v", i+1, len(entries), item.imdbID, item.wikiArticle, statusCode, err) - } - } - close(ch) - }() - - updated := 0 - skipped := 0 - for r := range ch { - // Always record status code - if r.statusCode > 0 { - if _, err := statusStmt.Exec(r.statusCode, r.id); err != nil { - tx.Rollback() - return fmt.Errorf("update wiki_status_code for id %d: %w", r.id, err) - } - } - - // Only update data fields on success - if r.statusCode == 200 { - e := r.entry - _, err := stmt.Exec( - e.Synopsis, e.Description, e.Year, e.PosterURL, - e.License, e.LicenseURL, e.NumAccolades, r.id, - ) - if err != nil { - tx.Rollback() - return fmt.Errorf("update wiki data for id %d: %w", r.id, err) - } - updated++ - } else { - skipped++ - } - } - - if err := tx.Commit(); err != nil { - return fmt.Errorf("commit wiki data: %w", err) - } - - log.Printf("fetchWikiArticlesData: %d updated, %d skipped (non-200)", updated, skipped) - return nil -} - func (a *App) queryWikiArticle(name string) (wikiArticleEntry, int, error) { // Build URL — name is decoded from DB, encode it for the request reqURL := fmt.Sprintf("%s?username=%s&name=%s", |
