summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/wikiarticle.go70
1 files changed, 47 insertions, 23 deletions
diff --git a/src/wikiarticle.go b/src/wikiarticle.go
index 4ff72b6..984eac3 100644
--- a/src/wikiarticle.go
+++ b/src/wikiarticle.go
@@ -31,6 +31,7 @@ func (a *App) fetchWikiArticlesData() error {
rows, err := a.DB.Query(`
SELECT id, imdb_id, wiki_article FROM imdb
WHERE wiki_article IS NOT NULL
+ AND wiki_status_code != 404
AND (synopsis IS NULL OR description IS NULL OR year IS NULL
OR poster_url IS NULL OR license IS NULL OR license_url IS NULL OR num_accolades IS NULL)
`)
@@ -79,52 +80,75 @@ func (a *App) fetchWikiArticlesData() error {
}
defer stmt.Close()
+ statusStmt, err := tx.Prepare(`
+ UPDATE imdb SET wiki_status_code = ? WHERE id = ?
+ `)
+ if err != nil {
+ tx.Rollback()
+ return fmt.Errorf("prepare wiki status update: %w", err)
+ }
+ defer statusStmt.Close()
+
type result struct {
- id int
- entry wikiArticleEntry
+ id int
+ entry wikiArticleEntry
+ statusCode int
}
ch := make(chan result, 1)
- // Serial processing with 1 req/s rate limit
+ // Serial processing with 2s between requests
go func() {
for i, item := range entries {
if i > 0 {
time.Sleep(2 * time.Second)
}
- entry, err := a.queryWikiArticle(item.wikiArticle)
+ entry, statusCode, err := a.queryWikiArticle(item.wikiArticle)
+ ch <- result{id: item.id, entry: entry, statusCode: statusCode}
if err != nil {
- log.Printf("wiki error for %s/%d (%s): %v", item.imdbID, i, item.wikiArticle, err)
- continue
+ log.Printf("wiki error %d/%d %s (%s): HTTP %d - %v", i+1, len(entries), item.imdbID, item.wikiArticle, statusCode, err)
}
- ch <- result{id: item.id, entry: entry}
}
close(ch)
}()
updated := 0
+ skipped := 0
for r := range ch {
- e := r.entry
- _, err := stmt.Exec(
- e.Synopsis, e.Description, e.Year, e.PosterURL,
- e.License, e.LicenseURL, e.NumAccolades, r.id,
- )
- if err != nil {
- tx.Rollback()
- return fmt.Errorf("update wiki data for id %d: %w", r.id, err)
+ // Record the status code whenever an HTTP response was received (0 means the request itself failed)
+ if r.statusCode > 0 {
+ if _, err := statusStmt.Exec(r.statusCode, r.id); err != nil {
+ tx.Rollback()
+ return fmt.Errorf("update wiki_status_code for id %d: %w", r.id, err)
+ }
+ }
+
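+ // Only update data fields on HTTP 200 (NOTE(review): queryWikiArticle also returns 200 on decode/empty-result errors, so entry may be empty here)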
+ if r.statusCode == 200 {
+ e := r.entry
+ _, err := stmt.Exec(
+ e.Synopsis, e.Description, e.Year, e.PosterURL,
+ e.License, e.LicenseURL, e.NumAccolades, r.id,
+ )
+ if err != nil {
+ tx.Rollback()
+ return fmt.Errorf("update wiki data for id %d: %w", r.id, err)
+ }
+ updated++
+ } else {
+ skipped++
}
- updated++
}
if err := tx.Commit(); err != nil {
return fmt.Errorf("commit wiki data: %w", err)
}
- log.Printf("fetchWikiArticlesData: %d entries updated", updated)
+ log.Printf("fetchWikiArticlesData: %d updated, %d skipped (non-200)", updated, skipped)
return nil
}
// queryWikiArticle fetches and parses a single wiki article from the custom server.
-func (a *App) queryWikiArticle(name string) (wikiArticleEntry, error) {
+func (a *App) queryWikiArticle(name string) (wikiArticleEntry, int, error) {
reqURL := a.Config.WikiServer + "?" + url.Values{
"username": {a.Config.WikiUsername},
"name": {name},
@@ -152,22 +176,22 @@ func (a *App) queryWikiArticle(name string) (wikiArticleEntry, error) {
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
resp.Body.Close()
- return wikiArticleEntry{}, fmt.Errorf("HTTP %d: %s", resp.StatusCode, body)
+ return wikiArticleEntry{}, resp.StatusCode, fmt.Errorf("HTTP %d: %s", resp.StatusCode, body)
}
break
}
if err != nil {
- return wikiArticleEntry{}, fmt.Errorf("http get: %w", err)
+ return wikiArticleEntry{}, 0, fmt.Errorf("http get: %w", err)
}
defer resp.Body.Close()
var articles []map[string]interface{}
if err := json.NewDecoder(resp.Body).Decode(&articles); err != nil {
- return wikiArticleEntry{}, fmt.Errorf("json decode: %w", err)
+ return wikiArticleEntry{}, 200, fmt.Errorf("json decode: %w", err)
}
if len(articles) == 0 {
- return wikiArticleEntry{}, fmt.Errorf("no articles returned")
+ return wikiArticleEntry{}, 200, fmt.Errorf("no articles returned")
}
article := articles[0]
@@ -197,7 +221,7 @@ func (a *App) queryWikiArticle(name string) (wikiArticleEntry, error) {
// num_accolades from tables
entry.NumAccolades = extractAccolades(article)
- return entry, nil
+ return entry, 200, nil
}
func extractSynopsis(article map[string]interface{}) string {