diff options
| author | dev | 2026-06-26 18:15:45 +0200 |
|---|---|---|
| committer | dev | 2026-06-26 18:15:45 +0200 |
| commit | b8255840d99c1d0bd81db1a40afc74ab13bf8501 (patch) | |
| tree | 246c0348765b3602d448b30af6dc981dd08ba6b5 | |
| parent | 06536f57b1fdc76212da6b85fbc9287cc4f0de70 (diff) | |
| download | hnimdbbot-b8255840d99c1d0bd81db1a40afc74ab13bf8501.tar.gz | |
fix: only count awards tables in num_accolades
extractAccolades was summing rows from all tables (including
episode lists), producing inflated counts (e.g. 708 for
Unreported_World which has 0 actual awards). Now it only counts
tables whose headers contain "award" (case-insensitive match).
| -rw-r--r-- | src/wikiarticle.go | 18 |
1 files changed, 18 insertions, 0 deletions
```diff
diff --git a/src/wikiarticle.go b/src/wikiarticle.go
index 5f7aaa1..09c2da8 100644
--- a/src/wikiarticle.go
+++ b/src/wikiarticle.go
@@ -206,18 +206,36 @@ func extractAccolades(article map[string]interface{}) int {
 	if !ok {
 		return 0
 	}
+
 	total := 0
 	for _, t := range tables {
 		tab, ok := t.(map[string]interface{})
 		if !ok {
 			continue
 		}
+
+		// Only count tables whose headers mention "Award" or "award"
+		if !hasAwardHeader(tab) {
+			continue
+		}
+
 		rows, _ := tab["rows"].([]interface{})
 		total += len(rows)
 	}
 	return total
 }
 
+// hasAwardHeader checks if a table's headers contain award-related columns.
+func hasAwardHeader(tab map[string]interface{}) bool {
+	headers, ok := tab["headers"].([]interface{})
+	if !ok || len(headers) == 0 {
+		return false
+	}
+
+	headerStr := strings.ToLower(fmt.Sprintf("%v", headers))
+	return strings.Contains(headerStr, "award")
+}
+
 // extractPeople extracts actors, directors, and screenwriters from the article.
 func extractPeople(article map[string]interface{}) []wikiPerson {
 	var people []wikiPerson
```
