package main import ( "encoding/json" "fmt" "io" "net/http" "net/url" "strconv" "strings" "time" ) var wikiArticleClient = &http.Client{Timeout: 60 * time.Second} // wikiPerson carries a person extracted from a wiki article. type wikiPerson struct { Name string Profession int // 1=actor, 2=director, 3=screenwriter } // wikiArticleEntry holds extracted fields from a wiki article API response. type wikiArticleEntry struct { Description string Year int PosterURL string Synopsis string License string LicenseURL string NumAccolades int People []wikiPerson } func (a *App) queryWikiArticle(name string) (wikiArticleEntry, int, error) { // Build URL — name is decoded from DB, encode it for the request reqURL := fmt.Sprintf("%s?username=%s&name=%s", a.Config.WikiServer, url.QueryEscape(a.Config.WikiUsername), url.PathEscape(name)) var resp *http.Response var err error start := time.Now() for attempt := 0; attempt < 5; attempt++ { if attempt > 0 { backoff := 1 << attempt logHTTPRetry(attempt, 4, backoff, name) time.Sleep(time.Duration(backoff) * time.Second) } resp, err = wikiArticleClient.Get(reqURL) if err != nil { continue } if resp.StatusCode == http.StatusTooManyRequests { resp.Body.Close() continue } if resp.StatusCode != http.StatusOK { body, _ := io.ReadAll(io.LimitReader(resp.Body, 2048)) resp.Body.Close() logHTTPRequest("GET", reqURL, resp.StatusCode, time.Since(start).Seconds()) return wikiArticleEntry{}, resp.StatusCode, fmt.Errorf("HTTP %d: %s", resp.StatusCode, body) } break } if err != nil { logHTTPRequest("GET", reqURL, 0, time.Since(start).Seconds()) return wikiArticleEntry{}, 0, fmt.Errorf("http get: %w", err) } defer resp.Body.Close() logHTTPRequest("GET", reqURL, resp.StatusCode, time.Since(start).Seconds()) var articles []map[string]interface{} if err := json.NewDecoder(resp.Body).Decode(&articles); err != nil { return wikiArticleEntry{}, 200, fmt.Errorf("json decode: %w", err) } if len(articles) == 0 { return wikiArticleEntry{}, 200, fmt.Errorf("no articles returned") } article := articles[0] var entry wikiArticleEntry // description if desc, ok := article["description"]; ok { entry.Description = fmt.Sprintf("%v", desc) } // synopsis from Plot section entry.Synopsis = extractSynopsis(article) // year, poster_url from infobox entry.Year, entry.PosterURL = extractInfoboxData(article) // people from infobox and sections entry.People = extractPeople(article) // license if licList, ok := article["license"]; ok { if arr, ok := licList.([]interface{}); ok && len(arr) > 0 { if lic, ok := arr[0].(map[string]interface{}); ok { entry.License = fmt.Sprintf("%v", lic["name"]) entry.LicenseURL = fmt.Sprintf("%v", lic["url"]) } } } // num_accolades from tables entry.NumAccolades = extractAccolades(article) return entry, 200, nil } func extractSynopsis(article map[string]interface{}) string { sections, ok := article["sections"].([]interface{}) if !ok { return "" } for _, sec := range sections { s, ok := sec.(map[string]interface{}) if !ok || s["name"] != "Plot" { continue } var parts []string if pp, ok := s["has_parts"].([]interface{}); ok { for _, p := range pp { if pp2, ok := p.(map[string]interface{}); ok && pp2["type"] == "paragraph" { parts = append(parts, fmt.Sprintf("%v", pp2["value"])) } } } if len(parts) > 0 { return strings.Join(parts, " ") } return "" } return "" } func extractInfoboxData(article map[string]interface{}) (year int, posterURL string) { infoboxes, ok := article["infoboxes"].([]interface{}) if !ok || len(infoboxes) == 0 { return } ib, ok := infoboxes[0].(map[string]interface{}) if !ok { return } parts, ok := ib["has_parts"].([]interface{}) if !ok || len(parts) == 0 { return } section, ok := parts[0].(map[string]interface{}) if !ok { return } subParts, _ := section["has_parts"].([]interface{}) for _, p := range subParts { fp, ok := p.(map[string]interface{}) if !ok { continue } // poster from first image if fp["type"] == "image" && posterURL == "" { if imgs, ok := fp["images"].([]interface{}); ok && len(imgs) > 0 { if img, ok := imgs[0].(map[string]interface{}); ok { if cu, ok := img["content_url"]; ok { posterURL = fmt.Sprintf("%v", cu) } } } } // year from Release dates if fp["name"] == "Release dates" && year == 0 { if items, ok := fp["has_parts"].([]interface{}); ok && len(items) > 0 { if item, ok := items[0].(map[string]interface{}); ok { val := fmt.Sprintf("%v", item["value"]) year = extractYear(val) } } } } return } func extractYear(s string) int { // Look for 4-digit year pattern like "1972" or "(1972-03-14)" for i := 0; i+3 < len(s); i++ { if s[i] == '(' { i++ // skip paren } if i+4 <= len(s) && s[i] >= '1' && s[i] <= '2' { if y, err := strconv.Atoi(s[i : i+4]); err == nil && y >= 1800 && y <= 2100 { return y } } } return 0 } func extractAccolades(article map[string]interface{}) int { tables, ok := article["tables"].([]interface{}) if !ok { return 0 } total := 0 for _, t := range tables { tab, ok := t.(map[string]interface{}) if !ok { continue } rows, _ := tab["rows"].([]interface{}) total += len(rows) } return total } // extractPeople extracts actors, directors, and screenwriters from the article. func extractPeople(article map[string]interface{}) []wikiPerson { var people []wikiPerson // Directors and screenwriters from infobox ibParts := getInfoboxParts(article) for _, p := range ibParts { fp, ok := p.(map[string]interface{}) if !ok { continue } name := fp["name"] if name == "Directed by" { people = append(people, extractPersonFromField(fp, 2)...) } if name == "Screenplay by" { people = append(people, extractPersonFromField(fp, 3)...) } } // Actors from Cast section for _, sec := range getSections(article) { s, ok := sec.(map[string]interface{}) if !ok || s["name"] != "Cast" { continue } if hp, ok := s["has_parts"].([]interface{}); ok { for _, part := range hp { p, ok := part.(map[string]interface{}) if !ok || p["type"] != "list" { continue } if items, ok := p["has_parts"].([]interface{}); ok { for _, item := range items { item, ok := item.(map[string]interface{}) if !ok { continue } if link, ok := getFirstPersonLink(item); ok { people = append(people, wikiPerson{Name: link, Profession: 1}) } } } break // only first list in Cast section } } break // only Cast section } return people } // getInfoboxParts returns the inner parts of the infobox section. func getInfoboxParts(article map[string]interface{}) []interface{} { infoboxes, ok := article["infoboxes"].([]interface{}) if !ok || len(infoboxes) == 0 { return nil } ib, ok := infoboxes[0].(map[string]interface{}) if !ok { return nil } parts, ok := ib["has_parts"].([]interface{}) if !ok || len(parts) == 0 { return nil } section, ok := parts[0].(map[string]interface{}) if !ok { return nil } if hp, ok := section["has_parts"].([]interface{}); ok { return hp } return nil } // getSections returns sections from the article. func getSections(article map[string]interface{}) []interface{} { sections, ok := article["sections"].([]interface{}) if !ok { return nil } return sections } // extractPersonFromField extracts a person name from a field or list. func extractPersonFromField(fp map[string]interface{}, profession int) []wikiPerson { var people []wikiPerson switch fp["type"] { case "field": if val, ok := fp["value"]; ok && val != nil { name := fmt.Sprintf("%v", val) if name != "" { people = append(people, wikiPerson{Name: name, Profession: profession}) } } case "list": for _, item := range fp["has_parts"].([]interface{}) { item, ok := item.(map[string]interface{}) if !ok { continue } // Prefer link text over value if link, ok := getFirstPersonLink(item); ok { people = append(people, wikiPerson{Name: link, Profession: profession}) } else if val := item["value"]; val != nil { name := fmt.Sprintf("%v", val) if name != "" { people = append(people, wikiPerson{Name: name, Profession: profession}) } } } } return people } // getFirstPersonLink extracts the first link text from a list item. func getFirstPersonLink(item map[string]interface{}) (string, bool) { links, ok := item["links"].([]interface{}) if !ok || len(links) == 0 { return "", false } link, ok := links[0].(map[string]interface{}) if !ok { return "", false } if text, ok := link["text"]; ok && text != nil { s := fmt.Sprintf("%v", text) if s != "" { return s, true } } return "", false }