diff options
Diffstat (limited to 'src/wikiarticle.go')
| -rw-r--r-- | src/wikiarticle.go | 150 |
1 files changed, 144 insertions, 6 deletions
// wikiArticleClient is an HTTP client with a 60-second overall request timeout.
var wikiArticleClient = &http.Client{Timeout: 60 * time.Second}

// Profession codes stored in wikiPerson.Profession.
const (
	wikiProfessionActor        = 1
	wikiProfessionDirector     = 2
	wikiProfessionScreenwriter = 3
)

// wikiPerson carries a person extracted from a wiki article.
type wikiPerson struct {
	Name       string
	Profession int // 1=actor, 2=director, 3=screenwriter
}

// wikiArticleEntry holds extracted fields from a wiki article API response.
type wikiArticleEntry struct {
	Description  string
	Year         int
	PosterURL    string
	Synopsis     string
	License      string
	LicenseURL   string
	NumAccolades int
	People       []wikiPerson
}

// extractPeople collects directors, screenwriters, and actors from a decoded
// wiki article. queryWikiArticle stores the result in wikiArticleEntry.People.
//
// Directors and screenwriters come from the infobox parts named "Directed by"
// and "Screenplay by", in infobox order; actors follow, taken from the first
// "list" part of the first section named "Cast". Nodes that are missing or
// have an unexpected type are skipped instead of causing a panic, so a
// malformed article yields fewer people rather than a crash. The result is
// nil when nobody is found.
func extractPeople(article map[string]interface{}) []wikiPerson {
	var people []wikiPerson

	for _, part := range getInfoboxParts(article) {
		fp, ok := part.(map[string]interface{})
		if !ok {
			continue
		}
		switch fp["name"] {
		case "Directed by":
			people = append(people, extractPersonFromField(fp, wikiProfessionDirector)...)
		case "Screenplay by":
			people = append(people, extractPersonFromField(fp, wikiProfessionScreenwriter)...)
		}
	}

	return append(people, extractCast(article)...)
}

// extractCast returns the actors linked from the first "list" part of the
// first section named "Cast". List items without a usable link are skipped.
func extractCast(article map[string]interface{}) []wikiPerson {
	for _, sec := range getSections(article) {
		section, ok := sec.(map[string]interface{})
		if !ok || section["name"] != "Cast" {
			continue
		}

		// A failed assertion leaves parts nil, which ranges zero times.
		parts, _ := section["has_parts"].([]interface{})
		for _, part := range parts {
			list, ok := part.(map[string]interface{})
			if !ok || list["type"] != "list" {
				continue
			}

			var cast []wikiPerson
			items, _ := list["has_parts"].([]interface{})
			for _, it := range items {
				item, ok := it.(map[string]interface{})
				if !ok {
					continue
				}
				if name, ok := getFirstPersonLink(item); ok {
					cast = append(cast, wikiPerson{Name: name, Profession: wikiProfessionActor})
				}
			}
			return cast // only the first list in the Cast section
		}
		return nil // only the first Cast section is considered
	}
	return nil
}

// getInfoboxParts returns the parts nested inside the first part of the first
// infobox, or nil when any level of that path is absent, empty, or not of the
// expected type.
func getInfoboxParts(article map[string]interface{}) []interface{} {
	infoboxes, ok := article["infoboxes"].([]interface{})
	if !ok || len(infoboxes) == 0 {
		return nil
	}
	infobox, ok := infoboxes[0].(map[string]interface{})
	if !ok {
		return nil
	}
	parts, ok := infobox["has_parts"].([]interface{})
	if !ok || len(parts) == 0 {
		return nil
	}
	section, ok := parts[0].(map[string]interface{})
	if !ok {
		return nil
	}
	inner, _ := section["has_parts"].([]interface{})
	return inner
}

// getSections returns the article's "sections" array, or nil when it is
// absent or not an array.
func getSections(article map[string]interface{}) []interface{} {
	sections, _ := article["sections"].([]interface{})
	return sections
}

// extractPersonFromField extracts people from an infobox part and tags each
// with the given profession code.
//
// A part of type "field" contributes its "value". A part of type "list"
// contributes one person per item, preferring the item's first link text and
// falling back to the item's "value". Any other type, and a list without a
// usable "has_parts" array, contributes nothing.
func extractPersonFromField(fp map[string]interface{}, profession int) []wikiPerson {
	var people []wikiPerson
	switch fp["type"] {
	case "field":
		if name, ok := wikiText(fp["value"]); ok {
			people = append(people, wikiPerson{Name: name, Profession: profession})
		}
	case "list":
		items, _ := fp["has_parts"].([]interface{})
		for _, it := range items {
			item, ok := it.(map[string]interface{})
			if !ok {
				continue
			}
			// Prefer link text over value.
			name, ok := getFirstPersonLink(item)
			if !ok {
				name, ok = wikiText(item["value"])
			}
			if ok {
				people = append(people, wikiPerson{Name: name, Profession: profession})
			}
		}
	}
	return people
}

// getFirstPersonLink returns the "text" of the first entry in the item's
// "links" array. It reports false when there are no links, the first link is
// not an object, or its text is missing or empty.
func getFirstPersonLink(item map[string]interface{}) (string, bool) {
	links, ok := item["links"].([]interface{})
	if !ok || len(links) == 0 {
		return "", false
	}
	link, ok := links[0].(map[string]interface{})
	if !ok {
		return "", false
	}
	return wikiText(link["text"])
}

// wikiText renders a decoded JSON value as text using its default formatting.
// It reports false for nil and for values that render as the empty string.
func wikiText(v interface{}) (string, bool) {
	if v == nil {
		return "", false
	}
	s, isString := v.(string)
	if !isString {
		s = fmt.Sprintf("%v", v)
	}
	return s, s != ""
}
