summary | refs | log | tree | commit | diff
path: root/src/wikiarticle.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/wikiarticle.go')
-rw-r--r-- src/wikiarticle.go 150
1 files changed, 144 insertions, 6 deletions
diff --git a/src/wikiarticle.go b/src/wikiarticle.go
index 5b891b6..cef9f02 100644
--- a/src/wikiarticle.go
+++ b/src/wikiarticle.go
@@ -14,15 +14,22 @@ import (
// wikiArticleClient is a package-level HTTP client whose Timeout caps each
// request made through it at 60 seconds.
// NOTE(review): presumably used for the wiki article API calls in this file;
// the call sites are not visible in this hunk — confirm.
var wikiArticleClient = &http.Client{Timeout: 60 * time.Second}
// wikiPerson is one person pulled out of a wiki article, together with the
// role they were credited under.
type wikiPerson struct {
	// Name is the person's name as it was extracted from the article.
	Name string
	// Profession is a numeric role code: 1=actor, 2=director, 3=screenwriter.
	Profession int
}
+
// wikiArticleEntry holds extracted fields from a wiki article API response.
type wikiArticleEntry struct {
- Description string
- Year int
- PosterURL string
- Synopsis string
- License string
- LicenseURL string
+ Description string
+ Year int
+ PosterURL string
+ Synopsis string
+ License string
+ LicenseURL string
NumAccolades int
+ People []wikiPerson
}
func (a *App) queryWikiArticle(name string) (wikiArticleEntry, int, error) {
@@ -84,6 +91,9 @@ func (a *App) queryWikiArticle(name string) (wikiArticleEntry, int, error) {
// year, poster_url from infobox
entry.Year, entry.PosterURL = extractInfoboxData(article)
+ // people from infobox and sections
+ entry.People = extractPeople(article)
+
// license
if licList, ok := article["license"]; ok {
if arr, ok := licList.([]interface{}); ok && len(arr) > 0 {
@@ -204,3 +214,131 @@ func extractAccolades(article map[string]interface{}) int {
}
return total
}
+
+// extractPeople extracts actors, directors, and screenwriters from the article.
+func extractPeople(article map[string]interface{}) []wikiPerson {
+ var people []wikiPerson
+
+ // Directors and screenwriters from infobox
+ ibParts := getInfoboxParts(article)
+ for _, p := range ibParts {
+ fp, ok := p.(map[string]interface{})
+ if !ok {
+ continue
+ }
+ name := fp["name"]
+ if name == "Directed by" {
+ people = append(people, extractPersonFromField(fp, 2)...)
+ }
+ if name == "Screenplay by" {
+ people = append(people, extractPersonFromField(fp, 3)...)
+ }
+ }
+
+ // Actors from Cast section
+ for _, sec := range getSections(article) {
+ s, ok := sec.(map[string]interface{})
+ if !ok || s["name"] != "Cast" {
+ continue
+ }
+ for _, part := range s["has_parts"].([]interface{}) {
+ p, ok := part.(map[string]interface{})
+ if !ok || p["type"] != "list" {
+ continue
+ }
+ for _, item := range p["has_parts"].([]interface{}) {
+ item, ok := item.(map[string]interface{})
+ if !ok {
+ continue
+ }
+ if link, ok := getFirstPersonLink(item); ok {
+ people = append(people, wikiPerson{Name: link, Profession: 1})
+ }
+ }
+ break // only first list in Cast section
+ }
+ break // only Cast section
+ }
+
+ return people
+}
+
// getInfoboxParts returns the entries nested inside the article's infobox:
// the "has_parts" array of the first element of the first infobox's own
// "has_parts" array.
//
// It returns nil when the article has no infoboxes or when any level on that
// path is missing, empty, or not of the expected type, so callers can range
// over the result unconditionally.
func getInfoboxParts(article map[string]interface{}) []interface{} {
	infoboxes, ok := article["infoboxes"].([]interface{})
	if !ok || len(infoboxes) == 0 {
		return nil
	}
	ib, ok := infoboxes[0].(map[string]interface{})
	if !ok {
		return nil
	}
	parts, ok := ib["has_parts"].([]interface{})
	if !ok || len(parts) == 0 {
		return nil
	}
	section, ok := parts[0].(map[string]interface{})
	if !ok {
		return nil
	}
	// Comma-ok form: a section without an inner "has_parts" array must give
	// nil like every other mismatch above, not a type-assertion panic.
	inner, ok := section["has_parts"].([]interface{})
	if !ok {
		return nil
	}
	return inner
}
+
// getSections returns the article's "sections" array, or nil when that key is
// absent or does not hold an array.
func getSections(article map[string]interface{}) []interface{} {
	if list, isList := article["sections"].([]interface{}); isList {
		return list
	}
	return nil
}
+
+// extractPersonFromField extracts a person name from a field or list.
+func extractPersonFromField(fp map[string]interface{}, profession int) []wikiPerson {
+ var people []wikiPerson
+ switch fp["type"] {
+ case "field":
+ if val, ok := fp["value"]; ok && val != nil {
+ name := fmt.Sprintf("%v", val)
+ if name != "" {
+ people = append(people, wikiPerson{Name: name, Profession: profession})
+ }
+ }
+ case "list":
+ for _, item := range fp["has_parts"].([]interface{}) {
+ item, ok := item.(map[string]interface{})
+ if !ok {
+ continue
+ }
+ // Prefer link text over value
+ if link, ok := getFirstPersonLink(item); ok {
+ people = append(people, wikiPerson{Name: link, Profession: profession})
+ } else if val := item["value"]; val != nil {
+ name := fmt.Sprintf("%v", val)
+ if name != "" {
+ people = append(people, wikiPerson{Name: name, Profession: profession})
+ }
+ }
+ }
+ }
+ return people
+}
+
// getFirstPersonLink reports the text of the first entry in item["links"].
//
// The boolean is false, and the string empty, when "links" is missing, not an
// array, or empty; when its first element is not a map; or when that element's
// "text" is absent, nil, or formats to the empty string.
func getFirstPersonLink(item map[string]interface{}) (string, bool) {
	candidates, isList := item["links"].([]interface{})
	if !isList || len(candidates) == 0 {
		return "", false
	}

	first, isMap := candidates[0].(map[string]interface{})
	if !isMap {
		return "", false
	}

	// A missing key reads back as nil, so one nil check covers both cases.
	raw := first["text"]
	if raw == nil {
		return "", false
	}

	label := fmt.Sprintf("%v", raw)
	if label == "" {
		return "", false
	}
	return label, true
}