summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author horus 2023-12-29 16:45:00 +0100
committer horus 2023-12-29 16:45:00 +0100
commit 1eac387a4af90a9281741939aefb423c8c9ec084 (patch)
tree 1d095af17c005dc2b405f2fd2385aa60fc2e5b6e
parent 136837ec8414591b0efccfaf74d3657aab0a08da (diff)
download curious-crawler-1eac387a4af90a9281741939aefb423c8c9ec084.tar.gz
fix bug of missing categories
-rw-r--r-- categories.go 46
1 files changed, 34 insertions, 12 deletions
diff --git a/categories.go b/categories.go
index 20f4b07..dc71cad 100644
--- a/categories.go
+++ b/categories.go
@@ -19,11 +19,17 @@ func (app *App) queryWMLabs(wiki_url string) ([]string, bool) {
var categories []string
title, hostname := getWikipediaTitle(wiki_url)
- wm_url := ("https://xtools.wmflabs.org/api/page/assessments/" + hostname + "/" + title)
+ wm_url := ("https://xtools.wmcloud.org/api/page/assessments/" + hostname + "/" + title)
if "" == title || "/" == title {
+ log.Debug("queryWMLabs: empty title supplied. returning false")
return []string{}, false
}
+ if "github.com" == hostname {
+ log.Debug("queryWMLabs: hostname == github.com, not wikipedia. returning false")
+ return []string{}, false
+ }
+ log.Debugf("queryWMLabs: wm_url: %s", wm_url)
response := getResponse(wm_url)
resp_data, err := ioutil.ReadAll(response.Body)
@@ -38,26 +44,41 @@ func (app *App) queryWMLabs(wiki_url string) ([]string, bool) {
panic(err)
}
+ log.Debugf("queryWMLabs: json data: %+v", data)
for k, v := range data {
if "project" != k && "elapsed_time" != k {
- wp := v.(map[string]interface{})
- for k2, v2 := range wp {
- if k2 == "wikiprojects" {
- list := v2.(map[string]interface{})
- for k3, _ := range list {
- cat := normalizeCategory(k3)
- if "" != cat {
- categories = append(categories, cat)
+ wp_title := v.(map[string]interface{})
+
+ // descending one step in the json array (key would be the title of the wikipedia page)
+ for _, tmp_v := range wp_title {
+ wp := tmp_v.(map[string]interface{})
+
+ for k2, v2 := range wp {
+ log.Debugf("queryWMLabs: range over wp: key %s : values: %s", k2, v2)
+ if k2 == "wikiprojects" {
+
+ list := v2.(map[string]interface{})
+ log.Debugf("queryWMLabs: wikiprojects list: %+v", list)
+
+ for k3, _ := range list {
+ log.Debugf("queryWMLabs: unnormalized cat: %s", k3)
+ cat := normalizeCategory(k3)
+ if "" != cat {
+ categories = append(categories, cat)
+ }
}
}
}
}
+ } else {
+ //log.Debugf("queryWMLabs: json: keys doesnt match: key: %s", k)
}
}
if len(categories) > 0 {
return categories, true
}
+ log.Debug("queryWMLabs: len(categories) == 0. returning false")
return categories, false
}
@@ -66,9 +87,10 @@ func (app *App) crawlWMLabs(wiki_url string) (Category, bool) {
//path := strings.TrimPrefix(u.EscapedPath(), "/wiki/")
title, hostname := getWikipediaTitle(wiki_url)
- wm_url := ("https://xtools.wmflabs.org/articleinfo/" + hostname + "/" + title)
+ wm_url := ("https://xtools.wmcloud.org/articleinfo/" + hostname + "/" + title)
if "" == title || "/" == title {
+ log.Debug("crawlWMLabs: empty title supplied. returning false")
return Category{}, false
}
@@ -86,9 +108,9 @@ func (app *App) crawlWMLabs(wiki_url string) (Category, bool) {
}
if category.Name == "" || category.Url == "" {
- log.Warnf("title: %s WM URL: %s \tWiki Url: %s", title, wm_url, wiki_url)
+ log.Warnf("crawlWMLabs: title: %s WM URL: %s \tWiki Url: %s", title, wm_url, wiki_url)
} else {
- log.Warnf("crawler: %+v", category)
+ log.Warnf("crawlWMLabs: crawler: %+v", category)
}
return category, true
}