diff options
| author | horus | 2023-12-29 16:45:00 +0100 |
|---|---|---|
| committer | horus | 2023-12-29 16:45:00 +0100 |
| commit | 1eac387a4af90a9281741939aefb423c8c9ec084 (patch) | |
| tree | 1d095af17c005dc2b405f2fd2385aa60fc2e5b6e | |
| parent | 136837ec8414591b0efccfaf74d3657aab0a08da (diff) | |
| download | curious-crawler-1eac387a4af90a9281741939aefb423c8c9ec084.tar.gz | |
fix bug of missing categories
| -rw-r--r-- | categories.go | 46 |
1 file changed, 34 insertions, 12 deletions
diff --git a/categories.go b/categories.go index 20f4b07..dc71cad 100644 --- a/categories.go +++ b/categories.go @@ -19,11 +19,17 @@ func (app *App) queryWMLabs(wiki_url string) ([]string, bool) { var categories []string title, hostname := getWikipediaTitle(wiki_url) - wm_url := ("https://xtools.wmflabs.org/api/page/assessments/" + hostname + "/" + title) + wm_url := ("https://xtools.wmcloud.org/api/page/assessments/" + hostname + "/" + title) if "" == title || "/" == title { + log.Debug("queryWMLabs: empty title supplied. returning false") return []string{}, false } + if "github.com" == hostname { + log.Debug("queryWMLabs: hostname == github.com, not wikipedia. returning false") + return []string{}, false + } + log.Debugf("queryWMLabs: wm_url: %s", wm_url) response := getResponse(wm_url) resp_data, err := ioutil.ReadAll(response.Body) @@ -38,26 +44,41 @@ func (app *App) queryWMLabs(wiki_url string) ([]string, bool) { panic(err) } + log.Debugf("queryWMLabs: json data: %+v", data) for k, v := range data { if "project" != k && "elapsed_time" != k { - wp := v.(map[string]interface{}) - for k2, v2 := range wp { - if k2 == "wikiprojects" { - list := v2.(map[string]interface{}) - for k3, _ := range list { - cat := normalizeCategory(k3) - if "" != cat { - categories = append(categories, cat) + wp_title := v.(map[string]interface{}) + + // descending one step in the json array (key would be the title of the wikipedia page) + for _, tmp_v := range wp_title { + wp := tmp_v.(map[string]interface{}) + + for k2, v2 := range wp { + log.Debugf("queryWMLabs: range over wp: key %s : values: %s", k2, v2) + if k2 == "wikiprojects" { + + list := v2.(map[string]interface{}) + log.Debugf("queryWMLabs: wikiprojects list: %+v", list) + + for k3, _ := range list { + log.Debugf("queryWMLabs: unnormalized cat: %s", k3) + cat := normalizeCategory(k3) + if "" != cat { + categories = append(categories, cat) + } } } } } + } else { + //log.Debugf("queryWMLabs: json: keys doesnt match: key: %s", 
k) } } if len(categories) > 0 { return categories, true } + log.Debug("queryWMLabs: len(categories) == 0. returning false") return categories, false } @@ -66,9 +87,10 @@ func (app *App) crawlWMLabs(wiki_url string) (Category, bool) { //path := strings.TrimPrefix(u.EscapedPath(), "/wiki/") title, hostname := getWikipediaTitle(wiki_url) - wm_url := ("https://xtools.wmflabs.org/articleinfo/" + hostname + "/" + title) + wm_url := ("https://xtools.wmcloud.org/articleinfo/" + hostname + "/" + title) if "" == title || "/" == title { + log.Debug("crawlWMLabs: empty title supplied. returning false") return Category{}, false } @@ -86,9 +108,9 @@ func (app *App) crawlWMLabs(wiki_url string) (Category, bool) { } if category.Name == "" || category.Url == "" { - log.Warnf("title: %s WM URL: %s \tWiki Url: %s", title, wm_url, wiki_url) + log.Warnf("crawlWMLabs: title: %s WM URL: %s \tWiki Url: %s", title, wm_url, wiki_url) } else { - log.Warnf("crawler: %+v", category) + log.Warnf("crawlWMLabs: crawler: %+v", category) } return category, true } |
