summaryrefslogtreecommitdiff
path: root/wikipedia.go
diff options
context:
space:
mode:
Diffstat (limited to 'wikipedia.go')
-rw-r--r-- wikipedia.go 15
1 files changed, 9 insertions, 6 deletions
diff --git a/wikipedia.go b/wikipedia.go
index 3df392d..fbc2b81 100644
--- a/wikipedia.go
+++ b/wikipedia.go
@@ -4,6 +4,7 @@ import (
"encoding/json"
"regexp"
"strings"
+
//"strconv"
"io/ioutil"
"net/url"
@@ -13,7 +14,9 @@ import (
)
func (app *App) crawlWikipedia(url string) {
- c := colly.NewCollector()
+ c := colly.NewCollector(
+ colly.UserAgent(app.Config.UserAgent),
+ )
c.OnHTML("#mw-normal-catlinks", func(e *colly.HTMLElement) {
e.ForEach("ul > li > a", func(i int, e *colly.HTMLElement) {
@@ -141,10 +144,10 @@ func (app *App) _changeTitle(id_to_delete int, correct_url string) {
log.Printf("new_title: %s, old_title: %s, cur_title: %s \n", new_title, old_title, cur_title)
}
-func getWikipediaExcerpt(title string) string {
+func (app *App) getWikipediaExcerpt(title string) string {
var err error
- response := getWikipediaResponse(title)
+ response := app.getWikipediaResponse(title)
resp_data, err := ioutil.ReadAll(response.Body)
if err != nil {
panic(err)
@@ -201,7 +204,7 @@ func (app *App) saveExcerpts() error {
}
title, _ := getWikipediaTitle(url)
- excerpt := getWikipediaExcerpt(title)
+ excerpt := app.getWikipediaExcerpt(title)
query = "UPDATE article SET excerpt_html = ? WHERE id = ?"
stmt, err := app.DB.Prepare(query)
@@ -221,7 +224,7 @@ func (app *App) saveExcerpts() error {
return nil
}
-func wikipediaRealUrl(wiki_url string) string {
+func (app *App) wikipediaRealUrl(wiki_url string) string {
/**
* We don't change URLs with parameters, because we would lose the context.
*/
@@ -247,7 +250,7 @@ func wikipediaRealUrl(wiki_url string) string {
return wiki_url
}
- response := getWikipediaRedirectResponse(hostname, title)
+ response := app.getWikipediaRedirectResponse(hostname, title)
resp_data, err := ioutil.ReadAll(response.Body)
if err != nil {
panic(err)