diff options
| author | admin | 2026-03-29 16:50:38 +0200 |
|---|---|---|
| committer | admin | 2026-03-29 16:50:38 +0200 |
| commit | f3300bec030793d40115a08f46a7cbf49f06c2fd (patch) | |
| tree | 16e19878b474aeed873a56f1ac37a1819dc360d1 /wikipedia.go | |
| parent | 1b28f44a9f1c90e49ddf0149becaa004addc50d3 (diff) | |
| download | curious-crawler-f3300bec030793d40115a08f46a7cbf49f06c2fd.tar.gz | |
fix missing user agent
Diffstat (limited to 'wikipedia.go')
| -rw-r--r-- | wikipedia.go | 15 |
1 file changed, 9 insertions, 6 deletions
```diff
diff --git a/wikipedia.go b/wikipedia.go
index 3df392d..fbc2b81 100644
--- a/wikipedia.go
+++ b/wikipedia.go
@@ -4,6 +4,7 @@ import (
 	"encoding/json"
 	"regexp"
 	"strings"
+	//"strconv"
 
 	"io/ioutil"
 	"net/url"
@@ -13,7 +14,9 @@ import (
 )
 
 func (app *App) crawlWikipedia(url string) {
-	c := colly.NewCollector()
+	c := colly.NewCollector(
+		colly.UserAgent(app.Config.UserAgent),
+	)
 
 	c.OnHTML("#mw-normal-catlinks", func(e *colly.HTMLElement) {
 		e.ForEach("ul > li > a", func(i int, e *colly.HTMLElement) {
@@ -141,10 +144,10 @@ func (app *App) _changeTitle(id_to_delete int, correct_url string) {
 	log.Printf("new_title: %s, old_title: %s, cur_title: %s \n", new_title, old_title, cur_title)
 }
 
-func getWikipediaExcerpt(title string) string {
+func (app *App) getWikipediaExcerpt(title string) string {
 	var err error
 
-	response := getWikipediaResponse(title)
+	response := app.getWikipediaResponse(title)
 	resp_data, err := ioutil.ReadAll(response.Body)
 	if err != nil {
 		panic(err)
@@ -201,7 +204,7 @@ func (app *App) saveExcerpts() error {
 		}
 
 		title, _ := getWikipediaTitle(url)
-		excerpt := getWikipediaExcerpt(title)
+		excerpt := app.getWikipediaExcerpt(title)
 
 		query = "UPDATE article SET excerpt_html = ? WHERE id = ?"
 		stmt, err := app.DB.Prepare(query)
@@ -221,7 +224,7 @@ func (app *App) saveExcerpts() error {
 	return nil
 }
 
-func wikipediaRealUrl(wiki_url string) string {
+func (app *App) wikipediaRealUrl(wiki_url string) string {
 	/**
 	 * We don't change urls with parameters, because we would loose the context.
 	 */
@@ -247,7 +250,7 @@ func wikipediaRealUrl(wiki_url string) string {
 		return wiki_url
 	}
 
-	response := getWikipediaRedirectResponse(hostname, title)
+	response := app.getWikipediaRedirectResponse(hostname, title)
 	resp_data, err := ioutil.ReadAll(response.Body)
 	if err != nil {
 		panic(err)
```
