From f3300bec030793d40115a08f46a7cbf49f06c2fd Mon Sep 17 00:00:00 2001 From: admin Date: Sun, 29 Mar 2026 16:50:38 +0200 Subject: fix missing user agent --- main.go | 89 +++++++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 53 insertions(+), 36 deletions(-) (limited to 'main.go') diff --git a/main.go b/main.go index f3d0a6b..feecf27 100644 --- a/main.go +++ b/main.go @@ -53,6 +53,7 @@ func main() { //app.saveAllCategories() app.updateAllDiscussions() //app.walkDown() + //app.saveExcerpts() /** * Resolve redirects on stored urls. @@ -76,17 +77,20 @@ func (app *App) walkDown() { //max_item := 15494000 //max_item := 15038031 //max_item := 14450000 + //max_item := 47528683 + //max_item := 46750000 - const maxRoutines = 20 + const maxRoutines = 10 q := queue.New(maxRoutines) defer q.Close() - for i := max_item; i > 22600000; i-- { + //for i := max_item; i > 22600000; i-- { + for i := max_item; i > 44921609; i-- { q.Add() go func(i int) { defer q.Done() - Story, ok := getStory(i) + Story, ok := app.getStory(i) if ok { log.Infof("%+v\n", Story) err = app.saveStory(Story) @@ -126,8 +130,8 @@ func getMaxItem() int { func (app *App) topStories() { var err error - data1 := strings.TrimSuffix(string(getTopStories()), "]") - data2 := strings.TrimPrefix(string(getBestStories()), "[") + data1 := strings.TrimSuffix(string(app.getTopStories()), "]") + data2 := strings.TrimPrefix(string(app.getBestStories()), "[") data1 = data1 + "," data := data1 + data2 @@ -146,7 +150,7 @@ func (app *App) topStories() { for _, id := range story_ids { q.Add() go func(id int) { - Story, ok := getStory(id) + Story, ok := app.getStory(id) defer q.Done() if ok { log.Infof("%+v\n", Story) @@ -155,14 +159,17 @@ func (app *App) topStories() { log.Fatal(err) } - log.Debug("topStories: crawling for Categories") - categories, ok := app.crawlForCategories(Story.Url) - if ok { - article_id := app.getArticleIdFromUrl(Story.Url) - app.saveCategory(article_id, categories) - } else { - log.Warn("topStories: crawling for Categories: not ok") - } + /* + log.Debug("topStories: crawling for Categories") + categories, ok := app.crawlForCategories(Story.Url) + if ok { + article_id := app.getArticleIdFromUrl(Story.Url) + app.saveCategory(article_id, categories) + } else { + log.Warn("topStories: crawling for Categories: not ok") + time.Sleep(time.Duration(app.Config.Delay) * time.Second) + } + */ } }(id) @@ -170,8 +177,8 @@ func (app *App) topStories() { q.Wait() } -func getStory(id int) (Story, bool) { - Story := getDetail(id) +func (app *App) getStory(id int) (Story, bool) { + Story := app.getDetail(id) if Story.Dead || Story.Deleted { return Story, false } @@ -205,17 +212,27 @@ func getStory(id int) (Story, bool) { } if is_wiki { Story.Url = wikipediaNormalizeUrl(Story.Url) - Story.Url = wikipediaRealUrl(Story.Url) + Story.Url = app.wikipediaRealUrl(Story.Url) return Story, true } return Story, false } -func getResponse(url string) *http.Response { +func (app *App) getResponse(url string) *http.Response { var err error var response *http.Response - response, err = http.Get(url) + req, err := http.NewRequest("GET", url, nil) + if err != nil { + // Fehlerbehandlung + } + + req.Header.Set("User-Agent", app.Config.UserAgent) // Hier den User-Agent setzen + + client := &http.Client{} + + response, err = client.Do(req) + //response, err = http.Get(url) if err != nil { for i := 0; i < 4; i++ { if i == 0 { @@ -233,33 +250,33 @@ func getResponse(url string) *http.Response { return response } -func getBestResponse() *http.Response { +func (app *App) getBestResponse() *http.Response { _url := "https://hacker-news.firebaseio.com/v0/beststories.json" - return getResponse(_url) + return app.getResponse(_url) } -func getTopResponse() *http.Response { +func (app *App) getTopResponse() *http.Response { _url := "https://hacker-news.firebaseio.com/v0/topstories.json" - return getResponse(_url) + return app.getResponse(_url) } -func getWikipediaResponse(title string) *http.Response { +func (app *App) getWikipediaResponse(title string) *http.Response { _url := "https://en.wikipedia.org/w/api.php?action=query&prop=extracts&format=json&exintro=&titles=" + title - return getResponse(_url) + return app.getResponse(_url) } -func getWikipediaRedirectResponse(hostname, title string) *http.Response { +func (app *App) getWikipediaRedirectResponse(hostname, title string) *http.Response { _url := "https://" + hostname + "/w/api.php?action=query&prop=info&format=json&redirects=1&inprop=url&titles=" + title - return getResponse(_url) + return app.getResponse(_url) } -func getStoryResponse(item_id string) *http.Response { +func (app *App) getStoryResponse(item_id string) *http.Response { _url := "https://hacker-news.firebaseio.com/v0/item/" + item_id + ".json" - return getResponse(_url) + return app.getResponse(_url) } -func getDetail(id int) Story { - response := getStoryResponse(strconv.Itoa(id)) +func (app *App) getDetail(id int) Story { + response := app.getStoryResponse(strconv.Itoa(id)) data, err := ioutil.ReadAll(response.Body) if err != nil { panic(err) @@ -273,8 +290,8 @@ func getDetail(id int) Story { return story } -func getTopStories() []byte { - response := getTopResponse() +func (app *App) getTopStories() []byte { + response := app.getTopResponse() data, err := ioutil.ReadAll(response.Body) if err != nil { panic(err) @@ -283,8 +300,8 @@ func getTopStories() []byte { return data } -func getBestStories() []byte { - response := getBestResponse() +func (app *App) getBestStories() []byte { + response := app.getBestResponse() data, err := ioutil.ReadAll(response.Body) if err != nil { @@ -307,7 +324,7 @@ func (app *App) updateAllDiscussions() { q.Add() go func(item_id int) { defer q.Done() - Story, ok := getStory(item_id) + Story, ok := app.getStory(item_id) if !ok { /** * Check if we got a network error or a dead story. -- cgit v1.2.3