diff options
| author | admin | 2026-03-29 18:53:12 +0200 |
|---|---|---|
| committer | admin | 2026-03-29 18:53:12 +0200 |
| commit | 9cc3133c40439075233470ede48c5a8d7d68669f (patch) | |
| tree | 6925fbc2e95c1e5425e335dfe97cff0a8011c9d2 /main.go | |
| parent | f3300bec030793d40115a08f46a7cbf49f06c2fd (diff) | |
| download | curious-crawler-9cc3133c40439075233470ede48c5a8d7d68669f.tar.gz | |
synchronous crawling for categories
Diffstat (limited to 'main.go')
| -rw-r--r-- | main.go | 33 |
1 file changed, 20 insertions, 13 deletions
@@ -43,8 +43,9 @@ func main() { defer app.DB.Close() //app.fixAllCategories() - //return + //app.saveAllCategories() + //return app.deleteOrphanedArticles() app.topStories() app.wikipediaFixAllUrls() @@ -80,7 +81,7 @@ func (app *App) walkDown() { //max_item := 47528683 //max_item := 46750000 - const maxRoutines = 10 + const maxRoutines = 20 q := queue.New(maxRoutines) defer q.Close() @@ -144,6 +145,7 @@ func (app *App) topStories() { } const maxRoutines = 20 + storyChannel := make(chan Story, len(story_ids)) q := queue.New(maxRoutines) defer q.Close() @@ -154,27 +156,32 @@ func (app *App) topStories() { defer q.Done() if ok { log.Infof("%+v\n", Story) + err = app.saveStory(Story) if err != nil { log.Fatal(err) } - /* - log.Debug("topStories: crawling for Categories") - categories, ok := app.crawlForCategories(Story.Url) - if ok { - article_id := app.getArticleIdFromUrl(Story.Url) - app.saveCategory(article_id, categories) - } else { - log.Warn("topStories: crawling for Categories: not ok") - time.Sleep(time.Duration(app.Config.Delay) * time.Second) - } - */ + log.Debugf("sending Story to channel: %+v\n", Story) + storyChannel <- Story } }(id) } q.Wait() + close(storyChannel) + + for story := range storyChannel { + log.Debug("topStories: crawling for Categories") + categories, ok := app.crawlForCategories(story.Url) + if ok { + article_id := app.getArticleIdFromUrl(story.Url) + app.saveCategory(article_id, categories) + } else { + log.Info("topStories: crawling for Categories: not ok. Check previous log output.") + } + time.Sleep(time.Duration(app.Config.Delay) * time.Second) + } } func (app *App) getStory(id int) (Story, bool) { |
