From 9cc3133c40439075233470ede48c5a8d7d68669f Mon Sep 17 00:00:00 2001 From: admin Date: Sun, 29 Mar 2026 18:53:12 +0200 Subject: synchronous crawling for categories --- main.go | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'main.go') diff --git a/main.go b/main.go index feecf27..d19598b 100644 --- a/main.go +++ b/main.go @@ -43,8 +43,9 @@ func main() { defer app.DB.Close() //app.fixAllCategories() - //return + //app.saveAllCategories() + //return app.deleteOrphanedArticles() app.topStories() app.wikipediaFixAllUrls() @@ -80,7 +81,7 @@ func (app *App) walkDown() { //max_item := 47528683 //max_item := 46750000 - const maxRoutines = 10 + const maxRoutines = 20 q := queue.New(maxRoutines) defer q.Close() @@ -144,6 +145,7 @@ func (app *App) topStories() { } const maxRoutines = 20 + storyChannel := make(chan Story, len(story_ids)) q := queue.New(maxRoutines) defer q.Close() @@ -154,27 +156,32 @@ func (app *App) topStories() { defer q.Done() if ok { log.Infof("%+v\n", Story) + err = app.saveStory(Story) if err != nil { log.Fatal(err) } - /* - log.Debug("topStories: crawling for Categories") - categories, ok := app.crawlForCategories(Story.Url) - if ok { - article_id := app.getArticleIdFromUrl(Story.Url) - app.saveCategory(article_id, categories) - } else { - log.Warn("topStories: crawling for Categories: not ok") - time.Sleep(time.Duration(app.Config.Delay) * time.Second) - } - */ + log.Debugf("sending Story to channel: %+v\n", Story) + storyChannel <- Story } }(id) } q.Wait() + close(storyChannel) + + for story := range storyChannel { + log.Debug("topStories: crawling for Categories") + categories, ok := app.crawlForCategories(story.Url) + if ok { + article_id := app.getArticleIdFromUrl(story.Url) + app.saveCategory(article_id, categories) + } else { + log.Info("topStories: crawling for Categories: not ok. Check previous log output.") + } + time.Sleep(time.Duration(app.Config.Delay) * time.Second) + } } func (app *App) getStory(id int) (Story, bool) { -- cgit v1.2.3