package main

import (
	"time"

	"github.com/gocolly/colly"
)

// Scrape launches one goroutine per shop and blocks until all of them
// have signalled completion on the wait channel.
func (app *App) Scrape(shops []Shop) {
	wait := make(chan bool)
	count := 0
	for _, shop := range shops {
		go app.ScrapeShop(shop, wait)
		count++
	}
	// Wait until all goroutines have finished.
	for i := 0; i < count; i++ {
		<-wait
	}
}

// ScrapeShop scrapes a single shop, retrying up to three times if no
// offers come back, then persists the results and prunes expired ones.
func (app *App) ScrapeShop(shop Shop, wait chan bool) {
	var W []Angebot
	var err error

	// Retry on empty results, up to three attempts.
	for i := 1; i < 4; i++ {
		W = app.ScrapeHTML(shop)
		W = sanitize_offer(W, shop, i)
		if len(W) >= 1 {
			break
		}
	}

	// If there are still no results, signal completion and return early.
	if len(W) == 0 {
		wait <- true
		return
	}

	err = app.save_offer(W)
	if err != nil {
		shop.error_msg = err.Error()
		shop.Warn("Saving offers failed.")
	}

	err = app.remove_expired(W, shop)
	if err != nil {
		shop.error_msg = err.Error()
		shop.Warn("Removing expired offers failed.")
	}

	wait <- true
}

// ScrapeHTML dispatches to the shop-specific scraper by shop name.
func (app *App) ScrapeHTML(shop Shop) []Angebot {
	switch shop.Name {
	case "Bottleworld":
		return app.ScrapeBottleWord(shop)
	case "MC Whisky":
		return app.ScrapeMCWhisky(shop)
	case "Rum & Co":
		return app.ScrapeRumundCo(shop)
	case "Whic":
		return app.ScrapeWhic(shop)
	case "Whisky.de":
		return app.ScrapeWhiskyde(shop)
	case "Whiskysite":
		return app.ScrapeWhiskysitenl(shop)
	case "Whisky World":
		return app.ScrapeWhiskyworld(shop)
	case "Whiskyzone":
		return app.ScrapeWhiskyzone(shop)
	case "Drankdozijn":
		return app.ScrapeDrankdozijn(shop)
	default:
		shop.Warn("No crawler implemented for this shop.")
	}
	return []Angebot{}
}

/*
 * Builds a colly collector configured from the app settings: user agent,
 * allowed domains, robots.txt handling, and a random per-request delay.
 */
func (app *App) customCollector(allowed_urls []string) *colly.Collector {
	c := colly.NewCollector(
		colly.UserAgent(app.Config.UserAgent),
		colly.AllowedDomains(allowed_urls...),
	)
	c.IgnoreRobotsTxt = app.Config.IgnoreRobotsTXT
	c.Limit(&colly.LimitRule{
		DomainGlob:  "*",
		RandomDelay: time.Duration(app.Config.Delay) * time.Second,
	})
	return c
}
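
// The shop-specific Scrape* functions referenced in ScrapeHTML are defined
// elsewhere in the repo. The following is only a minimal sketch of what one
// of them might look like, to illustrate how the collector built by
// customCollector is used with colly's OnHTML callback. "ExampleShop", the
// domain, the CSS selectors, and the Angebot fields Name and Preis are
// assumptions for illustration, not part of the real codebase.
func (app *App) ScrapeExampleShop(shop Shop) []Angebot {
	offers := []Angebot{}
	c := app.customCollector([]string{"www.example-shop.test"})

	// Collect one offer per matched product card (selector is hypothetical).
	c.OnHTML("div.product", func(e *colly.HTMLElement) {
		offers = append(offers, Angebot{
			Name:  e.ChildText("h2.title"),
			Preis: e.ChildText("span.price"),
		})
	})

	if err := c.Visit("https://www.example-shop.test/whisky"); err != nil {
		shop.error_msg = err.Error()
		shop.Warn("Visiting the shop page failed.")
	}
	return offers
}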
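
// Design note: the counting channel in Scrape works, but a sync.WaitGroup
// expresses the same fan-out/fan-in more directly and removes the manual
// counter. This is an optional alternative sketch, not the method used
// above; it requires adding "sync" to the import block, and ScrapeShopWG
// is a hypothetical variant of ScrapeShop without the wait channel.
func (app *App) ScrapeWithWaitGroup(shops []Shop) {
	var wg sync.WaitGroup
	for _, shop := range shops {
		wg.Add(1)
		go func(s Shop) {
			defer wg.Done()
			app.ScrapeShopWG(s) // hypothetical: ScrapeShop minus the channel
		}(shop)
	}
	wg.Wait() // blocks until every shop goroutine has called Done
}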