diff options
Diffstat (limited to 'crawler/scrape.go')
| -rw-r--r-- | crawler/scrape.go | 34 |
1 file changed, 30 insertions, 4 deletions
diff --git a/crawler/scrape.go b/crawler/scrape.go index 4bc66e0..6874239 100644 --- a/crawler/scrape.go +++ b/crawler/scrape.go @@ -1,7 +1,10 @@ package main import ( + "time" + log "github.com/Sirupsen/logrus" + "github.com/gocolly/colly" ) func (app *App) ScrapeHTML(shops []Shop) { @@ -26,10 +29,6 @@ func (app *App) Scrape(shop Shop, wait chan bool) { var W []Angebot var err error - if err != nil { - Fatal(err, "scrape.go: Starting transaction failed. Shop: "+shop.Name) - } - // retry on error for i := 1; i < 4; i++ { W = app.ScrapeShop(shop) @@ -41,6 +40,13 @@ func (app *App) Scrape(shop Shop, wait chan bool) { } } + // if no results, return early + if len(W) == 0 { + wait <- true + return + + } + err = app.save_offer(W) if err != nil { Warn(err, "Saving offers failed. Shop: "+shop.Name) @@ -72,9 +78,29 @@ func (app *App) ScrapeShop(shop Shop) []Angebot { return app.ScrapeWhiskyworld(shop) case "Whiskyzone": return app.ScrapeWhiskyzone(shop) + case "Drankdozijn": + return app.ScrapeDrankdozijn(shop) default: log.Println(shop.Name + ": No Crawler") } return []Angebot{} } + +/* + * Sets the crawler config. + */ +func (app *App) customCollector(allowed_urls []string) *colly.Collector { + c := colly.NewCollector( + colly.UserAgent(app.Config.UserAgent), + colly.AllowedDomains(allowed_urls...), + ) + c.IgnoreRobotsTxt = app.Config.IgnoreRobotsTXT + + c.Limit(&colly.LimitRule{ + DomainGlob: "*", + RandomDelay: time.Duration(app.Config.Delay) * time.Second, + }) + + return c +} |
