From 0026ba55f03c5378d5773459fcdd7c6931ff42a5 Mon Sep 17 00:00:00 2001
From: Max
Date: Fri, 15 Jun 2018 19:38:04 +0200
Subject: Introduces central crawler config. (crawler)

---
 crawler/shop_whic.go | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'crawler/shop_whic.go')

diff --git a/crawler/shop_whic.go b/crawler/shop_whic.go
index af86bdc..2d0170b 100644
--- a/crawler/shop_whic.go
+++ b/crawler/shop_whic.go
@@ -12,9 +12,7 @@ func (app *App) ScrapeWhic(shop Shop) []Angebot {
 	Shop_url := "https://whic.de/angebote"
 	Whiskys := []Angebot{}
 
-	c := colly.NewCollector(
-		colly.AllowedDomains("whic.de"),
-	)
+	c := customCollector([]string{"whic.de"})
 
 	c.OnHTML("li.item", func(e *colly.HTMLElement) {
 
-- 
cgit v1.2.3

From 8d68ac7c900241eb8499a94c23ab1f60750e7aed Mon Sep 17 00:00:00 2001
From: horus
Date: Fri, 15 Jun 2018 23:28:18 +0200
Subject: Introduces config for user agent, robots.txt and crawler delay. (crawler)

---
 crawler/shop_whic.go | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'crawler/shop_whic.go')

diff --git a/crawler/shop_whic.go b/crawler/shop_whic.go
index 2d0170b..93bff23 100644
--- a/crawler/shop_whic.go
+++ b/crawler/shop_whic.go
@@ -12,7 +12,7 @@ func (app *App) ScrapeWhic(shop Shop) []Angebot {
 	Shop_url := "https://whic.de/angebote"
 	Whiskys := []Angebot{}
 
-	c := customCollector([]string{"whic.de"})
+	c := app.customCollector([]string{"whic.de"})
 
 	c.OnHTML("li.item", func(e *colly.HTMLElement) {
 
@@ -127,7 +127,10 @@ func (app *App) ScrapeWhic(shop Shop) []Angebot {
 		e.Request.Ctx.Put("website", string(e.Response.Body))
 	})
 
-	c.Visit(Shop_url)
+	err := c.Visit(Shop_url)
+	if err != nil {
+		Warn(nil, shop.Name+": "+err.Error())
+	}
 
 	return Whiskys
 }
-- 
cgit v1.2.3