From c59d15dfc04e0fb75c8132a3ce778dcf801645c1 Mon Sep 17 00:00:00 2001
From: horus
Date: Fri, 15 Jun 2018 16:19:58 +0200
Subject: Fix because changed html. (crawler)
---
 crawler/shop_whiskyworld.go | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
(limited to 'crawler/shop_whiskyworld.go')
diff --git a/crawler/shop_whiskyworld.go b/crawler/shop_whiskyworld.go
index af97511..f617ebb 100644
--- a/crawler/shop_whiskyworld.go
+++ b/crawler/shop_whiskyworld.go
@@ -106,7 +106,10 @@ func (app *App) ScrapeWhiskyworld(shop Shop) []Angebot {
 })
- W.Image_url = "https:" + e.ChildAttr("img", "data-src")
+ W.Image_url = e.ChildAttr("img", "data-src")
+ if !strings.HasPrefix(W.Image_url, "https:") {
+ W.Image_url = "https:" + W.Image_url
+ }
 e.Request.Visit(W.Url)
 W.Website = e.Request.Ctx.Get("website")
--
cgit v1.2.3

From 0026ba55f03c5378d5773459fcdd7c6931ff42a5 Mon Sep 17 00:00:00 2001
From: Max
Date: Fri, 15 Jun 2018 19:38:04 +0200
Subject: Introduces central crawler config. (crawler)
---
 crawler/shop_whiskyworld.go | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)
(limited to 'crawler/shop_whiskyworld.go')
diff --git a/crawler/shop_whiskyworld.go b/crawler/shop_whiskyworld.go
index f617ebb..7b57d37 100644
--- a/crawler/shop_whiskyworld.go
+++ b/crawler/shop_whiskyworld.go
@@ -15,11 +15,7 @@ func (app *App) ScrapeWhiskyworld(shop Shop) []Angebot {
 Whiskys := []Angebot{}
- c := colly.NewCollector(
- colly.UserAgent("friendly"),
- colly.AllowedDomains("whiskyworld.de"),
- colly.AllowedDomains("www.whiskyworld.de"),
- )
+ c := customCollector([]string{"whiskyworld.de", "www.whiskyworld.de"})
 c.OnHTML(".product-item", func(e *colly.HTMLElement) {
 if !stringInSlice(e.Request.URL.String(), Shop_urls) {
--
cgit v1.2.3

From 8d68ac7c900241eb8499a94c23ab1f60750e7aed Mon Sep 17 00:00:00 2001
From: horus
Date: Fri, 15 Jun 2018 23:28:18 +0200
Subject: Introduces config for user agent, robots.txt and crawler delay.
(crawler)
---
 crawler/shop_whiskyworld.go | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)
(limited to 'crawler/shop_whiskyworld.go')
diff --git a/crawler/shop_whiskyworld.go b/crawler/shop_whiskyworld.go
index 7b57d37..3f0874d 100644
--- a/crawler/shop_whiskyworld.go
+++ b/crawler/shop_whiskyworld.go
@@ -15,7 +15,7 @@ func (app *App) ScrapeWhiskyworld(shop Shop) []Angebot {
 Whiskys := []Angebot{}
- c := customCollector([]string{"whiskyworld.de", "www.whiskyworld.de"})
+ c := app.customCollector([]string{"whiskyworld.de", "www.whiskyworld.de"})
 c.OnHTML(".product-item", func(e *colly.HTMLElement) {
 if !stringInSlice(e.Request.URL.String(), Shop_urls) {
@@ -123,7 +123,10 @@ func (app *App) ScrapeWhiskyworld(shop Shop) []Angebot {
 })
 for _, url := range Shop_urls {
- c.Visit(url)
+ err := c.Visit(url)
+ if err != nil {
+ Warn(nil, shop.Name+": "+err.Error())
+ }
 }
 return Whiskys
--
cgit v1.2.3