summary | refs | log | tree | commit | diff
path: root/crawler/shop_bottleworld.go
diff options
context:
space:
mode:
author: horus, 2018-06-15 23:28:18 +0200
committer: horus, 2018-06-15 23:28:18 +0200
commit: 8d68ac7c900241eb8499a94c23ab1f60750e7aed (patch)
tree: 3a5d444f866383d5cdefc512242dc2afa236641e /crawler/shop_bottleworld.go
parent: 0026ba55f03c5378d5773459fcdd7c6931ff42a5 (diff)
download: alkobote-8d68ac7c900241eb8499a94c23ab1f60750e7aed.tar.gz
Introduces config for user agent, robots.txt and crawler delay. (crawler)
Diffstat (limited to 'crawler/shop_bottleworld.go')
-rw-r--r-- crawler/shop_bottleworld.go | 17
1 file changed, 14 insertions, 3 deletions
diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go
index de9fe13..d679b43 100644
--- a/crawler/shop_bottleworld.go
+++ b/crawler/shop_bottleworld.go
@@ -6,13 +6,18 @@ import (
// "github.com/PuerkitoBio/goquery"
"github.com/gocolly/colly"
+ "log"
+ "time"
)
func (app *App) ScrapeBottleWord(shop Shop) []Angebot {
Shop_url := "https://www.bottleworld.de/aktuelle-sonderpreise/show/all"
Whiskys := []Angebot{}
- c := customCollector([]string{"bottleworld.de", "www.bottleworld.de"})
+ c := app.customCollector([]string{"bottleworld.de", "www.bottleworld.de"})
+
+ log.Println(c.IgnoreRobotsTxt)
+ log.Println(time.Duration(app.Config.Delay))
c.OnHTML("li.item", func(e *colly.HTMLElement) {
W := Angebot{}
@@ -60,7 +65,10 @@ func (app *App) ScrapeBottleWord(shop Shop) []Angebot {
W.Image_url = e.ChildAttr("img", "src")
- e.Request.Visit(W.Url)
+ erro := e.Request.Visit(W.Url)
+ if erro != nil {
+ Warn(nil, W.Url+" "+erro.Error())
+ }
var ctx string
W.Volume, ctx = get_volume(e)
@@ -109,7 +117,10 @@ func (app *App) ScrapeBottleWord(shop Shop) []Angebot {
e.Request.Ctx.Put("spirit_type", detect_spirit_type(text_noisy))
})
- c.Visit(Shop_url)
+ err := c.Visit(Shop_url)
+ if err != nil {
+ Warn(nil, shop.Name+": "+err.Error())
+ }
return Whiskys
}