From c7105fe21f872295a8e773f353bc47fda2d0c292 Mon Sep 17 00:00:00 2001 From: Max Date: Sat, 10 Feb 2018 01:52:59 +0100 Subject: Better detection of spirit type. (bottleworld) --- crawler/shop_bottleworld.go | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'crawler/shop_bottleworld.go') diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go index fdf1cd8..3eb151d 100644 --- a/crawler/shop_bottleworld.go +++ b/crawler/shop_bottleworld.go @@ -10,6 +10,7 @@ import ( ) func ScrapeBottleWord(shop Shop) []Angebot { + Shop_url := "https://www.bottleworld.de/aktuelle-sonderpreise/show/all" Whiskys := []Angebot{} c := colly.NewCollector( @@ -22,15 +23,7 @@ func ScrapeBottleWord(shop Shop) []Angebot { whisky_name := e.ChildText("h2 > a") - matched, err := regexp.MatchString("Whiske?y", whisky_name) - if err != nil { - log.Fatal(err) - } - if !matched { - W.Spirit_type = detect_spirit_type(whisky_name) - } else { - W.Spirit_type = "Whisky" - } + var err error whisky_url := e.ChildAttr("a", "href") W.Name = whisky_name @@ -67,6 +60,8 @@ func ScrapeBottleWord(shop Shop) []Angebot { W.Volume = get_volume(e) W.Abv = get_abv(e) + W.Spirit_type = e.Request.Ctx.Get("spirit_type") + Whiskys = append(Whiskys, W) }) @@ -85,7 +80,15 @@ func ScrapeBottleWord(shop Shop) []Angebot { }) }) - c.Visit("https://www.bottleworld.de/aktuelle-sonderpreise/show/all") + c.OnHTML(".short-description", func(e *colly.HTMLElement) { + if e.Request.URL.String() == Shop_url { + return + } + text_noisy := e.ChildText(".std") + e.Request.Ctx.Put("spirit_type", detect_spirit_type(text_noisy)) + }) + + c.Visit(Shop_url) return Whiskys } -- cgit v1.2.3