diff options
| author | Max | 2018-02-10 01:52:59 +0100 |
|---|---|---|
| committer | Max | 2018-02-10 01:52:59 +0100 |
| commit | c7105fe21f872295a8e773f353bc47fda2d0c292 (patch) | |
| tree | f58df63ead903cc9a8d3f7d12e672d2f2e97448e /crawler | |
| parent | 5f94523186e664f207398a8fb4cfbdf207987f2e (diff) | |
| download | alkobote-c7105fe21f872295a8e773f353bc47fda2d0c292.tar.gz | |
Better detection of spirit type. (bottleworld)
Diffstat (limited to 'crawler')
| -rw-r--r-- | crawler/shop_bottleworld.go | 23 |
1 file changed, 13 insertions, 10 deletions
diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go index fdf1cd8..3eb151d 100644 --- a/crawler/shop_bottleworld.go +++ b/crawler/shop_bottleworld.go @@ -10,6 +10,7 @@ import ( ) func ScrapeBottleWord(shop Shop) []Angebot { + Shop_url := "https://www.bottleworld.de/aktuelle-sonderpreise/show/all" Whiskys := []Angebot{} c := colly.NewCollector( @@ -22,15 +23,7 @@ func ScrapeBottleWord(shop Shop) []Angebot { whisky_name := e.ChildText("h2 > a") - matched, err := regexp.MatchString("Whiske?y", whisky_name) - if err != nil { - log.Fatal(err) - } - if !matched { - W.Spirit_type = detect_spirit_type(whisky_name) - } else { - W.Spirit_type = "Whisky" - } + var err error whisky_url := e.ChildAttr("a", "href") W.Name = whisky_name @@ -67,6 +60,8 @@ func ScrapeBottleWord(shop Shop) []Angebot { W.Volume = get_volume(e) W.Abv = get_abv(e) + W.Spirit_type = e.Request.Ctx.Get("spirit_type") + Whiskys = append(Whiskys, W) }) @@ -85,7 +80,15 @@ func ScrapeBottleWord(shop Shop) []Angebot { }) }) - c.Visit("https://www.bottleworld.de/aktuelle-sonderpreise/show/all") + c.OnHTML(".short-description", func(e *colly.HTMLElement) { + if e.Request.URL.String() == Shop_url { + return + } + text_noisy := e.ChildText(".std") + e.Request.Ctx.Put("spirit_type", detect_spirit_type(text_noisy)) + }) + + c.Visit(Shop_url) return Whiskys } |
