summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Max, 2018-02-10 01:52:59 +0100
committer: Max, 2018-02-10 01:52:59 +0100
commit: c7105fe21f872295a8e773f353bc47fda2d0c292 (patch)
tree: f58df63ead903cc9a8d3f7d12e672d2f2e97448e
parent: 5f94523186e664f207398a8fb4cfbdf207987f2e (diff)
download: alkobote-c7105fe21f872295a8e773f353bc47fda2d0c292.tar.gz
Better detection of spirit type. (bottleworld)
-rw-r--r-- crawler/shop_bottleworld.go 23
1 file changed, 13 insertions, 10 deletions
diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go
index fdf1cd8..3eb151d 100644
--- a/crawler/shop_bottleworld.go
+++ b/crawler/shop_bottleworld.go
@@ -10,6 +10,7 @@ import (
)
func ScrapeBottleWord(shop Shop) []Angebot {
+ Shop_url := "https://www.bottleworld.de/aktuelle-sonderpreise/show/all"
Whiskys := []Angebot{}
c := colly.NewCollector(
@@ -22,15 +23,7 @@ func ScrapeBottleWord(shop Shop) []Angebot {
whisky_name := e.ChildText("h2 > a")
- matched, err := regexp.MatchString("Whiske?y", whisky_name)
- if err != nil {
- log.Fatal(err)
- }
- if !matched {
- W.Spirit_type = detect_spirit_type(whisky_name)
- } else {
- W.Spirit_type = "Whisky"
- }
+ var err error
whisky_url := e.ChildAttr("a", "href")
W.Name = whisky_name
@@ -67,6 +60,8 @@ func ScrapeBottleWord(shop Shop) []Angebot {
W.Volume = get_volume(e)
W.Abv = get_abv(e)
+ W.Spirit_type = e.Request.Ctx.Get("spirit_type")
+
Whiskys = append(Whiskys, W)
})
@@ -85,7 +80,15 @@ func ScrapeBottleWord(shop Shop) []Angebot {
})
})
- c.Visit("https://www.bottleworld.de/aktuelle-sonderpreise/show/all")
+ c.OnHTML(".short-description", func(e *colly.HTMLElement) {
+ if e.Request.URL.String() == Shop_url {
+ return
+ }
+ text_noisy := e.ChildText(".std")
+ e.Request.Ctx.Put("spirit_type", detect_spirit_type(text_noisy))
+ })
+
+ c.Visit(Shop_url)
return Whiskys
}