summary refs log tree commit diff
path: root/crawler/shop_bottleworld.go
diff options
context:
space:
mode:
Diffstat (limited to 'crawler/shop_bottleworld.go')
-rw-r--r-- crawler/shop_bottleworld.go 43
1 files changed, 39 insertions, 4 deletions
diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go
index b6af7e0..a3eae35 100644
--- a/crawler/shop_bottleworld.go
+++ b/crawler/shop_bottleworld.go
@@ -3,7 +3,7 @@ package main
import (
"log"
"regexp"
- // "strings"
+ "strings"
// "github.com/PuerkitoBio/goquery"
"github.com/gocolly/colly"
@@ -27,8 +27,7 @@ func ScrapeBottleWord(shop Shop) []Angebot {
log.Fatal(err)
}
if !matched {
- //W.Spirit_type = "Anderes"
- return
+ W.Spirit_type = detect_spirit_type(whisky_name)
} else {
W.Spirit_type = "Whisky"
}
@@ -51,14 +50,50 @@ func ScrapeBottleWord(shop Shop) []Angebot {
}
})
})
+
+ price_per_litre_noisy := e.ChildText(".price-per-liter")
+ price_per_litre, err := sanitize_price_per(price_per_litre_noisy)
+ if err != nil {
+ log.Fatal(err)
+ }
+ W.Price_per_litre = price_per_litre
+
W.Image_url = e.ChildAttr("img", "src")
+ e.Request.Visit(W.Url)
+
W.Shop = shop.Id
- W.Spirit_type = "Whisky"
+
+ volume_noisy := e.Request.Ctx.Get("volume")
+ W.Volume, err = extract_volume(volume_noisy)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ abv_noisy := e.Request.Ctx.Get("abv")
+ W.Abv, err = extract_abv(abv_noisy)
+ if err != nil {
+ log.Fatal(err)
+ }
Whiskys = append(Whiskys, W)
})
+ c.OnHTML("#product-attribute-specs-table", func(e *colly.HTMLElement) {
+ e.ForEach("tr", func(i int, e *colly.HTMLElement) {
+ td_str := e.ChildText("td")
+ matched, err := regexp.MatchString("[0-9]+([,.][0-9]+)? l$", td_str)
+ if err != nil {
+ log.Fatal(err)
+ }
+ if matched {
+ e.Request.Ctx.Put("volume", td_str)
+ } else if strings.Contains(td_str, "%") {
+ e.Request.Ctx.Put("abv", td_str)
+ }
+ })
+ })
+
c.Visit("https://www.bottleworld.de/aktuelle-sonderpreise/show/all")
return Whiskys