summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: horus, 2018-02-10 14:31:47 +0100
committer: horus, 2018-02-10 14:31:47 +0100
commit: 6e0858f00d941e96010b794c3f16e31cbef2e72d (patch)
tree: 86575cc63adc2cc918bb9d2397f2340beb31bfde
parent: 20a25c6069f626e16da35e3a29f91dae764a09bb (diff)
download: alkobote-6e0858f00d941e96010b794c3f16e31cbef2e72d.tar.gz
Crawler extracts volume, price per litre and abv. (Whisky World)
-rw-r--r--  crawler/shop_whiskyde.go     2
-rw-r--r--  crawler/shop_whiskyworld.go  26
-rw-r--r--  crawler/utility.go           3
3 files changed, 29 insertions, 2 deletions
diff --git a/crawler/shop_whiskyde.go b/crawler/shop_whiskyde.go
index 053cc7f..b450c86 100644
--- a/crawler/shop_whiskyde.go
+++ b/crawler/shop_whiskyde.go
@@ -64,8 +64,6 @@ func ScrapeWhiskyde(shop Shop) []Angebot {
log.Fatal(err)
}
- e.ForEach(".article-amount", func(i int, e *colly.HTMLElement) {
- })
if W.Volume == 0 {
log.Println("Whisky.de: " + W.Name + " kein Volume erkannt")
return
diff --git a/crawler/shop_whiskyworld.go b/crawler/shop_whiskyworld.go
index 36b144e..8e4b984 100644
--- a/crawler/shop_whiskyworld.go
+++ b/crawler/shop_whiskyworld.go
@@ -44,6 +44,32 @@ func ScrapeWhiskyworld(shop Shop) []Angebot {
return
}
+ e.ForEach(".product-infobox", func(i int, e *colly.HTMLElement) {
+ text_noisy := e.ChildText(".item-inh")
+ W.Volume, err = extract_volume(text_noisy)
+ if err != nil {
+ log.Fatal(err)
+ }
+ abv_noisy := strings.TrimSpace(strings.SplitAfter(text_noisy, "Liter")[1])
+ abv_noisy = strings.TrimPrefix(abv_noisy, "/")
+ W.Abv, err = extract_abv(abv_noisy)
+ if err != nil {
+ log.Fatal(err)
+ }
+ })
+
+ e.ForEach(".price", func(i int, e *colly.HTMLElement) {
+ base_price_noisy := e.ChildText(".unit")
+ if strings.Contains(base_price_noisy, "Liter") {
+ base_price_noisy = strings.TrimSpace(strings.SplitAfter(base_price_noisy, "Liter")[0])
+ W.Base_price, err = sanitize_base_price(base_price_noisy)
+ if err != nil {
+ log.Fatal(err)
+ }
+ }
+
+ })
+
W.Image_url = "https:" + e.ChildAttr("img", "src")
W.Shop = shop.Id
diff --git a/crawler/utility.go b/crawler/utility.go
index 19c4050..1e426b9 100644
--- a/crawler/utility.go
+++ b/crawler/utility.go
@@ -70,6 +70,9 @@ func extract_abv(abv_noisy string) (float32, error) {
if strings.Contains(abv_noisy, "vol") {
abv_noisy = strings.Replace(abv_noisy, "vol", "", 1)
}
+ if strings.Contains(abv_noisy, "Vol") {
+ abv_noisy = strings.Replace(abv_noisy, "Vol", "", 1)
+ }
abv_noisy = strings.Replace(abv_noisy, ",", ".", 1)
abv_noisy = strings.TrimSpace(abv_noisy)