summary refs log tree commit diff
path: root/crawler/shop_mcwhisky.go
diff options
context:
space:
mode:
author Max 2018-02-08 18:26:41 +0100
committer Max 2018-02-08 18:26:41 +0100
commit f6904aab20e2d09255fd0adabfd246165ff3cb02 (patch)
tree f7ac27cb5dd34443640235a97ce9bde8f2a1816a /crawler/shop_mcwhisky.go
parent ae7ed42df6a55e36c82b88e7c71569951847a68c (diff)
download alkobote-f6904aab20e2d09255fd0adabfd246165ff3cb02.tar.gz
Crawler extracts volume, price per litre and abv. (MC Whisky, Rum & Co, Whic)
Diffstat (limited to 'crawler/shop_mcwhisky.go')
-rw-r--r-- crawler/shop_mcwhisky.go 41
1 files changed, 40 insertions, 1 deletions
diff --git a/crawler/shop_mcwhisky.go b/crawler/shop_mcwhisky.go
index e0c1ab8..b44e892 100644
--- a/crawler/shop_mcwhisky.go
+++ b/crawler/shop_mcwhisky.go
@@ -2,6 +2,7 @@ package main
import (
"log"
+ "regexp"
// "strings"
// "github.com/PuerkitoBio/goquery"
@@ -9,6 +10,8 @@ import (
)
func ScrapeMCWhisky(shop Shop) []Angebot {
+ Shop_url := "https://www.mcwhisky.com/whisky/whisky-sonderangebote.html"
+
Whiskys := []Angebot{}
c := colly.NewCollector(
@@ -17,6 +20,11 @@ func ScrapeMCWhisky(shop Shop) []Angebot {
)
c.OnHTML("li.item", func(e *colly.HTMLElement) {
+
+ if e.Request.URL.String() != Shop_url {
+ return
+ }
+
W := Angebot{}
whisky_name := e.ChildAttr("a", "title")
@@ -40,15 +48,46 @@ func ScrapeMCWhisky(shop Shop) []Angebot {
}
})
})
+
+ price_per_litre_noisy := e.ChildText(".price-box-extended-info-ppl")
+ W.Base_price, err = sanitize_base_price(price_per_litre_noisy)
+ if err != nil {
+ log.Fatal(err)
+ }
+
W.Image_url = e.ChildAttr("img", "src")
+ e.Request.Visit(W.Url)
+
+ W.Volume = get_volume(e)
+ W.Abv = get_abv(e)
+
W.Shop = shop.Id
W.Spirit_type = "Whisky"
Whiskys = append(Whiskys, W)
})
- c.Visit("https://www.mcwhisky.com/whisky/whisky-sonderangebote.html")
+ c.OnHTML(".products-attributes-alcohol", func(e *colly.HTMLElement) {
+ text_noisy := e.Text
+
+ r_abv, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?%`)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ e.Request.Ctx.Put("abv", r_abv.FindString(text_noisy))
+
+ r_volume, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?Liter$`)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ e.Request.Ctx.Put("volume", r_volume.FindString(text_noisy))
+
+ })
+
+ c.Visit(Shop_url)
return Whiskys
}