diff options
Diffstat (limited to 'crawler/shop_mcwhisky.go')
| -rw-r--r-- | crawler/shop_mcwhisky.go | 41 |
1 files changed, 40 insertions, 1 deletions
diff --git a/crawler/shop_mcwhisky.go b/crawler/shop_mcwhisky.go index e0c1ab8..b44e892 100644 --- a/crawler/shop_mcwhisky.go +++ b/crawler/shop_mcwhisky.go @@ -2,6 +2,7 @@ package main import ( "log" + "regexp" // "strings" // "github.com/PuerkitoBio/goquery" @@ -9,6 +10,8 @@ import ( ) func ScrapeMCWhisky(shop Shop) []Angebot { + Shop_url := "https://www.mcwhisky.com/whisky/whisky-sonderangebote.html" + Whiskys := []Angebot{} c := colly.NewCollector( @@ -17,6 +20,11 @@ func ScrapeMCWhisky(shop Shop) []Angebot { ) c.OnHTML("li.item", func(e *colly.HTMLElement) { + + if e.Request.URL.String() != Shop_url { + return + } + W := Angebot{} whisky_name := e.ChildAttr("a", "title") @@ -40,15 +48,46 @@ func ScrapeMCWhisky(shop Shop) []Angebot { } }) }) + + price_per_litre_noisy := e.ChildText(".price-box-extended-info-ppl") + W.Base_price, err = sanitize_base_price(price_per_litre_noisy) + if err != nil { + log.Fatal(err) + } + W.Image_url = e.ChildAttr("img", "src") + e.Request.Visit(W.Url) + + W.Volume = get_volume(e) + W.Abv = get_abv(e) + W.Shop = shop.Id W.Spirit_type = "Whisky" Whiskys = append(Whiskys, W) }) - c.Visit("https://www.mcwhisky.com/whisky/whisky-sonderangebote.html") + c.OnHTML(".products-attributes-alcohol", func(e *colly.HTMLElement) { + text_noisy := e.Text + + r_abv, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?%`) + if err != nil { + log.Fatal(err) + } + + e.Request.Ctx.Put("abv", r_abv.FindString(text_noisy)) + + r_volume, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?Liter$`) + if err != nil { + log.Fatal(err) + } + + e.Request.Ctx.Put("volume", r_volume.FindString(text_noisy)) + + }) + + c.Visit(Shop_url) return Whiskys } |
