summary | refs | log | tree | commit | diff
path: root/crawler/shop_whic.go
diff options
context:
space:
mode:
Diffstat (limited to 'crawler/shop_whic.go')
-rw-r--r-- crawler/shop_whic.go 45
1 files changed, 44 insertions, 1 deletions
diff --git a/crawler/shop_whic.go b/crawler/shop_whic.go
index e489161..6025050 100644
--- a/crawler/shop_whic.go
+++ b/crawler/shop_whic.go
@@ -2,6 +2,7 @@ package main
import (
"log"
+ "regexp"
"strings"
"github.com/PuerkitoBio/goquery"
@@ -9,6 +10,7 @@ import (
)
func ScrapeWhic(shop Shop) []Angebot {
+ Shop_url := "https://whic.de/angebote"
Whiskys := []Angebot{}
c := colly.NewCollector(
@@ -16,6 +18,11 @@ func ScrapeWhic(shop Shop) []Angebot {
)
c.OnHTML("li.item", func(e *colly.HTMLElement) {
+
+ if e.Request.URL.String() != Shop_url {
+ return
+ }
+
W := Angebot{}
whisky_name := e.ChildAttr("a", "title")
@@ -41,6 +48,12 @@ func ScrapeWhic(shop Shop) []Angebot {
})
})
+ base_price_noisy := e.ChildText(".base-price")
+ W.Base_price, err = sanitize_base_price(base_price_noisy)
+ if err != nil {
+ log.Fatal(err)
+ }
+
/*
* colly does not parse a <noscript>, thus we are reading the content and parse it as html.
*/
@@ -52,13 +65,43 @@ func ScrapeWhic(shop Shop) []Angebot {
}
W.Image_url, _ = doc.Find("img").Attr("src")
+ e.Request.Visit(W.Url)
+ W.Volume = get_volume(e)
+ W.Abv = get_abv(e)
+
W.Shop = shop.Id
W.Spirit_type = "Whisky"
Whiskys = append(Whiskys, W)
})
- c.Visit("https://whic.de/angebote")
+ c.OnHTML("#product-view-head-txt-extra-info", func(e *colly.HTMLElement) {
+ text_noisy := e.Text
+
+ r_volume, err := regexp.Compile("Volumen: ([0-9]+([.,][0-9]+)) Liter")
+ if err != nil {
+ log.Fatal(err)
+ }
+ volume := r_volume.FindStringSubmatch(text_noisy)
+ if volume == nil || len(volume) < 2 {
+ return
+ }
+
+ e.Request.Ctx.Put("volume", volume[1]+"l")
+
+ r_abv, err := regexp.Compile("Alkoholgehalt: ([0-9]+([.,][0-9]+))%")
+ if err != nil {
+ log.Fatal(err)
+ }
+ abv := r_abv.FindStringSubmatch(text_noisy)
+ if abv == nil || len(abv) < 2 {
+ return
+ }
+
+ e.Request.Ctx.Put("abv", abv[1]+"%")
+ })
+
+ c.Visit(Shop_url)
return Whiskys
}