diff options
| -rw-r--r-- | crawler/shop_whiskysitenl.go | 84 |
| -rw-r--r-- | crawler/utility.go | 14 |
2 files changed, 93 insertions, 5 deletions
diff --git a/crawler/shop_whiskysitenl.go b/crawler/shop_whiskysitenl.go index 43345b2..fb2940d 100644 --- a/crawler/shop_whiskysitenl.go +++ b/crawler/shop_whiskysitenl.go @@ -1,7 +1,6 @@ package main import ( - log "github.com/Sirupsen/logrus" "regexp" "strings" @@ -11,6 +10,8 @@ import ( func (app *App) ScrapeWhiskysitenl(shop Shop) []Angebot { Whiskys := []Angebot{} + Shop_url := "https://www.whiskysite.nl/en/specials/?limit=100" + c := colly.NewCollector( colly.AllowedDomains("whiskysite.nl"), colly.AllowedDomains("www.whiskysite.nl"), @@ -30,30 +31,103 @@ func (app *App) ScrapeWhiskysitenl(shop Shop) []Angebot { price_discount_noisy := e.ChildText(".product-block-price") r, err := regexp.Compile("[0-9]+(,[0-9]{1,2})") if err != nil { - log.Fatal(err) + Fatal(err, "Whiskysite.nl: Discounted price regex failed") } discounted_price := r.FindString(strings.Trim(strings.TrimPrefix(price_discount_noisy, regular_price), "")) W.Original_price, err = convert_price(regular_price) if err != nil { - //log.Println(W.Name, err) + W.error_msg = err.Error() + W.error_ctx = regular_price + WarnOffer(W, "Whiskysite.nl: Extracting original price failed") return } W.Discounted_price, err = convert_price(discounted_price) if err != nil { - //log.Println(W.Name, err) + W.error_msg = err.Error() + W.error_ctx = discounted_price + WarnOffer(W, "Whiskysite.nl: Extracting discounted price failed") return } W.Image_url = e.ChildAttr("img", "src") + if e.Request.Ctx.Get("volume_failed") != "" { + W.error_msg = "Whiskysite.nl: Extracting volume via Liter-Regex failed" + W.error_ctx = e.Request.Ctx.Get("volume_failed") + WarnOffer(W, "Whiskysite.nl: Extracting volume via Liter-Regex failed") + return + } + if e.Request.Ctx.Get("abv_failed") != "" { + W.error_msg = "Whiskysite.nl: Extracting abv via Abv-Regex failed" + W.error_ctx = e.Request.Ctx.Get("volume_failed") + WarnOffer(W, "Whiskysite.nl: Extracting abv via Abv-Regex failed") + return + } + + var ctx string + W.Volume, ctx = 
get_volume(e) + if W.Volume == 0 { + W.error_msg = "Whiskysite.nl: Extracting volume failed" + W.error_ctx = ctx + WarnOffer(W, "Whiskysite.nl: Extracting volume failed") + return + } + W.Abv, ctx = get_volume(e) + if W.Abv == 0 { + W.error_msg = "Whiskysite.nl: Extracting abv failed" + W.error_ctx = ctx + WarnOffer(W, "Whiskysite.nl: Extracting abv failed") + return + } + + // calculate base price, volume is never zero + W.Base_price = int(RoundToEven(float64(W.Discounted_price) / float64(W.Volume))) + W.Shop = shop.Id W.Spirit_type = "Whisky" Whiskys = append(Whiskys, W) }) - c.Visit("https://www.whiskysite.nl/en/specials/?limit=100") + c.OnHTML("#information", func(e *colly.HTMLElement) { + if e.Request.URL.String() == Shop_url { + return + } + text_noisy := e.Text + + // 0.70ltr. 43.00% + // 0,70 l 46% + // 1,0ltr. 43% + r_number, err := regexp.Compile("[0-9]+([.,][0-9]+)?") + if err != nil { + Fatal(err, "Whiskysite.nl: Number regex failed") + } + r_liter, err := regexp.Compile("[0-9]+([.,][0-9]+)?( )*(l|ltr)") + if err != nil { + Fatal(err, "Whiskysite.nl: Volume regex failed") + } + litre_noisy := r_liter.FindString(text_noisy) + if litre_noisy == "" { + e.Request.Ctx.Put("volume_failed", text_noisy) + return + } + e.Request.Ctx.Put("volume", r_number.FindString(litre_noisy)) + + r_abv, err := regexp.Compile("[0-9]+([.,][0-9]+)?( )*%") + if err != nil { + Fatal(err, "Whiskysite.nl: Abv regex failed") + } + abv_noisy := r_abv.FindString(text_noisy) + if abv_noisy == "" { + e.Request.Ctx.Put("abv_failed", text_noisy) + return + } + e.Request.Ctx.Put("abv", abv_noisy) + + }) + + c.Visit(Shop_url) return Whiskys } diff --git a/crawler/utility.go b/crawler/utility.go index 29f14d6..f588c22 100644 --- a/crawler/utility.go +++ b/crawler/utility.go @@ -2,6 +2,7 @@ package main import ( "errors" + "math" "regexp" "strconv" "strings" @@ -189,3 +190,16 @@ func get_base_price(e *colly.HTMLElement) (int, error) { return base_price, nil } + +/* + * Source: 
https://golang.org/src/math/floor.go?s=2165:2200#L104 + * Will use std lib with go version >= 1.10 + */ +func RoundToEven(x float64) float64 { + t := math.Trunc(x) + odd := math.Remainder(t, 2) != 0 + if d := math.Abs(x - t); d > 0.5 || (d == 0.5 && odd) { + return t + math.Copysign(1, x) + } + return t +} |
