summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- crawler/shop_whiskysitenl.go 84
-rw-r--r-- crawler/utility.go 14
2 files changed, 93 insertions, 5 deletions
diff --git a/crawler/shop_whiskysitenl.go b/crawler/shop_whiskysitenl.go
index 43345b2..fb2940d 100644
--- a/crawler/shop_whiskysitenl.go
+++ b/crawler/shop_whiskysitenl.go
@@ -1,7 +1,6 @@
package main
import (
- log "github.com/Sirupsen/logrus"
"regexp"
"strings"
@@ -11,6 +10,8 @@ import (
func (app *App) ScrapeWhiskysitenl(shop Shop) []Angebot {
Whiskys := []Angebot{}
+ Shop_url := "https://www.whiskysite.nl/en/specials/?limit=100"
+
c := colly.NewCollector(
colly.AllowedDomains("whiskysite.nl"),
colly.AllowedDomains("www.whiskysite.nl"),
@@ -30,30 +31,103 @@ func (app *App) ScrapeWhiskysitenl(shop Shop) []Angebot {
price_discount_noisy := e.ChildText(".product-block-price")
r, err := regexp.Compile("[0-9]+(,[0-9]{1,2})")
if err != nil {
- log.Fatal(err)
+ Fatal(err, "Whiskysite.nl: Discounted price regex failed")
}
discounted_price := r.FindString(strings.Trim(strings.TrimPrefix(price_discount_noisy, regular_price), ""))
W.Original_price, err = convert_price(regular_price)
if err != nil {
- //log.Println(W.Name, err)
+ W.error_msg = err.Error()
+ W.error_ctx = regular_price
+ WarnOffer(W, "Whiskysite.nl: Extracting original price failed")
return
}
W.Discounted_price, err = convert_price(discounted_price)
if err != nil {
- //log.Println(W.Name, err)
+ W.error_msg = err.Error()
+ W.error_ctx = discounted_price
+ WarnOffer(W, "Whiskysite.nl: Extracting discounted price failed")
return
}
W.Image_url = e.ChildAttr("img", "src")
+ if e.Request.Ctx.Get("volume_failed") != "" {
+ W.error_msg = "Whiskysite.nl: Extracting volume via Liter-Regex failed"
+ W.error_ctx = e.Request.Ctx.Get("volume_failed")
+ WarnOffer(W, "Whiskysite.nl: Extracting volume via Liter-Regex failed")
+ return
+ }
+ if e.Request.Ctx.Get("abv_failed") != "" {
+ W.error_msg = "Whiskysite.nl: Extracting abv via Abv-Regex failed"
+ W.error_ctx = e.Request.Ctx.Get("volume_failed")
+ WarnOffer(W, "Whiskysite.nl: Extracting abv via Abv-Regex failed")
+ return
+ }
+
+ var ctx string
+ W.Volume, ctx = get_volume(e)
+ if W.Volume == 0 {
+ W.error_msg = "Whiskysite.nl: Extracting volume failed"
+ W.error_ctx = ctx
+ WarnOffer(W, "Whiskysite.nl: Extracting volume failed")
+ return
+ }
+ W.Abv, ctx = get_volume(e)
+ if W.Abv == 0 {
+ W.error_msg = "Whiskysite.nl: Extracting abv failed"
+ W.error_ctx = ctx
+ WarnOffer(W, "Whiskysite.nl: Extracting abv failed")
+ return
+ }
+
+ // calculate base price, volume is never zero
+ W.Base_price = int(RoundToEven(float64(W.Discounted_price) / float64(W.Volume)))
+
W.Shop = shop.Id
W.Spirit_type = "Whisky"
Whiskys = append(Whiskys, W)
})
- c.Visit("https://www.whiskysite.nl/en/specials/?limit=100")
+ c.OnHTML("#information", func(e *colly.HTMLElement) {
+ if e.Request.URL.String() == Shop_url {
+ return
+ }
+ text_noisy := e.Text
+
+ // 0.70ltr. 43.00%
+ // 0,70 l 46%
+ // 1,0ltr. 43%
+ r_number, err := regexp.Compile("[0-9]+([.,][0-9]+)?")
+ if err != nil {
+ Fatal(err, "Whiskysite.nl: Number regex failed")
+ }
+ r_liter, err := regexp.Compile("[0-9]+([.,][0-9]+)?( )*(l|ltr)")
+ if err != nil {
+ Fatal(err, "Whiskysite.nl: Volume regex failed")
+ }
+ litre_noisy := r_liter.FindString(text_noisy)
+ if litre_noisy == "" {
+ e.Request.Ctx.Put("volume_failed", text_noisy)
+ return
+ }
+ e.Request.Ctx.Put("volume", r_number.FindString(litre_noisy))
+
+ r_abv, err := regexp.Compile("[0-9]+([.,][0-9]+)?( )*%")
+ if err != nil {
+ Fatal(err, "Whiskysite.nl: Abv regex failed")
+ }
+ abv_noisy := r_abv.FindString(text_noisy)
+ if abv_noisy == "" {
+ e.Request.Ctx.Put("abv_failed", text_noisy)
+ return
+ }
+ e.Request.Ctx.Put("abv", abv_noisy)
+
+ })
+
+ c.Visit(Shop_url)
return Whiskys
}
diff --git a/crawler/utility.go b/crawler/utility.go
index 29f14d6..f588c22 100644
--- a/crawler/utility.go
+++ b/crawler/utility.go
@@ -2,6 +2,7 @@ package main
import (
"errors"
+ "math"
"regexp"
"strconv"
"strings"
@@ -189,3 +190,16 @@ func get_base_price(e *colly.HTMLElement) (int, error) {
return base_price, nil
}
+
+/* RoundToEven returns x rounded to the nearest integer; exact halves go to the even neighbour.
+ * Source: https://golang.org/src/math/floor.go?s=2165:2200#L104
+ * Will use std lib with go version >= 1.10
+ */
+func RoundToEven(x float64) float64 {
+ t := math.Trunc(x) // integer part of x, truncated toward zero
+ odd := math.Remainder(t, 2) != 0 // true when the truncated value is odd
+ if d := math.Abs(x - t); d > 0.5 || (d == 0.5 && odd) { // past the halfway point, or an exact tie on an odd integer
+ return t + math.Copysign(1, x) // step one unit away from zero, following the sign of x
+ }
+ return t
+}