summary | refs | log | tree | commit | diff
path: root/crawler/shop_whiskysitenl.go
diff options
context:
space:
mode:
author horus_arch 2018-02-17 15:07:52 +0100
committer horus_arch 2018-02-17 15:07:52 +0100
commit fc83917d623228b09191f178062e59fad0722795 (patch)
tree dd81ddee7f28550c974e27b60a0d0419a53b0c8d /crawler/shop_whiskysitenl.go
parent bcdea2f8e95f5305625a773223829478c8c13bed (diff)
download alkobote-fc83917d623228b09191f178062e59fad0722795.tar.gz
Adds crawler for whiskysite.nl. (crawler)
Diffstat (limited to 'crawler/shop_whiskysitenl.go')
-rw-r--r-- crawler/shop_whiskysitenl.go 84
1 files changed, 79 insertions, 5 deletions
diff --git a/crawler/shop_whiskysitenl.go b/crawler/shop_whiskysitenl.go
index 43345b2..fb2940d 100644
--- a/crawler/shop_whiskysitenl.go
+++ b/crawler/shop_whiskysitenl.go
@@ -1,7 +1,6 @@
package main
import (
- log "github.com/Sirupsen/logrus"
"regexp"
"strings"
@@ -11,6 +10,8 @@ import (
func (app *App) ScrapeWhiskysitenl(shop Shop) []Angebot {
Whiskys := []Angebot{}
+ Shop_url := "https://www.whiskysite.nl/en/specials/?limit=100"
+
c := colly.NewCollector(
colly.AllowedDomains("whiskysite.nl"),
colly.AllowedDomains("www.whiskysite.nl"),
@@ -30,30 +31,103 @@ func (app *App) ScrapeWhiskysitenl(shop Shop) []Angebot {
price_discount_noisy := e.ChildText(".product-block-price")
r, err := regexp.Compile("[0-9]+(,[0-9]{1,2})")
if err != nil {
- log.Fatal(err)
+ Fatal(err, "Whiskysite.nl: Discounted price regex failed")
}
discounted_price := r.FindString(strings.Trim(strings.TrimPrefix(price_discount_noisy, regular_price), ""))
W.Original_price, err = convert_price(regular_price)
if err != nil {
- //log.Println(W.Name, err)
+ W.error_msg = err.Error()
+ W.error_ctx = regular_price
+ WarnOffer(W, "Whiskysite.nl: Extracting original price failed")
return
}
W.Discounted_price, err = convert_price(discounted_price)
if err != nil {
- //log.Println(W.Name, err)
+ W.error_msg = err.Error()
+ W.error_ctx = discounted_price
+ WarnOffer(W, "Whiskysite.nl: Extracting discounted price failed")
return
}
W.Image_url = e.ChildAttr("img", "src")
+ if e.Request.Ctx.Get("volume_failed") != "" {
+ W.error_msg = "Whiskysite.nl: Extracting volume via Liter-Regex failed"
+ W.error_ctx = e.Request.Ctx.Get("volume_failed")
+ WarnOffer(W, "Whiskysite.nl: Extracting volume via Liter-Regex failed")
+ return
+ }
+ if e.Request.Ctx.Get("abv_failed") != "" {
+ W.error_msg = "Whiskysite.nl: Extracting abv via Abv-Regex failed"
+ W.error_ctx = e.Request.Ctx.Get("volume_failed")
+ WarnOffer(W, "Whiskysite.nl: Extracting abv via Abv-Regex failed")
+ return
+ }
+
+ var ctx string
+ W.Volume, ctx = get_volume(e)
+ if W.Volume == 0 {
+ W.error_msg = "Whiskysite.nl: Extracting volume failed"
+ W.error_ctx = ctx
+ WarnOffer(W, "Whiskysite.nl: Extracting volume failed")
+ return
+ }
+ W.Abv, ctx = get_volume(e)
+ if W.Abv == 0 {
+ W.error_msg = "Whiskysite.nl: Extracting abv failed"
+ W.error_ctx = ctx
+ WarnOffer(W, "Whiskysite.nl: Extracting abv failed")
+ return
+ }
+
+ // calculate base price, volume is never zero
+ W.Base_price = int(RoundToEven(float64(W.Discounted_price) / float64(W.Volume)))
+
W.Shop = shop.Id
W.Spirit_type = "Whisky"
Whiskys = append(Whiskys, W)
})
- c.Visit("https://www.whiskysite.nl/en/specials/?limit=100")
+ c.OnHTML("#information", func(e *colly.HTMLElement) {
+ if e.Request.URL.String() == Shop_url {
+ return
+ }
+ text_noisy := e.Text
+
+ // 0.70ltr. 43.00%
+ // 0,70 l 46%
+ // 1,0ltr. 43%
+ r_number, err := regexp.Compile("[0-9]+([.,][0-9]+)?")
+ if err != nil {
+ Fatal(err, "Whiskysite.nl: Number regex failed")
+ }
+ r_liter, err := regexp.Compile("[0-9]+([.,][0-9]+)?( )*(l|ltr)")
+ if err != nil {
+ Fatal(err, "Whiskysite.nl: Volume regex failed")
+ }
+ litre_noisy := r_liter.FindString(text_noisy)
+ if litre_noisy == "" {
+ e.Request.Ctx.Put("volume_failed", text_noisy)
+ return
+ }
+ e.Request.Ctx.Put("volume", r_number.FindString(litre_noisy))
+
+ r_abv, err := regexp.Compile("[0-9]+([.,][0-9]+)?( )*%")
+ if err != nil {
+ Fatal(err, "Whiskysite.nl: Abv regex failed")
+ }
+ abv_noisy := r_abv.FindString(text_noisy)
+ if abv_noisy == "" {
+ e.Request.Ctx.Put("abv_failed", text_noisy)
+ return
+ }
+ e.Request.Ctx.Put("abv", abv_noisy)
+
+ })
+
+ c.Visit(Shop_url)
return Whiskys
}