summary refs log tree commit diff
diff options
context:
space:
mode:
author horus_arch 2018-02-17 13:51:35 +0100
committer horus_arch 2018-02-17 13:51:35 +0100
commit bcdea2f8e95f5305625a773223829478c8c13bed (patch)
tree efac40b03131b4f9e43de848920695ee785a0a3f
parent 9ebf51364773dae6db4c0c47d77710c9f1a37b51 (diff)
download alkobote-bcdea2f8e95f5305625a773223829478c8c13bed.tar.gz
Introduces context on errors. (crawler)
-rw-r--r-- crawler/database.go 4
-rw-r--r-- crawler/log.go 31
-rw-r--r-- crawler/main.go 3
-rw-r--r-- crawler/post_process.go 9
-rw-r--r-- crawler/scrape.go 24
-rw-r--r-- crawler/shop_bottleworld.go 30
-rw-r--r-- crawler/shop_mcwhisky.go 30
-rw-r--r-- crawler/shop_rumundco.go 50
-rw-r--r-- crawler/shop_whic.go 35
-rw-r--r-- crawler/shop_whiskyde.go 38
-rw-r--r-- crawler/shop_whiskysitenl.go 2
-rw-r--r-- crawler/shop_whiskyworld.go 46
-rw-r--r-- crawler/shop_whiskyzone.go 34
-rw-r--r-- crawler/shops.go 4
-rw-r--r-- crawler/utility.go 23
15 files changed, 252 insertions, 111 deletions
diff --git a/crawler/database.go b/crawler/database.go
index 2608c59..557f491 100644
--- a/crawler/database.go
+++ b/crawler/database.go
@@ -109,7 +109,9 @@ func (app *App) save_offer(W []Angebot) error {
_, err = stmt.Exec(o.Shop, o.Name, o.Url, o.Abv, o.Volume, o.Original_price, o.Discounted_price, o.Base_price, o.Valid_until, o.Image_url, o.Website, o.Spirit_type, app.Now)
}
if err != nil {
- Debug(err, "Save Offer: Inserting offer failed")
+ o.error_msg = err.Error()
+ o.error_ctx = "Save Offer: Inserting offer failed"
+ DebugOffer(o, "Save Offer: Inserting offer failed")
return err
}
diff --git a/crawler/log.go b/crawler/log.go
index 7d8e707..0cd681d 100644
--- a/crawler/log.go
+++ b/crawler/log.go
@@ -24,11 +24,18 @@ func init() {
}
func Fatal(err error, msg string) {
- log.WithFields(
- log.Fields{
- "error": err.Error(),
- },
- ).Fatal(msg)
+ if err != nil {
+ log.WithFields(
+ log.Fields{
+ "error": err.Error(),
+ },
+ ).Fatal(msg)
+ } else {
+ log.Fatal(msg)
+ }
+}
+func FatalOffer(offer Angebot, msg string) {
+ log.WithFields(getFields(offer)).Fatal(msg)
}
func Println(err error, msg string) {
@@ -83,5 +90,19 @@ func getFields(offer Angebot) log.Fields {
"Image_url": offer.Image_url,
"Spirit Type": offer.Spirit_type,
"Valid Until": offer.Valid_until,
+ "Error_msg": offer.error_msg,
+ "Error_ctx": offer.error_ctx,
+ }
+}
+
+func Warn(err error, msg string) {
+ if err != nil {
+ log.WithFields(
+ log.Fields{
+ "error": err.Error(),
+ },
+ ).Warn(msg)
+ } else {
+ log.Warn(msg)
}
}
diff --git a/crawler/main.go b/crawler/main.go
index 8727443..ece25e4 100644
--- a/crawler/main.go
+++ b/crawler/main.go
@@ -37,6 +37,9 @@ type Angebot struct {
Spirit_type string
Website string
Valid_until int
+
+ error_msg string
+ error_ctx string
}
type Shop struct {
diff --git a/crawler/post_process.go b/crawler/post_process.go
index dfd7861..4688cc8 100644
--- a/crawler/post_process.go
+++ b/crawler/post_process.go
@@ -37,9 +37,8 @@ func (app *App) short_url() error {
v.Add("url", offer_db.Url)
polr_url := app.Config.Polr_URL + "?" + v.Encode()
- if app.Config.Debug {
- log.Debug("polr_url: " + polr_url + " ( " + offer_db.Url + " )")
- }
+ log.Debug("polr_url: " + polr_url + " ( " + offer_db.Url + " )")
+
resp, err := http.Get(polr_url)
if err != nil {
return err
@@ -52,9 +51,7 @@ func (app *App) short_url() error {
}
offer_db.Short_url = string(short_url)
- if app.Config.Debug {
- log.Debug("short_url: " + string(short_url) + " ( " + offer_db.Url + " )")
- }
+ log.Debug("short_url: " + string(short_url) + " ( " + offer_db.Url + " )")
Angebote = append(Angebote, offer_db)
}
diff --git a/crawler/scrape.go b/crawler/scrape.go
index f6ad80b..ae63e5c 100644
--- a/crawler/scrape.go
+++ b/crawler/scrape.go
@@ -28,41 +28,41 @@ func (app *App) Scrape(shop Shop, wait chan bool) {
var W []Angebot
var err error
- W = ScrapeShop(shop)
+ W = app.ScrapeShop(shop)
W = sanitize_offer(W, shop)
err = app.save_offer(W)
if err != nil {
- Fatal(err, "Saving offers failed")
+ Warn(err, "Saving offers failed")
}
err = app.remove_expired(W, shop)
if err != nil {
- Fatal(err, "Removing expired offers failed")
+ Warn(err, "Removing expired offers failed")
}
wait <- true
}
-func ScrapeShop(shop Shop) []Angebot {
+func (app *App) ScrapeShop(shop Shop) []Angebot {
switch shop.Name {
case "Bottleworld":
- return ScrapeBottleWord(shop)
+ return app.ScrapeBottleWord(shop)
case "MC Whisky":
- return ScrapeMCWhisky(shop)
+ return app.ScrapeMCWhisky(shop)
case "Rum & Co":
- return ScrapeRumundCo(shop)
+ return app.ScrapeRumundCo(shop)
case "Whic":
- return ScrapeWhic(shop)
+ return app.ScrapeWhic(shop)
case "Whisky.de":
- return ScrapeWhiskyde(shop)
+ return app.ScrapeWhiskyde(shop)
//case "Whiskysite.nl":
- // return ScrapeWhiskysitenl(shop)
+ // return app.ScrapeWhiskysitenl(shop)
case "Whisky World":
- return ScrapeWhiskyworld(shop)
+ return app.ScrapeWhiskyworld(shop)
case "Whiskyzone":
- return ScrapeWhiskyzone(shop)
+ return app.ScrapeWhiskyzone(shop)
default:
log.Println(shop.Name + ": No Crawler")
}
diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go
index db45791..720f2c8 100644
--- a/crawler/shop_bottleworld.go
+++ b/crawler/shop_bottleworld.go
@@ -8,7 +8,7 @@ import (
"github.com/gocolly/colly"
)
-func ScrapeBottleWord(shop Shop) []Angebot {
+func (app *App) ScrapeBottleWord(shop Shop) []Angebot {
Shop_url := "https://www.bottleworld.de/aktuelle-sonderpreise/show/all"
Whiskys := []Angebot{}
@@ -32,13 +32,19 @@ func ScrapeBottleWord(shop Shop) []Angebot {
e.ForEach(".old-price", func(i int, e *colly.HTMLElement) {
W.Original_price, err = convert_price(e.ChildText(".price"))
if err != nil {
- Fatal(err, "Bottleworld: Converting original price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = e.ChildText(".price")
+ WarnOffer(W, "Bottleworld: Converting original price failed")
+ return
}
})
e.ForEach(".special-price", func(i int, e *colly.HTMLElement) {
W.Discounted_price, err = convert_price(e.ChildText(".price"))
if err != nil {
- Fatal(err, "Bottleworld: Converting discounted price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = e.ChildText(".price")
+ WarnOffer(W, "Bottleworld: Converting discounted price failed")
+ return
}
})
})
@@ -46,7 +52,10 @@ func ScrapeBottleWord(shop Shop) []Angebot {
price_per_litre_noisy := e.ChildText(".price-per-liter")
price_per_litre, err := sanitize_base_price(price_per_litre_noisy)
if err != nil {
- Fatal(err, "Bottleworld: Sanitizing base price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = price_per_litre_noisy
+ WarnOffer(W, "Bottleworld: Sanitizing base price failed")
+ return
}
W.Base_price = price_per_litre
@@ -56,14 +65,19 @@ func ScrapeBottleWord(shop Shop) []Angebot {
W.Shop = shop.Id
- W.Volume = get_volume(e)
+ var ctx string
+ W.Volume, ctx = get_volume(e)
if W.Volume == 0 {
- DebugOffer(W, "Bottleworld: Volume is zero")
+ W.error_msg = "Bottleworld: Volume is zero"
+ W.error_ctx = ctx
+ WarnOffer(W, "Bottleworld: Volume is zero")
return
}
- W.Abv = get_abv(e)
+ W.Abv, ctx = get_abv(e)
if W.Abv == 0 {
- DebugOffer(W, "Bottleworld: Abv is zero")
+ W.error_msg = "Bottleworld: Abv is zero"
+ W.error_ctx = ctx
+ WarnOffer(W, "Bottleworld: Abv is zero")
return
}
diff --git a/crawler/shop_mcwhisky.go b/crawler/shop_mcwhisky.go
index b423c72..c015d26 100644
--- a/crawler/shop_mcwhisky.go
+++ b/crawler/shop_mcwhisky.go
@@ -6,7 +6,7 @@ import (
"github.com/gocolly/colly"
)
-func ScrapeMCWhisky(shop Shop) []Angebot {
+func (app *App) ScrapeMCWhisky(shop Shop) []Angebot {
Shop_url := "https://www.mcwhisky.com/whisky/whisky-sonderangebote.html"
Whiskys := []Angebot{}
@@ -35,13 +35,19 @@ func ScrapeMCWhisky(shop Shop) []Angebot {
e.ForEach(".old-price", func(i int, e *colly.HTMLElement) {
W.Original_price, err = convert_price(e.ChildText(".price"))
if err != nil {
- Fatal(err, "MC Whisky: Converting original price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = e.ChildText(".price")
+ WarnOffer(W, "MC Whisky: Converting original price failed")
+ return
}
})
e.ForEach(".special-price", func(i int, e *colly.HTMLElement) {
W.Discounted_price, err = convert_price(e.ChildText(".price"))
if err != nil {
- Fatal(err, "MC Whisky: Converting discounted price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = e.ChildText(".price")
+ WarnOffer(W, "MC Whisky: Converting discounted price failed")
+ return
}
})
})
@@ -49,21 +55,29 @@ func ScrapeMCWhisky(shop Shop) []Angebot {
price_per_litre_noisy := e.ChildText(".price-box-extended-info-ppl")
W.Base_price, err = sanitize_base_price(price_per_litre_noisy)
if err != nil {
- Fatal(err, "MC Whisky: Sanitizing base price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = price_per_litre_noisy
+ WarnOffer(W, "MC Whisky: Sanitizing base price failed")
+ return
}
W.Image_url = e.ChildAttr("img", "src")
e.Request.Visit(W.Url)
- W.Volume = get_volume(e)
+ var ctx string
+ W.Volume, ctx = get_volume(e)
if W.Abv == 0 {
- DebugOffer(W, "MC Whisky: Volume is zero")
+ W.error_msg = "MC Whisky: Volume is zero"
+ W.error_ctx = ctx
+ WarnOffer(W, "MC Whisky: Volume is zero")
return
}
- W.Abv = get_abv(e)
+ W.Abv, ctx = get_abv(e)
if W.Abv == 0 {
- DebugOffer(W, "MC Whisky: Abv is zero")
+ W.error_msg = "MC Whisky: Abv is zero"
+ W.error_ctx = ctx
+ WarnOffer(W, "MC Whisky: Abv is zero")
return
}
diff --git a/crawler/shop_rumundco.go b/crawler/shop_rumundco.go
index 25b89bd..58de518 100644
--- a/crawler/shop_rumundco.go
+++ b/crawler/shop_rumundco.go
@@ -8,7 +8,7 @@ import (
"github.com/gocolly/colly"
)
-func ScrapeRumundCo(shop Shop) []Angebot {
+func (app *App) ScrapeRumundCo(shop Shop) []Angebot {
Shop_url := "https://www.rumundco.de/navi.php?q=4&kf=29&kk-suesse-von=0&kk-suesse-bis=100&kk-milde-von=0&kk-milde-bis=100&kk-wuerze-von=0&kk-wuerze-bis=100&kk-frucht-von=0&kk-frucht-bis=100&kk-torf-von=0&kk-torf-bis=100&hf=0&af=90&Sortierung=11&a=350"
@@ -35,6 +35,9 @@ func ScrapeRumundCo(shop Shop) []Angebot {
Fatal(err, "Rum & Co: Verfügbar regex failed")
}
if !matched {
+ W.error_msg = "Rum & Co: Offer not available"
+ W.error_ctx = e.ChildText(".delivery-status")
+ WarnOffer(W, "Rum & Co: Offer not available")
return
}
@@ -50,23 +53,34 @@ func ScrapeRumundCo(shop Shop) []Angebot {
e.ForEach(".price_wrapper", func(i int, e *colly.HTMLElement) {
regular_price := e.ChildText("del.value")
if "" == regular_price {
- PrintlnOffer(W, "Rum & Co: No regular price found")
+ W.error_msg = "Rum & Co: No regular price found"
+ W.error_ctx = regular_price
+ WarnOffer(W, "Rum & Co: No regular price found")
return
}
W.Original_price, err = convert_price(regular_price)
if err != nil {
- Fatal(err, "Rum & Co: Original price: Convert price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = regular_price
+ WarnOffer(W, "Rum & Co: Original price: Convert price failed")
+ return
}
W.Discounted_price, err = convert_price(e.ChildText(".price-value"))
if err != nil {
- Fatal(err, "Rum & Co: Discounted price: Convert price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = e.ChildText(".price-value")
+ WarnOffer(W, "Rum & Co: Discounted price: Convert price failed")
+ return
}
e.ForEach(".base_price", func(i int, e *colly.HTMLElement) {
price_per_litre_noisy := e.ChildText(".value")
W.Base_price, err = sanitize_base_price(price_per_litre_noisy)
if err != nil {
- Fatal(err, "Rum & Co: Base price: Sanitizing base price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = e.ChildText(".value")
+ WarnOffer(W, "Rum & Co: Base price: Sanitizing base price failed")
+ return
}
})
@@ -82,7 +96,9 @@ func ScrapeRumundCo(shop Shop) []Angebot {
}
image_url_noisy_slice := r_pagespeed.FindStringSubmatch(image_url_noisy)
if len(image_url_noisy_slice) < 2 {
- PrintlnOffer(W, "Rum & Co: (Pagespeed) Image URL not found")
+ W.error_msg = "Rum & Co: (Pagespeed) Image URL not found"
+ W.error_ctx = image_url_noisy
+ WarnOffer(W, "Rum & Co: (Pagespeed) Image URL not found")
return
}
image_url_noisy = strings.Replace(image_url_noisy, image_url_noisy_slice[1], "", 1)
@@ -92,23 +108,32 @@ func ScrapeRumundCo(shop Shop) []Angebot {
e.Request.Visit(W.Url)
- W.Volume = get_volume(e)
+ var ctx string
+ W.Volume, ctx = get_volume(e)
if W.Volume == 0 {
- DebugOffer(W, "Rum & Co: Volume is zero")
+ W.error_msg = "Rum & Co: Volume is zero"
+ W.error_ctx = ctx
+ WarnOffer(W, "Rum & Co: Volume is zero")
return
}
if "" == abv_noisy {
- W.Abv = get_abv(e)
+ W.Abv, ctx = get_abv(e)
+ abv_noisy = ctx
} else {
W.Abv, err = extract_abv(abv_noisy)
if err != nil {
- Fatal(err, "Rum & Co: Base price: Extracting ABV failed")
+ W.error_msg = err.Error()
+ W.error_ctx = abv_noisy
+ WarnOffer(W, "Rum & Co: Base price: Extracting ABV failed")
+ return
}
}
if W.Abv == 0 {
- DebugOffer(W, "Rum & Co: Abv is zero")
+ W.error_msg = "Rum & Co: Abv is zero"
+ W.error_ctx = abv_noisy
+ WarnOffer(W, "Rum & Co: Abv is zero")
return
}
@@ -124,10 +149,7 @@ func ScrapeRumundCo(shop Shop) []Angebot {
e.ForEach("tr", func(i int, e *colly.HTMLElement) {
text_noisy := e.ChildText("th")
- //log.Println("Visiting (" + e.Request.URL.String() + "). Found: " + text_noisy + " END")
-
if strings.Contains(text_noisy, "Genauer Inhalt:") {
- //log.Println("Visiting (" + e.Request.URL.String() + "). Found (V): " + e.ChildText("td") + " END")
e.Request.Ctx.Put("volume", e.ChildText("td"))
} else if strings.Contains(text_noisy, "Alkoholgehalt in %:") {
e.Request.Ctx.Put("abv", e.ChildText("a"))
diff --git a/crawler/shop_whic.go b/crawler/shop_whic.go
index e082ad1..0e7cdf9 100644
--- a/crawler/shop_whic.go
+++ b/crawler/shop_whic.go
@@ -8,7 +8,7 @@ import (
"github.com/gocolly/colly"
)
-func ScrapeWhic(shop Shop) []Angebot {
+func (app *App) ScrapeWhic(shop Shop) []Angebot {
Shop_url := "https://whic.de/angebote"
Whiskys := []Angebot{}
@@ -36,13 +36,19 @@ func ScrapeWhic(shop Shop) []Angebot {
e.ForEach(".old-price", func(i int, e *colly.HTMLElement) {
W.Original_price, err = convert_price(e.ChildText(".price"))
if err != nil {
- Fatal(err, "Whic: Converting original price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = e.ChildText(".price")
+ WarnOffer(W, "Whic: Converting original price failed")
+ return
}
})
e.ForEach(".special-price", func(i int, e *colly.HTMLElement) {
W.Discounted_price, err = convert_price(e.ChildText(".price"))
if err != nil {
- Fatal(err, "Whic: Converting discounted price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = e.ChildText(".price")
+ WarnOffer(W, "Whic: Converting discounted price failed")
+ return
}
})
})
@@ -50,7 +56,10 @@ func ScrapeWhic(shop Shop) []Angebot {
base_price_noisy := e.ChildText(".base-price")
W.Base_price, err = sanitize_base_price(base_price_noisy)
if err != nil {
- Fatal(err, "Whic: Sanitizing base price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = base_price_noisy
+ WarnOffer(W, "Whic: Sanitizing base price failed")
+ return
}
/*
@@ -60,19 +69,27 @@ func ScrapeWhic(shop Shop) []Angebot {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(img_link_noisy))
if err != nil {
- Fatal(err, "Whic: Parsing document in Goquery failed")
+ W.error_msg = err.Error()
+ W.error_ctx = img_link_noisy
+ FatalOffer(W, "Whic: Parsing document in Goquery failed")
}
W.Image_url, _ = doc.Find("img").Attr("src")
e.Request.Visit(W.Url)
- W.Volume = get_volume(e)
+
+ var ctx string
+ W.Volume, ctx = get_volume(e)
if W.Volume == 0 {
- DebugOffer(W, "Whic: Volume is zero")
+ W.error_msg = "Whic: Volume is zero"
+ W.error_ctx = ctx
+ WarnOffer(W, "Whic: Volume is zero")
return
}
- W.Abv = get_abv(e)
+ W.Abv, ctx = get_abv(e)
if W.Abv == 0 {
- DebugOffer(W, "Whic: Abv is zero")
+ W.error_msg = "Whic: Abv is zero"
+ W.error_ctx = ctx
+ WarnOffer(W, "Whic: Abv is zero")
return
}
diff --git a/crawler/shop_whiskyde.go b/crawler/shop_whiskyde.go
index f13190b..ffbbe08 100644
--- a/crawler/shop_whiskyde.go
+++ b/crawler/shop_whiskyde.go
@@ -6,7 +6,7 @@ import (
"github.com/gocolly/colly"
)
-func ScrapeWhiskyde(shop Shop) []Angebot {
+func (app *App) ScrapeWhiskyde(shop Shop) []Angebot {
Shop_url := "https://www.whisky.de/shop/Aktuell/Sonderangebote/"
Whiskys := []Angebot{}
@@ -33,13 +33,19 @@ func ScrapeWhiskyde(shop Shop) []Angebot {
e.ForEach(".article-price-original", func(i int, e *colly.HTMLElement) {
W.Original_price, err = convert_price(e.ChildText("del"))
if err != nil {
- Fatal(err, "Whisky.de: Converting original price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = e.ChildText("del")
+ WarnOffer(W, "Whisky.de: Converting original price failed")
+ return
}
})
e.ForEach(".article-price", func(i int, e *colly.HTMLElement) {
W.Discounted_price, err = convert_price(e.ChildText(".article-price-default"))
if err != nil {
- Fatal(err, "Whisky.de: Converting discounted price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = e.ChildText(".article-price-default")
+ WarnOffer(W, "Whisky.de: Converting discounted price failed")
+ return
}
})
@@ -54,25 +60,38 @@ func ScrapeWhiskyde(shop Shop) []Angebot {
text_noisy := e.ChildText(".article-amount")
if !strings.Contains(text_noisy, "Liter") {
+ W.error_ctx = text_noisy
+ W.error_msg = "Whisky.de: String 'Liter' not found."
+ WarnOffer(W, "Whisky.de: String 'Liter' not found.")
return
}
abv_noisy := strings.TrimSpace(strings.SplitAfter(text_noisy, "Liter")[1])
W.Volume, err = extract_volume(text_noisy)
if err != nil {
- Fatal(err, "Whisky.de: Extracting volume failed")
+ W.error_msg = err.Error()
+ W.error_ctx = text_noisy
+ WarnOffer(W, "Whisky.de: Extracting volume failed")
+ return
}
W.Abv, err = extract_abv(abv_noisy)
if err != nil {
- Fatal(err, "Whisky.de: Extracting abv failed")
+ W.error_msg = err.Error()
+ W.error_ctx = abv_noisy
+ WarnOffer(W, "Whisky.de: Extracting abv failed")
+ return
}
if W.Volume == 0 {
- DebugOffer(W, "Whisky.de: Volume is zero")
+ W.error_msg = "Whisky.de: Volume is zero"
+ W.error_ctx = text_noisy
+ WarnOffer(W, "Whisky.de: Volume is zero")
return
}
if W.Abv == 0 {
- DebugOffer(W, "Whisky.de: Abv is zero")
+ W.error_msg = "Whisky.de: Abv is zero"
+ W.error_ctx = abv_noisy
+ WarnOffer(W, "Whisky.de: Abv is zero")
return
}
@@ -81,7 +100,10 @@ func ScrapeWhiskyde(shop Shop) []Angebot {
W.Base_price, err = convert_price(e.ChildText(".article-unitprice-default"))
if err != nil {
- Fatal(err, "Whisky.de: Converting base price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = e.ChildText(".article-unitprice-default")
+ WarnOffer(W, "Whisky.de: Converting base price failed")
+ return
}
e.Request.Visit(W.Url)
diff --git a/crawler/shop_whiskysitenl.go b/crawler/shop_whiskysitenl.go
index a555123..43345b2 100644
--- a/crawler/shop_whiskysitenl.go
+++ b/crawler/shop_whiskysitenl.go
@@ -8,7 +8,7 @@ import (
"github.com/gocolly/colly"
)
-func ScrapeWhiskysitenl(shop Shop) []Angebot {
+func (app *App) ScrapeWhiskysitenl(shop Shop) []Angebot {
Whiskys := []Angebot{}
c := colly.NewCollector(
diff --git a/crawler/shop_whiskyworld.go b/crawler/shop_whiskyworld.go
index 5235d3c..def22c6 100644
--- a/crawler/shop_whiskyworld.go
+++ b/crawler/shop_whiskyworld.go
@@ -6,7 +6,7 @@ import (
"github.com/gocolly/colly"
)
-func ScrapeWhiskyworld(shop Shop) []Angebot {
+func (app *App) ScrapeWhiskyworld(shop Shop) []Angebot {
Shop_urls := []string{"https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BMalt%2522%257D",
"https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BWhiskies%2522%257D",
@@ -42,13 +42,17 @@ func ScrapeWhiskyworld(shop Shop) []Angebot {
W.Original_price, err = convert_price(regular_price)
if err != nil {
- Fatal(err, "Whiskyworld: Converting original price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = regular_price
+ WarnOffer(W, "Whiskyworld: Converting original price failed")
return
}
W.Discounted_price, err = convert_price(e.ChildText(".uvp"))
if err != nil {
- Fatal(err, "Whiskyworld: Converting discounted price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = e.ChildText(".uvp")
+ WarnOffer(W, "Whiskyworld: Converting discounted price failed")
return
}
@@ -56,13 +60,31 @@ func ScrapeWhiskyworld(shop Shop) []Angebot {
text_noisy := e.ChildText(".item-inh")
W.Volume, err = extract_volume(text_noisy)
if err != nil {
- Fatal(err, "Whiskyworld: Extracting volume failed")
+ W.error_msg = err.Error()
+ W.error_ctx = text_noisy
+ WarnOffer(W, "Whiskyworld: Extracting volume failed")
+ return
+ }
+ if W.Volume == 0 {
+ W.error_msg = "Whiskyworld: Volume is zero"
+ W.error_ctx = text_noisy
+ WarnOffer(W, "Whiskyworld: Volume is zero")
+ return
}
abv_noisy := strings.TrimSpace(strings.SplitAfter(text_noisy, "Liter")[1])
abv_noisy = strings.TrimPrefix(abv_noisy, "/")
W.Abv, err = extract_abv(abv_noisy)
if err != nil {
- Fatal(err, "Whiskyworld: Extracting abv failed")
+ W.error_msg = err.Error()
+ W.error_ctx = abv_noisy
+ WarnOffer(W, "Whiskyworld: Extracting abv failed")
+ return
+ }
+ if W.Abv == 0 {
+ W.error_msg = "Whiskyworld: Abv is zero"
+ W.error_ctx = abv_noisy
+ WarnOffer(W, "Whiskyworld: Abv is zero")
+ return
}
})
@@ -72,7 +94,10 @@ func ScrapeWhiskyworld(shop Shop) []Angebot {
base_price_noisy = strings.TrimSpace(strings.SplitAfter(base_price_noisy, "Liter")[0])
W.Base_price, err = sanitize_base_price(base_price_noisy)
if err != nil {
- Fatal(err, "Whiskyworld: Sanitizing base price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = base_price_noisy
+ WarnOffer(W, "Whiskyworld: Sanitizing base price failed")
+ return
}
}
@@ -83,15 +108,6 @@ func ScrapeWhiskyworld(shop Shop) []Angebot {
W.Shop = shop.Id
W.Spirit_type = "Whisky"
- if W.Volume == 0 {
- DebugOffer(W, "Whiskyworld: Volume is zero")
- return
- }
- if W.Abv == 0 {
- DebugOffer(W, "Whiskyworld: Abv is zero")
- return
- }
-
e.Request.Visit(W.Url)
W.Website = e.Request.Ctx.Get("website")
diff --git a/crawler/shop_whiskyzone.go b/crawler/shop_whiskyzone.go
index 8d86b8a..f01e93f 100644
--- a/crawler/shop_whiskyzone.go
+++ b/crawler/shop_whiskyzone.go
@@ -6,7 +6,7 @@ import (
"github.com/gocolly/colly"
)
-func ScrapeWhiskyzone(shop Shop) []Angebot {
+func (app *App) ScrapeWhiskyzone(shop Shop) []Angebot {
Shop_url := "https://www.whiskyzone.de/widgets/emotion/index/emotionId/248/controllerName/listing"
@@ -45,23 +45,33 @@ func ScrapeWhiskyzone(shop Shop) []Angebot {
var err error
W.Discounted_price, err = convert_price(e.Request.Ctx.Get("discounted_price"))
if err != nil {
- Fatal(err, "Whiskyzone: Convert discounted price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = e.Request.Ctx.Get("discounted_price")
+ WarnOffer(W, "Whiskyzone: Convert discounted price failed")
+ return
}
W.Original_price, err = convert_price(e.Request.Ctx.Get("original_price"))
if err != nil {
- Fatal(err, "Whiskyzone: Convert original price failed")
+ W.error_msg = err.Error()
+ W.error_ctx = e.Request.Ctx.Get("original_price")
+ WarnOffer(W, "Whiskyzone: Convert original price failed")
+ return
}
- W.Volume = get_volume(e)
- W.Abv = get_abv(e)
-
+ var ctx string
+ W.Volume, ctx = get_volume(e)
if W.Volume == 0 {
- DebugOffer(W, "Whiskyzone: Volume is zero")
+ W.error_msg = "Whiskyzone: Volume is zero"
+ W.error_ctx = ctx
+ WarnOffer(W, "Whiskyzone: Volume is zero")
return
}
+ W.Abv, ctx = get_abv(e)
if W.Abv == 0 {
- DebugOffer(W, "Whiskyzone: Abv is zero")
+ W.error_msg = "Whiskyzone: Abv is zero"
+ W.error_ctx = ctx
+ WarnOffer(W, "Whiskyzone: Abv is zero")
return
}
@@ -69,7 +79,13 @@ func ScrapeWhiskyzone(shop Shop) []Angebot {
if base_price == "same_as_discounted_price" {
W.Base_price = W.Discounted_price
} else {
- W.Base_price = get_base_price(e)
+ W.Base_price, err = get_base_price(e)
+ if err != nil {
+ W.error_msg = err.Error()
+ W.error_ctx = base_price
+ WarnOffer(W, "Whiskyzone: Extracting base price failed")
+ return
+ }
}
W.Website = e.Request.Ctx.Get("website")
diff --git a/crawler/shops.go b/crawler/shops.go
index 8cfd9f4..1babc9d 100644
--- a/crawler/shops.go
+++ b/crawler/shops.go
@@ -111,9 +111,7 @@ func (app *App) getShops() ([]Shop, error) {
if err != nil {
return []Shop{}, err
}
- if app.Config.Debug {
- log.Println("Crawling: " + shop.Name)
- }
+ log.Debug("Crawling: " + shop.Name)
Shops = append(Shops, shop)
}
diff --git a/crawler/utility.go b/crawler/utility.go
index 3c587b9..29f14d6 100644
--- a/crawler/utility.go
+++ b/crawler/utility.go
@@ -1,7 +1,7 @@
package main
import (
- log "github.com/Sirupsen/logrus"
+ "errors"
"regexp"
"strconv"
"strings"
@@ -131,7 +131,7 @@ func extract_abv(abv_noisy string) (float32, error) {
/*
* In litre, but float.
*/
-func get_volume(e *colly.HTMLElement) float32 {
+func get_volume(e *colly.HTMLElement) (float32, string) {
volume_noisy := e.Request.Ctx.Get("volume")
@@ -140,8 +140,7 @@ func get_volume(e *colly.HTMLElement) float32 {
Fatal(err, "Get volume regex failed")
}
if !matched {
- log.Debug("get_volume: not matched: " + volume_noisy)
- return 0
+ return 0, volume_noisy
}
volume, err := extract_volume(volume_noisy)
@@ -149,18 +148,18 @@ func get_volume(e *colly.HTMLElement) float32 {
Fatal(err, "Get Volume: Extract Volume failed: "+volume_noisy)
}
- return volume
+ return volume, ""
}
/*
* In procent. (float)
*/
-func get_abv(e *colly.HTMLElement) float32 {
+func get_abv(e *colly.HTMLElement) (float32, string) {
abv_noisy := e.Request.Ctx.Get("abv")
if abv_noisy == "" {
- return 0
+ return 0, abv_noisy
}
// abv_noisy = strings.Replace(abv_noisy, ".", ",", 1)
@@ -169,24 +168,24 @@ func get_abv(e *colly.HTMLElement) float32 {
Fatal(err, "Get ABV: Extract ABV failed: "+abv_noisy)
}
- return abv
+ return abv, ""
}
/*
* In cents. (int)
*/
-func get_base_price(e *colly.HTMLElement) int {
+func get_base_price(e *colly.HTMLElement) (int, error) {
base_price_noisy := e.Request.Ctx.Get("base_price")
if base_price_noisy == "" {
- return 0
+ return 0, errors.New("Base price empty")
}
base_price, err := sanitize_base_price(base_price_noisy)
if err != nil {
- Fatal(err, "Get base price: sanitize base price failed")
+ return 0, err
}
- return base_price
+ return base_price, nil
}