diff options
| field | value | date |
|---|---|---|
| author | horus_arch | 2018-02-17 13:51:35 +0100 |
| committer | horus_arch | 2018-02-17 13:51:35 +0100 |
| commit | bcdea2f8e95f5305625a773223829478c8c13bed (patch) | |
| tree | efac40b03131b4f9e43de848920695ee785a0a3f /crawler | |
| parent | 9ebf51364773dae6db4c0c47d77710c9f1a37b51 (diff) | |
| download | alkobote-bcdea2f8e95f5305625a773223829478c8c13bed.tar.gz | |
Introduces context on errors. (crawler)
Diffstat (limited to 'crawler')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | crawler/database.go | 4 |
| -rw-r--r-- | crawler/log.go | 31 |
| -rw-r--r-- | crawler/main.go | 3 |
| -rw-r--r-- | crawler/post_process.go | 9 |
| -rw-r--r-- | crawler/scrape.go | 24 |
| -rw-r--r-- | crawler/shop_bottleworld.go | 30 |
| -rw-r--r-- | crawler/shop_mcwhisky.go | 30 |
| -rw-r--r-- | crawler/shop_rumundco.go | 50 |
| -rw-r--r-- | crawler/shop_whic.go | 35 |
| -rw-r--r-- | crawler/shop_whiskyde.go | 38 |
| -rw-r--r-- | crawler/shop_whiskysitenl.go | 2 |
| -rw-r--r-- | crawler/shop_whiskyworld.go | 46 |
| -rw-r--r-- | crawler/shop_whiskyzone.go | 34 |
| -rw-r--r-- | crawler/shops.go | 4 |
| -rw-r--r-- | crawler/utility.go | 23 |
15 files changed, 252 insertions, 111 deletions
diff --git a/crawler/database.go b/crawler/database.go index 2608c59..557f491 100644 --- a/crawler/database.go +++ b/crawler/database.go @@ -109,7 +109,9 @@ func (app *App) save_offer(W []Angebot) error { _, err = stmt.Exec(o.Shop, o.Name, o.Url, o.Abv, o.Volume, o.Original_price, o.Discounted_price, o.Base_price, o.Valid_until, o.Image_url, o.Website, o.Spirit_type, app.Now) } if err != nil { - Debug(err, "Save Offer: Inserting offer failed") + o.error_msg = err.Error() + o.error_ctx = "Save Offer: Inserting offer failed" + DebugOffer(o, "Save Offer: Inserting offer failed") return err } diff --git a/crawler/log.go b/crawler/log.go index 7d8e707..0cd681d 100644 --- a/crawler/log.go +++ b/crawler/log.go @@ -24,11 +24,18 @@ func init() { } func Fatal(err error, msg string) { - log.WithFields( - log.Fields{ - "error": err.Error(), - }, - ).Fatal(msg) + if err != nil { + log.WithFields( + log.Fields{ + "error": err.Error(), + }, + ).Fatal(msg) + } else { + log.Fatal(msg) + } +} +func FatalOffer(offer Angebot, msg string) { + log.WithFields(getFields(offer)).Fatal(msg) } func Println(err error, msg string) { @@ -83,5 +90,19 @@ func getFields(offer Angebot) log.Fields { "Image_url": offer.Image_url, "Spirit Type": offer.Spirit_type, "Valid Until": offer.Valid_until, + "Error_msg": offer.error_msg, + "Error_ctx": offer.error_ctx, + } +} + +func Warn(err error, msg string) { + if err != nil { + log.WithFields( + log.Fields{ + "error": err.Error(), + }, + ).Warn(msg) + } else { + log.Warn(msg) } } diff --git a/crawler/main.go b/crawler/main.go index 8727443..ece25e4 100644 --- a/crawler/main.go +++ b/crawler/main.go @@ -37,6 +37,9 @@ type Angebot struct { Spirit_type string Website string Valid_until int + + error_msg string + error_ctx string } type Shop struct { diff --git a/crawler/post_process.go b/crawler/post_process.go index dfd7861..4688cc8 100644 --- a/crawler/post_process.go +++ b/crawler/post_process.go @@ -37,9 +37,8 @@ func (app *App) short_url() error { 
v.Add("url", offer_db.Url) polr_url := app.Config.Polr_URL + "?" + v.Encode() - if app.Config.Debug { - log.Debug("polr_url: " + polr_url + " ( " + offer_db.Url + " )") - } + log.Debug("polr_url: " + polr_url + " ( " + offer_db.Url + " )") + resp, err := http.Get(polr_url) if err != nil { return err @@ -52,9 +51,7 @@ func (app *App) short_url() error { } offer_db.Short_url = string(short_url) - if app.Config.Debug { - log.Debug("short_url: " + string(short_url) + " ( " + offer_db.Url + " )") - } + log.Debug("short_url: " + string(short_url) + " ( " + offer_db.Url + " )") Angebote = append(Angebote, offer_db) } diff --git a/crawler/scrape.go b/crawler/scrape.go index f6ad80b..ae63e5c 100644 --- a/crawler/scrape.go +++ b/crawler/scrape.go @@ -28,41 +28,41 @@ func (app *App) Scrape(shop Shop, wait chan bool) { var W []Angebot var err error - W = ScrapeShop(shop) + W = app.ScrapeShop(shop) W = sanitize_offer(W, shop) err = app.save_offer(W) if err != nil { - Fatal(err, "Saving offers failed") + Warn(err, "Saving offers failed") } err = app.remove_expired(W, shop) if err != nil { - Fatal(err, "Removing expired offers failed") + Warn(err, "Removing expired offers failed") } wait <- true } -func ScrapeShop(shop Shop) []Angebot { +func (app *App) ScrapeShop(shop Shop) []Angebot { switch shop.Name { case "Bottleworld": - return ScrapeBottleWord(shop) + return app.ScrapeBottleWord(shop) case "MC Whisky": - return ScrapeMCWhisky(shop) + return app.ScrapeMCWhisky(shop) case "Rum & Co": - return ScrapeRumundCo(shop) + return app.ScrapeRumundCo(shop) case "Whic": - return ScrapeWhic(shop) + return app.ScrapeWhic(shop) case "Whisky.de": - return ScrapeWhiskyde(shop) + return app.ScrapeWhiskyde(shop) //case "Whiskysite.nl": - // return ScrapeWhiskysitenl(shop) + // return app.ScrapeWhiskysitenl(shop) case "Whisky World": - return ScrapeWhiskyworld(shop) + return app.ScrapeWhiskyworld(shop) case "Whiskyzone": - return ScrapeWhiskyzone(shop) + return app.ScrapeWhiskyzone(shop) 
default: log.Println(shop.Name + ": No Crawler") } diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go index db45791..720f2c8 100644 --- a/crawler/shop_bottleworld.go +++ b/crawler/shop_bottleworld.go @@ -8,7 +8,7 @@ import ( "github.com/gocolly/colly" ) -func ScrapeBottleWord(shop Shop) []Angebot { +func (app *App) ScrapeBottleWord(shop Shop) []Angebot { Shop_url := "https://www.bottleworld.de/aktuelle-sonderpreise/show/all" Whiskys := []Angebot{} @@ -32,13 +32,19 @@ func ScrapeBottleWord(shop Shop) []Angebot { e.ForEach(".old-price", func(i int, e *colly.HTMLElement) { W.Original_price, err = convert_price(e.ChildText(".price")) if err != nil { - Fatal(err, "Bottleworld: Converting original price failed") + W.error_msg = err.Error() + W.error_ctx = e.ChildText(".price") + WarnOffer(W, "Bottleworld: Converting original price failed") + return } }) e.ForEach(".special-price", func(i int, e *colly.HTMLElement) { W.Discounted_price, err = convert_price(e.ChildText(".price")) if err != nil { - Fatal(err, "Bottleworld: Converting discounted price failed") + W.error_msg = err.Error() + W.error_ctx = e.ChildText(".price") + WarnOffer(W, "Bottleworld: Converting discounted price failed") + return } }) }) @@ -46,7 +52,10 @@ func ScrapeBottleWord(shop Shop) []Angebot { price_per_litre_noisy := e.ChildText(".price-per-liter") price_per_litre, err := sanitize_base_price(price_per_litre_noisy) if err != nil { - Fatal(err, "Bottleworld: Sanitizing base price failed") + W.error_msg = err.Error() + W.error_ctx = price_per_litre_noisy + WarnOffer(W, "Bottleworld: Sanitizing base price failed") + return } W.Base_price = price_per_litre @@ -56,14 +65,19 @@ func ScrapeBottleWord(shop Shop) []Angebot { W.Shop = shop.Id - W.Volume = get_volume(e) + var ctx string + W.Volume, ctx = get_volume(e) if W.Volume == 0 { - DebugOffer(W, "Bottleworld: Volume is zero") + W.error_msg = "Bottleworld: Volume is zero" + W.error_ctx = ctx + WarnOffer(W, "Bottleworld: Volume is 
zero") return } - W.Abv = get_abv(e) + W.Abv, ctx = get_abv(e) if W.Abv == 0 { - DebugOffer(W, "Bottleworld: Abv is zero") + W.error_msg = "Bottleworld: Abv is zero" + W.error_ctx = ctx + WarnOffer(W, "Bottleworld: Abv is zero") return } diff --git a/crawler/shop_mcwhisky.go b/crawler/shop_mcwhisky.go index b423c72..c015d26 100644 --- a/crawler/shop_mcwhisky.go +++ b/crawler/shop_mcwhisky.go @@ -6,7 +6,7 @@ import ( "github.com/gocolly/colly" ) -func ScrapeMCWhisky(shop Shop) []Angebot { +func (app *App) ScrapeMCWhisky(shop Shop) []Angebot { Shop_url := "https://www.mcwhisky.com/whisky/whisky-sonderangebote.html" Whiskys := []Angebot{} @@ -35,13 +35,19 @@ func ScrapeMCWhisky(shop Shop) []Angebot { e.ForEach(".old-price", func(i int, e *colly.HTMLElement) { W.Original_price, err = convert_price(e.ChildText(".price")) if err != nil { - Fatal(err, "MC Whisky: Converting original price failed") + W.error_msg = err.Error() + W.error_ctx = e.ChildText(".price") + WarnOffer(W, "MC Whisky: Converting original price failed") + return } }) e.ForEach(".special-price", func(i int, e *colly.HTMLElement) { W.Discounted_price, err = convert_price(e.ChildText(".price")) if err != nil { - Fatal(err, "MC Whisky: Converting discounted price failed") + W.error_msg = err.Error() + W.error_ctx = e.ChildText(".price") + WarnOffer(W, "MC Whisky: Converting discounted price failed") + return } }) }) @@ -49,21 +55,29 @@ func ScrapeMCWhisky(shop Shop) []Angebot { price_per_litre_noisy := e.ChildText(".price-box-extended-info-ppl") W.Base_price, err = sanitize_base_price(price_per_litre_noisy) if err != nil { - Fatal(err, "MC Whisky: Sanitizing base price failed") + W.error_msg = err.Error() + W.error_ctx = price_per_litre_noisy + WarnOffer(W, "MC Whisky: Sanitizing base price failed") + return } W.Image_url = e.ChildAttr("img", "src") e.Request.Visit(W.Url) - W.Volume = get_volume(e) + var ctx string + W.Volume, ctx = get_volume(e) if W.Abv == 0 { - DebugOffer(W, "MC Whisky: Volume is zero") 
+ W.error_msg = "MC Whisky: Volume is zero" + W.error_ctx = ctx + WarnOffer(W, "MC Whisky: Volume is zero") return } - W.Abv = get_abv(e) + W.Abv, ctx = get_abv(e) if W.Abv == 0 { - DebugOffer(W, "MC Whisky: Abv is zero") + W.error_msg = "MC Whisky: Abv is zero" + W.error_ctx = ctx + WarnOffer(W, "MC Whisky: Abv is zero") return } diff --git a/crawler/shop_rumundco.go b/crawler/shop_rumundco.go index 25b89bd..58de518 100644 --- a/crawler/shop_rumundco.go +++ b/crawler/shop_rumundco.go @@ -8,7 +8,7 @@ import ( "github.com/gocolly/colly" ) -func ScrapeRumundCo(shop Shop) []Angebot { +func (app *App) ScrapeRumundCo(shop Shop) []Angebot { Shop_url := "https://www.rumundco.de/navi.php?q=4&kf=29&kk-suesse-von=0&kk-suesse-bis=100&kk-milde-von=0&kk-milde-bis=100&kk-wuerze-von=0&kk-wuerze-bis=100&kk-frucht-von=0&kk-frucht-bis=100&kk-torf-von=0&kk-torf-bis=100&hf=0&af=90&Sortierung=11&a=350" @@ -35,6 +35,9 @@ func ScrapeRumundCo(shop Shop) []Angebot { Fatal(err, "Rum & Co: Verfügbar regex failed") } if !matched { + W.error_msg = "Rum & Co: Offer not available" + W.error_ctx = e.ChildText(".delivery-status") + WarnOffer(W, "Rum & Co: Offer not available") return } @@ -50,23 +53,34 @@ func ScrapeRumundCo(shop Shop) []Angebot { e.ForEach(".price_wrapper", func(i int, e *colly.HTMLElement) { regular_price := e.ChildText("del.value") if "" == regular_price { - PrintlnOffer(W, "Rum & Co: No regular price found") + W.error_msg = "Rum & Co: No regular price found" + W.error_ctx = regular_price + WarnOffer(W, "Rum & Co: No regular price found") return } W.Original_price, err = convert_price(regular_price) if err != nil { - Fatal(err, "Rum & Co: Original price: Convert price failed") + W.error_msg = err.Error() + W.error_ctx = regular_price + WarnOffer(W, "Rum & Co: Original price: Convert price failed") + return } W.Discounted_price, err = convert_price(e.ChildText(".price-value")) if err != nil { - Fatal(err, "Rum & Co: Discounted price: Convert price failed") + W.error_msg = 
err.Error() + W.error_ctx = e.ChildText(".price-value") + WarnOffer(W, "Rum & Co: Discounted price: Convert price failed") + return } e.ForEach(".base_price", func(i int, e *colly.HTMLElement) { price_per_litre_noisy := e.ChildText(".value") W.Base_price, err = sanitize_base_price(price_per_litre_noisy) if err != nil { - Fatal(err, "Rum & Co: Base price: Sanitizing base price failed") + W.error_msg = err.Error() + W.error_ctx = e.ChildText(".value") + WarnOffer(W, "Rum & Co: Base price: Sanitizing base price failed") + return } }) @@ -82,7 +96,9 @@ func ScrapeRumundCo(shop Shop) []Angebot { } image_url_noisy_slice := r_pagespeed.FindStringSubmatch(image_url_noisy) if len(image_url_noisy_slice) < 2 { - PrintlnOffer(W, "Rum & Co: (Pagespeed) Image URL not found") + W.error_msg = "Rum & Co: (Pagespeed) Image URL not found" + W.error_ctx = image_url_noisy + WarnOffer(W, "Rum & Co: (Pagespeed) Image URL not found") return } image_url_noisy = strings.Replace(image_url_noisy, image_url_noisy_slice[1], "", 1) @@ -92,23 +108,32 @@ func ScrapeRumundCo(shop Shop) []Angebot { e.Request.Visit(W.Url) - W.Volume = get_volume(e) + var ctx string + W.Volume, ctx = get_volume(e) if W.Volume == 0 { - DebugOffer(W, "Rum & Co: Volume is zero") + W.error_msg = "Rum & Co: Volume is zero" + W.error_ctx = ctx + WarnOffer(W, "Rum & Co: Volume is zero") return } if "" == abv_noisy { - W.Abv = get_abv(e) + W.Abv, ctx = get_abv(e) + abv_noisy = ctx } else { W.Abv, err = extract_abv(abv_noisy) if err != nil { - Fatal(err, "Rum & Co: Base price: Extracting ABV failed") + W.error_msg = err.Error() + W.error_ctx = abv_noisy + WarnOffer(W, "Rum & Co: Base price: Extracting ABV failed") + return } } if W.Abv == 0 { - DebugOffer(W, "Rum & Co: Abv is zero") + W.error_msg = "Rum & Co: Abv is zero" + W.error_ctx = abv_noisy + WarnOffer(W, "Rum & Co: Abv is zero") return } @@ -124,10 +149,7 @@ func ScrapeRumundCo(shop Shop) []Angebot { e.ForEach("tr", func(i int, e *colly.HTMLElement) { text_noisy := 
e.ChildText("th") - //log.Println("Visiting (" + e.Request.URL.String() + "). Found: " + text_noisy + " END") - if strings.Contains(text_noisy, "Genauer Inhalt:") { - //log.Println("Visiting (" + e.Request.URL.String() + "). Found (V): " + e.ChildText("td") + " END") e.Request.Ctx.Put("volume", e.ChildText("td")) } else if strings.Contains(text_noisy, "Alkoholgehalt in %:") { e.Request.Ctx.Put("abv", e.ChildText("a")) diff --git a/crawler/shop_whic.go b/crawler/shop_whic.go index e082ad1..0e7cdf9 100644 --- a/crawler/shop_whic.go +++ b/crawler/shop_whic.go @@ -8,7 +8,7 @@ import ( "github.com/gocolly/colly" ) -func ScrapeWhic(shop Shop) []Angebot { +func (app *App) ScrapeWhic(shop Shop) []Angebot { Shop_url := "https://whic.de/angebote" Whiskys := []Angebot{} @@ -36,13 +36,19 @@ func ScrapeWhic(shop Shop) []Angebot { e.ForEach(".old-price", func(i int, e *colly.HTMLElement) { W.Original_price, err = convert_price(e.ChildText(".price")) if err != nil { - Fatal(err, "Whic: Converting original price failed") + W.error_msg = err.Error() + W.error_ctx = e.ChildText(".price") + WarnOffer(W, "Whic: Converting original price failed") + return } }) e.ForEach(".special-price", func(i int, e *colly.HTMLElement) { W.Discounted_price, err = convert_price(e.ChildText(".price")) if err != nil { - Fatal(err, "Whic: Converting discounted price failed") + W.error_msg = err.Error() + W.error_ctx = e.ChildText(".price") + WarnOffer(W, "Whic: Converting discounted price failed") + return } }) }) @@ -50,7 +56,10 @@ func ScrapeWhic(shop Shop) []Angebot { base_price_noisy := e.ChildText(".base-price") W.Base_price, err = sanitize_base_price(base_price_noisy) if err != nil { - Fatal(err, "Whic: Sanitizing base price failed") + W.error_msg = err.Error() + W.error_ctx = base_price_noisy + WarnOffer(W, "Whic: Sanitizing base price failed") + return } /* @@ -60,19 +69,27 @@ func ScrapeWhic(shop Shop) []Angebot { doc, err := goquery.NewDocumentFromReader(strings.NewReader(img_link_noisy)) if 
err != nil { - Fatal(err, "Whic: Parsing document in Goquery failed") + W.error_msg = err.Error() + W.error_ctx = img_link_noisy + FatalOffer(W, "Whic: Parsing document in Goquery failed") } W.Image_url, _ = doc.Find("img").Attr("src") e.Request.Visit(W.Url) - W.Volume = get_volume(e) + + var ctx string + W.Volume, ctx = get_volume(e) if W.Volume == 0 { - DebugOffer(W, "Whic: Volume is zero") + W.error_msg = "Whic: Volume is zero" + W.error_ctx = ctx + WarnOffer(W, "Whic: Volume is zero") return } - W.Abv = get_abv(e) + W.Abv, ctx = get_abv(e) if W.Abv == 0 { - DebugOffer(W, "Whic: Abv is zero") + W.error_msg = "Whic: Abv is zero" + W.error_ctx = ctx + WarnOffer(W, "Whic: Abv is zero") return } diff --git a/crawler/shop_whiskyde.go b/crawler/shop_whiskyde.go index f13190b..ffbbe08 100644 --- a/crawler/shop_whiskyde.go +++ b/crawler/shop_whiskyde.go @@ -6,7 +6,7 @@ import ( "github.com/gocolly/colly" ) -func ScrapeWhiskyde(shop Shop) []Angebot { +func (app *App) ScrapeWhiskyde(shop Shop) []Angebot { Shop_url := "https://www.whisky.de/shop/Aktuell/Sonderangebote/" Whiskys := []Angebot{} @@ -33,13 +33,19 @@ func ScrapeWhiskyde(shop Shop) []Angebot { e.ForEach(".article-price-original", func(i int, e *colly.HTMLElement) { W.Original_price, err = convert_price(e.ChildText("del")) if err != nil { - Fatal(err, "Whisky.de: Converting original price failed") + W.error_msg = err.Error() + W.error_ctx = e.ChildText("del") + WarnOffer(W, "Whisky.de: Converting original price failed") + return } }) e.ForEach(".article-price", func(i int, e *colly.HTMLElement) { W.Discounted_price, err = convert_price(e.ChildText(".article-price-default")) if err != nil { - Fatal(err, "Whisky.de: Converting discounted price failed") + W.error_msg = err.Error() + W.error_ctx = e.ChildText(".article-price-default") + WarnOffer(W, "Whisky.de: Converting discounted price failed") + return } }) @@ -54,25 +60,38 @@ func ScrapeWhiskyde(shop Shop) []Angebot { text_noisy := e.ChildText(".article-amount") 
if !strings.Contains(text_noisy, "Liter") { + W.error_ctx = text_noisy + W.error_msg = "Whisky.de: String 'Liter' not found." + WarnOffer(W, "Whisky.de: String 'Liter' not found.") return } abv_noisy := strings.TrimSpace(strings.SplitAfter(text_noisy, "Liter")[1]) W.Volume, err = extract_volume(text_noisy) if err != nil { - Fatal(err, "Whisky.de: Extracting volume failed") + W.error_msg = err.Error() + W.error_ctx = text_noisy + WarnOffer(W, "Whisky.de: Extracting volume failed") + return } W.Abv, err = extract_abv(abv_noisy) if err != nil { - Fatal(err, "Whisky.de: Extracting abv failed") + W.error_msg = err.Error() + W.error_ctx = abv_noisy + WarnOffer(W, "Whisky.de: Extracting abv failed") + return } if W.Volume == 0 { - DebugOffer(W, "Whisky.de: Volume is zero") + W.error_msg = "Whisky.de: Volume is zero" + W.error_ctx = text_noisy + WarnOffer(W, "Whisky.de: Volume is zero") return } if W.Abv == 0 { - DebugOffer(W, "Whisky.de: Abv is zero") + W.error_msg = "Whisky.de: Abv is zero" + W.error_ctx = abv_noisy + WarnOffer(W, "Whisky.de: Abv is zero") return } @@ -81,7 +100,10 @@ func ScrapeWhiskyde(shop Shop) []Angebot { W.Base_price, err = convert_price(e.ChildText(".article-unitprice-default")) if err != nil { - Fatal(err, "Whisky.de: Converting base price failed") + W.error_msg = err.Error() + W.error_ctx = e.ChildText(".article-unitprice-default") + WarnOffer(W, "Whisky.de: Converting base price failed") + return } e.Request.Visit(W.Url) diff --git a/crawler/shop_whiskysitenl.go b/crawler/shop_whiskysitenl.go index a555123..43345b2 100644 --- a/crawler/shop_whiskysitenl.go +++ b/crawler/shop_whiskysitenl.go @@ -8,7 +8,7 @@ import ( "github.com/gocolly/colly" ) -func ScrapeWhiskysitenl(shop Shop) []Angebot { +func (app *App) ScrapeWhiskysitenl(shop Shop) []Angebot { Whiskys := []Angebot{} c := colly.NewCollector( diff --git a/crawler/shop_whiskyworld.go b/crawler/shop_whiskyworld.go index 5235d3c..def22c6 100644 --- a/crawler/shop_whiskyworld.go +++ 
b/crawler/shop_whiskyworld.go @@ -6,7 +6,7 @@ import ( "github.com/gocolly/colly" ) -func ScrapeWhiskyworld(shop Shop) []Angebot { +func (app *App) ScrapeWhiskyworld(shop Shop) []Angebot { Shop_urls := []string{"https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BMalt%2522%257D", "https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BWhiskies%2522%257D", @@ -42,13 +42,17 @@ func ScrapeWhiskyworld(shop Shop) []Angebot { W.Original_price, err = convert_price(regular_price) if err != nil { - Fatal(err, "Whiskyworld: Converting original price failed") + W.error_msg = err.Error() + W.error_ctx = regular_price + WarnOffer(W, "Whiskyworld: Converting original price failed") return } W.Discounted_price, err = convert_price(e.ChildText(".uvp")) if err != nil { - Fatal(err, "Whiskyworld: Converting discounted price failed") + W.error_msg = err.Error() + W.error_ctx = e.ChildText(".uvp") + WarnOffer(W, "Whiskyworld: Converting discounted price failed") return } @@ -56,13 +60,31 @@ func ScrapeWhiskyworld(shop Shop) []Angebot { text_noisy := e.ChildText(".item-inh") W.Volume, err = extract_volume(text_noisy) if err != nil { - Fatal(err, "Whiskyworld: Extracting volume failed") + W.error_msg = err.Error() + W.error_ctx = text_noisy + WarnOffer(W, "Whiskyworld: Extracting volume failed") + return + } + if W.Volume == 0 { + W.error_msg = "Whiskyworld: Volume is zero" + W.error_ctx = text_noisy + WarnOffer(W, "Whiskyworld: Volume is zero") + return } abv_noisy := strings.TrimSpace(strings.SplitAfter(text_noisy, "Liter")[1]) abv_noisy = strings.TrimPrefix(abv_noisy, "/") W.Abv, err = extract_abv(abv_noisy) if err != nil { - Fatal(err, "Whiskyworld: Extracting abv failed") + W.error_msg = err.Error() + W.error_ctx = abv_noisy + WarnOffer(W, "Whiskyworld: Extracting abv failed") + return + } + if W.Abv == 0 { + W.error_msg = "Whiskyworld: Abv is zero" + W.error_ctx = abv_noisy + WarnOffer(W, 
"Whiskyworld: Abv is zero") + return } }) @@ -72,7 +94,10 @@ func ScrapeWhiskyworld(shop Shop) []Angebot { base_price_noisy = strings.TrimSpace(strings.SplitAfter(base_price_noisy, "Liter")[0]) W.Base_price, err = sanitize_base_price(base_price_noisy) if err != nil { - Fatal(err, "Whiskyworld: Sanitizing base price failed") + W.error_msg = err.Error() + W.error_ctx = base_price_noisy + WarnOffer(W, "Whiskyworld: Sanitizing base price failed") + return } } @@ -83,15 +108,6 @@ func ScrapeWhiskyworld(shop Shop) []Angebot { W.Shop = shop.Id W.Spirit_type = "Whisky" - if W.Volume == 0 { - DebugOffer(W, "Whiskyworld: Volume is zero") - return - } - if W.Abv == 0 { - DebugOffer(W, "Whiskyworld: Abv is zero") - return - } - e.Request.Visit(W.Url) W.Website = e.Request.Ctx.Get("website") diff --git a/crawler/shop_whiskyzone.go b/crawler/shop_whiskyzone.go index 8d86b8a..f01e93f 100644 --- a/crawler/shop_whiskyzone.go +++ b/crawler/shop_whiskyzone.go @@ -6,7 +6,7 @@ import ( "github.com/gocolly/colly" ) -func ScrapeWhiskyzone(shop Shop) []Angebot { +func (app *App) ScrapeWhiskyzone(shop Shop) []Angebot { Shop_url := "https://www.whiskyzone.de/widgets/emotion/index/emotionId/248/controllerName/listing" @@ -45,23 +45,33 @@ func ScrapeWhiskyzone(shop Shop) []Angebot { var err error W.Discounted_price, err = convert_price(e.Request.Ctx.Get("discounted_price")) if err != nil { - Fatal(err, "Whiskyzone: Convert discounted price failed") + W.error_msg = err.Error() + W.error_ctx = e.Request.Ctx.Get("discounted_price") + WarnOffer(W, "Whiskyzone: Convert discounted price failed") + return } W.Original_price, err = convert_price(e.Request.Ctx.Get("original_price")) if err != nil { - Fatal(err, "Whiskyzone: Convert original price failed") + W.error_msg = err.Error() + W.error_ctx = e.Request.Ctx.Get("original_price") + WarnOffer(W, "Whiskyzone: Convert original price failed") + return } - W.Volume = get_volume(e) - W.Abv = get_abv(e) - + var ctx string + W.Volume, ctx = get_volume(e) 
if W.Volume == 0 { - DebugOffer(W, "Whiskyzone: Volume is zero") + W.error_msg = "Whiskyzone: Volume is zero" + W.error_ctx = ctx + WarnOffer(W, "Whiskyzone: Volume is zero") return } + W.Abv, ctx = get_abv(e) if W.Abv == 0 { - DebugOffer(W, "Whiskyzone: Abv is zero") + W.error_msg = "Whiskyzone: Abv is zero" + W.error_ctx = ctx + WarnOffer(W, "Whiskyzone: Abv is zero") return } @@ -69,7 +79,13 @@ func ScrapeWhiskyzone(shop Shop) []Angebot { if base_price == "same_as_discounted_price" { W.Base_price = W.Discounted_price } else { - W.Base_price = get_base_price(e) + W.Base_price, err = get_base_price(e) + if err != nil { + W.error_msg = err.Error() + W.error_ctx = base_price + WarnOffer(W, "Whiskyzone: Extracting base price failed") + return + } } W.Website = e.Request.Ctx.Get("website") diff --git a/crawler/shops.go b/crawler/shops.go index 8cfd9f4..1babc9d 100644 --- a/crawler/shops.go +++ b/crawler/shops.go @@ -111,9 +111,7 @@ func (app *App) getShops() ([]Shop, error) { if err != nil { return []Shop{}, err } - if app.Config.Debug { - log.Println("Crawling: " + shop.Name) - } + log.Debug("Crawling: " + shop.Name) Shops = append(Shops, shop) } diff --git a/crawler/utility.go b/crawler/utility.go index 3c587b9..29f14d6 100644 --- a/crawler/utility.go +++ b/crawler/utility.go @@ -1,7 +1,7 @@ package main import ( - log "github.com/Sirupsen/logrus" + "errors" "regexp" "strconv" "strings" @@ -131,7 +131,7 @@ func extract_abv(abv_noisy string) (float32, error) { /* * In litre, but float. 
*/ -func get_volume(e *colly.HTMLElement) float32 { +func get_volume(e *colly.HTMLElement) (float32, string) { volume_noisy := e.Request.Ctx.Get("volume") @@ -140,8 +140,7 @@ func get_volume(e *colly.HTMLElement) float32 { Fatal(err, "Get volume regex failed") } if !matched { - log.Debug("get_volume: not matched: " + volume_noisy) - return 0 + return 0, volume_noisy } volume, err := extract_volume(volume_noisy) @@ -149,18 +148,18 @@ func get_volume(e *colly.HTMLElement) float32 { Fatal(err, "Get Volume: Extract Volume failed: "+volume_noisy) } - return volume + return volume, "" } /* * In procent. (float) */ -func get_abv(e *colly.HTMLElement) float32 { +func get_abv(e *colly.HTMLElement) (float32, string) { abv_noisy := e.Request.Ctx.Get("abv") if abv_noisy == "" { - return 0 + return 0, abv_noisy } // abv_noisy = strings.Replace(abv_noisy, ".", ",", 1) @@ -169,24 +168,24 @@ func get_abv(e *colly.HTMLElement) float32 { Fatal(err, "Get ABV: Extract ABV failed: "+abv_noisy) } - return abv + return abv, "" } /* * In cents. (int) */ -func get_base_price(e *colly.HTMLElement) int { +func get_base_price(e *colly.HTMLElement) (int, error) { base_price_noisy := e.Request.Ctx.Get("base_price") if base_price_noisy == "" { - return 0 + return 0, errors.New("Base price empty") } base_price, err := sanitize_base_price(base_price_noisy) if err != nil { - Fatal(err, "Get base price: sanitize base price failed") + return 0, err } - return base_price + return base_price, nil } |
