From bcdea2f8e95f5305625a773223829478c8c13bed Mon Sep 17 00:00:00 2001 From: horus_arch Date: Sat, 17 Feb 2018 13:51:35 +0100 Subject: Introduces context on errors. (crawler) --- crawler/shop_rumundco.go | 50 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 14 deletions(-) (limited to 'crawler/shop_rumundco.go') diff --git a/crawler/shop_rumundco.go b/crawler/shop_rumundco.go index 25b89bd..58de518 100644 --- a/crawler/shop_rumundco.go +++ b/crawler/shop_rumundco.go @@ -8,7 +8,7 @@ import ( "github.com/gocolly/colly" ) -func ScrapeRumundCo(shop Shop) []Angebot { +func (app *App) ScrapeRumundCo(shop Shop) []Angebot { Shop_url := "https://www.rumundco.de/navi.php?q=4&kf=29&kk-suesse-von=0&kk-suesse-bis=100&kk-milde-von=0&kk-milde-bis=100&kk-wuerze-von=0&kk-wuerze-bis=100&kk-frucht-von=0&kk-frucht-bis=100&kk-torf-von=0&kk-torf-bis=100&hf=0&af=90&Sortierung=11&a=350" @@ -35,6 +35,9 @@ func ScrapeRumundCo(shop Shop) []Angebot { Fatal(err, "Rum & Co: Verfügbar regex failed") } if !matched { + W.error_msg = "Rum & Co: Offer not available" + W.error_ctx = e.ChildText(".delivery-status") + WarnOffer(W, "Rum & Co: Offer not available") return } @@ -50,23 +53,34 @@ func ScrapeRumundCo(shop Shop) []Angebot { e.ForEach(".price_wrapper", func(i int, e *colly.HTMLElement) { regular_price := e.ChildText("del.value") if "" == regular_price { - PrintlnOffer(W, "Rum & Co: No regular price found") + W.error_msg = "Rum & Co: No regular price found" + W.error_ctx = regular_price + WarnOffer(W, "Rum & Co: No regular price found") return } W.Original_price, err = convert_price(regular_price) if err != nil { - Fatal(err, "Rum & Co: Original price: Convert price failed") + W.error_msg = err.Error() + W.error_ctx = regular_price + WarnOffer(W, "Rum & Co: Original price: Convert price failed") + return } W.Discounted_price, err = convert_price(e.ChildText(".price-value")) if err != nil { - Fatal(err, "Rum & Co: Discounted price: Convert price failed") + W.error_msg = err.Error() + W.error_ctx = e.ChildText(".price-value") + WarnOffer(W, "Rum & Co: Discounted price: Convert price failed") + return } e.ForEach(".base_price", func(i int, e *colly.HTMLElement) { price_per_litre_noisy := e.ChildText(".value") W.Base_price, err = sanitize_base_price(price_per_litre_noisy) if err != nil { - Fatal(err, "Rum & Co: Base price: Sanitizing base price failed") + W.error_msg = err.Error() + W.error_ctx = e.ChildText(".value") + WarnOffer(W, "Rum & Co: Base price: Sanitizing base price failed") + return } }) @@ -82,7 +96,9 @@ func ScrapeRumundCo(shop Shop) []Angebot { } image_url_noisy_slice := r_pagespeed.FindStringSubmatch(image_url_noisy) if len(image_url_noisy_slice) < 2 { - PrintlnOffer(W, "Rum & Co: (Pagespeed) Image URL not found") + W.error_msg = "Rum & Co: (Pagespeed) Image URL not found" + W.error_ctx = image_url_noisy + WarnOffer(W, "Rum & Co: (Pagespeed) Image URL not found") return } image_url_noisy = strings.Replace(image_url_noisy, image_url_noisy_slice[1], "", 1) @@ -92,23 +108,32 @@ func ScrapeRumundCo(shop Shop) []Angebot { e.Request.Visit(W.Url) - W.Volume = get_volume(e) + var ctx string + W.Volume, ctx = get_volume(e) if W.Volume == 0 { - DebugOffer(W, "Rum & Co: Volume is zero") + W.error_msg = "Rum & Co: Volume is zero" + W.error_ctx = ctx + WarnOffer(W, "Rum & Co: Volume is zero") return } if "" == abv_noisy { - W.Abv = get_abv(e) + W.Abv, ctx = get_abv(e) + abv_noisy = ctx } else { W.Abv, err = extract_abv(abv_noisy) if err != nil { - Fatal(err, "Rum & Co: Base price: Extracting ABV failed") + W.error_msg = err.Error() + W.error_ctx = abv_noisy + WarnOffer(W, "Rum & Co: Base price: Extracting ABV failed") + return } } if W.Abv == 0 { - DebugOffer(W, "Rum & Co: Abv is zero") + W.error_msg = "Rum & Co: Abv is zero" + W.error_ctx = abv_noisy + WarnOffer(W, "Rum & Co: Abv is zero") return } @@ -124,10 +149,7 @@ func ScrapeRumundCo(shop Shop) []Angebot { e.ForEach("tr", func(i int, e *colly.HTMLElement) { text_noisy := e.ChildText("th") - //log.Println("Visiting (" + e.Request.URL.String() + "). Found: " + text_noisy + " END") - if strings.Contains(text_noisy, "Genauer Inhalt:") { - //log.Println("Visiting (" + e.Request.URL.String() + "). Found (V): " + e.ChildText("td") + " END") e.Request.Ctx.Put("volume", e.ChildText("td")) } else if strings.Contains(text_noisy, "Alkoholgehalt in %:") { e.Request.Ctx.Put("abv", e.ChildText("a")) -- cgit v1.2.3