From bf5f6b98a1d933d5f0ffb7fe965428f4dab5e3b0 Mon Sep 17 00:00:00 2001 From: horus Date: Fri, 16 Feb 2018 18:06:50 +0100 Subject: Structured logging part two. (crawler) --- crawler/sanitize.go | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'crawler/sanitize.go') diff --git a/crawler/sanitize.go b/crawler/sanitize.go index 949e0f0..4f76c69 100644 --- a/crawler/sanitize.go +++ b/crawler/sanitize.go @@ -1,12 +1,13 @@ package main import ( - log "github.com/Sirupsen/logrus" "regexp" "strings" + + log "github.com/Sirupsen/logrus" ) -func sanitize_offer(angebote []Angebot) []Angebot { +func sanitize_offer(angebote []Angebot, shop Shop) []Angebot { var W []Angebot @@ -14,23 +15,31 @@ func sanitize_offer(angebote []Angebot) []Angebot { offer.Name = sanitize_name(offer.Name) if offer.Abv == 0 { - log.Println("sanitize.go: abv zero: " + offer.Name + "( " + offer.Url + ")") + DebugOffer(offer, "Sanitizer: Abv is zero") continue } if offer.Volume == 0 { - log.Println("sanitize.go: volume zero: " + offer.Name + "( " + offer.Url + ")") + DebugOffer(offer, "Sanitizer: Volume is zero") continue } if offer.Discounted_price == 0 { - log.Println("sanitize.go: discounted_price zero: " + offer.Name + "( " + offer.Url + ")") + DebugOffer(offer, "Sanitizer: Discounted price is zero") continue } if offer.Original_price == 0 { - log.Println("sanitize.go: original_price zero: " + offer.Name + "( " + offer.Url + ")") + DebugOffer(offer, "Sanitizer: Original price is zero") continue } if offer.Base_price == 0 { - log.Println("sanitize.go: base_price zero: " + offer.Name + "( " + offer.Url + ")") + DebugOffer(offer, "Sanitizer: Base price is zero") + continue + } + if offer.Url == "" { + DebugOffer(offer, "Sanitizer: URL is empty") + continue + } + if offer.Image_url == "" { + DebugOffer(offer, "Sanitizer: Image-URL is empty") continue } @@ -39,6 +48,10 @@ func sanitize_offer(angebote []Angebot) []Angebot { W = append(W, offer) } + if len(W) < 1 { + log.Warn("Sanitizer: No results for shop: " + shop.Name) + } + return W } @@ -57,7 +70,7 @@ func sanitize_name(name string) string { r_liter, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?[lL](iter)?`) if err != nil { - log.Fatal(err) + Fatal(err, "sanitize_name: Liter-Regexp failed") } for { name_liter := r_liter.FindString(name) @@ -80,7 +93,7 @@ func sanitize_name(name string) string { r_procent, err := regexp.Compile(`[0-9]+([,.][0-9]+)?\%`) if err != nil { - log.Fatal(err) + Fatal(err, "sanitize_name: Procent-Regexp failed") } for { name_procent := r_procent.FindString(name) @@ -93,7 +106,7 @@ func sanitize_name(name string) string { r_release, err := regexp.Compile(`Release$`) if err != nil { - log.Fatal(err) + Fatal(err, "sanitize_name: Release-Regexp failed") } name_release := r_release.FindString(name) name = strings.Replace(name, name_release, "", 1) @@ -101,7 +114,7 @@ func sanitize_name(name string) string { r_2x, err := regexp.Compile(`[0-9]+( )*[xX]( )`) if err != nil { - log.Fatal(err) + Fatal(err, "sanitize_name: '2x'-Regexp failed") } for { name_2x := r_2x.FindString(name) -- cgit v1.2.3