diff options
| author | horus | 2018-02-16 18:06:50 +0100 |
|---|---|---|
| committer | horus | 2018-02-16 18:06:50 +0100 |
| commit | bf5f6b98a1d933d5f0ffb7fe965428f4dab5e3b0 (patch) | |
| tree | c95eb6426b61965b37da2b60da36cfe2c02a92b4 /crawler/sanitize.go | |
| parent | ed6ab4da59f80bf9fa2cbf15da5c9167dff44ea4 (diff) | |
| download | alkobote-bf5f6b98a1d933d5f0ffb7fe965428f4dab5e3b0.tar.gz | |
Structured logging part two. (crawler)
Diffstat (limited to 'crawler/sanitize.go')
| -rw-r--r-- | crawler/sanitize.go | 35 |
1 file changed, 24 insertions, 11 deletions
```diff
diff --git a/crawler/sanitize.go b/crawler/sanitize.go
index 949e0f0..4f76c69 100644
--- a/crawler/sanitize.go
+++ b/crawler/sanitize.go
@@ -1,12 +1,13 @@
 package main
 
 import (
-	log "github.com/Sirupsen/logrus"
 	"regexp"
 	"strings"
+
+	log "github.com/Sirupsen/logrus"
 )
 
-func sanitize_offer(angebote []Angebot) []Angebot {
+func sanitize_offer(angebote []Angebot, shop Shop) []Angebot {
 
 	var W []Angebot
 
@@ -14,23 +15,31 @@ func sanitize_offer(angebote []Angebot) []Angebot {
 		offer.Name = sanitize_name(offer.Name)
 
 		if offer.Abv == 0 {
-			log.Println("sanitize.go: abv zero: " + offer.Name + "( " + offer.Url + ")")
+			DebugOffer(offer, "Sanitizer: Abv is zero")
 			continue
 		}
 		if offer.Volume == 0 {
-			log.Println("sanitize.go: volume zero: " + offer.Name + "( " + offer.Url + ")")
+			DebugOffer(offer, "Sanitizer: Volume is zero")
 			continue
 		}
 		if offer.Discounted_price == 0 {
-			log.Println("sanitize.go: discounted_price zero: " + offer.Name + "( " + offer.Url + ")")
+			DebugOffer(offer, "Sanitizer: Discounted price is zero")
 			continue
 		}
 		if offer.Original_price == 0 {
-			log.Println("sanitize.go: original_price zero: " + offer.Name + "( " + offer.Url + ")")
+			DebugOffer(offer, "Sanitizer: Original price is zero")
 			continue
 		}
 		if offer.Base_price == 0 {
-			log.Println("sanitize.go: base_price zero: " + offer.Name + "( " + offer.Url + ")")
+			DebugOffer(offer, "Sanitizer: Base price is zero")
+			continue
+		}
+		if offer.Url == "" {
+			DebugOffer(offer, "Sanitizer: URL is empty")
+			continue
+		}
+		if offer.Image_url == "" {
+			DebugOffer(offer, "Sanitizer: Image-URL is empty")
 			continue
 		}
 
@@ -39,6 +48,10 @@ func sanitize_offer(angebote []Angebot) []Angebot {
 		W = append(W, offer)
 	}
 
+	if len(W) < 1 {
+		log.Warn("Sanitizer: No results for shop: " + shop.Name)
+	}
+
 	return W
 }
 
@@ -57,7 +70,7 @@ func sanitize_name(name string) string {
 
 	r_liter, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?[lL](iter)?`)
 	if err != nil {
-		log.Fatal(err)
+		Fatal(err, "sanitize_name: Liter-Regexp failed")
 	}
 	for {
 		name_liter := r_liter.FindString(name)
@@ -80,7 +93,7 @@ func sanitize_name(name string) string {
 
 	r_procent, err := regexp.Compile(`[0-9]+([,.][0-9]+)?\%`)
 	if err != nil {
-		log.Fatal(err)
+		Fatal(err, "sanitize_name: Procent-Regexp failed")
 	}
 	for {
 		name_procent := r_procent.FindString(name)
@@ -93,7 +106,7 @@ func sanitize_name(name string) string {
 
 	r_release, err := regexp.Compile(`Release$`)
 	if err != nil {
-		log.Fatal(err)
+		Fatal(err, "sanitize_name: Release-Regexp failed")
 	}
 	name_release := r_release.FindString(name)
 	name = strings.Replace(name, name_release, "", 1)
@@ -101,7 +114,7 @@ func sanitize_name(name string) string {
 
 	r_2x, err := regexp.Compile(`[0-9]+( )*[xX]( )`)
 	if err != nil {
-		log.Fatal(err)
+		Fatal(err, "sanitize_name: '2x'-Regexp failed")
 	}
 	for {
 		name_2x := r_2x.FindString(name)
```
