summary | refs | log | tree | commit | diff
path: root/crawler/sanitize.go
diff options
context:
space:
mode:
author: horus, 2018-02-16 18:06:50 +0100
committer: horus, 2018-02-16 18:06:50 +0100
commit: bf5f6b98a1d933d5f0ffb7fe965428f4dab5e3b0 (patch)
tree: c95eb6426b61965b37da2b60da36cfe2c02a92b4 /crawler/sanitize.go
parent: ed6ab4da59f80bf9fa2cbf15da5c9167dff44ea4 (diff)
download: alkobote-bf5f6b98a1d933d5f0ffb7fe965428f4dab5e3b0.tar.gz
Structured logging part two. (crawler)
Diffstat (limited to 'crawler/sanitize.go')
-rw-r--r-- crawler/sanitize.go 35
1 files changed, 24 insertions, 11 deletions
diff --git a/crawler/sanitize.go b/crawler/sanitize.go
index 949e0f0..4f76c69 100644
--- a/crawler/sanitize.go
+++ b/crawler/sanitize.go
@@ -1,12 +1,13 @@
package main
import (
- log "github.com/Sirupsen/logrus"
"regexp"
"strings"
+
+ log "github.com/Sirupsen/logrus"
)
-func sanitize_offer(angebote []Angebot) []Angebot {
+func sanitize_offer(angebote []Angebot, shop Shop) []Angebot {
var W []Angebot
@@ -14,23 +15,31 @@ func sanitize_offer(angebote []Angebot) []Angebot {
offer.Name = sanitize_name(offer.Name)
if offer.Abv == 0 {
- log.Println("sanitize.go: abv zero: " + offer.Name + "( " + offer.Url + ")")
+ DebugOffer(offer, "Sanitizer: Abv is zero")
continue
}
if offer.Volume == 0 {
- log.Println("sanitize.go: volume zero: " + offer.Name + "( " + offer.Url + ")")
+ DebugOffer(offer, "Sanitizer: Volume is zero")
continue
}
if offer.Discounted_price == 0 {
- log.Println("sanitize.go: discounted_price zero: " + offer.Name + "( " + offer.Url + ")")
+ DebugOffer(offer, "Sanitizer: Discounted price is zero")
continue
}
if offer.Original_price == 0 {
- log.Println("sanitize.go: original_price zero: " + offer.Name + "( " + offer.Url + ")")
+ DebugOffer(offer, "Sanitizer: Original price is zero")
continue
}
if offer.Base_price == 0 {
- log.Println("sanitize.go: base_price zero: " + offer.Name + "( " + offer.Url + ")")
+ DebugOffer(offer, "Sanitizer: Base price is zero")
+ continue
+ }
+ if offer.Url == "" {
+ DebugOffer(offer, "Sanitizer: URL is empty")
+ continue
+ }
+ if offer.Image_url == "" {
+ DebugOffer(offer, "Sanitizer: Image-URL is empty")
continue
}
@@ -39,6 +48,10 @@ func sanitize_offer(angebote []Angebot) []Angebot {
W = append(W, offer)
}
+ if len(W) < 1 {
+ log.Warn("Sanitizer: No results for shop: " + shop.Name)
+ }
+
return W
}
@@ -57,7 +70,7 @@ func sanitize_name(name string) string {
r_liter, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?[lL](iter)?`)
if err != nil {
- log.Fatal(err)
+ Fatal(err, "sanitize_name: Liter-Regexp failed")
}
for {
name_liter := r_liter.FindString(name)
@@ -80,7 +93,7 @@ func sanitize_name(name string) string {
r_procent, err := regexp.Compile(`[0-9]+([,.][0-9]+)?\%`)
if err != nil {
- log.Fatal(err)
+ Fatal(err, "sanitize_name: Procent-Regexp failed")
}
for {
name_procent := r_procent.FindString(name)
@@ -93,7 +106,7 @@ func sanitize_name(name string) string {
r_release, err := regexp.Compile(`Release$`)
if err != nil {
- log.Fatal(err)
+ Fatal(err, "sanitize_name: Release-Regexp failed")
}
name_release := r_release.FindString(name)
name = strings.Replace(name, name_release, "", 1)
@@ -101,7 +114,7 @@ func sanitize_name(name string) string {
r_2x, err := regexp.Compile(`[0-9]+( )*[xX]( )`)
if err != nil {
- log.Fatal(err)
+ Fatal(err, "sanitize_name: '2x'-Regexp failed")
}
for {
name_2x := r_2x.FindString(name)