summary | refs | log | tree | commit | diff
path: root/crawler/sanitize.go
diff options
context:
space:
mode:
author horus_arch 2018-02-19 15:15:45 +0100
committer horus_arch 2018-02-19 15:15:45 +0100
commit e3312da52d0ede1c5f783feba0840c53eeca723d (patch)
tree 5eecf493a0c2bbd0d5debe4fd2fb73cfd714c9b0 /crawler/sanitize.go
parent 8700aaaec582744a04af65eeff7e9ff8555b194a (diff)
download alkobote-e3312da52d0ede1c5f783feba0840c53eeca723d.tar.gz
Bugfix + detects age. (crawler)
Diffstat (limited to 'crawler/sanitize.go')
-rw-r--r-- crawler/sanitize.go 28
1 files changed, 28 insertions, 0 deletions
diff --git a/crawler/sanitize.go b/crawler/sanitize.go
index 8d84507..9025f87 100644
--- a/crawler/sanitize.go
+++ b/crawler/sanitize.go
@@ -2,6 +2,7 @@ package main
import (
"regexp"
+ "strconv"
"strings"
log "github.com/Sirupsen/logrus"
@@ -12,8 +13,11 @@ func sanitize_offer(angebote []Angebot, shop Shop) []Angebot {
var W []Angebot
for _, offer := range angebote {
+
offer.Name = sanitize_name(offer.Name)
+ offer.Age = get_age_from_name(offer.Name)
+
if false == _check_abv_for_spirit_type(offer) {
continue
}
@@ -182,3 +186,27 @@ func _check_abv_for_spirit_type(offer Angebot) bool {
return true
}
+
+func get_age_from_name(name string) int {
+ r_years, err := regexp.Compile(`[0-9]+\s*Jahre`)
+ if err != nil {
+ Fatal(err, "get_age_from_name: Years regexp failed")
+ }
+ age_noisy := r_years.FindString(name)
+ if age_noisy == "" {
+ log.Debug("get_age_from_name: No Age found in (" + name + ")")
+ return 0
+ }
+
+ r, err := regexp.Compile(`[0-9]+`)
+ if err != nil {
+ Fatal(err, "get_age_from_name: Numbers regexp failed")
+ }
+ age_noisy = r.FindString(age_noisy)
+
+ age, err := strconv.Atoi(age_noisy)
+ if err != nil {
+ Fatal(err, "get_age_from_name: String to int (atoi) failed")
+ }
+ return age
+}