From e3312da52d0ede1c5f783feba0840c53eeca723d Mon Sep 17 00:00:00 2001 From: horus_arch Date: Mon, 19 Feb 2018 15:15:45 +0100 Subject: Bugfix + detects age. (crawler) --- crawler/sanitize.go | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'crawler/sanitize.go') diff --git a/crawler/sanitize.go b/crawler/sanitize.go index 8d84507..9025f87 100644 --- a/crawler/sanitize.go +++ b/crawler/sanitize.go @@ -2,6 +2,7 @@ package main import ( "regexp" + "strconv" "strings" log "github.com/Sirupsen/logrus" @@ -12,8 +13,11 @@ func sanitize_offer(angebote []Angebot, shop Shop) []Angebot { var W []Angebot for _, offer := range angebote { + offer.Name = sanitize_name(offer.Name) + offer.Age = get_age_from_name(offer.Name) + if false == _check_abv_for_spirit_type(offer) { continue } @@ -182,3 +186,27 @@ func _check_abv_for_spirit_type(offer Angebot) bool { return true } + +func get_age_from_name(name string) int { + r_years, err := regexp.Compile(`[0-9]+\s*Jahre`) + if err != nil { + Fatal(err, "get_age_from_name: Years regexp failed") + } + age_noisy := r_years.FindString(name) + if age_noisy == "" { + log.Debug("get_age_from_name: No Age found in (" + name + ")") + return 0 + } + + r, err := regexp.Compile(`[0-9]+`) + if err != nil { + Fatal(err, "get_age_from_name: Numbers regexp failed") + } + age_noisy = r.FindString(age_noisy) + + age, err := strconv.Atoi(age_noisy) + if err != nil { + Fatal(err, "get_age_from_name: String to int (atoi) failed") + } + return age +} -- cgit v1.2.3