diff options
| author | horus_arch | 2018-02-19 15:15:45 +0100 |
|---|---|---|
| committer | horus_arch | 2018-02-19 15:15:45 +0100 |
| commit | e3312da52d0ede1c5f783feba0840c53eeca723d (patch) | |
| tree | 5eecf493a0c2bbd0d5debe4fd2fb73cfd714c9b0 /crawler/sanitize.go | |
| parent | 8700aaaec582744a04af65eeff7e9ff8555b194a (diff) | |
| download | alkobote-e3312da52d0ede1c5f783feba0840c53eeca723d.tar.gz | |
Bugfix + detects age. (crawler)
Diffstat (limited to 'crawler/sanitize.go')
| -rw-r--r-- | crawler/sanitize.go | 28 |
1 files changed, 28 insertions, 0 deletions
diff --git a/crawler/sanitize.go b/crawler/sanitize.go index 8d84507..9025f87 100644 --- a/crawler/sanitize.go +++ b/crawler/sanitize.go @@ -2,6 +2,7 @@ package main import ( "regexp" + "strconv" "strings" log "github.com/Sirupsen/logrus" @@ -12,8 +13,11 @@ func sanitize_offer(angebote []Angebot, shop Shop) []Angebot { var W []Angebot for _, offer := range angebote { + offer.Name = sanitize_name(offer.Name) + offer.Age = get_age_from_name(offer.Name) + if false == _check_abv_for_spirit_type(offer) { continue } @@ -182,3 +186,27 @@ func _check_abv_for_spirit_type(offer Angebot) bool { return true } + +func get_age_from_name(name string) int { + r_years, err := regexp.Compile(`[0-9]+\s*Jahre`) + if err != nil { + Fatal(err, "get_age_from_name: Years regexp failed") + } + age_noisy := r_years.FindString(name) + if age_noisy == "" { + log.Debug("get_age_from_name: No Age found in (" + name + ")") + return 0 + } + + r, err := regexp.Compile(`[0-9]+`) + if err != nil { + Fatal(err, "get_age_from_name: Numbers regexp failed") + } + age_noisy = r.FindString(age_noisy) + + age, err := strconv.Atoi(age_noisy) + if err != nil { + Fatal(err, "get_age_from_name: String to int (atoi) failed") + } + return age +} |
