diff options
| author | Max | 2018-07-09 23:46:44 +0200 |
|---|---|---|
| committer | Max | 2018-07-09 23:46:44 +0200 |
| commit | 937a0d4b6a545836e4b4fc1ec9ce5eebb7c8ba33 (patch) | |
| tree | 4fe04d19c5c2d1450a8029eb8968d8bcc89b0553 /crawler | |
| parent | 62e921195fac9653024f1a68e37119c4f6e3d0b4 (diff) | |
| download | alkobote-937a0d4b6a545836e4b4fc1ec9ce5eebb7c8ba33.tar.gz | |
Improves name sanitizing. (crawler)
Diffstat (limited to 'crawler')
| -rw-r--r-- | crawler/sanitize.go | 24 |
1 file changed, 16 insertions, 8 deletions
```diff
diff --git a/crawler/sanitize.go b/crawler/sanitize.go
index d67b32b..b6af903 100644
--- a/crawler/sanitize.go
+++ b/crawler/sanitize.go
@@ -71,31 +71,39 @@ func sanitize_offer(angebote []Angebot, shop Shop, try int) []Angebot {
 func sanitize_name(name string) string {
 
 	if strings.Contains(name, "Literflasche") {
-		name = strings.Replace(name, "Literflasche", "", 1)
+		name = strings.Replace(name, "Literflasche", "", -1)
 	}
 
 	if strings.Contains(name, "y.o.") {
-		name = strings.Replace(name, "y.o.", "Jahre", 1)
+		name = strings.Replace(name, "y.o.", "Jahre", -1)
 	}
 
 	if strings.Contains(name, "years old") {
-		name = strings.Replace(name, "years old", "Jahre", 1)
+		name = strings.Replace(name, "years old", "Jahre", -1)
 	}
 
 	if strings.Contains(name, "years") {
-		name = strings.Replace(name, "years", "Jahre", 1)
+		name = strings.Replace(name, "years", "Jahre", -1)
 	}
 
 	if strings.Contains(name, "Years Old") {
-		name = strings.Replace(name, "Years Old", "Jahre", 1)
+		name = strings.Replace(name, "Years Old", "Jahre", -1)
 	}
 
 	if strings.Contains(name, " Anos ") {
-		name = strings.Replace(name, " Anos ", " Jahre ", 1)
+		name = strings.Replace(name, " Anos ", " Jahre ", -1)
 	}
 
 	if strings.Contains(name, " anos ") {
-		name = strings.Replace(name, " anos ", " Jahre ", 1)
+		name = strings.Replace(name, " anos ", " Jahre ", -1)
+	}
+
+	if strings.Contains(name, " Vol. ") {
+		name = strings.Replace(name, " Vol. ", " ", -1)
+	}
+
+	if strings.Contains(name, " vol. ") {
+		name = strings.Replace(name, " vol. ", " ", -1)
 	}
 
 	r_J, err := regexp.Compile(`[0-9]+(\s)*J(\s|-)`)
@@ -112,7 +120,7 @@ func sanitize_name(name string) string {
 		name = strings.Replace(name, age_noisy, age+" Jahre ", 1)
 	}
 
-	r_liter, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?[cC]?[lL]((iter)|(tr))?`)
+	r_liter, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?[cC]?[lL]((iter)|(tr))?(\s|$)`)
 	if err != nil {
 		Fatal(err, "sanitize_name: Liter-Regexp failed")
 	}
```
