summary | refs | log | tree | commit | diff
path: root/crawler/sanitize.go
diff options
context:
space:
mode:
author: Max, 2018-07-09 23:46:44 +0200
committer: Max, 2018-07-09 23:46:44 +0200
commit: 937a0d4b6a545836e4b4fc1ec9ce5eebb7c8ba33 (patch)
tree: 4fe04d19c5c2d1450a8029eb8968d8bcc89b0553 /crawler/sanitize.go
parent: 62e921195fac9653024f1a68e37119c4f6e3d0b4 (diff)
download: alkobote-937a0d4b6a545836e4b4fc1ec9ce5eebb7c8ba33.tar.gz
Improves name sanitizing. (crawler)
Diffstat (limited to 'crawler/sanitize.go')
-rw-r--r-- crawler/sanitize.go 24
1 files changed, 16 insertions, 8 deletions
diff --git a/crawler/sanitize.go b/crawler/sanitize.go
index d67b32b..b6af903 100644
--- a/crawler/sanitize.go
+++ b/crawler/sanitize.go
@@ -71,31 +71,39 @@ func sanitize_offer(angebote []Angebot, shop Shop, try int) []Angebot {
func sanitize_name(name string) string {
if strings.Contains(name, "Literflasche") {
- name = strings.Replace(name, "Literflasche", "", 1)
+ name = strings.Replace(name, "Literflasche", "", -1)
}
if strings.Contains(name, "y.o.") {
- name = strings.Replace(name, "y.o.", "Jahre", 1)
+ name = strings.Replace(name, "y.o.", "Jahre", -1)
}
if strings.Contains(name, "years old") {
- name = strings.Replace(name, "years old", "Jahre", 1)
+ name = strings.Replace(name, "years old", "Jahre", -1)
}
if strings.Contains(name, "years") {
- name = strings.Replace(name, "years", "Jahre", 1)
+ name = strings.Replace(name, "years", "Jahre", -1)
}
if strings.Contains(name, "Years Old") {
- name = strings.Replace(name, "Years Old", "Jahre", 1)
+ name = strings.Replace(name, "Years Old", "Jahre", -1)
}
if strings.Contains(name, " Anos ") {
- name = strings.Replace(name, " Anos ", " Jahre ", 1)
+ name = strings.Replace(name, " Anos ", " Jahre ", -1)
}
if strings.Contains(name, " anos ") {
- name = strings.Replace(name, " anos ", " Jahre ", 1)
+ name = strings.Replace(name, " anos ", " Jahre ", -1)
+ }
+
+ if strings.Contains(name, " Vol. ") {
+ name = strings.Replace(name, " Vol. ", " ", -1)
+ }
+
+ if strings.Contains(name, " vol. ") {
+ name = strings.Replace(name, " vol. ", " ", -1)
}
r_J, err := regexp.Compile(`[0-9]+(\s)*J(\s|-)`)
@@ -112,7 +120,7 @@ func sanitize_name(name string) string {
name = strings.Replace(name, age_noisy, age+" Jahre ", 1)
}
- r_liter, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?[cC]?[lL]((iter)|(tr))?`)
+ r_liter, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?[cC]?[lL]((iter)|(tr))?(\s|$)`)
if err != nil {
Fatal(err, "sanitize_name: Liter-Regexp failed")
}