diff options
Diffstat (limited to 'crawler')
| -rw-r--r-- | crawler/sanitize.go | 24 |
1 files changed, 16 insertions, 8 deletions
diff --git a/crawler/sanitize.go b/crawler/sanitize.go index d67b32b..b6af903 100644 --- a/crawler/sanitize.go +++ b/crawler/sanitize.go @@ -71,31 +71,39 @@ func sanitize_offer(angebote []Angebot, shop Shop, try int) []Angebot { func sanitize_name(name string) string { if strings.Contains(name, "Literflasche") { - name = strings.Replace(name, "Literflasche", "", 1) + name = strings.Replace(name, "Literflasche", "", -1) } if strings.Contains(name, "y.o.") { - name = strings.Replace(name, "y.o.", "Jahre", 1) + name = strings.Replace(name, "y.o.", "Jahre", -1) } if strings.Contains(name, "years old") { - name = strings.Replace(name, "years old", "Jahre", 1) + name = strings.Replace(name, "years old", "Jahre", -1) } if strings.Contains(name, "years") { - name = strings.Replace(name, "years", "Jahre", 1) + name = strings.Replace(name, "years", "Jahre", -1) } if strings.Contains(name, "Years Old") { - name = strings.Replace(name, "Years Old", "Jahre", 1) + name = strings.Replace(name, "Years Old", "Jahre", -1) } if strings.Contains(name, " Anos ") { - name = strings.Replace(name, " Anos ", " Jahre ", 1) + name = strings.Replace(name, " Anos ", " Jahre ", -1) } if strings.Contains(name, " anos ") { - name = strings.Replace(name, " anos ", " Jahre ", 1) + name = strings.Replace(name, " anos ", " Jahre ", -1) + } + + if strings.Contains(name, " Vol. ") { + name = strings.Replace(name, " Vol. ", " ", -1) + } + + if strings.Contains(name, " vol. ") { + name = strings.Replace(name, " vol. ", " ", -1) } r_J, err := regexp.Compile(`[0-9]+(\s)*J(\s|-)`) @@ -112,7 +120,7 @@ func sanitize_name(name string) string { name = strings.Replace(name, age_noisy, age+" Jahre ", 1) } - r_liter, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?[cC]?[lL]((iter)|(tr))?`) + r_liter, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?[cC]?[lL]((iter)|(tr))?(\s|$)`) if err != nil { Fatal(err, "sanitize_name: Liter-Regexp failed") } |
