From 937a0d4b6a545836e4b4fc1ec9ce5eebb7c8ba33 Mon Sep 17 00:00:00 2001 From: Max Date: Mon, 9 Jul 2018 23:46:44 +0200 Subject: Improves name sanitizing. (crawler) --- crawler/sanitize.go | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'crawler/sanitize.go') diff --git a/crawler/sanitize.go b/crawler/sanitize.go index d67b32b..b6af903 100644 --- a/crawler/sanitize.go +++ b/crawler/sanitize.go @@ -71,31 +71,39 @@ func sanitize_offer(angebote []Angebot, shop Shop, try int) []Angebot { func sanitize_name(name string) string { if strings.Contains(name, "Literflasche") { - name = strings.Replace(name, "Literflasche", "", 1) + name = strings.Replace(name, "Literflasche", "", -1) } if strings.Contains(name, "y.o.") { - name = strings.Replace(name, "y.o.", "Jahre", 1) + name = strings.Replace(name, "y.o.", "Jahre", -1) } if strings.Contains(name, "years old") { - name = strings.Replace(name, "years old", "Jahre", 1) + name = strings.Replace(name, "years old", "Jahre", -1) } if strings.Contains(name, "years") { - name = strings.Replace(name, "years", "Jahre", 1) + name = strings.Replace(name, "years", "Jahre", -1) } if strings.Contains(name, "Years Old") { - name = strings.Replace(name, "Years Old", "Jahre", 1) + name = strings.Replace(name, "Years Old", "Jahre", -1) } if strings.Contains(name, " Anos ") { - name = strings.Replace(name, " Anos ", " Jahre ", 1) + name = strings.Replace(name, " Anos ", " Jahre ", -1) } if strings.Contains(name, " anos ") { - name = strings.Replace(name, " anos ", " Jahre ", 1) + name = strings.Replace(name, " anos ", " Jahre ", -1) + } + + if strings.Contains(name, " Vol. ") { + name = strings.Replace(name, " Vol. ", " ", -1) + } + + if strings.Contains(name, " vol. ") { + name = strings.Replace(name, " vol. ", " ", -1) } r_J, err := regexp.Compile(`[0-9]+(\s)*J(\s|-)`) @@ -112,7 +120,7 @@ func sanitize_name(name string) string { name = strings.Replace(name, age_noisy, age+" Jahre ", 1) } - r_liter, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?[cC]?[lL]((iter)|(tr))?`) + r_liter, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?[cC]?[lL]((iter)|(tr))?(\s|$)`) if err != nil { Fatal(err, "sanitize_name: Liter-Regexp failed") } -- cgit v1.2.3