summary refs log tree commit diff
path: root/crawler/sanitize.go
diff options
context:
space:
mode:
Diffstat (limited to 'crawler/sanitize.go')
-rw-r--r-- crawler/sanitize.go 36
1 files changed, 36 insertions, 0 deletions
diff --git a/crawler/sanitize.go b/crawler/sanitize.go
index 26d254d..346100a 100644
--- a/crawler/sanitize.go
+++ b/crawler/sanitize.go
@@ -71,6 +71,28 @@ func sanitize_name(name string) string {
name = strings.Replace(name, "Years Old", "Jahre", 1)
}
+ if strings.Contains(name, " Anos ") {
+ name = strings.Replace(name, " Anos ", " Jahre ", 1)
+ }
+
+ if strings.Contains(name, " anos ") {
+ name = strings.Replace(name, " anos ", " Jahre ", 1)
+ }
+
+ r_J, err := regexp.Compile(`[0-9]+(\s)*J(\s|-)`)
+ if err != nil {
+ Fatal(err, "sanitize_name: J-Regexp (J für Jahr) failed")
+ }
+ age_noisy := r_J.FindString(name)
+ if age_noisy != "" {
+ r_number, err := regexp.Compile(`[0-9]+`)
+ if err != nil {
+ Fatal(err, "sanitize_name: Number-Regexp failed")
+ }
+ age := r_number.FindString(age_noisy)
+ name = strings.Replace(name, age_noisy, age+" Jahre ", 1)
+ }
+
r_liter, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?[lL](iter)?`)
if err != nil {
Fatal(err, "sanitize_name: Liter-Regexp failed")
@@ -128,6 +150,20 @@ func sanitize_name(name string) string {
name = strings.TrimSpace(name)
}
+ // removes redundant white spaces
+ r_ws, err := regexp.Compile(`\s(\s)+`)
+ if err != nil {
+ Fatal(err, "sanitize_name: White Space-Regexp failed")
+ }
+ for {
+ ws := r_ws.FindString(name)
+ if ws == "" {
+ break
+ }
+ name = strings.Replace(name, ws, " ", -1)
+ name = strings.TrimSpace(name)
+ }
+
return name
}