summary | refs | log | tree | commit | diff
path: root/crawler/sanitize.go
diff options
context:
space:
mode:
author Max 2018-02-08 01:04:06 +0100
committer Max 2018-02-08 01:04:06 +0100
commit 5aed13831b523043d41229c8857445d6e889cc72 (patch)
tree e1118aa021061eec4c2df34e3b1f6d3892410b6b /crawler/sanitize.go
parent ddef17e4afc59d614d064c97426e8cedcc6599fc (diff)
download alkobote-5aed13831b523043d41229c8857445d6e889cc72.tar.gz
Better sanitizing.
Diffstat (limited to 'crawler/sanitize.go')
-rw-r--r-- crawler/sanitize.go 71
1 files changed, 67 insertions, 4 deletions
diff --git a/crawler/sanitize.go b/crawler/sanitize.go
index ddcd4f6..c86faff 100644
--- a/crawler/sanitize.go
+++ b/crawler/sanitize.go
@@ -8,23 +8,86 @@ import (
func sanitize_offer(angebote []Angebot) []Angebot {
+ var W []Angebot
+
for _, offer := range angebote {
offer.Name = sanitize_name(offer.Name)
+
+ W = append(W, offer)
}
- return angebote
+ return W
}
func sanitize_name(name string) string {
if strings.Contains(name, "y.o.") {
name = strings.Replace(name, "y.o.", "Jahre", 1)
}
- r_liter, err := regexp.Compile("[0-9]+([,.][0-9](([lL])| ([Ll]iter))?")
+
+ if strings.Contains(name, "years old") {
+ name = strings.Replace(name, "years old", "Jahre", 1)
+ }
+
+ if strings.Contains(name, "Years Old") {
+ name = strings.Replace(name, "Years Old", "Jahre", 1)
+ }
+
+ r_liter, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?[lL](iter)?`)
+ if err != nil {
+ log.Fatal(err)
+ }
+ for {
+ name_liter := r_liter.FindString(name)
+ if name_liter == "" {
+ break
+ }
+ name = strings.Replace(name, name_liter, "", -1)
+ name = strings.TrimSpace(name)
+ }
+
+ if strings.Contains(name, "Liter") {
+ name = strings.Replace(name, "Liter", "", -1)
+ }
+ name = strings.TrimSpace(name)
+
+ if strings.Contains(name, "liter") {
+ name = strings.Replace(name, "liter", "", -1)
+ }
+ name = strings.TrimSpace(name)
+
+ r_procent, err := regexp.Compile(`[0-9]+([,.][0-9]+)?\%`)
if err != nil {
log.Fatal(err)
}
- name_liter := r_liter.FindString(name)
- name = strings.Replace(name, name_liter, "", 1)
+ for {
+ name_procent := r_procent.FindString(name)
+ if name_procent == "" {
+ break
+ }
+ name = strings.Replace(name, name_procent, "", -1)
+ name = strings.TrimSpace(name)
+ }
+
+ r_release, err := regexp.Compile(`Release$`)
+ if err != nil {
+ log.Fatal(err)
+ }
+ name_release := r_release.FindString(name)
+ name = strings.Replace(name, name_release, "", 1)
+ name = strings.TrimSpace(name)
+
+ r_2x, err := regexp.Compile(`[0-9]+( )*[xX]( )`)
+ if err != nil {
+ log.Fatal(err)
+ }
+ for {
+ name_2x := r_2x.FindString(name)
+ if name_2x == "" {
+ break
+ }
+ name = strings.Replace(name, name_2x, "", -1)
+ name = strings.TrimSpace(name)
+ }
return name
}