diff options
Diffstat (limited to 'crawler/sanitize.go')
| -rw-r--r-- | crawler/sanitize.go | 71 |
1 files changed, 67 insertions, 4 deletions
diff --git a/crawler/sanitize.go b/crawler/sanitize.go index ddcd4f6..c86faff 100644 --- a/crawler/sanitize.go +++ b/crawler/sanitize.go @@ -8,23 +8,86 @@ import ( func sanitize_offer(angebote []Angebot) []Angebot { + var W []Angebot + for _, offer := range angebote { offer.Name = sanitize_name(offer.Name) + + W = append(W, offer) } - return angebote + return W } func sanitize_name(name string) string { if strings.Contains(name, "y.o.") { name = strings.Replace(name, "y.o.", "Jahre", 1) } - r_liter, err := regexp.Compile("[0-9]+([,.][0-9](([lL])| ([Ll]iter))?") + + if strings.Contains(name, "years old") { + name = strings.Replace(name, "years old", "Jahre", 1) + } + + if strings.Contains(name, "Years Old") { + name = strings.Replace(name, "Years Old", "Jahre", 1) + } + + r_liter, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?[lL](iter)?`) + if err != nil { + log.Fatal(err) + } + for { + name_liter := r_liter.FindString(name) + if name_liter == "" { + break + } + name = strings.Replace(name, name_liter, "", -1) + name = strings.TrimSpace(name) + } + + if strings.Contains(name, "Liter") { + name = strings.Replace(name, "Liter", "", -1) + } + name = strings.TrimSpace(name) + + if strings.Contains(name, "liter") { + name = strings.Replace(name, "liter", "", -1) + } + name = strings.TrimSpace(name) + + r_procent, err := regexp.Compile(`[0-9]+([,.][0-9]+)?\%`) if err != nil { log.Fatal(err) } - name_liter := r_liter.FindString(name) - name = strings.Replace(name, name_liter, "", 1) + for { + name_procent := r_procent.FindString(name) + if name_procent == "" { + break + } + name = strings.Replace(name, name_procent, "", -1) + name = strings.TrimSpace(name) + } + + r_release, err := regexp.Compile(`Release$`) + if err != nil { + log.Fatal(err) + } + name_release := r_release.FindString(name) + name = strings.Replace(name, name_release, "", 1) + name = strings.TrimSpace(name) + + r_2x, err := regexp.Compile(`[0-9]+( )*[xX]( )`) + if err != nil { + log.Fatal(err) + } + for { + name_2x := r_2x.FindString(name) + if name_2x == "" { + break + } + name = strings.Replace(name, name_2x, "", -1) + name = strings.TrimSpace(name) + } return name } |
