diff options
| field | value | date |
|---|---|---|
| author | Max | 2018-02-08 01:04:06 +0100 |
| committer | Max | 2018-02-08 01:04:06 +0100 |
| commit | 5aed13831b523043d41229c8857445d6e889cc72 (patch) | |
| tree | e1118aa021061eec4c2df34e3b1f6d3892410b6b | |
| parent | ddef17e4afc59d614d064c97426e8cedcc6599fc (diff) | |
| download | alkobote-5aed13831b523043d41229c8857445d6e889cc72.tar.gz | |
Better sanitizing.
| -rw-r--r-- | crawler/sanitize.go | 71 |
1 files changed, 67 insertions, 4 deletions
```diff
diff --git a/crawler/sanitize.go b/crawler/sanitize.go
index ddcd4f6..c86faff 100644
--- a/crawler/sanitize.go
+++ b/crawler/sanitize.go
@@ -8,23 +8,86 @@ import (
 
 func sanitize_offer(angebote []Angebot) []Angebot {
 
+	var W []Angebot
+
 	for _, offer := range angebote {
 		offer.Name = sanitize_name(offer.Name)
+
+		W = append(W, offer)
 	}
 
-	return angebote
+	return W
 }
 
 func sanitize_name(name string) string {
 	if strings.Contains(name, "y.o.") {
 		name = strings.Replace(name, "y.o.", "Jahre", 1)
 	}
-	r_liter, err := regexp.Compile("[0-9]+([,.][0-9](([lL])| ([Ll]iter))?")
+
+	if strings.Contains(name, "years old") {
+		name = strings.Replace(name, "years old", "Jahre", 1)
+	}
+
+	if strings.Contains(name, "Years Old") {
+		name = strings.Replace(name, "Years Old", "Jahre", 1)
+	}
+
+	r_liter, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?[lL](iter)?`)
+	if err != nil {
+		log.Fatal(err)
+	}
+	for {
+		name_liter := r_liter.FindString(name)
+		if name_liter == "" {
+			break
+		}
+		name = strings.Replace(name, name_liter, "", -1)
+		name = strings.TrimSpace(name)
+	}
+
+	if strings.Contains(name, "Liter") {
+		name = strings.Replace(name, "Liter", "", -1)
+	}
+	name = strings.TrimSpace(name)
+
+	if strings.Contains(name, "liter") {
+		name = strings.Replace(name, "liter", "", -1)
+	}
+	name = strings.TrimSpace(name)
+
+	r_procent, err := regexp.Compile(`[0-9]+([,.][0-9]+)?\%`)
 	if err != nil {
 		log.Fatal(err)
 	}
-	name_liter := r_liter.FindString(name)
-	name = strings.Replace(name, name_liter, "", 1)
+	for {
+		name_procent := r_procent.FindString(name)
+		if name_procent == "" {
+			break
+		}
+		name = strings.Replace(name, name_procent, "", -1)
+		name = strings.TrimSpace(name)
+	}
+
+	r_release, err := regexp.Compile(`Release$`)
+	if err != nil {
+		log.Fatal(err)
+	}
+	name_release := r_release.FindString(name)
+	name = strings.Replace(name, name_release, "", 1)
+	name = strings.TrimSpace(name)
+
+	r_2x, err := regexp.Compile(`[0-9]+( )*[xX]( )`)
+	if err != nil {
+		log.Fatal(err)
+	}
+	for {
+		name_2x := r_2x.FindString(name)
+		if name_2x == "" {
+			break
+		}
+		name = strings.Replace(name, name_2x, "", -1)
+		name = strings.TrimSpace(name)
+	}
 
 	return name
 }
```
