summaryrefslogtreecommitdiff
path: root/crawler/sanitize.go
blob: a40745ab4a17597abd7eeed382ccf7bca2da5e59 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
package main

import (
	"log"
	"regexp"
	"strings"
)

// sanitize_offer returns a new slice holding a copy of every offer in
// angebote, with each copy's Name passed through sanitize_name. The elements
// of the input slice are not assigned to. The result is nil when angebote is
// empty.
func sanitize_offer(angebote []Angebot) []Angebot {
	var cleaned []Angebot

	for i := range angebote {
		// Work on a copy so the caller's element keeps its original Name.
		entry := angebote[i]
		entry.Name = sanitize_name(entry.Name)
		cleaned = append(cleaned, entry)
	}

	return cleaned
}

// Patterns used by sanitize_name. They are compiled once during package
// initialisation instead of on every call.
var (
	// A litre quantity such as "0,7l", "1 L" or "0.5 Liter". The trailing \b
	// keeps the pattern from biting into a following word ("15 Laimrig").
	name_volume_re = compile_name_pattern(`[0-9]+([,.][0-9]+)?( )?[lL](iter)?\b`)
	// A percentage such as "40%" or "46,3%".
	name_percent_re = compile_name_pattern(`[0-9]+([,.][0-9]+)?%`)
	// A multiplier prefix such as "2 x " or "6X " (the trailing space is required).
	name_pack_re = compile_name_pattern(`[0-9]+( )*[xX]( )`)
)

// compile_name_pattern compiles expr and terminates the program via log.Fatal
// if the expression is invalid.
func compile_name_pattern(expr string) *regexp.Regexp {
	re, err := regexp.Compile(expr)
	if err != nil {
		log.Fatal(err)
	}
	return re
}

// strip_name_pattern repeatedly takes the first match of re in name, deletes
// every occurrence of that matched text and trims surrounding whitespace,
// until re no longer matches. The result is returned.
func strip_name_pattern(name string, re *regexp.Regexp) string {
	for {
		hit := re.FindString(name)
		if hit == "" {
			return name
		}
		name = strings.TrimSpace(strings.Replace(name, hit, "", -1))
	}
}

// sanitize_name normalises a scraped product name. In order, it:
//
//  1. replaces the first occurrence of each of "y.o.", "years old" and
//     "Years Old" with "Jahre",
//  2. removes litre quantities ("0,7l", "1 Liter") and any remaining
//     "Liter"/"liter" words,
//  3. removes percentages ("40%"),
//  4. removes the word "Release" when it is the very end of the name,
//  5. removes multiplier prefixes ("2 x ").
//
// The returned string has leading and trailing whitespace trimmed. Whitespace
// inside the name is not collapsed, so removing a token from the middle can
// leave two consecutive spaces.
func sanitize_name(name string) string {
	for _, marker := range [...]string{"y.o.", "years old", "Years Old"} {
		name = strings.Replace(name, marker, "Jahre", 1)
	}

	name = strip_name_pattern(name, name_volume_re)

	// Unit words that were not attached to a number.
	name = strings.Replace(name, "Liter", "", -1)
	name = strings.Replace(name, "liter", "", -1)
	name = strings.TrimSpace(name)

	name = strip_name_pattern(name, name_percent_re)

	// Only a trailing "Release" is dropped. TrimSuffix is used because
	// strings.Replace would delete the first occurrence in the name, which is
	// not necessarily the trailing one.
	name = strings.TrimSpace(strings.TrimSuffix(name, "Release"))

	name = strip_name_pattern(name, name_pack_re)

	return name
}

// sanitize_base_price strips known descriptive fragments from a scraped base
// price string and hands the remainder to convert_price, whose results are
// returned unchanged.
func sanitize_base_price(price_noisy string) (price int, err error) {
	// Rules are applied in order. When trigger occurs in the text, every
	// occurrence of cut is removed; rules marked trim additionally strip
	// leading and trailing whitespace afterwards.
	rules := []struct {
		trigger string
		cut     string
		trim    bool
	}{
		{"Preis pro Liter", "Preis pro Liter", false},
		{" pro 1 l", " pro 1 l", false},
		{" pro 1 stück", " pro 1 stück", false},
		{" pro 1 Stück", " pro 1 Stück", false},
		// NOTE(review): the trigger includes the colon but only the word is
		// removed, so the ":" stays in the text — confirm convert_price
		// tolerates it.
		{"Grundpreis:", "Grundpreis", true},
		{"/Liter", "/Liter", true},
	}

	cleaned := price_noisy
	for _, rule := range rules {
		if !strings.Contains(cleaned, rule.trigger) {
			continue
		}
		cleaned = strings.Replace(cleaned, rule.cut, "", -1)
		if rule.trim {
			cleaned = strings.TrimSpace(cleaned)
		}
	}

	return convert_price(cleaned)
}