diff options
| -rw-r--r-- | crawler/convert_price.go (renamed from crawler/sanitize_price.go) | 2 | ||||
| -rw-r--r-- | crawler/sanitize.go | 30 | ||||
| -rw-r--r-- | crawler/sanitize_name.go | 13 | ||||
| -rw-r--r-- | crawler/scrape.go | 2 | ||||
| -rw-r--r-- | crawler/shop_bottleworld.go | 4 | ||||
| -rw-r--r-- | crawler/shop_mcwhisky.go | 6 | ||||
| -rw-r--r-- | crawler/shop_rumundco.go | 4 | ||||
| -rw-r--r-- | crawler/shop_whic.go | 4 | ||||
| -rw-r--r-- | crawler/shop_whiskyde.go | 4 | ||||
| -rw-r--r-- | crawler/shop_whiskysitenl.go | 4 | ||||
| -rw-r--r-- | crawler/shop_whiskyworld.go | 4 | ||||
| -rw-r--r-- | crawler/shop_whiskyzone.go | 4 |
12 files changed, 50 insertions, 31 deletions
diff --git a/crawler/sanitize_price.go b/crawler/convert_price.go index 2052842..54386d6 100644 --- a/crawler/sanitize_price.go +++ b/crawler/convert_price.go @@ -6,7 +6,7 @@ import ( "strings" ) -func sanitize_price(price string) (int, error) { +func convert_price(price string) (int, error) { if "" == price { return 0, errors.New("Empty string") } diff --git a/crawler/sanitize.go b/crawler/sanitize.go new file mode 100644 index 0000000..ddcd4f6 --- /dev/null +++ b/crawler/sanitize.go @@ -0,0 +1,30 @@ +package main + +import ( + "log" + "regexp" + "strings" +) + +func sanitize_offer(angebote []Angebot) []Angebot { + + for _, offer := range angebote { + offer.Name = sanitize_name(offer.Name) + } + + return angebote +} + +func sanitize_name(name string) string { + if strings.Contains(name, "y.o.") { + name = strings.Replace(name, "y.o.", "Jahre", 1) + } + r_liter, err := regexp.Compile("[0-9]+([,.][0-9](([lL])| ([Ll]iter))?") + if err != nil { + log.Fatal(err) + } + name_liter := r_liter.FindString(name) + name = strings.Replace(name, name_liter, "", 1) + + return name +} diff --git a/crawler/sanitize_name.go b/crawler/sanitize_name.go deleted file mode 100644 index 73b2714..0000000 --- a/crawler/sanitize_name.go +++ /dev/null @@ -1,13 +0,0 @@ -package main - -import ( - "strings" -) - -func sanitize_name(name string) string { - if strings.Contains(name, "y.o.") { - name = strings.Replace(name, "y.o.", "Jahre", 1) - } - - return name -} diff --git a/crawler/scrape.go b/crawler/scrape.go index 0595240..31b3618 100644 --- a/crawler/scrape.go +++ b/crawler/scrape.go @@ -17,6 +17,8 @@ func (app *App) ScrapeHTML(shops []Shop) { W = ScrapeShop(shop) + W = sanitize_offer(W) + err = app.save_offer(W) if err != nil { log.Fatal(err) diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go index 3a3c631..b6af7e0 100644 --- a/crawler/shop_bottleworld.go +++ b/crawler/shop_bottleworld.go @@ -39,13 +39,13 @@ func ScrapeBottleWord(shop Shop) []Angebot { e.ForEach(".price-box", func(i int, e *colly.HTMLElement) { e.ForEach(".old-price", func(i int, e *colly.HTMLElement) { - W.Original_price, err = sanitize_price(e.ChildText(".price")) + W.Original_price, err = convert_price(e.ChildText(".price")) if err != nil { log.Fatal(err) } }) e.ForEach(".special-price", func(i int, e *colly.HTMLElement) { - W.Discounted_price, err = sanitize_price(e.ChildText(".price")) + W.Discounted_price, err = convert_price(e.ChildText(".price")) if err != nil { log.Fatal(err) } diff --git a/crawler/shop_mcwhisky.go b/crawler/shop_mcwhisky.go index e45e740..e0c1ab8 100644 --- a/crawler/shop_mcwhisky.go +++ b/crawler/shop_mcwhisky.go @@ -19,7 +19,7 @@ func ScrapeMCWhisky(shop Shop) []Angebot { c.OnHTML("li.item", func(e *colly.HTMLElement) { W := Angebot{} - whisky_name := sanitize_name(e.ChildAttr("a", "title")) + whisky_name := e.ChildAttr("a", "title") whisky_url := e.ChildAttr("a", "href") W.Name = whisky_name W.Url = whisky_url @@ -28,13 +28,13 @@ func ScrapeMCWhisky(shop Shop) []Angebot { e.ForEach(".price-box", func(i int, e *colly.HTMLElement) { e.ForEach(".old-price", func(i int, e *colly.HTMLElement) { - W.Original_price, err = sanitize_price(e.ChildText(".price")) + W.Original_price, err = convert_price(e.ChildText(".price")) if err != nil { log.Fatal(err) } }) e.ForEach(".special-price", func(i int, e *colly.HTMLElement) { - W.Discounted_price, err = sanitize_price(e.ChildText(".price")) + W.Discounted_price, err = convert_price(e.ChildText(".price")) if err != nil { log.Fatal(err) } diff --git a/crawler/shop_rumundco.go b/crawler/shop_rumundco.go index ae349f3..d62250e 100644 --- a/crawler/shop_rumundco.go +++ b/crawler/shop_rumundco.go @@ -39,11 +39,11 @@ func ScrapeRumundCo(shop Shop) []Angebot { if "" == regular_price { return } - W.Original_price, err = sanitize_price(regular_price) + W.Original_price, err = convert_price(regular_price) if err != nil { log.Fatal(err) } - W.Discounted_price, err = sanitize_price(e.ChildText(".price-value")) + W.Discounted_price, err = convert_price(e.ChildText(".price-value")) if err != nil { log.Fatal(err) } diff --git a/crawler/shop_whic.go b/crawler/shop_whic.go index 896b1fb..e489161 100644 --- a/crawler/shop_whic.go +++ b/crawler/shop_whic.go @@ -28,13 +28,13 @@ func ScrapeWhic(shop Shop) []Angebot { e.ForEach(".price-box", func(i int, e *colly.HTMLElement) { e.ForEach(".old-price", func(i int, e *colly.HTMLElement) { - W.Original_price, err = sanitize_price(e.ChildText(".price")) + W.Original_price, err = convert_price(e.ChildText(".price")) if err != nil { log.Fatal(err) } }) e.ForEach(".special-price", func(i int, e *colly.HTMLElement) { - W.Discounted_price, err = sanitize_price(e.ChildText(".price")) + W.Discounted_price, err = convert_price(e.ChildText(".price")) if err != nil { log.Fatal(err) } diff --git a/crawler/shop_whiskyde.go b/crawler/shop_whiskyde.go index 657bfe0..d90e0cc 100644 --- a/crawler/shop_whiskyde.go +++ b/crawler/shop_whiskyde.go @@ -26,13 +26,13 @@ func ScrapeWhiskyde(shop Shop) []Angebot { var err error e.ForEach(".article-price-original", func(i int, e *colly.HTMLElement) { - W.Original_price, err = sanitize_price(e.ChildText("del")) + W.Original_price, err = convert_price(e.ChildText("del")) if err != nil { log.Fatal(err) } }) e.ForEach(".article-price", func(i int, e *colly.HTMLElement) { - W.Discounted_price, err = sanitize_price(e.ChildText(".article-price-default")) + W.Discounted_price, err = convert_price(e.ChildText(".article-price-default")) if err != nil { log.Fatal(err) } diff --git a/crawler/shop_whiskysitenl.go b/crawler/shop_whiskysitenl.go index c8b35a2..656cf18 100644 --- a/crawler/shop_whiskysitenl.go +++ b/crawler/shop_whiskysitenl.go @@ -34,12 +34,12 @@ func ScrapeWhiskysitenl(shop Shop) []Angebot { } discounted_price := r.FindString(strings.Trim(strings.TrimPrefix(price_discount_noisy, regular_price), "")) - W.Original_price, err = sanitize_price(regular_price) + W.Original_price, err = convert_price(regular_price) if err != nil { //log.Println(W.Name, err) return } - W.Discounted_price, err = sanitize_price(discounted_price) + W.Discounted_price, err = convert_price(discounted_price) if err != nil { //log.Println(W.Name, err) return diff --git a/crawler/shop_whiskyworld.go b/crawler/shop_whiskyworld.go index e07c42f..36b144e 100644 --- a/crawler/shop_whiskyworld.go +++ b/crawler/shop_whiskyworld.go @@ -32,13 +32,13 @@ func ScrapeWhiskyworld(shop Shop) []Angebot { var err error - W.Original_price, err = sanitize_price(regular_price) + W.Original_price, err = convert_price(regular_price) if err != nil { log.Fatal(err) return } - W.Discounted_price, err = sanitize_price(e.ChildText(".uvp")) + W.Discounted_price, err = convert_price(e.ChildText(".uvp")) if err != nil { log.Fatal(err) return diff --git a/crawler/shop_whiskyzone.go b/crawler/shop_whiskyzone.go index a9e73d0..3303b5e 100644 --- a/crawler/shop_whiskyzone.go +++ b/crawler/shop_whiskyzone.go @@ -29,12 +29,12 @@ func ScrapeWhiskyzone(shop Shop) []Angebot { if err != nil { log.Fatal(err) } - W.Discounted_price, err = sanitize_price(r.FindString(price_discount_noisy)) + W.Discounted_price, err = convert_price(r.FindString(price_discount_noisy)) if err != nil { log.Fatal(err) return } - W.Original_price, err = sanitize_price(r.FindString(price_regular_noisy)) + W.Original_price, err = convert_price(r.FindString(price_regular_noisy)) if err != nil { log.Fatal(err) return |
