summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- crawler/convert_price.go (renamed from crawler/sanitize_price.go) 2
-rw-r--r-- crawler/sanitize.go 30
-rw-r--r-- crawler/sanitize_name.go 13
-rw-r--r-- crawler/scrape.go 2
-rw-r--r-- crawler/shop_bottleworld.go 4
-rw-r--r-- crawler/shop_mcwhisky.go 6
-rw-r--r-- crawler/shop_rumundco.go 4
-rw-r--r-- crawler/shop_whic.go 4
-rw-r--r-- crawler/shop_whiskyde.go 4
-rw-r--r-- crawler/shop_whiskysitenl.go 4
-rw-r--r-- crawler/shop_whiskyworld.go 4
-rw-r--r-- crawler/shop_whiskyzone.go 4
12 files changed, 50 insertions, 31 deletions
diff --git a/crawler/sanitize_price.go b/crawler/convert_price.go
index 2052842..54386d6 100644
--- a/crawler/sanitize_price.go
+++ b/crawler/convert_price.go
@@ -6,7 +6,7 @@ import (
"strings"
)
-func sanitize_price(price string) (int, error) {
+func convert_price(price string) (int, error) {
if "" == price {
return 0, errors.New("Empty string")
}
diff --git a/crawler/sanitize.go b/crawler/sanitize.go
new file mode 100644
index 0000000..ddcd4f6
--- /dev/null
+++ b/crawler/sanitize.go
@@ -0,0 +1,30 @@
+package main
+
+import (
+ "log"
+ "regexp"
+ "strings"
+)
+
+// sanitize_offer passes the Name of every offer in angebote through
+// sanitize_name. The elements are updated in place and the same slice is
+// returned.
+func sanitize_offer(angebote []Angebot) []Angebot {
+
+	// Index into the slice: the value variable of a range loop is a copy of
+	// the element, so assigning to it would leave angebote unchanged.
+	for i := range angebote {
+		angebote[i].Name = sanitize_name(angebote[i].Name)
+	}
+
+	return angebote
+}
+
+// sanitize_name normalizes a product name.
+//
+// The first "y.o." is replaced by "Jahre". The first bottle-volume token, a
+// number with an optional decimal fraction followed by "l", "L", "liter" or
+// "Liter" (for example "0,7l", "0.7 L" or "1 Liter"), is removed together
+// with the whitespace around it. A name without such a token is returned
+// with only the "y.o." replacement applied.
+func sanitize_name(name string) string {
+	name = strings.Replace(name, "y.o.", "Jahre", 1)
+
+	// The unit is mandatory and has to end at a word boundary, so a bare
+	// number such as an age ("12 Jahre") or a number followed by a word that
+	// merely starts with "L" ("18 Lagavulin") is not treated as a volume.
+	r_liter, err := regexp.Compile(`\b[0-9]+([,.][0-9]+)?\s?([Ll]iter|[lL])\b`)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	loc := r_liter.FindStringIndex(name)
+	if loc == nil {
+		return name
+	}
+
+	// Rejoin the text on both sides of the match with a single space so the
+	// removal leaves neither doubled nor dangling whitespace behind.
+	before := strings.TrimSpace(name[:loc[0]])
+	after := strings.TrimSpace(name[loc[1]:])
+
+	return strings.TrimSpace(before + " " + after)
+}
diff --git a/crawler/sanitize_name.go b/crawler/sanitize_name.go
deleted file mode 100644
index 73b2714..0000000
--- a/crawler/sanitize_name.go
+++ /dev/null
@@ -1,13 +0,0 @@
-package main
-
-import (
- "strings"
-)
-
-func sanitize_name(name string) string {
- if strings.Contains(name, "y.o.") {
- name = strings.Replace(name, "y.o.", "Jahre", 1)
- }
-
- return name
-}
diff --git a/crawler/scrape.go b/crawler/scrape.go
index 0595240..31b3618 100644
--- a/crawler/scrape.go
+++ b/crawler/scrape.go
@@ -17,6 +17,8 @@ func (app *App) ScrapeHTML(shops []Shop) {
W = ScrapeShop(shop)
+ W = sanitize_offer(W)
+
err = app.save_offer(W)
if err != nil {
log.Fatal(err)
diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go
index 3a3c631..b6af7e0 100644
--- a/crawler/shop_bottleworld.go
+++ b/crawler/shop_bottleworld.go
@@ -39,13 +39,13 @@ func ScrapeBottleWord(shop Shop) []Angebot {
e.ForEach(".price-box", func(i int, e *colly.HTMLElement) {
e.ForEach(".old-price", func(i int, e *colly.HTMLElement) {
- W.Original_price, err = sanitize_price(e.ChildText(".price"))
+ W.Original_price, err = convert_price(e.ChildText(".price"))
if err != nil {
log.Fatal(err)
}
})
e.ForEach(".special-price", func(i int, e *colly.HTMLElement) {
- W.Discounted_price, err = sanitize_price(e.ChildText(".price"))
+ W.Discounted_price, err = convert_price(e.ChildText(".price"))
if err != nil {
log.Fatal(err)
}
diff --git a/crawler/shop_mcwhisky.go b/crawler/shop_mcwhisky.go
index e45e740..e0c1ab8 100644
--- a/crawler/shop_mcwhisky.go
+++ b/crawler/shop_mcwhisky.go
@@ -19,7 +19,7 @@ func ScrapeMCWhisky(shop Shop) []Angebot {
c.OnHTML("li.item", func(e *colly.HTMLElement) {
W := Angebot{}
- whisky_name := sanitize_name(e.ChildAttr("a", "title"))
+ whisky_name := e.ChildAttr("a", "title")
whisky_url := e.ChildAttr("a", "href")
W.Name = whisky_name
W.Url = whisky_url
@@ -28,13 +28,13 @@ func ScrapeMCWhisky(shop Shop) []Angebot {
e.ForEach(".price-box", func(i int, e *colly.HTMLElement) {
e.ForEach(".old-price", func(i int, e *colly.HTMLElement) {
- W.Original_price, err = sanitize_price(e.ChildText(".price"))
+ W.Original_price, err = convert_price(e.ChildText(".price"))
if err != nil {
log.Fatal(err)
}
})
e.ForEach(".special-price", func(i int, e *colly.HTMLElement) {
- W.Discounted_price, err = sanitize_price(e.ChildText(".price"))
+ W.Discounted_price, err = convert_price(e.ChildText(".price"))
if err != nil {
log.Fatal(err)
}
diff --git a/crawler/shop_rumundco.go b/crawler/shop_rumundco.go
index ae349f3..d62250e 100644
--- a/crawler/shop_rumundco.go
+++ b/crawler/shop_rumundco.go
@@ -39,11 +39,11 @@ func ScrapeRumundCo(shop Shop) []Angebot {
if "" == regular_price {
return
}
- W.Original_price, err = sanitize_price(regular_price)
+ W.Original_price, err = convert_price(regular_price)
if err != nil {
log.Fatal(err)
}
- W.Discounted_price, err = sanitize_price(e.ChildText(".price-value"))
+ W.Discounted_price, err = convert_price(e.ChildText(".price-value"))
if err != nil {
log.Fatal(err)
}
diff --git a/crawler/shop_whic.go b/crawler/shop_whic.go
index 896b1fb..e489161 100644
--- a/crawler/shop_whic.go
+++ b/crawler/shop_whic.go
@@ -28,13 +28,13 @@ func ScrapeWhic(shop Shop) []Angebot {
e.ForEach(".price-box", func(i int, e *colly.HTMLElement) {
e.ForEach(".old-price", func(i int, e *colly.HTMLElement) {
- W.Original_price, err = sanitize_price(e.ChildText(".price"))
+ W.Original_price, err = convert_price(e.ChildText(".price"))
if err != nil {
log.Fatal(err)
}
})
e.ForEach(".special-price", func(i int, e *colly.HTMLElement) {
- W.Discounted_price, err = sanitize_price(e.ChildText(".price"))
+ W.Discounted_price, err = convert_price(e.ChildText(".price"))
if err != nil {
log.Fatal(err)
}
diff --git a/crawler/shop_whiskyde.go b/crawler/shop_whiskyde.go
index 657bfe0..d90e0cc 100644
--- a/crawler/shop_whiskyde.go
+++ b/crawler/shop_whiskyde.go
@@ -26,13 +26,13 @@ func ScrapeWhiskyde(shop Shop) []Angebot {
var err error
e.ForEach(".article-price-original", func(i int, e *colly.HTMLElement) {
- W.Original_price, err = sanitize_price(e.ChildText("del"))
+ W.Original_price, err = convert_price(e.ChildText("del"))
if err != nil {
log.Fatal(err)
}
})
e.ForEach(".article-price", func(i int, e *colly.HTMLElement) {
- W.Discounted_price, err = sanitize_price(e.ChildText(".article-price-default"))
+ W.Discounted_price, err = convert_price(e.ChildText(".article-price-default"))
if err != nil {
log.Fatal(err)
}
diff --git a/crawler/shop_whiskysitenl.go b/crawler/shop_whiskysitenl.go
index c8b35a2..656cf18 100644
--- a/crawler/shop_whiskysitenl.go
+++ b/crawler/shop_whiskysitenl.go
@@ -34,12 +34,12 @@ func ScrapeWhiskysitenl(shop Shop) []Angebot {
}
discounted_price := r.FindString(strings.Trim(strings.TrimPrefix(price_discount_noisy, regular_price), ""))
- W.Original_price, err = sanitize_price(regular_price)
+ W.Original_price, err = convert_price(regular_price)
if err != nil {
//log.Println(W.Name, err)
return
}
- W.Discounted_price, err = sanitize_price(discounted_price)
+ W.Discounted_price, err = convert_price(discounted_price)
if err != nil {
//log.Println(W.Name, err)
return
diff --git a/crawler/shop_whiskyworld.go b/crawler/shop_whiskyworld.go
index e07c42f..36b144e 100644
--- a/crawler/shop_whiskyworld.go
+++ b/crawler/shop_whiskyworld.go
@@ -32,13 +32,13 @@ func ScrapeWhiskyworld(shop Shop) []Angebot {
var err error
- W.Original_price, err = sanitize_price(regular_price)
+ W.Original_price, err = convert_price(regular_price)
if err != nil {
log.Fatal(err)
return
}
- W.Discounted_price, err = sanitize_price(e.ChildText(".uvp"))
+ W.Discounted_price, err = convert_price(e.ChildText(".uvp"))
if err != nil {
log.Fatal(err)
return
diff --git a/crawler/shop_whiskyzone.go b/crawler/shop_whiskyzone.go
index a9e73d0..3303b5e 100644
--- a/crawler/shop_whiskyzone.go
+++ b/crawler/shop_whiskyzone.go
@@ -29,12 +29,12 @@ func ScrapeWhiskyzone(shop Shop) []Angebot {
if err != nil {
log.Fatal(err)
}
- W.Discounted_price, err = sanitize_price(r.FindString(price_discount_noisy))
+ W.Discounted_price, err = convert_price(r.FindString(price_discount_noisy))
if err != nil {
log.Fatal(err)
return
}
- W.Original_price, err = sanitize_price(r.FindString(price_regular_noisy))
+ W.Original_price, err = convert_price(r.FindString(price_regular_noisy))
if err != nil {
log.Fatal(err)
return