summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: horus_arch, 2018-02-04 20:54:58 +0100
committer: horus_arch, 2018-02-04 20:54:58 +0100
commit: 4f7a9316da8e19fa9466ee377148c7d07bf39fd9 (patch)
tree: 7c4fb5b35ea107d7379b4c7a601aa9a7d9d184d5
parent: fe12a1d41d74ac55dc3c1b27821375541ee5f2b2 (diff)
download: alkobote-4f7a9316da8e19fa9466ee377148c7d07bf39fd9.tar.gz
Data from all shops are now saved in a data structure.
-rw-r--r-- main.go 28
-rw-r--r-- rumundco.go 30
-rw-r--r-- sanitize_price.go 10
-rw-r--r-- whic.go 33
-rw-r--r-- whiskysitenl.go 35
-rw-r--r-- whiskyworld.go 39
-rw-r--r-- whiskyzone.go 35
7 files changed, 149 insertions, 61 deletions
diff --git a/main.go b/main.go
index f78a777..ce63d37 100644
--- a/main.go
+++ b/main.go
@@ -25,27 +25,23 @@ func main() {
W = ScrapeMCWhisky()
printName(W, "MC Whisky")
- /*
- printName("Whic")
- ScrapeWhic()
+ W = ScrapeRumundCo()
+ printName(W, "Rum und Co")
- W = ScrapeWhiskyde()
- printName(W, "Whisky.de")
- */
+ W = ScrapeWhic()
+ printName(W, "Whic")
- /*
- printName("Whiskysite.nl")
- ScrapeWhiskysitenl()
+ W = ScrapeWhiskyde()
+ printName(W, "Whisky.de")
- printName("Whiskyworld")
- ScrapeWhiskyworld()
+ W = ScrapeWhiskysitenl()
+ printName(W, "Whiskysite.nl")
- printName("Whiskyzone")
- ScrapeWhiskyzone()
+ W = ScrapeWhiskyworld()
+ printName(W, "Whiskyworld")
- printName("Rum und Co")
- ScrapeRumundCo()
- */
+ W = ScrapeWhiskyzone()
+ printName(W, "Whiskyzone")
}
func printName(W []Angebot, name string) {
diff --git a/rumundco.go b/rumundco.go
index d941e73..3c1fb4e 100644
--- a/rumundco.go
+++ b/rumundco.go
@@ -1,7 +1,6 @@
package main
import (
- "fmt"
"log"
"regexp"
"strings"
@@ -10,13 +9,17 @@ import (
"github.com/gocolly/colly"
)
-func ScrapeRumundCo() {
+func ScrapeRumundCo() []Angebot {
+ Whiskys := []Angebot{}
+
c := colly.NewCollector(
colly.AllowedDomains("rumundco.de"),
colly.AllowedDomains("www.rumundco.de"),
)
c.OnHTML(".product-teaser", func(e *colly.HTMLElement) {
+ W := Angebot{}
+
whisky_name := strings.TrimPrefix(e.ChildAttr("img", "alt"), "Restposten: ")
whisky_url := "https://www.rumundco.de/" + e.ChildAttr("a", "href")
@@ -28,21 +31,32 @@ func ScrapeRumundCo() {
return
}
- log.Println(whisky_name)
- log.Println(whisky_url)
+ W.Name = whisky_name
+ W.Url = whisky_url
e.ForEach(".price_wrapper", func(i int, e *colly.HTMLElement) {
regular_price := e.ChildText("del.value")
if "" == regular_price {
return
}
- log.Println(regular_price)
- log.Println(e.ChildText(".price-value"))
+ W.Original_price, err = sanitize_price(regular_price)
+ if err != nil {
+ log.Fatal(err)
+ }
+ W.Discounted_price, err = sanitize_price(e.ChildText(".price-value"))
+ if err != nil {
+ log.Fatal(err)
+ }
})
- log.Println("https://www.rumundco.de/" + e.ChildAttr("img", "src"))
+ W.Image_url = "https://www.rumundco.de/" + e.ChildAttr("img", "src")
+
+ W.Shop = "Rum & Co"
+ W.Spirit_type = "Whisky"
- fmt.Println("")
+ Whiskys = append(Whiskys, W)
})
c.Visit("https://www.rumundco.de/navi.php?q=4&kf=29&kk-suesse-von=0&kk-suesse-bis=100&kk-milde-von=0&kk-milde-bis=100&kk-wuerze-von=0&kk-wuerze-bis=100&kk-frucht-von=0&kk-frucht-bis=100&kk-torf-von=0&kk-torf-bis=100&hf=0&af=90&Sortierung=11&a=350")
+
+ return Whiskys
}
diff --git a/sanitize_price.go b/sanitize_price.go
index 8b9443f..2052842 100644
--- a/sanitize_price.go
+++ b/sanitize_price.go
@@ -7,6 +7,10 @@ import (
)
func sanitize_price(price string) (int, error) {
+ if "" == price {
+ return 0, errors.New("Empty string")
+ }
+
multiply_by_10 := false
multiply_by_100 := true
@@ -24,6 +28,12 @@ func sanitize_price(price string) (int, error) {
price = strings.TrimSuffix(strings.ToLower(price), "euro")
price = strings.TrimSpace(price)
+ if len(price) < 2 {
+ price = "0" + price
+ } else if len(price) < 3 {
+ price = "00" + price
+ }
+
c := string(price[len(price)-2:])
c = string(c[0:1])
diff --git a/whic.go b/whic.go
index ecbd109..fced386 100644
--- a/whic.go
+++ b/whic.go
@@ -1,7 +1,6 @@
package main
import (
- "fmt"
"log"
"strings"
@@ -9,23 +8,36 @@ import (
"github.com/gocolly/colly"
)
-func ScrapeWhic() {
+func ScrapeWhic() []Angebot {
+ Whiskys := []Angebot{}
+
c := colly.NewCollector(
colly.AllowedDomains("whic.de"),
)
c.OnHTML("li.item", func(e *colly.HTMLElement) {
+ W := Angebot{}
+
whisky_name := e.ChildAttr("a", "title")
whisky_url := e.ChildAttr("a", "href")
- log.Println(whisky_name)
- log.Println(whisky_url)
+
+ W.Name = whisky_name
+ W.Url = whisky_url
+
+ var err error
e.ForEach(".price-box", func(i int, e *colly.HTMLElement) {
e.ForEach(".old-price", func(i int, e *colly.HTMLElement) {
- log.Println(e.ChildText(".price"))
+ W.Original_price, err = sanitize_price(e.ChildText(".price"))
+ if err != nil {
+ log.Fatal(err)
+ }
})
e.ForEach(".special-price", func(i int, e *colly.HTMLElement) {
- log.Println(e.ChildText(".price"))
+ W.Discounted_price, err = sanitize_price(e.ChildText(".price"))
+ if err != nil {
+ log.Fatal(err)
+ }
})
})
@@ -38,10 +50,15 @@ func ScrapeWhic() {
if err != nil {
log.Fatal(err)
}
- log.Println(doc.Find("img").Attr("src"))
+ W.Image_url, _ = doc.Find("img").Attr("src")
+
+ W.Shop = "Whic"
+ W.Spirit_type = "Whisky"
- fmt.Println("")
+ Whiskys = append(Whiskys, W)
})
c.Visit("https://whic.de/angebote")
+
+ return Whiskys
}
diff --git a/whiskysitenl.go b/whiskysitenl.go
index dcca681..3eefc06 100644
--- a/whiskysitenl.go
+++ b/whiskysitenl.go
@@ -1,7 +1,6 @@
package main
import (
- "fmt"
"log"
"regexp"
"strings"
@@ -9,7 +8,9 @@ import (
"github.com/gocolly/colly"
)
-func ScrapeWhiskysitenl() {
+func ScrapeWhiskysitenl() []Angebot {
+ Whiskys := []Angebot{}
+
c := colly.NewCollector(
colly.AllowedDomains("whiskysite.nl"),
colly.AllowedDomains("www.whiskysite.nl"),
@@ -17,10 +18,14 @@ func ScrapeWhiskysitenl() {
c.OnHTML(".product-block", func(e *colly.HTMLElement) {
+ W := Angebot{}
+
whisky_name := e.ChildAttr("img", "alt")
whisky_url := e.ChildAttr("a", "href")
- log.Println(whisky_name)
- log.Println(whisky_url)
+
+ W.Name = whisky_name
+ W.Url = whisky_url
+
regular_price := e.ChildText(".price-old")
price_discount_noisy := e.ChildText(".product-block-price")
r, err := regexp.Compile("[0-9]+(,[0-9]{1,2})")
@@ -28,13 +33,27 @@ func ScrapeWhiskysitenl() {
log.Fatal(err)
}
discounted_price := r.FindString(strings.Trim(strings.TrimPrefix(price_discount_noisy, regular_price), ""))
- log.Println(discounted_price + "€")
- log.Println(strings.TrimPrefix(regular_price, "€") + "€")
- log.Println(e.ChildAttr("img", "src"))
+ W.Original_price, err = sanitize_price(regular_price)
+ if err != nil {
+ //log.Println(W.Name, err)
+ return
+ }
+ W.Discounted_price, err = sanitize_price(discounted_price)
+ if err != nil {
+ //log.Println(W.Name, err)
+ return
+ }
+
+ W.Image_url = e.ChildAttr("img", "src")
- fmt.Println("")
+ W.Shop = "Whiskysite.nl"
+ W.Spirit_type = "Whisky"
+
+ Whiskys = append(Whiskys, W)
})
c.Visit("https://www.whiskysite.nl/en/specials/?limit=100")
+
+ return Whiskys
}
diff --git a/whiskyworld.go b/whiskyworld.go
index 734c9f2..65bbacd 100644
--- a/whiskyworld.go
+++ b/whiskyworld.go
@@ -1,14 +1,16 @@
package main
import (
- "fmt"
"log"
"strings"
"github.com/gocolly/colly"
)
-func ScrapeWhiskyworld() {
+func ScrapeWhiskyworld() []Angebot {
+
+ Whiskys := []Angebot{}
+
c := colly.NewCollector(
colly.AllowedDomains("whiskyworld.de"),
colly.AllowedDomains("www.whiskyworld.de"),
@@ -16,28 +18,43 @@ func ScrapeWhiskyworld() {
c.OnHTML(".product-item", func(e *colly.HTMLElement) {
+ W := Angebot{}
+
whisky_name_part1 := e.ChildText("h3")
whisky_name_part2 := e.ChildText(".item-description")
- whisky_name := whisky_name_part1 + " " + whisky_name_part2
+ W.Name = whisky_name_part1 + " " + whisky_name_part2
- whisky_url := "https://www.whiskyworld.de/" + strings.TrimPrefix(e.ChildAttr("a", "href"), "../")
- log.Println(whisky_name)
- log.Println(whisky_url)
+ W.Url = "https://www.whiskyworld.de/" + strings.TrimPrefix(e.ChildAttr("a", "href"), "../")
regular_price_noisy := e.ChildText(".offer")
regular_price := strings.TrimSuffix(strings.TrimPrefix(regular_price_noisy, "statt "), " €*")
- discounted_price := e.ChildText(".uvp")
- log.Println(strings.TrimSuffix(discounted_price, " €") + "€")
- log.Println(regular_price + "€")
+ var err error
+
+ W.Original_price, err = sanitize_price(regular_price)
+ if err != nil {
+ log.Fatal(err)
+ return
+ }
- log.Println("https:" + e.ChildAttr("img", "src"))
+ W.Discounted_price, err = sanitize_price(e.ChildText(".uvp"))
+ if err != nil {
+ log.Fatal(err)
+ return
+ }
- fmt.Println("")
+ W.Image_url = "https:" + e.ChildAttr("img", "src")
+
+ W.Shop = "Whisky World"
+ W.Spirit_type = "Whisky"
+
+ Whiskys = append(Whiskys, W)
})
c.Visit("https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BMalt%2522%257D")
c.Visit("https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BWhiskies%2522%257D")
c.Visit("https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Single%2BMalt%2522%257D")
+
+ return Whiskys
}
diff --git a/whiskyzone.go b/whiskyzone.go
index 274f3dd..dc4a047 100644
--- a/whiskyzone.go
+++ b/whiskyzone.go
@@ -1,14 +1,16 @@
package main
import (
- "fmt"
"log"
"regexp"
"github.com/gocolly/colly"
)
-func ScrapeWhiskyzone() {
+func ScrapeWhiskyzone() []Angebot {
+
+ Whiskys := []Angebot{}
+
c := colly.NewCollector(
colly.AllowedDomains("whiskyzone.de"),
colly.AllowedDomains("www.whiskyzone.de"),
@@ -16,10 +18,10 @@ func ScrapeWhiskyzone() {
c.OnHTML(".product--info", func(e *colly.HTMLElement) {
- whisky_name := e.ChildAttr("a", "title")
- whisky_url := e.ChildAttr("a", "href")
- log.Println(whisky_name)
- log.Println(whisky_url)
+ W := Angebot{}
+
+ W.Name = e.ChildAttr("a", "title")
+ W.Url = e.ChildAttr("a", "href")
price_discount_noisy := e.ChildText(".price--default")
price_regular_noisy := e.ChildText(".price--discount")
@@ -27,15 +29,28 @@ func ScrapeWhiskyzone() {
if err != nil {
log.Fatal(err)
}
- log.Println(r.FindString(price_discount_noisy) + "€")
- log.Println(r.FindString(price_regular_noisy) + "€")
+ W.Discounted_price, err = sanitize_price(r.FindString(price_discount_noisy))
+ if err != nil {
+ log.Fatal(err)
+ return
+ }
+ W.Original_price, err = sanitize_price(r.FindString(price_regular_noisy))
+ if err != nil {
+ log.Fatal(err)
+ return
+ }
e.ForEach(".image--media", func(i int, e *colly.HTMLElement) {
- log.Println(e.ChildAttr("img", "src"))
+ W.Image_url = e.ChildAttr("img", "src")
})
- fmt.Println("")
+ W.Shop = "Whiskyzone"
+ W.Spirit_type = "Whisky"
+
+ Whiskys = append(Whiskys, W)
})
c.Visit("https://www.whiskyzone.de/widgets/emotion/index/emotionId/248/controllerName/listing")
+
+ return Whiskys
}