summary refs log tree commit diff
path: root/crawler/shop_whiskyworld.go
diff options
context:
space:
mode:
Diffstat (limited to 'crawler/shop_whiskyworld.go')
-rw-r--r-- crawler/shop_whiskyworld.go 31
1 files changed, 27 insertions, 4 deletions
diff --git a/crawler/shop_whiskyworld.go b/crawler/shop_whiskyworld.go
index 8e4b984..c0fb7b6 100644
--- a/crawler/shop_whiskyworld.go
+++ b/crawler/shop_whiskyworld.go
@@ -9,14 +9,23 @@ import (
func ScrapeWhiskyworld(shop Shop) []Angebot {
+ Shop_urls := []string{"https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BMalt%2522%257D",
+ "https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BWhiskies%2522%257D",
+ "https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Single%2BMalt%2522%257D",
+ }
+
Whiskys := []Angebot{}
c := colly.NewCollector(
+ colly.UserAgent("friendly"),
colly.AllowedDomains("whiskyworld.de"),
colly.AllowedDomains("www.whiskyworld.de"),
)
c.OnHTML(".product-item", func(e *colly.HTMLElement) {
+ if !stringInSlice(e.Request.URL.String(), Shop_urls) {
+ return
+ }
W := Angebot{}
@@ -25,7 +34,7 @@ func ScrapeWhiskyworld(shop Shop) []Angebot {
W.Name = whisky_name_part1 + " " + whisky_name_part2
- W.Url = "https://www.whiskyworld.de/" + strings.TrimPrefix(e.ChildAttr("a", "href"), "../")
+ W.Url = "https://www.whiskyworld.de/" + e.ChildAttr("a", "href")
regular_price_noisy := e.ChildText(".offer")
regular_price := strings.TrimSuffix(strings.TrimPrefix(regular_price_noisy, "statt "), " €*")
@@ -34,12 +43,14 @@ func ScrapeWhiskyworld(shop Shop) []Angebot {
W.Original_price, err = convert_price(regular_price)
if err != nil {
+ log.Println("Whisky World: Original_price failed: " + regular_price + " // " + W.Name + " // " + W.Url + " // " + e.Request.URL.String())
log.Fatal(err)
return
}
W.Discounted_price, err = convert_price(e.ChildText(".uvp"))
if err != nil {
+ log.Println("Whisky World: Discounted_price failed")
log.Fatal(err)
return
}
@@ -64,6 +75,7 @@ func ScrapeWhiskyworld(shop Shop) []Angebot {
base_price_noisy = strings.TrimSpace(strings.SplitAfter(base_price_noisy, "Liter")[0])
W.Base_price, err = sanitize_base_price(base_price_noisy)
if err != nil {
+ log.Println("Whisky World: Base_price failed")
log.Fatal(err)
}
}
@@ -75,12 +87,23 @@ func ScrapeWhiskyworld(shop Shop) []Angebot {
W.Shop = shop.Id
W.Spirit_type = "Whisky"
+ e.Request.Visit(W.Url)
+ W.Website = e.Request.Ctx.Get("website")
+
Whiskys = append(Whiskys, W)
})
- c.Visit("https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BMalt%2522%257D")
- c.Visit("https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BWhiskies%2522%257D")
- c.Visit("https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Single%2BMalt%2522%257D")
+ c.OnHTML("body", func(e *colly.HTMLElement) {
+ if stringInSlice(e.Request.URL.String(), Shop_urls) {
+ return
+ }
+
+ e.Request.Ctx.Put("website", string(e.Response.Body))
+ })
+
+ for _, url := range Shop_urls {
+ c.Visit(url)
+ }
return Whiskys
}