summary | refs | log | tree | commit | diff
path: root/crawler/shop_whiskyzone.go
diff options
context:
space:
mode:
author horus 2018-02-12 22:55:47 +0100
committer horus 2018-02-12 22:55:47 +0100
commit b6b5993e2c1215c90342398a21e6503a8c03950d (patch)
tree 60d62bb386875aa14fa4bb56625f5daeb0a64920 /crawler/shop_whiskyzone.go
parent ca5ac0bcb9206e81faab60cc8a8d6da697bdfdbe (diff)
download alkobote-b6b5993e2c1215c90342398a21e6503a8c03950d.tar.gz
Now crawls whiskyworld.de and whiskyzone.de. (crawler)
Diffstat (limited to 'crawler/shop_whiskyzone.go')
-rw-r--r-- crawler/shop_whiskyzone.go 90
1 files changed, 75 insertions, 15 deletions
diff --git a/crawler/shop_whiskyzone.go b/crawler/shop_whiskyzone.go
index 3303b5e..5809b7e 100644
--- a/crawler/shop_whiskyzone.go
+++ b/crawler/shop_whiskyzone.go
@@ -2,13 +2,15 @@ package main
import (
"log"
- "regexp"
+ "strings"
"github.com/gocolly/colly"
)
func ScrapeWhiskyzone(shop Shop) []Angebot {
+ Shop_url := "https://www.whiskyzone.de/widgets/emotion/index/emotionId/248/controllerName/listing"
+
Whiskys := []Angebot{}
c := colly.NewCollector(
@@ -18,39 +20,97 @@ func ScrapeWhiskyzone(shop Shop) []Angebot {
c.OnHTML(".product--info", func(e *colly.HTMLElement) {
+ if e.Request.URL.String() != Shop_url {
+ return
+ }
+
W := Angebot{}
W.Name = e.ChildAttr("a", "title")
W.Url = e.ChildAttr("a", "href")
- price_discount_noisy := e.ChildText(".price--default")
- price_regular_noisy := e.ChildText(".price--discount")
- r, err := regexp.Compile("[0-9]+(,[0-9]{1,2})")
+ e.ForEach(".image--media", func(i int, e *colly.HTMLElement) {
+ W.Image_url = e.ChildAttr("img", "src")
+ })
+
+ W.Shop = shop.Id
+ W.Spirit_type = "Whisky"
+
+ e.Request.Visit(W.Url)
+
+ var err error
+ W.Discounted_price, err = convert_price(e.Request.Ctx.Get("discounted_price"))
if err != nil {
+ log.Println("Discounted_price failed")
log.Fatal(err)
}
- W.Discounted_price, err = convert_price(r.FindString(price_discount_noisy))
+
+ W.Original_price, err = convert_price(e.Request.Ctx.Get("original_price"))
if err != nil {
+ log.Println("Original_price failed")
log.Fatal(err)
- return
}
- W.Original_price, err = convert_price(r.FindString(price_regular_noisy))
- if err != nil {
- log.Fatal(err)
+
+ W.Volume = get_volume(e)
+ W.Abv = get_abv(e)
+
+ base_price := e.Request.Ctx.Get("base_price")
+ if base_price == "same_as_discounted_price" {
+ W.Base_price = W.Discounted_price
+ } else {
+ W.Base_price = get_base_price(e)
+ }
+
+ W.Website = e.Request.Ctx.Get("website")
+ Whiskys = append(Whiskys, W)
+ })
+
+ c.OnHTML(".product--buybox", func(e *colly.HTMLElement) {
+ if e.Request.URL.String() == Shop_url {
return
}
- e.ForEach(".image--media", func(i int, e *colly.HTMLElement) {
- W.Image_url = e.ChildAttr("img", "src")
+ // Original & Discounted Price
+ e.ForEach(".product--price.price--default.price--discount", func(i int, e *colly.HTMLElement) {
+ e.Request.Ctx.Put("discounted_price", e.ChildText(".price--content.content--default"))
+ e.Request.Ctx.Put("original_price", e.ChildText(".price--line-through"))
})
- W.Shop = shop.Id
- W.Spirit_type = "Whisky"
+ // Volume & Base Price
+ e.ForEach(".product--price.price--unit", func(i int, e *colly.HTMLElement) {
+ text_noisy_t := e.Text
+ text_noisy_t = strings.Replace(text_noisy_t, "Inhalt", "", 1)
+ text_noisy_t = strings.Replace(text_noisy_t, ":", "", 1)
- Whiskys = append(Whiskys, W)
+ // Contains the base price in "(...)" if the volume is not "1 Liter"
+ if strings.Contains(text_noisy_t, "(") {
+ text_noisy := strings.Split(text_noisy_t, "(")
+ volume_noisy := strings.Replace(text_noisy[0], "(", "", 1)
+ e.Request.Ctx.Put("volume", volume_noisy)
+
+ base_price_noisy := strings.Replace(text_noisy[1], ")", "", 1)
+ e.Request.Ctx.Put("base_price", base_price_noisy)
+ } else {
+ e.Request.Ctx.Put("volume", text_noisy_t)
+ e.Request.Ctx.Put("base_price", "same_as_discounted_price")
+ }
+ })
+
+ // ABV
+ e.ForEach(".base-info--entry.entry-attribute", func(i int, e *colly.HTMLElement) {
+ text_noisy := e.ChildText(".entry--content")
+
+ if strings.Contains(text_noisy, "Alkoholgehalt") && strings.Contains(text_noisy, "%") {
+ abv_noisy := strings.Replace(text_noisy, "Alkoholgehalt:", "", 1)
+ e.Request.Ctx.Put("abv", abv_noisy)
+
+ }
+ })
+
+ e.Request.Ctx.Put("website", string(e.Response.Body))
})
- c.Visit("https://www.whiskyzone.de/widgets/emotion/index/emotionId/248/controllerName/listing")
+ c.Visit(Shop_url)
return Whiskys
}