diff options
Diffstat (limited to 'crawler/shop_whiskyzone.go')
| -rw-r--r-- | crawler/shop_whiskyzone.go | 90 |
1 files changed, 75 insertions, 15 deletions
diff --git a/crawler/shop_whiskyzone.go b/crawler/shop_whiskyzone.go index 3303b5e..5809b7e 100644 --- a/crawler/shop_whiskyzone.go +++ b/crawler/shop_whiskyzone.go @@ -2,13 +2,15 @@ package main import ( "log" - "regexp" + "strings" "github.com/gocolly/colly" ) func ScrapeWhiskyzone(shop Shop) []Angebot { + Shop_url := "https://www.whiskyzone.de/widgets/emotion/index/emotionId/248/controllerName/listing" + Whiskys := []Angebot{} c := colly.NewCollector( @@ -18,39 +20,97 @@ func ScrapeWhiskyzone(shop Shop) []Angebot { c.OnHTML(".product--info", func(e *colly.HTMLElement) { + if e.Request.URL.String() != Shop_url { + return + } + W := Angebot{} W.Name = e.ChildAttr("a", "title") W.Url = e.ChildAttr("a", "href") - price_discount_noisy := e.ChildText(".price--default") - price_regular_noisy := e.ChildText(".price--discount") - r, err := regexp.Compile("[0-9]+(,[0-9]{1,2})") + e.ForEach(".image--media", func(i int, e *colly.HTMLElement) { + W.Image_url = e.ChildAttr("img", "src") + }) + + W.Shop = shop.Id + W.Spirit_type = "Whisky" + + e.Request.Visit(W.Url) + + var err error + W.Discounted_price, err = convert_price(e.Request.Ctx.Get("discounted_price")) if err != nil { + log.Println("Discounted_price failed") log.Fatal(err) } - W.Discounted_price, err = convert_price(r.FindString(price_discount_noisy)) + + W.Original_price, err = convert_price(e.Request.Ctx.Get("original_price")) if err != nil { + log.Println("Original_price failed") log.Fatal(err) - return } - W.Original_price, err = convert_price(r.FindString(price_regular_noisy)) - if err != nil { - log.Fatal(err) + + W.Volume = get_volume(e) + W.Abv = get_abv(e) + + base_price := e.Request.Ctx.Get("base_price") + if base_price == "same_as_discounted_price" { + W.Base_price = W.Discounted_price + } else { + W.Base_price = get_base_price(e) + } + + W.Website = e.Request.Ctx.Get("website") + Whiskys = append(Whiskys, W) + }) + + c.OnHTML(".product--buybox", func(e *colly.HTMLElement) { + if e.Request.URL.String() == 
Shop_url { return } - e.ForEach(".image--media", func(i int, e *colly.HTMLElement) { - W.Image_url = e.ChildAttr("img", "src") + // Original & Discounted Price + e.ForEach(".product--price.price--default.price--discount", func(i int, e *colly.HTMLElement) { + e.Request.Ctx.Put("discounted_price", e.ChildText(".price--content.content--default")) + e.Request.Ctx.Put("original_price", e.ChildText(".price--line-through")) }) - W.Shop = shop.Id - W.Spirit_type = "Whisky" + // Volume & Base Price + e.ForEach(".product--price.price--unit", func(i int, e *colly.HTMLElement) { + text_noisy_t := e.Text + text_noisy_t = strings.Replace(text_noisy_t, "Inhalt", "", 1) + text_noisy_t = strings.Replace(text_noisy_t, ":", "", 1) - Whiskys = append(Whiskys, W) + // Contains the base price in "(" if it's not "1 Liter" + if strings.Contains(text_noisy_t, "(") { + text_noisy := strings.Split(text_noisy_t, "(") + volume_noisy := strings.Replace(text_noisy[0], "(", "", 1) + e.Request.Ctx.Put("volume", volume_noisy) + + base_price_noisy := strings.Replace(text_noisy[1], ")", "", 1) + e.Request.Ctx.Put("base_price", base_price_noisy) + } else { + e.Request.Ctx.Put("volume", text_noisy_t) + e.Request.Ctx.Put("base_price", "same_as_discounted_price") + } + }) + + // ABV + e.ForEach(".base-info--entry.entry-attribute", func(i int, e *colly.HTMLElement) { + text_noisy := e.ChildText(".entry--content") + + if strings.Contains(text_noisy, "Alkoholgehalt") && strings.Contains(text_noisy, "%") { + abv_noisy := strings.Replace(text_noisy, "Alkoholgehalt:", "", 1) + e.Request.Ctx.Put("abv", abv_noisy) + + } + }) + + e.Request.Ctx.Put("website", string(e.Response.Body)) }) - c.Visit("https://www.whiskyzone.de/widgets/emotion/index/emotionId/248/controllerName/listing") + c.Visit(Shop_url) return Whiskys } |
