package main import ( "errors" "strings" "github.com/gocolly/colly" ) func (app *App) ScrapeWhiskyzone(shop Shop) []Angebot { Shop_url := "https://www.whiskyzone.de/widgets/emotion/index/emotionId/248/controllerName/listing" Whiskys := []Angebot{} c := app.customCollector([]string{"whiskyzone.de", "www.whiskyzone.de"}) c.OnHTML(".product--info", func(e *colly.HTMLElement) { if e.Request.URL.String() != Shop_url { return } W := Angebot{} W.Name = e.ChildAttr("a", "title") W.Url = e.ChildAttr("a", "href") e.ForEach(".image--media", func(i int, e *colly.HTMLElement) { W.Image_url = e.ChildAttr("img", "src") }) W.Shop = shop.Id W.Spirit_type = "Whisky" e.Request.Ctx.Put("offer_url", W.Url) e.Request.Visit(W.Url) if "sold_out" == e.Request.Ctx.Get("sold_out") { W.Println("Whiskyzone: Sold out") return } var err error W.Discounted_price, err = convert_price(e.Request.Ctx.Get("discounted_price")) if err != nil { W.error_msg = err.Error() W.error_ctx = e.Request.Ctx.Get("discounted_price") W.Println("Whiskyzone: Convert discounted price failed") return } W.Original_price, err = convert_price(e.Request.Ctx.Get("original_price")) if err != nil { W.error_msg = err.Error() W.error_ctx = e.Request.Ctx.Get("original_price") W.Println("Whiskyzone: Convert original price failed") return } var ctx string W.Volume, ctx = get_volume(e) if W.Volume == 0 { W.error_msg = "Whiskyzone: Volume is zero" W.error_ctx = ctx W.Println("Whiskyzone: Volume is zero") return } W.Abv, ctx = get_abv(e) if W.Abv == 0 { W.error_msg = "Whiskyzone: Abv is zero" W.error_ctx = ctx W.Println("Whiskyzone: Abv is zero") return } base_price := e.Request.Ctx.Get("base_price") if base_price == "same_as_discounted_price" { W.Base_price = W.Discounted_price } else { W.Base_price, err = get_base_price(e) if err != nil { W.error_msg = err.Error() W.error_ctx = base_price W.Println("Whiskyzone: Extracting base price failed") return } } W.Website = e.Request.Ctx.Get("website") Whiskys = append(Whiskys, W) }) c.OnHTML(".product--buybox", func(e *colly.HTMLElement) { offer_url := e.Request.Ctx.Get("offer_url") Debug(nil, "Visiting: "+e.Request.URL.String()+" with offer_url: "+offer_url) if e.Request.URL.String() != offer_url { return } if e.Request.URL.String() == Shop_url { return } // Original & Discounted Price e.ForEach(".product--price.price--default.price--discount", func(i int, e *colly.HTMLElement) { Debug(errors.New("Discount: "+e.ChildText(".price--content.content--default")), "Whiskyzone: Original:"+e.ChildText(".price--line-through")) e.Request.Ctx.Put("discounted_price", e.ChildText(".price--content.content--default")) e.Request.Ctx.Put("original_price", e.ChildText(".price--line-through")) }) // Volume & Base Price e.ForEach(".product--price.price--unit", func(i int, e *colly.HTMLElement) { text_noisy_t := e.Text text_noisy_t = strings.Replace(text_noisy_t, "Inhalt", "", 1) text_noisy_t = strings.Replace(text_noisy_t, ":", "", 1) // Containts the base price in "(" if it's not "1 Liter" if strings.Contains(text_noisy_t, "(") { text_noisy := strings.Split(text_noisy_t, "(") volume_noisy := strings.Replace(text_noisy[0], "(", "", 1) e.Request.Ctx.Put("volume", volume_noisy) base_price_noisy := strings.Replace(text_noisy[1], ")", "", 1) e.Request.Ctx.Put("base_price", base_price_noisy) } else { e.Request.Ctx.Put("volume", text_noisy_t) e.Request.Ctx.Put("base_price", "same_as_discounted_price") } }) // ABV e.ForEach(".base-info--entry.entry-attribute", func(i int, e *colly.HTMLElement) { text_noisy := e.ChildText(".entry--content") if strings.Contains(text_noisy, "Alkoholgehalt") && strings.Contains(text_noisy, "%") { abv_noisy := strings.Replace(text_noisy, "Alkoholgehalt:", "", 1) e.Request.Ctx.Put("abv", abv_noisy) } }) e.Request.Ctx.Put("website", string(e.Response.Body)) }) // Product not found c.OnHTML(".detail-error--headline", func(e *colly.HTMLElement) { if e.Request.URL.String() == Shop_url { return } e.Request.Ctx.Put("sold_out", "sold_out") }) err := c.Visit(Shop_url) if err != nil { shop.error_msg = err.Error() shop.error_ctx = Shop_url shop.Warn("Crawling failed") } return Whiskys }