package main

import (
	"regexp"
	"strings"

	// "github.com/PuerkitoBio/goquery"
	"github.com/gocolly/colly"
)

// bottleworldVolumeRe matches a spec-table cell consisting solely of a number
// (optionally with a "," or "." decimal part) followed by " l", e.g. "0,7 l".
// It is compiled once here instead of once per table row.
var bottleworldVolumeRe = regexp.MustCompile("^[0-9]+([,.][0-9]+)? l$")

// ScrapeBottleWord collects the offers listed on the bottleworld.de
// special-price page and returns them as a slice of Angebot.
//
// For every "li.item" element on the listing page it reads name, URL, original
// and discounted price, base price and image URL, then visits the product URL
// so that the detail-page callbacks can store "volume", "abv", "spirit_type"
// and "website" in the request context. An offer is skipped (and the problem
// is reported through Angebot.Println) when a price cannot be converted, the
// base price cannot be sanitized, or the volume or ABV comes back as zero.
//
// Errors from visiting a page are reported through Warn; they do not abort the
// scrape. The returned slice is never nil.
//
// NOTE(review): the code reads the request context right after
// e.Request.Visit returns, so it assumes the collector built by
// customCollector runs synchronously — confirm.
// NOTE(review): all items of the listing page share one request context, so a
// value stored for a previous product may still be present when a detail page
// lacks the corresponding element — verify whether that can happen.
func (app *App) ScrapeBottleWord(shop Shop) []Angebot {
	shopURL := "https://www.bottleworld.de/aktuelle-sonderpreise/show/all"
	offers := []Angebot{}

	c := app.customCollector([]string{"bottleworld.de", "www.bottleworld.de"})

	// Listing page: one "li.item" per offer.
	c.OnHTML("li.item", func(e *colly.HTMLElement) {
		offer := Angebot{}
		offer.Shop = shop.Id
		offer.Name = e.ChildText("h2 > a")
		offer.Url = e.ChildAttr("a", "href")

		// A return inside the nested ForEach callbacks only leaves that
		// callback, so a conversion failure is remembered in priceFailed
		// and acted upon once the iteration is over.
		priceFailed := false
		e.ForEach(".price-box", func(_ int, box *colly.HTMLElement) {
			box.ForEach(".old-price", func(_ int, old *colly.HTMLElement) {
				raw := old.ChildText(".price")
				var err error
				offer.Original_price, err = convert_price(raw)
				if err != nil {
					offer.error_msg = err.Error()
					offer.error_ctx = raw
					offer.Println("Bottleworld: Converting original price failed")
					priceFailed = true
				}
			})
			box.ForEach(".special-price", func(_ int, special *colly.HTMLElement) {
				raw := special.ChildText(".price")
				var err error
				offer.Discounted_price, err = convert_price(raw)
				if err != nil {
					offer.error_msg = err.Error()
					offer.error_ctx = raw
					offer.Println("Bottleworld: Converting discounted price failed")
					priceFailed = true
				}
			})
		})
		if priceFailed {
			// Same policy as the other error paths below: drop the offer.
			return
		}

		basePriceRaw := e.ChildText(".price-per-liter")
		basePrice, err := sanitize_base_price(basePriceRaw)
		if err != nil {
			offer.error_msg = err.Error()
			offer.error_ctx = basePriceRaw
			offer.Println("Bottleworld: Sanitizing base price failed")
			return
		}
		offer.Base_price = basePrice

		offer.Image_url = e.ChildAttr("img", "src")

		// Fetch the product page; its callbacks (registered below) write
		// their findings into the context shared with this request.
		if err := e.Request.Visit(offer.Url); err != nil {
			Warn(nil, offer.Url+" "+err.Error())
		}

		var ctx string
		offer.Volume, ctx = get_volume(e)
		if offer.Volume == 0 {
			offer.error_msg = "Bottleworld: Volume is zero"
			offer.error_ctx = ctx
			offer.Println("Bottleworld: Volume is zero")
			return
		}

		offer.Abv, ctx = get_abv(e)
		if offer.Abv == 0 {
			offer.error_msg = "Bottleworld: Abv is zero"
			offer.error_ctx = ctx
			offer.Println("Bottleworld: Abv is zero")
			return
		}

		offer.Spirit_type = e.Request.Ctx.Get("spirit_type")
		offer.Website = e.Request.Ctx.Get("website")

		offers = append(offers, offer)
	})

	// Detail page: the specs table carries volume and ABV, one per row.
	c.OnHTML("#product-attribute-specs-table", func(e *colly.HTMLElement) {
		e.ForEach("tr", func(_ int, row *colly.HTMLElement) {
			cell := row.ChildText("td")
			switch {
			case bottleworldVolumeRe.MatchString(cell):
				row.Request.Ctx.Put("volume", cell)
			case strings.Contains(cell, "%"):
				row.Request.Ctx.Put("abv", cell)
			}
		})

		// Keep the raw HTML of the page that contained the table.
		e.Request.Ctx.Put("website", string(e.Response.Body))
	})

	// Detail page: the short description is used to classify the spirit.
	c.OnHTML(".short-description", func(e *colly.HTMLElement) {
		// Ignore matches on the listing page itself.
		if e.Request.URL.String() == shopURL {
			return
		}

		description := e.ChildText(".std")
		e.Request.Ctx.Put("spirit_type", detect_spirit_type(description))
	})

	if err := c.Visit(shopURL); err != nil {
		Warn(nil, shop.Name+": "+err.Error())
	}

	return offers
}