package main

import (
	"regexp"
	"strings"

	"github.com/gocolly/colly"
)

// ScrapeWhiskysitenl scrapes the "specials" listing page of whiskysite.nl and
// returns one Angebot for every ".product-block" element whose original price,
// discounted price, volume and abv could all be extracted.
//
// Product blocks for which any extraction step fails are reported through
// WarnOffer (with error_msg / error_ctx filled in) and are left out of the
// result. The returned slice is never nil; it is empty when nothing could be
// scraped.
func (app *App) ScrapeWhiskysitenl(shop Shop) []Angebot {
	const shopURL = "https://www.whiskysite.nl/en/specials/?limit=100"

	// The patterns are constants, so compile them once per scrape instead of
	// once per matched HTML element.
	var (
		// Price with a comma as decimal separator, e.g. "34,95".
		priceRe = regexp.MustCompile("[0-9]+(,[0-9]{1,2})")
		// Bare decimal number with "." or "," as separator.
		numberRe = regexp.MustCompile("[0-9]+([.,][0-9]+)?")
		// Number followed by a litre unit, e.g. "0.70ltr", "0,70 l", "1,0ltr".
		literRe = regexp.MustCompile("[0-9]+([.,][0-9]+)?( )*(l|ltr)")
		// Number followed by a percent sign, e.g. "43.00%", "46%".
		abvRe = regexp.MustCompile("[0-9]+([.,][0-9]+)?( )*%")
	)

	// Kept as an empty (non-nil) slice so callers observe the same value as
	// before when no offer is found.
	offers := []Angebot{}

	c := colly.NewCollector(
		// AllowedDomains replaces the whole whitelist each time it is applied,
		// so both host names have to be passed in a single call.
		colly.AllowedDomains("whiskysite.nl", "www.whiskysite.nl"),
	)

	// One callback invocation per product tile on the listing page.
	c.OnHTML(".product-block", func(e *colly.HTMLElement) {
		offer := Angebot{}
		offer.Name = e.ChildAttr("img", "alt")
		offer.Url = e.ChildAttr("a", "href")

		regularPrice := e.ChildText(".price-old")
		noisyPrice := e.ChildText(".product-block-price")
		// Strip the old price from the front of the price text (presumably the
		// combined text starts with it — verify against the live markup) and
		// take the first price-looking token of what remains.
		discountedPrice := priceRe.FindString(strings.TrimPrefix(noisyPrice, regularPrice))

		var err error
		offer.Original_price, err = convert_price(regularPrice)
		if err != nil {
			offer.error_msg = err.Error()
			offer.error_ctx = regularPrice
			WarnOffer(offer, "Whiskysite.nl: Extracting original price failed")
			return
		}

		offer.Discounted_price, err = convert_price(discountedPrice)
		if err != nil {
			offer.error_msg = err.Error()
			offer.error_ctx = discountedPrice
			WarnOffer(offer, "Whiskysite.nl: Extracting discounted price failed")
			return
		}

		offer.Image_url = e.ChildAttr("img", "src")

		// Failure markers that the "#information" handler may have stored in
		// the request context.
		if failed := e.Request.Ctx.Get("volume_failed"); failed != "" {
			offer.error_msg = "Whiskysite.nl: Extracting volume via Liter-Regex failed"
			offer.error_ctx = failed
			WarnOffer(offer, "Whiskysite.nl: Extracting volume via Liter-Regex failed")
			return
		}
		if failed := e.Request.Ctx.Get("abv_failed"); failed != "" {
			offer.error_msg = "Whiskysite.nl: Extracting abv via Abv-Regex failed"
			// Report the text stored under "abv_failed", not "volume_failed",
			// which is known to be empty at this point.
			offer.error_ctx = failed
			WarnOffer(offer, "Whiskysite.nl: Extracting abv via Abv-Regex failed")
			return
		}

		var ctx string
		offer.Volume, ctx = get_volume(e)
		if offer.Volume == 0 {
			offer.error_msg = "Whiskysite.nl: Extracting volume failed"
			offer.error_ctx = ctx
			WarnOffer(offer, "Whiskysite.nl: Extracting volume failed")
			return
		}

		// NOTE(review): abv is filled by get_volume, exactly as before. This
		// looks like a copy-paste slip; confirm whether a dedicated abv
		// extractor exists elsewhere in the package and should be used here.
		offer.Abv, ctx = get_volume(e)
		if offer.Abv == 0 {
			offer.error_msg = "Whiskysite.nl: Extracting abv failed"
			offer.error_ctx = ctx
			WarnOffer(offer, "Whiskysite.nl: Extracting abv failed")
			return
		}

		// calculate base price, volume is never zero (checked above)
		offer.Base_price = int(RoundToEven(float64(offer.Discounted_price) / float64(offer.Volume)))

		offer.Shop = shop.Id
		offer.Spirit_type = "Whisky"

		offers = append(offers, offer)
	})

	// Extracts volume and abv from the "#information" element of any page other
	// than the listing page and stores them in the request context.
	// NOTE(review): nothing in this function visits such pages; confirm whether
	// this handler is still reachable.
	c.OnHTML("#information", func(e *colly.HTMLElement) {
		if e.Request.URL.String() == shopURL {
			return
		}

		noisyText := e.Text
		// Expected shapes:
		//   0.70ltr. 43.00%
		//   0,70 l 46%
		//   1,0ltr. 43%

		noisyLitre := literRe.FindString(noisyText)
		if noisyLitre == "" {
			e.Request.Ctx.Put("volume_failed", noisyText)
			return
		}
		// Only the numeric part of the litre token is stored.
		e.Request.Ctx.Put("volume", numberRe.FindString(noisyLitre))

		noisyAbv := abvRe.FindString(noisyText)
		if noisyAbv == "" {
			e.Request.Ctx.Put("abv_failed", noisyText)
			return
		}
		// The abv token is stored including its trailing "%".
		e.Request.Ctx.Put("abv", noisyAbv)
	})

	// Report a failed visit through the file's warning channel instead of
	// dropping the error; the (possibly empty) result is still returned.
	if err := c.Visit(shopURL); err != nil {
		WarnOffer(Angebot{
			Shop:      shop.Id,
			error_msg: err.Error(),
			error_ctx: shopURL,
		}, "Whiskysite.nl: Visiting shop page failed")
	}

	return offers
}