package main import ( "regexp" "strings" "github.com/gocolly/colly" ) func (app *App) ScrapeWhiskysitenl(shop Shop) []Angebot { Whiskys := []Angebot{} Shop_url := "https://www.whiskysite.nl/en/specials/?limit=100" c := app.customCollector([]string{"whiskysite.nl", "www.whiskysite.nl"}) c.OnHTML(".product-block", func(e *colly.HTMLElement) { if e.Request.URL.String() != Shop_url { return } W := Angebot{} W.Shop = shop.Id W.Spirit_type = "Whisky" whisky_name := e.ChildAttr("img", "alt") whisky_url := e.ChildAttr("a", "href") W.Name = whisky_name W.Url = whisky_url regular_price := e.ChildText(".price-old") price_discount_noisy := e.ChildText(".product-block-price") r, err := regexp.Compile("[0-9]+(,[0-9]{1,2})") if err != nil { Fatal(err, "Whiskysite.nl: Discounted price regex failed") } discounted_price := r.FindString(strings.Trim(strings.TrimPrefix(price_discount_noisy, regular_price), "")) W.Original_price, err = convert_price(regular_price) if err != nil { W.error_msg = err.Error() W.error_ctx = regular_price W.Println("Whiskysite.nl: Extracting original price failed") return } W.Discounted_price, err = convert_price(discounted_price) if err != nil { W.error_msg = err.Error() W.error_ctx = discounted_price W.Println("Whiskysite.nl: Extracting discounted price failed") return } W.Image_url = e.ChildAttr("img", "src") e.Request.Visit(W.Url) volume_failed := e.Request.Ctx.Get("volume_failed") if volume_failed != "" { W.error_msg = "Whiskysite.nl: Extracting volume via Liter-Regex failed" W.error_ctx = volume_failed W.Println("Whiskysite.nl: Extracting volume via Liter-Regex failed") return } if e.Request.Ctx.Get("abv_failed") != "" { W.error_msg = "Whiskysite.nl: Extracting abv via Abv-Regex failed" W.error_ctx = e.Request.Ctx.Get("volume_failed") W.Println("Whiskysite.nl: Extracting abv via Abv-Regex failed") return } var ctx string W.Volume, ctx = get_volume(e) if W.Volume == 0 { W.error_msg = "Whiskysite.nl: Extracting volume failed" W.error_ctx = ctx W.Println("Whiskysite.nl: Extracting volume failed") return } W.Abv, ctx = get_abv(e) if W.Abv == 0 { W.error_msg = "Whiskysite.nl: Extracting abv failed" W.error_ctx = ctx W.Println("Whiskysite.nl: Extracting abv failed") return } // calculate base price, volume is never zero W.Base_price = int(RoundToEven(float64(W.Discounted_price) / float64(W.Volume))) W.Website = e.Request.Ctx.Get("website") Whiskys = append(Whiskys, W) }) c.OnHTML("#information", func(e *colly.HTMLElement) { if e.Request.URL.String() == Shop_url { return } text_noisy := e.Text // 0.70ltr. 43.00% // 0,70 l 46% // 1,0ltr. 43% r_number, err := regexp.Compile("[0-9]+([.,][0-9]+)?") if err != nil { Fatal(err, "Whiskysite.nl: Number regex failed") } r_liter, err := regexp.Compile("[0-9]+([.,][0-9]+)?( )*(l|ltr)") if err != nil { Fatal(err, "Whiskysite.nl: Volume regex failed") } litre_noisy := r_liter.FindString(text_noisy) if litre_noisy == "" { e.Request.Ctx.Put("volume_failed", text_noisy) return } /* * it's important to add "Liter", because it's required for get_volume() */ e.Request.Ctx.Put("volume", r_number.FindString(litre_noisy)+" Liter") r_abv, err := regexp.Compile("[0-9]+([.,][0-9]+)?( )*%") if err != nil { Fatal(err, "Whiskysite.nl: Abv regex failed") } abv_noisy := r_abv.FindString(text_noisy) if abv_noisy == "" { e.Request.Ctx.Put("abv_failed", text_noisy) return } e.Request.Ctx.Put("abv", abv_noisy) e.Request.Ctx.Put("website", string(e.Response.Body)) }) err := c.Visit(Shop_url) if err != nil { Warn(nil, shop.Name+": "+err.Error()) } return Whiskys }