package main import ( "regexp" "strings" // "github.com/PuerkitoBio/goquery" "github.com/gocolly/colly" ) func (app *App) ScrapeRumundCo(shop Shop) []Angebot { Shop_url := "https://www.rumundco.de/navi.php?q=4&kf=29&kk-suesse-von=0&kk-suesse-bis=100&kk-milde-von=0&kk-milde-bis=100&kk-wuerze-von=0&kk-wuerze-bis=100&kk-frucht-von=0&kk-frucht-bis=100&kk-torf-von=0&kk-torf-bis=100&hf=0&af=90&Sortierung=11&a=350" Whiskys := []Angebot{} c := app.customCollector([]string{"rumundco.de", "www.rumundco.de"}) c.OnHTML(".product-teaser", func(e *colly.HTMLElement) { if e.Request.URL.String() != Shop_url { return } W := Angebot{} W.Shop = shop.Id W.Spirit_type = "Whisky" whisky_name := strings.TrimPrefix(e.ChildAttr("img", "alt"), "Restposten: ") whisky_url := "https://www.rumundco.de/" + e.ChildAttr("a", "href") matched, err := regexp.MatchString("verfügbar", e.ChildText(".delivery-status")) if err != nil { Fatal(err, "Rum & Co: Verfügbar regex failed") } if !matched { W.error_msg = "Rum & Co: Offer not available" W.error_ctx = e.ChildText(".delivery-status") PrintlnOffer(W, "Rum & Co: Offer not available") return } W.Name = whisky_name W.Url = whisky_url r_abv, err := regexp.Compile("[0-9]+([,.][0-9])?( )*(%|([vV]ol))") if err != nil { Fatal(err, "Rum & Co: Abv regex failed") } abv_noisy := r_abv.FindString(whisky_name) e.ForEach(".price_wrapper", func(i int, e *colly.HTMLElement) { regular_price := e.ChildText("del.value") if "" == regular_price { W.error_msg = "Rum & Co: No regular price found" W.error_ctx = regular_price PrintlnOffer(W, "Rum & Co: No regular price found") return } W.Original_price, err = convert_price(regular_price) if err != nil { W.error_msg = err.Error() W.error_ctx = regular_price PrintlnOffer(W, "Rum & Co: Original price: Convert price failed") return } W.Discounted_price, err = convert_price(e.ChildText(".price-value")) if err != nil { W.error_msg = err.Error() W.error_ctx = e.ChildText(".price-value") PrintlnOffer(W, "Rum & Co: Discounted price: Convert price failed") return } e.ForEach(".base_price", func(i int, e *colly.HTMLElement) { price_per_litre_noisy := e.ChildText(".value") W.Base_price, err = sanitize_base_price(price_per_litre_noisy) if err != nil { W.error_msg = err.Error() W.error_ctx = e.ChildText(".value") PrintlnOffer(W, "Rum & Co: Base price: Sanitizing base price failed") return } }) }) // Rum & Co uses pagespeed image_url_noisy := e.ChildAttr("img", "src") if strings.Contains(image_url_noisy, "pagespeed") { r_pagespeed, err := regexp.Compile(`jpg(\.pagespeed.+)$`) if err != nil { Fatal(err, "Rum & Co: Pagespeed regexp failed") } image_url_noisy_slice := r_pagespeed.FindStringSubmatch(image_url_noisy) if len(image_url_noisy_slice) < 2 { W.error_msg = "Rum & Co: (Pagespeed) Image URL not found" W.error_ctx = image_url_noisy PrintlnOffer(W, "Rum & Co: (Pagespeed) Image URL not found") return } image_url_noisy = strings.Replace(image_url_noisy, image_url_noisy_slice[1], "", 1) } W.Image_url = "https://www.rumundco.de/" + image_url_noisy e.Request.Visit(W.Url) var ctx string W.Volume, ctx = get_volume(e) if W.Volume == 0 { W.error_msg = "Rum & Co: Volume is zero" W.error_ctx = ctx PrintlnOffer(W, "Rum & Co: Volume is zero") return } if "" == abv_noisy { W.Abv, ctx = get_abv(e) abv_noisy = ctx } else { W.Abv, err = extract_abv(abv_noisy) if err != nil { W.error_msg = err.Error() W.error_ctx = abv_noisy PrintlnOffer(W, "Rum & Co: Base price: Extracting ABV failed") return } } if W.Abv == 0 { W.error_msg = "Rum & Co: Abv is zero" W.error_ctx = abv_noisy PrintlnOffer(W, "Rum & Co: Abv is zero") return } W.Website = e.Request.Ctx.Get("website") Whiskys = append(Whiskys, W) }) c.OnHTML("#table-collapse .product-attributes table", func(e *colly.HTMLElement) { e.ForEach("tr", func(i int, e *colly.HTMLElement) { text_noisy := e.ChildText("th") if strings.Contains(text_noisy, "Genauer Inhalt:") { e.Request.Ctx.Put("volume", e.ChildText("td")) } else if strings.Contains(text_noisy, "Alkoholgehalt in %:") { e.Request.Ctx.Put("abv", e.ChildText("a")) } }) e.Request.Ctx.Put("website", string(e.Response.Body)) }) err := c.Visit(Shop_url) if err != nil { Warn(nil, shop.Name+": "+err.Error()) } return Whiskys }