package main

import (
	"strings"

	log "github.com/Sirupsen/logrus"
	"github.com/gocolly/colly"
)

// ScrapeWhiskyworld collects the offers listed on the whiskyworld.de
// "sonderangebote" pages for the categories Blended Malt, Blended Whiskies
// and Single Malt and returns them as Angebot values tagged with shop.Id.
//
// For every product tile on a listing page the name, product URL, original
// and discounted price, volume, ABV, base price and image URL are read from
// the tile. The product page is then visited and its raw HTML is stored in
// Angebot.Website.
//
// A product whose fields cannot be parsed is logged and skipped, so a single
// malformed tile neither aborts the process nor ends up in the result. The
// returned slice is never nil.
func ScrapeWhiskyworld(shop Shop) []Angebot {
	shopURLs := []string{
		"https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BMalt%2522%257D",
		"https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BWhiskies%2522%257D",
		"https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Single%2BMalt%2522%257D",
	}
	offers := []Angebot{}

	c := colly.NewCollector(
		colly.UserAgent("friendly"),
		// AllowedDomains replaces the whole list on every call, so both hosts
		// have to be passed in a single call.
		colly.AllowedDomains("whiskyworld.de", "www.whiskyworld.de"),
	)

	// Product tiles: only handled on the listing pages themselves, not on
	// product pages that happen to contain ".product-item" elements.
	c.OnHTML(".product-item", func(e *colly.HTMLElement) {
		listingURL := e.Request.URL.String()
		if !stringInSlice(listingURL, shopURLs) {
			return
		}

		var (
			w   Angebot
			err error
		)
		w.Name = e.ChildText("h3") + " " + e.ChildText(".item-description")
		w.Url = "https://www.whiskyworld.de/" + e.ChildAttr("a", "href")

		// The regular price is rendered as "statt <price> €*".
		regularPrice := strings.TrimSuffix(strings.TrimPrefix(e.ChildText(".offer"), "statt "), " €*")
		w.Original_price, err = convert_price(regularPrice)
		if err != nil {
			log.Println("Whisky World: Original_price failed: " + regularPrice + " // " + w.Name + " // " + w.Url + " // " + listingURL)
			log.Error(err)
			return
		}

		w.Discounted_price, err = convert_price(e.ChildText(".uvp"))
		if err != nil {
			log.Println("Whisky World: Discounted_price failed")
			log.Error(err)
			return
		}

		// valid is cleared by the nested callbacks when a field cannot be
		// parsed; the offer is dropped in that case.
		valid := true

		e.ForEach(".product-infobox", func(_ int, box *colly.HTMLElement) {
			if !valid {
				return
			}
			var err error
			details := box.ChildText(".item-inh")
			w.Volume, err = extract_volume(details)
			if err != nil {
				log.Error(err)
				valid = false
				return
			}

			// The ABV follows the "Liter" marker; without the marker there is
			// no second segment to read.
			parts := strings.SplitAfter(details, "Liter")
			if len(parts) < 2 {
				log.Println("Whisky World: Abv failed: no \"Liter\" in: " + details + " // " + w.Url)
				valid = false
				return
			}
			abvText := strings.TrimPrefix(strings.TrimSpace(parts[1]), "/")
			w.Abv, err = extract_abv(abvText)
			if err != nil {
				log.Error(err)
				valid = false
			}
		})
		if !valid {
			return
		}

		e.ForEach(".price", func(_ int, price *colly.HTMLElement) {
			if !valid {
				return
			}
			unit := price.ChildText(".unit")
			if !strings.Contains(unit, "Liter") {
				return
			}
			// Keep everything up to and including the first "Liter".
			unit = strings.TrimSpace(strings.SplitAfter(unit, "Liter")[0])
			var err error
			w.Base_price, err = sanitize_base_price(unit)
			if err != nil {
				log.Println("Whisky World: Base_price failed")
				log.Error(err)
				valid = false
			}
		})
		if !valid {
			return
		}

		w.Image_url = "https:" + e.ChildAttr("img", "src")
		w.Shop = shop.Id
		w.Spirit_type = "Whisky"

		// The request context is shared by all product visits started from
		// this listing page. Clear the key first so a failed or refused visit
		// cannot leave the previous product's HTML attached to this offer.
		e.Request.Ctx.Put("website", "")
		if err := e.Request.Visit(w.Url); err != nil {
			log.Println("Whisky World: visiting product page failed: " + w.Url)
			log.Error(err)
		}
		// NOTE(review): relies on the collector running synchronously (no
		// Async option is set above), so the "body" callback has already run.
		w.Website = e.Request.Ctx.Get("website")

		offers = append(offers, w)
	})

	// Product pages: stash the raw HTML in the shared request context so the
	// ".product-item" callback can pick it up after Visit returns.
	c.OnHTML("body", func(e *colly.HTMLElement) {
		if stringInSlice(e.Request.URL.String(), shopURLs) {
			return
		}
		e.Request.Ctx.Put("website", string(e.Response.Body))
	})

	for _, u := range shopURLs {
		if err := c.Visit(u); err != nil {
			log.Println("Whisky World: visiting listing page failed: " + u)
			log.Error(err)
		}
	}

	return offers
}