package main import ( "bytes" "encoding/json" "io/ioutil" "net/http" "strings" //"github.com/gocolly/colly" "github.com/PuerkitoBio/goquery" log "github.com/sirupsen/logrus" ) func (app *App) ScrapeSpirituosenWolf(shop Shop) []Angebot { Offers := []Angebot{} /** * Parse the API. */ API_URL := "https://www.spirituosen-wolf.de/widgets/listing/listingCount/sCategory/466?p=1&n=100&c=466&o=7&loadProducts=1" http_client := http.Client{} req, err := http.NewRequest(http.MethodGet, API_URL, nil) if err != nil { // TODO panic(err) } req.Header.Set("accept", "application/json") req.Header.Set("User-Agent", "like googlebot") api_resp, err := http_client.Do(req) if err != nil { // TODO panic(err) } api_body, err := ioutil.ReadAll(api_resp.Body) if err != nil { // TODO panic(err) } //log.Println("%v\n", string(api_body)) type api struct { TotalCount int `json:"totalCount"` Listing string `json:"listing"` Pagination string `json:"pagination"` } var api_val api err = json.Unmarshal(api_body, &api_val) if err != nil { log.Println("Wolf: offers json unmarshal failed") log.Printf("%+s\n", string(api_body)) panic(err) } html, err := goquery.NewDocumentFromReader(bytes.NewBuffer([]byte(api_val.Listing))) if err != nil { log.Fatal(err) } html.Find(".product--box.box--basic").Each(func(count int, doc *goquery.Selection) { offer := Angebot{} offer.Shop = shop.Id doc.Find(".sw_frontend_listing_box_article_imagewrapper").Each(func(i int, s *goquery.Selection) { // Url offer.Url, _ = s.Find("a").Attr("href") // Image_url _srcset, _ := s.Find("img").Attr("srcset") srcset := strings.Split(_srcset, ",") for _, image_url := range srcset { if strings.Contains(image_url, "@2x") { offer.Image_url = image_url break } } // Name offer.Name, _ = s.Find("img").Attr("alt") }) doc.Find(".sw_frontend_listing_box_article_priceinfo_alcvol").Each(func(i int, s *goquery.Selection) { // Abv offer.Abv, err = extract_abv(strings.TrimSpace(s.Text())) if err != nil { log.Println("Wolf: Extracting Abv failed") } }) doc.Find(".sw_frontend_listing_box_article_priceinfo_qty").Each(func(i int, s *goquery.Selection) { // Volume offer.Volume, err = extract_volume(strings.TrimSpace(strings.TrimPrefix("Inhalt:", strings.TrimSpace(s.Text())))) if err != nil { log.Println("Wolf: Extracting volume failed") } }) doc.Find(".sw_frontend_listing_box_article_price_pseudo").Each(func(i int, s *goquery.Selection) { // Original_price offer.Original_price, err = convert_price(strings.TrimSpace(s.Text())) if err != nil { log.Println("Wolf: Converting original_price failed") } }) doc.Find(".sw_frontend_listing_box_article_price_default").Each(func(i int, s *goquery.Selection) { // Discounted_price offer.Discounted_price, err = convert_price(strings.TrimSpace(s.Text())) if err != nil { log.Println("Wolf: Converting Discounted_price failed") } }) doc.Find(".sw_frontend_listing_box_article_priceinfo_baseprice").Each(func(i int, s *goquery.Selection) { // Base_price offer.Base_price, err = sanitize_base_price(strings.TrimSpace(s.Text())) if err != nil { log.Println("Wolf: Sanitizing Base_price failed") } }) // detect spirit_type offer.Spirit_type = detect_spirit_type(offer.Name) if offer.Spirit_type == "Verschiedenes" { offer.Spirit_type = WolfGetSpiritTypeFromUrl(offer.Url) } Offers = append(Offers, offer) }) return Offers } func WolfGetSpiritTypeFromUrl(url string) string { url = strings.TrimPrefix("https://www.spirituosen-wolf.de/", url) subfolders := strings.Split(url, "/") for index, folder := range subfolders { // match Gin, Whisky, R switch folder { case "whisk-e-y": return "Whisky" case "gin-co": if subfolders[1] == "genever" { return "Genever" } return "Gin" case "likoer": return "Likör" case "rum-cachaca", "rum-co": return "Rum" case "champagner-co": switch subfolders[1] { case "champagner": return "Champagner" case "Cremant": return "Cremant" default: return "Sekt" } case "wodka-vodka": return "Wodka" case "weine": return "Wein" case "grappa-korn-braende": switch subfolders[index+1] { case "grappa": return "Grappa" case "korn": return "Korn" default: return "Verschiedenes" } case "brandy-cognac-co": switch subfolders[index+1] { case "cognac": return "Cognac" case "brandy": return "Brandy" case "calvados": return "Calvados" case "armagnac": return "Armagnac" } case "cachaca": return "Cachaca" case "Absinth": return "Absinth" case "mezcal": return "Mezcal" case "Tequila": return "Tequila" default: return "Verschiedenes" } } return "Verschiedenes" }