package main

import (
	"encoding/json"
	"errors"
	"io/ioutil"
	"net/http"
	"strings"
	"time"

	//"strconv"
	log "github.com/Sirupsen/logrus"
	"github.com/gocolly/colly"
)

const (
	// drankdozijnAPIURL is the JSON endpoint queried for the current sale
	// products (country DE, language de).
	drankdozijnAPIURL = "https://api.drankdozijn.nl/sale-products?country=DE&language=de"

	// drankdozijnArticleURL is the prefix of a product detail page; the
	// product alias from the API is appended to it.
	drankdozijnArticleURL = "https://drankdozijn.de/artikel/"

	// drankdozijnAPITimeout bounds the whole API request so that a hanging
	// connection cannot block the scraper forever.
	drankdozijnAPITimeout = 30 * time.Second
)

// ScrapeDrankdozijn collects the current offers of the Drankdozijn shop.
//
// It first queries the sale-products JSON API, turns every entry of type
// "offer" into an Angebot and then visits the product detail page of that
// offer to fill in the fields the API does not provide (image, volume, abv,
// base price, refined spirit type, raw page HTML).
//
// Failures are reported through Warn and never panic: if the API cannot be
// read, an empty slice is returned; a malformed entry is skipped.
//
// The returned slice is never nil. Its order is not deterministic because
// the API payload is decoded into a map and Go map iteration order is random.
func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
	Offers := []Angebot{}

	entries, err := fetchDrankdozijnSaleProducts()
	if err != nil {
		Warn(nil, shop.Name+": Error (API): "+err.Error())
		return Offers
	}

	for _, raw := range entries {
		entry, ok := raw.(map[string]interface{})
		if !ok || entry["type"] != "offer" {
			continue
		}

		W, ok := drankdozijnOfferFromAPI(shop, entry)
		if !ok {
			continue
		}

		app.scrapeDrankdozijnProductPage(shop, &W)

		// NOTE(review): the offer is appended even when the product page
		// callbacks bailed out early (e.g. "Skip Offer", volume/abv zero) or
		// the visit failed. This matches the previous behavior of this
		// function; confirm whether such offers should be dropped instead.
		Offers = append(Offers, W)
	}

	return Offers
}

// fetchDrankdozijnSaleProducts downloads and decodes the sale-products API
// response. The top-level JSON value is expected to be an object; its values
// are the individual product entries.
func fetchDrankdozijnSaleProducts() (map[string]interface{}, error) {
	req, err := http.NewRequest(http.MethodGet, drankdozijnAPIURL, nil)
	if err != nil {
		return nil, errors.New("building request: " + err.Error())
	}
	req.Header.Set("accept", "application/json")
	// An explicitly empty User-Agent keeps net/http from sending its default
	// "Go-http-client" User-Agent header.
	req.Header.Set("User-Agent", "")

	client := http.Client{Timeout: drankdozijnAPITimeout}
	resp, err := client.Do(req)
	if err != nil {
		return nil, errors.New("sending request: " + err.Error())
	}
	// The body must always be closed, otherwise the connection leaks.
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return nil, errors.New("unexpected HTTP status " + resp.Status)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, errors.New("reading response body: " + err.Error())
	}

	var entries map[string]interface{}
	if err := json.Unmarshal(body, &entries); err != nil {
		return nil, errors.New("json unmarshal failed: " + err.Error())
	}
	return entries, nil
}

// drankdozijnOfferFromAPI builds an Angebot from a single API entry of type
// "offer". The second return value is false when the entry has to be skipped
// (no price, or a field that is missing or has an unexpected type); in that
// case the reason has already been logged and the returned Angebot is empty.
func drankdozijnOfferFromAPI(shop Shop, entry map[string]interface{}) (Angebot, bool) {
	W := Angebot{}
	W.Shop = shop.Id

	name, ok := entry["saleDescription"].(string)
	if !ok {
		Warn(nil, shop.Name+": Error (API): offer has no usable saleDescription")
		return Angebot{}, false
	}
	W.Name = name

	// Indexing a nil map is safe, so a missing "group" simply yields !ok below.
	group, _ := entry["group"].(map[string]interface{})
	groupDescription, ok := group["description"].(string)
	if !ok {
		Warn(nil, shop.Name+": Error (API): offer has no usable group description: "+W.Name)
		return Angebot{}, false
	}
	W.Spirit_type = detect_spirit_type(groupDescription)

	if entry["price"] == nil {
		log.Println("price is nil -> skip offer")
		return Angebot{}, false
	}

	price, priceOK := entry["price"].(string)
	salePrice, salePriceOK := entry["salePrice"].(string)
	if !priceOK || !salePriceOK {
		Warn(nil, shop.Name+": Error (API): price or salePrice is not a string: "+W.Name)
		return Angebot{}, false
	}

	var err error
	if W.Original_price, err = convert_price(price); err != nil {
		Warn(nil, shop.Name+": Error (API): converting price failed: "+err.Error())
		return Angebot{}, false
	}
	if W.Discounted_price, err = convert_price(salePrice); err != nil {
		Warn(nil, shop.Name+": Error (API): converting salePrice failed: "+err.Error())
		return Angebot{}, false
	}

	products, ok := entry["products"].(map[string]interface{})
	if !ok {
		Warn(nil, shop.Name+": Error (API): offer has no usable products: "+W.Name)
		return Angebot{}, false
	}
	// When an offer lists several products, the URL of whichever product is
	// iterated last wins (map order is random), as before.
	for _, rawProduct := range products {
		product, _ := rawProduct.(map[string]interface{})
		alias, ok := product["alias"].(string)
		if !ok {
			continue
		}
		W.Url = drankdozijnArticleURL + alias
	}

	return W, true
}

// scrapeDrankdozijnProductPage visits W.Url with a fresh collector and fills
// in the fields of W that are taken from the product page. W is modified in
// place by the collector callbacks.
//
// NOTE(review): the callbacks are registered in the same order as before.
// ".product_top" reads "volume", "abv", "base_price" and "spirit_type" from
// the request context, while ".main_price" and ".main_description", which
// write those values, are registered after it. Presumably the nested
// e.Request.Visit is meant to populate the context first; confirm against the
// configuration done by customCollector.
func (app *App) scrapeDrankdozijnProductPage(shop Shop, W *Angebot) {
	c := app.customCollector([]string{"drankdozijn.de", "drankdozijn.nl"})

	c.OnHTML(".product_top", func(e *colly.HTMLElement) {
		e.ForEach(".product_image", func(i int, e *colly.HTMLElement) {
			W.Image_url = e.ChildAttr("img", "src")
		})

		if strings.Contains(W.Name, "+ gratis") || strings.Contains(W.Name, "& gratis") {
			DebugOffer(*W, "Drankdozijn: Skip Offer")
			return
		}

		// NOTE(review): the result of this nested visit was never checked in
		// the original code; it is still ignored so the control flow stays
		// the same.
		_ = e.Request.Visit(W.Url)

		var ctx string
		W.Volume, ctx = get_volume(e)
		if W.Volume == 0 {
			W.error_msg = e.Request.Ctx.Get("volume")
			W.error_ctx = ctx
			PrintlnOffer(*W, "Drankdozijn: Volume is zero")
			return
		}

		W.Abv, ctx = get_abv(e)
		if W.Abv == 0 {
			W.error_msg = "Drankdozijn: Abv is zero"
			W.error_ctx = ctx
			PrintlnOffer(*W, "Drankdozijn: abv is zero")
			return
		}

		var err error
		W.Base_price, err = convert_price(e.Request.Ctx.Get("base_price"))
		if err != nil {
			W.error_msg = err.Error()
			W.error_ctx = e.ChildText(".price_l")
			PrintlnOffer(*W, "Drankdozijn: Converting base price failed")
			return
		}

		// The three checks are deliberately sequential (not a switch): the
		// Cognac branch may rewrite Spirit_type to a value that the following
		// checks then react to.
		pageType := e.Request.Ctx.Get("spirit_type")
		if W.Spirit_type == "Cognac" {
			W.Spirit_type = pageType
		}
		if W.Spirit_type == "Likör" && pageType == "Tequila" {
			W.Spirit_type = "Tequila"
		}
		if W.Spirit_type == "Wein" {
			switch pageType {
			case "Champagner", "Champagne":
				// Both spellings are normalized to the German name.
				W.Spirit_type = "Champagner"
			default:
				DebugOffer(*W, "Drankdozijn: Skip Offer")
				return
			}
		}
	})

	c.OnHTML(".main_price", func(e *colly.HTMLElement) {
		e.Request.Ctx.Put("base_price", e.ChildText(".price_l"))
	})

	c.OnHTML(".main_description", func(e *colly.HTMLElement) {
		// The description is read as alternating label/value cells: even
		// cells hold the label, the following odd cell holds its value.
		label := ""
		count := 0
		e.ForEach(".col-xs-6", func(i int, e *colly.HTMLElement) {
			if count%2 == 0 {
				label = e.Text
			} else {
				// German and Dutch labels are accepted. Go cases do not fall
				// through, so both spellings must share one case.
				switch strings.TrimSpace(label) {
				case "Inhalt", "Inhoud":
					e.Request.Ctx.Put("volume", e.Text)
				case "Alkoholgehalt", "Alcoholpercentage":
					e.Request.Ctx.Put("abv", e.Text)
				case "Kategorie", "Categorie":
					e.Request.Ctx.Put("spirit_type", e.Text)
				}
				label = ""
			}
			count++
		})
	})

	c.OnHTML("body", func(e *colly.HTMLElement) {
		W.Website = string(e.Response.Body)
	})

	if err := c.Visit(W.Url); err != nil {
		Warn(nil, shop.Name+": Error (Visit): "+err.Error())
		return
	}
	log.Println("Visit " + W.Url)
}

// Legacy implementation (removed, summarized here for reference):
//
// Before the JSON API was used, the offers were crawled from the HTML listing
// pages "https://drankdozijn.de/aanbiedingen/<type>" for the groups
// 230 Whisky, 270 Gin, 220 Wodka, 210 Rum, 250 Likör, 240 Cognac and
// 100 Champagner (URL slugs: Wodka -> "vodka", Likör -> "likeuren",
// Champagner -> "wijn", otherwise the type name itself). Further items were
// loaded by POSTing {"items": <12, 24, ...>, "datum": "0", "groepnr": <group>}
// to "https://drankdozijn.de/async/scroll" with the headers
// "X-Requested-With: XMLHttpRequest", "Referer: <listing URL>" and the first
// cookie received, until the request failed (an "EOF" error was treated as
// the regular end of the list). Name, URL, image and prices were read from
// the ".product_top" tiles (".product_image", ".product_title",
// ".product_acties", ".product_aanbieding_prijs"); the detail page was
// scraped with the same ".main_price" / ".main_description" / "body"
// callbacks as above.