diff options
Diffstat (limited to 'crawler')
| -rw-r--r-- | crawler/shop_drankdozijn.go | 411 |
1 files changed, 131 insertions, 280 deletions
diff --git a/crawler/shop_drankdozijn.go b/crawler/shop_drankdozijn.go index e4fd444..850f462 100644 --- a/crawler/shop_drankdozijn.go +++ b/crawler/shop_drankdozijn.go @@ -19,6 +19,7 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { * Parse the API. */ API_URL := "https://api.drankdozijn.nl/sale-products?country=DE&language=de" + IMAGE_URL := "https://res-2.cloudinary.com/boozeboodcdn/image/upload/e_trim:10/c_pad,g_south,h_400,w_280/c_limit,h_910,w_280/f_auto,q_auto:best/v1/" http_client := http.Client{} @@ -72,8 +73,8 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { //v, ok := api_data["price"] //log.Println(v, ok) if v, _ := api_data["price"]; v == nil { - log.Println("price is nil -> skip offer") //DebugOffer(W, "Drankdozijn: Skip Offer") + DebugOffer(W, "Drankdozijn: price is nil -> skip offer") continue } else { //log.Println("price is NOT nil -> NOT SKIPPING!") @@ -89,11 +90,34 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { panic(err) } + if W.Discounted_price >= W.Original_price { + DebugOffer(W, "Drankdozijn: Discounted price is not cheaper") + continue + } + // Offer URL - tmp_offer_url_map := api_data["products"].(map[string]interface{}) - for _, v := range tmp_offer_url_map { - tmp_url := v.(map[string]interface{}) - W.Url = "https://drankdozijn.de/artikel/" + (tmp_url["alias"]).(string) + tmp_offer_url_img_map := api_data["products"].(map[string]interface{}) + + // Check for bundle offer + if len(tmp_offer_url_img_map) > 1 { + DebugOffer(W, "Drankdozijn: Skip Offer because of bundle") + continue + } + + for _, v := range tmp_offer_url_img_map { + tmp_map := v.(map[string]interface{}) + W.Url = "https://drankdozijn.de/artikel/" + (tmp_map["alias"]).(string) + tmp_image_map := tmp_map["images"].([]interface{}) + W.Image_url = IMAGE_URL + tmp_image_map[0].(string) + //log.Println(W.Image_url) + + /* + for _, v2 := range tmp_image_map { + tmp_image := v2.(map[string]interface{}) + W.Image_url = IMAGE_URL + tmp_image["0"].(string) + log.Println(W.Image_url) + } + */ } c := app.customCollector([]string{"drankdozijn.de", "drankdozijn.nl"}) @@ -101,9 +125,11 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { c.OnHTML(".product_top", func(e *colly.HTMLElement) { // log.Println(".product_top") - e.ForEach(".product_image", func(i int, e *colly.HTMLElement) { - W.Image_url = e.ChildAttr("img", "src") - }) + /* + e.ForEach(".product_image", func(i int, e *colly.HTMLElement) { + W.Image_url = e.ChildAttr("img", "src") + }) + */ if strings.Contains(W.Name, "+ gratis") || strings.Contains(W.Name, "& gratis") { DebugOffer(W, "Drankdozijn: Skip Offer") @@ -112,62 +138,71 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { e.Request.Visit(W.Url) - var ctx string - - W.Volume, ctx = get_volume(e) - if W.Volume == 0 { - W.error_msg = e.Request.Ctx.Get("volume") - W.error_ctx = ctx - PrintlnOffer(W, "Drankdozijn: Volume is zero") - return - } - - W.Abv, ctx = get_abv(e) - if W.Abv == 0 { - W.error_msg = "Drankdozijn: Abv is zero" - W.error_ctx = ctx - PrintlnOffer(W, "Drankdozijn: abv is zero") - return - } - - base_price_noisy := e.Request.Ctx.Get("base_price") - W.Base_price, err = convert_price(base_price_noisy) - if err != nil { - W.error_msg = err.Error() - W.error_ctx = e.ChildText(".price_l") - PrintlnOffer(W, "Drankdozijn: Converting base price failed") - return - } - - if W.Spirit_type == "Cognac" { - W.Spirit_type = e.Request.Ctx.Get("spirit_type") - } - - if W.Spirit_type == "Likör" { - tmp_type := e.Request.Ctx.Get("spirit_type") - switch tmp_type { - case "Tequila": - W.Spirit_type = "Tequila" + /* + var ctx string + + W.Volume, ctx = get_volume(e) + if W.Volume == 0 { + W.error_msg = e.Request.Ctx.Get("volume") + W.error_ctx = ctx + PrintlnOffer(W, "Drankdozijn: Volume is zero") + return + } + + W.Abv, ctx = get_abv(e) + if W.Abv == 0 { + W.error_msg = "Drankdozijn: Abv is zero" + W.error_ctx = ctx + PrintlnOffer(W, "Drankdozijn: abv is zero") + return + } + + base_price_noisy := e.Request.Ctx.Get("base_price") + W.Base_price, err = convert_price(base_price_noisy) + if err != nil { + W.error_msg = err.Error() + W.error_ctx = e.ChildText(".price_l") + PrintlnOffer(W, "Drankdozijn: Converting base price failed") + return + } + + if W.Spirit_type == "Cognac" { + W.Spirit_type = e.Request.Ctx.Get("spirit_type") + } + + if W.Spirit_type == "Likör" { + tmp_type := e.Request.Ctx.Get("spirit_type") + switch tmp_type { + case "Tequila": + W.Spirit_type = "Tequila" + } } - } - if W.Spirit_type == "Wein" { - tmp_type := e.Request.Ctx.Get("spirit_type") - switch tmp_type { - case "Champagner": - case "Champagne": - W.Spirit_type = "Champagner" - default: - DebugOffer(W, "Drankdozijn: Skip Offer") - return + if W.Spirit_type == "Wein" { + tmp_type := e.Request.Ctx.Get("spirit_type") + switch tmp_type { + case "Champagner": + case "Champagne": + W.Spirit_type = "Champagner" + default: + DebugOffer(W, "Drankdozijn: Skip Offer") + return + } } - } + */ }) c.OnHTML(".main_price", func(e *colly.HTMLElement) { //log.Println(".main_price") //e.Request.Ctx.Put("base_price", strings.TrimPrefix(e.ChildText(".price_l"), "/L")) - e.Request.Ctx.Put("base_price", e.ChildText(".price_l")) + //e.Request.Ctx.Put("base_price", e.ChildText(".price_l")) + W.Base_price, err = convert_price(e.ChildText(".price_l")) + if err != nil { + W.error_msg = err.Error() + W.error_ctx = e.ChildText(".price_l") + PrintlnOffer(W, "Drankdozijn: Converting base price failed") + return + } }) c.OnHTML(".main_description", func(e *colly.HTMLElement) { @@ -178,16 +213,45 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { if count%2 == 0 { prev = e.Text } else { + //log.Println(strings.TrimSpace(prev) + ": " + e.Text) switch strings.TrimSpace(prev) { - case "Inhalt": - case "Inhoud": - e.Request.Ctx.Put("volume", e.Text) - case "Alkoholgehalt": - case "Alcoholpercentage": - e.Request.Ctx.Put("abv", e.Text) - case "Kategorie": - case "Categorie": - e.Request.Ctx.Put("spirit_type", e.Text) + case "Inhalt", "Inhoud": + //e.Request.Ctx.Put("volume", e.Text) + W.Volume, err = extract_volume(e.Text) + if W.Volume == 0 { + W.error_msg = e.Text + W.error_ctx = err.Error() + PrintlnOffer(W, "Drankdozijn: Volume is zero, returning") + return + } + case "Alkoholgehalt", "Alcoholpercentage": + //e.Request.Ctx.Put("abv", e.Text) + W.Abv, err = extract_abv(e.Text) + if W.Abv == 0 { + W.error_msg = "Drankdozijn: Abv is zero" + W.error_ctx = err.Error() + PrintlnOffer(W, "Drankdozijn: abv is zero") + return + } + case "Kategorie", "Categorie": + //e.Request.Ctx.Put("spirit_type", e.Text) + tmp_type := e.Text + if tmp_type == "Likör" { + switch tmp_type { + case "Tequila": + W.Spirit_type = "Tequila" + } + } + + if tmp_type == "Wein" { + switch tmp_type { + case "Champagner", "Champagne": + W.Spirit_type = "Champagner" + default: + DebugOffer(W, "Drankdozijn: Skip Offer") + return + } + } } prev = "" @@ -211,226 +275,13 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { if err != nil { Warn(nil, shop.Name+": Error (Visit): "+err.Error()) } else { - log.Println("Visit " + W.Url) + //log.Println("Visit " + W.Url) } - //DebugOffer(W, "DEBUG") + //DebugOffer(W, "DEBUG OFFER") Offers = append(Offers, W) } return Offers - - // ++++++++++ OLD +++++++++ - - /* - Offers := []Angebot{} - - Shop_url_base := "https://drankdozijn.de/aanbiedingen/" - var Shop_url string - Async_url := "https://drankdozijn.de/async/scroll" - - types := map[int]string{230: "Whisky", 270: "Gin", 220: "Wodka", 210: "Rum", 250: "Likör", 240: "Cognac", 100: "Champagner"} - //types := map[int]string{100: "Champagner"} - var current_type string - - c := app.customCollector([]string{"drankdozijn.de", "drankdozijn.nl"}) - - c.OnHTML(".product_top", func(e *colly.HTMLElement) { - - if e.Request.URL.String() != Shop_url && e.Request.URL.String() != Async_url { - //Debug(nil, "Drankdozijn.de: Request url ("+e.Request.URL.String()+") is not shop url ("+Shop_url+").") - return - } - - W := Angebot{} - - W.Shop = shop.Id - W.Spirit_type = current_type - - var err error - var skip_offer bool - - e.ForEach(".product_image", func(i int, e *colly.HTMLElement) { - W.Url = e.ChildAttr("a", "href") - W.Image_url = e.ChildAttr("img", "src") - }) - e.ForEach(".product_title", func(i int, e *colly.HTMLElement) { - W.Name = e.ChildText("a") - }) - - if strings.Contains(W.Name, "+ gratis") || strings.Contains(W.Name, "& gratis") { - DebugOffer(W, "Drankdozijn: Skip Offer") - return - } - - e.ForEach(".product_price", func(i int, e *colly.HTMLElement) { - original_price_noisy := e.ChildText(".product_acties") - if !strings.Contains(original_price_noisy, "€") { - PrintlnOffer(W, "Drankdozijn: Original price has no € sign. Skipping!") - skip_offer = true - return - } - W.Original_price, err = convert_price(original_price_noisy) - if err != nil { - W.error_msg = err.Error() - W.error_ctx = e.ChildText(".product_acties") - PrintlnOffer(W, "Drankdozijn: Converting original price failed") - return - } - W.Discounted_price, err = convert_price(e.ChildText(".product_aanbieding_prijs")) - if err != nil { - W.error_msg = err.Error() - W.error_ctx = e.ChildText(".product_aanbieding_prijs") - PrintlnOffer(W, "Drankdozijn: Converting discounted price failed") - return - } - }) - - if skip_offer { - return - } - - e.Request.Visit(W.Url) - - var ctx string - - W.Volume, ctx = get_volume(e) - if W.Volume == 0 { - W.error_msg = e.Request.Ctx.Get("volume") - W.error_ctx = ctx - PrintlnOffer(W, "Drankdozijn: Volume is zero") - return - } - - W.Abv, ctx = get_abv(e) - if W.Abv == 0 { - W.error_msg = "Drankdozijn: Abv is zero" - W.error_ctx = ctx - PrintlnOffer(W, "Drankdozijn: abv is zero") - return - } - - base_price_noisy := e.Request.Ctx.Get("base_price") - W.Base_price, err = convert_price(base_price_noisy) - if err != nil { - W.error_msg = err.Error() - W.error_ctx = e.ChildText(".price_l") - PrintlnOffer(W, "Drankdozijn: Converting base price failed") - return - } - - if current_type == "Cognac" { - W.Spirit_type = e.Request.Ctx.Get("spirit_type") - } - - if current_type == "Likör" { - tmp_type := e.Request.Ctx.Get("spirit_type") - switch tmp_type { - case "Tequila": - W.Spirit_type = "Tequila" - } - } - - if current_type == "Champagner" && (e.Request.Ctx.Get("spirit_type") != "Champagner" && e.Request.Ctx.Get("spirit_type") != "Champagne") { - DebugOffer(W, "Drankdozijn: Skip Offer") - return - } - - W.Website = e.Request.Ctx.Get("website") - - //DebugOffer(W, "DEBUG") - - Offers = append(Offers, W) - }) - - c.OnHTML(".main_price", func(e *colly.HTMLElement) { - //e.Request.Ctx.Put("base_price", strings.TrimPrefix(e.ChildText(".price_l"), "/L")) - e.Request.Ctx.Put("base_price", e.ChildText(".price_l")) - }) - - c.OnHTML(".main_description", func(e *colly.HTMLElement) { - prev := "" - count := 0 - e.ForEach(".col-xs-6", func(i int, e *colly.HTMLElement) { - if count%2 == 0 { - prev = e.Text - } else { - switch strings.TrimSpace(prev) { - case "Inhalt": - case "Inhoud": - e.Request.Ctx.Put("volume", e.Text) - case "Alkoholgehalt": - case "Alcoholpercentage": - e.Request.Ctx.Put("abv", e.Text) - case "Kategorie": - case "Categorie": - e.Request.Ctx.Put("spirit_type", e.Text) - } - - prev = "" - } - count++ - }) - }) - - c.OnHTML("body", func(e *colly.HTMLElement) { - if e.Request.URL.String() == Shop_url { - return - } - e.Request.Ctx.Put("website", string(e.Response.Body)) - }) - - var cookie *http.Cookie - var has_cookie bool - c.OnResponse(func(r *colly.Response) { - //log.Debug("Cookies:", c.Cookies(r.Request.URL.String())) - if len(c.Cookies(r.Request.URL.String())) > 0 { - has_cookie = true - cookie = c.Cookies(r.Request.URL.String())[0] - } - }) - - for groepnr, cur_type := range types { - current_type = cur_type - switch current_type { - case "Wodka": - Shop_url = Shop_url_base + "vodka" - case "Likör": - Shop_url = Shop_url_base + "likeuren" - case "Champagner": - Shop_url = Shop_url_base + "wijn" - default: - Shop_url = Shop_url_base + current_type - } - - //log.Debug(Shop_url) - err := c.Visit(Shop_url) - if err != nil { - Warn(nil, shop.Name+": Error (Visit): "+err.Error()) - } - - c.OnRequest(func(r *colly.Request) { - r.Headers.Set("X-Requested-With", "XMLHttpRequest") - r.Headers.Set("Referer", Shop_url) - if has_cookie { - //log.Debug("Setting Cookie: " + cookie.String()) - r.Headers.Set("Cookie", cookie.String()) - } - }) - - for i := 12; true; i = i + 12 { - log.Debug("Crawling Drankdozijn: type = " + cur_type + " items = " + strconv.Itoa(i)) - err := c.Post(Async_url, map[string]string{"items": strconv.Itoa(i), "datum": "0", "groepnr": strconv.Itoa(groepnr)}) - if err != nil { - if "EOF" != err.Error() { - Warn(nil, shop.Name+": Error (Post): "+err.Error()) - } - break - } - } - } - - return Offers - */ } |
