summary | refs | log | tree | commit | diff
path: root/crawler
diff options
context:
space:
mode:
Diffstat (limited to 'crawler')
-rw-r--r-- crawler/shop_drankdozijn.go 411
1 files changed, 131 insertions, 280 deletions
diff --git a/crawler/shop_drankdozijn.go b/crawler/shop_drankdozijn.go
index e4fd444..850f462 100644
--- a/crawler/shop_drankdozijn.go
+++ b/crawler/shop_drankdozijn.go
@@ -19,6 +19,7 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
* Parse the API.
*/
API_URL := "https://api.drankdozijn.nl/sale-products?country=DE&language=de"
+ IMAGE_URL := "https://res-2.cloudinary.com/boozeboodcdn/image/upload/e_trim:10/c_pad,g_south,h_400,w_280/c_limit,h_910,w_280/f_auto,q_auto:best/v1/"
http_client := http.Client{}
@@ -72,8 +73,8 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
//v, ok := api_data["price"]
//log.Println(v, ok)
if v, _ := api_data["price"]; v == nil {
- log.Println("price is nil -> skip offer")
//DebugOffer(W, "Drankdozijn: Skip Offer")
+ DebugOffer(W, "Drankdozijn: price is nil -> skip offer")
continue
} else {
//log.Println("price is NOT nil -> NOT SKIPPING!")
@@ -89,11 +90,34 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
panic(err)
}
+ if W.Discounted_price >= W.Original_price {
+ DebugOffer(W, "Drankdozijn: Discounted price is not cheaper")
+ continue
+ }
+
// Offer URL
- tmp_offer_url_map := api_data["products"].(map[string]interface{})
- for _, v := range tmp_offer_url_map {
- tmp_url := v.(map[string]interface{})
- W.Url = "https://drankdozijn.de/artikel/" + (tmp_url["alias"]).(string)
+ tmp_offer_url_img_map := api_data["products"].(map[string]interface{})
+
+ // Check for bundle offer
+ if len(tmp_offer_url_img_map) > 1 {
+ DebugOffer(W, "Drankdozijn: Skip Offer because of bundle")
+ continue
+ }
+
+ for _, v := range tmp_offer_url_img_map {
+ tmp_map := v.(map[string]interface{})
+ W.Url = "https://drankdozijn.de/artikel/" + (tmp_map["alias"]).(string)
+ tmp_image_map := tmp_map["images"].([]interface{})
+ W.Image_url = IMAGE_URL + tmp_image_map[0].(string)
+ //log.Println(W.Image_url)
+
+ /*
+ for _, v2 := range tmp_image_map {
+ tmp_image := v2.(map[string]interface{})
+ W.Image_url = IMAGE_URL + tmp_image["0"].(string)
+ log.Println(W.Image_url)
+ }
+ */
}
c := app.customCollector([]string{"drankdozijn.de", "drankdozijn.nl"})
@@ -101,9 +125,11 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
c.OnHTML(".product_top", func(e *colly.HTMLElement) {
// log.Println(".product_top")
- e.ForEach(".product_image", func(i int, e *colly.HTMLElement) {
- W.Image_url = e.ChildAttr("img", "src")
- })
+ /*
+ e.ForEach(".product_image", func(i int, e *colly.HTMLElement) {
+ W.Image_url = e.ChildAttr("img", "src")
+ })
+ */
if strings.Contains(W.Name, "+ gratis") || strings.Contains(W.Name, "& gratis") {
DebugOffer(W, "Drankdozijn: Skip Offer")
@@ -112,62 +138,71 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
e.Request.Visit(W.Url)
- var ctx string
-
- W.Volume, ctx = get_volume(e)
- if W.Volume == 0 {
- W.error_msg = e.Request.Ctx.Get("volume")
- W.error_ctx = ctx
- PrintlnOffer(W, "Drankdozijn: Volume is zero")
- return
- }
-
- W.Abv, ctx = get_abv(e)
- if W.Abv == 0 {
- W.error_msg = "Drankdozijn: Abv is zero"
- W.error_ctx = ctx
- PrintlnOffer(W, "Drankdozijn: abv is zero")
- return
- }
-
- base_price_noisy := e.Request.Ctx.Get("base_price")
- W.Base_price, err = convert_price(base_price_noisy)
- if err != nil {
- W.error_msg = err.Error()
- W.error_ctx = e.ChildText(".price_l")
- PrintlnOffer(W, "Drankdozijn: Converting base price failed")
- return
- }
-
- if W.Spirit_type == "Cognac" {
- W.Spirit_type = e.Request.Ctx.Get("spirit_type")
- }
-
- if W.Spirit_type == "Likör" {
- tmp_type := e.Request.Ctx.Get("spirit_type")
- switch tmp_type {
- case "Tequila":
- W.Spirit_type = "Tequila"
+ /*
+ var ctx string
+
+ W.Volume, ctx = get_volume(e)
+ if W.Volume == 0 {
+ W.error_msg = e.Request.Ctx.Get("volume")
+ W.error_ctx = ctx
+ PrintlnOffer(W, "Drankdozijn: Volume is zero")
+ return
+ }
+
+ W.Abv, ctx = get_abv(e)
+ if W.Abv == 0 {
+ W.error_msg = "Drankdozijn: Abv is zero"
+ W.error_ctx = ctx
+ PrintlnOffer(W, "Drankdozijn: abv is zero")
+ return
+ }
+
+ base_price_noisy := e.Request.Ctx.Get("base_price")
+ W.Base_price, err = convert_price(base_price_noisy)
+ if err != nil {
+ W.error_msg = err.Error()
+ W.error_ctx = e.ChildText(".price_l")
+ PrintlnOffer(W, "Drankdozijn: Converting base price failed")
+ return
+ }
+
+ if W.Spirit_type == "Cognac" {
+ W.Spirit_type = e.Request.Ctx.Get("spirit_type")
+ }
+
+ if W.Spirit_type == "Likör" {
+ tmp_type := e.Request.Ctx.Get("spirit_type")
+ switch tmp_type {
+ case "Tequila":
+ W.Spirit_type = "Tequila"
+ }
}
- }
- if W.Spirit_type == "Wein" {
- tmp_type := e.Request.Ctx.Get("spirit_type")
- switch tmp_type {
- case "Champagner":
- case "Champagne":
- W.Spirit_type = "Champagner"
- default:
- DebugOffer(W, "Drankdozijn: Skip Offer")
- return
+ if W.Spirit_type == "Wein" {
+ tmp_type := e.Request.Ctx.Get("spirit_type")
+ switch tmp_type {
+ case "Champagner":
+ case "Champagne":
+ W.Spirit_type = "Champagner"
+ default:
+ DebugOffer(W, "Drankdozijn: Skip Offer")
+ return
+ }
}
- }
+ */
})
c.OnHTML(".main_price", func(e *colly.HTMLElement) {
//log.Println(".main_price")
//e.Request.Ctx.Put("base_price", strings.TrimPrefix(e.ChildText(".price_l"), "/L"))
- e.Request.Ctx.Put("base_price", e.ChildText(".price_l"))
+ //e.Request.Ctx.Put("base_price", e.ChildText(".price_l"))
+ W.Base_price, err = convert_price(e.ChildText(".price_l"))
+ if err != nil {
+ W.error_msg = err.Error()
+ W.error_ctx = e.ChildText(".price_l")
+ PrintlnOffer(W, "Drankdozijn: Converting base price failed")
+ return
+ }
})
c.OnHTML(".main_description", func(e *colly.HTMLElement) {
@@ -178,16 +213,45 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
if count%2 == 0 {
prev = e.Text
} else {
+ //log.Println(strings.TrimSpace(prev) + ": " + e.Text)
switch strings.TrimSpace(prev) {
- case "Inhalt":
- case "Inhoud":
- e.Request.Ctx.Put("volume", e.Text)
- case "Alkoholgehalt":
- case "Alcoholpercentage":
- e.Request.Ctx.Put("abv", e.Text)
- case "Kategorie":
- case "Categorie":
- e.Request.Ctx.Put("spirit_type", e.Text)
+ case "Inhalt", "Inhoud":
+ //e.Request.Ctx.Put("volume", e.Text)
+ W.Volume, err = extract_volume(e.Text)
+ if W.Volume == 0 {
+ W.error_msg = e.Text
+ W.error_ctx = err.Error()
+ PrintlnOffer(W, "Drankdozijn: Volume is zero, returning")
+ return
+ }
+ case "Alkoholgehalt", "Alcoholpercentage":
+ //e.Request.Ctx.Put("abv", e.Text)
+ W.Abv, err = extract_abv(e.Text)
+ if W.Abv == 0 {
+ W.error_msg = "Drankdozijn: Abv is zero"
+ W.error_ctx = err.Error()
+ PrintlnOffer(W, "Drankdozijn: abv is zero")
+ return
+ }
+ case "Kategorie", "Categorie":
+ //e.Request.Ctx.Put("spirit_type", e.Text)
+ tmp_type := e.Text
+ if tmp_type == "Likör" {
+ switch tmp_type {
+ case "Tequila":
+ W.Spirit_type = "Tequila"
+ }
+ }
+
+ if tmp_type == "Wein" {
+ switch tmp_type {
+ case "Champagner", "Champagne":
+ W.Spirit_type = "Champagner"
+ default:
+ DebugOffer(W, "Drankdozijn: Skip Offer")
+ return
+ }
+ }
}
prev = ""
@@ -211,226 +275,13 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
if err != nil {
Warn(nil, shop.Name+": Error (Visit): "+err.Error())
} else {
- log.Println("Visit " + W.Url)
+ //log.Println("Visit " + W.Url)
}
- //DebugOffer(W, "DEBUG")
+ //DebugOffer(W, "DEBUG OFFER")
Offers = append(Offers, W)
}
return Offers
-
- // ++++++++++ OLD +++++++++
-
- /*
- Offers := []Angebot{}
-
- Shop_url_base := "https://drankdozijn.de/aanbiedingen/"
- var Shop_url string
- Async_url := "https://drankdozijn.de/async/scroll"
-
- types := map[int]string{230: "Whisky", 270: "Gin", 220: "Wodka", 210: "Rum", 250: "Likör", 240: "Cognac", 100: "Champagner"}
- //types := map[int]string{100: "Champagner"}
- var current_type string
-
- c := app.customCollector([]string{"drankdozijn.de", "drankdozijn.nl"})
-
- c.OnHTML(".product_top", func(e *colly.HTMLElement) {
-
- if e.Request.URL.String() != Shop_url && e.Request.URL.String() != Async_url {
- //Debug(nil, "Drankdozijn.de: Request url ("+e.Request.URL.String()+") is not shop url ("+Shop_url+").")
- return
- }
-
- W := Angebot{}
-
- W.Shop = shop.Id
- W.Spirit_type = current_type
-
- var err error
- var skip_offer bool
-
- e.ForEach(".product_image", func(i int, e *colly.HTMLElement) {
- W.Url = e.ChildAttr("a", "href")
- W.Image_url = e.ChildAttr("img", "src")
- })
- e.ForEach(".product_title", func(i int, e *colly.HTMLElement) {
- W.Name = e.ChildText("a")
- })
-
- if strings.Contains(W.Name, "+ gratis") || strings.Contains(W.Name, "& gratis") {
- DebugOffer(W, "Drankdozijn: Skip Offer")
- return
- }
-
- e.ForEach(".product_price", func(i int, e *colly.HTMLElement) {
- original_price_noisy := e.ChildText(".product_acties")
- if !strings.Contains(original_price_noisy, "€") {
- PrintlnOffer(W, "Drankdozijn: Original price has no € sign. Skipping!")
- skip_offer = true
- return
- }
- W.Original_price, err = convert_price(original_price_noisy)
- if err != nil {
- W.error_msg = err.Error()
- W.error_ctx = e.ChildText(".product_acties")
- PrintlnOffer(W, "Drankdozijn: Converting original price failed")
- return
- }
- W.Discounted_price, err = convert_price(e.ChildText(".product_aanbieding_prijs"))
- if err != nil {
- W.error_msg = err.Error()
- W.error_ctx = e.ChildText(".product_aanbieding_prijs")
- PrintlnOffer(W, "Drankdozijn: Converting discounted price failed")
- return
- }
- })
-
- if skip_offer {
- return
- }
-
- e.Request.Visit(W.Url)
-
- var ctx string
-
- W.Volume, ctx = get_volume(e)
- if W.Volume == 0 {
- W.error_msg = e.Request.Ctx.Get("volume")
- W.error_ctx = ctx
- PrintlnOffer(W, "Drankdozijn: Volume is zero")
- return
- }
-
- W.Abv, ctx = get_abv(e)
- if W.Abv == 0 {
- W.error_msg = "Drankdozijn: Abv is zero"
- W.error_ctx = ctx
- PrintlnOffer(W, "Drankdozijn: abv is zero")
- return
- }
-
- base_price_noisy := e.Request.Ctx.Get("base_price")
- W.Base_price, err = convert_price(base_price_noisy)
- if err != nil {
- W.error_msg = err.Error()
- W.error_ctx = e.ChildText(".price_l")
- PrintlnOffer(W, "Drankdozijn: Converting base price failed")
- return
- }
-
- if current_type == "Cognac" {
- W.Spirit_type = e.Request.Ctx.Get("spirit_type")
- }
-
- if current_type == "Likör" {
- tmp_type := e.Request.Ctx.Get("spirit_type")
- switch tmp_type {
- case "Tequila":
- W.Spirit_type = "Tequila"
- }
- }
-
- if current_type == "Champagner" && (e.Request.Ctx.Get("spirit_type") != "Champagner" && e.Request.Ctx.Get("spirit_type") != "Champagne") {
- DebugOffer(W, "Drankdozijn: Skip Offer")
- return
- }
-
- W.Website = e.Request.Ctx.Get("website")
-
- //DebugOffer(W, "DEBUG")
-
- Offers = append(Offers, W)
- })
-
- c.OnHTML(".main_price", func(e *colly.HTMLElement) {
- //e.Request.Ctx.Put("base_price", strings.TrimPrefix(e.ChildText(".price_l"), "/L"))
- e.Request.Ctx.Put("base_price", e.ChildText(".price_l"))
- })
-
- c.OnHTML(".main_description", func(e *colly.HTMLElement) {
- prev := ""
- count := 0
- e.ForEach(".col-xs-6", func(i int, e *colly.HTMLElement) {
- if count%2 == 0 {
- prev = e.Text
- } else {
- switch strings.TrimSpace(prev) {
- case "Inhalt":
- case "Inhoud":
- e.Request.Ctx.Put("volume", e.Text)
- case "Alkoholgehalt":
- case "Alcoholpercentage":
- e.Request.Ctx.Put("abv", e.Text)
- case "Kategorie":
- case "Categorie":
- e.Request.Ctx.Put("spirit_type", e.Text)
- }
-
- prev = ""
- }
- count++
- })
- })
-
- c.OnHTML("body", func(e *colly.HTMLElement) {
- if e.Request.URL.String() == Shop_url {
- return
- }
- e.Request.Ctx.Put("website", string(e.Response.Body))
- })
-
- var cookie *http.Cookie
- var has_cookie bool
- c.OnResponse(func(r *colly.Response) {
- //log.Debug("Cookies:", c.Cookies(r.Request.URL.String()))
- if len(c.Cookies(r.Request.URL.String())) > 0 {
- has_cookie = true
- cookie = c.Cookies(r.Request.URL.String())[0]
- }
- })
-
- for groepnr, cur_type := range types {
- current_type = cur_type
- switch current_type {
- case "Wodka":
- Shop_url = Shop_url_base + "vodka"
- case "Likör":
- Shop_url = Shop_url_base + "likeuren"
- case "Champagner":
- Shop_url = Shop_url_base + "wijn"
- default:
- Shop_url = Shop_url_base + current_type
- }
-
- //log.Debug(Shop_url)
- err := c.Visit(Shop_url)
- if err != nil {
- Warn(nil, shop.Name+": Error (Visit): "+err.Error())
- }
-
- c.OnRequest(func(r *colly.Request) {
- r.Headers.Set("X-Requested-With", "XMLHttpRequest")
- r.Headers.Set("Referer", Shop_url)
- if has_cookie {
- //log.Debug("Setting Cookie: " + cookie.String())
- r.Headers.Set("Cookie", cookie.String())
- }
- })
-
- for i := 12; true; i = i + 12 {
- log.Debug("Crawling Drankdozijn: type = " + cur_type + " items = " + strconv.Itoa(i))
- err := c.Post(Async_url, map[string]string{"items": strconv.Itoa(i), "datum": "0", "groepnr": strconv.Itoa(groepnr)})
- if err != nil {
- if "EOF" != err.Error() {
- Warn(nil, shop.Name+": Error (Post): "+err.Error())
- }
- break
- }
- }
- }
-
- return Offers
- */
}