summary | refs | log | tree | commit | diff
path: root/crawler
diff options
context:
space:
mode:
author: Max, 2019-01-11 20:28:09 +0100
committer: Max, 2019-01-11 20:28:09 +0100
commit: 6c4de0beead82d646e743c7c0919af1f7add3b80 (patch)
tree: b7f9a5e359909c041f8e5fe1b7d6d5b95e67b4b8 /crawler
parent: 6261e6b0115997af9e50c3a586c982aa23f8c6f9 (diff)
download: alkobote-6c4de0beead82d646e743c7c0919af1f7add3b80.tar.gz
Crawler for Drankdozijn now uses the official API. (crawler)
Diffstat (limited to 'crawler')
-rw-r--r-- crawler/shop_drankdozijn.go 525
1 files changed, 365 insertions, 160 deletions
diff --git a/crawler/shop_drankdozijn.go b/crawler/shop_drankdozijn.go
index 782a0f5..a76148f 100644
--- a/crawler/shop_drankdozijn.go
+++ b/crawler/shop_drankdozijn.go
@@ -1,221 +1,426 @@
package main
import (
+ "encoding/json"
+ "io/ioutil"
"net/http"
- "strconv"
"strings"
+ //"strconv"
- log "github.com/Sirupsen/logrus"
+ //log "github.com/Sirupsen/logrus"
"github.com/gocolly/colly"
)
func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
- Shop_url_base := "https://drankdozijn.de/aanbiedingen/"
- var Shop_url string
- Async_url := "https://drankdozijn.de/async/scroll"
Offers := []Angebot{}
- types := map[int]string{230: "Whisky", 270: "Gin", 220: "Wodka", 210: "Rum", 250: "Likör", 240: "Cognac", 100: "Champagner"}
- //types := map[int]string{100: "Champagner"}
- var current_type string
+ /**
+ * Parse the API.
+ */
+ API_URL := "https://api.drankdozijn.nl/sale-products?country=DE&language=de"
- c := app.customCollector([]string{"drankdozijn.de", "drankdozijn.nl"})
+ c := http.Client{}
- c.OnHTML(".product_top", func(e *colly.HTMLElement) {
+ req, err := http.NewRequest(http.MethodGet, API_URL, nil)
+ if err != nil {
+ // TODO
+ panic(err)
+ }
+
+ req.Header.Set("accept", "application/json")
+ req.Header.Set("User-Agent", "")
+
+ api_resp, err := c.Do(req)
+ if err != nil {
+ // TODO
+ panic(err)
+ }
- if e.Request.URL.String() != Shop_url && e.Request.URL.String() != Async_url {
- //Debug(nil, "Drankdozijn.de: Request url ("+e.Request.URL.String()+") is not shop url ("+Shop_url+").")
- return
+ api_body, err := ioutil.ReadAll(api_resp.Body)
+ if err != nil {
+ // TODO
+ panic(err)
+ }
+
+ var tmp_api_map map[string]interface{}
+
+ err = json.Unmarshal(api_body, tmp_api_map)
+ if err != nil {
+ // TODO
+ panic(err)
+ }
+
+ for _, value := range tmp_api_map {
+
+ api_data := value.(map[string]interface{})
+
+ if api_data["type"] != "offer" {
+ continue
}
W := Angebot{}
-
W.Shop = shop.Id
- W.Spirit_type = current_type
- var err error
- var skip_offer bool
+ W.Name = api_data["saleDescription"].(string)
- e.ForEach(".product_image", func(i int, e *colly.HTMLElement) {
- W.Url = e.ChildAttr("a", "href")
- W.Image_url = e.ChildAttr("img", "src")
- })
- e.ForEach(".product_title", func(i int, e *colly.HTMLElement) {
- W.Name = e.ChildText("a")
- })
+ W.Spirit_type = detect_spirit_type(api_data["description"].(string))
+
+ W.Original_price, err = convert_price(api_data["price"].(string))
+ if err != nil {
+ // TODO
+ panic(err)
+ }
+ W.Discounted_price, err = convert_price(api_data["salePrice"].(string))
+ if err != nil {
+ // TODO
+ panic(err)
+ }
- if strings.Contains(W.Name, "+ gratis") || strings.Contains(W.Name, "& gratis") {
- DebugOffer(W, "Drankdozijn: Skip Offer")
- return
+ // Offer URL
+ tmp_offer_url_map := api_data["products"].(map[string]interface{})
+ for _, v := range tmp_offer_url_map {
+ tmp_url := v.(map[string]interface{})
+ W.Url = "https://drankdozijn.de/artikel/" + (tmp_url["alias"]).(string)
}
- e.ForEach(".product_price", func(i int, e *colly.HTMLElement) {
- original_price_noisy := e.ChildText(".product_acties")
- if !strings.Contains(original_price_noisy, "€") {
- PrintlnOffer(W, "Drankdozijn: Original price has no € sign. Skipping!")
- skip_offer = true
+ c := app.customCollector([]string{"drankdozijn.de", "drankdozijn.nl"})
+
+ err = c.Visit(W.Url)
+ if err != nil {
+ Warn(nil, shop.Name+": Error (Visit): "+err.Error())
+ }
+
+ c.OnHTML(".product_top", func(e *colly.HTMLElement) {
+ /*
+ if e.Request.URL.String() != Shop_url && e.Request.URL.String() != Async_url {
+ //Debug(nil, "Drankdozijn.de: Request url ("+e.Request.URL.String()+") is not shop url ("+Shop_url+").")
+ return
+ }
+ */
+
+ e.ForEach(".product_image", func(i int, e *colly.HTMLElement) {
+ W.Image_url = e.ChildAttr("img", "src")
+ })
+
+ if strings.Contains(W.Name, "+ gratis") || strings.Contains(W.Name, "& gratis") {
+ DebugOffer(W, "Drankdozijn: Skip Offer")
return
}
- W.Original_price, err = convert_price(original_price_noisy)
- if err != nil {
- W.error_msg = err.Error()
- W.error_ctx = e.ChildText(".product_acties")
- PrintlnOffer(W, "Drankdozijn: Converting original price failed")
+
+ e.Request.Visit(W.Url)
+
+ var ctx string
+
+ W.Volume, ctx = get_volume(e)
+ if W.Volume == 0 {
+ W.error_msg = e.Request.Ctx.Get("volume")
+ W.error_ctx = ctx
+ PrintlnOffer(W, "Drankdozijn: Volume is zero")
+ return
+ }
+
+ W.Abv, ctx = get_abv(e)
+ if W.Abv == 0 {
+ W.error_msg = "Drankdozijn: Abv is zero"
+ W.error_ctx = ctx
+ PrintlnOffer(W, "Drankdozijn: abv is zero")
return
}
- W.Discounted_price, err = convert_price(e.ChildText(".product_aanbieding_prijs"))
+
+ base_price_noisy := e.Request.Ctx.Get("base_price")
+ W.Base_price, err = convert_price(base_price_noisy)
if err != nil {
W.error_msg = err.Error()
- W.error_ctx = e.ChildText(".product_aanbieding_prijs")
- PrintlnOffer(W, "Drankdozijn: Converting discounted price failed")
+ W.error_ctx = e.ChildText(".price_l")
+ PrintlnOffer(W, "Drankdozijn: Converting base price failed")
return
}
+
+ if W.Spirit_type == "Cognac" {
+ W.Spirit_type = e.Request.Ctx.Get("spirit_type")
+ }
+
+ if W.Spirit_type == "Likör" {
+ tmp_type := e.Request.Ctx.Get("spirit_type")
+ switch tmp_type {
+ case "Tequila":
+ W.Spirit_type = "Tequila"
+ }
+ }
+
+ if W.Spirit_type == "Wein" {
+ tmp_type := e.Request.Ctx.Get("spirit_type")
+ switch tmp_type {
+ case "Champagner":
+ case "Champagne":
+ W.Spirit_type = "Champagner"
+ default:
+ DebugOffer(W, "Drankdozijn: Skip Offer")
+ return
+ }
+ }
+
+ W.Website = e.Request.Ctx.Get("website")
+
+ //DebugOffer(W, "DEBUG")
+
+ Offers = append(Offers, W)
})
- if skip_offer {
- return
- }
+ c.OnHTML(".main_price", func(e *colly.HTMLElement) {
+ //e.Request.Ctx.Put("base_price", strings.TrimPrefix(e.ChildText(".price_l"), "/L"))
+ e.Request.Ctx.Put("base_price", e.ChildText(".price_l"))
+ })
- e.Request.Visit(W.Url)
+ c.OnHTML(".main_description", func(e *colly.HTMLElement) {
+ prev := ""
+ count := 0
+ e.ForEach(".col-xs-6", func(i int, e *colly.HTMLElement) {
+ if count%2 == 0 {
+ prev = e.Text
+ } else {
+ switch strings.TrimSpace(prev) {
+ case "Inhalt":
+ case "Inhoud":
+ e.Request.Ctx.Put("volume", e.Text)
+ case "Alkoholgehalt":
+ case "Alcoholpercentage":
+ e.Request.Ctx.Put("abv", e.Text)
+ case "Kategorie":
+ case "Categorie":
+ e.Request.Ctx.Put("spirit_type", e.Text)
+ }
+
+ prev = ""
+ }
+ count++
+ })
+ })
- var ctx string
+ c.OnHTML("body", func(e *colly.HTMLElement) {
+ /*
+ if e.Request.URL.String() == Shop_url {
+ return
+ }
+ */
+ e.Request.Ctx.Put("website", string(e.Response.Body))
+ })
- W.Volume, ctx = get_volume(e)
- if W.Volume == 0 {
- W.error_msg = e.Request.Ctx.Get("volume")
- W.error_ctx = ctx
- PrintlnOffer(W, "Drankdozijn: Volume is zero")
- return
- }
+ }
- W.Abv, ctx = get_abv(e)
- if W.Abv == 0 {
- W.error_msg = "Drankdozijn: Abv is zero"
- W.error_ctx = ctx
- PrintlnOffer(W, "Drankdozijn: abv is zero")
- return
- }
+ return Offers
- base_price_noisy := e.Request.Ctx.Get("base_price")
- W.Base_price, err = convert_price(base_price_noisy)
- if err != nil {
- W.error_msg = err.Error()
- W.error_ctx = e.ChildText(".price_l")
- PrintlnOffer(W, "Drankdozijn: Converting base price failed")
- return
- }
+ // ++++++++++ OLD +++++++++
- if current_type == "Cognac" {
- W.Spirit_type = e.Request.Ctx.Get("spirit_type")
- }
+ /*
+ Offers := []Angebot{}
- if current_type == "Likör" {
- tmp_type := e.Request.Ctx.Get("spirit_type")
- switch tmp_type {
- case "Tequila":
- W.Spirit_type = "Tequila"
+ Shop_url_base := "https://drankdozijn.de/aanbiedingen/"
+ var Shop_url string
+ Async_url := "https://drankdozijn.de/async/scroll"
+
+ types := map[int]string{230: "Whisky", 270: "Gin", 220: "Wodka", 210: "Rum", 250: "Likör", 240: "Cognac", 100: "Champagner"}
+ //types := map[int]string{100: "Champagner"}
+ var current_type string
+
+ c := app.customCollector([]string{"drankdozijn.de", "drankdozijn.nl"})
+
+ c.OnHTML(".product_top", func(e *colly.HTMLElement) {
+
+ if e.Request.URL.String() != Shop_url && e.Request.URL.String() != Async_url {
+ //Debug(nil, "Drankdozijn.de: Request url ("+e.Request.URL.String()+") is not shop url ("+Shop_url+").")
+ return
}
- }
- if current_type == "Champagner" && (e.Request.Ctx.Get("spirit_type") != "Champagner" && e.Request.Ctx.Get("spirit_type") != "Champagne") {
- DebugOffer(W, "Drankdozijn: Skip Offer")
- return
- }
+ W := Angebot{}
+
+ W.Shop = shop.Id
+ W.Spirit_type = current_type
+
+ var err error
+ var skip_offer bool
+
+ e.ForEach(".product_image", func(i int, e *colly.HTMLElement) {
+ W.Url = e.ChildAttr("a", "href")
+ W.Image_url = e.ChildAttr("img", "src")
+ })
+ e.ForEach(".product_title", func(i int, e *colly.HTMLElement) {
+ W.Name = e.ChildText("a")
+ })
+
+ if strings.Contains(W.Name, "+ gratis") || strings.Contains(W.Name, "& gratis") {
+ DebugOffer(W, "Drankdozijn: Skip Offer")
+ return
+ }
+
+ e.ForEach(".product_price", func(i int, e *colly.HTMLElement) {
+ original_price_noisy := e.ChildText(".product_acties")
+ if !strings.Contains(original_price_noisy, "€") {
+ PrintlnOffer(W, "Drankdozijn: Original price has no € sign. Skipping!")
+ skip_offer = true
+ return
+ }
+ W.Original_price, err = convert_price(original_price_noisy)
+ if err != nil {
+ W.error_msg = err.Error()
+ W.error_ctx = e.ChildText(".product_acties")
+ PrintlnOffer(W, "Drankdozijn: Converting original price failed")
+ return
+ }
+ W.Discounted_price, err = convert_price(e.ChildText(".product_aanbieding_prijs"))
+ if err != nil {
+ W.error_msg = err.Error()
+ W.error_ctx = e.ChildText(".product_aanbieding_prijs")
+ PrintlnOffer(W, "Drankdozijn: Converting discounted price failed")
+ return
+ }
+ })
+
+ if skip_offer {
+ return
+ }
+
+ e.Request.Visit(W.Url)
+
+ var ctx string
+
+ W.Volume, ctx = get_volume(e)
+ if W.Volume == 0 {
+ W.error_msg = e.Request.Ctx.Get("volume")
+ W.error_ctx = ctx
+ PrintlnOffer(W, "Drankdozijn: Volume is zero")
+ return
+ }
+
+ W.Abv, ctx = get_abv(e)
+ if W.Abv == 0 {
+ W.error_msg = "Drankdozijn: Abv is zero"
+ W.error_ctx = ctx
+ PrintlnOffer(W, "Drankdozijn: abv is zero")
+ return
+ }
+
+ base_price_noisy := e.Request.Ctx.Get("base_price")
+ W.Base_price, err = convert_price(base_price_noisy)
+ if err != nil {
+ W.error_msg = err.Error()
+ W.error_ctx = e.ChildText(".price_l")
+ PrintlnOffer(W, "Drankdozijn: Converting base price failed")
+ return
+ }
+
+ if current_type == "Cognac" {
+ W.Spirit_type = e.Request.Ctx.Get("spirit_type")
+ }
- W.Website = e.Request.Ctx.Get("website")
-
- //DebugOffer(W, "DEBUG")
-
- Offers = append(Offers, W)
- })
-
- c.OnHTML(".main_price", func(e *colly.HTMLElement) {
- //e.Request.Ctx.Put("base_price", strings.TrimPrefix(e.ChildText(".price_l"), "/L"))
- e.Request.Ctx.Put("base_price", e.ChildText(".price_l"))
- })
-
- c.OnHTML(".main_description", func(e *colly.HTMLElement) {
- prev := ""
- count := 0
- e.ForEach(".col-xs-6", func(i int, e *colly.HTMLElement) {
- if count%2 == 0 {
- prev = e.Text
- } else {
- switch strings.TrimSpace(prev) {
- case "Inhalt":
- case "Inhoud":
- e.Request.Ctx.Put("volume", e.Text)
- case "Alkoholgehalt":
- case "Alcoholpercentage":
- e.Request.Ctx.Put("abv", e.Text)
- case "Kategorie":
- case "Categorie":
- e.Request.Ctx.Put("spirit_type", e.Text)
+ if current_type == "Likör" {
+ tmp_type := e.Request.Ctx.Get("spirit_type")
+ switch tmp_type {
+ case "Tequila":
+ W.Spirit_type = "Tequila"
}
+ }
- prev = ""
+ if current_type == "Champagner" && (e.Request.Ctx.Get("spirit_type") != "Champagner" && e.Request.Ctx.Get("spirit_type") != "Champagne") {
+ DebugOffer(W, "Drankdozijn: Skip Offer")
+ return
}
- count++
+
+ W.Website = e.Request.Ctx.Get("website")
+
+ //DebugOffer(W, "DEBUG")
+
+ Offers = append(Offers, W)
})
- })
- c.OnHTML("body", func(e *colly.HTMLElement) {
- if e.Request.URL.String() == Shop_url {
- return
- }
- e.Request.Ctx.Put("website", string(e.Response.Body))
- })
-
- var cookie *http.Cookie
- var has_cookie bool
- c.OnResponse(func(r *colly.Response) {
- //log.Debug("Cookies:", c.Cookies(r.Request.URL.String()))
- if len(c.Cookies(r.Request.URL.String())) > 0 {
- has_cookie = true
- cookie = c.Cookies(r.Request.URL.String())[0]
- }
- })
-
- for groepnr, cur_type := range types {
- current_type = cur_type
- switch current_type {
- case "Wodka":
- Shop_url = Shop_url_base + "vodka"
- case "Likör":
- Shop_url = Shop_url_base + "likeuren"
- case "Champagner":
- Shop_url = Shop_url_base + "wijn"
- default:
- Shop_url = Shop_url_base + current_type
- }
+ c.OnHTML(".main_price", func(e *colly.HTMLElement) {
+ //e.Request.Ctx.Put("base_price", strings.TrimPrefix(e.ChildText(".price_l"), "/L"))
+ e.Request.Ctx.Put("base_price", e.ChildText(".price_l"))
+ })
- //log.Debug(Shop_url)
- err := c.Visit(Shop_url)
- if err != nil {
- Warn(nil, shop.Name+": Error (Visit): "+err.Error())
- }
+ c.OnHTML(".main_description", func(e *colly.HTMLElement) {
+ prev := ""
+ count := 0
+ e.ForEach(".col-xs-6", func(i int, e *colly.HTMLElement) {
+ if count%2 == 0 {
+ prev = e.Text
+ } else {
+ switch strings.TrimSpace(prev) {
+ case "Inhalt":
+ case "Inhoud":
+ e.Request.Ctx.Put("volume", e.Text)
+ case "Alkoholgehalt":
+ case "Alcoholpercentage":
+ e.Request.Ctx.Put("abv", e.Text)
+ case "Kategorie":
+ case "Categorie":
+ e.Request.Ctx.Put("spirit_type", e.Text)
+ }
+
+ prev = ""
+ }
+ count++
+ })
+ })
+
+ c.OnHTML("body", func(e *colly.HTMLElement) {
+ if e.Request.URL.String() == Shop_url {
+ return
+ }
+ e.Request.Ctx.Put("website", string(e.Response.Body))
+ })
- c.OnRequest(func(r *colly.Request) {
- r.Headers.Set("X-Requested-With", "XMLHttpRequest")
- r.Headers.Set("Referer", Shop_url)
- if has_cookie {
- //log.Debug("Setting Cookie: " + cookie.String())
- r.Headers.Set("Cookie", cookie.String())
+ var cookie *http.Cookie
+ var has_cookie bool
+ c.OnResponse(func(r *colly.Response) {
+ //log.Debug("Cookies:", c.Cookies(r.Request.URL.String()))
+ if len(c.Cookies(r.Request.URL.String())) > 0 {
+ has_cookie = true
+ cookie = c.Cookies(r.Request.URL.String())[0]
}
})
- for i := 12; true; i = i + 12 {
- log.Debug("Crawling Drankdozijn: type = " + cur_type + " items = " + strconv.Itoa(i))
- err := c.Post(Async_url, map[string]string{"items": strconv.Itoa(i), "datum": "0", "groepnr": strconv.Itoa(groepnr)})
+ for groepnr, cur_type := range types {
+ current_type = cur_type
+ switch current_type {
+ case "Wodka":
+ Shop_url = Shop_url_base + "vodka"
+ case "Likör":
+ Shop_url = Shop_url_base + "likeuren"
+ case "Champagner":
+ Shop_url = Shop_url_base + "wijn"
+ default:
+ Shop_url = Shop_url_base + current_type
+ }
+
+ //log.Debug(Shop_url)
+ err := c.Visit(Shop_url)
if err != nil {
- if "EOF" != err.Error() {
- Warn(nil, shop.Name+": Error (Post): "+err.Error())
+ Warn(nil, shop.Name+": Error (Visit): "+err.Error())
+ }
+
+ c.OnRequest(func(r *colly.Request) {
+ r.Headers.Set("X-Requested-With", "XMLHttpRequest")
+ r.Headers.Set("Referer", Shop_url)
+ if has_cookie {
+ //log.Debug("Setting Cookie: " + cookie.String())
+ r.Headers.Set("Cookie", cookie.String())
+ }
+ })
+
+ for i := 12; true; i = i + 12 {
+ log.Debug("Crawling Drankdozijn: type = " + cur_type + " items = " + strconv.Itoa(i))
+ err := c.Post(Async_url, map[string]string{"items": strconv.Itoa(i), "datum": "0", "groepnr": strconv.Itoa(groepnr)})
+ if err != nil {
+ if "EOF" != err.Error() {
+ Warn(nil, shop.Name+": Error (Post): "+err.Error())
+ }
+ break
}
- break
}
}
- }
- return Offers
+ return Offers
+ */
}