summaryrefslogtreecommitdiff
path: root/crawler/shop_drankdozijn.go
diff options
context:
space:
mode:
author horus 2018-06-16 16:05:06 +0200
committer horus 2018-06-16 16:05:06 +0200
commit d0b2f70f278924b264fce12b3da7c4c87cbe4593 (patch)
tree f3edcf3861ad4482a023173370a5e3b6cad991a5 /crawler/shop_drankdozijn.go
parent 0dedda30a0cb983c41f879e9fe0be53a79ba347c (diff)
download alkobote-d0b2f70f278924b264fce12b3da7c4c87cbe4593.tar.gz
Adds scraper for Drankdozijn. (crawler)
Diffstat (limited to 'crawler/shop_drankdozijn.go')
-rw-r--r-- crawler/shop_drankdozijn.go 192
1 files changed, 192 insertions, 0 deletions
diff --git a/crawler/shop_drankdozijn.go b/crawler/shop_drankdozijn.go
new file mode 100644
index 0000000..0a5cca4
--- /dev/null
+++ b/crawler/shop_drankdozijn.go
@@ -0,0 +1,192 @@
+package main
+
+import (
+ "net/http"
+ "strconv"
+ "strings"
+
+ log "github.com/Sirupsen/logrus"
+ "github.com/gocolly/colly"
+)
+
+// ScrapeDrankdozijn collects the current offers of drankdozijn.de for shop.
+//
+// For every entry of the types map it visits the matching listing page below
+// /aanbiedingen/ and then keeps POSTing to the /async/scroll endpoint with a
+// growing "items" offset until that request fails. Each ".product_top"
+// element on those responses becomes one Angebot. Base price, category and
+// the raw page are read from the request context, which the detail page
+// callbacks fill while the product page is visited from inside the listing
+// callback.
+//
+// Offers whose name contains "+ gratis" or "& gratis" are skipped, and so is
+// any offer for which a price, the volume, the abv or the product page could
+// not be obtained; each of these cases is logged.
+//
+// It returns the collected offers; the slice is empty but non-nil when
+// nothing was found.
+func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
+	Shop_url_base := "https://drankdozijn.de/aanbiedingen/"
+	var Shop_url string
+	Async_url := "https://drankdozijn.de/async/scroll"
+
+	Offers := []Angebot{}
+
+	// groepnr sent with the scroll request -> spirit type stored on the offer.
+	types := map[int]string{230: "Whisky", 270: "Gin", 220: "Wodka", 210: "Rum", 250: "Likör", 240: "Cognac"}
+	var current_type string
+
+	// Context keys written by the detail page callbacks further down.
+	detail_keys := []string{"base_price", "volume", "abv", "spirit_type", "website"}
+
+	c := app.customCollector([]string{"drankdozijn.de"})
+
+	c.OnHTML(".product_top", func(e *colly.HTMLElement) {
+		// Only the current listing page and the scroll responses describe
+		// offers; ".product_top" elements on any other page are ignored.
+		request_url := e.Request.URL.String()
+		if request_url != Shop_url && request_url != Async_url {
+			return
+		}
+
+		W := Angebot{}
+
+		W.Shop = shop.Id
+		W.Spirit_type = current_type
+
+		var err error
+
+		e.ForEach(".product_image", func(i int, e *colly.HTMLElement) {
+			W.Url = e.ChildAttr("a", "href")
+			W.Image_url = e.ChildAttr("img", "src")
+		})
+		e.ForEach(".product_title", func(i int, e *colly.HTMLElement) {
+			W.Name = e.ChildText("a")
+		})
+
+		// Presumably bundles that come with a free extra; they are not tracked.
+		if strings.Contains(W.Name, "+ gratis") || strings.Contains(W.Name, "& gratis") {
+			DebugOffer(W, "Drankdozijn: Skip Offer")
+			return
+		}
+
+		// A return inside the ForEach closure only leaves the closure, so a
+		// failed conversion is recorded here and checked afterwards.
+		price_ok := true
+		e.ForEach(".product_price", func(i int, e *colly.HTMLElement) {
+			W.Original_price, err = convert_price(e.ChildText(".product_acties"))
+			if err != nil {
+				W.error_msg = err.Error()
+				W.error_ctx = e.ChildText(".product_acties")
+				PrintlnOffer(W, "Drankdozijn: Converting original price failed")
+				price_ok = false
+				return
+			}
+			W.Discounted_price, err = convert_price(e.ChildText(".product_aanbieding_prijs"))
+			if err != nil {
+				W.error_msg = err.Error()
+				W.error_ctx = e.ChildText(".product_aanbieding_prijs")
+				PrintlnOffer(W, "Drankdozijn: Converting discounted price failed")
+				price_ok = false
+				return
+			}
+		})
+		if !price_ok {
+			return
+		}
+
+		// The detail page callbacks hand their values back through
+		// e.Request.Ctx. Clear what a previously visited product left there so
+		// a page that lacks a field cannot inherit another product's data.
+		for _, key := range detail_keys {
+			e.Request.Ctx.Put(key, "")
+		}
+
+		// The values are read right after this call, so the code relies on
+		// the visit having completed when it returns.
+		err = e.Request.Visit(W.Url)
+		if err != nil {
+			W.error_msg = err.Error()
+			W.error_ctx = W.Url
+			PrintlnOffer(W, "Drankdozijn: Visiting product page failed")
+			return
+		}
+
+		var ctx string
+
+		// get_volume and get_abv are defined elsewhere; presumably they parse
+		// the "volume" and "abv" context values - TODO confirm.
+		W.Volume, ctx = get_volume(e)
+		if W.Volume == 0 {
+			W.error_msg = e.Request.Ctx.Get("volume")
+			W.error_ctx = ctx
+			PrintlnOffer(W, "Drankdozijn: Volume is zero")
+			return
+		}
+
+		W.Abv, ctx = get_abv(e)
+		if W.Abv == 0 {
+			W.error_msg = "Drankdozijn: Abv is zero"
+			W.error_ctx = ctx
+			PrintlnOffer(W, "Drankdozijn: abv is zero")
+			return
+		}
+
+		base_price_noisy := e.Request.Ctx.Get("base_price")
+		W.Base_price, err = convert_price(base_price_noisy)
+		if err != nil {
+			W.error_msg = err.Error()
+			// Report the string that failed to convert, as the other price
+			// branches do.
+			W.error_ctx = base_price_noisy
+			PrintlnOffer(W, "Drankdozijn: Converting base price failed")
+			return
+		}
+
+		// For the Cognac group the category from the detail page replaces the
+		// generic type.
+		if current_type == "Cognac" {
+			W.Spirit_type = e.Request.Ctx.Get("spirit_type")
+		}
+
+		W.Website = e.Request.Ctx.Get("website")
+
+		Offers = append(Offers, W)
+	})
+
+	// Detail page: price text, converted by the listing callback.
+	c.OnHTML(".main_price", func(e *colly.HTMLElement) {
+		e.Request.Ctx.Put("base_price", e.ChildText(".price_l"))
+	})
+
+	// Detail page: the ".col-xs-6" cells are read as alternating label/value
+	// pairs; the values of the three known labels are stored in the context.
+	c.OnHTML(".main_description", func(e *colly.HTMLElement) {
+		prev := ""
+		count := 0
+		e.ForEach(".col-xs-6", func(i int, e *colly.HTMLElement) {
+			if count%2 == 0 {
+				prev = e.Text
+			} else {
+				switch strings.TrimSpace(prev) {
+				case "Inhalt":
+					e.Request.Ctx.Put("volume", e.Text)
+				case "Alkoholgehalt":
+					e.Request.Ctx.Put("abv", e.Text)
+				case "Kategorie":
+					e.Request.Ctx.Put("spirit_type", e.Text)
+				}
+
+				prev = ""
+			}
+			count++
+		})
+	})
+
+	// Keep the raw body of every page except the current listing page.
+	c.OnHTML("body", func(e *colly.HTMLElement) {
+		if e.Request.URL.String() == Shop_url {
+			return
+		}
+		e.Request.Ctx.Put("website", string(e.Response.Body))
+	})
+
+	// Remember the first cookie the collector holds for a responding URL; it
+	// is replayed on the scroll requests.
+	var cookie *http.Cookie
+	c.OnResponse(func(r *colly.Response) {
+		if cookies := c.Cookies(r.Request.URL.String()); len(cookies) > 0 {
+			cookie = cookies[0]
+		}
+	})
+
+	// Registered once, outside the loop, so callbacks do not pile up per
+	// spirit type. The XHR headers are applied to the scroll endpoint only;
+	// listing and product pages are fetched as ordinary page loads.
+	c.OnRequest(func(r *colly.Request) {
+		if r.URL.String() != Async_url {
+			return
+		}
+		r.Headers.Set("X-Requested-With", "XMLHttpRequest")
+		r.Headers.Set("Referer", Shop_url)
+		if cookie != nil {
+			r.Headers.Set("Cookie", cookie.String())
+		}
+	})
+
+	for groepnr, cur_type := range types {
+		current_type = cur_type
+		// Two listing pages use a slug that differs from the stored type name.
+		switch current_type {
+		case "Wodka":
+			Shop_url = Shop_url_base + "vodka"
+		case "Likör":
+			Shop_url = Shop_url_base + "likeuren"
+		default:
+			Shop_url = Shop_url_base + current_type
+		}
+
+		err := c.Visit(Shop_url)
+		if err != nil {
+			Warn(nil, shop.Name+": Error (Visit): "+err.Error())
+		}
+
+		// Request further offers in steps of twelve. The loop ends at the
+		// first failing request; an "EOF" error is not reported.
+		for i := 12; ; i += 12 {
+			log.Debug("Crawling Drankdozijn: type = " + cur_type + " items = " + strconv.Itoa(i))
+			err := c.Post(Async_url, map[string]string{"items": strconv.Itoa(i), "datum": "0", "groepnr": strconv.Itoa(groepnr)})
+			if err != nil {
+				if "EOF" != err.Error() {
+					Warn(nil, shop.Name+": Error (Post): "+err.Error())
+				}
+				break
+			}
+		}
+	}
+
+	return Offers
+}