diff options
| author | horus | 2018-06-16 16:05:06 +0200 |
|---|---|---|
| committer | horus | 2018-06-16 16:05:06 +0200 |
| commit | d0b2f70f278924b264fce12b3da7c4c87cbe4593 (patch) | |
| tree | f3edcf3861ad4482a023173370a5e3b6cad991a5 /crawler | |
| parent | 0dedda30a0cb983c41f879e9fe0be53a79ba347c (diff) | |
| download | alkobote-d0b2f70f278924b264fce12b3da7c4c87cbe4593.tar.gz | |
Adds scraper for Drankdozijn. (crawler)
Diffstat (limited to 'crawler')
| -rw-r--r-- | crawler/scrape.go | 2 |
| -rw-r--r-- | crawler/shop_drankdozijn.go | 192 |
| -rw-r--r-- | crawler/shops.go | 8 |
3 files changed, 202 insertions, 0 deletions
diff --git a/crawler/scrape.go b/crawler/scrape.go index de79813..6874239 100644 --- a/crawler/scrape.go +++ b/crawler/scrape.go @@ -78,6 +78,8 @@ func (app *App) ScrapeShop(shop Shop) []Angebot { return app.ScrapeWhiskyworld(shop) case "Whiskyzone": return app.ScrapeWhiskyzone(shop) + case "Drankdozijn": + return app.ScrapeDrankdozijn(shop) default: log.Println(shop.Name + ": No Crawler") } diff --git a/crawler/shop_drankdozijn.go b/crawler/shop_drankdozijn.go new file mode 100644 index 0000000..0a5cca4 --- /dev/null +++ b/crawler/shop_drankdozijn.go @@ -0,0 +1,192 @@ +package main + +import ( + "net/http" + "strconv" + "strings" + + log "github.com/Sirupsen/logrus" + "github.com/gocolly/colly" +) + +func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { + Shop_url_base := "https://drankdozijn.de/aanbiedingen/" + var Shop_url string + Async_url := "https://drankdozijn.de/async/scroll" + + Offers := []Angebot{} + + types := map[int]string{230: "Whisky", 270: "Gin", 220: "Wodka", 210: "Rum", 250: "Likör", 240: "Cognac"} + //types := map[int]string{240: "Cognac"} + var current_type string + + c := app.customCollector([]string{"drankdozijn.de"}) + + c.OnHTML(".product_top", func(e *colly.HTMLElement) { + + if e.Request.URL.String() != Shop_url && e.Request.URL.String() != Async_url { + //Debug(nil, "Drankdozijn.de: Request url ("+e.Request.URL.String()+") is not shop url ("+Shop_url+").") + return + } + + W := Angebot{} + + W.Shop = shop.Id + W.Spirit_type = current_type + + var err error + + e.ForEach(".product_image", func(i int, e *colly.HTMLElement) { + W.Url = e.ChildAttr("a", "href") + W.Image_url = e.ChildAttr("img", "src") + }) + e.ForEach(".product_title", func(i int, e *colly.HTMLElement) { + W.Name = e.ChildText("a") + }) + + if strings.Contains(W.Name, "+ gratis") || strings.Contains(W.Name, "& gratis") { + DebugOffer(W, "Drankdozijn: Skip Offer") + return + } + + e.ForEach(".product_price", func(i int, e *colly.HTMLElement) { + W.Original_price, err = 
convert_price(e.ChildText(".product_acties")) + if err != nil { + W.error_msg = err.Error() + W.error_ctx = e.ChildText(".product_acties") + PrintlnOffer(W, "Drankdozijn: Converting original price failed") + return + } + W.Discounted_price, err = convert_price(e.ChildText(".product_aanbieding_prijs")) + if err != nil { + W.error_msg = err.Error() + W.error_ctx = e.ChildText(".product_aanbieding_prijs") + PrintlnOffer(W, "Drankdozijn: Converting discounted price failed") + return + } + }) + + e.Request.Visit(W.Url) + + var ctx string + + W.Volume, ctx = get_volume(e) + if W.Volume == 0 { + W.error_msg = e.Request.Ctx.Get("volume") + W.error_ctx = ctx + PrintlnOffer(W, "Drankdozijn: Volume is zero") + return + } + + W.Abv, ctx = get_abv(e) + if W.Abv == 0 { + W.error_msg = "Drankdozijn: Abv is zero" + W.error_ctx = ctx + PrintlnOffer(W, "Drankdozijn: abv is zero") + return + } + + base_price_noisy := e.Request.Ctx.Get("base_price") + W.Base_price, err = convert_price(base_price_noisy) + if err != nil { + W.error_msg = err.Error() + W.error_ctx = e.ChildText(".price_l") + PrintlnOffer(W, "Drankdozijn: Converting base price failed") + return + } + + if current_type == "Cognac" { + W.Spirit_type = e.Request.Ctx.Get("spirit_type") + } + + W.Website = e.Request.Ctx.Get("website") + + //DebugOffer(W, "DEBUG") + + Offers = append(Offers, W) + }) + + c.OnHTML(".main_price", func(e *colly.HTMLElement) { + //e.Request.Ctx.Put("base_price", strings.TrimPrefix(e.ChildText(".price_l"), "/L")) + e.Request.Ctx.Put("base_price", e.ChildText(".price_l")) + }) + + c.OnHTML(".main_description", func(e *colly.HTMLElement) { + prev := "" + count := 0 + e.ForEach(".col-xs-6", func(i int, e *colly.HTMLElement) { + if count%2 == 0 { + prev = e.Text + } else { + switch strings.TrimSpace(prev) { + case "Inhalt": + e.Request.Ctx.Put("volume", e.Text) + case "Alkoholgehalt": + e.Request.Ctx.Put("abv", e.Text) + case "Kategorie": + e.Request.Ctx.Put("spirit_type", e.Text) + } + + prev = "" + } + 
count++ + }) + }) + + c.OnHTML("body", func(e *colly.HTMLElement) { + if e.Request.URL.String() == Shop_url { + return + } + e.Request.Ctx.Put("website", string(e.Response.Body)) + }) + + var cookie *http.Cookie + var has_cookie bool + c.OnResponse(func(r *colly.Response) { + //log.Debug("Cookies:", c.Cookies(r.Request.URL.String())) + if len(c.Cookies(r.Request.URL.String())) > 0 { + has_cookie = true + cookie = c.Cookies(r.Request.URL.String())[0] + } + }) + + for groepnr, cur_type := range types { + current_type = cur_type + switch current_type { + case "Wodka": + Shop_url = Shop_url_base + "vodka" + case "Likör": + Shop_url = Shop_url_base + "likeuren" + default: + Shop_url = Shop_url_base + current_type + } + + //log.Debug(Shop_url) + err := c.Visit(Shop_url) + if err != nil { + Warn(nil, shop.Name+": Error (Visit): "+err.Error()) + } + + c.OnRequest(func(r *colly.Request) { + r.Headers.Set("X-Requested-With", "XMLHttpRequest") + r.Headers.Set("Referer", Shop_url) + if has_cookie { + //log.Debug("Setting Cookie: " + cookie.String()) + r.Headers.Set("Cookie", cookie.String()) + } + }) + + for i := 12; true; i = i + 12 { + log.Debug("Crawling Drankdozijn: type = " + cur_type + " items = " + strconv.Itoa(i)) + err := c.Post(Async_url, map[string]string{"items": strconv.Itoa(i), "datum": "0", "groepnr": strconv.Itoa(groepnr)}) + if err != nil { + if "EOF" != err.Error() { + Warn(nil, shop.Name+": Error (Post): "+err.Error()) + } + break + } + } + } + + return Offers +} diff --git a/crawler/shops.go b/crawler/shops.go index d9fcc0d..61676ed 100644 --- a/crawler/shops.go +++ b/crawler/shops.go @@ -90,6 +90,14 @@ func getShopsFromStruct() []Shop { Shipping_costs: 495, Free_shipping: "75€", }) + Shops = append(Shops, Shop{ + Name: "Drankdozijn", + Url: "https://Drankdozijn.de", + Short_url: "https://l.fuselkoenig.de/whiskyzone", + Logo_url: "", + Shipping_costs: 595, + Free_shipping: "250€", + }) return Shops } |
