package main

import (
	"net/http"
	"strconv"
	"strings"

	log "github.com/Sirupsen/logrus"
	"github.com/gocolly/colly"
)

// ScrapeDrankdozijn crawls the offer listings on drankdozijn.de for a fixed
// set of spirit categories and returns every offer that could be parsed
// completely.
//
// For each category the listing page is visited first; further items are then
// requested from the async scroll endpoint in steps of 12 until a POST returns
// an error. Every product found on a listing is additionally visited so that
// volume, ABV, base price, category and the raw page can be read from its
// detail page. The detail-page handlers hand these values back through the
// request context, which the product visit shares with the listing request.
//
// Offers are dropped (and logged) when the name advertises a free extra, when
// a price, the volume or the ABV cannot be determined, when the product page
// cannot be fetched, or when a product from the wine listing is not
// categorised as champagne.
//
// NOTE(review): the product handler relies on Request.Visit having finished
// before the context is read, i.e. on app.customCollector returning a
// synchronous collector - confirm.
func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
	const (
		shopURLBase = "https://drankdozijn.de/aanbiedingen/"
		asyncURL    = "https://drankdozijn.de/async/scroll"
	)

	// Context keys written by the detail-page handlers below.
	detailKeys := []string{"base_price", "volume", "abv", "spirit_type", "website"}

	// Maps the shop's "groepnr" request parameter to the spirit type that is
	// stored on the offers.
	types := map[int]string{230: "Whisky", 270: "Gin", 220: "Wodka", 210: "Rum", 250: "Likör", 240: "Cognac", 100: "Champagner"}

	var (
		shopURL     string // listing page of the category currently crawled
		currentType string // spirit type of the category currently crawled
	)
	offers := []Angebot{}

	c := app.customCollector([]string{"drankdozijn.de", "drankdozijn.nl"})

	// One product tile on a listing page or in an async scroll response.
	c.OnHTML(".product_top", func(e *colly.HTMLElement) {
		// Ignore product tiles that show up on any other page (e.g. on
		// product detail pages).
		if reqURL := e.Request.URL.String(); reqURL != shopURL && reqURL != asyncURL {
			return
		}

		offer := Angebot{}
		offer.Shop = shop.Id
		offer.Spirit_type = currentType

		var err error

		e.ForEach(".product_image", func(_ int, img *colly.HTMLElement) {
			offer.Url = img.ChildAttr("a", "href")
			offer.Image_url = img.ChildAttr("img", "src")
		})
		e.ForEach(".product_title", func(_ int, title *colly.HTMLElement) {
			offer.Name = title.ChildText("a")
		})

		if strings.Contains(offer.Name, "+ gratis") || strings.Contains(offer.Name, "& gratis") {
			DebugOffer(offer, "Drankdozijn: Skip Offer")
			return
		}

		// A return inside the ForEach callback only leaves the callback, so
		// every failure has to raise skipOffer to actually drop the offer.
		skipOffer := false
		e.ForEach(".product_price", func(_ int, price *colly.HTMLElement) {
			originalNoisy := price.ChildText(".product_acties")
			if !strings.Contains(originalNoisy, "€") {
				PrintlnOffer(offer, "Drankdozijn: Original price has no € sign. Skipping!")
				skipOffer = true
				return
			}

			offer.Original_price, err = convert_price(originalNoisy)
			if err != nil {
				offer.error_msg = err.Error()
				offer.error_ctx = originalNoisy
				PrintlnOffer(offer, "Drankdozijn: Converting original price failed")
				skipOffer = true
				return
			}

			discountedNoisy := price.ChildText(".product_aanbieding_prijs")
			offer.Discounted_price, err = convert_price(discountedNoisy)
			if err != nil {
				offer.error_msg = err.Error()
				offer.error_ctx = discountedNoisy
				PrintlnOffer(offer, "Drankdozijn: Converting discounted price failed")
				skipOffer = true
				return
			}
		})
		if skipOffer {
			return
		}

		// The context is shared by all products of this listing request.
		// Blank the detail keys so that a product page which lacks a value
		// cannot inherit the one of the previously visited product.
		for _, key := range detailKeys {
			e.Request.Ctx.Put(key, "")
		}

		if err = e.Request.Visit(offer.Url); err != nil {
			offer.error_msg = err.Error()
			offer.error_ctx = offer.Url
			PrintlnOffer(offer, "Drankdozijn: Visiting product page failed")
			return
		}

		var ctx string
		offer.Volume, ctx = get_volume(e)
		if offer.Volume == 0 {
			offer.error_msg = e.Request.Ctx.Get("volume")
			offer.error_ctx = ctx
			PrintlnOffer(offer, "Drankdozijn: Volume is zero")
			return
		}

		offer.Abv, ctx = get_abv(e)
		if offer.Abv == 0 {
			offer.error_msg = "Drankdozijn: Abv is zero"
			offer.error_ctx = ctx
			PrintlnOffer(offer, "Drankdozijn: abv is zero")
			return
		}

		offer.Base_price, err = convert_price(e.Request.Ctx.Get("base_price"))
		if err != nil {
			offer.error_msg = err.Error()
			offer.error_ctx = e.ChildText(".price_l")
			PrintlnOffer(offer, "Drankdozijn: Converting base price failed")
			return
		}

		// Some listings mix several kinds of products; refine or filter by the
		// category stated on the product page.
		pageType := e.Request.Ctx.Get("spirit_type")
		switch currentType {
		case "Cognac":
			offer.Spirit_type = pageType
		case "Likör":
			if pageType == "Tequila" {
				offer.Spirit_type = "Tequila"
			}
		case "Champagner":
			if pageType != "Champagner" && pageType != "Champagne" {
				DebugOffer(offer, "Drankdozijn: Skip Offer")
				return
			}
		}

		offer.Website = e.Request.Ctx.Get("website")

		offers = append(offers, offer)
	})

	// Product detail page: price per litre.
	c.OnHTML(".main_price", func(e *colly.HTMLElement) {
		e.Request.Ctx.Put("base_price", e.ChildText(".price_l"))
	})

	// Product detail page: the description is a sequence of cells in which a
	// label cell is followed by its value cell.
	c.OnHTML(".main_description", func(e *colly.HTMLElement) {
		label := ""
		e.ForEach(".col-xs-6", func(i int, cell *colly.HTMLElement) {
			if i%2 == 0 {
				label = cell.Text
				return
			}
			// Go cases do not fall through, so the German and the Dutch label
			// must share one case to both be recognised.
			switch strings.TrimSpace(label) {
			case "Inhalt", "Inhoud":
				e.Request.Ctx.Put("volume", cell.Text)
			case "Alkoholgehalt", "Alcoholpercentage":
				e.Request.Ctx.Put("abv", cell.Text)
			case "Kategorie", "Categorie":
				e.Request.Ctx.Put("spirit_type", cell.Text)
			}
			label = ""
		})
	})

	// Keep the raw body of every page except the current listing page.
	c.OnHTML("body", func(e *colly.HTMLElement) {
		if e.Request.URL.String() == shopURL {
			return
		}
		e.Request.Ctx.Put("website", string(e.Response.Body))
	})

	// Remember the first cookie the collector holds for the most recently
	// answered URL; it is replayed on later requests.
	var cookie *http.Cookie
	c.OnResponse(func(r *colly.Response) {
		if cookies := c.Cookies(r.Request.URL.String()); len(cookies) > 0 {
			cookie = cookies[0]
		}
	})

	xhrHeadersInstalled := false
	for groupNr, spiritType := range types {
		currentType = spiritType
		switch spiritType {
		case "Wodka":
			shopURL = shopURLBase + "vodka"
		case "Likör":
			shopURL = shopURLBase + "likeuren"
		case "Champagner":
			shopURL = shopURLBase + "wijn"
		default:
			shopURL = shopURLBase + spiritType
		}

		if err := c.Visit(shopURL); err != nil {
			Warn(nil, shop.Name+": Error (Visit): "+err.Error())
		}

		// Install the header callback exactly once instead of once per
		// category (which stacked up identical callbacks). It is registered
		// only after the very first listing page was requested, so that
		// request goes out without these headers, as before. The closure reads
		// shopURL and cookie at request time and therefore always uses the
		// current values.
		if !xhrHeadersInstalled {
			xhrHeadersInstalled = true
			c.OnRequest(func(r *colly.Request) {
				r.Headers.Set("X-Requested-With", "XMLHttpRequest")
				r.Headers.Set("Referer", shopURL)
				if cookie != nil {
					r.Headers.Set("Cookie", cookie.String())
				}
			})
		}

		// Ask for further items in steps of 12 until the request fails.
		// NOTE(review): a plain "EOF" error is treated as the regular end of
		// the list and not reported - presumably the endpoint answers with an
		// empty body once no items are left; confirm.
		for items := 12; ; items += 12 {
			log.Debug("Crawling Drankdozijn: type = " + spiritType + " items = " + strconv.Itoa(items))

			err := c.Post(asyncURL, map[string]string{
				"items":   strconv.Itoa(items),
				"datum":   "0",
				"groepnr": strconv.Itoa(groupNr),
			})
			if err != nil {
				if err.Error() != "EOF" {
					Warn(nil, shop.Name+": Error (Post): "+err.Error())
				}
				break
			}
		}
	}

	return offers
}