summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--crawler/shop_drankdozijn.go330
1 files changed, 167 insertions, 163 deletions
diff --git a/crawler/shop_drankdozijn.go b/crawler/shop_drankdozijn.go
index f66ac78..7a9e786 100644
--- a/crawler/shop_drankdozijn.go
+++ b/crawler/shop_drankdozijn.go
@@ -2,11 +2,12 @@ package main
import (
"encoding/json"
+ "fmt"
"io/ioutil"
"net/http"
- "strings"
+ //"strings"
- "github.com/gocolly/colly"
+ //"github.com/gocolly/colly"
log "github.com/sirupsen/logrus"
)
@@ -17,7 +18,8 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
/**
* Parse the API.
*/
- API_URL := "https://api.drankdozijn.nl/sale-products?country=DE&language=de"
+ API_URL := "https://es-api.drankdozijn.nl/sale-products?country=DE&language=de"
+ API_URL_PRODUCT := "https://es-api.drankdozijn.nl/product?country=DE&language=de&page_template=artikel&alias="
IMAGE_URL := "https://res-2.cloudinary.com/boozeboodcdn/image/upload/e_trim:10/c_pad,g_south,h_400,w_280/c_limit,h_910,w_280/f_auto,q_auto:best/v1/"
http_client := http.Client{}
@@ -29,7 +31,7 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
}
req.Header.Set("accept", "application/json")
- req.Header.Set("User-Agent", "")
+ req.Header.Set("User-Agent", "like googlebot")
api_resp, err := http_client.Do(req)
if err != nil {
@@ -42,51 +44,83 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
// TODO
panic(err)
}
+ //log.Println("%v\n", string(api_body))
- var tmp_api_map map[string]interface{}
+ type api_products struct {
+ Alias string
+ Images []string
+ }
+ type api_product_group struct {
+ Description string
+ }
+ type api_offer struct {
+ Type string
+ Price float64
+ SalePrice float64
+ SaleDescription string
+ Group map[string]interface{}
+ ProductGroup api_product_group
+ Products []api_products
+ }
+ type Value struct {
+ Alias string `json:"alias"`
+ Description string `json:"description"`
+ }
+ type Feature struct {
+ Alias string `json:"alias"`
+ Description string `json:"description"`
+ Id int `json:"id"`
+ Value Value `json:"value"`
+ ShownOnPage bool `json:"shownOnPage"`
+ }
+ type product_details struct { // subset of the per-product API response decoded further below
+ FormatedPrice string `json:"formatedPrice"` // passed to convert_price to fill W.Base_price
+ FormatedSalePrice string `json:"formatedSalePrice"`
+ Description string `json:"description"`
+ Features []Feature `json:"features"` // scanned for the "Alkoholgehalt", "Inhalt" and "Kategorie" entries
+ }
- err = json.Unmarshal(api_body, &tmp_api_map)
+ var offers []api_offer
+ //err = json.Unmarshal(api_body, &tmp_api_map)
+ err = json.Unmarshal(api_body, &offers)
if err != nil {
// TODO
- log.Println("json unmarshal failed")
+ log.Println("offers json unmarshal failed")
+ log.Printf("%+v\n", string(api_body))
panic(err)
}
- for _, value := range tmp_api_map {
-
- api_data := value.(map[string]interface{})
-
- if api_data["type"] != "offer" {
- continue
- }
+ for _, api_data := range offers {
W := Angebot{}
W.Shop = shop.Id
- W.Name = api_data["saleDescription"].(string)
+ W.Name = api_data.SaleDescription
- tmp_desc := api_data["group"].(map[string]interface{})
-
- tmp_spirit_type := tmp_desc["description"].(string)
+ tmp_spirit_type := api_data.ProductGroup.Description
if "Bier" == tmp_spirit_type {
W.Debug("Drankdozijn: skip offer because it's beer")
continue
}
- W.Spirit_type = detect_spirit_type(tmp_desc["description"].(string))
+ W.Spirit_type = detect_spirit_type(tmp_spirit_type)
- if v, _ := api_data["price"]; v == nil {
+ if api_data.Price == 0 {
+ //log.Println("%v\n", api_data["price"])
+ //log.Println("%v\n", api_data)
W.Debug("Drankdozijn: price is nil -> skip offer")
continue
}
- W.Original_price, err = convert_price(api_data["price"].(string))
+ W.Original_price, err = convert_price(fmt.Sprintf("%.2f", api_data.Price))
if err != nil {
// TODO
panic(err)
}
- W.Discounted_price, err = convert_price(api_data["salePrice"].(string))
+ log.Println(W.Original_price)
+
+ W.Discounted_price, err = convert_price(fmt.Sprintf("%.2f", api_data.SalePrice))
if err != nil {
// TODO
panic(err)
@@ -98,171 +132,141 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot {
}
// Offer URL
- tmp_offer_url_img_map := api_data["products"].(map[string]interface{})
+ //tmp_offer_url_img_map := api_data.Products["products"].(map[string]interface{})
+ tmp_offer_product_map := api_data.Products
// Check for bundle offer
- if len(tmp_offer_url_img_map) > 1 {
+ if len(tmp_offer_product_map) > 1 {
W.Debug("Drankdozijn: Skip Offer because of bundle")
continue
}
- for _, v := range tmp_offer_url_img_map {
- tmp_map := v.(map[string]interface{})
- W.Url = "https://drankdozijn.de/artikel/" + (tmp_map["alias"]).(string)
- tmp_image_map := tmp_map["images"].([]interface{})
- W.Image_url = IMAGE_URL + tmp_image_map[0].(string)
+ var alias string
+ for _, v := range tmp_offer_product_map {
+ W.Url = "https://drankdozijn.de/artikel/" + v.Alias
+ //tmp_image_map := tmp_map["images"].([]interface{})
+ //W.Image_url = IMAGE_URL + tmp_image_map[0].(string)
+ W.Image_url = IMAGE_URL + v.Images[0]
+ alias = v.Alias
}
- c := app.customCollector([]string{"drankdozijn.de", "drankdozijn.nl"})
-
- c.OnHTML("span.product_title", func(e *colly.HTMLElement) {
-
- Trace(nil, "D12: On span.product_title: "+e.Text)
-
- if e.Text != W.Name {
- W.Trace("Name changed from: " + W.Name + " to " + e.Text)
- W.Name = e.Text
- }
+ req, err := http.NewRequest(http.MethodGet, API_URL_PRODUCT+alias, nil)
+ if err != nil {
+ // TODO
+ panic(err)
+ }
- })
+ req.Header.Set("accept", "application/json")
+ req.Header.Set("User-Agent", "like googlebot")
- c.OnHTML(".product_top", func(e *colly.HTMLElement) {
+ api_resp, err := http_client.Do(req)
+ if err != nil {
+ // TODO
+ panic(err)
+ }
- if strings.Contains(W.Name, "+ gratis") || strings.Contains(W.Name, "& gratis") {
- W.Debug("Drankdozijn: Skip Offer because it contains gratis ware")
- return
- }
+ api_body, err := ioutil.ReadAll(api_resp.Body)
+ if err != nil {
+ // TODO
+ panic(err)
+ }
+ var d12_product product_details
+ err = json.Unmarshal(api_body, &d12_product)
+ if err != nil {
+ // TODO
+ log.Println("product_details json unmarshal failed")
+ log.Printf("%+v\n", string(api_body))
+ panic(err)
+ }
- e.Request.Visit(W.Url)
+ W.Base_price, err = convert_price(d12_product.FormatedPrice)
+ if err != nil {
+ // TODO
+ log.Println("converting price from product_details failed")
+ panic(err)
+ }
- })
+ for _, v := range d12_product.Features {
+ if v.Description == "Alkoholgehalt" {
+ W.Abv, err = extract_abv(v.Value.Description)
+ if err != nil {
+ log.Println("extracting abv failed")
+ }
+ } else if v.Description == "Inhalt" {
+ W.Volume, err = extract_volume(v.Value.Description)
+ if err != nil {
+ log.Println("extracting volume failed")
+ }
+ } else if v.Description == "Kategorie" {
+ tmp_type := detect_spirit_type(v.Value.Description)
+ if "Champagner" == tmp_type {
+ if tmp_type != W.Spirit_type {
+ W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type)
+ W.Spirit_type = tmp_type
+ }
+ W.Spirit_type = tmp_type
+ }
- c.OnHTML(".main_price", func(e *colly.HTMLElement) {
- W.Base_price, err = convert_price(e.ChildText(".price_l"))
- if err != nil {
- W.error_msg = err.Error()
- W.error_ctx = e.ChildText(".price_l")
- W.Println("Drankdozijn: Converting base price failed")
- return
- }
- })
-
- c.OnHTML(".row .main_description", func(e *colly.HTMLElement) {
- prev := ""
- count := 0
- e.ForEach(".col-xs-6", func(i int, e *colly.HTMLElement) {
- if count%2 == 0 {
- prev = e.Text
- } else {
- switch strings.TrimSpace(prev) {
- case "Inhalt", "Inhoud":
- W.Volume, err = extract_volume(e.Text)
- if W.Volume == 0 {
- W.error_msg = e.Text
- W.error_ctx = err.Error()
- W.Println("Drankdozijn: Volume is zero, returning")
- return
- }
- case "Alkoholgehalt", "Alcoholpercentage":
- W.Abv, err = extract_abv(e.Text)
- if W.Abv == 0 {
- W.error_msg = "Drankdozijn: Abv is zero"
- W.error_ctx = err.Error()
- W.Println("Drankdozijn: abv is zero")
- return
+ switch W.Spirit_type {
+ case "Cognac":
+ if tmp_type != W.Spirit_type {
+ W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type)
+ W.Spirit_type = tmp_type
+ }
+ W.Spirit_type = tmp_type
+ case "Brandy":
+ if tmp_type != W.Spirit_type {
+ W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type)
+ W.Spirit_type = tmp_type
+ }
+ W.Spirit_type = tmp_type
+ case "Sherry":
+ if tmp_type != W.Spirit_type {
+ W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type)
+ W.Spirit_type = tmp_type
+ }
+ W.Spirit_type = tmp_type
+ case "Likör":
+ retest_type := detect_spirit_type(tmp_type)
+ if "Likör" != retest_type && "Verschiedenes" != retest_type {
+ if tmp_type != W.Spirit_type {
+ W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + retest_type)
+ W.Spirit_type = tmp_type
}
- case "Kategorie", "Categorie":
- tmp_type := e.Text
- tmp_type = detect_spirit_type(tmp_type)
-
- if "Champagner" == tmp_type {
- if tmp_type != W.Spirit_type {
- W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type)
- W.Spirit_type = tmp_type
- }
+ W.Spirit_type = detect_spirit_type(W.Name)
+ }
+ if "Tequila" == tmp_type {
+ if tmp_type != W.Spirit_type {
+ W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type)
W.Spirit_type = tmp_type
}
-
- switch W.Spirit_type {
- case "Cognac":
- if tmp_type != W.Spirit_type {
- W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type)
- W.Spirit_type = tmp_type
- }
+ W.Spirit_type = tmp_type
+ }
+ if "Mezcal" == tmp_type {
+ if tmp_type != W.Spirit_type {
+ W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type)
W.Spirit_type = tmp_type
- case "Brandy":
- if tmp_type != W.Spirit_type {
- W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type)
- W.Spirit_type = tmp_type
- }
+ }
+ W.Spirit_type = tmp_type
+ }
+ if "Baijiu" == tmp_type {
+ if tmp_type != W.Spirit_type {
+ W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type)
W.Spirit_type = tmp_type
- case "Sherry":
- if tmp_type != W.Spirit_type {
- W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type)
- W.Spirit_type = tmp_type
- }
+ }
+ W.Spirit_type = tmp_type
+ }
+ if "Absinth" == tmp_type {
+ if tmp_type != W.Spirit_type {
+ W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type)
W.Spirit_type = tmp_type
- case "Likör":
- retest_type := detect_spirit_type(W.Name)
- if "Likör" != retest_type && "Verschiedenes" != retest_type {
- if tmp_type != W.Spirit_type {
- W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + retest_type)
- W.Spirit_type = tmp_type
- }
- W.Spirit_type = detect_spirit_type(W.Name)
- }
- if "Tequila" == tmp_type {
- if tmp_type != W.Spirit_type {
- W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type)
- W.Spirit_type = tmp_type
- }
- W.Spirit_type = tmp_type
- }
- if "Mezcal" == tmp_type {
- if tmp_type != W.Spirit_type {
- W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type)
- W.Spirit_type = tmp_type
- }
- W.Spirit_type = tmp_type
- }
- if "Baijiu" == tmp_type {
- if tmp_type != W.Spirit_type {
- W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type)
- W.Spirit_type = tmp_type
- }
- W.Spirit_type = tmp_type
- }
- if "Absinth" == tmp_type {
- if tmp_type != W.Spirit_type {
- W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type)
- W.Spirit_type = tmp_type
- }
- W.Spirit_type = tmp_type
- }
}
-
+ W.Spirit_type = tmp_type
}
-
- prev = ""
}
- count++
- })
- })
-
- c.OnHTML("body", func(e *colly.HTMLElement) {
- W.Website = string(e.Response.Body)
- })
-
- err = c.Visit(W.Url)
- if err != nil {
- shop.error_msg = err.Error()
- shop.error_ctx = W.Url
- shop.Warn("Crawling failed")
- } else {
- //log.Println("Visit " + W.Url)
+ }
}
-
- //W.Debug("DEBUG OFFER")
+ W.Debug("DEBUG OFFER")
Offers = append(Offers, W)
}