diff options
| author | Maximilian Möhring | 2020-07-28 15:13:06 +0200 |
|---|---|---|
| committer | Maximilian Möhring | 2020-07-28 16:02:59 +0200 |
| commit | 8e65241d1cc928e79e06adf66ce3e9c32c17fa49 (patch) | |
| tree | 0f6b589645614aead99e4d1b4a58d61179826bf1 /crawler | |
| parent | d366ade036ba8889654e5df86d6e2d5072e9f4d4 (diff) | |
| download | alkobote-8e65241d1cc928e79e06adf66ce3e9c32c17fa49.tar.gz | |
Fix d12 to use new api. (crawler)
Diffstat (limited to 'crawler')
| -rw-r--r-- | crawler/shop_drankdozijn.go | 330 |
1 file changed, 167 insertions, 163 deletions
diff --git a/crawler/shop_drankdozijn.go b/crawler/shop_drankdozijn.go index f66ac78..7a9e786 100644 --- a/crawler/shop_drankdozijn.go +++ b/crawler/shop_drankdozijn.go @@ -2,11 +2,12 @@ package main import ( "encoding/json" + "fmt" "io/ioutil" "net/http" - "strings" + //"strings" - "github.com/gocolly/colly" + //"github.com/gocolly/colly" log "github.com/sirupsen/logrus" ) @@ -17,7 +18,8 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { /** * Parse the API. */ - API_URL := "https://api.drankdozijn.nl/sale-products?country=DE&language=de" + API_URL := "https://es-api.drankdozijn.nl/sale-products?country=DE&language=de" + API_URL_PRODUCT := "https://es-api.drankdozijn.nl/product?country=DE&language=de&page_template=artikel&alias=" IMAGE_URL := "https://res-2.cloudinary.com/boozeboodcdn/image/upload/e_trim:10/c_pad,g_south,h_400,w_280/c_limit,h_910,w_280/f_auto,q_auto:best/v1/" http_client := http.Client{} @@ -29,7 +31,7 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { } req.Header.Set("accept", "application/json") - req.Header.Set("User-Agent", "") + req.Header.Set("User-Agent", "like googlebot") api_resp, err := http_client.Do(req) if err != nil { @@ -42,51 +44,83 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { // TODO panic(err) } + //log.Println("%v\n", string(api_body)) - var tmp_api_map map[string]interface{} + type api_products struct { + Alias string + Images []string + } + type api_product_group struct { + Description string + } + type api_offer struct { + Type string + Price float64 + SalePrice float64 + SaleDescription string + Group map[string]interface{} + ProductGroup api_product_group + Products []api_products + } + type Value struct { + Alias string `json:"alias"` + Description string `json:"description"` + } + type Feature struct { + Alias string `json:"alias"` + Description string `json:"description"` + Id int `json:"id"` + Value Value `json:"value"` + ShownOnPage bool `json:"shownOnPage"` + } + type product_details 
struct { + FormatedPrice string `json:"formatedPrice"` + FormatedSalePrice string `json:"formatedSalePrice"` + Description string `json:"description"` + Features []Feature `json:"features` + } - err = json.Unmarshal(api_body, &tmp_api_map) + var offers []api_offer + //err = json.Unmarshal(api_body, &tmp_api_map) + err = json.Unmarshal(api_body, &offers) if err != nil { // TODO - log.Println("json unmarshal failed") + log.Println("offers json unmarshal failed") + log.Printf("%+v\n", string(api_body)) panic(err) } - for _, value := range tmp_api_map { - - api_data := value.(map[string]interface{}) - - if api_data["type"] != "offer" { - continue - } + for _, api_data := range offers { W := Angebot{} W.Shop = shop.Id - W.Name = api_data["saleDescription"].(string) + W.Name = api_data.SaleDescription - tmp_desc := api_data["group"].(map[string]interface{}) - - tmp_spirit_type := tmp_desc["description"].(string) + tmp_spirit_type := api_data.ProductGroup.Description if "Bier" == tmp_spirit_type { W.Debug("Drankdozijn: skip offer because it's beer") continue } - W.Spirit_type = detect_spirit_type(tmp_desc["description"].(string)) + W.Spirit_type = detect_spirit_type(tmp_spirit_type) - if v, _ := api_data["price"]; v == nil { + if api_data.Price == 0 { + //log.Println("%v\n", api_data["price"]) + //log.Println("%v\n", api_data) W.Debug("Drankdozijn: price is nil -> skip offer") continue } - W.Original_price, err = convert_price(api_data["price"].(string)) + W.Original_price, err = convert_price(fmt.Sprintf("%.2f", api_data.Price)) if err != nil { // TODO panic(err) } - W.Discounted_price, err = convert_price(api_data["salePrice"].(string)) + log.Println(W.Original_price) + + W.Discounted_price, err = convert_price(fmt.Sprintf("%.2f", api_data.SalePrice)) if err != nil { // TODO panic(err) @@ -98,171 +132,141 @@ func (app *App) ScrapeDrankdozijn(shop Shop) []Angebot { } // Offer URL - tmp_offer_url_img_map := api_data["products"].(map[string]interface{}) + 
//tmp_offer_url_img_map := api_data.Products["products"].(map[string]interface{}) + tmp_offer_product_map := api_data.Products // Check for bundle offer - if len(tmp_offer_url_img_map) > 1 { + if len(tmp_offer_product_map) > 1 { W.Debug("Drankdozijn: Skip Offer because of bundle") continue } - for _, v := range tmp_offer_url_img_map { - tmp_map := v.(map[string]interface{}) - W.Url = "https://drankdozijn.de/artikel/" + (tmp_map["alias"]).(string) - tmp_image_map := tmp_map["images"].([]interface{}) - W.Image_url = IMAGE_URL + tmp_image_map[0].(string) + var alias string + for _, v := range tmp_offer_product_map { + W.Url = "https://drankdozijn.de/artikel/" + v.Alias + //tmp_image_map := tmp_map["images"].([]interface{}) + //W.Image_url = IMAGE_URL + tmp_image_map[0].(string) + W.Image_url = IMAGE_URL + v.Images[0] + alias = v.Alias } - c := app.customCollector([]string{"drankdozijn.de", "drankdozijn.nl"}) - - c.OnHTML("span.product_title", func(e *colly.HTMLElement) { - - Trace(nil, "D12: On span.product_title: "+e.Text) - - if e.Text != W.Name { - W.Trace("Name changed from: " + W.Name + " to " + e.Text) - W.Name = e.Text - } + req, err := http.NewRequest(http.MethodGet, API_URL_PRODUCT+alias, nil) + if err != nil { + // TODO + panic(err) + } - }) + req.Header.Set("accept", "application/json") + req.Header.Set("User-Agent", "like googlebot") - c.OnHTML(".product_top", func(e *colly.HTMLElement) { + api_resp, err := http_client.Do(req) + if err != nil { + // TODO + panic(err) + } - if strings.Contains(W.Name, "+ gratis") || strings.Contains(W.Name, "& gratis") { - W.Debug("Drankdozijn: Skip Offer because it contains gratis ware") - return - } + api_body, err := ioutil.ReadAll(api_resp.Body) + if err != nil { + // TODO + panic(err) + } + var d12_product product_details + err = json.Unmarshal(api_body, &d12_product) + if err != nil { + // TODO + log.Println("product_details json unmarshal failed") + log.Printf("%+v\n", string(api_body)) + panic(err) + } - 
e.Request.Visit(W.Url) + W.Base_price, err = convert_price(d12_product.FormatedPrice) + if err != nil { + // TODO + log.Println("converting price from product_details failed") + panic(err) + } - }) + for _, v := range d12_product.Features { + if v.Description == "Alkoholgehalt" { + W.Abv, err = extract_abv(v.Value.Description) + if err != nil { + log.Println("extracting abv failed") + } + } else if v.Description == "Inhalt" { + W.Volume, err = extract_volume(v.Value.Description) + if err != nil { + log.Println("extracting volume failed") + } + } else if v.Description == "Kategorie" { + tmp_type := detect_spirit_type(v.Value.Description) + if "Champagner" == tmp_type { + if tmp_type != W.Spirit_type { + W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type) + W.Spirit_type = tmp_type + } + W.Spirit_type = tmp_type + } - c.OnHTML(".main_price", func(e *colly.HTMLElement) { - W.Base_price, err = convert_price(e.ChildText(".price_l")) - if err != nil { - W.error_msg = err.Error() - W.error_ctx = e.ChildText(".price_l") - W.Println("Drankdozijn: Converting base price failed") - return - } - }) - - c.OnHTML(".row .main_description", func(e *colly.HTMLElement) { - prev := "" - count := 0 - e.ForEach(".col-xs-6", func(i int, e *colly.HTMLElement) { - if count%2 == 0 { - prev = e.Text - } else { - switch strings.TrimSpace(prev) { - case "Inhalt", "Inhoud": - W.Volume, err = extract_volume(e.Text) - if W.Volume == 0 { - W.error_msg = e.Text - W.error_ctx = err.Error() - W.Println("Drankdozijn: Volume is zero, returning") - return - } - case "Alkoholgehalt", "Alcoholpercentage": - W.Abv, err = extract_abv(e.Text) - if W.Abv == 0 { - W.error_msg = "Drankdozijn: Abv is zero" - W.error_ctx = err.Error() - W.Println("Drankdozijn: abv is zero") - return + switch W.Spirit_type { + case "Cognac": + if tmp_type != W.Spirit_type { + W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type) + W.Spirit_type = tmp_type + } + W.Spirit_type = tmp_type + case 
"Brandy": + if tmp_type != W.Spirit_type { + W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type) + W.Spirit_type = tmp_type + } + W.Spirit_type = tmp_type + case "Sherry": + if tmp_type != W.Spirit_type { + W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type) + W.Spirit_type = tmp_type + } + W.Spirit_type = tmp_type + case "Likör": + retest_type := detect_spirit_type(tmp_type) + if "Likör" != retest_type && "Verschiedenes" != retest_type { + if tmp_type != W.Spirit_type { + W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + retest_type) + W.Spirit_type = tmp_type } - case "Kategorie", "Categorie": - tmp_type := e.Text - tmp_type = detect_spirit_type(tmp_type) - - if "Champagner" == tmp_type { - if tmp_type != W.Spirit_type { - W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type) - W.Spirit_type = tmp_type - } + W.Spirit_type = detect_spirit_type(W.Name) + } + if "Tequila" == tmp_type { + if tmp_type != W.Spirit_type { + W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type) W.Spirit_type = tmp_type } - - switch W.Spirit_type { - case "Cognac": - if tmp_type != W.Spirit_type { - W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type) - W.Spirit_type = tmp_type - } + W.Spirit_type = tmp_type + } + if "Mezcal" == tmp_type { + if tmp_type != W.Spirit_type { + W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type) W.Spirit_type = tmp_type - case "Brandy": - if tmp_type != W.Spirit_type { - W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type) - W.Spirit_type = tmp_type - } + } + W.Spirit_type = tmp_type + } + if "Baijiu" == tmp_type { + if tmp_type != W.Spirit_type { + W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type) W.Spirit_type = tmp_type - case "Sherry": - if tmp_type != W.Spirit_type { - W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type) - W.Spirit_type = tmp_type - } + } + W.Spirit_type = tmp_type + } + if "Absinth" 
== tmp_type { + if tmp_type != W.Spirit_type { + W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type) W.Spirit_type = tmp_type - case "Likör": - retest_type := detect_spirit_type(W.Name) - if "Likör" != retest_type && "Verschiedenes" != retest_type { - if tmp_type != W.Spirit_type { - W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + retest_type) - W.Spirit_type = tmp_type - } - W.Spirit_type = detect_spirit_type(W.Name) - } - if "Tequila" == tmp_type { - if tmp_type != W.Spirit_type { - W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type) - W.Spirit_type = tmp_type - } - W.Spirit_type = tmp_type - } - if "Mezcal" == tmp_type { - if tmp_type != W.Spirit_type { - W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type) - W.Spirit_type = tmp_type - } - W.Spirit_type = tmp_type - } - if "Baijiu" == tmp_type { - if tmp_type != W.Spirit_type { - W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type) - W.Spirit_type = tmp_type - } - W.Spirit_type = tmp_type - } - if "Absinth" == tmp_type { - if tmp_type != W.Spirit_type { - W.Trace("Spirit Type Changed: " + W.Spirit_type + " -> " + tmp_type) - W.Spirit_type = tmp_type - } - W.Spirit_type = tmp_type - } } - + W.Spirit_type = tmp_type } - - prev = "" } - count++ - }) - }) - - c.OnHTML("body", func(e *colly.HTMLElement) { - W.Website = string(e.Response.Body) - }) - - err = c.Visit(W.Url) - if err != nil { - shop.error_msg = err.Error() - shop.error_ctx = W.Url - shop.Warn("Crawling failed") - } else { - //log.Println("Visit " + W.Url) + } } - - //W.Debug("DEBUG OFFER") + W.Debug("DEBUG OFFER") Offers = append(Offers, W) } |
