diff options
| author | Maximilian Möhring | 2021-01-12 06:25:08 +0100 |
|---|---|---|
| committer | Maximilian Möhring | 2021-01-12 06:25:08 +0100 |
| commit | 2c038e851a23141f5cae470d3f07f4de5d04eed1 (patch) | |
| tree | 5ce59839f79fc7b0043c4b18c7fe5f7ae5a671f4 /crawler | |
| parent | f3e644bf500ed327f122ae726ed2d16ca74d373d (diff) | |
| download | alkobote-2c038e851a23141f5cae470d3f07f4de5d04eed1.tar.gz | |
Add new shop Spirituosen-Wolf.
Diffstat (limited to 'crawler')
| -rw-r--r-- | crawler/shop_spirituosenwolf.de.go | 223 | ||||
| -rw-r--r-- | crawler/shops.go | 11 |
2 files changed, 234 insertions, 0 deletions
diff --git a/crawler/shop_spirituosenwolf.de.go b/crawler/shop_spirituosenwolf.de.go new file mode 100644 index 0000000..3fc04b1 --- /dev/null +++ b/crawler/shop_spirituosenwolf.de.go @@ -0,0 +1,223 @@ +package main + +import ( + "bytes" + "encoding/json" + "io/ioutil" + "net/http" + "strings" + + //"github.com/gocolly/colly" + "github.com/PuerkitoBio/goquery" + log "github.com/sirupsen/logrus" +) + +func (app *App) ScrapeSpirituosenWolf(shop Shop) []Angebot { + + Offers := []Angebot{} + + /** + * Parse the API. + */ + API_URL := "https://www.spirituosen-wolf.de/widgets/listing/listingCount/sCategory/466?p=1&n=100&c=466&o=7&loadProducts=1" + + http_client := http.Client{} + + req, err := http.NewRequest(http.MethodGet, API_URL, nil) + if err != nil { + // TODO + panic(err) + } + + req.Header.Set("accept", "application/json") + req.Header.Set("User-Agent", "like googlebot") + + api_resp, err := http_client.Do(req) + if err != nil { + // TODO + panic(err) + } + + api_body, err := ioutil.ReadAll(api_resp.Body) + if err != nil { + // TODO + panic(err) + } + //log.Println("%v\n", string(api_body)) + + type api struct { + TotalCount int `json:"totalCount"` + Listing string `json:"listing"` + Pagination string `json:"pagination"` + } + + var api_val api + err = json.Unmarshal(api_body, &api_val) + if err != nil { + log.Println("Wolf: offers json unmarshal failed") + log.Printf("%+s\n", string(api_body)) + panic(err) + } + + html, err := goquery.NewDocumentFromReader(bytes.NewBuffer([]byte(api_val.Listing))) + if err != nil { + log.Fatal(err) + } + + html.Find(".product--box.box--basic").Each(func(count int, doc *goquery.Selection) { + offer := Angebot{} + + offer.Shop = shop.Id + + doc.Find(".sw_frontend_listing_box_article_imagewrapper").Each(func(i int, s *goquery.Selection) { + // Url + offer.Url, _ = s.Find("a").Attr("href") + + // Image_url + _srcset, _ := s.Find("img").Attr("srcset") + srcset := strings.Split(_srcset, ",") + for _, image_url := range srcset { + if strings.Contains(image_url, "@2x") { + offer.Image_url = image_url + break + } + } + + // Name + offer.Name, _ = s.Find("img").Attr("alt") + }) + + doc.Find(".sw_frontend_listing_box_article_priceinfo_alcvol").Each(func(i int, s *goquery.Selection) { + // Abv + offer.Abv, err = extract_abv(strings.TrimSpace(s.Text())) + if err != nil { + log.Println("Wolf: Extracting Abv failed") + } + }) + + doc.Find(".sw_frontend_listing_box_article_priceinfo_qty").Each(func(i int, s *goquery.Selection) { + // Volume + offer.Volume, err = extract_volume(strings.TrimSpace(strings.TrimPrefix("Inhalt:", strings.TrimSpace(s.Text())))) + if err != nil { + log.Println("Wolf: Extracting volume failed") + } + }) + + doc.Find(".sw_frontend_listing_box_article_price_pseudo").Each(func(i int, s *goquery.Selection) { + // Original_price + offer.Original_price, err = convert_price(strings.TrimSpace(s.Text())) + if err != nil { + log.Println("Wolf: Converting original_price failed") + } + }) + + doc.Find(".sw_frontend_listing_box_article_price_default").Each(func(i int, s *goquery.Selection) { + // Discounted_price + offer.Discounted_price, err = convert_price(strings.TrimSpace(s.Text())) + if err != nil { + log.Println("Wolf: Converting Discounted_price failed") + } + }) + + doc.Find(".sw_frontend_listing_box_article_priceinfo_baseprice").Each(func(i int, s *goquery.Selection) { + // Base_price + offer.Base_price, err = sanitize_base_price(strings.TrimSpace(s.Text())) + if err != nil { + log.Println("Wolf: Sanitizing Base_price failed") + } + + }) + + // detect spirit_type + offer.Spirit_type = detect_spirit_type(offer.Name) + if offer.Spirit_type == "Verschiedenes" { + offer.Spirit_type = WolfGetSpiritTypeFromUrl(offer.Url) + } + + Offers = append(Offers, offer) + }) + + return Offers +} + +func WolfGetSpiritTypeFromUrl(url string) string { + + url = strings.TrimPrefix("https://www.spirituosen-wolf.de/", url) + subfolders := strings.Split(url, "/") + + for index, folder := range subfolders { + // match Gin, Whisky, R + switch folder { + case "whisk-e-y": + return "Whisky" + + case "gin-co": + if subfolders[1] == "genever" { + return "Genever" + } + return "Gin" + + case "likoer": + return "Likör" + + case "rum-cachaca", "rum-co": + return "Rum" + + case "champagner-co": + switch subfolders[1] { + case "champagner": + return "Champagner" + case "Cremant": + return "Cremant" + default: + return "Sekt" + } + + case "wodka-vodka": + return "Wodka" + + case "weine": + return "Wein" + + case "grappa-korn-braende": + switch subfolders[index+1] { + case "grappa": + return "Grappa" + + case "korn": + return "Korn" + default: + return "Verschiedenes" + } + + case "brandy-cognac-co": + switch subfolders[index+1] { + case "cognac": + return "Cognac" + case "brandy": + return "Brandy" + case "calvados": + return "Calvados" + case "armagnac": + return "Armagnac" + } + + case "cachaca": + return "Cachaca" + + case "Absinth": + return "Absinth" + + case "mezcal": + return "Mezcal" + + case "Tequila": + return "Tequila" + + default: + return "Verschiedenes" + } + + } + return "Verschiedenes" +} diff --git a/crawler/shops.go b/crawler/shops.go index 656cd1f..73def99 100644 --- a/crawler/shops.go +++ b/crawler/shops.go @@ -2,6 +2,8 @@ package main import ( "strings" + + log "github.com/sirupsen/logrus" ) func (app *App) insertShops() error { @@ -96,6 +98,14 @@ func getShopsFromStruct() []Shop { Shipping_costs: 595, Free_shipping: "250€", }) + Shops = append(Shops, Shop{ + Name: "Spirituosen-Wolf", + Url: "https://www.spirituosen-wolf.de", + Short_url: "https://l.fuselkoenig.de/spirituosenwolf", + Logo_url: "", + Shipping_costs: 595, + Free_shipping: "", + }) return Shops } @@ -116,6 +126,7 @@ func (app *App) getShops() ([]Shop, error) { if excludeShopIDs != "" { shop_query = " WHERE id NOT IN (" + excludeShopIDs + ")" } + log.Info("getShops: Exclude following shop ids: " + excludeShopIDs) } query := `SELECT id,name,short_url,url,logo_url,shipping_costs,free_shipping FROM shop ` + shop_query |
