summary refs log tree commit diff
path: root/crawler
diff options
context:
space:
mode:
Diffstat (limited to 'crawler')
-rw-r--r-- crawler/shop_spirituosenwolf.de.go 223
-rw-r--r-- crawler/shops.go 11
2 files changed, 234 insertions, 0 deletions
diff --git a/crawler/shop_spirituosenwolf.de.go b/crawler/shop_spirituosenwolf.de.go
new file mode 100644
index 0000000..3fc04b1
--- /dev/null
+++ b/crawler/shop_spirituosenwolf.de.go
@@ -0,0 +1,223 @@
+package main
+
+import (
+ "bytes"
+ "encoding/json"
+ "io/ioutil"
+ "net/http"
+ "strings"
+
+ //"github.com/gocolly/colly"
+ "github.com/PuerkitoBio/goquery"
+ log "github.com/sirupsen/logrus"
+)
+
+func (app *App) ScrapeSpirituosenWolf(shop Shop) []Angebot {
+
+ Offers := []Angebot{}
+
+ /**
+ * Parse the API.
+ */
+ API_URL := "https://www.spirituosen-wolf.de/widgets/listing/listingCount/sCategory/466?p=1&n=100&c=466&o=7&loadProducts=1"
+
+ http_client := http.Client{}
+
+ req, err := http.NewRequest(http.MethodGet, API_URL, nil)
+ if err != nil {
+ // TODO
+ panic(err)
+ }
+
+ req.Header.Set("accept", "application/json")
+ req.Header.Set("User-Agent", "like googlebot")
+
+ api_resp, err := http_client.Do(req)
+ if err != nil {
+ // TODO
+ panic(err)
+ }
+
+ api_body, err := ioutil.ReadAll(api_resp.Body)
+ if err != nil {
+ // TODO
+ panic(err)
+ }
+ //log.Println("%v\n", string(api_body))
+
+ type api struct {
+ TotalCount int `json:"totalCount"`
+ Listing string `json:"listing"`
+ Pagination string `json:"pagination"`
+ }
+
+ var api_val api
+ err = json.Unmarshal(api_body, &api_val)
+ if err != nil {
+ log.Println("Wolf: offers json unmarshal failed")
+ log.Printf("%+s\n", string(api_body))
+ panic(err)
+ }
+
+ html, err := goquery.NewDocumentFromReader(bytes.NewBuffer([]byte(api_val.Listing)))
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ html.Find(".product--box.box--basic").Each(func(count int, doc *goquery.Selection) {
+ offer := Angebot{}
+
+ offer.Shop = shop.Id
+
+ doc.Find(".sw_frontend_listing_box_article_imagewrapper").Each(func(i int, s *goquery.Selection) {
+ // Url
+ offer.Url, _ = s.Find("a").Attr("href")
+
+ // Image_url
+ _srcset, _ := s.Find("img").Attr("srcset")
+ srcset := strings.Split(_srcset, ",")
+ for _, image_url := range srcset {
+ if strings.Contains(image_url, "@2x") {
+ offer.Image_url = image_url
+ break
+ }
+ }
+
+ // Name
+ offer.Name, _ = s.Find("img").Attr("alt")
+ })
+
+ doc.Find(".sw_frontend_listing_box_article_priceinfo_alcvol").Each(func(i int, s *goquery.Selection) {
+ // Abv
+ offer.Abv, err = extract_abv(strings.TrimSpace(s.Text()))
+ if err != nil {
+ log.Println("Wolf: Extracting Abv failed")
+ }
+ })
+
+ doc.Find(".sw_frontend_listing_box_article_priceinfo_qty").Each(func(i int, s *goquery.Selection) {
+ // Volume
+ offer.Volume, err = extract_volume(strings.TrimSpace(strings.TrimPrefix("Inhalt:", strings.TrimSpace(s.Text()))))
+ if err != nil {
+ log.Println("Wolf: Extracting volume failed")
+ }
+ })
+
+ doc.Find(".sw_frontend_listing_box_article_price_pseudo").Each(func(i int, s *goquery.Selection) {
+ // Original_price
+ offer.Original_price, err = convert_price(strings.TrimSpace(s.Text()))
+ if err != nil {
+ log.Println("Wolf: Converting original_price failed")
+ }
+ })
+
+ doc.Find(".sw_frontend_listing_box_article_price_default").Each(func(i int, s *goquery.Selection) {
+ // Discounted_price
+ offer.Discounted_price, err = convert_price(strings.TrimSpace(s.Text()))
+ if err != nil {
+ log.Println("Wolf: Converting Discounted_price failed")
+ }
+ })
+
+ doc.Find(".sw_frontend_listing_box_article_priceinfo_baseprice").Each(func(i int, s *goquery.Selection) {
+ // Base_price
+ offer.Base_price, err = sanitize_base_price(strings.TrimSpace(s.Text()))
+ if err != nil {
+ log.Println("Wolf: Sanitizing Base_price failed")
+ }
+
+ })
+
+ // detect spirit_type
+ offer.Spirit_type = detect_spirit_type(offer.Name)
+ if offer.Spirit_type == "Verschiedenes" {
+ offer.Spirit_type = WolfGetSpiritTypeFromUrl(offer.Url)
+ }
+
+ Offers = append(Offers, offer)
+ })
+
+ return Offers
+}
+
// WolfGetSpiritTypeFromUrl derives the spirit type of an offer from the
// category folders of its spirituosen-wolf.de product URL.
//
// The shop's base URL (or a leading slash) is stripped, the remaining path is
// split on "/" and the folders are scanned from left to right, compared
// case-insensitively. The first folder that names a known category decides
// the result; some categories are narrowed down further by the folder that
// follows them. "Verschiedenes" is returned when no folder matches.
func WolfGetSpiritTypeFromUrl(url string) string {
	const fallback = "Verschiedenes"

	path := strings.TrimPrefix(url, "https://www.spirituosen-wolf.de")
	path = strings.TrimPrefix(path, "/")
	subfolders := strings.Split(strings.ToLower(path), "/")

	// following returns the folder after position i, or "" when the category
	// folder is the last path element, so short URLs never index out of range.
	following := func(i int) string {
		if i+1 < len(subfolders) {
			return subfolders[i+1]
		}
		return ""
	}

	for index, folder := range subfolders {
		switch folder {
		case "whisk-e-y":
			return "Whisky"

		case "gin-co":
			if following(index) == "genever" {
				return "Genever"
			}
			return "Gin"

		case "likoer":
			return "Likör"

		case "rum-cachaca", "rum-co":
			return "Rum"

		case "champagner-co":
			switch following(index) {
			case "champagner":
				return "Champagner"
			case "cremant":
				return "Cremant"
			default:
				return "Sekt"
			}

		case "wodka-vodka":
			return "Wodka"

		case "weine":
			return "Wein"

		case "grappa-korn-braende":
			switch following(index) {
			case "grappa":
				return "Grappa"
			case "korn":
				return "Korn"
			default:
				return fallback
			}

		case "brandy-cognac-co":
			switch following(index) {
			case "cognac":
				return "Cognac"
			case "brandy":
				return "Brandy"
			case "calvados":
				return "Calvados"
			case "armagnac":
				return "Armagnac"
			}
			// Unknown sub-category: keep scanning the remaining folders.

		case "cachaca":
			return "Cachaca"

		case "absinth":
			return "Absinth"

		case "mezcal":
			return "Mezcal"

		case "tequila":
			return "Tequila"
		}
		// Not a known category folder: look at the next one instead of
		// giving up after the first path element.
	}

	return fallback
}
diff --git a/crawler/shops.go b/crawler/shops.go
index 656cd1f..73def99 100644
--- a/crawler/shops.go
+++ b/crawler/shops.go
@@ -2,6 +2,8 @@ package main
import (
"strings"
+
+ log "github.com/sirupsen/logrus"
)
func (app *App) insertShops() error {
@@ -96,6 +98,14 @@ func getShopsFromStruct() []Shop {
Shipping_costs: 595,
Free_shipping: "250€",
})
+ Shops = append(Shops, Shop{
+ Name: "Spirituosen-Wolf",
+ Url: "https://www.spirituosen-wolf.de",
+ Short_url: "https://l.fuselkoenig.de/spirituosenwolf",
+ Logo_url: "",
+ Shipping_costs: 595,
+ Free_shipping: "",
+ })
return Shops
}
@@ -116,6 +126,7 @@ func (app *App) getShops() ([]Shop, error) {
if excludeShopIDs != "" {
shop_query = " WHERE id NOT IN (" + excludeShopIDs + ")"
}
+ log.Info("getShops: Exclude following shop ids: " + excludeShopIDs)
}
query := `SELECT id,name,short_url,url,logo_url,shipping_costs,free_shipping FROM shop ` + shop_query