diff options
| author | horus | 2018-09-16 19:01:35 +0200 |
|---|---|---|
| committer | horus | 2018-09-16 19:01:35 +0200 |
| commit | 6261e6b0115997af9e50c3a586c982aa23f8c6f9 (patch) | |
| tree | 154b5f6e14a3080e7d54c7595ed8f154d303b8e0 /crawler/shop_rumundco.go | |
| parent | 20daade949655b9fa07195b7140964dd10005f23 (diff) | |
| download | alkobote-6261e6b0115997af9e50c3a586c982aa23f8c6f9.tar.gz | |
Enhances crawler for Rum & Co. (crawler)
Diffstat (limited to 'crawler/shop_rumundco.go')
| -rw-r--r-- | crawler/shop_rumundco.go | 53 |
1 file changed, 46 insertions, 7 deletions
```diff
diff --git a/crawler/shop_rumundco.go b/crawler/shop_rumundco.go
index 275e60a..01e6fe0 100644
--- a/crawler/shop_rumundco.go
+++ b/crawler/shop_rumundco.go
@@ -1,17 +1,28 @@
 package main
 
 import (
+	"net/url"
 	"regexp"
 	"strings"
 
 	// "github.com/PuerkitoBio/goquery"
+	log "github.com/Sirupsen/logrus"
 	"github.com/gocolly/colly"
 )
 
 func (app *App) ScrapeRumundCo(shop Shop) []Angebot {
 
-	// kf=29 means Whisky
-	Shop_url := "https://www.rumundco.de/navi.php?q=4&kf=29&kk-suesse-von=0&kk-suesse-bis=100&kk-milde-von=0&kk-milde-bis=100&kk-wuerze-von=0&kk-wuerze-bis=100&kk-frucht-von=0&kk-frucht-bis=100&kk-torf-von=0&kk-torf-bis=100&hf=0&af=90&Sortierung=11&a=350"
+	/*
+	 * kf=29 means Whisky
+	 * kf=63 means Gin
+	 * kf=92 means Tequila
+	 * kf=8 means Rum
+	 */
+	Shop_urls := []string{"https://www.rumundco.de/navi.php?q=4&kf=29&kk-suesse-von=0&kk-suesse-bis=100&kk-milde-von=0&kk-milde-bis=100&kk-wuerze-von=0&kk-wuerze-bis=100&kk-frucht-von=0&kk-frucht-bis=100&kk-torf-von=0&kk-torf-bis=100&hf=0&af=90&Sortierung=11&a=350",
+		"https://www.rumundco.de/navi.php?q=4&kf=63&kk-suesse-von=0&kk-suesse-bis=100&kk-milde-von=0&kk-milde-bis=100&kk-wuerze-von=0&kk-wuerze-bis=100&kk-frucht-von=0&kk-frucht-bis=100&kk-torf-von=0&kk-torf-bis=100&hf=0&af=90&Sortierung=11&a=350",
+		"https://www.rumundco.de/navi.php?q=4&kf=92&kk-suesse-von=0&kk-suesse-bis=100&kk-milde-von=0&kk-milde-bis=100&kk-wuerze-von=0&kk-wuerze-bis=100&kk-frucht-von=0&kk-frucht-bis=100&kk-torf-von=0&kk-torf-bis=100&hf=0&af=90&Sortierung=11&a=350",
+		"https://www.rumundco.de/navi.php?q=4&kf=8&kk-suesse-von=0&kk-suesse-bis=100&kk-milde-von=0&kk-milde-bis=100&kk-wuerze-von=0&kk-wuerze-bis=100&kk-frucht-von=0&kk-frucht-bis=100&kk-torf-von=0&kk-torf-bis=100&hf=0&af=290&Sortierung=11&a=350",
+	}
 
 	Whiskys := []Angebot{}
 
@@ -19,14 +30,38 @@ func (app *App) ScrapeRumundCo(shop Shop) []Angebot {
 
 	c.OnHTML(".product-teaser", func(e *colly.HTMLElement) {
 
-		if e.Request.URL.String() != Shop_url {
+		if !stringInSlice(e.Request.URL.String(), Shop_urls) {
 			return
 		}
 
 		W := Angebot{}
 
 		W.Shop = shop.Id
-		W.Spirit_type = "Whisky"
+
+		// spirit type is encoded in "kf" param
+		param, err := url.ParseQuery(e.Request.URL.RawQuery)
+		if err != nil {
+			W.error_msg = "Rum & Co: Parsing Query from Shop-URL failed"
+			W.error_ctx = e.Request.URL.String()
+			PrintlnOffer(W, "Rum & Co: Parsing Query from Shop-URL failed")
+		}
+		switch param["kf"][0] {
+		case "29":
+			W.Spirit_type = "Whisky"
+		case "63":
+			W.Spirit_type = "Gin"
+		case "92":
+			W.Spirit_type = "Tequila"
+		case "8":
+			W.Spirit_type = "Rum"
+		default:
+			W.error_msg = "Rum & Co: Query parameter has unexpected value"
+			W.error_ctx = param["kf"][0]
+			W.Url = e.Request.URL.String()
+			PrintlnOffer(W, "Rum & Co: Detecting spirit type failed")
+		}
+
+		log.Debug("Rum & Co: Crawling " + W.Spirit_type + " with param kf=" + param["kf"][0])
 
 		whisky_name := strings.TrimPrefix(e.ChildAttr("img", "alt"), "Restposten: ")
 		whisky_url := "https://www.rumundco.de/" + e.ChildAttr("a", "href")
@@ -38,6 +73,8 @@ func (app *App) ScrapeRumundCo(shop Shop) []Angebot {
 		if !matched {
 			W.error_msg = "Rum & Co: Offer not available"
 			W.error_ctx = e.ChildText(".delivery-status")
+			W.Url = whisky_url
+			W.Name = whisky_name
 			PrintlnOffer(W, "Rum & Co: Offer not available")
 			return
 		}
@@ -159,9 +196,11 @@ func (app *App) ScrapeRumundCo(shop Shop) []Angebot {
 		e.Request.Ctx.Put("website", string(e.Response.Body))
 	})
 
-	err := c.Visit(Shop_url)
-	if err != nil {
-		Warn(nil, shop.Name+": "+err.Error())
+	for _, url := range Shop_urls {
+		err := c.Visit(url)
+		if err != nil {
+			Warn(nil, shop.Name+": "+err.Error())
+		}
 	}
 
 	return Whiskys
```
