diff options
| author | Max | 2018-02-08 16:07:46 +0100 |
|---|---|---|
| committer | Max | 2018-02-08 16:07:46 +0100 |
| commit | a418c52123969b01c37bafd67ec226410211cccf (patch) | |
| tree | 3df058820a705f5d3fd3867432fd693d4322d751 /crawler/shop_bottleworld.go | |
| parent | ca8db86baaa367e3ec0af2c68ec63d21ae3b6190 (diff) | |
| download | alkobote-a418c52123969b01c37bafd67ec226410211cccf.tar.gz | |
Crawler extracts volume, price per litre and ABV (bottleworld only).
Diffstat (limited to 'crawler/shop_bottleworld.go')
| -rw-r--r-- | crawler/shop_bottleworld.go | 43 |
1 file changed, 39 insertions, 4 deletions
```diff
diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go
index b6af7e0..a3eae35 100644
--- a/crawler/shop_bottleworld.go
+++ b/crawler/shop_bottleworld.go
@@ -3,7 +3,7 @@ package main
 import (
 	"log"
 	"regexp"
-	// "strings"
+	"strings"
 	// "github.com/PuerkitoBio/goquery"
 	"github.com/gocolly/colly"
@@ -27,8 +27,7 @@ func ScrapeBottleWord(shop Shop) []Angebot {
 			log.Fatal(err)
 		}
 		if !matched {
-			//W.Spirit_type = "Anderes"
-			return
+			W.Spirit_type = detect_spirit_type(whisky_name)
 		} else {
 			W.Spirit_type = "Whisky"
 		}
@@ -51,14 +50,50 @@ func ScrapeBottleWord(shop Shop) []Angebot {
 				}
 			})
 		})
+
+		price_per_litre_noisy := e.ChildText(".price-per-liter")
+		price_per_litre, err := sanitize_price_per(price_per_litre_noisy)
+		if err != nil {
+			log.Fatal(err)
+		}
+		W.Price_per_litre = price_per_litre
+
 		W.Image_url = e.ChildAttr("img", "src")
+		e.Request.Visit(W.Url)
+
 		W.Shop = shop.Id
-		W.Spirit_type = "Whisky"
+
+		volume_noisy := e.Request.Ctx.Get("volume")
+		W.Volume, err = extract_volume(volume_noisy)
+		if err != nil {
+			log.Fatal(err)
+		}
+
+		abv_noisy := e.Request.Ctx.Get("abv")
+		W.Abv, err = extract_abv(abv_noisy)
+		if err != nil {
+			log.Fatal(err)
+		}
 		Whiskys = append(Whiskys, W)
 	})
+	c.OnHTML("#product-attribute-specs-table", func(e *colly.HTMLElement) {
+		e.ForEach("tr", func(i int, e *colly.HTMLElement) {
+			td_str := e.ChildText("td")
+			matched, err := regexp.MatchString("[0-9]+([,.][0-9]+)? l$", td_str)
+			if err != nil {
+				log.Fatal(err)
+			}
+			if matched {
+				e.Request.Ctx.Put("volume", td_str)
+			} else if strings.Contains(td_str, "%") {
+				e.Request.Ctx.Put("abv", td_str)
+			}
+		})
+	})
+
 	c.Visit("https://www.bottleworld.de/aktuelle-sonderpreise/show/all")
 	return Whiskys
```
