diff options
| author | Maximilian Möhring | 2019-05-15 14:03:47 +0200 |
|---|---|---|
| committer | Maximilian Möhring | 2019-05-15 14:03:47 +0200 |
| commit | 8aa7828c2c96f5858b243736e275819dae3aeda7 (patch) | |
| tree | 823617da75d1c024de3e5d4d6c5cae1d7bbef8fa /crawler | |
| parent | a5bd8848db22932c232d7799e68fbc4ea43056d8 (diff) | |
| download | alkobote-8aa7828c2c96f5858b243736e275819dae3aeda7.tar.gz | |
Filter nonalcoholic drinks from Bottleworld. (crawler)
Diffstat (limited to 'crawler')
| -rw-r--r-- | crawler/shop_bottleworld.go | 7 | ||||
| -rw-r--r-- | crawler/utility.go | 3 |
2 files changed, 9 insertions, 1 deletion
```diff
diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go
index 97ec162..ce49ae1 100644
--- a/crawler/shop_bottleworld.go
+++ b/crawler/shop_bottleworld.go
@@ -88,6 +88,8 @@ func (app *App) ScrapeBottleWord(shop Shop) []Angebot {
 	})
 
 	c.OnHTML("#product-attribute-specs-table", func(e *colly.HTMLElement) {
+		// Filter nonalcoholic drinks.
+		has_abv := false
 		e.ForEach("tr", func(i int, e *colly.HTMLElement) {
 			td_str := e.ChildText("td")
 			matched, err := regexp.MatchString("^[0-9]+([,.][0-9]+)? l$", td_str)
@@ -97,10 +99,15 @@ func (app *App) ScrapeBottleWord(shop Shop) []Angebot {
 			if matched {
 				e.Request.Ctx.Put("volume", td_str)
 			} else if strings.Contains(td_str, "%") {
+				has_abv = true
 				e.Request.Ctx.Put("abv", td_str)
 			}
 		})
 
+		if !has_abv {
+			e.Request.Ctx.Put("abv", "")
+			e.Request.Ctx.Put("debug_info", "No abv found on page thus set to zero. (nonalcoholic?)")
+		}
 		e.Request.Ctx.Put("website", string(e.Response.Body))
 	})
 
diff --git a/crawler/utility.go b/crawler/utility.go
index e59b40b..dd37e16 100644
--- a/crawler/utility.go
+++ b/crawler/utility.go
@@ -293,9 +293,10 @@ func get_volume(e *colly.HTMLElement) (float32, string) {
 
 func get_abv(e *colly.HTMLElement) (float32, string) {
 	abv_noisy := e.Request.Ctx.Get("abv")
+	debug_info := e.Request.Ctx.Get("debug_info")
 
 	if abv_noisy == "" {
-		return 0, abv_noisy
+		return 0, debug_info
 	}
 
 	// abv_noisy = strings.Replace(abv_noisy, ".", ",", 1)
```
