From 8aa7828c2c96f5858b243736e275819dae3aeda7 Mon Sep 17 00:00:00 2001 From: Maximilian Möhring Date: Wed, 15 May 2019 14:03:47 +0200 Subject: Filter nonalcoholic drinks from Bottleworld. (crawler) --- crawler/shop_bottleworld.go | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'crawler/shop_bottleworld.go') diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go index 97ec162..ce49ae1 100644 --- a/crawler/shop_bottleworld.go +++ b/crawler/shop_bottleworld.go @@ -88,6 +88,8 @@ func (app *App) ScrapeBottleWord(shop Shop) []Angebot { }) c.OnHTML("#product-attribute-specs-table", func(e *colly.HTMLElement) { + // Filter nonalcoholic drinks. + has_abv := false e.ForEach("tr", func(i int, e *colly.HTMLElement) { td_str := e.ChildText("td") matched, err := regexp.MatchString("^[0-9]+([,.][0-9]+)? l$", td_str) @@ -97,10 +99,15 @@ func (app *App) ScrapeBottleWord(shop Shop) []Angebot { if matched { e.Request.Ctx.Put("volume", td_str) } else if strings.Contains(td_str, "%") { + has_abv = true e.Request.Ctx.Put("abv", td_str) } }) + if !has_abv { + e.Request.Ctx.Put("abv", "") + e.Request.Ctx.Put("debug_info", "No abv found on page thus set to zero. (nonalcoholic?)") + } e.Request.Ctx.Put("website", string(e.Response.Body)) }) -- cgit v1.2.3