summary refs log tree commit diff
path: root/crawler
diff options
context:
space:
mode:
Diffstat (limited to 'crawler')
-rw-r--r-- crawler/shop_bottleworld.go 7
-rw-r--r-- crawler/utility.go 3
2 files changed, 9 insertions, 1 deletion
diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go
index 97ec162..ce49ae1 100644
--- a/crawler/shop_bottleworld.go
+++ b/crawler/shop_bottleworld.go
@@ -88,6 +88,8 @@ func (app *App) ScrapeBottleWord(shop Shop) []Angebot {
})
c.OnHTML("#product-attribute-specs-table", func(e *colly.HTMLElement) {
+ // Filter nonalcoholic drinks.
+ has_abv := false
e.ForEach("tr", func(i int, e *colly.HTMLElement) {
td_str := e.ChildText("td")
matched, err := regexp.MatchString("^[0-9]+([,.][0-9]+)? l$", td_str)
@@ -97,10 +99,15 @@ func (app *App) ScrapeBottleWord(shop Shop) []Angebot {
if matched {
e.Request.Ctx.Put("volume", td_str)
} else if strings.Contains(td_str, "%") {
+ has_abv = true
e.Request.Ctx.Put("abv", td_str)
}
})
+ if !has_abv {
+ e.Request.Ctx.Put("abv", "")
+ e.Request.Ctx.Put("debug_info", "No abv found on page thus set to zero. (nonalcoholic?)")
+ }
e.Request.Ctx.Put("website", string(e.Response.Body))
})
diff --git a/crawler/utility.go b/crawler/utility.go
index e59b40b..dd37e16 100644
--- a/crawler/utility.go
+++ b/crawler/utility.go
@@ -293,9 +293,10 @@ func get_volume(e *colly.HTMLElement) (float32, string) {
func get_abv(e *colly.HTMLElement) (float32, string) {
abv_noisy := e.Request.Ctx.Get("abv")
+ debug_info := e.Request.Ctx.Get("debug_info")
if abv_noisy == "" {
- return 0, abv_noisy
+ return 0, debug_info
}
// abv_noisy = strings.Replace(abv_noisy, ".", ",", 1)