From f6904aab20e2d09255fd0adabfd246165ff3cb02 Mon Sep 17 00:00:00 2001
From: Max
Date: Thu, 8 Feb 2018 18:26:41 +0100
Subject: Crawler extracts volume, price per litre and abv. (MC Whisky, Rum & Co, Whic)

---
 crawler/utility.go | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

(limited to 'crawler/utility.go')

diff --git a/crawler/utility.go b/crawler/utility.go
index a794c4b..9de7845 100644
--- a/crawler/utility.go
+++ b/crawler/utility.go
@@ -5,6 +5,8 @@ import (
 	"regexp"
 	"strconv"
 	"strings"
+
+	"github.com/gocolly/colly"
 )
 
 func detect_spirit_type(name string) string {
@@ -47,6 +49,7 @@ func extract_volume(volume string) (float32, error) {
 	if err != nil {
 		return 0, err
 	}
+
 	return float32(volume64), err
 }
 
@@ -61,5 +64,51 @@ func extract_abv(abv_noisy string) (float32, error) {
 	if err != nil {
 		return 0, err
 	}
+
 	return float32(abv64), nil
 }
+
+/*
+ * In litre, but float.
+ */
+func get_volume(e *colly.HTMLElement) float32 {
+
+	volume_noisy := e.Request.Ctx.Get("volume")
+
+	matched, err := regexp.MatchString(`[lL](iter)?`, volume_noisy)
+	if err != nil {
+		log.Fatal(err)
+	}
+	if !matched {
+		log.Println("get_volume: not matched: " + volume_noisy)
+		return 0
+	}
+
+	volume, err := extract_volume(volume_noisy)
+	if err != nil {
+		log.Println("get_volume: " + volume_noisy)
+		log.Fatal(err)
+	}
+
+	return volume
+}
+
+/*
+ * In procent. (float)
+ */
+func get_abv(e *colly.HTMLElement) float32 {
+
+	abv_noisy := e.Request.Ctx.Get("abv")
+
+	if abv_noisy == "" {
+		return 0
+	}
+
+	abv, err := extract_abv(abv_noisy)
+	if err != nil {
+		log.Println("get_abv: " + abv_noisy)
+		log.Fatal(err)
+	}
+
+	return abv
+}
-- 
cgit v1.2.3