From f4a905f93824b91a56b3fb7117438935ea16286f Mon Sep 17 00:00:00 2001 From: horus Date: Mon, 12 Feb 2018 22:53:28 +0100 Subject: Improvements, bug fixes, more utility functions, etc... (crawler) --- crawler/utility.go | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) (limited to 'crawler/utility.go') diff --git a/crawler/utility.go b/crawler/utility.go index 1e426b9..c3daeb1 100644 --- a/crawler/utility.go +++ b/crawler/utility.go @@ -9,6 +9,15 @@ import ( "github.com/gocolly/colly" ) +func stringInSlice(a string, list []string) bool { + for _, b := range list { + if b == a { + return true + } + } + return false +} + func detect_spirit_type(name string) string { matched, err := regexp.MatchString(`(^|\s)Gin(\s|$)`, name) if err != nil { @@ -17,7 +26,7 @@ func detect_spirit_type(name string) string { if matched { return "Gin" } - matched, err = regexp.MatchString(`(^|\s)Rum(\s|$)`, name) + matched, err = regexp.MatchString(`(^|\s)Rh?um(\s|$)`, name) if err != nil { log.Fatal(err) } @@ -38,6 +47,34 @@ func detect_spirit_type(name string) string { if matched { return "Whisky" } + matched, err = regexp.MatchString(`(^|\s)Champagner(\s|$)`, name) + if err != nil { + log.Fatal(err) + } + if matched { + return "Champagner" + } + matched, err = regexp.MatchString(`(^|\s)Cognac(\s|$)`, name) + if err != nil { + log.Fatal(err) + } + if matched { + return "Cognac" + } + matched, err = regexp.MatchString(`(^|\s)Grappa(\s|$)`, name) + if err != nil { + log.Fatal(err) + } + if matched { + return "Grappa" + } + matched, err = regexp.MatchString(`(^|\s)Likör(\s|$)`, name) + if err != nil { + log.Fatal(err) + } + if matched { + return "Likör" + } return "Verschiedenes" } @@ -75,6 +112,11 @@ func extract_abv(abv_noisy string) (float32, error) { } abv_noisy = strings.Replace(abv_noisy, ",", ".", 1) abv_noisy = strings.TrimSpace(abv_noisy) + r_abv, err := regexp.Compile(`[0-9]+([,.][0-9]+)?`) + if err != nil { + log.Fatal(err) + } + abv_noisy = r_abv.FindString(abv_noisy) abv64, err := strconv.ParseFloat(abv_noisy, 32) if err != nil { @@ -119,6 +161,7 @@ func get_abv(e *colly.HTMLElement) float32 { if abv_noisy == "" { return 0 } + // abv_noisy = strings.Replace(abv_noisy, ".", ",", 1) abv, err := extract_abv(abv_noisy) if err != nil { @@ -128,3 +171,23 @@ func get_abv(e *colly.HTMLElement) float32 { return abv } + +/* + * In cents. (int) + */ +func get_base_price(e *colly.HTMLElement) int { + + base_price_noisy := e.Request.Ctx.Get("base_price") + + if base_price_noisy == "" { + return 0 + } + + base_price, err := sanitize_base_price(base_price_noisy) + if err != nil { + log.Println("get_base_price: " + base_price_noisy) + log.Fatal(err) + } + + return base_price +} -- cgit v1.2.3