From 0026ba55f03c5378d5773459fcdd7c6931ff42a5 Mon Sep 17 00:00:00 2001 From: Max Date: Fri, 15 Jun 2018 19:38:04 +0200 Subject: Introduces central crawler config. (crawler) --- crawler/utility.go | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'crawler/utility.go') diff --git a/crawler/utility.go b/crawler/utility.go index 5fa78c4..e0acf3f 100644 --- a/crawler/utility.go +++ b/crawler/utility.go @@ -10,6 +10,12 @@ import ( "github.com/gocolly/colly" ) +func customCollector(allowed_urls []string) *colly.Collector { + return colly.NewCollector( + colly.AllowedDomains(allowed_urls...), + ) +} + func stringInSlice(a string, list []string) bool { for _, b := range list { if b == a { -- cgit v1.2.3 From 8d68ac7c900241eb8499a94c23ab1f60750e7aed Mon Sep 17 00:00:00 2001 From: horus Date: Fri, 15 Jun 2018 23:28:18 +0200 Subject: Introduces config for user agent, robots.txt and crawler delay. (crawler) --- crawler/utility.go | 6 ------ 1 file changed, 6 deletions(-) (limited to 'crawler/utility.go') diff --git a/crawler/utility.go b/crawler/utility.go index e0acf3f..5fa78c4 100644 --- a/crawler/utility.go +++ b/crawler/utility.go @@ -10,12 +10,6 @@ import ( "github.com/gocolly/colly" ) -func customCollector(allowed_urls []string) *colly.Collector { - return colly.NewCollector( - colly.AllowedDomains(allowed_urls...), - ) -} - func stringInSlice(a string, list []string) bool { for _, b := range list { if b == a { -- cgit v1.2.3 From f9b561c087ccf5109928371192f0f5807103e296 Mon Sep 17 00:00:00 2001 From: horus Date: Sat, 16 Jun 2018 13:52:18 +0200 Subject: Adds support for cl. (crawler) --- crawler/utility.go | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) (limited to 'crawler/utility.go') diff --git a/crawler/utility.go b/crawler/utility.go index 5fa78c4..0650546 100644 --- a/crawler/utility.go +++ b/crawler/utility.go @@ -81,12 +81,29 @@ func detect_spirit_type(name string) string { } func extract_volume(volume string) (float32, error) { - r_liter, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?[lL](iter)?`) + var volume_noisy string + var is_litre_instead_of_cl bool + + // difference between cl... + r_cl, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?[cC][lL]`) if err != nil { - Fatal(err, "Extract volume regex failed") + Fatal(err, "Extract volume (centiliter) regex failed") + } + + volume_noisy = r_cl.FindString(volume) + + if volume_noisy == "" { + // ...and litre + is_litre_instead_of_cl = true + + r_liter, err := regexp.Compile(`[0-9]+([,.][0-9]+)?( )?[lL](iter)?`) + if err != nil { + Fatal(err, "Extract volume regex failed") + } + volume_noisy = r_liter.FindString(volume) } - volume_noisy := r_liter.FindString(volume) + // extract numbers r_liter2, err := regexp.Compile(`[0-9]+([,.][0-9]+)?`) if err != nil { Fatal(err, "2nd extract volume regex failed") @@ -99,6 +116,11 @@ func extract_volume(volume string) (float32, error) { return 0, err } + // converting from cl to litre + if !is_litre_instead_of_cl { + volume64 = volume64 / 100 + } + return float32(volume64), err } -- cgit v1.2.3