diff options
| author | horus | 2018-02-12 22:53:28 +0100 |
|---|---|---|
| committer | horus | 2018-02-12 22:53:28 +0100 |
| commit | f4a905f93824b91a56b3fb7117438935ea16286f (patch) | |
| tree | a3bb5a2f755194c63b449345259661d8e0db9ff7 /crawler | |
| parent | 87c347bfb51895499cc862a33453df9945a4656e (diff) | |
| download | alkobote-f4a905f93824b91a56b3fb7117438935ea16286f.tar.gz | |
Improvements, bug fixes, more utility functions, etc... (crawler)
Diffstat (limited to 'crawler')
| -rw-r--r-- | crawler/convert_price.go | 2 |
| -rw-r--r-- | crawler/main.go | 8 |
| -rw-r--r-- | crawler/sanitize.go | 23 |
| -rw-r--r-- | crawler/utility.go | 65 |
4 files changed, 94 insertions, 4 deletions
diff --git a/crawler/convert_price.go b/crawler/convert_price.go index a76c067..2d2bc1a 100644 --- a/crawler/convert_price.go +++ b/crawler/convert_price.go @@ -10,7 +10,7 @@ import ( func convert_price(price string) (int, error) { if "" == price { - return 0, errors.New("Empty string") + return 0, errors.New("convert_price: Empty string") } multiply_by_10 := false diff --git a/crawler/main.go b/crawler/main.go index 8a3556a..779ebba 100644 --- a/crawler/main.go +++ b/crawler/main.go @@ -34,6 +34,7 @@ type Angebot struct { Base_price int Image_url string Spirit_type string + Website string Valid_until int } @@ -65,12 +66,15 @@ func main() { } app.DB, err = sqlx.Connect(app.Config.DBDriver, app.Config.DBUser+":"+app.Config.DBPassword+"@tcp("+app.Config.DBHost+":"+app.Config.DBPort+")/"+app.Config.DBDBName+app.Config.DBOptions) } - defer app.DB.Close() - if err != nil { log.Fatal(err) } + if err = app.DB.Ping(); err != nil { + log.Fatal(err) + } + defer app.DB.Close() + err = app.createTables() if err != nil { log.Fatal(err) diff --git a/crawler/sanitize.go b/crawler/sanitize.go index a40745a..7a2ff58 100644 --- a/crawler/sanitize.go +++ b/crawler/sanitize.go @@ -13,6 +13,29 @@ func sanitize_offer(angebote []Angebot) []Angebot { for _, offer := range angebote { offer.Name = sanitize_name(offer.Name) + if offer.Abv == 0 { + log.Println("sanitize.go: abv zero: " + offer.Name + "( " + offer.Url + ")") + continue + } + if offer.Volume == 0 { + log.Println("sanitize.go: volume zero: " + offer.Name + "( " + offer.Url + ")") + continue + } + if offer.Discounted_price == 0 { + log.Println("sanitize.go: discounted_price zero: " + offer.Name + "( " + offer.Url + ")") + continue + } + if offer.Original_price == 0 { + log.Println("sanitize.go: original_price zero: " + offer.Name + "( " + offer.Url + ")") + continue + } + if offer.Base_price == 0 { + log.Println("sanitize.go: base_price zero: " + offer.Name + "( " + offer.Url + ")") + continue + } + + //offer.Website = "" + W = 
append(W, offer) } diff --git a/crawler/utility.go b/crawler/utility.go index 1e426b9..c3daeb1 100644 --- a/crawler/utility.go +++ b/crawler/utility.go @@ -9,6 +9,15 @@ import ( "github.com/gocolly/colly" ) +func stringInSlice(a string, list []string) bool { + for _, b := range list { + if b == a { + return true + } + } + return false +} + func detect_spirit_type(name string) string { matched, err := regexp.MatchString(`(^|\s)Gin(\s|$)`, name) if err != nil { @@ -17,7 +26,7 @@ func detect_spirit_type(name string) string { if matched { return "Gin" } - matched, err = regexp.MatchString(`(^|\s)Rum(\s|$)`, name) + matched, err = regexp.MatchString(`(^|\s)Rh?um(\s|$)`, name) if err != nil { log.Fatal(err) } @@ -38,6 +47,34 @@ func detect_spirit_type(name string) string { if matched { return "Whisky" } + matched, err = regexp.MatchString(`(^|\s)Champagner(\s|$)`, name) + if err != nil { + log.Fatal(err) + } + if matched { + return "Champagner" + } + matched, err = regexp.MatchString(`(^|\s)Cognac(\s|$)`, name) + if err != nil { + log.Fatal(err) + } + if matched { + return "Cognac" + } + matched, err = regexp.MatchString(`(^|\s)Grappa(\s|$)`, name) + if err != nil { + log.Fatal(err) + } + if matched { + return "Grappa" + } + matched, err = regexp.MatchString(`(^|\s)Likör(\s|$)`, name) + if err != nil { + log.Fatal(err) + } + if matched { + return "Likör" + } return "Verschiedenes" } @@ -75,6 +112,11 @@ func extract_abv(abv_noisy string) (float32, error) { } abv_noisy = strings.Replace(abv_noisy, ",", ".", 1) abv_noisy = strings.TrimSpace(abv_noisy) + r_abv, err := regexp.Compile(`[0-9]+([,.][0-9]+)?`) + if err != nil { + log.Fatal(err) + } + abv_noisy = r_abv.FindString(abv_noisy) abv64, err := strconv.ParseFloat(abv_noisy, 32) if err != nil { @@ -119,6 +161,7 @@ func get_abv(e *colly.HTMLElement) float32 { if abv_noisy == "" { return 0 } + // abv_noisy = strings.Replace(abv_noisy, ".", ",", 1) abv, err := extract_abv(abv_noisy) if err != nil { @@ -128,3 +171,23 @@ func 
get_abv(e *colly.HTMLElement) float32 { return abv } + +/* + * In cents. (int) + */ +func get_base_price(e *colly.HTMLElement) int { + + base_price_noisy := e.Request.Ctx.Get("base_price") + + if base_price_noisy == "" { + return 0 + } + + base_price, err := sanitize_base_price(base_price_noisy) + if err != nil { + log.Println("get_base_price: " + base_price_noisy) + log.Fatal(err) + } + + return base_price +} |
