diff options
| author | horus_arch | 2018-02-19 15:15:45 +0100 |
|---|---|---|
| committer | horus_arch | 2018-02-19 15:15:45 +0100 |
| commit | e3312da52d0ede1c5f783feba0840c53eeca723d (patch) | |
| tree | 5eecf493a0c2bbd0d5debe4fd2fb73cfd714c9b0 /crawler | |
| parent | 8700aaaec582744a04af65eeff7e9ff8555b194a (diff) | |
| download | alkobote-e3312da52d0ede1c5f783feba0840c53eeca723d.tar.gz | |
Bugfix + detects age. (crawler)
Diffstat (limited to 'crawler')
| -rw-r--r-- | crawler/database.go | 9 | ||||
| -rw-r--r-- | crawler/main.go | 1 | ||||
| -rw-r--r-- | crawler/sanitize.go | 28 |
3 files changed, 34 insertions, 4 deletions
```diff
diff --git a/crawler/database.go b/crawler/database.go
index cf8989a..c1e0dd7 100644
--- a/crawler/database.go
+++ b/crawler/database.go
@@ -23,12 +23,13 @@ func (app *App) createTables() error {
 
 	query2 := `CREATE TABLE IF NOT EXISTS angebot (
 		id INTEGER PRIMARY KEY AUTO_INCREMENT,
-		shop INT NOT NULL,
 		name VARCHAR(255) NOT NULL,
-		url VARCHAR(255) NOT NULL,
-		short_url TEXT,
 		abv DECIMAL(10,2) NOT NULL,
 		volume DECIMAL(10,2) NOT NULL,
+		age INT NOT NULL DEFAULT 0,
+		shop INT NOT NULL,
+		url VARCHAR(255) NOT NULL,
+		short_url TEXT,
 		original_price INT NOT NULL,
 		discounted_price INT NOT NULL,
 		base_price INT NOT NULL,
@@ -99,7 +100,7 @@ func (app *App) save_offer(W []Angebot) error {
 
 		// resembles UNIQUE constraint
 		detect_duplicate_query := `SELECT 1 FROM all_view WHERE name = ? AND shop_id = ? AND
-		volume = ? AND abv = ? AND original_price = ? AND discounted_price = ? AND valid_until = ?`
+		volume = ? AND abv = ? AND original_price = ? AND discounted_price = ? AND valid_until > ?`
 
 		err := app.DB.QueryRow(detect_duplicate_query, o.Name, o.Shop, o.Volume, o.Abv, o.Original_price, o.Discounted_price, o.Valid_until).Scan(&found)
 
diff --git a/crawler/main.go b/crawler/main.go
index ece25e4..573a718 100644
--- a/crawler/main.go
+++ b/crawler/main.go
@@ -27,6 +27,7 @@ type Angebot struct {
 	Name string
 	Abv float32
 	Volume float32
+	Age int
 	Shop int
 	Url string
 	Short_url string
diff --git a/crawler/sanitize.go b/crawler/sanitize.go
index 8d84507..9025f87 100644
--- a/crawler/sanitize.go
+++ b/crawler/sanitize.go
@@ -2,6 +2,7 @@ package main
 
 import (
 	"regexp"
+	"strconv"
 	"strings"
 
 	log "github.com/Sirupsen/logrus"
@@ -12,8 +13,11 @@ func sanitize_offer(angebote []Angebot, shop Shop) []Angebot {
 	var W []Angebot
 
 	for _, offer := range angebote {
+		offer.Name = sanitize_name(offer.Name)
 
+		offer.Age = get_age_from_name(offer.Name)
 
+
 		if false == _check_abv_for_spirit_type(offer) {
 			continue
 		}
@@ -182,3 +186,27 @@ func _check_abv_for_spirit_type(offer Angebot) bool {
 
 	return true
 }
+
+func get_age_from_name(name string) int {
+	r_years, err := regexp.Compile(`[0-9]+\s*Jahre`)
+	if err != nil {
+		Fatal(err, "get_age_from_name: Years regexp failed")
+	}
+	age_noisy := r_years.FindString(name)
+	if age_noisy == "" {
+		log.Debug("get_age_from_name: No Age found in (" + name + ")")
+		return 0
+	}
+
+	r, err := regexp.Compile(`[0-9]+`)
+	if err != nil {
+		Fatal(err, "get_age_from_name: Numbers regexp failed")
+	}
+	age_noisy = r.FindString(age_noisy)
+
+	age, err := strconv.Atoi(age_noisy)
+	if err != nil {
+		Fatal(err, "get_age_from_name: String to int (atoi) failed")
+	}
+	return age
+}
```
