summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- crawler/database.go | 9
-rw-r--r-- crawler/main.go | 1
-rw-r--r-- crawler/sanitize.go | 28
3 files changed, 34 insertions, 4 deletions
diff --git a/crawler/database.go b/crawler/database.go
index cf8989a..c1e0dd7 100644
--- a/crawler/database.go
+++ b/crawler/database.go
@@ -23,12 +23,13 @@ func (app *App) createTables() error {
query2 := `CREATE TABLE IF NOT EXISTS angebot (
id INTEGER PRIMARY KEY AUTO_INCREMENT,
- shop INT NOT NULL,
name VARCHAR(255) NOT NULL,
- url VARCHAR(255) NOT NULL,
- short_url TEXT,
abv DECIMAL(10,2) NOT NULL,
volume DECIMAL(10,2) NOT NULL,
+ age INT NOT NULL DEFAULT 0,
+ shop INT NOT NULL,
+ url VARCHAR(255) NOT NULL,
+ short_url TEXT,
original_price INT NOT NULL,
discounted_price INT NOT NULL,
base_price INT NOT NULL,
@@ -99,7 +100,7 @@ func (app *App) save_offer(W []Angebot) error {
// resembles UNIQUE constraint
detect_duplicate_query := `SELECT 1 FROM all_view WHERE name = ? AND shop_id = ? AND
- volume = ? AND abv = ? AND original_price = ? AND discounted_price = ? AND valid_until = ?`
+ volume = ? AND abv = ? AND original_price = ? AND discounted_price = ? AND valid_until > ?`
err := app.DB.QueryRow(detect_duplicate_query, o.Name, o.Shop, o.Volume, o.Abv, o.Original_price, o.Discounted_price, o.Valid_until).Scan(&found)
diff --git a/crawler/main.go b/crawler/main.go
index ece25e4..573a718 100644
--- a/crawler/main.go
+++ b/crawler/main.go
@@ -27,6 +27,7 @@ type Angebot struct {
Name string
Abv float32
Volume float32
+ Age int
Shop int
Url string
Short_url string
diff --git a/crawler/sanitize.go b/crawler/sanitize.go
index 8d84507..9025f87 100644
--- a/crawler/sanitize.go
+++ b/crawler/sanitize.go
@@ -2,6 +2,7 @@ package main
import (
"regexp"
+ "strconv"
"strings"
log "github.com/Sirupsen/logrus"
@@ -12,8 +13,11 @@ func sanitize_offer(angebote []Angebot, shop Shop) []Angebot {
var W []Angebot
for _, offer := range angebote {
+
offer.Name = sanitize_name(offer.Name)
+ offer.Age = get_age_from_name(offer.Name)
+
if false == _check_abv_for_spirit_type(offer) {
continue
}
@@ -182,3 +186,27 @@ func _check_abv_for_spirit_type(offer Angebot) bool {
return true
}
+
+func get_age_from_name(name string) int {
+ r_years, err := regexp.Compile(`[0-9]+\s*Jahre`)
+ if err != nil {
+ Fatal(err, "get_age_from_name: Years regexp failed")
+ }
+ age_noisy := r_years.FindString(name)
+ if age_noisy == "" {
+ log.Debug("get_age_from_name: No Age found in (" + name + ")")
+ return 0
+ }
+
+ r, err := regexp.Compile(`[0-9]+`)
+ if err != nil {
+ Fatal(err, "get_age_from_name: Numbers regexp failed")
+ }
+ age_noisy = r.FindString(age_noisy)
+
+ age, err := strconv.Atoi(age_noisy)
+ if err != nil {
+ Fatal(err, "get_age_from_name: String to int (atoi) failed")
+ }
+ return age
+}