summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: horus, 2018-02-12 22:53:28 +0100
committer: horus, 2018-02-12 22:53:28 +0100
commit: f4a905f93824b91a56b3fb7117438935ea16286f (patch)
tree: a3bb5a2f755194c63b449345259661d8e0db9ff7
parent: 87c347bfb51895499cc862a33453df9945a4656e (diff)
download: alkobote-f4a905f93824b91a56b3fb7117438935ea16286f.tar.gz
Improvements, bug fixes, more utility functions, etc... (crawler)
-rw-r--r-- crawler/convert_price.go | 2
-rw-r--r-- crawler/main.go | 8
-rw-r--r-- crawler/sanitize.go | 23
-rw-r--r-- crawler/utility.go | 65
4 files changed, 94 insertions, 4 deletions
diff --git a/crawler/convert_price.go b/crawler/convert_price.go
index a76c067..2d2bc1a 100644
--- a/crawler/convert_price.go
+++ b/crawler/convert_price.go
@@ -10,7 +10,7 @@ import (
func convert_price(price string) (int, error) {
if "" == price {
- return 0, errors.New("Empty string")
+ return 0, errors.New("convert_price: Empty string")
}
multiply_by_10 := false
diff --git a/crawler/main.go b/crawler/main.go
index 8a3556a..779ebba 100644
--- a/crawler/main.go
+++ b/crawler/main.go
@@ -34,6 +34,7 @@ type Angebot struct {
Base_price int
Image_url string
Spirit_type string
+ Website string
Valid_until int
}
@@ -65,12 +66,15 @@ func main() {
}
app.DB, err = sqlx.Connect(app.Config.DBDriver, app.Config.DBUser+":"+app.Config.DBPassword+"@tcp("+app.Config.DBHost+":"+app.Config.DBPort+")/"+app.Config.DBDBName+app.Config.DBOptions)
}
- defer app.DB.Close()
-
if err != nil {
log.Fatal(err)
}
+ if err = app.DB.Ping(); err != nil {
+ log.Fatal(err)
+ }
+ defer app.DB.Close()
+
err = app.createTables()
if err != nil {
log.Fatal(err)
diff --git a/crawler/sanitize.go b/crawler/sanitize.go
index a40745a..7a2ff58 100644
--- a/crawler/sanitize.go
+++ b/crawler/sanitize.go
@@ -13,6 +13,29 @@ func sanitize_offer(angebote []Angebot) []Angebot {
for _, offer := range angebote {
offer.Name = sanitize_name(offer.Name)
+ if offer.Abv == 0 {
+ log.Println("sanitize.go: abv zero: " + offer.Name + "( " + offer.Url + ")")
+ continue
+ }
+ if offer.Volume == 0 {
+ log.Println("sanitize.go: volume zero: " + offer.Name + "( " + offer.Url + ")")
+ continue
+ }
+ if offer.Discounted_price == 0 {
+ log.Println("sanitize.go: discounted_price zero: " + offer.Name + "( " + offer.Url + ")")
+ continue
+ }
+ if offer.Original_price == 0 {
+ log.Println("sanitize.go: original_price zero: " + offer.Name + "( " + offer.Url + ")")
+ continue
+ }
+ if offer.Base_price == 0 {
+ log.Println("sanitize.go: base_price zero: " + offer.Name + "( " + offer.Url + ")")
+ continue
+ }
+
+ //offer.Website = ""
+
W = append(W, offer)
}
diff --git a/crawler/utility.go b/crawler/utility.go
index 1e426b9..c3daeb1 100644
--- a/crawler/utility.go
+++ b/crawler/utility.go
@@ -9,6 +9,15 @@ import (
"github.com/gocolly/colly"
)
+func stringInSlice(a string, list []string) bool {
+ for _, b := range list {
+ if b == a {
+ return true
+ }
+ }
+ return false
+}
+
func detect_spirit_type(name string) string {
matched, err := regexp.MatchString(`(^|\s)Gin(\s|$)`, name)
if err != nil {
@@ -17,7 +26,7 @@ func detect_spirit_type(name string) string {
if matched {
return "Gin"
}
- matched, err = regexp.MatchString(`(^|\s)Rum(\s|$)`, name)
+ matched, err = regexp.MatchString(`(^|\s)Rh?um(\s|$)`, name)
if err != nil {
log.Fatal(err)
}
@@ -38,6 +47,34 @@ func detect_spirit_type(name string) string {
if matched {
return "Whisky"
}
+ matched, err = regexp.MatchString(`(^|\s)Champagner(\s|$)`, name)
+ if err != nil {
+ log.Fatal(err)
+ }
+ if matched {
+ return "Champagner"
+ }
+ matched, err = regexp.MatchString(`(^|\s)Cognac(\s|$)`, name)
+ if err != nil {
+ log.Fatal(err)
+ }
+ if matched {
+ return "Cognac"
+ }
+ matched, err = regexp.MatchString(`(^|\s)Grappa(\s|$)`, name)
+ if err != nil {
+ log.Fatal(err)
+ }
+ if matched {
+ return "Grappa"
+ }
+ matched, err = regexp.MatchString(`(^|\s)Likör(\s|$)`, name)
+ if err != nil {
+ log.Fatal(err)
+ }
+ if matched {
+ return "Likör"
+ }
return "Verschiedenes"
}
@@ -75,6 +112,11 @@ func extract_abv(abv_noisy string) (float32, error) {
}
abv_noisy = strings.Replace(abv_noisy, ",", ".", 1)
abv_noisy = strings.TrimSpace(abv_noisy)
+ r_abv, err := regexp.Compile(`[0-9]+([,.][0-9]+)?`)
+ if err != nil {
+ log.Fatal(err)
+ }
+ abv_noisy = r_abv.FindString(abv_noisy)
abv64, err := strconv.ParseFloat(abv_noisy, 32)
if err != nil {
@@ -119,6 +161,7 @@ func get_abv(e *colly.HTMLElement) float32 {
if abv_noisy == "" {
return 0
}
+ // abv_noisy = strings.Replace(abv_noisy, ".", ",", 1)
abv, err := extract_abv(abv_noisy)
if err != nil {
@@ -128,3 +171,23 @@ func get_abv(e *colly.HTMLElement) float32 {
return abv
}
+
+/*
+ * In cents. (int)
+ */
+func get_base_price(e *colly.HTMLElement) int {
+
+ base_price_noisy := e.Request.Ctx.Get("base_price")
+
+ if base_price_noisy == "" {
+ return 0
+ }
+
+ base_price, err := sanitize_base_price(base_price_noisy)
+ if err != nil {
+ log.Println("get_base_price: " + base_price_noisy)
+ log.Fatal(err)
+ }
+
+ return base_price
+}