summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMax2018-02-08 16:07:46 +0100
committerMax2018-02-08 16:07:46 +0100
commita418c52123969b01c37bafd67ec226410211cccf (patch)
tree3df058820a705f5d3fd3867432fd693d4322d751
parentca8db86baaa367e3ec0af2c68ec63d21ae3b6190 (diff)
downloadalkobote-a418c52123969b01c37bafd67ec226410211cccf.tar.gz
Crawler extracts volume, price per litre and abv. (bottleshop only)
-rw-r--r--crawler/database.go21
-rw-r--r--crawler/main.go5
-rw-r--r--crawler/sanitize.go8
-rw-r--r--crawler/shop_bottleworld.go43
4 files changed, 66 insertions, 11 deletions
diff --git a/crawler/database.go b/crawler/database.go
index 741028f..b1d4e5e 100644
--- a/crawler/database.go
+++ b/crawler/database.go
@@ -24,8 +24,12 @@ func (app *App) createTables() error {
shop int,
name TEXT,
url TEXT,
+ short_url TEXT,
+ abv FLOAT(100,0),
+ volume FLOAT,
original_price INT,
discounted_price INT,
+ price_per_litre INT,
image_url TEXT,
spirit_type TEXT,
valid_until INT DEFAULT NULL,
@@ -37,7 +41,14 @@ func (app *App) createTables() error {
return err
}
- query3 := `CREATE OR REPLACE VIEW angebote AS SELECT angebot.name,angebot.url,original_price, discounted_price,image_url,shop.name as shop, shop.url as shop_url, (original_price/discounted_price) AS quotient FROM angebot JOIN shop ON angebot.shop = shop.id WHERE spirit_type = "Whisky" AND original_price > 1998`
+ query3 := `CREATE OR REPLACE VIEW whisky_view AS
+ SELECT
+ angebot.id, angebot.name, angebot.abv, angebot.volume, angebot.url,original_price, discounted_price, angebot.price_per_litre, image_url,
+ shop.name as shop, shop.url as shop_url, (original_price/discounted_price) AS quotient
+ FROM angebot
+ JOIN shop ON angebot.shop = shop.id
+ WHERE
+ spirit_type = "Whisky" AND original_price > 1998`
_, err = app.DB.Exec(query3)
return err
@@ -45,7 +56,7 @@ func (app *App) createTables() error {
func (app *App) save_offer(W []Angebot) error {
- query := `INSERT INTO angebot (shop, name, url, original_price, discounted_price, valid_until, image_url, spirit_type, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
+ query := `INSERT INTO angebot (shop, name, url, abv, volume, original_price, discounted_price, price_per_litre, valid_until, image_url, spirit_type, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
stmt, err := app.DB.Prepare(query)
if err != nil {
@@ -61,7 +72,7 @@ func (app *App) save_offer(W []Angebot) error {
continue
}
- err := app.DB.QueryRow("SELECT 1 FROM angebot WHERE shop = ? AND name = ? AND url = ? AND original_price = ? AND discounted_price = ? AND image_url = ? AND spirit_type = ?", o.Shop, o.Name, o.Url, o.Original_price, o.Discounted_price, o.Image_url, o.Spirit_type).Scan(&found)
+ err := app.DB.QueryRow("SELECT 1 FROM angebot WHERE shop = ? AND name = ? AND url = ? AND original_price = ? AND discounted_price = ? AND spirit_type = ?", o.Shop, o.Name, o.Url, o.Original_price, o.Discounted_price, o.Spirit_type).Scan(&found)
/*
*/
@@ -69,9 +80,9 @@ func (app *App) save_offer(W []Angebot) error {
if err == sql.ErrNoRows {
if 0 == o.Valid_until {
- _, err = stmt.Exec(o.Shop, o.Name, o.Url, o.Original_price, o.Discounted_price, sql.NullInt64{}, o.Image_url, o.Spirit_type, app.Now)
+ _, err = stmt.Exec(o.Shop, o.Name, o.Url, o.Abv, o.Volume, o.Original_price, o.Discounted_price, o.Price_per_litre, sql.NullInt64{}, o.Image_url, o.Spirit_type, app.Now)
} else {
- _, err = stmt.Exec(o.Shop, o.Name, o.Url, o.Original_price, o.Discounted_price, o.Valid_until, o.Image_url, o.Spirit_type, app.Now)
+ _, err = stmt.Exec(o.Shop, o.Name, o.Url, o.Abv, o.Volume, o.Original_price, o.Discounted_price, o.Price_per_litre, o.Valid_until, o.Image_url, o.Spirit_type, app.Now)
}
if err != nil {
return err
diff --git a/crawler/main.go b/crawler/main.go
index 9a21c5f..a25feaf 100644
--- a/crawler/main.go
+++ b/crawler/main.go
@@ -25,12 +25,13 @@ type App struct {
type Angebot struct {
Id int
Name string
- Abv string
- Volume string
+ Abv float32
+ Volume float32
Shop int
Url string
Original_price int
Discounted_price int
+ Price_per_litre int
Image_url string
Spirit_type string
Valid_until int
diff --git a/crawler/sanitize.go b/crawler/sanitize.go
index c86faff..fc4ee81 100644
--- a/crawler/sanitize.go
+++ b/crawler/sanitize.go
@@ -91,3 +91,11 @@ func sanitize_name(name string) string {
return name
}
+
+func sanitize_price_per(price_noisy string) (price int, err error) {
+ if strings.Contains(price_noisy, "Preis pro Liter") {
+ price_noisy = strings.Replace(price_noisy, "Preis pro Liter", "", -1)
+ }
+
+ return convert_price(price_noisy)
+}
diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go
index b6af7e0..a3eae35 100644
--- a/crawler/shop_bottleworld.go
+++ b/crawler/shop_bottleworld.go
@@ -3,7 +3,7 @@ package main
import (
"log"
"regexp"
- // "strings"
+ "strings"
// "github.com/PuerkitoBio/goquery"
"github.com/gocolly/colly"
@@ -27,8 +27,7 @@ func ScrapeBottleWord(shop Shop) []Angebot {
log.Fatal(err)
}
if !matched {
- //W.Spirit_type = "Anderes"
- return
+ W.Spirit_type = detect_spirit_type(whisky_name)
} else {
W.Spirit_type = "Whisky"
}
@@ -51,14 +50,50 @@ func ScrapeBottleWord(shop Shop) []Angebot {
}
})
})
+
+ price_per_litre_noisy := e.ChildText(".price-per-liter")
+ price_per_litre, err := sanitize_price_per(price_per_litre_noisy)
+ if err != nil {
+ log.Fatal(err)
+ }
+ W.Price_per_litre = price_per_litre
+
W.Image_url = e.ChildAttr("img", "src")
+ e.Request.Visit(W.Url)
+
W.Shop = shop.Id
- W.Spirit_type = "Whisky"
+
+ volume_noisy := e.Request.Ctx.Get("volume")
+ W.Volume, err = extract_volume(volume_noisy)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ abv_noisy := e.Request.Ctx.Get("abv")
+ W.Abv, err = extract_abv(abv_noisy)
+ if err != nil {
+ log.Fatal(err)
+ }
Whiskys = append(Whiskys, W)
})
+ c.OnHTML("#product-attribute-specs-table", func(e *colly.HTMLElement) {
+ e.ForEach("tr", func(i int, e *colly.HTMLElement) {
+ td_str := e.ChildText("td")
+ matched, err := regexp.MatchString("[0-9]+([,.][0-9]+)? l$", td_str)
+ if err != nil {
+ log.Fatal(err)
+ }
+ if matched {
+ e.Request.Ctx.Put("volume", td_str)
+ } else if strings.Contains(td_str, "%") {
+ e.Request.Ctx.Put("abv", td_str)
+ }
+ })
+ })
+
c.Visit("https://www.bottleworld.de/aktuelle-sonderpreise/show/all")
return Whiskys