diff options
| -rw-r--r-- | crawler/database.go | 21 | ||||
| -rw-r--r-- | crawler/main.go | 5 | ||||
| -rw-r--r-- | crawler/sanitize.go | 8 | ||||
| -rw-r--r-- | crawler/shop_bottleworld.go | 43 |
4 files changed, 66 insertions, 11 deletions
diff --git a/crawler/database.go b/crawler/database.go index 741028f..b1d4e5e 100644 --- a/crawler/database.go +++ b/crawler/database.go @@ -24,8 +24,12 @@ func (app *App) createTables() error { shop int, name TEXT, url TEXT, + short_url TEXT, + abv FLOAT(100,0), + volume FLOAT, original_price INT, discounted_price INT, + price_per_litre INT, image_url TEXT, spirit_type TEXT, valid_until INT DEFAULT NULL, @@ -37,7 +41,14 @@ func (app *App) createTables() error { return err } - query3 := `CREATE OR REPLACE VIEW angebote AS SELECT angebot.name,angebot.url,original_price, discounted_price,image_url,shop.name as shop, shop.url as shop_url, (original_price/discounted_price) AS quotient FROM angebot JOIN shop ON angebot.shop = shop.id WHERE spirit_type = "Whisky" AND original_price > 1998` + query3 := `CREATE OR REPLACE VIEW whisky_view AS + SELECT + angebot.id, angebot.name, angebot.abv, angebot.volume, angebot.url,original_price, discounted_price, angebot.price_per_litre, image_url, + shop.name as shop, shop.url as shop_url, (original_price/discounted_price) AS quotient + FROM angebot + JOIN shop ON angebot.shop = shop.id + WHERE + spirit_type = "Whisky" AND original_price > 1998` _, err = app.DB.Exec(query3) return err @@ -45,7 +56,7 @@ func (app *App) createTables() error { func (app *App) save_offer(W []Angebot) error { - query := `INSERT INTO angebot (shop, name, url, original_price, discounted_price, valid_until, image_url, spirit_type, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)` + query := `INSERT INTO angebot (shop, name, url, abv, volume, original_price, discounted_price, price_per_litre, valid_until, image_url, spirit_type, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)` stmt, err := app.DB.Prepare(query) if err != nil { @@ -61,7 +72,7 @@ func (app *App) save_offer(W []Angebot) error { continue } - err := app.DB.QueryRow("SELECT 1 FROM angebot WHERE shop = ? AND name = ? AND url = ? AND original_price = ? AND discounted_price = ? AND image_url = ? AND spirit_type = ?", o.Shop, o.Name, o.Url, o.Original_price, o.Discounted_price, o.Image_url, o.Spirit_type).Scan(&found) + err := app.DB.QueryRow("SELECT 1 FROM angebot WHERE shop = ? AND name = ? AND url = ? AND original_price = ? AND discounted_price = ? AND spirit_type = ?", o.Shop, o.Name, o.Url, o.Original_price, o.Discounted_price, o.Spirit_type).Scan(&found) /* */ @@ -69,9 +80,9 @@ func (app *App) save_offer(W []Angebot) error { if err == sql.ErrNoRows { if 0 == o.Valid_until { - _, err = stmt.Exec(o.Shop, o.Name, o.Url, o.Original_price, o.Discounted_price, sql.NullInt64{}, o.Image_url, o.Spirit_type, app.Now) + _, err = stmt.Exec(o.Shop, o.Name, o.Url, o.Abv, o.Volume, o.Original_price, o.Discounted_price, o.Price_per_litre, sql.NullInt64{}, o.Image_url, o.Spirit_type, app.Now) } else { - _, err = stmt.Exec(o.Shop, o.Name, o.Url, o.Original_price, o.Discounted_price, o.Valid_until, o.Image_url, o.Spirit_type, app.Now) + _, err = stmt.Exec(o.Shop, o.Name, o.Url, o.Abv, o.Volume, o.Original_price, o.Discounted_price, o.Price_per_litre, o.Valid_until, o.Image_url, o.Spirit_type, app.Now) } if err != nil { return err diff --git a/crawler/main.go b/crawler/main.go index 9a21c5f..a25feaf 100644 --- a/crawler/main.go +++ b/crawler/main.go @@ -25,12 +25,13 @@ type App struct { type Angebot struct { Id int Name string - Abv string - Volume string + Abv float32 + Volume float32 Shop int Url string Original_price int Discounted_price int + Price_per_litre int Image_url string Spirit_type string Valid_until int diff --git a/crawler/sanitize.go b/crawler/sanitize.go index c86faff..fc4ee81 100644 --- a/crawler/sanitize.go +++ b/crawler/sanitize.go @@ -91,3 +91,11 @@ func sanitize_name(name string) string { return name } + +func sanitize_price_per(price_noisy string) (price int, err error) { + if strings.Contains(price_noisy, "Preis pro Liter") { + price_noisy = strings.Replace(price_noisy, "Preis pro Liter", "", -1) + } + + return convert_price(price_noisy) +} diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go index b6af7e0..a3eae35 100644 --- a/crawler/shop_bottleworld.go +++ b/crawler/shop_bottleworld.go @@ -3,7 +3,7 @@ package main import ( "log" "regexp" - // "strings" + "strings" // "github.com/PuerkitoBio/goquery" "github.com/gocolly/colly" @@ -27,8 +27,7 @@ func ScrapeBottleWord(shop Shop) []Angebot { log.Fatal(err) } if !matched { - //W.Spirit_type = "Anderes" - return + W.Spirit_type = detect_spirit_type(whisky_name) } else { W.Spirit_type = "Whisky" } @@ -51,14 +50,50 @@ func ScrapeBottleWord(shop Shop) []Angebot { } }) }) + + price_per_litre_noisy := e.ChildText(".price-per-liter") + price_per_litre, err := sanitize_price_per(price_per_litre_noisy) + if err != nil { + log.Fatal(err) + } + W.Price_per_litre = price_per_litre + W.Image_url = e.ChildAttr("img", "src") + e.Request.Visit(W.Url) + W.Shop = shop.Id - W.Spirit_type = "Whisky" + + volume_noisy := e.Request.Ctx.Get("volume") + W.Volume, err = extract_volume(volume_noisy) + if err != nil { + log.Fatal(err) + } + + abv_noisy := e.Request.Ctx.Get("abv") + W.Abv, err = extract_abv(abv_noisy) + if err != nil { + log.Fatal(err) + } Whiskys = append(Whiskys, W) }) + c.OnHTML("#product-attribute-specs-table", func(e *colly.HTMLElement) { + e.ForEach("tr", func(i int, e *colly.HTMLElement) { + td_str := e.ChildText("td") + matched, err := regexp.MatchString("[0-9]+([,.][0-9]+)? l$", td_str) + if err != nil { + log.Fatal(err) + } + if matched { + e.Request.Ctx.Put("volume", td_str) + } else if strings.Contains(td_str, "%") { + e.Request.Ctx.Put("abv", td_str) + } + }) + }) + c.Visit("https://www.bottleworld.de/aktuelle-sonderpreise/show/all") return Whiskys |
