From 13a807854bf4d0258723ec3152b217ed4cf8e051 Mon Sep 17 00:00:00 2001 From: Max Date: Mon, 5 Feb 2018 23:48:16 +0100 Subject: Adds referential integrity. --- bottleworld.go | 4 +- database.go | 35 ++++++++--------- main.go | 50 +++++++++++++++++++------ mcwhisky.go | 4 +- rumundco.go | 4 +- shops.go | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ whic.go | 4 +- whiskyde.go | 4 +- whiskysitenl.go | 4 +- whiskyworld.go | 4 +- whiskyzone.go | 4 +- 11 files changed, 187 insertions(+), 44 deletions(-) create mode 100644 shops.go diff --git a/bottleworld.go b/bottleworld.go index 55ee47c..3a3c631 100644 --- a/bottleworld.go +++ b/bottleworld.go @@ -9,7 +9,7 @@ import ( "github.com/gocolly/colly" ) -func ScrapeBottleWord() []Angebot { +func ScrapeBottleWord(shop Shop) []Angebot { Whiskys := []Angebot{} c := colly.NewCollector( @@ -53,7 +53,7 @@ func ScrapeBottleWord() []Angebot { }) W.Image_url = e.ChildAttr("img", "src") - W.Shop = "bottleworld.de" + W.Shop = shop.Id W.Spirit_type = "Whisky" Whiskys = append(Whiskys, W) diff --git a/database.go b/database.go index d5d59de..a6145bb 100644 --- a/database.go +++ b/database.go @@ -6,30 +6,31 @@ import ( ) func (app *App) createTables() error { - query1 := `CREATE TABLE IF NOT EXISTS angebot ( + query1 := `CREATE TABLE IF NOT EXISTS shop( id INTEGER PRIMARY KEY AUTO_INCREMENT, - shop TEXT, - name TEXT, - url TEXT, - original_price INT, - discounted_price INT, - image_url TEXT, - spirit_type TEXT, - valid_until INT DEFAULT NULL, - created_at INT + name varchar(255) UNIQUE, + url varchar(255) UNIQUE, + logo_url text, + shipping_costs text, + free_shipping text )` _, err := app.DB.Exec(query1) if err != nil { return err } - query2 := `CREATE TABLE IF NOT EXISTS shop( + query2 := `CREATE TABLE IF NOT EXISTS angebot ( id INTEGER PRIMARY KEY AUTO_INCREMENT, - name text, - url text, - logo_url text, - shipping_costs text, - free_shipping text + shop int, + name TEXT, + url TEXT, + original_price INT, + discounted_price INT, + image_url TEXT, + spirit_type TEXT, + valid_until INT DEFAULT NULL, + created_at INT, + FOREIGN KEY(shop) REFERENCES shop(id) )` _, err = app.DB.Exec(query2) return err @@ -108,7 +109,7 @@ func (app *App) remove_expired(W []Angebot) error { func (app *App) offer_contains(W []Angebot, offer_db Angebot) bool { for _, v := range W { - if v.Shop == offer_db.Shop && v.Name == offer_db.Name && v.Url == offer_db.Url && v.Original_price == offer_db.Original_price && v.Discounted_price == offer_db.Discounted_price { + if v.Shop == offer_db.Shop && v.Name == offer_db.Name && v.Original_price == offer_db.Original_price && v.Discounted_price == offer_db.Discounted_price { if app.Config.Debug { log.Println("Contains: " + v.Name) diff --git a/main.go b/main.go index 3dc3fc0..5255e1c 100644 --- a/main.go +++ b/main.go @@ -25,7 +25,7 @@ type App struct { type Angebot struct { Id int Name string - Shop string + Shop int Url string Original_price int Discounted_price int @@ -35,6 +35,7 @@ type Angebot struct { } type Shop struct { + Id int Name string Url string Logo_url string @@ -72,7 +73,17 @@ func main() { log.Fatal(err) } - W := ScrapeHTML() + err = app.insertShops() + if err != nil { + log.Fatal(err) + } + + shops, err := app.getShops() + if err != nil { + log.Fatal(err) + } + + W := ScrapeHTML(shops) err = app.save_offer(W) if err != nil { @@ -98,15 +109,32 @@ func printName(W []Angebot, name string) { fmt.Println(string(output)) } -func ScrapeHTML() []Angebot { - W := ScrapeBottleWord() - W = append(W, ScrapeMCWhisky()...) - W = append(W, ScrapeRumundCo()...) - W = append(W, ScrapeWhic()...) - W = append(W, ScrapeWhiskyde()...) - W = append(W, ScrapeWhiskysitenl()...) - W = append(W, ScrapeWhiskyworld()...) - W = append(W, ScrapeWhiskyzone()...) +func ScrapeHTML(shops []Shop) []Angebot { + var W []Angebot + + for _, shop := range shops { + + switch shop.Name { + case "Bottleworld": + W = append(W, ScrapeBottleWord(shop)...) + case "MC Whisky": + W = append(W, ScrapeMCWhisky(shop)...) + case "Rum & Co": + W = append(W, ScrapeRumundCo(shop)...) + case "Whic": + W = append(W, ScrapeWhic(shop)...) + case "Whisky.de": + W = append(W, ScrapeWhiskyde(shop)...) + case "Whiskysite.nl": + W = append(W, ScrapeWhiskysitenl(shop)...) + case "Whisky World": + W = append(W, ScrapeWhiskyworld(shop)...) + case "Whiskyzone": + W = append(W, ScrapeWhiskyzone(shop)...) + default: + log.Println(shop.Name + ": No Crawler") + } + } return W } diff --git a/mcwhisky.go b/mcwhisky.go index b50b4e5..e45e740 100644 --- a/mcwhisky.go +++ b/mcwhisky.go @@ -8,7 +8,7 @@ import ( "github.com/gocolly/colly" ) -func ScrapeMCWhisky() []Angebot { +func ScrapeMCWhisky(shop Shop) []Angebot { Whiskys := []Angebot{} c := colly.NewCollector( @@ -42,7 +42,7 @@ func ScrapeMCWhisky() []Angebot { }) W.Image_url = e.ChildAttr("img", "src") - W.Shop = "MC Whisky" + W.Shop = shop.Id W.Spirit_type = "Whisky" Whiskys = append(Whiskys, W) diff --git a/rumundco.go b/rumundco.go index 3c1fb4e..ae349f3 100644 --- a/rumundco.go +++ b/rumundco.go @@ -9,7 +9,7 @@ import ( "github.com/gocolly/colly" ) -func ScrapeRumundCo() []Angebot { +func ScrapeRumundCo(shop Shop) []Angebot { Whiskys := []Angebot{} c := colly.NewCollector( @@ -50,7 +50,7 @@ func ScrapeRumundCo() []Angebot { }) W.Image_url = "https://www.rumundco.de/" + e.ChildAttr("img", "src") - W.Shop = "Rum & Co" + W.Shop = shop.Id W.Spirit_type = "Whisky" Whiskys = append(Whiskys, W) diff --git a/shops.go b/shops.go new file mode 100644 index 0000000..92b11cd --- /dev/null +++ b/shops.go @@ -0,0 +1,114 @@ +package main + +import ( + "log" +) + +func (app *App) insertShops() error { + shops := getShopsFromStruct() + + query := `INSERT IGNORE INTO shop (name, url, logo_url, shipping_costs, free_shipping) VALUES(?, ?, ?, ?, ?)` + + for _, v := range shops { + + _, err := app.DB.Exec(query, v.Name, v.Url, v.Logo_url, v.Shipping_costs, v.Free_shipping) + if err != nil { + return err + } + } + + return nil + +} + +func getShopsFromStruct() []Shop { + Shops := []Shop{} + + Shops = append(Shops, Shop{ + Name: "Bottleworld", + Url: "https://www.bottleword.de", + Logo_url: "", + Shipping_costs: 0, + Free_shipping: "", + }) + Shops = append(Shops, Shop{ + Name: "MC Whisky", + Url: "https://www.mcwhisky.com", + Logo_url: "", + Shipping_costs: 0, + Free_shipping: "", + }) + Shops = append(Shops, Shop{ + Name: "Rum & Co", + Url: "https://www.rumundco.de", + Logo_url: "", + Shipping_costs: 0, + Free_shipping: "", + }) + Shops = append(Shops, Shop{ + Name: "Whic", + Url: "https://whic.de", + Logo_url: "", + Shipping_costs: 0, + Free_shipping: "", + }) + Shops = append(Shops, Shop{ + Name: "Whisky.de", + Url: "https://www.whisky.de", + Logo_url: "", + Shipping_costs: 0, + Free_shipping: "", + }) + Shops = append(Shops, Shop{ + Name: "Whiskysite.nl", + Url: "https://www.whiskysite.nl", + Logo_url: "", + Shipping_costs: 0, + Free_shipping: "", + }) + Shops = append(Shops, Shop{ + Name: "Whisky World", + Url: "https://www.whiskyworld.de", + Logo_url: "", + Shipping_costs: 0, + Free_shipping: "", + }) + Shops = append(Shops, Shop{ + Name: "Whiskyzone", + Url: "https://www.whiskyzone.de", + Logo_url: "", + Shipping_costs: 0, + Free_shipping: "", + }) + + return Shops +} + +func (app *App) getShops() ([]Shop, error) { + + Shops := []Shop{} + + query := `SELECT id,name,url,logo_url,shipping_costs,free_shipping FROM shop` + + rows, err := app.DB.Queryx(query) + if err != nil { + return []Shop{}, err + } + defer rows.Close() + + for rows.Next() { + var shop Shop + err = rows.StructScan(&shop) + + if err != nil { + return []Shop{}, err + } + if app.Config.Debug { + log.Println("Appending: " + shop.Name) + } + + Shops = append(Shops, shop) + } + + return Shops, nil +} diff --git a/whic.go b/whic.go index fced386..896b1fb 100644 --- a/whic.go +++ b/whic.go @@ -8,7 +8,7 @@ import ( "github.com/gocolly/colly" ) -func ScrapeWhic() []Angebot { +func ScrapeWhic(shop Shop) []Angebot { Whiskys := []Angebot{} c := colly.NewCollector( @@ -52,7 +52,7 @@ func ScrapeWhic() []Angebot { } W.Image_url, _ = doc.Find("img").Attr("src") - W.Shop = "Whic" + W.Shop = shop.Id W.Spirit_type = "Whisky" Whiskys = append(Whiskys, W) diff --git a/whiskyde.go b/whiskyde.go index 5bf3248..657bfe0 100644 --- a/whiskyde.go +++ b/whiskyde.go @@ -7,7 +7,7 @@ import ( "github.com/gocolly/colly" ) -func ScrapeWhiskyde() []Angebot { +func ScrapeWhiskyde(shop Shop) []Angebot { Whiskys := []Angebot{} c := colly.NewCollector( @@ -46,7 +46,7 @@ func ScrapeWhiskyde() []Angebot { //W.Valid_until = e.ChildText(".article-price-special") }) - W.Shop = "Whisky.de" + W.Shop = shop.Id W.Spirit_type = "Whisky" Whiskys = append(Whiskys, W) diff --git a/whiskysitenl.go b/whiskysitenl.go index 3eefc06..c8b35a2 100644 --- a/whiskysitenl.go +++ b/whiskysitenl.go @@ -8,7 +8,7 @@ import ( "github.com/gocolly/colly" ) -func ScrapeWhiskysitenl() []Angebot { +func ScrapeWhiskysitenl(shop Shop) []Angebot { Whiskys := []Angebot{} c := colly.NewCollector( @@ -47,7 +47,7 @@ func ScrapeWhiskysitenl() []Angebot { W.Image_url = e.ChildAttr("img", "src") - W.Shop = "Whiskysite.nl" + W.Shop = shop.Id W.Spirit_type = "Whisky" Whiskys = append(Whiskys, W) diff --git a/whiskyworld.go b/whiskyworld.go index 65bbacd..e07c42f 100644 --- a/whiskyworld.go +++ b/whiskyworld.go @@ -7,7 +7,7 @@ import ( "github.com/gocolly/colly" ) -func ScrapeWhiskyworld() []Angebot { +func ScrapeWhiskyworld(shop Shop) []Angebot { Whiskys := []Angebot{} @@ -46,7 +46,7 @@ func ScrapeWhiskyworld() []Angebot { W.Image_url = "https:" + e.ChildAttr("img", "src") - W.Shop = "Whisky World" + W.Shop = shop.Id W.Spirit_type = "Whisky" Whiskys = append(Whiskys, W) diff --git a/whiskyzone.go b/whiskyzone.go index dc4a047..a9e73d0 100644 --- a/whiskyzone.go +++ b/whiskyzone.go @@ -7,7 +7,7 @@ import ( "github.com/gocolly/colly" ) -func ScrapeWhiskyzone() []Angebot { +func ScrapeWhiskyzone(shop Shop) []Angebot { Whiskys := []Angebot{} @@ -44,7 +44,7 @@ func ScrapeWhiskyzone() []Angebot { W.Image_url = e.ChildAttr("img", "src") }) - W.Shop = "Whiskyzone" + W.Shop = shop.Id W.Spirit_type = "Whisky" Whiskys = append(Whiskys, W) -- cgit v1.2.3