diff options
| author | Max | 2018-02-06 00:35:39 +0100 |
|---|---|---|
| committer | Max | 2018-02-06 00:35:39 +0100 |
| commit | 71950479fbd6088f249e5fda3b180f294d1d745d (patch) | |
| tree | 06f360a7e02b7e0011bda815fa102ec54ae8d0ec /crawler | |
| parent | 13a807854bf4d0258723ec3152b217ed4cf8e051 (diff) | |
| download | alkobote-71950479fbd6088f249e5fda3b180f294d1d745d.tar.gz | |
Moves crawler to designated directory.
Diffstat (limited to 'crawler')
| -rw-r--r-- | crawler/Makefile | 20 | ||||
| -rw-r--r-- | crawler/config.go | 76 | ||||
| -rw-r--r-- | crawler/database.go | 130 | ||||
| -rw-r--r-- | crawler/main.go | 140 | ||||
| -rw-r--r-- | crawler/sanitize_name.go | 13 | ||||
| -rw-r--r-- | crawler/sanitize_price.go | 103 | ||||
| -rw-r--r-- | crawler/shop_bottleworld.go | 65 | ||||
| -rw-r--r-- | crawler/shop_mcwhisky.go | 54 | ||||
| -rw-r--r-- | crawler/shop_rumundco.go | 62 | ||||
| -rw-r--r-- | crawler/shop_whic.go | 64 | ||||
| -rw-r--r-- | crawler/shop_whiskyde.go | 58 | ||||
| -rw-r--r-- | crawler/shop_whiskysitenl.go | 59 | ||||
| -rw-r--r-- | crawler/shop_whiskyworld.go | 60 | ||||
| -rw-r--r-- | crawler/shop_whiskyzone.go | 56 | ||||
| -rw-r--r-- | crawler/shops.go | 114 |
15 files changed, 1074 insertions, 0 deletions
diff --git a/crawler/Makefile b/crawler/Makefile new file mode 100644 index 0000000..7126e13 --- /dev/null +++ b/crawler/Makefile @@ -0,0 +1,20 @@ +BINARY := $(notdir $(patsubst %/,%,$(dir $(abspath $(lastword $(MAKEFILE_LIST)))))) +MAINFILE := +SOURCEDIR := . +SOURCES := $(shell find $(SOURCEDIR) -name '*.go') + +all: build run + +build: $(BINARY) + +$(BINARY): $(SOURCES) + go build -o $(BINARY) + +run: build + ./$(BINARY) + +clean: + $(RM) $(RMFLAGS) $(BINARY) + +edit: + $(EDITOR) $(MAINFILE) *.go diff --git a/crawler/config.go b/crawler/config.go new file mode 100644 index 0000000..2706201 --- /dev/null +++ b/crawler/config.go @@ -0,0 +1,76 @@ +package main + +import ( + log "github.com/Sirupsen/logrus" + "github.com/spf13/viper" +) + +type Config struct { + DBDriver string + DBDBName string + DBHost string + DBPort string + DBUser string + DBPassword string + DBOptions string + DBPath string // for sqlite + + Debug bool +} + +// Parses the configuration and sets the configuration struct. +func (c *Config) parseConfig(configFile string) { + + viper.SetDefault("DBDriver", "mysql") + viper.SetDefault("DBDBName", "alkobote") + viper.SetDefault("DBHost", "localhost") + viper.SetDefault("DBPort", "3306") + + viper.SetDefault("DBPath", "./alkobote.db") + + viper.SetDefault("Debug", false) + + // Name of the configuration file + viper.SetConfigName("config") + + // Where to find the config file + if configFile == "" { + viper.AddConfigPath("/etc/alkobote.de/") + viper.AddConfigPath(".") + viper.AddConfigPath("$HOME/.config/alkobote.de/") + viper.AddConfigPath("$HOME/alkobote.de/") + } else { + viper.AddConfigPath(configFile) + } + + // Env variables need to be prefixed with "ALKOBOTE_" + viper.SetEnvPrefix("ALKOBOTE") + + // Parses automatic the matching env variables + viper.AutomaticEnv() + + // Reads the config + err := viper.ReadInConfig() + if err != nil { + log.WithFields( + log.Fields{ + "error": err.Error(), + }, + ).Fatal("Fatal error config file") + } + + c.setsConfig() +} + +// Actually sets the config struct +func (c *Config) setsConfig() { + c.DBDriver = viper.GetString("DBDriver") + c.DBHost = viper.GetString("DBHost") + c.DBPort = viper.GetString("DBPort") + c.DBUser = viper.GetString("DBUser") + c.DBPassword = viper.GetString("DBPassword") + c.DBDBName = viper.GetString("DBDBName") + c.DBOptions = viper.GetString("DBOptions") + c.DBPath = viper.GetString("DBPath") + c.Debug = viper.GetBool("Debug") +} diff --git a/crawler/database.go b/crawler/database.go new file mode 100644 index 0000000..a6145bb --- /dev/null +++ b/crawler/database.go @@ -0,0 +1,130 @@ +package main + +import ( + "database/sql" + "log" +) + +func (app *App) createTables() error { + query1 := `CREATE TABLE IF NOT EXISTS shop( + id INTEGER PRIMARY KEY AUTO_INCREMENT, + name varchar(255) UNIQUE, + url varchar(255) UNIQUE, + logo_url text, + shipping_costs text, + free_shipping text + )` + _, err := app.DB.Exec(query1) + if err != nil { + return err + } + + query2 := `CREATE TABLE IF NOT EXISTS angebot ( + id INTEGER PRIMARY KEY AUTO_INCREMENT, + shop int, + name TEXT, + url TEXT, + original_price INT, + discounted_price INT, + image_url TEXT, + spirit_type TEXT, + valid_until INT DEFAULT NULL, + created_at INT, + FOREIGN KEY(shop) REFERENCES shop(id) + )` + _, err = app.DB.Exec(query2) + return err +} + +func (app *App) save_offer(W []Angebot) error { + + query := `INSERT INTO angebot (shop, name, url, original_price, discounted_price, valid_until, image_url, spirit_type, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)` + + stmt, err := app.DB.Prepare(query) + if err != nil { + return err + } + defer stmt.Close() + + for _, o := range W { + + var found int + + if o.Discounted_price == 0 || o.Original_price == 0 { + continue + } + + err := app.DB.QueryRow("SELECT 1 FROM angebot WHERE shop = ? AND name = ? AND url = ? AND original_price = ? AND discounted_price = ? AND image_url = ? AND spirit_type = ?", o.Shop, o.Name, o.Url, o.Original_price, o.Discounted_price, o.Image_url, o.Spirit_type).Scan(&found) + + /* + */ + + if err == sql.ErrNoRows { + + if 0 == o.Valid_until { + _, err = stmt.Exec(o.Shop, o.Name, o.Url, o.Original_price, o.Discounted_price, sql.NullInt64{}, o.Image_url, o.Spirit_type, app.Now) + } else { + _, err = stmt.Exec(o.Shop, o.Name, o.Url, o.Original_price, o.Discounted_price, o.Valid_until, o.Image_url, o.Spirit_type, app.Now) + } + if err != nil { + return err + } + + } + } + + //return app.remove_expired(W) + return nil +} + +func (app *App) remove_expired(W []Angebot) error { + + query := `SELECT id, shop, name, url, original_price, discounted_price FROM angebot WHERE created_at < ? AND valid_until IS NULL` + + rows, err := app.DB.Queryx(query, app.Now) + if err != nil { + return err + } + defer rows.Close() + + for rows.Next() { + var offer_db Angebot + err = rows.StructScan(&offer_db) + + if err != nil { + return err + } + + if !app.offer_contains(W, offer_db) { + expire_query := `UPDATE angebot SET valid_until = ? WHERE id = ?` + _, err = app.DB.Exec(expire_query, app.Now, offer_db.Id) + if err != nil { + return err + } + } + } + + return nil +} + +func (app *App) offer_contains(W []Angebot, offer_db Angebot) bool { + for _, v := range W { + if v.Shop == offer_db.Shop && v.Name == offer_db.Name && v.Original_price == offer_db.Original_price && v.Discounted_price == offer_db.Discounted_price { + + if app.Config.Debug { + log.Println("Contains: " + v.Name) + log.Println("") + } + + return true + + } + } + + if app.Config.Debug { + log.Println("Contains not: " + offer_db.Name) + log.Println("") + } + + return false +} diff --git a/crawler/main.go b/crawler/main.go new file mode 100644 index 0000000..5255e1c --- /dev/null +++ b/crawler/main.go @@ -0,0 +1,140 @@ +package main + +import ( + "encoding/json" + "fmt" + "log" + "time" + + _ "database/sql" + _ "github.com/go-sql-driver/mysql" + //_ "github.com/mattn/go-sqlite3" + + "github.com/jmoiron/sqlx" +) + +type App struct { + Offers []Angebot + Shops []Shop + Config *Config + DB *sqlx.DB + Now int64 + Debug bool +} + +type Angebot struct { + Id int + Name string + Shop int + Url string + Original_price int + Discounted_price int + Image_url string + Spirit_type string + Valid_until int +} + +type Shop struct { + Id int + Name string + Url string + Logo_url string + Shipping_costs int + Free_shipping string +} + +func main() { + + var err error + + app := App{Config: &Config{}} + app.Config.parseConfig("") + + app.Now = time.Now().Unix() + + if "sqlite3" == app.Config.DBDriver { + //app.DB, err = sqlx.Connect(app.Config.DBDriver, app.Config.DBPath) + app.DB, err = sqlx.Connect(app.Config.DBDriver, app.Config.DBPath) + } else { + + if app.Config.Debug { + log.Println(app.Config.DBUser + ":" + app.Config.DBPassword + "@tcp(" + app.Config.DBHost + ":" + app.Config.DBPort + ")/" + app.Config.DBDBName + app.Config.DBOptions) + } + app.DB, err = sqlx.Connect(app.Config.DBDriver, app.Config.DBUser+":"+app.Config.DBPassword+"@tcp("+app.Config.DBHost+":"+app.Config.DBPort+")/"+app.Config.DBDBName+app.Config.DBOptions) + } + defer app.DB.Close() + + if err != nil { + log.Fatal(err) + } + + err = app.createTables() + if err != nil { + log.Fatal(err) + } + + err = app.insertShops() + if err != nil { + log.Fatal(err) + } + + shops, err := app.getShops() + if err != nil { + log.Fatal(err) + } + + W := ScrapeHTML(shops) + + err = app.save_offer(W) + if err != nil { + log.Fatal(err) + } + err = app.remove_expired(W) + if err != nil { + log.Fatal(err) + } +} + +func printName(W []Angebot, name string) { + return + fmt.Println("-------------------") + fmt.Println("Sonderangebote von " + name) + fmt.Println("-------------------") + + output, err := json.MarshalIndent(W, "", " ") + if err != nil { + log.Fatal(err) + } + + fmt.Println(string(output)) +} + +func ScrapeHTML(shops []Shop) []Angebot { + var W []Angebot + + for _, shop := range shops { + + switch shop.Name { + case "Bottleworld": + W = append(W, ScrapeBottleWord(shop)...) + case "MC Whisky": + W = append(W, ScrapeMCWhisky(shop)...) + case "Rum & Co": + W = append(W, ScrapeRumundCo(shop)...) + case "Whic": + W = append(W, ScrapeWhic(shop)...) + case "Whisky.de": + W = append(W, ScrapeWhiskyde(shop)...) + case "Whiskysite.nl": + W = append(W, ScrapeWhiskysitenl(shop)...) + case "Whisky World": + W = append(W, ScrapeWhiskyworld(shop)...) + case "Whiskyzone": + W = append(W, ScrapeWhiskyzone(shop)...) + default: + log.Println(shop.Name + ": No Crawler") + } + } + + return W +} diff --git a/crawler/sanitize_name.go b/crawler/sanitize_name.go new file mode 100644 index 0000000..73b2714 --- /dev/null +++ b/crawler/sanitize_name.go @@ -0,0 +1,13 @@ +package main + +import ( + "strings" +) + +func sanitize_name(name string) string { + if strings.Contains(name, "y.o.") { + name = strings.Replace(name, "y.o.", "Jahre", 1) + } + + return name +} diff --git a/crawler/sanitize_price.go b/crawler/sanitize_price.go new file mode 100644 index 0000000..2052842 --- /dev/null +++ b/crawler/sanitize_price.go @@ -0,0 +1,103 @@ +package main + +import ( + "errors" + "strconv" + "strings" +) + +func sanitize_price(price string) (int, error) { + if "" == price { + return 0, errors.New("Empty string") + } + + multiply_by_10 := false + multiply_by_100 := true + + price = strings.TrimSpace(price) + + price = strings.TrimPrefix(price, "€") + price = strings.TrimSpace(price) + + price = strings.TrimSuffix(price, "€") + price = strings.TrimSpace(price) + + price = strings.TrimSuffix(strings.ToLower(price), "eur") + price = strings.TrimSpace(price) + + price = strings.TrimSuffix(strings.ToLower(price), "euro") + price = strings.TrimSpace(price) + + if len(price) < 2 { + price = "0" + price + } else if len(price) < 3 { + price = "00" + price + } + + c := string(price[len(price)-2:]) + c = string(c[0:1]) + + /* + Extracts the second last char and checks if it's a "." or a ",". + */ + if "," == c { + if strings.Count(price, ",") > 1 { + return 0, errors.New("Invalid format") + } + + multiply_by_10 = true + multiply_by_100 = false + + } else if "." == c { + if strings.Count(price, ".") > 1 { + return 0, errors.New("Invalid format") + } + + multiply_by_10 = true + multiply_by_100 = false + + } + + c = string(price[len(price)-3:]) + c = string(c[0:1]) + + /* + Extracts the third last char and checks if it's a "." or a ",". + */ + if "," == c { + if strings.Count(price, ",") > 1 { + return 0, errors.New("Invalid format") + } + + multiply_by_10 = false + multiply_by_100 = false + + } else if "." == c { + if strings.Count(price, ".") > 1 { + return 0, errors.New("Invalid format") + } + + multiply_by_10 = false + multiply_by_100 = false + + } + + price = strings.Replace(price, ",", "", -1) + price = strings.Replace(price, ".", "", -1) + + /* + Casts the price to integer in cents (not euro!). + */ + price_int, err := strconv.Atoi(price) + if err != nil { + return 0, err + } + + if multiply_by_10 { + price_int = price_int * 10 + } else if multiply_by_100 { + price_int = price_int * 100 + } + + return price_int, nil +} diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go new file mode 100644 index 0000000..3a3c631 --- /dev/null +++ b/crawler/shop_bottleworld.go @@ -0,0 +1,65 @@ +package main + +import ( + "log" + "regexp" + // "strings" + + // "github.com/PuerkitoBio/goquery" + "github.com/gocolly/colly" +) + +func ScrapeBottleWord(shop Shop) []Angebot { + Whiskys := []Angebot{} + + c := colly.NewCollector( + colly.AllowedDomains("bottleworld.de"), + colly.AllowedDomains("www.bottleworld.de"), + ) + + c.OnHTML("li.item", func(e *colly.HTMLElement) { + W := Angebot{} + + whisky_name := e.ChildText("h2 > a") + + matched, err := regexp.MatchString("Whiske?y", whisky_name) + if err != nil { + log.Fatal(err) + } + if !matched { + //W.Spirit_type = "Anderes" + return + } else { + W.Spirit_type = "Whisky" + } + + whisky_url := e.ChildAttr("a", "href") + W.Name = whisky_name + W.Url = whisky_url + + e.ForEach(".price-box", func(i int, e *colly.HTMLElement) { + e.ForEach(".old-price", func(i int, e *colly.HTMLElement) { + W.Original_price, err = sanitize_price(e.ChildText(".price")) + if err != nil { + log.Fatal(err) + } + }) + e.ForEach(".special-price", func(i int, e *colly.HTMLElement) { + W.Discounted_price, err = sanitize_price(e.ChildText(".price")) + if err != nil { + log.Fatal(err) + } + }) + }) + W.Image_url = e.ChildAttr("img", "src") + + W.Shop = shop.Id + W.Spirit_type = "Whisky" + + Whiskys = append(Whiskys, W) + }) + + c.Visit("https://www.bottleworld.de/aktuelle-sonderpreise/show/all") + + return Whiskys +} diff --git a/crawler/shop_mcwhisky.go b/crawler/shop_mcwhisky.go new file mode 100644 index 0000000..e45e740 --- /dev/null +++ b/crawler/shop_mcwhisky.go @@ -0,0 +1,54 @@ +package main + +import ( + "log" + // "strings" + + // "github.com/PuerkitoBio/goquery" + "github.com/gocolly/colly" +) + +func ScrapeMCWhisky(shop Shop) []Angebot { + Whiskys := []Angebot{} + + c := colly.NewCollector( + colly.AllowedDomains("mcwhisky.com"), + colly.AllowedDomains("www.mcwhisky.com"), + ) + + c.OnHTML("li.item", func(e *colly.HTMLElement) { + W := Angebot{} + + whisky_name := sanitize_name(e.ChildAttr("a", "title")) + whisky_url := e.ChildAttr("a", "href") + W.Name = whisky_name + W.Url = whisky_url + + var err error + + e.ForEach(".price-box", func(i int, e *colly.HTMLElement) { + e.ForEach(".old-price", func(i int, e *colly.HTMLElement) { + W.Original_price, err = sanitize_price(e.ChildText(".price")) + if err != nil { + log.Fatal(err) + } + }) + e.ForEach(".special-price", func(i int, e *colly.HTMLElement) { + W.Discounted_price, err = sanitize_price(e.ChildText(".price")) + if err != nil { + log.Fatal(err) + } + }) + }) + W.Image_url = e.ChildAttr("img", "src") + + W.Shop = shop.Id + W.Spirit_type = "Whisky" + + Whiskys = append(Whiskys, W) + }) + + c.Visit("https://www.mcwhisky.com/whisky/whisky-sonderangebote.html") + + return Whiskys +} diff --git a/crawler/shop_rumundco.go b/crawler/shop_rumundco.go new file mode 100644 index 0000000..ae349f3 --- /dev/null +++ b/crawler/shop_rumundco.go @@ -0,0 +1,62 @@ +package main + +import ( + "log" + "regexp" + "strings" + + // "github.com/PuerkitoBio/goquery" + "github.com/gocolly/colly" +) + +func ScrapeRumundCo(shop Shop) []Angebot { + Whiskys := []Angebot{} + + c := colly.NewCollector( + colly.AllowedDomains("rumundco.de"), + colly.AllowedDomains("www.rumundco.de"), + ) + + c.OnHTML(".product-teaser", func(e *colly.HTMLElement) { + W := Angebot{} + + whisky_name := strings.TrimPrefix(e.ChildAttr("img", "alt"), "Restposten: ") + whisky_url := "https://www.rumundco.de/" + e.ChildAttr("a", "href") + + matched, err := regexp.MatchString("verfügbar", e.ChildText(".delivery-status")) + if err != nil { + log.Fatal(err) + } + if !matched { + return + } + + W.Name = whisky_name + W.Url = whisky_url + + e.ForEach(".price_wrapper", func(i int, e *colly.HTMLElement) { + regular_price := e.ChildText("del.value") + if "" == regular_price { + return + } + W.Original_price, err = sanitize_price(regular_price) + if err != nil { + log.Fatal(err) + } + W.Discounted_price, err = sanitize_price(e.ChildText(".price-value")) + if err != nil { + log.Fatal(err) + } + }) + W.Image_url = "https://www.rumundco.de/" + e.ChildAttr("img", "src") + + W.Shop = shop.Id + W.Spirit_type = "Whisky" + + Whiskys = append(Whiskys, W) + }) + + c.Visit("https://www.rumundco.de/navi.php?q=4&kf=29&kk-suesse-von=0&kk-suesse-bis=100&kk-milde-von=0&kk-milde-bis=100&kk-wuerze-von=0&kk-wuerze-bis=100&kk-frucht-von=0&kk-frucht-bis=100&kk-torf-von=0&kk-torf-bis=100&hf=0&af=90&Sortierung=11&a=350") + + return Whiskys +} diff --git a/crawler/shop_whic.go b/crawler/shop_whic.go new file mode 100644 index 0000000..896b1fb --- /dev/null +++ b/crawler/shop_whic.go @@ -0,0 +1,64 @@ +package main + +import ( + "log" + "strings" + + "github.com/PuerkitoBio/goquery" + "github.com/gocolly/colly" +) + +func ScrapeWhic(shop Shop) []Angebot { + Whiskys := []Angebot{} + + c := colly.NewCollector( + colly.AllowedDomains("whic.de"), + ) + + c.OnHTML("li.item", func(e *colly.HTMLElement) { + W := Angebot{} + + whisky_name := e.ChildAttr("a", "title") + whisky_url := e.ChildAttr("a", "href") + + W.Name = whisky_name + W.Url = whisky_url + + var err error + + e.ForEach(".price-box", func(i int, e *colly.HTMLElement) { + e.ForEach(".old-price", func(i int, e *colly.HTMLElement) { + W.Original_price, err = sanitize_price(e.ChildText(".price")) + if err != nil { + log.Fatal(err) + } + }) + e.ForEach(".special-price", func(i int, e *colly.HTMLElement) { + W.Discounted_price, err = sanitize_price(e.ChildText(".price")) + if err != nil { + log.Fatal(err) + } + }) + }) + + /* + * colly does not parse a <noscript>, thus we are reading the content and parse it as html. + */ + img_link_noisy := e.ChildText(".product-image") + + doc, err := goquery.NewDocumentFromReader(strings.NewReader(img_link_noisy)) + if err != nil { + log.Fatal(err) + } + W.Image_url, _ = doc.Find("img").Attr("src") + + W.Shop = shop.Id + W.Spirit_type = "Whisky" + + Whiskys = append(Whiskys, W) + }) + + c.Visit("https://whic.de/angebote") + + return Whiskys +} diff --git a/crawler/shop_whiskyde.go b/crawler/shop_whiskyde.go new file mode 100644 index 0000000..657bfe0 --- /dev/null +++ b/crawler/shop_whiskyde.go @@ -0,0 +1,58 @@ +package main + +import ( + "log" + "strings" + + "github.com/gocolly/colly" +) + +func ScrapeWhiskyde(shop Shop) []Angebot { + Whiskys := []Angebot{} + + c := colly.NewCollector( + colly.AllowedDomains("whisky.de"), + colly.AllowedDomains("www.whisky.de"), + ) + + c.OnHTML(".is-buyable", func(e *colly.HTMLElement) { + + W := Angebot{} + whisky_name := e.ChildAttr("a", "title") + W.Name = whisky_name + whisky_url := strings.Replace(e.ChildAttr("a", "href"), "?&searchorigin=2", "", 1) + W.Url = whisky_url + + var err error + + e.ForEach(".article-price-original", func(i int, e *colly.HTMLElement) { + W.Original_price, err = sanitize_price(e.ChildText("del")) + if err != nil { + log.Fatal(err) + } + }) + e.ForEach(".article-price", func(i int, e *colly.HTMLElement) { + W.Discounted_price, err = sanitize_price(e.ChildText(".article-price-default")) + if err != nil { + log.Fatal(err) + } + }) + + e.ForEach(".article-thumbnail", func(i int, e *colly.HTMLElement) { + W.Image_url = e.ChildAttr("img", "data-src") + }) + + e.ForEach(".article-price-prefix", func(i int, e *colly.HTMLElement) { + //W.Valid_until = e.ChildText(".article-price-special") + }) + + W.Shop = shop.Id + W.Spirit_type = "Whisky" + + Whiskys = append(Whiskys, W) + }) + + c.Visit("https://www.whisky.de/shop/Aktuell/Sonderangebote/") + + return Whiskys +} diff --git a/crawler/shop_whiskysitenl.go b/crawler/shop_whiskysitenl.go new file mode 100644 index 0000000..c8b35a2 --- /dev/null +++ b/crawler/shop_whiskysitenl.go @@ -0,0 +1,59 @@ +package main + +import ( + "log" + "regexp" + "strings" + + "github.com/gocolly/colly" +) + +func ScrapeWhiskysitenl(shop Shop) []Angebot { + Whiskys := []Angebot{} + + c := colly.NewCollector( + colly.AllowedDomains("whiskysite.nl"), + colly.AllowedDomains("www.whiskysite.nl"), + ) + + c.OnHTML(".product-block", func(e *colly.HTMLElement) { + + W := Angebot{} + + whisky_name := e.ChildAttr("img", "alt") + whisky_url := e.ChildAttr("a", "href") + + W.Name = whisky_name + W.Url = whisky_url + + regular_price := e.ChildText(".price-old") + price_discount_noisy := e.ChildText(".product-block-price") + r, err := regexp.Compile("[0-9]+(,[0-9]{1,2})") + if err != nil { + log.Fatal(err) + } + discounted_price := r.FindString(strings.Trim(strings.TrimPrefix(price_discount_noisy, regular_price), "")) + + W.Original_price, err = sanitize_price(regular_price) + if err != nil { + //log.Println(W.Name, err) + return + } + W.Discounted_price, err = sanitize_price(discounted_price) + if err != nil { + //log.Println(W.Name, err) + return + } + + W.Image_url = e.ChildAttr("img", "src") + + W.Shop = shop.Id + W.Spirit_type = "Whisky" + + Whiskys = append(Whiskys, W) + }) + + c.Visit("https://www.whiskysite.nl/en/specials/?limit=100") + + return Whiskys +} diff --git a/crawler/shop_whiskyworld.go b/crawler/shop_whiskyworld.go new file mode 100644 index 0000000..e07c42f --- /dev/null +++ b/crawler/shop_whiskyworld.go @@ -0,0 +1,60 @@ +package main + +import ( + "log" + "strings" + + "github.com/gocolly/colly" +) + +func ScrapeWhiskyworld(shop Shop) []Angebot { + + Whiskys := []Angebot{} + + c := colly.NewCollector( + colly.AllowedDomains("whiskyworld.de"), + colly.AllowedDomains("www.whiskyworld.de"), + ) + + c.OnHTML(".product-item", func(e *colly.HTMLElement) { + + W := Angebot{} + + whisky_name_part1 := e.ChildText("h3") + whisky_name_part2 := e.ChildText(".item-description") + + W.Name = whisky_name_part1 + " " + whisky_name_part2 + + W.Url = "https://www.whiskyworld.de/" + strings.TrimPrefix(e.ChildAttr("a", "href"), "../") + + regular_price_noisy := e.ChildText(".offer") + regular_price := strings.TrimSuffix(strings.TrimPrefix(regular_price_noisy, "statt "), " €*") + + var err error + + W.Original_price, err = sanitize_price(regular_price) + if err != nil { + log.Fatal(err) + return + } + + W.Discounted_price, err = sanitize_price(e.ChildText(".uvp")) + if err != nil { + log.Fatal(err) + return + } + + W.Image_url = "https:" + e.ChildAttr("img", "src") + + W.Shop = shop.Id + W.Spirit_type = "Whisky" + + Whiskys = append(Whiskys, W) + }) + + c.Visit("https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BMalt%2522%257D") + c.Visit("https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BWhiskies%2522%257D") + c.Visit("https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Single%2BMalt%2522%257D") + + return Whiskys +} diff --git a/crawler/shop_whiskyzone.go b/crawler/shop_whiskyzone.go new file mode 100644 index 0000000..a9e73d0 --- /dev/null +++ b/crawler/shop_whiskyzone.go @@ -0,0 +1,56 @@ +package main + +import ( + "log" + "regexp" + + "github.com/gocolly/colly" +) + +func ScrapeWhiskyzone(shop Shop) []Angebot { + + Whiskys := []Angebot{} + + c := colly.NewCollector( + colly.AllowedDomains("whiskyzone.de"), + colly.AllowedDomains("www.whiskyzone.de"), + ) + + c.OnHTML(".product--info", func(e *colly.HTMLElement) { + + W := Angebot{} + + W.Name = e.ChildAttr("a", "title") + W.Url = e.ChildAttr("a", "href") + price_discount_noisy := e.ChildText(".price--default") + price_regular_noisy := e.ChildText(".price--discount") + + r, err := regexp.Compile("[0-9]+(,[0-9]{1,2})") + if err != nil { + log.Fatal(err) + } + W.Discounted_price, err = sanitize_price(r.FindString(price_discount_noisy)) + if err != nil { + log.Fatal(err) + return + } + W.Original_price, err = sanitize_price(r.FindString(price_regular_noisy)) + if err != nil { + log.Fatal(err) + return + } + + e.ForEach(".image--media", func(i int, e *colly.HTMLElement) { + W.Image_url = e.ChildAttr("img", "src") + }) + + W.Shop = shop.Id + W.Spirit_type = "Whisky" + + Whiskys = append(Whiskys, W) + }) + + c.Visit("https://www.whiskyzone.de/widgets/emotion/index/emotionId/248/controllerName/listing") + + return Whiskys +} diff --git a/crawler/shops.go b/crawler/shops.go new file mode 100644 index 0000000..92b11cd --- /dev/null +++ b/crawler/shops.go @@ -0,0 +1,114 @@ +package main + +import ( + "log" +) + +func (app *App) insertShops() error { + shops := getShopsFromStruct() + + query := `INSERT IGNORE INTO shop (name, url, logo_url, shipping_costs, free_shipping) VALUES(?, ?, ?, ?, ?)` + + for _, v := range shops { + + _, err := app.DB.Exec(query, v.Name, v.Url, v.Logo_url, v.Shipping_costs, v.Free_shipping) + if err != nil { + return err + } + } + + return nil + +} + +func getShopsFromStruct() []Shop { + Shops := []Shop{} + + Shops = append(Shops, Shop{ + Name: "Bottleworld", + Url: "https://www.bottleword.de", + Logo_url: "", + Shipping_costs: 0, + Free_shipping: "", + }) + Shops = append(Shops, Shop{ + Name: "MC Whisky", + Url: "https://www.mcwhisky.com", + Logo_url: "", + Shipping_costs: 0, + Free_shipping: "", + }) + Shops = append(Shops, Shop{ + Name: "Rum & Co", + Url: "https://www.rumundco.de", + Logo_url: "", + Shipping_costs: 0, + Free_shipping: "", + }) + Shops = append(Shops, Shop{ + Name: "Whic", + Url: "https://whic.de", + Logo_url: "", + Shipping_costs: 0, + Free_shipping: "", + }) + Shops = append(Shops, Shop{ + Name: "Whisky.de", + Url: "https://www.whisky.de", + Logo_url: "", + Shipping_costs: 0, + Free_shipping: "", + }) + Shops = append(Shops, Shop{ + Name: "Whiskysite.nl", + Url: "https://www.whiskysite.nl", + Logo_url: "", + Shipping_costs: 0, + Free_shipping: "", + }) + Shops = append(Shops, Shop{ + Name: "Whisky World", + Url: "https://www.whiskyworld.de", + Logo_url: "", + Shipping_costs: 0, + Free_shipping: "", + }) + Shops = append(Shops, Shop{ + Name: "Whiskyzone", + Url: "https://www.whiskyzone.de", + Logo_url: "", + Shipping_costs: 0, + Free_shipping: "", + }) + + return Shops +} + +func (app *App) getShops() ([]Shop, error) { + + Shops := []Shop{} + + query := `SELECT id,name,url,logo_url,shipping_costs,free_shipping FROM shop` + + rows, err := app.DB.Queryx(query) + if err != nil { + return []Shop{}, err + } + defer rows.Close() + + for rows.Next() { + var shop Shop + err = rows.StructScan(&shop) + + if err != nil { + return []Shop{}, err + } + if app.Config.Debug { + log.Println("Appending: " + shop.Name) + } + + Shops = append(Shops, shop) + } + + return Shops, nil +} |
