summaryrefslogtreecommitdiff
path: root/crawler
diff options
context:
space:
mode:
author Max 2018-02-06 00:35:39 +0100
committer Max 2018-02-06 00:35:39 +0100
commit 71950479fbd6088f249e5fda3b180f294d1d745d (patch)
tree 06f360a7e02b7e0011bda815fa102ec54ae8d0ec /crawler
parent 13a807854bf4d0258723ec3152b217ed4cf8e051 (diff)
download alkobote-71950479fbd6088f249e5fda3b180f294d1d745d.tar.gz
Moves crawler to designated directory.
Diffstat (limited to 'crawler')
-rw-r--r-- crawler/Makefile 20
-rw-r--r-- crawler/config.go 76
-rw-r--r-- crawler/database.go 130
-rw-r--r-- crawler/main.go 140
-rw-r--r-- crawler/sanitize_name.go 13
-rw-r--r-- crawler/sanitize_price.go 103
-rw-r--r-- crawler/shop_bottleworld.go 65
-rw-r--r-- crawler/shop_mcwhisky.go 54
-rw-r--r-- crawler/shop_rumundco.go 62
-rw-r--r-- crawler/shop_whic.go 64
-rw-r--r-- crawler/shop_whiskyde.go 58
-rw-r--r-- crawler/shop_whiskysitenl.go 59
-rw-r--r-- crawler/shop_whiskyworld.go 60
-rw-r--r-- crawler/shop_whiskyzone.go 56
-rw-r--r-- crawler/shops.go 114
15 files changed, 1074 insertions, 0 deletions
diff --git a/crawler/Makefile b/crawler/Makefile
new file mode 100644
index 0000000..7126e13
--- /dev/null
+++ b/crawler/Makefile
@@ -0,0 +1,20 @@
+# The binary is named after the directory that contains this Makefile.
+BINARY := $(notdir $(patsubst %/,%,$(dir $(abspath $(lastword $(MAKEFILE_LIST))))))
+MAINFILE :=
+SOURCEDIR := .
+SOURCES := $(shell find $(SOURCEDIR) -name '*.go')
+
+# These targets are commands, not files; declaring them phony keeps them
+# working even if a file with the same name appears in this directory.
+.PHONY: all build run clean edit
+
+all: build run
+
+build: $(BINARY)
+
+# Rebuild the binary whenever any Go source file changes.
+$(BINARY): $(SOURCES)
+	go build -o $(BINARY)
+
+run: build
+	./$(BINARY)
+
+clean:
+	$(RM) $(RMFLAGS) $(BINARY)
+
+# Opens MAINFILE (empty by default) and all Go files in $(EDITOR).
+edit:
+	$(EDITOR) $(MAINFILE) *.go
diff --git a/crawler/config.go b/crawler/config.go
new file mode 100644
index 0000000..2706201
--- /dev/null
+++ b/crawler/config.go
@@ -0,0 +1,76 @@
+package main
+
+import (
+ log "github.com/Sirupsen/logrus"
+ "github.com/spf13/viper"
+)
+
+// Config holds the crawler settings that setsConfig copies out of viper.
+type Config struct {
+	// Database connection settings.
+	DBDriver, DBDBName            string
+	DBHost, DBPort                string
+	DBUser, DBPassword, DBOptions string
+
+	// DBPath is the database file used when DBDriver is "sqlite3".
+	DBPath string
+
+	// Debug switches on additional log output.
+	Debug bool
+}
+
+// parseConfig registers default values, reads the "config" file through
+// viper and copies the resulting settings into c.
+//
+// When configFile is empty the standard locations are searched; otherwise
+// configFile is added as the only search path. Environment variables
+// prefixed with "ALKOBOTE" are picked up automatically. The process is
+// terminated via a fatal log entry if the configuration cannot be read.
+func (c *Config) parseConfig(configFile string) {
+	// Fallback values for keys that are not configured anywhere.
+	defaults := []struct {
+		key   string
+		value interface{}
+	}{
+		{"DBDriver", "mysql"},
+		{"DBDBName", "alkobote"},
+		{"DBHost", "localhost"},
+		{"DBPort", "3306"},
+		{"DBPath", "./alkobote.db"},
+		{"Debug", false},
+	}
+	for _, d := range defaults {
+		viper.SetDefault(d.key, d.value)
+	}
+
+	// Base name of the configuration file (extension is resolved by viper).
+	viper.SetConfigName("config")
+
+	// Directories that are searched for the configuration file.
+	searchPaths := []string{configFile}
+	if configFile == "" {
+		searchPaths = []string{
+			"/etc/alkobote.de/",
+			".",
+			"$HOME/.config/alkobote.de/",
+			"$HOME/alkobote.de/",
+		}
+	}
+	for _, dir := range searchPaths {
+		viper.AddConfigPath(dir)
+	}
+
+	// Environment variables must carry the "ALKOBOTE" prefix and are matched
+	// automatically.
+	viper.SetEnvPrefix("ALKOBOTE")
+	viper.AutomaticEnv()
+
+	if err := viper.ReadInConfig(); err != nil {
+		log.WithFields(log.Fields{"error": err.Error()}).Fatal("Fatal error config file")
+	}
+
+	c.setsConfig()
+}
+
+// setsConfig overwrites every field of c with the value currently held by
+// viper under the key of the same name.
+func (c *Config) setsConfig() {
+	*c = Config{
+		DBDriver:   viper.GetString("DBDriver"),
+		DBHost:     viper.GetString("DBHost"),
+		DBPort:     viper.GetString("DBPort"),
+		DBUser:     viper.GetString("DBUser"),
+		DBPassword: viper.GetString("DBPassword"),
+		DBDBName:   viper.GetString("DBDBName"),
+		DBOptions:  viper.GetString("DBOptions"),
+		DBPath:     viper.GetString("DBPath"),
+		Debug:      viper.GetBool("Debug"),
+	}
+}
diff --git a/crawler/database.go b/crawler/database.go
new file mode 100644
index 0000000..a6145bb
--- /dev/null
+++ b/crawler/database.go
@@ -0,0 +1,130 @@
+package main
+
+import (
+ "database/sql"
+ "log"
+)
+
+// createTables creates the shop and angebot tables if they do not exist yet.
+// The statements run in order (angebot references shop) and the first error
+// aborts the sequence and is returned.
+func (app *App) createTables() error {
+	statements := []string{
+		`CREATE TABLE IF NOT EXISTS shop(
+ id INTEGER PRIMARY KEY AUTO_INCREMENT,
+ name varchar(255) UNIQUE,
+ url varchar(255) UNIQUE,
+ logo_url text,
+ shipping_costs text,
+ free_shipping text
+ )`,
+		`CREATE TABLE IF NOT EXISTS angebot (
+ id INTEGER PRIMARY KEY AUTO_INCREMENT,
+ shop int,
+ name TEXT,
+ url TEXT,
+ original_price INT,
+ discounted_price INT,
+ image_url TEXT,
+ spirit_type TEXT,
+ valid_until INT DEFAULT NULL,
+ created_at INT,
+ FOREIGN KEY(shop) REFERENCES shop(id)
+ )`,
+	}
+
+	for _, ddl := range statements {
+		if _, err := app.DB.Exec(ddl); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// save_offer inserts every offer from W that is not stored yet.
+//
+// Offers whose original or discounted price is 0 are skipped. An offer counts
+// as already stored when a row with the same shop, name, url, prices, image
+// url and spirit type exists. A Valid_until of 0 is written as NULL and
+// created_at is set to app.Now.
+//
+// The first database error is returned. This includes errors from the
+// duplicate lookup, which were previously ignored and caused the offer to be
+// dropped silently.
+func (app *App) save_offer(W []Angebot) error {
+	const insertQuery = `INSERT INTO angebot (shop, name, url, original_price, discounted_price, valid_until, image_url, spirit_type, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
+	const existsQuery = "SELECT 1 FROM angebot WHERE shop = ? AND name = ? AND url = ? AND original_price = ? AND discounted_price = ? AND image_url = ? AND spirit_type = ?"
+
+	stmt, err := app.DB.Prepare(insertQuery)
+	if err != nil {
+		return err
+	}
+	defer stmt.Close()
+
+	for _, o := range W {
+		if o.Discounted_price == 0 || o.Original_price == 0 {
+			continue
+		}
+
+		var found int
+		err := app.DB.QueryRow(existsQuery, o.Shop, o.Name, o.Url, o.Original_price, o.Discounted_price, o.Image_url, o.Spirit_type).Scan(&found)
+		if err == nil {
+			// An identical offer is already stored.
+			continue
+		}
+		if err != sql.ErrNoRows {
+			// A real lookup failure must not be mistaken for "already stored".
+			return err
+		}
+
+		// A zero Valid_until means "no known end date" and is stored as NULL.
+		var validUntil interface{} = o.Valid_until
+		if 0 == o.Valid_until {
+			validUntil = sql.NullInt64{}
+		}
+
+		if _, err = stmt.Exec(o.Shop, o.Name, o.Url, o.Original_price, o.Discounted_price, validUntil, o.Image_url, o.Spirit_type, app.Now); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// remove_expired marks stored offers as expired when they are no longer part
+// of the freshly crawled offers W.
+//
+// Only rows created before app.Now that have no valid_until yet are
+// considered. Each such row that offer_contains does not find in W gets its
+// valid_until set to app.Now. The first database error is returned, including
+// an error that interrupted the row iteration.
+func (app *App) remove_expired(W []Angebot) error {
+
+	query := `SELECT id, shop, name, url, original_price, discounted_price FROM angebot WHERE created_at < ? AND valid_until IS NULL`
+
+	rows, err := app.DB.Queryx(query, app.Now)
+	if err != nil {
+		return err
+	}
+	defer rows.Close()
+
+	for rows.Next() {
+		var offer_db Angebot
+		if err = rows.StructScan(&offer_db); err != nil {
+			return err
+		}
+
+		if app.offer_contains(W, offer_db) {
+			continue
+		}
+
+		expire_query := `UPDATE angebot SET valid_until = ? WHERE id = ?`
+		if _, err = app.DB.Exec(expire_query, app.Now, offer_db.Id); err != nil {
+			return err
+		}
+	}
+
+	// rows.Next returns false on both exhaustion and failure; Err tells them apart.
+	return rows.Err()
+}
+
+// offer_contains reports whether W holds an offer with the same shop, name,
+// original price and discounted price as offer_db. With debugging enabled the
+// outcome is logged.
+func (app *App) offer_contains(W []Angebot, offer_db Angebot) bool {
+	for _, candidate := range W {
+		differs := candidate.Shop != offer_db.Shop ||
+			candidate.Name != offer_db.Name ||
+			candidate.Original_price != offer_db.Original_price ||
+			candidate.Discounted_price != offer_db.Discounted_price
+		if differs {
+			continue
+		}
+
+		if app.Config.Debug {
+			log.Println("Contains: " + candidate.Name)
+			log.Println("")
+		}
+		return true
+	}
+
+	if app.Config.Debug {
+		log.Println("Contains not: " + offer_db.Name)
+		log.Println("")
+	}
+
+	return false
+}
diff --git a/crawler/main.go b/crawler/main.go
new file mode 100644
index 0000000..5255e1c
--- /dev/null
+++ b/crawler/main.go
@@ -0,0 +1,140 @@
+package main
+
+import (
+ "encoding/json"
+ "fmt"
+ "log"
+ "time"
+
+ _ "database/sql"
+ _ "github.com/go-sql-driver/mysql"
+ //_ "github.com/mattn/go-sqlite3"
+
+ "github.com/jmoiron/sqlx"
+)
+
+// App bundles the state shared by the crawler's database and scraping code.
+type App struct {
+	Offers []Angebot
+	Shops  []Shop
+	Config *Config  // parsed configuration
+	DB     *sqlx.DB // database handle opened in main
+	Now    int64    // Unix timestamp taken once at startup
+	Debug  bool
+}
+
+// Angebot is a single special offer scraped from a shop.
+type Angebot struct {
+	Id   int
+	Name string
+	Shop int // id of the shop the offer belongs to
+	Url  string
+
+	// Prices as returned by sanitize_price.
+	Original_price, Discounted_price int
+
+	Image_url, Spirit_type string
+
+	// Valid_until of 0 is stored as NULL by save_offer.
+	Valid_until int
+}
+
+// Shop describes an online shop whose offers are crawled.
+type Shop struct {
+	Id int
+
+	Name, Url, Logo_url string
+
+	Shipping_costs int
+	Free_shipping  string
+}
+
+// main loads the configuration, connects to the database, makes sure the
+// tables and shop rows exist, scrapes all shops and finally stores new offers
+// and expires the ones that disappeared. Any error terminates the process.
+func main() {
+
+	app := App{Config: &Config{}}
+	app.Config.parseConfig("")
+
+	app.Now = time.Now().Unix()
+
+	// sqlite3 connects to a file path; every other driver gets a
+	// user:password@tcp(host:port)/dbname DSN.
+	dsn := app.Config.DBPath
+	if "sqlite3" != app.Config.DBDriver {
+		address := "@tcp(" + app.Config.DBHost + ":" + app.Config.DBPort + ")/" + app.Config.DBDBName + app.Config.DBOptions
+
+		if app.Config.Debug {
+			// The password is masked so that it never ends up in log files.
+			log.Println(app.Config.DBUser + ":***" + address)
+		}
+		dsn = app.Config.DBUser + ":" + app.Config.DBPassword + address
+	}
+
+	var err error
+	app.DB, err = sqlx.Connect(app.Config.DBDriver, dsn)
+	if err != nil {
+		log.Fatal(err)
+	}
+	// Deferred only after a successful connect: app.DB is nil on failure.
+	defer app.DB.Close()
+
+	if err = app.createTables(); err != nil {
+		log.Fatal(err)
+	}
+
+	if err = app.insertShops(); err != nil {
+		log.Fatal(err)
+	}
+
+	shops, err := app.getShops()
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	W := ScrapeHTML(shops)
+
+	if err = app.save_offer(W); err != nil {
+		log.Fatal(err)
+	}
+	if err = app.remove_expired(W); err != nil {
+		log.Fatal(err)
+	}
+}
+
+func printName(W []Angebot, name string) {
+ return
+ fmt.Println("-------------------")
+ fmt.Println("Sonderangebote von " + name)
+ fmt.Println("-------------------")
+
+ output, err := json.MarshalIndent(W, "", " ")
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ fmt.Println(string(output))
+}
+
+// ScrapeHTML runs the scraper registered for each shop (looked up by shop
+// name) and returns all collected offers in shop order. Shops without a
+// scraper are logged and skipped.
+func ScrapeHTML(shops []Shop) []Angebot {
+	scrapers := map[string]func(Shop) []Angebot{
+		"Bottleworld":   ScrapeBottleWord,
+		"MC Whisky":     ScrapeMCWhisky,
+		"Rum & Co":      ScrapeRumundCo,
+		"Whic":          ScrapeWhic,
+		"Whisky.de":     ScrapeWhiskyde,
+		"Whiskysite.nl": ScrapeWhiskysitenl,
+		"Whisky World":  ScrapeWhiskyworld,
+		"Whiskyzone":    ScrapeWhiskyzone,
+	}
+
+	var W []Angebot
+
+	for _, shop := range shops {
+		scrape, known := scrapers[shop.Name]
+		if !known {
+			log.Println(shop.Name + ": No Crawler")
+			continue
+		}
+		W = append(W, scrape(shop)...)
+	}
+
+	return W
+}
diff --git a/crawler/sanitize_name.go b/crawler/sanitize_name.go
new file mode 100644
index 0000000..73b2714
--- /dev/null
+++ b/crawler/sanitize_name.go
@@ -0,0 +1,13 @@
+package main
+
+import (
+ "strings"
+)
+
+// sanitize_name replaces the first "y.o." in name with "Jahre". Names that do
+// not contain "y.o." are returned unchanged.
+func sanitize_name(name string) string {
+	return strings.Replace(name, "y.o.", "Jahre", 1)
+}
diff --git a/crawler/sanitize_price.go b/crawler/sanitize_price.go
new file mode 100644
index 0000000..2052842
--- /dev/null
+++ b/crawler/sanitize_price.go
@@ -0,0 +1,103 @@
+package main
+
+import (
+ "errors"
+ "strconv"
+ "strings"
+)
+
+// sanitize_price converts a scraped price such as "12,50 €", "€ 9.99",
+// "1.299,00 EUR" or "12" into an integer amount in cents.
+//
+// Surrounding whitespace, a leading or trailing "€" and a trailing
+// "eur"/"euro" (any case) are removed. A "," or "." in the second-to-last
+// position is read as a separator before one decimal digit, in the
+// third-to-last position as a separator before two decimal digits. Every
+// other "," or "." is treated as a thousands separator and dropped. Without
+// decimals the value is taken as whole euros.
+//
+// An error is returned for empty input, for input that holds nothing but
+// currency markers, when the detected decimal separator occurs more than
+// once, and when the remaining text is not a number.
+func sanitize_price(price string) (int, error) {
+	if "" == price {
+		return 0, errors.New("Empty string")
+	}
+
+	price = strings.TrimSpace(price)
+	price = strings.TrimSpace(strings.TrimPrefix(price, "€"))
+	price = strings.TrimSpace(strings.TrimSuffix(price, "€"))
+	price = strings.TrimSpace(strings.TrimSuffix(strings.ToLower(price), "eur"))
+	price = strings.TrimSpace(strings.TrimSuffix(strings.ToLower(price), "euro"))
+
+	// Input such as "€" leaves nothing to parse; report it instead of
+	// indexing into an empty string.
+	if "" == price {
+		return 0, errors.New("Empty string")
+	}
+
+	// Factor that turns the digits (separators removed) into cents: 100 for
+	// whole euros, 10 for one decimal digit, 1 for two decimal digits.
+	factor := 100
+
+	// The third-to-last position is probed last so that it wins, and
+	// positions that do not exist in short strings are skipped.
+	probes := []struct{ fromEnd, factor int }{{2, 10}, {3, 1}}
+	for _, p := range probes {
+		pos := len(price) - p.fromEnd
+		if pos < 0 {
+			continue
+		}
+		sep := price[pos]
+		if sep != ',' && sep != '.' {
+			continue
+		}
+		if strings.Count(price, string(sep)) > 1 {
+			return 0, errors.New("Invalid format")
+		}
+		factor = p.factor
+	}
+
+	digits := strings.Replace(price, ",", "", -1)
+	digits = strings.Replace(digits, ".", "", -1)
+
+	cents, err := strconv.Atoi(digits)
+	if err != nil {
+		return 0, err
+	}
+
+	return cents * factor, nil
+}
diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go
new file mode 100644
index 0000000..3a3c631
--- /dev/null
+++ b/crawler/shop_bottleworld.go
@@ -0,0 +1,65 @@
+package main
+
+import (
+ "log"
+ "regexp"
+ // "strings"
+
+ // "github.com/PuerkitoBio/goquery"
+ "github.com/gocolly/colly"
+)
+
+func ScrapeBottleWord(shop Shop) []Angebot {
+ Whiskys := []Angebot{}
+
+ c := colly.NewCollector(
+ colly.AllowedDomains("bottleworld.de"),
+ colly.AllowedDomains("www.bottleworld.de"),
+ )
+
+ c.OnHTML("li.item", func(e *colly.HTMLElement) {
+ W := Angebot{}
+
+ whisky_name := e.ChildText("h2 > a")
+
+ matched, err := regexp.MatchString("Whiske?y", whisky_name)
+ if err != nil {
+ log.Fatal(err)
+ }
+ if !matched {
+ //W.Spirit_type = "Anderes"
+ return
+ } else {
+ W.Spirit_type = "Whisky"
+ }
+
+ whisky_url := e.ChildAttr("a", "href")
+ W.Name = whisky_name
+ W.Url = whisky_url
+
+ e.ForEach(".price-box", func(i int, e *colly.HTMLElement) {
+ e.ForEach(".old-price", func(i int, e *colly.HTMLElement) {
+ W.Original_price, err = sanitize_price(e.ChildText(".price"))
+ if err != nil {
+ log.Fatal(err)
+ }
+ })
+ e.ForEach(".special-price", func(i int, e *colly.HTMLElement) {
+ W.Discounted_price, err = sanitize_price(e.ChildText(".price"))
+ if err != nil {
+ log.Fatal(err)
+ }
+ })
+ })
+ W.Image_url = e.ChildAttr("img", "src")
+
+ W.Shop = shop.Id
+ W.Spirit_type = "Whisky"
+
+ Whiskys = append(Whiskys, W)
+ })
+
+ c.Visit("https://www.bottleworld.de/aktuelle-sonderpreise/show/all")
+
+ return Whiskys
+}
diff --git a/crawler/shop_mcwhisky.go b/crawler/shop_mcwhisky.go
new file mode 100644
index 0000000..e45e740
--- /dev/null
+++ b/crawler/shop_mcwhisky.go
@@ -0,0 +1,54 @@
+package main
+
+import (
+ "log"
+ // "strings"
+
+ // "github.com/PuerkitoBio/goquery"
+ "github.com/gocolly/colly"
+)
+
+// ScrapeMCWhisky collects the offers from mcwhisky.com's special offer page.
+// Item names are passed through sanitize_name. A price that cannot be parsed
+// terminates the process; a failed page request is logged and yields an empty
+// result.
+func ScrapeMCWhisky(shop Shop) []Angebot {
+	Whiskys := []Angebot{}
+
+	// Both hosts go into a single option: a second AllowedDomains option
+	// would replace the first one instead of extending it.
+	c := colly.NewCollector(
+		colly.AllowedDomains("mcwhisky.com", "www.mcwhisky.com"),
+	)
+
+	c.OnHTML("li.item", func(e *colly.HTMLElement) {
+		W := Angebot{
+			Name: sanitize_name(e.ChildAttr("a", "title")),
+			Url:  e.ChildAttr("a", "href"),
+		}
+
+		var err error
+
+		e.ForEach(".price-box", func(i int, e *colly.HTMLElement) {
+			e.ForEach(".old-price", func(i int, e *colly.HTMLElement) {
+				W.Original_price, err = sanitize_price(e.ChildText(".price"))
+				if err != nil {
+					log.Fatal(err)
+				}
+			})
+			e.ForEach(".special-price", func(i int, e *colly.HTMLElement) {
+				W.Discounted_price, err = sanitize_price(e.ChildText(".price"))
+				if err != nil {
+					log.Fatal(err)
+				}
+			})
+		})
+		W.Image_url = e.ChildAttr("img", "src")
+
+		W.Shop = shop.Id
+		W.Spirit_type = "Whisky"
+
+		Whiskys = append(Whiskys, W)
+	})
+
+	if err := c.Visit("https://www.mcwhisky.com/whisky/whisky-sonderangebote.html"); err != nil {
+		log.Println(shop.Name + ": " + err.Error())
+	}
+
+	return Whiskys
+}
diff --git a/crawler/shop_rumundco.go b/crawler/shop_rumundco.go
new file mode 100644
index 0000000..ae349f3
--- /dev/null
+++ b/crawler/shop_rumundco.go
@@ -0,0 +1,62 @@
+package main
+
+import (
+ "log"
+ "regexp"
+ "strings"
+
+ // "github.com/PuerkitoBio/goquery"
+ "github.com/gocolly/colly"
+)
+
+func ScrapeRumundCo(shop Shop) []Angebot {
+ Whiskys := []Angebot{}
+
+ c := colly.NewCollector(
+ colly.AllowedDomains("rumundco.de"),
+ colly.AllowedDomains("www.rumundco.de"),
+ )
+
+ c.OnHTML(".product-teaser", func(e *colly.HTMLElement) {
+ W := Angebot{}
+
+ whisky_name := strings.TrimPrefix(e.ChildAttr("img", "alt"), "Restposten: ")
+ whisky_url := "https://www.rumundco.de/" + e.ChildAttr("a", "href")
+
+ matched, err := regexp.MatchString("verfügbar", e.ChildText(".delivery-status"))
+ if err != nil {
+ log.Fatal(err)
+ }
+ if !matched {
+ return
+ }
+
+ W.Name = whisky_name
+ W.Url = whisky_url
+
+ e.ForEach(".price_wrapper", func(i int, e *colly.HTMLElement) {
+ regular_price := e.ChildText("del.value")
+ if "" == regular_price {
+ return
+ }
+ W.Original_price, err = sanitize_price(regular_price)
+ if err != nil {
+ log.Fatal(err)
+ }
+ W.Discounted_price, err = sanitize_price(e.ChildText(".price-value"))
+ if err != nil {
+ log.Fatal(err)
+ }
+ })
+ W.Image_url = "https://www.rumundco.de/" + e.ChildAttr("img", "src")
+
+ W.Shop = shop.Id
+ W.Spirit_type = "Whisky"
+
+ Whiskys = append(Whiskys, W)
+ })
+
+ c.Visit("https://www.rumundco.de/navi.php?q=4&kf=29&kk-suesse-von=0&kk-suesse-bis=100&kk-milde-von=0&kk-milde-bis=100&kk-wuerze-von=0&kk-wuerze-bis=100&kk-frucht-von=0&kk-frucht-bis=100&kk-torf-von=0&kk-torf-bis=100&hf=0&af=90&Sortierung=11&a=350")
+
+ return Whiskys
+}
diff --git a/crawler/shop_whic.go b/crawler/shop_whic.go
new file mode 100644
index 0000000..896b1fb
--- /dev/null
+++ b/crawler/shop_whic.go
@@ -0,0 +1,64 @@
+package main
+
+import (
+ "log"
+ "strings"
+
+ "github.com/PuerkitoBio/goquery"
+ "github.com/gocolly/colly"
+)
+
+// ScrapeWhic collects the offers from whic.de. The product image is read from
+// the text of the ".product-image" element, which is parsed as HTML on its
+// own. A price or image snippet that cannot be parsed terminates the process;
+// a failed page request is logged and yields an empty result.
+func ScrapeWhic(shop Shop) []Angebot {
+	Whiskys := []Angebot{}
+
+	c := colly.NewCollector(
+		colly.AllowedDomains("whic.de"),
+	)
+
+	c.OnHTML("li.item", func(e *colly.HTMLElement) {
+		W := Angebot{
+			Name: e.ChildAttr("a", "title"),
+			Url:  e.ChildAttr("a", "href"),
+		}
+
+		var err error
+
+		e.ForEach(".price-box", func(i int, e *colly.HTMLElement) {
+			e.ForEach(".old-price", func(i int, e *colly.HTMLElement) {
+				W.Original_price, err = sanitize_price(e.ChildText(".price"))
+				if err != nil {
+					log.Fatal(err)
+				}
+			})
+			e.ForEach(".special-price", func(i int, e *colly.HTMLElement) {
+				W.Discounted_price, err = sanitize_price(e.ChildText(".price"))
+				if err != nil {
+					log.Fatal(err)
+				}
+			})
+		})
+
+		/*
+		 * colly does not parse a <noscript>, thus we are reading the content and parse it as html.
+		 */
+		img_link_noisy := e.ChildText(".product-image")
+
+		doc, err := goquery.NewDocumentFromReader(strings.NewReader(img_link_noisy))
+		if err != nil {
+			log.Fatal(err)
+		}
+		// A missing src attribute simply leaves Image_url empty.
+		W.Image_url, _ = doc.Find("img").Attr("src")
+
+		W.Shop = shop.Id
+		W.Spirit_type = "Whisky"
+
+		Whiskys = append(Whiskys, W)
+	})
+
+	if err := c.Visit("https://whic.de/angebote"); err != nil {
+		log.Println(shop.Name + ": " + err.Error())
+	}
+
+	return Whiskys
+}
diff --git a/crawler/shop_whiskyde.go b/crawler/shop_whiskyde.go
new file mode 100644
index 0000000..657bfe0
--- /dev/null
+++ b/crawler/shop_whiskyde.go
@@ -0,0 +1,58 @@
+package main
+
+import (
+ "log"
+ "strings"
+
+ "github.com/gocolly/colly"
+)
+
+// ScrapeWhiskyde collects the offers from whisky.de's special offer page.
+// The "?&searchorigin=2" suffix is removed from product links. A price that
+// cannot be parsed terminates the process; a failed page request is logged
+// and yields an empty result.
+func ScrapeWhiskyde(shop Shop) []Angebot {
+	Whiskys := []Angebot{}
+
+	// Both hosts go into a single option: a second AllowedDomains option
+	// would replace the first one instead of extending it.
+	c := colly.NewCollector(
+		colly.AllowedDomains("whisky.de", "www.whisky.de"),
+	)
+
+	c.OnHTML(".is-buyable", func(e *colly.HTMLElement) {
+
+		W := Angebot{
+			Name: e.ChildAttr("a", "title"),
+			Url:  strings.Replace(e.ChildAttr("a", "href"), "?&searchorigin=2", "", 1),
+		}
+
+		var err error
+
+		e.ForEach(".article-price-original", func(i int, e *colly.HTMLElement) {
+			W.Original_price, err = sanitize_price(e.ChildText("del"))
+			if err != nil {
+				log.Fatal(err)
+			}
+		})
+		e.ForEach(".article-price", func(i int, e *colly.HTMLElement) {
+			W.Discounted_price, err = sanitize_price(e.ChildText(".article-price-default"))
+			if err != nil {
+				log.Fatal(err)
+			}
+		})
+
+		e.ForEach(".article-thumbnail", func(i int, e *colly.HTMLElement) {
+			W.Image_url = e.ChildAttr("img", "data-src")
+		})
+
+		// TODO: read Valid_until from ".article-price-prefix" /
+		// ".article-price-special" (not implemented yet).
+
+		W.Shop = shop.Id
+		W.Spirit_type = "Whisky"
+
+		Whiskys = append(Whiskys, W)
+	})
+
+	if err := c.Visit("https://www.whisky.de/shop/Aktuell/Sonderangebote/"); err != nil {
+		log.Println(shop.Name + ": " + err.Error())
+	}
+
+	return Whiskys
+}
diff --git a/crawler/shop_whiskysitenl.go b/crawler/shop_whiskysitenl.go
new file mode 100644
index 0000000..c8b35a2
--- /dev/null
+++ b/crawler/shop_whiskysitenl.go
@@ -0,0 +1,59 @@
+package main
+
+import (
+ "log"
+ "regexp"
+ "strings"
+
+ "github.com/gocolly/colly"
+)
+
+func ScrapeWhiskysitenl(shop Shop) []Angebot {
+ Whiskys := []Angebot{}
+
+ c := colly.NewCollector(
+ colly.AllowedDomains("whiskysite.nl"),
+ colly.AllowedDomains("www.whiskysite.nl"),
+ )
+
+ c.OnHTML(".product-block", func(e *colly.HTMLElement) {
+
+ W := Angebot{}
+
+ whisky_name := e.ChildAttr("img", "alt")
+ whisky_url := e.ChildAttr("a", "href")
+
+ W.Name = whisky_name
+ W.Url = whisky_url
+
+ regular_price := e.ChildText(".price-old")
+ price_discount_noisy := e.ChildText(".product-block-price")
+ r, err := regexp.Compile("[0-9]+(,[0-9]{1,2})")
+ if err != nil {
+ log.Fatal(err)
+ }
+ discounted_price := r.FindString(strings.Trim(strings.TrimPrefix(price_discount_noisy, regular_price), ""))
+
+ W.Original_price, err = sanitize_price(regular_price)
+ if err != nil {
+ //log.Println(W.Name, err)
+ return
+ }
+ W.Discounted_price, err = sanitize_price(discounted_price)
+ if err != nil {
+ //log.Println(W.Name, err)
+ return
+ }
+
+ W.Image_url = e.ChildAttr("img", "src")
+
+ W.Shop = shop.Id
+ W.Spirit_type = "Whisky"
+
+ Whiskys = append(Whiskys, W)
+ })
+
+ c.Visit("https://www.whiskysite.nl/en/specials/?limit=100")
+
+ return Whiskys
+}
diff --git a/crawler/shop_whiskyworld.go b/crawler/shop_whiskyworld.go
new file mode 100644
index 0000000..e07c42f
--- /dev/null
+++ b/crawler/shop_whiskyworld.go
@@ -0,0 +1,60 @@
+package main
+
+import (
+ "log"
+ "strings"
+
+ "github.com/gocolly/colly"
+)
+
+// ScrapeWhiskyworld collects the offers from three special offer category
+// listings on whiskyworld.de. Products whose prices cannot be parsed are
+// logged and skipped; a failed page request is logged and the remaining pages
+// are still visited.
+func ScrapeWhiskyworld(shop Shop) []Angebot {
+
+	Whiskys := []Angebot{}
+
+	// Both hosts go into a single option: a second AllowedDomains option
+	// would replace the first one instead of extending it.
+	c := colly.NewCollector(
+		colly.AllowedDomains("whiskyworld.de", "www.whiskyworld.de"),
+	)
+
+	c.OnHTML(".product-item", func(e *colly.HTMLElement) {
+
+		W := Angebot{}
+
+		whisky_name_part1 := e.ChildText("h3")
+		whisky_name_part2 := e.ChildText(".item-description")
+
+		W.Name = whisky_name_part1 + " " + whisky_name_part2
+
+		W.Url = "https://www.whiskyworld.de/" + strings.TrimPrefix(e.ChildAttr("a", "href"), "../")
+
+		regular_price_noisy := e.ChildText(".offer")
+		regular_price := strings.TrimSuffix(strings.TrimPrefix(regular_price_noisy, "statt "), " €*")
+
+		var err error
+
+		// One unparsable product must not abort the whole crawl: log it and
+		// skip the product.
+		W.Original_price, err = sanitize_price(regular_price)
+		if err != nil {
+			log.Println(shop.Name + ": " + W.Name + ": " + err.Error())
+			return
+		}
+
+		W.Discounted_price, err = sanitize_price(e.ChildText(".uvp"))
+		if err != nil {
+			log.Println(shop.Name + ": " + W.Name + ": " + err.Error())
+			return
+		}
+
+		W.Image_url = "https:" + e.ChildAttr("img", "src")
+
+		W.Shop = shop.Id
+		W.Spirit_type = "Whisky"
+
+		Whiskys = append(Whiskys, W)
+	})
+
+	pages := []string{
+		"https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BMalt%2522%257D",
+		"https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BWhiskies%2522%257D",
+		"https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Single%2BMalt%2522%257D",
+	}
+	for _, page := range pages {
+		if err := c.Visit(page); err != nil {
+			log.Println(shop.Name + ": " + err.Error())
+		}
+	}
+
+	return Whiskys
+}
diff --git a/crawler/shop_whiskyzone.go b/crawler/shop_whiskyzone.go
new file mode 100644
index 0000000..a9e73d0
--- /dev/null
+++ b/crawler/shop_whiskyzone.go
@@ -0,0 +1,56 @@
+package main
+
+import (
+ "log"
+ "regexp"
+
+ "github.com/gocolly/colly"
+)
+
+func ScrapeWhiskyzone(shop Shop) []Angebot {
+
+ Whiskys := []Angebot{}
+
+ c := colly.NewCollector(
+ colly.AllowedDomains("whiskyzone.de"),
+ colly.AllowedDomains("www.whiskyzone.de"),
+ )
+
+ c.OnHTML(".product--info", func(e *colly.HTMLElement) {
+
+ W := Angebot{}
+
+ W.Name = e.ChildAttr("a", "title")
+ W.Url = e.ChildAttr("a", "href")
+ price_discount_noisy := e.ChildText(".price--default")
+ price_regular_noisy := e.ChildText(".price--discount")
+
+ r, err := regexp.Compile("[0-9]+(,[0-9]{1,2})")
+ if err != nil {
+ log.Fatal(err)
+ }
+ W.Discounted_price, err = sanitize_price(r.FindString(price_discount_noisy))
+ if err != nil {
+ log.Fatal(err)
+ return
+ }
+ W.Original_price, err = sanitize_price(r.FindString(price_regular_noisy))
+ if err != nil {
+ log.Fatal(err)
+ return
+ }
+
+ e.ForEach(".image--media", func(i int, e *colly.HTMLElement) {
+ W.Image_url = e.ChildAttr("img", "src")
+ })
+
+ W.Shop = shop.Id
+ W.Spirit_type = "Whisky"
+
+ Whiskys = append(Whiskys, W)
+ })
+
+ c.Visit("https://www.whiskyzone.de/widgets/emotion/index/emotionId/248/controllerName/listing")
+
+ return Whiskys
+}
diff --git a/crawler/shops.go b/crawler/shops.go
new file mode 100644
index 0000000..92b11cd
--- /dev/null
+++ b/crawler/shops.go
@@ -0,0 +1,114 @@
+package main
+
+import (
+ "log"
+)
+
+func (app *App) insertShops() error {
+ shops := getShopsFromStruct()
+
+ query := `INSERT IGNORE INTO shop (name, url, logo_url, shipping_costs, free_shipping) VALUES(?, ?, ?, ?, ?)`
+
+ for _, v := range shops {
+
+ _, err := app.DB.Exec(query, v.Name, v.Url, v.Logo_url, v.Shipping_costs, v.Free_shipping)
+ if err != nil {
+ return err
+ }
+ }
+
+ return nil
+
+}
+
+// getShopsFromStruct returns the built-in list of shops. Only the name and
+// the URL are set; logo, shipping costs and free shipping keep their zero
+// values.
+func getShopsFromStruct() []Shop {
+	seeds := []struct{ name, url string }{
+		// Was "bottleword.de"; the crawler visits www.bottleworld.de.
+		{"Bottleworld", "https://www.bottleworld.de"},
+		{"MC Whisky", "https://www.mcwhisky.com"},
+		{"Rum & Co", "https://www.rumundco.de"},
+		{"Whic", "https://whic.de"},
+		{"Whisky.de", "https://www.whisky.de"},
+		{"Whiskysite.nl", "https://www.whiskysite.nl"},
+		{"Whisky World", "https://www.whiskyworld.de"},
+		{"Whiskyzone", "https://www.whiskyzone.de"},
+	}
+
+	Shops := make([]Shop, 0, len(seeds))
+	for _, seed := range seeds {
+		Shops = append(Shops, Shop{Name: seed.name, Url: seed.url})
+	}
+
+	return Shops
+}
+
+// getShops loads all rows of the shop table. With debugging enabled every
+// loaded shop name is logged. On any database error, including one that
+// interrupts the row iteration, an empty slice and the error are returned.
+func (app *App) getShops() ([]Shop, error) {
+
+	Shops := []Shop{}
+
+	query := `SELECT id,name,url,logo_url,shipping_costs,free_shipping FROM shop`
+
+	rows, err := app.DB.Queryx(query)
+	if err != nil {
+		return []Shop{}, err
+	}
+	defer rows.Close()
+
+	for rows.Next() {
+		var shop Shop
+		if err = rows.StructScan(&shop); err != nil {
+			return []Shop{}, err
+		}
+		if app.Config.Debug {
+			log.Println("Appending: " + shop.Name)
+		}
+
+		Shops = append(Shops, shop)
+	}
+
+	// rows.Next returns false on both exhaustion and failure; Err tells them apart.
+	if err = rows.Err(); err != nil {
+		return []Shop{}, err
+	}
+
+	return Shops, nil
+}