diff options
| author | horus_arch | 2018-02-04 20:54:58 +0100 |
|---|---|---|
| committer | horus_arch | 2018-02-04 20:54:58 +0100 |
| commit | 4f7a9316da8e19fa9466ee377148c7d07bf39fd9 (patch) | |
| tree | 7c4fb5b35ea107d7379b4c7a601aa9a7d9d184d5 | |
| parent | fe12a1d41d74ac55dc3c1b27821375541ee5f2b2 (diff) | |
| download | alkobote-4f7a9316da8e19fa9466ee377148c7d07bf39fd9.tar.gz | |
Data from all shops are now saved in a data structure.
| -rw-r--r-- | main.go | 28 | ||||
| -rw-r--r-- | rumundco.go | 30 | ||||
| -rw-r--r-- | sanitize_price.go | 10 | ||||
| -rw-r--r-- | whic.go | 33 | ||||
| -rw-r--r-- | whiskysitenl.go | 35 | ||||
| -rw-r--r-- | whiskyworld.go | 39 | ||||
| -rw-r--r-- | whiskyzone.go | 35 |
7 files changed, 149 insertions, 61 deletions
@@ -25,27 +25,23 @@ func main() { W = ScrapeMCWhisky() printName(W, "MC Whisky") - /* - printName("Whic") - ScrapeWhic() + W = ScrapeRumundCo() + printName(W, "Rum und Co") - W = ScrapeWhiskyde() - printName(W, "Whisky.de") - */ + W = ScrapeWhic() + printName(W, "Whic") - /* - printName("Whiskysite.nl") - ScrapeWhiskysitenl() + W = ScrapeWhiskyde() + printName(W, "Whisky.de") - printName("Whiskyworld") - ScrapeWhiskyworld() + W = ScrapeWhiskysitenl() + printName(W, "Whiskysite.nl") - printName("Whiskyzone") - ScrapeWhiskyzone() + W = ScrapeWhiskyworld() + printName(W, "Whiskyworld") - printName("Rum und Co") - ScrapeRumundCo() - */ + W = ScrapeWhiskyzone() + printName(W, "Whiskyzone") } func printName(W []Angebot, name string) { diff --git a/rumundco.go b/rumundco.go index d941e73..3c1fb4e 100644 --- a/rumundco.go +++ b/rumundco.go @@ -1,7 +1,6 @@ package main import ( - "fmt" "log" "regexp" "strings" @@ -10,13 +9,17 @@ import ( "github.com/gocolly/colly" ) -func ScrapeRumundCo() { +func ScrapeRumundCo() []Angebot { + Whiskys := []Angebot{} + c := colly.NewCollector( colly.AllowedDomains("rumundco.de"), colly.AllowedDomains("www.rumundco.de"), ) c.OnHTML(".product-teaser", func(e *colly.HTMLElement) { + W := Angebot{} + whisky_name := strings.TrimPrefix(e.ChildAttr("img", "alt"), "Restposten: ") whisky_url := "https://www.rumundco.de/" + e.ChildAttr("a", "href") @@ -28,21 +31,32 @@ func ScrapeRumundCo() { return } - log.Println(whisky_name) - log.Println(whisky_url) + W.Name = whisky_name + W.Url = whisky_url e.ForEach(".price_wrapper", func(i int, e *colly.HTMLElement) { regular_price := e.ChildText("del.value") if "" == regular_price { return } - log.Println(regular_price) - log.Println(e.ChildText(".price-value")) + W.Original_price, err = sanitize_price(regular_price) + if err != nil { + log.Fatal(err) + } + W.Discounted_price, err = sanitize_price(e.ChildText(".price-value")) + if err != nil { + log.Fatal(err) + } }) - log.Println("https://www.rumundco.de/" + e.ChildAttr("img", "src")) + W.Image_url = "https://www.rumundco.de/" + e.ChildAttr("img", "src") + + W.Shop = "Rum & Co" + W.Spirit_type = "Whisky" - fmt.Println("") + Whiskys = append(Whiskys, W) }) c.Visit("https://www.rumundco.de/navi.php?q=4&kf=29&kk-suesse-von=0&kk-suesse-bis=100&kk-milde-von=0&kk-milde-bis=100&kk-wuerze-von=0&kk-wuerze-bis=100&kk-frucht-von=0&kk-frucht-bis=100&kk-torf-von=0&kk-torf-bis=100&hf=0&af=90&Sortierung=11&a=350") + + return Whiskys } diff --git a/sanitize_price.go b/sanitize_price.go index 8b9443f..2052842 100644 --- a/sanitize_price.go +++ b/sanitize_price.go @@ -7,6 +7,10 @@ import ( ) func sanitize_price(price string) (int, error) { + if "" == price { + return 0, errors.New("Empty string") + } + multiply_by_10 := false multiply_by_100 := true @@ -24,6 +28,12 @@ func sanitize_price(price string) (int, error) { price = strings.TrimSuffix(strings.ToLower(price), "euro") price = strings.TrimSpace(price) + if len(price) < 2 { + price = "0" + price + } else if len(price) < 3 { + price = "00" + price + } + c := string(price[len(price)-2:]) c = string(c[0:1]) @@ -1,7 +1,6 @@ package main import ( - "fmt" "log" "strings" @@ -9,23 +8,36 @@ import ( "github.com/gocolly/colly" ) -func ScrapeWhic() { +func ScrapeWhic() []Angebot { + Whiskys := []Angebot{} + c := colly.NewCollector( colly.AllowedDomains("whic.de"), ) c.OnHTML("li.item", func(e *colly.HTMLElement) { + W := Angebot{} + whisky_name := e.ChildAttr("a", "title") whisky_url := e.ChildAttr("a", "href") - log.Println(whisky_name) - log.Println(whisky_url) + + W.Name = whisky_name + W.Url = whisky_url + + var err error e.ForEach(".price-box", func(i int, e *colly.HTMLElement) { e.ForEach(".old-price", func(i int, e *colly.HTMLElement) { - log.Println(e.ChildText(".price")) + W.Original_price, err = sanitize_price(e.ChildText(".price")) + if err != nil { + log.Fatal(err) + } }) e.ForEach(".special-price", func(i int, e *colly.HTMLElement) { - log.Println(e.ChildText(".price")) + W.Discounted_price, err = sanitize_price(e.ChildText(".price")) + if err != nil { + log.Fatal(err) + } }) }) @@ -38,10 +50,15 @@ func ScrapeWhic() { if err != nil { log.Fatal(err) } - log.Println(doc.Find("img").Attr("src")) + W.Image_url, _ = doc.Find("img").Attr("src") + + W.Shop = "Whic" + W.Spirit_type = "Whisky" - fmt.Println("") + Whiskys = append(Whiskys, W) }) c.Visit("https://whic.de/angebote") + + return Whiskys } diff --git a/whiskysitenl.go b/whiskysitenl.go index dcca681..3eefc06 100644 --- a/whiskysitenl.go +++ b/whiskysitenl.go @@ -1,7 +1,6 @@ package main import ( - "fmt" "log" "regexp" "strings" @@ -9,7 +8,9 @@ import ( "github.com/gocolly/colly" ) -func ScrapeWhiskysitenl() { +func ScrapeWhiskysitenl() []Angebot { + Whiskys := []Angebot{} + c := colly.NewCollector( colly.AllowedDomains("whiskysite.nl"), colly.AllowedDomains("www.whiskysite.nl"), @@ -17,10 +18,14 @@ func ScrapeWhiskysitenl() { c.OnHTML(".product-block", func(e *colly.HTMLElement) { + W := Angebot{} + whisky_name := e.ChildAttr("img", "alt") whisky_url := e.ChildAttr("a", "href") - log.Println(whisky_name) - log.Println(whisky_url) + + W.Name = whisky_name + W.Url = whisky_url + regular_price := e.ChildText(".price-old") price_discount_noisy := e.ChildText(".product-block-price") r, err := regexp.Compile("[0-9]+(,[0-9]{1,2})") @@ -28,13 +33,27 @@ func ScrapeWhiskysitenl() { log.Fatal(err) } discounted_price := r.FindString(strings.Trim(strings.TrimPrefix(price_discount_noisy, regular_price), "")) - log.Println(discounted_price + "€") - log.Println(strings.TrimPrefix(regular_price, "€") + "€") - log.Println(e.ChildAttr("img", "src")) + W.Original_price, err = sanitize_price(regular_price) + if err != nil { + //log.Println(W.Name, err) + return + } + W.Discounted_price, err = sanitize_price(discounted_price) + if err != nil { + //log.Println(W.Name, err) + return + } + + W.Image_url = e.ChildAttr("img", "src") - fmt.Println("") + W.Shop = "Whiskysite.nl" + W.Spirit_type = "Whisky" + + Whiskys = append(Whiskys, W) }) c.Visit("https://www.whiskysite.nl/en/specials/?limit=100") + + return Whiskys } diff --git a/whiskyworld.go b/whiskyworld.go index 734c9f2..65bbacd 100644 --- a/whiskyworld.go +++ b/whiskyworld.go @@ -1,14 +1,16 @@ package main import ( - "fmt" "log" "strings" "github.com/gocolly/colly" ) -func ScrapeWhiskyworld() { +func ScrapeWhiskyworld() []Angebot { + + Whiskys := []Angebot{} + c := colly.NewCollector( colly.AllowedDomains("whiskyworld.de"), colly.AllowedDomains("www.whiskyworld.de"), @@ -16,28 +18,43 @@ func ScrapeWhiskyworld() { c.OnHTML(".product-item", func(e *colly.HTMLElement) { + W := Angebot{} + whisky_name_part1 := e.ChildText("h3") whisky_name_part2 := e.ChildText(".item-description") - whisky_name := whisky_name_part1 + " " + whisky_name_part2 + W.Name = whisky_name_part1 + " " + whisky_name_part2 - whisky_url := "https://www.whiskyworld.de/" + strings.TrimPrefix(e.ChildAttr("a", "href"), "../") - log.Println(whisky_name) - log.Println(whisky_url) + W.Url = "https://www.whiskyworld.de/" + strings.TrimPrefix(e.ChildAttr("a", "href"), "../") regular_price_noisy := e.ChildText(".offer") regular_price := strings.TrimSuffix(strings.TrimPrefix(regular_price_noisy, "statt "), " €*") - discounted_price := e.ChildText(".uvp") - log.Println(strings.TrimSuffix(discounted_price, " €") + "€") - log.Println(regular_price + "€") + var err error + + W.Original_price, err = sanitize_price(regular_price) + if err != nil { + log.Fatal(err) + return + } - log.Println("https:" + e.ChildAttr("img", "src")) + W.Discounted_price, err = sanitize_price(e.ChildText(".uvp")) + if err != nil { + log.Fatal(err) + return + } - fmt.Println("") + W.Image_url = "https:" + e.ChildAttr("img", "src") + + W.Shop = "Whisky World" + W.Spirit_type = "Whisky" + + Whiskys = append(Whiskys, W) }) c.Visit("https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BMalt%2522%257D") c.Visit("https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BWhiskies%2522%257D") c.Visit("https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Single%2BMalt%2522%257D") + + return Whiskys } diff --git a/whiskyzone.go b/whiskyzone.go index 274f3dd..dc4a047 100644 --- a/whiskyzone.go +++ b/whiskyzone.go @@ -1,14 +1,16 @@ package main import ( - "fmt" "log" "regexp" "github.com/gocolly/colly" ) -func ScrapeWhiskyzone() { +func ScrapeWhiskyzone() []Angebot { + + Whiskys := []Angebot{} + c := colly.NewCollector( colly.AllowedDomains("whiskyzone.de"), colly.AllowedDomains("www.whiskyzone.de"), @@ -16,10 +18,10 @@ func ScrapeWhiskyzone() { c.OnHTML(".product--info", func(e *colly.HTMLElement) { - whisky_name := e.ChildAttr("a", "title") - whisky_url := e.ChildAttr("a", "href") - log.Println(whisky_name) - log.Println(whisky_url) + W := Angebot{} + + W.Name = e.ChildAttr("a", "title") + W.Url = e.ChildAttr("a", "href") price_discount_noisy := e.ChildText(".price--default") price_regular_noisy := e.ChildText(".price--discount") @@ -27,15 +29,28 @@ func ScrapeWhiskyzone() { if err != nil { log.Fatal(err) } - log.Println(r.FindString(price_discount_noisy) + "€") - log.Println(r.FindString(price_regular_noisy) + "€") + W.Discounted_price, err = sanitize_price(r.FindString(price_discount_noisy)) + if err != nil { + log.Fatal(err) + return + } + W.Original_price, err = sanitize_price(r.FindString(price_regular_noisy)) + if err != nil { + log.Fatal(err) + return + } e.ForEach(".image--media", func(i int, e *colly.HTMLElement) { - log.Println(e.ChildAttr("img", "src")) + W.Image_url = e.ChildAttr("img", "src") }) - fmt.Println("") + W.Shop = "Whiskyzone" + W.Spirit_type = "Whisky" + + Whiskys = append(Whiskys, W) }) c.Visit("https://www.whiskyzone.de/widgets/emotion/index/emotionId/248/controllerName/listing") + + return Whiskys } |
