From 0ab75dabe3a9547b8cb716d950d5657eafb28293 Mon Sep 17 00:00:00 2001 From: horus_arch Date: Sun, 4 Feb 2018 15:37:50 +0100 Subject: Adds structured data processing. --- main.go | 54 +++++++++++++++++++++++++++++++++++++++--------------- whiskyde.go | 17 ++++++++++++++++- 2 files changed, 55 insertions(+), 16 deletions(-) diff --git a/main.go b/main.go index 19c6ad6..79f578b 100644 --- a/main.go +++ b/main.go @@ -1,34 +1,58 @@ package main import ( + "encoding/json" "fmt" + "log" ) +type Angebot struct { + Name string + Shop string + Url string + Original_price string + Discounted_price string + Image_url string + Spirit_type string + Valid_until string +} + func main() { - printName("BottleWorld") - ScrapeBottleWord() + /* + printName("BottleWorld") + ScrapeBottleWord() - printName("MC Whisky") - ScrapeMCWhisky() + printName("MC Whisky") + ScrapeMCWhisky() - printName("Whic") - ScrapeWhic() + printName("Whic") + ScrapeWhic() + */ printName("Whisky.de") - ScrapeWhiskyde() + W := ScrapeWhiskyde() + + output, err := json.Marshal(W) + if err != nil { + log.Fatal(err) + } + + fmt.Println(string(output)) - printName("Whiskysite.nl") - ScrapeWhiskysitenl() + /* + printName("Whiskysite.nl") + ScrapeWhiskysitenl() - printName("Whiskyworld") - ScrapeWhiskyworld() + printName("Whiskyworld") + ScrapeWhiskyworld() - printName("Whiskyzone") - ScrapeWhiskyzone() + printName("Whiskyzone") + ScrapeWhiskyzone() - printName("Rum und Co") - ScrapeRumundCo() + printName("Rum und Co") + ScrapeRumundCo() + */ } func printName(name string) { diff --git a/whiskyde.go b/whiskyde.go index 99df1ff..90032ba 100644 --- a/whiskyde.go +++ b/whiskyde.go @@ -7,7 +7,9 @@ import ( "github.com/gocolly/colly" ) -func ScrapeWhiskyde() { +func ScrapeWhiskyde() []Angebot { + Whiskys := []Angebot{} + c := colly.NewCollector( colly.AllowedDomains("whisky.de"), colly.AllowedDomains("www.whisky.de"), @@ -15,28 +17,41 @@ func ScrapeWhiskyde() { c.OnHTML(".is-buyable", func(e *colly.HTMLElement) { + W := Angebot{} + whisky_name := e.ChildAttr("a", "title") + W.Name = whisky_name whisky_url := e.ChildAttr("a", "href") + W.Url = whisky_url + log.Println(whisky_name) log.Println(whisky_url) e.ForEach(".article-price-original", func(i int, e *colly.HTMLElement) { + W.Original_price = e.ChildText("del") log.Println(e.ChildText("del")) }) e.ForEach(".article-price", func(i int, e *colly.HTMLElement) { + W.Discounted_price = e.ChildText(".article-price-default") log.Println(e.ChildText(".article-price-default")) }) e.ForEach(".article-thumbnail", func(i int, e *colly.HTMLElement) { + W.Image_url = e.ChildAttr("img", "data-src") log.Println(e.ChildAttr("img", "data-src")) }) e.ForEach(".article-price-prefix", func(i int, e *colly.HTMLElement) { + W.Valid_until = e.ChildText(".article-price-special") log.Println(e.ChildText(".article-price-special")) }) + Whiskys = append(Whiskys, W) + fmt.Println("") }) c.Visit("https://www.whisky.de/shop/Aktuell/Sonderangebote/") + + return Whiskys } -- cgit v1.2.3