From 35882837a2821749f3a2b1dfa23f19c4168004d3 Mon Sep 17 00:00:00 2001 From: Max Date: Thu, 1 Feb 2018 16:13:56 +0100 Subject: Crawled the first seven shops. --- bottleworld.go | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 bottleworld.go (limited to 'bottleworld.go') diff --git a/bottleworld.go b/bottleworld.go new file mode 100644 index 0000000..2f5991a --- /dev/null +++ b/bottleworld.go @@ -0,0 +1,52 @@ +package main + +import ( + "fmt" + "log" + "regexp" + // "strings" + + // "github.com/PuerkitoBio/goquery" + "github.com/gocolly/colly" +) + +func ScrapeBottleWord() { + c := colly.NewCollector( + colly.AllowedDomains("bottleworld.de"), + colly.AllowedDomains("www.bottleworld.de"), + ) + + c.OnHTML("li.item", func(e *colly.HTMLElement) { + whisky_name := e.ChildText("h2 > a") + + matched, err := regexp.MatchString("Whiske?y", whisky_name) + if err != nil { + log.Fatal(err) + } + if !matched { + return + } + + whisky_url := e.ChildAttr("a", "href") + log.Println(whisky_name) + log.Println(whisky_url) + + e.ForEach(".price-box", func(i int, e *colly.HTMLElement) { + e.ForEach(".old-price", func(i int, e *colly.HTMLElement) { + log.Println(e.ChildText(".price")) + }) + e.ForEach(".special-price", func(i int, e *colly.HTMLElement) { + log.Println(e.ChildText(".price")) + }) + }) + log.Println(e.ChildAttr("img", "src")) + + fmt.Println("") + }) + + c.Visit("https://www.bottleworld.de/aktuelle-sonderpreise/show/all") +} + +func main() { + ScrapeBottleWord() +} -- cgit v1.2.3