From 35882837a2821749f3a2b1dfa23f19c4168004d3 Mon Sep 17 00:00:00 2001 From: Max Date: Thu, 1 Feb 2018 16:13:56 +0100 Subject: Crawled the first seven shops. --- whiskysitenl.go | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 whiskysitenl.go (limited to 'whiskysitenl.go') diff --git a/whiskysitenl.go b/whiskysitenl.go new file mode 100644 index 0000000..30af0ef --- /dev/null +++ b/whiskysitenl.go @@ -0,0 +1,44 @@ +package main + +import ( + "fmt" + "log" + "regexp" + "strings" + + "github.com/gocolly/colly" +) + +func ScrapeWhiskysitenl() { + c := colly.NewCollector( + colly.AllowedDomains("whiskysite.nl"), + colly.AllowedDomains("www.whiskysite.nl"), + ) + + c.OnHTML(".product-block", func(e *colly.HTMLElement) { + + whisky_name := e.ChildAttr("img", "alt") + whisky_url := e.ChildAttr("a", "href") + log.Println(whisky_name) + log.Println(whisky_url) + regular_price := e.ChildText(".price-old") + price_discount_noisy := e.ChildText(".product-block-price") + r, err := regexp.Compile("[0-9]+(,[0-9]{1,2})") + if err != nil { + log.Fatal(err) + } + discounted_price := r.FindString(strings.Trim(strings.TrimPrefix(price_discount_noisy, regular_price), "")) + log.Println(discounted_price + "€") + log.Println(strings.TrimPrefix(regular_price, "€") + "€") + + log.Println(e.ChildAttr("img", "src")) + + fmt.Println("") + }) + + c.Visit("https://www.whiskysite.nl/en/specials/?limit=100") +} + +func main() { + ScrapeWhiskysitenl() +} -- cgit v1.2.3