From 35882837a2821749f3a2b1dfa23f19c4168004d3 Mon Sep 17 00:00:00 2001 From: Max Date: Thu, 1 Feb 2018 16:13:56 +0100 Subject: Crawled the first seven shops. --- whiskyworld.go | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 whiskyworld.go (limited to 'whiskyworld.go') diff --git a/whiskyworld.go b/whiskyworld.go new file mode 100644 index 0000000..58735b9 --- /dev/null +++ b/whiskyworld.go @@ -0,0 +1,47 @@ +package main + +import ( + "fmt" + "log" + "strings" + + "github.com/gocolly/colly" +) + +func ScrapeWhiskyworld() { + c := colly.NewCollector( + colly.AllowedDomains("whiskyworld.de"), + colly.AllowedDomains("www.whiskyworld.de"), + ) + + c.OnHTML(".product-item", func(e *colly.HTMLElement) { + + whisky_name_part1 := e.ChildText("h3") + whisky_name_part2 := e.ChildText(".item-description") + + whisky_name := whisky_name_part1 + " " + whisky_name_part2 + + whisky_url := "https://www.whiskyworld.de/" + strings.TrimPrefix(e.ChildAttr("a", "href"), "../") + log.Println(whisky_name) + log.Println(whisky_url) + + regular_price_noisy := e.ChildText(".offer") + regular_price := strings.TrimSuffix(strings.TrimPrefix(regular_price_noisy, "statt "), " €*") + + discounted_price := e.ChildText(".uvp") + log.Println(strings.TrimSuffix(discounted_price, " €") + "€") + log.Println(regular_price + "€") + + log.Println("https:" + e.ChildAttr("img", "src")) + + fmt.Println("") + }) + + c.Visit("https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BMalt%2522%257D") + c.Visit("https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BWhiskies%2522%257D") + c.Visit("https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Single%2BMalt%2522%257D") +} + +func main() { + ScrapeWhiskyworld() +} -- cgit v1.2.3