From b6b5993e2c1215c90342398a21e6503a8c03950d Mon Sep 17 00:00:00 2001 From: horus Date: Mon, 12 Feb 2018 22:55:47 +0100 Subject: Crawls now whiskyworld.de and whiskyzone.de. (crawler) --- crawler/shop_whiskyworld.go | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'crawler/shop_whiskyworld.go') diff --git a/crawler/shop_whiskyworld.go b/crawler/shop_whiskyworld.go index 8e4b984..c0fb7b6 100644 --- a/crawler/shop_whiskyworld.go +++ b/crawler/shop_whiskyworld.go @@ -9,14 +9,23 @@ import ( func ScrapeWhiskyworld(shop Shop) []Angebot { + Shop_urls := []string{"https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BMalt%2522%257D", + "https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BWhiskies%2522%257D", + "https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Single%2BMalt%2522%257D", + } + Whiskys := []Angebot{} c := colly.NewCollector( + colly.UserAgent("friendly"), colly.AllowedDomains("whiskyworld.de"), colly.AllowedDomains("www.whiskyworld.de"), ) c.OnHTML(".product-item", func(e *colly.HTMLElement) { + if !stringInSlice(e.Request.URL.String(), Shop_urls) { + return + } W := Angebot{} @@ -25,7 +34,7 @@ func ScrapeWhiskyworld(shop Shop) []Angebot { W.Name = whisky_name_part1 + " " + whisky_name_part2 - W.Url = "https://www.whiskyworld.de/" + strings.TrimPrefix(e.ChildAttr("a", "href"), "../") + W.Url = "https://www.whiskyworld.de/" + e.ChildAttr("a", "href") regular_price_noisy := e.ChildText(".offer") regular_price := strings.TrimSuffix(strings.TrimPrefix(regular_price_noisy, "statt "), " €*") @@ -34,12 +43,14 @@ func ScrapeWhiskyworld(shop Shop) []Angebot { W.Original_price, err = convert_price(regular_price) if err != nil { + log.Println("Whisky World: Original_price failed: " + regular_price + " // " + W.Name + " // " + W.Url + " // " + e.Request.URL.String()) log.Fatal(err) return } 
W.Discounted_price, err = convert_price(e.ChildText(".uvp")) if err != nil { + log.Println("Whisky World: Discounted_price failed") log.Fatal(err) return } @@ -64,6 +75,7 @@ func ScrapeWhiskyworld(shop Shop) []Angebot { base_price_noisy = strings.TrimSpace(strings.SplitAfter(base_price_noisy, "Liter")[0]) W.Base_price, err = sanitize_base_price(base_price_noisy) if err != nil { + log.Println("Whisky World: Base_price failed") log.Fatal(err) } } @@ -75,12 +87,23 @@ func ScrapeWhiskyworld(shop Shop) []Angebot { W.Shop = shop.Id W.Spirit_type = "Whisky" + e.Request.Visit(W.Url) + W.Website = e.Request.Ctx.Get("website") + Whiskys = append(Whiskys, W) }) - c.Visit("https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BMalt%2522%257D") - c.Visit("https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Blended%2BWhiskies%2522%257D") - c.Visit("https://www.whiskyworld.de/themen/sonderangebote?ft=%257B%2522produkt_kategorie%2522:%2522Single%2BMalt%2522%257D") + c.OnHTML("body", func(e *colly.HTMLElement) { + if stringInSlice(e.Request.URL.String(), Shop_urls) { + return + } + + e.Request.Ctx.Put("website", string(e.Response.Body)) + }) + + for _, url := range Shop_urls { + c.Visit(url) + } return Whiskys } -- cgit v1.2.3