diff options
| author | Max | 2018-02-06 00:35:39 +0100 |
|---|---|---|
| committer | Max | 2018-02-06 00:35:39 +0100 |
| commit | 71950479fbd6088f249e5fda3b180f294d1d745d (patch) | |
| tree | 06f360a7e02b7e0011bda815fa102ec54ae8d0ec /whic.go | |
| parent | 13a807854bf4d0258723ec3152b217ed4cf8e051 (diff) | |
| download | alkobote-71950479fbd6088f249e5fda3b180f294d1d745d.tar.gz | |
Moves crawler to designated directory.
Diffstat (limited to 'whic.go')
| -rw-r--r-- | whic.go | 64 |
1 files changed, 0 insertions, 64 deletions
diff --git a/whic.go b/whic.go deleted file mode 100644 index 896b1fb..0000000 --- a/whic.go +++ /dev/null @@ -1,64 +0,0 @@ -package main - -import ( - "log" - "strings" - - "github.com/PuerkitoBio/goquery" - "github.com/gocolly/colly" -) - -func ScrapeWhic(shop Shop) []Angebot { - Whiskys := []Angebot{} - - c := colly.NewCollector( - colly.AllowedDomains("whic.de"), - ) - - c.OnHTML("li.item", func(e *colly.HTMLElement) { - W := Angebot{} - - whisky_name := e.ChildAttr("a", "title") - whisky_url := e.ChildAttr("a", "href") - - W.Name = whisky_name - W.Url = whisky_url - - var err error - - e.ForEach(".price-box", func(i int, e *colly.HTMLElement) { - e.ForEach(".old-price", func(i int, e *colly.HTMLElement) { - W.Original_price, err = sanitize_price(e.ChildText(".price")) - if err != nil { - log.Fatal(err) - } - }) - e.ForEach(".special-price", func(i int, e *colly.HTMLElement) { - W.Discounted_price, err = sanitize_price(e.ChildText(".price")) - if err != nil { - log.Fatal(err) - } - }) - }) - - /* - * colly does not parse a <noscript>, thus we are reading the content and parse it as html. - */ - img_link_noisy := e.ChildText(".product-image") - - doc, err := goquery.NewDocumentFromReader(strings.NewReader(img_link_noisy)) - if err != nil { - log.Fatal(err) - } - W.Image_url, _ = doc.Find("img").Attr("src") - - W.Shop = shop.Id - W.Spirit_type = "Whisky" - - Whiskys = append(Whiskys, W) - }) - - c.Visit("https://whic.de/angebote") - - return Whiskys -} |
