summary refs log tree commit diff
path: root/whic.go
diff options
context:
space:
mode:
author Max 2018-02-06 00:35:39 +0100
committer Max 2018-02-06 00:35:39 +0100
commit 71950479fbd6088f249e5fda3b180f294d1d745d (patch)
tree 06f360a7e02b7e0011bda815fa102ec54ae8d0ec /whic.go
parent 13a807854bf4d0258723ec3152b217ed4cf8e051 (diff)
download alkobote-71950479fbd6088f249e5fda3b180f294d1d745d.tar.gz
Moves crawler to designated directory.
Diffstat (limited to 'whic.go')
-rw-r--r-- whic.go 64
1 files changed, 0 insertions, 64 deletions
diff --git a/whic.go b/whic.go
deleted file mode 100644
index 896b1fb..0000000
--- a/whic.go
+++ /dev/null
@@ -1,64 +0,0 @@
-package main
-
-import (
- "log"
- "strings"
-
- "github.com/PuerkitoBio/goquery"
- "github.com/gocolly/colly"
-)
-
-func ScrapeWhic(shop Shop) []Angebot {
- Whiskys := []Angebot{}
-
- c := colly.NewCollector(
- colly.AllowedDomains("whic.de"),
- )
-
- c.OnHTML("li.item", func(e *colly.HTMLElement) {
- W := Angebot{}
-
- whisky_name := e.ChildAttr("a", "title")
- whisky_url := e.ChildAttr("a", "href")
-
- W.Name = whisky_name
- W.Url = whisky_url
-
- var err error
-
- e.ForEach(".price-box", func(i int, e *colly.HTMLElement) {
- e.ForEach(".old-price", func(i int, e *colly.HTMLElement) {
- W.Original_price, err = sanitize_price(e.ChildText(".price"))
- if err != nil {
- log.Fatal(err)
- }
- })
- e.ForEach(".special-price", func(i int, e *colly.HTMLElement) {
- W.Discounted_price, err = sanitize_price(e.ChildText(".price"))
- if err != nil {
- log.Fatal(err)
- }
- })
- })
-
- /*
- * colly does not parse a <noscript>, thus we are reading the content and parse it as html.
- */
- img_link_noisy := e.ChildText(".product-image")
-
- doc, err := goquery.NewDocumentFromReader(strings.NewReader(img_link_noisy))
- if err != nil {
- log.Fatal(err)
- }
- W.Image_url, _ = doc.Find("img").Attr("src")
-
- W.Shop = shop.Id
- W.Spirit_type = "Whisky"
-
- Whiskys = append(Whiskys, W)
- })
-
- c.Visit("https://whic.de/angebote")
-
- return Whiskys
-}