diff options
Diffstat (limited to 'whic.go')
| -rw-r--r-- | whic.go | 51 |
1 files changed, 51 insertions, 0 deletions
@@ -0,0 +1,51 @@ +package main + +import ( + "fmt" + "log" + "strings" + + "github.com/PuerkitoBio/goquery" + "github.com/gocolly/colly" +) + +func ScrapeWhic() { + c := colly.NewCollector( + colly.AllowedDomains("whic.de"), + ) + + c.OnHTML("li.item", func(e *colly.HTMLElement) { + whisky_name := e.ChildAttr("a", "title") + whisky_url := e.ChildAttr("a", "href") + log.Println(whisky_name) + log.Println(whisky_url) + + e.ForEach(".price-box", func(i int, e *colly.HTMLElement) { + e.ForEach(".old-price", func(i int, e *colly.HTMLElement) { + log.Println(e.ChildText(".price")) + }) + e.ForEach(".special-price", func(i int, e *colly.HTMLElement) { + log.Println(e.ChildText(".price")) + }) + }) + + /* + * colly does not parse a <noscript>, thus we are reading the content and parse it as html. + */ + img_link_noisy := e.ChildText(".product-image") + + doc, err := goquery.NewDocumentFromReader(strings.NewReader(img_link_noisy)) + if err != nil { + log.Fatal(err) + } + log.Println(doc.Find("img").Attr("src")) + + fmt.Println("") + }) + + c.Visit("https://whic.de/angebote") +} + +func main() { + ScrapeWhic() +} |
