diff options
| author | horus_arch | 2018-02-04 20:18:27 +0100 |
|---|---|---|
| committer | horus_arch | 2018-02-04 20:18:27 +0100 |
| commit | fe12a1d41d74ac55dc3c1b27821375541ee5f2b2 (patch) | |
| tree | 57f841f57fe8405e0461d7fd5a9c08787caa0cca | |
| parent | 0687b4217abf0c278bcab50de4edafec76da4a91 (diff) | |
| download | alkobote-fe12a1d41d74ac55dc3c1b27821375541ee5f2b2.tar.gz | |
Structures the crawler for bottleworld and mcwhisky.
| -rw-r--r-- | bottleworld.go | 33 | ||||
| -rw-r--r-- | main.go | 20 | ||||
| -rw-r--r-- | mcwhisky.go | 34 | ||||
| -rw-r--r-- | sanitize_name.go | 13 |
4 files changed, 73 insertions, 27 deletions
diff --git a/bottleworld.go b/bottleworld.go index edba866..55ee47c 100644 --- a/bottleworld.go +++ b/bottleworld.go @@ -1,7 +1,6 @@ package main import ( - "fmt" "log" "regexp" // "strings" @@ -10,13 +9,17 @@ import ( "github.com/gocolly/colly" ) -func ScrapeBottleWord() { +func ScrapeBottleWord() []Angebot { + Whiskys := []Angebot{} + c := colly.NewCollector( colly.AllowedDomains("bottleworld.de"), colly.AllowedDomains("www.bottleworld.de"), ) c.OnHTML("li.item", func(e *colly.HTMLElement) { + W := Angebot{} + whisky_name := e.ChildText("h2 > a") matched, err := regexp.MatchString("Whiske?y", whisky_name) @@ -24,25 +27,39 @@ func ScrapeBottleWord() { log.Fatal(err) } if !matched { + //W.Spirit_type = "Anderes" return + } else { + W.Spirit_type = "Whisky" } whisky_url := e.ChildAttr("a", "href") - log.Println(whisky_name) - log.Println(whisky_url) + W.Name = whisky_name + W.Url = whisky_url e.ForEach(".price-box", func(i int, e *colly.HTMLElement) { e.ForEach(".old-price", func(i int, e *colly.HTMLElement) { - log.Println(e.ChildText(".price")) + W.Original_price, err = sanitize_price(e.ChildText(".price")) + if err != nil { + log.Fatal(err) + } }) e.ForEach(".special-price", func(i int, e *colly.HTMLElement) { - log.Println(e.ChildText(".price")) + W.Discounted_price, err = sanitize_price(e.ChildText(".price")) + if err != nil { + log.Fatal(err) + } }) }) - log.Println(e.ChildAttr("img", "src")) + W.Image_url = e.ChildAttr("img", "src") - fmt.Println("") + W.Shop = "bottleworld.de" + W.Spirit_type = "Whisky" + + Whiskys = append(Whiskys, W) }) c.Visit("https://www.bottleworld.de/aktuelle-sonderpreise/show/all") + + return Whiskys } @@ -19,19 +19,19 @@ type Angebot struct { func main() { - /* - printName("BottleWorld") - ScrapeBottleWord() + W := ScrapeBottleWord() + printName(W, "BottleWorld") - printName("MC Whisky") - ScrapeMCWhisky() + W = ScrapeMCWhisky() + printName(W, "MC Whisky") - printName("Whic") - ScrapeWhic() - */ + /* + printName("Whic") + ScrapeWhic() - W := ScrapeWhiskyde() - printName(W, "Whisky.de") + W = ScrapeWhiskyde() + printName(W, "Whisky.de") + */ /* printName("Whiskysite.nl") diff --git a/mcwhisky.go b/mcwhisky.go index 142d65a..b50b4e5 100644 --- a/mcwhisky.go +++ b/mcwhisky.go @@ -1,7 +1,6 @@ package main import ( - "fmt" "log" // "strings" @@ -9,30 +8,47 @@ import ( "github.com/gocolly/colly" ) -func ScrapeMCWhisky() { +func ScrapeMCWhisky() []Angebot { + Whiskys := []Angebot{} + c := colly.NewCollector( colly.AllowedDomains("mcwhisky.com"), colly.AllowedDomains("www.mcwhisky.com"), ) c.OnHTML("li.item", func(e *colly.HTMLElement) { - whisky_name := e.ChildAttr("a", "title") + W := Angebot{} + + whisky_name := sanitize_name(e.ChildAttr("a", "title")) whisky_url := e.ChildAttr("a", "href") - log.Println(whisky_name) - log.Println(whisky_url) + W.Name = whisky_name + W.Url = whisky_url + + var err error e.ForEach(".price-box", func(i int, e *colly.HTMLElement) { e.ForEach(".old-price", func(i int, e *colly.HTMLElement) { - log.Println(e.ChildText(".price")) + W.Original_price, err = sanitize_price(e.ChildText(".price")) + if err != nil { + log.Fatal(err) + } }) e.ForEach(".special-price", func(i int, e *colly.HTMLElement) { - log.Println(e.ChildText(".price")) + W.Discounted_price, err = sanitize_price(e.ChildText(".price")) + if err != nil { + log.Fatal(err) + } }) }) - log.Println(e.ChildAttr("img", "src")) + W.Image_url = e.ChildAttr("img", "src") - fmt.Println("") + W.Shop = "MC Whisky" + W.Spirit_type = "Whisky" + + Whiskys = append(Whiskys, W) }) c.Visit("https://www.mcwhisky.com/whisky/whisky-sonderangebote.html") + + return Whiskys } diff --git a/sanitize_name.go b/sanitize_name.go new file mode 100644 index 0000000..73b2714 --- /dev/null +++ b/sanitize_name.go @@ -0,0 +1,13 @@ +package main + +import ( + "strings" +) + +func sanitize_name(name string) string { + if strings.Contains(name, "y.o.") { + name = strings.Replace(name, "y.o.", "Jahre", 1) + } + + return name +} |
