summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- bottleworld.go 33
-rw-r--r-- main.go 20
-rw-r--r-- mcwhisky.go 34
-rw-r--r-- sanitize_name.go 13
4 files changed, 73 insertions, 27 deletions
diff --git a/bottleworld.go b/bottleworld.go
index edba866..55ee47c 100644
--- a/bottleworld.go
+++ b/bottleworld.go
@@ -1,7 +1,6 @@
package main
import (
- "fmt"
"log"
"regexp"
// "strings"
@@ -10,13 +9,17 @@ import (
"github.com/gocolly/colly"
)
-func ScrapeBottleWord() {
+func ScrapeBottleWord() []Angebot {
+ Whiskys := []Angebot{}
+
c := colly.NewCollector(
colly.AllowedDomains("bottleworld.de"),
colly.AllowedDomains("www.bottleworld.de"),
)
c.OnHTML("li.item", func(e *colly.HTMLElement) {
+ W := Angebot{}
+
whisky_name := e.ChildText("h2 > a")
matched, err := regexp.MatchString("Whiske?y", whisky_name)
@@ -24,25 +27,39 @@ func ScrapeBottleWord() {
log.Fatal(err)
}
if !matched {
+ //W.Spirit_type = "Anderes"
return
+ } else {
+ W.Spirit_type = "Whisky"
}
whisky_url := e.ChildAttr("a", "href")
- log.Println(whisky_name)
- log.Println(whisky_url)
+ W.Name = whisky_name
+ W.Url = whisky_url
e.ForEach(".price-box", func(i int, e *colly.HTMLElement) {
e.ForEach(".old-price", func(i int, e *colly.HTMLElement) {
- log.Println(e.ChildText(".price"))
+ W.Original_price, err = sanitize_price(e.ChildText(".price"))
+ if err != nil {
+ log.Fatal(err)
+ }
})
e.ForEach(".special-price", func(i int, e *colly.HTMLElement) {
- log.Println(e.ChildText(".price"))
+ W.Discounted_price, err = sanitize_price(e.ChildText(".price"))
+ if err != nil {
+ log.Fatal(err)
+ }
})
})
- log.Println(e.ChildAttr("img", "src"))
+ W.Image_url = e.ChildAttr("img", "src")
- fmt.Println("")
+ W.Shop = "bottleworld.de"
+ W.Spirit_type = "Whisky"
+
+ Whiskys = append(Whiskys, W)
})
c.Visit("https://www.bottleworld.de/aktuelle-sonderpreise/show/all")
+
+ return Whiskys
}
diff --git a/main.go b/main.go
index 3a8efa3..f78a777 100644
--- a/main.go
+++ b/main.go
@@ -19,19 +19,19 @@ type Angebot struct {
func main() {
- /*
- printName("BottleWorld")
- ScrapeBottleWord()
+ W := ScrapeBottleWord()
+ printName(W, "BottleWorld")
- printName("MC Whisky")
- ScrapeMCWhisky()
+ W = ScrapeMCWhisky()
+ printName(W, "MC Whisky")
- printName("Whic")
- ScrapeWhic()
- */
+ /*
+ printName("Whic")
+ ScrapeWhic()
- W := ScrapeWhiskyde()
- printName(W, "Whisky.de")
+ W = ScrapeWhiskyde()
+ printName(W, "Whisky.de")
+ */
/*
printName("Whiskysite.nl")
diff --git a/mcwhisky.go b/mcwhisky.go
index 142d65a..b50b4e5 100644
--- a/mcwhisky.go
+++ b/mcwhisky.go
@@ -1,7 +1,6 @@
package main
import (
- "fmt"
"log"
// "strings"
@@ -9,30 +8,47 @@ import (
"github.com/gocolly/colly"
)
-func ScrapeMCWhisky() {
+func ScrapeMCWhisky() []Angebot {
+ Whiskys := []Angebot{}
+
c := colly.NewCollector(
colly.AllowedDomains("mcwhisky.com"),
colly.AllowedDomains("www.mcwhisky.com"),
)
c.OnHTML("li.item", func(e *colly.HTMLElement) {
- whisky_name := e.ChildAttr("a", "title")
+ W := Angebot{}
+
+ whisky_name := sanitize_name(e.ChildAttr("a", "title"))
whisky_url := e.ChildAttr("a", "href")
- log.Println(whisky_name)
- log.Println(whisky_url)
+ W.Name = whisky_name
+ W.Url = whisky_url
+
+ var err error
e.ForEach(".price-box", func(i int, e *colly.HTMLElement) {
e.ForEach(".old-price", func(i int, e *colly.HTMLElement) {
- log.Println(e.ChildText(".price"))
+ W.Original_price, err = sanitize_price(e.ChildText(".price"))
+ if err != nil {
+ log.Fatal(err)
+ }
})
e.ForEach(".special-price", func(i int, e *colly.HTMLElement) {
- log.Println(e.ChildText(".price"))
+ W.Discounted_price, err = sanitize_price(e.ChildText(".price"))
+ if err != nil {
+ log.Fatal(err)
+ }
})
})
- log.Println(e.ChildAttr("img", "src"))
+ W.Image_url = e.ChildAttr("img", "src")
- fmt.Println("")
+ W.Shop = "MC Whisky"
+ W.Spirit_type = "Whisky"
+
+ Whiskys = append(Whiskys, W)
})
c.Visit("https://www.mcwhisky.com/whisky/whisky-sonderangebote.html")
+
+ return Whiskys
}
diff --git a/sanitize_name.go b/sanitize_name.go
new file mode 100644
index 0000000..73b2714
--- /dev/null
+++ b/sanitize_name.go
@@ -0,0 +1,13 @@
+package main
+
+import (
+ "strings"
+)
+
+// sanitize_name normalizes the age statement in a product name: the first
+// occurrence of "y.o." is replaced by "Jahre". A name that does not contain
+// "y.o." is returned unchanged, and any later occurrences are left as they
+// are (strings.Replace is called with a replacement limit of 1).
+func sanitize_name(name string) string {
+	return strings.Replace(name, "y.o.", "Jahre", 1)
+}