summary | refs | log | tree | commit | diff
path: root/crawler
diff options
context:
space:
mode:
author: Max, 2018-06-15 19:38:04 +0200
committer: Max, 2018-06-15 19:38:04 +0200
commit: 0026ba55f03c5378d5773459fcdd7c6931ff42a5 (patch)
tree: c0a6c9b4fd542e4177ce99159e101c5187a56f02 /crawler
parent: b3b35a1706cd99e0978147a4d1b841381cf48348 (diff)
download: alkobote-0026ba55f03c5378d5773459fcdd7c6931ff42a5.tar.gz
Introduces central crawler config. (crawler)
Diffstat (limited to 'crawler')
-rw-r--r-- crawler/shop_bottleworld.go 5
-rw-r--r-- crawler/shop_mcwhisky.go 5
-rw-r--r-- crawler/shop_rumundco.go 5
-rw-r--r-- crawler/shop_whic.go 4
-rw-r--r-- crawler/shop_whiskyde.go 5
-rw-r--r-- crawler/shop_whiskysitenl.go 5
-rw-r--r-- crawler/shop_whiskyworld.go 6
-rw-r--r-- crawler/shop_whiskyzone.go 5
-rw-r--r-- crawler/utility.go 6
9 files changed, 14 insertions, 32 deletions
diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go
index b92896d..de9fe13 100644
--- a/crawler/shop_bottleworld.go
+++ b/crawler/shop_bottleworld.go
@@ -12,10 +12,7 @@ func (app *App) ScrapeBottleWord(shop Shop) []Angebot {
Shop_url := "https://www.bottleworld.de/aktuelle-sonderpreise/show/all"
Whiskys := []Angebot{}
- c := colly.NewCollector(
- colly.AllowedDomains("bottleworld.de"),
- colly.AllowedDomains("www.bottleworld.de"),
- )
+ c := customCollector([]string{"bottleworld.de", "www.bottleworld.de"})
c.OnHTML("li.item", func(e *colly.HTMLElement) {
W := Angebot{}
diff --git a/crawler/shop_mcwhisky.go b/crawler/shop_mcwhisky.go
index cea020a..ef780a9 100644
--- a/crawler/shop_mcwhisky.go
+++ b/crawler/shop_mcwhisky.go
@@ -11,10 +11,7 @@ func (app *App) ScrapeMCWhisky(shop Shop) []Angebot {
Whiskys := []Angebot{}
- c := colly.NewCollector(
- colly.AllowedDomains("mcwhisky.com"),
- colly.AllowedDomains("www.mcwhisky.com"),
- )
+ c := customCollector([]string{"mcwhisky.com", "www.mcwhisky.com"})
c.OnHTML("li.item", func(e *colly.HTMLElement) {
diff --git a/crawler/shop_rumundco.go b/crawler/shop_rumundco.go
index 1ce202f..4b72c08 100644
--- a/crawler/shop_rumundco.go
+++ b/crawler/shop_rumundco.go
@@ -14,10 +14,7 @@ func (app *App) ScrapeRumundCo(shop Shop) []Angebot {
Whiskys := []Angebot{}
- c := colly.NewCollector(
- colly.AllowedDomains("rumundco.de"),
- colly.AllowedDomains("www.rumundco.de"),
- )
+ c := customCollector([]string{"rumundco.de", "www.rumundco.de"})
c.OnHTML(".product-teaser", func(e *colly.HTMLElement) {
diff --git a/crawler/shop_whic.go b/crawler/shop_whic.go
index af86bdc..2d0170b 100644
--- a/crawler/shop_whic.go
+++ b/crawler/shop_whic.go
@@ -12,9 +12,7 @@ func (app *App) ScrapeWhic(shop Shop) []Angebot {
Shop_url := "https://whic.de/angebote"
Whiskys := []Angebot{}
- c := colly.NewCollector(
- colly.AllowedDomains("whic.de"),
- )
+ c := customCollector([]string{"whic.de"})
c.OnHTML("li.item", func(e *colly.HTMLElement) {
diff --git a/crawler/shop_whiskyde.go b/crawler/shop_whiskyde.go
index 7117d71..9e061ac 100644
--- a/crawler/shop_whiskyde.go
+++ b/crawler/shop_whiskyde.go
@@ -11,10 +11,7 @@ func (app *App) ScrapeWhiskyde(shop Shop) []Angebot {
Whiskys := []Angebot{}
- c := colly.NewCollector(
- colly.AllowedDomains("whisky.de"),
- colly.AllowedDomains("www.whisky.de"),
- )
+ c := customCollector([]string{"whisky.de", "www.whisky.de"})
c.OnHTML(".is-buyable", func(e *colly.HTMLElement) {
diff --git a/crawler/shop_whiskysitenl.go b/crawler/shop_whiskysitenl.go
index f1b667c..4dad313 100644
--- a/crawler/shop_whiskysitenl.go
+++ b/crawler/shop_whiskysitenl.go
@@ -13,10 +13,7 @@ func (app *App) ScrapeWhiskysitenl(shop Shop) []Angebot {
Shop_url := "https://www.whiskysite.nl/en/specials/?limit=100"
- c := colly.NewCollector(
- colly.AllowedDomains("whiskysite.nl"),
- colly.AllowedDomains("www.whiskysite.nl"),
- )
+ c := customCollector([]string{"whiskysite.nl", "www.whiskysite.nl"})
c.OnHTML(".product-block", func(e *colly.HTMLElement) {
diff --git a/crawler/shop_whiskyworld.go b/crawler/shop_whiskyworld.go
index f617ebb..7b57d37 100644
--- a/crawler/shop_whiskyworld.go
+++ b/crawler/shop_whiskyworld.go
@@ -15,11 +15,7 @@ func (app *App) ScrapeWhiskyworld(shop Shop) []Angebot {
Whiskys := []Angebot{}
- c := colly.NewCollector(
- colly.UserAgent("friendly"),
- colly.AllowedDomains("whiskyworld.de"),
- colly.AllowedDomains("www.whiskyworld.de"),
- )
+ c := customCollector([]string{"whiskyworld.de", "www.whiskyworld.de"})
c.OnHTML(".product-item", func(e *colly.HTMLElement) {
if !stringInSlice(e.Request.URL.String(), Shop_urls) {
diff --git a/crawler/shop_whiskyzone.go b/crawler/shop_whiskyzone.go
index 2c1fb99..4dc825a 100644
--- a/crawler/shop_whiskyzone.go
+++ b/crawler/shop_whiskyzone.go
@@ -13,10 +13,7 @@ func (app *App) ScrapeWhiskyzone(shop Shop) []Angebot {
Whiskys := []Angebot{}
- c := colly.NewCollector(
- colly.AllowedDomains("whiskyzone.de"),
- colly.AllowedDomains("www.whiskyzone.de"),
- )
+ c := customCollector([]string{"whiskyzone.de", "www.whiskyzone.de"})
c.OnHTML(".product--info", func(e *colly.HTMLElement) {
diff --git a/crawler/utility.go b/crawler/utility.go
index 5fa78c4..e0acf3f 100644
--- a/crawler/utility.go
+++ b/crawler/utility.go
@@ -10,6 +10,12 @@ import (
"github.com/gocolly/colly"
)
+func customCollector(allowed_urls []string) *colly.Collector {
+ return colly.NewCollector(
+ colly.AllowedDomains(allowed_urls...),
+ )
+}
+
func stringInSlice(a string, list []string) bool {
for _, b := range list {
if b == a {