diff options
| author | Max | 2018-06-15 19:38:04 +0200 |
|---|---|---|
| committer | Max | 2018-06-15 19:38:04 +0200 |
| commit | 0026ba55f03c5378d5773459fcdd7c6931ff42a5 (patch) | |
| tree | c0a6c9b4fd542e4177ce99159e101c5187a56f02 | |
| parent | b3b35a1706cd99e0978147a4d1b841381cf48348 (diff) | |
| download | alkobote-0026ba55f03c5378d5773459fcdd7c6931ff42a5.tar.gz | |
Introduces central crawler config. (crawler)
| -rw-r--r-- | crawler/shop_bottleworld.go | 5 | ||||
| -rw-r--r-- | crawler/shop_mcwhisky.go | 5 | ||||
| -rw-r--r-- | crawler/shop_rumundco.go | 5 | ||||
| -rw-r--r-- | crawler/shop_whic.go | 4 | ||||
| -rw-r--r-- | crawler/shop_whiskyde.go | 5 | ||||
| -rw-r--r-- | crawler/shop_whiskysitenl.go | 5 | ||||
| -rw-r--r-- | crawler/shop_whiskyworld.go | 6 | ||||
| -rw-r--r-- | crawler/shop_whiskyzone.go | 5 | ||||
| -rw-r--r-- | crawler/utility.go | 6 |
9 files changed, 14 insertions, 32 deletions
diff --git a/crawler/shop_bottleworld.go b/crawler/shop_bottleworld.go index b92896d..de9fe13 100644 --- a/crawler/shop_bottleworld.go +++ b/crawler/shop_bottleworld.go @@ -12,10 +12,7 @@ func (app *App) ScrapeBottleWord(shop Shop) []Angebot { Shop_url := "https://www.bottleworld.de/aktuelle-sonderpreise/show/all" Whiskys := []Angebot{} - c := colly.NewCollector( - colly.AllowedDomains("bottleworld.de"), - colly.AllowedDomains("www.bottleworld.de"), - ) + c := customCollector([]string{"bottleworld.de", "www.bottleworld.de"}) c.OnHTML("li.item", func(e *colly.HTMLElement) { W := Angebot{} diff --git a/crawler/shop_mcwhisky.go b/crawler/shop_mcwhisky.go index cea020a..ef780a9 100644 --- a/crawler/shop_mcwhisky.go +++ b/crawler/shop_mcwhisky.go @@ -11,10 +11,7 @@ func (app *App) ScrapeMCWhisky(shop Shop) []Angebot { Whiskys := []Angebot{} - c := colly.NewCollector( - colly.AllowedDomains("mcwhisky.com"), - colly.AllowedDomains("www.mcwhisky.com"), - ) + c := customCollector([]string{"mcwhisky.com", "www.mcwhisky.com"}) c.OnHTML("li.item", func(e *colly.HTMLElement) { diff --git a/crawler/shop_rumundco.go b/crawler/shop_rumundco.go index 1ce202f..4b72c08 100644 --- a/crawler/shop_rumundco.go +++ b/crawler/shop_rumundco.go @@ -14,10 +14,7 @@ func (app *App) ScrapeRumundCo(shop Shop) []Angebot { Whiskys := []Angebot{} - c := colly.NewCollector( - colly.AllowedDomains("rumundco.de"), - colly.AllowedDomains("www.rumundco.de"), - ) + c := customCollector([]string{"rumundco.de", "www.rumundco.de"}) c.OnHTML(".product-teaser", func(e *colly.HTMLElement) { diff --git a/crawler/shop_whic.go b/crawler/shop_whic.go index af86bdc..2d0170b 100644 --- a/crawler/shop_whic.go +++ b/crawler/shop_whic.go @@ -12,9 +12,7 @@ func (app *App) ScrapeWhic(shop Shop) []Angebot { Shop_url := "https://whic.de/angebote" Whiskys := []Angebot{} - c := colly.NewCollector( - colly.AllowedDomains("whic.de"), - ) + c := customCollector([]string{"whic.de"}) c.OnHTML("li.item", func(e *colly.HTMLElement) { diff --git a/crawler/shop_whiskyde.go b/crawler/shop_whiskyde.go index 7117d71..9e061ac 100644 --- a/crawler/shop_whiskyde.go +++ b/crawler/shop_whiskyde.go @@ -11,10 +11,7 @@ func (app *App) ScrapeWhiskyde(shop Shop) []Angebot { Whiskys := []Angebot{} - c := colly.NewCollector( - colly.AllowedDomains("whisky.de"), - colly.AllowedDomains("www.whisky.de"), - ) + c := customCollector([]string{"whisky.de", "www.whisky.de"}) c.OnHTML(".is-buyable", func(e *colly.HTMLElement) { diff --git a/crawler/shop_whiskysitenl.go b/crawler/shop_whiskysitenl.go index f1b667c..4dad313 100644 --- a/crawler/shop_whiskysitenl.go +++ b/crawler/shop_whiskysitenl.go @@ -13,10 +13,7 @@ func (app *App) ScrapeWhiskysitenl(shop Shop) []Angebot { Shop_url := "https://www.whiskysite.nl/en/specials/?limit=100" - c := colly.NewCollector( - colly.AllowedDomains("whiskysite.nl"), - colly.AllowedDomains("www.whiskysite.nl"), - ) + c := customCollector([]string{"whiskysite.nl", "www.whiskysite.nl"}) c.OnHTML(".product-block", func(e *colly.HTMLElement) { diff --git a/crawler/shop_whiskyworld.go b/crawler/shop_whiskyworld.go index f617ebb..7b57d37 100644 --- a/crawler/shop_whiskyworld.go +++ b/crawler/shop_whiskyworld.go @@ -15,11 +15,7 @@ func (app *App) ScrapeWhiskyworld(shop Shop) []Angebot { Whiskys := []Angebot{} - c := colly.NewCollector( - colly.UserAgent("friendly"), - colly.AllowedDomains("whiskyworld.de"), - colly.AllowedDomains("www.whiskyworld.de"), - ) + c := customCollector([]string{"whiskyworld.de", "www.whiskyworld.de"}) c.OnHTML(".product-item", func(e *colly.HTMLElement) { if !stringInSlice(e.Request.URL.String(), Shop_urls) { diff --git a/crawler/shop_whiskyzone.go b/crawler/shop_whiskyzone.go index 2c1fb99..4dc825a 100644 --- a/crawler/shop_whiskyzone.go +++ b/crawler/shop_whiskyzone.go @@ -13,10 +13,7 @@ func (app *App) ScrapeWhiskyzone(shop Shop) []Angebot { Whiskys := []Angebot{} - c := colly.NewCollector( - colly.AllowedDomains("whiskyzone.de"), - colly.AllowedDomains("www.whiskyzone.de"), - ) + c := customCollector([]string{"whiskyzone.de", "www.whiskyzone.de"}) c.OnHTML(".product--info", func(e *colly.HTMLElement) { diff --git a/crawler/utility.go b/crawler/utility.go index 5fa78c4..e0acf3f 100644 --- a/crawler/utility.go +++ b/crawler/utility.go @@ -10,6 +10,12 @@ import ( "github.com/gocolly/colly" ) +func customCollector(allowed_urls []string) *colly.Collector { + return colly.NewCollector( + colly.AllowedDomains(allowed_urls...), + ) +} + func stringInSlice(a string, list []string) bool { for _, b := range list { if b == a { |
