summary refs log tree commit diff
path: root/crawler/scrape.go
diff options
context:
space:
mode:
Diffstat (limited to 'crawler/scrape.go')
-rw-r--r-- crawler/scrape.go 34
1 files changed, 30 insertions, 4 deletions
diff --git a/crawler/scrape.go b/crawler/scrape.go
index 4bc66e0..6874239 100644
--- a/crawler/scrape.go
+++ b/crawler/scrape.go
@@ -1,7 +1,10 @@
package main
import (
+ "time"
+
log "github.com/Sirupsen/logrus"
+ "github.com/gocolly/colly"
)
func (app *App) ScrapeHTML(shops []Shop) {
@@ -26,10 +29,6 @@ func (app *App) Scrape(shop Shop, wait chan bool) {
var W []Angebot
var err error
- if err != nil {
- Fatal(err, "scrape.go: Starting transaction failed. Shop: "+shop.Name)
- }
-
// retry on error
for i := 1; i < 4; i++ {
W = app.ScrapeShop(shop)
@@ -41,6 +40,13 @@ func (app *App) Scrape(shop Shop, wait chan bool) {
}
}
+ // if no results, return early
+ if len(W) == 0 {
+ wait <- true
+ return
+
+ }
+
err = app.save_offer(W)
if err != nil {
Warn(err, "Saving offers failed. Shop: "+shop.Name)
@@ -72,9 +78,29 @@ func (app *App) ScrapeShop(shop Shop) []Angebot {
return app.ScrapeWhiskyworld(shop)
case "Whiskyzone":
return app.ScrapeWhiskyzone(shop)
+ case "Drankdozijn":
+ return app.ScrapeDrankdozijn(shop)
default:
log.Println(shop.Name + ": No Crawler")
}
return []Angebot{}
}
+
+/*
+ * Sets the crawler config.
+ */
+func (app *App) customCollector(allowed_urls []string) *colly.Collector {
+ c := colly.NewCollector(
+ colly.UserAgent(app.Config.UserAgent),
+ colly.AllowedDomains(allowed_urls...),
+ )
+ c.IgnoreRobotsTxt = app.Config.IgnoreRobotsTXT
+
+ c.Limit(&colly.LimitRule{
+ DomainGlob: "*",
+ RandomDelay: time.Duration(app.Config.Delay) * time.Second,
+ })
+
+ return c
+}