diff options
| author | Max | 2019-04-17 09:44:56 +0200 |
|---|---|---|
| committer | Max | 2019-04-17 09:44:56 +0200 |
| commit | 4dc18e3691127e058833fd9c7a5bbee333c3a66c (patch) | |
| tree | cfc6829bea0bf05854beb5a6a4b78eb1899c0b59 /scrape.go | |
| download | ghrss-4dc18e3691127e058833fd9c7a5bbee333c3a66c.tar.gz | |
Initial commit.
Diffstat (limited to 'scrape.go')
| -rw-r--r-- | scrape.go | 84 |
1 files changed, 84 insertions, 0 deletions
diff --git a/scrape.go b/scrape.go new file mode 100644 index 0000000..ebb02fa --- /dev/null +++ b/scrape.go @@ -0,0 +1,84 @@ +package main + +import ( + "time" + + log "github.com/Sirupsen/logrus" + "github.com/gocolly/colly" +) + +func (app *App) ScrapeHTML(platforms []Platform) { + + wait := make(chan bool) + count := 0 + + for _, platform := range platforms { + + go app.Scrape(platform, wait) + count++ + + } + + // Wait until all go routines finished + for i := 0; i < count; i++ { + <-wait + } +} + +func (app *App) Scrape(platform Platform, wait chan bool) { + var Entries []Entry + var err error + + // retry on error + for i := 1; i < 4; i++ { + Entries = app.ScrapePlatform(platform) + + if len(Entries) >= 1 { + break + } + } + + // if no results, return early + if len(Entries) == 0 { + wait <- true + return + + } + + err = app.SaveEntries(Entries) + if err != nil { + Warn(err, "Saving entries failed. Platform: "+platform.Name) + } + + wait <- true +} + +func (app *App) ScrapePlatform(platform Platform) []Entry { + + switch platform.Name { + case "Github": + return app.ScrapeGithub(platform) + default: + log.Println(platform.Name + ": No Crawler") + } + + return []Entry{} +} + +/* + * Sets the crawler config. + */ +func (app *App) customCollector(allowed_urls []string) *colly.Collector { + c := colly.NewCollector( + colly.UserAgent(app.Config.UserAgent), + colly.AllowedDomains(allowed_urls...), + ) + c.IgnoreRobotsTxt = app.Config.IgnoreRobotsTXT + + c.Limit(&colly.LimitRule{ + DomainGlob: "*", + RandomDelay: time.Duration(app.Config.Delay) * time.Second, + }) + + return c +} |
