diff options
| author | Maximilian | 2019-04-17 10:33:39 +0200 |
|---|---|---|
| committer | Maximilian | 2019-04-17 10:33:39 +0200 |
| commit | 6a771c754a9ede42fa9bff743087510a2168cf59 (patch) | |
| tree | 1e0d5ba00d93e4d53fceb97fc3990ce49dfa712e /github.go | |
| parent | 4dc18e3691127e058833fd9c7a5bbee333c3a66c (diff) | |
| download | ghrss-6a771c754a9ede42fa9bff743087510a2168cf59.tar.gz | |
Adds crawler for Github.
Diffstat (limited to 'github.go')
| -rw-r--r-- | github.go | 73 |
1 files changed, 72 insertions, 1 deletions
```diff
@@ -1,5 +1,76 @@
 package main
 
+import (
+	"strconv"
+	"strings"
+
+	"github.com/gocolly/colly"
+)
+
 func (app *App) ScrapeGithub(platform Platform) []Entry {
-	return []Entry{}
+	var err error
+
+	URL := platform.URL
+	Languages := app.GetLanguages()
+	UpdatePeriods := app.GetUpdatePeriods()
+	current_language := Language{}
+	current_update_period := UpdatePeriod{}
+
+	Entries := []Entry{}
+
+	c := app.customCollector([]string{"www.github.com", "github.com"})
+
+	c.OnHTML("ol.repo-list > li", func(e *colly.HTMLElement) {
+		entry := Entry{}
+		owner := Owner{}
+
+		e.ForEach("div > h3", func(i int, e *colly.HTMLElement) {
+			entry.URL = URL + e.ChildAttr("a", "href")
+			entry.Title = e.ChildText("a")
+			owner.Name = strings.TrimSuffix(e.ChildText("a > span"), " /")
+		})
+
+		e.ForEach("div.py-1", func(i int, e *colly.HTMLElement) {
+			entry.Synopsis = e.ChildText("p")
+		})
+
+		e.ForEach("div.text-gray", func(i int, e *colly.HTMLElement) {
+			if i == 0 {
+				entry.Stars, err = strconv.Atoi(e.ChildText("a.muted-text"))
+				if err != nil {
+					Warn(err, "Github: Extracting stars from "+entry.Title+" failed")
+				}
+			}
+		})
+
+		owner.Platform = &platform
+		owner.URL = URL + owner.Name
+
+		entry.Owner = &owner
+		entry.Platform = &platform
+		entry.Language = &current_language
+		entry.UpdatePeriod = &current_update_period
+		entry.Created_At = app.Now
+
+		Entries = append(Entries, entry)
+	})
+
+	for _, l := range Languages {
+
+		current_language = l
+
+		for _, t := range UpdatePeriods {
+
+			current_update_period = t
+
+			CURRENT_URL := URL + "/trending/" + l.Name + "?since=" + t.Name
+
+			err := c.Visit(CURRENT_URL)
+			if err != nil {
+				Warn(err, "Scraping Platform "+platform.Name+" failed with URL: "+CURRENT_URL)
+			}
+		}
+	}
+
+	return Entries
 }
```
