summary | refs | log | tree | commit | diff
path: root/github.go
diff options
context:
space:
mode:
author: Maximilian, 2019-04-17 10:33:39 +0200
committer: Maximilian, 2019-04-17 10:33:39 +0200
commit: 6a771c754a9ede42fa9bff743087510a2168cf59 (patch)
tree: 1e0d5ba00d93e4d53fceb97fc3990ce49dfa712e /github.go
parent: 4dc18e3691127e058833fd9c7a5bbee333c3a66c (diff)
download: ghrss-6a771c754a9ede42fa9bff743087510a2168cf59.tar.gz
Adds crawler for Github.
Diffstat (limited to 'github.go')
-rw-r--r-- github.go 73
1 files changed, 72 insertions, 1 deletions
diff --git a/github.go b/github.go
index f673d1d..410d847 100644
--- a/github.go
+++ b/github.go
@@ -1,5 +1,76 @@
package main
+import (
+ "strconv"
+ "strings"
+
+ "github.com/gocolly/colly"
+)
+
func (app *App) ScrapeGithub(platform Platform) []Entry {
- return []Entry{}
+ var err error
+
+ URL := platform.URL
+ Languages := app.GetLanguages()
+ UpdatePeriods := app.GetUpdatePeriods()
+ current_language := Language{}
+ current_update_period := UpdatePeriod{}
+
+ Entries := []Entry{}
+
+ c := app.customCollector([]string{"www.github.com", "github.com"})
+
+ c.OnHTML("ol.repo-list > li", func(e *colly.HTMLElement) {
+ entry := Entry{}
+ owner := Owner{}
+
+ e.ForEach("div > h3", func(i int, e *colly.HTMLElement) {
+ entry.URL = URL + e.ChildAttr("a", "href")
+ entry.Title = e.ChildText("a")
+ owner.Name = strings.TrimSuffix(e.ChildText("a > span"), " /")
+ })
+
+ e.ForEach("div.py-1", func(i int, e *colly.HTMLElement) {
+ entry.Synopsis = e.ChildText("p")
+ })
+
+ e.ForEach("div.text-gray", func(i int, e *colly.HTMLElement) {
+ if i == 0 {
+ entry.Stars, err = strconv.Atoi(e.ChildText("a.muted-text"))
+ if err != nil {
+ Warn(err, "Github: Extracting stars from "+entry.Title+" failed")
+ }
+ }
+ })
+
+ owner.Platform = &platform
+ owner.URL = URL + owner.Name
+
+ entry.Owner = &owner
+ entry.Platform = &platform
+ entry.Language = &current_language
+ entry.UpdatePeriod = &current_update_period
+ entry.Created_At = app.Now
+
+ Entries = append(Entries, entry)
+ })
+
+ for _, l := range Languages {
+
+ current_language = l
+
+ for _, t := range UpdatePeriods {
+
+ current_update_period = t
+
+ CURRENT_URL := URL + "/trending/" + l.Name + "?since=" + t.Name
+
+ err := c.Visit(CURRENT_URL)
+ if err != nil {
+ Warn(err, "Scraping Platform "+platform.Name+" failed with URL: "+CURRENT_URL)
+ }
+ }
+ }
+
+ return Entries
}