summary refs log tree commit diff
path: root/github.go
diff options
context:
space:
mode:
author horus 2019-04-17 12:00:48 +0200
committer horus 2019-04-17 12:00:48 +0200
commit 421a7232e7f0436705b9f2819cd51227ac6cd821 (patch)
tree 1692db806bcba83a57a5fcaea02796991a90c700 /github.go
parent deea79c010ec7db7a374f23ca5cdabc6e45433db (diff)
download ghrss-421a7232e7f0436705b9f2819cd51227ac6cd821.tar.gz
v0.1
Diffstat (limited to 'github.go')
-rw-r--r-- github.go 38
1 files changed, 30 insertions, 8 deletions
diff --git a/github.go b/github.go
index 410d847..668fd6c 100644
--- a/github.go
+++ b/github.go
@@ -4,10 +4,12 @@ import (
"strconv"
"strings"
+ log "github.com/Sirupsen/logrus"
"github.com/gocolly/colly"
)
func (app *App) ScrapeGithub(platform Platform) []Entry {
+
var err error
URL := platform.URL
@@ -26,32 +28,51 @@ func (app *App) ScrapeGithub(platform Platform) []Entry {
e.ForEach("div > h3", func(i int, e *colly.HTMLElement) {
entry.URL = URL + e.ChildAttr("a", "href")
- entry.Title = e.ChildText("a")
owner.Name = strings.TrimSuffix(e.ChildText("a > span"), " /")
+ owner.Name = strings.TrimSpace(owner.Name)
+ entry.Title = strings.TrimPrefix(e.ChildText("a"), owner.Name+" /")
+ entry.Title = strings.TrimSpace(entry.Title)
})
e.ForEach("div.py-1", func(i int, e *colly.HTMLElement) {
entry.Synopsis = e.ChildText("p")
})
- e.ForEach("div.text-gray", func(i int, e *colly.HTMLElement) {
- if i == 0 {
- entry.Stars, err = strconv.Atoi(e.ChildText("a.muted-text"))
+ e.ForEach("div.text-gray > a.muted-link", func(i int, e *colly.HTMLElement) {
+ if strings.Contains(e.Attr("href"), "stargazers") {
+ stars := strings.TrimSpace(strings.Replace(e.Text, ",", "", -1))
+ entry.Stars, err = strconv.Atoi(stars)
if err != nil {
Warn(err, "Github: Extracting stars from "+entry.Title+" failed")
}
}
})
+ l := Language{}
+ l.ID = current_language.ID
+ l.Name = current_language.Name
+
+ p := Platform{}
+ p.ID = platform.ID
+ p.Name = platform.Name
+ p.URL = platform.URL
+
+ u := UpdatePeriod{}
+ u.ID = current_update_period.ID
+ u.Name = current_update_period.Name
+
owner.Platform = &platform
owner.URL = URL + owner.Name
entry.Owner = &owner
- entry.Platform = &platform
- entry.Language = &current_language
- entry.UpdatePeriod = &current_update_period
+ entry.Platform = &p
+ entry.Language = &l
+ entry.UpdatePeriod = &u
entry.Created_At = app.Now
+ log.Debugf("%+v\n", owner)
+ log.Debugf("%+v\n", entry)
+
Entries = append(Entries, entry)
})
@@ -63,7 +84,8 @@ func (app *App) ScrapeGithub(platform Platform) []Entry {
current_update_period = t
- CURRENT_URL := URL + "/trending/" + l.Name + "?since=" + t.Name
+ CURRENT_URL := platform.URL + "/trending/" + l.Name + "?since=" + t.Name
+ log.Println("Crawling " + CURRENT_URL)
err := c.Visit(CURRENT_URL)
if err != nil {