diff options
| author | horus | 2019-04-17 12:00:48 +0200 |
|---|---|---|
| committer | horus | 2019-04-17 12:00:48 +0200 |
| commit | 421a7232e7f0436705b9f2819cd51227ac6cd821 (patch) | |
| tree | 1692db806bcba83a57a5fcaea02796991a90c700 /github.go | |
| parent | deea79c010ec7db7a374f23ca5cdabc6e45433db (diff) | |
| download | ghrss-421a7232e7f0436705b9f2819cd51227ac6cd821.tar.gz | |
v0.1
Diffstat (limited to 'github.go')
| -rw-r--r-- | github.go | 38 |
1 files changed, 30 insertions, 8 deletions
@@ -4,10 +4,12 @@ import ( "strconv" "strings" + log "github.com/Sirupsen/logrus" "github.com/gocolly/colly" ) func (app *App) ScrapeGithub(platform Platform) []Entry { + var err error URL := platform.URL @@ -26,32 +28,51 @@ func (app *App) ScrapeGithub(platform Platform) []Entry { e.ForEach("div > h3", func(i int, e *colly.HTMLElement) { entry.URL = URL + e.ChildAttr("a", "href") - entry.Title = e.ChildText("a") owner.Name = strings.TrimSuffix(e.ChildText("a > span"), " /") + owner.Name = strings.TrimSpace(owner.Name) + entry.Title = strings.TrimPrefix(e.ChildText("a"), owner.Name+" /") + entry.Title = strings.TrimSpace(entry.Title) }) e.ForEach("div.py-1", func(i int, e *colly.HTMLElement) { entry.Synopsis = e.ChildText("p") }) - e.ForEach("div.text-gray", func(i int, e *colly.HTMLElement) { - if i == 0 { - entry.Stars, err = strconv.Atoi(e.ChildText("a.muted-text")) + e.ForEach("div.text-gray > a.muted-link", func(i int, e *colly.HTMLElement) { + if strings.Contains(e.Attr("href"), "stargazers") { + stars := strings.TrimSpace(strings.Replace(e.Text, ",", "", -1)) + entry.Stars, err = strconv.Atoi(stars) if err != nil { Warn(err, "Github: Extracting stars from "+entry.Title+" failed") } } }) + l := Language{} + l.ID = current_language.ID + l.Name = current_language.Name + + p := Platform{} + p.ID = platform.ID + p.Name = platform.Name + p.URL = platform.URL + + u := UpdatePeriod{} + u.ID = current_update_period.ID + u.Name = current_update_period.Name + owner.Platform = &platform owner.URL = URL + owner.Name entry.Owner = &owner - entry.Platform = &platform - entry.Language = &current_language - entry.UpdatePeriod = &current_update_period + entry.Platform = &p + entry.Language = &l + entry.UpdatePeriod = &u entry.Created_At = app.Now + log.Debugf("%+v\n", owner) + log.Debugf("%+v\n", entry) + Entries = append(Entries, entry) }) @@ -63,7 +84,8 @@ func (app *App) ScrapeGithub(platform Platform) []Entry { current_update_period = t - CURRENT_URL := URL + "/trending/" + l.Name + 
"?since=" + t.Name + CURRENT_URL := platform.URL + "/trending/" + l.Name + "?since=" + t.Name + log.Println("Crawling " + CURRENT_URL) err := c.Visit(CURRENT_URL) if err != nil { |
