diff options
Diffstat (limited to 'github.go')
| -rw-r--r-- | github.go | 38 |
1 files changed, 30 insertions, 8 deletions
@@ -4,10 +4,12 @@ import ( "strconv" "strings" + log "github.com/Sirupsen/logrus" "github.com/gocolly/colly" ) func (app *App) ScrapeGithub(platform Platform) []Entry { + var err error URL := platform.URL @@ -26,32 +28,51 @@ func (app *App) ScrapeGithub(platform Platform) []Entry { e.ForEach("div > h3", func(i int, e *colly.HTMLElement) { entry.URL = URL + e.ChildAttr("a", "href") - entry.Title = e.ChildText("a") owner.Name = strings.TrimSuffix(e.ChildText("a > span"), " /") + owner.Name = strings.TrimSpace(owner.Name) + entry.Title = strings.TrimPrefix(e.ChildText("a"), owner.Name+" /") + entry.Title = strings.TrimSpace(entry.Title) }) e.ForEach("div.py-1", func(i int, e *colly.HTMLElement) { entry.Synopsis = e.ChildText("p") }) - e.ForEach("div.text-gray", func(i int, e *colly.HTMLElement) { - if i == 0 { - entry.Stars, err = strconv.Atoi(e.ChildText("a.muted-text")) + e.ForEach("div.text-gray > a.muted-link", func(i int, e *colly.HTMLElement) { + if strings.Contains(e.Attr("href"), "stargazers") { + stars := strings.TrimSpace(strings.Replace(e.Text, ",", "", -1)) + entry.Stars, err = strconv.Atoi(stars) if err != nil { Warn(err, "Github: Extracting stars from "+entry.Title+" failed") } } }) + l := Language{} + l.ID = current_language.ID + l.Name = current_language.Name + + p := Platform{} + p.ID = platform.ID + p.Name = platform.Name + p.URL = platform.URL + + u := UpdatePeriod{} + u.ID = current_update_period.ID + u.Name = current_update_period.Name + owner.Platform = &platform owner.URL = URL + owner.Name entry.Owner = &owner - entry.Platform = &platform - entry.Language = &current_language - entry.UpdatePeriod = &current_update_period + entry.Platform = &p + entry.Language = &l + entry.UpdatePeriod = &u entry.Created_At = app.Now + log.Debugf("%+v\n", owner) + log.Debugf("%+v\n", entry) + Entries = append(Entries, entry) }) @@ -63,7 +84,8 @@ func (app *App) ScrapeGithub(platform Platform) []Entry { current_update_period = t - CURRENT_URL := URL + "/trending/" + l.Name + 
"?since=" + t.Name + CURRENT_URL := platform.URL + "/trending/" + l.Name + "?since=" + t.Name + log.Println("Crawling " + CURRENT_URL) err := c.Visit(CURRENT_URL) if err != nil { |
