package main

import (
	"strconv"
	"strings"

	"github.com/gocolly/colly"
)

// ScrapeGithub scrapes the trending pages of the given platform for every
// combination of language (app.GetLanguages) and update period
// (app.GetUpdatePeriods) and returns one Entry per listed repository.
//
// For each combination it visits
//
//	<platform.URL>/trending/<language name>?since=<period name>
//
// and converts every "ol.repo-list > li" element of the page into an Entry
// carrying the repository URL, title, synopsis, star count, owner, platform,
// language, update period and creation time (app.Now).
//
// Errors never abort the scrape: a page that cannot be visited or a star
// count that cannot be parsed is reported through Warn and processing
// continues. The returned slice is never nil.
func (app *App) ScrapeGithub(platform Platform) []Entry {
	baseURL := platform.URL
	languages := app.GetLanguages()
	periods := app.GetUpdatePeriods()

	// Language and update period of the page being visited. They are written
	// by the loop at the bottom right before each Visit and read by the
	// OnHTML callback.
	// NOTE(review): this relies on c.Visit running the callbacks before it
	// returns — confirm customCollector does not enable async mode.
	var (
		currentLanguage Language
		currentPeriod   UpdatePeriod
	)

	entries := []Entry{}

	c := app.customCollector([]string{"www.github.com", "github.com"})

	c.OnHTML("ol.repo-list > li", func(e *colly.HTMLElement) {
		entry := Entry{}
		owner := Owner{}

		e.ForEach("div > h3", func(_ int, h *colly.HTMLElement) {
			entry.URL = baseURL + h.ChildAttr("a", "href")
			entry.Title = h.ChildText("a")
			owner.Name = strings.TrimSuffix(h.ChildText("a > span"), " /")
		})

		e.ForEach("div.py-1", func(_ int, d *colly.HTMLElement) {
			entry.Synopsis = d.ChildText("p")
		})

		e.ForEach("div.text-gray", func(i int, d *colly.HTMLElement) {
			// Only the first matching div is used for the star count.
			if i != 0 {
				return
			}
			// Drop thousands separators ("1,234") so Atoi accepts the value.
			raw := strings.Replace(d.ChildText("a.muted-text"), ",", "", -1)
			stars, err := strconv.Atoi(raw)
			if err != nil {
				Warn(err, "Github: Extracting stars from "+entry.Title+" failed")
			}
			// On a parse error stars is 0, matching the previous behaviour.
			entry.Stars = stars
		})

		// Give each entry its own copy of the language and update period.
		// Pointing at the shared variables would make every returned entry
		// report whatever combination was visited last.
		language := currentLanguage
		period := currentPeriod

		owner.Platform = &platform
		// Join with exactly one slash; the base URL is used without a
		// trailing slash everywhere else in this function.
		owner.URL = strings.TrimSuffix(baseURL, "/") + "/" + owner.Name

		entry.Owner = &owner
		entry.Platform = &platform
		entry.Language = &language
		entry.UpdatePeriod = &period
		entry.Created_At = app.Now

		entries = append(entries, entry)
	})

	for _, l := range languages {
		currentLanguage = l
		for _, p := range periods {
			currentPeriod = p
			target := baseURL + "/trending/" + l.Name + "?since=" + p.Name
			if err := c.Visit(target); err != nil {
				Warn(err, "Scraping Platform "+platform.Name+" failed with URL: "+target)
			}
		}
	}

	return entries
}