summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--database.go2
-rw-r--r--github.go73
-rw-r--r--main.go7
-rw-r--r--scrape.go6
4 files changed, 81 insertions, 7 deletions
diff --git a/database.go b/database.go
index 044fa08..7d56018 100644
--- a/database.go
+++ b/database.go
@@ -69,7 +69,7 @@ CREATE TABLE IF NOT EXISTS entry (
INSERT IGNORE INTO platform (id, name, url) VALUES (
NULL,
"Github",
- "https://github.com/trending"
+ "https://github.com/"
);
`
_, err = app.DB.Exec(init_platform_query)
diff --git a/github.go b/github.go
index f673d1d..410d847 100644
--- a/github.go
+++ b/github.go
@@ -1,5 +1,76 @@
package main
+import (
+ "strconv"
+ "strings"
+
+ "github.com/gocolly/colly"
+)
+
func (app *App) ScrapeGithub(platform Platform) []Entry {
- return []Entry{}
+ var err error
+
+ URL := platform.URL
+ Languages := app.GetLanguages()
+ UpdatePeriods := app.GetUpdatePeriods()
+ current_language := Language{}
+ current_update_period := UpdatePeriod{}
+
+ Entries := []Entry{}
+
+ c := app.customCollector([]string{"www.github.com", "github.com"})
+
+ c.OnHTML("ol.repo-list > li", func(e *colly.HTMLElement) {
+ entry := Entry{}
+ owner := Owner{}
+
+ e.ForEach("div > h3", func(i int, e *colly.HTMLElement) {
+ entry.URL = URL + e.ChildAttr("a", "href")
+ entry.Title = e.ChildText("a")
+ owner.Name = strings.TrimSuffix(e.ChildText("a > span"), " /")
+ })
+
+ e.ForEach("div.py-1", func(i int, e *colly.HTMLElement) {
+ entry.Synopsis = e.ChildText("p")
+ })
+
+ e.ForEach("div.text-gray", func(i int, e *colly.HTMLElement) {
+ if i == 0 {
+ entry.Stars, err = strconv.Atoi(e.ChildText("a.muted-text"))
+ if err != nil {
+ Warn(err, "Github: Extracting stars from "+entry.Title+" failed")
+ }
+ }
+ })
+
+ owner.Platform = &platform
+ owner.URL = URL + owner.Name
+
+ entry.Owner = &owner
+ entry.Platform = &platform
+ entry.Language = &current_language
+ entry.UpdatePeriod = &current_update_period
+ entry.Created_At = app.Now
+
+ Entries = append(Entries, entry)
+ })
+
+ for _, l := range Languages {
+
+ current_language = l
+
+ for _, t := range UpdatePeriods {
+
+ current_update_period = t
+
+ CURRENT_URL := URL + "/trending/" + l.Name + "?since=" + t.Name
+
+ err := c.Visit(CURRENT_URL)
+ if err != nil {
+ Warn(err, "Scraping Platform "+platform.Name+" failed with URL: "+CURRENT_URL)
+ }
+ }
+ }
+
+ return Entries
}
diff --git a/main.go b/main.go
index 39fbe49..0459675 100644
--- a/main.go
+++ b/main.go
@@ -12,7 +12,7 @@ import (
type App struct {
Config *Config
DB *sqlx.DB
- Now int64
+ Now time.Time
Debug bool
}
@@ -25,7 +25,7 @@ func main() {
// overwrite the global
_conf = Config{}
- app.Now = time.Now().Unix()
+ app.Now = time.Now()
log.Debug(fmt.Sprintf(`Connecting to "%s" database "%s" as user "%s" on host "%s:%s" with extra options "%s".`, app.Config.DBDriver, app.Config.DBDBName, app.Config.DBUser, app.Config.DBHost, app.Config.DBPort, app.Config.DBOptions))
@@ -44,4 +44,7 @@ func main() {
Fatal(err, "Creating table failed")
}
+ platforms := app.GetPlatforms()
+ app.Scrape(platforms)
+
}
diff --git a/scrape.go b/scrape.go
index ebb02fa..3b7887b 100644
--- a/scrape.go
+++ b/scrape.go
@@ -7,14 +7,14 @@ import (
"github.com/gocolly/colly"
)
-func (app *App) ScrapeHTML(platforms []Platform) {
+func (app *App) Scrape(platforms []Platform) {
wait := make(chan bool)
count := 0
for _, platform := range platforms {
- go app.Scrape(platform, wait)
+ go app.ScrapeHTML(platform, wait)
count++
}
@@ -25,7 +25,7 @@ func (app *App) ScrapeHTML(platforms []Platform) {
}
}
-func (app *App) Scrape(platform Platform, wait chan bool) {
+func (app *App) ScrapeHTML(platform Platform, wait chan bool) {
var Entries []Entry
var err error