diff options
| author | horus | 2019-04-17 12:00:48 +0200 |
|---|---|---|
| committer | horus | 2019-04-17 12:00:48 +0200 |
| commit | 421a7232e7f0436705b9f2819cd51227ac6cd821 (patch) | |
| tree | 1692db806bcba83a57a5fcaea02796991a90c700 | |
| parent | deea79c010ec7db7a374f23ca5cdabc6e45433db (diff) | |
| download | ghrss-421a7232e7f0436705b9f2819cd51227ac6cd821.tar.gz | |
v0.1
| -rw-r--r-- | database.go | 44 | ||||
| -rw-r--r-- | getdata.go | 19 | ||||
| -rw-r--r-- | github.go | 38 | ||||
| -rw-r--r-- | main.go | 2 |
4 files changed, 73 insertions, 30 deletions
diff --git a/database.go b/database.go index 7d56018..fda19f9 100644 --- a/database.go +++ b/database.go @@ -1,35 +1,32 @@ package main -import ( -/* - "database/sql" - "fmt" - "strings" -*/ -) - func (app *App) createTables() error { + var err error + /** * Copied from schema.sql * TODO: Load this from the file itself. */ - query := ` + queries := []string{` CREATE TABLE IF NOT EXISTS platform ( id INT PRIMARY KEY AUTO_INCREMENT, name VARCHAR(255) UNIQUE NOT NULL, url VARCHAR(255) UNIQUE NOT NULL ) CHARSET=utf8; - + `, + ` CREATE TABLE IF NOT EXISTS language ( id INT PRIMARY KEY AUTO_INCREMENT, name VARCHAR(255) UNIQUE NOT NULL ) CHARSET=utf8; - + `, + ` CREATE TABLE IF NOT EXISTS update_period ( id INT PRIMARY KEY AUTO_INCREMENT, name VARCHAR(255) UNIQUE NOT NULL ) CHARSET=utf8; - + `, + ` CREATE TABLE IF NOT EXISTS owner ( id INT PRIMARY KEY AUTO_INCREMENT, name VARCHAR(255) NOT NULL, @@ -37,7 +34,8 @@ CREATE TABLE IF NOT EXISTS owner ( platform INT NOT NULL, CONSTRAINT fk_owner_platform FOREIGN KEY (platform) REFERENCES platform(id) ) CHARSET=utf8; - + `, + ` CREATE TABLE IF NOT EXISTS entry ( id INT PRIMARY KEY AUTO_INCREMENT, title VARCHAR(255) NOT NULL, @@ -54,11 +52,13 @@ CREATE TABLE IF NOT EXISTS entry ( CONSTRAINT fk_entry_language FOREIGN KEY (language) REFERENCES language(id), CONSTRAINT fk_entry_period FOREIGN KEY (update_period) REFERENCES update_period(id) ) CHARSET=utf8; - ` + `} - _, err := app.DB.Exec(query) - if err != nil { - return err + for _, query := range queries { + _, err := app.DB.Exec(query) + if err != nil { + return err + } } /** @@ -141,7 +141,7 @@ func (app *App) SaveEntries(entries []Entry) error { ?, ?, ?, - ?, + ? 
); ` stmt, err := app.DB.Prepare(query) @@ -157,6 +157,12 @@ func (app *App) SaveEntries(entries []Entry) error { if err != nil { continue } + + // Populates the owner struct with the correct id + e.Owner, err = app.GetOwnerID(e.Owner) + if err != nil { + continue + } _, err = stmt.Exec(e.Title, e.Synopsis, e.Owner.ID, e.Platform.ID, e.URL, e.Language.ID, e.Stars, e.UpdatePeriod.ID, app.Now) if err != nil { @@ -180,7 +186,7 @@ func (app *App) SaveOwner(owner Owner) error { NULL, ?, ?, - ?, + ? );` stmt, err := app.DB.Prepare(query) @@ -5,7 +5,7 @@ func (app *App) GetPlatforms() []Platform { platforms := []Platform{} query := ` - SELECT id, name, FROM Platform; + SELECT id, name, url FROM platform; ` rows, err := app.DB.Queryx(query) @@ -30,7 +30,7 @@ func (app *App) GetLanguages() []Language { languages := []Language{} query := ` - SELECT id, name, FROM language; + SELECT id, name FROM language; ` rows, err := app.DB.Queryx(query) @@ -55,7 +55,7 @@ func (app *App) GetUpdatePeriods() []UpdatePeriod { periods := []UpdatePeriod{} query := ` - SELECT id, name, FROM update_period; + SELECT id, name FROM update_period; ` rows, err := app.DB.Queryx(query) @@ -74,3 +74,16 @@ func (app *App) GetUpdatePeriods() []UpdatePeriod { return periods } + +func (app *App) GetOwnerID(owner *Owner) (*Owner, error) { + + query := `SELECT id, name, url FROM owner WHERE url = ?` + + err := app.DB.QueryRowx(query, owner.URL).StructScan(owner) + if err != nil { + Warn(err, "GetOwnerID: Query or StructScan failed") + return &Owner{}, err + } + + return owner, nil +} @@ -4,10 +4,12 @@ import ( "strconv" "strings" + log "github.com/Sirupsen/logrus" "github.com/gocolly/colly" ) func (app *App) ScrapeGithub(platform Platform) []Entry { + var err error URL := platform.URL @@ -26,32 +28,51 @@ func (app *App) ScrapeGithub(platform Platform) []Entry { e.ForEach("div > h3", func(i int, e *colly.HTMLElement) { entry.URL = URL + e.ChildAttr("a", "href") - entry.Title = e.ChildText("a") owner.Name = 
strings.TrimSuffix(e.ChildText("a > span"), " /") + owner.Name = strings.TrimSpace(owner.Name) + entry.Title = strings.TrimPrefix(e.ChildText("a"), owner.Name+" /") + entry.Title = strings.TrimSpace(entry.Title) }) e.ForEach("div.py-1", func(i int, e *colly.HTMLElement) { entry.Synopsis = e.ChildText("p") }) - e.ForEach("div.text-gray", func(i int, e *colly.HTMLElement) { - if i == 0 { - entry.Stars, err = strconv.Atoi(e.ChildText("a.muted-text")) + e.ForEach("div.text-gray > a.muted-link", func(i int, e *colly.HTMLElement) { + if strings.Contains(e.Attr("href"), "stargazers") { + stars := strings.TrimSpace(strings.Replace(e.Text, ",", "", -1)) + entry.Stars, err = strconv.Atoi(stars) if err != nil { Warn(err, "Github: Extracting stars from "+entry.Title+" failed") } } }) + l := Language{} + l.ID = current_language.ID + l.Name = current_language.Name + + p := Platform{} + p.ID = platform.ID + p.Name = platform.Name + p.URL = platform.URL + + u := UpdatePeriod{} + u.ID = current_update_period.ID + u.Name = current_update_period.Name + owner.Platform = &platform owner.URL = URL + owner.Name entry.Owner = &owner - entry.Platform = &platform - entry.Language = &current_language - entry.UpdatePeriod = &current_update_period + entry.Platform = &p + entry.Language = &l + entry.UpdatePeriod = &u entry.Created_At = app.Now + log.Debugf("%+v\n", owner) + log.Debugf("%+v\n", entry) + Entries = append(Entries, entry) }) @@ -63,7 +84,8 @@ func (app *App) ScrapeGithub(platform Platform) []Entry { current_update_period = t - CURRENT_URL := URL + "/trending/" + l.Name + "?since=" + t.Name + CURRENT_URL := platform.URL + "/trending/" + l.Name + "?since=" + t.Name + log.Println("Crawling " + CURRENT_URL) err := c.Visit(CURRENT_URL) if err != nil { @@ -1,7 +1,9 @@ package main import ( + _ "database/sql" "fmt" + _ "github.com/go-sql-driver/mysql" "time" log "github.com/Sirupsen/logrus" |
