diff options
| author | horus | 2020-04-02 21:53:30 +0200 |
|---|---|---|
| committer | horus | 2020-04-02 21:53:30 +0200 |
| commit | 0b90b7a3b0f38f0babf4d788f4d7dd5e43253341 (patch) | |
| tree | a5492cf5246522a5dd0e201be3ae988ae7e6245c /database.go | |
| download | curious-crawler-0b90b7a3b0f38f0babf4d788f4d7dd5e43253341.tar.gz | |
Initial commit.
Diffstat (limited to 'database.go')
| -rw-r--r-- | database.go | 425 |
1 files changed, 425 insertions, 0 deletions
diff --git a/database.go b/database.go new file mode 100644 index 0000000..e40279e --- /dev/null +++ b/database.go @@ -0,0 +1,425 @@ +package main + +import ( + log "github.com/Sirupsen/logrus" + "regexp" + "strconv" + + "database/sql" + _ "github.com/go-sql-driver/mysql" +) + +func (app *App) saveStory(s Story) error { + match, err := regexp.MatchString("github.com($|/)", s.Url) + if err != nil { + log.Warn("Failed to parse and match regex") + return err + + } + if match { + /** + * Special handling for github stories. + */ + return app.saveCode(s) + } + + query := ` + INSERT IGNORE article ( + id, + created_at, + updated_at, + url, + title + ) VALUES ( + NULL, + ?, + ?, + ?, + ? + ); + ` + + stmt, err := app.DB.Prepare(query) + if err != nil { + log.Warn("saveStory: Preparing query failed") + return err + } + defer stmt.Close() + + _, err = stmt.Exec(app.Now, app.Now, s.Url, s.Title) + if err != nil { + log.Warn("saveStory: Statement execution failed") + return err + } + log.Debugf("saveStory: Successfull insert for item %d\n", s.Id) + + query = ` + INSERT IGNORE discussion ( + id, + created_at, + updated_at, + article_id, + title, + source, + item_id, + source_url, + posted_on, + comments, + upvotes + ) VALUES ( + NULL, + ?, + ?, + (SELECT id FROM article WHERE url = ?), + ?, + ?, + ?, + ?, + ?, + ?, + ? + ); + ` + stmt2, err := app.DB.Prepare(query) + if err != nil { + log.Warn("saveStory: Preparing second query failed") + return err + } + defer stmt2.Close() + + _, err = stmt2.Exec(app.Now, app.Now, s.Url, s.Title, "HN", s.Id, "https://news.ycombinator.com/item?id="+strconv.Itoa(s.Id), s.Time, s.Descendants, s.Score) + if err != nil { + log.Warn("saveStory: Statement execution failed") + return err + } + + return nil +} + +func (app *App) saveCode(s Story) error { + query := ` + INSERT IGNORE code( + id, + created_at, + updated_at, + url, + title, + source, + item_id, + source_url, + posted_on, + comments, + upvotes + ) VALUES ( + NULL, + ?, + ?, + ?, + ?, + ?, + ?, + ?, + ?, + ?, + ? + ); + ` + stmt, err := app.DB.Prepare(query) + if err != nil { + log.Warn("saveCode: Preparing query failed") + return err + } + defer stmt.Close() + + _, err = stmt.Exec(app.Now, app.Now, s.Url, s.Title, "HN", s.Id, "https://news.ycombinator.com/item?id="+strconv.Itoa(s.Id), s.Time, s.Descendants, s.Score) + if err != nil { + log.Warn("saveCode: Statement execution failed") + return err + } + + return nil +} + +func (app *App) updateDiscussion(story Story) error { + + query := ` + UPDATE discussion + set updated_at = ?, + comments = ?, + upvotes = ? + WHERE item_id = ?; + ` + stmt, err := app.DB.Prepare(query) + if err != nil { + log.Warn("updateDiscussion: Preparing query failed") + return err + } + defer stmt.Close() + + _, err = stmt.Exec(app.Now, story.Descendants, story.Score, story.Id) + if err != nil { + log.Warnf("updateDiscussion: Statement execution failed") + return err + } + log.Debugf("updateDiscussion: Successful update of %d with new Score: %d, Comments: %d\n", story.Id, story.Score, story.Descendants) + + return nil +} + +func (app *App) updateArticleUrl(id int, url string) error { + query := ` + UPDATE article + set updated_at = ?, + url = ? + WHERE id = ? + ` + stmt, err := app.DB.Prepare(query) + if err != nil { + log.Warn("updateArticleUrl: Preparing query failed") + return err + } + defer stmt.Close() + + _, err = stmt.Exec(app.Now, url, id) + if err != nil { + log.Warnf("updateArticleUrl: Statement execution failed") + return err + } + log.Debugf("updateArticleUrl: Successful update new url: %s\n", url) + + return nil + +} + +func (app *App) wikipediaMergeArticles(id_to_delete int, correct_url string) error { + query := "SELECT id FROM discussion WHERE article_id = ?" + row := app.DB.QueryRow(query, id_to_delete) + var disc_id int + err := row.Scan(&disc_id) + if err != nil { + log.Warnf("wikipediaMergeArticles: Query first row failed. id: %d url: %s", id_to_delete, correct_url) + return err + } + query = "SELECT id FROM article WHERE url = ?" + row = app.DB.QueryRow(query, correct_url) + var article_id int + err = row.Scan(&article_id) + if err != nil { + log.Warn("wikipediaMergeArticles: Query second row failed") + return err + } + + query = "UPDATE discussion SET article_id = ?, updated_at = ? WHERE id = ?;" + stmt, err := app.DB.Prepare(query) + if err != nil { + log.Warn("wikipediaMergeArticles: Preparing query failed") + return err + } + defer stmt.Close() + + _, err = stmt.Exec(article_id, app.Now, disc_id) + if err != nil { + log.Warn("wikipediaMergeArticles: Update discussion failed") + return err + } + + query = "UPDATE article_category SET article_id = ? WHERE id = ?;" + stmt2, err := app.DB.Prepare(query) + if err != nil { + log.Warn("wikipediaMergeArticles: Preparing article_category query failed") + return err + } + defer stmt2.Close() + + _, err = stmt2.Exec(article_id, id_to_delete) + if err != nil { + log.Warn("wikipediaMergeArticles: Update article_category failed") + return err + } + + return nil +} + +func (app *App) deleteOrphanedArticles() error { + query := ` + DELETE a FROM + article AS a + LEFT JOIN + discussion AS d ON a.id = d.article_id + WHERE d.id IS NULL;` + _, err := app.DB.Exec(query) + if err != nil { + log.Warnf("deleteOrphanedArticles: Executing query failed: %s", err.Error()) + return err + } + + return nil +} + +func (app *App) saveCategory(article_id int, categories []string) { + + for _, category := range categories { + query := "SELECT id FROM category WHERE name = ?" + row := app.DB.QueryRow(query, category) + var category_id int + err := row.Scan(&category_id) + + if err != nil { + if err != sql.ErrNoRows { + log.Warn("saveCategory: Selecting category id failed") + log.Fatal(err) + } + } + + if err == sql.ErrNoRows { + query = ` + INSERT INTO category ( + id, + created_at, + updated_at, + name + ) VALUES ( + null, + ?, + ?, + ? + )` + + stmt, err := app.DB.Prepare(query) + if err != nil { + log.Fatal(err) + } + defer stmt.Close() + + result, err := stmt.Exec(app.Now, app.Now, category) + if err != nil { + log.Fatal(err) + } + + category_id64, err := result.LastInsertId() + category_id = int(category_id64) + if err != nil { + log.Fatal(err) + } + } + + query = ` + INSERT IGNORE article_category ( + id, + article_id, + category_id + ) VALUES ( + null, + ?, + ? + ) + ` + + stmt2, err := app.DB.Prepare(query) + if err != nil { + log.Fatal(err) + } + _, err = stmt2.Exec(article_id, category_id) + if err != nil { + log.Fatal(err) + } + } +} + +func (app *App) updateWikipediaUrls() { + rows, err := app.DB.Query("SELECT DISTINCT id, url FROM article;") + if err != nil { + log.Fatal(err) + } + + for rows.Next() { + var wiki_url string + var article_id int + + err = rows.Scan(&article_id, &wiki_url) + if err != nil { + log.Fatal(err) + } + + real_url := wikipediaRealUrl(wiki_url) + if real_url != wiki_url && "" != real_url { + + /** + * Check if we already have the canonical url and merge if necessary. + */ + row := app.DB.QueryRow("SELECT count(*) FROM article WHERE url = ?", real_url) + var count int + err = row.Scan(&count) + if err != nil { + log.Fatal(err) + } + if 0 < count { + err = app.wikipediaMergeArticles(article_id, real_url) + if err != nil { + log.Fatal(err) + } + continue + } + + stmt, err := app.DB.Prepare("UPDATE article SET url = ? WHERE id = ?") + if err != nil { + log.Warnf("updateWikipediaUrls: Preparing query failed for: (%d) %s", article_id, wiki_url) + log.Fatal(err) + } + defer stmt.Close() + + _, err = stmt.Exec(real_url, article_id) + if err != nil { + log.Warnf("updateWikipediaUrls: Executing statement failed for: (%d) %s", article_id, wiki_url) + log.Fatal(err) + } + log.Debugf("(%d) Updated from %s to %s", article_id, wiki_url, real_url) + } + } +} + +func (app *App) getArticleIdFromUrl(wiki_url string) int { + row := app.DB.QueryRow("SELECT id FROM article WHERE url = ?", wiki_url) + var article_id int + err := row.Scan(&article_id) + if err != nil { + log.Warnf("getArticleIdFromUrl: Query or scanning failed for: %s", wiki_url) + log.Fatal(err) + } + return article_id +} + +func (app *App) fixAllCategories() { + rows, err := app.DB.Query("SELECT id, name FROM category;") + if err != nil { + log.Fatal(err) + } + + for rows.Next() { + var category_id int + var category_name string + + err = rows.Scan(&category_id, &category_name) + if err != nil { + log.Fatal(err) + } + + category_fixed := normalizeCategory(category_name) + if category_fixed != category_name { + log.Warn(category_fixed) + stmt, err := app.DB.Prepare("UPDATE category SET name = ? WHERE id = ? AND name = ?") + if err != nil { + log.Warnf("fixAllCategories: Preparing query failed for: (%d) %s", category_id, category_fixed) + log.Fatal(err) + } + defer stmt.Close() + + _, err = stmt.Exec(category_fixed, category_id, category_name) + if err != nil { + log.Warnf("fixAllCategories: Exec stmt failed for: (%d) %s", category_id, category_fixed) + log.Fatal(err) + } + } + + } +} |
