summaryrefslogtreecommitdiff
path: root/database.go
diff options
context:
space:
mode:
authorhorus2020-04-02 21:53:30 +0200
committerhorus2020-04-02 21:53:30 +0200
commit0b90b7a3b0f38f0babf4d788f4d7dd5e43253341 (patch)
treea5492cf5246522a5dd0e201be3ae988ae7e6245c /database.go
downloadcurious-crawler-0b90b7a3b0f38f0babf4d788f4d7dd5e43253341.tar.gz
Initial commit.
Diffstat (limited to 'database.go')
-rw-r--r--database.go425
1 files changed, 425 insertions, 0 deletions
diff --git a/database.go b/database.go
new file mode 100644
index 0000000..e40279e
--- /dev/null
+++ b/database.go
@@ -0,0 +1,425 @@
+package main
+
+import (
+ log "github.com/Sirupsen/logrus"
+ "regexp"
+ "strconv"
+
+ "database/sql"
+ _ "github.com/go-sql-driver/mysql"
+)
+
+func (app *App) saveStory(s Story) error {
+ match, err := regexp.MatchString("github.com($|/)", s.Url)
+ if err != nil {
+ log.Warn("Failed to parse and match regex")
+ return err
+
+ }
+ if match {
+ /**
+ * Special handling for github stories.
+ */
+ return app.saveCode(s)
+ }
+
+ query := `
+ INSERT IGNORE article (
+ id,
+ created_at,
+ updated_at,
+ url,
+ title
+ ) VALUES (
+ NULL,
+ ?,
+ ?,
+ ?,
+ ?
+ );
+ `
+
+ stmt, err := app.DB.Prepare(query)
+ if err != nil {
+ log.Warn("saveStory: Preparing query failed")
+ return err
+ }
+ defer stmt.Close()
+
+ _, err = stmt.Exec(app.Now, app.Now, s.Url, s.Title)
+ if err != nil {
+ log.Warn("saveStory: Statement execution failed")
+ return err
+ }
+ log.Debugf("saveStory: Successfull insert for item %d\n", s.Id)
+
+ query = `
+ INSERT IGNORE discussion (
+ id,
+ created_at,
+ updated_at,
+ article_id,
+ title,
+ source,
+ item_id,
+ source_url,
+ posted_on,
+ comments,
+ upvotes
+ ) VALUES (
+ NULL,
+ ?,
+ ?,
+ (SELECT id FROM article WHERE url = ?),
+ ?,
+ ?,
+ ?,
+ ?,
+ ?,
+ ?,
+ ?
+ );
+ `
+ stmt2, err := app.DB.Prepare(query)
+ if err != nil {
+ log.Warn("saveStory: Preparing second query failed")
+ return err
+ }
+ defer stmt2.Close()
+
+ _, err = stmt2.Exec(app.Now, app.Now, s.Url, s.Title, "HN", s.Id, "https://news.ycombinator.com/item?id="+strconv.Itoa(s.Id), s.Time, s.Descendants, s.Score)
+ if err != nil {
+ log.Warn("saveStory: Statement execution failed")
+ return err
+ }
+
+ return nil
+}
+
+func (app *App) saveCode(s Story) error {
+ query := `
+ INSERT IGNORE code(
+ id,
+ created_at,
+ updated_at,
+ url,
+ title,
+ source,
+ item_id,
+ source_url,
+ posted_on,
+ comments,
+ upvotes
+ ) VALUES (
+ NULL,
+ ?,
+ ?,
+ ?,
+ ?,
+ ?,
+ ?,
+ ?,
+ ?,
+ ?,
+ ?
+ );
+ `
+ stmt, err := app.DB.Prepare(query)
+ if err != nil {
+ log.Warn("saveCode: Preparing query failed")
+ return err
+ }
+ defer stmt.Close()
+
+ _, err = stmt.Exec(app.Now, app.Now, s.Url, s.Title, "HN", s.Id, "https://news.ycombinator.com/item?id="+strconv.Itoa(s.Id), s.Time, s.Descendants, s.Score)
+ if err != nil {
+ log.Warn("saveCode: Statement execution failed")
+ return err
+ }
+
+ return nil
+}
+
+func (app *App) updateDiscussion(story Story) error {
+
+ query := `
+ UPDATE discussion
+ set updated_at = ?,
+ comments = ?,
+ upvotes = ?
+ WHERE item_id = ?;
+ `
+ stmt, err := app.DB.Prepare(query)
+ if err != nil {
+ log.Warn("updateDiscussion: Preparing query failed")
+ return err
+ }
+ defer stmt.Close()
+
+ _, err = stmt.Exec(app.Now, story.Descendants, story.Score, story.Id)
+ if err != nil {
+ log.Warnf("updateDiscussion: Statement execution failed")
+ return err
+ }
+ log.Debugf("updateDiscussion: Successful update of %d with new Score: %d, Comments: %d\n", story.Id, story.Score, story.Descendants)
+
+ return nil
+}
+
+func (app *App) updateArticleUrl(id int, url string) error {
+ query := `
+ UPDATE article
+ set updated_at = ?,
+ url = ?
+ WHERE id = ?
+ `
+ stmt, err := app.DB.Prepare(query)
+ if err != nil {
+ log.Warn("updateArticleUrl: Preparing query failed")
+ return err
+ }
+ defer stmt.Close()
+
+ _, err = stmt.Exec(app.Now, url, id)
+ if err != nil {
+ log.Warnf("updateArticleUrl: Statement execution failed")
+ return err
+ }
+ log.Debugf("updateArticleUrl: Successful update new url: %s\n", url)
+
+ return nil
+
+}
+
+func (app *App) wikipediaMergeArticles(id_to_delete int, correct_url string) error {
+ query := "SELECT id FROM discussion WHERE article_id = ?"
+ row := app.DB.QueryRow(query, id_to_delete)
+ var disc_id int
+ err := row.Scan(&disc_id)
+ if err != nil {
+ log.Warnf("wikipediaMergeArticles: Query first row failed. id: %d url: %s", id_to_delete, correct_url)
+ return err
+ }
+ query = "SELECT id FROM article WHERE url = ?"
+ row = app.DB.QueryRow(query, correct_url)
+ var article_id int
+ err = row.Scan(&article_id)
+ if err != nil {
+ log.Warn("wikipediaMergeArticles: Query second row failed")
+ return err
+ }
+
+ query = "UPDATE discussion SET article_id = ?, updated_at = ? WHERE id = ?;"
+ stmt, err := app.DB.Prepare(query)
+ if err != nil {
+ log.Warn("wikipediaMergeArticles: Preparing query failed")
+ return err
+ }
+ defer stmt.Close()
+
+ _, err = stmt.Exec(article_id, app.Now, disc_id)
+ if err != nil {
+ log.Warn("wikipediaMergeArticles: Update discussion failed")
+ return err
+ }
+
+ query = "UPDATE article_category SET article_id = ? WHERE id = ?;"
+ stmt2, err := app.DB.Prepare(query)
+ if err != nil {
+ log.Warn("wikipediaMergeArticles: Preparing article_category query failed")
+ return err
+ }
+ defer stmt2.Close()
+
+ _, err = stmt2.Exec(article_id, id_to_delete)
+ if err != nil {
+ log.Warn("wikipediaMergeArticles: Update article_category failed")
+ return err
+ }
+
+ return nil
+}
+
+func (app *App) deleteOrphanedArticles() error {
+ query := `
+ DELETE a FROM
+ article AS a
+ LEFT JOIN
+ discussion AS d ON a.id = d.article_id
+ WHERE d.id IS NULL;`
+ _, err := app.DB.Exec(query)
+ if err != nil {
+ log.Warnf("deleteOrphanedArticles: Executing query failed: %s", err.Error())
+ return err
+ }
+
+ return nil
+}
+
+func (app *App) saveCategory(article_id int, categories []string) {
+
+ for _, category := range categories {
+ query := "SELECT id FROM category WHERE name = ?"
+ row := app.DB.QueryRow(query, category)
+ var category_id int
+ err := row.Scan(&category_id)
+
+ if err != nil {
+ if err != sql.ErrNoRows {
+ log.Warn("saveCategory: Selecting category id failed")
+ log.Fatal(err)
+ }
+ }
+
+ if err == sql.ErrNoRows {
+ query = `
+ INSERT INTO category (
+ id,
+ created_at,
+ updated_at,
+ name
+ ) VALUES (
+ null,
+ ?,
+ ?,
+ ?
+ )`
+
+ stmt, err := app.DB.Prepare(query)
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer stmt.Close()
+
+ result, err := stmt.Exec(app.Now, app.Now, category)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ category_id64, err := result.LastInsertId()
+ category_id = int(category_id64)
+ if err != nil {
+ log.Fatal(err)
+ }
+ }
+
+ query = `
+ INSERT IGNORE article_category (
+ id,
+ article_id,
+ category_id
+ ) VALUES (
+ null,
+ ?,
+ ?
+ )
+ `
+
+ stmt2, err := app.DB.Prepare(query)
+ if err != nil {
+ log.Fatal(err)
+ }
+ _, err = stmt2.Exec(article_id, category_id)
+ if err != nil {
+ log.Fatal(err)
+ }
+ }
+}
+
+func (app *App) updateWikipediaUrls() {
+ rows, err := app.DB.Query("SELECT DISTINCT id, url FROM article;")
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ for rows.Next() {
+ var wiki_url string
+ var article_id int
+
+ err = rows.Scan(&article_id, &wiki_url)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ real_url := wikipediaRealUrl(wiki_url)
+ if real_url != wiki_url && "" != real_url {
+
+ /**
+ * Check if we already have the canonical url and merge if necessary.
+ */
+ row := app.DB.QueryRow("SELECT count(*) FROM article WHERE url = ?", real_url)
+ var count int
+ err = row.Scan(&count)
+ if err != nil {
+ log.Fatal(err)
+ }
+ if 0 < count {
+ err = app.wikipediaMergeArticles(article_id, real_url)
+ if err != nil {
+ log.Fatal(err)
+ }
+ continue
+ }
+
+ stmt, err := app.DB.Prepare("UPDATE article SET url = ? WHERE id = ?")
+ if err != nil {
+ log.Warnf("updateWikipediaUrls: Preparing query failed for: (%d) %s", article_id, wiki_url)
+ log.Fatal(err)
+ }
+ defer stmt.Close()
+
+ _, err = stmt.Exec(real_url, article_id)
+ if err != nil {
+ log.Warnf("updateWikipediaUrls: Executing statement failed for: (%d) %s", article_id, wiki_url)
+ log.Fatal(err)
+ }
+ log.Debugf("(%d) Updated from %s to %s", article_id, wiki_url, real_url)
+ }
+ }
+}
+
+func (app *App) getArticleIdFromUrl(wiki_url string) int {
+ row := app.DB.QueryRow("SELECT id FROM article WHERE url = ?", wiki_url)
+ var article_id int
+ err := row.Scan(&article_id)
+ if err != nil {
+ log.Warnf("getArticleIdFromUrl: Query or scanning failed for: %s", wiki_url)
+ log.Fatal(err)
+ }
+ return article_id
+}
+
+func (app *App) fixAllCategories() {
+ rows, err := app.DB.Query("SELECT id, name FROM category;")
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ for rows.Next() {
+ var category_id int
+ var category_name string
+
+ err = rows.Scan(&category_id, &category_name)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ category_fixed := normalizeCategory(category_name)
+ if category_fixed != category_name {
+ log.Warn(category_fixed)
+ stmt, err := app.DB.Prepare("UPDATE category SET name = ? WHERE id = ? AND name = ?")
+ if err != nil {
+ log.Warnf("fixAllCategories: Preparing query failed for: (%d) %s", category_id, category_fixed)
+ log.Fatal(err)
+ }
+ defer stmt.Close()
+
+ _, err = stmt.Exec(category_fixed, category_id, category_name)
+ if err != nil {
+ log.Warnf("fixAllCategories: Exec stmt failed for: (%d) %s", category_id, category_fixed)
+ log.Fatal(err)
+ }
+ }
+
+ }
+}