summary | refs | log | tree | commit | diff
path: root/src/main.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/main.go')
-rw-r--r-- src/main.go 91
1 files changed, 91 insertions, 0 deletions
diff --git a/src/main.go b/src/main.go
index e079760..0aa01bb 100644
--- a/src/main.go
+++ b/src/main.go
@@ -20,6 +20,93 @@ type LinkRow struct {
}
var (
	// imdbTitleRe captures the "tt"-prefixed title ID from a path segment of
	// the form /title/tt<digits>, followed by either end of input or a slash.
	imdbTitleRe = regexp.MustCompile(`/title/(tt\d+)($|/)`)

	// imdbIDRe matches a string that consists solely of a bare IMDb title ID:
	// "tt" followed by one or more digits.
	imdbIDRe = regexp.MustCompile(`^(tt\d+)$`)
)
+
+func (a *App) populateImdbTable() error {
+ // Gather all IMDb IDs already in the table to avoid duplicates
+ existing, err := a.getExistingImdbIDs()
+ if err != nil {
+ return fmt.Errorf("getExistingImdbIDs: %w", err)
+ }
+
+ rows, err := a.DB.Query(`
+ SELECT DISTINCT param FROM links
+ WHERE host = 'www.imdb.com' AND param IS NOT NULL AND param != ''
+ `)
+ if err != nil {
+ return fmt.Errorf("query links: %w", err)
+ }
+ defer rows.Close()
+
+ tx := a.DB.MustBegin()
+ stmt, err := tx.Prepare(`INSERT INTO imdb (imdb_id) VALUES (?)`)
+ if err != nil {
+ tx.Rollback()
+ return fmt.Errorf("prepare insert: %w", err)
+ }
+ defer stmt.Close()
+
+ var inserted, skipped int
+ for rows.Next() {
+ var param string
+ if err := rows.Scan(&param); err != nil {
+ tx.Rollback()
+ return fmt.Errorf("scan param: %w", err)
+ }
+
+ if !imdbIDRe.MatchString(param) {
+ log.Printf("populateImdbTable: invalid param %q, skipping", param)
+ continue
+ }
+
+ if existing[param] {
+ skipped++
+ continue
+ }
+
+ _, err := stmt.Exec(param)
+ if err != nil {
+ tx.Rollback()
+ return fmt.Errorf("insert %s: %w", param, err)
+ }
+ inserted++
+ existing[param] = true
+ }
+ if err := rows.Err(); err != nil {
+ tx.Rollback()
+ return fmt.Errorf("rows iteration: %w", err)
+ }
+
+ if err := tx.Commit(); err != nil {
+ return fmt.Errorf("commit: %w", err)
+ }
+
+ log.Printf("populateImdbTable: inserted %d, skipped %d (already existed)", inserted, skipped)
+ return nil
+}
+
+func (a *App) getExistingImdbIDs() (map[string]bool, error) {
+ rows, err := a.DB.Query(`SELECT imdb_id FROM imdb`)
+ if err != nil {
+ return nil, fmt.Errorf("query imdb: %w", err)
+ }
+ defer rows.Close()
+
+ existing := make(map[string]bool)
+ for rows.Next() {
+ var imdbID string
+ if err := rows.Scan(&imdbID); err != nil {
+ return nil, fmt.Errorf("scan imdb_id: %w", err)
+ }
+ existing[imdbID] = true
+ }
+ if err := rows.Err(); err != nil {
+ return nil, fmt.Errorf("rows iteration: %w", err)
+ }
+
+ log.Printf("getExistingImdbIDs: %d existing records", len(existing))
+ return existing, nil
+}
func (a *App) extractImdbIDs() error {
rows, err := a.DB.Query(`
@@ -99,4 +186,8 @@ func main() {
if err = app.extractImdbIDs(); err != nil {
log.Fatalf("extractImdbIDs: %v", err)
}
+
+ if err = app.populateImdbTable(); err != nil {
+ log.Fatalf("populateImdbTable: %v", err)
+ }
}