diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/main.go | 103 |
1 files changed, 103 insertions, 0 deletions
diff --git a/src/main.go b/src/main.go
index e079760..0aa01bb 100644
--- a/src/main.go
+++ b/src/main.go
@@ -20,6 +20,105 @@ type LinkRow struct {
 }
 
 var imdbTitleRe = regexp.MustCompile(`/title/(tt\d+)($|/)`)
+var imdbIDRe = regexp.MustCompile(`^(tt\d+)$`)
+
+// populateImdbTable copies IMDb IDs from links.param into the imdb table.
+//
+// It reads the distinct non-empty param values of links rows whose host is
+// www.imdb.com, logs and skips values that are not a bare "tt<digits>" ID,
+// skips IDs already present in imdb, and inserts the rest in a single
+// transaction. Any failure rolls the transaction back and is returned wrapped.
+func (a *App) populateImdbTable() error {
+	// Gather all IMDb IDs already in the table to avoid duplicates
+	existing, err := a.getExistingImdbIDs()
+	if err != nil {
+		return fmt.Errorf("getExistingImdbIDs: %w", err)
+	}
+
+	rows, err := a.DB.Query(`
+		SELECT DISTINCT param FROM links
+		WHERE host = 'www.imdb.com' AND param IS NOT NULL AND param != ''
+	`)
+	if err != nil {
+		return fmt.Errorf("query links: %w", err)
+	}
+	defer rows.Close()
+
+	// NOTE(review): rows stays open on a.DB while tx writes below; with a
+	// single-connection pool or SQLite this may block -- confirm the driver.
+	tx, err := a.DB.Beginx()
+	if err != nil {
+		return fmt.Errorf("begin transaction: %w", err)
+	}
+	// One deferred Rollback covers every early return; after a successful
+	// Commit it only reports sql.ErrTxDone, which is safe to ignore.
+	defer tx.Rollback()
+
+	stmt, err := tx.Prepare(`INSERT INTO imdb (imdb_id) VALUES (?)`)
+	if err != nil {
+		return fmt.Errorf("prepare insert: %w", err)
+	}
+	defer stmt.Close()
+
+	var inserted, skipped int
+	for rows.Next() {
+		var param string
+		if err := rows.Scan(&param); err != nil {
+			return fmt.Errorf("scan param: %w", err)
+		}
+
+		if !imdbIDRe.MatchString(param) {
+			log.Printf("populateImdbTable: invalid param %q, skipping", param)
+			continue
+		}
+
+		if existing[param] {
+			skipped++
+			continue
+		}
+
+		if _, err := stmt.Exec(param); err != nil {
+			return fmt.Errorf("insert %s: %w", param, err)
+		}
+		inserted++
+		existing[param] = true
+	}
+	if err := rows.Err(); err != nil {
+		return fmt.Errorf("rows iteration: %w", err)
+	}
+
+	if err := tx.Commit(); err != nil {
+		return fmt.Errorf("commit: %w", err)
+	}
+
+	log.Printf("populateImdbTable: inserted %d, skipped %d (already existed)", inserted, skipped)
+	return nil
+}
+
+// getExistingImdbIDs returns the set of imdb_id values currently stored in
+// the imdb table and logs how many were found.
+func (a *App) getExistingImdbIDs() (map[string]bool, error) {
+	rows, err := a.DB.Query(`SELECT imdb_id FROM imdb`)
+	if err != nil {
+		return nil, fmt.Errorf("query imdb: %w", err)
+	}
+	defer rows.Close()
+
+	existing := make(map[string]bool)
+	for rows.Next() {
+		var imdbID string
+		if err := rows.Scan(&imdbID); err != nil {
+			return nil, fmt.Errorf("scan imdb_id: %w", err)
+		}
+		existing[imdbID] = true
+	}
+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("rows iteration: %w", err)
+	}
+
+	log.Printf("getExistingImdbIDs: %d existing records", len(existing))
+	return existing, nil
+}
 
 func (a *App) extractImdbIDs() error {
 	rows, err := a.DB.Query(`
@@ -99,4 +198,8 @@ func main() {
 	if err = app.extractImdbIDs(); err != nil {
 		log.Fatalf("extractImdbIDs: %v", err)
 	}
+
+	if err = app.populateImdbTable(); err != nil {
+		log.Fatalf("populateImdbTable: %v", err)
+	}
 }
