diff options
Diffstat (limited to 'src/main.go')
| -rw-r--r-- | src/main.go | 85 |
1 file changed, 70 insertions, 15 deletions
diff --git a/src/main.go b/src/main.go
index ebfbd98..e079760 100644
--- a/src/main.go
+++ b/src/main.go
@@ -1,27 +1,79 @@
 package main
 
 import (
-	"encoding/json"
 	"fmt"
-	"html"
-	"io/ioutil"
-	"net/http"
-	"net/url"
+	"log"
 	"regexp"
-	"strconv"
-	"strings"
-	"time"
 
-	"github.com/PuerkitoBio/goquery"
-	"github.com/anikhasibul/queue"
+	_ "github.com/go-sql-driver/mysql"
 	"github.com/jmoiron/sqlx"
-	log "github.com/sirupsen/logrus"
 )
 
 type App struct {
 	Config *Config
 	DB     *sqlx.DB
-	Now    time.Time
+}
+
+type LinkRow struct {
+	ID  int    `db:"id"`
+	URL string `db:"url"`
+}
+
+var imdbTitleRe = regexp.MustCompile(`/title/(tt\d+)($|/)`)
+
+func (a *App) extractImdbIDs() error {
+	rows, err := a.DB.Query(`
+		SELECT id, url FROM links
+		WHERE field = 1 AND url LIKE '%.com/title%' AND host = 'www.imdb.com'
+		AND (param IS NULL OR param = '')
+	`)
+	if err != nil {
+		return fmt.Errorf("query links: %w", err)
+	}
+	defer rows.Close()
+
+	var count, updated int
+	tx := a.DB.MustBegin()
+	stmt, err := tx.Prepare(`UPDATE links SET param = ? WHERE id = ?`)
+	if err != nil {
+		tx.Rollback()
+		return fmt.Errorf("prepare update: %w", err)
+	}
+	defer stmt.Close()
+
+	for rows.Next() {
+		count++
+		var link LinkRow
+		if err := rows.Scan(&link.ID, &link.URL); err != nil {
+			tx.Rollback()
+			return fmt.Errorf("scan row: %w", err)
+		}
+
+		match := imdbTitleRe.FindStringSubmatch(link.URL)
+		if len(match) < 2 {
+			log.Printf("no IMDb ID found in URL: %s", link.URL)
+			continue
+		}
+
+		imdbID := match[1]
+		_, err := stmt.Exec(imdbID, link.ID)
+		if err != nil {
+			tx.Rollback()
+			return fmt.Errorf("update link %d: %w", link.ID, err)
+		}
+		updated++
+	}
+	if err := rows.Err(); err != nil {
+		tx.Rollback()
+		return fmt.Errorf("rows iteration: %w", err)
+	}
+
+	if err := tx.Commit(); err != nil {
+		return fmt.Errorf("commit: %w", err)
+	}
+
+	log.Printf("extractImdbIDs: scanned %d rows, updated %d", count, updated)
+	return nil
 }
 
 func main() {
@@ -31,11 +83,10 @@ func main() {
 	}
 
 	app := App{Config: cfg}
-	app.Now = time.Now()
 
-	log.Debug(fmt.Sprintf(`Connecting to "%s" database "%s" as user "%s" on host "%s:%s" with extra options "%s".`, app.Config.DBDriver, app.Config.DBDBName, app.Config.DBUser, app.Config.DBHost, app.Config.DBPort, app.Config.DBOptions))
+	log.Printf(`Connecting to "%s" database "%s" as user "%s" on host "%s:%s" with extra options "%s".`, cfg.DBDriver, cfg.DBDBName, cfg.DBUser, cfg.DBHost, cfg.DBPort, cfg.DBOptions)
 
-	app.DB, err = sqlx.Connect(app.Config.DBDriver, app.Config.DBUser+":"+app.Config.DBPassword+"@tcp("+app.Config.DBHost+":"+app.Config.DBPort+")/"+app.Config.DBDBName+"?"+app.Config.DBOptions)
+	app.DB, err = sqlx.Connect(cfg.DBDriver, cfg.DBUser+":"+cfg.DBPassword+"@tcp("+cfg.DBHost+":"+cfg.DBPort+")/"+cfg.DBDBName+"?"+cfg.DBOptions)
 	if err != nil {
 		log.Fatal(err, "Cannot connect to database")
 	}
@@ -44,4 +95,8 @@ func main() {
 		log.Fatal(err, "No connection to database")
 	}
 	defer app.DB.Close()
+
+	if err = app.extractImdbIDs(); err != nil {
+		log.Fatalf("extractImdbIDs: %v", err)
+	}
 }
