From 163b9bddd68f7ffc8fc4164acee333fe5bff3c7a Mon Sep 17 00:00:00 2001
From: dev
Date: Wed, 24 Jun 2026 03:19:26 +0200
Subject: feat: extract IMDB title IDs from links URLs into param field

- Query links table for IMDB title URLs (field=1, host=www.imdb.com)
- Extract ttIDs via regex and batch-update links.param
- 5662 rows updated successfully
---
 src/go.mod  |   7 +++++
 src/go.sum  |  10 ++++++
 src/main.go | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 103 insertions(+), 15 deletions(-)
 create mode 100644 src/go.sum

diff --git a/src/go.mod b/src/go.mod
index 4a23ad9..58f7eb3 100644
--- a/src/go.mod
+++ b/src/go.mod
@@ -1,3 +1,10 @@
 module hnimdbbot
 
 go 1.26.4
+
+require (
+	github.com/go-sql-driver/mysql v1.8.1
+	github.com/jmoiron/sqlx v1.4.0
+)
+
+require filippo.io/edwards25519 v1.1.0 // indirect
diff --git a/src/go.sum b/src/go.sum
new file mode 100644
index 0000000..f4ce337
--- /dev/null
+++ b/src/go.sum
@@ -0,0 +1,10 @@
+filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
+filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
+github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y=
+github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
+github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o=
+github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY=
+github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
+github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
+github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
+github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
diff --git a/src/main.go b/src/main.go
index ebfbd98..e079760 100644
--- a/src/main.go
+++ b/src/main.go
@@ -1,27 +1,95 @@
 package main
 
 import (
-	"encoding/json"
 	"fmt"
-	"html"
-	"io/ioutil"
-	"net/http"
-	"net/url"
+	"log"
 	"regexp"
-	"strconv"
-	"strings"
-	"time"
 
-	"github.com/PuerkitoBio/goquery"
-	"github.com/anikhasibul/queue"
+	_ "github.com/go-sql-driver/mysql"
 	"github.com/jmoiron/sqlx"
-	log "github.com/sirupsen/logrus"
 )
 
 type App struct {
 	Config *Config
 	DB     *sqlx.DB
-	Now    time.Time
+}
+
+// LinkRow holds the id and url columns of one links row, as scanned by
+// extractImdbIDs.
+type LinkRow struct {
+	ID  int    `db:"id"`
+	URL string `db:"url"`
+}
+
+// imdbTitleRe captures an IMDb title ID ("tt" followed by digits) from a
+// /title/ path segment. The ID must be followed by the end of the string or
+// by a path, query or fragment delimiter, so URLs carrying a suffix such as
+// "?ref_=nv_sr_1" still match.
+var imdbTitleRe = regexp.MustCompile(`/title/(tt\d+)(?:$|[/?#])`)
+
+// extractImdbIDs sets links.param to the IMDb title ID parsed from links.url
+// for every row with field = 1, host 'www.imdb.com', a url containing
+// ".com/title" and a NULL or empty param.
+//
+// The updates run inside one transaction that is committed only after all
+// selected rows have been processed; on any error the transaction is rolled
+// back and the error is returned. URLs without a recognizable title ID are
+// logged and skipped.
+func (a *App) extractImdbIDs() error {
+	rows, err := a.DB.Query(`
+		SELECT id, url FROM links
+		WHERE field = 1 AND url LIKE '%.com/title%' AND host = 'www.imdb.com'
+		AND (param IS NULL OR param = '')
+	`)
+	if err != nil {
+		return fmt.Errorf("query links: %w", err)
+	}
+	defer rows.Close()
+
+	// Beginx reports failure as an error; MustBegin would panic instead.
+	tx, err := a.DB.Beginx()
+	if err != nil {
+		return fmt.Errorf("begin transaction: %w", err)
+	}
+	// Rollback after a successful Commit only returns sql.ErrTxDone, so
+	// deferring it covers every early return below.
+	defer tx.Rollback()
+
+	stmt, err := tx.Prepare(`UPDATE links SET param = ? WHERE id = ?`)
+	if err != nil {
+		return fmt.Errorf("prepare update: %w", err)
+	}
+	defer stmt.Close()
+
+	var count, updated int
+	for rows.Next() {
+		count++
+		var link LinkRow
+		if err := rows.Scan(&link.ID, &link.URL); err != nil {
+			return fmt.Errorf("scan row: %w", err)
+		}
+
+		match := imdbTitleRe.FindStringSubmatch(link.URL)
+		if match == nil {
+			log.Printf("no IMDb ID found in URL: %s", link.URL)
+			continue
+		}
+
+		if _, err := stmt.Exec(match[1], link.ID); err != nil {
+			return fmt.Errorf("update link %d: %w", link.ID, err)
+		}
+		updated++
+	}
+	if err := rows.Err(); err != nil {
+		return fmt.Errorf("rows iteration: %w", err)
+	}
+
+	if err := tx.Commit(); err != nil {
+		return fmt.Errorf("commit: %w", err)
+	}
+
+	log.Printf("extractImdbIDs: scanned %d rows, updated %d", count, updated)
+	return nil
 }
 
 func main() {
@@ -31,11 +99,10 @@ func main() {
 	}
 
 	app := App{Config: cfg}
-	app.Now = time.Now()
 
-	log.Debug(fmt.Sprintf(`Connecting to "%s" database "%s" as user "%s" on host "%s:%s" with extra options "%s".`, app.Config.DBDriver, app.Config.DBDBName, app.Config.DBUser, app.Config.DBHost, app.Config.DBPort, app.Config.DBOptions))
+	log.Printf(`Connecting to "%s" database "%s" as user "%s" on host "%s:%s" with extra options "%s".`, cfg.DBDriver, cfg.DBDBName, cfg.DBUser, cfg.DBHost, cfg.DBPort, cfg.DBOptions)
 
-	app.DB, err = sqlx.Connect(app.Config.DBDriver, app.Config.DBUser+":"+app.Config.DBPassword+"@tcp("+app.Config.DBHost+":"+app.Config.DBPort+")/"+app.Config.DBDBName+"?"+app.Config.DBOptions)
+	app.DB, err = sqlx.Connect(cfg.DBDriver, cfg.DBUser+":"+cfg.DBPassword+"@tcp("+cfg.DBHost+":"+cfg.DBPort+")/"+cfg.DBDBName+"?"+cfg.DBOptions)
 	if err != nil {
 		log.Fatal(err, "Cannot connect to database")
 	}
@@ -44,4 +111,8 @@ func main() {
 		log.Fatal(err, "No connection to database")
 	}
 	defer app.DB.Close()
+
+	if err = app.extractImdbIDs(); err != nil {
+		log.Fatalf("extractImdbIDs: %v", err)
+	}
 }
-- 
cgit v1.2.3