summary | refs | log | tree | commit | diff
path: root/src/main.go
diff options
context:
space:
mode:
author: dev, 2026-06-24 03:19:26 +0200
committer: dev, 2026-06-24 03:19:26 +0200
commit: 163b9bddd68f7ffc8fc4164acee333fe5bff3c7a (patch)
tree: c073d535daaad40ba00de99870dff67809bce6a6 /src/main.go
parent: 16a92e119b8123902b0773ba2878fb3926d9ee09 (diff)
download: hnimdbbot-163b9bddd68f7ffc8fc4164acee333fe5bff3c7a.tar.gz
feat: extract IMDb title IDs from URLs in the links table into the param field
- Query the links table for IMDb title URLs (field=1, host=www.imdb.com, param empty or NULL)
- Extract ttIDs via regex and batch-update links.param
- 5662 rows updated successfully
Diffstat (limited to 'src/main.go')
-rw-r--r-- src/main.go 85
1 files changed, 70 insertions, 15 deletions
diff --git a/src/main.go b/src/main.go
index ebfbd98..e079760 100644
--- a/src/main.go
+++ b/src/main.go
@@ -1,27 +1,79 @@
package main
import (
- "encoding/json"
"fmt"
- "html"
- "io/ioutil"
- "net/http"
- "net/url"
+ "log"
"regexp"
- "strconv"
- "strings"
- "time"
- "github.com/PuerkitoBio/goquery"
- "github.com/anikhasibul/queue"
+ _ "github.com/go-sql-driver/mysql"
"github.com/jmoiron/sqlx"
- log "github.com/sirupsen/logrus"
)
type App struct {
Config *Config
DB *sqlx.DB
- Now time.Time
+}
+
+type LinkRow struct {
+ ID int `db:"id"`
+ URL string `db:"url"`
+}
+
+var imdbTitleRe = regexp.MustCompile(`/title/(tt\d+)($|/)`)
+
+func (a *App) extractImdbIDs() error {
+ rows, err := a.DB.Query(`
+ SELECT id, url FROM links
+ WHERE field = 1 AND url LIKE '%.com/title%' AND host = 'www.imdb.com'
+ AND (param IS NULL OR param = '')
+ `)
+ if err != nil {
+ return fmt.Errorf("query links: %w", err)
+ }
+ defer rows.Close()
+
+ var count, updated int
+ tx := a.DB.MustBegin()
+ stmt, err := tx.Prepare(`UPDATE links SET param = ? WHERE id = ?`)
+ if err != nil {
+ tx.Rollback()
+ return fmt.Errorf("prepare update: %w", err)
+ }
+ defer stmt.Close()
+
+ for rows.Next() {
+ count++
+ var link LinkRow
+ if err := rows.Scan(&link.ID, &link.URL); err != nil {
+ tx.Rollback()
+ return fmt.Errorf("scan row: %w", err)
+ }
+
+ match := imdbTitleRe.FindStringSubmatch(link.URL)
+ if len(match) < 2 {
+ log.Printf("no IMDb ID found in URL: %s", link.URL)
+ continue
+ }
+
+ imdbID := match[1]
+ _, err := stmt.Exec(imdbID, link.ID)
+ if err != nil {
+ tx.Rollback()
+ return fmt.Errorf("update link %d: %w", link.ID, err)
+ }
+ updated++
+ }
+ if err := rows.Err(); err != nil {
+ tx.Rollback()
+ return fmt.Errorf("rows iteration: %w", err)
+ }
+
+ if err := tx.Commit(); err != nil {
+ return fmt.Errorf("commit: %w", err)
+ }
+
+ log.Printf("extractImdbIDs: scanned %d rows, updated %d", count, updated)
+ return nil
}
func main() {
@@ -31,11 +83,10 @@ func main() {
}
app := App{Config: cfg}
- app.Now = time.Now()
- log.Debug(fmt.Sprintf(`Connecting to "%s" database "%s" as user "%s" on host "%s:%s" with extra options "%s".`, app.Config.DBDriver, app.Config.DBDBName, app.Config.DBUser, app.Config.DBHost, app.Config.DBPort, app.Config.DBOptions))
+ log.Printf(`Connecting to "%s" database "%s" as user "%s" on host "%s:%s" with extra options "%s".`, cfg.DBDriver, cfg.DBDBName, cfg.DBUser, cfg.DBHost, cfg.DBPort, cfg.DBOptions)
- app.DB, err = sqlx.Connect(app.Config.DBDriver, app.Config.DBUser+":"+app.Config.DBPassword+"@tcp("+app.Config.DBHost+":"+app.Config.DBPort+")/"+app.Config.DBDBName+"?"+app.Config.DBOptions)
+ app.DB, err = sqlx.Connect(cfg.DBDriver, cfg.DBUser+":"+cfg.DBPassword+"@tcp("+cfg.DBHost+":"+cfg.DBPort+")/"+cfg.DBDBName+"?"+cfg.DBOptions)
if err != nil {
log.Fatal(err, "Cannot connect to database")
}
@@ -44,4 +95,8 @@ func main() {
log.Fatal(err, "No connection to database")
}
defer app.DB.Close()
+
+ if err = app.extractImdbIDs(); err != nil {
+ log.Fatalf("extractImdbIDs: %v", err)
+ }
}