diff options
| author | Maximilian Möhring | 2022-11-16 00:27:45 +0100 |
|---|---|---|
| committer | Maximilian Möhring | 2022-11-16 00:27:45 +0100 |
| commit | 7c6ec599d219071bb9200ba5d5fe4c52a855637e (patch) | |
| tree | 40d44dc3cbea535d1012f13a462f8fd53d284b3b | |
| parent | 6d03d2dc5f1dd913d247f3d88377cddad6dcd7a7 (diff) | |
| download | alkobote-7c6ec599d219071bb9200ba5d5fe4c52a855637e.tar.gz | |
link shortener reuses short URLs
| -rw-r--r-- | crawler/post_process.go | 38 | ||||
| -rw-r--r-- | crawler/utility.go | 10 |
2 files changed, 38 insertions, 10 deletions
diff --git a/crawler/post_process.go b/crawler/post_process.go index c0c433d..dce6539 100644 --- a/crawler/post_process.go +++ b/crawler/post_process.go @@ -1,6 +1,7 @@ package main import ( + "database/sql" "fmt" log "github.com/sirupsen/logrus" @@ -33,11 +34,13 @@ func (app *App) short_url() error { } defer rows.Close() - query = `SELECT DISTINCT url FROM all_view ORDER BY URL ASC` - short_urls := []string{} + query = `SELECT DISTINCT url FROM all_view ORDER BY URL ASC;` + var Short_urls []sql.NullString - err = app.DB.Select(&short_urls, query) + err = app.DB.Select(&Short_urls, query) if err != nil { + log.Error("get all short_urls failed") + log.Fatal(err) return err } @@ -48,16 +51,31 @@ func (app *App) short_url() error { return err } - for { - generated_short_url := getRandomString(5) - if !stringInSlice(generated_short_url, short_urls) { - offer_db.Short_url = generated_short_url - log.Debug("short_url: " + string(generated_short_url) + " ( " + offer_db.Url + " )") - Angebote = append(Angebote, offer_db) - break + query = `SELECT DISTINCT short_url FROM angebot WHERE url = ? 
LIMIT 1` + var short_url_exists sql.NullString + err = app.DB.Get(&short_url_exists, query, offer_db.Url) + if err != nil && err != sql.ErrNoRows { + log.Error("get short_url from long_url failed") + log.Fatal(err) + return err + } + + if !short_url_exists.Valid { + for { + generated_short_url := getRandomString(5) + if !stringInSQLSlice(generated_short_url, Short_urls) { + offer_db.Short_url = generated_short_url + log.Debug("new short_url: " + string(generated_short_url) + " ( " + offer_db.Url + " )") + break + } } + } else { + offer_db.Short_url = short_url_exists.String + log.Debug("update old short_url: " + string(short_url_exists.String) + " ( " + offer_db.Url + " )") } + Angebote = append(Angebote, offer_db) + } for _, offer := range Angebote { diff --git a/crawler/utility.go b/crawler/utility.go index d282dbd..c619b34 100644 --- a/crawler/utility.go +++ b/crawler/utility.go @@ -1,6 +1,7 @@ package main import ( + "database/sql" "errors" "math" "math/rand" @@ -21,6 +22,15 @@ func stringInSlice(a string, list []string) bool { return false } +func stringInSQLSlice(a string, list []sql.NullString) bool { + for _, b := range list { + if b.String == a { + return true + } + } + return false +} + func detect_spirit_type(name string) string { name = strings.ToLower(name) |
