From 6d03d2dc5f1dd913d247f3d88377cddad6dcd7a7 Mon Sep 17 00:00:00 2001 From: Maximilian Möhring Date: Tue, 15 Nov 2022 22:18:54 +0100 Subject: No external link shorter anymore. --- crawler/post_process.go | 39 ++++++++++++++++----------------------- crawler/utility.go | 16 ++++++++++++++++ 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/crawler/post_process.go b/crawler/post_process.go index 76fc70f..c0c433d 100644 --- a/crawler/post_process.go +++ b/crawler/post_process.go @@ -2,9 +2,6 @@ package main import ( "fmt" - "io/ioutil" - "net/http" - "net/url" log "github.com/sirupsen/logrus" ) @@ -36,6 +33,14 @@ func (app *App) short_url() error { } defer rows.Close() + query = `SELECT DISTINCT url FROM all_view ORDER BY URL ASC` + short_urls := []string{} + + err = app.DB.Select(&short_urls, query) + if err != nil { + return err + } + for rows.Next() { var offer_db Angebot err = rows.StructScan(&offer_db) @@ -43,28 +48,16 @@ func (app *App) short_url() error { return err } - v := url.Values{} - v.Set("key", app.Config.Polr_API_Key) - v.Add("url", offer_db.Url) - polr_url := app.Config.Polr_URL + "?" + v.Encode() - - log.Debug("polr_url: " + polr_url + " ( " + offer_db.Url + " )") - - resp, err := http.Get(polr_url) - if err != nil { - return err - } - defer resp.Body.Close() - - short_url, err := ioutil.ReadAll(resp.Body) - if err != nil { - return err + for { + generated_short_url := getRandomString(5) + if !stringInSlice(generated_short_url, short_urls) { + offer_db.Short_url = generated_short_url + log.Debug("short_url: " + string(generated_short_url) + " ( " + offer_db.Url + " )") + Angebote = append(Angebote, offer_db) + break + } } - offer_db.Short_url = string(short_url) - log.Debug("short_url: " + string(short_url) + " ( " + offer_db.Url + " )") - - Angebote = append(Angebote, offer_db) } for _, offer := range Angebote { diff --git a/crawler/utility.go b/crawler/utility.go index 5b91c51..d282dbd 100644 --- a/crawler/utility.go +++ b/crawler/utility.go @@ -3,9 +3,11 @@ package main import ( "errors" "math" + "math/rand" "regexp" "strconv" "strings" + "time" "github.com/gocolly/colly" ) @@ -370,3 +372,17 @@ func RoundToEven(x float64) float64 { } return t } + +/* + * Returns random string with len n, used for short urls + */ +func getRandomString(n int) string { + var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") + + rand.Seed(time.Now().UnixNano()) + s := make([]rune, n) + for i := range s { + s[i] = letters[rand.Intn(len(letters))] + } + return string(s) +} -- cgit v1.2.3