diff options
Diffstat (limited to 'helper.go')
| -rw-r--r-- | helper.go | 168 |
1 files changed, 168 insertions, 0 deletions
diff --git a/helper.go b/helper.go new file mode 100644 index 0000000..af5f4c1 --- /dev/null +++ b/helper.go @@ -0,0 +1,168 @@ +package main + +import ( + _url "net/url" + "strings" + log "github.com/sirupsen/logrus" + "regexp" +) + +func stripHNPrefix(title string) string { + title = strings.TrimPrefix(title, "Ask HN:") + title = strings.TrimPrefix(title, "Show HN:") + title = strings.TrimPrefix(title, "Tell HN:") + title = strings.TrimPrefix(title, "Experiment HN:") + title = strings.TrimPrefix(title, "Launch HN:") + + return strings.TrimSpace(title) +} + +/** + * removes given param from URL + */ +func _removeParam(url, key string) string { + u, err := _url.Parse(url) + if err != nil { + log.Fatal(err) + } + q := u.Query() + q.Del(key) + u.RawQuery = q.Encode() + return u.String() +} + +func normalizeUrl(url string) string { + match, err := regexp.MatchString("^http://", url) + if err != nil { + log.Fatal(err) + } + if match { + log.Debug("normalize: ", "http:// ", url) + r := regexp.MustCompile("^http://") + url = r.ReplaceAllString(url, "https://") + } + + // add missing https:// if no scheme + u, err := _url.Parse(url) + if err != nil { + log.Fatal(err) + } + + if "" == u.Scheme { + if strings.HasPrefix(url, "/") { + url = "https:" + url + } else { + url = "https://" + url + } + } + + + match, err = regexp.MatchString("youtube://", url) + if err != nil { + log.Fatal(err) + } + if match { + r := regexp.MustCompile("youtube://") + url = r.ReplaceAllString(url, "https://") + } + + match, err = regexp.MatchString("youtu.be/", url) + if err != nil { + log.Fatal(err) + } + if match { + log.Debug("normalize: ", "youtu.be ", url) + + /** + * remove tracking param "si" + */ + url = _removeParam(url, "si") + url = _removeParam(url, "feature") + + u, err := _url.Parse(url) + if err != nil { + log.Fatal(err) + } + q := u.Query() + q.Add("v", strings.TrimLeft(u.Path, "/")) + + u.Host = "www.youtube.com" + u.Path = "watch" + + u.RawQuery = q.Encode() + url = u.String() + + //r := regexp.MustCompile("youtu.be/") + //url = r.ReplaceAllString(url, "youtube.com/watch?v=") + } + + match, err = regexp.MatchString("/m.youtube.com/", url) + if err != nil { + log.Fatal(err) + } + if match { + log.Debug("normalize: ", "m.youtube.com ", url) + + /** + * remove tracking param "si" + */ + url = _removeParam(url, "si") + url = _removeParam(url, "feature") + + r := regexp.MustCompile("/m.youtube.com/") + url = r.ReplaceAllString(url, "/www.youtube.com/") + } + + match, err = regexp.MatchString("/m.imdb.com/", url) + if err != nil { + log.Fatal(err) + } + if match { + log.Debug("normalize: ", "m.imdb.com ", url) + + /** + * remove tracking param "si" + */ + url = _removeParam(url, "si") + url = _removeParam(url, "feature") + + r := regexp.MustCompile("/m.imdb.com/") + url = r.ReplaceAllString(url, "/www.imdb.com") + } + + /* + match, err = regexp.MatchString("m.wikipedia.org", url) + if err != nil { + log.Fatal(err) + } + if match { + r := regexp.MustCompile("m.wikipedia.org") + url = r.ReplaceAllString(url, "wikipedia.org") + } + */ + + /** + * remove tracking utm_ params + */ + url = _removeParam(url, "utm_source") + url = _removeParam(url, "utm_medium") + url = _removeParam(url, "utm_campaign") + url = _removeParam(url, "utm_term") + url = _removeParam(url, "utm_content") + + u, err = _url.Parse(url) + if err != nil { + log.Fatal(err) + } + + /** + * Append www. to normalize URL. exclude relative URLs starting with // since this is not recognized by Go + * Screw that, wierd edge case. Someone pasted a + */ + if ! strings.HasPrefix(u.Host, "www.") { + u.Host = "www." + u.Host + } + url = u.String() + + return url +} |
