summaryrefslogtreecommitdiff
path: root/helper.go
diff options
context:
space:
mode:
Diffstat (limited to 'helper.go')
-rw-r--r--helper.go168
1 files changed, 168 insertions, 0 deletions
diff --git a/helper.go b/helper.go
new file mode 100644
index 0000000..af5f4c1
--- /dev/null
+++ b/helper.go
@@ -0,0 +1,168 @@
+package main
+
+import (
+ _url "net/url"
+ "strings"
+ log "github.com/sirupsen/logrus"
+ "regexp"
+)
+
+// stripHNPrefix removes the Hacker News category tags from the start of title.
+// Each tag is tried once, in the order listed, and the result is whitespace-trimmed.
+func stripHNPrefix(title string) string {
+	tags := [...]string{"Ask HN:", "Show HN:", "Tell HN:", "Experiment HN:", "Launch HN:"}
+	for _, tag := range tags {
+		title = strings.TrimPrefix(title, tag)
+	}
+	return strings.TrimSpace(title)
+}
+
+// _removeParam returns url without the query parameter key. The query is
+// rebuilt with Values.Encode, so the remaining parameters come back sorted by
+// key. A url that cannot be parsed is reported through log.Fatal.
+func _removeParam(url, key string) string {
+	parsed, err := _url.Parse(url)
+	if err != nil {
+		log.Fatal(err)
+	}
+	params := parsed.Query()
+	delete(params, key) // same effect as params.Del(key)
+	parsed.RawQuery = params.Encode()
+	return parsed.String()
+}
+
+// Patterns used by normalizeUrl, compiled once instead of on every call.
+var (
+	httpSchemePattern    = regexp.MustCompile("^http://")
+	youtubeSchemePattern = regexp.MustCompile("youtube://")
+)
+
+// normalizeUrl rewrites url into a canonical form:
+//
+//   - a leading "http://" is upgraded to "https://"; a URL without a scheme
+//     gets "https:" (when it starts with "/", e.g. "//host/path") or
+//     "https://" prepended
+//   - "youtube://" is replaced by "https://"
+//   - youtu.be short links become www.youtube.com/watch?v=<path>
+//   - m.youtube.com and m.imdb.com are moved to their www. hosts
+//   - the "si" and "feature" parameters are dropped on those three hosts and
+//     the utm_* tracking parameters are dropped on every URL
+//   - the query is re-encoded (parameters sorted by key) and a host without
+//     a "www." prefix gets one
+//
+// Hosts are compared against the parsed URL's host, so a host name that only
+// appears inside a path or query value does not trigger a rewrite. A URL that
+// net/url cannot parse is passed to log.Fatal.
+func normalizeUrl(url string) string {
+	if httpSchemePattern.MatchString(url) {
+		log.Debug("normalize: ", "http:// ", url)
+		url = httpSchemePattern.ReplaceAllString(url, "https://")
+	}
+
+	// add missing https:// if no scheme
+	u, err := _url.Parse(url)
+	if err != nil {
+		log.Fatal(err)
+	}
+	if u.Scheme == "" {
+		if strings.HasPrefix(url, "/") {
+			// "//host/path" already carries the slashes, only the scheme is missing
+			url = "https:" + url
+		} else {
+			url = "https://" + url
+		}
+	}
+	url = youtubeSchemePattern.ReplaceAllString(url, "https://")
+
+	// Parse again: the scheme fixes above change how the string splits into
+	// host and path. Everything below edits this one parsed URL.
+	u, err = _url.Parse(url)
+	if err != nil {
+		log.Fatal(err)
+	}
+	q := u.Query()
+
+	switch strings.ToLower(u.Host) {
+	case "youtu.be", "www.youtu.be":
+		log.Debug("normalize: ", "youtu.be ", url)
+		q.Del("si")
+		q.Del("feature")
+		// the path of a short link becomes the v parameter of the watch URL
+		q.Add("v", strings.TrimLeft(u.Path, "/"))
+		u.Host = "www.youtube.com"
+		u.Path = "/watch"
+		u.RawPath = ""
+	case "m.youtube.com":
+		log.Debug("normalize: ", "m.youtube.com ", url)
+		q.Del("si")
+		q.Del("feature")
+		u.Host = "www.youtube.com"
+	case "m.imdb.com":
+		log.Debug("normalize: ", "m.imdb.com ", url)
+		q.Del("si")
+		q.Del("feature")
+		// only the host changes; the path keeps its leading slash
+		u.Host = "www.imdb.com"
+	}
+
+	// remove tracking utm_ params
+	for _, key := range []string{"utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content"} {
+		q.Del(key)
+	}
+	// Encode is applied to every URL, which also sorts the parameters by key.
+	u.RawQuery = q.Encode()
+
+	// Prepend www. to every host that does not already start with it.
+	if !strings.HasPrefix(u.Host, "www.") {
+		u.Host = "www." + u.Host
+	}
+	return u.String()
+}