package main import ( _url "net/url" "strings" log "github.com/sirupsen/logrus" "regexp" ) func stripHNPrefix(title string) string { title = strings.TrimPrefix(title, "Ask HN:") title = strings.TrimPrefix(title, "Show HN:") title = strings.TrimPrefix(title, "Tell HN:") title = strings.TrimPrefix(title, "Experiment HN:") title = strings.TrimPrefix(title, "Launch HN:") return strings.TrimSpace(title) } /** * removes given param from URL */ func _removeParam(url, key string) string { u, err := _url.Parse(url) if err != nil { log.Fatal(err) } q := u.Query() q.Del(key) u.RawQuery = q.Encode() return u.String() } func normalizeUrl(url string) string { match, err := regexp.MatchString("^http://", url) if err != nil { log.Fatal(err) } if match { log.Debug("normalize: ", "http:// ", url) r := regexp.MustCompile("^http://") url = r.ReplaceAllString(url, "https://") } // add missing https:// if no scheme u, err := _url.Parse(url) if err != nil { log.Fatal(err) } if "" == u.Scheme { if strings.HasPrefix(url, "/") { url = "https:" + url } else { url = "https://" + url } } match, err = regexp.MatchString("youtube://", url) if err != nil { log.Fatal(err) } if match { r := regexp.MustCompile("youtube://") url = r.ReplaceAllString(url, "https://") } match, err = regexp.MatchString("youtu.be/", url) if err != nil { log.Fatal(err) } if match { log.Debug("normalize: ", "youtu.be ", url) /** * remove tracking param "si" */ url = _removeParam(url, "si") url = _removeParam(url, "feature") u, err := _url.Parse(url) if err != nil { log.Fatal(err) } q := u.Query() q.Add("v", strings.TrimLeft(u.Path, "/")) u.Host = "www.youtube.com" u.Path = "watch" u.RawQuery = q.Encode() url = u.String() //r := regexp.MustCompile("youtu.be/") //url = r.ReplaceAllString(url, "youtube.com/watch?v=") } match, err = regexp.MatchString("/m.youtube.com/", url) if err != nil { log.Fatal(err) } if match { log.Debug("normalize: ", "m.youtube.com ", url) /** * remove tracking param "si" */ url = _removeParam(url, "si") url = _removeParam(url, "feature") r := regexp.MustCompile("/m.youtube.com/") url = r.ReplaceAllString(url, "/www.youtube.com/") } match, err = regexp.MatchString("/m.imdb.com/", url) if err != nil { log.Fatal(err) } if match { log.Debug("normalize: ", "m.imdb.com ", url) /** * remove tracking param "si" */ url = _removeParam(url, "si") url = _removeParam(url, "feature") r := regexp.MustCompile("/m.imdb.com/") url = r.ReplaceAllString(url, "/www.imdb.com") } /* match, err = regexp.MatchString("m.wikipedia.org", url) if err != nil { log.Fatal(err) } if match { r := regexp.MustCompile("m.wikipedia.org") url = r.ReplaceAllString(url, "wikipedia.org") } */ /** * remove tracking utm_ params */ url = _removeParam(url, "utm_source") url = _removeParam(url, "utm_medium") url = _removeParam(url, "utm_campaign") url = _removeParam(url, "utm_term") url = _removeParam(url, "utm_content") u, err = _url.Parse(url) if err != nil { log.Fatal(err) } /** * Append www. to normalize URL. exclude relative URLs starting with // since this is not recognized by Go * Screw that, wierd edge case. Someone pasted a */ if ! strings.HasPrefix(u.Host, "www.") { u.Host = "www." + u.Host } url = u.String() return url }