package main

import (
	_url "net/url"
	"regexp"
	"strings"

	log "github.com/sirupsen/logrus"
	xhtml "golang.org/x/net/html"
)

// Patterns used by normalizeUrl. They are compiled once at package scope
// instead of on every call, and every dot that is meant literally is escaped.
var (
	httpSchemeRe     = regexp.MustCompile(`^http://`)
	youtubeSchemeRe  = regexp.MustCompile(`youtube://`)
	youtuBeRe        = regexp.MustCompile(`youtu\.be/`)
	mobileYoutubeRe  = regexp.MustCompile(`/m\.youtube\.com/`)
	mobileImdbRe     = regexp.MustCompile(`/m\.imdb\.com/`)
	youtubeChannelRe = regexp.MustCompile(`/www\.youtube\.com/c/`)
	youtubeDesktopRe = regexp.MustCompile(`/www\.youtube\.com/`)
)

// stripHNPrefix removes the Hacker News submission markers ("Ask HN:",
// "Show HN:", "Tell HN:", "Experiment HN:", "Launch HN:") from the start of
// title and trims the surrounding whitespace of the result.
//
// The markers are tried once each, in the order listed, so a title that
// carries two markers back to back (without a space in between) loses both.
func stripHNPrefix(title string) string {
	prefixes := []string{
		"Ask HN:",
		"Show HN:",
		"Tell HN:",
		"Experiment HN:",
		"Launch HN:",
	}
	for _, p := range prefixes {
		title = strings.TrimPrefix(title, p)
	}
	return strings.TrimSpace(title)
}

// _removeParam returns url with every occurrence of the query parameter key
// removed.
//
// The query string is always re-encoded through url.Values.Encode, so the
// remaining parameters come back re-escaped and sorted by key even when key
// was not present. A url that cannot be parsed is reported through log.Fatal,
// which in logrus terminates the process.
func _removeParam(url, key string) string {
	u, err := _url.Parse(url)
	if err != nil {
		log.Fatal(err)
	}

	q := u.Query()
	q.Del(key)
	u.RawQuery = q.Encode()

	return u.String()
}

// normalizeUrl rewrites url into a canonical form. In order, it:
//
//  1. upgrades a leading http:// to https://;
//  2. adds https: / https:// when the URL has no scheme;
//  3. turns the youtube:// app scheme into https://;
//  4. expands youtu.be/<id> into www.youtube.com/watch?v=<id>;
//  5. maps m.youtube.com and m.imdb.com to their www. hosts;
//  6. prefixes the host with "www." when it does not already start with it;
//  7. rewrites www.youtube.com/c/<name> to www.youtube.com/@<name>;
//  8. drops the "si", "pp" and "feature" parameters from www.youtube.com URLs;
//  9. drops the utm_* tracking parameters from every URL.
//
// Because the last step always goes through _removeParam, the returned URL
// has its query string re-encoded and sorted by key. A URL that cannot be
// parsed at any stage is reported through log.Fatal.
func normalizeUrl(url string) string {
	// Upgrade plain http:// to https://.
	if httpSchemeRe.MatchString(url) {
		log.Debug("normalize: ", "http:// ", url)
		url = httpSchemeRe.ReplaceAllString(url, "https://")
	}

	// Add the missing scheme. Protocol-relative ("//host/path") and other
	// inputs starting with "/" only need "https:" in front of them.
	// Fun fact: https://news.ycombinator.com/item?id=27351340 broke this part.
	u, err := _url.Parse(url)
	if err != nil {
		log.Fatal(err)
	}
	if u.Scheme == "" {
		if strings.HasPrefix(url, "/") {
			url = "https:" + url
		} else {
			url = "https://" + url
		}
	}

	// Apple TV accepts the youtube:// scheme; map it back to https://.
	if youtubeSchemeRe.MatchString(url) {
		url = youtubeSchemeRe.ReplaceAllString(url, "https://")
	}

	// Expand youtu.be short links to the desktop watch page, moving the path
	// (the video id) into the "v" query parameter and keeping any other
	// parameters such as "t".
	if youtuBeRe.MatchString(url) {
		log.Debug("normalize: ", "youtu.be ", url)
		short, err := _url.Parse(url)
		if err != nil {
			log.Fatal(err)
		}
		q := short.Query()
		q.Add("v", strings.TrimLeft(short.Path, "/"))
		short.Host = "www.youtube.com"
		short.Path = "/watch"
		short.RawQuery = q.Encode()
		url = short.String()
	}

	// Redirect m.youtube.com to the desktop version.
	if mobileYoutubeRe.MatchString(url) {
		log.Debug("normalize: ", "m.youtube.com ", url)
		// Remove the tracking parameter "si" before rewriting the host.
		url = _removeParam(url, "si")
		url = mobileYoutubeRe.ReplaceAllString(url, "/www.youtube.com/")
	}

	// Redirect m.imdb.com to the desktop version. The replacement keeps the
	// trailing slash; without it the first path segment would be glued onto
	// the host name.
	if mobileImdbRe.MatchString(url) {
		log.Debug("normalize: ", "m.imdb.com ", url)
		url = mobileImdbRe.ReplaceAllString(url, "/www.imdb.com/")
	}

	// Prefix the host with "www." so that both spellings of a site normalize
	// to the same URL.
	// NOTE(review): this also applies when the parsed host is empty (for
	// example a path-only input), which yields the host "www." — kept as is.
	u, err = _url.Parse(url)
	if err != nil {
		log.Fatal(err)
	}
	if !strings.HasPrefix(u.Host, "www.") {
		u.Host = "www." + u.Host
	}
	url = u.String()

	// Redirect youtube.com/c/<name> to youtube.com/@<name>. The host is
	// matched with its "www." prefix because the step above guarantees every
	// host carries it by now.
	if youtubeChannelRe.MatchString(url) {
		log.Debug("normalize: ", "youtube.com/c/ -> @ ", url)
		// Remove the tracking parameter "si" before rewriting the path.
		url = _removeParam(url, "si")
		url = youtubeChannelRe.ReplaceAllString(url, "/www.youtube.com/@")
	}

	// Remove the tracking parameters "si", "pp" and "feature" from every
	// www.youtube.com URL.
	if youtubeDesktopRe.MatchString(url) {
		for _, key := range []string{"si", "pp", "feature"} {
			url = _removeParam(url, key)
		}
	}

	// Remove the utm_* tracking parameters.
	utmKeys := []string{
		"utm_source",
		"utm_medium",
		"utm_campaign",
		"utm_term",
		"utm_content",
	}
	for _, key := range utmKeys {
		url = _removeParam(url, key)
	}

	return url
}

// RemoveNode detaches remove_me from its parent when remove_me is a proper
// descendant of root_node. root_node itself is never removed, and nothing
// happens when remove_me lies outside the subtree or either argument is nil.
//
// Instead of searching the whole subtree, the function walks upwards from
// remove_me through the Parent links and stops at root_node; this relies on
// the tree's Parent pointers being consistent, which RemoveChild requires
// anyway.
func RemoveNode(root_node *xhtml.Node, remove_me *xhtml.Node) {
	if root_node == nil || remove_me == nil {
		return
	}

	for ancestor := remove_me.Parent; ancestor != nil; ancestor = ancestor.Parent {
		if ancestor == root_node {
			remove_me.Parent.RemoveChild(remove_me)
			return
		}
	}
}