diff options
Diffstat (limited to 'helper.go')
| -rw-r--r-- | helper.go | 112 |
1 files changed, 83 insertions, 29 deletions
@@ -5,6 +5,7 @@ import ( "strings" log "github.com/sirupsen/logrus" "regexp" + xhtml "golang.org/x/net/html" ) func stripHNPrefix(title string) string { @@ -32,6 +33,10 @@ func _removeParam(url, key string) string { } func normalizeUrl(url string) string { + + /** + * Redirect http:// to https:// + */ match, err := regexp.MatchString("^http://", url) if err != nil { log.Fatal(err) @@ -42,7 +47,10 @@ func normalizeUrl(url string) string { url = r.ReplaceAllString(url, "https://") } - // add missing https:// if no scheme + /** + * add missing https:// if no scheme + * Fun fact: https://news.ycombinator.com/item?id=27351340 broke this part + */ u, err := _url.Parse(url) if err != nil { log.Fatal(err) @@ -56,7 +64,9 @@ func normalizeUrl(url string) string { } } - + /** + * Apple TV accepts youtube:// scheme + */ match, err = regexp.MatchString("youtube://", url) if err != nil { log.Fatal(err) @@ -66,6 +76,9 @@ func normalizeUrl(url string) string { url = r.ReplaceAllString(url, "https://") } + /** + * Redirect youtu.be to desktop version + */ match, err = regexp.MatchString("youtu.be/", url) if err != nil { log.Fatal(err) @@ -73,12 +86,6 @@ func normalizeUrl(url string) string { if match { log.Debug("normalize: ", "youtu.be ", url) - /** - * remove tracking param "si" - */ - url = _removeParam(url, "si") - url = _removeParam(url, "feature") - u, err := _url.Parse(url) if err != nil { log.Fatal(err) @@ -96,6 +103,9 @@ func normalizeUrl(url string) string { //url = r.ReplaceAllString(url, "youtube.com/watch?v=") } + /** + * Redirect m.youtube.com to desktop version + */ match, err = regexp.MatchString("/m.youtube.com/", url) if err != nil { log.Fatal(err) @@ -107,12 +117,14 @@ func normalizeUrl(url string) string { * remove tracking param "si" */ url = _removeParam(url, "si") - url = _removeParam(url, "feature") r := regexp.MustCompile("/m.youtube.com/") url = r.ReplaceAllString(url, "/www.youtube.com/") } + /** + * Redirect m.imdb.com to desktop version + */ match, err = regexp.MatchString("/m.imdb.com/", url) if err != nil { log.Fatal(err) @@ -120,26 +132,56 @@ func normalizeUrl(url string) string { if match { log.Debug("normalize: ", "m.imdb.com ", url) + r := regexp.MustCompile("/m.imdb.com/") + url = r.ReplaceAllString(url, "/www.imdb.com") + } + + /** + * Append www. to normalize URL. exclude relative URLs starting with // since this is not recognized by Go + * Screw that, wierd edge case. Someone pasted a + */ + u, err = _url.Parse(url) + if err != nil { + log.Fatal(err) + } + + if ! strings.HasPrefix(u.Host, "www.") { + u.Host = "www." + u.Host + } + + url = u.String() + + /** + * Redirects youtube.com/c/<name> to youtube.com/@<name> + */ + match, err = regexp.MatchString("/.youtube.com/c/", url) + if err != nil { + log.Fatal(err) + } + if match { + log.Debug("normalize: ", "youtube.com/c/ -> @ ", url) + /** * remove tracking param "si" */ url = _removeParam(url, "si") - url = _removeParam(url, "feature") - r := regexp.MustCompile("/m.imdb.com/") - url = r.ReplaceAllString(url, "/www.imdb.com") + r := regexp.MustCompile("youtube.com/c/") + url = r.ReplaceAllString(url, "youtube.com/@") } - /* - match, err = regexp.MatchString("m.wikipedia.org", url) + /** + * remove tracking param "si", "feature" and "pp" from every youtube video + */ + match, err = regexp.MatchString("/www.youtube.com/", url) if err != nil { log.Fatal(err) } if match { - r := regexp.MustCompile("m.wikipedia.org") - url = r.ReplaceAllString(url, "wikipedia.org") + url = _removeParam(url, "si") + url = _removeParam(url, "pp") + url = _removeParam(url, "feature") } - */ /** * remove tracking utm_ params @@ -150,19 +192,31 @@ func normalizeUrl(url string) string { url = _removeParam(url, "utm_term") url = _removeParam(url, "utm_content") - u, err = _url.Parse(url) - if err != nil { - log.Fatal(err) - } + return url +} - /** - * Append www. to normalize URL. exclude relative URLs starting with // since this is not recognized by Go - * Screw that, wierd edge case. Someone pasted a - */ - if ! strings.HasPrefix(u.Host, "www.") { - u.Host = "www." + u.Host +func RemoveNode(root_node *xhtml.Node, remove_me *xhtml.Node) { + found_node := false + check_nodes := make(map[int]*xhtml.Node) + i := 0 + + // loop through siblings + for n := root_node.FirstChild; n != nil; n = n.NextSibling { + if n == remove_me { + found_node = true + n.Parent.RemoveChild(n) } - url = u.String() - return url + check_nodes[i] = n + i++ + } + + // check if removing node is found + // if yes no need to check childs returning + // if no continue loop through childs and so on + if found_node == false { + for _, item := range check_nodes { + RemoveNode(item, remove_me) + } + } } |
