summaryrefslogtreecommitdiff
path: root/plugin/readability/url_helpers.py
diff options
context:
space:
mode:
author ryanss 2015-01-12 23:59:00 -0500
committer ryanss 2015-01-12 23:59:00 -0500
commit 7151ac23fd2f2e04c69d1fb642081201c8a16ce6 (patch)
tree 244632b642feb987263b5afe9e0d5e0ef265dbd4 /plugin/readability/url_helpers.py
parent 0e72d030addb30976089a98bbfaffdc0f10eacd7 (diff)
download vim-hn-7151ac23fd2f2e04c69d1fb642081201c8a16ce6.tar.gz
Add readability parsing for reading linked pages
Diffstat (limited to 'plugin/readability/url_helpers.py')
-rwxr-xr-x plugin/readability/url_helpers.py 52
1 files changed, 52 insertions, 0 deletions
diff --git a/plugin/readability/url_helpers.py b/plugin/readability/url_helpers.py
new file mode 100755
index 0000000..8234c8d
--- /dev/null
+++ b/plugin/readability/url_helpers.py
@@ -0,0 +1,52 @@
import logging
from urlparse import urljoin
from urlparse import urlparse
+
def host_for_url(url):
    """
    Return the network-location component of ``url``.

    This is element 1 of the ``urlparse`` result (the "netloc"), so any
    port or user-info present in the URL is included as-is.

    Returns ``None`` and logs an error when no host can be extracted,
    either because the URL has no network location or because
    ``urlparse`` rejects it as malformed.

    >>> host_for_url('http://base/whatever/fdsh')
    'base'
    >>> host_for_url('invalid')
    >>> host_for_url('http://[::1/unterminated')
    """
    try:
        host = urlparse(url)[1]
    except ValueError:
        # urlparse raises ValueError for a malformed authority (for
        # example an unterminated IPv6 bracket).  Treat that the same as
        # "no host" so callers only ever have to check for None.
        host = None
    if not host:
        # Pass the URL as a logging argument so the message is only
        # formatted when the record is actually emitted.
        logging.error("could not extract host from URL: %r", url)
        return None
    return host
+
def absolute_url(url, base_href):
    """
    Resolve ``url`` against ``base_href`` and return an absolute URL.

    Leading and trailing whitespace is stripped from ``url`` first.  If
    the stripped ``url`` already has a scheme it is returned unchanged.
    Otherwise it is resolved against ``base_href`` with the standard
    library's ``urljoin``, which handles path-relative, root-relative,
    protocol-relative (``//host/path``), dot-segment (``../x``),
    query-only and fragment-only references.  An empty ``url`` resolves
    to ``base_href`` itself.

    >>> absolute_url('foo', 'http://base/whatever/ooo/fdsh')
    'http://base/whatever/ooo/foo'

    >>> absolute_url('foo/bar/', 'http://base')
    'http://base/foo/bar/'

    >>> absolute_url('/foo/bar', 'http://base/whatever/fdskf')
    'http://base/foo/bar'

    >>> absolute_url('\\n/foo/bar', 'http://base/whatever/fdskf')
    'http://base/foo/bar'

    >>> absolute_url('http://localhost/foo', 'http://base/whatever/fdskf')
    'http://localhost/foo'

    >>> absolute_url('//cdn.example.com/lib.js', 'https://base/x/y')
    'https://cdn.example.com/lib.js'

    >>> absolute_url('../img/a.png', 'http://base/a/b/c')
    'http://base/a/img/a.png'

    >>> absolute_url('?page=2', 'http://base/a/b')
    'http://base/a/b?page=2'
    """
    url = url.strip()
    # Anything that already has a scheme (http:, mailto:, javascript:, ...)
    # is passed through untouched rather than being re-resolved.
    if urlparse(url)[0]:
        return url
    # Delegate reference resolution to the standard library instead of
    # concatenating strings, which mishandled '//host', '?query',
    # '#fragment' and '..' references.
    return urljoin(base_href, url)
+
if __name__ == '__main__':
    # Executing this file directly runs the doctests embedded in the
    # docstrings of this module.
    from doctest import testmod
    testmod()