diff options
| author | ryanss | 2015-01-12 23:59:00 -0500 |
|---|---|---|
| committer | ryanss | 2015-01-12 23:59:00 -0500 |
| commit | 7151ac23fd2f2e04c69d1fb642081201c8a16ce6 (patch) | |
| tree | 244632b642feb987263b5afe9e0d5e0ef265dbd4 /plugin/readability/url_helpers.py | |
| parent | 0e72d030addb30976089a98bbfaffdc0f10eacd7 (diff) | |
| download | vim-hn-7151ac23fd2f2e04c69d1fb642081201c8a16ce6.tar.gz | |
Add readability parsing for reading linked pages
Diffstat (limited to 'plugin/readability/url_helpers.py')
| -rwxr-xr-x | plugin/readability/url_helpers.py | 52 |
1 files changed, 52 insertions, 0 deletions
"""Helpers for extracting the host of a URL and resolving links against a base URL."""
import logging

try:
    # Python 3 location of urlparse.
    from urllib.parse import urlparse
except ImportError:
    # Python 2 fallback (the module this file was originally written for).
    from urlparse import urlparse


def host_for_url(url):
    """
    Return the network location (host) part of ``url``.

    Logs an error and returns ``None`` when no host can be extracted.

    >>> host_for_url('http://base/whatever/fdsh')
    'base'
    >>> host_for_url('invalid')
    """
    host = urlparse(url)[1]
    if not host:
        # Lazy %-args: the message is only formatted if the record is emitted.
        logging.error("could not extract host from URL: %r", url)
        return None
    return host


def absolute_url(url, base_href):
    """
    Resolve ``url`` against ``base_href`` and return an absolute URL.

    Surrounding whitespace in ``url`` is stripped first.  Then:

    * a URL that already has a scheme is returned unchanged;
    * a scheme-relative URL (``//host/path``) inherits the scheme of
      ``base_href``;
    * a URL starting with ``/`` is appended to the scheme and host of
      ``base_href``;
    * anything else is resolved relative to the directory of the path of
      ``base_href``.

    >>> absolute_url('foo', 'http://base/whatever/ooo/fdsh')
    'http://base/whatever/ooo/foo'

    >>> absolute_url('foo/bar/', 'http://base')
    'http://base/foo/bar/'

    >>> absolute_url('/foo/bar', 'http://base/whatever/fdskf')
    'http://base/foo/bar'

    >>> absolute_url('\\n/foo/bar', 'http://base/whatever/fdskf')
    'http://base/foo/bar'

    >>> absolute_url('http://localhost/foo', 'http://base/whatever/fdskf')
    'http://localhost/foo'

    >>> absolute_url('//cdn.example.com/lib.js', 'https://base/page')
    'https://cdn.example.com/lib.js'
    """
    url = url.strip()
    proto = urlparse(url)[0]
    if proto:
        return url

    base_url_parts = urlparse(base_href)

    if url.startswith('//'):
        # Scheme-relative reference: it names its own host, so only the
        # scheme comes from the base.  Without this branch the URL would be
        # glued onto the base host as a path ('http://base//cdn...').
        base_scheme = base_url_parts[0]
        if base_scheme:
            return base_scheme + ':' + url
        return url

    base_server = '://'.join(base_url_parts[:2])
    if url.startswith('/'):
        return base_server + url

    # Relative path: keep the base path up to and including its last '/'.
    path = base_url_parts[2]
    if '/' in path:
        path = path.rsplit('/', 1)[0] + '/'
    else:
        path = '/'
    return base_server + path + url


if __name__ == '__main__':
    import doctest
    doctest.testmod()
\ No newline at end of file |
