163 lines
4.2 KiB
Python
163 lines
4.2 KiB
Python
|
from collections import namedtuple
|
||
|
|
||
|
from ..exceptions import LocationParseError
|
||
|
|
||
|
|
||
|
class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])):
    """
    Datastructure for representing an HTTP URL. Used as a return value for
    :func:`parse_url`.

    The tuple fields are, in order: ``scheme``, ``auth``, ``host``, ``port``,
    ``path``, ``query`` and ``fragment``. Every field is optional and defaults
    to ``None`` when not supplied.
    """
    # This has to be spelled ``__slots__``: a plain ``slots`` attribute is just
    # an ordinary class variable and leaves every instance with a ``__dict__``.
    # An empty ``__slots__`` keeps instances as lean as the underlying tuple.
    __slots__ = ()

    def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None):
        # Overridden only to give every namedtuple field a default of None.
        return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment)

    @property
    def hostname(self):
        """For backwards-compatibility with urlparse. We're nice like that."""
        return self.host

    @property
    def request_uri(self):
        """Absolute path including the query string."""
        # A missing or empty path is rendered as the root path.
        uri = self.path or '/'

        # An empty-but-present query still gets its '?' separator; only a
        # query of None is omitted.
        if self.query is not None:
            uri += '?' + self.query

        return uri

    @property
    def netloc(self):
        """Network location including host and port"""
        # The port is appended only when it is set (truthy); otherwise the
        # bare host is returned.
        if self.port:
            return '%s:%d' % (self.host, self.port)
        return self.host
|
||
|
|
||
|
|
||
|
def split_first(s, delims):
    """
    Given a string and an iterable of delimiters, split on the first found
    delimiter. Return two split parts and the matched delimiter.

    If not found, then the first part is the full input string.

    Delimiters may be longer than one character; the whole matched delimiter
    is removed from the result. When several delimiters match at the same
    position, the one that comes first in ``delims`` wins.

    Example: ::

        >>> split_first('foo/bar?baz', '?/=')
        ('foo', 'bar?baz', '/')
        >>> split_first('foo/bar?baz', '123')
        ('foo/bar?baz', '', None)
        >>> split_first('foo::bar', ['::'])
        ('foo', 'bar', '::')

    Scales linearly with number of delims. Not ideal for large number of delims.

    :param s: The string to split.
    :param delims: Iterable of delimiter strings to look for.
    :return: A ``(before, after, delimiter)`` tuple, or ``(s, '', None)``
        when none of the delimiters occurs in ``s``.
    """
    min_idx = None
    min_delim = None
    for d in delims:
        idx = s.find(d)
        if idx < 0:
            # This delimiter does not occur in the string at all.
            continue

        # Strict comparison: on a tie the earlier delimiter is kept.
        if min_idx is None or idx < min_idx:
            min_idx = idx
            min_delim = d

    if min_idx is None:
        return s, '', None

    # Skip the full length of the matched delimiter, not just one character.
    return s[:min_idx], s[min_idx + len(min_delim):], min_delim
|
||
|
|
||
|
|
||
|
def parse_url(url):
    """
    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
    performed to parse incomplete urls. Fields not provided will be None.

    Partly backwards-compatible with :mod:`urlparse`.

    Example: ::

        >>> parse_url('http://google.com/mail/')
        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
        >>> parse_url('google.com:80')
        Url(scheme=None, host='google.com', port=80, path=None, ...)
        >>> parse_url('/foo?bar')
        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)

    :param url: The URL string to parse.
    :return: A :class:`.Url` built from the components that were found.
    :raises LocationParseError: If a non-empty port is not a valid integer,
        or if an IPv6 literal is opened with ``[`` but never closed.
    """

    # While this code has overlap with stdlib's urlparse, it is much
    # simplified for our needs and less annoying.
    # Additionally, this implementation does silly things to be optimal
    # on CPython.

    scheme = None
    auth = None
    host = None
    port = None
    path = None
    fragment = None
    query = None

    # Scheme
    if '://' in url:
        scheme, url = url.split('://', 1)

    # Find the earliest Authority Terminator
    # (http://tools.ietf.org/html/rfc3986#section-3.2)
    url, path_, delim = split_first(url, ['/', '?', '#'])

    if delim:
        # Reassemble the path
        path = delim + path_

    # Auth
    if '@' in url:
        # Last '@' denotes end of auth part
        auth, url = url.rsplit('@', 1)

    # IPv6
    if url and url[0] == '[':
        # An unterminated literal would otherwise surface as a bare
        # tuple-unpacking ValueError; report it like any other bad location.
        if ']' not in url:
            raise LocationParseError(url)
        host, url = url.split(']', 1)
        host += ']'

    # Port
    if ':' in url:
        _host, port = url.split(':', 1)

        if not host:
            host = _host

        if port:
            # If given, ports must be integers.
            if not port.isdigit():
                raise LocationParseError(url)
            try:
                port = int(port)
            except ValueError:
                # isdigit() accepts some Unicode digits (e.g. superscripts)
                # that int() refuses.
                raise LocationParseError(url)
        else:
            # Blank ports are cool, too. (rfc3986#section-3.2.3)
            port = None

    elif not host and url:
        host = url

    if not path:
        return Url(scheme, auth, host, port, path, query, fragment)

    # Fragment
    if '#' in path:
        path, fragment = path.split('#', 1)

    # Query
    if '?' in path:
        path, query = path.split('?', 1)

    return Url(scheme, auth, host, port, path, query, fragment)
|
||
|
|
||
|
|
||
|
def get_host(url):
    """
    Deprecated. Use :func:`.parse_url` instead.

    Parses ``url`` and returns a ``(scheme, hostname, port)`` tuple, with the
    scheme falling back to ``'http'`` when the URL does not provide one.
    """
    parsed = parse_url(url)
    scheme = parsed.scheme or 'http'
    return scheme, parsed.hostname, parsed.port
|