Skip to content
Merged
34 changes: 24 additions & 10 deletions Doc/library/urllib.request.rst
Original file line number Diff line number Diff line change
Expand Up @@ -146,16 +146,19 @@ The :mod:`urllib.request` module defines the following functions:
attribute to modify its position in the handlers list.


.. function:: pathname2url(path)
.. function:: pathname2url(path, *, add_scheme=False)

Convert the given local path to a ``file:`` URL. This function uses the
:func:`~urllib.parse.quote` function to encode the path. For historical
reasons, the return value omits the ``file:`` scheme prefix. This example
shows the function being used on Windows::
:func:`~urllib.parse.quote` function to encode the path.

If *add_scheme* is false (the default), the return value omits the
``file:`` scheme prefix. Set *add_scheme* to true to return a complete URL.

This example shows the function being used on Windows::

>>> from urllib.request import pathname2url
>>> path = 'C:\\Program Files'
>>> 'file:' + pathname2url(path)
>>> pathname2url(path, add_scheme=True)
'file:///C:/Program%20Files'

.. versionchanged:: 3.14
Expand All @@ -168,17 +171,25 @@ The :mod:`urllib.request` module defines the following functions:
sections. For example, the path ``/etc/hosts`` is converted to
the URL ``///etc/hosts``.

.. versionchanged:: next
The *add_scheme* argument was added.


.. function:: url2pathname(url)
.. function:: url2pathname(url, *, has_scheme=False)

Convert the given ``file:`` URL to a local path. This function uses
:func:`~urllib.parse.unquote` to decode the URL. For historical reasons,
the given value *must* omit the ``file:`` scheme prefix. This example shows
the function being used on Windows::
:func:`~urllib.parse.unquote` to decode the URL.

If *has_scheme* is false (the default), the given value should omit a
``file:`` scheme prefix. If *has_scheme* is set to true, the given value
should include the prefix; a :exc:`~urllib.error.URLError` is raised if it
doesn't.

This example shows the function being used on Windows::

>>> from urllib.request import url2pathname
>>> url = 'file:///C:/Program%20Files'
>>> url2pathname(url.removeprefix('file:'))
>>> url2pathname(url, has_scheme=True)
'C:\\Program Files'

.. versionchanged:: 3.14
Expand All @@ -193,6 +204,9 @@ The :mod:`urllib.request` module defines the following functions:
returned (as before), and on other platforms a
:exc:`~urllib.error.URLError` is raised.

.. versionchanged:: next
The *has_scheme* argument was added.


.. function:: getproxies()

Expand Down
7 changes: 5 additions & 2 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1197,16 +1197,19 @@ urllib
supporting SHA-256 digest authentication as specified in :rfc:`7616`.
(Contributed by Calvin Bui in :gh:`128193`.)

* Improve standards compliance when parsing and emitting ``file:`` URLs.
* Improve ergonomics and standards compliance when parsing and emitting
``file:`` URLs.

In :func:`urllib.request.url2pathname`:

- Accept a complete URL when the new *has_scheme* argument is set to true.
- Discard URL authorities that resolve to a local IP address.
- Raise :exc:`~urllib.error.URLError` if a URL authority doesn't resolve
to ``localhost``, except on Windows where we return a UNC path.
to a local IP address, except on Windows where we return a UNC path.

In :func:`urllib.request.pathname2url`:

- Return a complete URL when the new *add_scheme* argument is set to true.
- Include an empty URL authority when a path begins with a slash. For
example, the path ``/etc/hosts`` is converted to the URL ``///etc/hosts``.

Expand Down
6 changes: 2 additions & 4 deletions Lib/pathlib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1271,17 +1271,15 @@ def as_uri(self):
if not self.is_absolute():
raise ValueError("relative paths can't be expressed as file URIs")
from urllib.request import pathname2url
return f'file:{pathname2url(str(self))}'
return pathname2url(str(self), add_scheme=True)

@classmethod
def from_uri(cls, uri):
"""Return a new path from the given 'file' URI."""
if not uri.startswith('file:'):
raise ValueError(f"URI does not start with 'file:': {uri!r}")
from urllib.error import URLError
from urllib.request import url2pathname
try:
path = cls(url2pathname(uri.removeprefix('file:')))
path = cls(url2pathname(uri, has_scheme=True))
except URLError as exc:
raise ValueError(exc.reason) from None
if not path.is_absolute():
Expand Down
4 changes: 2 additions & 2 deletions Lib/test/test_pathlib/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -3302,8 +3302,8 @@ def test_from_uri_posix(self):
@needs_posix
def test_from_uri_pathname2url_posix(self):
P = self.cls
self.assertEqual(P.from_uri('file:' + pathname2url('/foo/bar')), P('/foo/bar'))
self.assertEqual(P.from_uri('file:' + pathname2url('//foo/bar')), P('//foo/bar'))
self.assertEqual(P.from_uri(pathname2url('/foo/bar', add_scheme=True)), P('/foo/bar'))
self.assertEqual(P.from_uri(pathname2url('//foo/bar', add_scheme=True)), P('//foo/bar'))

@needs_windows
def test_absolute_windows(self):
Expand Down
34 changes: 32 additions & 2 deletions Lib/test/test_urllib.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ def test_missing_localfile(self):

def test_file_notexists(self):
fd, tmp_file = tempfile.mkstemp()
tmp_file_canon_url = 'file:' + urllib.request.pathname2url(tmp_file)
tmp_file_canon_url = urllib.request.pathname2url(tmp_file, add_scheme=True)
parsed = urllib.parse.urlsplit(tmp_file_canon_url)
tmp_fileurl = parsed._replace(netloc='localhost').geturl()
try:
Expand Down Expand Up @@ -620,7 +620,7 @@ def tearDown(self):

def constructLocalFileUrl(self, filePath):
filePath = os.path.abspath(filePath)
return "file:" + urllib.request.pathname2url(filePath)
return urllib.request.pathname2url(filePath, add_scheme=True)

def createNewTempFile(self, data=b""):
"""Creates a new temporary file containing the specified data,
Expand Down Expand Up @@ -1435,6 +1435,12 @@ def test_pathname2url(self):
self.assertEqual(fn(f'a{sep}b.c'), 'a/b.c')
self.assertEqual(fn(f'{sep}a{sep}b.c'), '///a/b.c')
self.assertEqual(fn(f'{sep}a{sep}b%#c'), '///a/b%25%23c')
self.assertEqual(fn('', add_scheme=True), 'file:')
self.assertEqual(fn(sep, add_scheme=True), 'file:///')
self.assertEqual(fn('a', add_scheme=True), 'file:a')
self.assertEqual(fn(f'a{sep}b.c', add_scheme=True), 'file:a/b.c')
self.assertEqual(fn(f'{sep}a{sep}b.c', add_scheme=True), 'file:///a/b.c')
self.assertEqual(fn(f'{sep}a{sep}b%#c', add_scheme=True), 'file:///a/b%25%23c')

@unittest.skipUnless(sys.platform == 'win32',
'test specific to Windows pathnames.')
Expand Down Expand Up @@ -1503,6 +1509,30 @@ def test_url2pathname(self):
self.assertEqual(fn('//localhost/foo/bar'), f'{sep}foo{sep}bar')
self.assertEqual(fn('///foo/bar'), f'{sep}foo{sep}bar')
self.assertEqual(fn('////foo/bar'), f'{sep}{sep}foo{sep}bar')
self.assertEqual(fn('data:blah'), 'data:blah')
self.assertEqual(fn('data://blah'), f'data:{sep}{sep}blah')
self.assertEqual(fn('file:', has_scheme=True), '')
self.assertEqual(fn('FILE:', has_scheme=True), '')
self.assertEqual(fn('FiLe:', has_scheme=True), '')
self.assertEqual(fn('file:/', has_scheme=True), f'{sep}')
self.assertEqual(fn('file:///', has_scheme=True), f'{sep}')
self.assertEqual(fn('file:////', has_scheme=True), f'{sep}{sep}')
self.assertEqual(fn('file:foo', has_scheme=True), 'foo')
self.assertEqual(fn('file:foo/bar', has_scheme=True), f'foo{sep}bar')
self.assertEqual(fn('file:/foo/bar', has_scheme=True), f'{sep}foo{sep}bar')
self.assertEqual(fn('file://localhost/foo/bar', has_scheme=True), f'{sep}foo{sep}bar')
self.assertEqual(fn('file:///foo/bar', has_scheme=True), f'{sep}foo{sep}bar')
self.assertEqual(fn('file:////foo/bar', has_scheme=True), f'{sep}{sep}foo{sep}bar')
self.assertEqual(fn('file:data:blah', has_scheme=True), 'data:blah')
self.assertEqual(fn('file:data://blah', has_scheme=True), f'data:{sep}{sep}blah')
self.assertRaises(urllib.error.URLError, fn, '', has_scheme=True)
self.assertRaises(urllib.error.URLError, fn, ':', has_scheme=True)
self.assertRaises(urllib.error.URLError, fn, 'foo', has_scheme=True)
self.assertRaises(urllib.error.URLError, fn, 'http:foo', has_scheme=True)
self.assertRaises(urllib.error.URLError, fn, 'localfile:foo', has_scheme=True)
self.assertRaises(urllib.error.URLError, fn, 'data:foo', has_scheme=True)
self.assertRaises(urllib.error.URLError, fn, 'data:file:foo', has_scheme=True)
self.assertRaises(urllib.error.URLError, fn, 'data:file://foo', has_scheme=True)

@unittest.skipUnless(sys.platform == 'win32',
'test specific to Windows pathnames.')
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_urllib2.py
Original file line number Diff line number Diff line change
Expand Up @@ -809,7 +809,7 @@ def test_file(self):

TESTFN = os_helper.TESTFN
towrite = b"hello, world\n"
canonurl = 'file:' + urllib.request.pathname2url(os.path.abspath(TESTFN))
canonurl = urllib.request.pathname2url(os.path.abspath(TESTFN), add_scheme=True)
parsed = urlsplit(canonurl)
if parsed.netloc:
raise unittest.SkipTest("non-local working directory")
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_urllib2net.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def test_file(self):
f.write('hi there\n')
f.close()
urls = [
'file:' + urllib.request.pathname2url(os.path.abspath(TESTFN)),
urllib.request.pathname2url(os.path.abspath(TESTFN), add_scheme=True),
('file:///nonsensename/etc/passwd', None,
urllib.error.URLError),
]
Expand Down
32 changes: 21 additions & 11 deletions Lib/urllib/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -1466,17 +1466,16 @@ def get_names(self):
def open_local_file(self, req):
import email.utils
import mimetypes
filename = _splittype(req.full_url)[1]
localfile = url2pathname(filename)
localfile = url2pathname(req.full_url, has_scheme=True)
try:
stats = os.stat(localfile)
size = stats.st_size
modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
mtype = mimetypes.guess_type(filename)[0]
mtype = mimetypes.guess_file_type(localfile)[0]
headers = email.message_from_string(
'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
(mtype or 'text/plain', size, modified))
origurl = f'file:{pathname2url(localfile)}'
origurl = pathname2url(localfile, add_scheme=True)
return addinfourl(open(localfile, 'rb'), headers, origurl)
except OSError as exp:
raise URLError(exp, exp.filename)
Expand Down Expand Up @@ -1635,9 +1634,16 @@ def data_open(self, req):

# Code move from the old urllib module

def url2pathname(url):
"""OS-specific conversion from a relative URL of the 'file' scheme
to a file system path; not recommended for general use."""
def url2pathname(url, *, has_scheme=False):
"""Convert the given file URL to a local file system path.

The 'file:' scheme prefix must be omitted unless *has_scheme*
is set to true.
"""
if has_scheme:
scheme, url = _splittype(url)
if scheme != 'file':
raise URLError("URL is missing a 'file:' scheme")
authority, url = _splithost(url)
if os.name == 'nt':
if not _is_local_authority(authority):
Expand All @@ -1661,13 +1667,17 @@ def url2pathname(url):
return unquote(url, encoding=encoding, errors=errors)


def pathname2url(pathname):
"""OS-specific conversion from a file system path to a relative URL
of the 'file' scheme; not recommended for general use."""
def pathname2url(pathname, *, add_scheme=False):
"""Convert the given local file system path to a file URL.

The 'file:' scheme prefix is omitted unless *add_scheme*
is set to true.
"""
if os.name == 'nt':
pathname = pathname.replace('\\', '/')
encoding = sys.getfilesystemencoding()
errors = sys.getfilesystemencodeerrors()
scheme = 'file:' if add_scheme else ''
drive, root, tail = os.path.splitroot(pathname)
if drive:
# First, clean up some special forms. We are going to sacrifice the
Expand All @@ -1689,7 +1699,7 @@ def pathname2url(pathname):
# avoids interpreting the path as a URL authority.
root = '//' + root
tail = quote(tail, encoding=encoding, errors=errors)
return drive + root + tail
return scheme + drive + root + tail


# Utility functions
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Add optional *add_scheme* argument to :func:`urllib.request.pathname2url`; when
set to true, a complete URL is returned. Likewise add optional *has_scheme*
argument to :func:`~urllib.request.url2pathname`; when set to true, a complete
URL is accepted.
Loading