Skip to content

Commit

Permalink
Fix erroneous non-ASCII in local version (pypa#469)
Browse files Browse the repository at this point in the history
* Fix validation in the packaging.version.Version's constructor

* Fix validation in the packaging.specifiers.Specifier's constructor

* Complement relevant tests, including adding cases related to presence
  of non-ASCII whitespace characters in input strings

* Fix docs of packaging.version.VERSION_PATTERN by mentioning necessity
  of the re.ASCII flag
  • Loading branch information
zuo committed Oct 26, 2021
1 parent 42e1396 commit 89ee170
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 4 deletions.
5 changes: 4 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ Changelog
*unreleased*
~~~~~~~~~~~~

No unreleased changes.
* Fix parsing of ``Version`` and ``Specifier``, to prevent certain
non-ASCII letters from being accepted as a part of the local version
segment (:issue:`469`); also, fix the docs of ``VERSION_PATTERN``, to
mention necessity of the ``re.ASCII`` flag

21.0 - 2021-07-03
~~~~~~~~~~~~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion docs/version.rst
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ Reference
The pattern is not anchored at either end, and is intended for embedding
in larger expressions (for example, matching a version number as part of
a file name). The regular expression should be compiled with the
``re.VERBOSE`` and ``re.IGNORECASE`` flags set.
``re.VERBOSE``, ``re.IGNORECASE`` and ``re.ASCII`` flags set.


.. _PEP 440: https://www.python.org/dev/peps/pep-0440/
Expand Down
24 changes: 24 additions & 0 deletions packaging/specifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,23 @@ class Specifier(_IndividualSpecifier):

_regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE)

# Note: an additional check, based of the following regular
# expression, is necessary because without it the 'a-z'
# character ranges in the above regular expression, in
# conjunction with re.IGNORECASE, would cause erroneous
# acceptance of non-ASCII letters in the local version segment
# (see: https://docs.python.org/library/re.html#re.IGNORECASE).
_supplementary_restriction_regex = re.compile(
r"""
\s*===.* # No restriction in the identity operator case.
|
[\s\0-\177]* # In all other cases only whitespace characters
# and ASCII-only non-whitespace characters are
# allowed.
""",
re.VERBOSE,
)

_operators = {
"~=": "compatible",
"==": "equal",
Expand All @@ -422,6 +439,13 @@ class Specifier(_IndividualSpecifier):
"===": "arbitrary",
}

def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None:
super().__init__(spec, prereleases)

match = self._supplementary_restriction_regex.fullmatch(spec)
if not match:
raise InvalidSpecifier(f"Invalid specifier: '{spec}'")

@_require_version_compare
def _compare_compatible(self, prospective: ParsedVersion, spec: str) -> bool:

Expand Down
12 changes: 10 additions & 2 deletions packaging/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,12 +256,20 @@ def _legacy_cmpkey(version: str) -> LegacyCmpKey:

class Version(_BaseVersion):

_regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
_regex = re.compile(
VERSION_PATTERN,
# Note: the re.ASCII flag is necessary because without it the
# 'a-z' character ranges in VERSION_PATTERN, in conjunction
# with re.IGNORECASE, would cause erroneous acceptance of
# non-ASCII letters in the local version segment (see:
# https://docs.python.org/library/re.html#re.IGNORECASE).
re.VERBOSE | re.IGNORECASE | re.ASCII,
)

def __init__(self, version: str) -> None:

# Validate the version and parse it into pieces
match = self._regex.search(version)
match = self._regex.fullmatch(version.strip())
if not match:
raise InvalidVersion(f"Invalid version: '{version}'")

Expand Down
5 changes: 5 additions & 0 deletions tests/test_specifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ def test_specifiers_valid(self, specifier):
# Cannot use a prefix matching after a .devN version
"==1.0.dev1.*",
"!=1.0.dev1.*",
# Local version which includes a non-ASCII letter that matches
# regex '[a-z]' when re.IGNORECASE is in force and re.ASCII is not
"==1.0+\u0130",
],
)
def test_specifiers_invalid(self, specifier):
Expand Down Expand Up @@ -197,6 +200,7 @@ def test_specifiers_invalid(self, specifier):
# Various other normalizations
"v1.0",
" \r \f \v v1.0\t\n",
" \r\N{NARROW NO-BREAK SPACE}\v v1.0\N{PARAGRAPH SEPARATOR}",
],
)
def test_specifiers_normalized(self, version):
Expand All @@ -221,6 +225,7 @@ def test_specifiers_normalized(self, version):
("~=2.0", "~=2.0"),
# Spaces should be removed
("< 2", "<2"),
("<\N{HAIR SPACE}2", "<2"),
],
)
def test_specifiers_str_and_repr(self, specifier, expected):
Expand Down
4 changes: 4 additions & 0 deletions tests/test_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ def test_valid_versions(self, version):
"1.0+_foobar",
"1.0+foo&asd",
"1.0+1+1",
# Local version which includes a non-ASCII letter that matches
# regex '[a-z]' when re.IGNORECASE is in force and re.ASCII is not
"1.0+\u0130",
],
)
def test_invalid_versions(self, version):
Expand Down Expand Up @@ -218,6 +221,7 @@ def test_invalid_versions(self, version):
# Various other normalizations
("v1.0", "1.0"),
(" v1.0\t\n", "1.0"),
("\N{NARROW NO-BREAK SPACE}1.0\t\N{PARAGRAPH SEPARATOR}\n ", "1.0"),
],
)
def test_normalized_versions(self, version, normalized):
Expand Down

0 comments on commit 89ee170

Please sign in to comment.