Skip to content

Remove redundant code and fix pylint #175

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits
Nov 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions setup.cfg

This file was deleted.

1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
include_package_data=True,
zip_safe=False,
platforms=["Any"],
python_requires=">=3.6",
classifiers=[
"Development Status :: 5 - Production/Stable",
"License :: OSI Approved :: BSD License",
Expand Down
2 changes: 1 addition & 1 deletion tests/test_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def _assert_encoding(self, content_type, body, expected_encoding, expected_unico
else:
self.assertTrue(
body_unicode in expected_unicode,
"%s is not in %s" % (body_unicode, expected_unicode),
f"{body_unicode} is not in {expected_unicode}",
)

def test_content_type_and_conversion(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def test_missing_semicolon(self):
):
self.assertEqual(replace_entities(entity, encoding="cp1252"), result)
self.assertEqual(
replace_entities("x%sy" % entity, encoding="cp1252"), "x%sy" % result
replace_entities(f"x{entity}y", encoding="cp1252"), f"x{result}y"
)

def test_encoding(self):
Expand Down
18 changes: 5 additions & 13 deletions tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,12 +266,8 @@ def test_safe_url_idna_encoding_failure(self):

# DNS label too long
self.assertEqual(
safe_url_string(
"http://www.{label}.com/résumé?q=résumé".format(label="example" * 11)
),
"http://www.{label}.com/r%C3%A9sum%C3%A9?q=r%C3%A9sum%C3%A9".format(
label="example" * 11
),
safe_url_string(f"http://www.{'example' * 11}.com/résumé?q=résumé"),
f"http://www.{'example' * 11}.com/r%C3%A9sum%C3%A9?q=r%C3%A9sum%C3%A9",
)

def test_safe_url_port_number(self):
Expand Down Expand Up @@ -971,12 +967,8 @@ def test_canonicalize_url_idna_exceptions(self):

# DNS label too long
self.assertEqual(
canonicalize_url(
"http://www.{label}.com/résumé?q=résumé".format(label="example" * 11)
),
"http://www.{label}.com/r%C3%A9sum%C3%A9?q=r%C3%A9sum%C3%A9".format(
label="example" * 11
),
canonicalize_url(f"http://www.{'example' * 11}.com/résumé?q=résumé"),
f"http://www.{'example' * 11}.com/r%C3%A9sum%C3%A9?q=r%C3%A9sum%C3%A9",
)

def test_preserve_nonfragment_hash(self):
Expand Down Expand Up @@ -1033,7 +1025,7 @@ def test_bytes_uri(self):

def test_unicode_uri(self):
result = parse_data_uri("data:,é")
self.assertEqual(result.data, "é".encode("utf-8"))
self.assertEqual(result.data, "é".encode())

def test_default_mediatype(self):
result = parse_data_uri("data:;charset=iso-8859-7,%be%d3%be")
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# and then run "tox" from this directory.

[tox]
envlist = py27, pypy, py35, py36, py37, py38, py39, py310, pypy3, docs, security, flake8, pylint, black
envlist = py36, py37, py38, py39, py310, pypy3, docs, security, flake8, pylint, black

[testenv]
deps =
Expand Down
12 changes: 5 additions & 7 deletions w3lib/encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@
Functions for handling encoding of web pages
"""
import re, codecs, encodings
from sys import version_info
from typing import Callable, Match, Optional, Tuple, Union, cast
from w3lib._types import AnyUnicodeError, StrOrBytes
from w3lib.util import to_native_str
import w3lib.util

_HEADER_ENCODING_RE = re.compile(r"charset=([\w-]+)", re.I)

Expand Down Expand Up @@ -46,6 +45,7 @@ def http_content_type_encoding(content_type: Optional[str]) -> Optional[str]:
_XML_ENCODING_RE = _TEMPLATE % ("encoding", r"(?P<xmlcharset>[\w-]+)")

# check for meta tags, or xml decl. and stop search if a body tag is encountered
# pylint: disable=consider-using-f-string
_BODY_ENCODING_PATTERN = (
r"<\s*(?:meta%s(?:(?:\s+%s|\s+%s){2}|\s+%s)|\?xml\s[^>]+%s|body)"
% (_SKIP_ATTRS, _HTTPEQUIV_RE, _CONTENT_RE, _CONTENT2_RE, _XML_ENCODING_RE)
Expand Down Expand Up @@ -93,7 +93,7 @@ def html_body_declared_encoding(html_body_str: StrOrBytes) -> Optional[str]:
or match.group("xmlcharset")
)
if encoding:
return resolve_encoding(to_native_str(encoding))
return resolve_encoding(w3lib.util.to_unicode(encoding))

return None

Expand Down Expand Up @@ -163,7 +163,7 @@ def resolve_encoding(encoding_alias: str) -> Optional[str]:
(codecs.BOM_UTF16_LE, "utf-16-le"),
(codecs.BOM_UTF8, "utf-8"),
]
_FIRST_CHARS = set(c[0] for (c, _) in _BOM_TABLE)
_FIRST_CHARS = {c[0] for (c, _) in _BOM_TABLE}


def read_bom(data: bytes) -> Union[Tuple[None, None], Tuple[str, bytes]]:
Expand Down Expand Up @@ -208,9 +208,7 @@ def to_unicode(data_str: bytes, encoding: str) -> str:
Characters that cannot be converted will be converted to ``\\ufffd`` (the
unicode replacement character).
"""
return data_str.decode(
encoding, "replace" if version_info[0:2] >= (3, 3) else "w3lib_replace"
)
return data_str.decode(encoding, "replace")


def html_to_unicode(
Expand Down
4 changes: 1 addition & 3 deletions w3lib/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,9 +228,7 @@ def remove_tags_with_content(

utext = to_unicode(text, encoding)
if which_ones:
tags = "|".join(
[r"<%s\b.*?</%s>|<%s\s*/>" % (tag, tag, tag) for tag in which_ones]
)
tags = "|".join([fr"<{tag}\b.*?</{tag}>|<{tag}\s*/>" for tag in which_ones])
retags = re.compile(tags, re.DOTALL | re.IGNORECASE)
utext = retags.sub("", utext)
return utext
Expand Down
4 changes: 2 additions & 2 deletions w3lib/http.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from base64 import urlsafe_b64encode
from typing import Any, List, MutableMapping, Optional, AnyStr, Sequence, Union, Mapping
from w3lib.util import to_bytes, to_native_str
from w3lib.util import to_bytes, to_unicode

HeadersDictInput = Mapping[bytes, Union[Any, Sequence]]
HeadersDictOutput = MutableMapping[bytes, List[bytes]]
Expand Down Expand Up @@ -97,7 +97,7 @@ def basic_auth_header(

"""

auth = "%s:%s" % (to_native_str(username), to_native_str(password))
auth = f"{to_unicode(username)}:{to_unicode(password)}"
# XXX: RFC 2617 doesn't define encoding, but ISO-8859-1
# seems to be the most widely used encoding here. See also:
# http://greenbytes.de/tech/webdav/draft-ietf-httpauth-basicauth-enc-latest.html
Expand Down
4 changes: 3 additions & 1 deletion w3lib/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def path_to_file_uri(path: str) -> str:
x = pathname2url(os.path.abspath(path))
if os.name == "nt":
x = x.replace("|", ":") # http://bugs.python.org/issue5861
return "file:///%s" % x.lstrip("/")
return f"file:///{x.lstrip('/')}"


def file_uri_to_path(uri: str) -> str:
Expand All @@ -344,6 +344,7 @@ def any_to_uri(uri_or_path: str) -> str:
_char = set(map(chr, range(127)))

# RFC 2045 token.
# pylint: disable=consider-using-f-string
_token = r"[{}]+".format(
re.escape(
"".join(
Expand All @@ -359,6 +360,7 @@ def any_to_uri(uri_or_path: str) -> str:
)

# RFC 822 quoted-string, without surrounding quotation marks.
# pylint: disable=consider-using-f-string
_quoted_string = r"(?:[{}]|(?:\\[{}]))*".format(
re.escape("".join(_char - {'"', "\\", "\r"})), re.escape("".join(_char))
)
Expand Down