Skip to content

Remove redundant code and fix pylint #175

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits
Nov 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions setup.cfg

This file was deleted.

1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
include_package_data=True,
zip_safe=False,
platforms=["Any"],
python_requires=">=3.6",
classifiers=[
"Development Status :: 5 - Production/Stable",
"License :: OSI Approved :: BSD License",
Expand Down
2 changes: 1 addition & 1 deletion tests/test_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def _assert_encoding(self, content_type, body, expected_encoding, expected_unico
else:
self.assertTrue(
body_unicode in expected_unicode,
"%s is not in %s" % (body_unicode, expected_unicode),
f"{body_unicode} is not in {expected_unicode}",
)

def test_content_type_and_conversion(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def test_missing_semicolon(self):
):
self.assertEqual(replace_entities(entity, encoding="cp1252"), result)
self.assertEqual(
replace_entities("x%sy" % entity, encoding="cp1252"), "x%sy" % result
replace_entities(f"x{entity}y", encoding="cp1252"), f"x{result}y"
)

def test_encoding(self):
Expand Down
18 changes: 5 additions & 13 deletions tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,12 +266,8 @@ def test_safe_url_idna_encoding_failure(self):

# DNS label too long
self.assertEqual(
safe_url_string(
"http://www.{label}.com/résumé?q=résumé".format(label="example" * 11)
),
"http://www.{label}.com/r%C3%A9sum%C3%A9?q=r%C3%A9sum%C3%A9".format(
label="example" * 11
),
safe_url_string(f"http://www.{'example' * 11}.com/résumé?q=résumé"),
f"http://www.{'example' * 11}.com/r%C3%A9sum%C3%A9?q=r%C3%A9sum%C3%A9",
)

def test_safe_url_port_number(self):
Expand Down Expand Up @@ -971,12 +967,8 @@ def test_canonicalize_url_idna_exceptions(self):

# DNS label too long
self.assertEqual(
canonicalize_url(
"http://www.{label}.com/résumé?q=résumé".format(label="example" * 11)
),
"http://www.{label}.com/r%C3%A9sum%C3%A9?q=r%C3%A9sum%C3%A9".format(
label="example" * 11
),
canonicalize_url(f"http://www.{'example' * 11}.com/résumé?q=résumé"),
f"http://www.{'example' * 11}.com/r%C3%A9sum%C3%A9?q=r%C3%A9sum%C3%A9",
)

def test_preserve_nonfragment_hash(self):
Expand Down Expand Up @@ -1033,7 +1025,7 @@ def test_bytes_uri(self):

def test_unicode_uri(self):
result = parse_data_uri("data:,é")
self.assertEqual(result.data, "é".encode("utf-8"))
self.assertEqual(result.data, "é".encode())

def test_default_mediatype(self):
result = parse_data_uri("data:;charset=iso-8859-7,%be%d3%be")
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# and then run "tox" from this directory.

[tox]
envlist = py27, pypy, py35, py36, py37, py38, py39, py310, pypy3, docs, security, flake8, pylint, black
envlist = py36, py37, py38, py39, py310, pypy3, docs, security, flake8, pylint, black

[testenv]
deps =
Expand Down
12 changes: 5 additions & 7 deletions w3lib/encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@
Functions for handling encoding of web pages
"""
import re, codecs, encodings
from sys import version_info
from typing import Callable, Match, Optional, Tuple, Union, cast
from w3lib._types import AnyUnicodeError, StrOrBytes
from w3lib.util import to_native_str
import w3lib.util

_HEADER_ENCODING_RE = re.compile(r"charset=([\w-]+)", re.I)

Expand Down Expand Up @@ -46,6 +45,7 @@ def http_content_type_encoding(content_type: Optional[str]) -> Optional[str]:
_XML_ENCODING_RE = _TEMPLATE % ("encoding", r"(?P<xmlcharset>[\w-]+)")

# check for meta tags, or xml decl. and stop search if a body tag is encountered
# pylint: disable=consider-using-f-string
_BODY_ENCODING_PATTERN = (
r"<\s*(?:meta%s(?:(?:\s+%s|\s+%s){2}|\s+%s)|\?xml\s[^>]+%s|body)"
% (_SKIP_ATTRS, _HTTPEQUIV_RE, _CONTENT_RE, _CONTENT2_RE, _XML_ENCODING_RE)
Expand Down Expand Up @@ -93,7 +93,7 @@ def html_body_declared_encoding(html_body_str: StrOrBytes) -> Optional[str]:
or match.group("xmlcharset")
)
if encoding:
return resolve_encoding(to_native_str(encoding))
return resolve_encoding(w3lib.util.to_unicode(encoding))

return None

Expand Down Expand Up @@ -163,7 +163,7 @@ def resolve_encoding(encoding_alias: str) -> Optional[str]:
(codecs.BOM_UTF16_LE, "utf-16-le"),
(codecs.BOM_UTF8, "utf-8"),
]
_FIRST_CHARS = set(c[0] for (c, _) in _BOM_TABLE)
_FIRST_CHARS = {c[0] for (c, _) in _BOM_TABLE}


def read_bom(data: bytes) -> Union[Tuple[None, None], Tuple[str, bytes]]:
Expand Down Expand Up @@ -208,9 +208,7 @@ def to_unicode(data_str: bytes, encoding: str) -> str:
Characters that cannot be converted will be converted to ``\\ufffd`` (the
unicode replacement character).
"""
return data_str.decode(
encoding, "replace" if version_info[0:2] >= (3, 3) else "w3lib_replace"
)
return data_str.decode(encoding, "replace")


def html_to_unicode(
Expand Down
4 changes: 1 addition & 3 deletions w3lib/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,9 +228,7 @@ def remove_tags_with_content(

utext = to_unicode(text, encoding)
if which_ones:
tags = "|".join(
[r"<%s\b.*?</%s>|<%s\s*/>" % (tag, tag, tag) for tag in which_ones]
)
tags = "|".join([fr"<{tag}\b.*?</{tag}>|<{tag}\s*/>" for tag in which_ones])
retags = re.compile(tags, re.DOTALL | re.IGNORECASE)
utext = retags.sub("", utext)
return utext
Expand Down
4 changes: 2 additions & 2 deletions w3lib/http.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from base64 import urlsafe_b64encode
from typing import Any, List, MutableMapping, Optional, AnyStr, Sequence, Union, Mapping
from w3lib.util import to_bytes, to_native_str
from w3lib.util import to_bytes, to_unicode

HeadersDictInput = Mapping[bytes, Union[Any, Sequence]]
HeadersDictOutput = MutableMapping[bytes, List[bytes]]
Expand Down Expand Up @@ -97,7 +97,7 @@ def basic_auth_header(

"""

auth = "%s:%s" % (to_native_str(username), to_native_str(password))
auth = f"{to_unicode(username)}:{to_unicode(password)}"
# XXX: RFC 2617 doesn't define encoding, but ISO-8859-1
# seems to be the most widely used encoding here. See also:
# http://greenbytes.de/tech/webdav/draft-ietf-httpauth-basicauth-enc-latest.html
Expand Down
4 changes: 3 additions & 1 deletion w3lib/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def path_to_file_uri(path: str) -> str:
x = pathname2url(os.path.abspath(path))
if os.name == "nt":
x = x.replace("|", ":") # http://bugs.python.org/issue5861
return "file:///%s" % x.lstrip("/")
return f"file:///{x.lstrip('/')}"


def file_uri_to_path(uri: str) -> str:
Expand All @@ -344,6 +344,7 @@ def any_to_uri(uri_or_path: str) -> str:
_char = set(map(chr, range(127)))

# RFC 2045 token.
# pylint: disable=consider-using-f-string
_token = r"[{}]+".format(
re.escape(
"".join(
Expand All @@ -359,6 +360,7 @@ def any_to_uri(uri_or_path: str) -> str:
)

# RFC 822 quoted-string, without surrounding quotation marks.
# pylint: disable=consider-using-f-string
_quoted_string = r"(?:[{}]|(?:\\[{}]))*".format(
re.escape("".join(_char - {'"', "\\", "\r"})), re.escape("".join(_char))
)
Expand Down