enable pyupgrade rules

GateNLP · Jul 10, 2024 · b9fe3ee · b9fe3ee
1 parent 325f167
commit b9fe3ee
Show file tree

Hide file tree

Showing 17 changed files with 60 additions and 52 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -64,7 +64,8 @@ select = [
     "PD",
     "I",
     "PT",
-    "B"
+    "B",
+    "UP",
 ]
 
 ignore = [

diff --git a/src/wpextract/dl/exporter.py b/src/wpextract/dl/exporter.py
@@ -2,7 +2,6 @@
 import html
 import json
 import os
-from typing import List
 from urllib import parse as urlparse
 
 from tqdm.auto import tqdm
@@ -18,7 +17,7 @@ class Exporter:
 
     @staticmethod
     def download_media(
-        session: RequestSession, media: List[str], output_folder: str
+        session: RequestSession, media: list[str], output_folder: str
     ) -> int:
         """Downloads the media files based on the given URLs.
 
@@ -139,7 +138,7 @@ def write_file(filename, data):
 
     @staticmethod
     def export_posts(
-        posts: List[dict],
+        posts: list[dict],
         filename: str,
     ):
         """Exports posts to the specified file.

diff --git a/src/wpextract/dl/requestsession.py b/src/wpextract/dl/requestsession.py
@@ -2,7 +2,7 @@
 import random
 import time
 from http.cookies import SimpleCookie
-from typing import Tuple, Union
+from typing import Union
 
 import requests
 from requests.adapters import HTTPAdapter
@@ -14,21 +14,25 @@
 
 class ConnectionCouldNotResolve(Exception):
     """The remote host could not be resolved."""
+
     pass
 
 
 class ConnectionReset(Exception):
     """The connection was reset during the request."""
+
     pass
 
 
 class ConnectionRefused(Exception):
     """The connection was refused by the server."""
+
     pass
 
 
 class ConnectionTimeout(Exception):
     """A connection timeout occurred."""
+
     pass
 
 
@@ -38,6 +42,7 @@ class HTTPError400(Exception):
     See Also:
         HTTPErrorInvalidPage for a special case of this error.
     """
+
     pass
 
 
@@ -46,41 +51,49 @@ class HTTPErrorInvalidPage(Exception):
 
     This indicates the last page has been passed and all items have been retrieved.
     """
+
     pass
 
 
 class HTTPError401(Exception):
     """HTTP Unauthorized."""
+
     pass
 
 
 class HTTPError403(Exception):
     """HTTP Forbidden."""
+
     pass
 
 
 class HTTPError404(Exception):
     """HTTP Not Found."""
+
     pass
 
 
 class HTTPError500(Exception):
     """HTTP Internal Server Error."""
+
     pass
 
 
 class HTTPError502(Exception):
     """HTTP Bad Gateway."""
+
     pass
 
 
 class HTTPError(Exception):
     """A generic HTTP error with an unexpected code."""
+
     pass
 
 
 class HTTPTooManyRedirects(Exception):
     """Raised if the number of allowed redirects exceeds the configured maximum value."""
+
     pass
 
 
@@ -172,11 +185,12 @@ def wait(self):
         time.sleep(self.wait_s * wait_factor)
 
 
-AuthorizationType = Union[Tuple[str, str], HTTPBasicAuth, HTTPDigestAuth]
+AuthorizationType = Union[tuple[str, str], HTTPBasicAuth, HTTPDigestAuth]
 
 
 class RequestSession:
     """Manages HTTP requests and their behaviour."""
+
     def __init__(
         self,
         proxy: str = None,
@@ -253,13 +267,13 @@ def do_request(self, method, url, data=None, stream=False):
                 )
         except requests.ConnectionError as e:
             if "Errno -5" in str(e) or "Errno -2" in str(e) or "Errno -3" in str(e):
-                logging.error("Could not resolve host %s" % url)
+                logging.error(f"Could not resolve host {url}")
                 raise ConnectionCouldNotResolve from e
             elif "Errno 111" in str(e):
-                logging.error("Connection refused by %s" % url)
+                logging.error(f"Connection refused by {url}")
                 raise ConnectionRefused from e
             elif "RemoteDisconnected" in str(e):
-                logging.error("Connection reset by %s" % url)
+                logging.error(f"Connection reset by {url}")
                 raise ConnectionReset from e
             else:
                 print(e)

diff --git a/src/wpextract/downloader.py b/src/wpextract/downloader.py
@@ -1,6 +1,6 @@
 import logging
 from pathlib import Path
-from typing import List, Optional
+from typing import Optional
 
 from wpextract.dl.exceptions import WordPressApiNotV2
 from wpextract.dl.exporter import Exporter
@@ -15,7 +15,7 @@ def __init__(
         self,
         target: str,
         out_path: Path,
-        data_types: List[str],
+        data_types: list[str],
         session: Optional[RequestSession] = None,
         json_prefix: Optional[str] = None,
     ):
@@ -136,7 +136,7 @@ def _list_obj(self, obj_type, start=None, limit=None, cache=True):
             )
         except WordPressApiNotV2:
             logging.error("The API does not support WP V2")
-        except IOError as e:
+        except OSError as e:
             logging.error(f"Could not open {e.filename} for writing")
         print()
 

diff --git a/src/wpextract/extract.py b/src/wpextract/extract.py
@@ -1,6 +1,6 @@
 import logging
 from pathlib import Path
-from typing import Dict, Optional
+from typing import Optional
 
 from pandas import DataFrame
 
@@ -46,7 +46,7 @@ class WPExtractor:
     pages: Optional[DataFrame]
     """DataFrame of extracted pages."""
 
-    scrape_url_mapping: Dict[str, Path]
+    scrape_url_mapping: dict[str, Path]
 
     def __init__(
         self,

diff --git a/src/wpextract/extractors/data/images.py b/src/wpextract/extractors/data/images.py
@@ -1,7 +1,7 @@
 import logging
 import re
 from dataclasses import dataclass
-from typing import List, Optional
+from typing import Optional
 
 from wpextract.extractors.data.links import Linkable, LinkRegistry
 
@@ -56,8 +56,8 @@ def resolve_image(
 
 
 def resolve_images(
-    registry: LinkRegistry, images: List[ResolvableMediaUse]
-) -> List[ResolvableMediaUse]:
+    registry: LinkRegistry, images: list[ResolvableMediaUse]
+) -> list[ResolvableMediaUse]:
     """Resolve the internal links of a list of media uses.
 
     Args:

diff --git a/src/wpextract/extractors/data/link_resolver.py b/src/wpextract/extractors/data/link_resolver.py
@@ -1,5 +1,5 @@
 import logging
-from typing import List, Optional
+from typing import Optional
 from urllib.parse import urlparse, urlunparse
 
 from wpextract.extractors.data.links import LinkRegistry, ResolvableLink
@@ -66,8 +66,8 @@ def resolve_link(
 
 
 def resolve_links(
-    registry: LinkRegistry, links: List[ResolvableLink]
-) -> List[ResolvableLink]:
+    registry: LinkRegistry, links: list[ResolvableLink]
+) -> list[ResolvableLink]:
     """Resolve a list of links against the link registry.
 
     Args:

diff --git a/src/wpextract/extractors/data/links.py b/src/wpextract/extractors/data/links.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import Dict, List, Optional
+from typing import Optional
 
 
 @dataclass
@@ -29,8 +29,8 @@ class ResolvableLink(Link):
 class LinkRegistry:
     """A collection of all known links on the site."""
 
-    links: List[Linkable]
-    url_index_cache: Dict[str, int]
+    links: list[Linkable]
+    url_index_cache: dict[str, int]
 
     def __init__(self):
         """Init a new registry."""
@@ -59,7 +59,7 @@ def add_linkable(
         if _refresh_cache:
             self._refresh_cache()
 
-    def add_linkables(self, data_type: str, links: List[str], idxes: List[str]) -> None:
+    def add_linkables(self, data_type: str, links: list[str], idxes: list[str]) -> None:
         """Add multiple linkable items at once.
 
         Args:

diff --git a/src/wpextract/extractors/io.py b/src/wpextract/extractors/io.py
@@ -2,7 +2,7 @@
 import json
 import logging
 from pathlib import Path
-from typing import Any, List, Optional
+from typing import Any, Optional
 
 import numpy as np
 import pandas as pd
@@ -23,7 +23,7 @@ def load_from_path(path: Path) -> Optional[dict]:
     if not path.is_file():
         return None
 
-    with open(path, "r") as f:
+    with open(path) as f:
         return json.load(f)
 
 
@@ -53,7 +53,7 @@ def load_df(path: Path, index_col: str = "id") -> Optional[pd.DataFrame]:
     return pd.json_normalize(data_raw).set_index(index_col)
 
 
-def _set_nested_keys(row_dict: dict, split_key: List[str], val: Any):
+def _set_nested_keys(row_dict: dict, split_key: list[str], val: Any):
     """Set a value in the dictionary with nested keys.
 
     Args:

diff --git a/src/wpextract/extractors/posts.py b/src/wpextract/extractors/posts.py
@@ -1,6 +1,6 @@
 import logging
 from pathlib import Path
-from typing import Dict, Optional
+from typing import Optional
 
 import pandas as pd
 from pandas import DataFrame
@@ -57,7 +57,7 @@
 def load_posts(
     path: Path,
     link_registry: LinkRegistry,
-    scrape_urls_files: Dict[str, Path],
+    scrape_urls_files: dict[str, Path],
     translation_pickers: Optional[PickerListType] = None,
 ) -> Optional[pd.DataFrame]:
     """Load the posts from a JSON file.

diff --git a/src/wpextract/parse/content.py b/src/wpextract/parse/content.py
@@ -1,6 +1,5 @@
 import copy
 import logging
-from typing import List, Tuple
 from urllib.parse import urljoin, urlparse, urlunparse
 
 import pandas as pd
@@ -15,13 +14,13 @@
 NEWLINE_TAGS = {"br", "p"}
 
 
-InternalLinks = List[ResolvableLink]
-ExternalLinks = List[Link]
+InternalLinks = list[ResolvableLink]
+ExternalLinks = list[Link]
 
 
 def extract_links(
     doc: BeautifulSoup, self_link: str
-) -> Tuple[InternalLinks, ExternalLinks]:
+) -> tuple[InternalLinks, ExternalLinks]:
     """Get the internal and external links of the document.
 
     Args:
@@ -60,7 +59,7 @@ def extract_links(
     return internal_links, external_links
 
 
-Embeds = List[str]
+Embeds = list[str]
 
 
 def extract_embeds(doc: BeautifulSoup) -> Embeds:
@@ -75,7 +74,7 @@ def extract_embeds(doc: BeautifulSoup) -> Embeds:
     return [iframe["src"] for iframe in doc.find_all("iframe")]
 
 
-Images = List[MediaUse]
+Images = list[MediaUse]
 
 
 def extract_images(doc: BeautifulSoup, self_link: str) -> Images:

diff --git a/src/wpextract/parse/translations/_extractor.py b/src/wpextract/parse/translations/_extractor.py
@@ -1,13 +1,13 @@
 import logging
-from typing import List, Optional, Type
+from typing import Optional
 
 import pandas as pd
 from bs4 import BeautifulSoup
 
 import wpextract.parse.translations._pickers as pickers
 
 PICKERS = [pickers.Polylang, pickers.GenericLangSwitcher]
-PickerListType = List[Type[pickers.LangPicker]]
+PickerListType = list[type[pickers.LangPicker]]
 
 PageTranslationData = pd.Series
 

diff --git a/src/wpextract/parse/translations/_pickers.py b/src/wpextract/parse/translations/_pickers.py
@@ -1,5 +1,4 @@
 from abc import ABC, abstractmethod
-from typing import List
 
 from bs4 import BeautifulSoup, PageElement, Tag
 from langcodes import Language
@@ -18,7 +17,7 @@ class LangPicker(ABC):
     """The document to extract the language picker from."""
     root_el: Tag
     """The root element of the language picker, populated if [`LangPicker.matches`][wpextract.parse.translations.LangPicker.matches] is successful."""
-    translations: List[TranslationLink]
+    translations: list[TranslationLink]
     """A list of translation links, populated by calling [`LangPicker.add_translation`][wpextract.parse.translations.LangPicker.add_translation] within [`LangPicker.extract`][wpextract.parse.translations.LangPicker.extract]."""
     current_language: Language
     """The current language of the page, populated by calling [`LangPicker.set_current_lang`][wpextract.parse.translations.LangPicker.set_current_lang] within [`LangPicker.extract`][wpextract.parse.translations.LangPicker.extract]."""
-Original file line number
+Diff line change
@@ Expand Up / @@ -64,7 +64,8 @@ select = [ @@
         "PD",
         "I",
         "PT",
-        "B"
+        "B",
+        "UP",
     ]
     ignore = [
@@ Expand Down @@