diff --git a/pyproject.toml b/pyproject.toml
index 782b569..bb7252d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -54,7 +54,10 @@ ignore = [
]
[tool.ruff.lint.per-file-ignores]
-"**/tests/*" = ["D103"] # Ignore method docstring errors in tests
+"**/tests/*" = [
+ "D103", # Ignore method docstring errors in tests
+ "PD901", # Allow `df` variable name in tests
+]
[tool.ruff.lint.pydocstyle]
convention = "google"
diff --git a/src/extractor/extractors/io.py b/src/extractor/extractors/io.py
index 6218520..1cd57b0 100644
--- a/src/extractor/extractors/io.py
+++ b/src/extractor/extractors/io.py
@@ -110,13 +110,13 @@ def _remove_nan(value: Any) -> Any:
Returns:
The value with NA and NaN replaced.
"""
- if type(value) == dict:
+ if isinstance(value, dict):
for dkey, dvalue in value.items():
value[dkey] = _remove_nan(dvalue)
- elif type(value) == list:
+ elif isinstance(value, list):
for i, item in enumerate(value):
value[i] = _remove_nan(item)
- elif type(value) == float and np.isnan(value):
+ elif isinstance(value, float) and np.isnan(value):
value = None
elif value is pd.NA:
value = None
diff --git a/src/extractor/extractors/posts.py b/src/extractor/extractors/posts.py
index 75af64a..a00fc12 100644
--- a/src/extractor/extractors/posts.py
+++ b/src/extractor/extractors/posts.py
@@ -87,7 +87,7 @@ def load_posts(
# Get the "url" property if there is an image
posts_df["og_image_url"] = posts_df["yoast_head_json.og_image"].apply(
lambda image: image[0]["url"]
- if type(image) != float and len(image) > 0
+ if not isinstance(image, float) and len(image) > 0
else None
)
diff --git a/src/extractor/parse/translations/_pickers.py b/src/extractor/parse/translations/_pickers.py
index f9532b5..3b19c63 100644
--- a/src/extractor/parse/translations/_pickers.py
+++ b/src/extractor/parse/translations/_pickers.py
@@ -84,8 +84,9 @@ def add_translation(self, href: str, lang: str) -> None:
class Polylang(LangPicker):
- """Language picker from the plugin \
- `Polylang `.
+ """Language picker from the plugin Polylang.
+
+ `WordPress plugin page <https://wordpress.org/plugins/polylang/>`_.
Has the structure::
diff --git a/tests/extractors/test_media.py b/tests/extractors/test_media.py
index c914c85..a105362 100644
--- a/tests/extractors/test_media.py
+++ b/tests/extractors/test_media.py
@@ -20,8 +20,8 @@ def media_df(media_df_and_registry):
def test_media_times(media_df):
media_1 = media_df.loc[1]
- assert type(media_1.date_gmt) == pd.Timestamp
- assert type(media_1.modified_gmt) == pd.Timestamp
+ assert isinstance(media_1.date_gmt, pd.Timestamp)
+ assert isinstance(media_1.modified_gmt, pd.Timestamp)
assert media_1.date_gmt.tzinfo is None, "date_gmt had timezone information"
assert media_1.modified_gmt.tzinfo is None, "modified_gmt had timezone information"
diff --git a/tests/extractors/test_pages.py b/tests/extractors/test_pages.py
index eba8e2a..cc098b8 100644
--- a/tests/extractors/test_pages.py
+++ b/tests/extractors/test_pages.py
@@ -27,8 +27,8 @@ def test_equals_expected(datadir, pages_df):
def test_post_times(pages_df):
post_1 = pages_df.loc[1]
- assert type(post_1.date_gmt) == pd.Timestamp
- assert type(post_1.modified_gmt) == pd.Timestamp
+ assert isinstance(post_1.date_gmt, pd.Timestamp)
+ assert isinstance(post_1.modified_gmt, pd.Timestamp)
assert post_1.date_gmt.tzinfo is None, "date_gmt had timezone information"
assert post_1.modified_gmt.tzinfo is None, "modified_gmt had timezone information"
diff --git a/tests/extractors/test_posts.py b/tests/extractors/test_posts.py
index 829980a..bed65fc 100644
--- a/tests/extractors/test_posts.py
+++ b/tests/extractors/test_posts.py
@@ -86,8 +86,8 @@ def posts_df(posts_df_and_registry):
def test_post_times(posts_df):
post_1 = posts_df.loc[1]
- assert type(post_1.date_gmt) == pd.Timestamp
- assert type(post_1.modified_gmt) == pd.Timestamp
+ assert isinstance(post_1.date_gmt, pd.Timestamp)
+ assert isinstance(post_1.modified_gmt, pd.Timestamp)
assert post_1.date_gmt.tzinfo is None, "date_gmt had timezone information"
assert post_1.modified_gmt.tzinfo is None, "modified_gmt had timezone information"
@@ -172,7 +172,7 @@ def test_translations_bidirectional(posts_df_and_registry):
posts_df, registry = posts_df_and_registry
posts_df = resolve_post_translations(registry, posts_df)
# Currently 1 <-> 2, let's remove 1 <- 2
- posts_df.at[2, "translations"] = []
+ posts_df.at[2, "translations"] = []  # keep list as a single cell  # noqa: PD008
posts_df = ensure_translations_undirected(posts_df)