Commit

fix new ruff errors

adamjanovsky committed Nov 10, 2023
1 parent a6aeca3 commit 4a316ad
Showing 12 changed files with 49 additions and 47 deletions.
48 changes: 24 additions & 24 deletions notebooks/cc/cert_id_eval.ipynb
@@ -39,7 +39,7 @@
"from sec_certs.dataset import CCDataset\n",
"from sec_certs.cert_rules import cc_rules\n",
"import csv\n",
"import pandas as pd"
"import pandas as pd\n"
]
},
{
@@ -51,7 +51,7 @@
"num_schemes = len(cc_rules[\"cc_cert_id\"])\n",
"num_scheme_rules = sum(len(rules) for rules in cc_rules[\"cc_cert_id\"].values())\n",
"print(f\"\\\\newcommand{{\\\\numccschemes}}{{{num_schemes}}}\")\n",
"print(f\"\\\\newcommand{{\\\\numccschemeidrules}}{{{num_scheme_rules}}}\")"
"print(f\"\\\\newcommand{{\\\\numccschemeidrules}}{{{num_scheme_rules}}}\")\n"
]
},
{
@@ -64,7 +64,7 @@
},
"outputs": [],
"source": [
"dset = CCDataset.from_web_latest()"
"dset = CCDataset.from_web_latest()\n"
]
},
{
@@ -75,7 +75,7 @@
"source": [
"num_ids = len(list(filter(lambda cert: cert.heuristics.cert_id, dset)))\n",
"print(f\"\\\\newcommand{{\\\\numcccerts}}{{{len(dset)}}}\")\n",
"print(f\"\\\\newcommand{{\\\\numccids}}{{{num_ids}}}\")"
"print(f\"\\\\newcommand{{\\\\numccids}}{{{num_ids}}}\")\n"
]
},
{
@@ -107,7 +107,7 @@
" if not cert.heuristics.cert_id:\n",
" missing_id_dgsts.add(cert.dgst)\n",
" missing_id.append((cert.dgst, cert.scheme))\n",
"pd.DataFrame(missing_id, columns=[\"id\", \"scheme\"])"
"pd.DataFrame(missing_id, columns=[\"id\", \"scheme\"])\n"
]
},
{
@@ -126,7 +126,7 @@
"missing_manual = pd.read_csv(\"../../data/cert_id_eval/missing_ids.csv\")\n",
"print(set(missing_manual.id) == missing_id_dgsts)\n",
"print(set(missing_manual.id).difference(missing_id_dgsts))\n",
"print(set(missing_id_dgsts).difference(missing_manual.id))"
"print(set(missing_id_dgsts).difference(missing_manual.id))\n"
]
},
{
@@ -144,7 +144,7 @@
"num_missing_manual_unfixable = num_missing_manual - num_missing_manual_fixable\n",
"print(f\"\\\\newcommand{{\\\\numccmissingid}}{{{num_missing_manual}}}\")\n",
"print(f\"\\\\newcommand{{\\\\numccmissingidfixable}}{{{num_missing_manual_fixable}}}\")\n",
"print(f\"\\\\newcommand{{\\\\numccmissingidunfixable}}{{{num_missing_manual_unfixable}}}\")"
"print(f\"\\\\newcommand{{\\\\numccmissingidunfixable}}{{{num_missing_manual_unfixable}}}\")\n"
]
},
{
@@ -153,7 +153,7 @@
"metadata": {},
"outputs": [],
"source": [
"missing_manual.loc[missing_manual.cert_id.isnull()].reason.value_counts()"
"missing_manual.loc[missing_manual.cert_id.isnull()].reason.value_counts()\n"
]
},
{
@@ -162,7 +162,7 @@
"metadata": {},
"outputs": [],
"source": [
"missing_manual.loc[missing_manual.cert_id.notnull()].reason.value_counts()"
"missing_manual.loc[missing_manual.cert_id.notnull()].reason.value_counts()\n"
]
},
{
@@ -195,7 +195,7 @@
"for idd, entries in id_mapping.items():\n",
" if len(entries) > 1 and idd:\n",
" print(idd, entries)\n",
" duplicate_id_dgsts.update(entries)"
" duplicate_id_dgsts.update(entries)\n"
]
},
{
@@ -235,9 +235,9 @@
" print(hash, entries)\n",
" for entry in entries:\n",
" duplicate_doc_dgsts.add(entry)\n",
" \n",
"\n",
"duplicate_ids_due_doc = duplicate_doc_dgsts.intersection(duplicate_id_dgsts)\n",
"duplicate_ids_issue = duplicate_id_dgsts.difference(duplicate_doc_dgsts)"
"duplicate_ids_issue = duplicate_id_dgsts.difference(duplicate_doc_dgsts)\n"
]
},
{
@@ -262,7 +262,7 @@
"outputs": [],
"source": [
"for id in duplicate_ids_issue:\n",
" print(id, dset[id].heuristics.cert_id)"
" print(id, dset[id].heuristics.cert_id)\n"
]
},
{
@@ -273,7 +273,7 @@
"source": [
"print(f\"\\\\newcommand{{\\\\numccduplicateid}}{{{len(duplicate_id_dgsts)}}}\")\n",
"print(f\"\\\\newcommand{{\\\\numccduplicateidcolission}}{{{len(duplicate_ids_due_doc)}}}\")\n",
"print(f\"\\\\newcommand{{\\\\numccduplicateidissue}}{{{len(duplicate_ids_issue)}}}\")"
"print(f\"\\\\newcommand{{\\\\numccduplicateidissue}}{{{len(duplicate_ids_issue)}}}\")\n"
]
},
{
@@ -300,7 +300,7 @@
"duplicate_manual = pd.read_csv(\"../../data/cert_id_eval/duplicate_ids.csv\")\n",
"print(set(duplicate_manual.id) == duplicate_id_dgsts)\n",
"print(set(duplicate_manual.id).difference(duplicate_id_dgsts))\n",
"print(set(duplicate_id_dgsts).difference(duplicate_manual.id))"
"print(set(duplicate_id_dgsts).difference(duplicate_manual.id))\n"
]
},
{
@@ -309,7 +309,7 @@
"metadata": {},
"outputs": [],
"source": [
"duplicate_manual[duplicate_manual.result == \"tp\"].reason.value_counts()"
"duplicate_manual[duplicate_manual.result == \"tp\"].reason.value_counts()\n"
]
},
{
@@ -318,7 +318,7 @@
"metadata": {},
"outputs": [],
"source": [
"duplicate_manual[duplicate_manual.result == \"fp\"].reason.value_counts()"
"duplicate_manual[duplicate_manual.result == \"fp\"].reason.value_counts()\n"
]
},
{
@@ -371,7 +371,7 @@
" impossible.add(line[\"id\"])\n",
" else:\n",
" correct.add(line[\"id\"])\n",
"print(len(correct), len(possible), len(impossible))"
"print(len(correct), len(possible), len(impossible))\n"
]
},
{
@@ -395,7 +395,7 @@
" incorrect.add(line[\"id\"])\n",
" else:\n",
" correct.add(line[\"id\"])\n",
"print(len(correct), len(incorrect))"
"print(len(correct), len(incorrect))\n"
]
},
{
@@ -410,7 +410,7 @@
"source": [
"print(f\"\\\\newcommand{{\\\\numccideval}}{{{len(correct) + len(incorrect)}}}\")\n",
"print(f\"\\\\newcommand{{\\\\numccidevalcorrect}}{{{len(correct)}}}\")\n",
"print(f\"\\\\newcommand{{\\\\numccidevalincorrect}}{{{len(incorrect)}}}\")"
"print(f\"\\\\newcommand{{\\\\numccidevalincorrect}}{{{len(incorrect)}}}\")\n"
]
},
{
@@ -426,7 +426,7 @@
"metadata": {},
"outputs": [],
"source": [
"manual_references = pd.read_csv(\"../../data/cert_id_eval/random_references.csv\")"
"manual_references = pd.read_csv(\"../../data/cert_id_eval/random_references.csv\")\n"
]
},
{
@@ -438,7 +438,7 @@
"print(\"The referenced cert is a...\")\n",
"print(manual_references[manual_references.reason != \"self\"].reason.value_counts())\n",
"print(\"... in the current cert.\")\n",
"print(\"Total refs:\", sum(manual_references.reason != \"self\"))"
"print(\"Total refs:\", sum(manual_references.reason != \"self\"))\n"
]
},
{
@@ -456,7 +456,7 @@
"print(f\"\\\\newcommand{{\\\\numCcRefEvalPrevVersion}}{{{sum(manual_references.reason == 'previous version')}}}\")\n",
"\n",
"print(f\"\\\\newcommand{{\\\\numCcRefEvalInReport}}{{{sum((manual_references.location == 'report') & (manual_references.reason != 'self'))}}}\")\n",
"print(f\"\\\\newcommand{{\\\\numCcRefEvalInTarget}}{{{sum((manual_references.location == 'target') & (manual_references.reason != 'self'))}}}\")"
"print(f\"\\\\newcommand{{\\\\numCcRefEvalInTarget}}{{{sum((manual_references.location == 'target') & (manual_references.reason != 'self'))}}}\")\n"
]
}
],
@@ -476,7 +476,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
"version": "3.10.13"
},
"vscode": {
"interpreter": {
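Every hunk in this notebook makes the same mechanical change: the final source line of each code cell gains a trailing \n (plus one kernel-metadata version change). This is consistent with running ruff over notebooks once .ipynb files are opted in; in nbformat, a cell's source is a list of line strings, and the fix normalizes the last line to end with a newline, much like W292 does for .py files. A minimal sketch of the pattern with a hypothetical cell (the rule attribution is my reading of the diff, not stated in the commit):

    # Hypothetical nbformat cell, before and after the normalization.
    # A cell's "source" is a list of lines; the fix only appends "\n"
    # to the last one, so the rendered code is unchanged.
    cell_before = {"cell_type": "code", "source": ["import pandas as pd"]}
    cell_after = {"cell_type": "code", "source": ["import pandas as pd\n"]}

    assert cell_after["source"][-1] == cell_before["source"][-1] + "\n"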
4 changes: 2 additions & 2 deletions src/sec_certs/dataset/dataset.py
@@ -502,7 +502,7 @@ def to_label_studio_json(self, output_path: str | Path) -> None:
candidates = [cpe_dset[x].title for x in cert.heuristics.cpe_matches]
candidates += ["No good match"] * (config.cpe_n_max_matches - len(candidates))
options = ["option_" + str(x) for x in range(1, config.cpe_n_max_matches)]
-dct.update({o: c for o, c in zip(options, candidates)})
+dct.update(dict(zip(options, candidates)))
lst.append(dct)

with Path(output_path).open("w") as handle:
@@ -634,6 +634,6 @@ def update_with_certs(self, certs: list[CertSubType]) -> None:
Enriches the dataset with `certs`
:param List[Certificate] certs: new certs to include into the dataset.
"""
-if any([x not in self for x in certs]):
+if any(x not in self for x in certs):
logger.warning("Updating dataset with certificates outside of the dataset!")
self.certs.update({x.dgst: x for x in certs})
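Both hunks above are comprehension simplifications; by my reading these are ruff's flake8-comprehensions rules C416 (a dict comprehension that only re-pairs its input is just dict()) and C419 (any()/all() accept a generator directly, so the list wrapper is wasted work). The same two patterns recur in fips_algorithm.py, cc.py, sar.py, fips.py, and pandas.py below. A self-contained sketch with toy data (names are illustrative):

    options = ["option_1", "option_2"]
    candidates = ["Title A", "Title B"]

    # C416: the comprehension adds nothing over the dict() constructor.
    before = {o: c for o, c in zip(options, candidates)}
    after = dict(zip(options, candidates))
    assert before == after

    # The fips.py hunk below is the same rule over an iterable of pairs:
    pairs = [("vendor", "Acme"), ("standard", "FIPS 140-2")]
    assert {x: y for x, y in pairs} == dict(pairs)

    # C419: the generator form short-circuits at the first True; the
    # list form materializes every element before any() even starts.
    nums = [1, 2, 3]
    assert any([n > 2 for n in nums]) == any(n > 2 for n in nums)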
2 changes: 1 addition & 1 deletion src/sec_certs/dataset/fips_algorithm.py
@@ -82,7 +82,7 @@ def download_alg_list_htmls(output_dir: Path) -> list[Path]:
if failed_tuples:
failed_urls, failed_paths = zip(*failed_tuples)
responses = helpers.download_parallel(failed_urls, failed_paths)
-if any([x != constants.RESPONSE_OK for x in responses]):
+if any(x != constants.RESPONSE_OK for x in responses):
raise ValueError("Failed to download the algorithms HTML data, the dataset won't be constructed.")

return paths
2 changes: 1 addition & 1 deletion src/sec_certs/model/matching.py
@@ -28,7 +28,7 @@ def _compute_match(self, one: str, other: str) -> float:
)

@staticmethod
def _match_certs(matchers: Sequence["AbstractMatcher"], certs: list[CertSubType], threshold: float):
def _match_certs(matchers: Sequence[AbstractMatcher], certs: list[CertSubType], threshold: float):
scores: list[tuple[float, int, int]] = []
matched_is: set[int] = set()
matched_js: set[int] = set()
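Dropping the quotes around AbstractMatcher is safe only because annotations are evaluated lazily, presumably via from __future__ import annotations at the top of matching.py; ruff reports this as UP037 (remove quotes from type annotation), by my attribution. A toy reproduction:

    # With lazy annotations, a class can reference itself in a signature
    # without quoting, even inside its own (not-yet-finished) body.
    from __future__ import annotations

    from collections.abc import Sequence


    class AbstractMatcher:
        @staticmethod
        def _match_certs(matchers: Sequence[AbstractMatcher]) -> None:
            # Without the future import this would need
            # Sequence["AbstractMatcher"], since the name is unbound
            # while the class body is still executing.
            ...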
15 changes: 7 additions & 8 deletions src/sec_certs/sample/cc.py
@@ -247,7 +247,7 @@ class PdfData(BasePdfData, ComplexSerializableType):
st_filename: str | None = field(default=None)

def __bool__(self) -> bool:
-return any([x is not None for x in vars(self)])
+return any(x is not None for x in vars(self))

@property
def bsi_data(self) -> dict[str, Any] | None:
@@ -612,13 +612,12 @@ def merge(self, other: CCCertificate, other_source: str | None = None) -> None:
)

for att, val in vars(self).items():
-if not val:
-    setattr(self, att, getattr(other, att))
-elif other_source == "html" and att == "protection_profiles":
-    setattr(self, att, getattr(other, att))
-elif other_source == "html" and att == "maintenance_updates":
-    setattr(self, att, getattr(other, att))
-elif att == "state":
+if (
+    (not val)
+    or (other_source == "html" and att == "protection_profiles")
+    or (other_source == "html" and att == "maintenance_updates")
+    or (att == "state")
+):
setattr(self, att, getattr(other, att))
else:
if getattr(self, att) != getattr(other, att):
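The merge() hunk collapses four branches with identical bodies into one or-joined condition, which is ruff's SIM114 by my reading; evaluation order and short-circuiting are preserved, so behavior is unchanged. A minimal sketch of the equivalence (the helper and its arguments are hypothetical):

    from __future__ import annotations


    def should_take_other(val: object, other_source: str | None, att: str) -> bool:
        # One boolean expression replaces:
        #   if not val: ...
        #   elif other_source == "html" and att == "protection_profiles": ...
        #   elif other_source == "html" and att == "maintenance_updates": ...
        #   elif att == "state": ...
        # where every branch ran the same setattr() call.
        return (
            (not val)
            or (other_source == "html" and att == "protection_profiles")
            or (other_source == "html" and att == "maintenance_updates")
            or (att == "state")
        )


    assert should_take_other(None, None, "state")
    assert not should_take_other("x", "csv", "name")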
2 changes: 1 addition & 1 deletion src/sec_certs/sample/fips.py
@@ -64,7 +64,7 @@ def parse_single_detail_entry(key, entry):
)
entries = [(FIPSHTMLParser.normalize_string(key.text), entry) for key, entry in entries]
entries = [parse_single_detail_entry(*x) for x in entries if x[0] in DETAILS_KEY_NORMALIZATION_DICT]
-entries = {x: y for x, y in entries}
+entries = dict(entries)

if "caveat" in entries:
entries["mentioned_certs"] = FIPSHTMLParser.get_mentioned_certs_from_caveat(entries["caveat"])
2 changes: 1 addition & 1 deletion src/sec_certs/sample/sar.py
@@ -50,7 +50,7 @@ def contains_level(string: str) -> bool:
@staticmethod
def matches_re(string: str) -> bool:
return any(
-[re.match(sar_class + "(?:_[A-Z]{3,4}){1,2}(?:\\.[0-9]){0,2}", string) for sar_class in SAR_CLASS_MAPPING]
+re.match(sar_class + "(?:_[A-Z]{3,4}){1,2}(?:\\.[0-9]){0,2}", string) for sar_class in SAR_CLASS_MAPPING
)

def __lt__(self, other: Any) -> bool:
2 changes: 1 addition & 1 deletion src/sec_certs/utils/extract.py
@@ -353,7 +353,7 @@ def search_only_headers_bsi(filepath: Path): # noqa: C901

for m in re.finditer(rule_and_sep, whole_text):
# check if previous rules had at least one match
-if constants.TAG_CERT_ID not in items_found.keys():
+if constants.TAG_CERT_ID not in items_found:
logger.error(f"ERROR: front page not found for file: {filepath}")

match_groups = m.groups()
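The extract.py fix (and the matching one in pandas.py below) is ruff's SIM118 by my attribution: membership tests on a dict already check its keys, so calling .keys() only allocates a redundant view object. Sketch, with an illustrative stand-in for the real constant:

    # Dict membership checks keys by definition.
    TAG_CERT_ID = "cert_id"  # stand-in for constants.TAG_CERT_ID
    items_found = {"cert_id": "BSI-DSZ-CC-1234-2023"}  # illustrative value

    assert (TAG_CERT_ID in items_found.keys()) == (TAG_CERT_ID in items_found)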
6 changes: 5 additions & 1 deletion src/sec_certs/utils/helpers.py
@@ -28,7 +28,11 @@ def download_file(
time.sleep(delay)
# See https://github.com/psf/requests/issues/3953 for header justification
r = requests.get(
-url, allow_redirects=True, timeout=constants.REQUEST_TIMEOUT, stream=True, headers={"Accept-Encoding": None} # type: ignore
+url,
+allow_redirects=True,
+timeout=constants.REQUEST_TIMEOUT,
+stream=True,
+headers={"Accept-Encoding": None},  # type: ignore
)
ctx: Any
if show_progress_bar:
4 changes: 2 additions & 2 deletions src/sec_certs/utils/pandas.py
@@ -141,7 +141,7 @@ def get_sar_level_from_set(sars: set[SAR], sar_family: str) -> int | None:
Given a set of SARs and a family name, will return the level of the sought SAR from the set.
"""
family_sars_dict = {x.family: x for x in sars} if (sars and not pd.isnull(sars)) else {}
-if sar_family not in family_sars_dict.keys():
+if sar_family not in family_sars_dict:
return None
return family_sars_dict[sar_family].level

@@ -211,7 +211,7 @@ def compute_cve_correlations(
tuples = list(
zip(n_cves_corrs, n_cves_pvalues, worst_cve_corrs, worst_cve_pvalues, avg_cve_corrs, avg_cve_pvalues, supports)
)
-dct = {family: correlations for family, correlations in zip(["eal"] + families, tuples)}
+dct = dict(zip(["eal"] + families, tuples))
df_corr = pd.DataFrame.from_dict(
dct,
orient="index",
3 changes: 1 addition & 2 deletions src/sec_certs/utils/pdf.py
@@ -1,6 +1,5 @@
from __future__ import annotations

-import glob
import logging
import subprocess
from datetime import datetime, timedelta, timezone
@@ -54,7 +53,7 @@ def ocr_pdf_file(pdf_path: Path) -> str:
if ppm.returncode != 0:
raise ValueError(f"pdftoppm failed: {ppm.returncode}")

-for ppm_path in map(Path, glob.glob(str(tmppath / "image*.ppm"))):
+for ppm_path in tmppath.rglob("image*.ppm"):
base = ppm_path.with_suffix("")
content = pytesseract.image_to_string(Image.open(ppm_path), lang="eng+deu+fra")

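The pdf.py change swaps glob.glob() for pathlib and drops the now-unused import, in line with ruff's pathlib rules (PTH207, by my reading). One nuance: rglob() searches subdirectories recursively, while the original glob.glob() pattern matched only files directly under tmppath; for pdftoppm's flat output directory the two coincide, and tmppath.glob("image*.ppm") would have been the exact drop-in. A sketch under those assumptions:

    import glob
    from pathlib import Path

    tmppath = Path("/tmp/ocr-scratch")  # hypothetical pdftoppm output dir

    # before: stringly-typed and non-recursive
    old = [Path(p) for p in glob.glob(str(tmppath / "image*.ppm"))]

    # after: pathlib-native; note rglob() also descends into subdirectories
    new = sorted(tmppath.rglob("image*.ppm"))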
6 changes: 3 additions & 3 deletions tests/test_config.py
@@ -42,8 +42,8 @@ def test_config_from_yaml(simple_config_dict, simple_config_yaml: Path) -> None:


def test_load_env_values(simple_config_dict, simple_config_yaml):
os.environ["seccerts_log_filepath"] = "/some/nonsense/path"
os.environ["always_false_positive_fips_cert_id_threshold"] = "10"
os.environ["SECCERTS_LOG_FILEPATH"] = "/some/nonsense/path"
os.environ["ALWAYS_FALSE_POSITIVE_FIPS_CERT_ID_THRESHOLD"] = "10"

config_module.config.load_from_yaml(simple_config_yaml)

@@ -57,7 +57,7 @@ def test_complex_config_load(simple_config_dict, simple_config_yaml):
def test_complex_config_load(simple_config_dict, simple_config_yaml):
config_module.config.year_difference_between_validations = 123456789
config_module.config.n_threads = 987654321
os.environ["seccerts_n_threads"] = "1"
os.environ["SECCERTS_N_THREADS"] = "1"

config_module.config.load_from_yaml(simple_config_yaml)
for key, val in simple_config_dict.items():
Expand Down