Commit

fix new ruff errors

adamjanovsky committed Nov 10, 2023
1 parent a6aeca3 commit 4a316ad
Showing 12 changed files with 49 additions and 47 deletions.
48 changes: 24 additions & 24 deletions notebooks/cc/cert_id_eval.ipynb
@@ -39,7 +39,7 @@
"from sec_certs.dataset import CCDataset\n",
"from sec_certs.cert_rules import cc_rules\n",
"import csv\n",
"import pandas as pd"
"import pandas as pd\n"
]
},
{
@@ -51,7 +51,7 @@
"num_schemes = len(cc_rules[\"cc_cert_id\"])\n",
"num_scheme_rules = sum(len(rules) for rules in cc_rules[\"cc_cert_id\"].values())\n",
"print(f\"\\\\newcommand{{\\\\numccschemes}}{{{num_schemes}}}\")\n",
"print(f\"\\\\newcommand{{\\\\numccschemeidrules}}{{{num_scheme_rules}}}\")"
"print(f\"\\\\newcommand{{\\\\numccschemeidrules}}{{{num_scheme_rules}}}\")\n"
]
},
{
@@ -64,7 +64,7 @@
},
"outputs": [],
"source": [
"dset = CCDataset.from_web_latest()"
"dset = CCDataset.from_web_latest()\n"
]
},
{
@@ -75,7 +75,7 @@
"source": [
"num_ids = len(list(filter(lambda cert: cert.heuristics.cert_id, dset)))\n",
"print(f\"\\\\newcommand{{\\\\numcccerts}}{{{len(dset)}}}\")\n",
"print(f\"\\\\newcommand{{\\\\numccids}}{{{num_ids}}}\")"
"print(f\"\\\\newcommand{{\\\\numccids}}{{{num_ids}}}\")\n"
]
},
{
@@ -107,7 +107,7 @@
" if not cert.heuristics.cert_id:\n",
" missing_id_dgsts.add(cert.dgst)\n",
" missing_id.append((cert.dgst, cert.scheme))\n",
"pd.DataFrame(missing_id, columns=[\"id\", \"scheme\"])"
"pd.DataFrame(missing_id, columns=[\"id\", \"scheme\"])\n"
]
},
{
@@ -126,7 +126,7 @@
"missing_manual = pd.read_csv(\"../../data/cert_id_eval/missing_ids.csv\")\n",
"print(set(missing_manual.id) == missing_id_dgsts)\n",
"print(set(missing_manual.id).difference(missing_id_dgsts))\n",
"print(set(missing_id_dgsts).difference(missing_manual.id))"
"print(set(missing_id_dgsts).difference(missing_manual.id))\n"
]
},
{
@@ -144,7 +144,7 @@
"num_missing_manual_unfixable = num_missing_manual - num_missing_manual_fixable\n",
"print(f\"\\\\newcommand{{\\\\numccmissingid}}{{{num_missing_manual}}}\")\n",
"print(f\"\\\\newcommand{{\\\\numccmissingidfixable}}{{{num_missing_manual_fixable}}}\")\n",
"print(f\"\\\\newcommand{{\\\\numccmissingidunfixable}}{{{num_missing_manual_unfixable}}}\")"
"print(f\"\\\\newcommand{{\\\\numccmissingidunfixable}}{{{num_missing_manual_unfixable}}}\")\n"
]
},
{
@@ -153,7 +153,7 @@
"metadata": {},
"outputs": [],
"source": [
"missing_manual.loc[missing_manual.cert_id.isnull()].reason.value_counts()"
"missing_manual.loc[missing_manual.cert_id.isnull()].reason.value_counts()\n"
]
},
{
@@ -162,7 +162,7 @@
"metadata": {},
"outputs": [],
"source": [
"missing_manual.loc[missing_manual.cert_id.notnull()].reason.value_counts()"
"missing_manual.loc[missing_manual.cert_id.notnull()].reason.value_counts()\n"
]
},
{
@@ -195,7 +195,7 @@
"for idd, entries in id_mapping.items():\n",
" if len(entries) > 1 and idd:\n",
" print(idd, entries)\n",
" duplicate_id_dgsts.update(entries)"
" duplicate_id_dgsts.update(entries)\n"
]
},
{
@@ -235,9 +235,9 @@
" print(hash, entries)\n",
" for entry in entries:\n",
" duplicate_doc_dgsts.add(entry)\n",
" \n",
"\n",
"duplicate_ids_due_doc = duplicate_doc_dgsts.intersection(duplicate_id_dgsts)\n",
"duplicate_ids_issue = duplicate_id_dgsts.difference(duplicate_doc_dgsts)"
"duplicate_ids_issue = duplicate_id_dgsts.difference(duplicate_doc_dgsts)\n"
]
},
{
@@ -262,7 +262,7 @@
"outputs": [],
"source": [
"for id in duplicate_ids_issue:\n",
" print(id, dset[id].heuristics.cert_id)"
" print(id, dset[id].heuristics.cert_id)\n"
]
},
{
@@ -273,7 +273,7 @@
"source": [
"print(f\"\\\\newcommand{{\\\\numccduplicateid}}{{{len(duplicate_id_dgsts)}}}\")\n",
"print(f\"\\\\newcommand{{\\\\numccduplicateidcolission}}{{{len(duplicate_ids_due_doc)}}}\")\n",
"print(f\"\\\\newcommand{{\\\\numccduplicateidissue}}{{{len(duplicate_ids_issue)}}}\")"
"print(f\"\\\\newcommand{{\\\\numccduplicateidissue}}{{{len(duplicate_ids_issue)}}}\")\n"
]
},
{
@@ -300,7 +300,7 @@
"duplicate_manual = pd.read_csv(\"../../data/cert_id_eval/duplicate_ids.csv\")\n",
"print(set(duplicate_manual.id) == duplicate_id_dgsts)\n",
"print(set(duplicate_manual.id).difference(duplicate_id_dgsts))\n",
"print(set(duplicate_id_dgsts).difference(duplicate_manual.id))"
"print(set(duplicate_id_dgsts).difference(duplicate_manual.id))\n"
]
},
{
@@ -309,7 +309,7 @@
"metadata": {},
"outputs": [],
"source": [
"duplicate_manual[duplicate_manual.result == \"tp\"].reason.value_counts()"
"duplicate_manual[duplicate_manual.result == \"tp\"].reason.value_counts()\n"
]
},
{
@@ -318,7 +318,7 @@
"metadata": {},
"outputs": [],
"source": [
"duplicate_manual[duplicate_manual.result == \"fp\"].reason.value_counts()"
"duplicate_manual[duplicate_manual.result == \"fp\"].reason.value_counts()\n"
]
},
{
@@ -371,7 +371,7 @@
" impossible.add(line[\"id\"])\n",
" else:\n",
" correct.add(line[\"id\"])\n",
"print(len(correct), len(possible), len(impossible))"
"print(len(correct), len(possible), len(impossible))\n"
]
},
{
@@ -395,7 +395,7 @@
" incorrect.add(line[\"id\"])\n",
" else:\n",
" correct.add(line[\"id\"])\n",
"print(len(correct), len(incorrect))"
"print(len(correct), len(incorrect))\n"
]
},
{
@@ -410,7 +410,7 @@
"source": [
"print(f\"\\\\newcommand{{\\\\numccideval}}{{{len(correct) + len(incorrect)}}}\")\n",
"print(f\"\\\\newcommand{{\\\\numccidevalcorrect}}{{{len(correct)}}}\")\n",
"print(f\"\\\\newcommand{{\\\\numccidevalincorrect}}{{{len(incorrect)}}}\")"
"print(f\"\\\\newcommand{{\\\\numccidevalincorrect}}{{{len(incorrect)}}}\")\n"
]
},
{
@@ -426,7 +426,7 @@
"metadata": {},
"outputs": [],
"source": [
"manual_references = pd.read_csv(\"../../data/cert_id_eval/random_references.csv\")"
"manual_references = pd.read_csv(\"../../data/cert_id_eval/random_references.csv\")\n"
]
},
{
@@ -438,7 +438,7 @@
"print(\"The referenced cert is a...\")\n",
"print(manual_references[manual_references.reason != \"self\"].reason.value_counts())\n",
"print(\"... in the current cert.\")\n",
"print(\"Total refs:\", sum(manual_references.reason != \"self\"))"
"print(\"Total refs:\", sum(manual_references.reason != \"self\"))\n"
]
},
{
@@ -456,7 +456,7 @@
"print(f\"\\\\newcommand{{\\\\numCcRefEvalPrevVersion}}{{{sum(manual_references.reason == 'previous version')}}}\")\n",
"\n",
"print(f\"\\\\newcommand{{\\\\numCcRefEvalInReport}}{{{sum((manual_references.location == 'report') & (manual_references.reason != 'self'))}}}\")\n",
"print(f\"\\\\newcommand{{\\\\numCcRefEvalInTarget}}{{{sum((manual_references.location == 'target') & (manual_references.reason != 'self'))}}}\")"
"print(f\"\\\\newcommand{{\\\\numCcRefEvalInTarget}}{{{sum((manual_references.location == 'target') & (manual_references.reason != 'self'))}}}\")\n"
]
}
],
@@ -476,7 +476,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
"version": "3.10.13"
},
"vscode": {
"interpreter": {
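Every hunk in this notebook makes the same mechanical change: the final source line of each code cell gains a trailing \n (plus one kernel-metadata version change). This is consistent with running ruff over notebooks once .ipynb files are opted in; in nbformat, a cell's source is a list of line strings, and the fix normalizes the last line to end with a newline, much like W292 does for .py files. A minimal sketch of the pattern with a hypothetical cell (the rule attribution is my reading of the diff, not stated in the commit):

    # Hypothetical nbformat cell, before and after the normalization.
    # A cell's "source" is a list of lines; the fix only appends "\n"
    # to the last one, so the rendered code is unchanged.
    cell_before = {"cell_type": "code", "source": ["import pandas as pd"]}
    cell_after = {"cell_type": "code", "source": ["import pandas as pd\n"]}

    assert cell_after["source"][-1] == cell_before["source"][-1] + "\n"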
4 changes: 2 additions & 2 deletions src/sec_certs/dataset/dataset.py
@@ -502,7 +502,7 @@ def to_label_studio_json(self, output_path: str | Path) -> None:
candidates = [cpe_dset[x].title for x in cert.heuristics.cpe_matches]
candidates += ["No good match"] * (config.cpe_n_max_matches - len(candidates))
options = ["option_" + str(x) for x in range(1, config.cpe_n_max_matches)]
-dct.update({o: c for o, c in zip(options, candidates)})
+dct.update(dict(zip(options, candidates)))
lst.append(dct)

with Path(output_path).open("w") as handle:
@@ -634,6 +634,6 @@ def update_with_certs(self, certs: list[CertSubType]) -> None:
Enriches the dataset with `certs`
:param List[Certificate] certs: new certs to include into the dataset.
"""
-if any([x not in self for x in certs]):
+if any(x not in self for x in certs):
logger.warning("Updating dataset with certificates outside of the dataset!")
self.certs.update({x.dgst: x for x in certs})
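Both hunks above are comprehension simplifications; by my reading these are ruff's flake8-comprehensions rules C416 (a dict comprehension that only re-pairs its input is just dict()) and C419 (any()/all() accept a generator directly, so the list wrapper is wasted work). The same two patterns recur in fips_algorithm.py, cc.py, sar.py, fips.py, and pandas.py below. A self-contained sketch with toy data (names are illustrative):

    options = ["option_1", "option_2"]
    candidates = ["Title A", "Title B"]

    # C416: the comprehension adds nothing over the dict() constructor.
    before = {o: c for o, c in zip(options, candidates)}
    after = dict(zip(options, candidates))
    assert before == after

    # The fips.py hunk below is the same rule over an iterable of pairs:
    pairs = [("vendor", "Acme"), ("standard", "FIPS 140-2")]
    assert {x: y for x, y in pairs} == dict(pairs)

    # C419: the generator form short-circuits at the first True; the
    # list form materializes every element before any() even starts.
    nums = [1, 2, 3]
    assert any([n > 2 for n in nums]) == any(n > 2 for n in nums)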
2 changes: 1 addition & 1 deletion src/sec_certs/dataset/fips_algorithm.py
@@ -82,7 +82,7 @@ def download_alg_list_htmls(output_dir: Path) -> list[Path]:
if failed_tuples:
failed_urls, failed_paths = zip(*failed_tuples)
responses = helpers.download_parallel(failed_urls, failed_paths)
-if any([x != constants.RESPONSE_OK for x in responses]):
+if any(x != constants.RESPONSE_OK for x in responses):
raise ValueError("Failed to download the algorithms HTML data, the dataset won't be constructed.")

return paths
2 changes: 1 addition & 1 deletion src/sec_certs/model/matching.py
@@ -28,7 +28,7 @@ def _compute_match(self, one: str, other: str) -> float:
)

@staticmethod
def _match_certs(matchers: Sequence["AbstractMatcher"], certs: list[CertSubType], threshold: float):
def _match_certs(matchers: Sequence[AbstractMatcher], certs: list[CertSubType], threshold: float):
scores: list[tuple[float, int, int]] = []
matched_is: set[int] = set()
matched_js: set[int] = set()
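Dropping the quotes around AbstractMatcher is safe only because annotations are evaluated lazily, presumably via from __future__ import annotations at the top of matching.py; ruff reports this as UP037 (remove quotes from type annotation), by my attribution. A toy reproduction:

    # With lazy annotations, a class can reference itself in a signature
    # without quoting, even inside its own (not-yet-finished) body.
    from __future__ import annotations

    from collections.abc import Sequence


    class AbstractMatcher:
        @staticmethod
        def _match_certs(matchers: Sequence[AbstractMatcher]) -> None:
            # Without the future import this would need
            # Sequence["AbstractMatcher"], since the name is unbound
            # while the class body is still executing.
            ...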
15 changes: 7 additions & 8 deletions src/sec_certs/sample/cc.py
@@ -247,7 +247,7 @@ class PdfData(BasePdfData, ComplexSerializableType):
st_filename: str | None = field(default=None)

def __bool__(self) -> bool:
-return any([x is not None for x in vars(self)])
+return any(x is not None for x in vars(self))

@property
def bsi_data(self) -> dict[str, Any] | None:
@@ -612,13 +612,12 @@ def merge(self, other: CCCertificate, other_source: str | None = None) -> None:
)

for att, val in vars(self).items():
-if not val:
-    setattr(self, att, getattr(other, att))
-elif other_source == "html" and att == "protection_profiles":
-    setattr(self, att, getattr(other, att))
-elif other_source == "html" and att == "maintenance_updates":
-    setattr(self, att, getattr(other, att))
-elif att == "state":
+if (
+    (not val)
+    or (other_source == "html" and att == "protection_profiles")
+    or (other_source == "html" and att == "maintenance_updates")
+    or (att == "state")
+):
setattr(self, att, getattr(other, att))
else:
if getattr(self, att) != getattr(other, att):
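The merge() hunk collapses four branches with identical bodies into one or-joined condition, which is ruff's SIM114 by my reading; evaluation order and short-circuiting are preserved, so behavior is unchanged. A minimal sketch of the equivalence (the helper and its arguments are hypothetical):

    from __future__ import annotations


    def should_take_other(val: object, other_source: str | None, att: str) -> bool:
        # One boolean expression replaces:
        #   if not val: ...
        #   elif other_source == "html" and att == "protection_profiles": ...
        #   elif other_source == "html" and att == "maintenance_updates": ...
        #   elif att == "state": ...
        # where every branch ran the same setattr() call.
        return (
            (not val)
            or (other_source == "html" and att == "protection_profiles")
            or (other_source == "html" and att == "maintenance_updates")
            or (att == "state")
        )


    assert should_take_other(None, None, "state")
    assert not should_take_other("x", "csv", "name")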
2 changes: 1 addition & 1 deletion src/sec_certs/sample/fips.py
@@ -64,7 +64,7 @@ def parse_single_detail_entry(key, entry):
)
entries = [(FIPSHTMLParser.normalize_string(key.text), entry) for key, entry in entries]
entries = [parse_single_detail_entry(*x) for x in entries if x[0] in DETAILS_KEY_NORMALIZATION_DICT]
-entries = {x: y for x, y in entries}
+entries = dict(entries)

if "caveat" in entries:
entries["mentioned_certs"] = FIPSHTMLParser.get_mentioned_certs_from_caveat(entries["caveat"])
2 changes: 1 addition & 1 deletion src/sec_certs/sample/sar.py
@@ -50,7 +50,7 @@ def contains_level(string: str) -> bool:
@staticmethod
def matches_re(string: str) -> bool:
return any(
-[re.match(sar_class + "(?:_[A-Z]{3,4}){1,2}(?:\\.[0-9]){0,2}", string) for sar_class in SAR_CLASS_MAPPING]
+re.match(sar_class + "(?:_[A-Z]{3,4}){1,2}(?:\\.[0-9]){0,2}", string) for sar_class in SAR_CLASS_MAPPING
)

def __lt__(self, other: Any) -> bool:
2 changes: 1 addition & 1 deletion src/sec_certs/utils/extract.py
@@ -353,7 +353,7 @@ def search_only_headers_bsi(filepath: Path): # noqa: C901

for m in re.finditer(rule_and_sep, whole_text):
# check if previous rules had at least one match
-if constants.TAG_CERT_ID not in items_found.keys():
+if constants.TAG_CERT_ID not in items_found:
logger.error(f"ERROR: front page not found for file: {filepath}")

match_groups = m.groups()
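The extract.py fix (and the matching one in pandas.py below) is ruff's SIM118 by my attribution: membership tests on a dict already check its keys, so calling .keys() only allocates a redundant view object. Sketch, with an illustrative stand-in for the real constant:

    # Dict membership checks keys by definition.
    TAG_CERT_ID = "cert_id"  # stand-in for constants.TAG_CERT_ID
    items_found = {"cert_id": "BSI-DSZ-CC-1234-2023"}  # illustrative value

    assert (TAG_CERT_ID in items_found.keys()) == (TAG_CERT_ID in items_found)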
6 changes: 5 additions & 1 deletion src/sec_certs/utils/helpers.py
@@ -28,7 +28,11 @@ def download_file(
time.sleep(delay)
# See https://github.com/psf/requests/issues/3953 for header justification
r = requests.get(
-url, allow_redirects=True, timeout=constants.REQUEST_TIMEOUT, stream=True, headers={"Accept-Encoding": None} # type: ignore
+url,
+allow_redirects=True,
+timeout=constants.REQUEST_TIMEOUT,
+stream=True,
+headers={"Accept-Encoding": None},  # type: ignore
)
ctx: Any
if show_progress_bar:
4 changes: 2 additions & 2 deletions src/sec_certs/utils/pandas.py
@@ -141,7 +141,7 @@ def get_sar_level_from_set(sars: set[SAR], sar_family: str) -> int | None:
Given a set of SARs and a family name, will return the level of the sought SAR from the set.
"""
family_sars_dict = {x.family: x for x in sars} if (sars and not pd.isnull(sars)) else {}
-if sar_family not in family_sars_dict.keys():
+if sar_family not in family_sars_dict:
return None
return family_sars_dict[sar_family].level

@@ -211,7 +211,7 @@ def compute_cve_correlations(
tuples = list(
zip(n_cves_corrs, n_cves_pvalues, worst_cve_corrs, worst_cve_pvalues, avg_cve_corrs, avg_cve_pvalues, supports)
)
-dct = {family: correlations for family, correlations in zip(["eal"] + families, tuples)}
+dct = dict(zip(["eal"] + families, tuples))
df_corr = pd.DataFrame.from_dict(
dct,
orient="index",
3 changes: 1 addition & 2 deletions src/sec_certs/utils/pdf.py
@@ -1,6 +1,5 @@
from __future__ import annotations

-import glob
import logging
import subprocess
from datetime import datetime, timedelta, timezone
@@ -54,7 +53,7 @@ def ocr_pdf_file(pdf_path: Path) -> str:
if ppm.returncode != 0:
raise ValueError(f"pdftoppm failed: {ppm.returncode}")

-for ppm_path in map(Path, glob.glob(str(tmppath / "image*.ppm"))):
+for ppm_path in tmppath.rglob("image*.ppm"):
base = ppm_path.with_suffix("")
content = pytesseract.image_to_string(Image.open(ppm_path), lang="eng+deu+fra")

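The pdf.py change swaps glob.glob() for pathlib and drops the now-unused import, in line with ruff's pathlib rules (PTH207, by my reading). One nuance: rglob() searches subdirectories recursively, while the original glob.glob() pattern matched only files directly under tmppath; for pdftoppm's flat output directory the two coincide, and tmppath.glob("image*.ppm") would have been the exact drop-in. A sketch under those assumptions:

    import glob
    from pathlib import Path

    tmppath = Path("/tmp/ocr-scratch")  # hypothetical pdftoppm output dir

    # before: stringly-typed and non-recursive
    old = [Path(p) for p in glob.glob(str(tmppath / "image*.ppm"))]

    # after: pathlib-native; note rglob() also descends into subdirectories
    new = sorted(tmppath.rglob("image*.ppm"))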
6 changes: 3 additions & 3 deletions tests/test_config.py
@@ -42,8 +42,8 @@ def test_config_from_yaml(simple_config_dict, simple_config_yaml: Path) -> None:


def test_load_env_values(simple_config_dict, simple_config_yaml):
os.environ["seccerts_log_filepath"] = "/some/nonsense/path"
os.environ["always_false_positive_fips_cert_id_threshold"] = "10"
os.environ["SECCERTS_LOG_FILEPATH"] = "/some/nonsense/path"
os.environ["ALWAYS_FALSE_POSITIVE_FIPS_CERT_ID_THRESHOLD"] = "10"

config_module.config.load_from_yaml(simple_config_yaml)

@@ -57,7 +57,7 @@ def test_complex_config_load(simple_config_dict, simple_config_yaml):
def test_complex_config_load(simple_config_dict, simple_config_yaml):
config_module.config.year_difference_between_validations = 123456789
config_module.config.n_threads = 987654321
os.environ["seccerts_n_threads"] = "1"
os.environ["SECCERTS_N_THREADS"] = "1"

config_module.config.load_from_yaml(simple_config_yaml)
for key, val in simple_config_dict.items():
Expand Down