Skip to content

Commit

Permalink
Move to new dgst algorithm for CC.
Browse files Browse the repository at this point in the history
  • Loading branch information
J08nY committed Jul 19, 2024
1 parent 34de9ca commit e42f5ca
Show file tree
Hide file tree
Showing 18 changed files with 106 additions and 68 deletions.
23 changes: 17 additions & 6 deletions src/sec_certs/dataset/cc.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from sec_certs.sample.cc_scheme import EntryType
from sec_certs.sample.protection_profile import ProtectionProfile
from sec_certs.serialization.json import ComplexSerializableType, serialize
from sec_certs.utils import helpers
from sec_certs.utils import helpers, sanitization
from sec_certs.utils import parallel_processing as cert_processing
from sec_certs.utils.profiling import staged

Expand Down Expand Up @@ -368,7 +368,14 @@ def map_ip_to_hostname(url: str) -> str:
return CCDataset.BASE_URL + relative_path

def _get_primary_key_str(row: Tag):
return row["category"] + row["cert_name"] + row["report_link"]
return "|".join(
[
row["category"],
row["cert_name"],
sanitization.sanitize_link_fname(row["report_link"]) or "None",
sanitization.sanitize_link_fname(row["st_link"]) or "None",
]
)

cert_status = "active" if "active" in str(file) else "archived"

Expand Down Expand Up @@ -408,11 +415,15 @@ def _get_primary_key_str(row: Tag):
df_base = df.loc[~df.is_maintenance].copy()
df_main = df.loc[df.is_maintenance].copy()

df_base.report_link = df_base.report_link.map(map_ip_to_hostname)
df_base.st_link = df_base.st_link.map(map_ip_to_hostname)
df_base.report_link = df_base.report_link.map(map_ip_to_hostname).map(sanitization.sanitize_link)
df_base.st_link = df_base.st_link.map(map_ip_to_hostname).map(sanitization.sanitize_link)

df_main.maintenance_report_link = df_main.maintenance_report_link.map(map_ip_to_hostname)
df_main.maintenance_st_link = df_main.maintenance_st_link.map(map_ip_to_hostname)
df_main.maintenance_report_link = df_main.maintenance_report_link.map(map_ip_to_hostname).map(
sanitization.sanitize_link
)
df_main.maintenance_st_link = df_main.maintenance_st_link.map(map_ip_to_hostname).map(
sanitization.sanitize_link
)

n_all = len(df_base)
n_deduplicated = len(df_base.drop_duplicates(subset=["dgst"]))
Expand Down
58 changes: 37 additions & 21 deletions src/sec_certs/sample/cc.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

import sec_certs.utils.extract
import sec_certs.utils.pdf
import sec_certs.utils.sanitization
from sec_certs import constants
from sec_certs.cert_rules import SARS_IMPLIED_FROM_EAL, cc_rules, rules, security_level_csv_scan
from sec_certs.configuration import config
Expand All @@ -27,7 +26,7 @@
from sec_certs.sample.sar import SAR
from sec_certs.serialization.json import ComplexSerializableType
from sec_certs.serialization.pandas import PandasSerializableType
from sec_certs.utils import helpers
from sec_certs.utils import helpers, sanitization
from sec_certs.utils.extract import normalize_match_string, scheme_frontpage_functions


Expand Down Expand Up @@ -57,16 +56,10 @@ class MaintenanceReport(ComplexSerializableType):
maintenance_st_link: str | None

def __post_init__(self):
super().__setattr__(
"maintenance_report_link", sec_certs.utils.sanitization.sanitize_cc_link(self.maintenance_report_link)
)
super().__setattr__(
"maintenance_st_link", sec_certs.utils.sanitization.sanitize_cc_link(self.maintenance_st_link)
)
super().__setattr__(
"maintenance_title", sec_certs.utils.sanitization.sanitize_string(self.maintenance_title)
)
super().__setattr__("maintenance_date", sec_certs.utils.sanitization.sanitize_date(self.maintenance_date))
super().__setattr__("maintenance_report_link", sanitization.sanitize_link(self.maintenance_report_link))
super().__setattr__("maintenance_st_link", sanitization.sanitize_link(self.maintenance_st_link))
super().__setattr__("maintenance_title", sanitization.sanitize_string(self.maintenance_title))
super().__setattr__("maintenance_date", sanitization.sanitize_date(self.maintenance_date))

@classmethod
def from_dict(cls, dct: dict) -> CCCertificate.MaintenanceReport:
Expand Down Expand Up @@ -420,20 +413,20 @@ def __init__(

self.status = status
self.category = category
self.name = sec_certs.utils.sanitization.sanitize_string(name)
self.name = sanitization.sanitize_string(name)

self.manufacturer = None
if manufacturer:
self.manufacturer = sec_certs.utils.sanitization.sanitize_string(manufacturer)
self.manufacturer = sanitization.sanitize_string(manufacturer)

self.scheme = scheme
self.security_level = sec_certs.utils.sanitization.sanitize_security_levels(security_level)
self.not_valid_before = sec_certs.utils.sanitization.sanitize_date(not_valid_before)
self.not_valid_after = sec_certs.utils.sanitization.sanitize_date(not_valid_after)
self.report_link = sec_certs.utils.sanitization.sanitize_cc_link(report_link)
self.st_link = sec_certs.utils.sanitization.sanitize_cc_link(st_link)
self.cert_link = sec_certs.utils.sanitization.sanitize_cc_link(cert_link)
self.manufacturer_web = sec_certs.utils.sanitization.sanitize_link(manufacturer_web)
self.security_level = sanitization.sanitize_security_levels(security_level)
self.not_valid_before = sanitization.sanitize_date(not_valid_before)
self.not_valid_after = sanitization.sanitize_date(not_valid_after)
self.report_link = sanitization.sanitize_link(report_link)
self.st_link = sanitization.sanitize_link(st_link)
self.cert_link = sanitization.sanitize_link(cert_link)
self.manufacturer_web = sanitization.sanitize_link(manufacturer_web)
self.protection_profiles = protection_profiles
self.maintenance_updates = maintenance_updates
self.state = state if state else self.InternalState()
Expand All @@ -445,6 +438,29 @@ def dgst(self) -> str:
"""
Computes the primary key of the sample using first 16 bytes of SHA-256 digest
"""
if not (self.name is not None and self.category is not None):
raise RuntimeError("Certificate digest can't be computed, because information is missing.")
return helpers.get_first_16_bytes_sha256(
"|".join(
[
self.category,
self.name,
sanitization.sanitize_link_fname(self.report_link) or "None",
sanitization.sanitize_link_fname(self.st_link) or "None",
]
)
)

@property
def old_dgst(self) -> str:
    """
    Legacy digest of the sample, kept next to `dgst`.

    The hashed key is the plain concatenation (no separator) of the category, the name and
    the report link passed through `sanitization.sanitize_cc_link`.

    :raises RuntimeError: if the name, report link or category is None
    :return str: result of `helpers.get_first_16_bytes_sha256` applied to the key
    """
    if self.name is None or self.report_link is None or self.category is None:
        raise RuntimeError("Certificate digest can't be computed, because information is missing.")
    legacy_key = self.category + self.name + sanitization.sanitize_cc_link(self.report_link)  # type: ignore
    return helpers.get_first_16_bytes_sha256(legacy_key)

@property
def older_dgst(self) -> str:
    """
    Oldest digest variant of the sample, kept next to `dgst`.

    The hashed key is the plain concatenation (no separator) of the category, the name and
    the report link exactly as stored on the certificate.

    :raises RuntimeError: if the name, report link or category is None
    :return str: result of `helpers.get_first_16_bytes_sha256` applied to the key
    """
    if self.name is None or self.report_link is None or self.category is None:
        raise RuntimeError("Certificate digest can't be computed, because information is missing.")
    raw_key = self.category + self.name + self.report_link
    return helpers.get_first_16_bytes_sha256(raw_key)
Expand Down
9 changes: 9 additions & 0 deletions src/sec_certs/utils/sanitization.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import html
import logging
from datetime import date
from pathlib import Path
from urllib.parse import urlparse

import numpy as np
import pandas as pd
Expand All @@ -23,6 +25,13 @@ def sanitize_link(record: str | None) -> str | None:
return record.replace(":443", "").replace(" ", "%20").replace("http://", "https://")


def sanitize_link_fname(record: str | None) -> str | None:
if not record:
return None
parsed = urlparse(record)
return Path(parsed.path).name


def sanitize_cc_link(record: str | None) -> str | None:
record = sanitize_link(record)
if not record:
Expand Down
8 changes: 4 additions & 4 deletions tests/cc/test_cc_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def transitive_vulnerability_dataset(analysis_data_dir) -> CCDataset:

@pytest.fixture
def random_certificate(processed_cc_dset: CCDataset) -> CCCertificate:
return processed_cc_dset["ebd276cca70fd723"]
return processed_cc_dset["ed91ff3e658457fd"]


def test_match_cpe(random_certificate: CCCertificate):
Expand Down Expand Up @@ -162,7 +162,7 @@ def test_single_record_references_heuristics(random_certificate: CCCertificate):

def test_reference_dataset(reference_dataset: CCDataset):
reference_dataset._compute_references()
test_cert = reference_dataset["692e91451741ef49"]
test_cert = reference_dataset["d1b238729b25d745"]

assert test_cert.heuristics.report_references.directly_referenced_by == {"BSI-DSZ-CC-0370-2006"}
assert test_cert.heuristics.report_references.indirectly_referenced_by == {
Expand All @@ -175,12 +175,12 @@ def test_reference_dataset(reference_dataset: CCDataset):

def test_direct_transitive_vulnerability_dataset(transitive_vulnerability_dataset: CCDataset):
transitive_vulnerability_dataset._compute_transitive_vulnerabilities()
assert transitive_vulnerability_dataset["d0705c9e6fbaeba3"].heuristics.direct_transitive_cves == {"CVE-2013-5385"}
assert transitive_vulnerability_dataset["11f77cb31b931a57"].heuristics.direct_transitive_cves == {"CVE-2013-5385"}


def test_indirect_transitive_vulnerability_dataset(transitive_vulnerability_dataset: CCDataset):
transitive_vulnerability_dataset._compute_transitive_vulnerabilities()
assert transitive_vulnerability_dataset["d0705c9e6fbaeba3"].heuristics.indirect_transitive_cves == {"CVE-2013-5385"}
assert transitive_vulnerability_dataset["11f77cb31b931a57"].heuristics.indirect_transitive_cves == {"CVE-2013-5385"}


def test_sar_object():
Expand Down
44 changes: 23 additions & 21 deletions tests/cc/test_cc_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,37 +11,39 @@


def test_download_and_convert_pdfs(toy_dataset: CCDataset, data_dir: Path):
for cert in toy_dataset:
print(cert.dgst, cert.old_dgst, cert.older_dgst)
template_report_pdf_hashes = {
"309ac2fd7f2dcf17": "774c41fbba980191ca40ae610b2f61484c5997417b3325b6fd68b345173bde52",
"8cf86948f02f047d": "533a5995ef8b736cc48cfda30e8aafec77d285511471e0e5a9e8007c8750203a",
"8a5e6bcda602920c": "e277151e4b279085cd3041ce914ffb3942b43e5ace911c557ad6b8ed764a4ece",
"e3dcf91ef38ddbf0": "774c41fbba980191ca40ae610b2f61484c5997417b3325b6fd68b345173bde52",
"ed7611868f0f9d97": "533a5995ef8b736cc48cfda30e8aafec77d285511471e0e5a9e8007c8750203a",
"8f08cacb49a742fb": "e277151e4b279085cd3041ce914ffb3942b43e5ace911c557ad6b8ed764a4ece",
}

template_st_pdf_hashes = {
"309ac2fd7f2dcf17": "b9a45995d9e40b2515506bbf5945e806ef021861820426c6d0a6a074090b47a9",
"8cf86948f02f047d": "3c8614338899d956e9e56f1aa88d90e37df86f3310b875d9d14ec0f71e4759be",
"8a5e6bcda602920c": "fcee91f09bb72a6526a1f94d0ab754a6db3fbe3ba5773cd372df19788bb25292",
"e3dcf91ef38ddbf0": "b9a45995d9e40b2515506bbf5945e806ef021861820426c6d0a6a074090b47a9",
"ed7611868f0f9d97": "3c8614338899d956e9e56f1aa88d90e37df86f3310b875d9d14ec0f71e4759be",
"8f08cacb49a742fb": "fcee91f09bb72a6526a1f94d0ab754a6db3fbe3ba5773cd372df19788bb25292",
}

template_cert_pdf_hashes = {
"309ac2fd7f2dcf17": "9d38bca310c4d349cc39471e0b75d939cc275db9a75b07b8a365d719cfbedcc5",
"8cf86948f02f047d": None,
"8a5e6bcda602920c": "4ba78f26f505819183256ca5a6b404fa90c750fe160c41791e4c400f64e2f6d5",
"e3dcf91ef38ddbf0": "9d38bca310c4d349cc39471e0b75d939cc275db9a75b07b8a365d719cfbedcc5",
"ed7611868f0f9d97": None,
"8f08cacb49a742fb": "4ba78f26f505819183256ca5a6b404fa90c750fe160c41791e4c400f64e2f6d5",
}

with TemporaryDirectory() as td:
toy_dataset.copy_dataset(td)
toy_dataset.download_all_artifacts()

if not (
toy_dataset["309ac2fd7f2dcf17"].state.report.download_ok
or toy_dataset["309ac2fd7f2dcf17"].state.st.download_ok
or toy_dataset["309ac2fd7f2dcf17"].state.cert.download_ok
or toy_dataset["8cf86948f02f047d"].state.report.download_ok
or toy_dataset["8cf86948f02f047d"].state.st.download_ok
or toy_dataset["8a5e6bcda602920c"].state.report.download_ok
or toy_dataset["8a5e6bcda602920c"].state.st.download_ok
or toy_dataset["8a5e6bcda602920c"].state.cert.download_ok
toy_dataset["e3dcf91ef38ddbf0"].state.report.download_ok
or toy_dataset["e3dcf91ef38ddbf0"].state.st.download_ok
or toy_dataset["e3dcf91ef38ddbf0"].state.cert.download_ok
or toy_dataset["ed7611868f0f9d97"].state.report.download_ok
or toy_dataset["ed7611868f0f9d97"].state.st.download_ok
or toy_dataset["8f08cacb49a742fb"].state.report.download_ok
or toy_dataset["8f08cacb49a742fb"].state.st.download_ok
or toy_dataset["8f08cacb49a742fb"].state.cert.download_ok
):
pytest.xfail(reason="Fail due to error during download")

Expand All @@ -60,15 +62,15 @@ def test_download_and_convert_pdfs(toy_dataset: CCDataset, data_dir: Path):
if cert.cert_link:
assert cert.state.cert.txt_path.exists()

template_report_txt_path = data_dir / "report_309ac2fd7f2dcf17.txt"
template_st_txt_path = data_dir / "target_309ac2fd7f2dcf17.txt"
template_report_txt_path = data_dir / "report_e3dcf91ef38ddbf0.txt"
template_st_txt_path = data_dir / "target_e3dcf91ef38ddbf0.txt"
assert (
abs(toy_dataset["309ac2fd7f2dcf17"].state.st.txt_path.stat().st_size - template_st_txt_path.stat().st_size)
abs(toy_dataset["e3dcf91ef38ddbf0"].state.st.txt_path.stat().st_size - template_st_txt_path.stat().st_size)
< 1000
)
assert (
abs(
toy_dataset["309ac2fd7f2dcf17"].state.report.txt_path.stat().st_size
toy_dataset["e3dcf91ef38ddbf0"].state.report.txt_path.stat().st_size
- template_report_txt_path.stat().st_size
)
< 1000
Expand Down
4 changes: 2 additions & 2 deletions tests/cc/test_cc_maintenance_updates.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def test_methods_not_meant_to_be_implemented():
def test_download_artifacts(mu_dset: CCDatasetMaintenanceUpdates):
# Conversion and extraction is identical to CC, will not test.
mu_dset.download_all_artifacts()
mu = mu_dset["cert_8a5e6bcda602920c_update_559ed93dd80320b5"]
mu = mu_dset["cert_8f08cacb49a742fb_update_559ed93dd80320b5"]

if not (mu.state.report.download_ok or mu.state.st.download_ok):
pytest.xfail(reason="Fail due to error on CC server.")
Expand Down Expand Up @@ -82,4 +82,4 @@ def test_from_web():
dset = CCDatasetMaintenanceUpdates.from_web_latest()
assert dset is not None
assert len(dset) >= 492 # Contents as of November 2022, maintenances should not disappear
assert "cert_8a5e6bcda602920c_update_559ed93dd80320b5" in dset # random cert verified to be present
assert "cert_8f08cacb49a742fb_update_559ed93dd80320b5" in dset # random cert verified to be present
6 changes: 3 additions & 3 deletions tests/data/cc/analysis/reference_dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"certs": [
{
"_type": "sec_certs.sample.cc.CCCertificate",
"dgst": "c30de3192d2e8ec2",
"dgst": "3129688580711e08",
"status": "archived",
"category": "Other Devices and Systems",
"name": "Océ Digital Access Controller (DAC) R10.1.5 for use in the Océ VarioPrint 1055, 1055 BC, 1055 DP, 1065, 1075, 2062, 2075, 2075 DP printer/copier/scanner products",
Expand Down Expand Up @@ -583,7 +583,7 @@
},
{
"_type": "sec_certs.sample.cc.CCCertificate",
"dgst": "53fe111411edfa45",
"dgst": "2c47b65953dcffb3",
"status": "archived",
"category": "Other Devices and Systems",
"name": "Océ Digital Access Controller (DAC) R9.1.6",
Expand Down Expand Up @@ -1229,7 +1229,7 @@
},
{
"_type": "sec_certs.sample.cc.CCCertificate",
"dgst": "692e91451741ef49",
"dgst": "d1b238729b25d745",
"status": "archived",
"category": "Other Devices and Systems",
"name": "Océ Digital Access Controller R8.1.10",
Expand Down
6 changes: 3 additions & 3 deletions tests/data/cc/analysis/transitive_vulnerability_dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"certs": [
{
"_type": "sec_certs.sample.cc.CCCertificate",
"dgst": "d0705c9e6fbaeba3",
"dgst": "11f77cb31b931a57",
"status": "active",
"category": "Operating Systems",
"name": "IBM z/OS Version 2 Release 1",
Expand Down Expand Up @@ -1339,7 +1339,7 @@
},
{
"_type": "sec_certs.sample.cc.CCCertificate",
"dgst": "011796336c7b94de",
"dgst": "487cf9415b61b49f",
"status": "archived",
"category": "Operating Systems",
"name": "RACF Element of z/OS Version 2, Release 1",
Expand Down Expand Up @@ -2288,7 +2288,7 @@
},
{
"_type": "sec_certs.sample.cc.CCCertificate",
"dgst": "ebc77980250ee68f",
"dgst": "c310425745136fdd",
"status": "active",
"category": "Operating Systems",
"name": "IBM z/OS Version 2 Release 2",
Expand Down
4 changes: 2 additions & 2 deletions tests/data/cc/analysis/vulnerable_dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"certs": [
{
"_type": "sec_certs.sample.cc.CCCertificate",
"dgst": "ebd276cca70fd723",
"dgst": "ed91ff3e658457fd",
"status": "active",
"category": "Access Control Devices and Systems",
"name": "IBM Security Access Manager for Enterprise Single Sign-On Version 8.2",
Expand Down Expand Up @@ -97,7 +97,7 @@
},
{
"_type": "sec_certs.sample.cc.CCCertificate",
"dgst": "37e1b22e5933b0ed",
"dgst": "95e3850bef32f410",
"status": "active",
"category": "Access Control Devices and Systems",
"name": "IBM WebSphere Application Server (WAS) 7.0",
Expand Down
2 changes: 1 addition & 1 deletion tests/data/cc/certificate/fictional_cert.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"_type": "sec_certs.sample.cc.CCCertificate",
"dgst": "a9ccb81a92e547dc",
"dgst": "8049938203b26f7b",
"status": "archived",
"category": "Sample category",
"name": "Sample certificate name",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"certs": [
{
"_type": "sec_certs.sample.cc_maintenance_update.CCMaintenanceUpdate",
"dgst": "cert_8a5e6bcda602920c_update_559ed93dd80320b5",
"dgst": "cert_8f08cacb49a742fb_update_559ed93dd80320b5",
"name": "Fortinet FortiGate w/ FortiOS v5.6.7 Build 6022",
"report_link": "https://www.commoncriteriaportal.org/files/epfiles/383-7-159%20MR%20v1.0e.pdf",
"st_link": "https://www.commoncriteriaportal.org/files/epfiles/383-7-159%20ST%20v1.4%20CCRA.pdf",
Expand Down Expand Up @@ -93,7 +93,7 @@
"indirect_transitive_cves": null,
"scheme_data": null
},
"related_cert_digest": "8a5e6bcda602920c",
"related_cert_digest": "8f08cacb49a742fb",
"maintenance_date": "2019-08-26"
}
]
Expand Down
Loading

0 comments on commit e42f5ca

Please sign in to comment.