
Commit

Merge pull request #442 from crocs-muni/fix/cc-schemes-again

Fix CC scheme downloads

J08nY authored Oct 2, 2024
2 parents 046d611 + 2cd9093 commit 8be47e0

Showing 2 changed files with 30 additions and 17 deletions.
4 changes: 2 additions & 2 deletions src/sec_certs/constants.py
@@ -122,8 +122,8 @@
 CC_NORWAY_CERTIFIED_URL = CC_NORWAY_BASE_URL + "/certified-products/category1919.html"
 CC_NORWAY_ARCHIVED_URL = CC_NORWAY_BASE_URL + "/certified-products/product-archive/"
 CC_KOREA_BASE_URL = "https://itscc.kr"
-CC_KOREA_EN_URL = CC_KOREA_BASE_URL + "/main/main.do?accessMode=home_en"
-CC_KOREA_CERTIFIED_URL = CC_KOREA_BASE_URL + "/certprod/list.do"
+CC_KOREA_EN_URL = CC_KOREA_BASE_URL + "/main/mainEn.do"
+CC_KOREA_CERTIFIED_URL = CC_KOREA_BASE_URL + "/certprod/listA.do"
 CC_KOREA_PRODUCT_URL = CC_KOREA_BASE_URL + "/certprod/view.do?product_id={}&product_class=1"
 CC_SINGAPORE_BASE_URL = "https://www.csa.gov.sg"
 CC_SINGAPORE_CERTIFIED_URL = (
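Note: CC_KOREA_PRODUCT_URL in the surrounding context keeps a {} placeholder rather than using an f-string, so it can be filled per product later. A minimal sketch of how such a template would be filled; the product_id value here is invented purely for illustration:

    # Hypothetical usage of the URL template from constants.py above.
    CC_KOREA_BASE_URL = "https://itscc.kr"
    CC_KOREA_PRODUCT_URL = CC_KOREA_BASE_URL + "/certprod/view.do?product_id={}&product_class=1"

    url = CC_KOREA_PRODUCT_URL.format(2023001)  # 2023001 is a made-up id
    print(url)  # https://itscc.kr/certprod/view.do?product_id=2023001&product_class=1
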
43 changes: 28 additions & 15 deletions src/sec_certs/sample/cc_scheme.py
@@ -86,12 +86,12 @@ def _get(url: str, session=None, **kwargs) -> Response:
     return _getq(url, None, session, **kwargs)


-def _get_page(url: str, session=None) -> BeautifulSoup:
-    return BeautifulSoup(_get(url, session).content, "html5lib")
+def _get_page(url: str, session=None, **kwargs) -> BeautifulSoup:
+    return BeautifulSoup(_get(url, session, **kwargs).content, "html5lib")


-def _get_hash(url: str, session=None) -> bytes:
-    resp = _get(url, session)
+def _get_hash(url: str, session=None, **kwargs) -> bytes:
+    resp = _get(url, session, **kwargs)
     h = hashlib.sha256()
     for chunk in resp.iter_content():
         h.update(chunk)
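Note: the **kwargs passthrough lets callers forward arbitrary requests options (verify=False, timeout=..., headers=...) through these helpers without changing their signatures again. A minimal standalone sketch of the same streaming-hash pattern, assuming plain requests in place of the repository's _get wrapper:

    import hashlib
    import requests

    def get_hash(url: str, session=None, **kwargs) -> bytes:
        # Forward extra keyword arguments (verify=..., timeout=...) down to
        # requests, mirroring how _get_page/_get_hash now pass **kwargs to _get.
        requestor = session if session is not None else requests
        resp = requestor.get(url, stream=True, **kwargs)
        h = hashlib.sha256()
        for chunk in resp.iter_content(chunk_size=8192):
            h.update(chunk)  # hash incrementally, never holding the whole body
        return h.digest()
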
@@ -222,15 +222,23 @@ def get_canada_in_evaluation() -> list[dict[str, Any]]:


 def _get_france(url, enhanced, artifacts) -> list[dict[str, Any]]:  # noqa: C901
-    base_soup = _get_page(url)
+    session = requests.session()
+    challenge_soup = _get_page(constants.CC_ANSSI_BASE_URL, session=session)
+    bln_script = challenge_soup.find("head").find_all("script")[1]
+    bln_match = re.search(r"\"value\":\"([a-zA-Z0-9_-]+)\"", bln_script.string)
+    if not bln_match:
+        raise ValueError("Balleen challenge missing")
+    bln_value = bln_match.group(1)
+    session.cookies.set("bln_challengejs", bln_value, domain="cyber.gouv.fr")
+    base_soup = _get_page(url, session=session)
     pager = base_soup.find("nav", class_="pager")
     last_page_a = re.search("[0-9]+", pager.find("a", title="Aller à la dernière page").text)
     if not last_page_a:
         raise ValueError
     pages = int(last_page_a.group())
     results = []
     for page in range(pages + 1):
-        soup = _get_page(url + f"?page={page}")
+        soup = _get_page(url + f"?page={page}", session=session)
         for row in soup.find_all("article", class_="node--type-produit-certifie-cc"):
             cert: dict[str, Any] = {
                 "product": sns(row.find("h3").text),
@@ -255,7 +263,7 @@ def _get_france(url, enhanced, artifacts) -> list[dict[str, Any]]:  # noqa: C901
cert["expiration_date"] = value
if enhanced:
e: dict[str, Any] = {}
cert_page = _get_page(cert["url"])
cert_page = _get_page(cert["url"], session=session)
infos = cert_page.find("div", class_="product-infos-wrapper")
for tr in infos.find_all("tr"):
label = tr.find("th").text
@@ -290,15 +298,15 @@ def _get_france(url, enhanced, artifacts) -> list[dict[str, Any]]:  # noqa: C901
if "Rapport de certification" in a.text:
e["report_link"] = urljoin(constants.CC_ANSSI_BASE_URL, a["href"])
if artifacts:
e["report_hash"] = _get_hash(e["report_link"]).hex()
e["report_hash"] = _get_hash(e["report_link"], session=session).hex()
elif "Cible de sécurité" in a.text:
e["target_link"] = urljoin(constants.CC_ANSSI_BASE_URL, a["href"])
if artifacts:
e["target_hash"] = _get_hash(e["target_link"]).hex()
e["target_hash"] = _get_hash(e["target_link"], session=session).hex()
elif "Certificat" in a.text:
e["cert_link"] = urljoin(constants.CC_ANSSI_BASE_URL, a["href"])
if artifacts:
e["cert_hash"] = _get_hash(e["cert_link"]).hex()
e["cert_hash"] = _get_hash(e["cert_link"], session=session).hex()
cert["enhanced"] = e
results.append(cert)
return results
@@ -621,13 +629,18 @@ def _get_japan(url, enhanced, artifacts) -> list[dict[str, Any]]:  # noqa: C901
tds = tr.find_all("td")
if not tds:
continue
if len(tds) == 6:
if len(tds) in (6, 7):
cert: dict[str, Any] = {
"cert_id": sns(tds[0].text),
"supplier": sns(tds[1].text),
"toe_overseas_name": sns(tds[2].text),
"claim": sns(tds[4].text),
}
if len(tds) == 6:
cert["expiration_date"] = sns(tds[5].text)
cert["claim"] = sns(tds[4].text)
else:
cert["expiration_date"] = sns(tds[4].text)
cert["claim"] = sns(tds[5].text)
cert_date = sns(tds[3].text)
toe_a = tds[2].find("a")
if toe_a and "href" in toe_a.attrs:
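Note: the Japanese scheme's table now comes in two layouts, and the claim and expiration-date columns swap position depending on whether a row has 6 or 7 cells. A minimal sketch of the same position-dependent parsing, with invented row data:

    from bs4 import BeautifulSoup

    # Two hypothetical rows: a 6-cell layout and a 7-cell layout, showing how
    # the claim/expiration columns swap position between the two.
    html = """
    <table><tbody>
    <tr><td>C0001</td><td>ACME</td><td>Widget</td><td>2024-01</td><td>EAL4+</td><td>2029-01</td></tr>
    <tr><td>C0002</td><td>ACME</td><td>Gadget</td><td>2024-02</td><td>2029-02</td><td>EAL2</td><td>extra</td></tr>
    </tbody></table>
    """

    for tr in BeautifulSoup(html, "html5lib").find_all("tr"):
        tds = tr.find_all("td")
        if len(tds) in (6, 7):
            if len(tds) == 6:
                claim, expiry = tds[4].text, tds[5].text
            else:
                claim, expiry = tds[5].text, tds[4].text
            print(tds[0].text, claim, expiry)
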
@@ -1061,7 +1074,7 @@ def _get_korea(  # noqa: C901
     product_class: int, enhanced: bool, artifacts: bool
 ) -> list[dict[str, Any]]:
     session = requests.session()
-    session.get(constants.CC_KOREA_EN_URL)
+    session.get(constants.CC_KOREA_EN_URL, verify=False)
     # Get base page
     url = constants.CC_KOREA_CERTIFIED_URL + f"?product_class={product_class}"
     soup = _get_page(url, session=session)
@@ -1071,7 +1084,7 @@ def _get_korea(  # noqa: C901
     while pages:
         page = pages.pop()
         csrf = soup.find("form", id="fm").find("input", attrs={"name": "csrf"})["value"]
-        resp = session.post(url, data={"csrf": csrf, "selectPage": page, "product_class": product_class})
+        resp = session.post(url, data={"csrf": csrf, "selectPage": page, "product_class": product_class}, verify=False)
         soup = BeautifulSoup(resp.content, "html5lib")
         tbody = soup.find("table", class_="cpl").find("tbody")
         for tr in tbody.find_all("tr"):
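Note: pagination here works by scraping the hidden csrf input out of the current page and re-POSTing it along with the next page number; each response carries a fresh token. A minimal sketch of that round-trip, against a hypothetical endpoint with the same form layout:

    import requests
    from bs4 import BeautifulSoup

    session = requests.session()
    url = "https://example.invalid/certprod/listA.do"  # hypothetical endpoint

    # Fetch the first page, then walk the rest by replaying the CSRF token.
    soup = BeautifulSoup(session.get(url).content, "html5lib")
    for page in (2, 3):
        # Each response embeds a fresh token in <form id="fm"><input name="csrf">.
        csrf = soup.find("form", id="fm").find("input", attrs={"name": "csrf"})["value"]
        resp = session.post(url, data={"csrf": csrf, "selectPage": page})
        soup = BeautifulSoup(resp.content, "html5lib")
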
@@ -1093,7 +1106,7 @@ def _get_korea(  # noqa: C901
                 e: dict[str, Any] = {}
                 if not cert["product_link"]:
                     continue
-                cert_page = _get_page(cert["product_link"], session)
+                cert_page = _get_page(cert["product_link"], session=session)
                 main = cert_page.find("div", class_="mainContent")
                 table = main.find("table", class_="shortenedWidth")
                 v = e
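Note: the verify=False calls disable TLS certificate verification for itscc.kr (presumably because its chain does not validate against default CA bundles); requests then emits an InsecureRequestWarning on every call unless it is suppressed. A minimal sketch of that trade-off, not tied to this repository:

    import requests
    import urllib3

    # Disabling verification silences certificate-chain errors but also removes
    # MITM protection, so urllib3 warns on every request; this mutes the warning.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    session = requests.session()
    resp = session.get("https://itscc.kr/main/mainEn.do", verify=False)
    print(resp.status_code)
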
