Skip to content

Commit

Permalink
Merge pull request #31 from cisagov/improvement/setup-sslmate
Browse files (browse the repository at this point in the history)
Setup Certificate Search
  • Loading branch information
king-alexander authored Jan 12, 2023
2 parents 009c449 + a4d63ce commit 9dd0abe
Show file tree
Hide file tree
Showing 9 changed files with 70 additions and 58 deletions.
6 changes: 3 additions & 3 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ updates:
- dependency-name: actions/setup-python
- dependency-name: hashicorp/setup-terraform
- dependency-name: mxschmitt/action-tmate
# Managed by cisagov/skeleton-python-library
- dependency-name: actions/download-artifact
- dependency-name: actions/upload-artifact
# # Managed by cisagov/skeleton-python-library
# - dependency-name: actions/download-artifact
# - dependency-name: actions/upload-artifact

- package-ecosystem: "pip"
directory: "/"
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# admiral 👩‍✈️🚢⚓️ #

[![GitHub Build Status](https://github.com/cisagov/admiral/workflows/build/badge.svg)](https://github.com/cisagov/admiral/actions)
[![CodeQL](https://github.com/cisagov/admiral/workflows/CodeQL/badge.svg)](https://github.com/cisagov/admiral/actions/workflows/codeql-analysis.yml)
[![Coverage Status](https://coveralls.io/repos/github/cisagov/admiral/badge.svg?branch=develop)](https://coveralls.io/github/cisagov/admiral?branch=develop)
[![Known Vulnerabilities](https://snyk.io/test/github/cisagov/admiral/develop/badge.svg)](https://snyk.io/test/github/cisagov/admiral)
[![GitHub Build Status](https://github.com/cisagov/skeleton-python-library/workflows/build/badge.svg)](https://github.com/cisagov/skeleton-python-library/actions)
[![CodeQL](https://github.com/cisagov/skeleton-python-library/workflows/CodeQL/badge.svg)](https://github.com/cisagov/skeleton-python-library/actions/workflows/codeql-analysis.yml)
[![Coverage Status](https://coveralls.io/repos/github/cisagov/skeleton-python-library/badge.svg?branch=develop)](https://coveralls.io/github/cisagov/skeleton-python-library?branch=develop)
[![Known Vulnerabilities](https://snyk.io/test/github/cisagov/skeleton-python-library/develop/badge.svg)](https://snyk.io/test/github/cisagov/skeleton-python-library)

This project implements a distributed certificate transparency log harvester.

Expand Down
4 changes: 4 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ secrets:
file: ./secrets/mongo-root-passwd.txt
redis_conf:
file: ./secrets/redis.conf
sslmate_api_key_txt:
file: ./secrets/sslmate-api-key.txt

x-admiral-template: &admiral-template
build:
Expand All @@ -25,6 +27,8 @@ x-admiral-template: &admiral-template
secrets:
- source: admiral_yml
target: admiral.yml
- source: sslmate_api_key_txt
target: sslmate-api-key.txt
volumes: # map for development only, comment out otherwise
- ./src/admiral:/usr/src/admiral/admiral

Expand Down
42 changes: 29 additions & 13 deletions examples/load_certs.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,36 @@
"""

# Standard Python Libraries
import base64
import pprint
import ssl
import time

# Third-Party Libraries
from celery import group
import dateutil.parser as parser
from dateutil.tz import UTC
from dateutil.utils import default_tzinfo
from docopt import docopt
from mongoengine import context_managers
from tqdm import tqdm

# cisagov Libraries
from admiral.celery import configure_app
from admiral.certs.tasks import cert_by_id, summary_by_domain
from admiral.certs.tasks import cert_by_issuance, summary_by_domain
from admiral.model import Cert, Domain
from admiral.util import connect_from_config

# Globals
PP = pprint.PrettyPrinter(indent=4)
EARLIEST_EXPIRED_DATE = parser.parse("2018-10-01")
EARLIEST_EXPIRED_DATE = default_tzinfo(
# Make the earliest expired date timezone aware. This date in
# particular represents the start of FY19, the fiscal year during
# which Emergency Directive 19-01 went into effect. For more, see
# https://www.cisa.gov/emergency-directive-19-01.
parser.parse("2018-10-01"),
UTC,
)


def cert_id_exists_in_database(log_id):
Expand All @@ -52,29 +63,31 @@ def cert_id_exists_in_database(log_id):
return False


def get_new_log_ids(domain, max_expired_date, verbose=False):
"""Generate a sequence of new CT Log IDs.
def get_new_log_issuances(domain, max_expired_date, verbose=False):
"""Generate a sequence of new CT Log issuances.
Arguments:
domain -- the domain name to query
max_expired_date -- a date to filter out expired certificates
Yields a sequence of new, unique, log IDs.
Yields a sequence of new, unique, log issuances.
"""
if verbose:
tqdm.write(f"requesting certificate list for: {domain}")
expired = domain != "nasa.gov" # NASA is breaking the CT Log
cert_list = summary_by_domain.delay(domain, subdomains=True, expired=expired)

cert_list = summary_by_domain.delay(domain, subdomains=True)
cert_list = cert_list.get()
duplicate_log_ids = set()
for i in tqdm(cert_list, desc="Subjects", unit="entries", leave=False):
log_id = i["id"]
# Write the first valid DNS name to verbose output. Otherwise,
# the names are discarded.
name = i["dns_names"][0]
cert_expiration_date = parser.parse(i["not_after"])
if verbose:
tqdm.write(
f"id: {log_id}:\tex: {cert_expiration_date}\t"
f'{i["name_value"]}...\t',
f"id: {log_id}:\tex: {cert_expiration_date}\t" f"{name}...\t",
end="",
)
if cert_expiration_date < max_expired_date:
Expand All @@ -92,7 +105,7 @@ def get_new_log_ids(domain, max_expired_date, verbose=False):
duplicate_log_ids.add(log_id)
if verbose:
tqdm.write("will import")
yield (log_id)
yield (i)


def group_update_domain(domain, max_expired_date, verbose=False, dry_run=False):
Expand All @@ -107,8 +120,9 @@ def group_update_domain(domain, max_expired_date, verbose=False, dry_run=False):
"""
# create a list of signatures to be executed in parallel
signatures = []
for log_id in get_new_log_ids(domain.domain, max_expired_date, verbose):
signatures.append(cert_by_id.s(log_id))

for issuance in get_new_log_issuances(domain.domain, max_expired_date, verbose):
signatures.append(cert_by_issuance.s(issuance))

# create a job with all the signatures
job = group(signatures)
Expand All @@ -125,7 +139,9 @@ def group_update_domain(domain, max_expired_date, verbose=False, dry_run=False):
tasks_to_results = zip(job.tasks, results.join())

# create x509 certificates from the results
for task, pem in tasks_to_results:
for task, result in tasks_to_results:
data = base64.b64decode(result["data"]) # encoded in ASN.1 DER
pem = ssl.DER_cert_to_PEM_cert(data)
cert, is_precert = Cert.from_pem(pem)
cert.log_id = task.get("args")[0] # get log_id from task
if is_precert:
Expand Down
5 changes: 0 additions & 5 deletions secrets/admiral.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,6 @@ celery-defaults: &celery-defaults
result_backend: redis://:fruitcake@redis:6379/0
result_expires: 3600
task_acks_late: true
# According to https://groups.google.com/g/crtsh/c/NZJntKrBdmg,
# crt.sh requests are limited to 60 per minute. The default rate limit
# for tasks is applied to each worker individually. So since there are
# 6 cert workers, '10/m' honors the limit set by crt.sh.
task_default_rate_limit: 10/m
task_reject_on_worker_lost: true
task_track_started: true
task_send_sent_event: true
Expand Down
1 change: 1 addition & 0 deletions secrets/sslmate-api-key.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
example
2 changes: 1 addition & 1 deletion src/admiral/_version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""This file defines the version of this module."""
__version__ = "1.1.0"
__version__ = "0.2.0"
56 changes: 26 additions & 30 deletions src/admiral/certs/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
from celery.utils.log import get_task_logger
import requests

# We use the version number to identify our user-agent string
from .._version import __version__

logger = get_task_logger(__name__)

# regexr.com/3e8n2
Expand All @@ -25,66 +28,59 @@

@shared_task(
autoretry_for=(Exception, requests.HTTPError, requests.exceptions.HTTPError),
rate_limit="10/h",
retry_backoff=True,
retry_jitter=True,
retry_kwargs={"max_retries": 16},
)
def summary_by_domain(domain, subdomains=True, expired=False):
def summary_by_domain(domain, subdomains=True):
"""Fetch a summary of the certificates in the log.
Arguments:
domain -- the domain to query
subdomains -- include certificates of subdomains
expired -- include expired certificates
"""
# validate input
m = DOMAIN_NAME_RE.match(domain)
if m is None:
raise ValueError(f"invalid domain name format: {domain}")

wildcard_param = "%." if subdomains else ""
expired_param = "" if expired else "&exclude=expired"
# read SSLMate API key
key = ""
with open("/run/secrets/sslmate-api-key.txt") as file:
key = file.read().rstrip()

logger.info(f"Fetching certs from CT log for: {wildcard_param}{domain}")
logger.info(f"Fetching certs from CT log for: {domain}")
url = (
f"https://crt.sh/?Identity={wildcard_param}{domain}{expired_param}"
f"&output=json"
f"https://api.certspotter.com/v1/issuances?domain={domain}&include_subdomains={subdomains}"
f"&expand=dns_names&expand=cert"
)
req = requests.get(
url,
headers={"User-Agent": "cyhy/2.0.0"},
headers={
"Authorization": f"Bearer {key}",
"User-Agent": f"admiral/{__version__}",
},
timeout=(CONNECT_TIMEOUT, READ_TIMEOUT),
)

if req.ok:
data = json.loads(req.content)
if subdomains:
# a query for the unwildcarded domain needs to be made separately
data += summary_by_domain(domain, subdomains=False, expired=expired)
return data
else:
req.raise_for_status()


@shared_task(
autoretry_for=(Exception, requests.HTTPError, requests.exceptions.HTTPError),
retry_backoff=True,
retry_jitter=True,
retry_kwargs={"max_retries": 16},
)
def cert_by_id(id):
"""Fetch a certificate by log ID."""
logger.info(f"Fetching cert data from CT log for id: {id}.")
@shared_task
def cert_by_issuance(issuance):
"""Fetch a certificate object from the issuance object.
url = f"https://crt.sh/?d={id}"
req = requests.get(
url,
headers={"User-Agent": "cyhy/2.0.0"},
timeout=(CONNECT_TIMEOUT, READ_TIMEOUT),
)
Arguments:
issuance -- the certificate issuance record found in one or more logs
if req.ok:
return req.content.decode()
else:
req.raise_for_status()
"""
id = issuance["id"]
logger.info(f"Fetching cert data from CT log for id: {id}.")

return issuance["cert"]
4 changes: 2 additions & 2 deletions tests/cert_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# from cryptography.hazmat.backends import default_backend
import pytest

# from admiral.certs.tasks import cert_by_id, summary_by_domain
# from admiral.certs.tasks import cert_by_issuance, summary_by_domain

PP = pprint.PrettyPrinter(indent=4)

Expand Down Expand Up @@ -42,7 +42,7 @@ def celery():
# # get the first id from the summaries
# id = summary.get()[0]["id"]
# print(f"requesting certificate for id: {id}")
# first_cert = cert_by_id.delay(id)
# first_cert = cert_by_issuance.delay(id)
# pem = first_cert.get(timeout=60)
# print("done")
#
Expand Down

0 comments on commit 9dd0abe

Please sign in to comment.