Skip to content
This repository has been archived by the owner on Feb 14, 2024. It is now read-only.

Commit

Permalink
Add hibp to pe_source module
Browse files Browse the repository at this point in the history
  • Loading branch information
aloftus23 committed Feb 13, 2024
1 parent 602d449 commit 6405a20
Show file tree
Hide file tree
Showing 7 changed files with 353 additions and 26 deletions.
18 changes: 18 additions & 0 deletions src/pe_reports/data/config.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,21 @@ def db_password_key(filename=REPORT_DB_CONFIG, section="pe_db_password_key"):
else:
raise Exception(f"Section {section} not found in {filename}")
return db["key"]


def get_hibp_token(filename=REPORT_DB_CONFIG, section="hibp"):
    """Return the HIBP token stored in an INI-style configuration file.

    The token is the value of the first ``(option, value)`` pair that
    ``ConfigParser.items`` returns for ``section``; the option's name is not
    inspected.

    Args:
        filename: Path of the configuration file to read.
        section: Name of the section that holds the token.

    Returns:
        The value of the first option reported for ``section``.

    Raises:
        Exception: If ``filename`` is not an existing file, if ``section`` is
            not present in it, or if ``section`` has no options.
    """
    if not os.path.isfile(filename):
        raise Exception("Database.ini file not found at this path: {}".format(filename))

    parser = ConfigParser()
    parser.read(filename, encoding="utf-8")
    if not parser.has_section(section):
        raise Exception(
            "Section {} not found in the {} file".format(section, filename)
        )

    params = parser.items(section)
    if not params:
        # An empty section used to fail with a bare IndexError on params[0];
        # report it the same way as the other configuration problems.
        raise Exception(
            "Section {} in the {} file has no options".format(section, filename)
        )

    # Each entry is an (option_name, value) pair; only the value is needed.
    _, key = params[0]
    return key
32 changes: 16 additions & 16 deletions src/pe_source/cybersixgill.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
get_breaches,
get_data_source_uid,
get_orgs,
insert_sixgill_alerts,
insert_sixgill_breaches,
insert_sixgill_credentials,
insert_sixgill_alerts_tsql,
insert_sixgill_breaches_tsql,
insert_sixgill_credentials_tsql,
insert_sixgill_mentions,
insert_sixgill_topCVEs,
insert_sixgill_topCVEs_tsql,
)
from .data.sixgill.api import get_sixgill_organizations
from .data.sixgill.source import (
Expand Down Expand Up @@ -204,19 +204,19 @@ def get_alerts(
try:
alert_id = alert_row["sixgill_id"]

# content_snip, asset_mentioned, asset_type = get_alerts_content(
# sixgill_org_id, alert_id, org_assets_dict
# )
content_snip, asset_mentioned, asset_type = get_alerts_content(
sixgill_org_id, alert_id, org_assets_dict
)

alerts_df.at[alert_index, "content_snip"] = content_snip
alerts_df.at[alert_index, "asset_mentioned"] = asset_mentioned
alerts_df.at[alert_index, "asset_type"] = asset_type
except Exception as e:
# LOGGER.error(
# "Failed fetching a specific alert content for %s", org_id
# )
# LOGGER.error(e)
# print(traceback.format_exc())
LOGGER.error(
"Failed fetching a specific alert content for %s", org_id
)
LOGGER.error(e)
print(traceback.format_exc())
alerts_df.at[alert_index, "content_snip"] = ""
alerts_df.at[alert_index, "asset_mentioned"] = ""
alerts_df.at[alert_index, "asset_type"] = ""
Expand All @@ -229,7 +229,7 @@ def get_alerts(

# Insert alert data into the PE database
try:
insert_sixgill_alerts(alerts_df)
insert_sixgill_alerts_tsql(alerts_df)
except Exception as e:
LOGGER.error("Failed inserting alert data for %s", org_id)
LOGGER.error(e)
Expand Down Expand Up @@ -410,7 +410,7 @@ def get_credentials(self, org_id, sixgill_org_id, pe_org_uid, source_uid):

# Insert breach data into the PE database
try:
insert_sixgill_breaches(creds_breach_df)
insert_sixgill_breaches_tsql(creds_breach_df)
except Exception as e:
LOGGER.error("Failed inserting breaches for %s", org_id)
LOGGER.error(e)
Expand Down Expand Up @@ -443,7 +443,7 @@ def get_credentials(self, org_id, sixgill_org_id, pe_org_uid, source_uid):
]
]
try:
insert_sixgill_credentials(creds_df)
insert_sixgill_credentials_tsql(creds_df)
except Exception as e:
LOGGER.error("Failed inserting credentials for %s", org_id)
LOGGER.error(e)
Expand Down Expand Up @@ -477,7 +477,7 @@ def get_topCVEs(self, source_uid):

# Insert credential data into the PE database
try:
insert_sixgill_topCVEs(top_cve_df)
insert_sixgill_topCVEs_tsql(top_cve_df)
except Exception as e:
LOGGER.error("Failed inserting top CVEs.")
LOGGER.error(e)
Expand Down
153 changes: 152 additions & 1 deletion src/pe_source/data/pe_db/db_query_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import requests

# cisagov Libraries
from pe_reports.data.config import config, staging_config
from pe_reports.data.config import config, get_hibp_token, staging_config
from pe_reports.data.db_query import task_api_call

LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -1642,3 +1642,154 @@ def query_all_cves(modified_date=None):
# total_data["first_seen"] = pd.to_datetime(total_data["first_seen"]).dt.date
# total_data["last_seen"] = pd.to_datetime(total_data["last_seen"]).dt.date
return total_data


def execute_hibp_breach_values(jsonList, table):
    """Upsert breach records into ``public.credential_breaches``.

    Every dict in ``jsonList`` becomes one row built from its values in
    insertion order, so the order of each dict's values must line up with the
    column list of the INSERT statement below. When a row with the same
    ``breach_name`` already exists, only ``modified_date``,
    ``exposed_cred_count`` and ``password_included`` are overwritten.

    Errors raised while executing or committing the statement are logged and
    swallowed, so this function does not report insert failures to the caller.

    Args:
        jsonList: Iterable of dicts, one per breach.
        table: Unused; kept so existing callers keep working.
    """
    conn = connect()
    sql = """INSERT INTO public.credential_breaches (
        breach_name,
        description,
        exposed_cred_count,
        breach_date,
        added_date,
        modified_date,
        data_classes,
        password_included,
        is_verified,
        is_fabricated,
        is_sensitive,
        is_retired,
        is_spam_list,
        data_source_uid
    ) VALUES %s
    ON CONFLICT (breach_name)
    DO UPDATE SET modified_date = EXCLUDED.modified_date,
    exposed_cred_count = EXCLUDED.exposed_cred_count,
    password_included = EXCLUDED.password_included;"""
    try:
        values = [list(record.values()) for record in jsonList]
        # The cursor context manager closes the cursor on every exit path.
        with conn.cursor() as cursor:
            try:
                extras.execute_values(cursor, sql, values)
                conn.commit()
                LOGGER.info("Data inserted into credential_breaches successfully..")
            except Exception as err:
                # Best-effort insert: log and carry on. Closing the connection
                # below discards whatever was left uncommitted.
                LOGGER.error(err)
    finally:
        # NOTE(review): assumes connect() hands out a fresh connection per
        # call, so it is released here instead of being leaked.
        conn.close()


def query_db(query, args=(), one=False):
    """Run ``query`` and return its rows as dicts keyed by column name.

    Args:
        query: SQL text passed to ``cursor.execute``.
        args: Parameters passed to ``cursor.execute`` alongside ``query``.
        one: When true, return only the first row (or ``None`` if the query
            produced no rows) instead of the full list.

    Returns:
        A list of ``{column_name: value}`` dicts, or a single such dict /
        ``None`` when ``one`` is true.
    """
    conn = connect()
    try:
        # The cursor context manager closes the cursor on every exit path.
        with conn.cursor() as cur:
            cur.execute(query, args)
            # Fetch before reading the description so a statement without a
            # result set fails in fetchall(), exactly as it did before.
            fetched = cur.fetchall()
            columns = [column[0] for column in cur.description]
    finally:
        # NOTE(review): assumes connect() hands out a fresh connection per
        # call, so it is released here instead of being leaked.
        conn.close()

    rows = [dict(zip(columns, row)) for row in fetched]
    if one:
        return rows[0] if rows else None
    return rows


def query_PE_subs(PE_org_id):
    """Return the sub domains, with their root domains, for one organization.

    Args:
        PE_org_id: Value matched against ``root_domains.organizations_uid``.

    Returns:
        A pandas DataFrame with the columns ``sub_domain`` and ``root_domain``.
    """
    conn = connect()
    sql = """
    SELECT sd.sub_domain, rd.root_domain
    FROM sub_domains sd
    join root_domains rd on rd.root_domain_uid = sd.root_domain_uid
    where rd.organizations_uid = %(org_id)s;"""
    try:
        return pd.read_sql_query(sql, conn, params={"org_id": PE_org_id})
    finally:
        # NOTE(review): assumes connect() hands out a fresh connection per
        # call, so it is released here instead of being leaked.
        conn.close()


def get_hibp_breaches():
    """Fetch the breach list from the HIBP breaches endpoint.

    The request is authenticated with the bearer token returned by
    ``get_hibp_token``.

    Returns:
        A ``(DataFrame, dict)`` tuple on an HTTP 200 response: the DataFrame
        has one row per breach and the dict maps each breach name to the same
        per-breach record. ``None`` when the response status is anything else
        (the status and body are logged).

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed, including when the timeout expires.
    """
    breaches_url = "https://haveibeenpwned.com/api/v2/breaches"
    headers = {"Authorization": f"Bearer {get_hibp_token()}"}
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(breaches_url, headers=headers, timeout=120)

    if response.status_code != 200:
        LOGGER.error(
            "HIBP breaches request failed with status %s: %s",
            response.status_code,
            response.text,
        )
        return None

    breach_list = []
    breach_dict = {}
    for line in response.json():
        # Key order is kept exactly as before: it determines the DataFrame's
        # column order and the order of each record's values.
        breach = {
            "breach_name": line["Name"],
            "breach_date": line["BreachDate"],
            "added_date": line["AddedDate"],
            "exposed_cred_count": line["PwnCount"],
            "modified_date": line["ModifiedDate"],
            "data_classes": line["DataClasses"],
            "description": line["Description"],
            "is_verified": line["IsVerified"],
            "is_fabricated": line["IsFabricated"],
            "is_sensitive": line["IsSensitive"],
            "is_retired": line["IsRetired"],
            "is_spam_list": line["IsSpamList"],
            "password_included": "Passwords" in line["DataClasses"],
        }
        breach_list.append(breach)
        breach_dict[line["Name"]] = breach
    return (pd.DataFrame(breach_list), breach_dict)


def get_emails(domain):
    """Fetch the HIBP enterprise domain-search result for ``domain``.

    The request is authenticated with the bearer token returned by
    ``get_hibp_token`` and is attempted up to six times. After a 502 response
    the function waits three minutes before the next attempt; other non-200
    responses are retried immediately.

    Args:
        domain: Domain name appended to the domain-search URL.

    Returns:
        The decoded JSON body of the first HTTP 200 response, or ``None`` if
        no attempt succeeded.

    Raises:
        requests.exceptions.RequestException: If a request cannot be
            completed, including when the timeout expires.
    """
    emails_url = "https://haveibeenpwned.com/api/v2/enterprisesubscriber/domainsearch/"
    headers = {"Authorization": f"Bearer {get_hibp_token()}"}
    max_attempts = 6
    status = None

    for attempt in range(1, max_attempts + 1):
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(emails_url + domain, headers=headers, timeout=120)
        status = response.status_code
        if status == 200:
            return response.json()
        # Only wait when another attempt will follow; sleeping after the
        # last attempt would just delay returning the failure.
        if status == 502 and attempt < max_attempts:
            time.sleep(60 * 3)

    LOGGER.error(
        "HIBP domain search for %s failed after %d attempts (last status %s)",
        domain,
        max_attempts,
        status,
    )
    return None


def execute_hibp_emails_values(jsonList):
    """Insert credential exposure records into ``public.credential_exposures``.

    Every dict in ``jsonList`` becomes one row built from its values in
    insertion order, so the order of each dict's values must line up with the
    column list of the INSERT statement below. Rows that collide with an
    existing ``(email, breach_name)`` pair are skipped.

    Errors are not caught here: any exception raised while executing or
    committing the statement propagates to the caller.

    Args:
        jsonList: Iterable of dicts, one per exposed credential.
    """
    conn = connect()
    sql = """INSERT INTO public.credential_exposures (
        email,
        organizations_uid,
        root_domain,
        sub_domain,
        modified_date,
        breach_name,
        credential_breaches_uid,
        data_source_uid,
        name
    ) VALUES %s
    ON CONFLICT (email, breach_name)
    DO NOTHING;"""
    try:
        values = [list(record.values()) for record in jsonList]
        # The cursor context manager closes the cursor on every exit path.
        with conn.cursor() as cursor:
            extras.execute_values(cursor, sql, values)
            conn.commit()
            LOGGER.info(
                "\t\tHIBP data inserted into credential_exposures successfully.."
            )
    finally:
        # NOTE(review): assumes connect() hands out a fresh connection per
        # call, so it is released here instead of being leaked.
        conn.close()
8 changes: 5 additions & 3 deletions src/pe_source/data/sixgill/api.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -182,11 +182,13 @@ def dve_top_cves():
clean_top_10_cves = []
for result in result_list:
cve_id = result.get("name")
dynamic_rating = result.get("x_sixgill_info").get("score").get("current")
dynamic_rating = result.get("x_sixgill_info").get("rating").get("current")
if result.get("x_sixgill_info").get("nvd").get("v3") is None:
nvd_v3_score = None
else:
nvd_v3_score = result.get("x_sixgill_info").get("nvd").get("v3").get("current")
nvd_v3_score = (
result.get("x_sixgill_info").get("nvd").get("v3").get("current")
)
nvd_base_score = "{'v2': None, 'v3': " + str(nvd_v3_score) + "}"
clean_cve = {
"cve_id": cve_id,
Expand All @@ -197,6 +199,7 @@ def dve_top_cves():

return clean_top_10_cves


def credential_auth(params):
"""Get data about a specific CVE."""
url = "https://api.cybersixgill.com/credentials/leaks"
Expand Down Expand Up @@ -257,7 +260,6 @@ def setOrganizationUsers(org_id):
or userrole == role2
and user_id != id_role1
):

url = (
f"https://api.cybersixgill.com/multi-tenant/organization/"
f"{org_id}/user/{user_id}?role_id={userrole}"
Expand Down
Loading

0 comments on commit 6405a20

Please sign in to comment.