diff --git a/setup.py b/setup.py index 484c5f04..d563088c 100644 --- a/setup.py +++ b/setup.py @@ -93,6 +93,7 @@ def get_version(version_file): py_modules=[splitext(basename(path))[0] for path in glob("src/*.py")], include_package_data=True, install_requires=[ + "beautifulsoup4", "boto3 == 1.21.10", "botocore == 1.24.10", "chevron == 0.14.0", @@ -109,6 +110,7 @@ def get_version(version_file): "importlib_resources == 5.4.0", "matplotlib == 3.3.4", "mongo-db-from-config@http://github.com/cisagov/mongo-db-from-config/tarball/develop", + "nltk", "openpyxl", "pandas == 1.1.5", "psutil", @@ -125,6 +127,7 @@ def get_version(version_file): "schema == 0.7.5", "setuptools == 58.1.0", "shodan ==1.27.0", + "spacy", "sublist3r", "types-PyYAML == 6.0.4", "urllib3 == 1.26.7", diff --git a/src/adhoc/__init__.py b/src/adhoc/__init__.py new file mode 100644 index 00000000..ababb128 --- /dev/null +++ b/src/adhoc/__init__.py @@ -0,0 +1 @@ +"""Init file for module implementation.""" diff --git a/src/adhoc/data/__init__.py b/src/adhoc/data/__init__.py new file mode 100644 index 00000000..8c5bf486 --- /dev/null +++ b/src/adhoc/data/__init__.py @@ -0,0 +1 @@ +"""Adhoc data init.""" diff --git a/src/adhoc/data/config.py b/src/adhoc/data/config.py new file mode 100644 index 00000000..ae4dc5a7 --- /dev/null +++ b/src/adhoc/data/config.py @@ -0,0 +1,39 @@ +"""Configure database connection.""" +# Standard Python Libraries +from configparser import ConfigParser + + +def config(filename="/home/ubuntu/adhoc/data/database.ini", section="postgresql"): + """Configure postgres.""" + # create a parser + parser = ConfigParser() + # read config file + parser.read(filename) + + # get section, default to postgresql + db = {} + if parser.has_section(section): + params = parser.items(section) + for param in params: + db[param[0]] = param[1] + else: + raise Exception("Section {} not found in the {} file".format(section, filename)) + return db + + +def config2(filename="/home/ubuntu/adhoc/data/database.ini", section="crossfeedDB"): + """Configure Crossfeed.""" + # create a parser + parser = ConfigParser() + # read config file + parser.read(filename) + + # get section, default to postgresql + db = {} + if parser.has_section(section): + params = parser.items(section) + for param in params: + db[param[0]] = param[1] + else: + raise Exception("Section {} not found in the {} file".format(section, filename)) + return db diff --git a/src/adhoc/data/database.ini b/src/adhoc/data/database.ini new file mode 100644 index 00000000..55b95c17 --- /dev/null +++ b/src/adhoc/data/database.ini @@ -0,0 +1,13 @@ +[postgresql] +host= +database= +user= +password= +port= + +[crossfeedDB] +host= +database= +user= +password= +port= diff --git a/src/adhoc/data/run.py b/src/adhoc/data/run.py new file mode 100644 index 00000000..4dc7729a --- /dev/null +++ b/src/adhoc/data/run.py @@ -0,0 +1,335 @@ +"""Database queries.""" +# Standard Python Libraries +import sys + +# Third-Party Libraries +import pandas as pd +import psycopg2 +from psycopg2 import OperationalError +import psycopg2.extras as extras + +from .config import config + +CONN_PARAMS_DIC = config() + + +def show_psycopg2_exception(err): + """Error handleing for postgres issues.""" + err_type, traceback = sys.exc_info() + line_n = traceback.tb_lineno + print("\npsycopg2 ERROR:", err, "on line number:", line_n) + print("psycopg2 traceback:", traceback, "-- type:", err_type) + print("\nextensions.Diagnostics:", err) + print("pgerror:", err) + print("pgcode:", err, "\n") + + +def connect(thread): + 
"""Connect to postgres database.""" + conn = None + try: + conn = psycopg2.connect(**CONN_PARAMS_DIC) + except OperationalError as err: + show_psycopg2_exception(err) + conn = None + return conn + + +def close(conn): + """Close connection.""" + conn.close() + return + + +def execute_values(conn, dataframe, table, except_condition=";"): + """Insert into datafame.""" + tpls = [tuple(x) for x in dataframe.to_numpy()] + cols = ",".join(list(dataframe.columns)) + sql = "INSERT INTO {}({}) VALUES %s" + sql = sql + except_condition + cursor = conn.cursor() + try: + extras.execute_values(cursor, sql.format(table, cols), tpls) + conn.commit() + print("Data inserted using execute_values() successfully..") + except (Exception, psycopg2.DatabaseError) as err: + show_psycopg2_exception(err) + cursor.close() + + +def query_values(conn, table, where=";"): + """Insert of a datafame.""" + sql = "SELECT * FROM {}" + sql = sql + where + # try just pandas... pd..read_sql_query(sql, conn) + df = pd.read_sql_query(sql.format(table), conn) + conn.close() + return df + + +def query_orgs(thread): + """Query orgs.""" + conn = connect(thread) + orgs = query_values(conn, "organizations", " WHERE report_on is True;") + close(conn) + print(orgs) + return orgs + + +def query_roots(conn, org_uid): + """Insert into datafame.""" + sql = "SELECT * FROM root_domains WHERE organizations_uid = '{}'" + # try just pandas... pd..read_sql_query(sql, conn) + df = pd.read_sql_query(sql.format(org_uid), conn) + return df + + +def query_null_roots(conn, org_uid): + """Insert into datafame.""" + sql = "SELECT * FROM root_domains WHERE root_domain = 'Null_Root'" + # try just pandas... pd..read_sql_query(sql, conn) + df = pd.read_sql_query(sql, conn) + return df + + +def execute_hibp_breach_values(conn, dataframe, table): + """Insert into datafame.""" + tpls = [tuple(x) for x in dataframe.to_numpy()] + cols = ",".join(list(dataframe.columns)) + sql = """INSERT INTO {}({}) VALUES %s + ON CONFLICT (breach_name) + DO UPDATE SET modified_date = EXCLUDED.modified_date;""" + cursor = conn.cursor() + try: + extras.execute_values( + cursor, + sql.format( + table, + cols, + ), + tpls, + ) + conn.commit() + print("Data inserted using execute_values() successfully..") + except (Exception, psycopg2.DatabaseError) as err: + show_psycopg2_exception(err) + cursor.close() + + +def execute_hibp_emails_values(conn, dataframe, table): + """Insert into datafame.""" + tpls = [tuple(x) for x in dataframe.to_numpy()] + cols = ",".join(list(dataframe.columns)) + sql = """INSERT INTO {}({}) VALUES %s + ON CONFLICT (email, breach_name) + DO NOTHING;""" + cursor = conn.cursor() + try: + extras.execute_values( + cursor, + sql.format( + table, + cols, + ), + tpls, + ) + conn.commit() + print("Data inserted using execute_values() successfully..") + except (Exception, psycopg2.DatabaseError) as err: + show_psycopg2_exception(err) + cursor.close() + + +# No longer in use +def query_null_subs(conn): + """Insert into datafame.""" + sql = """SELECT o.name, o.organizations_uid, rd.root_domain, rd.root_domain_uid, sd.sub_domain, sd.sub_domain_uid FROM sub_domains as sd + JOIN root_domains as rd ON sd.root_domain_uid = rd.root_domain_uid + JOIN organizations as o ON o.organizations_uid = rd.organizations_uid + WHERE sub_domain = 'Null_Sub'""" + # try just pandas... 
pd..read_sql_query(sql, conn) + df = pd.read_sql_query(sql, conn) + return df + + +def execute_shodan_data(dataframe, table, thread, org_name, failed): + """Insert shodan data into db.""" + conn = connect(thread) + tpls = [tuple(x) for x in dataframe.to_numpy()] + cols = ",".join(list(dataframe.columns)) + sql = """INSERT INTO {}({}) VALUES %s + ON CONFLICT (organizations_uid, ip, port, protocol, timestamp) + DO NOTHING;""" + cursor = conn.cursor() + try: + extras.execute_values( + cursor, + sql.format( + table, + cols, + ), + tpls, + ) + conn.commit() + print( + f"{thread} Data inserted using execute_values() successfully - {org_name}" + ) + except Exception as e: + print(f"{org_name} failed inserting into {table}") + print(f"{thread} {e} - {org_name}") + failed.append(f"{org_name} failed inserting into {table}") + conn.rollback() + cursor.close() + cursor.close() + return failed + + +def execute_dnsmonitor_data(dataframe, table): + """Execute dns monitor data.""" + conn = connect("") + tpls = [tuple(x) for x in dataframe.to_numpy()] + cols = ",".join(list(dataframe.columns)) + sql = """INSERT INTO {}({}) VALUES %s + ON CONFLICT (domain_permutation, organizations_uid) + DO UPDATE SET ipv4 = EXCLUDED.ipv4, + ipv6 = EXCLUDED.ipv6, + date_observed = EXCLUDED.date_observed, + mail_server = EXCLUDED.mail_server, + name_server = EXCLUDED.name_server, + sub_domain_uid = EXCLUDED.sub_domain_uid, + data_source_uid = EXCLUDED.data_source_uid;""" + cursor = conn.cursor() + extras.execute_values( + cursor, + sql.format( + table, + cols, + ), + tpls, + ) + conn.commit() + print("DNSMonitor Data inserted using execute_values() successfully..") + + +def execute_dnsmonitor_alert_data(dataframe, table): + """Execute alert data.""" + conn = connect("") + tpls = [tuple(x) for x in dataframe.to_numpy()] + cols = ",".join(list(dataframe.columns)) + sql = """INSERT INTO {}({}) VALUES %s + ON CONFLICT (alert_type, sub_domain_uid, date, new_value) + DO NOTHING;""" + cursor = conn.cursor() + extras.execute_values( + cursor, + sql.format( + table, + cols, + ), + tpls, + ) + conn.commit() + print("DNSMonitor Alert Data inserted using execute_values() successfully..") + + +def query_ips(org_id): + """Query IPs.""" + conn = connect("") + sql = """SELECT wa.asset as ip_address + FROM web_assets wa + WHERE wa.organizations_uid = '{}' + and wa.report_on = True + and wa.asset_type = 'ipv4' + """ + # to just return ipv4 change last line to the following: + # and wa.asset_type = 'ipv4' + df = pd.read_sql(sql.format(org_id), conn) + conn.close() + return df + + +def query_orgs_rev(): + """Query orgs in reverse.""" + conn = connect("") + sql = "SELECT * FROM organizations WHERE report_on is True ORDER BY organizations_uid DESC;" + df = pd.read_sql_query(sql, conn) + close(conn) + return df + + +def query_web_assets(conn, org_id): + """Query web assets.""" + sql = """SELECT o.name, o.organizations_uid, wa.asset_type, wa.asset, wa.ip_type, + wa.asset_origin, wa.report_on, wa.last_scanned + FROM web_assets as wa + JOIN organizations o on o.organizations_uid = wa.organizations_uid + WHERE wa.report_on = True + and o.organizations_uid = '{}' + """ + df = pd.read_sql(sql.format(org_id), conn) + + conn.close() + return df + + +# No longer in use +def check_ip(ip): + """Check IPs.""" + conn = connect("") + sql = """SELECT wa.asset as ip, o.name as org FROM web_assets wa + JOIN organizations o on o.organizations_uid = wa.organizations_uid + WHERE wa.asset = '{}'""" + df = pd.read_sql_query(sql.format(ip), conn) + close(conn) + return 
df + + +def getSubdomain(domain): + """Get subdomain.""" + conn = connect("") + cur = conn.cursor() + sql = """SELECT * FROM sub_domains sd + WHERE sd.sub_domain = '{}'""" + cur.execute(sql.format(domain)) + sub = cur.fetchone() + cur.close() + return sub + + +def getRootdomain(domain): + """Get root domain.""" + conn = connect("") + cur = conn.cursor() + sql = """SELECT * FROM root_domains rd + WHERE rd.root_domain = '{}'""" + cur.execute(sql.format(domain)) + root = cur.fetchone() + cur.close() + return root + + +# ***Scpecifically for DNSMonitor +# TODO: Don't hardcode subdomain uid +def addRootToSubdomain(domain): + """Add root to subdomain.""" + # TODO: getDataSource() + root_domain_uid = getRootdomain(domain)[0] + conn = connect("") + sql = """insert into sub_domains(sub_domain, root_domain_uid, root_domain, data_source_uid) + values ('{}', '{}', '{}','f7229dcc-98a9-11ec-a1c4-02589a36c9d7');""" + cur = conn.cursor() + cur.execute(sql.format(domain, root_domain_uid, domain)) + conn.commit() + close(conn) + print(f"Success adding root domain, {domain}, to subdomains table.") + + +def getDataSource(source): + """Get data source.""" + conn = connect("") + cur = conn.cursor() + sql = """SELECT * FROM data_source WHERE name='{}'""" + cur.execute(sql.format(source)) + source = cur.fetchone() + cur.close() + return source diff --git a/src/adhoc/enumerate_subs_from_root.py b/src/adhoc/enumerate_subs_from_root.py new file mode 100644 index 00000000..67a60bed --- /dev/null +++ b/src/adhoc/enumerate_subs_from_root.py @@ -0,0 +1,111 @@ +"""Script to enumerate subs based on a provided root domain.""" +# Standard Python Libraries +import datetime +import json + +# Third-Party Libraries +import pandas as pd +import requests + +# cisagov Libraries +from pe_reports.data.db_query import connect, execute_values, get_orgs + +API_WHOIS = "at_k5eJoD6do4NSnXL2BY3o1e9BH1t2b" + + +def query_roots(org_uid): + """Query all ips that link to a cidr related to a specific org.""" + print(org_uid) + conn = connect() + sql = """SELECT r.root_domain_uid, r.root_domain FROM root_domains r + where r.organizations_uid = %(org_uid)s + and r.enumerate_subs = True + """ + df = pd.read_sql(sql, conn, params={"org_uid": org_uid}) + conn.close() + return df + + +def execute_subs(conn, dataframe): + """Save subdomains dataframe to the P&E DB.""" + df = dataframe.drop_duplicates() + except_clause = """ ON CONFLICT (sub_domain, root_domain_uid) + DO + NOTHING;""" + execute_values(conn, df, "public.sub_domains", except_clause) + + +def get_data_source_uid(source): + """Get data source uid.""" + conn = connect() + cur = conn.cursor() + sql = """SELECT * FROM data_source WHERE name = '{}'""" + cur.execute(sql.format(source)) + source = cur.fetchone()[0] + cur.close() + cur = conn.cursor() + # Update last_run in data_source table + date = datetime.datetime.today().strftime("%Y-%m-%d") + sql = """update data_source set last_run = '{}' + where name = '{}';""" + cur.execute(sql.format(date, source)) + cur.close() + conn.close() + return source + + +def getSubdomain(domain, root_uid): + """Get all sub-domains from passed in root domain.""" + url = "https://domains-subdomains-discovery.whoisxmlapi.com/api/v1" + payload = json.dumps( + { + "apiKey": f"{API_WHOIS}", + "domains": {"include": [f"{domain}"]}, + "subdomains": {"include": ["*"], "exclude": []}, + } + ) + headers = {"Content-Type": "application/json"} + response = requests.request("POST", url, headers=headers, data=payload) + data = response.json() + subdomains = 
data["domainsList"] + print(subdomains) + + data_source = get_data_source_uid("WhoisXML") + found_subs = [ + { + "sub_domain": domain, + "root_domain_uid": root_uid, + "data_source_uid": data_source, + } + ] + for sub in subdomains: + if sub != f"www.{domain}": + found_subs.append( + { + "sub_domain": sub, + "root_domain_uid": root_uid, + "data_source_uid": data_source, + } + ) + return found_subs + + +def enumerate_and_save_subs(root_uid, root_domain): + """Enumerate subdomains basedon on a private root.""" + subs = getSubdomain(root_domain, root_uid) + subs = pd.DataFrame(subs) + conn = connect() + execute_subs(conn, subs) + + +def main(): + """Query orgs and run them through the enuemeration function.""" + orgs = get_orgs("") + for i, org in orgs.iterrows(): + roots = query_roots(org["organizations_uid"]) + for j, root in roots.iterrows(): + enumerate_and_save_subs(root["root_domain_uid"], root["root_domain"]) + + +if __name__ == "__main__": + main() diff --git a/src/adhoc/fill_cidrs_from_cyhy_assets.py b/src/adhoc/fill_cidrs_from_cyhy_assets.py new file mode 100644 index 00000000..9b7c3899 --- /dev/null +++ b/src/adhoc/fill_cidrs_from_cyhy_assets.py @@ -0,0 +1,52 @@ +"""Fill CIDRs table from cyhy assets.""" + +# Third-Party Libraries +import pandas as pd + +# cisagov Libraries +from pe_reports.data.db_query import connect + + +def query_cyhy_assets(cyhy_db_id, conn): + """Query cyhy assets.""" + sql = """ + SELECT * + FROM cyhy_db_assets ca + where ca.org_id = %(org_id)s; + """ + + df = pd.read_sql_query(sql, conn, params={"org_id": cyhy_db_id}) + + return df + + +def fill_cidrs(orgs): + """Fill CIDRs.""" + network_count = 0 + + for i, org in orgs.iterrows(): + conn = connect() + # if org['cyhy_db_name'] not in ['DOC', 'DOC_CENSUS']: + # continue + org_id = org["organizations_uid"] + print(org) + networks = query_cyhy_assets(org["cyhy_db_name"], conn) + print(networks) + for j, network in networks.iterrows(): + network_count += 1 + net = network["network"] + print(net) + cur = conn.cursor() + try: + cur.callproc("insert_cidr", (network["network"], org_id, "cyhy_db")) + except Exception as e: + print(e) + continue + + row = cur.fetchone() + print(row) + conn.commit() + cur.close() + conn.close() + + print(network_count) diff --git a/src/adhoc/fill_ips_from_cidrs.py b/src/adhoc/fill_ips_from_cidrs.py new file mode 100644 index 00000000..d00f708d --- /dev/null +++ b/src/adhoc/fill_ips_from_cidrs.py @@ -0,0 +1,86 @@ +"""Fill IPs table from CIDR blocks.""" +# Standard Python Libraries +import hashlib +import ipaddress +import logging + +# Third-Party Libraries +import pandas as pd +import psycopg2 + +# cisagov Libraries +from pe_reports.data.db_query import connect, show_psycopg2_exception + + +def execute_ips(conn, dataframe): + """Insert the ips into the ips table in the database and link them to the associated cidr.""" + for i, row in dataframe.iterrows(): + try: + cur = conn.cursor() + sql = """ + INSERT INTO ips(ip_hash, ip, origin_cidr) VALUES (%s, %s, %s) + ON CONFLICT (ip) + DO + UPDATE SET origin_cidr = UUID(EXCLUDED.origin_cidr); """ + cur.execute(sql, (row["ip_hash"], row["ip"], row["origin_cidr"])) + conn.commit() + except (Exception, psycopg2.DatabaseError) as err: + show_psycopg2_exception(err) + cur.close() + continue + print("IPs inserted using execute_values() successfully..") + + +def query_cidrs(): + """Query all cidrs ordered by length.""" + conn = connect() + sql = """SELECT tc.cidr_uid, tc.network, tc.organizations_uid, tc.insert_alert + FROM cidrs tc + ORDER BY 
masklen(tc.network) + """ + df = pd.read_sql(sql, conn) + conn.close() + return df + + +def enumerate_ips(cidr, cidr_uid): + """Enumerate all ips for a provided cidr.""" + ips_from_cidrs = [] + print(cidr) + for ip in ipaddress.IPv4Network(cidr): + hash_object = hashlib.sha256(str(ip).encode("utf-8")) + ip_obj = { + "ip_hash": hash_object.hexdigest(), + "ip": str(ip), + "origin_cidr": cidr_uid, + } + ips_from_cidrs.append(ip_obj) + return ips_from_cidrs + + +def fill_ips_from_cidrs(): + """For each cidr enumerate all ips and add them to the ips table.""" + cidrs = query_cidrs() + ips_from_cidrs = [] + for i, cidr in cidrs.iterrows(): + + if cidr["insert_alert"] is not None: + continue + ips_from_cidrs = ips_from_cidrs + enumerate_ips( + cidr["network"], cidr["cidr_uid"] + ) + ips_from_cidrs = pd.DataFrame(ips_from_cidrs) + logging.info(ips_from_cidrs) + logging.info(ips_from_cidrs.drop_duplicates(subset=["ip"])) + conn = connect() + execute_ips(conn, ips_from_cidrs) + print("Succuss adding IPS to Cidrs") + + +def main(): + """Separate and fill database table from cidr block.""" + fill_ips_from_cidrs() + + +if __name__ == "__main__": + main() diff --git a/src/adhoc/link_subs_and_ips_from_ips.py b/src/adhoc/link_subs_and_ips_from_ips.py new file mode 100644 index 00000000..71758d3e --- /dev/null +++ b/src/adhoc/link_subs_and_ips_from_ips.py @@ -0,0 +1,160 @@ +"""Link sub-domains and IPs from IP lookups.""" +# Standard Python Libraries +import datetime +import threading +import time + +# Third-Party Libraries +import numpy as np +import pandas as pd +import requests + +# cisagov Libraries +from pe_reports.data.db_query import connect + + +def reverseLookup(ip, failed_ips): + """Take an ip and find all associated subdomains.""" + # TODO: Add API key + api = "at_k5eJoD6do4NSnXL2BY3o1e9BH1t2b" + url = f"https://dns-history.whoisxmlapi.com/api/v1?apiKey={api}&ip={ip}" + payload = {} + headers = {} + response = requests.request("GET", url, headers=headers, data=payload).json() + if response.get("code") == 429: + response = requests.request("GET", url, headers=headers, data=payload).json() + if response.get("code") == 429: + response = requests.request( + "GET", url, headers=headers, data=payload + ).json() + if response.get("code") == 429: + failed_ips.append(ip) + found_domains = [] + try: + try: + # update last_reverse_lookup field + conn = connect() + cur = conn.cursor() + date = datetime.datetime.today().strftime("%Y-%m-%d") + sql = """update ips set last_reverse_lookup = %s + where ip = %s;""" + cur.execute(sql, (date, str(ip))) + conn.commit() + cur.close() + conn.close() + except Exception as e: + print("failed to update timestamp field") + print(e) + if response["size"] > 0: + + result = response["result"] + for domain in result: + print(domain) + try: + found_domains.append( + { + "sub_domain": domain["name"], + "root": ".".join(domain["name"].rsplit(".")[-2:]), + } + ) + except KeyError: + continue + + except Exception as e: + print(response) + print("failed to return response") + print(e) + return found_domains + + +def query_ips(org_uid): + """Query all ips that link to a cidr related to a specific org.""" + print(org_uid) + conn = connect() + sql = """SELECT i.ip_hash, i.ip, ct.network FROM ips i + JOIN cidrs ct on ct.cidr_uid = i.origin_cidr + where ct.organizations_uid = %(org_uid)s + and i.origin_cidr is not null + and (i.last_reverse_lookup < current_date - interval '7 days' or i.last_reverse_lookup is null) + """ + df = pd.read_sql(sql, conn, params={"org_uid": org_uid}) + 
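+    # Note (not in the original patch): the WHERE clause above only returns IPs
+    # whose last_reverse_lookup is older than seven days or null, so repeated
+    # runs of this script re-query stale rows rather than the whole table.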
conn.close() + return df + + +def link_domain_from_ip(ip_hash, ip, org_uid, data_source, failed_ips): + """From a provided ip find domains and link them in the db.""" + conn = connect() + found_domains = reverseLookup(ip, failed_ips) + for domain in found_domains: + cur = conn.cursor() + cur.callproc( + "link_ips_and_subs", + ( + ip_hash, + ip, + org_uid, + domain["sub_domain"], + data_source, + None, + domain["root"], + ), + ) + row = cur.fetchone() + print(row) + conn.commit() + cur.close() + return 1 + + +def run_ip_chunk(org, ips, thread): + """Run the provided chunk through the linking process.""" + org_uid = org["organizations_uid"] + count = 0 + start_time = time.time() + last_50 = time.time() + failed_ips = [] + for j, ip in ips.iterrows(): + count += 1 + if count % 50 == 0: + print(f"{thread} Currently Running ips: {count}/{len(ips)}") + print(f"{thread} {time.time() - last_50} seconds for the last 50 IPs") + last_50 = time.time() + try: + link_domain_from_ip( + ip["ip_hash"], ip["ip"], org_uid, "WhoisXML", failed_ips + ) + except requests.exceptions.SSLError as e: + print(e) + time.sleep(1) + continue + print(f"{thread} Ips took {time.time() - start_time} to link to subs") + + +def connect_subs_from_ips(orgs): + """For each org find all domains that are associated to an ip and create link in the ip_subs table.""" + for i, org in orgs.iterrows(): + print(f"Running on {org['name']}") + org_uid = org["organizations_uid"] + ips = query_ips(org_uid) + print(ips) + # run_ip_chunk(org,ips,"") + num_chunks = 8 + ips_split = np.array_split(ips, num_chunks) + + x = 0 + thread_list = [] + while x < len(ips_split): + thread_name = f"Thread {x+1}: " + # start thread + t = threading.Thread( + target=run_ip_chunk, args=(org, ips_split[x], thread_name) + ) + t.start() + thread_list.append(t) + x += 1 + + for thread in thread_list: + thread.join() + + print("All threads have finished.") diff --git a/src/adhoc/link_subs_and_ips_from_subs.py b/src/adhoc/link_subs_and_ips_from_subs.py new file mode 100644 index 00000000..dd98574c --- /dev/null +++ b/src/adhoc/link_subs_and_ips_from_subs.py @@ -0,0 +1,65 @@ +"""Link sub-domains and IPs from sub-domain lookups.""" +# Standard Python Libraries +import hashlib +import socket + +# Third-Party Libraries +import pandas as pd + +# cisagov Libraries +from pe_reports.data.db_query import connect + + +def find_ips(domain): + """Find the ip for a provided domain.""" + try: + ip = socket.gethostbyname(domain) + except Exception: + ip = None + print(ip) + return ip + + +def query_subs(org_uid): + """Query all subs for an organization.""" + conn = connect() + sql = """SELECT sd.* FROM sub_domains sd + JOIN root_domains rd on rd.root_domain_uid = sd.root_domain_uid + where rd.organizations_uid = %(org_uid)s + """ + df = pd.read_sql(sql, conn, params={"org_uid": org_uid}) + conn.close() + return df + + +def link_ip_from_domain(sub, root_uid, org_uid, data_source): + """Link IP from domain.""" + conn = connect() + ip = find_ips(sub) + if not ip: + return 0 + hash_object = hashlib.sha256(str(ip).encode("utf-8")) + ip_hash = hash_object.hexdigest() + cur = conn.cursor() + cur.callproc( + "link_ips_and_subs", (ip_hash, ip, org_uid, sub, data_source, root_uid, None) + ) + row = cur.fetchone() + print(row) + conn.commit() + cur.close() + return 1 + + +def connect_ips_from_subs(orgs): + """For each org, find all ips associated with its sub_domains and link them in the ips_subs table.""" + for i, org in orgs.iterrows(): + org_uid = org["organizations_uid"] + subs = 
query_subs(str(org_uid)) + for i, sub in subs.iterrows(): + sub_domain = sub["sub_domain"] + root_uid = sub["root_domain_uid"] + if sub_domain == "Null_Sub": + continue + link_ip_from_domain(sub_domain, root_uid, org_uid, "unknown") + print("Finished connecting ips from subs") diff --git a/src/adhoc/shodan_dedupe.py b/src/adhoc/shodan_dedupe.py new file mode 100644 index 00000000..a0c5f915 --- /dev/null +++ b/src/adhoc/shodan_dedupe.py @@ -0,0 +1,400 @@ +"""Shodan dedupe script.""" +# Standard Python Libraries +import hashlib +import logging +import time + +# Third-Party Libraries +import pandas as pd +import shodan + +# cisagov Libraries +from pe_reports.data.db_query import close, connect, execute_values, get_orgs_df + +states = [ + "AL", + "AK", + "AZ", + "AR", + "CA", + "CO", + "CT", + "DC", + "DE", + "FL", + "GA", + "HI", + "ID", + "IL", + "IN", + "IA", + "KS", + "KY", + "LA", + "ME", + "MD", + "MA", + "MI", + "MN", + "MS", + "MO", + "MT", + "NE", + "NV", + "NH", + "NJ", + "NM", + "NY", + "NC", + "ND", + "OH", + "OK", + "OR", + "PA", + "RI", + "SC", + "SD", + "TN", + "TX", + "UT", + "VT", + "VA", + "WA", + "WV", + "WI", + "WY", +] +state_names = [ + "Alaska", + "Alabama", + "Arkansas", + "American Samoa", + "Arizona", + "California", + "Colorado", + "Connecticut", + "Delaware", + "Florida", + "Georgia", + "Guam", + "Hawaii", + "Iowa", + "Idaho", + "Illinois", + "Indiana", + "Kansas", + "Kentucky", + "Louisiana", + "Massachusetts", + "Maryland", + "Maine", + "Michigan", + "Minnesota", + "Missouri", + "Mississippi", + "Montana", + "North Carolina", + "North Dakota", + "Nebraska", + "New Hampshire", + "New Jersey", + "New Mexico", + "Nevada", + "New York", + "Ohio", + "Oklahoma", + "Oregon", + "Pennsylvania", + "Puerto Rico", + "Rhode Island", + "South Carolina", + "South Dakota", + "Tennessee", + "Texas", + "Utah", + "Virginia", + "Virgin Islands", + "Vermont", + "Washington", + "Wisconsin", + "West Virginia", + "Wyoming", +] + + +def state_check(host_org): + """Check state.""" + found = False + if host_org: + for state in state_names: + if state in host_org: + return state + return found + + +def query_floating_ips(conn, org_id): + """Query floating IPs.""" + sql = """ + SELECT i.ip + FROM ips i + join ips_subs ip_s on ip_s.ip_hash = i.ip_hash + join sub_domains sd on sd.sub_domain_uid = ip_s.sub_domain_uid + join root_domains rd on rd.root_domain_uid = sd.root_domain_uid + WHERE rd.organizations_uid = %(org_id)s + AND i.origin_cidr is null; + """ + df = pd.read_sql(sql, conn, params={"org_id": org_id}) + ips = set(df["ip"]) + conn.close() + return ips + + +def query_cidrs(conn, org_id): + """Query Cidr.""" + print(org_id) + sql = """ + SELECT network, cidr_uid + FROM cidrs ct + join organizations o on o.organizations_uid = ct.organizations_uid + WHERE o.organizations_uid = %(org_id)s; + """ + df = pd.read_sql(sql, conn, params={"org_id": org_id}) + conn.close() + return df + + +def cidr_dedupe(cidrs, api, org_type): + """Dedupe CIDR.""" + ip_obj = [] + results = [] + for i, cidr in cidrs.iterrows(): + query = f"net:{cidr['network']}" + result = search(api, query, ip_obj, cidr["cidr_uid"], org_type) + results.append(result) + found = len([i for i in results if i != 0]) + logging.info(f"CIDRs with IPs found: {found}") + new_ips = pd.DataFrame(ip_obj) + if len(new_ips) > 0: + new_ips = new_ips.drop_duplicates(subset="ip", keep="first") + conn = connect() + except_clause = """ ON CONFLICT (ip) + DO + UPDATE SET shodan_results = EXCLUDED.shodan_results""" + execute_values(conn, new_ips, 
"public.ips", except_clause) + close(conn) + + +def ip_dedupe(api, ips, agency_type): + """Count number of IPs with data on Shodan.""" + matched = 0 + ips = list(ips) + float_ips = [] + for i in range(int(len(ips) / 100) + 1): + if (i + 1) * 100 > len(ips): + try: + hosts = api.host(ips[i * 100 : len(ips)]) + except shodan.exception.APIError: + try: + time.sleep(2) + hosts = api.host(ips[i * 100 : len(ips)]) + except Exception: + logging.error(f"{i} failed again") + continue + except shodan.APIError as e: + logging.error("Error: {}".format(e)) + else: + try: + hosts = api.host(ips[i * 100 : (i + 1) * 100]) + except shodan.exception.APIError: + time.sleep(2) + try: + hosts = api.host(ips[i * 100 : (i + 1) * 100]) + except shodan.APIError as err: + print("Error: {}".format(err)) + continue + if isinstance(hosts, list): + for h in hosts: + state = state_check(h["org"]) + hash_object = hashlib.sha256(str(h["ip_str"]).encode("utf-8")) + ip_hash = hash_object.hexdigest() + if state and agency_type == "FEDERAL": + float_ips.append( + { + "ip_hash": ip_hash, + "ip": h["ip_str"], + "shodan_results": False, + "origin_cidr": None, + } + ) + else: + float_ips.append( + { + "ip_hash": ip_hash, + "ip": h["ip_str"], + "shodan_results": True, + "origin_cidr": None, + } + ) + else: + state = state_check(hosts["org"]) + hash_object = hashlib.sha256(str(hosts["ip_str"]).encode("utf-8")) + ip_hash = hash_object.hexdigest() + if state and agency_type == "FEDERAL": + float_ips.append( + { + "ip_hash": ip_hash, + "ip": hosts["ip_str"], + "shodan_results": False, + "origin_cidr": None, + } + ) + else: + float_ips.append( + { + "ip_hash": ip_hash, + "ip": hosts["ip_str"], + "shodan_results": True, + "origin_cidr": None, + } + ) + matched = matched + len(hosts) + new_ips = pd.DataFrame(float_ips) + if len(new_ips) > 0: + new_ips = new_ips.drop_duplicates(subset="ip", keep="first") + conn = connect() + except_clause = """ ON CONFLICT (ip) + DO + UPDATE SET shodan_results = EXCLUDED.shodan_results""" + execute_values(conn, new_ips, "public.ips", except_clause) + close(conn) + + +def search(api, query, ip_obj, cidr_uid, org_type): + """Search Shodan API using query and add IPs to set.""" + # Wrap the request in a try/ except block to catch errors + try: + logging.info(query) + # Search Shodan + try: + results = api.search(query) + except shodan.exception.APIError: + time.sleep(2) + results = api.search(query) + # Show the results + for result in results["matches"]: + # if ":" in result["ip_str"]: + # print("ipv6 found ", result["ip_str"]) + # ip_type = "ipv6" + # else: + # ip_type = "ipv4" + state = state_check(result["org"]) + hash_object = hashlib.sha256(str(result["ip_str"]).encode("utf-8")) + ip_hash = hash_object.hexdigest() + if state and org_type == "FEDERAL": + ip_obj.append( + { + "ip_hash": ip_hash, + "ip": result["ip_str"], + "shodan_results": False, + "origin_cidr": cidr_uid, + } + ) + else: + ip_obj.append( + { + "ip_hash": ip_hash, + "ip": result["ip_str"], + "shodan_results": True, + "origin_cidr": cidr_uid, + } + ) + i = 1 + while i < results["total"] / 100: + try: + # Search Shodan + try: + results = api.search(query=query, page=i) + except shodan.exception.APIError: + time.sleep(2) + results = api.search(query, page=i) + # Show the results + for result in results["matches"]: + # if ":" in result["ip_str"]: + # print("ipv6 found ", result["ip_str"]) + # ip_type = "ipv6" + # else: + # ip_type = "ipv4" + state = state_check(result["org"]) + hash_object = 
hashlib.sha256(str(result["ip_str"]).encode("utf-8")) + ip_hash = hash_object.hexdigest() + if state and org_type == "FEDERAL": + ip_obj.append( + { + "ip_hash": ip_hash, + "ip": result["ip_str"], + "shodan_results": False, + "origin_cidr": cidr_uid, + } + ) + else: + ip_obj.append( + { + "ip_hash": ip_hash, + "ip": result["ip_str"], + "shodan_results": True, + "origin_cidr": cidr_uid, + } + ) + i = i + 1 + except shodan.APIError as e: + logging.error("Error: {}".format(e)) + logging.error(query) + results = {"total": 0} + except shodan.APIError as e: + logging.error("Error: {}".format(e)) + # IF it breaks to here it fails + logging.error(f"Failed on {query}") + return 0 + return results["total"] + + +def dedupe(orgs): + """Check list of IPs, CIDRs, ASNS, and FQDNs in Shodan and output set of IPs.""" + # get username and password from config file + key = "dkx4uJW66PXjihngybpdWuuJjNF7yxrE" + api = shodan.Shodan(key) + # , 'USTDA', 'VA', 'DHS_ST', 'DOC_BEA', 'DOC_BIS', 'DOC_CENSUS', 'DOC_NIST', 'DOC_NOAA', 'DOC_NTIA', 'DOC_OS', 'DOC_USPTO', 'NRC', 'EAC', 'ED', 'GSEC', 'GSA', 'HHS', 'HHS_NIH', 'NASA', 'OPM', 'CFPB', 'NSF', 'SSS', 'DOC_OIG', 'DOL_BLS' + for i, org in orgs.iterrows(): + # if org['cyhy_db_name'] in [ 'DHS_NPPD', 'DHS_CBP', 'DHS_CIS', 'DHS_FEMA', + # 'DHS_FLETC', 'DHS_ICE', 'DHS_TSA', 'DHS_USSS','DOI_OS-OAS', 'DOC','DOI_OSM', + # 'DOL','DOE','DOS','DOJ','DOT','HHS_FDA','HUD','SBA','DOI_OS','DOI_IBC','DOI', + # 'DOI_NPS','DOI_BIA','DOI_BLM', 'DOI_FWS', 'DOI_BSEE-BOEM-ONRR', 'DOI_BOR', 'EOP', + # 'EPA', 'SSA', 'USAID','USDA']: + # continue + logging.info(f"Running on {org['name']}") + conn = connect() + cidrs = query_cidrs(conn, org["organizations_uid"]) + logging.info(f"{len(cidrs)} cidrs found") + if len(cidrs) > 0: + cidr_dedupe(cidrs, api, org["agency_type"]) + conn = connect() + logging.info("Grabbing floating IPs") + ips = query_floating_ips(conn, org["organizations_uid"]) + logging.info("Got Ips") + if len(ips) > 0: + logging.info("Running dedupe on IPs") + ip_dedupe(api, ips, org["agency_type"]) + logging.info("Finished dedupe") + + +def main(): + """Get all organization information from shodan.""" + orgs = get_orgs_df() + orgs = orgs[orgs["report_on"] is True] + print(orgs) + + dedupe(orgs) + + +if __name__ == "__main__": + main() diff --git a/src/pe_reports/__init__.py b/src/pe_reports/__init__.py index c7a0b3ac..f8f032bf 100644 --- a/src/pe_reports/__init__.py +++ b/src/pe_reports/__init__.py @@ -21,6 +21,8 @@ # Stakeholder views from pe_reports.home.views import home_blueprint from pe_reports.stakeholder.views import stakeholder_blueprint +from pe_reports.stakeholder_full.views import stakeholder_full_blueprint +from pe_reports.stakeholder_lite.views import stakeholder_lite_blueprint from ._version import __version__ # noqa: F401 @@ -75,6 +77,8 @@ # Register the flask apps app.register_blueprint(stakeholder_blueprint) +app.register_blueprint(stakeholder_lite_blueprint) +app.register_blueprint(stakeholder_full_blueprint) # TODO: Add login blueprint. 
Issue #207 contains details # app.register_blueprint(manage_login_blueprint) app.register_blueprint(home_blueprint) diff --git a/src/pe_reports/data/db_query.py b/src/pe_reports/data/db_query.py index cc28cc36..05f8d78f 100644 --- a/src/pe_reports/data/db_query.py +++ b/src/pe_reports/data/db_query.py @@ -1,6 +1,7 @@ """Query the PE PostgreSQL database.""" # Standard Python Libraries +import logging import sys # Third-Party Libraries @@ -9,14 +10,12 @@ import psycopg2 from psycopg2 import OperationalError from psycopg2.extensions import AsIs - -# cisagov Libraries -from pe_reports import app +import psycopg2.extras as extras from .config import config # Setup logging to central file -LOGGER = app.config["LOGGER"] +LOGGER = logging.getLogger(__name__) CONN_PARAMS_DIC = config() @@ -48,6 +47,22 @@ def close(conn): return +def execute_values(conn, dataframe, table, except_condition=";"): + """INSERT into table, generic.""" + tpls = [tuple(x) for x in dataframe.to_numpy()] + cols = ",".join(list(dataframe.columns)) + sql = "INSERT INTO {}({}) VALUES %s" + sql = sql + except_condition + cursor = conn.cursor() + try: + extras.execute_values(cursor, sql.format(table, cols), tpls) + conn.commit() + print("Data inserted using execute_values() successfully..") + except (Exception, psycopg2.DatabaseError) as err: + show_psycopg2_exception(err) + cursor.close() + + def get_orgs(conn): """Query organizations table.""" try: @@ -64,6 +79,34 @@ def get_orgs(conn): close(conn) +def get_orgs_df(): + """Query organizations table for new orgs.""" + conn = connect() + try: + sql = """SELECT * FROM organizations""" + pe_orgs_df = pd.read_sql(sql, conn) + return pe_orgs_df + except (Exception, psycopg2.DatabaseError) as error: + logging.error("There was a problem with your database query %s", error) + finally: + if conn is not None: + close(conn) + + +def get_new_orgs(): + """Query organizations table for new orgs.""" + conn = connect() + try: + sql = """SELECT * FROM organizations WHERE report_on='False'""" + pe_orgs_df = pd.read_sql(sql, conn) + return pe_orgs_df + except (Exception, psycopg2.DatabaseError) as error: + logging.error("There was a problem with your database query %s", error) + finally: + if conn is not None: + close(conn) + + def query_creds_view(org_uid, start_date, end_date): """Query credentials view.""" conn = connect() diff --git a/src/pe_reports/stakeholder/templates/stakeholder_UI/home_stakeholder.html b/src/pe_reports/stakeholder/templates/stakeholder_UI/home_stakeholder.html index 900788bf..80fdb293 100644 --- a/src/pe_reports/stakeholder/templates/stakeholder_UI/home_stakeholder.html +++ b/src/pe_reports/stakeholder/templates/stakeholder_UI/home_stakeholder.html @@ -8,71 +8,67 @@ height="200" />
-
-
- +
+ - -
-
+ +
+
-
-
-
-
- Add external agency -
-
-
- {{ formExternal.hidden_tag() }} {{ - formExternal.cust.label(style='font-size: 24px') }} {{ - formExternal.cust(class='form-control') }} {{ - formExternal.custDomainAliases.label(style='font-size: 24px') }} - {{ formExternal.custDomainAliases(class='form-control') }} {{ - formExternal.custRootDomain.label(style='font-size: 24px') }} {{ - formExternal.custRootDomain(class='form-control') }} {{ - formExternal.custExecutives.label(style='font-size: 24px') }} {{ - formExternal.custExecutives(class='form-control') }} -
- {{ formExternal.submit(class='btn btn-primary') }} -
-
+
+
+
+
+ Add external agency +
+
+
+ {{ formExternal.hidden_tag() }} {{ + formExternal.cust.label(style='font-size: 24px') }} {{ + formExternal.cust(class='form-control') }} {{ + formExternal.custDomainAliases.label(style='font-size: 24px') }} {{ + formExternal.custDomainAliases(class='form-control') }} {{ + formExternal.custRootDomain.label(style='font-size: 24px') }} {{ + formExternal.custRootDomain(class='form-control') }} {{ + formExternal.custExecutives.label(style='font-size: 24px') }} {{ + formExternal.custExecutives(class='form-control') }} +
+ {{ formExternal.submit(class='btn btn-primary') }} +
- - + }); + - {% endblock %} -
+{% endblock %} diff --git a/src/pe_reports/stakeholder/views.py b/src/pe_reports/stakeholder/views.py index ed0ae20c..a18d8c46 100644 --- a/src/pe_reports/stakeholder/views.py +++ b/src/pe_reports/stakeholder/views.py @@ -6,18 +6,30 @@ import json import logging import os +import re import socket # Third-Party Libraries +from bs4 import BeautifulSoup from flask import Blueprint, flash, redirect, render_template, url_for +import nltk +from nltk import pos_tag, word_tokenize import psycopg2 import psycopg2.extras import requests +import spacy + +# If you are getting errors saying that a "en_core_web_lg" is loaded. Run the command " python -m spacy download en_core_web_trf" but might have to chagne the name fo the spacy model +import spacy.cli # cisagov Libraries from pe_reports.data.config import config from pe_reports.stakeholder.forms import InfoFormExternal +spacy.cli.download("en_core_web_lg") +nlp = spacy.load("en_core_web_lg") +# TODO + LOGGER = logging.getLogger(__name__) # CSG credentials @@ -519,6 +531,74 @@ def getalluserinfo(): ) +def getNames(url): + """Get the names from url data.""" + doc = nlp(getAbout(url)) + + d = [] + + for ent in doc.ents: + d.append((ent.label_, ent.text)) + + return d + + +def getAbout(url): + """Get stakeholder about page.""" + thepage = requests.get(url).text + + soup = BeautifulSoup(thepage, "lxml") + + body = soup.body.text + + body = body.replace("\n", " ") + body = body.replace("\t", " ") + body = body.replace("\r", " ") + body = body.replace("\xa0", " ") + # body = re.sub(r'[^ws]', '', body) + + return body + + +def theExecs(URL): + """Gather all executives names from data returned from about page url.""" + mytext = getAbout(URL) + + tokens = word_tokenize(mytext) + + thetag = pos_tag(tokens) + + ne_tree = nltk.ne_chunk(thetag) + + for x in ne_tree: + if "PERSON" in x: + print(x) + + regex_pattern = re.compile(r"[@_'’!#\-$%^&*()<>?/\|}{~:]") + + thereturn = getNames(URL) + + executives = [] + + for hy in thereturn: + + # print(hy) + + if ("PERSON" in hy) and (hy[1] not in executives) and (len(hy[1]) < 50): + # executives.append(hy[1]) + # print(hy[1]) + + # if not regex_pattern.search(hy[1]) and len(hy[1].split()) > 1 and not difflib.get_close_matches(hy[1], executives): + if not regex_pattern.search(hy[1]) and len(hy[1].split()) > 1: + person = hy[1].split(" ") + if len(person) <= 1: + # print(person) + executives.append(hy[1]) + # print(f'{hy[0]} {hy[1]}') + # print(executives) + return executives + + @stakeholder_blueprint.route("/stakeholder", methods=["GET", "POST"]) def stakeholder(): """Process form information, instantiate form and render page template.""" @@ -542,6 +622,7 @@ def stakeholder(): formExternal.custRootDomain.data = "" formExternal.custExecutives.data = "" allDomain = getAgencies(cust) + allExecutives = list(theExecs(custExecutives)) allSubDomain = getSubdomain(custRootDomainValue) allValidIP = getallsubdomainIPS(custRootDomainValue) @@ -570,7 +651,7 @@ def stakeholder(): custDomainAliases, custRootDomain, allValidIP, - custExecutives, + allExecutives, ) else: diff --git a/src/pe_reports/stakeholder_full/__init__.py b/src/pe_reports/stakeholder_full/__init__.py new file mode 100644 index 00000000..ababb128 --- /dev/null +++ b/src/pe_reports/stakeholder_full/__init__.py @@ -0,0 +1 @@ +"""Init file for module implementation.""" diff --git a/src/pe_reports/stakeholder_full/forms.py b/src/pe_reports/stakeholder_full/forms.py new file mode 100644 index 00000000..1513d95d --- /dev/null +++ b/src/pe_reports/stakeholder_full/forms.py 
@@ -0,0 +1,34 @@
+"""Create the stakeholder data input form."""
+
+# Third-Party Libraries
+from flask_wtf import FlaskForm
+from wtforms import StringField, SubmitField
+from wtforms.validators import DataRequired
+
+
+class InfoFormExternal(FlaskForm):
+    """Create web form to take user input on organization information/details."""
+
+    cust = StringField(
+        "What is the cyhy id for the stakeholder?", validators=[DataRequired()]
+    )
+    # TODO: The following form field may be used in a future update. Issue #208
+    # custIP = StringField(
+    #     "What is the stakeholder ip/cidr? *comma separate entries",
+    #     validators=[DataRequired()],
+    # )
+    custRootDomain = StringField(
+        "What is the root domain for this stakeholder? " "*comma separate entries"
+    )
+    custDomainAliases = StringField(
+        "What are the organization aliases? " "*comma separate entries"
+    )
+    # TODO: The following form field may be used in a future update. Issue #208
+    # custSubDomain = StringField(
+    #     "What is the sub-domain for this stakeholder?" " *comma separate entries"
+    # )
+    custExecutives = StringField(
+        "What is the URL for the executives of this stakeholder? "
+        "*comma separate entries"
+    )
+    submit = SubmitField("Submit", render_kw={"onclick": "loading()"})
diff --git a/src/pe_reports/stakeholder_full/templates/stakeholder_full_UI/home_stakeholder_full.html b/src/pe_reports/stakeholder_full/templates/stakeholder_full_UI/home_stakeholder_full.html
new file mode 100644
index 00000000..ac761591
--- /dev/null
+++ b/src/pe_reports/stakeholder_full/templates/stakeholder_full_UI/home_stakeholder_full.html
@@ -0,0 +1,117 @@
+{% extends "base.html" %} {% block content %}
+
+
+ CISA seal image +
+

Instructions

+
    +
  1. + Add all orgs through the "Add New Stakeholders" form button +
  2. + Verify you have added all the orgs for this run +
  3. + Fill the IPs for all orgs by clicking the "Fill IPs" button. This will + take a while to run +
  4. + Launch the "Link Subs from IPs" button. This can take over 11 days to + run, so just make sure it is started +
+

+
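 + 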
+ +
+
+ + + +
+ +
+ + + +
+
+ +
+
+
+
+ Add New Stakeholder +
+
+
+ {{ formExternal.hidden_tag() }} {{ + formExternal.cust.label(style='font-size: 24px') }} {{ + formExternal.cust(class='form-control') }} {{ + formExternal.custDomainAliases.label(style='font-size: 24px') }} + {{ formExternal.custDomainAliases(class='form-control') }} {{ + formExternal.custRootDomain.label(style='font-size: 24px') }} {{ + formExternal.custRootDomain(class='form-control') }} {{ + formExternal.custExecutives.label(style='font-size: 24px') }} {{ + formExternal.custExecutives(class='form-control') }} +
+ {{ formExternal.submit(class='btn btn-primary') }} +
+
+
+
+
+
+ + + + {% endblock %} +
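The views module that follows wires this template to the adhoc scripts added earlier in the diff. As a rough sketch (not part of the patch itself), the happy path of the /stakeholder_full handler boils down to the sequence below; every imported helper is introduced elsewhere in this diff, while the onboard_stakeholder wrapper is hypothetical. # Sketch only -- a condensed, hypothetical driver mirroring what # stakeholder_full() in the views module below actually does. from adhoc.enumerate_subs_from_root import enumerate_and_save_subs, query_roots from adhoc.fill_cidrs_from_cyhy_assets import fill_cidrs from adhoc.link_subs_and_ips_from_subs import connect_ips_from_subs from adhoc.shodan_dedupe import dedupe from pe_reports.stakeholder_full.views import ( get_cidrs_and_ips, insert_roots, setNewCSGOrg, ) def onboard_stakeholder(cust, aliases, root_domains, exec_names, orgs): """Hypothetical wrapper; orgs is the dataframe set_org_to_report_on() returns.""" for _, org in orgs.iterrows(): insert_roots(org, root_domains) # save roots + resolved IPs for _, root in query_roots(org["organizations_uid"]).iterrows(): # enumerate and save subdomains for each root enumerate_and_save_subs(root["root_domain_uid"], root["root_domain"]) fill_cidrs(orgs) # pull CIDRs over from cyhy assets connect_ips_from_subs(orgs) # resolve subs and link their IPs ips = get_cidrs_and_ips(orgs["organizations_uid"].iloc[0]) setNewCSGOrg(cust, aliases, root_domains, ips, exec_names) # push to CSG dedupe(orgs) # shodan dedupe pass The slower steps (fill_ips_from_cidrs and connect_subs_from_ips) are deliberately left out of the request path and exposed as the separate /fill_IPs and /link_IPs routes instead. 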
diff --git a/src/pe_reports/stakeholder_full/views.py b/src/pe_reports/stakeholder_full/views.py new file mode 100644 index 00000000..158318c5 --- /dev/null +++ b/src/pe_reports/stakeholder_full/views.py @@ -0,0 +1,678 @@ +"""Classes and associated functions that render the UI app pages.""" + +# Standard Python Libraries +import datetime +from datetime import date +from ipaddress import ip_address, ip_network +import json +import logging +import os +import re + +# import os +import socket + +# Third-Party Libraries +from bs4 import BeautifulSoup +from flask import Blueprint, flash, redirect, render_template, url_for +import nltk +from nltk import pos_tag, word_tokenize +import numpy as np +import pandas as pd +import psycopg2 +import psycopg2.extras +import requests +import spacy + +# cisagov Libraries +from adhoc.enumerate_subs_from_root import enumerate_and_save_subs, query_roots +from adhoc.fill_cidrs_from_cyhy_assets import fill_cidrs +from adhoc.fill_ips_from_cidrs import fill_ips_from_cidrs +from adhoc.link_subs_and_ips_from_ips import connect_subs_from_ips +from adhoc.link_subs_and_ips_from_subs import connect_ips_from_subs +from adhoc.shodan_dedupe import dedupe +from pe_reports.data.config import config +from pe_reports.data.db_query import execute_values, get_orgs_df +from pe_reports.stakeholder_full.forms import InfoFormExternal + +# import traceback + + +# If you are getting errors saying that a "en_core_web_lg" is loaded. Run the command " python -m spacy download en_core_web_trf" but might have to chagne the name fo the spacy model +nlp = spacy.load("en_core_web_lg") + +LOGGER = logging.getLogger(__name__) + +# CSG credentials +API_Client_ID = os.getenv("CSGUSER") +API_Client_secret = os.environ.get("CSGSECRET") +API_WHOIS = os.environ.get("WHOIS_VAR") + +conn = None +cursor = None +thedateToday = date.today().strftime("%Y-%m-%d") + + +def getToken(): + """Get authorization token from Cybersixgill (CSG).""" + LOGGER.info(API_Client_ID) + LOGGER.info(API_Client_secret) + d = { + "grant_type": "client_credentials", + "client_id": f"{API_Client_ID}", + "client_secret": f"{API_Client_secret}", + } + r = requests.post("https://api.cybersixgill.com/auth/token", data=d) + LOGGER.info(r) + r = r.text.split(":") + r = r[1].lstrip('"').rsplit('"')[0] + return r + + +def getAgencies(cyhy_db_id): + """Get all agency names from P&E database.""" + global conn, cursor + + try: + params = config() + + conn = psycopg2.connect(**params) + + if conn: + LOGGER.info( + "There was a connection made to" + "the database and the query was executed." 
+ ) + + cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) + + query = """ + select organizations_uid, name + from organizations + where cyhy_db_name = %s;""" + + cursor.execute(query, (cyhy_db_id)) + + result = cursor.fetchall() + resultDict = {} + + for row in result: + # row[0] = org UUID + # row[1] = org name + resultDict[f"{row[0]}"] = f"{row[1]}" + return resultDict + + except (Exception, psycopg2.DatabaseError) as err: + LOGGER.error("There was a problem logging into the psycopg database %s", err) + finally: + if conn is not None: + cursor.close() + conn.close() + LOGGER.info("The connection/query was completed and closed.") + + return resultDict + + +def set_org_to_report_on(cyhy_db_id): + """Query cyhy assets.""" + sql = """ + SELECT * + FROM organizations o + where o.cyhy_db_name = %(org_id)s + """ + params = config() + conn = psycopg2.connect(**params) + df = pd.read_sql_query(sql, conn, params={"org_id": cyhy_db_id}) + + if len(df) < 1: + LOGGER.error("No org found for that cyhy id") + return 0 + + for i, row in df.iterrows(): + if row["report_on"] is True and row["premium_report"] is True: + continue + cursor = conn.cursor() + sql = """UPDATE organizations + SET report_on = True, premium_report = True + WHERE organizations_uid = %s""" + uid = row["organizations_uid"] + cursor.execute(sql, [uid]) + conn.commit() + cursor.close() + conn.close() + return df + + +def get_data_source_uid(source): + """Get data source uid.""" + params = config() + conn = psycopg2.connect(**params) + cur = conn.cursor() + sql = """SELECT * FROM data_source WHERE name = '{}'""" + cur.execute(sql.format(source)) + source = cur.fetchone()[0] + cur.close() + cur = conn.cursor() + # Update last_run in data_source table + date = datetime.datetime.today().strftime("%Y-%m-%d") + sql = """update data_source set last_run = '{}' + where name = '{}';""" + cur.execute(sql.format(date, source)) + cur.close() + conn.close() + return source + + +def get_cidrs_and_ips(org_uid): + """Query all cidrs and ips for an organization.""" + params = config() + conn = psycopg2.connect(**params) + cur = conn.cursor() + sql = """SELECT network from cidrs where + organizations_uid = %s;""" + cur.execute(sql, [org_uid]) + cidrs = cur.fetchall() + sql = """ + SELECT i.ip + FROM ips i + join ips_subs ip_s on ip_s.ip_hash = i.ip_hash + join sub_domains sd on sd.sub_domain_uid = ip_s.sub_domain_uid + join root_domains rd on rd.root_domain_uid = sd.root_domain_uid + WHERE rd.organizations_uid = %s + AND i.origin_cidr is null; + """ + cur.execute(sql, [org_uid]) + ips = cur.fetchall() + conn.close() + cidrs_ips = cidrs + ips + # cidrs_ips = [item for sublist in cidrs_ips for item in sublist] + cidrs_ips = [x[0] for x in cidrs_ips] + cidrs_ips = validateIP(cidrs_ips) + # cidrs_ips.remove("0.0.0.9") + # cidrs_ips.remove("0.0.0.1") + # cidrs_ips.remove("0.0.0.4") + # cidrs_ips.remove("0.0.0.2") + # cidrs_ips.remove("10.171.32.57") + # cidrs_ips.remove("10.171.40.44") + # cidrs_ips.remove("10.176.56.23") + # cidrs_ips.remove("127.0.0.1") + LOGGER.info(cidrs_ips) + return cidrs_ips + + +def insert_roots(org, domain_list): + """Insert root domains into the database.""" + source_uid = get_data_source_uid("P&E") + roots_list = [] + for domain in domain_list: + try: + ip = socket.gethostbyname(domain) + except Exception: + ip = np.nan + root = { + "organizations_uid": org["organizations_uid"], + "root_domain": domain, + "ip_address": ip, + "data_source_uid": source_uid, + "enumerate_subs": True, + } + roots_list.append(root) + + 
roots = pd.DataFrame(roots_list) + LOGGER.info(roots) + except_clause = """ ON CONFLICT (root_domain, organizations_uid) + DO NOTHING;""" + params = config() + conn = psycopg2.connect(**params) + execute_values(conn, roots, "public.root_domains", except_clause) + + +def getRootID(org_UUID): + """Get all root domain names from P&E database.""" + global conn, cursor + resultDict = {} + try: + params = config() + + conn = psycopg2.connect(**params) + + if conn: + LOGGER.info( + "There was a connection made to the database and the query was executed " + ) + + cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) + query = "select root_domain_uid, organization_name from" + " root_domains where organizations_uid='{}';" + + cursor.execute(query.format(org_UUID)) + + result = cursor.fetchall() + + for row in result: + # row[0] = root UUID + # row[1] = org name + resultDict[f"{row[1]}"] = f"{row[0]}" + return resultDict + + except (Exception, psycopg2.DatabaseError) as err: + LOGGER.error("There was a problem logging into the psycopg database %s", err) + finally: + if conn is not None: + cursor.close() + conn.close() + LOGGER.info("The connection/query was completed and closed.") + + return resultDict + + +def setCustomerExternalCSG( + customer, customerIP, customerRootDomain, customerSubDomain, customerExecutives +): + """Insert customer not in cyhyDB into the PE-Reports database.""" + global conn, cursor + + iplist = [] + domainlist = [] + try: + LOGGER.info("Starting insert into database...") + + params = config() + + conn = psycopg2.connect(**params) + + if conn: + + LOGGER.info( + "There was a connection made to" + " the database and the query was executed " + ) + + cursor = conn.cursor() + + for ip in customerIP: + iplist.append(ip) + + cursor.execute( + f"insert into organizations(domain_name," + f" domain_ip," + f" date_saved) " + f"values('{customer}'," + f" '{ip}'," + f"'{thedateToday}');" + ) + for domain in customerRootDomain: + domainlist.append(domain) + cursor.execute( + f"insert into domain_assets(domain_name," + f" domain_ip," + f" date_saved) " + f"values('{customer}'," + f" '{ip}', '{thedateToday}');" + ) + + except (Exception, psycopg2.DatabaseError) as err: + LOGGER.error("There was a problem logging into the psycopg database %s", err) + finally: + if conn is not None: + conn.commit() + cursor.close() + conn.close() + LOGGER.info("The connection/query was completed and closed.") + + return iplist + + +def theaddress(domain): + """Get actual IP address of domain.""" + gettheAddress = "" + try: + gettheAddress = socket.gethostbyname(domain) + except socket.gaierror: + LOGGER.info("There is a problem with the domain that you selected") + + return gettheAddress + + +def verifyIPv4(custIP): + """Verify if parameter is a valid IPv4 IP address.""" + try: + if ip_address(custIP): + return True + + else: + return False + + except ValueError as err: + LOGGER.error("The address is incorrect, %s", err) + return False + + +def verifyCIDR(custIP): + """Verify if parameter is a valid CIDR block IP address.""" + try: + if ip_network(custIP): + return True + + else: + return False + + except ValueError as err: + LOGGER.error("The CIDR is incorrect, %s", err) + return False + + +def validateIP(custIP): + """ + Verify IPv4 and CIDR. + + Collect address information into a list that is ready for DB insertion. 
+ """ + verifiedIP = [] + for the_ip in custIP: + if verifyCIDR(the_ip) or verifyIPv4(the_ip): + verifiedIP.append(the_ip) + return verifiedIP + + +def getOrganizations(): + """Get all organization details from Cybersixgill via API.""" + url = "https://api.cybersixgill.com/multi-tenant/organization" + + headers = { + "Content-Type": "application/json", + "Cache-Control": "no-cache", + "Authorization": f"Bearer {getToken()}", + } + + response = requests.get(url, headers=headers).json() + return response + + +def setNewCSGOrg(newOrgName, orgAliases, orgdomainNames, orgIP, orgExecs): + """Set a new stakeholder name at CSG.""" + newOrganization = json.dumps( + { + "name": f"{newOrgName}", + "organization_commercial_category": "customer", + "countries": ["worldwide"], + "industries": ["Government"], + } + ) + url = "https://api.cybersixgill.com/multi-tenant/organization" + + headers = { + "Content-Type": "application/json", + "Cache-Control": "no-cache", + "Authorization": f"Bearer {getToken()}", + } + + response = requests.post(url, headers=headers, data=newOrganization).json() + + newOrgID = response["id"] + + if newOrgID: + LOGGER.info("A new org_id was created: %s", newOrgID) + + setOrganizationUsers(newOrgID) + setOrganizationDetails(newOrgID, orgAliases, orgdomainNames, orgIP, orgExecs) + + return response + + +def setOrganizationUsers(org_id): + """Set CSG user permissions at new stakeholder.""" + role1 = "5d23342df5feaf006a8a8929" + role2 = "5d23342df5feaf006a8a8927" + id_role1 = "610017c216948d7efa077a52" + csg_role_id = "role_id" + csg_user_id = "user_id" + for user in getalluserinfo(): + userrole = user[csg_role_id] + user_id = user[csg_user_id] + + if ( + (userrole == role1) + and (user_id != id_role1) + or userrole == role2 + and user_id != id_role1 + ): + + url = ( + f"https://api.cybersixgill.com/multi-tenant/organization/" + f"{org_id}/user/{user_id}?role_id={userrole}" + ) + + headers = { + "Content-Type": "application/json", + "Cache-Control": "no-cache", + "Authorization": f"Bearer {getToken()}", + } + + response = requests.post(url, headers=headers).json() + LOGGER.info(response) + + +def setOrganizationDetails(org_id, orgAliases, orgDomain, orgIP, orgExecs): + """Set stakeholder details at newly created. + + stakeholder at CSG portal via API. 
+ """ + LOGGER.info("The following is from setting details") + LOGGER.info("The org_id is %s", org_id) + LOGGER.info("The orgAliases is %s", orgAliases) + LOGGER.info("The orgDomain is %s", orgDomain) + LOGGER.info("The orgIP is %s", orgIP) + LOGGER.info("The orgExecs is %s", orgExecs) + newOrganizationDetails = json.dumps( + { + "organization_aliases": {"explicit": orgAliases}, + "domain_names": {"explicit": orgDomain}, + "ip_addresses": {"explicit": orgIP}, + "executives": {"explicit": orgExecs}, + } + ) + url = f"https://api.cybersixgill.com/multi-tenant/" f"organization/{org_id}/assets" + + headers = { + "Content-Type": "application/json", + "Cache-Control": "no-cache", + "Authorization": f"Bearer {getToken()}", + } + + response = requests.put(url, headers=headers, data=newOrganizationDetails).json() + LOGGER.info("The response is %s", response) + + +def getalluserinfo(): + """Get CSG user permission role information from CSG.""" + userInfo = getOrganizations()[1]["assigned_users"] + + return userInfo + + +stakeholder_full_blueprint = Blueprint( + "stakeholder_full", __name__, template_folder="templates/stakeholder_full_UI" +) + + +def getNames(url): + """Retrieve all executive names from URL.""" + doc = nlp(getAbout(url)) + + d = [] + + for ent in doc.ents: + d.append((ent.label_, ent.text)) + + return d + + +def getAbout(url): + """Retrieve stakeholder about page.""" + thepage = requests.get(url).text + + soup = BeautifulSoup(thepage, "lxml") + + body = soup.body.text + + body = body.replace("\n", " ") + body = body.replace("\t", " ") + body = body.replace("\r", " ") + body = body.replace("\xa0", " ") + # body = re.sub(r'[^ws]', '', body) + + return body + + +def theExecs(URL): + """Isolate executive names from information.""" + mytext = getAbout(URL) + + tokens = word_tokenize(mytext) + + thetag = pos_tag(tokens) + + ne_tree = nltk.ne_chunk(thetag) + + for x in ne_tree: + if "PERSON" in x: + print(x) + + regex_pattern = re.compile(r"[@_'’!#\-$%^&*()<>?/\|}{~:]") + + thereturn = getNames(URL) + + executives = [] + + for hy in thereturn: + + # print(hy) + + if ("PERSON" in hy) and (hy[1] not in executives) and (len(hy[1]) < 50): + # executives.append(hy[1]) + # print(hy[1]) + + # if not regex_pattern.search(hy[1]) and len(hy[1].split()) > 1 and not difflib.get_close_matches(hy[1], executives): + if not regex_pattern.search(hy[1]) and len(hy[1].split()) > 1: + person = hy[1].split(" ") + if len(person) <= 1: + # print(person) + executives.append(hy[1]) + # print(f'{hy[0]} {hy[1]}') + # print(executives) + return executives + + +@stakeholder_full_blueprint.route("/stakeholder_full", methods=["GET", "POST"]) +def stakeholder_full(): + """Process form information, instantiate form and render page template.""" + cust = False + custDomainAliases = False + custRootDomain = False + custExecutives = False + + formExternal = InfoFormExternal() + + if formExternal.validate_on_submit(): + LOGGER.info("Got to the submit validate") + cust = formExternal.cust.data + custDomainAliases = formExternal.custDomainAliases.data.split(",") + custRootDomain = formExternal.custRootDomain.data.replace(" ", "").split(",") + # custRootDomainValue = custRootDomain[0] + custExecutives = formExternal.custExecutives.data + formExternal.cust.data = "" + formExternal.custDomainAliases = "" + formExternal.custRootDomain.data = "" + formExternal.custExecutives.data = "" + + allExecutives = list(theExecs(custExecutives)) + + orgs = set_org_to_report_on(cust) + + if orgs.empty: + LOGGER.info("No org found for the given 
cyhy_id") + flash(f"{cust} is not a valid cyhy_id", "warning") + return redirect(url_for("stakeholder_full.stakeholder_full")) + elif len(orgs) == 1: + try: + # Add roots and enumerate for subs + for i, org in orgs.iterrows(): + insert_roots(org, custRootDomain) + LOGGER.info( + "root domains have been successfully added to the database" + ) + roots = query_roots(org["organizations_uid"]) + for j, root in roots.iterrows(): + enumerate_and_save_subs( + root["root_domain_uid"], root["root_domain"] + ) + LOGGER.info( + "subdomains have been successfully added to the database" + ) + + # Fill the cidrs + fill_cidrs(orgs) + LOGGER.info("Filled all cidrs") + + # Fill IPs table by enumerating CIDRs (all orgs) + # fill_ips_from_cidrs() + + # Connect to subs from IPs table (only new orgs) + # connect_subs_from_ips(orgs) + # LOGGER.info("Filled and linked all IPs") + + # Connect to IPs from subs table (only new orgs) + connect_ips_from_subs(orgs) + + allValidIP = get_cidrs_and_ips(orgs["organizations_uid"].iloc[0]) + + setNewCSGOrg( + cust, + custDomainAliases, + custRootDomain, + allValidIP, + allExecutives, + ) + + # Run pe_dedupe + LOGGER.info("Running dedupe:") + dedupe(orgs) + LOGGER.info("done") + except ValueError as e: + LOGGER.error(f"An error occured: {e}") + flash(f"An error occured: {e}", "warning") + return redirect(url_for("stakeholder_full.stakeholder_full")) + else: + flash( + "multiple orgs were found for the provided cyhy_id, this should not be possible", + "danger", + ) + + return render_template( + "home_stakeholder_full.html", + formExternal=formExternal, + cust=cust, + custRootDomain=custRootDomain, + custExecutives=custExecutives, + custDomainAliases=custDomainAliases, + ) + + +@stakeholder_full_blueprint.route("/link_IPs", methods=["GET", "POST"]) +def link_IPs(): + """Run link IPs script on all orgs that are set to report_on.""" + orgs = get_orgs_df() + report_orgs = orgs[orgs["report_on"] is True] + connect_subs_from_ips(report_orgs) + LOGGER.info("Filled and linked all IPs") + return "nothing" + + +@stakeholder_full_blueprint.route("/fill_IPs", methods=["GET", "POST"]) +def fill_IPs(): + """Run link IPs script on all orgs that are set to report_on.""" + logging.info("Filling IPS") + fill_ips_from_cidrs() + logging.info("Done Filling IPS") + return "nothing" diff --git a/src/pe_reports/stakeholder_lite/__init__.py b/src/pe_reports/stakeholder_lite/__init__.py new file mode 100644 index 00000000..ababb128 --- /dev/null +++ b/src/pe_reports/stakeholder_lite/__init__.py @@ -0,0 +1 @@ +"""Init file for module implementation.""" diff --git a/src/pe_reports/stakeholder_lite/forms.py b/src/pe_reports/stakeholder_lite/forms.py new file mode 100644 index 00000000..26f7a30b --- /dev/null +++ b/src/pe_reports/stakeholder_lite/forms.py @@ -0,0 +1,31 @@ +"""Create the stakeholder data input form.""" + +# Third-Party Libraries +from flask_wtf import FlaskForm +from wtforms import StringField, SubmitField +from wtforms.validators import DataRequired + + +class InfoFormExternal(FlaskForm): + """Create web form to take user input on organization information/details.""" + + orgCount = StringField("How many organizations?", validators=[DataRequired()]) + # TODO: The following form field may be used in a future update. Issue #208 + # custIP = StringField( + # "What is the stakeholder ip/cidr? *comma separate entries", + # validators=[DataRequired()], + # ) + # custRootDomain = StringField( + # "What is the root domain for this stakeholder lite? 
" "*comma separate entries" + # ) + # custDomainAliases = StringField( + # "What are the organization aliases? " "*comma separate entries" + # ) + # # TODO: The following form field may be used in a future update. Issue #208 + # # custSubDomain = StringField( + # # "What is the sub-domain for this stakeholder?" " *comma separate entries" + # # ) + # custExecutives = StringField( + # "Who are the executives for this stakeholder? " "*comma separate entries" + # ) + submit = SubmitField("Submit", render_kw={"onclick": "loading()"}) diff --git a/src/pe_reports/stakeholder_lite/templates/stakeholder_lite_UI/home_stakeholder_lite.html b/src/pe_reports/stakeholder_lite/templates/stakeholder_lite_UI/home_stakeholder_lite.html new file mode 100644 index 00000000..7dfe7415 --- /dev/null +++ b/src/pe_reports/stakeholder_lite/templates/stakeholder_lite_UI/home_stakeholder_lite.html @@ -0,0 +1,68 @@ +{% extends "base.html" %} {% block content %} + +
+{# CISA seal image #}
+
+<div class="container">
+  <div class="row">
+    <div class="col">
+      <div class="card">
+        <div class="card-header">Add external agency</div>
+        <div class="card-body">
+          <form method="POST" action="">
+            {{ formExternal.hidden_tag() }}
+            {{ formExternal.orgCount.label(style='font-size: 24px') }}
+            {{ formExternal.orgCount(class='form-control') }}
+            <br />
+            {{ formExternal.submit(class='btn btn-primary') }}
+          </form>
+        </div>
+      </div>
+    </div>
+  </div>
+</div>
+
+{# loading() helper script referenced by the submit button #}
+{% endblock %}
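Since the template binds `formExternal.orgCount` and a submit button whose `onclick="loading()"` handler comes from `render_kw` in forms.py, one way to sanity-check the rendered markup is Flask's test request context. This is a sketch, not project tooling; the bare `Flask(__name__)` app and config values are assumptions for illustration.

    from flask import Flask

    from pe_reports.stakeholder_lite.forms import InfoFormExternal

    app = Flask(__name__)
    app.config["SECRET_KEY"] = "test"        # required by FlaskForm when CSRF is on
    app.config["WTF_CSRF_ENABLED"] = False   # simplify rendering outside a real request

    with app.test_request_context():
        form = InfoFormExternal()
        print(form.orgCount())  # renders the <input> markup
        print(form.submit())    # renders the button with onclick="loading()"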
diff --git a/src/pe_reports/stakeholder_lite/views.py b/src/pe_reports/stakeholder_lite/views.py
new file mode 100644
index 00000000..462821e8
--- /dev/null
+++ b/src/pe_reports/stakeholder_lite/views.py
@@ -0,0 +1,88 @@
+"""Classes and associated functions that render the UI app pages."""
+
+# Standard Python Libraries
+from datetime import date
+import logging
+import os
+import time
+
+# Third-Party Libraries
+from flask import Blueprint, redirect, render_template, url_for
+
+# cisagov Libraries
+from adhoc.fill_cidrs_from_cyhy_assets import fill_cidrs
+from adhoc.fill_ips_from_cidrs import fill_ips_from_cidrs
+from adhoc.link_subs_and_ips_from_ips import connect_subs_from_ips
+from adhoc.link_subs_and_ips_from_subs import connect_ips_from_subs
+from adhoc.shodan_dedupe import dedupe
+from pe_reports.data.db_query import get_new_orgs
+from pe_reports.stakeholder_lite.forms import InfoFormExternal
+
+LOGGER = logging.getLogger(__name__)
+
+# CSG credentials
+API_Client_ID = os.getenv("CSGUSER")
+API_Client_secret = os.environ.get("CSGSECRET")
+API_WHOIS = os.environ.get("WHOIS_VAR")
+
+conn = None
+cursor = None
+thedateToday = date.today().strftime("%Y-%m-%d")
+
+
+def getAgenciesByCount(orgCount):
+    """Get a random sample of new organizations from the P&E database."""
+    all_orgs_df = get_new_orgs()
+    LOGGER.info(all_orgs_df)
+    new_orgs_df = all_orgs_df.sample(n=orgCount)
+    # TODO: Update new orgs to report_on
+    # TODO: Maybe add an assets_collected column so we don't need to collect CIDRs every time
+    LOGGER.info(new_orgs_df)
+    return new_orgs_df
+
+
+stakeholder_lite_blueprint = Blueprint(
+    "stakeholder_lite", __name__, template_folder="templates/stakeholder_lite_UI"
+)
+
+
+@stakeholder_lite_blueprint.route("/stakeholder_lite", methods=["GET", "POST"])
+def stakeholder_lite():
+    """Process form information, instantiate form and render page template."""
+    orgCount = False
+
+    formExternal = InfoFormExternal()
+
+    if formExternal.validate_on_submit():
+        start_time = time.time()
+
+        LOGGER.info("Form submission validated")
+        # The field is a StringField, so convert the submitted count to an integer
+        orgCount = int(formExternal.orgCount.data)
+        orgs = getAgenciesByCount(orgCount)
+
+        # Fill cidrs table (only new orgs)
+        fill_cidrs(orgs)
+        LOGGER.info("Filled all CIDRs")
+
+        # Fill IPs table by enumerating CIDRs (all orgs)
+        fill_ips_from_cidrs()
+
+        # Connect to subs from IPs table (only new orgs)
+        connect_subs_from_ips(orgs)
+        LOGGER.info("Filled all IPs")
+
+        # Connect to IPs from subs table (only new orgs)
+        connect_ips_from_subs(orgs)
+
+        # Run pe_dedupe
+        LOGGER.info("Running dedupe:")
+        dedupe(orgs)
+
+        LOGGER.info("--- %s seconds ---", time.time() - start_time)
+
+        return redirect(url_for("stakeholder_lite.stakeholder_lite"))
+    return render_template(
+        "home_stakeholder_lite.html",
+        formExternal=formExternal,
+        orgCount=orgCount,
+    )
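`orgCount` arrives as a string from the `StringField`, so the `int(...)` conversion in the route above raises `ValueError` on non-numeric input. A hedged sketch of a defensive parse (the helper name is hypothetical, not part of the PR):

    def parse_org_count(raw):
        """Return the submitted count as a positive int, or None if invalid."""
        try:
            count = int(raw)
        except (TypeError, ValueError):
            return None
        return count if count > 0 else None

    # e.g. inside the route:
    # orgCount = parse_org_count(formExternal.orgCount.data)
    # if orgCount is None: flash a warning and re-render instead of crashing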
diff --git a/src/pe_reports/templates/base.html b/src/pe_reports/templates/base.html
index 0bfc19a7..d4377a0d 100644
--- a/src/pe_reports/templates/base.html
+++ b/src/pe_reports/templates/base.html
@@ -16,13 +16,15 @@
     href="{{ url_for('static', filename='css/custom.css') }}"
   />
       Welcome to P&E Reports
+      {% with messages = get_flashed_messages(with_categories=true) %}
+        {% if messages %}
+          {% for category, message in messages %}
+            <div class="alert alert-{{ category }}">{{ message }}</div>
+          {% endfor %}
+        {% endif %}
+      {% endwith %}
+      {% block content %}{% endblock %}
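The flashed-messages block relies on flash categories doubling as alert class suffixes, which matches how the stakeholder routes call `flash(..., "warning")` and `flash(..., "danger")`. A minimal sketch of the route side of that contract; the route itself is hypothetical, added only for illustration:

    from flask import flash, redirect, url_for

    @stakeholder_full_blueprint.route("/example")  # hypothetical route
    def example():
        # category "warning" renders as <div class="alert alert-warning"> in base.html
        flash("example-id is not a valid cyhy_id", "warning")
        # category "danger" renders as <div class="alert alert-danger">
        flash("Multiple orgs were found for the provided cyhy_id", "danger")
        return redirect(url_for("stakeholder_full.stakeholder_full"))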