From 71492f1fb83727868e64cdaad0c5b5caea76b5ab Mon Sep 17 00:00:00 2001 From: Sean Preston Date: Fri, 3 Jun 2022 08:20:35 -0400 Subject: [PATCH] [#557] MSSQL discovery script (#581) * adds script to discover mssql datastore compatibility * make prints consistent * updates changelog * add URL template * add warning * store columns correctly * move uncommitted secrets to another file to add to .gitignore * remove empty secrets file, add to gitignore * add comment explaining lack of secrets --- .gitignore | 5 ++- CHANGELOG.md | 3 ++ scripts/mssql_discover.py | 79 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 scripts/mssql_discover.py diff --git a/.gitignore b/.gitignore index 19ec29a7d..c2d8e5c1e 100644 --- a/.gitignore +++ b/.gitignore @@ -138,4 +138,7 @@ envfiles/ fides_uploads # Prevent SaaS configs from being committed -saas_config.toml \ No newline at end of file +saas_config.toml + +# Script secrets +scripts/secrets.py \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e9ae4ea4..42398565d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,9 @@ The types of changes are: * Use the `RuleResponse` schema within the `PrivacyRequestReposnse` schema [#580](https://github.com/ethyca/fidesops/pull/580) * Updated the webserver to use `PORT` config variable from the `fidesops.toml` file [#586](https://github.com/ethyca/fidesops/pull/586) +### Developer Experience +* Adds a script for MSSQL schema exploration [#581](https://github.com/ethyca/fidesops/pull/581) + ## [1.5.1](https://github.com/ethyca/fidesops/compare/1.5.0...1.5.1) - 2022-05-27 ### Added diff --git a/scripts/mssql_discover.py b/scripts/mssql_discover.py new file mode 100644 index 000000000..1ac047d61 --- /dev/null +++ b/scripts/mssql_discover.py @@ -0,0 +1,79 @@ +import sqlalchemy + +# This file is not committed to the repo, please create secrets.py with the required +# variables in the same dir as this file before 
# running this script
from secrets import (
    USER,
    PASS,
    IP,
    PORT,
    DB,
)

# NOTE(review): the values above are interpolated into the URL verbatim, so
# characters that are special inside a URL (such as "@" or "/") presumably
# have to be percent-encoded in secrets.py -- confirm against the SQLAlchemy
# database URL documentation.
MASTER_MSSQL_URL = f"mssql+pyodbc://{USER}:{PASS}@{IP}:{PORT}/{DB}?driver=ODBC+Driver+17+for+SQL+Server"


# DATA_TYPE values that are NOT flagged by mssql_discover(); every column
# whose type is missing from this set is reported as unsupported.
SUPPORTED_DATA_TYPES = {
    # char types
    "varchar",
    "nvarchar",
    "char",
    "nchar",
    "ntext",
    "text",
    # numeric types
    "int",
    "bigint",
    "smallint",
    "tinyint",
    "money",
    "float",
    "decimal",
    # date types
    "date",
    "datetime",
    "datetime2",
    "smalldatetime",
    # other types
    "bit",
}


def _quote_identifier(name):
    """Return *name* as a bracket-delimited T-SQL identifier, doubling any "]"."""
    return "[" + name.replace("]", "]]") + "]"


def _fetch_all(engine, statement):
    """Run *statement* on a short-lived connection and return all of its rows."""
    with engine.connect() as connection:
        # exec_driver_sql hands the string to the driver untouched, so a ":"
        # inside a database name is never mistaken for a bind parameter.
        return connection.exec_driver_sql(statement).all()


def mssql_discover():
    """
    Report the column data types on the instance that Fidesops does not support.

    Select all databases from the instance
    Select the schema data for each data base
    Check if there are any fields in the schema that Fidesops does not yet support

    The summary (column counts, flagged data types, flagged columns) is printed
    to stdout. A database whose schema cannot be read is skipped, and the
    reason is printed to stderr so that the gap in the report is visible.
    """
    import sys  # only needed for the skip warnings below

    engine = sqlalchemy.create_engine(MASTER_MSSQL_URL)
    all_columns = []
    flagged_columns = []
    flagged_datatypes = set()
    try:
        all_dbs = _fetch_all(engine, "SELECT name FROM sys.databases;")
        for (db_name,) in all_dbs:
            try:
                # The name is bracket-quoted so that databases with spaces,
                # hyphens or other special characters can still be queried.
                columns = _fetch_all(
                    engine,
                    f"SELECT TABLE_NAME, COLUMN_NAME, DATA_TYPE FROM {_quote_identifier(db_name)}.INFORMATION_SCHEMA.COLUMNS;",
                )
            except Exception as error:
                # Best effort: one unreadable database must not abort the
                # whole scan, but it should not disappear silently either.
                print(f"Skipping database {db_name}: {error}", file=sys.stderr)
                continue

            all_columns.extend(columns)
            for table, column, data_type in columns:
                if data_type not in SUPPORTED_DATA_TYPES:
                    flagged_datatypes.add(data_type)
                    flagged_columns.append(f"{db_name}.{table}.{column}: {data_type}")
    finally:
        # Release the pooled connections even when the scan fails part-way.
        engine.dispose()

    print(f"{len(all_columns)} columns found")
    print(f"{len(flagged_columns)} columns flagged")
    print("Flagged datatypes:")
    # Sorted so that repeated runs produce the same, diff-able report.
    print(",\n".join(sorted(flagged_datatypes)))
    print("Flagged columns:")
    print(",\n".join(flagged_columns))


if __name__ == "__main__":
    mssql_discover()