diff --git a/mysql/assets/configuration/spec.yaml b/mysql/assets/configuration/spec.yaml
index 2147e707ca7fa..07d863ddbe442 100644
--- a/mysql/assets/configuration/spec.yaml
+++ b/mysql/assets/configuration/spec.yaml
@@ -418,6 +418,15 @@ files:
           Configure how the SQL obfuscator behaves.
           Note: This option only applies when `dbm` is enabled.
         options:
+          - name: disable_sql_obfuscation
+            hidden: true
+            description: |
+              Set to `true` to disable SQL obfuscation for your statements and explain plans.
+              WARNING: This could result in sending sensitive data to Datadog. Do not enable this if SQL queries
+              to this database contain sensitive data in their parameters.
+            value:
+              type: boolean
+              example: false
           - name: replace_digits
             description: |
               Set to `true` to replace digits in identifiers and table names with question marks in your SQL statements.
diff --git a/mysql/datadog_checks/mysql/config.py b/mysql/datadog_checks/mysql/config.py
index f8a7a94883764..35512daf66430 100644
--- a/mysql/datadog_checks/mysql/config.py
+++ b/mysql/datadog_checks/mysql/config.py
@@ -45,6 +45,7 @@ def __init__(self, instance):
             # Valid values for this can be found at
             # https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/database.md#connection-level-attributes
             'dbms': 'mysql',
+            'disable_sql_obfuscation': obfuscator_options_config.get('disable_sql_obfuscation', False),
             'replace_digits': obfuscator_options_config.get(
                 'replace_digits', obfuscator_options_config.get('quantize_sql_tables', False)
             ),
diff --git a/mysql/datadog_checks/mysql/config_models/instance.py b/mysql/datadog_checks/mysql/config_models/instance.py
index 83508ab650d26..8cc3ca679d63f 100644
--- a/mysql/datadog_checks/mysql/config_models/instance.py
+++ b/mysql/datadog_checks/mysql/config_models/instance.py
@@ -1,4 +1,4 @@
-# (C) Datadog, Inc. 2021-present
+# (C) Datadog, Inc. 2022-present
 # All rights reserved
 # Licensed under a 3-clause BSD style license (see LICENSE)
 
@@ -36,6 +36,7 @@ class Config:
     collect_comments: Optional[bool]
     collect_metadata: Optional[bool]
     collect_tables: Optional[bool]
+    disable_sql_obfuscation: Optional[bool]
     replace_digits: Optional[bool]
 
 
diff --git a/mysql/datadog_checks/mysql/statement_samples.py b/mysql/datadog_checks/mysql/statement_samples.py
index a87ead9e20094..65bd8fe3ff97a 100644
--- a/mysql/datadog_checks/mysql/statement_samples.py
+++ b/mysql/datadog_checks/mysql/statement_samples.py
@@ -542,6 +542,7 @@ def _collect_plan_for_statement(self, row):
         plan_signature = compute_exec_plan_signature(normalized_plan)
 
         query_plan_cache_key = (query_cache_key, plan_signature)
+        disable_sql_obfuscation = is_affirmative(self._config.obfuscator_options.get('disable_sql_obfuscation', False))
         if self._seen_samples_ratelimiter.acquire(query_plan_cache_key):
             return {
                 "timestamp": row["timer_end_time_s"] * 1000,
@@ -559,12 +560,14 @@ def _collect_plan_for_statement(self, row):
                     "instance": row['current_schema'],
                     "plan": {
                         "definition": obfuscated_plan,
+                        "definition_raw": plan if disable_sql_obfuscation else None,
                         "signature": plan_signature,
                         "collection_errors": collection_errors if collection_errors else None,
                     },
                     "query_signature": query_signature,
                     "resource_hash": apm_resource_hash,
                     "statement": obfuscated_statement,
+                    "statement_raw": row['sql_text'] if disable_sql_obfuscation else None,
                     "metadata": {
                         "tables": statement['metadata'].get('tables', None),
                         "commands": statement['metadata'].get('commands', None),
diff --git a/mysql/tests/test_statements.py b/mysql/tests/test_statements.py
index 648dde29f5213..ea0d794343aaa 100644
--- a/mysql/tests/test_statements.py
+++ b/mysql/tests/test_statements.py
@@ -446,6 +446,68 @@ def run_query(q):
     assert metric['dd_commands'] == expected_metadata_payload['commands']
 
 
+@pytest.mark.e2e
+@pytest.mark.parametrize(
+    "query,normalized_query,query_signature,disable_sql_obfuscation",
+    [
+        (
+            "SELECT table_schema FROM information_schema.tables WHERE table_schema = 'processlist'",
+            'SELECT table_schema FROM information_schema.tables WHERE table_schema = ?',
+            'dc18df8a6966a866',
+            True,
+        ),
+        (
+            "SELECT table_schema FROM information_schema.tables WHERE table_schema = 'processlist'",
+            'SELECT table_schema FROM information_schema.tables WHERE table_schema = ?',
+            'dc18df8a6966a866',
+            False,
+        ),
+        (
+            "SELECT table_catalog FROM information_schema.columns "
+            "WHERE ordinal_position != 5 AND table_name LIKE '%_list'",
+            'SELECT table_catalog FROM information_schema.columns WHERE ordinal_position != ? AND table_name LIKE ?',
+            'd75672438d2bdc11',
+            True,
+        ),
+    ],
+)
+def test_deobfuscated_statement(
+    dd_agent_check,
+    dbm_instance,
+    bob_conn,
+    query,
+    normalized_query,
+    query_signature,
+    disable_sql_obfuscation,
+):
+    """Check that the raw statement and raw plan are set only when `disable_sql_obfuscation` is true."""
+    dbm_instance['obfuscator_options'] = {'disable_sql_obfuscation': disable_sql_obfuscation}
+
+    # Execute the query before running the check.
+    with closing(bob_conn.cursor()) as cursor:
+        cursor.execute(query)
+
+    aggregator = dd_agent_check(dbm_instance)
+
+    events = aggregator.get_event_platform_events("dbm-samples")
+    hits = [event for event in events if event['db']['query_signature'] == query_signature]
+    assert len(hits) == 1
+
+    db_payload = hits[0]['db']
+
+    # The obfuscated fields are asserted the same way for both settings of the option.
+    assert db_payload['statement'] == normalized_query
+    assert db_payload['plan']['definition'] is not None
+
+    # The raw fields must be present when obfuscation is disabled and None otherwise.
+    if disable_sql_obfuscation:
+        assert db_payload['statement_raw'] == query
+        assert db_payload['plan']['definition_raw'] is not None
+    else:
+        assert db_payload['statement_raw'] is None
+        assert db_payload['plan']['definition_raw'] is None
+
+
 @pytest.mark.integration
 @pytest.mark.usefixtures('dd_environment')
 @pytest.mark.parametrize(