Skip to content

Commit

Permalink
Add schema collection to Postgres integration (#15484) (#15866)
Browse files Browse the repository at this point in the history
  • Loading branch information
jmeunier28 authored Sep 19, 2023
1 parent 454fbde commit d25a65e
Show file tree
Hide file tree
Showing 14 changed files with 640 additions and 18 deletions.
4 changes: 4 additions & 0 deletions postgres/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

* Attempt to connect to the database and fail fast before trying to establish a connection pool ([#15839](https://github.com/DataDog/integrations-core/pull/15839))

***Added***:

* Add schema collection to Postgres integration (#15484) ([#15866](https://github.com/DataDog/integrations-core/pull/15866))

***Fixed***:

* Revert psycopg3 upgrade ([#15859](https://github.com/DataDog/integrations-core/pull/15859))
Expand Down
34 changes: 34 additions & 0 deletions postgres/assets/configuration/spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,40 @@ files:
type: number
example: 600

- name: collect_schemas
description: |
Enable collection of database schemas. To collect schemas from all user databases,
enable `database_autodiscovery`. To collect the schema of a single database, set
`dbname` to that database.
Relation metrics must be enabled for schema collection.
options:
- name: enabled
description: |
Enable collection of database schemas. Requires `dbm: true` and relation metrics must be enabled.
value:
type: boolean
example: false
- name: max_tables
description: |
Maximum number of tables the Agent collects from the instance.
value:
type: number
example: 1000
display_default: 1000
- name: max_columns
description: |
Maximum number of columns the Agent collects per table.
value:
type: number
example: 50
display_default: 50
- name: collection_interval
description: |
The database schema collection interval (in seconds).
value:
type: number
example: 600

- name: aws
description: |
This block defines the configuration for AWS RDS and Aurora instances.
Expand Down
6 changes: 6 additions & 0 deletions postgres/datadog_checks/postgres/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ def __init__(self, instance):
self.pg_stat_activity_view = instance.get('pg_stat_activity_view', 'pg_stat_activity')
self.statement_samples_config = instance.get('query_samples', instance.get('statement_samples', {})) or {}
self.settings_metadata_config = instance.get('collect_settings', {}) or {}
self.schemas_metadata_config = instance.get('collect_schemas', {"enabled": False})
if not self.relations and self.schemas_metadata_config['enabled']:
raise ConfigurationError(
'In order to collect schemas on this database, you must enable relation metrics collection.'
)

self.resources_metadata_config = instance.get('collect_resources', {}) or {}
self.statement_activity_config = instance.get('query_activity', {}) or {}
self.statement_metrics_config = instance.get('query_metrics', {}) or {}
Expand Down
12 changes: 12 additions & 0 deletions postgres/datadog_checks/postgres/config_models/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,17 @@ class Azure(BaseModel):
fully_qualified_domain_name: Optional[str] = None


# Configuration model for the `collect_schemas` instance option
# (settings for database schema collection).
class CollectSchemas(BaseModel):
    # frozen=True makes model instances immutable after validation;
    # arbitrary_types_allowed=True lets fields use non-pydantic types.
    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        frozen=True,
    )
    # Database schema collection interval, in seconds.
    collection_interval: Optional[float] = None
    # Whether collection of database schemas is enabled.
    enabled: Optional[bool] = None
    # Maximum number of columns the Agent collects per table.
    max_columns: Optional[float] = None
    # Maximum number of tables the Agent collects from the instance.
    max_tables: Optional[float] = None


class CollectSettings(BaseModel):
model_config = ConfigDict(
arbitrary_types_allowed=True,
Expand Down Expand Up @@ -163,6 +174,7 @@ class InstanceConfig(BaseModel):
collect_database_size_metrics: Optional[bool] = None
collect_default_database: Optional[bool] = None
collect_function_metrics: Optional[bool] = None
collect_schemas: Optional[CollectSchemas] = None
collect_settings: Optional[CollectSettings] = None
collect_wal_metrics: Optional[bool] = None
custom_queries: Optional[tuple[MappingProxyType[str, Any], ...]] = None
Expand Down
27 changes: 27 additions & 0 deletions postgres/datadog_checks/postgres/data/conf.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,33 @@ instances:
#
# collection_interval: 600

## Enable collection of database schemas. To collect schemas from all user databases,
## enable `database_autodiscovery`. To collect the schema of a single database, set
## `dbname` to that database.
## Relation metrics must be enabled for schema collection.
#
# collect_schemas:

## @param enabled - boolean - optional - default: false
## Enable collection of database schemas. Requires `dbm: true` and relation metrics must be enabled.
#
# enabled: false

## @param max_tables - number - optional - default: 1000
## Maximum number of tables the Agent collects from the instance.
#
# max_tables: 1000

## @param max_columns - number - optional - default: 50
## Maximum number of columns the Agent collects per table.
#
# max_columns: 50

## @param collection_interval - number - optional - default: 600
## The database schema collection interval (in seconds).
#
# collection_interval: 600

## This block defines the configuration for AWS RDS and Aurora instances.
##
## Complete this section if you have installed the Datadog AWS Integration
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Licensed under a 3-clause BSD style license (see LICENSE)

import logging
import re

import psycopg2

Expand Down Expand Up @@ -169,3 +170,11 @@ def _execute_query_and_fetch_rows(self, dbname, query):
with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
cursor.execute(query)
return cursor.fetchall()

def _is_parameterized_query(self, statement: str) -> bool:
# Use regex to match $1 to determine if a query is parameterized
# BUT single quoted string '$1' should not be considered as a parameter
# e.g. SELECT * FROM products WHERE id = $1; -- $1 is a parameter
# e.g. SELECT * FROM products WHERE id = '$1'; -- '$1' is not a parameter
parameterized_query_pattern = r"(?<!')\$(?!'\$')[\d]+(?!')"
return re.search(parameterized_query_pattern, statement) is not None
Loading

0 comments on commit d25a65e

Please sign in to comment.