From c1e6be1839cc134849fb72cfa024d93f38b4705f Mon Sep 17 00:00:00 2001 From: Eden Date: Fri, 16 Jun 2023 18:48:38 +0000 Subject: [PATCH 01/86] Add limited pool + LRU cache to MultiDatabaseConnectionPool --- .../datadog_checks/postgres/connections.py | 102 +++++++++-- postgres/tests/test_connections.py | 168 +++++++++++++++--- 2 files changed, 231 insertions(+), 39 deletions(-) diff --git a/postgres/datadog_checks/postgres/connections.py b/postgres/datadog_checks/postgres/connections.py index 738787dee5bc2..03391e27ec72f 100644 --- a/postgres/datadog_checks/postgres/connections.py +++ b/postgres/datadog_checks/postgres/connections.py @@ -4,11 +4,26 @@ import datetime import inspect import threading +from typing import Callable, Dict from collections import namedtuple import psycopg2 -ConnectionWithTTL = namedtuple("ConnectionWithTTL", "connection deadline") + +class ConnectionInfo: + def __init__( + self, + connection: psycopg2.extensions.connection, + deadline: int, + active: bool, + last_accessed: int, + thread: threading.Thread, + ): + self.connection = connection + self.deadline = deadline + self.active = active + self.last_accessed = last_accessed + self.thread = thread class MultiDatabaseConnectionPool(object): @@ -22,6 +37,11 @@ class MultiDatabaseConnectionPool(object): databases still present a connection overhead risk. This class provides a mechanism to prune connections to a database which were not used in the time specified by their TTL. + + If max_conns is specified, the connection pool will limit concurrent connections. + Connection eviction should be handled by the calling code. + If the connection pool is full, try to evict a connection with `evict_lru` until `get_connection` + successfully returns a connection. """ class Stats(object): @@ -37,26 +57,37 @@ def __repr__(self): def reset(self): self.__init__() - def __init__(self, connect_fn): + def __init__(self, connect_fn: Callable[[str], None], max_conns: int = None): + self.max_conns: int = max_conns self._stats = self.Stats() - self._mu = threading.Lock() - self._conns = {} + self._mu = threading.RLock() + self._conns: Dict[str, ConnectionInfo] = {} - if hasattr(inspect, 'signature'): + if hasattr(inspect, "signature"): connect_sig = inspect.signature(connect_fn) if len(connect_sig.parameters) != 1: raise ValueError( "Invalid signature for the connection function. " - "A single parameter for dbname is expected, got signature: {}".format(connect_sig) + "A single parameter for dbname is expected, got signature: {}".format( + connect_sig + ) ) self.connect_fn = connect_fn - def get_connection(self, dbname, ttl_ms): + def get_connection(self, dbname: str, ttl_ms: int): + """ + Grab a connection from the pool if the database is already connected. + If max_conns is specified, and the database isn't already connected, + make a new connection IFF the max_conn limit hasn't been reached. + If we can't fit the connection into the pool, return None. 
+ """ self.prune_connections() with self._mu: - conn = self._conns.pop(dbname, ConnectionWithTTL(None, None)) + conn = self._conns.pop(dbname, ConnectionInfo(None, None, None, None, None)) db = conn.connection if db is None or db.closed: + if self.max_conns is not None and len(self._conns) == self.max_conns: + return None self._stats.connection_opened += 1 db = self.connect_fn(dbname) @@ -65,7 +96,13 @@ def get_connection(self, dbname, ttl_ms): db.rollback() deadline = datetime.datetime.now() + datetime.timedelta(milliseconds=ttl_ms) - self._conns[dbname] = ConnectionWithTTL(db, deadline) + self._conns[dbname] = ConnectionInfo( + connection=db, + deadline=deadline, + active=True, + last_accessed=datetime.datetime.now(), + thread=threading.current_thread(), + ) return db def prune_connections(self): @@ -92,8 +129,51 @@ def close_all_connections(self): success = False return success - def _terminate_connection_unsafe(self, dbname): - db, _ = self._conns.pop(dbname, ConnectionWithTTL(None, None)) + def done(self, dbname: str) -> None: + """ + Mark a connection as done being used, so it can be evicted from the pool. + done() can only be called on a connection in the same thread that the connection + was made. + """ + with self._mu: + if self._conns[dbname].thread != threading.current_thread(): + raise RuntimeError( + "Cannot call done() for this dbname on this thread. Done() can only be called \ + from the same thread the connection was made." + ) + + self._conns[dbname].active = False + + def evict_lru(self) -> str: + """ + Evict and close the inactive connection which was least recently used. + Return the dbname connection that was evicted. + """ + with self._mu: + conns_list = dict(self._conns) + while True: + if not conns_list: + break + + eviction_candidate = self._get_lru(conns_list) + if self._conns[eviction_candidate].active: + del conns_list[eviction_candidate] + continue + + # eviction candidate successfully found + self._terminate_connection_unsafe(eviction_candidate) + return eviction_candidate + + # Could not evict a candidate; return None, calling code should keep trying. + return None + + def _get_lru(self, connections: Dict[str, ConnectionInfo]) -> str: + return min(connections, key=lambda t: self._conns[t].last_accessed) + + def _terminate_connection_unsafe(self, dbname: str): + db = self._conns.pop( + dbname, ConnectionInfo(None, None, None, None, None) + ).connection if db is not None: try: self._stats.connection_closed += 1 diff --git a/postgres/tests/test_connections.py b/postgres/tests/test_connections.py index 50e611dca1046..e6c59ec2f6730 100644 --- a/postgres/tests/test_connections.py +++ b/postgres/tests/test_connections.py @@ -8,6 +8,7 @@ import psycopg2 import pytest +import threading from datadog_checks.postgres import PostgreSql from datadog_checks.postgres.connections import MultiDatabaseConnectionPool @@ -16,16 +17,16 @@ @pytest.mark.integration -@pytest.mark.usefixtures('dd_environment') +@pytest.mark.usefixtures("dd_environment") def test_conn_pool(pg_instance): """ Test simple case of creating a connection pool, pruning a stale connection, and closing all connections. 
""" - check = PostgreSql('postgres', {}, [pg_instance]) + check = PostgreSql("postgres", {}, [pg_instance]) pool = MultiDatabaseConnectionPool(check._new_connection) - db = pool.get_connection('postgres', 1) + db = pool.get_connection("postgres", 1) assert pool._stats.connection_opened == 1 pool.prune_connections() assert len(pool._conns) == 1 @@ -43,7 +44,7 @@ def test_conn_pool(pg_instance): assert pool._stats.connection_closed_failed == 0 assert pool._stats.connection_pruned == 1 - db = pool.get_connection('postgres', 999 * 1000) + db = pool.get_connection("postgres", 999 * 1000) assert len(pool._conns) == 1 assert pool._stats.connection_opened == 2 success = pool.close_all_connections() @@ -55,19 +56,21 @@ def test_conn_pool(pg_instance): @pytest.mark.integration -@pytest.mark.usefixtures('dd_environment') +@pytest.mark.usefixtures("dd_environment") def test_conn_pool_no_leaks_on_close(pg_instance): """ Test a simple case of opening and closing many connections. There should be no leaked connections on the server. """ unique_id = str(uuid.uuid4()) # Used to isolate this test from others on the DB - check = PostgreSql('postgres', {}, [pg_instance]) + check = PostgreSql("postgres", {}, [pg_instance]) check._config.application_name = unique_id # Used to make verification queries pool2 = MultiDatabaseConnectionPool( - lambda dbname: psycopg2.connect(host=HOST, dbname=dbname, user=USER_ADMIN, password=PASSWORD_ADMIN) + lambda dbname: psycopg2.connect( + host=HOST, dbname=dbname, user=USER_ADMIN, password=PASSWORD_ADMIN + ) ) # Iterate in the test many times to detect flakiness @@ -78,7 +81,9 @@ def get_activity(): """ Fetches all pg_stat_activity rows generated by this test and connection to a "dogs%" database """ - with pool2.get_connection('postgres', 1).cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: + with pool2.get_connection("postgres", 1).cursor( + cursor_factory=psycopg2.extras.DictCursor + ) as cursor: cursor.execute( "SELECT pid, datname, usename, state, query_start, state_change, application_name" " FROM pg_stat_activity" @@ -89,7 +94,7 @@ def get_activity(): conn_count = 100 for i in range(0, conn_count): - dbname = 'dogs_{}'.format(i) + dbname = "dogs_{}".format(i) db = pool.get_connection(dbname, 10 * 1000) with db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: cursor.execute("select current_database()") @@ -113,12 +118,14 @@ def get_activity(): if len(rows) == 0: break - assert attempts >= 0, "Connections leaked! Leaked rows found:\n{}".format(pprint.pformat(rows)) + assert attempts >= 0, "Connections leaked! Leaked rows found:\n{}".format( + pprint.pformat(rows) + ) time.sleep(1) @pytest.mark.integration -@pytest.mark.usefixtures('dd_environment') +@pytest.mark.usefixtures("dd_environment") def test_conn_pool_no_leaks_on_prune(pg_instance): """ Test a scenario where many connections are created. 
These connections should be open on the database @@ -126,13 +133,15 @@ def test_conn_pool_no_leaks_on_prune(pg_instance): """ unique_id = str(uuid.uuid4()) # Used to isolate this test from others on the DB - check = PostgreSql('postgres', {}, [pg_instance]) + check = PostgreSql("postgres", {}, [pg_instance]) check._config.application_name = unique_id pool = MultiDatabaseConnectionPool(check._new_connection) # Used to make verification queries pool2 = MultiDatabaseConnectionPool( - lambda dbname: psycopg2.connect(host=HOST, dbname=dbname, user=USER_ADMIN, password=PASSWORD_ADMIN) + lambda dbname: psycopg2.connect( + host=HOST, dbname=dbname, user=USER_ADMIN, password=PASSWORD_ADMIN + ) ) ttl_long = 90 * 1000 ttl_short = 1 @@ -141,7 +150,9 @@ def get_activity(): """ Fetches all pg_stat_activity rows generated by this test and connection to a "dogs%" database """ - with pool2.get_connection('postgres', 1).cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: + with pool2.get_connection("postgres", 1).cursor( + cursor_factory=psycopg2.extras.DictCursor + ) as cursor: cursor.execute( "SELECT pid, datname, usename, state, query_start, state_change, application_name" " FROM pg_stat_activity" @@ -155,7 +166,7 @@ def get_many_connections(count, ttl): Retrieves the number of connections from the pool with the specified TTL """ for i in range(0, count): - dbname = 'dogs_{}'.format(i) + dbname = "dogs_{}".format(i) db = pool.get_connection(dbname, ttl) with db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: cursor.execute("select current_database()") @@ -173,9 +184,13 @@ def get_many_connections(count, ttl): assert pool._stats.connection_opened == 50 # Ensure those connections have the correct deadline and connection status for i in range(0, 50): - dbname = 'dogs_{}'.format(i) - db, deadline = pool._conns[dbname] - approximate_deadline = datetime.datetime.now() + datetime.timedelta(milliseconds=ttl_long) + dbname = "dogs_{}".format(i) + conn_info = pool._conns[dbname] + db = conn_info.connection + deadline = conn_info.deadline + approximate_deadline = datetime.datetime.now() + datetime.timedelta( + milliseconds=ttl_long + ) assert ( approximate_deadline - datetime.timedelta(seconds=1) < deadline @@ -186,8 +201,8 @@ def get_many_connections(count, ttl): # Check that those pooled connections do exist on the database rows = get_activity() assert len(rows) == 50 - assert len({row['datname'] for row in rows}) == 50 - assert all(row['state'] == 'idle' for row in rows) + assert len({row["datname"] for row in rows}) == 50 + assert all(row["state"] == "idle" for row in rows) pool._stats.reset() @@ -201,19 +216,22 @@ def get_many_connections(count, ttl): # The test can be considered successful as long as the backend is eventually terminated. 
for attempt in range(attempts_to_verify): rows = get_activity() - server_pids = {row['pid'] for row in rows} - conn_pids = {db.info.backend_pid for db, _ in pool._conns.values()} - leaked_rows = [row for row in rows if row['pid'] in server_pids - conn_pids] + server_pids = {row["pid"] for row in rows} + conns = [c.connection for c in pool._conns.values()] + conn_pids = {db.info.backend_pid for db in conns} + leaked_rows = [row for row in rows if row["pid"] in server_pids - conn_pids] if not leaked_rows: break if attempt < attempts_to_verify - 1: time.sleep(1) continue - assert len(leaked_rows) == 0, 'Found leaked rows on the server not in the connection pool' + assert ( + len(leaked_rows) == 0 + ), "Found leaked rows on the server not in the connection pool" - assert len({row['datname'] for row in rows}) == 51 - assert len(rows) == 51, 'Possible leaked connections' - assert all(row['state'] == 'idle' for row in rows) + assert len({row["datname"] for row in rows}) == 51 + assert len(rows) == 51, "Possible leaked connections" + assert all(row["state"] == "idle" for row in rows) assert pool._stats.connection_opened == 1 assert pool._stats.connection_closed == 0 @@ -234,8 +252,102 @@ def get_many_connections(count, ttl): if attempt < attempts_to_verify - 1: time.sleep(1) continue - assert len(leaked_rows) == 0, 'Found leaked rows remaining after TTL was updated to short TTL' + assert ( + len(leaked_rows) == 0 + ), "Found leaked rows remaining after TTL was updated to short TTL" # Final check that the server contains no leaked connections still open rows = get_activity() assert len(rows) == 0 + + +@pytest.mark.integration +@pytest.mark.usefixtures("dd_environment") +def test_conn_pool_hit_pool_limit_and_evict(pg_instance): + """ + Test creating a limited connection pool and adding connections + until the pool is filled. Then, set one to Done and try to evict it. + """ + limit = 5 + check = PostgreSql("postgres", {}, [pg_instance]) + + pool = MultiDatabaseConnectionPool(check._new_connection, limit) + for i in range(limit): + pool.get_connection("dogs_{}".format(i), 60000) + + assert pool._stats.connection_opened == limit + + # ask for one more connection + db = pool.get_connection("dogs_10", 60000) + assert db == None + + # try to evict; none were marked inactive + evicted = pool.evict_lru() + assert evicted == None + assert pool._stats.connection_closed == 0 + + expected_evicted = "dogs_3" + pool.done(expected_evicted) + evicted = pool.evict_lru() + assert evicted == expected_evicted + assert pool._stats.connection_closed == 1 + + # ask for another connection again + db = pool.get_connection("dogs_50", 60000) + assert db != None + + +@pytest.mark.integration +@pytest.mark.usefixtures("dd_environment") +def test_conn_pool_multithreaded(pg_instance): + """ + Test creating a limited connection pool that is shared among several threads. 
+ """ + + def pretend_to_run_query(pool, dbname): + pool.get_connection(dbname, 10000) + time.sleep(5) + pool.done(dbname) + print("Connection {} done ".format(dbname)) + + limit = 30 + check = PostgreSql("postgres", {}, [pg_instance]) + + pool = MultiDatabaseConnectionPool(check._new_connection, limit) + threadpool = list() + for i in range(limit): + thread = threading.Thread( + target=pretend_to_run_query, args=(pool, "dogs_{}".format(i)) + ) + threadpool.append(thread) + thread.start() + + time.sleep(1) + assert pool._stats.connection_opened == limit + + # ask for one more connection + db = pool.get_connection("dogs_{}".format(limit + 1), 1) + assert db == None + + # try to evict; should be too early + evicted = pool.evict_lru() + assert evicted == None + assert pool._stats.connection_closed == 0 + + # try to call done from wrong thread + with pytest.raises(RuntimeError): + pool.done("dogs_3") + + # join threads + for thread in threadpool: + thread.join() + + while True: + evicted = pool.evict_lru() + if evicted == None: + break + + assert pool._stats.connection_closed == limit + # now can add a new connection! + db = pool.get_connection("dogs_{}".format(limit + 1), 60000) + assert db != None From 0bc9124fa2cb9e95cdd5ca83e529f62a77e1e4e8 Mon Sep 17 00:00:00 2001 From: Eden Date: Fri, 16 Jun 2023 19:03:50 +0000 Subject: [PATCH 02/86] remove a rogue print --- postgres/tests/test_connections.py | 1 - 1 file changed, 1 deletion(-) diff --git a/postgres/tests/test_connections.py b/postgres/tests/test_connections.py index e6c59ec2f6730..8af7a56ed5b97 100644 --- a/postgres/tests/test_connections.py +++ b/postgres/tests/test_connections.py @@ -308,7 +308,6 @@ def pretend_to_run_query(pool, dbname): pool.get_connection(dbname, 10000) time.sleep(5) pool.done(dbname) - print("Connection {} done ".format(dbname)) limit = 30 check = PostgreSql("postgres", {}, [pg_instance]) From 40da126f25095f690ab995d0bcdfcfda1ab4bcaa Mon Sep 17 00:00:00 2001 From: Eden Date: Fri, 16 Jun 2023 19:26:39 +0000 Subject: [PATCH 03/86] fixed formatting --- postgres/datadog_checks/postgres/connections.py | 9 ++------- postgres/tests/test_bash.sh | 6 ++++++ 2 files changed, 8 insertions(+), 7 deletions(-) create mode 100755 postgres/tests/test_bash.sh diff --git a/postgres/datadog_checks/postgres/connections.py b/postgres/datadog_checks/postgres/connections.py index 03391e27ec72f..16d36945607a5 100644 --- a/postgres/datadog_checks/postgres/connections.py +++ b/postgres/datadog_checks/postgres/connections.py @@ -5,7 +5,6 @@ import inspect import threading from typing import Callable, Dict -from collections import namedtuple import psycopg2 @@ -68,9 +67,7 @@ def __init__(self, connect_fn: Callable[[str], None], max_conns: int = None): if len(connect_sig.parameters) != 1: raise ValueError( "Invalid signature for the connection function. 
" - "A single parameter for dbname is expected, got signature: {}".format( - connect_sig - ) + "A single parameter for dbname is expected, got signature: {}".format(connect_sig) ) self.connect_fn = connect_fn @@ -171,9 +168,7 @@ def _get_lru(self, connections: Dict[str, ConnectionInfo]) -> str: return min(connections, key=lambda t: self._conns[t].last_accessed) def _terminate_connection_unsafe(self, dbname: str): - db = self._conns.pop( - dbname, ConnectionInfo(None, None, None, None, None) - ).connection + db = self._conns.pop(dbname, ConnectionInfo(None, None, None, None, None)).connection if db is not None: try: self._stats.connection_closed += 1 diff --git a/postgres/tests/test_bash.sh b/postgres/tests/test_bash.sh new file mode 100755 index 0000000000000..74674e6ef7a71 --- /dev/null +++ b/postgres/tests/test_bash.sh @@ -0,0 +1,6 @@ +x=0 +while [ $x -le 100 ]; +do + ddev test postgres:py3.9-14.0 -k test_statement_samples_collect[dd_admin-dd_admin-dogs-SELECT * FROM breed WHERE name = %s-Labrador-None-None-not_truncated-expected_warnings1-pg_stat_activity] --skip-env + ((x++)) +done \ No newline at end of file From c32e4b45734ae4613dad4a913d757c84c65f1a34 Mon Sep 17 00:00:00 2001 From: Eden Date: Fri, 16 Jun 2023 19:29:57 +0000 Subject: [PATCH 04/86] more helpful documentation --- postgres/datadog_checks/postgres/connections.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/postgres/datadog_checks/postgres/connections.py b/postgres/datadog_checks/postgres/connections.py index 16d36945607a5..254dfb59e95c0 100644 --- a/postgres/datadog_checks/postgres/connections.py +++ b/postgres/datadog_checks/postgres/connections.py @@ -38,9 +38,10 @@ class MultiDatabaseConnectionPool(object): TTL. If max_conns is specified, the connection pool will limit concurrent connections. - Connection eviction should be handled by the calling code. - If the connection pool is full, try to evict a connection with `evict_lru` until `get_connection` - successfully returns a connection. + Connection eviction should be handled by the calling code. Call done() on a connection + when it is no longer necessary, so it will be marked evictable. + If the connection pool is full, try to evict a connection with evict_lru() until a + connection is evicted, then try get_connection() again. """ class Stats(object): @@ -129,6 +130,8 @@ def close_all_connections(self): def done(self, dbname: str) -> None: """ Mark a connection as done being used, so it can be evicted from the pool. + This function does not evict connections from the pool; it just marks them + as inactive. done() can only be called on a connection in the same thread that the connection was made. 
""" From fe2b0511fb3a188716fe5ff3e3f6c494450674ac Mon Sep 17 00:00:00 2001 From: Eden Date: Fri, 16 Jun 2023 19:34:47 +0000 Subject: [PATCH 05/86] accidentally added an extraneous file --- postgres/tests/test_bash.sh | 6 ------ 1 file changed, 6 deletions(-) delete mode 100755 postgres/tests/test_bash.sh diff --git a/postgres/tests/test_bash.sh b/postgres/tests/test_bash.sh deleted file mode 100755 index 74674e6ef7a71..0000000000000 --- a/postgres/tests/test_bash.sh +++ /dev/null @@ -1,6 +0,0 @@ -x=0 -while [ $x -le 100 ]; -do - ddev test postgres:py3.9-14.0 -k test_statement_samples_collect[dd_admin-dd_admin-dogs-SELECT * FROM breed WHERE name = %s-Labrador-None-None-not_truncated-expected_warnings1-pg_stat_activity] --skip-env - ((x++)) -done \ No newline at end of file From 75bb16c813c64a3874d89abab552a348f377fbf2 Mon Sep 17 00:00:00 2001 From: Eden Date: Fri, 16 Jun 2023 19:44:46 +0000 Subject: [PATCH 06/86] reverse the quotation mark replacements --- .../datadog_checks/postgres/connections.py | 2 +- postgres/tests/test_connections.py | 98 ++++++++----------- 2 files changed, 41 insertions(+), 59 deletions(-) diff --git a/postgres/datadog_checks/postgres/connections.py b/postgres/datadog_checks/postgres/connections.py index 254dfb59e95c0..cfd262598d065 100644 --- a/postgres/datadog_checks/postgres/connections.py +++ b/postgres/datadog_checks/postgres/connections.py @@ -63,7 +63,7 @@ def __init__(self, connect_fn: Callable[[str], None], max_conns: int = None): self._mu = threading.RLock() self._conns: Dict[str, ConnectionInfo] = {} - if hasattr(inspect, "signature"): + if hasattr(inspect, 'signature'): connect_sig = inspect.signature(connect_fn) if len(connect_sig.parameters) != 1: raise ValueError( diff --git a/postgres/tests/test_connections.py b/postgres/tests/test_connections.py index 8af7a56ed5b97..75bb27ae63148 100644 --- a/postgres/tests/test_connections.py +++ b/postgres/tests/test_connections.py @@ -3,12 +3,12 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import datetime import pprint +import threading import time import uuid import psycopg2 import pytest -import threading from datadog_checks.postgres import PostgreSql from datadog_checks.postgres.connections import MultiDatabaseConnectionPool @@ -17,16 +17,16 @@ @pytest.mark.integration -@pytest.mark.usefixtures("dd_environment") +@pytest.mark.usefixtures('dd_environment') def test_conn_pool(pg_instance): """ Test simple case of creating a connection pool, pruning a stale connection, and closing all connections. """ - check = PostgreSql("postgres", {}, [pg_instance]) + check = PostgreSql('postgres', {}, [pg_instance]) pool = MultiDatabaseConnectionPool(check._new_connection) - db = pool.get_connection("postgres", 1) + db = pool.get_connection('postgres', 1) assert pool._stats.connection_opened == 1 pool.prune_connections() assert len(pool._conns) == 1 @@ -44,7 +44,7 @@ def test_conn_pool(pg_instance): assert pool._stats.connection_closed_failed == 0 assert pool._stats.connection_pruned == 1 - db = pool.get_connection("postgres", 999 * 1000) + db = pool.get_connection('postgres', 999 * 1000) assert len(pool._conns) == 1 assert pool._stats.connection_opened == 2 success = pool.close_all_connections() @@ -56,21 +56,19 @@ def test_conn_pool(pg_instance): @pytest.mark.integration -@pytest.mark.usefixtures("dd_environment") +@pytest.mark.usefixtures('dd_environment') def test_conn_pool_no_leaks_on_close(pg_instance): """ Test a simple case of opening and closing many connections. 
There should be no leaked connections on the server. """ unique_id = str(uuid.uuid4()) # Used to isolate this test from others on the DB - check = PostgreSql("postgres", {}, [pg_instance]) + check = PostgreSql('postgres', {}, [pg_instance]) check._config.application_name = unique_id # Used to make verification queries pool2 = MultiDatabaseConnectionPool( - lambda dbname: psycopg2.connect( - host=HOST, dbname=dbname, user=USER_ADMIN, password=PASSWORD_ADMIN - ) + lambda dbname: psycopg2.connect(host=HOST, dbname=dbname, user=USER_ADMIN, password=PASSWORD_ADMIN) ) # Iterate in the test many times to detect flakiness @@ -81,9 +79,7 @@ def get_activity(): """ Fetches all pg_stat_activity rows generated by this test and connection to a "dogs%" database """ - with pool2.get_connection("postgres", 1).cursor( - cursor_factory=psycopg2.extras.DictCursor - ) as cursor: + with pool2.get_connection('postgres', 1).cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: cursor.execute( "SELECT pid, datname, usename, state, query_start, state_change, application_name" " FROM pg_stat_activity" @@ -94,7 +90,7 @@ def get_activity(): conn_count = 100 for i in range(0, conn_count): - dbname = "dogs_{}".format(i) + dbname = 'dogs_{}'.format(i) db = pool.get_connection(dbname, 10 * 1000) with db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: cursor.execute("select current_database()") @@ -118,14 +114,12 @@ def get_activity(): if len(rows) == 0: break - assert attempts >= 0, "Connections leaked! Leaked rows found:\n{}".format( - pprint.pformat(rows) - ) + assert attempts >= 0, "Connections leaked! Leaked rows found:\n{}".format(pprint.pformat(rows)) time.sleep(1) @pytest.mark.integration -@pytest.mark.usefixtures("dd_environment") +@pytest.mark.usefixtures('dd_environment') def test_conn_pool_no_leaks_on_prune(pg_instance): """ Test a scenario where many connections are created. 
These connections should be open on the database @@ -133,15 +127,13 @@ def test_conn_pool_no_leaks_on_prune(pg_instance): """ unique_id = str(uuid.uuid4()) # Used to isolate this test from others on the DB - check = PostgreSql("postgres", {}, [pg_instance]) + check = PostgreSql('postgres', {}, [pg_instance]) check._config.application_name = unique_id pool = MultiDatabaseConnectionPool(check._new_connection) # Used to make verification queries pool2 = MultiDatabaseConnectionPool( - lambda dbname: psycopg2.connect( - host=HOST, dbname=dbname, user=USER_ADMIN, password=PASSWORD_ADMIN - ) + lambda dbname: psycopg2.connect(host=HOST, dbname=dbname, user=USER_ADMIN, password=PASSWORD_ADMIN) ) ttl_long = 90 * 1000 ttl_short = 1 @@ -150,9 +142,7 @@ def get_activity(): """ Fetches all pg_stat_activity rows generated by this test and connection to a "dogs%" database """ - with pool2.get_connection("postgres", 1).cursor( - cursor_factory=psycopg2.extras.DictCursor - ) as cursor: + with pool2.get_connection('postgres', 1).cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: cursor.execute( "SELECT pid, datname, usename, state, query_start, state_change, application_name" " FROM pg_stat_activity" @@ -166,7 +156,7 @@ def get_many_connections(count, ttl): Retrieves the number of connections from the pool with the specified TTL """ for i in range(0, count): - dbname = "dogs_{}".format(i) + dbname = 'dogs_{}'.format(i) db = pool.get_connection(dbname, ttl) with db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: cursor.execute("select current_database()") @@ -184,13 +174,11 @@ def get_many_connections(count, ttl): assert pool._stats.connection_opened == 50 # Ensure those connections have the correct deadline and connection status for i in range(0, 50): - dbname = "dogs_{}".format(i) + dbname = 'dogs_{}'.format(i) conn_info = pool._conns[dbname] db = conn_info.connection deadline = conn_info.deadline - approximate_deadline = datetime.datetime.now() + datetime.timedelta( - milliseconds=ttl_long - ) + approximate_deadline = datetime.datetime.now() + datetime.timedelta(milliseconds=ttl_long) assert ( approximate_deadline - datetime.timedelta(seconds=1) < deadline @@ -201,8 +189,8 @@ def get_many_connections(count, ttl): # Check that those pooled connections do exist on the database rows = get_activity() assert len(rows) == 50 - assert len({row["datname"] for row in rows}) == 50 - assert all(row["state"] == "idle" for row in rows) + assert len({row['datname'] for row in rows}) == 50 + assert all(row['state'] == 'idle' for row in rows) pool._stats.reset() @@ -225,13 +213,11 @@ def get_many_connections(count, ttl): if attempt < attempts_to_verify - 1: time.sleep(1) continue - assert ( - len(leaked_rows) == 0 - ), "Found leaked rows on the server not in the connection pool" + assert len(leaked_rows) == 0, "Found leaked rows on the server not in the connection pool" - assert len({row["datname"] for row in rows}) == 51 + assert len({row['datname'] for row in rows}) == 51 assert len(rows) == 51, "Possible leaked connections" - assert all(row["state"] == "idle" for row in rows) + assert all(row['state'] == 'idle' for row in rows) assert pool._stats.connection_opened == 1 assert pool._stats.connection_closed == 0 @@ -252,9 +238,7 @@ def get_many_connections(count, ttl): if attempt < attempts_to_verify - 1: time.sleep(1) continue - assert ( - len(leaked_rows) == 0 - ), "Found leaked rows remaining after TTL was updated to short TTL" + assert len(leaked_rows) == 0, "Found leaked rows remaining after 
TTL was updated to short TTL" # Final check that the server contains no leaked connections still open rows = get_activity() @@ -262,28 +246,28 @@ def get_many_connections(count, ttl): @pytest.mark.integration -@pytest.mark.usefixtures("dd_environment") +@pytest.mark.usefixtures('dd_environment') def test_conn_pool_hit_pool_limit_and_evict(pg_instance): """ Test creating a limited connection pool and adding connections until the pool is filled. Then, set one to Done and try to evict it. """ limit = 5 - check = PostgreSql("postgres", {}, [pg_instance]) + check = PostgreSql('postgres', {}, [pg_instance]) pool = MultiDatabaseConnectionPool(check._new_connection, limit) for i in range(limit): - pool.get_connection("dogs_{}".format(i), 60000) + pool.get_connection('dogs_{}'.format(i), 60000) assert pool._stats.connection_opened == limit # ask for one more connection db = pool.get_connection("dogs_10", 60000) - assert db == None + assert db is None # try to evict; none were marked inactive evicted = pool.evict_lru() - assert evicted == None + assert evicted is None assert pool._stats.connection_closed == 0 expected_evicted = "dogs_3" @@ -294,11 +278,11 @@ def test_conn_pool_hit_pool_limit_and_evict(pg_instance): # ask for another connection again db = pool.get_connection("dogs_50", 60000) - assert db != None + assert db is not None @pytest.mark.integration -@pytest.mark.usefixtures("dd_environment") +@pytest.mark.usefixtures('dd_environment') def test_conn_pool_multithreaded(pg_instance): """ Test creating a limited connection pool that is shared among several threads. @@ -310,14 +294,12 @@ def pretend_to_run_query(pool, dbname): pool.done(dbname) limit = 30 - check = PostgreSql("postgres", {}, [pg_instance]) + check = PostgreSql('postgres', {}, [pg_instance]) pool = MultiDatabaseConnectionPool(check._new_connection, limit) - threadpool = list() + threadpool = [] for i in range(limit): - thread = threading.Thread( - target=pretend_to_run_query, args=(pool, "dogs_{}".format(i)) - ) + thread = threading.Thread(target=pretend_to_run_query, args=(pool, 'dogs_{}'.format(i))) threadpool.append(thread) thread.start() @@ -325,12 +307,12 @@ def pretend_to_run_query(pool, dbname): assert pool._stats.connection_opened == limit # ask for one more connection - db = pool.get_connection("dogs_{}".format(limit + 1), 1) - assert db == None + db = pool.get_connection('dogs_{}'.format(limit + 1), 1) + assert db is None # try to evict; should be too early evicted = pool.evict_lru() - assert evicted == None + assert evicted is None assert pool._stats.connection_closed == 0 # try to call done from wrong thread @@ -343,10 +325,10 @@ def pretend_to_run_query(pool, dbname): while True: evicted = pool.evict_lru() - if evicted == None: + if evicted is None: break assert pool._stats.connection_closed == limit # now can add a new connection! 
- db = pool.get_connection("dogs_{}".format(limit + 1), 60000) - assert db != None + db = pool.get_connection('dogs_{}'.format(limit + 1), 60000) + assert db is not None From 09b8e999bad7ed9669ce9d620bb55f5b65f35d52 Mon Sep 17 00:00:00 2001 From: Eden Date: Fri, 16 Jun 2023 20:08:26 +0000 Subject: [PATCH 07/86] style error --- postgres/datadog_checks/postgres/connections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/postgres/datadog_checks/postgres/connections.py b/postgres/datadog_checks/postgres/connections.py index cfd262598d065..e38d96a34c406 100644 --- a/postgres/datadog_checks/postgres/connections.py +++ b/postgres/datadog_checks/postgres/connections.py @@ -40,7 +40,7 @@ class MultiDatabaseConnectionPool(object): If max_conns is specified, the connection pool will limit concurrent connections. Connection eviction should be handled by the calling code. Call done() on a connection when it is no longer necessary, so it will be marked evictable. - If the connection pool is full, try to evict a connection with evict_lru() until a + If the connection pool is full, try to evict a connection with evict_lru() until a connection is evicted, then try get_connection() again. """ From 1e4738945dc03544da61997944c03945305270fc Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 20 Jun 2023 15:28:50 +0000 Subject: [PATCH 08/86] add context managed connection, change eviction algo --- .../datadog_checks/postgres/connections.py | 62 +++++++++++------- postgres/tests/test_connections.py | 65 ++++++++++++++++--- 2 files changed, 95 insertions(+), 32 deletions(-) diff --git a/postgres/datadog_checks/postgres/connections.py b/postgres/datadog_checks/postgres/connections.py index e38d96a34c406..d9749c87117d9 100644 --- a/postgres/datadog_checks/postgres/connections.py +++ b/postgres/datadog_checks/postgres/connections.py @@ -1,6 +1,7 @@ # (C) Datadog, Inc. 2023-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +import contextlib import datetime import inspect import threading @@ -72,20 +73,28 @@ def __init__(self, connect_fn: Callable[[str], None], max_conns: int = None): ) self.connect_fn = connect_fn - def get_connection(self, dbname: str, ttl_ms: int): + def get_connection(self, dbname: str, ttl_ms: int, timeout: int = None) -> psycopg2.extensions.connection: """ Grab a connection from the pool if the database is already connected. If max_conns is specified, and the database isn't already connected, - make a new connection IFF the max_conn limit hasn't been reached. - If we can't fit the connection into the pool, return None. + make a new connection if the max_conn limit hasn't been reached. + Blocks until a connection can be added to the pool, + and optionally takes a timeout in seconds. 
""" + start = datetime.datetime.now() self.prune_connections() with self._mu: conn = self._conns.pop(dbname, ConnectionInfo(None, None, None, None, None)) db = conn.connection if db is None or db.closed: if self.max_conns is not None and len(self._conns) == self.max_conns: - return None + # try to free space until we succeed + while len(self._conns) >= self.max_conns: + self.prune_connections() + self.evict_lru() + if timeout != None and (datetime.datetime.now() - start).total_seconds() > timeout: + raise TimeoutError + continue self._stats.connection_opened += 1 db = self.connect_fn(dbname) @@ -103,6 +112,25 @@ def get_connection(self, dbname: str, ttl_ms: int): ) return db + @contextlib.contextmanager + def get_connection_cm(self, dbname: str, ttl_ms: int, timeout: int = None) -> psycopg2.extensions.connection: + """ + Context managed version of get_connection. + TODO: We should eventually move all connection logic in the postgres integration to + use context managed connections from one pool. Then, we can combine + get_connection and get_connection_context_managed, so no connections can be grabbed + out of context. + """ + try: + with self._mu: + db = None + db = self.get_connection(dbname, ttl_ms, timeout) + yield db + finally: + with self._mu: + if db is not None: + self._conns[dbname].active = False + def prune_connections(self): """ This function should be called periodically to prune all connections which have not been @@ -141,35 +169,23 @@ def done(self, dbname: str) -> None: "Cannot call done() for this dbname on this thread. Done() can only be called \ from the same thread the connection was made." ) - self._conns[dbname].active = False def evict_lru(self) -> str: """ Evict and close the inactive connection which was least recently used. - Return the dbname connection that was evicted. + Return the dbname connection that was evicted or None if we couldn't evict a connection. """ with self._mu: - conns_list = dict(self._conns) - while True: - if not conns_list: - break + sorted_conns = sorted(self._conns.items(), key=lambda i: i[1].last_accessed) + for name, conn_info in sorted_conns: + if not conn_info.active: + self._terminate_connection_unsafe(name) + return name - eviction_candidate = self._get_lru(conns_list) - if self._conns[eviction_candidate].active: - del conns_list[eviction_candidate] - continue - - # eviction candidate successfully found - self._terminate_connection_unsafe(eviction_candidate) - return eviction_candidate - - # Could not evict a candidate; return None, calling code should keep trying. 
+ # Could not evict a candidate; return None return None - def _get_lru(self, connections: Dict[str, ConnectionInfo]) -> str: - return min(connections, key=lambda t: self._conns[t].last_accessed) - def _terminate_connection_unsafe(self, dbname: str): db = self._conns.pop(dbname, ConnectionInfo(None, None, None, None, None)).connection if db is not None: diff --git a/postgres/tests/test_connections.py b/postgres/tests/test_connections.py index 75bb27ae63148..24105382fa967 100644 --- a/postgres/tests/test_connections.py +++ b/postgres/tests/test_connections.py @@ -262,8 +262,8 @@ def test_conn_pool_hit_pool_limit_and_evict(pg_instance): assert pool._stats.connection_opened == limit # ask for one more connection - db = pool.get_connection("dogs_10", 60000) - assert db is None + with pytest.raises(TimeoutError): + db = pool.get_connection("dogs_10", 60000, 1) # try to evict; none were marked inactive evicted = pool.evict_lru() @@ -276,9 +276,8 @@ def test_conn_pool_hit_pool_limit_and_evict(pg_instance): assert evicted == expected_evicted assert pool._stats.connection_closed == 1 - # ask for another connection again - db = pool.get_connection("dogs_50", 60000) - assert db is not None + # ask for another connection again, error not raised + db = pool.get_connection("dogs_50", 60000, 1) @pytest.mark.integration @@ -307,8 +306,8 @@ def pretend_to_run_query(pool, dbname): assert pool._stats.connection_opened == limit # ask for one more connection - db = pool.get_connection('dogs_{}'.format(limit + 1), 1) - assert db is None + with pytest.raises(TimeoutError): + db = pool.get_connection('dogs_{}'.format(limit + 1), 1, 1) # try to evict; should be too early evicted = pool.evict_lru() @@ -330,5 +329,53 @@ def pretend_to_run_query(pool, dbname): assert pool._stats.connection_closed == limit # now can add a new connection! - db = pool.get_connection('dogs_{}'.format(limit + 1), 60000) - assert db is not None + db = pool.get_connection('dogs_{}'.format(limit + 1), 60000, 1) + + +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_conn_pool_context_managed(pg_instance): + """ + Test context manager API for connection grabbing. 
+ """ + + def pretend_to_run_query(pool, dbname): + with pool.get_connection_cm(dbname, 10000) as conn: + time.sleep(5) + + limit = 30 + check = PostgreSql('postgres', {}, [pg_instance]) + + pool = MultiDatabaseConnectionPool(check._new_connection, limit) + threadpool = [] + for i in range(limit): + print(i) + thread = threading.Thread(target=pretend_to_run_query, args=(pool, 'dogs_{}'.format(i))) + threadpool.append(thread) + thread.start() + + time.sleep(1) + assert pool._stats.connection_opened == limit + + # ask for one more connection + with pytest.raises(TimeoutError): + with pool.get_connection_cm('dogs_{}'.format(limit + 1), 1, 1): + pass + + # try to call done from wrong thread + with pytest.raises(RuntimeError): + pool.done("dogs_3") + + # join threads + for thread in threadpool: + thread.join() + + # now can add a new connection, one will get kicked out of pool + with pool.get_connection_cm('dogs_{}'.format(limit + 1), 60000) as conn: + pass + + assert pool._stats.connection_closed == 1 + + # close the rest + pool.close_all_connections() + assert pool._stats.connection_closed == limit + 1 From a7efdba8a099308a89434c5459c3b8b37d3a12b1 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 20 Jun 2023 15:50:58 +0000 Subject: [PATCH 09/86] push dependencies --- postgres/pyproject.toml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/postgres/pyproject.toml b/postgres/pyproject.toml index cce2d3ac694e3..f08b66a50d299 100644 --- a/postgres/pyproject.toml +++ b/postgres/pyproject.toml @@ -10,6 +10,7 @@ build-backend = "hatchling.build" name = "datadog-postgres" description = "The Postgres check" readme = "README.md" +requires-python = ">=3.8" keywords = [ "datadog", "datadog agent", @@ -24,7 +25,6 @@ classifiers = [ "Intended Audience :: Developers", "Intended Audience :: System Administrators", "License :: OSI Approved :: BSD License", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3.9", "Topic :: System :: Monitoring", "Private :: Do Not Upload", @@ -40,11 +40,8 @@ license = "BSD-3-Clause" [project.optional-dependencies] deps = [ - "boto3==1.17.112; python_version < '3.0'", "boto3==1.26.138; python_version > '3.0'", - "cachetools==3.1.1; python_version < '3.0'", "cachetools==5.3.0; python_version > '3.0'", - "futures==3.4.0; python_version < '3.0'", "psycopg2-binary==2.8.6; sys_platform != 'darwin' or platform_machine != 'arm64'", "semver==2.13.0", ] From 1d7b89dbcdec421e69d4b6e2b20d3b1089562b47 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 20 Jun 2023 16:28:47 +0000 Subject: [PATCH 10/86] update docstring --- postgres/datadog_checks/postgres/connections.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/postgres/datadog_checks/postgres/connections.py b/postgres/datadog_checks/postgres/connections.py index d9749c87117d9..ec4007396bb9d 100644 --- a/postgres/datadog_checks/postgres/connections.py +++ b/postgres/datadog_checks/postgres/connections.py @@ -39,10 +39,6 @@ class MultiDatabaseConnectionPool(object): TTL. If max_conns is specified, the connection pool will limit concurrent connections. - Connection eviction should be handled by the calling code. Call done() on a connection - when it is no longer necessary, so it will be marked evictable. - If the connection pool is full, try to evict a connection with evict_lru() until a - connection is evicted, then try get_connection() again. 
""" class Stats(object): From 185ee49559ef038c7b14f0a57bfd65a3513013bc Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 20 Jun 2023 17:46:42 +0000 Subject: [PATCH 11/86] style --- postgres/datadog_checks/postgres/connections.py | 2 +- postgres/tests/test_connections.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/postgres/datadog_checks/postgres/connections.py b/postgres/datadog_checks/postgres/connections.py index ec4007396bb9d..01a0008682f6c 100644 --- a/postgres/datadog_checks/postgres/connections.py +++ b/postgres/datadog_checks/postgres/connections.py @@ -88,7 +88,7 @@ def get_connection(self, dbname: str, ttl_ms: int, timeout: int = None) -> psyco while len(self._conns) >= self.max_conns: self.prune_connections() self.evict_lru() - if timeout != None and (datetime.datetime.now() - start).total_seconds() > timeout: + if timeout is not None and (datetime.datetime.now() - start).total_seconds() > timeout: raise TimeoutError continue self._stats.connection_opened += 1 diff --git a/postgres/tests/test_connections.py b/postgres/tests/test_connections.py index 24105382fa967..6aa7025431120 100644 --- a/postgres/tests/test_connections.py +++ b/postgres/tests/test_connections.py @@ -263,7 +263,7 @@ def test_conn_pool_hit_pool_limit_and_evict(pg_instance): # ask for one more connection with pytest.raises(TimeoutError): - db = pool.get_connection("dogs_10", 60000, 1) + pool.get_connection("dogs_10", 60000, 1) # try to evict; none were marked inactive evicted = pool.evict_lru() @@ -277,7 +277,7 @@ def test_conn_pool_hit_pool_limit_and_evict(pg_instance): assert pool._stats.connection_closed == 1 # ask for another connection again, error not raised - db = pool.get_connection("dogs_50", 60000, 1) + pool.get_connection("dogs_50", 60000, 1) @pytest.mark.integration @@ -307,7 +307,7 @@ def pretend_to_run_query(pool, dbname): # ask for one more connection with pytest.raises(TimeoutError): - db = pool.get_connection('dogs_{}'.format(limit + 1), 1, 1) + pool.get_connection('dogs_{}'.format(limit + 1), 1, 1) # try to evict; should be too early evicted = pool.evict_lru() @@ -329,7 +329,7 @@ def pretend_to_run_query(pool, dbname): assert pool._stats.connection_closed == limit # now can add a new connection! 
- db = pool.get_connection('dogs_{}'.format(limit + 1), 60000, 1) + pool.get_connection('dogs_{}'.format(limit + 1), 60000, 1) @pytest.mark.integration @@ -340,7 +340,7 @@ def test_conn_pool_context_managed(pg_instance): """ def pretend_to_run_query(pool, dbname): - with pool.get_connection_cm(dbname, 10000) as conn: + with pool.get_connection_cm(dbname, 10000): time.sleep(5) limit = 30 @@ -371,7 +371,7 @@ def pretend_to_run_query(pool, dbname): thread.join() # now can add a new connection, one will get kicked out of pool - with pool.get_connection_cm('dogs_{}'.format(limit + 1), 60000) as conn: + with pool.get_connection_cm('dogs_{}'.format(limit + 1), 60000): pass assert pool._stats.connection_closed == 1 From c9a6fec913f4817bda4658cfa20131e4ff23ecff Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 20 Jun 2023 19:18:42 +0000 Subject: [PATCH 12/86] remove unnecessary code, update references --- .../datadog_checks/postgres/connections.py | 52 ++++----- postgres/tests/test_connections.py | 109 ++++-------------- 2 files changed, 44 insertions(+), 117 deletions(-) diff --git a/postgres/datadog_checks/postgres/connections.py b/postgres/datadog_checks/postgres/connections.py index 01a0008682f6c..bfa9d3ed26d15 100644 --- a/postgres/datadog_checks/postgres/connections.py +++ b/postgres/datadog_checks/postgres/connections.py @@ -5,11 +5,22 @@ import datetime import inspect import threading +import time from typing import Callable, Dict import psycopg2 +class ConnectionPoolFullError(Exception): + def __init__(self, size, timeout): + self.size = size + self.timeout = timeout + super().__init__() + + def __str__(self): + return "Could not insert connection in pool size {} within {} seconds".format(self.size, self.timeout) + + class ConnectionInfo: def __init__( self, @@ -69,13 +80,9 @@ def __init__(self, connect_fn: Callable[[str], None], max_conns: int = None): ) self.connect_fn = connect_fn - def get_connection(self, dbname: str, ttl_ms: int, timeout: int = None) -> psycopg2.extensions.connection: + def _get_connection_raw(self, dbname: str, ttl_ms: int, timeout: int = None) -> psycopg2.extensions.connection: """ - Grab a connection from the pool if the database is already connected. - If max_conns is specified, and the database isn't already connected, - make a new connection if the max_conn limit hasn't been reached. - Blocks until a connection can be added to the pool, - and optionally takes a timeout in seconds. + Return a connection from the pool. """ start = datetime.datetime.now() self.prune_connections() @@ -89,7 +96,8 @@ def get_connection(self, dbname: str, ttl_ms: int, timeout: int = None) -> psyco self.prune_connections() self.evict_lru() if timeout is not None and (datetime.datetime.now() - start).total_seconds() > timeout: - raise TimeoutError + raise ConnectionPoolFullError(self.max_conns, timeout) + time.sleep(0.001) continue self._stats.connection_opened += 1 db = self.connect_fn(dbname) @@ -109,18 +117,18 @@ def get_connection(self, dbname: str, ttl_ms: int, timeout: int = None) -> psyco return db @contextlib.contextmanager - def get_connection_cm(self, dbname: str, ttl_ms: int, timeout: int = None) -> psycopg2.extensions.connection: + def get_connection(self, dbname: str, ttl_ms: int, timeout: int = None): """ - Context managed version of get_connection. - TODO: We should eventually move all connection logic in the postgres integration to - use context managed connections from one pool. 
Then, we can combine - get_connection and get_connection_context_managed, so no connections can be grabbed - out of context. + Grab a connection from the pool if the database is already connected. + If max_conns is specified, and the database isn't already connected, + make a new connection if the max_conn limit hasn't been reached. + Blocks until a connection can be added to the pool, + and optionally takes a timeout in seconds. """ try: with self._mu: db = None - db = self.get_connection(dbname, ttl_ms, timeout) + db = self._get_connection_raw(dbname, ttl_ms, timeout) yield db finally: with self._mu: @@ -151,22 +159,6 @@ def close_all_connections(self): success = False return success - def done(self, dbname: str) -> None: - """ - Mark a connection as done being used, so it can be evicted from the pool. - This function does not evict connections from the pool; it just marks them - as inactive. - done() can only be called on a connection in the same thread that the connection - was made. - """ - with self._mu: - if self._conns[dbname].thread != threading.current_thread(): - raise RuntimeError( - "Cannot call done() for this dbname on this thread. Done() can only be called \ - from the same thread the connection was made." - ) - self._conns[dbname].active = False - def evict_lru(self) -> str: """ Evict and close the inactive connection which was least recently used. diff --git a/postgres/tests/test_connections.py b/postgres/tests/test_connections.py index 6aa7025431120..7b472a4dcf8c3 100644 --- a/postgres/tests/test_connections.py +++ b/postgres/tests/test_connections.py @@ -11,7 +11,7 @@ import pytest from datadog_checks.postgres import PostgreSql -from datadog_checks.postgres.connections import MultiDatabaseConnectionPool +from datadog_checks.postgres.connections import ConnectionPoolFullError, MultiDatabaseConnectionPool from .common import HOST, PASSWORD_ADMIN, USER_ADMIN @@ -26,7 +26,7 @@ def test_conn_pool(pg_instance): check = PostgreSql('postgres', {}, [pg_instance]) pool = MultiDatabaseConnectionPool(check._new_connection) - db = pool.get_connection('postgres', 1) + db = pool._get_connection_raw('postgres', 1) assert pool._stats.connection_opened == 1 pool.prune_connections() assert len(pool._conns) == 1 @@ -44,7 +44,7 @@ def test_conn_pool(pg_instance): assert pool._stats.connection_closed_failed == 0 assert pool._stats.connection_pruned == 1 - db = pool.get_connection('postgres', 999 * 1000) + db = pool._get_connection_raw('postgres', 999 * 1000) assert len(pool._conns) == 1 assert pool._stats.connection_opened == 2 success = pool.close_all_connections() @@ -79,7 +79,8 @@ def get_activity(): """ Fetches all pg_stat_activity rows generated by this test and connection to a "dogs%" database """ - with pool2.get_connection('postgres', 1).cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: + with pool2.get_connection('postgres', 1) as conn: + cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) cursor.execute( "SELECT pid, datname, usename, state, query_start, state_change, application_name" " FROM pg_stat_activity" @@ -91,7 +92,7 @@ def get_activity(): conn_count = 100 for i in range(0, conn_count): dbname = 'dogs_{}'.format(i) - db = pool.get_connection(dbname, 10 * 1000) + db = pool._get_connection_raw(dbname, 10 * 1000) with db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: cursor.execute("select current_database()") rows = cursor.fetchall() @@ -142,7 +143,8 @@ def get_activity(): """ Fetches all pg_stat_activity rows generated by this test and 
connection to a "dogs%" database """ - with pool2.get_connection('postgres', 1).cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: + with pool2.get_connection('postgres', 1) as conn: + cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) cursor.execute( "SELECT pid, datname, usename, state, query_start, state_change, application_name" " FROM pg_stat_activity" @@ -157,7 +159,7 @@ def get_many_connections(count, ttl): """ for i in range(0, count): dbname = 'dogs_{}'.format(i) - db = pool.get_connection(dbname, ttl) + db = pool._get_connection_raw(dbname, ttl) with db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: cursor.execute("select current_database()") rows = cursor.fetchall() @@ -247,89 +249,26 @@ def get_many_connections(count, ttl): @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') -def test_conn_pool_hit_pool_limit_and_evict(pg_instance): +def test_conn_pool_single_context(pg_instance): """ - Test creating a limited connection pool and adding connections - until the pool is filled. Then, set one to Done and try to evict it. + Test creating a single connection. """ - limit = 5 check = PostgreSql('postgres', {}, [pg_instance]) - pool = MultiDatabaseConnectionPool(check._new_connection, limit) - for i in range(limit): - pool.get_connection('dogs_{}'.format(i), 60000) - - assert pool._stats.connection_opened == limit - - # ask for one more connection - with pytest.raises(TimeoutError): - pool.get_connection("dogs_10", 60000, 1) + pool = MultiDatabaseConnectionPool(check._new_connection) + with pool.get_connection("dogs_0", 1000): + pass - # try to evict; none were marked inactive - evicted = pool.evict_lru() - assert evicted is None - assert pool._stats.connection_closed == 0 + assert pool._stats.connection_opened == 1 - expected_evicted = "dogs_3" - pool.done(expected_evicted) + expected_evicted = "dogs_0" evicted = pool.evict_lru() assert evicted == expected_evicted assert pool._stats.connection_closed == 1 # ask for another connection again, error not raised - pool.get_connection("dogs_50", 60000, 1) - - -@pytest.mark.integration -@pytest.mark.usefixtures('dd_environment') -def test_conn_pool_multithreaded(pg_instance): - """ - Test creating a limited connection pool that is shared among several threads. - """ - - def pretend_to_run_query(pool, dbname): - pool.get_connection(dbname, 10000) - time.sleep(5) - pool.done(dbname) - - limit = 30 - check = PostgreSql('postgres', {}, [pg_instance]) - - pool = MultiDatabaseConnectionPool(check._new_connection, limit) - threadpool = [] - for i in range(limit): - thread = threading.Thread(target=pretend_to_run_query, args=(pool, 'dogs_{}'.format(i))) - threadpool.append(thread) - thread.start() - - time.sleep(1) - assert pool._stats.connection_opened == limit - - # ask for one more connection - with pytest.raises(TimeoutError): - pool.get_connection('dogs_{}'.format(limit + 1), 1, 1) - - # try to evict; should be too early - evicted = pool.evict_lru() - assert evicted is None - assert pool._stats.connection_closed == 0 - - # try to call done from wrong thread - with pytest.raises(RuntimeError): - pool.done("dogs_3") - - # join threads - for thread in threadpool: - thread.join() - - while True: - evicted = pool.evict_lru() - if evicted is None: - break - - assert pool._stats.connection_closed == limit - # now can add a new connection! 
- pool.get_connection('dogs_{}'.format(limit + 1), 60000, 1) + with pool.get_connection("dogs_1", 1000): + pass @pytest.mark.integration @@ -340,7 +279,7 @@ def test_conn_pool_context_managed(pg_instance): """ def pretend_to_run_query(pool, dbname): - with pool.get_connection_cm(dbname, 10000): + with pool.get_connection(dbname, 10000): time.sleep(5) limit = 30 @@ -358,20 +297,16 @@ def pretend_to_run_query(pool, dbname): assert pool._stats.connection_opened == limit # ask for one more connection - with pytest.raises(TimeoutError): - with pool.get_connection_cm('dogs_{}'.format(limit + 1), 1, 1): + with pytest.raises(ConnectionPoolFullError): + with pool.get_connection('dogs_{}'.format(limit + 1), 1, 1): pass - # try to call done from wrong thread - with pytest.raises(RuntimeError): - pool.done("dogs_3") - # join threads for thread in threadpool: thread.join() # now can add a new connection, one will get kicked out of pool - with pool.get_connection_cm('dogs_{}'.format(limit + 1), 60000): + with pool.get_connection('dogs_{}'.format(limit + 1), 60000): pass assert pool._stats.connection_closed == 1 From b6a5ddd4f68b0a53e5ef20bab18a1b281e8b93f6 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 20 Jun 2023 19:19:32 +0000 Subject: [PATCH 13/86] update references --- postgres/pyproject.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/postgres/pyproject.toml b/postgres/pyproject.toml index f08b66a50d299..cce2d3ac694e3 100644 --- a/postgres/pyproject.toml +++ b/postgres/pyproject.toml @@ -10,7 +10,6 @@ build-backend = "hatchling.build" name = "datadog-postgres" description = "The Postgres check" readme = "README.md" -requires-python = ">=3.8" keywords = [ "datadog", "datadog agent", @@ -25,6 +24,7 @@ classifiers = [ "Intended Audience :: Developers", "Intended Audience :: System Administrators", "License :: OSI Approved :: BSD License", + "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3.9", "Topic :: System :: Monitoring", "Private :: Do Not Upload", @@ -40,8 +40,11 @@ license = "BSD-3-Clause" [project.optional-dependencies] deps = [ + "boto3==1.17.112; python_version < '3.0'", "boto3==1.26.138; python_version > '3.0'", + "cachetools==3.1.1; python_version < '3.0'", "cachetools==5.3.0; python_version > '3.0'", + "futures==3.4.0; python_version < '3.0'", "psycopg2-binary==2.8.6; sys_platform != 'darwin' or platform_machine != 'arm64'", "semver==2.13.0", ] From 10f52376ac142c9d92cd9af779a5ad058382d4a5 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 13 Jun 2023 17:50:16 +0000 Subject: [PATCH 14/86] init commit --- .../postgres/autodiscovery-script.py | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 postgres/datadog_checks/postgres/autodiscovery-script.py diff --git a/postgres/datadog_checks/postgres/autodiscovery-script.py b/postgres/datadog_checks/postgres/autodiscovery-script.py new file mode 100644 index 0000000000000..6da301a3f6dce --- /dev/null +++ b/postgres/datadog_checks/postgres/autodiscovery-script.py @@ -0,0 +1,116 @@ +# Proof of concept script for Postgres db autodiscovery. +# Pass a host to discover databases on that host. 
+ +from typing import List, Callable +import psycopg2 +import sys +import os + +sys.path.append(os.path.abspath("/home/ec2-user/dd/integrations-core/datadog_checks_base")) +from datadog_checks.base.utils.discovery import Discovery + +# sys.path.append(os.path.abspath("/home/ec2-user/dd/integrations-core")) +from relationsmanager import RelationsManager, INDEX_BLOAT_QUERY +from connections import MultiDatabaseConnectionPool + + +AUTODISCOVERY_QUERY: str = """select {columns} from pg_catalog.pg_database where datistemplate = false;""" + +class MultiDatabaseConnectionPoolLimited(MultiDatabaseConnectionPool): + def __init__(self, connect_fn: Callable[[str], None], max_conn: int): + super().__init__(connect_fn) + self.max_conn = max_conn + self.default_ttl_ms = 100 + + def get_connection(self, dbname: str = None) -> psycopg2.extensions.connection: + if len(self._conns) < self.max_conn: + conn = super().get_connection(dbname, self.default_ttl_ms) + return conn + + # if too many connections in pool, loop until a connection is freed + # TODO: should implement a timeout + while len(self._conns) > self.max_conn: + self.prune_connections() + continue + + conn = super().get_connection(dbname, self.default_ttl_ms) + return conn + + +class PostgresAutodiscovery(Discovery): + def __init__(self, host: str, max_conn: int) -> None: + super(PostgresAutodiscovery, self).__init__(self._get_databases, include={'.*': 10}, exclude=[]) + self.host = host + relations_config = [{'relation_regex': '.*'}] + self._relations_manager = RelationsManager(relations_config) + self._conn_pool: MultiDatabaseConnectionPoolLimited = MultiDatabaseConnectionPoolLimited(self._connect, max_conn) + # get once to cache + self.get_items() + # self._conn_pool = psycopg2.ThreadedConnectionPool(minconn=0, maxconn=maxconn, user="postgres", password="p0stgres", port="5432") + + def get_items(self) -> List[str]: + """ + Get_items() from parent class returns a generator with four objects: + > yield pattern, key(item), item, config + This function takes the item of interest (dbname) from this four-tuple + and returns the full list of database names from the generator. 
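        For illustration only (the tuples here are made up), the reduction looks like:

            raw    = [(".*", "dogs_0", "dogs_0", None), (".*", "dogs_1", "dogs_1", None)]
            parsed = [item[1] for item in raw]   # -> ["dogs_0", "dogs_1"]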
+ """ + items = list(super(PostgresAutodiscovery, self).get_items()) + items_parsed = [item[1] for item in items] + return items_parsed + + def _connect(self, dbname: str = None) -> None: + # Use ident method + connection_string = "host="+self.host+" user=postgres password=p0stgres" + if dbname is not None: + connection_string += " dbname=" + dbname + conn = psycopg2.connect(connection_string) + + print("connected") + return conn + + def _get_autodiscovery_query(self) -> str: + autodiscovery_query = AUTODISCOVERY_QUERY.format(columns=', '.join(['datname'])) + return autodiscovery_query + + def _get_databases(self) -> List[str]: + conn = self._conn_pool.get_connection() + cursor = conn.cursor() + autodiscovery_query = self._get_autodiscovery_query() + cursor.execute(autodiscovery_query) + databases = list(cursor.fetchall()) + databases = [x[0] for x in databases] # fetchall returns list of tuples representing rows, so need to parse + print("got", databases) + return databases + + def query_relations(self, database: str) -> None: + # print(cached_dbs) + conn = self._conn_pool.get_connection(database) + cursor = conn.cursor() + formatted_query = self._relations_manager.filter_relation_query(INDEX_BLOAT_QUERY, "schemaname") + cursor.execute(formatted_query) + relations = list(cursor.fetchall()) + # print(relations) + + def query_relations_all_databases(self) -> None: + self._print_num_connections() + databases = self.get_items() + for database in databases: + print("getting relations from", database) + self._print_num_connections() + self.query_relations(database) + self._print_num_connections() + + def _print_num_connections(self) -> None: + conn = self._conn_pool.get_connection() + cursor = conn.cursor() + cursor.execute("SELECT sum(numbackends) FROM pg_stat_database;") + rows = list(cursor.fetchall()) + print("NUM CONNECTIONS IS",rows[0]) + +if __name__ == "__main__": + discovery = PostgresAutodiscovery("0.0.0.0", 2) + a_database = discovery.get_items()[0] + discovery._print_num_connections() + discovery.query_relations(a_database) + discovery.query_relations_all_databases() \ No newline at end of file From 6dfd9edf495c4146f67eece0de1f95fd163e2461 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 20 Jun 2023 16:19:32 +0000 Subject: [PATCH 15/86] update base branch --- .../postgres/autodiscovery-script.py | 75 +++++++++++-------- 1 file changed, 44 insertions(+), 31 deletions(-) diff --git a/postgres/datadog_checks/postgres/autodiscovery-script.py b/postgres/datadog_checks/postgres/autodiscovery-script.py index 6da301a3f6dce..f83bb8bbd7f79 100644 --- a/postgres/datadog_checks/postgres/autodiscovery-script.py +++ b/postgres/datadog_checks/postgres/autodiscovery-script.py @@ -5,37 +5,21 @@ import psycopg2 import sys import os +import threading +import datetime sys.path.append(os.path.abspath("/home/ec2-user/dd/integrations-core/datadog_checks_base")) + +from util import fmt from datadog_checks.base.utils.discovery import Discovery # sys.path.append(os.path.abspath("/home/ec2-user/dd/integrations-core")) -from relationsmanager import RelationsManager, INDEX_BLOAT_QUERY -from connections import MultiDatabaseConnectionPool +from relationsmanager import RelationsManager, SIZE_METRICS +from connections import MultiDatabaseConnectionPoolLimited AUTODISCOVERY_QUERY: str = """select {columns} from pg_catalog.pg_database where datistemplate = false;""" -class MultiDatabaseConnectionPoolLimited(MultiDatabaseConnectionPool): - def __init__(self, connect_fn: Callable[[str], None], max_conn: int): - 
super().__init__(connect_fn) - self.max_conn = max_conn - self.default_ttl_ms = 100 - - def get_connection(self, dbname: str = None) -> psycopg2.extensions.connection: - if len(self._conns) < self.max_conn: - conn = super().get_connection(dbname, self.default_ttl_ms) - return conn - - # if too many connections in pool, loop until a connection is freed - # TODO: should implement a timeout - while len(self._conns) > self.max_conn: - self.prune_connections() - continue - - conn = super().get_connection(dbname, self.default_ttl_ms) - return conn - class PostgresAutodiscovery(Discovery): def __init__(self, host: str, max_conn: int) -> None: @@ -43,8 +27,9 @@ def __init__(self, host: str, max_conn: int) -> None: self.host = host relations_config = [{'relation_regex': '.*'}] self._relations_manager = RelationsManager(relations_config) - self._conn_pool: MultiDatabaseConnectionPoolLimited = MultiDatabaseConnectionPoolLimited(self._connect, max_conn) - # get once to cache + self._conn_pool = MultiDatabaseConnectionPoolLimited(self._connect, max_conn) + self.default_ttl = 60000 + # get once to cache dbs self.get_items() # self._conn_pool = psycopg2.ThreadedConnectionPool(minconn=0, maxconn=maxconn, user="postgres", password="p0stgres", port="5432") @@ -74,7 +59,7 @@ def _get_autodiscovery_query(self) -> str: return autodiscovery_query def _get_databases(self) -> List[str]: - conn = self._conn_pool.get_connection() + conn = self._conn_pool.get_connection('postgres', self.default_ttl) cursor = conn.cursor() autodiscovery_query = self._get_autodiscovery_query() cursor.execute(autodiscovery_query) @@ -85,32 +70,60 @@ def _get_databases(self) -> List[str]: def query_relations(self, database: str) -> None: # print(cached_dbs) - conn = self._conn_pool.get_connection(database) + conn = self._conn_pool.get_connection(database, self.default_ttl) cursor = conn.cursor() - formatted_query = self._relations_manager.filter_relation_query(INDEX_BLOAT_QUERY, "schemaname") + query = fmt.format(SIZE_METRICS['query'], metrics_columns=", ".join(SIZE_METRICS['metrics'])) + formatted_query = self._relations_manager.filter_relation_query(query, "nspname") cursor.execute(formatted_query) relations = list(cursor.fetchall()) # print(relations) - def query_relations_all_databases(self) -> None: + def query_relations_all_databases_threaded(self) -> None: self._print_num_connections() databases = self.get_items() + + db_threads = list() + for database in databases: + print("getting relations from", database) + self._print_num_connections() + thread = threading.Thread(target=self.query_relations, args=(database,)) + db_threads.append(thread) + thread.start() + + for index, thread in enumerate(db_threads): + thread.join() + + self._print_num_connections() + + def query_relations_all_databases_sync(self) -> None: + self._print_num_connections() + databases = self.get_items() + for database in databases: print("getting relations from", database) self._print_num_connections() self.query_relations(database) + self._print_num_connections() + def _print_num_connections(self) -> None: - conn = self._conn_pool.get_connection() + conn = self._conn_pool.get_connection('postgres', self.default_ttl) cursor = conn.cursor() cursor.execute("SELECT sum(numbackends) FROM pg_stat_database;") rows = list(cursor.fetchall()) print("NUM CONNECTIONS IS",rows[0]) if __name__ == "__main__": - discovery = PostgresAutodiscovery("0.0.0.0", 2) + discovery = PostgresAutodiscovery("0.0.0.0", 5) a_database = discovery.get_items()[0] 
discovery._print_num_connections() discovery.query_relations(a_database) - discovery.query_relations_all_databases() \ No newline at end of file + + time = datetime.datetime.now() + discovery.query_relations_all_databases_sync() + print("elapsed: ", datetime.datetime.now() - time, "non-threaded finished") + + time = datetime.datetime.now() + discovery.query_relations_all_databases_threaded() + print("elapsed: ", datetime.datetime.now() - time, "threaded finished") \ No newline at end of file From a0079e7a457e4cef3a2104344cc87080f3b5f29c Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 20 Jun 2023 16:24:58 +0000 Subject: [PATCH 16/86] rebasing --- .../postgres/autodiscovery-script.py | 129 ------------------ postgres/datadog_checks/postgres/discovery.py | 0 2 files changed, 129 deletions(-) delete mode 100644 postgres/datadog_checks/postgres/autodiscovery-script.py create mode 100644 postgres/datadog_checks/postgres/discovery.py diff --git a/postgres/datadog_checks/postgres/autodiscovery-script.py b/postgres/datadog_checks/postgres/autodiscovery-script.py deleted file mode 100644 index f83bb8bbd7f79..0000000000000 --- a/postgres/datadog_checks/postgres/autodiscovery-script.py +++ /dev/null @@ -1,129 +0,0 @@ -# Proof of concept script for Postgres db autodiscovery. -# Pass a host to discover databases on that host. - -from typing import List, Callable -import psycopg2 -import sys -import os -import threading -import datetime - -sys.path.append(os.path.abspath("/home/ec2-user/dd/integrations-core/datadog_checks_base")) - -from util import fmt -from datadog_checks.base.utils.discovery import Discovery - -# sys.path.append(os.path.abspath("/home/ec2-user/dd/integrations-core")) -from relationsmanager import RelationsManager, SIZE_METRICS -from connections import MultiDatabaseConnectionPoolLimited - - -AUTODISCOVERY_QUERY: str = """select {columns} from pg_catalog.pg_database where datistemplate = false;""" - - -class PostgresAutodiscovery(Discovery): - def __init__(self, host: str, max_conn: int) -> None: - super(PostgresAutodiscovery, self).__init__(self._get_databases, include={'.*': 10}, exclude=[]) - self.host = host - relations_config = [{'relation_regex': '.*'}] - self._relations_manager = RelationsManager(relations_config) - self._conn_pool = MultiDatabaseConnectionPoolLimited(self._connect, max_conn) - self.default_ttl = 60000 - # get once to cache dbs - self.get_items() - # self._conn_pool = psycopg2.ThreadedConnectionPool(minconn=0, maxconn=maxconn, user="postgres", password="p0stgres", port="5432") - - def get_items(self) -> List[str]: - """ - Get_items() from parent class returns a generator with four objects: - > yield pattern, key(item), item, config - This function takes the item of interest (dbname) from this four-tuple - and returns the full list of database names from the generator. 
- """ - items = list(super(PostgresAutodiscovery, self).get_items()) - items_parsed = [item[1] for item in items] - return items_parsed - - def _connect(self, dbname: str = None) -> None: - # Use ident method - connection_string = "host="+self.host+" user=postgres password=p0stgres" - if dbname is not None: - connection_string += " dbname=" + dbname - conn = psycopg2.connect(connection_string) - - print("connected") - return conn - - def _get_autodiscovery_query(self) -> str: - autodiscovery_query = AUTODISCOVERY_QUERY.format(columns=', '.join(['datname'])) - return autodiscovery_query - - def _get_databases(self) -> List[str]: - conn = self._conn_pool.get_connection('postgres', self.default_ttl) - cursor = conn.cursor() - autodiscovery_query = self._get_autodiscovery_query() - cursor.execute(autodiscovery_query) - databases = list(cursor.fetchall()) - databases = [x[0] for x in databases] # fetchall returns list of tuples representing rows, so need to parse - print("got", databases) - return databases - - def query_relations(self, database: str) -> None: - # print(cached_dbs) - conn = self._conn_pool.get_connection(database, self.default_ttl) - cursor = conn.cursor() - query = fmt.format(SIZE_METRICS['query'], metrics_columns=", ".join(SIZE_METRICS['metrics'])) - formatted_query = self._relations_manager.filter_relation_query(query, "nspname") - cursor.execute(formatted_query) - relations = list(cursor.fetchall()) - # print(relations) - - def query_relations_all_databases_threaded(self) -> None: - self._print_num_connections() - databases = self.get_items() - - db_threads = list() - for database in databases: - print("getting relations from", database) - self._print_num_connections() - thread = threading.Thread(target=self.query_relations, args=(database,)) - db_threads.append(thread) - thread.start() - - for index, thread in enumerate(db_threads): - thread.join() - - self._print_num_connections() - - def query_relations_all_databases_sync(self) -> None: - self._print_num_connections() - databases = self.get_items() - - for database in databases: - print("getting relations from", database) - self._print_num_connections() - self.query_relations(database) - - self._print_num_connections() - - - def _print_num_connections(self) -> None: - conn = self._conn_pool.get_connection('postgres', self.default_ttl) - cursor = conn.cursor() - cursor.execute("SELECT sum(numbackends) FROM pg_stat_database;") - rows = list(cursor.fetchall()) - print("NUM CONNECTIONS IS",rows[0]) - -if __name__ == "__main__": - discovery = PostgresAutodiscovery("0.0.0.0", 5) - a_database = discovery.get_items()[0] - discovery._print_num_connections() - discovery.query_relations(a_database) - - time = datetime.datetime.now() - discovery.query_relations_all_databases_sync() - print("elapsed: ", datetime.datetime.now() - time, "non-threaded finished") - - time = datetime.datetime.now() - discovery.query_relations_all_databases_threaded() - print("elapsed: ", datetime.datetime.now() - time, "threaded finished") \ No newline at end of file diff --git a/postgres/datadog_checks/postgres/discovery.py b/postgres/datadog_checks/postgres/discovery.py new file mode 100644 index 0000000000000..e69de29bb2d1d From ab5f458eead5e19c4458cd3807d62ec9136993ab Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 20 Jun 2023 16:28:05 +0000 Subject: [PATCH 17/86] discovery init --- postgres/datadog_checks/postgres/discovery.py | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/postgres/datadog_checks/postgres/discovery.py 
b/postgres/datadog_checks/postgres/discovery.py index e69de29bb2d1d..d0c44957e6562 100644 --- a/postgres/datadog_checks/postgres/discovery.py +++ b/postgres/datadog_checks/postgres/discovery.py @@ -0,0 +1,44 @@ +from typing import List, Callable +from datadog_checks.base.utils.discovery import Discovery +import psycopg2 +import logging +from connections import MultiDatabaseConnectionPool + +AUTODISCOVERY_QUERY: str = """select {columns} from pg_catalog.pg_database where datistemplate = false;""" + +class PostgresAutodiscovery(Discovery): + def __init__(self, host: str, include: List[str], exclude: List[str], interval: int, max_databases: int, log: logging.Logger, conn_pool: MultiDatabaseConnectionPool) -> None: + # parent class asks for includelist to be a dictionary + super(PostgresAutodiscovery, self).__init__(self._get_databases, include={'.*': 10}, exclude=[], interval=60) + self.host = host + self._conn_pool = conn_pool + + + def get_items(self) -> List[str]: + """ + Get_items() from parent class returns a generator with four objects: + > yield pattern, key(item), item, config + This function takes the item of interest (dbname) from this four-tuple + and returns the full list of database names from the generator. + """ + items = list(super().get_items()) + items_parsed = [item[1] for item in items] + return items_parsed + + def _get_autodiscovery_query(self) -> str: + autodiscovery_query = AUTODISCOVERY_QUERY.format(columns=', '.join(['datname'])) + return autodiscovery_query + + def _get_databases(self) -> List[str]: + with self._conn_pool.get_connection_cm('postgres', self.default_ttl) + try: + conn = self._conn_pool.get_connection('postgres', self.default_ttl) + except psycopg2.OperationalError: + + cursor = conn.cursor() + autodiscovery_query = self._get_autodiscovery_query() + cursor.execute(autodiscovery_query) + databases = list(cursor.fetchall()) + databases = [x[0] for x in databases] # fetchall returns list of tuples representing rows, so need to parse + print("Databases found were: ", databases) + return databases \ No newline at end of file From dd2a5e6bc202e2d7522294d49326f44b541d9e92 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 20 Jun 2023 20:15:18 +0000 Subject: [PATCH 18/86] reverting quotation mark changes --- postgres/tests/test_connections.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/postgres/tests/test_connections.py b/postgres/tests/test_connections.py index 7b472a4dcf8c3..8ad6df37fd637 100644 --- a/postgres/tests/test_connections.py +++ b/postgres/tests/test_connections.py @@ -206,19 +206,19 @@ def get_many_connections(count, ttl): # The test can be considered successful as long as the backend is eventually terminated. 
for attempt in range(attempts_to_verify): rows = get_activity() - server_pids = {row["pid"] for row in rows} + server_pids = {row['pid'] for row in rows} conns = [c.connection for c in pool._conns.values()] conn_pids = {db.info.backend_pid for db in conns} - leaked_rows = [row for row in rows if row["pid"] in server_pids - conn_pids] + leaked_rows = [row for row in rows if row['pid'] in server_pids - conn_pids] if not leaked_rows: break if attempt < attempts_to_verify - 1: time.sleep(1) continue - assert len(leaked_rows) == 0, "Found leaked rows on the server not in the connection pool" + assert len(leaked_rows) == 0, 'Found leaked rows on the server not in the connection pool' assert len({row['datname'] for row in rows}) == 51 - assert len(rows) == 51, "Possible leaked connections" + assert len(rows) == 51, 'Possible leaked connections' assert all(row['state'] == 'idle' for row in rows) assert pool._stats.connection_opened == 1 assert pool._stats.connection_closed == 0 @@ -240,7 +240,7 @@ def get_many_connections(count, ttl): if attempt < attempts_to_verify - 1: time.sleep(1) continue - assert len(leaked_rows) == 0, "Found leaked rows remaining after TTL was updated to short TTL" + assert len(leaked_rows) == 0, 'Found leaked rows remaining after TTL was updated to short TTL' # Final check that the server contains no leaked connections still open rows = get_activity() From 629c5f6725cf27a7039f731eb3acc8471aa58c6f Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 20 Jun 2023 21:06:36 +0000 Subject: [PATCH 19/86] cleaner code, and fixing references in other places --- .../datadog_checks/postgres/connections.py | 11 ++++++----- postgres/datadog_checks/postgres/metadata.py | 5 ++--- .../postgres/statement_samples.py | 18 ++++++++---------- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/postgres/datadog_checks/postgres/connections.py b/postgres/datadog_checks/postgres/connections.py index bfa9d3ed26d15..264f1adc6103e 100644 --- a/postgres/datadog_checks/postgres/connections.py +++ b/postgres/datadog_checks/postgres/connections.py @@ -15,7 +15,6 @@ class ConnectionPoolFullError(Exception): def __init__(self, size, timeout): self.size = size self.timeout = timeout - super().__init__() def __str__(self): return "Could not insert connection in pool size {} within {} seconds".format(self.size, self.timeout) @@ -90,14 +89,14 @@ def _get_connection_raw(self, dbname: str, ttl_ms: int, timeout: int = None) -> conn = self._conns.pop(dbname, ConnectionInfo(None, None, None, None, None)) db = conn.connection if db is None or db.closed: - if self.max_conns is not None and len(self._conns) == self.max_conns: + if self.max_conns is not None: # try to free space until we succeed while len(self._conns) >= self.max_conns: self.prune_connections() self.evict_lru() if timeout is not None and (datetime.datetime.now() - start).total_seconds() > timeout: raise ConnectionPoolFullError(self.max_conns, timeout) - time.sleep(0.001) + time.sleep(0.01) continue self._stats.connection_opened += 1 db = self.connect_fn(dbname) @@ -127,13 +126,15 @@ def get_connection(self, dbname: str, ttl_ms: int, timeout: int = None): """ try: with self._mu: - db = None db = self._get_connection_raw(dbname, ttl_ms, timeout) yield db finally: with self._mu: - if db is not None: + try: self._conns[dbname].active = False + except KeyError: + # if self._get_connection_raw hit an exception, self._conns[dbname] didn't get populated + pass def prune_connections(self): """ diff --git a/postgres/datadog_checks/postgres/metadata.py 
b/postgres/datadog_checks/postgres/metadata.py index 703fb0b842e0b..8456526dc0aa3 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -120,9 +120,8 @@ def _payload_pg_version(self): @tracked_method(agent_check_getter=agent_check_getter) def _collect_postgres_settings(self): - with self._conn_pool.get_connection(self._config.dbname, ttl_ms=self._conn_ttl_ms).cursor( - cursor_factory=psycopg2.extras.DictCursor - ) as cursor: + with self._conn_pool.get_connection(self._config.dbname, ttl_ms=self._conn_ttl_ms) as conn: + cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) self._log.debug("Running query [%s]", PG_SETTINGS_QUERY) self._time_since_last_settings_query = time.time() cursor.execute(PG_SETTINGS_QUERY) diff --git a/postgres/datadog_checks/postgres/statement_samples.py b/postgres/datadog_checks/postgres/statement_samples.py index ec71e7992f82f..2d8648660399e 100644 --- a/postgres/datadog_checks/postgres/statement_samples.py +++ b/postgres/datadog_checks/postgres/statement_samples.py @@ -273,9 +273,8 @@ def _get_active_connections(self): query = PG_ACTIVE_CONNECTIONS_QUERY.format( pg_stat_activity_view=self._config.pg_stat_activity_view, extra_filters=extra_filters ) - with self._conn_pool.get_connection(self._config.dbname, ttl_ms=self._conn_ttl_ms).cursor( - cursor_factory=psycopg2.extras.DictCursor - ) as cursor: + with self._conn_pool.get_connection(self._config.dbname, ttl_ms=self._conn_ttl_ms) as conn: + cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) self._log.debug("Running query [%s] %s", query, params) cursor.execute(query, params) rows = cursor.fetchall() @@ -308,9 +307,8 @@ def _get_new_pg_stat_activity(self, available_activity_columns): pg_stat_activity_view=self._config.pg_stat_activity_view, extra_filters=extra_filters, ) - with self._conn_pool.get_connection(self._config.dbname, ttl_ms=self._conn_ttl_ms).cursor( - cursor_factory=psycopg2.extras.DictCursor - ) as cursor: + with self._conn_pool.get_connection(self._config.dbname, ttl_ms=self._conn_ttl_ms) as conn: + cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) self._log.debug("Running query [%s] %s", query, params) cursor.execute(query, params) rows = cursor.fetchall() @@ -327,9 +325,8 @@ def _get_pg_stat_activity_cols_cached(self, expected_cols): @tracked_method(agent_check_getter=agent_check_getter, track_result_length=True) def _get_available_activity_columns(self, all_expected_columns): - with self._conn_pool.get_connection(self._config.dbname, ttl_ms=self._conn_ttl_ms).cursor( - cursor_factory=psycopg2.extras.DictCursor - ) as cursor: + with self._conn_pool.get_connection(self._config.dbname, ttl_ms=self._conn_ttl_ms) as conn: + cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) cursor.execute( "select * from {pg_stat_activity_view} LIMIT 0".format( pg_stat_activity_view=self._config.pg_stat_activity_view @@ -590,7 +587,8 @@ def _get_db_explain_setup_state_cached(self, dbname): def _run_explain(self, dbname, statement, obfuscated_statement): start_time = time.time() - with self._conn_pool.get_connection(dbname, ttl_ms=self._conn_ttl_ms).cursor() as cursor: + with self._conn_pool.get_connection(dbname, ttl_ms=self._conn_ttl_ms) as conn: + cursor = conn.cursor() self._log.debug("Running query on dbname=%s: %s(%s)", dbname, self._explain_function, obfuscated_statement) cursor.execute( """SELECT {explain_function}($stmt${statement}$stmt$)""".format( From 9995c7fd65590896ebb33968237e31f72654e1ff Mon Sep 
17 00:00:00 2001 From: Eden Date: Wed, 21 Jun 2023 13:46:53 +0000 Subject: [PATCH 20/86] update from base, config changes, parsing includelist --- postgres/datadog_checks/postgres/config.py | 2 + .../postgres/data/conf.yaml.example | 45 +++++++++++++++++++ postgres/datadog_checks/postgres/discovery.py | 36 ++++++++------- postgres/datadog_checks/postgres/metadata.py | 5 +-- postgres/datadog_checks/postgres/postgres.py | 2 + .../postgres/statement_samples.py | 18 ++++---- 6 files changed, 78 insertions(+), 30 deletions(-) diff --git a/postgres/datadog_checks/postgres/config.py b/postgres/datadog_checks/postgres/config.py index 99286bbd69f71..eff7add64164e 100644 --- a/postgres/datadog_checks/postgres/config.py +++ b/postgres/datadog_checks/postgres/config.py @@ -38,6 +38,8 @@ def __init__(self, instance): self.reported_hostname = instance.get('reported_hostname', '') self.dbstrict = is_affirmative(instance.get('dbstrict', False)) self.disable_generic_tags = is_affirmative(instance.get('disable_generic_tags', False)) if instance else False + + self.discovery_config = instance.get('database_autodiscovery', {}) self.application_name = instance.get('application_name', 'datadog-agent') if not self.isascii(self.application_name): diff --git a/postgres/datadog_checks/postgres/data/conf.yaml.example b/postgres/datadog_checks/postgres/data/conf.yaml.example index caac3c854d360..d371a72aac4f4 100644 --- a/postgres/datadog_checks/postgres/data/conf.yaml.example +++ b/postgres/datadog_checks/postgres/data/conf.yaml.example @@ -247,6 +247,51 @@ instances: # # application_name: datadog-agent + ## Define the configuration for database autodiscovery. + # Complete this section if you want to auto-discover databases on this host + # instead of specifying each using dbname. + # + # database_autodiscovery: + + ## @param enabled - boolean - optional - default: false + ## Enable database autodiscovery. + # + # enabled: false + + ## @param max_dbs - integer - optional - default: 100 + ## The maximum number of databases this host should monitor. + # + # max_databases: 100 + + + ## @param include - list of strings - optional + ## Regular expression for database names to include as part of + ## `database_autodiscovery`. + ## Will report metrics for databases that are found in this instance, + ## ignores databases listed but not found. + ## Character casing is ignored. The regular expressions start matching from + ## the beginning, so to match anywhere, prepend `.*`. For exact matches append `$`. + ## Defaults to `.*` to include everything. + # include: + # - master$ + # - AdventureWorks.* + + ## @param exclude - list of strings - optional + ## Regular expression for database names to exclude as part of `database_autodiscovery`. + ## Character casing is ignored. The regular expressions start matching from the beginning, + ## so to match anywhere, prepend `.*`. For exact matches append `$`. + ## In case of conflicts, database exclusion via `exclude` takes precedence over + ## those found via `include`. + # + # exclude: + # - model + # - msdb + + ## @param refresh - integer- optional - default: 3600 + ## Frequency in seconds of scans for new databases. Defaults to `3600`. + # + # refresh: 3600 + ## @param dbm - boolean - optional - default: false ## Set to `true` to enable Database Monitoring. 
# diff --git a/postgres/datadog_checks/postgres/discovery.py b/postgres/datadog_checks/postgres/discovery.py index d0c44957e6562..1cd749e37ea15 100644 --- a/postgres/datadog_checks/postgres/discovery.py +++ b/postgres/datadog_checks/postgres/discovery.py @@ -1,18 +1,24 @@ -from typing import List, Callable +from typing import Dict, List, Callable from datadog_checks.base.utils.discovery import Discovery -import psycopg2 import logging from connections import MultiDatabaseConnectionPool AUTODISCOVERY_QUERY: str = """select {columns} from pg_catalog.pg_database where datistemplate = false;""" class PostgresAutodiscovery(Discovery): - def __init__(self, host: str, include: List[str], exclude: List[str], interval: int, max_databases: int, log: logging.Logger, conn_pool: MultiDatabaseConnectionPool) -> None: + def __init__(self, global_view_db: str, autodiscovery_config: Dict, log: logging.Logger, conn_pool: MultiDatabaseConnectionPool) -> None: # parent class asks for includelist to be a dictionary - super(PostgresAutodiscovery, self).__init__(self._get_databases, include={'.*': 10}, exclude=[], interval=60) - self.host = host + parsed_include = self._parse_includelist(autodiscovery_config.get("include")) + super(PostgresAutodiscovery, self).__init__(self._get_databases, include=parsed_include, exclude=autodiscovery_config.get("exclude"), interval=autodiscovery_config.get("interval")) + self._log = log + self._db = global_view_db self._conn_pool = conn_pool + def _parse_includelist(self, include: List[str]) -> Dict[str, int]: + ret = {} + for item in include: + ret[item] = 0 + return ret def get_items(self) -> List[str]: """ @@ -30,15 +36,11 @@ def _get_autodiscovery_query(self) -> str: return autodiscovery_query def _get_databases(self) -> List[str]: - with self._conn_pool.get_connection_cm('postgres', self.default_ttl) - try: - conn = self._conn_pool.get_connection('postgres', self.default_ttl) - except psycopg2.OperationalError: - - cursor = conn.cursor() - autodiscovery_query = self._get_autodiscovery_query() - cursor.execute(autodiscovery_query) - databases = list(cursor.fetchall()) - databases = [x[0] for x in databases] # fetchall returns list of tuples representing rows, so need to parse - print("Databases found were: ", databases) - return databases \ No newline at end of file + with self._conn_pool.get_connection_cm(self._db, self.default_ttl) as conn: + cursor = conn.cursor() + autodiscovery_query = self._get_autodiscovery_query() + cursor.execute(autodiscovery_query) + databases = list(cursor.fetchall()) + databases = [x[0] for x in databases] # fetchall returns list of tuples representing rows, so need to parse + self.log.info("Databases found were: ", databases) + return databases diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index 703fb0b842e0b..8456526dc0aa3 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -120,9 +120,8 @@ def _payload_pg_version(self): @tracked_method(agent_check_getter=agent_check_getter) def _collect_postgres_settings(self): - with self._conn_pool.get_connection(self._config.dbname, ttl_ms=self._conn_ttl_ms).cursor( - cursor_factory=psycopg2.extras.DictCursor - ) as cursor: + with self._conn_pool.get_connection(self._config.dbname, ttl_ms=self._conn_ttl_ms) as conn: + cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) self._log.debug("Running query [%s]", PG_SETTINGS_QUERY) self._time_since_last_settings_query = time.time() 
cursor.execute(PG_SETTINGS_QUERY) diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index 8069ebd6830b5..0ac90ee07f1da 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -16,6 +16,7 @@ from datadog_checks.postgres import aws from datadog_checks.postgres.metadata import PostgresMetadata from datadog_checks.postgres.metrics_cache import PostgresMetricsCache +from datadog_checks.postgres.discovery import PostgresAutodiscovery from datadog_checks.postgres.relationsmanager import INDEX_BLOAT, RELATION_METRICS, TABLE_BLOAT, RelationsManager from datadog_checks.postgres.statement_samples import PostgresStatementSamples from datadog_checks.postgres.statements import PostgresStatementMetrics @@ -69,6 +70,7 @@ def __init__(self, name, init_config, instances): self._version = None self._is_aurora = None self._version_utils = VersionUtils() + self.autodiscovery = None # Deprecate custom_metrics in favor of custom_queries if 'custom_metrics' in self.instance: self.warning( diff --git a/postgres/datadog_checks/postgres/statement_samples.py b/postgres/datadog_checks/postgres/statement_samples.py index ec71e7992f82f..2d8648660399e 100644 --- a/postgres/datadog_checks/postgres/statement_samples.py +++ b/postgres/datadog_checks/postgres/statement_samples.py @@ -273,9 +273,8 @@ def _get_active_connections(self): query = PG_ACTIVE_CONNECTIONS_QUERY.format( pg_stat_activity_view=self._config.pg_stat_activity_view, extra_filters=extra_filters ) - with self._conn_pool.get_connection(self._config.dbname, ttl_ms=self._conn_ttl_ms).cursor( - cursor_factory=psycopg2.extras.DictCursor - ) as cursor: + with self._conn_pool.get_connection(self._config.dbname, ttl_ms=self._conn_ttl_ms) as conn: + cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) self._log.debug("Running query [%s] %s", query, params) cursor.execute(query, params) rows = cursor.fetchall() @@ -308,9 +307,8 @@ def _get_new_pg_stat_activity(self, available_activity_columns): pg_stat_activity_view=self._config.pg_stat_activity_view, extra_filters=extra_filters, ) - with self._conn_pool.get_connection(self._config.dbname, ttl_ms=self._conn_ttl_ms).cursor( - cursor_factory=psycopg2.extras.DictCursor - ) as cursor: + with self._conn_pool.get_connection(self._config.dbname, ttl_ms=self._conn_ttl_ms) as conn: + cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) self._log.debug("Running query [%s] %s", query, params) cursor.execute(query, params) rows = cursor.fetchall() @@ -327,9 +325,8 @@ def _get_pg_stat_activity_cols_cached(self, expected_cols): @tracked_method(agent_check_getter=agent_check_getter, track_result_length=True) def _get_available_activity_columns(self, all_expected_columns): - with self._conn_pool.get_connection(self._config.dbname, ttl_ms=self._conn_ttl_ms).cursor( - cursor_factory=psycopg2.extras.DictCursor - ) as cursor: + with self._conn_pool.get_connection(self._config.dbname, ttl_ms=self._conn_ttl_ms) as conn: + cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) cursor.execute( "select * from {pg_stat_activity_view} LIMIT 0".format( pg_stat_activity_view=self._config.pg_stat_activity_view @@ -590,7 +587,8 @@ def _get_db_explain_setup_state_cached(self, dbname): def _run_explain(self, dbname, statement, obfuscated_statement): start_time = time.time() - with self._conn_pool.get_connection(dbname, ttl_ms=self._conn_ttl_ms).cursor() as cursor: + with self._conn_pool.get_connection(dbname, 
ttl_ms=self._conn_ttl_ms) as conn: + cursor = conn.cursor() self._log.debug("Running query on dbname=%s: %s(%s)", dbname, self._explain_function, obfuscated_statement) cursor.execute( """SELECT {explain_function}($stmt${statement}$stmt$)""".format( From e467d30bcfc02dc07f9d88c6359dbf89dfdf2ee1 Mon Sep 17 00:00:00 2001 From: Eden Date: Wed, 21 Jun 2023 15:00:56 +0000 Subject: [PATCH 21/86] first test --- postgres/datadog_checks/postgres/discovery.py | 5 +++ postgres/datadog_checks/postgres/postgres.py | 39 +++++++++++++++++-- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/postgres/datadog_checks/postgres/discovery.py b/postgres/datadog_checks/postgres/discovery.py index 1cd749e37ea15..25791a73a31a8 100644 --- a/postgres/datadog_checks/postgres/discovery.py +++ b/postgres/datadog_checks/postgres/discovery.py @@ -13,6 +13,7 @@ def __init__(self, global_view_db: str, autodiscovery_config: Dict, log: logging self._log = log self._db = global_view_db self._conn_pool = conn_pool + self._max_databases = self.autodiscovery_config.get("max_databases") def _parse_includelist(self, include: List[str]) -> Dict[str, int]: ret = {} @@ -29,6 +30,10 @@ def get_items(self) -> List[str]: """ items = list(super().get_items()) items_parsed = [item[1] for item in items] + if len(items_parsed) > self._max_databases: + items_parsed = items_parsed[:self._max_databases] + self._log.warning("Autodiscovery found more than {} databases, which was specified as a limit. Truncating list and running checks only on \ + the following databases: {}".format(self._max_databases, items_parsed)) return items_parsed def _get_autodiscovery_query(self) -> str: diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index 0ac90ee07f1da..2cb044ebd07b2 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -11,9 +11,11 @@ from six import iteritems from datadog_checks.base import AgentCheck +from datadog_checks.base.config import is_affirmative from datadog_checks.base.utils.db import QueryExecutor from datadog_checks.base.utils.db.utils import resolve_db_host as agent_host_resolver from datadog_checks.postgres import aws +from datadog_checks.postgres.connections import MultiDatabaseConnectionPool from datadog_checks.postgres.metadata import PostgresMetadata from datadog_checks.postgres.metrics_cache import PostgresMetricsCache from datadog_checks.postgres.discovery import PostgresAutodiscovery @@ -70,7 +72,6 @@ def __init__(self, name, init_config, instances): self._version = None self._is_aurora = None self._version_utils = VersionUtils() - self.autodiscovery = None # Deprecate custom_metrics in favor of custom_queries if 'custom_metrics' in self.instance: self.warning( @@ -94,10 +95,24 @@ def __init__(self, name, init_config, instances): # map[dbname -> psycopg connection] self._db_pool = {} self._db_pool_lock = threading.Lock() + self.autodiscovery_db_pool = MultiDatabaseConnectionPool(self._new_connection) self.tags_without_db = [t for t in copy.copy(self.tags) if not t.startswith("db:")] + self.autodiscovery = self._build_autodiscovery() self._dynamic_queries = None + def _build_autodiscovery(self): + if not is_affirmative(self._config.get("database_autodiscovery").get("enabled")): + return None + + if not self._config.relations: + self.log.warning("Database autodiscovery is enabled, but relation-level metrics are not being collected.\ + All metrics can be gathered from global view.") + return None + + 
discovery = PostgresAutodiscovery('postgres', self._config.get("database_autodiscovery"), self.log, self.autodiscovery_db_pool) + return discovery + def set_resource_tags(self): if self.cloud_metadata.get("gcp") is not None: self.tags.append( @@ -439,6 +454,17 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics): num_results += 1 return num_results + + def _collect_relations_autodiscovery(self, instance_tags, relations_scopes): + if not self.autodiscovery: + return + + databases = self.autodiscovery.get_items() + for db in databases: + with self.autodiscovery_db_pool.get_connection(db, self._config.idle_connection_timeout) as conn: + cursor = conn.cursor() + for scope in relations_scopes: + self._query_scope(cursor, scope, instance_tags, False) def _collect_stats(self, instance_tags): """Query pg_stat_* for various metrics @@ -461,9 +487,16 @@ def _collect_stats(self, instance_tags): # Do we need relation-specific metrics? if self._config.relations: - metric_scope.extend(RELATION_METRICS) + relations_scopes = RELATION_METRICS if self._config.collect_bloat_metrics: - metric_scope.extend([INDEX_BLOAT, TABLE_BLOAT]) + relations_scopes.extend([INDEX_BLOAT, TABLE_BLOAT]) + + # If autodiscovery is enabled, get relation metrics from all databases found + if self.autodiscovery: + self._collect_relations_autodiscovery(instance_tags, relations_scopes) + # otherwise, continue just with dbname + else: + metric_scope.extend(relations_scopes) replication_metrics = self.metrics_cache.get_replication_metrics(self.version, self.is_aurora) if replication_metrics: From 03b10faa3959e9979f3b8bbbc0c8a39baa436a17 Mon Sep 17 00:00:00 2001 From: Eden Date: Wed, 21 Jun 2023 18:00:57 +0000 Subject: [PATCH 22/86] fixing test --- .../postgres/data/conf.yaml.example | 45 ------------------- postgres/datadog_checks/postgres/discovery.py | 2 +- postgres/datadog_checks/postgres/postgres.py | 8 ++-- 3 files changed, 6 insertions(+), 49 deletions(-) diff --git a/postgres/datadog_checks/postgres/data/conf.yaml.example b/postgres/datadog_checks/postgres/data/conf.yaml.example index d371a72aac4f4..caac3c854d360 100644 --- a/postgres/datadog_checks/postgres/data/conf.yaml.example +++ b/postgres/datadog_checks/postgres/data/conf.yaml.example @@ -247,51 +247,6 @@ instances: # # application_name: datadog-agent - ## Define the configuration for database autodiscovery. - # Complete this section if you want to auto-discover databases on this host - # instead of specifying each using dbname. - # - # database_autodiscovery: - - ## @param enabled - boolean - optional - default: false - ## Enable database autodiscovery. - # - # enabled: false - - ## @param max_dbs - integer - optional - default: 100 - ## The maximum number of databases this host should monitor. - # - # max_databases: 100 - - - ## @param include - list of strings - optional - ## Regular expression for database names to include as part of - ## `database_autodiscovery`. - ## Will report metrics for databases that are found in this instance, - ## ignores databases listed but not found. - ## Character casing is ignored. The regular expressions start matching from - ## the beginning, so to match anywhere, prepend `.*`. For exact matches append `$`. - ## Defaults to `.*` to include everything. - # include: - # - master$ - # - AdventureWorks.* - - ## @param exclude - list of strings - optional - ## Regular expression for database names to exclude as part of `database_autodiscovery`. - ## Character casing is ignored. 
The regular expressions start matching from the beginning, - ## so to match anywhere, prepend `.*`. For exact matches append `$`. - ## In case of conflicts, database exclusion via `exclude` takes precedence over - ## those found via `include`. - # - # exclude: - # - model - # - msdb - - ## @param refresh - integer- optional - default: 3600 - ## Frequency in seconds of scans for new databases. Defaults to `3600`. - # - # refresh: 3600 - ## @param dbm - boolean - optional - default: false ## Set to `true` to enable Database Monitoring. # diff --git a/postgres/datadog_checks/postgres/discovery.py b/postgres/datadog_checks/postgres/discovery.py index 25791a73a31a8..5554c740d8727 100644 --- a/postgres/datadog_checks/postgres/discovery.py +++ b/postgres/datadog_checks/postgres/discovery.py @@ -1,7 +1,7 @@ from typing import Dict, List, Callable from datadog_checks.base.utils.discovery import Discovery +from datadog_checks.postgres.connections import MultiDatabaseConnectionPool import logging -from connections import MultiDatabaseConnectionPool AUTODISCOVERY_QUERY: str = """select {columns} from pg_catalog.pg_database where datistemplate = false;""" diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index 84d3b8e9f3661..d911a18ec2ca3 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -109,7 +109,7 @@ def __init__(self, name, init_config, instances): self._dynamic_queries = None def _build_autodiscovery(self): - if not is_affirmative(self._config.get("database_autodiscovery").get("enabled")): + if not is_affirmative(self._config.discovery_config): return None if not self._config.relations: @@ -117,7 +117,7 @@ def _build_autodiscovery(self): All metrics can be gathered from global view.") return None - discovery = PostgresAutodiscovery('postgres', self._config.get("database_autodiscovery"), self.log, self.autodiscovery_db_pool) + discovery = PostgresAutodiscovery('postgres', self._config.discovery_config, self.log, self.autodiscovery_db_pool) return discovery def set_resource_tags(self): @@ -502,7 +502,8 @@ def _collect_stats(self, instance_tags): # Do we need relation-specific metrics? if self._config.relations: - relations_scopes = RELATION_METRICS + relations_scopes = list(RELATION_METRICS) + if self._config.collect_bloat_metrics: relations_scopes.extend([INDEX_BLOAT, TABLE_BLOAT]) @@ -513,6 +514,7 @@ def _collect_stats(self, instance_tags): else: metric_scope.extend(relations_scopes) + self.log.warning(metric_scope) replication_metrics = self.metrics_cache.get_replication_metrics(self.version, self.is_aurora) if replication_metrics: replication_metrics_query = copy.deepcopy(REPLICATION_METRICS) From 64bc2783e7e6ea63731f07551e0809a82be60a21 Mon Sep 17 00:00:00 2001 From: Eden Date: Wed, 21 Jun 2023 18:01:42 +0000 Subject: [PATCH 23/86] fix tests --- postgres/assets/configuration/spec.yaml | 56 +++++++++++++++++++ .../postgres/data/conf.yaml.example | 45 +++++++++++++++ 2 files changed, 101 insertions(+) diff --git a/postgres/assets/configuration/spec.yaml b/postgres/assets/configuration/spec.yaml index f6827aa9a300d..2538f095bdf6c 100644 --- a/postgres/assets/configuration/spec.yaml +++ b/postgres/assets/configuration/spec.yaml @@ -300,6 +300,62 @@ files: type: tags: - : + - name: database_autodiscovery + description: | + Define the configuration for database autodiscovery. 
+ Complete this section if you want to auto-discover databases on this host + instead of specifying each using dbname. + options: + - name: enabled + description: Enable database autodiscovery. + value: + type: boolean + example: false + display_default: false + - name: max_databases + description: The maximum number of databases this host should monitor. + value: + type: integer + example: 100 + display_default: 100 + - name: include + description: | + Regular expression for database names to include as part of + database autodiscovery. + Will report metrics for databases that are found in this instance, + ignores databases listed but not found. + Character casing is ignored. The regular expressions start matching from + the beginning, so to match anywhere, prepend `.*`. For exact matches append `$`. + Defaults to `.*` to include everything. + value: + type: array + items: + type: string + example: + - "master$" + - "AdventureWorks.*" + display_default: + - ".*" + - name: exclude + description: | + Regular expression for database names to exclude as part of `database_autodiscovery`. + Character casing is ignored. The regular expressions start matching from the beginning, + so to match anywhere, prepend `.*`. For exact matches append `$`. + In case of conflicts, database exclusion via `exclude` takes precedence over + those found via `include` + value: + type: array + items: + type: string + example: + - "model" + - "msdb" + - name: refresh + description: Frequency in seconds of scans for new databases. Defaults to `3600`. + value: + type: integer + example: 3600 + display_default: 3600 - name: application_name description: | The application_name can be any string of less than NAMEDATALEN characters (64 characters in a standard build). diff --git a/postgres/datadog_checks/postgres/data/conf.yaml.example b/postgres/datadog_checks/postgres/data/conf.yaml.example index caac3c854d360..f73e27be57f71 100644 --- a/postgres/datadog_checks/postgres/data/conf.yaml.example +++ b/postgres/datadog_checks/postgres/data/conf.yaml.example @@ -240,6 +240,51 @@ instances: # tags: # - : + ## Define the configuration for database autodiscovery. + ## Complete this section if you want to auto-discover databases on this host + ## instead of specifying each using dbname. + # + # database_autodiscovery: + + ## @param enabled - boolean - optional - default: false + ## Enable database autodiscovery. + # + # enabled: false + + ## @param max_databases - integer - optional - default: 100 + ## The maximum number of databases this host should monitor. + # + # max_databases: 100 + + ## @param include - list of strings - optional - default: ['.*'] + ## Regular expression for database names to include as part of + ## database autodiscovery. + ## Will report metrics for databases that are found in this instance, + ## ignores databases listed but not found. + ## Character casing is ignored. The regular expressions start matching from + ## the beginning, so to match anywhere, prepend `.*`. For exact matches append `$`. + ## Defaults to `.*` to include everything. + # + # include: + # - master$ + # - AdventureWorks.* + + ## @param exclude - list of strings - optional + ## Regular expression for database names to exclude as part of `database_autodiscovery`. + ## Character casing is ignored. The regular expressions start matching from the beginning, + ## so to match anywhere, prepend `.*`. For exact matches append `$`. 
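As a concrete illustration of how the include and exclude patterns combine (the values below only mirror the ones the integration tests later in this series use):

    database_autodiscovery:
      enabled: true
      include:
        - "dogs_([1-9]|[1-9][0-9]|10[0-9])"   # dogs_1 through dogs_109
      exclude:
        - "dogs_5$"                           # drop exactly dogs_5
        - "dogs_50$"                          # drop exactly dogs_50
      max_databases: 100
      refresh: 3600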
+ ## In case of conflicts, database exclusion via `exclude` takes precedence over + ## those found via `include` + # + # exclude: + # - model + # - msdb + + ## @param refresh - integer - optional - default: 3600 + ## Frequency in seconds of scans for new databases. Defaults to `3600`. + # + # refresh: 3600 + ## @param application_name - string - optional - default: datadog-agent ## The application_name can be any string of less than NAMEDATALEN characters (64 characters in a standard build). ## It is typically set by an application upon connection to the server. From 5de7d8b820f0921896a3a4c859eaaa0923691429 Mon Sep 17 00:00:00 2001 From: Eden Date: Wed, 21 Jun 2023 21:26:15 +0000 Subject: [PATCH 24/86] enable relations with autodiscovery --- postgres/datadog_checks/postgres/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/postgres/datadog_checks/postgres/config.py b/postgres/datadog_checks/postgres/config.py index eff7add64164e..715a0dcac9f18 100644 --- a/postgres/datadog_checks/postgres/config.py +++ b/postgres/datadog_checks/postgres/config.py @@ -48,8 +48,8 @@ def __init__(self, instance): self.query_timeout = int(instance.get('query_timeout', 5000)) self.idle_connection_timeout = instance.get('idle_connection_timeout', 60000) self.relations = instance.get('relations', []) - if self.relations and not self.dbname: - raise ConfigurationError('"dbname" parameter must be set when using the "relations" parameter.') + if self.relations and not (self.dbname or self.discovery_config): + raise ConfigurationError('"dbname" parameter must be set OR autodiscovery must be enabled when using the "relations" parameter.') self.tags = self._build_tags(instance.get('tags', [])) From f382c0329a96a2e2ba0545e4175aa5958bcacbeb Mon Sep 17 00:00:00 2001 From: Eden Date: Fri, 23 Jun 2023 15:51:57 +0000 Subject: [PATCH 25/86] updating with tests --- postgres/datadog_checks/postgres/postgres.py | 6 +- postgres/tests/test_discovery.py | 136 +++++++++++++++++++ 2 files changed, 139 insertions(+), 3 deletions(-) create mode 100644 postgres/tests/test_discovery.py diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index d911a18ec2ca3..a1131e9f4abf1 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -113,11 +113,11 @@ def _build_autodiscovery(self): return None if not self._config.relations: - self.log.warning("Database autodiscovery is enabled, but relation-level metrics are not being collected.\ - All metrics can be gathered from global view.") + self.log.warning("Database autodiscovery is enabled, but relation-level metrics are not being collected." + "All metrics will be gathered from global view.") return None - discovery = PostgresAutodiscovery('postgres', self._config.discovery_config, self.log, self.autodiscovery_db_pool) + discovery = PostgresAutodiscovery('postgres', self._config.discovery_config, self.log, self.autodiscovery_db_pool, self._config.idle_connection_timeout) return discovery def set_resource_tags(self): diff --git a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py new file mode 100644 index 0000000000000..72b007b451a37 --- /dev/null +++ b/postgres/tests/test_discovery.py @@ -0,0 +1,136 @@ +# (C) Datadog, Inc. 
2023-present +# All rights reserved +# Licensed under Simplified BSD License (see LICENSE) + +from contextlib import contextmanager +import copy +import select +import time + +from .utils import run_one_check +from .common import HOST, USER_ADMIN, PASSWORD_ADMIN +from datadog_checks.postgres import PostgreSql +from datadog_checks.postgres.connections import MultiDatabaseConnectionPool +from datadog_checks.postgres.relationsmanager import RELATION_METRICS, INDEX_BLOAT, TABLE_BLOAT + + +import psycopg2 +import psycopg2.sql +import pytest + +DISCOVERY_CONFIG = { + "enabled": True, + "include": ["dogs_([1-9]|[1-9][0-9]|10[0-9])"], + "exclude":["dogs_5$", "dogs_50$"], +} + +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_autodiscovery_simple(integration_check, pg_instance): + """ + Test simple autodiscovery. + """ + pg_instance["database_autodiscovery"] = DISCOVERY_CONFIG + pg_instance['relations'] = ['pg_index'] + check = integration_check(pg_instance) + run_one_check(check, pg_instance) + + assert check.autodiscovery is not None + databases = check.autodiscovery.get_items() + expected_len = (100-len(DISCOVERY_CONFIG["exclude"])) + assert len(databases) == expected_len + +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_autodiscovery_max_databases(integration_check, pg_instance): + """ + Test database list truncation. + """ + pg_instance["database_autodiscovery"] = copy.deepcopy(DISCOVERY_CONFIG) + pg_instance['database_autodiscovery']['max_databases'] = 20 + pg_instance['relations'] = ['pg_index'] + check = integration_check(pg_instance) + run_one_check(check, pg_instance) + + assert check.autodiscovery is not None + databases = check.autodiscovery.get_items() + assert len(databases) == pg_instance['database_autodiscovery']['max_databases'] + + +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_autodiscovery_refresh(integration_check, pg_instance): + """ + Test cache refresh by adding a database in the middle of a check. + """ + database_to_find = "cats" + @contextmanager + def get_postgres_connection(): + conn_args = {'host': HOST, 'dbname': "postgres", 'user': USER_ADMIN, 'password': PASSWORD_ADMIN} + conn = psycopg2.connect(**conn_args) + conn.autocommit = True + yield conn + + pg_instance["database_autodiscovery"] = copy.deepcopy(DISCOVERY_CONFIG) + pg_instance['database_autodiscovery']['include'].append(database_to_find) + pg_instance['relations'] = ['pg_index'] + pg_instance["database_autodiscovery"]['refresh'] = 1 + check = integration_check(pg_instance) + run_one_check(check, pg_instance) + + assert check.autodiscovery is not None + databases = check.autodiscovery.get_items() + expected_len = (100-len(DISCOVERY_CONFIG["exclude"])) + assert len(databases) == expected_len + + with get_postgres_connection() as conn: + cursor = conn.cursor() + cursor.execute(psycopg2.sql.SQL("CREATE DATABASE {}").format(psycopg2.sql.Identifier(database_to_find))) + + time.sleep(pg_instance["database_autodiscovery"]['refresh']) + databases = check.autodiscovery.get_items() + assert len(databases) == expected_len+1 + # Need to drop the new database to clean up the environment for next tests. 
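        # For reference: the include pattern in DISCOVERY_CONFIG matches dogs_1 through dogs_109,
        # the compose scripts create dogs_1 .. dogs_100, and two names are excluded, which is where
        # the expected count of 100 - len(DISCOVERY_CONFIG["exclude"]) == 98 used above comes from.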
+ cursor.execute(psycopg2.sql.SQL("DROP DATABASE {} WITH (FORCE);").format(psycopg2.sql.Identifier(database_to_find))) + + +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_autodiscovery_relations_disabled(integration_check, pg_instance): + """ + If no relation metrics are being collected, autodiscovery should not run. + """ + pg_instance["database_autodiscovery"] = DISCOVERY_CONFIG + pg_instance['relations'] = [] + check = integration_check(pg_instance) + run_one_check(check, pg_instance) + + assert check.autodiscovery is None + + +# @pytest.mark.integration +# @pytest.mark.usefixtures('dd_environment') +# def test_autodiscovery_collect_all_relations(aggregator, integration_check, pg_instance): +# """ +# If no relation metrics are being collected, autodiscovery should not run. +# """ +# pg_instance["database_autodiscovery"] = DISCOVERY_CONFIG +# pg_instance['relations'] = [{'relation_regex': '.*'}] +# check = integration_check(pg_instance) +# run_one_check(check, pg_instance) + +# # assert that for all databases found, a relation metric was reported +# databases = check.autodiscovery.get_items() +# relation_scopes = RELATION_METRICS +# relation_scopes.extend(TABLE_BLOAT) +# relation_scopes.extend(INDEX_BLOAT) +# print(type(relation_scopes[0])) +# relation_metrics = [] +# for scope in relation_scopes: +# print(type(scope)) +# relation_metrics.append(scope["metrics"]) +# # relation_metrics = [m["metrics"] for m in relation_scopes] +# print(relation_metrics) +# assert None is not None +# for db in databases: +# aggregator.assert_metric('postgresql.index_bloat', tags=['db:'+db]) \ No newline at end of file From 1e9f32b4035f38516d9c553beef88b2d3fb5f835 Mon Sep 17 00:00:00 2001 From: Eden Date: Fri, 23 Jun 2023 20:49:15 +0000 Subject: [PATCH 26/86] changing the dbs, testing discovery --- postgres/datadog_checks/postgres/discovery.py | 14 ++--- postgres/datadog_checks/postgres/postgres.py | 33 ++++++++++-- .../tests/compose/resources/03_load_data.sh | 11 +++- postgres/tests/test_discovery.py | 53 +++++++++---------- 4 files changed, 70 insertions(+), 41 deletions(-) diff --git a/postgres/datadog_checks/postgres/discovery.py b/postgres/datadog_checks/postgres/discovery.py index 5554c740d8727..950969c36a89c 100644 --- a/postgres/datadog_checks/postgres/discovery.py +++ b/postgres/datadog_checks/postgres/discovery.py @@ -1,19 +1,21 @@ from typing import Dict, List, Callable from datadog_checks.base.utils.discovery import Discovery +from datadog_checks.postgres.relationsmanager import RELATION_METRICS, INDEX_BLOAT, TABLE_BLOAT from datadog_checks.postgres.connections import MultiDatabaseConnectionPool import logging AUTODISCOVERY_QUERY: str = """select {columns} from pg_catalog.pg_database where datistemplate = false;""" class PostgresAutodiscovery(Discovery): - def __init__(self, global_view_db: str, autodiscovery_config: Dict, log: logging.Logger, conn_pool: MultiDatabaseConnectionPool) -> None: + def __init__(self, global_view_db: str, autodiscovery_config: Dict, log: logging.Logger, conn_pool: MultiDatabaseConnectionPool, default_ttl: int) -> None: # parent class asks for includelist to be a dictionary - parsed_include = self._parse_includelist(autodiscovery_config.get("include")) - super(PostgresAutodiscovery, self).__init__(self._get_databases, include=parsed_include, exclude=autodiscovery_config.get("exclude"), interval=autodiscovery_config.get("interval")) + parsed_include = self._parse_includelist(autodiscovery_config.get("include", [".*"])) + 
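        # _parse_includelist produces the dictionary form the parent class asks for,
        # e.g. [".*"] becomes {".*": 0} (input/output shown only for illustration).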
super(PostgresAutodiscovery, self).__init__(self._get_databases, include=parsed_include, exclude=autodiscovery_config.get("exclude", []), interval=autodiscovery_config.get("refresh", 3600)) self._log = log self._db = global_view_db self._conn_pool = conn_pool - self._max_databases = self.autodiscovery_config.get("max_databases") + self._default_ttl = default_ttl + self._max_databases = autodiscovery_config.get("max_databases", 100) def _parse_includelist(self, include: List[str]) -> Dict[str, int]: ret = {} @@ -41,11 +43,11 @@ def _get_autodiscovery_query(self) -> str: return autodiscovery_query def _get_databases(self) -> List[str]: - with self._conn_pool.get_connection_cm(self._db, self.default_ttl) as conn: + with self._conn_pool.get_connection(self._db, self._default_ttl) as conn: cursor = conn.cursor() autodiscovery_query = self._get_autodiscovery_query() cursor.execute(autodiscovery_query) databases = list(cursor.fetchall()) databases = [x[0] for x in databases] # fetchall returns list of tuples representing rows, so need to parse - self.log.info("Databases found were: ", databases) + self._log.info("Databases found were: ", databases) return databases diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index a1131e9f4abf1..c1d7446e11ac9 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -357,6 +357,7 @@ def _run_query_scope(self, cursor, scope, is_custom_metrics, cols, descriptors): results = None is_relations = scope.get('relation') and self._relations_manager.has_relations + # self.log.warning("DOES THIS HAVE REALTIONS? {}".format(is_relations)) try: query = fmt.format(scope['query'], metrics_columns=", ".join(cols)) # if this is a relation-specific query, we need to list all relations last @@ -409,9 +410,10 @@ def _run_query_scope(self, cursor, scope, is_custom_metrics, cols, descriptors): return results - def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics): + def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname = None): if scope is None: return None + # self.log.warning("Collecting metrics with these tags: {}".format(instance_tags)) # build query cols = list(scope['metrics']) # list of metrics to query, in some order # we must remember that order to parse results @@ -421,7 +423,9 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics): descriptors = scope['descriptors'] results = self._run_query_scope(cursor, scope, is_custom_metrics, cols, descriptors) if not results: + # self.log.warning("none") return None + self.log.warning(results) # Parse and submit results. @@ -455,6 +459,12 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics): # connection. 
if not scope['relation'] and not scope.get('use_global_db_tag', False): tags = copy.copy(self.tags_without_db) + elif dbname is not None: + # self.log.warning("Collecting metrics {} from this db: {}".format(scope, dbname)) + # if dbname is specified in this function, we are querying relation-level metrics from autodiscovered database + tags = copy.copy(self.tags_without_db) + tags.append("db:{}".format(dbname)) + # self.log.warning("current tags {}".format(tags)) else: tags = copy.copy(instance_tags) @@ -473,13 +483,20 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics): def _collect_relations_autodiscovery(self, instance_tags, relations_scopes): if not self.autodiscovery: return - + # self.log.warning("Entering autodiscovery") databases = self.autodiscovery.get_items() + self.log.warning("Found these databases {}".format(databases)) for db in databases: with self.autodiscovery_db_pool.get_connection(db, self._config.idle_connection_timeout) as conn: cursor = conn.cursor() + # show tables + cursor.execute("SELECT * FROM pg_catalog.pg_tables;") + results = cursor.fetchall() + # self.log.warning("tables {}".format(results)) + for scope in relations_scopes: - self._query_scope(cursor, scope, instance_tags, False) + # self.log.warning("Relation scope is {}".format(scope)) + self._query_scope(cursor, scope, instance_tags, False, db) def _collect_stats(self, instance_tags): """Query pg_stat_* for various metrics @@ -509,12 +526,13 @@ def _collect_stats(self, instance_tags): # If autodiscovery is enabled, get relation metrics from all databases found if self.autodiscovery: + self.log.warning("Entering discovery") self._collect_relations_autodiscovery(instance_tags, relations_scopes) # otherwise, continue just with dbname - else: + else: metric_scope.extend(relations_scopes) - self.log.warning(metric_scope) + # self.log.warning(metric_scope) replication_metrics = self.metrics_cache.get_replication_metrics(self.version, self.is_aurora) if replication_metrics: replication_metrics_query = copy.deepcopy(REPLICATION_METRICS) @@ -526,6 +544,11 @@ def _collect_stats(self, instance_tags): metric_scope.append(replication_stats_metrics) cursor = self.db.cursor() + + # show tables + # cursor.execute("SELECT * FROM pg_catalog.pg_tables;") + # results = cursor.fetchall() + # self.log.warning("tables {}".format(results)) results_len = self._query_scope(cursor, db_instance_metrics, instance_tags, False) if results_len is not None: self.gauge( diff --git a/postgres/tests/compose/resources/03_load_data.sh b/postgres/tests/compose/resources/03_load_data.sh index aee6a034aec15..f0357492720ba 100755 --- a/postgres/tests/compose/resources/03_load_data.sh +++ b/postgres/tests/compose/resources/03_load_data.sh @@ -31,7 +31,15 @@ psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" datadog_test <<-EOSQL EOSQL fi -for DBNAME in dogs dogs_noschema dogs_nofunc; do +i=1 +dbs=() +for ((i=1; i<=100; i++)); do + dbs+=(dogs_$i) +done +dbs+=dogs +dbs+=dogs_noschema +dbs+=dogs_nofunc +for DBNAME in ${dbs[@]}; do psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" "$DBNAME" <<-EOSQL CREATE TABLE breed (id SERIAL, name VARCHAR(255)); @@ -42,5 +50,4 @@ psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" "$DBNAME" <<-EOSQL INSERT INTO breed (name) VALUES ('Labrador Retriver'), ('German Shepherd'), ('Yorkshire Terrier'), ('Golden Retriever'), ('Bulldog'); SELECT * FROM breed WHERE name = 'Labrador'; EOSQL - done diff --git a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py index 
72b007b451a37..8713ac9a29eac 100644 --- a/postgres/tests/test_discovery.py +++ b/postgres/tests/test_discovery.py @@ -8,10 +8,10 @@ import time from .utils import run_one_check -from .common import HOST, USER_ADMIN, PASSWORD_ADMIN +from .test_relations import RELATION_METRICS +from .common import HOST, USER_ADMIN, PASSWORD_ADMIN, _get_expected_tags from datadog_checks.postgres import PostgreSql from datadog_checks.postgres.connections import MultiDatabaseConnectionPool -from datadog_checks.postgres.relationsmanager import RELATION_METRICS, INDEX_BLOAT, TABLE_BLOAT import psycopg2 @@ -108,29 +108,26 @@ def test_autodiscovery_relations_disabled(integration_check, pg_instance): assert check.autodiscovery is None -# @pytest.mark.integration -# @pytest.mark.usefixtures('dd_environment') -# def test_autodiscovery_collect_all_relations(aggregator, integration_check, pg_instance): -# """ -# If no relation metrics are being collected, autodiscovery should not run. -# """ -# pg_instance["database_autodiscovery"] = DISCOVERY_CONFIG -# pg_instance['relations'] = [{'relation_regex': '.*'}] -# check = integration_check(pg_instance) -# run_one_check(check, pg_instance) - -# # assert that for all databases found, a relation metric was reported -# databases = check.autodiscovery.get_items() -# relation_scopes = RELATION_METRICS -# relation_scopes.extend(TABLE_BLOAT) -# relation_scopes.extend(INDEX_BLOAT) -# print(type(relation_scopes[0])) -# relation_metrics = [] -# for scope in relation_scopes: -# print(type(scope)) -# relation_metrics.append(scope["metrics"]) -# # relation_metrics = [m["metrics"] for m in relation_scopes] -# print(relation_metrics) -# assert None is not None -# for db in databases: -# aggregator.assert_metric('postgresql.index_bloat', tags=['db:'+db]) \ No newline at end of file +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_autodiscovery_collect_all_relations(aggregator, integration_check, pg_instance): + """ + If no relation metrics are being collected, autodiscovery should not run. 
+ """ + pg_instance["database_autodiscovery"] = DISCOVERY_CONFIG + pg_instance['relations'] = ["breed"] + del pg_instance['dbname'] + + check = integration_check(pg_instance) + check.check(pg_instance) + + # assert that for all databases found, a relation metric was reported + databases = check.autodiscovery.get_items() + for db in databases: + print(RELATION_METRICS) + expected_tags = _get_expected_tags(check, pg_instance, db=db, table='breed', schema='public') + for metric in RELATION_METRICS: + aggregator.assert_metric(metric, tags=expected_tags) + print("yay {}".format(metric)) + + assert None is not None \ No newline at end of file From 87f53a9341fac8f3d20f67c04f8bfef2cb790d96 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 14:30:55 +0000 Subject: [PATCH 27/86] tests --- postgres/tests/test_discovery.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py index 8713ac9a29eac..e1ad338d05baa 100644 --- a/postgres/tests/test_discovery.py +++ b/postgres/tests/test_discovery.py @@ -8,7 +8,6 @@ import time from .utils import run_one_check -from .test_relations import RELATION_METRICS from .common import HOST, USER_ADMIN, PASSWORD_ADMIN, _get_expected_tags from datadog_checks.postgres import PostgreSql from datadog_checks.postgres.connections import MultiDatabaseConnectionPool @@ -24,6 +23,23 @@ "exclude":["dogs_5$", "dogs_50$"], } +RELATION_METRICS = { + 'postgresql.seq_scans', + 'postgresql.seq_rows_read', + 'postgresql.rows_inserted', + 'postgresql.rows_updated', + 'postgresql.rows_deleted', + 'postgresql.rows_hot_updated', + 'postgresql.live_rows', + 'postgresql.dead_rows', + 'postgresql.heap_blocks_read', + 'postgresql.heap_blocks_hit', + 'postgresql.vacuumed', + 'postgresql.autovacuumed', + 'postgresql.analyzed', + 'postgresql.autoanalyzed', +} + @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') def test_autodiscovery_simple(integration_check, pg_instance): @@ -115,7 +131,9 @@ def test_autodiscovery_collect_all_relations(aggregator, integration_check, pg_i If no relation metrics are being collected, autodiscovery should not run. 
""" pg_instance["database_autodiscovery"] = DISCOVERY_CONFIG - pg_instance['relations'] = ["breed"] + pg_instance['relations'] = [ + {'relation_regex': '.*'}, + ] del pg_instance['dbname'] check = integration_check(pg_instance) @@ -124,10 +142,10 @@ def test_autodiscovery_collect_all_relations(aggregator, integration_check, pg_i # assert that for all databases found, a relation metric was reported databases = check.autodiscovery.get_items() for db in databases: - print(RELATION_METRICS) + # print(RELATION_METRICS) expected_tags = _get_expected_tags(check, pg_instance, db=db, table='breed', schema='public') for metric in RELATION_METRICS: aggregator.assert_metric(metric, tags=expected_tags) - print("yay {}".format(metric)) + # print("yay {}".format(metric)) assert None is not None \ No newline at end of file From 5fd5bf29242224fec741cb7741edfdd6178f54f1 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 15:29:41 +0000 Subject: [PATCH 28/86] adding logs to cehck --- postgres/datadog_checks/postgres/postgres.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index c1d7446e11ac9..e46cd9e429e36 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -421,6 +421,10 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname = # A descriptor is the association of a Postgres column name (e.g. 'schemaname') # to a tag name (e.g. 'schema'). descriptors = scope['descriptors'] + if dbname and self._config.relations: + self.log.warning("Autodiscovery enabled; trying to get relations for {}".format(dbname)) + self.log.warning("Relations is {}".format(self._config.relations)) + self.log.warning("dbname is set? {}".format(self._config.dbname)) results = self._run_query_scope(cursor, scope, is_custom_metrics, cols, descriptors) if not results: # self.log.warning("none") From 3733f6ee115612fdb8d3fe6dfac9e0d1c6a1ac77 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 15:43:51 +0000 Subject: [PATCH 29/86] more logs --- postgres/datadog_checks/postgres/postgres.py | 1 + 1 file changed, 1 insertion(+) diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index e46cd9e429e36..0ca77d733694d 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -421,6 +421,7 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname = # A descriptor is the association of a Postgres column name (e.g. 'schemaname') # to a tag name (e.g. 'schema'). descriptors = scope['descriptors'] + self.log.warning("test print statement. 
{}, {}, {}".format(self._config.relations, self._config.dbname, self.autodiscovery)) if dbname and self._config.relations: self.log.warning("Autodiscovery enabled; trying to get relations for {}".format(dbname)) self.log.warning("Relations is {}".format(self._config.relations)) From a987a834d0c27349141b5b3feea73488f37bb795 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 17:47:33 +0000 Subject: [PATCH 30/86] one more log --- postgres/datadog_checks/postgres/postgres.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index 0ca77d733694d..fc5816cf97a9b 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -5,7 +5,7 @@ import os import threading from contextlib import closing -from time import time +from time import time, sleep import psycopg2 from six import iteritems @@ -109,6 +109,9 @@ def __init__(self, name, init_config, instances): self._dynamic_queries = None def _build_autodiscovery(self): + self.log.error("config looks like {}".format(self._config.discovery_config)) + # return None + assert None is None if not is_affirmative(self._config.discovery_config): return None From 973482010958e969c1d0c5a9f862f6f26ad81fc9 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 17:48:21 +0000 Subject: [PATCH 31/86] one more log --- postgres/datadog_checks/postgres/postgres.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index fc5816cf97a9b..ac6cd8f11fd38 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -109,9 +109,7 @@ def __init__(self, name, init_config, instances): self._dynamic_queries = None def _build_autodiscovery(self): - self.log.error("config looks like {}".format(self._config.discovery_config)) - # return None - assert None is None + self.log.warning("config looks like {}".format(self._config.discovery_config)) if not is_affirmative(self._config.discovery_config): return None From f64903050fb4410e381d78787ee4fad3f7aa9f1f Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 18:03:16 +0000 Subject: [PATCH 32/86] fixing print --- postgres/datadog_checks/postgres/discovery.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/postgres/datadog_checks/postgres/discovery.py b/postgres/datadog_checks/postgres/discovery.py index 950969c36a89c..f816365d98177 100644 --- a/postgres/datadog_checks/postgres/discovery.py +++ b/postgres/datadog_checks/postgres/discovery.py @@ -1,6 +1,5 @@ from typing import Dict, List, Callable from datadog_checks.base.utils.discovery import Discovery -from datadog_checks.postgres.relationsmanager import RELATION_METRICS, INDEX_BLOAT, TABLE_BLOAT from datadog_checks.postgres.connections import MultiDatabaseConnectionPool import logging @@ -49,5 +48,5 @@ def _get_databases(self) -> List[str]: cursor.execute(autodiscovery_query) databases = list(cursor.fetchall()) databases = [x[0] for x in databases] # fetchall returns list of tuples representing rows, so need to parse - self._log.info("Databases found were: ", databases) + self._log.info("Databases found were: {}".format(databases)) return databases From 8659ab18d9700ca6898b7249874d686c62d1b55c Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 19:06:02 +0000 Subject: [PATCH 33/86] removing extra logs --- 
postgres/datadog_checks/postgres/postgres.py | 31 ++++++-------------- postgres/tests/test_discovery.py | 27 +++++++++++++---- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index ac6cd8f11fd38..f08b132d5d5b5 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -109,13 +109,17 @@ def __init__(self, name, init_config, instances): self._dynamic_queries = None def _build_autodiscovery(self): - self.log.warning("config looks like {}".format(self._config.discovery_config)) if not is_affirmative(self._config.discovery_config): return None if not self._config.relations: self.log.warning("Database autodiscovery is enabled, but relation-level metrics are not being collected." - "All metrics will be gathered from global view.") + "All metrics will be gathered from global view, and autodiscovery will not run.") + return None + + if is_affirmative(self._config.discovery_config) and self._config.dbname != "postgres": + self.log.warning("Database autodiscovery is enabled, but a database name to monitor was specified in `dbname`." + "The check will default to monitor {}, and autodiscovery will not run.".format(self._config.dbname)) return None discovery = PostgresAutodiscovery('postgres', self._config.discovery_config, self.log, self.autodiscovery_db_pool, self._config.idle_connection_timeout) @@ -414,7 +418,6 @@ def _run_query_scope(self, cursor, scope, is_custom_metrics, cols, descriptors): def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname = None): if scope is None: return None - # self.log.warning("Collecting metrics with these tags: {}".format(instance_tags)) # build query cols = list(scope['metrics']) # list of metrics to query, in some order # we must remember that order to parse results @@ -422,16 +425,9 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname = # A descriptor is the association of a Postgres column name (e.g. 'schemaname') # to a tag name (e.g. 'schema'). descriptors = scope['descriptors'] - self.log.warning("test print statement. {}, {}, {}".format(self._config.relations, self._config.dbname, self.autodiscovery)) - if dbname and self._config.relations: - self.log.warning("Autodiscovery enabled; trying to get relations for {}".format(dbname)) - self.log.warning("Relations is {}".format(self._config.relations)) - self.log.warning("dbname is set? {}".format(self._config.dbname)) results = self._run_query_scope(cursor, scope, is_custom_metrics, cols, descriptors) if not results: - # self.log.warning("none") return None - self.log.warning(results) # Parse and submit results. 
@@ -466,11 +462,10 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname = if not scope['relation'] and not scope.get('use_global_db_tag', False): tags = copy.copy(self.tags_without_db) elif dbname is not None: - # self.log.warning("Collecting metrics {} from this db: {}".format(scope, dbname)) - # if dbname is specified in this function, we are querying relation-level metrics from autodiscovered database + # if dbname is specified in this function, we are querying an autodiscovered database + # and we need to tag it tags = copy.copy(self.tags_without_db) tags.append("db:{}".format(dbname)) - # self.log.warning("current tags {}".format(tags)) else: tags = copy.copy(instance_tags) @@ -489,19 +484,12 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname = def _collect_relations_autodiscovery(self, instance_tags, relations_scopes): if not self.autodiscovery: return - # self.log.warning("Entering autodiscovery") + databases = self.autodiscovery.get_items() - self.log.warning("Found these databases {}".format(databases)) for db in databases: with self.autodiscovery_db_pool.get_connection(db, self._config.idle_connection_timeout) as conn: cursor = conn.cursor() - # show tables - cursor.execute("SELECT * FROM pg_catalog.pg_tables;") - results = cursor.fetchall() - # self.log.warning("tables {}".format(results)) - for scope in relations_scopes: - # self.log.warning("Relation scope is {}".format(scope)) self._query_scope(cursor, scope, instance_tags, False, db) def _collect_stats(self, instance_tags): @@ -532,7 +520,6 @@ def _collect_stats(self, instance_tags): # If autodiscovery is enabled, get relation metrics from all databases found if self.autodiscovery: - self.log.warning("Entering discovery") self._collect_relations_autodiscovery(instance_tags, relations_scopes) # otherwise, continue just with dbname else: diff --git a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py index e1ad338d05baa..803e030f3c4be 100644 --- a/postgres/tests/test_discovery.py +++ b/postgres/tests/test_discovery.py @@ -9,6 +9,7 @@ from .utils import run_one_check from .common import HOST, USER_ADMIN, PASSWORD_ADMIN, _get_expected_tags +from .conftest import INSTANCE from datadog_checks.postgres import PostgreSql from datadog_checks.postgres.connections import MultiDatabaseConnectionPool @@ -48,6 +49,7 @@ def test_autodiscovery_simple(integration_check, pg_instance): """ pg_instance["database_autodiscovery"] = DISCOVERY_CONFIG pg_instance['relations'] = ['pg_index'] + del pg_instance['dbname'] check = integration_check(pg_instance) run_one_check(check, pg_instance) @@ -65,6 +67,8 @@ def test_autodiscovery_max_databases(integration_check, pg_instance): pg_instance["database_autodiscovery"] = copy.deepcopy(DISCOVERY_CONFIG) pg_instance['database_autodiscovery']['max_databases'] = 20 pg_instance['relations'] = ['pg_index'] + del pg_instance['dbname'] + check = integration_check(pg_instance) run_one_check(check, pg_instance) @@ -90,6 +94,7 @@ def get_postgres_connection(): pg_instance["database_autodiscovery"] = copy.deepcopy(DISCOVERY_CONFIG) pg_instance['database_autodiscovery']['include'].append(database_to_find) pg_instance['relations'] = ['pg_index'] + del pg_instance['dbname'] pg_instance["database_autodiscovery"]['refresh'] = 1 check = integration_check(pg_instance) run_one_check(check, pg_instance) @@ -118,6 +123,7 @@ def test_autodiscovery_relations_disabled(integration_check, pg_instance): """ pg_instance["database_autodiscovery"] = 
DISCOVERY_CONFIG pg_instance['relations'] = [] + del pg_instance['dbname'] check = integration_check(pg_instance) run_one_check(check, pg_instance) @@ -128,7 +134,7 @@ def test_autodiscovery_relations_disabled(integration_check, pg_instance): @pytest.mark.usefixtures('dd_environment') def test_autodiscovery_collect_all_relations(aggregator, integration_check, pg_instance): """ - If no relation metrics are being collected, autodiscovery should not run. + Check that relation metrics get collected for each database discovered. """ pg_instance["database_autodiscovery"] = DISCOVERY_CONFIG pg_instance['relations'] = [ @@ -142,10 +148,21 @@ def test_autodiscovery_collect_all_relations(aggregator, integration_check, pg_i # assert that for all databases found, a relation metric was reported databases = check.autodiscovery.get_items() for db in databases: - # print(RELATION_METRICS) expected_tags = _get_expected_tags(check, pg_instance, db=db, table='breed', schema='public') for metric in RELATION_METRICS: aggregator.assert_metric(metric, tags=expected_tags) - # print("yay {}".format(metric)) - - assert None is not None \ No newline at end of file + +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_autodiscovery_dbname_specified(integration_check, pg_instance): + """ + If a dbname is specified in the config, autodiscovery should not run. + """ + pg_instance["database_autodiscovery"] = DISCOVERY_CONFIG + pg_instance['relations'] = ['breed'] + pg_instance['dbname'] = "dogs_30" + check = integration_check(pg_instance) + run_one_check(check, pg_instance) + + assert check.autodiscovery is None + \ No newline at end of file From ab65c4828abcc491f35dd644c6db8e9ff0aefdc4 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 19:18:32 +0000 Subject: [PATCH 34/86] assert config validity --- postgres/datadog_checks/postgres/config.py | 7 +++++-- postgres/datadog_checks/postgres/postgres.py | 5 ----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/postgres/datadog_checks/postgres/config.py b/postgres/datadog_checks/postgres/config.py index 715a0dcac9f18..26ef92738c527 100644 --- a/postgres/datadog_checks/postgres/config.py +++ b/postgres/datadog_checks/postgres/config.py @@ -39,8 +39,11 @@ def __init__(self, instance): self.dbstrict = is_affirmative(instance.get('dbstrict', False)) self.disable_generic_tags = is_affirmative(instance.get('disable_generic_tags', False)) if instance else False - self.discovery_config = instance.get('database_autodiscovery', {}) - + self.discovery_config = instance.get('database_autodiscovery', {}) + if self.discovery_config and self.dbname != 'postgres': + raise ConfigurationError("'dbname' parameter should not be set when `database_autodiscovery` is enabled." 
+ "To monitor more databases, add them to the `database_autodiscovery` includelist.") + self.application_name = instance.get('application_name', 'datadog-agent') if not self.isascii(self.application_name): raise ConfigurationError("Application name can include only ASCII characters: %s", self.application_name) diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index f08b132d5d5b5..874e204a8d67e 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -117,11 +117,6 @@ def _build_autodiscovery(self): "All metrics will be gathered from global view, and autodiscovery will not run.") return None - if is_affirmative(self._config.discovery_config) and self._config.dbname != "postgres": - self.log.warning("Database autodiscovery is enabled, but a database name to monitor was specified in `dbname`." - "The check will default to monitor {}, and autodiscovery will not run.".format(self._config.dbname)) - return None - discovery = PostgresAutodiscovery('postgres', self._config.discovery_config, self.log, self.autodiscovery_db_pool, self._config.idle_connection_timeout) return discovery From a7c0c92c5421f0943595f0d8ab19f18ce8265040 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 19:46:23 +0000 Subject: [PATCH 35/86] styling --- postgres/datadog_checks/postgres/config.py | 16 ++++--- postgres/datadog_checks/postgres/discovery.py | 45 +++++++++++++------ postgres/datadog_checks/postgres/postgres.py | 35 +++++++++------ postgres/tests/test_discovery.py | 32 ++++++------- 4 files changed, 79 insertions(+), 49 deletions(-) diff --git a/postgres/datadog_checks/postgres/config.py b/postgres/datadog_checks/postgres/config.py index 26ef92738c527..30fa540fb5902 100644 --- a/postgres/datadog_checks/postgres/config.py +++ b/postgres/datadog_checks/postgres/config.py @@ -38,12 +38,14 @@ def __init__(self, instance): self.reported_hostname = instance.get('reported_hostname', '') self.dbstrict = is_affirmative(instance.get('dbstrict', False)) self.disable_generic_tags = is_affirmative(instance.get('disable_generic_tags', False)) if instance else False - - self.discovery_config = instance.get('database_autodiscovery', {}) + + self.discovery_config = instance.get('database_autodiscovery', {}) if self.discovery_config and self.dbname != 'postgres': - raise ConfigurationError("'dbname' parameter should not be set when `database_autodiscovery` is enabled." - "To monitor more databases, add them to the `database_autodiscovery` includelist.") - + raise ConfigurationError( + "'dbname' parameter should not be set when `database_autodiscovery` is enabled." + "To monitor more databases, add them to the `database_autodiscovery` includelist." + ) + self.application_name = instance.get('application_name', 'datadog-agent') if not self.isascii(self.application_name): raise ConfigurationError("Application name can include only ASCII characters: %s", self.application_name) @@ -52,7 +54,9 @@ def __init__(self, instance): self.idle_connection_timeout = instance.get('idle_connection_timeout', 60000) self.relations = instance.get('relations', []) if self.relations and not (self.dbname or self.discovery_config): - raise ConfigurationError('"dbname" parameter must be set OR autodiscovery must be enabled when using the "relations" parameter.') + raise ConfigurationError( + '"dbname" parameter must be set OR autodiscovery must be enabled when using the "relations" parameter.' 
+ ) self.tags = self._build_tags(instance.get('tags', [])) diff --git a/postgres/datadog_checks/postgres/discovery.py b/postgres/datadog_checks/postgres/discovery.py index f816365d98177..a9ee4e70ef1ec 100644 --- a/postgres/datadog_checks/postgres/discovery.py +++ b/postgres/datadog_checks/postgres/discovery.py @@ -1,15 +1,29 @@ -from typing import Dict, List, Callable +import logging +from typing import Dict, List + from datadog_checks.base.utils.discovery import Discovery from datadog_checks.postgres.connections import MultiDatabaseConnectionPool -import logging AUTODISCOVERY_QUERY: str = """select {columns} from pg_catalog.pg_database where datistemplate = false;""" -class PostgresAutodiscovery(Discovery): - def __init__(self, global_view_db: str, autodiscovery_config: Dict, log: logging.Logger, conn_pool: MultiDatabaseConnectionPool, default_ttl: int) -> None: + +class PostgresAutodiscovery(Discovery): + def __init__( + self, + global_view_db: str, + autodiscovery_config: Dict, + log: logging.Logger, + conn_pool: MultiDatabaseConnectionPool, + default_ttl: int, + ) -> None: # parent class asks for includelist to be a dictionary parsed_include = self._parse_includelist(autodiscovery_config.get("include", [".*"])) - super(PostgresAutodiscovery, self).__init__(self._get_databases, include=parsed_include, exclude=autodiscovery_config.get("exclude", []), interval=autodiscovery_config.get("refresh", 3600)) + super(PostgresAutodiscovery, self).__init__( + self._get_databases, + include=parsed_include, + exclude=autodiscovery_config.get("exclude", []), + interval=autodiscovery_config.get("refresh", 3600), + ) self._log = log self._db = global_view_db self._conn_pool = conn_pool @@ -17,11 +31,14 @@ def __init__(self, global_view_db: str, autodiscovery_config: Dict, log: logging self._max_databases = autodiscovery_config.get("max_databases", 100) def _parse_includelist(self, include: List[str]) -> Dict[str, int]: + """ + Convert includelist to a dictionary so the parent class can process it. + """ ret = {} for item in include: ret[item] = 0 return ret - + def get_items(self) -> List[str]: """ Get_items() from parent class returns a generator with four objects: @@ -32,21 +49,23 @@ def get_items(self) -> List[str]: items = list(super().get_items()) items_parsed = [item[1] for item in items] if len(items_parsed) > self._max_databases: - items_parsed = items_parsed[:self._max_databases] - self._log.warning("Autodiscovery found more than {} databases, which was specified as a limit. Truncating list and running checks only on \ - the following databases: {}".format(self._max_databases, items_parsed)) + items_parsed = items_parsed[: self._max_databases] + self._log.warning( + "Autodiscovery found more than {} databases, which was specified as a limit. 
Truncating list" + "and running checks only on the following databases: {}".format(self._max_databases, items_parsed) + ) return items_parsed - + def _get_autodiscovery_query(self) -> str: autodiscovery_query = AUTODISCOVERY_QUERY.format(columns=', '.join(['datname'])) return autodiscovery_query - + def _get_databases(self) -> List[str]: with self._conn_pool.get_connection(self._db, self._default_ttl) as conn: cursor = conn.cursor() autodiscovery_query = self._get_autodiscovery_query() cursor.execute(autodiscovery_query) databases = list(cursor.fetchall()) - databases = [x[0] for x in databases] # fetchall returns list of tuples representing rows, so need to parse + databases = [x[0] for x in databases] # fetchall returns list of tuples representing rows, so need to parse self._log.info("Databases found were: {}".format(databases)) - return databases + return databases diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index 874e204a8d67e..8f5973a9d5aeb 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -5,7 +5,7 @@ import os import threading from contextlib import closing -from time import time, sleep +from time import time import psycopg2 from six import iteritems @@ -16,10 +16,9 @@ from datadog_checks.base.utils.db.utils import resolve_db_host as agent_host_resolver from datadog_checks.postgres import aws from datadog_checks.postgres.connections import MultiDatabaseConnectionPool +from datadog_checks.postgres.discovery import PostgresAutodiscovery from datadog_checks.postgres.metadata import PostgresMetadata from datadog_checks.postgres.metrics_cache import PostgresMetricsCache -from datadog_checks.postgres.discovery import PostgresAutodiscovery -from datadog_checks.postgres.relationsmanager import INDEX_BLOAT, RELATION_METRICS, TABLE_BLOAT, RelationsManager from datadog_checks.postgres.relationsmanager import ( DYNAMIC_RELATION_QUERIES, INDEX_BLOAT, @@ -111,13 +110,21 @@ def __init__(self, name, init_config, instances): def _build_autodiscovery(self): if not is_affirmative(self._config.discovery_config): return None - + if not self._config.relations: - self.log.warning("Database autodiscovery is enabled, but relation-level metrics are not being collected." - "All metrics will be gathered from global view, and autodiscovery will not run.") + self.log.warning( + "Database autodiscovery is enabled, but relation-level metrics are not being collected." + "All metrics will be gathered from global view, and autodiscovery will not run." 
+ ) return None - - discovery = PostgresAutodiscovery('postgres', self._config.discovery_config, self.log, self.autodiscovery_db_pool, self._config.idle_connection_timeout) + + discovery = PostgresAutodiscovery( + 'postgres', + self._config.discovery_config, + self.log, + self.autodiscovery_db_pool, + self._config.idle_connection_timeout, + ) return discovery def set_resource_tags(self): @@ -410,7 +417,7 @@ def _run_query_scope(self, cursor, scope, is_custom_metrics, cols, descriptors): return results - def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname = None): + def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname=None): if scope is None: return None # build query @@ -475,11 +482,11 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname = num_results += 1 return num_results - + def _collect_relations_autodiscovery(self, instance_tags, relations_scopes): if not self.autodiscovery: return - + databases = self.autodiscovery.get_items() for db in databases: with self.autodiscovery_db_pool.get_connection(db, self._config.idle_connection_timeout) as conn: @@ -512,12 +519,12 @@ def _collect_stats(self, instance_tags): if self._config.collect_bloat_metrics: relations_scopes.extend([INDEX_BLOAT, TABLE_BLOAT]) - + # If autodiscovery is enabled, get relation metrics from all databases found if self.autodiscovery: self._collect_relations_autodiscovery(instance_tags, relations_scopes) # otherwise, continue just with dbname - else: + else: metric_scope.extend(relations_scopes) # self.log.warning(metric_scope) @@ -535,7 +542,7 @@ def _collect_stats(self, instance_tags): # show tables # cursor.execute("SELECT * FROM pg_catalog.pg_tables;") - # results = cursor.fetchall() + # results = cursor.fetchall() # self.log.warning("tables {}".format(results)) results_len = self._query_scope(cursor, db_instance_metrics, instance_tags, False) if results_len is not None: diff --git a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py index 803e030f3c4be..9969f0d8ed67e 100644 --- a/postgres/tests/test_discovery.py +++ b/postgres/tests/test_discovery.py @@ -2,30 +2,25 @@ # All rights reserved # Licensed under Simplified BSD License (see LICENSE) -from contextlib import contextmanager import copy -import select import time - -from .utils import run_one_check -from .common import HOST, USER_ADMIN, PASSWORD_ADMIN, _get_expected_tags -from .conftest import INSTANCE -from datadog_checks.postgres import PostgreSql -from datadog_checks.postgres.connections import MultiDatabaseConnectionPool - +from contextlib import contextmanager import psycopg2 import psycopg2.sql import pytest +from .common import HOST, PASSWORD_ADMIN, USER_ADMIN, _get_expected_tags +from .utils import run_one_check + DISCOVERY_CONFIG = { "enabled": True, "include": ["dogs_([1-9]|[1-9][0-9]|10[0-9])"], - "exclude":["dogs_5$", "dogs_50$"], + "exclude": ["dogs_5$", "dogs_50$"], } RELATION_METRICS = { - 'postgresql.seq_scans', + 'postgresql.seq_scans', 'postgresql.seq_rows_read', 'postgresql.rows_inserted', 'postgresql.rows_updated', @@ -41,6 +36,7 @@ 'postgresql.autoanalyzed', } + @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') def test_autodiscovery_simple(integration_check, pg_instance): @@ -55,9 +51,10 @@ def test_autodiscovery_simple(integration_check, pg_instance): assert check.autodiscovery is not None databases = check.autodiscovery.get_items() - expected_len = (100-len(DISCOVERY_CONFIG["exclude"])) + expected_len = 100 - 
len(DISCOVERY_CONFIG["exclude"]) assert len(databases) == expected_len + @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') def test_autodiscovery_max_databases(integration_check, pg_instance): @@ -84,6 +81,7 @@ def test_autodiscovery_refresh(integration_check, pg_instance): Test cache refresh by adding a database in the middle of a check. """ database_to_find = "cats" + @contextmanager def get_postgres_connection(): conn_args = {'host': HOST, 'dbname': "postgres", 'user': USER_ADMIN, 'password': PASSWORD_ADMIN} @@ -101,7 +99,7 @@ def get_postgres_connection(): assert check.autodiscovery is not None databases = check.autodiscovery.get_items() - expected_len = (100-len(DISCOVERY_CONFIG["exclude"])) + expected_len = 100 - len(DISCOVERY_CONFIG["exclude"]) assert len(databases) == expected_len with get_postgres_connection() as conn: @@ -110,9 +108,11 @@ def get_postgres_connection(): time.sleep(pg_instance["database_autodiscovery"]['refresh']) databases = check.autodiscovery.get_items() - assert len(databases) == expected_len+1 + assert len(databases) == expected_len + 1 # Need to drop the new database to clean up the environment for next tests. - cursor.execute(psycopg2.sql.SQL("DROP DATABASE {} WITH (FORCE);").format(psycopg2.sql.Identifier(database_to_find))) + cursor.execute( + psycopg2.sql.SQL("DROP DATABASE {} WITH (FORCE);").format(psycopg2.sql.Identifier(database_to_find)) + ) @pytest.mark.integration @@ -152,6 +152,7 @@ def test_autodiscovery_collect_all_relations(aggregator, integration_check, pg_i for metric in RELATION_METRICS: aggregator.assert_metric(metric, tags=expected_tags) + @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') def test_autodiscovery_dbname_specified(integration_check, pg_instance): @@ -165,4 +166,3 @@ def test_autodiscovery_dbname_specified(integration_check, pg_instance): run_one_check(check, pg_instance) assert check.autodiscovery is None - \ No newline at end of file From 8243c6e833e92c8cb4ac8e88937ffb41f93c40f5 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 20:05:44 +0000 Subject: [PATCH 36/86] test change --- postgres/datadog_checks/postgres/postgres.py | 7 ------- postgres/tests/test_discovery.py | 7 ++++--- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index 8f5973a9d5aeb..b0f769f94a2a0 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -364,7 +364,6 @@ def _run_query_scope(self, cursor, scope, is_custom_metrics, cols, descriptors): results = None is_relations = scope.get('relation') and self._relations_manager.has_relations - # self.log.warning("DOES THIS HAVE REALTIONS? 
{}".format(is_relations)) try: query = fmt.format(scope['query'], metrics_columns=", ".join(cols)) # if this is a relation-specific query, we need to list all relations last @@ -527,7 +526,6 @@ def _collect_stats(self, instance_tags): else: metric_scope.extend(relations_scopes) - # self.log.warning(metric_scope) replication_metrics = self.metrics_cache.get_replication_metrics(self.version, self.is_aurora) if replication_metrics: replication_metrics_query = copy.deepcopy(REPLICATION_METRICS) @@ -539,11 +537,6 @@ def _collect_stats(self, instance_tags): metric_scope.append(replication_stats_metrics) cursor = self.db.cursor() - - # show tables - # cursor.execute("SELECT * FROM pg_catalog.pg_tables;") - # results = cursor.fetchall() - # self.log.warning("tables {}".format(results)) results_len = self._query_scope(cursor, db_instance_metrics, instance_tags, False) if results_len is not None: self.gauge( diff --git a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py index 9969f0d8ed67e..9a1718802474f 100644 --- a/postgres/tests/test_discovery.py +++ b/postgres/tests/test_discovery.py @@ -10,6 +10,8 @@ import psycopg2.sql import pytest + +from datadog_checks.base import ConfigurationError from .common import HOST, PASSWORD_ADMIN, USER_ADMIN, _get_expected_tags from .utils import run_one_check @@ -162,7 +164,6 @@ def test_autodiscovery_dbname_specified(integration_check, pg_instance): pg_instance["database_autodiscovery"] = DISCOVERY_CONFIG pg_instance['relations'] = ['breed'] pg_instance['dbname'] = "dogs_30" - check = integration_check(pg_instance) - run_one_check(check, pg_instance) - assert check.autodiscovery is None + with pytest.raises(ConfigurationError): + integration_check(pg_instance) \ No newline at end of file From b7f5ebc736508fd88d910636811c640dd0e941e7 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 20:19:20 +0000 Subject: [PATCH 37/86] license headers --- postgres/datadog_checks/postgres/discovery.py | 4 ++++ postgres/tests/test_discovery.py | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/postgres/datadog_checks/postgres/discovery.py b/postgres/datadog_checks/postgres/discovery.py index a9ee4e70ef1ec..54167bc5a236a 100644 --- a/postgres/datadog_checks/postgres/discovery.py +++ b/postgres/datadog_checks/postgres/discovery.py @@ -1,3 +1,7 @@ +# (C) Datadog, Inc. 2023-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + import logging from typing import Dict, List diff --git a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py index 9a1718802474f..a5fc08725c233 100644 --- a/postgres/tests/test_discovery.py +++ b/postgres/tests/test_discovery.py @@ -1,6 +1,6 @@ # (C) Datadog, Inc. 
2023-present # All rights reserved -# Licensed under Simplified BSD License (see LICENSE) +# Licensed under a 3-clause BSD style license (see LICENSE) import copy import time @@ -10,8 +10,8 @@ import psycopg2.sql import pytest - from datadog_checks.base import ConfigurationError + from .common import HOST, PASSWORD_ADMIN, USER_ADMIN, _get_expected_tags from .utils import run_one_check @@ -166,4 +166,4 @@ def test_autodiscovery_dbname_specified(integration_check, pg_instance): pg_instance['dbname'] = "dogs_30" with pytest.raises(ConfigurationError): - integration_check(pg_instance) \ No newline at end of file + integration_check(pg_instance) From 26d7ff27f2d3b3a1f9cc670600e41abeca5feac9 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 20:34:18 +0000 Subject: [PATCH 38/86] syncing files --- .../postgres/config_models/defaults.py | 4 ++++ .../postgres/config_models/instance.py | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/postgres/datadog_checks/postgres/config_models/defaults.py b/postgres/datadog_checks/postgres/config_models/defaults.py index 5f7127ca758f9..ff0311a5130dc 100644 --- a/postgres/datadog_checks/postgres/config_models/defaults.py +++ b/postgres/datadog_checks/postgres/config_models/defaults.py @@ -70,6 +70,10 @@ def instance_data_directory(field, value): return '/usr/local/pgsql/data' +def instance_database_autodiscovery(field, value): + return get_default_field_value(field, value) + + def instance_dbm(field, value): return False diff --git a/postgres/datadog_checks/postgres/config_models/instance.py b/postgres/datadog_checks/postgres/config_models/instance.py index 9eb83de476756..05c2c1804c164 100644 --- a/postgres/datadog_checks/postgres/config_models/instance.py +++ b/postgres/datadog_checks/postgres/config_models/instance.py @@ -43,6 +43,17 @@ class Config: enabled: Optional[bool] +class DatabaseAutodiscovery(BaseModel): + class Config: + allow_mutation = False + + enabled: Optional[bool] + exclude: Optional[Sequence[str]] + include: Optional[Sequence[str]] + max_databases: Optional[int] + refresh: Optional[int] + + class Gcp(BaseModel): class Config: allow_mutation = False @@ -133,6 +144,7 @@ class Config: collect_wal_metrics: Optional[bool] custom_queries: Optional[Sequence[Mapping[str, Any]]] data_directory: Optional[str] + database_autodiscovery: Optional[DatabaseAutodiscovery] dbm: Optional[bool] dbname: Optional[str] dbstrict: Optional[bool] From 70ee7dac41cbcbe2db6aa069f50347d2c900ef86 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 21:22:34 +0000 Subject: [PATCH 39/86] fixing setup script --- postgres/tests/compose/resources/03_load_data.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/postgres/tests/compose/resources/03_load_data.sh b/postgres/tests/compose/resources/03_load_data.sh index f0357492720ba..3c77952f65a45 100755 --- a/postgres/tests/compose/resources/03_load_data.sh +++ b/postgres/tests/compose/resources/03_load_data.sh @@ -32,13 +32,10 @@ EOSQL fi i=1 -dbs=() +dbs=(dogs dogs_noschema dogs_nofunc) for ((i=1; i<=100; i++)); do dbs+=(dogs_$i) done -dbs+=dogs -dbs+=dogs_noschema -dbs+=dogs_nofunc for DBNAME in ${dbs[@]}; do psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" "$DBNAME" <<-EOSQL From d4b9312aa61b05ebb570840d5db1be87256f8773 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 21:53:43 +0000 Subject: [PATCH 40/86] force only avaiallble in version >=13 --- postgres/tests/test_discovery.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py index a5fc08725c233..f09c29fdce30a 100644 --- a/postgres/tests/test_discovery.py +++ b/postgres/tests/test_discovery.py @@ -13,7 +13,7 @@ from datadog_checks.base import ConfigurationError from .common import HOST, PASSWORD_ADMIN, USER_ADMIN, _get_expected_tags -from .utils import run_one_check +from .utils import run_one_check, requires_over_13 DISCOVERY_CONFIG = { "enabled": True, @@ -78,6 +78,7 @@ def test_autodiscovery_max_databases(integration_check, pg_instance): @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') +@requires_over_13 def test_autodiscovery_refresh(integration_check, pg_instance): """ Test cache refresh by adding a database in the middle of a check. From 813f24aa86a30bce6e5fe70ee3c772f190a33e48 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 21:58:55 +0000 Subject: [PATCH 41/86] requires over 13 line --- postgres/tests/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/postgres/tests/utils.py b/postgres/tests/utils.py index 7396db3227e7e..b6152a2ffe481 100644 --- a/postgres/tests/utils.py +++ b/postgres/tests/utils.py @@ -16,6 +16,10 @@ POSTGRES_VERSION is None or float(POSTGRES_VERSION) < 11, reason='This test is for over 11 only (make sure POSTGRES_VERSION is set)', ) +requires_over_13 = pytest.mark.skipif( + POSTGRES_VERSION is None or float(POSTGRES_VERSION) < 13, + reason='This test is for over 13 only (make sure POSTGRES_VERSION is set)', +) requires_over_14 = pytest.mark.skipif( POSTGRES_VERSION is None or float(POSTGRES_VERSION) < 14, reason='This test is for over 14 only (make sure POSTGRES_VERSION is set)', From 1e6c6b3bcb0025d94bea92a709cb8aea2d532cf6 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 22:21:01 +0000 Subject: [PATCH 42/86] changed my mind about altering the tables at setup :,) --- postgres/tests/test_discovery.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py index f09c29fdce30a..391abe675d1a4 100644 --- a/postgres/tests/test_discovery.py +++ b/postgres/tests/test_discovery.py @@ -139,7 +139,8 @@ def test_autodiscovery_collect_all_relations(aggregator, integration_check, pg_i """ Check that relation metrics get collected for each database discovered. 
""" - pg_instance["database_autodiscovery"] = DISCOVERY_CONFIG + pg_instance["database_autodiscovery"] = copy.deepcopy(DISCOVERY_CONFIG) + pg_instance["database_autodiscovery"]["include"] = ["dogs$", "dogs_noschema$", "dogs_nofunc$"] pg_instance['relations'] = [ {'relation_regex': '.*'}, ] From 4786c58c118d143ac8693461d1f5fbc6666df6a6 Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 22:24:22 +0000 Subject: [PATCH 43/86] commit didnt update?: --- postgres/tests/compose/resources/03_load_data.sh | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/postgres/tests/compose/resources/03_load_data.sh b/postgres/tests/compose/resources/03_load_data.sh index 3c77952f65a45..88642a04f718e 100755 --- a/postgres/tests/compose/resources/03_load_data.sh +++ b/postgres/tests/compose/resources/03_load_data.sh @@ -31,12 +31,7 @@ psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" datadog_test <<-EOSQL EOSQL fi -i=1 -dbs=(dogs dogs_noschema dogs_nofunc) -for ((i=1; i<=100; i++)); do - dbs+=(dogs_$i) -done -for DBNAME in ${dbs[@]}; do +for DBNAME in dogs dogs_noschema dogs_nofunc; do psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" "$DBNAME" <<-EOSQL CREATE TABLE breed (id SERIAL, name VARCHAR(255)); @@ -47,4 +42,4 @@ psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" "$DBNAME" <<-EOSQL INSERT INTO breed (name) VALUES ('Labrador Retriver'), ('German Shepherd'), ('Yorkshire Terrier'), ('Golden Retriever'), ('Bulldog'); SELECT * FROM breed WHERE name = 'Labrador'; EOSQL -done +done \ No newline at end of file From 375c146f357af3b4a60ddfc29ca4e0d1790b623b Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 22:24:47 +0000 Subject: [PATCH 44/86] accdientally removed space --- postgres/tests/compose/resources/03_load_data.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/postgres/tests/compose/resources/03_load_data.sh b/postgres/tests/compose/resources/03_load_data.sh index 88642a04f718e..d74d85f37cd8c 100755 --- a/postgres/tests/compose/resources/03_load_data.sh +++ b/postgres/tests/compose/resources/03_load_data.sh @@ -42,4 +42,5 @@ psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" "$DBNAME" <<-EOSQL INSERT INTO breed (name) VALUES ('Labrador Retriver'), ('German Shepherd'), ('Yorkshire Terrier'), ('Golden Retriever'), ('Bulldog'); SELECT * FROM breed WHERE name = 'Labrador'; EOSQL + done \ No newline at end of file From fa40504044fbcf936e1d7d0c374e2fce78d2838f Mon Sep 17 00:00:00 2001 From: Eden Date: Tue, 27 Jun 2023 23:01:10 +0000 Subject: [PATCH 45/86] style --- postgres/tests/test_discovery.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py index 391abe675d1a4..a7f471cc4adb6 100644 --- a/postgres/tests/test_discovery.py +++ b/postgres/tests/test_discovery.py @@ -13,7 +13,7 @@ from datadog_checks.base import ConfigurationError from .common import HOST, PASSWORD_ADMIN, USER_ADMIN, _get_expected_tags -from .utils import run_one_check, requires_over_13 +from .utils import requires_over_13, run_one_check DISCOVERY_CONFIG = { "enabled": True, From d3edeca9af233ad2bb80565cb9e628515bd457a8 Mon Sep 17 00:00:00 2001 From: Eden Date: Thu, 29 Jun 2023 13:44:57 +0000 Subject: [PATCH 46/86] cleaning up code, adding better logs, changing refresh default --- postgres/assets/configuration/spec.yaml | 6 +- postgres/datadog_checks/postgres/config.py | 6 +- postgres/datadog_checks/postgres/discovery.py | 81 ++++++++++--------- postgres/datadog_checks/postgres/postgres.py | 9 +-- 
postgres/tests/test_discovery.py | 29 ++++--- 5 files changed, 69 insertions(+), 62 deletions(-) diff --git a/postgres/assets/configuration/spec.yaml b/postgres/assets/configuration/spec.yaml index 2538f095bdf6c..6f26fe7092f31 100644 --- a/postgres/assets/configuration/spec.yaml +++ b/postgres/assets/configuration/spec.yaml @@ -351,11 +351,11 @@ files: - "model" - "msdb" - name: refresh - description: Frequency in seconds of scans for new databases. Defaults to `3600`. + description: Frequency in seconds of scans for new databases. Defaults to 10 minutes. value: type: integer - example: 3600 - display_default: 3600 + example: 600 + display_default: 600 - name: application_name description: | The application_name can be any string of less than NAMEDATALEN characters (64 characters in a standard build). diff --git a/postgres/datadog_checks/postgres/config.py b/postgres/datadog_checks/postgres/config.py index 30fa540fb5902..57eec971df07d 100644 --- a/postgres/datadog_checks/postgres/config.py +++ b/postgres/datadog_checks/postgres/config.py @@ -39,8 +39,8 @@ def __init__(self, instance): self.dbstrict = is_affirmative(instance.get('dbstrict', False)) self.disable_generic_tags = is_affirmative(instance.get('disable_generic_tags', False)) if instance else False - self.discovery_config = instance.get('database_autodiscovery', {}) - if self.discovery_config and self.dbname != 'postgres': + self.discovery_config = instance.get('database_autodiscovery', {"enabled": False}) + if self.discovery_config['enabled'] and self.dbname != 'postgres': raise ConfigurationError( "'dbname' parameter should not be set when `database_autodiscovery` is enabled." "To monitor more databases, add them to the `database_autodiscovery` includelist." @@ -53,7 +53,7 @@ def __init__(self, instance): self.query_timeout = int(instance.get('query_timeout', 5000)) self.idle_connection_timeout = instance.get('idle_connection_timeout', 60000) self.relations = instance.get('relations', []) - if self.relations and not (self.dbname or self.discovery_config): + if self.relations and not (self.dbname or self.discovery_config['enabled']): raise ConfigurationError( '"dbname" parameter must be set OR autodiscovery must be enabled when using the "relations" parameter.' 
) diff --git a/postgres/datadog_checks/postgres/discovery.py b/postgres/datadog_checks/postgres/discovery.py index 54167bc5a236a..a2a8e04f5f371 100644 --- a/postgres/datadog_checks/postgres/discovery.py +++ b/postgres/datadog_checks/postgres/discovery.py @@ -2,46 +2,38 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) -import logging from typing import Dict, List from datadog_checks.base.utils.discovery import Discovery -from datadog_checks.postgres.connections import MultiDatabaseConnectionPool +from datadog_checks.postgres import PostgreSql +from datadog_checks.postgres.util import DatabaseConfigurationError, warning_with_tags -AUTODISCOVERY_QUERY: str = """select {columns} from pg_catalog.pg_database where datistemplate = false;""" +AUTODISCOVERY_QUERY: str = """select datname from pg_catalog.pg_database where datistemplate = false;""" +DEFAULT_MAX_DATABASES = 100 +DEFAULT_REFRESH = 600 class PostgresAutodiscovery(Discovery): def __init__( self, + check: PostgreSql, global_view_db: str, autodiscovery_config: Dict, - log: logging.Logger, - conn_pool: MultiDatabaseConnectionPool, default_ttl: int, ) -> None: - # parent class asks for includelist to be a dictionary - parsed_include = self._parse_includelist(autodiscovery_config.get("include", [".*"])) super(PostgresAutodiscovery, self).__init__( self._get_databases, - include=parsed_include, + # parent class asks for includelist to be a dictionary + include={db: 0 for db in autodiscovery_config.get("include", [".*"])}, exclude=autodiscovery_config.get("exclude", []), - interval=autodiscovery_config.get("refresh", 3600), + interval=autodiscovery_config.get("refresh", DEFAULT_REFRESH), + limit=autodiscovery_config.get("max_databases", DEFAULT_MAX_DATABASES), ) - self._log = log - self._db = global_view_db - self._conn_pool = conn_pool self._default_ttl = default_ttl - self._max_databases = autodiscovery_config.get("max_databases", 100) - - def _parse_includelist(self, include: List[str]) -> Dict[str, int]: - """ - Convert includelist to a dictionary so the parent class can process it. - """ - ret = {} - for item in include: - ret[item] = 0 - return ret + self._db = global_view_db + self._check = check + self._log = self._check.log + self._conn_pool = self._check.autodiscovery_db_pool def get_items(self) -> List[str]: """ @@ -52,24 +44,35 @@ def get_items(self) -> List[str]: """ items = list(super().get_items()) items_parsed = [item[1] for item in items] - if len(items_parsed) > self._max_databases: - items_parsed = items_parsed[: self._max_databases] - self._log.warning( - "Autodiscovery found more than {} databases, which was specified as a limit. 
Truncating list" - "and running checks only on the following databases: {}".format(self._max_databases, items_parsed) - ) return items_parsed - def _get_autodiscovery_query(self) -> str: - autodiscovery_query = AUTODISCOVERY_QUERY.format(columns=', '.join(['datname'])) - return autodiscovery_query - def _get_databases(self) -> List[str]: with self._conn_pool.get_connection(self._db, self._default_ttl) as conn: - cursor = conn.cursor() - autodiscovery_query = self._get_autodiscovery_query() - cursor.execute(autodiscovery_query) - databases = list(cursor.fetchall()) - databases = [x[0] for x in databases] # fetchall returns list of tuples representing rows, so need to parse - self._log.info("Databases found were: {}".format(databases)) - return databases + with conn.cursor() as cursor: + cursor.execute(AUTODISCOVERY_QUERY) + databases = list(cursor.fetchall()) + databases = [ + x[0] for x in databases + ] # fetchall returns list of tuples representing rows, so need to parse + self._log.debug("Autodiscovered databases were: {}".format(databases)) + return databases + + def __refresh(self): + prev_cached_items_len = len(self._cached_items) + super().__refresh() + # refresh updates _cached_items, so check if the last refresh + # added a database that put this instance over the limit. + # _cached_items stores databases before the limit filter is applied + if len(self._cached_items) != prev_cached_items_len and len(self._cached_items) > self._limit: + self._check.record_warning( + DatabaseConfigurationError.autodiscovered_databases_exceeds_limit, + warning_with_tags( + "Autodiscovery found %d databases, which was more than the specified limit of %d. " + "Increase `max_databases` in the `database_autodiscovery` block of the agent configuration" + "to see these extra databases." 
+ "Truncating list and running checks only on the following databases: %s", + len(self._cached_items), + self._limit, + self.get_items(), + ), + ) diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index b0f769f94a2a0..0e63d8ca9d4e7 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -11,7 +11,6 @@ from six import iteritems from datadog_checks.base import AgentCheck -from datadog_checks.base.config import is_affirmative from datadog_checks.base.utils.db import QueryExecutor from datadog_checks.base.utils.db.utils import resolve_db_host as agent_host_resolver from datadog_checks.postgres import aws @@ -108,7 +107,7 @@ def __init__(self, name, init_config, instances): self._dynamic_queries = None def _build_autodiscovery(self): - if not is_affirmative(self._config.discovery_config): + if not self._config.discovery_config['enabled']: return None if not self._config.relations: @@ -489,9 +488,9 @@ def _collect_relations_autodiscovery(self, instance_tags, relations_scopes): databases = self.autodiscovery.get_items() for db in databases: with self.autodiscovery_db_pool.get_connection(db, self._config.idle_connection_timeout) as conn: - cursor = conn.cursor() - for scope in relations_scopes: - self._query_scope(cursor, scope, instance_tags, False, db) + with conn.cursor() as cursor: + for scope in relations_scopes: + self._query_scope(cursor, scope, instance_tags, False, db) def _collect_stats(self, instance_tags): """Query pg_stat_* for various metrics diff --git a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py index a7f471cc4adb6..2c6e0feb8e90d 100644 --- a/postgres/tests/test_discovery.py +++ b/postgres/tests/test_discovery.py @@ -17,10 +17,13 @@ DISCOVERY_CONFIG = { "enabled": True, - "include": ["dogs_([1-9]|[1-9][0-9]|10[0-9])"], + "include": ["dogs_([0-9]|[1-9][0-9]|10[0-9])"], "exclude": ["dogs_5$", "dogs_50$"], } +# the number of test databases that exist from [dogs_0, dogs_100] +NUM_DOGS_DATABASES = 101 + RELATION_METRICS = { 'postgresql.seq_scans', 'postgresql.seq_rows_read', @@ -53,7 +56,7 @@ def test_autodiscovery_simple(integration_check, pg_instance): assert check.autodiscovery is not None databases = check.autodiscovery.get_items() - expected_len = 100 - len(DISCOVERY_CONFIG["exclude"]) + expected_len = NUM_DOGS_DATABASES - len(DISCOVERY_CONFIG["exclude"]) assert len(databases) == expected_len @@ -102,20 +105,22 @@ def get_postgres_connection(): assert check.autodiscovery is not None databases = check.autodiscovery.get_items() - expected_len = 100 - len(DISCOVERY_CONFIG["exclude"]) + expected_len = NUM_DOGS_DATABASES - len(DISCOVERY_CONFIG["exclude"]) assert len(databases) == expected_len with get_postgres_connection() as conn: cursor = conn.cursor() - cursor.execute(psycopg2.sql.SQL("CREATE DATABASE {}").format(psycopg2.sql.Identifier(database_to_find))) - - time.sleep(pg_instance["database_autodiscovery"]['refresh']) - databases = check.autodiscovery.get_items() - assert len(databases) == expected_len + 1 - # Need to drop the new database to clean up the environment for next tests. 
- cursor.execute( - psycopg2.sql.SQL("DROP DATABASE {} WITH (FORCE);").format(psycopg2.sql.Identifier(database_to_find)) - ) + try: + cursor.execute(psycopg2.sql.SQL("CREATE DATABASE {}").format(psycopg2.sql.Identifier(database_to_find))) + + time.sleep(pg_instance["database_autodiscovery"]['refresh']) + databases = check.autodiscovery.get_items() + assert len(databases) == expected_len + 1 + finally: + # Need to drop the new database to clean up the environment for next tests. + cursor.execute( + psycopg2.sql.SQL("DROP DATABASE {} WITH (FORCE);").format(psycopg2.sql.Identifier(database_to_find)) + ) @pytest.mark.integration From 762250807a1316d287c9189edae37ac07e6f30ff Mon Sep 17 00:00:00 2001 From: Eden Date: Thu, 29 Jun 2023 13:57:54 +0000 Subject: [PATCH 47/86] tracking method --- postgres/datadog_checks/postgres/postgres.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index 0e63d8ca9d4e7..9d9e609f3f76e 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -13,6 +13,7 @@ from datadog_checks.base import AgentCheck from datadog_checks.base.utils.db import QueryExecutor from datadog_checks.base.utils.db.utils import resolve_db_host as agent_host_resolver +from datadog_checks.base.utils.tracking import tracked_method from datadog_checks.postgres import aws from datadog_checks.postgres.connections import MultiDatabaseConnectionPool from datadog_checks.postgres.discovery import PostgresAutodiscovery @@ -481,6 +482,7 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname=N return num_results + @tracked_method() def _collect_relations_autodiscovery(self, instance_tags, relations_scopes): if not self.autodiscovery: return From be97495ebf065f3347acf5006bc095a73409b487 Mon Sep 17 00:00:00 2001 From: Eden Date: Thu, 29 Jun 2023 14:01:34 +0000 Subject: [PATCH 48/86] validate config --- postgres/datadog_checks/postgres/data/conf.yaml.example | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/postgres/datadog_checks/postgres/data/conf.yaml.example b/postgres/datadog_checks/postgres/data/conf.yaml.example index f73e27be57f71..c43a6360e64b1 100644 --- a/postgres/datadog_checks/postgres/data/conf.yaml.example +++ b/postgres/datadog_checks/postgres/data/conf.yaml.example @@ -280,10 +280,10 @@ instances: # - model # - msdb - ## @param refresh - integer - optional - default: 3600 - ## Frequency in seconds of scans for new databases. Defaults to `3600`. + ## @param refresh - integer - optional - default: 600 + ## Frequency in seconds of scans for new databases. Defaults to 10 minutes. # - # refresh: 3600 + # refresh: 600 ## @param application_name - string - optional - default: datadog-agent ## The application_name can be any string of less than NAMEDATALEN characters (64 characters in a standard build). 
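The `database_autodiscovery` options touched in the patches above (`enabled`, `include`, `exclude`, `refresh`, `max_databases`) all live in a single instance block. A minimal illustrative configuration, assuming the `dogs_*` test databases used elsewhere in this series (the values are examples, not recommendations), might look like:

    database_autodiscovery:
      enabled: true
      # regex include/exclude lists, as in DISCOVERY_CONFIG in the tests
      include:
        - "dogs_.*"
      exclude:
        - "dogs_5$"
      # scan for new databases every 600 seconds (the new default)
      refresh: 600
      # cap on how many autodiscovered databases are monitored
      max_databases: 100

Note that `dbname` stays at its default here; the validation added earlier in this series raises a ConfigurationError when a non-default `dbname` is combined with autodiscovery.
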
From dcba329481e84a2eb0f452710213361fefd3e1ac Mon Sep 17 00:00:00 2001 From: Eden Date: Thu, 29 Jun 2023 14:36:36 +0000 Subject: [PATCH 49/86] fixed autodiscovery building --- postgres/datadog_checks/postgres/discovery.py | 4 ++-- postgres/datadog_checks/postgres/postgres.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/postgres/datadog_checks/postgres/discovery.py b/postgres/datadog_checks/postgres/discovery.py index a2a8e04f5f371..362de57f091d4 100644 --- a/postgres/datadog_checks/postgres/discovery.py +++ b/postgres/datadog_checks/postgres/discovery.py @@ -5,7 +5,7 @@ from typing import Dict, List from datadog_checks.base.utils.discovery import Discovery -from datadog_checks.postgres import PostgreSql +from datadog_checks.base import AgentCheck from datadog_checks.postgres.util import DatabaseConfigurationError, warning_with_tags AUTODISCOVERY_QUERY: str = """select datname from pg_catalog.pg_database where datistemplate = false;""" @@ -16,7 +16,7 @@ class PostgresAutodiscovery(Discovery): def __init__( self, - check: PostgreSql, + check: AgentCheck, global_view_db: str, autodiscovery_config: Dict, default_ttl: int, diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index 9d9e609f3f76e..371009bebce8b 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -119,10 +119,9 @@ def _build_autodiscovery(self): return None discovery = PostgresAutodiscovery( + self, 'postgres', self._config.discovery_config, - self.log, - self.autodiscovery_db_pool, self._config.idle_connection_timeout, ) return discovery From 34a560737f60def7429749bce40468dd307a40d4 Mon Sep 17 00:00:00 2001 From: Eden Date: Thu, 29 Jun 2023 14:54:04 +0000 Subject: [PATCH 50/86] added db config error --- postgres/datadog_checks/postgres/util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/postgres/datadog_checks/postgres/util.py b/postgres/datadog_checks/postgres/util.py index 50640f5b6e162..54ecfa00a7964 100644 --- a/postgres/datadog_checks/postgres/util.py +++ b/postgres/datadog_checks/postgres/util.py @@ -32,6 +32,7 @@ class DatabaseConfigurationError(Enum): pg_stat_statements_not_loaded = 'pg-stat-statements-not-loaded' undefined_explain_function = 'undefined-explain-function' high_pg_stat_statements_max = 'high-pg-stat-statements-max-configuration' + autodiscovered_databases_exceeds_limit = 'autodiscovered-databases-exceeds-limit' def warning_with_tags(warning_message, *args, **kwargs): From 8aef4e1b15cb79837a2e79891d05e7b32cddb01a Mon Sep 17 00:00:00 2001 From: Eden Date: Thu, 29 Jun 2023 15:40:32 +0000 Subject: [PATCH 51/86] fixing warning for max dbs --- postgres/datadog_checks/postgres/discovery.py | 40 +++++++++---------- postgres/tests/test_discovery.py | 10 +++++ 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/postgres/datadog_checks/postgres/discovery.py b/postgres/datadog_checks/postgres/discovery.py index 362de57f091d4..4d88578b4dcf9 100644 --- a/postgres/datadog_checks/postgres/discovery.py +++ b/postgres/datadog_checks/postgres/discovery.py @@ -4,8 +4,8 @@ from typing import Dict, List -from datadog_checks.base.utils.discovery import Discovery from datadog_checks.base import AgentCheck +from datadog_checks.base.utils.discovery import Discovery from datadog_checks.postgres.util import DatabaseConfigurationError, warning_with_tags AUTODISCOVERY_QUERY: str = """select datname from pg_catalog.pg_database where datistemplate = false;""" @@ -42,7 +42,25 @@ def 
get_items(self) -> List[str]: This function takes the item of interest (dbname) from this four-tuple and returns the full list of database names from the generator. """ + prev_cached_items_len = len(self._cache._cached_items) items = list(super().get_items()) + # get_items updates _cache._cached_items, so check if the last refresh + # added a database that put this instance over the limit. + # _cache._cached_items stores databases before the limit filter is applied + if len(self._cache._cached_items) != prev_cached_items_len and len(self._cache._cached_items) > self._filter._limit: + self._check.record_warning( + DatabaseConfigurationError.autodiscovered_databases_exceeds_limit, + warning_with_tags( + "Autodiscovery found %d databases, which was more than the specified limit of %d. " + "Increase `max_databases` in the `database_autodiscovery` block of the agent configuration" + "to see these extra databases." + "Truncating list and running checks only on the following databases: %s", + len(self._cache._cached_items), + self._filter._limit, + self.get_items(), + ), + ) + items_parsed = [item[1] for item in items] return items_parsed @@ -56,23 +74,3 @@ def _get_databases(self) -> List[str]: ] # fetchall returns list of tuples representing rows, so need to parse self._log.debug("Autodiscovered databases were: {}".format(databases)) return databases - - def __refresh(self): - prev_cached_items_len = len(self._cached_items) - super().__refresh() - # refresh updates _cached_items, so check if the last refresh - # added a database that put this instance over the limit. - # _cached_items stores databases before the limit filter is applied - if len(self._cached_items) != prev_cached_items_len and len(self._cached_items) > self._limit: - self._check.record_warning( - DatabaseConfigurationError.autodiscovered_databases_exceeds_limit, - warning_with_tags( - "Autodiscovery found %d databases, which was more than the specified limit of %d. " - "Increase `max_databases` in the `database_autodiscovery` block of the agent configuration" - "to see these extra databases." - "Truncating list and running checks only on the following databases: %s", - len(self._cached_items), - self._limit, - self.get_items(), - ), - ) diff --git a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py index 2c6e0feb8e90d..e5b2c195c406c 100644 --- a/postgres/tests/test_discovery.py +++ b/postgres/tests/test_discovery.py @@ -77,6 +77,16 @@ def test_autodiscovery_max_databases(integration_check, pg_instance): assert check.autodiscovery is not None databases = check.autodiscovery.get_items() assert len(databases) == pg_instance['database_autodiscovery']['max_databases'] + expected_warning = ["Autodiscovery found {} databases, which was more than the specified limit of {}. " + "Increase `max_databases` in the `database_autodiscovery` block of the agent configuration" + "to see these extra databases." 
+ "Truncating list and running checks only on the following databases: {}\n".format( + len(check.autodiscovery._cache._cached_items), + pg_instance['database_autodiscovery']['max_databases'], + databases + )] + print(check.warnings) + assert check.warnings == expected_warning @pytest.mark.integration From c9f51359501560739cedcbe3753b06e8491c0771 Mon Sep 17 00:00:00 2001 From: Eden Date: Thu, 29 Jun 2023 15:40:53 +0000 Subject: [PATCH 52/86] fixing warning for max dbs --- postgres/datadog_checks/postgres/discovery.py | 5 ++++- postgres/tests/test_discovery.py | 18 ++++++++++-------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/postgres/datadog_checks/postgres/discovery.py b/postgres/datadog_checks/postgres/discovery.py index 4d88578b4dcf9..d6b8dee16ac89 100644 --- a/postgres/datadog_checks/postgres/discovery.py +++ b/postgres/datadog_checks/postgres/discovery.py @@ -47,7 +47,10 @@ def get_items(self) -> List[str]: # get_items updates _cache._cached_items, so check if the last refresh # added a database that put this instance over the limit. # _cache._cached_items stores databases before the limit filter is applied - if len(self._cache._cached_items) != prev_cached_items_len and len(self._cache._cached_items) > self._filter._limit: + if ( + len(self._cache._cached_items) != prev_cached_items_len + and len(self._cache._cached_items) > self._filter._limit + ): self._check.record_warning( DatabaseConfigurationError.autodiscovered_databases_exceeds_limit, warning_with_tags( diff --git a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py index e5b2c195c406c..a902856318a62 100644 --- a/postgres/tests/test_discovery.py +++ b/postgres/tests/test_discovery.py @@ -77,14 +77,16 @@ def test_autodiscovery_max_databases(integration_check, pg_instance): assert check.autodiscovery is not None databases = check.autodiscovery.get_items() assert len(databases) == pg_instance['database_autodiscovery']['max_databases'] - expected_warning = ["Autodiscovery found {} databases, which was more than the specified limit of {}. " - "Increase `max_databases` in the `database_autodiscovery` block of the agent configuration" - "to see these extra databases." - "Truncating list and running checks only on the following databases: {}\n".format( - len(check.autodiscovery._cache._cached_items), - pg_instance['database_autodiscovery']['max_databases'], - databases - )] + expected_warning = [ + "Autodiscovery found {} databases, which was more than the specified limit of {}. " + "Increase `max_databases` in the `database_autodiscovery` block of the agent configuration" + "to see these extra databases." 
+ "Truncating list and running checks only on the following databases: {}\n".format( + len(check.autodiscovery._cache._cached_items), + pg_instance['database_autodiscovery']['max_databases'], + databases, + ) + ] print(check.warnings) assert check.warnings == expected_warning From 54f22dff3a97e7365e3b60e47240f4a8a237de2a Mon Sep 17 00:00:00 2001 From: Eden Date: Thu, 29 Jun 2023 15:52:42 +0000 Subject: [PATCH 53/86] too many reprints of databases --- postgres/datadog_checks/postgres/discovery.py | 3 +-- postgres/tests/test_discovery.py | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/postgres/datadog_checks/postgres/discovery.py b/postgres/datadog_checks/postgres/discovery.py index d6b8dee16ac89..11f9aa5d1f158 100644 --- a/postgres/datadog_checks/postgres/discovery.py +++ b/postgres/datadog_checks/postgres/discovery.py @@ -57,10 +57,9 @@ def get_items(self) -> List[str]: "Autodiscovery found %d databases, which was more than the specified limit of %d. " "Increase `max_databases` in the `database_autodiscovery` block of the agent configuration" "to see these extra databases." - "Truncating list and running checks only on the following databases: %s", + "The database list will be truncated.", len(self._cache._cached_items), self._filter._limit, - self.get_items(), ), ) diff --git a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py index a902856318a62..9140ff17bcda7 100644 --- a/postgres/tests/test_discovery.py +++ b/postgres/tests/test_discovery.py @@ -81,13 +81,11 @@ def test_autodiscovery_max_databases(integration_check, pg_instance): "Autodiscovery found {} databases, which was more than the specified limit of {}. " "Increase `max_databases` in the `database_autodiscovery` block of the agent configuration" "to see these extra databases." 
- "Truncating list and running checks only on the following databases: {}\n".format( + "The database list will be truncated.\n".format( len(check.autodiscovery._cache._cached_items), pg_instance['database_autodiscovery']['max_databases'], - databases, ) ] - print(check.warnings) assert check.warnings == expected_warning From 1f5634233452d7c00dc7a35cc0ecd7b72da56fef Mon Sep 17 00:00:00 2001 From: Eden Date: Thu, 29 Jun 2023 17:25:14 +0000 Subject: [PATCH 54/86] tracking --- postgres/datadog_checks/postgres/postgres.py | 2 +- postgres/tests/test_discovery.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index 371009bebce8b..33bf97edeac6f 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -481,7 +481,7 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname=N return num_results - @tracked_method() + @tracked_method(agent_check_getter=None) def _collect_relations_autodiscovery(self, instance_tags, relations_scopes): if not self.autodiscovery: return diff --git a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py index 9140ff17bcda7..6eca20b27e09f 100644 --- a/postgres/tests/test_discovery.py +++ b/postgres/tests/test_discovery.py @@ -171,6 +171,11 @@ def test_autodiscovery_collect_all_relations(aggregator, integration_check, pg_i for metric in RELATION_METRICS: aggregator.assert_metric(metric, tags=expected_tags) + aggregator.assert_metric( + 'dd.postgres.operation.time', + tags=['operation:_collect_relations_autodiscovery'], + ) + @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') From 41065f76bf682a9c22c7ed63cde53e66e5e80fdb Mon Sep 17 00:00:00 2001 From: Eden Date: Thu, 29 Jun 2023 17:27:15 +0000 Subject: [PATCH 55/86] style --- postgres/tests/test_discovery.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py index 6eca20b27e09f..f353c8ff38fdc 100644 --- a/postgres/tests/test_discovery.py +++ b/postgres/tests/test_discovery.py @@ -172,9 +172,9 @@ def test_autodiscovery_collect_all_relations(aggregator, integration_check, pg_i aggregator.assert_metric(metric, tags=expected_tags) aggregator.assert_metric( - 'dd.postgres.operation.time', - tags=['operation:_collect_relations_autodiscovery'], - ) + 'dd.postgres.operation.time', + tags=['operation:_collect_relations_autodiscovery'], + ) @pytest.mark.integration From f309a8331389d003ef2b269d2a95c5c9d3c081b7 Mon Sep 17 00:00:00 2001 From: Eden Date: Thu, 29 Jun 2023 18:15:53 +0000 Subject: [PATCH 56/86] tracked method remove param --- postgres/datadog_checks/postgres/postgres.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index 33bf97edeac6f..371009bebce8b 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -481,7 +481,7 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname=N return num_results - @tracked_method(agent_check_getter=None) + @tracked_method() def _collect_relations_autodiscovery(self, instance_tags, relations_scopes): if not self.autodiscovery: return From 8d5b41e744421924b473e912def85c8db11e66f4 Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Mon, 3 Jul 2023 20:47:17 +0000 Subject: [PATCH 57/86] instrument the 
autodicovery function --- postgres/datadog_checks/postgres/postgres.py | 20 +++++++++++++++++++- postgres/datadog_checks/postgres/util.py | 1 + 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index 371009bebce8b..ea90416295bf4 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -50,6 +50,7 @@ DatabaseConfigurationError, # noqa: F401 fmt, get_schema_field, + warning_with_tags ) from .version_utils import V9, V9_2, V10, V13, V14, VersionUtils @@ -481,17 +482,34 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname=N return num_results - @tracked_method() def _collect_relations_autodiscovery(self, instance_tags, relations_scopes): if not self.autodiscovery: return + start_time = time() databases = self.autodiscovery.get_items() for db in databases: with self.autodiscovery_db_pool.get_connection(db, self._config.idle_connection_timeout) as conn: with conn.cursor() as cursor: for scope in relations_scopes: self._query_scope(cursor, scope, instance_tags, False, db) + elapsed_ms = (time() - start_time) * 1000 + self.histogram( + "dd.postgres._collect_relations_autodiscovery.time", + elapsed_ms * 1000, + tags=self.tags + self._get_debug_tags(), + hostname=self.resolved_hostname, + ) + if elapsed_ms > self._config.min_collection_interval*1000: + self.record_warning( + DatabaseConfigurationError.autodiscovered_metrics_exceeds_collection_interval, + warning_with_tags( + "Collecting metrics on autodiscovery metrics took %d ms, which is longer than " + "the minimum collection interval. Consider increasing the min_collection_interval parameter " + "in the postgres yaml configuration.", + int(elapsed_ms), + ), + ) def _collect_stats(self, instance_tags): """Query pg_stat_* for various metrics diff --git a/postgres/datadog_checks/postgres/util.py b/postgres/datadog_checks/postgres/util.py index 54ecfa00a7964..76eac029fa6fd 100644 --- a/postgres/datadog_checks/postgres/util.py +++ b/postgres/datadog_checks/postgres/util.py @@ -33,6 +33,7 @@ class DatabaseConfigurationError(Enum): undefined_explain_function = 'undefined-explain-function' high_pg_stat_statements_max = 'high-pg-stat-statements-max-configuration' autodiscovered_databases_exceeds_limit = 'autodiscovered-databases-exceeds-limit' + autodiscovered_metrics_exceeds_collection_interval = "autodiscovered-metrics-exceeds-collection-interval" def warning_with_tags(warning_message, *args, **kwargs): From 9e93399178c5922916b0ab6bbc945dab44de4f75 Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Mon, 3 Jul 2023 21:03:36 +0000 Subject: [PATCH 58/86] comment changes --- postgres/datadog_checks/postgres/discovery.py | 27 +++++++------ postgres/tests/test_discovery.py | 40 +++++++++++++++---- 2 files changed, 47 insertions(+), 20 deletions(-) diff --git a/postgres/datadog_checks/postgres/discovery.py b/postgres/datadog_checks/postgres/discovery.py index 11f9aa5d1f158..e0d895a3d9fba 100644 --- a/postgres/datadog_checks/postgres/discovery.py +++ b/postgres/datadog_checks/postgres/discovery.py @@ -27,13 +27,14 @@ def __init__( include={db: 0 for db in autodiscovery_config.get("include", [".*"])}, exclude=autodiscovery_config.get("exclude", []), interval=autodiscovery_config.get("refresh", DEFAULT_REFRESH), - limit=autodiscovery_config.get("max_databases", DEFAULT_MAX_DATABASES), ) self._default_ttl = default_ttl self._db = global_view_db self._check = check self._log = 
self._check.log self._conn_pool = self._check.autodiscovery_db_pool + self._max_databases = autodiscovery_config.get("max_databases", DEFAULT_MAX_DATABASES) + self._cache_filtered = [] def get_items(self) -> List[str]: """ @@ -42,28 +43,30 @@ def get_items(self) -> List[str]: This function takes the item of interest (dbname) from this four-tuple and returns the full list of database names from the generator. """ - prev_cached_items_len = len(self._cache._cached_items) + prev_cached_items_len = len(self._cache_filtered) items = list(super().get_items()) - # get_items updates _cache._cached_items, so check if the last refresh - # added a database that put this instance over the limit. - # _cache._cached_items stores databases before the limit filter is applied + + # check if the items got refreshed + went over limit + # before this function applies + # the max_databases limit if ( - len(self._cache._cached_items) != prev_cached_items_len - and len(self._cache._cached_items) > self._filter._limit + len(items) != prev_cached_items_len + and len(items) > self._max_databases ): self._check.record_warning( DatabaseConfigurationError.autodiscovered_databases_exceeds_limit, warning_with_tags( "Autodiscovery found %d databases, which was more than the specified limit of %d. " - "Increase `max_databases` in the `database_autodiscovery` block of the agent configuration" - "to see these extra databases." + "Increase `max_databases` in the `database_autodiscovery` block of the agent configuration " + "to see these extra databases. " "The database list will be truncated.", - len(self._cache._cached_items), - self._filter._limit, + len(items), + self._max_databases, ), ) - items_parsed = [item[1] for item in items] + items_parsed = [item[1] for item in items][:self._max_databases] + self._cache_filtered = items_parsed return items_parsed def _get_databases(self) -> List[str]: diff --git a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py index f353c8ff38fdc..6021a619aabe5 100644 --- a/postgres/tests/test_discovery.py +++ b/postgres/tests/test_discovery.py @@ -4,6 +4,7 @@ import copy import time +import re from contextlib import contextmanager import psycopg2 @@ -79,12 +80,10 @@ def test_autodiscovery_max_databases(integration_check, pg_instance): assert len(databases) == pg_instance['database_autodiscovery']['max_databases'] expected_warning = [ "Autodiscovery found {} databases, which was more than the specified limit of {}. " - "Increase `max_databases` in the `database_autodiscovery` block of the agent configuration" - "to see these extra databases." - "The database list will be truncated.\n".format( - len(check.autodiscovery._cache._cached_items), - pg_instance['database_autodiscovery']['max_databases'], - ) + "Increase `max_databases` in the `database_autodiscovery` block of the agent configuration " + "to see these extra databases. 
" + "The database list will be truncated.\n".format(NUM_DOGS_DATABASES - len(DISCOVERY_CONFIG["exclude"]), + pg_instance['database_autodiscovery']['max_databases']) ] assert check.warnings == expected_warning @@ -172,10 +171,35 @@ def test_autodiscovery_collect_all_relations(aggregator, integration_check, pg_i aggregator.assert_metric(metric, tags=expected_tags) aggregator.assert_metric( - 'dd.postgres.operation.time', - tags=['operation:_collect_relations_autodiscovery'], + 'dd.postgres._collect_relations_autodiscovery.time', ) +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') +def test_autodiscovery_exceeds_min_interval(aggregator, integration_check, pg_instance): + """ + Check that relation metrics get collected for each database discovered. + """ + pg_instance["database_autodiscovery"] = copy.deepcopy(DISCOVERY_CONFIG) + pg_instance["database_autodiscovery"]["include"] = ["dogs$", "dogs_noschema$", "dogs_nofunc$"] + pg_instance['relations'] = [ + {'relation_regex': '.*'}, + ] + pg_instance['min_collection_interval'] = .001 + del pg_instance['dbname'] + + check = integration_check(pg_instance) + check.check(pg_instance) + + aggregator.assert_metric( + 'dd.postgres._collect_relations_autodiscovery.time', + ) + assert len(check.warnings) == 1 + test_structure = re.compile("Collecting metrics on autodiscovery metrics took .* ms, which is longer than " + "the minimum collection interval. Consider increasing the min_collection_interval parameter " + "in the postgres yaml configuration.\n") + assert test_structure.match(check.warnings[0]) + @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') From d7868064c3d5ee544a4fd15e0a6c43a8ec732b9f Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Wed, 5 Jul 2023 13:36:47 +0000 Subject: [PATCH 59/86] add tag to warnings --- postgres/datadog_checks/postgres/discovery.py | 11 +++++------ postgres/datadog_checks/postgres/postgres.py | 7 ++++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/postgres/datadog_checks/postgres/discovery.py b/postgres/datadog_checks/postgres/discovery.py index e0d895a3d9fba..b026184f89ee9 100644 --- a/postgres/datadog_checks/postgres/discovery.py +++ b/postgres/datadog_checks/postgres/discovery.py @@ -45,14 +45,11 @@ def get_items(self) -> List[str]: """ prev_cached_items_len = len(self._cache_filtered) items = list(super().get_items()) - + # check if the items got refreshed + went over limit # before this function applies # the max_databases limit - if ( - len(items) != prev_cached_items_len - and len(items) > self._max_databases - ): + if len(items) != prev_cached_items_len and len(items) > self._max_databases: self._check.record_warning( DatabaseConfigurationError.autodiscovered_databases_exceeds_limit, warning_with_tags( @@ -62,10 +59,12 @@ def get_items(self) -> List[str]: "The database list will be truncated.", len(items), self._max_databases, + code=DatabaseConfigurationError.autodiscovered_databases_exceeds_limit.value, + max_databases=self._max_databases, ), ) - items_parsed = [item[1] for item in items][:self._max_databases] + items_parsed = [item[1] for item in items][: self._max_databases] self._cache_filtered = items_parsed return items_parsed diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index ea90416295bf4..e39f1af702043 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -13,7 +13,6 @@ from datadog_checks.base import AgentCheck from 
datadog_checks.base.utils.db import QueryExecutor from datadog_checks.base.utils.db.utils import resolve_db_host as agent_host_resolver -from datadog_checks.base.utils.tracking import tracked_method from datadog_checks.postgres import aws from datadog_checks.postgres.connections import MultiDatabaseConnectionPool from datadog_checks.postgres.discovery import PostgresAutodiscovery @@ -50,7 +49,7 @@ DatabaseConfigurationError, # noqa: F401 fmt, get_schema_field, - warning_with_tags + warning_with_tags, ) from .version_utils import V9, V9_2, V10, V13, V14, VersionUtils @@ -500,7 +499,7 @@ def _collect_relations_autodiscovery(self, instance_tags, relations_scopes): tags=self.tags + self._get_debug_tags(), hostname=self.resolved_hostname, ) - if elapsed_ms > self._config.min_collection_interval*1000: + if elapsed_ms > self._config.min_collection_interval * 1000: self.record_warning( DatabaseConfigurationError.autodiscovered_metrics_exceeds_collection_interval, warning_with_tags( @@ -508,6 +507,8 @@ def _collect_relations_autodiscovery(self, instance_tags, relations_scopes): "the minimum collection interval. Consider increasing the min_collection_interval parameter " "in the postgres yaml configuration.", int(elapsed_ms), + code=DatabaseConfigurationError.autodiscovered_metrics_exceeds_collection_interval.value, + min_collection_interval=self._config.min_collection_interval, ), ) From 7397cc623d4f6a6741a4663f758cd8dec0a9081c Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Wed, 5 Jul 2023 13:37:33 +0000 Subject: [PATCH 60/86] test change --- postgres/tests/test_discovery.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py index 6021a619aabe5..16ce6e7288f8a 100644 --- a/postgres/tests/test_discovery.py +++ b/postgres/tests/test_discovery.py @@ -3,8 +3,8 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import copy -import time import re +import time from contextlib import contextmanager import psycopg2 @@ -82,8 +82,10 @@ def test_autodiscovery_max_databases(integration_check, pg_instance): "Autodiscovery found {} databases, which was more than the specified limit of {}. " "Increase `max_databases` in the `database_autodiscovery` block of the agent configuration " "to see these extra databases. 
" - "The database list will be truncated.\n".format(NUM_DOGS_DATABASES - len(DISCOVERY_CONFIG["exclude"]), - pg_instance['database_autodiscovery']['max_databases']) + "The database list will be truncated.\n".format( + NUM_DOGS_DATABASES - len(DISCOVERY_CONFIG["exclude"]), + pg_instance['database_autodiscovery']['max_databases'], + ) ] assert check.warnings == expected_warning @@ -174,6 +176,7 @@ def test_autodiscovery_collect_all_relations(aggregator, integration_check, pg_i 'dd.postgres._collect_relations_autodiscovery.time', ) + @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') def test_autodiscovery_exceeds_min_interval(aggregator, integration_check, pg_instance): @@ -185,7 +188,7 @@ def test_autodiscovery_exceeds_min_interval(aggregator, integration_check, pg_in pg_instance['relations'] = [ {'relation_regex': '.*'}, ] - pg_instance['min_collection_interval'] = .001 + pg_instance['min_collection_interval'] = 0.001 del pg_instance['dbname'] check = integration_check(pg_instance) @@ -195,9 +198,11 @@ def test_autodiscovery_exceeds_min_interval(aggregator, integration_check, pg_in 'dd.postgres._collect_relations_autodiscovery.time', ) assert len(check.warnings) == 1 - test_structure = re.compile("Collecting metrics on autodiscovery metrics took .* ms, which is longer than " + test_structure = re.compile( + "Collecting metrics on autodiscovery metrics took .* ms, which is longer than " "the minimum collection interval. Consider increasing the min_collection_interval parameter " - "in the postgres yaml configuration.\n") + "in the postgres yaml configuration.\n" + ) assert test_structure.match(check.warnings[0]) From fb7be22439c01b8b69d55207201c8b39ae745421 Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Wed, 5 Jul 2023 13:58:22 +0000 Subject: [PATCH 61/86] fix tests from adding tags --- postgres/tests/test_discovery.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/postgres/tests/test_discovery.py b/postgres/tests/test_discovery.py index 16ce6e7288f8a..1deff6e997862 100644 --- a/postgres/tests/test_discovery.py +++ b/postgres/tests/test_discovery.py @@ -82,9 +82,10 @@ def test_autodiscovery_max_databases(integration_check, pg_instance): "Autodiscovery found {} databases, which was more than the specified limit of {}. " "Increase `max_databases` in the `database_autodiscovery` block of the agent configuration " "to see these extra databases. 
" - "The database list will be truncated.\n".format( + "The database list will be truncated.\ncode=autodiscovered-databases-exceeds-limit max_databases={}".format( NUM_DOGS_DATABASES - len(DISCOVERY_CONFIG["exclude"]), pg_instance['database_autodiscovery']['max_databases'], + pg_instance['database_autodiscovery']['max_databases'], ) ] assert check.warnings == expected_warning From 8e80d543cca88fc404360dfbbb6e16083b12444f Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Wed, 5 Jul 2023 15:54:22 +0000 Subject: [PATCH 62/86] remove accidental *1000 multiplier for collection time --- postgres/datadog_checks/postgres/postgres.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index e39f1af702043..a3817b8c1e196 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -495,7 +495,7 @@ def _collect_relations_autodiscovery(self, instance_tags, relations_scopes): elapsed_ms = (time() - start_time) * 1000 self.histogram( "dd.postgres._collect_relations_autodiscovery.time", - elapsed_ms * 1000, + elapsed_ms, tags=self.tags + self._get_debug_tags(), hostname=self.resolved_hostname, ) From e8a3f4314ed3b2cba24d8a7c739cc939a5f0f989 Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Fri, 14 Jul 2023 21:25:58 +0000 Subject: [PATCH 63/86] first commit --- postgres/datadog_checks/postgres/config.py | 1 + postgres/datadog_checks/postgres/metadata.py | 182 ++++++++++++++++++- postgres/tests/test_metadata.py | 12 ++ 3 files changed, 194 insertions(+), 1 deletion(-) diff --git a/postgres/datadog_checks/postgres/config.py b/postgres/datadog_checks/postgres/config.py index 57eec971df07d..d1b88fd4ac233 100644 --- a/postgres/datadog_checks/postgres/config.py +++ b/postgres/datadog_checks/postgres/config.py @@ -100,6 +100,7 @@ def __init__(self, instance): self.pg_stat_activity_view = instance.get('pg_stat_activity_view', 'pg_stat_activity') self.statement_samples_config = instance.get('query_samples', instance.get('statement_samples', {})) or {} self.settings_metadata_config = instance.get('collect_settings', {}) or {} + self.schemas_metadata_config = instance.get('collect_schemas', {}) or {} self.resources_metadata_config = instance.get('collect_resources', {}) or {} self.statement_activity_config = instance.get('query_activity', {}) or {} self.statement_metrics_config = instance.get('query_metrics', {}) or {} diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index 8456526dc0aa3..ce5ce8d57164a 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -17,14 +17,91 @@ from datadog_checks.base.utils.tracking import tracked_method from datadog_checks.postgres.connections import MultiDatabaseConnectionPool -# default pg_settings collection interval in seconds +# default collection intervals in seconds DEFAULT_SETTINGS_COLLECTION_INTERVAL = 600 +DEFAULT_SCHEMAS_COLLECTION_INTERVAL = 600 DEFAULT_RESOURCES_COLLECTION_INTERVAL = 300 PG_SETTINGS_QUERY = """ SELECT name, setting FROM pg_settings """ +DATABASE_INFORMATION_QUERY = """ +SELECT db.oid as id, datname as name, pg_encoding_to_char(encoding) as encoding, rolname as owner, description + FROM pg_catalog.pg_database db + LEFT JOIN pg_catalog.pg_description dc ON dc.objoid = db.oid + JOIN pg_roles a on datdba = a.oid + WHERE datname LIKE '{dbname}'; +""" + + +PG_STAT_TABLES_QUERY = """ +SELECT st.relname 
as name,seq_scan,idx_scan,relhasindex as hasindexes,relowner::regrole as owner,relispartition as is_partition +FROM pg_stat_all_tables st +LEFT JOIN pg_class c ON c.relname = st.relname +AND relkind IN ('r', 'p') +WHERE schemaname = '{schemaname}' +ORDER BY coalesce(seq_scan, 0) + coalesce(idx_scan, 0) DESC; +""" + +PG_TABLES_QUERY = """ +SELECT tablename as name, hasindexes,relowner::regrole as owner,relispartition as is_partition, (case when relkind = 'p' then true else false end) as is_partitioned +FROM pg_tables st +LEFT JOIN pg_class c ON relname = tablename +AND relkind IN ('r', 'p') +WHERE schemaname = '{schemaname}'; +""" + + +SCHEMA_QUERY = """ + SELECT nspname as name, nspowner::regrole as owner FROM + pg_namespace + WHERE nspname not in ('information_schema', 'pg_catalog') + AND nspname NOT LIKE 'pg_toast%' and nspname NOT LIKE 'pg_temp_%'; +""" + +PG_INDEXES_QUERY = """ +SELECT indexname, indexdef +FROM pg_indexes +WHERE tablename LIKE '{tablename}'; +""" + +PG_CONSTRAINTS_QUERY = """ +SELECT conrelid::regclass AS table_name, conname AS foreign_key, contype, pg_get_constraintdef(oid) +FROM pg_constraint +WHERE contype = 'f' +AND conrelid = +'{tablename}'::regclass; +""" + +COLUMNS_QUERY = """ +SELECT attname as name, format_type(atttypid, atttypmod) AS data_type, attnotnull as not_nullable, pg_get_expr(adbin, adrelid) as default +FROM pg_attribute LEFT JOIN pg_attrdef ad ON adrelid=attrelid +WHERE attrelid = '{tablename}'::regclass +AND attnum > 0 +AND NOT attisdropped; +""" +# SELECT * +# FROM pg_attribute +# WHERE attrelid = 'apm_activity'::regclass +# AND attnum > 0 +# AND NOT attisdropped; + + +PARTITION_PARENT_AND_RANGE_QUERY = """ +SELECT parent, child, pg_get_expr(c.relpartbound, c.oid, true) as partition_range + FROM + (SELECT inhparent::regclass as parent, inhrelid::regclass as child + FROM pg_inherits + WHERE inhrelid = '{tablename}'::regclass::oid) AS inherits + LEFT JOIN pg_class c ON c.oid = child::regclass::oid; +""" + +PARTITION_KEY_QUERY = """ + SELECT relname, pg_get_partkeydef(oid) as partition_key +FROM pg_class WHERE '{parent}' = relname; +""" + def agent_check_getter(self): return self._check @@ -41,6 +118,10 @@ def __init__(self, check, config, shutdown_callback): self.pg_settings_collection_interval = config.settings_metadata_config.get( 'collection_interval', DEFAULT_SETTINGS_COLLECTION_INTERVAL ) + self.schemas_collection_interval = config.schemas_metadata_config.get( + 'collection_interval', DEFAULT_SETTINGS_COLLECTION_INTERVAL + ) + collection_interval = config.resources_metadata_config.get( 'collection_interval', DEFAULT_RESOURCES_COLLECTION_INTERVAL ) @@ -67,8 +148,10 @@ def shutdown_cb(): self._check = check self._config = config self._collect_pg_settings_enabled = is_affirmative(config.settings_metadata_config.get('enabled', False)) + self._collect_schemas_enabled = is_affirmative(config.schemas_metadata_config.get('enabled', False)) self._pg_settings_cached = None self._time_since_last_settings_query = 0 + self._time_since_last_schemas_query = 0 self._conn_ttl_ms = self._config.idle_connection_timeout self._tags_no_db = None self.tags = None @@ -112,11 +195,108 @@ def report_postgres_metadata(self): } self._check.database_monitoring_metadata(json.dumps(event, default=default_json_event_encoding)) + elapsed_s = time.time() - self._time_since_last_schemas_query + if elapsed_s >= self.schemas_collection_interval: + self._collect_schema_info() + def _payload_pg_version(self): version = self._check.version if not version: return "" return 
'v{major}.{minor}.{patch}'.format(major=version.major, minor=version.minor, patch=version.patch) + + def _collect_schema_info(self): + databases = [] + if self._check.autodiscovery: + databases = self._check.autodiscovery.get_items() + elif self._config.dbname != 'postgres': + databases.append(self._config.dbname) + else: + # if we are only connecting to 'postgres' database, not worth reporting data model + return + + for database in databases: + self._collect_info_for_database(database) + + + def _collect_info_for_database(self, dbname): + with self._conn_pool.get_connection(dbname, ttl_ms=self._conn_ttl_ms) as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: + # collect database info + cursor.execute(DATABASE_INFORMATION_QUERY.format(dbname=dbname)) + rows = cursor.fetchall() + postgres_logical_database = [dict(row) for row in rows] + + self._log.warning(postgres_logical_database) + ## Collect user schemas. Returns + # name: str + # owner: str + cursor.execute(SCHEMA_QUERY) + rows = cursor.fetchall() + schemas = [dict(row) for row in rows] + + self._log.warning(schemas) + for schema in schemas: + ## Collect tables for schema. Returns + # name: str + # hasindexes: bool + # owner: str + # is_partition: bool + cursor.execute(PG_TABLES_QUERY.format(schemaname=schema['name'])) + rows = cursor.fetchall() + tables_info = [dict(row) for row in rows] + self._log.warning(tables_info) + table_to_payloads = {} + partitioned_tables = [] + for table in tables_info: + name = table['name'] + self._log.warning("Parsing table {}".format(name)) + table_to_payloads[name] = {'name': name} + if table["hasindexes"]: + cursor.execute(PG_INDEXES_QUERY.format(tablename=name)) + rows = cursor.fetchall() + indexes = {row[0]:row[1] for row in rows} + table_to_payloads[name].update({'indexes': indexes}) + + if table['is_partition']: + cursor.execute(PARTITION_PARENT_AND_RANGE_QUERY.format(tablename=name)) + row = cursor.fetchone() + partition_of = row['parent'] + partition_range = row['partition_range'] + partitioned_tables.append(partition_of) + table_to_payloads[name].update({'partition_of': partition_of}) + table_to_payloads[name].update({'partition_range': partition_range}) + + if table['is_partitioned']: + cursor.execute(PARTITION_KEY_QUERY.format(parent=name)) + row = cursor.fetchone() + self._log.warning(row) + table_to_payloads[name].update({'partition_key':row['partition_key']}) + + # get foreign keys + cursor.execute(PG_CONSTRAINTS_QUERY.format(tablename=table['name'])) + rows = cursor.fetchall() + if rows: + table_to_payloads[name].update({'foreign_keys': {}}) + + ## Get columns + # name: str + # data_type: str + # default: str + # is_nullable: bool + cursor.execute(COLUMNS_QUERY.format(tablename=name)) + rows = cursor.fetchall() + self._log.warning(rows) + columns = [dict(row) for row in rows] + table_to_payloads[name].update({'columns': columns}) + + + self._log.warning(table_to_payloads) + + + + + @tracked_method(agent_check_getter=agent_check_getter) def _collect_postgres_settings(self): diff --git a/postgres/tests/test_metadata.py b/postgres/tests/test_metadata.py index 3058e5150a215..83ab3d4fd82aa 100644 --- a/postgres/tests/test_metadata.py +++ b/postgres/tests/test_metadata.py @@ -38,3 +38,15 @@ def test_collect_metadata(integration_check, dbm_instance, aggregator): assert event['dbms'] == "postgres" assert event['kind'] == "pg_settings" assert len(event["metadata"]) > 0 + +def test_collect_schemas(integration_check, dbm_instance, aggregator): + 
dbm_instance["collect_schemas"] = {'enabled': True, 'collection_interval': 0.5} + check = integration_check(dbm_instance) + check.check(dbm_instance) + assert None is not None + dbm_metadata = aggregator.get_event_platform_events("dbm-metadata") + event = dbm_metadata[0] + assert event['host'] == "stubbed.hostname" + assert event['dbms'] == "postgres" + assert event['kind'] == "pg_settings" + assert len(event["metadata"]) > 0 \ No newline at end of file From f68196b1e6468af005768585e13dadf3ffa6e557 Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Mon, 17 Jul 2023 18:57:18 +0000 Subject: [PATCH 64/86] All schema collection except ordering by metrics --- postgres/assets/configuration/spec.yaml | 25 ++ postgres/datadog_checks/postgres/metadata.py | 238 +++++++++++------- .../datadog_checks/postgres/metrics_cache.py | 4 +- postgres/datadog_checks/postgres/postgres.py | 3 + .../tests/compose/resources/03_load_data.sh | 2 +- postgres/tests/test_metadata.py | 3 +- 6 files changed, 184 insertions(+), 91 deletions(-) diff --git a/postgres/assets/configuration/spec.yaml b/postgres/assets/configuration/spec.yaml index 6f26fe7092f31..98db7f31dfa48 100644 --- a/postgres/assets/configuration/spec.yaml +++ b/postgres/assets/configuration/spec.yaml @@ -500,6 +500,31 @@ files: value: type: number example: 600 + - name: collect_schemas + description: | + Enable collection of database schemas. In order to collect schemas from all user databases, + enable `database_autodiscovery`. To collect from a single database, set `dbname` to collect + the schema for that database. + options: + - name: enabled + description: | + Enable collection of database schemas. Requires `dbm: true`. + value: + type: boolean + example: false + - name: enabled + description: | + Maximum amount of tables the agent will collect from the instance. + value: + type: number + example: 1000 + display_default: 1000 + - name: collection_interval + description: | + The database schema collection interval (in seconds). 
+ value: + type: number + example: 600 - name: aws description: | diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index ce5ce8d57164a..3abbacb5b2ecf 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -3,7 +3,7 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import json import time -from typing import Dict, Optional, Tuple # noqa: F401 +from typing import Dict, Optional, Tuple, Union, List # noqa: F401 import psycopg2 @@ -45,7 +45,8 @@ """ PG_TABLES_QUERY = """ -SELECT tablename as name, hasindexes,relowner::regrole as owner,relispartition as is_partition, (case when relkind = 'p' then true else false end) as is_partitioned +SELECT tablename as name, hasindexes,relowner::regrole as owner,relispartition as is_partition, +(case when relkind = 'p' then true else false end) as has_partitions FROM pg_tables st LEFT JOIN pg_class c ON relname = tablename AND relkind IN ('r', 'p') @@ -61,13 +62,13 @@ """ PG_INDEXES_QUERY = """ -SELECT indexname, indexdef +SELECT indexname as name, indexdef as definition FROM pg_indexes WHERE tablename LIKE '{tablename}'; """ PG_CONSTRAINTS_QUERY = """ -SELECT conrelid::regclass AS table_name, conname AS foreign_key, contype, pg_get_constraintdef(oid) +SELECT conname AS name, pg_get_constraintdef(oid) as definition FROM pg_constraint WHERE contype = 'f' AND conrelid = @@ -75,18 +76,12 @@ """ COLUMNS_QUERY = """ -SELECT attname as name, format_type(atttypid, atttypmod) AS data_type, attnotnull as not_nullable, pg_get_expr(adbin, adrelid) as default -FROM pg_attribute LEFT JOIN pg_attrdef ad ON adrelid=attrelid +SELECT attname as name, format_type(atttypid, atttypmod) AS data_type, NOT attnotnull as nullable, pg_get_expr(adbin, adrelid) as default +FROM pg_attribute LEFT JOIN pg_attrdef ad ON adrelid=attrelid AND adnum=attnum WHERE attrelid = '{tablename}'::regclass AND attnum > 0 AND NOT attisdropped; """ -# SELECT * -# FROM pg_attribute -# WHERE attrelid = 'apm_activity'::regclass -# AND attnum > 0 -# AND NOT attisdropped; - PARTITION_PARENT_AND_RANGE_QUERY = """ SELECT parent, child, pg_get_expr(c.relpartbound, c.oid, true) as partition_range @@ -215,88 +210,155 @@ def _collect_schema_info(self): # if we are only connecting to 'postgres' database, not worth reporting data model return + metadata = [] for database in databases: - self._collect_info_for_database(database) + metadata.append(self._collect_metadata_for_database(database)) + event = { + "host": self._check.resolved_hostname, + "agent_version": datadog_agent.get_version(), + "dbms": "postgres", + "kind": "pg_databases", + "collection_interval": self.schemas_collection_interval, + "dbms_version": self._payload_pg_version(), + "tags": self._tags_no_db, + "timestamp": time.time() * 1000, + "cloud_metadata": self._config.cloud_metadata, + "metadata": metadata, + } + self._check.database_monitoring_metadata(json.dumps(event, default=default_json_event_encoding)) + def _query_database_information(self, cursor: psycopg2.extensions.cursor, dbname: str) -> Dict[str, Union[str, int]]: + """ + Collect database info. 
Returns + description: str + name: str + id: int + encoding: str + owner: str + """ + cursor.execute(DATABASE_INFORMATION_QUERY.format(dbname=dbname)) + row = cursor.fetchone() + print(row) + return row - def _collect_info_for_database(self, dbname): - with self._conn_pool.get_connection(dbname, ttl_ms=self._conn_ttl_ms) as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: - # collect database info - cursor.execute(DATABASE_INFORMATION_QUERY.format(dbname=dbname)) - rows = cursor.fetchall() - postgres_logical_database = [dict(row) for row in rows] - - self._log.warning(postgres_logical_database) - ## Collect user schemas. Returns - # name: str - # owner: str - cursor.execute(SCHEMA_QUERY) - rows = cursor.fetchall() - schemas = [dict(row) for row in rows] - - self._log.warning(schemas) - for schema in schemas: - ## Collect tables for schema. Returns - # name: str - # hasindexes: bool - # owner: str - # is_partition: bool - cursor.execute(PG_TABLES_QUERY.format(schemaname=schema['name'])) - rows = cursor.fetchall() - tables_info = [dict(row) for row in rows] - self._log.warning(tables_info) - table_to_payloads = {} - partitioned_tables = [] - for table in tables_info: - name = table['name'] - self._log.warning("Parsing table {}".format(name)) - table_to_payloads[name] = {'name': name} - if table["hasindexes"]: - cursor.execute(PG_INDEXES_QUERY.format(tablename=name)) - rows = cursor.fetchall() - indexes = {row[0]:row[1] for row in rows} - table_to_payloads[name].update({'indexes': indexes}) - - if table['is_partition']: - cursor.execute(PARTITION_PARENT_AND_RANGE_QUERY.format(tablename=name)) - row = cursor.fetchone() - partition_of = row['parent'] - partition_range = row['partition_range'] - partitioned_tables.append(partition_of) - table_to_payloads[name].update({'partition_of': partition_of}) - table_to_payloads[name].update({'partition_range': partition_range}) - - if table['is_partitioned']: - cursor.execute(PARTITION_KEY_QUERY.format(parent=name)) - row = cursor.fetchone() - self._log.warning(row) - table_to_payloads[name].update({'partition_key':row['partition_key']}) - - # get foreign keys - cursor.execute(PG_CONSTRAINTS_QUERY.format(tablename=table['name'])) - rows = cursor.fetchall() - if rows: - table_to_payloads[name].update({'foreign_keys': {}}) - - ## Get columns - # name: str - # data_type: str - # default: str - # is_nullable: bool - cursor.execute(COLUMNS_QUERY.format(tablename=name)) - rows = cursor.fetchall() - self._log.warning(rows) - columns = [dict(row) for row in rows] - table_to_payloads[name].update({'columns': columns}) - - - self._log.warning(table_to_payloads) + def _query_schema_information(self, cursor: psycopg2.extensions.cursor, dbname: str) -> Dict[str, str]: + """ + Collect user schemas. Returns + name: str + owner: str + """ + cursor.execute(SCHEMA_QUERY) + rows = cursor.fetchall() + schemas = [dict(row) for row in rows] + return schemas + def _sort_and_limit_table_info(self, table_info: List[Dict[str, Union[str, bool]]], limit: int) -> List[Dict[str, Union[str, bool]]]: + # if relation metrics are enabled, sorted based on last activity information + activity_metrics_cache = self._check.metrics_cache.get_activity_metrics(self._check.version) + if activity_metrics_cache: + self._log.warning(activity_metrics_cache) - - + # else, blindly truncate + def _query_table_information_for_schema(self, cursor: psycopg2.extensions.cursor, schemaname: str) -> List[Dict[str, Union[str, Dict]]]: + """ + Collect table information per schema. 
Returns a list of dictionaries + with key/values: + "name": str + "owner": str + "partition_key": str (if has partitions) + "partition_of": str (if a partition) + "foreign_keys": dict (if has foreign keys) + name: str + definition: str + "indexes": dict (if has indexes) + name: str + definition: str + "columns": dict + name: str + data_type: str + default: str + nullable: bool + """ + cursor.execute(PG_TABLES_QUERY.format(schemaname=schemaname)) + rows = cursor.fetchall() + tables_info = [dict(row) for row in rows] + self._sort_and_limit_table_info(tables_info, 1000) + self._log.warning(tables_info) + table_payloads = [] + for table in tables_info: + this_payload = {} + name = table['name'] + self._log.warning("Parsing table {}".format(name)) + this_payload.update({'name': name}) + if table["hasindexes"]: + cursor.execute(PG_INDEXES_QUERY.format(tablename=name)) + rows = cursor.fetchall() + indexes = {row[0]:row[1] for row in rows} + this_payload.update({'indexes': indexes}) + + if table['is_partition']: + cursor.execute(PARTITION_PARENT_AND_RANGE_QUERY.format(tablename=name)) + row = cursor.fetchone() + partition_of = row['parent'] + partition_range = row['partition_range'] + this_payload.update({'partition_of': partition_of}) + this_payload.update({'partition_range': partition_range}) + + if table['has_partitions']: + cursor.execute(PARTITION_KEY_QUERY.format(parent=name)) + row = cursor.fetchone() + self._log.warning(row) + this_payload.update({'partition_key':row['partition_key']}) + + # Get foreign keys + cursor.execute(PG_CONSTRAINTS_QUERY.format(tablename=table['name'])) + rows = cursor.fetchall() + self._log.warning("foreign keys {}".format(rows)) + if rows: + this_payload.update({'foreign_keys': {}}) + + # Get columns + cursor.execute(COLUMNS_QUERY.format(tablename=name)) + rows = cursor.fetchall() + self._log.warning(rows) + columns = [dict(row) for row in rows] + this_payload.update({'columns': columns}) + + table_payloads.append(this_payload) + + return table_payloads + + def _collect_metadata_for_database(self, dbname): + metadata = {} + with self._conn_pool.get_connection(dbname, ttl_ms=self._conn_ttl_ms) as conn: + with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: + database_info = self._query_database_information(cursor, dbname) + metadata.update( + { + "description": database_info['description'], + "name": database_info['name'], + "id": database_info['id'], + "encoding": database_info['encoding'], + "owner": database_info['owner'], + "schemas": [], + } + ) + self._log.warning(database_info) + schema_info = self._query_schema_information(cursor, dbname) + self._log.warning(schema_info) + for schema in schema_info: + tables_info = self._query_table_information_for_schema(cursor, schema['name']) + self._log.warning(tables_info) + metadata['schemas'].append( + { + "name": schema['name'], + "owner": schema['owner'], + "tables": tables_info + } + ) + + return metadata @tracked_method(agent_check_getter=agent_check_getter) def _collect_postgres_settings(self): diff --git a/postgres/datadog_checks/postgres/metrics_cache.py b/postgres/datadog_checks/postgres/metrics_cache.py index 736361062a421..30ee4399000c7 100644 --- a/postgres/datadog_checks/postgres/metrics_cache.py +++ b/postgres/datadog_checks/postgres/metrics_cache.py @@ -44,6 +44,7 @@ def __init__(self, config): self.replication_stats_metrics = None self.activity_metrics = None self._count_metrics = None + self.table_activity_metrics = None def clean_state(self): self.instance_metrics = None @@ -52,6 
+53,7 @@ def clean_state(self): self.replication_metrics = None self.replication_stats_metrics = None self.activity_metrics = None + self.table_activity_metrics = None def get_instance_metrics(self, version): """ @@ -180,7 +182,7 @@ def get_replication_stats_metrics(self, version): if version >= V10 and self.replication_stats_metrics is None: self.replication_stats_metrics = dict(REPLICATION_STATS_METRICS) return self.replication_stats_metrics - + def get_activity_metrics(self, version): """Use ACTIVITY_METRICS_LT_8_3 or ACTIVITY_METRICS_8_3 or ACTIVITY_METRICS_9_2 depending on the postgres version in conjunction with ACTIVITY_QUERY_10 or ACTIVITY_QUERY_LT_10. diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index a3817b8c1e196..93666cb938b8a 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -477,6 +477,9 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname=N name, submit_metric = scope['metrics'][column] submit_metric(self, name, value, tags=set(tags), hostname=self.resolved_hostname) + # TODO: if relation-level metrics idx_scan or seq_scan, cache it + + num_results += 1 return num_results diff --git a/postgres/tests/compose/resources/03_load_data.sh b/postgres/tests/compose/resources/03_load_data.sh index d74d85f37cd8c..489d25d942430 100755 --- a/postgres/tests/compose/resources/03_load_data.sh +++ b/postgres/tests/compose/resources/03_load_data.sh @@ -2,7 +2,7 @@ set -e psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" datadog_test <<-EOSQL - CREATE TABLE persons (personid SERIAL, lastname VARCHAR(255), firstname VARCHAR(255), address VARCHAR(255), city VARCHAR(255)); + CREATE TABLE persons (personid SERIAL, lastname VARCHAR(255), firstname VARCHAR(255), address VARCHAR(255), city VARCHAR(255) DEFAULT 'New York'); INSERT INTO persons (lastname, firstname, address, city) VALUES ('Cavaille', 'Leo', 'Midtown', 'New York'), ('Someveryveryveryveryveryveryveryveryveryverylongname', 'something', 'Avenue des Champs Elysees', 'Beautiful city of lights'); CREATE TABLE personsdup1 (personid SERIAL, lastname VARCHAR(255), firstname VARCHAR(255), address VARCHAR(255), city VARCHAR(255)); INSERT INTO personsdup1 (lastname, firstname, address, city) VALUES ('Cavaille', 'Leo', 'Midtown', 'New York'), ('Someveryveryveryveryveryveryveryveryveryverylongname', 'something', 'Avenue des Champs Elysees', 'Beautiful city of lights'); diff --git a/postgres/tests/test_metadata.py b/postgres/tests/test_metadata.py index 83ab3d4fd82aa..52c82d43cb5a7 100644 --- a/postgres/tests/test_metadata.py +++ b/postgres/tests/test_metadata.py @@ -41,6 +41,7 @@ def test_collect_metadata(integration_check, dbm_instance, aggregator): def test_collect_schemas(integration_check, dbm_instance, aggregator): dbm_instance["collect_schemas"] = {'enabled': True, 'collection_interval': 0.5} + dbm_instance['relations'] = {'relation_regex': ".*"} check = integration_check(dbm_instance) check.check(dbm_instance) assert None is not None @@ -48,5 +49,5 @@ def test_collect_schemas(integration_check, dbm_instance, aggregator): event = dbm_metadata[0] assert event['host'] == "stubbed.hostname" assert event['dbms'] == "postgres" - assert event['kind'] == "pg_settings" + assert event['kind'] == "pg_databases" assert len(event["metadata"]) > 0 \ No newline at end of file From d1aa666a38bbe16d353551e909c380b5897f7bad Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Mon, 17 Jul 2023 22:17:44 +0000 
Subject: [PATCH 65/86] Ordering tables by relation metrics --- postgres/datadog_checks/postgres/metadata.py | 58 ++++++++++++++----- .../datadog_checks/postgres/metrics_cache.py | 7 ++- postgres/datadog_checks/postgres/postgres.py | 9 ++- postgres/tests/test_metadata.py | 10 +++- 4 files changed, 64 insertions(+), 20 deletions(-) diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index 55a65e77f2cf5..fbe3ecd62098a 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -136,6 +136,7 @@ def __init__(self, check, config, shutdown_callback): ) self._check = check self._config = config + self.db_pool = self._check.db_pool self._collect_pg_settings_enabled = is_affirmative(config.settings_metadata_config.get('enabled', False)) self._collect_schemas_enabled = is_affirmative(config.schemas_metadata_config.get('enabled', False)) self._pg_settings_cached = None @@ -246,15 +247,41 @@ def _query_schema_information(self, cursor: psycopg2.extensions.cursor, dbname: schemas = [dict(row) for row in rows] return schemas - def _sort_and_limit_table_info(self, table_info: List[Dict[str, Union[str, bool]]], limit: int) -> List[Dict[str, Union[str, bool]]]: + def _sort_and_limit_table_info(self, dbname, table_info: List[Dict[str, Union[str, bool]]], limit: int) -> List[Dict[str, Union[str, bool]]]: + """ + If relation metrics is enabled, sort tables by the number of total accesses (index_rel_scans + seq_scans). + If they are not enabled, the table list will be blindly truncated to the limit. + + If any tables are partitioned, the partitioned table will be returned and not counted against the limit. However, partitions + of the table are counted against the limit. + """ + self.partitioned_tables = 0 + def sort_tables(info): + cache = self._check.metrics_cache.table_activity_metrics + # partition master tables won't get any metrics reported on them, + # so we assign them a high number and don't count them against the table limit + if not info["has_partitions"]: + return cache[dbname][info['name']]['postgresql.index_scans'] + cache[dbname][info['name']]['postgresql.seq_scans'] + else: + self.partitioned_tables += 1 + return float("inf") + # if relation metrics are enabled, sorted based on last activity information - activity_metrics_cache = self._check.metrics_cache.get_activity_metrics(self._check.version) - if activity_metrics_cache: - self._log.warning(activity_metrics_cache) + table_metrics_cache = self._check.metrics_cache.table_activity_metrics + if table_metrics_cache: + self._log.warning(table_metrics_cache) + + table_info = sorted( + table_info, + key=sort_tables, + reverse=True + ) + return table_info[:limit + self.partitioned_tables] # else, blindly truncate + return table_info[:limit] - def _query_table_information_for_schema(self, cursor: psycopg2.extensions.cursor, schemaname: str) -> List[Dict[str, Union[str, Dict]]]: + def _query_table_information_for_schema(self, cursor: psycopg2.extensions.cursor, schemaname: str, dbname: str) -> List[Dict[str, Union[str, Dict]]]: """ Collect table information per schema. 
Returns a list of dictionaries with key/values: @@ -277,7 +304,7 @@ def _query_table_information_for_schema(self, cursor: psycopg2.extensions.cursor cursor.execute(PG_TABLES_QUERY.format(schemaname=schemaname)) rows = cursor.fetchall() tables_info = [dict(row) for row in rows] - self._sort_and_limit_table_info(tables_info, 1000) + tables_info = self._sort_and_limit_table_info(dbname, tables_info, 1000) self._log.warning(tables_info) table_payloads = [] for table in tables_info: @@ -312,20 +339,21 @@ def _query_table_information_for_schema(self, cursor: psycopg2.extensions.cursor if rows: this_payload.update({'foreign_keys': {}}) - # Get columns - cursor.execute(COLUMNS_QUERY.format(tablename=name)) - rows = cursor.fetchall() - self._log.warning(rows) - columns = [dict(row) for row in rows] - this_payload.update({'columns': columns}) - + # Get columns + if not table['is_partition']: + cursor.execute(COLUMNS_QUERY.format(tablename=name)) + rows = cursor.fetchall() + self._log.warning(rows) + columns = [dict(row) for row in rows] + this_payload.update({'columns': columns}) + table_payloads.append(this_payload) return table_payloads def _collect_metadata_for_database(self, dbname): metadata = {} - with self._conn_pool.get_connection(dbname, ttl_ms=self._conn_ttl_ms) as conn: + with self.db_pool.get_connection(dbname, self._config.idle_connection_timeout) as conn: with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: database_info = self._query_database_information(cursor, dbname) metadata.update( @@ -342,7 +370,7 @@ def _collect_metadata_for_database(self, dbname): schema_info = self._query_schema_information(cursor, dbname) self._log.warning(schema_info) for schema in schema_info: - tables_info = self._query_table_information_for_schema(cursor, schema['name']) + tables_info = self._query_table_information_for_schema(cursor, schema['name'], dbname) self._log.warning(tables_info) metadata['schemas'].append( { diff --git a/postgres/datadog_checks/postgres/metrics_cache.py b/postgres/datadog_checks/postgres/metrics_cache.py index 30ee4399000c7..8ab76ddaaca78 100644 --- a/postgres/datadog_checks/postgres/metrics_cache.py +++ b/postgres/datadog_checks/postgres/metrics_cache.py @@ -3,6 +3,7 @@ # Licensed under Simplified BSD License (see LICENSE) # https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS import logging +from collections import defaultdict from .util import ( ACTIVITY_DD_METRICS, @@ -44,7 +45,8 @@ def __init__(self, config): self.replication_stats_metrics = None self.activity_metrics = None self._count_metrics = None - self.table_activity_metrics = None + if self.config.relations: + self.table_activity_metrics = dict() def clean_state(self): self.instance_metrics = None @@ -53,7 +55,8 @@ def clean_state(self): self.replication_metrics = None self.replication_stats_metrics = None self.activity_metrics = None - self.table_activity_metrics = None + if self.config.relations: + self.table_activity_metrics = dict() def get_instance_metrics(self, version): """ diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index 0422fc7a4ed61..198c6123ef99d 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -474,7 +474,14 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname=N submit_metric(self, name, value, tags=set(tags), hostname=self.resolved_hostname) # TODO: if relation-level metrics idx_scan or seq_scan, cache it - + 
print(name) + if name in ('postgresql.index_scans', 'postgresql.seq_scans'): + db = dbname if self.autodiscovery else self._config.dbname + if db not in self.metrics_cache.table_activity_metrics.keys(): + self.metrics_cache.table_activity_metrics[db] = {} + if desc_map['table'] not in self.metrics_cache.table_activity_metrics[db].keys(): + self.metrics_cache.table_activity_metrics[db][desc_map['table']] = {'postgresql.index_scans': 0, 'postgresql.seq_scans': 0} + self.metrics_cache.table_activity_metrics[db][desc_map['table']][name] = value num_results += 1 diff --git a/postgres/tests/test_metadata.py b/postgres/tests/test_metadata.py index 52c82d43cb5a7..03ab159dffcd5 100644 --- a/postgres/tests/test_metadata.py +++ b/postgres/tests/test_metadata.py @@ -6,6 +6,7 @@ import pytest from datadog_checks.base.utils.db.utils import DBMAsyncJob +from .utils import run_one_check pytestmark = [pytest.mark.integration, pytest.mark.usefixtures('dd_environment')] @@ -39,11 +40,16 @@ def test_collect_metadata(integration_check, dbm_instance, aggregator): assert event['kind'] == "pg_settings" assert len(event["metadata"]) > 0 +@pytest.mark.integration +@pytest.mark.usefixtures('dd_environment') def test_collect_schemas(integration_check, dbm_instance, aggregator): dbm_instance["collect_schemas"] = {'enabled': True, 'collection_interval': 0.5} - dbm_instance['relations'] = {'relation_regex': ".*"} + dbm_instance['relations'] = [{'relation_regex': ".*"}] + dbm_instance["database_autodiscovery"] = {"enabled": True, "include": ["datadog"]} + del dbm_instance['dbname'] check = integration_check(dbm_instance) - check.check(dbm_instance) + run_one_check(check,dbm_instance) + run_one_check(check,dbm_instance) assert None is not None dbm_metadata = aggregator.get_event_platform_events("dbm-metadata") event = dbm_metadata[0] From d040a9cfc43557cd67d4468130b1dad7015229f8 Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Tue, 18 Jul 2023 16:22:01 +0000 Subject: [PATCH 66/86] toast table changes --- postgres/datadog_checks/postgres/metadata.py | 120 ++++++++++--------- postgres/datadog_checks/postgres/postgres.py | 22 +++- postgres/tests/test_relations.py | 2 + 3 files changed, 85 insertions(+), 59 deletions(-) diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index fbe3ecd62098a..34f1a313b7e1f 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -35,24 +35,30 @@ PG_STAT_TABLES_QUERY = """ -SELECT st.relname as name,seq_scan,idx_scan,relhasindex as hasindexes,relowner::regrole as owner,relispartition as is_partition -FROM pg_stat_all_tables st -LEFT JOIN pg_class c ON c.relname = st.relname -AND relkind IN ('r', 'p') -WHERE schemaname = '{schemaname}' +SELECT st.relname as name,seq_scan,idx_scan,c.relhasindex as hasindexes,c.relowner::regrole as owner, +(CASE WHEN c.relkind = 'p' THEN true ELSE false END) AS has_partitions, +(CASE WHEN pg_relation_size(c.reltoastrelid) > 500000 THEN t.relname ELSE null END) AS toast_table +FROM pg_class c +LEFT JOIN pg_stat_all_tables st ON c.relname = st.relname +LEFT JOIN pg_class t on c.reltoastrelid = t.oid +WHERE schemaname = 'public' +AND c.relkind IN ('r', 'p') +AND c.relispartition != 't' ORDER BY coalesce(seq_scan, 0) + coalesce(idx_scan, 0) DESC; """ PG_TABLES_QUERY = """ -SELECT tablename as name, hasindexes,relowner::regrole as owner,relispartition as is_partition, -(case when relkind = 'p' then true else false end) as has_partitions +SELECT tablename as name, 
hasindexes, c.relowner::regrole AS owner, +(CASE WHEN c.relkind = 'p' THEN true ELSE false END) AS has_partitions, +(CASE WHEN pg_relation_size(c.reltoastrelid) > 500000 THEN t.relname ELSE null END) AS toast_table FROM pg_tables st LEFT JOIN pg_class c ON relname = tablename -AND relkind IN ('r', 'p') -WHERE schemaname = '{schemaname}'; +LEFT JOIN pg_class t on c.reltoastrelid = t.oid +WHERE c.relkind IN ('r', 'p') +AND c.relispartition != 't' +AND schemaname = '{schemaname}'; """ - SCHEMA_QUERY = """ SELECT nspname as name, nspowner::regrole as owner FROM pg_namespace @@ -82,20 +88,17 @@ AND NOT attisdropped; """ -PARTITION_PARENT_AND_RANGE_QUERY = """ -SELECT parent, child, pg_get_expr(c.relpartbound, c.oid, true) as partition_range - FROM - (SELECT inhparent::regclass as parent, inhrelid::regclass as child - FROM pg_inherits - WHERE inhrelid = '{tablename}'::regclass::oid) AS inherits - LEFT JOIN pg_class c ON c.oid = child::regclass::oid; -""" - PARTITION_KEY_QUERY = """ SELECT relname, pg_get_partkeydef(oid) as partition_key FROM pg_class WHERE '{parent}' = relname; """ +NUM_PARTITIONS_QUERY = """ +SELECT count(inhrelid::regclass) as num_partitions + FROM pg_inherits + WHERE inhparent = '{parent}'::regclass::oid +""" + def agent_check_getter(self): return self._check @@ -247,39 +250,49 @@ def _query_schema_information(self, cursor: psycopg2.extensions.cursor, dbname: schemas = [dict(row) for row in rows] return schemas - def _sort_and_limit_table_info(self, dbname, table_info: List[Dict[str, Union[str, bool]]], limit: int) -> List[Dict[str, Union[str, bool]]]: + def _get_table_info(self, cursor, dbname, schemaname, limit): """ If relation metrics is enabled, sort tables by the number of total accesses (index_rel_scans + seq_scans). - If they are not enabled, the table list will be blindly truncated to the limit. + If they are not enabled, the table list will be retrieved from pg_stat_all_tables and sorted in the query. If any tables are partitioned, the partitioned table will be returned and not counted against the limit. However, partitions of the table are counted against the limit. 
""" + if self._config.relations: + cursor.execute(PG_TABLES_QUERY.format(schemaname=schemaname)) + rows = cursor.fetchall() + table_info = [dict(row) for row in rows] + return self._sort_and_limit_table_info(dbname, table_info, limit) + + else: + table_info = cursor.execute(PG_STAT_TABLES_QUERY.format(schemaname=schemaname)) + + def _sort_and_limit_table_info(self, dbname, table_info: List[Dict[str, Union[str, bool]]], limit: int) -> List[Dict[str, Union[str, bool]]]: self.partitioned_tables = 0 def sort_tables(info): cache = self._check.metrics_cache.table_activity_metrics # partition master tables won't get any metrics reported on them, - # so we assign them a high number and don't count them against the table limit + # so we assign them a high number to ensure they're captured in the sort + # and don't count them against the table limit if not info["has_partitions"]: return cache[dbname][info['name']]['postgresql.index_scans'] + cache[dbname][info['name']]['postgresql.seq_scans'] else: - self.partitioned_tables += 1 - return float("inf") + partitions = cache[dbname][info['name']]['partitions'] + main_partition_activity = 0 + for partition in partitions: + main_partition_activity += (cache[dbname][partition]['postgresql.index_scans'] + cache[dbname][partition]['postgresql.seq_scans']) + return main_partition_activity # if relation metrics are enabled, sorted based on last activity information table_metrics_cache = self._check.metrics_cache.table_activity_metrics - if table_metrics_cache: - self._log.warning(table_metrics_cache) - - table_info = sorted( - table_info, - key=sort_tables, - reverse=True - ) - return table_info[:limit + self.partitioned_tables] + self._log.warning(table_metrics_cache) - # else, blindly truncate - return table_info[:limit] + table_info = sorted( + table_info, + key=sort_tables, + reverse=True + ) + return table_info[:limit + self.partitioned_tables] def _query_table_information_for_schema(self, cursor: psycopg2.extensions.cursor, schemaname: str, dbname: str) -> List[Dict[str, Union[str, Dict]]]: """ @@ -287,8 +300,6 @@ def _query_table_information_for_schema(self, cursor: psycopg2.extensions.cursor with key/values: "name": str "owner": str - "partition_key": str (if has partitions) - "partition_of": str (if a partition) "foreign_keys": dict (if has foreign keys) name: str definition: str @@ -300,11 +311,12 @@ def _query_table_information_for_schema(self, cursor: psycopg2.extensions.cursor data_type: str default: str nullable: bool + "toast_table": str (if associated toast table is > 500kb) + "partition_key": str (if has partitions) + "num_partitions": int (if has partitions) """ - cursor.execute(PG_TABLES_QUERY.format(schemaname=schemaname)) - rows = cursor.fetchall() - tables_info = [dict(row) for row in rows] - tables_info = self._sort_and_limit_table_info(dbname, tables_info, 1000) + tables_info = self._get_table_info(cursor, dbname, schemaname, 1000) + # tables_info = self._sort_and_limit_table_info(dbname, tables_info, 1000) self._log.warning(tables_info) table_payloads = [] for table in tables_info: @@ -318,20 +330,19 @@ def _query_table_information_for_schema(self, cursor: psycopg2.extensions.cursor indexes = {row[0]:row[1] for row in rows} this_payload.update({'indexes': indexes}) - if table['is_partition']: - cursor.execute(PARTITION_PARENT_AND_RANGE_QUERY.format(tablename=name)) - row = cursor.fetchone() - partition_of = row['parent'] - partition_range = row['partition_range'] - this_payload.update({'partition_of': partition_of}) - 
this_payload.update({'partition_range': partition_range}) - if table['has_partitions']: cursor.execute(PARTITION_KEY_QUERY.format(parent=name)) row = cursor.fetchone() self._log.warning(row) this_payload.update({'partition_key':row['partition_key']}) + cursor.execute(NUM_PARTITIONS_QUERY.format(parent=name)) + row = cursor.fetchone() + this_payload.update({'num_partitions':row['num_partitions']}) + + if table['toast_table'] != None: + this_payload.update({'toast_table':row['toast_table']}) + # Get foreign keys cursor.execute(PG_CONSTRAINTS_QUERY.format(tablename=table['name'])) rows = cursor.fetchall() @@ -340,12 +351,11 @@ def _query_table_information_for_schema(self, cursor: psycopg2.extensions.cursor this_payload.update({'foreign_keys': {}}) # Get columns - if not table['is_partition']: - cursor.execute(COLUMNS_QUERY.format(tablename=name)) - rows = cursor.fetchall() - self._log.warning(rows) - columns = [dict(row) for row in rows] - this_payload.update({'columns': columns}) + cursor.execute(COLUMNS_QUERY.format(tablename=name)) + rows = cursor.fetchall() + self._log.warning(rows) + columns = [dict(row) for row in rows] + this_payload.update({'columns': columns}) table_payloads.append(this_payload) diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index 198c6123ef99d..b98d02b029c87 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -474,14 +474,28 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname=N submit_metric(self, name, value, tags=set(tags), hostname=self.resolved_hostname) # TODO: if relation-level metrics idx_scan or seq_scan, cache it - print(name) + if desc_map['table'] == 'test_part1': + print(desc_map) if name in ('postgresql.index_scans', 'postgresql.seq_scans'): db = dbname if self.autodiscovery else self._config.dbname + tablename = desc_map['table'] + try: + partition_of = desc_map['partition_of'] + print("partition of {}".format(partition_of)) + except KeyError: + partition_of = None if db not in self.metrics_cache.table_activity_metrics.keys(): self.metrics_cache.table_activity_metrics[db] = {} - if desc_map['table'] not in self.metrics_cache.table_activity_metrics[db].keys(): - self.metrics_cache.table_activity_metrics[db][desc_map['table']] = {'postgresql.index_scans': 0, 'postgresql.seq_scans': 0} - self.metrics_cache.table_activity_metrics[db][desc_map['table']][name] = value + if tablename not in self.metrics_cache.table_activity_metrics[db].keys(): + self.metrics_cache.table_activity_metrics[db][tablename] = {'postgresql.index_scans': 0, 'postgresql.seq_scans': 0, 'partitioned': False} + # all partitions activity should aggregate under their parent, + # so store a list of partitions per parent to parse later + if partition_of and partition_of not in self.metrics_cache.table_activity_metrics[db].keys(): + self.metrics_cache.table_activity_metrics[db][partition_of] = {'partitioned': True, 'partitions': set()} + + if partition_of is not None: + self.metrics_cache.table_activity_metrics[db][partition_of].update(tablename) + self.metrics_cache.table_activity_metrics[db][tablename][name] = value num_results += 1 diff --git a/postgres/tests/test_relations.py b/postgres/tests/test_relations.py index 7d2590274bcfe..34547e8aff690 100644 --- a/postgres/tests/test_relations.py +++ b/postgres/tests/test_relations.py @@ -75,6 +75,8 @@ def test_partition_relation(aggregator, integration_check, pg_instance): part_1_tags = 
_get_expected_tags( check, pg_instance, db=pg_instance['dbname'], table='test_part1', partition_of='test_part', schema='public' ) + print(check.metrics_cache.table_activity_metrics) + assert None is not None aggregator.assert_metric('postgresql.relation.pages', value=3, count=1, tags=part_1_tags) aggregator.assert_metric('postgresql.relation.tuples', value=499, count=1, tags=part_1_tags) aggregator.assert_metric('postgresql.relation.all_visible', value=3, count=1, tags=part_1_tags) From 1f01f254f84cc5630ee73c451e7056b9a186aa35 Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Tue, 18 Jul 2023 22:12:53 +0000 Subject: [PATCH 67/86] Adding some testing, and updating the payload --- postgres/datadog_checks/postgres/metadata.py | 171 ++++++++++-------- .../datadog_checks/postgres/metrics_cache.py | 7 +- postgres/datadog_checks/postgres/postgres.py | 20 +- postgres/tests/test_metadata.py | 52 +++++- postgres/tests/test_relations.py | 2 - 5 files changed, 142 insertions(+), 110 deletions(-) diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index 34f1a313b7e1f..da888d507686b 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -3,7 +3,7 @@ # Licensed under a 3-clause BSD style license (see LICENSE) import json import time -from typing import Dict, Optional, Tuple, Union, List # noqa: F401 +from typing import Dict, List, Optional, Tuple, Union # noqa: F401 import psycopg2 @@ -27,7 +27,7 @@ DATABASE_INFORMATION_QUERY = """ SELECT db.oid as id, datname as name, pg_encoding_to_char(encoding) as encoding, rolname as owner, description - FROM pg_catalog.pg_database db + FROM pg_catalog.pg_database db LEFT JOIN pg_catalog.pg_description dc ON dc.objoid = db.oid JOIN pg_roles a on datdba = a.oid WHERE datname LIKE '{dbname}'; @@ -36,24 +36,24 @@ PG_STAT_TABLES_QUERY = """ SELECT st.relname as name,seq_scan,idx_scan,c.relhasindex as hasindexes,c.relowner::regrole as owner, -(CASE WHEN c.relkind = 'p' THEN true ELSE false END) AS has_partitions, +(CASE WHEN c.relkind = 'p' THEN true ELSE false END) AS has_partitions, (CASE WHEN pg_relation_size(c.reltoastrelid) > 500000 THEN t.relname ELSE null END) AS toast_table -FROM pg_class c +FROM pg_class c LEFT JOIN pg_stat_all_tables st ON c.relname = st.relname -LEFT JOIN pg_class t on c.reltoastrelid = t.oid -WHERE schemaname = 'public' +LEFT JOIN pg_class t on c.reltoastrelid = t.oid +WHERE schemaname = '{schemaname}' AND c.relkind IN ('r', 'p') AND c.relispartition != 't' ORDER BY coalesce(seq_scan, 0) + coalesce(idx_scan, 0) DESC; """ PG_TABLES_QUERY = """ -SELECT tablename as name, hasindexes, c.relowner::regrole AS owner, -(CASE WHEN c.relkind = 'p' THEN true ELSE false END) AS has_partitions, +SELECT tablename as name, hasindexes, c.relowner::regrole AS owner, +(CASE WHEN c.relkind = 'p' THEN true ELSE false END) AS has_partitions, (CASE WHEN pg_relation_size(c.reltoastrelid) > 500000 THEN t.relname ELSE null END) AS toast_table -FROM pg_tables st +FROM pg_tables st LEFT JOIN pg_class c ON relname = tablename -LEFT JOIN pg_class t on c.reltoastrelid = t.oid +LEFT JOIN pg_class t on c.reltoastrelid = t.oid WHERE c.relkind IN ('r', 'p') AND c.relispartition != 't' AND schemaname = '{schemaname}'; @@ -61,27 +61,29 @@ SCHEMA_QUERY = """ SELECT nspname as name, nspowner::regrole as owner FROM - pg_namespace - WHERE nspname not in ('information_schema', 'pg_catalog') + pg_namespace + WHERE nspname not in ('information_schema', 'pg_catalog') AND nspname 
NOT LIKE 'pg_toast%' and nspname NOT LIKE 'pg_temp_%'; """ PG_INDEXES_QUERY = """ SELECT indexname as name, indexdef as definition -FROM pg_indexes +FROM pg_indexes WHERE tablename LIKE '{tablename}'; """ PG_CONSTRAINTS_QUERY = """ SELECT conname AS name, pg_get_constraintdef(oid) as definition -FROM pg_constraint +FROM pg_constraint WHERE contype = 'f' AND conrelid = -'{tablename}'::regclass; +'{tablename}'::regclass; """ COLUMNS_QUERY = """ -SELECT attname as name, format_type(atttypid, atttypmod) AS data_type, NOT attnotnull as nullable, pg_get_expr(adbin, adrelid) as default +SELECT attname as name, +format_type(atttypid, atttypmod) AS data_type, +NOT attnotnull as nullable, pg_get_expr(adbin, adrelid) as default FROM pg_attribute LEFT JOIN pg_attrdef ad ON adrelid=attrelid AND adnum=attnum WHERE attrelid = '{tablename}'::regclass AND attnum > 0 @@ -89,16 +91,27 @@ """ PARTITION_KEY_QUERY = """ - SELECT relname, pg_get_partkeydef(oid) as partition_key -FROM pg_class WHERE '{parent}' = relname; + SELECT relname, pg_get_partkeydef(oid) as partition_key +FROM pg_class WHERE '{parent}' = relname; """ NUM_PARTITIONS_QUERY = """ SELECT count(inhrelid::regclass) as num_partitions - FROM pg_inherits + FROM pg_inherits WHERE inhparent = '{parent}'::regclass::oid """ +PARTITION_ACTIVITY_QUERY = """ +SELECT + pi.inhparent::regclass AS parent_table_name, + SUM(psu.seq_scan + psu.idx_scan) AS total_activity +FROM pg_catalog.pg_stat_user_tables psu + JOIN pg_class pc ON psu.relname = pc.relname + JOIN pg_inherits pi ON pi.inhrelid = pc.oid +WHERE pi.inhparent = '{parent}'::regclass::oid +GROUP BY pi.inhparent; +""" + def agent_check_getter(self): return self._check @@ -189,7 +202,7 @@ def report_postgres_metadata(self): self._check.database_monitoring_metadata(json.dumps(event, default=default_json_event_encoding)) elapsed_s = time.time() - self._time_since_last_schemas_query - if elapsed_s >= self.schemas_collection_interval: + if elapsed_s >= self.schemas_collection_interval and self._collect_schemas_enabled: self._collect_schema_info() def _payload_pg_version(self): @@ -197,35 +210,37 @@ def _payload_pg_version(self): if not version: return "" return 'v{major}.{minor}.{patch}'.format(major=version.major, minor=version.minor, patch=version.patch) - + def _collect_schema_info(self): databases = [] - if self._check.autodiscovery: + if self._check.autodiscovery: databases = self._check.autodiscovery.get_items() elif self._config.dbname != 'postgres': databases.append(self._config.dbname) else: # if we are only connecting to 'postgres' database, not worth reporting data model return - + metadata = [] for database in databases: metadata.append(self._collect_metadata_for_database(database)) event = { - "host": self._check.resolved_hostname, - "agent_version": datadog_agent.get_version(), - "dbms": "postgres", - "kind": "pg_databases", - "collection_interval": self.schemas_collection_interval, - "dbms_version": self._payload_pg_version(), - "tags": self._tags_no_db, - "timestamp": time.time() * 1000, - "cloud_metadata": self._config.cloud_metadata, - "metadata": metadata, + "host": self._check.resolved_hostname, + "agent_version": datadog_agent.get_version(), + "dbms": "postgres", + "kind": "pg_databases", + "collection_interval": self.schemas_collection_interval, + "dbms_version": self._payload_pg_version(), + "tags": self._tags_no_db, + "timestamp": time.time() * 1000, + "cloud_metadata": self._config.cloud_metadata, + "metadata": metadata, } self._check.database_monitoring_metadata(json.dumps(event, 
default=default_json_event_encoding)) - - def _query_database_information(self, cursor: psycopg2.extensions.cursor, dbname: str) -> Dict[str, Union[str, int]]: + + def _query_database_information( + self, cursor: psycopg2.extensions.cursor, dbname: str + ) -> Dict[str, Union[str, int]]: """ Collect database info. Returns description: str @@ -235,7 +250,7 @@ def _query_database_information(self, cursor: psycopg2.extensions.cursor, dbname owner: str """ cursor.execute(DATABASE_INFORMATION_QUERY.format(dbname=dbname)) - row = cursor.fetchone() + row = cursor.fetchone() print(row) return row @@ -255,51 +270,52 @@ def _get_table_info(self, cursor, dbname, schemaname, limit): If relation metrics is enabled, sort tables by the number of total accesses (index_rel_scans + seq_scans). If they are not enabled, the table list will be retrieved from pg_stat_all_tables and sorted in the query. - If any tables are partitioned, the partitioned table will be returned and not counted against the limit. However, partitions - of the table are counted against the limit. + If any tables are partitioned, the partitioned table will be returned and not counted against the limit. + However, partitions of the table are counted against the limit. """ if self._config.relations: cursor.execute(PG_TABLES_QUERY.format(schemaname=schemaname)) rows = cursor.fetchall() - table_info = [dict(row) for row in rows] - return self._sort_and_limit_table_info(dbname, table_info, limit) + table_info = [dict(row) for row in rows] + return self._sort_and_limit_table_info(cursor, dbname, table_info, limit) else: - table_info = cursor.execute(PG_STAT_TABLES_QUERY.format(schemaname=schemaname)) + raise NotImplementedError() + # table_info = cursor.execute(PG_STAT_TABLES_QUERY.format(schemaname=schemaname)) - def _sort_and_limit_table_info(self, dbname, table_info: List[Dict[str, Union[str, bool]]], limit: int) -> List[Dict[str, Union[str, bool]]]: - self.partitioned_tables = 0 + def _sort_and_limit_table_info( + self, cursor, dbname, table_info: List[Dict[str, Union[str, bool]]], limit: int + ) -> List[Dict[str, Union[str, bool]]]: def sort_tables(info): cache = self._check.metrics_cache.table_activity_metrics - # partition master tables won't get any metrics reported on them, - # so we assign them a high number to ensure they're captured in the sort - # and don't count them against the table limit + # partition master tables won't get any metrics reported on them, + # so we have to grab the total partition activity if not info["has_partitions"]: - return cache[dbname][info['name']]['postgresql.index_scans'] + cache[dbname][info['name']]['postgresql.seq_scans'] + return ( + cache[dbname][info['name']]['postgresql.index_scans'] + + cache[dbname][info['name']]['postgresql.seq_scans'] + ) else: - partitions = cache[dbname][info['name']]['partitions'] - main_partition_activity = 0 - for partition in partitions: - main_partition_activity += (cache[dbname][partition]['postgresql.index_scans'] + cache[dbname][partition]['postgresql.seq_scans']) - return main_partition_activity + # get activity + cursor.execute(PARTITION_ACTIVITY_QUERY.format(parent=info['name'])) + row = cursor.fetchone() + return row['total_activity'] # if relation metrics are enabled, sorted based on last activity information table_metrics_cache = self._check.metrics_cache.table_activity_metrics self._log.warning(table_metrics_cache) - table_info = sorted( - table_info, - key=sort_tables, - reverse=True - ) - return table_info[:limit + self.partitioned_tables] + table_info = 
sorted(table_info, key=sort_tables, reverse=True) + return table_info[:limit] - def _query_table_information_for_schema(self, cursor: psycopg2.extensions.cursor, schemaname: str, dbname: str) -> List[Dict[str, Union[str, Dict]]]: + def _query_table_information_for_schema( + self, cursor: psycopg2.extensions.cursor, schemaname: str, dbname: str + ) -> List[Dict[str, Union[str, Dict]]]: """ - Collect table information per schema. Returns a list of dictionaries + Collect table information per schema. Returns a list of dictionaries with key/values: - "name": str - "owner": str + "name": str + "owner": str "foreign_keys": dict (if has foreign keys) name: str definition: str @@ -308,7 +324,7 @@ def _query_table_information_for_schema(self, cursor: psycopg2.extensions.cursor definition: str "columns": dict name: str - data_type: str + data_type: str default: str nullable: bool "toast_table": str (if associated toast table is > 500kb) @@ -316,7 +332,6 @@ def _query_table_information_for_schema(self, cursor: psycopg2.extensions.cursor "num_partitions": int (if has partitions) """ tables_info = self._get_table_info(cursor, dbname, schemaname, 1000) - # tables_info = self._sort_and_limit_table_info(dbname, tables_info, 1000) self._log.warning(tables_info) table_payloads = [] for table in tables_info: @@ -327,21 +342,21 @@ def _query_table_information_for_schema(self, cursor: psycopg2.extensions.cursor if table["hasindexes"]: cursor.execute(PG_INDEXES_QUERY.format(tablename=name)) rows = cursor.fetchall() - indexes = {row[0]:row[1] for row in rows} + indexes = {row[0]: row[1] for row in rows} this_payload.update({'indexes': indexes}) if table['has_partitions']: cursor.execute(PARTITION_KEY_QUERY.format(parent=name)) row = cursor.fetchone() self._log.warning(row) - this_payload.update({'partition_key':row['partition_key']}) + this_payload.update({'partition_key': row['partition_key']}) cursor.execute(NUM_PARTITIONS_QUERY.format(parent=name)) row = cursor.fetchone() - this_payload.update({'num_partitions':row['num_partitions']}) + this_payload.update({'num_partitions': row['num_partitions']}) - if table['toast_table'] != None: - this_payload.update({'toast_table':row['toast_table']}) + if table['toast_table'] is not None: + this_payload.update({'toast_table': row['toast_table']}) # Get foreign keys cursor.execute(PG_CONSTRAINTS_QUERY.format(tablename=table['name'])) @@ -349,14 +364,14 @@ def _query_table_information_for_schema(self, cursor: psycopg2.extensions.cursor self._log.warning("foreign keys {}".format(rows)) if rows: this_payload.update({'foreign_keys': {}}) - - # Get columns + + # Get columns cursor.execute(COLUMNS_QUERY.format(tablename=name)) rows = cursor.fetchall() self._log.warning(rows) columns = [dict(row) for row in rows] this_payload.update({'columns': columns}) - + table_payloads.append(this_payload) return table_payloads @@ -366,7 +381,7 @@ def _collect_metadata_for_database(self, dbname): with self.db_pool.get_connection(dbname, self._config.idle_connection_timeout) as conn: with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: database_info = self._query_database_information(cursor, dbname) - metadata.update( + metadata.update( { "description": database_info['description'], "name": database_info['name'], @@ -382,14 +397,10 @@ def _collect_metadata_for_database(self, dbname): for schema in schema_info: tables_info = self._query_table_information_for_schema(cursor, schema['name'], dbname) self._log.warning(tables_info) - metadata['schemas'].append( - { - "name": 
schema['name'], - "owner": schema['owner'], - "tables": tables_info - } + metadata['schemas'].append( + {"name": schema['name'], "owner": schema['owner'], "tables": tables_info} ) - + return metadata @tracked_method(agent_check_getter=agent_check_getter) diff --git a/postgres/datadog_checks/postgres/metrics_cache.py b/postgres/datadog_checks/postgres/metrics_cache.py index 8ab76ddaaca78..617a79b47f85e 100644 --- a/postgres/datadog_checks/postgres/metrics_cache.py +++ b/postgres/datadog_checks/postgres/metrics_cache.py @@ -3,7 +3,6 @@ # Licensed under Simplified BSD License (see LICENSE) # https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS import logging -from collections import defaultdict from .util import ( ACTIVITY_DD_METRICS, @@ -46,7 +45,7 @@ def __init__(self, config): self.activity_metrics = None self._count_metrics = None if self.config.relations: - self.table_activity_metrics = dict() + self.table_activity_metrics = {} def clean_state(self): self.instance_metrics = None @@ -56,7 +55,7 @@ def clean_state(self): self.replication_stats_metrics = None self.activity_metrics = None if self.config.relations: - self.table_activity_metrics = dict() + self.table_activity_metrics = {} def get_instance_metrics(self, version): """ @@ -185,7 +184,7 @@ def get_replication_stats_metrics(self, version): if version >= V10 and self.replication_stats_metrics is None: self.replication_stats_metrics = dict(REPLICATION_STATS_METRICS) return self.replication_stats_metrics - + def get_activity_metrics(self, version): """Use ACTIVITY_METRICS_LT_8_3 or ACTIVITY_METRICS_8_3 or ACTIVITY_METRICS_9_2 depending on the postgres version in conjunction with ACTIVITY_QUERY_10 or ACTIVITY_QUERY_LT_10. diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index b98d02b029c87..68cb81b254c1b 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -474,27 +474,17 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname=N submit_metric(self, name, value, tags=set(tags), hostname=self.resolved_hostname) # TODO: if relation-level metrics idx_scan or seq_scan, cache it - if desc_map['table'] == 'test_part1': - print(desc_map) if name in ('postgresql.index_scans', 'postgresql.seq_scans'): db = dbname if self.autodiscovery else self._config.dbname tablename = desc_map['table'] - try: - partition_of = desc_map['partition_of'] - print("partition of {}".format(partition_of)) - except KeyError: - partition_of = None if db not in self.metrics_cache.table_activity_metrics.keys(): self.metrics_cache.table_activity_metrics[db] = {} if tablename not in self.metrics_cache.table_activity_metrics[db].keys(): - self.metrics_cache.table_activity_metrics[db][tablename] = {'postgresql.index_scans': 0, 'postgresql.seq_scans': 0, 'partitioned': False} - # all partitions activity should aggregate under their parent, - # so store a list of partitions per parent to parse later - if partition_of and partition_of not in self.metrics_cache.table_activity_metrics[db].keys(): - self.metrics_cache.table_activity_metrics[db][partition_of] = {'partitioned': True, 'partitions': set()} - - if partition_of is not None: - self.metrics_cache.table_activity_metrics[db][partition_of].update(tablename) + self.metrics_cache.table_activity_metrics[db][tablename] = { + 'postgresql.index_scans': 0, + 'postgresql.seq_scans': 0, + } + self.metrics_cache.table_activity_metrics[db][tablename][name] = value 
num_results += 1 diff --git a/postgres/tests/test_metadata.py b/postgres/tests/test_metadata.py index 03ab159dffcd5..da28b8478b1aa 100644 --- a/postgres/tests/test_metadata.py +++ b/postgres/tests/test_metadata.py @@ -6,6 +6,7 @@ import pytest from datadog_checks.base.utils.db.utils import DBMAsyncJob + from .utils import run_one_check pytestmark = [pytest.mark.integration, pytest.mark.usefixtures('dd_environment')] @@ -40,20 +41,53 @@ def test_collect_metadata(integration_check, dbm_instance, aggregator): assert event['kind'] == "pg_settings" assert len(event["metadata"]) > 0 + @pytest.mark.integration @pytest.mark.usefixtures('dd_environment') def test_collect_schemas(integration_check, dbm_instance, aggregator): - dbm_instance["collect_schemas"] = {'enabled': True, 'collection_interval': 0.5} + dbm_instance["collect_schemas"] = {'enabled': True, 'collection_interval': 0.5} dbm_instance['relations'] = [{'relation_regex': ".*"}] dbm_instance["database_autodiscovery"] = {"enabled": True, "include": ["datadog"]} del dbm_instance['dbname'] check = integration_check(dbm_instance) - run_one_check(check,dbm_instance) - run_one_check(check,dbm_instance) - assert None is not None + run_one_check(check, dbm_instance) dbm_metadata = aggregator.get_event_platform_events("dbm-metadata") - event = dbm_metadata[0] - assert event['host'] == "stubbed.hostname" - assert event['dbms'] == "postgres" - assert event['kind'] == "pg_databases" - assert len(event["metadata"]) > 0 \ No newline at end of file + schema_event = None + for event in dbm_metadata: + if event['kind'] == "pg_databases": + schema_event = event + assert schema_event is not None + assert schema_event['host'] == "stubbed.hostname" + assert schema_event['dbms'] == "postgres" + assert schema_event['kind'] == "pg_databases" + assert len(event["metadata"]) > 0 + + +def test_get_table_info_relations_enabled(integration_check, dbm_instance, aggregator): + dbm_instance["collect_schemas"] = {'enabled': True, 'collection_interval': 0.5} + dbm_instance['relations'] = [{'relation_regex': ".*"}] + dbm_instance["database_autodiscovery"] = {"enabled": True, "include": ["datadog"]} + del dbm_instance['dbname'] + check = integration_check(dbm_instance) + run_one_check(check, dbm_instance) + dbm_metadata = aggregator.get_event_platform_events("dbm-metadata") + schema_event = None + for event in dbm_metadata: + if event['kind'] == "pg_databases": + schema_event = event + + # there should only be one database, datadog_test + database_metadata = schema_event['metadata'][0] + assert 'datadog_test' == database_metadata['name'] + schema_metadata = database_metadata['schemas'][0] + assert 'public' == schema_metadata['name'] + + # check that all expected tables are present + tables_set = {"test_part", 'persons', "personsdup1", "personsdup2", "pgtable", "pg_newtable"} + tables_not_reported_set = {'test_part1', 'test_part2'} + + # TODO if version is 9 or 10, partitions are not in table, check that + for table in schema_metadata['tables']: + assert tables_set.remove(table['name']) is None + assert table['name'] not in tables_not_reported_set + assert tables_set == set() diff --git a/postgres/tests/test_relations.py b/postgres/tests/test_relations.py index 34547e8aff690..7d2590274bcfe 100644 --- a/postgres/tests/test_relations.py +++ b/postgres/tests/test_relations.py @@ -75,8 +75,6 @@ def test_partition_relation(aggregator, integration_check, pg_instance): part_1_tags = _get_expected_tags( check, pg_instance, db=pg_instance['dbname'], table='test_part1', 
partition_of='test_part', schema='public' ) - print(check.metrics_cache.table_activity_metrics) - assert None is not None aggregator.assert_metric('postgresql.relation.pages', value=3, count=1, tags=part_1_tags) aggregator.assert_metric('postgresql.relation.tuples', value=499, count=1, tags=part_1_tags) aggregator.assert_metric('postgresql.relation.all_visible', value=3, count=1, tags=part_1_tags) From 8c7725f8f6c95d2b780744289bcf48e66d63f2ce Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Wed, 19 Jul 2023 13:56:15 +0000 Subject: [PATCH 68/86] Add a debug log --- postgres/datadog_checks/postgres/metadata.py | 1 + 1 file changed, 1 insertion(+) diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index da888d507686b..29480ddde0feb 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -236,6 +236,7 @@ def _collect_schema_info(self): "cloud_metadata": self._config.cloud_metadata, "metadata": metadata, } + self._log.debug("Reporting the following payload: {}".format(event)) self._check.database_monitoring_metadata(json.dumps(event, default=default_json_event_encoding)) def _query_database_information( From 8042b94b3acb34509683b536cdfe9689405ef07d Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Wed, 19 Jul 2023 14:32:56 +0000 Subject: [PATCH 69/86] Updating last_schemas_query at correct time --- postgres/datadog_checks/postgres/metadata.py | 38 +++++++++++--------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index 29480ddde0feb..b405d306ec22c 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -129,7 +129,7 @@ def __init__(self, check, config, shutdown_callback): 'collection_interval', DEFAULT_SETTINGS_COLLECTION_INTERVAL ) self.schemas_collection_interval = config.schemas_metadata_config.get( - 'collection_interval', DEFAULT_SETTINGS_COLLECTION_INTERVAL + 'collection_interval', DEFAULT_SCHEMAS_COLLECTION_INTERVAL ) collection_interval = config.resources_metadata_config.get( @@ -201,9 +201,23 @@ def report_postgres_metadata(self): } self._check.database_monitoring_metadata(json.dumps(event, default=default_json_event_encoding)) - elapsed_s = time.time() - self._time_since_last_schemas_query - if elapsed_s >= self.schemas_collection_interval and self._collect_schemas_enabled: + elapsed_s_schemas = time.time() - self._time_since_last_schemas_query + if elapsed_s_schemas >= self.schemas_collection_interval and self._collect_schemas_enabled: self._collect_schema_info() + event = { + "host": self._check.resolved_hostname, + "agent_version": datadog_agent.get_version(), + "dbms": "postgres", + "kind": "pg_databases", + "collection_interval": self.schemas_collection_interval, + "dbms_version": self._payload_pg_version(), + "tags": self._tags_no_db, + "timestamp": time.time() * 1000, + "cloud_metadata": self._config.cloud_metadata, + "metadata": metadata, + } + self._log.debug("Reporting the following payload: {}".format(event)) + self._check.database_monitoring_metadata(json.dumps(event, default=default_json_event_encoding)) def _payload_pg_version(self): version = self._check.version @@ -224,20 +238,10 @@ def _collect_schema_info(self): metadata = [] for database in databases: metadata.append(self._collect_metadata_for_database(database)) - event = { - "host": self._check.resolved_hostname, - "agent_version": 
datadog_agent.get_version(), - "dbms": "postgres", - "kind": "pg_databases", - "collection_interval": self.schemas_collection_interval, - "dbms_version": self._payload_pg_version(), - "tags": self._tags_no_db, - "timestamp": time.time() * 1000, - "cloud_metadata": self._config.cloud_metadata, - "metadata": metadata, - } - self._log.debug("Reporting the following payload: {}".format(event)) - self._check.database_monitoring_metadata(json.dumps(event, default=default_json_event_encoding)) + + self._time_since_last_schemas_query = time.time() + return metadata + def _query_database_information( self, cursor: psycopg2.extensions.cursor, dbname: str From 414db6c6f28bb088450b1df9ae705d68c4bdcf60 Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Wed, 19 Jul 2023 14:52:53 +0000 Subject: [PATCH 70/86] Actually return the metadata from _collect_schema_info --- postgres/datadog_checks/postgres/metadata.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index b405d306ec22c..9c8a67ab77b4c 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -203,7 +203,7 @@ def report_postgres_metadata(self): elapsed_s_schemas = time.time() - self._time_since_last_schemas_query if elapsed_s_schemas >= self.schemas_collection_interval and self._collect_schemas_enabled: - self._collect_schema_info() + metadata = self._collect_schema_info() event = { "host": self._check.resolved_hostname, "agent_version": datadog_agent.get_version(), @@ -216,8 +216,9 @@ def report_postgres_metadata(self): "cloud_metadata": self._config.cloud_metadata, "metadata": metadata, } - self._log.debug("Reporting the following payload: {}".format(event)) - self._check.database_monitoring_metadata(json.dumps(event, default=default_json_event_encoding)) + json_event = json.dumps(event, default=default_json_event_encoding) + self._log.debug("Reporting the following payload: {}".format(json_event)) + self._check.database_monitoring_metadata(json_event) def _payload_pg_version(self): version = self._check.version From 4c27ddacca9deed05ef076a9192c5fa80982e739 Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Wed, 19 Jul 2023 16:59:12 +0000 Subject: [PATCH 71/86] Convert db id from int to str --- postgres/datadog_checks/postgres/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index 9c8a67ab77b4c..74015cf708a47 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -391,7 +391,7 @@ def _collect_metadata_for_database(self, dbname): { "description": database_info['description'], "name": database_info['name'], - "id": database_info['id'], + "id": str(database_info['id']), "encoding": database_info['encoding'], "owner": database_info['owner'], "schemas": [], From 211f825a74b1abac2c23f83b687f5f11cfd26d8d Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Thu, 20 Jul 2023 14:28:15 +0000 Subject: [PATCH 72/86] Correct json objects for indexes, foreign keys --- postgres/datadog_checks/postgres/metadata.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index 74015cf708a47..02fcf66ccd569 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -213,8 
+213,8 @@ def report_postgres_metadata(self): "dbms_version": self._payload_pg_version(), "tags": self._tags_no_db, "timestamp": time.time() * 1000, - "cloud_metadata": self._config.cloud_metadata, "metadata": metadata, + "cloud_metadata": self._config.cloud_metadata, } json_event = json.dumps(event, default=default_json_event_encoding) self._log.debug("Reporting the following payload: {}".format(json_event)) @@ -348,8 +348,7 @@ def _query_table_information_for_schema( if table["hasindexes"]: cursor.execute(PG_INDEXES_QUERY.format(tablename=name)) rows = cursor.fetchall() - indexes = {row[0]: row[1] for row in rows} - this_payload.update({'indexes': indexes}) + this_payload.update({'indexes': rows}) if table['has_partitions']: cursor.execute(PARTITION_KEY_QUERY.format(parent=name)) @@ -369,13 +368,13 @@ def _query_table_information_for_schema( rows = cursor.fetchall() self._log.warning("foreign keys {}".format(rows)) if rows: - this_payload.update({'foreign_keys': {}}) + this_payload.update({'foreign_keys': rows}) # Get columns cursor.execute(COLUMNS_QUERY.format(tablename=name)) rows = cursor.fetchall() self._log.warning(rows) - columns = [dict(row) for row in rows] + # columns = [dict(row) for row in rows] this_payload.update({'columns': columns}) table_payloads.append(this_payload) From 465f214cc0c37a239ab1955528b0a4119c43463b Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Thu, 20 Jul 2023 14:33:00 +0000 Subject: [PATCH 73/86] Correct json objects for indexes, foreign keys --- postgres/datadog_checks/postgres/metadata.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index 02fcf66ccd569..d64fff553c7ae 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -348,7 +348,8 @@ def _query_table_information_for_schema( if table["hasindexes"]: cursor.execute(PG_INDEXES_QUERY.format(tablename=name)) rows = cursor.fetchall() - this_payload.update({'indexes': rows}) + idxs = [dict(row) for row in rows] + this_payload.update({'indexes': idxs}) if table['has_partitions']: cursor.execute(PARTITION_KEY_QUERY.format(parent=name)) @@ -368,13 +369,14 @@ def _query_table_information_for_schema( rows = cursor.fetchall() self._log.warning("foreign keys {}".format(rows)) if rows: - this_payload.update({'foreign_keys': rows}) + fks = [dict(row) for row in rows] + this_payload.update({'foreign_keys': fks}) # Get columns cursor.execute(COLUMNS_QUERY.format(tablename=name)) rows = cursor.fetchall() self._log.warning(rows) - # columns = [dict(row) for row in rows] + columns = [dict(row) for row in rows] this_payload.update({'columns': columns}) table_payloads.append(this_payload) From 75aaddb3aa3c6aa5840ae57fce0f9ba1359f3715 Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Wed, 2 Aug 2023 22:39:54 +0000 Subject: [PATCH 74/86] Updating queries and toast table getting --- postgres/datadog_checks/postgres/metadata.py | 50 +++++++++++++------- postgres/tests/test_metadata.py | 10 ++-- 2 files changed, 39 insertions(+), 21 deletions(-) diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index d64fff553c7ae..141bbd59c2396 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -48,21 +48,26 @@ """ PG_TABLES_QUERY = """ -SELECT tablename as name, hasindexes, c.relowner::regrole AS owner, -(CASE WHEN c.relkind = 'p' THEN true ELSE false 
END) AS has_partitions, -(CASE WHEN pg_relation_size(c.reltoastrelid) > 500000 THEN t.relname ELSE null END) AS toast_table -FROM pg_tables st -LEFT JOIN pg_class c ON relname = tablename -LEFT JOIN pg_class t on c.reltoastrelid = t.oid -WHERE c.relkind IN ('r', 'p') -AND c.relispartition != 't' -AND schemaname = '{schemaname}'; +SELECT c.relname AS name, + c.relhasindex AS hasindexes, + c.relowner :: regrole AS owner, + ( CASE + WHEN c.relkind = 'p' THEN TRUE + ELSE FALSE + END ) AS has_partitions, + t.relname AS toast_table +FROM pg_class c + LEFT JOIN pg_class t + ON c.reltoastrelid = t.oid +WHERE c.relkind IN ( 'r', 'p' ) + AND c.relispartition != 't' + AND c.relnamespace= '{schemaname}'::regnamespace; """ SCHEMA_QUERY = """ - SELECT nspname as name, nspowner::regrole as owner FROM - pg_namespace - WHERE nspname not in ('information_schema', 'pg_catalog') +SELECT nspname as name, nspowner::regrole as owner FROM +pg_namespace +WHERE nspname not in ('information_schema', 'pg_catalog') AND nspname NOT LIKE 'pg_toast%' and nspname NOT LIKE 'pg_temp_%'; """ @@ -72,6 +77,13 @@ WHERE tablename LIKE '{tablename}'; """ +PG_CHECK_FOR_FOREIGN_KEY = """ +SELECT count(conname) +FROM pg_constraint +WHERE contype = 'f' + AND conrelid = '{tablename}' :: regclass; +""" + PG_CONSTRAINTS_QUERY = """ SELECT conname AS name, pg_get_constraintdef(oid) as definition FROM pg_constraint @@ -362,15 +374,17 @@ def _query_table_information_for_schema( this_payload.update({'num_partitions': row['num_partitions']}) if table['toast_table'] is not None: - this_payload.update({'toast_table': row['toast_table']}) + this_payload.update({'toast_table': table['toast_table']}) # Get foreign keys - cursor.execute(PG_CONSTRAINTS_QUERY.format(tablename=table['name'])) + cursor.execute(PG_CHECK_FOR_FOREIGN_KEY.format(tablename=table['name'])) rows = cursor.fetchall() - self._log.warning("foreign keys {}".format(rows)) - if rows: - fks = [dict(row) for row in rows] - this_payload.update({'foreign_keys': fks}) + if len(rows) > 0: + cursor.execute(PG_CONSTRAINTS_QUERY.format(tablename=table['name'])) + self._log.warning("foreign keys {}".format(rows)) + if rows: + fks = [dict(row) for row in rows] + this_payload.update({'foreign_keys': fks}) # Get columns cursor.execute(COLUMNS_QUERY.format(tablename=name)) diff --git a/postgres/tests/test_metadata.py b/postgres/tests/test_metadata.py index da28b8478b1aa..2952b1dbbbf08 100644 --- a/postgres/tests/test_metadata.py +++ b/postgres/tests/test_metadata.py @@ -83,11 +83,15 @@ def test_get_table_info_relations_enabled(integration_check, dbm_instance, aggre assert 'public' == schema_metadata['name'] # check that all expected tables are present - tables_set = {"test_part", 'persons', "personsdup1", "personsdup2", "pgtable", "pg_newtable"} + tables_set = {'persons', "personsdup1", "personsdup2", "pgtable", "pg_newtable"} tables_not_reported_set = {'test_part1', 'test_part2'} - - # TODO if version is 9 or 10, partitions are not in table, check that for table in schema_metadata['tables']: assert tables_set.remove(table['name']) is None assert table['name'] not in tables_not_reported_set + assert tables_set == set() + + # TODO if version isn't 9 or 10, check that partition master is in table + if not (POSTGRES_VERSION.split('.')[0] == 9) and not (POSTGRES_VERSION.split('.')[0] == 10): + assert "test_part" in schema_metadata['tables'] + From 298cf1b298123fc1f1c439a0752fbf23587db88a Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Wed, 2 Aug 2023 22:48:19 +0000 Subject: [PATCH 75/86] Config 
update --- postgres/assets/configuration/spec.yaml | 7 ++++--- postgres/datadog_checks/postgres/config.py | 5 +++++ .../postgres/data/conf.yaml.example | 21 +++++++++++++++++++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/postgres/assets/configuration/spec.yaml b/postgres/assets/configuration/spec.yaml index 919d02e3089f9..9efa538583c77 100644 --- a/postgres/assets/configuration/spec.yaml +++ b/postgres/assets/configuration/spec.yaml @@ -512,7 +512,8 @@ files: value: type: number example: 600 - - name: collect_schemas + + - name: collect_schemas description: | Enable collection of database schemas. In order to collect schemas from all user databases, enable `database_autodiscovery`. To collect from a single database, set `dbname` to collect @@ -520,11 +521,11 @@ files: options: - name: enabled description: | - Enable collection of database schemas. Requires `dbm: true`. + Enable collection of database schemas. Requires `dbm: true` and relation metrics must be enabled. value: type: boolean example: false - - name: enabled + - name: max_tables description: | Maximum amount of tables the agent will collect from the instance. value: diff --git a/postgres/datadog_checks/postgres/config.py b/postgres/datadog_checks/postgres/config.py index a84ff5e65db82..027b5436711bf 100644 --- a/postgres/datadog_checks/postgres/config.py +++ b/postgres/datadog_checks/postgres/config.py @@ -101,6 +101,11 @@ def __init__(self, instance): self.statement_samples_config = instance.get('query_samples', instance.get('statement_samples', {})) or {} self.settings_metadata_config = instance.get('collect_settings', {}) or {} self.schemas_metadata_config = instance.get('collect_schemas', {}) or {} + if not self.relations and self.schemas_metadata_config['enabled']: + raise ConfigurationError( + 'In order to collect schemas on this database, you must enable relation metrics collection.' + ) + self.resources_metadata_config = instance.get('collect_resources', {}) or {} self.statement_activity_config = instance.get('query_activity', {}) or {} self.statement_metrics_config = instance.get('query_metrics', {}) or {} diff --git a/postgres/datadog_checks/postgres/data/conf.yaml.example b/postgres/datadog_checks/postgres/data/conf.yaml.example index c43a6360e64b1..0b817b0790df2 100644 --- a/postgres/datadog_checks/postgres/data/conf.yaml.example +++ b/postgres/datadog_checks/postgres/data/conf.yaml.example @@ -406,6 +406,27 @@ instances: # # collection_interval: 600 + ## Enable collection of database schemas. In order to collect schemas from all user databases, + ## enable `database_autodiscovery`. To collect from a single database, set `dbname` to collect + ## the schema for that database. + # + # collect_schemas: + + ## @param enabled - boolean - optional - default: false + ## Enable collection of database schemas. Requires `dbm: true` and relation metrics must be enabled. + # + # enabled: false + + ## @param max_tables - number - optional - default: 1000 + ## Maximum amount of tables the agent will collect from the instance. + # + # max_tables: 1000 + + ## @param collection_interval - number - optional - default: 600 + ## The database schema collection interval (in seconds). + # + # collection_interval: 600 + ## This block defines the configuration for AWS RDS and Aurora instances. 
## ## Complete this section if you have installed the Datadog AWS Integration From 679f0aead25a532f3acf03b007280ca549c2952f Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Thu, 3 Aug 2023 19:58:05 +0000 Subject: [PATCH 76/86] Updating to include OIDs in return, and don't calculate fk for tables that don't have them --- postgres/assets/configuration/spec.yaml | 1 + postgres/datadog_checks/postgres/config.py | 2 +- .../postgres/data/conf.yaml.example | 1 + postgres/datadog_checks/postgres/metadata.py | 32 ++++++++++----- .../tests/compose/resources/03_load_data.sh | 6 ++- postgres/tests/test_metadata.py | 41 +++++++++++++------ 6 files changed, 57 insertions(+), 26 deletions(-) diff --git a/postgres/assets/configuration/spec.yaml b/postgres/assets/configuration/spec.yaml index 9efa538583c77..86f0abbdebad1 100644 --- a/postgres/assets/configuration/spec.yaml +++ b/postgres/assets/configuration/spec.yaml @@ -518,6 +518,7 @@ files: Enable collection of database schemas. In order to collect schemas from all user databases, enable `database_autodiscovery`. To collect from a single database, set `dbname` to collect the schema for that database. + Relation metrics must be enabled for schema collection. options: - name: enabled description: | diff --git a/postgres/datadog_checks/postgres/config.py b/postgres/datadog_checks/postgres/config.py index 027b5436711bf..af485c90d607e 100644 --- a/postgres/datadog_checks/postgres/config.py +++ b/postgres/datadog_checks/postgres/config.py @@ -100,7 +100,7 @@ def __init__(self, instance): self.pg_stat_activity_view = instance.get('pg_stat_activity_view', 'pg_stat_activity') self.statement_samples_config = instance.get('query_samples', instance.get('statement_samples', {})) or {} self.settings_metadata_config = instance.get('collect_settings', {}) or {} - self.schemas_metadata_config = instance.get('collect_schemas', {}) or {} + self.schemas_metadata_config = instance.get('collect_schemas', {"enabled": False}) if not self.relations and self.schemas_metadata_config['enabled']: raise ConfigurationError( 'In order to collect schemas on this database, you must enable relation metrics collection.' diff --git a/postgres/datadog_checks/postgres/data/conf.yaml.example b/postgres/datadog_checks/postgres/data/conf.yaml.example index 0b817b0790df2..8f3415f35fab1 100644 --- a/postgres/datadog_checks/postgres/data/conf.yaml.example +++ b/postgres/datadog_checks/postgres/data/conf.yaml.example @@ -409,6 +409,7 @@ instances: ## Enable collection of database schemas. In order to collect schemas from all user databases, ## enable `database_autodiscovery`. To collect from a single database, set `dbname` to collect ## the schema for that database. + ## Relation metrics must be enabled for schema collection. 
# # collect_schemas: diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index 141bbd59c2396..b83be154a4d88 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -48,7 +48,8 @@ """ PG_TABLES_QUERY = """ -SELECT c.relname AS name, +SELECT c.oid as id, + c.relname AS name, c.relhasindex AS hasindexes, c.relowner :: regrole AS owner, ( CASE @@ -65,7 +66,7 @@ """ SCHEMA_QUERY = """ -SELECT nspname as name, nspowner::regrole as owner FROM +SELECT oid as id, nspname as name, nspowner::regrole as owner FROM pg_namespace WHERE nspname not in ('information_schema', 'pg_catalog') AND nspname NOT LIKE 'pg_toast%' and nspname NOT LIKE 'pg_temp_%'; @@ -263,7 +264,7 @@ def _query_database_information( Collect database info. Returns description: str name: str - id: int + id: str encoding: str owner: str """ @@ -275,12 +276,17 @@ def _query_database_information( def _query_schema_information(self, cursor: psycopg2.extensions.cursor, dbname: str) -> Dict[str, str]: """ Collect user schemas. Returns + id: str name: str owner: str """ cursor.execute(SCHEMA_QUERY) rows = cursor.fetchall() - schemas = [dict(row) for row in rows] + schemas = [] + for row in rows: + schemas.append({"id": str(row['id']), + "name": row['name'], + "owner": row['owner']}) return schemas def _get_table_info(self, cursor, dbname, schemaname, limit): @@ -298,8 +304,11 @@ def _get_table_info(self, cursor, dbname, schemaname, limit): return self._sort_and_limit_table_info(cursor, dbname, table_info, limit) else: - raise NotImplementedError() - # table_info = cursor.execute(PG_STAT_TABLES_QUERY.format(schemaname=schemaname)) + # Config error should catch the case where schema collection is enabled + # and relation metrics aren't, but adding a warning here just in case + self._check.log.warning( + "Relation metrics are not configured for {}, so tables cannot be collected".format(dbname) + ) def _sort_and_limit_table_info( self, cursor, dbname, table_info: List[Dict[str, Union[str, bool]]], limit: int @@ -332,6 +341,7 @@ def _query_table_information_for_schema( """ Collect table information per schema. 
Returns a list of dictionaries with key/values: + "id": str "name": str "owner": str "foreign_keys": dict (if has foreign keys) @@ -345,7 +355,7 @@ def _query_table_information_for_schema( data_type: str default: str nullable: bool - "toast_table": str (if associated toast table is > 500kb) + "toast_table": str (if associated toast table exists) "partition_key": str (if has partitions) "num_partitions": int (if has partitions) """ @@ -355,7 +365,8 @@ def _query_table_information_for_schema( for table in tables_info: this_payload = {} name = table['name'] - self._log.warning("Parsing table {}".format(name)) + self._log.debug("Parsing table {}".format(name)) + this_payload.update({'id': str(table['id'])}) this_payload.update({'name': name}) if table["hasindexes"]: cursor.execute(PG_INDEXES_QUERY.format(tablename=name)) @@ -378,9 +389,10 @@ def _query_table_information_for_schema( # Get foreign keys cursor.execute(PG_CHECK_FOR_FOREIGN_KEY.format(tablename=table['name'])) - rows = cursor.fetchall() - if len(rows) > 0: + row = cursor.fetchone() + if row['count'] > 0: cursor.execute(PG_CONSTRAINTS_QUERY.format(tablename=table['name'])) + rows = cursor.fetchall() self._log.warning("foreign keys {}".format(rows)) if rows: fks = [dict(row) for row in rows] diff --git a/postgres/tests/compose/resources/03_load_data.sh b/postgres/tests/compose/resources/03_load_data.sh index 489d25d942430..c7eb5f8786b2f 100755 --- a/postgres/tests/compose/resources/03_load_data.sh +++ b/postgres/tests/compose/resources/03_load_data.sh @@ -1,8 +1,10 @@ #!/bin/bash set -e -psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" datadog_test <<-EOSQL - CREATE TABLE persons (personid SERIAL, lastname VARCHAR(255), firstname VARCHAR(255), address VARCHAR(255), city VARCHAR(255) DEFAULT 'New York'); +psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" datadog_test <<-EOSQL + CREATE TABLE cities (city VARCHAR(255), country VARCHAR(255), PRIMARY KEY(city)); + INSERT INTO cities VALUES ('New York', 'USA'), ('Beautiful city of lights', 'France'); + CREATE TABLE persons (personid SERIAL, lastname VARCHAR(255), firstname VARCHAR(255), address VARCHAR(255), city VARCHAR(255) DEFAULT 'New York', CONSTRAINT fk_city FOREIGN KEY (city) REFERENCES cities(city)); INSERT INTO persons (lastname, firstname, address, city) VALUES ('Cavaille', 'Leo', 'Midtown', 'New York'), ('Someveryveryveryveryveryveryveryveryveryverylongname', 'something', 'Avenue des Champs Elysees', 'Beautiful city of lights'); CREATE TABLE personsdup1 (personid SERIAL, lastname VARCHAR(255), firstname VARCHAR(255), address VARCHAR(255), city VARCHAR(255)); INSERT INTO personsdup1 (lastname, firstname, address, city) VALUES ('Cavaille', 'Leo', 'Midtown', 'New York'), ('Someveryveryveryveryveryveryveryveryveryverylongname', 'something', 'Avenue des Champs Elysees', 'Beautiful city of lights'); diff --git a/postgres/tests/test_metadata.py b/postgres/tests/test_metadata.py index 2952b1dbbbf08..f6c005ad16e76 100644 --- a/postgres/tests/test_metadata.py +++ b/postgres/tests/test_metadata.py @@ -8,6 +8,7 @@ from datadog_checks.base.utils.db.utils import DBMAsyncJob from .utils import run_one_check +from .common import POSTGRES_VERSION pytestmark = [pytest.mark.integration, pytest.mark.usefixtures('dd_environment')] @@ -77,21 +78,35 @@ def test_get_table_info_relations_enabled(integration_check, dbm_instance, aggre schema_event = event # there should only be one database, datadog_test - database_metadata = schema_event['metadata'][0] - assert 'datadog_test' == 
database_metadata['name'] - schema_metadata = database_metadata['schemas'][0] - assert 'public' == schema_metadata['name'] + database_metadata = schema_event['metadata'] + assert len(database_metadata) == 1 + assert 'datadog_test' == database_metadata[0]['name'] + + # there should only two schemas, 'public' and 'datadog'. datadog is empty + schema_metadata_public = database_metadata[0]['schemas'][0] + schema_metadata_datadog = database_metadata[0]['schemas'][1] + assert 'public' == schema_metadata_public['name'] + assert 'datadog' == schema_metadata_datadog['name'] # check that all expected tables are present - tables_set = {'persons', "personsdup1", "personsdup2", "pgtable", "pg_newtable"} + tables_set = {'persons', "personsdup1", "personsdup2", "pgtable", "pg_newtable", "cities"} + # if version isn't 9 or 10, check that partition master is in tables + if not (POSTGRES_VERSION.split('.')[0] == 9) and not (POSTGRES_VERSION.split('.')[0] == 10): + tables_set.update({'test_part'}) tables_not_reported_set = {'test_part1', 'test_part2'} - for table in schema_metadata['tables']: - assert tables_set.remove(table['name']) is None - assert table['name'] not in tables_not_reported_set - - assert tables_set == set() - # TODO if version isn't 9 or 10, check that partition master is in table - if not (POSTGRES_VERSION.split('.')[0] == 9) and not (POSTGRES_VERSION.split('.')[0] == 10): - assert "test_part" in schema_metadata['tables'] + tables_got = [] + for table in schema_metadata_public['tables']: + tables_got.append(table['name']) + + + for table in tables_got: + assert table in tables_set + assert table not in tables_not_reported_set + + assert None is not None + + + + From 727ea75eaedbc2f932c5d75b421df13607c0b4b4 Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Thu, 3 Aug 2023 20:24:29 +0000 Subject: [PATCH 77/86] Update some comments --- postgres/datadog_checks/postgres/metadata.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index b83be154a4d88..38c20632f4ff0 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -291,11 +291,10 @@ def _query_schema_information(self, cursor: psycopg2.extensions.cursor, dbname: def _get_table_info(self, cursor, dbname, schemaname, limit): """ - If relation metrics is enabled, sort tables by the number of total accesses (index_rel_scans + seq_scans). - If they are not enabled, the table list will be retrieved from pg_stat_all_tables and sorted in the query. + Tables will be sorted by the number of total accesses (index_rel_scans + seq_scans) and truncated to + the max_tables limit. - If any tables are partitioned, the partitioned table will be returned and not counted against the limit. - However, partitions of the table are counted against the limit. + If any tables are partitioned, only the master paritition table name will be returned, and none of its children. 
""" if self._config.relations: cursor.execute(PG_TABLES_QUERY.format(schemaname=schemaname)) @@ -329,9 +328,6 @@ def sort_tables(info): return row['total_activity'] # if relation metrics are enabled, sorted based on last activity information - table_metrics_cache = self._check.metrics_cache.table_activity_metrics - self._log.warning(table_metrics_cache) - table_info = sorted(table_info, key=sort_tables, reverse=True) return table_info[:limit] From e6d3e88beba128ed1a24f22e44078fd73c12b294 Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Thu, 3 Aug 2023 20:49:12 +0000 Subject: [PATCH 78/86] Make test mor specific --- postgres/datadog_checks/postgres/metadata.py | 29 ++-------- postgres/tests/test_metadata.py | 61 +++++++++----------- 2 files changed, 31 insertions(+), 59 deletions(-) diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index 38c20632f4ff0..3cce218d3006d 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -33,20 +33,6 @@ WHERE datname LIKE '{dbname}'; """ - -PG_STAT_TABLES_QUERY = """ -SELECT st.relname as name,seq_scan,idx_scan,c.relhasindex as hasindexes,c.relowner::regrole as owner, -(CASE WHEN c.relkind = 'p' THEN true ELSE false END) AS has_partitions, -(CASE WHEN pg_relation_size(c.reltoastrelid) > 500000 THEN t.relname ELSE null END) AS toast_table -FROM pg_class c -LEFT JOIN pg_stat_all_tables st ON c.relname = st.relname -LEFT JOIN pg_class t on c.reltoastrelid = t.oid -WHERE schemaname = '{schemaname}' -AND c.relkind IN ('r', 'p') -AND c.relispartition != 't' -ORDER BY coalesce(seq_scan, 0) + coalesce(idx_scan, 0) DESC; -""" - PG_TABLES_QUERY = """ SELECT c.oid as id, c.relname AS name, @@ -56,13 +42,13 @@ WHEN c.relkind = 'p' THEN TRUE ELSE FALSE END ) AS has_partitions, - t.relname AS toast_table +t.relname AS toast_table FROM pg_class c LEFT JOIN pg_class t ON c.reltoastrelid = t.oid WHERE c.relkind IN ( 'r', 'p' ) AND c.relispartition != 't' - AND c.relnamespace= '{schemaname}'::regnamespace; + AND c.relnamespace= '{schemaname}'::regnamespace; """ SCHEMA_QUERY = """ @@ -252,10 +238,9 @@ def _collect_schema_info(self): metadata = [] for database in databases: metadata.append(self._collect_metadata_for_database(database)) - + self._time_since_last_schemas_query = time.time() return metadata - def _query_database_information( self, cursor: psycopg2.extensions.cursor, dbname: str @@ -284,9 +269,7 @@ def _query_schema_information(self, cursor: psycopg2.extensions.cursor, dbname: rows = cursor.fetchall() schemas = [] for row in rows: - schemas.append({"id": str(row['id']), - "name": row['name'], - "owner": row['owner']}) + schemas.append({"id": str(row['id']), "name": row['name'], "owner": row['owner']}) return schemas def _get_table_info(self, cursor, dbname, schemaname, limit): @@ -305,9 +288,7 @@ def _get_table_info(self, cursor, dbname, schemaname, limit): else: # Config error should catch the case where schema collection is enabled # and relation metrics aren't, but adding a warning here just in case - self._check.log.warning( - "Relation metrics are not configured for {}, so tables cannot be collected".format(dbname) - ) + self._check.log.warning("Relation metrics are not configured for {dbname}, so tables cannot be collected") def _sort_and_limit_table_info( self, cursor, dbname, table_info: List[Dict[str, Union[str, bool]]], limit: int diff --git a/postgres/tests/test_metadata.py b/postgres/tests/test_metadata.py index f6c005ad16e76..c3a756a2b1823 
100644 --- a/postgres/tests/test_metadata.py +++ b/postgres/tests/test_metadata.py @@ -2,13 +2,13 @@ # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) from concurrent.futures.thread import ThreadPoolExecutor - +from typing import List import pytest from datadog_checks.base.utils.db.utils import DBMAsyncJob -from .utils import run_one_check from .common import POSTGRES_VERSION +from .utils import run_one_check pytestmark = [pytest.mark.integration, pytest.mark.usefixtures('dd_environment')] @@ -43,28 +43,7 @@ def test_collect_metadata(integration_check, dbm_instance, aggregator): assert len(event["metadata"]) > 0 -@pytest.mark.integration -@pytest.mark.usefixtures('dd_environment') def test_collect_schemas(integration_check, dbm_instance, aggregator): - dbm_instance["collect_schemas"] = {'enabled': True, 'collection_interval': 0.5} - dbm_instance['relations'] = [{'relation_regex': ".*"}] - dbm_instance["database_autodiscovery"] = {"enabled": True, "include": ["datadog"]} - del dbm_instance['dbname'] - check = integration_check(dbm_instance) - run_one_check(check, dbm_instance) - dbm_metadata = aggregator.get_event_platform_events("dbm-metadata") - schema_event = None - for event in dbm_metadata: - if event['kind'] == "pg_databases": - schema_event = event - assert schema_event is not None - assert schema_event['host'] == "stubbed.hostname" - assert schema_event['dbms'] == "postgres" - assert schema_event['kind'] == "pg_databases" - assert len(event["metadata"]) > 0 - - -def test_get_table_info_relations_enabled(integration_check, dbm_instance, aggregator): dbm_instance["collect_schemas"] = {'enabled': True, 'collection_interval': 0.5} dbm_instance['relations'] = [{'relation_regex': ".*"}] dbm_instance["database_autodiscovery"] = {"enabled": True, "include": ["datadog"]} @@ -91,7 +70,7 @@ def test_get_table_info_relations_enabled(integration_check, dbm_instance, aggre # check that all expected tables are present tables_set = {'persons', "personsdup1", "personsdup2", "pgtable", "pg_newtable", "cities"} # if version isn't 9 or 10, check that partition master is in tables - if not (POSTGRES_VERSION.split('.')[0] == 9) and not (POSTGRES_VERSION.split('.')[0] == 10): + if not (POSTGRES_VERSION.split('.')[0] == 9) and not (POSTGRES_VERSION.split('.')[0] == 10): tables_set.update({'test_part'}) tables_not_reported_set = {'test_part1', 'test_part2'} @@ -99,14 +78,26 @@ def test_get_table_info_relations_enabled(integration_check, dbm_instance, aggre for table in schema_metadata_public['tables']: tables_got.append(table['name']) - - for table in tables_got: - assert table in tables_set - assert table not in tables_not_reported_set - - assert None is not None - - - - - + # make some assertions on fields + if table['name'] == "persons": + # check that foreign keys, indexes get reported + keys = list(table.keys()) + assert_fields(keys, ["foreign_keys", "columns", "toast_table", "id","name"]) + assert_fields(list(table['foreign_keys'][0].keys()), ['name', 'definition']) + assert_fields(list(table['columns'][0].keys()), ['name', 'nullable', 'data_type', 'default',]) + if table['name'] == "cities": + keys = list(table.keys()) + assert_fields(keys, ["indexes", "columns", "toast_table", "id", "name"]) + assert_fields(list(table['indexes'][0].keys()), ['name', 'definition']) + + assert_fields(tables_got, tables_set) + assert_not_fields(tables_got, tables_not_reported_set) + + +def assert_fields(keys: List[str], fields: List[str]): + for field in fields: + assert field in keys + 
+def assert_not_fields(keys: List[str], fields: List[str]): + for field in fields: + assert field not in keys \ No newline at end of file From a19b9a60f625640a82ec7d8416ab25cd5b740f9e Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Thu, 3 Aug 2023 20:53:49 +0000 Subject: [PATCH 79/86] Update config to limit columns --- postgres/assets/configuration/spec.yaml | 7 +++++++ postgres/datadog_checks/postgres/data/conf.yaml.example | 5 +++++ postgres/datadog_checks/postgres/metadata.py | 6 +++--- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/postgres/assets/configuration/spec.yaml b/postgres/assets/configuration/spec.yaml index 86f0abbdebad1..a3fd166ca466c 100644 --- a/postgres/assets/configuration/spec.yaml +++ b/postgres/assets/configuration/spec.yaml @@ -533,6 +533,13 @@ files: type: number example: 1000 display_default: 1000 + - name: max_columns + description: | + Maximum amount of columns the agent will collect per table. + value: + type: number + example: 50 + display_default: 50 - name: collection_interval description: | The database schema collection interval (in seconds). diff --git a/postgres/datadog_checks/postgres/data/conf.yaml.example b/postgres/datadog_checks/postgres/data/conf.yaml.example index 8f3415f35fab1..ce4a5f896f4d3 100644 --- a/postgres/datadog_checks/postgres/data/conf.yaml.example +++ b/postgres/datadog_checks/postgres/data/conf.yaml.example @@ -423,6 +423,11 @@ instances: # # max_tables: 1000 + ## @param max_columns - number - optional - default: 50 + ## Maximum amount of columns the agent will collect per table. + # + # max_columns: 50 + ## @param collection_interval - number - optional - default: 600 ## The database schema collection interval (in seconds). # diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index 3cce218d3006d..1d8eee24075cb 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -377,9 +377,9 @@ def _query_table_information_for_schema( # Get columns cursor.execute(COLUMNS_QUERY.format(tablename=name)) - rows = cursor.fetchall() - self._log.warning(rows) - columns = [dict(row) for row in rows] + rows = cursor.fetchall()[:] + max_columns = self._config.schemas_metadata_config.get('max_columns', 50) + columns = [dict(row) for row in rows][:max_columns] this_payload.update({'columns': columns}) table_payloads.append(this_payload) From 4522c44fadef0b5c7ae4e729cb796af009f70bcd Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Thu, 3 Aug 2023 20:58:15 +0000 Subject: [PATCH 80/86] Formatting --- postgres/datadog_checks/postgres/metadata.py | 101 +++++++++++-------- postgres/tests/test_metadata.py | 16 ++- 2 files changed, 72 insertions(+), 45 deletions(-) diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index 1d8eee24075cb..b2be910546945 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -26,89 +26,106 @@ """ DATABASE_INFORMATION_QUERY = """ -SELECT db.oid as id, datname as name, pg_encoding_to_char(encoding) as encoding, rolname as owner, description - FROM pg_catalog.pg_database db - LEFT JOIN pg_catalog.pg_description dc ON dc.objoid = db.oid - JOIN pg_roles a on datdba = a.oid - WHERE datname LIKE '{dbname}'; +SELECT db.oid AS id, + datname AS NAME, + pg_encoding_to_char(encoding) AS encoding, + rolname AS owner, + description +FROM pg_catalog.pg_database db + LEFT JOIN pg_catalog.pg_description dc + ON 
dc.objoid = db.oid + JOIN pg_roles a + ON datdba = a.oid +WHERE datname LIKE '{dbname}'; """ PG_TABLES_QUERY = """ -SELECT c.oid as id, - c.relname AS name, +SELECT c.oid AS id, + c.relname AS name, c.relhasindex AS hasindexes, c.relowner :: regrole AS owner, ( CASE WHEN c.relkind = 'p' THEN TRUE ELSE FALSE END ) AS has_partitions, -t.relname AS toast_table -FROM pg_class c - LEFT JOIN pg_class t + t.relname AS toast_table +FROM pg_class c + left join pg_class t ON c.reltoastrelid = t.oid WHERE c.relkind IN ( 'r', 'p' ) AND c.relispartition != 't' - AND c.relnamespace= '{schemaname}'::regnamespace; + AND c.relnamespace = '{schemaname}' :: regnamespace; """ SCHEMA_QUERY = """ -SELECT oid as id, nspname as name, nspowner::regrole as owner FROM -pg_namespace -WHERE nspname not in ('information_schema', 'pg_catalog') - AND nspname NOT LIKE 'pg_toast%' and nspname NOT LIKE 'pg_temp_%'; +SELECT oid AS id, + nspname AS name, + nspowner :: regrole AS owner +FROM pg_namespace +WHERE nspname NOT IN ( 'information_schema', 'pg_catalog' ) + AND nspname NOT LIKE 'pg_toast%' + AND nspname NOT LIKE 'pg_temp_%'; """ PG_INDEXES_QUERY = """ -SELECT indexname as name, indexdef as definition -FROM pg_indexes -WHERE tablename LIKE '{tablename}'; +SELECT indexname AS NAME, + indexdef AS definition +FROM pg_indexes +WHERE tablename LIKE '{tablename}'; """ PG_CHECK_FOR_FOREIGN_KEY = """ SELECT count(conname) FROM pg_constraint WHERE contype = 'f' - AND conrelid = '{tablename}' :: regclass; + AND conrelid = '{tablename}' :: regclass; """ PG_CONSTRAINTS_QUERY = """ -SELECT conname AS name, pg_get_constraintdef(oid) as definition +SELECT conname AS name, + pg_get_constraintdef(oid) AS definition FROM pg_constraint WHERE contype = 'f' -AND conrelid = -'{tablename}'::regclass; + AND conrelid = '{tablename}' :: regclass; """ COLUMNS_QUERY = """ -SELECT attname as name, -format_type(atttypid, atttypmod) AS data_type, -NOT attnotnull as nullable, pg_get_expr(adbin, adrelid) as default -FROM pg_attribute LEFT JOIN pg_attrdef ad ON adrelid=attrelid AND adnum=attnum -WHERE attrelid = '{tablename}'::regclass -AND attnum > 0 -AND NOT attisdropped; +SELECT attname AS name, + Format_type(atttypid, atttypmod) AS data_type, + NOT attnotnull AS nullable, + pg_get_expr(adbin, adrelid) AS default +FROM pg_attribute + LEFT JOIN pg_attrdef ad + ON adrelid = attrelid + AND adnum = attnum +WHERE attrelid = '{tablename}' :: regclass + AND attnum > 0 + AND NOT attisdropped; """ PARTITION_KEY_QUERY = """ - SELECT relname, pg_get_partkeydef(oid) as partition_key -FROM pg_class WHERE '{parent}' = relname; +SELECT relname, + pg_get_partkeydef(oid) AS partition_key +FROM pg_class +WHERE '{parent}' = relname; """ NUM_PARTITIONS_QUERY = """ -SELECT count(inhrelid::regclass) as num_partitions - FROM pg_inherits - WHERE inhparent = '{parent}'::regclass::oid +SELECT count(inhrelid :: regclass) AS num_partitions +FROM pg_inherits +WHERE inhparent = '{parent}' :: regclass :: oid """ PARTITION_ACTIVITY_QUERY = """ -SELECT - pi.inhparent::regclass AS parent_table_name, - SUM(psu.seq_scan + psu.idx_scan) AS total_activity -FROM pg_catalog.pg_stat_user_tables psu - JOIN pg_class pc ON psu.relname = pc.relname - JOIN pg_inherits pi ON pi.inhrelid = pc.oid -WHERE pi.inhparent = '{parent}'::regclass::oid -GROUP BY pi.inhparent; +SELECT pi.inhparent :: regclass AS parent_table_name, + SUM(psu.seq_scan + psu.idx_scan) AS total_activity +FROM pg_catalog.pg_stat_user_tables psu + join pg_class pc + ON psu.relname = pc.relname + join pg_inherits pi + ON 
pi.inhrelid = pc.oid +WHERE pi.inhparent = '{parent}' :: regclass :: oid +GROUP BY pi.inhparent; """ diff --git a/postgres/tests/test_metadata.py b/postgres/tests/test_metadata.py index c3a756a2b1823..8758791559502 100644 --- a/postgres/tests/test_metadata.py +++ b/postgres/tests/test_metadata.py @@ -3,6 +3,7 @@ # Licensed under a 3-clause BSD style license (see LICENSE) from concurrent.futures.thread import ThreadPoolExecutor from typing import List + import pytest from datadog_checks.base.utils.db.utils import DBMAsyncJob @@ -82,9 +83,17 @@ def test_collect_schemas(integration_check, dbm_instance, aggregator): if table['name'] == "persons": # check that foreign keys, indexes get reported keys = list(table.keys()) - assert_fields(keys, ["foreign_keys", "columns", "toast_table", "id","name"]) + assert_fields(keys, ["foreign_keys", "columns", "toast_table", "id", "name"]) assert_fields(list(table['foreign_keys'][0].keys()), ['name', 'definition']) - assert_fields(list(table['columns'][0].keys()), ['name', 'nullable', 'data_type', 'default',]) + assert_fields( + list(table['columns'][0].keys()), + [ + 'name', + 'nullable', + 'data_type', + 'default', + ], + ) if table['name'] == "cities": keys = list(table.keys()) assert_fields(keys, ["indexes", "columns", "toast_table", "id", "name"]) @@ -98,6 +107,7 @@ def assert_fields(keys: List[str], fields: List[str]): for field in fields: assert field in keys + def assert_not_fields(keys: List[str], fields: List[str]): for field in fields: - assert field not in keys \ No newline at end of file + assert field not in keys From 65d28f8eff96739f455ab7707c1a04c254d6b40c Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Thu, 3 Aug 2023 21:03:01 +0000 Subject: [PATCH 81/86] Remove excess warnings and prints --- postgres/datadog_checks/postgres/metadata.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index b2be910546945..9eef1c650e1de 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -233,7 +233,7 @@ def report_postgres_metadata(self): "cloud_metadata": self._config.cloud_metadata, } json_event = json.dumps(event, default=default_json_event_encoding) - self._log.debug("Reporting the following payload: {}".format(json_event)) + self._log.debug("Reporting the following payload for schema collection: {}".format(json_event)) self._check.database_monitoring_metadata(json_event) def _payload_pg_version(self): @@ -272,7 +272,6 @@ def _query_database_information( """ cursor.execute(DATABASE_INFORMATION_QUERY.format(dbname=dbname)) row = cursor.fetchone() - print(row) return row def _query_schema_information(self, cursor: psycopg2.extensions.cursor, dbname: str) -> Dict[str, str]: @@ -353,13 +352,12 @@ def _query_table_information_for_schema( "partition_key": str (if has partitions) "num_partitions": int (if has partitions) """ + max_tables = self._config.schemas_metadata_config.get('max_tables', 1000) tables_info = self._get_table_info(cursor, dbname, schemaname, 1000) - self._log.warning(tables_info) table_payloads = [] for table in tables_info: this_payload = {} name = table['name'] - self._log.debug("Parsing table {}".format(name)) this_payload.update({'id': str(table['id'])}) this_payload.update({'name': name}) if table["hasindexes"]: @@ -371,7 +369,6 @@ def _query_table_information_for_schema( if table['has_partitions']: cursor.execute(PARTITION_KEY_QUERY.format(parent=name)) row 
= cursor.fetchone() - self._log.warning(row) this_payload.update({'partition_key': row['partition_key']}) cursor.execute(NUM_PARTITIONS_QUERY.format(parent=name)) @@ -387,7 +384,6 @@ def _query_table_information_for_schema( if row['count'] > 0: cursor.execute(PG_CONSTRAINTS_QUERY.format(tablename=table['name'])) rows = cursor.fetchall() - self._log.warning("foreign keys {}".format(rows)) if rows: fks = [dict(row) for row in rows] this_payload.update({'foreign_keys': fks}) @@ -418,12 +414,9 @@ def _collect_metadata_for_database(self, dbname): "schemas": [], } ) - self._log.warning(database_info) schema_info = self._query_schema_information(cursor, dbname) - self._log.warning(schema_info) for schema in schema_info: tables_info = self._query_table_information_for_schema(cursor, schema['name'], dbname) - self._log.warning(tables_info) metadata['schemas'].append( {"name": schema['name'], "owner": schema['owner'], "tables": tables_info} ) From 4dbf0efac18a388af3a2a14a2311d8d715452344 Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Thu, 3 Aug 2023 21:10:49 +0000 Subject: [PATCH 82/86] Fix schema id propagation --- postgres/datadog_checks/postgres/metadata.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index fd91b583b4e4e..c6cfdc74c779f 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -352,7 +352,7 @@ def _query_table_information_for_schema( "partition_key": str (if has partitions) "num_partitions": int (if has partitions) """ - max_tables = self._config.schemas_metadata_config.get('max_tables', 1000) + self._config.schemas_metadata_config.get('max_tables', 1000) tables_info = self._get_table_info(cursor, dbname, schemaname, 1000) table_payloads = [] for table in tables_info: @@ -417,9 +417,8 @@ def _collect_metadata_for_database(self, dbname): schema_info = self._query_schema_information(cursor, dbname) for schema in schema_info: tables_info = self._query_table_information_for_schema(cursor, schema['name'], dbname) - metadata['schemas'].append( - {"name": schema['name'], "owner": schema['owner'], "tables": tables_info} - ) + schema.update({"tables": tables_info}) + metadata['schemas'].append(schema) return metadata From 51c34fd96a1d6fe334a355b5c526e622b46f48bd Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Thu, 3 Aug 2023 21:14:17 +0000 Subject: [PATCH 83/86] Clean up caching --- postgres/datadog_checks/postgres/postgres.py | 29 +++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index 252a0110eb398..968019672c403 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -498,24 +498,27 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname=N name, submit_metric = scope['metrics'][column] submit_metric(self, name, value, tags=set(tags), hostname=self.resolved_hostname) - # TODO: if relation-level metrics idx_scan or seq_scan, cache it - if name in ('postgresql.index_scans', 'postgresql.seq_scans'): - db = dbname if self.autodiscovery else self._config.dbname - tablename = desc_map['table'] - if db not in self.metrics_cache.table_activity_metrics.keys(): - self.metrics_cache.table_activity_metrics[db] = {} - if tablename not in self.metrics_cache.table_activity_metrics[db].keys(): - 
self.metrics_cache.table_activity_metrics[db][tablename] = { - 'postgresql.index_scans': 0, - 'postgresql.seq_scans': 0, - } - - self.metrics_cache.table_activity_metrics[db][tablename][name] = value + self._cache_table_activity(dbname, desc_map['table'], name, value) num_results += 1 return num_results + def _cache_table_activity(self, dbname: str, tablename: str, metric_name: str, value: int, ): + # if relation-level metrics idx_scan or seq_scan, cache it + if metric_name in ('postgresql.index_scans', 'postgresql.seq_scans'): + db = dbname if self.autodiscovery else self._config.dbname + tablename = desc_map['table'] + if db not in self.metrics_cache.table_activity_metrics.keys(): + self.metrics_cache.table_activity_metrics[db] = {} + if tablename not in self.metrics_cache.table_activity_metrics[db].keys(): + self.metrics_cache.table_activity_metrics[db][tablename] = { + 'postgresql.index_scans': 0, + 'postgresql.seq_scans': 0, + } + + self.metrics_cache.table_activity_metrics[db][tablename][name] = value + def _collect_relations_autodiscovery(self, instance_tags, relations_scopes): if not self.autodiscovery: return From 5f90ab1fdf7775b08f7da07345f598d2bf919a1d Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Thu, 3 Aug 2023 21:20:42 +0000 Subject: [PATCH 84/86] Formatting, updating to psycopg3 --- postgres/datadog_checks/postgres/metadata.py | 10 +++--- postgres/datadog_checks/postgres/postgres.py | 35 +++++++++++--------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index c6cfdc74c779f..fbfaeda2aaf44 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -259,9 +259,7 @@ def _collect_schema_info(self): self._time_since_last_schemas_query = time.time() return metadata - def _query_database_information( - self, cursor: psycopg2.extensions.cursor, dbname: str - ) -> Dict[str, Union[str, int]]: + def _query_database_information(self, cursor: psycopg.cursor, dbname: str) -> Dict[str, Union[str, int]]: """ Collect database info. Returns description: str @@ -274,7 +272,7 @@ def _query_database_information( row = cursor.fetchone() return row - def _query_schema_information(self, cursor: psycopg2.extensions.cursor, dbname: str) -> Dict[str, str]: + def _query_schema_information(self, cursor: psycopg.cursor, dbname: str) -> Dict[str, str]: """ Collect user schemas. Returns id: str @@ -329,7 +327,7 @@ def sort_tables(info): return table_info[:limit] def _query_table_information_for_schema( - self, cursor: psycopg2.extensions.cursor, schemaname: str, dbname: str + self, cursor: psycopg.cursor, schemaname: str, dbname: str ) -> List[Dict[str, Union[str, Dict]]]: """ Collect table information per schema. 
Returns a list of dictionaries @@ -402,7 +400,7 @@ def _query_table_information_for_schema( def _collect_metadata_for_database(self, dbname): metadata = {} with self.db_pool.get_connection(dbname, self._config.idle_connection_timeout) as conn: - with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: + with conn.cursor(row_factory=dict_row) as cursor: database_info = self._query_database_information(cursor, dbname) metadata.update( { diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index 968019672c403..80fb85b38c1f7 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -498,26 +498,31 @@ def _query_scope(self, cursor, scope, instance_tags, is_custom_metrics, dbname=N name, submit_metric = scope['metrics'][column] submit_metric(self, name, value, tags=set(tags), hostname=self.resolved_hostname) - self._cache_table_activity(dbname, desc_map['table'], name, value) + # if relation-level metrics idx_scan or seq_scan, cache it + if name in ('postgresql.index_scans', 'postgresql.seq_scans'): + self._cache_table_activity(dbname, desc_map['table'], name, value) num_results += 1 return num_results - def _cache_table_activity(self, dbname: str, tablename: str, metric_name: str, value: int, ): - # if relation-level metrics idx_scan or seq_scan, cache it - if metric_name in ('postgresql.index_scans', 'postgresql.seq_scans'): - db = dbname if self.autodiscovery else self._config.dbname - tablename = desc_map['table'] - if db not in self.metrics_cache.table_activity_metrics.keys(): - self.metrics_cache.table_activity_metrics[db] = {} - if tablename not in self.metrics_cache.table_activity_metrics[db].keys(): - self.metrics_cache.table_activity_metrics[db][tablename] = { - 'postgresql.index_scans': 0, - 'postgresql.seq_scans': 0, - } - - self.metrics_cache.table_activity_metrics[db][tablename][name] = value + def _cache_table_activity( + self, + dbname: str, + tablename: str, + metric_name: str, + value: int, + ): + db = dbname if self.autodiscovery else self._config.dbname + if db not in self.metrics_cache.table_activity_metrics.keys(): + self.metrics_cache.table_activity_metrics[db] = {} + if tablename not in self.metrics_cache.table_activity_metrics[db].keys(): + self.metrics_cache.table_activity_metrics[db][tablename] = { + 'postgresql.index_scans': 0, + 'postgresql.seq_scans': 0, + } + + self.metrics_cache.table_activity_metrics[db][tablename][metric_name] = value def _collect_relations_autodiscovery(self, instance_tags, relations_scopes): if not self.autodiscovery: From 3e7b949ec7fe130ae5621e83346f71bffccecd3a Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Fri, 4 Aug 2023 17:28:04 +0000 Subject: [PATCH 85/86] Conditionals for postgres version 9, no partitions --- postgres/datadog_checks/postgres/metadata.py | 45 ++++++++++++++++---- postgres/tests/test_metadata.py | 6 ++- 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/postgres/datadog_checks/postgres/metadata.py b/postgres/datadog_checks/postgres/metadata.py index fbfaeda2aaf44..057a1cfd54ca5 100644 --- a/postgres/datadog_checks/postgres/metadata.py +++ b/postgres/datadog_checks/postgres/metadata.py @@ -17,6 +17,8 @@ from datadog_checks.base.utils.db.utils import DBMAsyncJob, default_json_event_encoding from datadog_checks.base.utils.tracking import tracked_method +from .version_utils import VersionUtils + # default collection intervals in seconds DEFAULT_SETTINGS_COLLECTION_INTERVAL = 600 
DEFAULT_SCHEMAS_COLLECTION_INTERVAL = 600 @@ -40,7 +42,7 @@ WHERE datname LIKE '{dbname}'; """ -PG_TABLES_QUERY = """ +PG_TABLES_QUERY_V10_PLUS = """ SELECT c.oid AS id, c.relname AS name, c.relhasindex AS hasindexes, @@ -58,6 +60,20 @@ AND c.relnamespace = '{schemaname}' :: regnamespace; """ +PG_TABLES_QUERY_V9 = """ +SELECT c.oid AS id, + c.relname AS name, + c.relhasindex AS hasindexes, + c.relowner :: regrole AS owner, + t.relname AS toast_table +FROM pg_class c + left join pg_class t + ON c.reltoastrelid = t.oid +WHERE c.relkind IN ( 'r' ) + AND c.relnamespace = '{schemaname}' :: regnamespace; +""" + + SCHEMA_QUERY = """ SELECT oid AS id, nspname AS name, @@ -294,7 +310,11 @@ def _get_table_info(self, cursor, dbname, schemaname, limit): If any tables are partitioned, only the master paritition table name will be returned, and none of its children. """ if self._config.relations: - cursor.execute(PG_TABLES_QUERY.format(schemaname=schemaname)) + print("version" + str(self._check._version)) + if VersionUtils.transform_version(str(self._check._version))['version.major'] == "9": + cursor.execute(PG_TABLES_QUERY_V9.format(schemaname=schemaname)) + else: + cursor.execute(PG_TABLES_QUERY_V10_PLUS.format(schemaname=schemaname)) rows = cursor.fetchall() table_info = [dict(row) for row in rows] return self._sort_and_limit_table_info(cursor, dbname, table_info, limit) @@ -311,6 +331,12 @@ def sort_tables(info): cache = self._check.metrics_cache.table_activity_metrics # partition master tables won't get any metrics reported on them, # so we have to grab the total partition activity + # note: partitions don't exist in V9, so we have to check this first + if VersionUtils.transform_version(str(self._check._version))['version.major'] == "9": + return ( + cache[dbname][info['name']]['postgresql.index_scans'] + + cache[dbname][info['name']]['postgresql.seq_scans'] + ) if not info["has_partitions"]: return ( cache[dbname][info['name']]['postgresql.index_scans'] @@ -364,14 +390,15 @@ def _query_table_information_for_schema( idxs = [dict(row) for row in rows] this_payload.update({'indexes': idxs}) - if table['has_partitions']: - cursor.execute(PARTITION_KEY_QUERY.format(parent=name)) - row = cursor.fetchone() - this_payload.update({'partition_key': row['partition_key']}) + if VersionUtils.transform_version(str(self._check._version))['version.major'] != "9": + if table['has_partitions']: + cursor.execute(PARTITION_KEY_QUERY.format(parent=name)) + row = cursor.fetchone() + this_payload.update({'partition_key': row['partition_key']}) - cursor.execute(NUM_PARTITIONS_QUERY.format(parent=name)) - row = cursor.fetchone() - this_payload.update({'num_partitions': row['num_partitions']}) + cursor.execute(NUM_PARTITIONS_QUERY.format(parent=name)) + row = cursor.fetchone() + this_payload.update({'num_partitions': row['num_partitions']}) if table['toast_table'] is not None: this_payload.update({'toast_table': table['toast_table']}) diff --git a/postgres/tests/test_metadata.py b/postgres/tests/test_metadata.py index 8758791559502..8a87e9d978226 100644 --- a/postgres/tests/test_metadata.py +++ b/postgres/tests/test_metadata.py @@ -71,7 +71,7 @@ def test_collect_schemas(integration_check, dbm_instance, aggregator): # check that all expected tables are present tables_set = {'persons', "personsdup1", "personsdup2", "pgtable", "pg_newtable", "cities"} # if version isn't 9 or 10, check that partition master is in tables - if not (POSTGRES_VERSION.split('.')[0] == 9) and not (POSTGRES_VERSION.split('.')[0] == 10): + if 
float(POSTGRES_VERSION) >= 11: tables_set.update({'test_part'}) tables_not_reported_set = {'test_part1', 'test_part2'} @@ -98,6 +98,10 @@ def test_collect_schemas(integration_check, dbm_instance, aggregator): keys = list(table.keys()) assert_fields(keys, ["indexes", "columns", "toast_table", "id", "name"]) assert_fields(list(table['indexes'][0].keys()), ['name', 'definition']) + if float(POSTGRES_VERSION) >= 11: + if table['name'] == 'test_part': + keys = list(table.keys()) + assert_fields(keys, ["num_partitions", "partition_key"]) assert_fields(tables_got, tables_set) assert_not_fields(tables_got, tables_not_reported_set) From 97dfe4e2a5ef3663d684685289f3bdc43c0fb828 Mon Sep 17 00:00:00 2001 From: edengorevoy Date: Fri, 4 Aug 2023 17:37:16 +0000 Subject: [PATCH 86/86] Update table size test --- postgres/tests/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/postgres/tests/common.py b/postgres/tests/common.py index 3bc1dfbbc6028..ccbe7cce86590 100644 --- a/postgres/tests/common.py +++ b/postgres/tests/common.py @@ -148,13 +148,13 @@ def check_common_metrics(aggregator, expected_tags, count=1): def check_db_count(aggregator, expected_tags, count=1): - table_count = 5 + table_count = 6 # We create 2 additional partition tables when partition is available if float(POSTGRES_VERSION) >= 11.0: - table_count = 7 + table_count = 8 # And PG >= 14 will also report the parent table if float(POSTGRES_VERSION) >= 14.0: - table_count = 8 + table_count = 9 aggregator.assert_metric( 'postgresql.table.count', value=table_count,
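
The table-selection behaviour these patches introduce (rank tables by total accesses, then truncate to the `max_tables` limit) can be sketched outside the check. The following is a minimal, illustrative Python sketch only: the cache layout mirrors `metrics_cache.table_activity_metrics` as populated by `_cache_table_activity` in the patches above, but the helper name and the sample numbers are invented for illustration and are not part of the integration.

from typing import Dict, List, Union

# Layout mirrors metrics_cache.table_activity_metrics in the patches above:
# {dbname: {tablename: {'postgresql.index_scans': int, 'postgresql.seq_scans': int}}}
ActivityCache = Dict[str, Dict[str, Dict[str, int]]]


def sort_and_limit_tables(
    cache: ActivityCache, dbname: str, table_info: List[Dict[str, Union[str, bool]]], limit: int
) -> List[Dict[str, Union[str, bool]]]:
    # Rank tables by total accesses (index scans + sequential scans), busiest first,
    # then keep at most `limit` tables, as described in the _get_table_info docstring.
    def total_activity(info: Dict[str, Union[str, bool]]) -> int:
        metrics = cache.get(dbname, {}).get(info['name'], {})
        return metrics.get('postgresql.index_scans', 0) + metrics.get('postgresql.seq_scans', 0)

    return sorted(table_info, key=total_activity, reverse=True)[:limit]


if __name__ == '__main__':
    # Sample numbers are made up for illustration.
    sample_cache = {
        'datadog_test': {
            'persons': {'postgresql.index_scans': 40, 'postgresql.seq_scans': 10},
            'cities': {'postgresql.index_scans': 2, 'postgresql.seq_scans': 1},
            'pgtable': {'postgresql.index_scans': 0, 'postgresql.seq_scans': 0},
        }
    }
    tables = [{'name': 'persons'}, {'name': 'cities'}, {'name': 'pgtable'}]
    # With limit=2, only the two most accessed tables are kept.
    print(sort_and_limit_tables(sample_cache, 'datadog_test', tables, limit=2))

With a limit of 2, the sketch keeps only 'persons' and 'cities', which is the same pruning effect the `max_tables` setting has on the schema payload reported by the check.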