Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Workaround for querying deadlocks in extended events #18781

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sqlserver/changelog.d/18781.changed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Use the ``datadog`` XE session as the default for deadlock monitoring. Fall back to the ``system_health`` session if the ``datadog`` session is unavailable.
48 changes: 46 additions & 2 deletions sqlserver/datadog_checks/sqlserver/deadlocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,14 @@
from datadog_checks.base.utils.tracking import tracked_method
from datadog_checks.sqlserver.config import SQLServerConfig
from datadog_checks.sqlserver.const import STATIC_INFO_ENGINE_EDITION, STATIC_INFO_VERSION
from datadog_checks.sqlserver.queries import DEADLOCK_TIMESTAMP_ALIAS, DEADLOCK_XML_ALIAS, get_deadlocks_query
from datadog_checks.sqlserver.queries import (
DEADLOCK_TIMESTAMP_ALIAS,
DEADLOCK_XML_ALIAS,
XE_SESSION_DATADOG,
XE_SESSION_SYSTEM,
XE_SESSIONS_QUERY,
get_deadlocks_query,
)

try:
import datadog_agent
Expand All @@ -26,6 +33,8 @@
PAYLOAD_QUERY_SIGNATURE = "query_signatures"
PAYLOAD_XML = "xml"

# Message carried by NoXESessionError (and logged by the deadlock query path)
# when no XE session usable for deadlock collection is found.
# NOTE(review): the text names only the `datadog` session even though the
# lookup also accepts `system_health` as a fallback -- confirm the wording.
NO_XE_SESSION_ERROR = f"No XE session `{XE_SESSION_DATADOG}` found"


def agent_check_getter(self):
    """Return the agent check stored on ``self._check``."""
    owning_check = self._check
    return owning_check
Expand All @@ -42,6 +51,7 @@ def __init__(self, check, config: SQLServerConfig):
self._deadlock_payload_max_bytes = MAX_PAYLOAD_BYTES
self.collection_interval = config.deadlocks_config.get("collection_interval", DEFAULT_COLLECTION_INTERVAL)
self._force_convert_xml_to_str = False
self._xe_session_name = None
super(Deadlocks, self).__init__(
check,
run_sync=True,
Expand Down Expand Up @@ -104,13 +114,43 @@ def _get_lookback_seconds(self):
def _get_connector(self):
    """Return the connector configured on the check's connection."""
    connection = self._check.connection
    return connection.connector

def _set_xe_session_name(self):
    """Resolve which extended event (XE) session deadlocks are read from.

    Runs ``XE_SESSIONS_QUERY`` and stores the chosen session name on
    ``self._xe_session_name``. The ``XE_SESSION_DATADOG`` session is
    preferred; ``XE_SESSION_SYSTEM`` is used only when the former is not
    returned by the query. Does nothing if a session name was already
    resolved.

    :raises NoXESessionError: if the query returns neither session.
    """
    # Already resolved: avoid opening a connection just to do nothing.
    if self._xe_session_name is not None:
        return

    with self._check.connection.open_managed_default_connection(key_prefix=self._conn_key_prefix):
        with self._check.connection.get_managed_cursor(key_prefix=self._conn_key_prefix) as cursor:
            cursor.execute(XE_SESSIONS_QUERY)
            available_sessions = {row[0] for row in cursor.fetchall()}

    # Candidates are listed in order of preference. Membership in a set is an
    # exact name comparison, whereas `name in ("datadog")` would be a
    # substring test against the string "datadog".
    for candidate in (XE_SESSION_DATADOG, XE_SESSION_SYSTEM):
        if candidate in available_sessions:
            self._xe_session_name = candidate
            return

    # Raised after the cursor/connection contexts have exited, so a missing
    # session is not reported from inside the managed connection scope.
    raise NoXESessionError(NO_XE_SESSION_ERROR)

def _query_deadlocks(self):
if self._xe_session_name is None:
try:
self._set_xe_session_name()
except NoXESessionError as e:
self._log.error(str(e))
return
self._log.info(f'Using XE session {self._xe_session_name} to collect deadlocks')

with self._check.connection.open_managed_default_connection(key_prefix=self._conn_key_prefix):
with self._check.connection.get_managed_cursor(key_prefix=self._conn_key_prefix) as cursor:
convert_xml_to_str = False
if self._force_convert_xml_to_str or self._get_connector() == "adodbapi":
convert_xml_to_str = True
query = get_deadlocks_query(convert_xml_to_str)
query = get_deadlocks_query(
convert_xml_to_str=convert_xml_to_str, xe_session_name=self._xe_session_name
)
self._log.debug(
"Running query [%s] with max deadlocks %s and timestamp %s",
query,
Expand Down Expand Up @@ -198,3 +238,7 @@ def _create_deadlock_event(self, deadlock_rows):

def run_job(self):
    """Run one job iteration by delegating to ``collect_deadlocks``."""
    collect = self.collect_deadlocks
    collect()


class NoXESessionError(Exception):
    """Raised when no extended event (XE) session usable for deadlock collection is found."""
19 changes: 17 additions & 2 deletions sqlserver/datadog_checks/sqlserver/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,11 +214,26 @@
FK.name, FK.parent_object_id, FK.referenced_object_id;
"""

# Names of the extended event (XE) sessions that deadlock collection can read
# from: a dedicated `datadog` session and the `system_health` session.
XE_SESSION_DATADOG = "datadog"
XE_SESSION_SYSTEM = "system_health"
# Returns one row (session_name) for each of the two sessions above that is
# present in sys.dm_xe_sessions and has a `ring_buffer` target attached.
# The session names are interpolated from the module constants, not from
# user input.
XE_SESSIONS_QUERY = f"""
SELECT
s.name AS session_name
FROM
sys.dm_xe_sessions s
JOIN
sys.dm_xe_session_targets t
ON s.address = t.event_session_address
WHERE
t.target_name = 'ring_buffer'
AND s.name IN ('{XE_SESSION_DATADOG}', '{XE_SESSION_SYSTEM}');
"""

DEADLOCK_TIMESTAMP_ALIAS = "timestamp"
DEADLOCK_XML_ALIAS = "event_xml"


def get_deadlocks_query(convert_xml_to_str=False):
def get_deadlocks_query(convert_xml_to_str=False, xe_session_name="datadog"):
"""
Construct the query to fetch deadlocks from the system_health extended event session
:param convert_xml_to_str: Whether to convert the XML to a string. This option is for MSOLEDB drivers
Expand All @@ -235,7 +250,7 @@ def get_deadlocks_query(convert_xml_to_str=False):
FROM (SELECT CAST([target_data] AS XML) AS Target_Data
FROM sys.dm_xe_session_targets AS xt
INNER JOIN sys.dm_xe_sessions AS xs ON xs.address = xt.event_session_address
WHERE xs.name = N'system_health'
WHERE xs.name = N'{xe_session_name}'
AND xt.target_name = N'ring_buffer'
) AS XML_Data
CROSS APPLY Target_Data.nodes('RingBufferTarget/event[@name="xml_deadlock_report"]') AS XEventData(xdr)
Expand Down
15 changes: 15 additions & 0 deletions sqlserver/tests/compose-ha/sql/aoag_primary.sql
Original file line number Diff line number Diff line change
Expand Up @@ -363,3 +363,18 @@ GO
WAITFOR DELAY '00:00:10'
ALTER AVAILABILITY GROUP [AG1] ADD DATABASE [datadog_test-1]
GO

-- Create a server-level extended event session named "datadog" that records
-- xml_deadlock_report events into a ring_buffer target. This is the session
-- the deadlock collection queries by name.
CREATE EVENT SESSION datadog
ON SERVER
ADD EVENT sqlserver.xml_deadlock_report
ADD TARGET package0.ring_buffer
WITH (
MAX_MEMORY = 1024 KB,
EVENT_RETENTION_MODE = ALLOW_SINGLE_EVENT_LOSS,
MAX_DISPATCH_LATENCY = 120 SECONDS,
STARTUP_STATE = ON
);
GO

-- Start the session immediately after creating it.
ALTER EVENT SESSION datadog ON SERVER STATE = START;
GO
15 changes: 15 additions & 0 deletions sqlserver/tests/compose-ha/sql/aoag_secondary.sql
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,18 @@ GO
ALTER AVAILABILITY GROUP [AG1] JOIN WITH (CLUSTER_TYPE = NONE)
ALTER AVAILABILITY GROUP [AG1] GRANT CREATE ANY DATABASE
GO

-- Create a server-level extended event session named "datadog" that records
-- xml_deadlock_report events into a ring_buffer target. This is the session
-- the deadlock collection queries by name.
CREATE EVENT SESSION datadog
ON SERVER
ADD EVENT sqlserver.xml_deadlock_report
ADD TARGET package0.ring_buffer
WITH (
MAX_MEMORY = 1024 KB,
EVENT_RETENTION_MODE = ALLOW_SINGLE_EVENT_LOSS,
MAX_DISPATCH_LATENCY = 120 SECONDS,
STARTUP_STATE = ON
);
GO

-- Start the session immediately after creating it.
ALTER EVENT SESSION datadog ON SERVER STATE = START;
GO
15 changes: 15 additions & 0 deletions sqlserver/tests/compose-high-cardinality-windows/setup.sql
Original file line number Diff line number Diff line change
Expand Up @@ -348,3 +348,18 @@ BEGIN

SET @object_count = @object_count + 1;
END;

-- Create a server-level extended event session named "datadog" that records
-- xml_deadlock_report events into a ring_buffer target. This is the session
-- the deadlock collection queries by name.
CREATE EVENT SESSION datadog
ON SERVER
ADD EVENT sqlserver.xml_deadlock_report
ADD TARGET package0.ring_buffer
WITH (
MAX_MEMORY = 1024 KB,
EVENT_RETENTION_MODE = ALLOW_SINGLE_EVENT_LOSS,
MAX_DISPATCH_LATENCY = 120 SECONDS,
STARTUP_STATE = ON
);
GO

-- Start the session immediately after creating it.
ALTER EVENT SESSION datadog ON SERVER STATE = START;
GO
15 changes: 15 additions & 0 deletions sqlserver/tests/compose-high-cardinality/setup.sql
Original file line number Diff line number Diff line change
Expand Up @@ -331,3 +331,18 @@ BEGIN

SET @object_count = @object_count + 1;
END;

-- Create a server-level extended event session named "datadog" that records
-- xml_deadlock_report events into a ring_buffer target. This is the session
-- the deadlock collection queries by name.
CREATE EVENT SESSION datadog
ON SERVER
ADD EVENT sqlserver.xml_deadlock_report
ADD TARGET package0.ring_buffer
WITH (
MAX_MEMORY = 1024 KB,
EVENT_RETENTION_MODE = ALLOW_SINGLE_EVENT_LOSS,
MAX_DISPATCH_LATENCY = 120 SECONDS,
STARTUP_STATE = ON
);
GO

-- Start the session immediately after creating it.
ALTER EVENT SESSION datadog ON SERVER STATE = START;
GO
15 changes: 15 additions & 0 deletions sqlserver/tests/compose-windows/setup.sql
Original file line number Diff line number Diff line change
Expand Up @@ -278,3 +278,18 @@ END
GO
GRANT EXECUTE on conditionalPlanTest to bob;
GO

-- Create a server-level extended event session named "datadog" that records
-- xml_deadlock_report events into a ring_buffer target. This is the session
-- the deadlock collection queries by name.
CREATE EVENT SESSION datadog
ON SERVER
ADD EVENT sqlserver.xml_deadlock_report
ADD TARGET package0.ring_buffer
WITH (
MAX_MEMORY = 1024 KB,
EVENT_RETENTION_MODE = ALLOW_SINGLE_EVENT_LOSS,
MAX_DISPATCH_LATENCY = 120 SECONDS,
STARTUP_STATE = ON
);
GO

-- Start the session immediately after creating it.
ALTER EVENT SESSION datadog ON SERVER STATE = START;
GO
15 changes: 15 additions & 0 deletions sqlserver/tests/compose/setup.sql
Original file line number Diff line number Diff line change
Expand Up @@ -262,3 +262,18 @@ END
GO
GRANT EXECUTE on conditionalPlanTest to bob;
GO

-- Create a server-level extended event session named "datadog" that records
-- xml_deadlock_report events into a ring_buffer target. This is the session
-- the deadlock collection queries by name.
CREATE EVENT SESSION datadog
ON SERVER
ADD EVENT sqlserver.xml_deadlock_report
ADD TARGET package0.ring_buffer
WITH (
MAX_MEMORY = 1024 KB,
EVENT_RETENTION_MODE = ALLOW_SINGLE_EVENT_LOSS,
MAX_DISPATCH_LATENCY = 120 SECONDS,
STARTUP_STATE = ON
);
GO

-- Start the session immediately after creating it.
ALTER EVENT SESSION datadog ON SERVER STATE = START;
GO
12 changes: 10 additions & 2 deletions sqlserver/tests/test_deadlocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from datadog_checks.sqlserver.deadlocks import (
PAYLOAD_QUERY_SIGNATURE,
PAYLOAD_TIMESTAMP,
XE_SESSION_DATADOG,
Deadlocks,
)
from datadog_checks.sqlserver.queries import DEADLOCK_TIMESTAMP_ALIAS, DEADLOCK_XML_ALIAS
Expand Down Expand Up @@ -66,13 +67,13 @@ def _get_deadlocks_payload(dbm_activity):
return matched


def _get_conn_for_user(instance_docker, user):
def _get_conn_for_user(instance_docker, user, password="Password12!"):
conn_str = (
f"DRIVER={instance_docker['driver']};"
f"Server={instance_docker['host']};"
"Database=master;"
f"UID={user};"
"PWD=Password12!;"
f"PWD={password};"
"TrustServerCertificate=yes;"
)
conn = pyodbc.connect(conn_str, autocommit=False)
Expand Down Expand Up @@ -199,6 +200,13 @@ def test_deadlocks_behind_dbm(dd_run_check, init_config, dbm_instance):
mocked_function.assert_not_called()


@pytest.mark.usefixtures('dd_environment')
def test_xe_session(dd_run_check, dbm_instance):
    """After one check run, the deadlocks job should have selected the ``datadog`` XE session."""
    instances = [dbm_instance]
    sqlserver_check = SQLServer(CHECK_NAME, {}, instances)
    dd_run_check(sqlserver_check)
    selected_session = sqlserver_check.deadlocks._xe_session_name
    assert selected_session == XE_SESSION_DATADOG


# Absolute path of the "deadlocks" directory located next to this test module.
DEADLOCKS_PLAN_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "deadlocks")


Expand Down
Loading