Skip to content

Commit

Permalink
feat: Robustness improvements to datadog_diagnostics plugin (#723)
Browse files Browse the repository at this point in the history
- Add `DATADOG_DIAGNOSTICS_ENABLE` for quick disable if needed
- Limit spans with `DATADOG_DIAGNOSTICS_MAX_SPANS` (default 100)
- Fix scope of member variables
- Add unit tests

Manual testing:

- Modify `common.djangoapps.student.views.dashboard.student_dashboard` in
  edx-platform to call `import time; time.sleep(10)` at the start of the
  view.
- Start server and log
- Visit /dashboard
- While the browser is waiting, quickly make a small edit to an
  edx-platform file, causing an autoreload.
- Confirm that spans are logged.
  • Loading branch information
timmc-edx committed Jul 11, 2024
1 parent cb26cc7 commit 5877b40
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 6 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,17 @@ Change Log
Unreleased
~~~~~~~~~~

[3.5.0] - 2024-07-11
~~~~~~~~~~~~~~~~~~~~
Added
-----
* Toggle ``DATADOG_DIAGNOSTICS_ENABLE`` for disabling that plugin quickly if needed. (Feature remains enabled by default.)

Fixed
-----
* Limit the number of spans collected via new setting ``DATADOG_DIAGNOSTICS_MAX_SPANS``, defaulting to 100. This may help avoid memory leaks.
* Make accidental class variables into member variables in ``datadog_diagnostics``

[3.4.0] - 2024-07-10
~~~~~~~~~~~~~~~~~~~~
Added
Expand Down
2 changes: 1 addition & 1 deletion edx_arch_experiments/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
A plugin to include applications under development by the architecture team at 2U.
"""

__version__ = '3.4.0'
__version__ = '3.5.0'
34 changes: 29 additions & 5 deletions edx_arch_experiments/datadog_diagnostics/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,44 @@
import logging

from django.apps import AppConfig
from django.conf import settings

log = logging.getLogger(__name__)


# .. toggle_name: DATADOG_DIAGNOSTICS_ENABLE
# .. toggle_implementation: DjangoSetting
# .. toggle_default: True
# .. toggle_description: Enables logging of Datadog diagnostics information.
# .. toggle_use_cases: circuit_breaker
# .. toggle_creation_date: 2024-07-11
# .. toggle_tickets: https://github.com/edx/edx-arch-experiments/issues/692
DATADOG_DIAGNOSTICS_ENABLE = getattr(settings, 'DATADOG_DIAGNOSTICS_ENABLE', True)

# .. setting_name: DATADOG_DIAGNOSTICS_MAX_SPANS
# .. setting_default: 100
# .. setting_description: Limit of how many spans to hold onto and log
# when diagnosing Datadog tracing issues. This limits memory consumption
# avoids logging more data than is actually needed for diagnosis.
DATADOG_DIAGNOSTICS_MAX_SPANS = getattr(settings, 'DATADOG_DIAGNOSTICS_MAX_SPANS', 100)


class MissingSpanProcessor:
"""Datadog span processor that logs unfinished spans at shutdown."""
spans_started = 0
spans_finished = 0
open_spans = {}

def __init__(self):
self.spans_started = 0
self.spans_finished = 0
self.open_spans = {}

def on_span_start(self, span):
self.spans_started += 1
self.open_spans[span.span_id] = span
if len(self.open_spans) < DATADOG_DIAGNOSTICS_MAX_SPANS:
self.open_spans[span.span_id] = span

def on_span_finish(self, span):
self.spans_finished += 1
del self.open_spans[span.span_id]
self.open_spans.pop(span.span_id, None) # "delete if present"

def shutdown(self, _timeout):
log.info(f"Spans created = {self.spans_started}; spans finished = {self.spans_finished}")
Expand All @@ -39,6 +60,9 @@ class DatadogDiagnostics(AppConfig):
plugin_app = {}

def ready(self):
if not DATADOG_DIAGNOSTICS_ENABLE:
return

try:
from ddtrace import tracer # pylint: disable=import-outside-toplevel
tracer._span_processors.append(MissingSpanProcessor()) # pylint: disable=protected-access
Expand Down
Empty file.
51 changes: 51 additions & 0 deletions edx_arch_experiments/datadog_diagnostics/tests/test_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""
Tests for plugin app.
"""

from unittest.mock import patch

from django.test import TestCase

from .. import apps


class FakeSpan:
"""A fake Span instance that just carries a span_id."""
def __init__(self, span_id):
self.span_id = span_id

def _pprint(self):
return f"span_id={self.span_id}"


class TestMissingSpanProcessor(TestCase):
"""Tests for MissingSpanProcessor."""

@patch.object(apps, 'DATADOG_DIAGNOSTICS_MAX_SPANS', new=3)
def test_metrics(self):
proc = apps.MissingSpanProcessor()
ids = [2, 4, 6, 8, 10]

for span_id in ids:
proc.on_span_start(FakeSpan(span_id))

assert {(sk, sv.span_id) for sk, sv in proc.open_spans.items()} == {(2, 2), (4, 4), (6, 6)}
assert proc.spans_started == 5
assert proc.spans_finished == 0

for span_id in ids:
proc.on_span_finish(FakeSpan(span_id))

assert proc.open_spans.keys() == set()
assert proc.spans_started == 5
assert proc.spans_finished == 5

@patch('edx_arch_experiments.datadog_diagnostics.apps.log.info')
@patch('edx_arch_experiments.datadog_diagnostics.apps.log.error')
def test_logging(self, mock_log_error, mock_log_info):
proc = apps.MissingSpanProcessor()
proc.on_span_start(FakeSpan(17))
proc.shutdown(0)

mock_log_info.assert_called_once_with("Spans created = 1; spans finished = 0")
mock_log_error.assert_called_once_with("Span created but not finished: span_id=17")

0 comments on commit 5877b40

Please sign in to comment.