Skip to content

Commit

Permalink
chore(service-naming): improve inferred service naming algorithm (#11458
Browse files Browse the repository at this point in the history
)

## Motivation

Updates inferred service naming algorithm. Previously, the service
naming algorithm would skip over any flag arguments (args starting with
`-`), as well as the following argument as it was assumed to be the
argument value. The update changes the algorithm to run again if no
service name was found the first time. The second time, the algorithm
will not skip arguments that were preceded by a flag argument.

Effect:
-- Previous Behavior: `pytest --no-cov my/file.py` -> service name =
`""`
-- New Behavior: `pytest --no-cov my/file.py` -> service name =
`"my.file"`

Additionally adds check to ensure a python module included after `-m`
module is importable before using it as the service name.

Also updates the service naming algorithm to use try-catches to prevent
any unforeseen errors.

Adds many more test cases, fixes a few snapshots.

## Checklist
- [x] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing
strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met 
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
  • Loading branch information
wconti27 authored Nov 20, 2024
1 parent 383f836 commit 110dcfa
Show file tree
Hide file tree
Showing 18 changed files with 227 additions and 85 deletions.
123 changes: 77 additions & 46 deletions ddtrace/settings/_inferred_base_service.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import fnmatch
import importlib.util
import os
import pathlib
import re
Expand All @@ -8,6 +9,11 @@
from typing import Optional
from typing import Tuple

from ..internal.logger import get_logger


log = get_logger(__name__)


INIT_PY = "__init__.py"
ALL_PY_FILES = "*.py"
Expand All @@ -33,7 +39,7 @@ def __init__(self, environ: Dict[str, str]):
# - Match /python, /python3.7, etc.
self.pattern = r"(^|/)(?!.*\.py$)(" + re.escape("python") + r"(\d+\.\d+)?$)"

def detect(self, args: List[str]) -> Optional[ServiceMetadata]:
def detect(self, args: List[str], skip_args_preceded_by_flags=True) -> Optional[ServiceMetadata]:
"""
Detects and returns service metadata based on the provided list of arguments.
Expand All @@ -59,22 +65,27 @@ def detect(self, args: List[str]) -> Optional[ServiceMetadata]:
has_flag_prefix = arg.startswith("-") and not arg.startswith("--ddtrace")
is_env_variable = "=" in arg

should_skip_arg = prev_arg_is_flag or has_flag_prefix or is_env_variable
should_skip_arg = (prev_arg_is_flag and skip_args_preceded_by_flags) or has_flag_prefix or is_env_variable

if module_flag:
return ServiceMetadata(arg)
if _module_exists(arg):
return ServiceMetadata(arg)

if not should_skip_arg:
abs_path = pathlib.Path(arg).resolve()
if not abs_path.exists():
continue
stripped = abs_path
if not stripped.is_dir():
stripped = stripped.parent
value, ok = self.deduce_package_name(stripped)
if ok:
return ServiceMetadata(value)
return ServiceMetadata(self.find_nearest_top_level(stripped))
try:
abs_path = pathlib.Path(arg).resolve()
if not abs_path.exists():
continue
stripped = abs_path
if not stripped.is_dir():
stripped = stripped.parent
value, ok = self.deduce_package_name(stripped)
if ok:
return ServiceMetadata(value)
return ServiceMetadata(self.find_nearest_top_level(stripped))
except Exception as ex:
# Catch any unexpected errors
log.debug("Unexpected error while processing argument: ", arg, "Exception: ", ex)

if has_flag_prefix and arg == "-m":
module_flag = True
Expand Down Expand Up @@ -145,39 +156,51 @@ def detect_service(args: List[str]) -> Optional[str]:
if cache_key in CACHE:
return CACHE.get(cache_key)

# Check both the included command args as well as the executable being run
possible_commands = [*args, sys.executable]
executable_args = set()

# List of detectors to try in order
detectors = {}
for detector_class in detector_classes:
detector_instance = detector_class(dict(os.environ))

for i, command in enumerate(possible_commands):
detector_name = detector_instance.name

if detector_instance.matches(command):
detectors.update({detector_name: detector_instance})
# append to a list of arg indexes to ignore since they are executables
executable_args.add(i)
continue
elif _is_executable(command):
# append to a list of arg indexes to ignore since they are executables
executable_args.add(i)

args_to_search = []
for i, arg in enumerate(args):
# skip any executable args
if i not in executable_args:
args_to_search.append(arg)

# Iterate through the matched detectors
for detector in detectors.values():
metadata = detector.detect(args_to_search)
if metadata and metadata.name:
CACHE[cache_key] = metadata.name
return metadata.name
try:
# Check both the included command args as well as the executable being run
possible_commands = [*args, sys.executable]
executable_args = set()

# List of detectors to try in order
detectors = {}
for detector_class in detector_classes:
detector_instance = detector_class(dict(os.environ))

for i, command in enumerate(possible_commands):
detector_name = detector_instance.name

if detector_instance.matches(command):
detectors.update({detector_name: detector_instance})
# append to a list of arg indexes to ignore since they are executables
executable_args.add(i)
continue
elif _is_executable(command):
# append to a list of arg indexes to ignore since they are executables
executable_args.add(i)

args_to_search = []
for i, arg in enumerate(args):
# skip any executable args
if i not in executable_args:
args_to_search.append(arg)

# Iterate through the matched detectors
for detector in detectors.values():
metadata = detector.detect(args_to_search)
if metadata and metadata.name:
CACHE[cache_key] = metadata.name
return metadata.name

# Iterate through the matched detectors again, this time not skipping args preceded by flag args
for detector in detectors.values():
metadata = detector.detect(args_to_search, skip_args_preceded_by_flags=False)
if metadata and metadata.name:
CACHE[cache_key] = metadata.name
return metadata.name
except Exception as ex:
# Catch any unexpected errors to be extra safe
log.warning("Unexpected error during inferred base service detection: ", ex)

CACHE[cache_key] = None
return None

Expand All @@ -195,3 +218,11 @@ def _is_executable(file_path: str) -> bool:
directory = os.path.dirname(directory)

return False


def _module_exists(module_name: str) -> bool:
"""Check if a module can be imported."""
try:
return importlib.util.find_spec(module_name) is not None
except ModuleNotFoundError:
return False
14 changes: 14 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,20 @@ def create_ddtrace_subprocess_dir_and_return_test_pyfile(tmpdir):
return pyfile


@pytest.fixture
def ddtrace_tmp_path(tmp_path):
# Create a test dir named `ddtrace_subprocess_dir` that will be used by the tracers
ddtrace_dir = tmp_path / DEFAULT_DDTRACE_SUBPROCESS_TEST_SERVICE_NAME
ddtrace_dir.mkdir(exist_ok=True) # Create the directory if it doesn't exist

# Check for __init__.py and create it if it doesn't exist
init_file = ddtrace_dir / "__init__.py"
if not init_file.exists():
init_file.write_text("") # Create an empty __init__.py file

return ddtrace_dir


@pytest.fixture
def run_python_code_in_subprocess(tmpdir):
def _run(code, **kwargs):
Expand Down
4 changes: 2 additions & 2 deletions tests/contrib/gunicorn/test_gunicorn.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def test_no_known_errors_occur(tmp_path):


@pytest.mark.skipif(sys.version_info >= (3, 11), reason="Gunicorn is only supported up to 3.10")
def test_span_schematization(tmp_path):
def test_span_schematization(ddtrace_tmp_path):
for schema_version in [None, "v0", "v1"]:
for service_name in [None, "mysvc"]:
gunicorn_settings = _gunicorn_settings_factory(
Expand All @@ -197,7 +197,7 @@ def test_span_schematization(tmp_path):
),
ignores=["meta.result_class"],
):
with gunicorn_server(gunicorn_settings, tmp_path) as context:
with gunicorn_server(gunicorn_settings, ddtrace_tmp_path) as context:
_, client = context
response = client.get("/")
assert response.status_code == 200
105 changes: 99 additions & 6 deletions tests/internal/service_name/test_inferred_base_service.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import pathlib
import shlex
import tempfile
from unittest.mock import patch

import pytest

from ddtrace.settings._inferred_base_service import _module_exists
from ddtrace.settings._inferred_base_service import detect_service


Expand All @@ -21,7 +23,9 @@ def mock_file_system():
(base_path / "venv" / "bin" / "gunicorn").mkdir(parents=True)

# add a test dir
(base_path / "tests" / "contrib" / "aiohttp").mkdir(parents=True)
(base_path / "tests" / "contrib" / "aiohttp" / "app").mkdir(parents=True)

# other cases

(base_path / "modules" / "m1" / "first" / "nice" / "package").mkdir(parents=True)
(base_path / "modules" / "m2").mkdir(parents=True)
Expand All @@ -35,15 +39,19 @@ def mock_file_system():
(base_path / "venv" / "bin" / "python3.11" / "gunicorn" / "__init__.py").mkdir(parents=True)
(base_path / "venv" / "bin" / "gunicorn" / "__init__.py").touch()

# Create `__init__.py` files that indicate packages
(base_path / "modules" / "m1" / "first" / "nice" / "package" / "__init__.py").touch()
(base_path / "modules" / "m1" / "first" / "nice" / "__init__.py").touch()
(base_path / "modules" / "m1" / "first" / "nice" / "something.py").touch()
(base_path / "modules" / "m1" / "first" / "nice" / "app.py").touch()
(base_path / "modules" / "m1" / "first" / "__init__.py").touch()
(base_path / "modules" / "m1" / "__init__.py").touch()
(base_path / "modules" / "m2" / "__init__.py").touch()
(base_path / "apps" / "app1" / "__main__.py").touch()
(base_path / "apps" / "app2" / "cmd" / "run.py").touch()
(base_path / "apps" / "app2" / "setup.py").touch()

(base_path / "tests" / "contrib" / "aiohttp" / "app" / "web.py").touch()
(base_path / "tests" / "contrib" / "aiohttp" / "app" / "__init__.py").touch()
(base_path / "tests" / "contrib" / "aiohttp" / "test.py").touch()
(base_path / "tests" / "contrib" / "aiohttp" / "__init__.py").touch()
(base_path / "tests" / "contrib" / "__init__.py").touch()
Expand All @@ -59,9 +67,13 @@ def mock_file_system():
@pytest.mark.parametrize(
"cmd,expected",
[
("python tests/contrib/aiohttp/app/web.py", "tests.contrib.aiohttp.app"),
("python tests/contrib/aiohttp", "tests.contrib.aiohttp"),
("python tests/contrib", "tests.contrib"),
("python tests", "tests"),
("python modules/m1/first/nice/package", "m1.first.nice.package"),
("python modules/m1/first/nice", "m1.first.nice"),
("python modules/m1/first/nice/something.py", "m1.first.nice"),
("python modules/m1/first/nice/app.py", "m1.first.nice"),
("python modules/m1/first", "m1.first"),
("python modules/m2", "m2"),
("python apps/app1", "app1"),
Expand All @@ -75,17 +87,98 @@ def mock_file_system():
("venv/bin/python3.11/ddtrace-run python apps/app2/setup.py", "app2"),
("ddtrace-run python apps/app2/setup.py", "app2"),
("python3.12 apps/app2/cmd/run.py", "app2"),
("python -m m1.first.nice.package", "m1.first.nice.package"),
("python -m tests.contrib.aiohttp.app.web", "tests.contrib.aiohttp.app.web"),
("python -m tests.contrib.aiohttp.app", "tests.contrib.aiohttp.app"),
("python -m tests.contrib.aiohttp", "tests.contrib.aiohttp"),
("python -m tests.contrib", "tests.contrib"),
("python -m tests", "tests"),
("python -m http.server 8000", "http.server"),
("python --some-flag apps/app1", "app1"),
# pytest
("pytest tests/contrib/aiohttp", "tests.contrib.aiohttp"),
("pytest --ddtrace tests/contrib/aiohttp", "tests.contrib.aiohttp"),
("pytest --no-cov tests/contrib/aiohttp", "tests.contrib.aiohttp"),
],
)
def test_python_detector(cmd, expected, mock_file_system):
def test_python_detector_service_name_should_exist_file_exists(cmd, expected, mock_file_system):
# Mock the current working directory to the test_modules path
with patch("os.getcwd", return_value=str(mock_file_system)):
cmd_parts = cmd.split(" ")
cmd_parts = shlex.split(cmd)
detected_name = detect_service(cmd_parts)

assert detected_name == expected, f"Test failed for command: [{cmd}]"


@pytest.mark.parametrize(
"cmd,expected",
[
# Commands that should not produce a service name
("", None), # Empty command
("python non_existing_file.py", None), # Non-existing Python script
("python invalid_script.py", None), # Invalid script that isn't found
("gunicorn app:app", None), # Non-Python command
("ls -la", None), # Non-Python random command
("cat README.md", None), # Another random command
("python -m non_existing_module", None), # Non-existing Python module
("python -c 'print([])'", None), # Python inline code not producing a service
("python -m -c 'print([]])'", None), # Inline code with module flag
("echo 'Hello, World!'", None), # Not a Python service
("python3.11 /path/to/some/non_python_file.txt", None), # Non-Python file
("/usr/bin/ls", None), # Another system command
("some_executable --ddtrace hello", None),
("python version", None),
("python -m -v --hello=maam", None),
# error produced from a test, ensure an arg that is very long doesn't break stuff
(
"ddtrace-run pytest -k 'not test_reloader and not test_reload_listeners and not "
+ "test_no_exceptions_when_cancel_pending_request and not test_add_signal and not "
+ "test_ode_removes and not test_skip_touchup and not test_dispatch_signal_triggers"
+ " and not test_keep_alive_connection_context and not test_redirect_with_params and"
+ " not test_keep_alive_client_timeout and not test_logger_vhosts and not test_ssl_in_multiprocess_mode'",
None,
),
],
)
def test_no_service_name(cmd, expected, mock_file_system):
with patch("os.getcwd", return_value=str(mock_file_system)):
cmd_parts = shlex.split(cmd)
detected_name = detect_service(cmd_parts)

assert detected_name == expected, f"Test failed for command: [{cmd}]"


@pytest.mark.parametrize(
"cmd,expected",
[
# Command that is too long
("python " + " ".join(["arg"] * 1000), None), # Excessively long command
# Path with special characters
(r"python /path/with/special/characters/!@#$%^&*()_/some_script.py", None), # Special characters
# Path too deep
(f"python {'/'.join(['deep' * 50])}/script.py", None), # Excessively deep path
],
)
def test_chaos(cmd, expected, mock_file_system):
with patch("os.getcwd", return_value=str(mock_file_system)):
cmd_parts = shlex.split(cmd)
detected_name = detect_service(cmd_parts)

assert detected_name == expected, f"Chaos test failed for command: [{cmd}]"


@pytest.mark.parametrize(
"module_name,should_exist",
[
("tests.contrib.aiohttp.app.web", True),
("tests.contrib.aiohttp.app", True),
("tests.contrib.aiohttp", True),
("tests.contrib", True),
("tests", True),
("tests.releasenotes", False),
("non_existing_module", False),
],
)
def test_module_exists(module_name, should_exist):
exists = _module_exists(module_name)

assert exists == should_exist, f"Module {module_name} existence check failed."
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"type": "web",
"error": 0,
"meta": {
"_dd.base_service": "",
"_dd.base_service": "ddtrace_subprocess_dir",
"_dd.p.dm": "-0",
"_dd.p.tid": "654a694400000000",
"component": "wsgi",
Expand Down Expand Up @@ -40,7 +40,7 @@
"type": "",
"error": 0,
"meta": {
"_dd.base_service": "",
"_dd.base_service": "ddtrace_subprocess_dir",
"_dd.p.tid": "654a694400000000",
"component": "wsgi"
},
Expand All @@ -57,7 +57,7 @@
"type": "web",
"error": 0,
"meta": {
"_dd.base_service": "",
"_dd.base_service": "ddtrace_subprocess_dir",
"_dd.p.tid": "654a694400000000",
"component": "wsgi",
"span.kind": "server"
Expand All @@ -75,7 +75,7 @@
"type": "",
"error": 0,
"meta": {
"_dd.base_service": "",
"_dd.base_service": "ddtrace_subprocess_dir",
"_dd.p.tid": "654a694400000000",
"component": "wsgi",
"result_class": "listiterator"
Expand Down
Loading

0 comments on commit 110dcfa

Please sign in to comment.