Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(service-naming): improve inferred service naming algorithm #11458

Merged
merged 14 commits into from
Nov 20, 2024
116 changes: 71 additions & 45 deletions ddtrace/settings/_inferred_base_service.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import fnmatch
import importlib.util
import os
import pathlib
import re
Expand Down Expand Up @@ -33,7 +34,7 @@ def __init__(self, environ: Dict[str, str]):
# - Match /python, /python3.7, etc.
self.pattern = r"(^|/)(?!.*\.py$)(" + re.escape("python") + r"(\d+\.\d+)?$)"

def detect(self, args: List[str]) -> Optional[ServiceMetadata]:
def detect(self, args: List[str], skip_args_preceded_by_flags=True) -> Optional[ServiceMetadata]:
"""
Detects and returns service metadata based on the provided list of arguments.

Expand All @@ -59,22 +60,27 @@ def detect(self, args: List[str]) -> Optional[ServiceMetadata]:
has_flag_prefix = arg.startswith("-") and not arg.startswith("--ddtrace")
is_env_variable = "=" in arg

should_skip_arg = prev_arg_is_flag or has_flag_prefix or is_env_variable
should_skip_arg = (prev_arg_is_flag and skip_args_preceded_by_flags) or has_flag_prefix or is_env_variable

if module_flag:
return ServiceMetadata(arg)
if _module_exists(arg):
return ServiceMetadata(arg)

if not should_skip_arg:
abs_path = pathlib.Path(arg).resolve()
if not abs_path.exists():
try:
wconti27 marked this conversation as resolved.
Show resolved Hide resolved
abs_path = pathlib.Path(arg).resolve()
if not abs_path.exists():
continue
stripped = abs_path
if not stripped.is_dir():
stripped = stripped.parent
value, ok = self.deduce_package_name(stripped)
if ok:
return ServiceMetadata(value)
return ServiceMetadata(self.find_nearest_top_level(stripped))
except: # noqa: E722
# catch any exceptions and continue iteration to the next arg
continue
stripped = abs_path
if not stripped.is_dir():
stripped = stripped.parent
value, ok = self.deduce_package_name(stripped)
if ok:
return ServiceMetadata(value)
return ServiceMetadata(self.find_nearest_top_level(stripped))

if has_flag_prefix and arg == "-m":
module_flag = True
Expand Down Expand Up @@ -145,39 +151,51 @@ def detect_service(args: List[str]) -> Optional[str]:
if cache_key in CACHE:
return CACHE.get(cache_key)

# Check both the included command args as well as the executable being run
possible_commands = [*args, sys.executable]
executable_args = set()

# List of detectors to try in order
detectors = {}
for detector_class in detector_classes:
detector_instance = detector_class(dict(os.environ))

for i, command in enumerate(possible_commands):
detector_name = detector_instance.name

if detector_instance.matches(command):
detectors.update({detector_name: detector_instance})
# append to a list of arg indexes to ignore since they are executables
executable_args.add(i)
continue
elif _is_executable(command):
# append to a list of arg indexes to ignore since they are executables
executable_args.add(i)

args_to_search = []
for i, arg in enumerate(args):
# skip any executable args
if i not in executable_args:
args_to_search.append(arg)

# Iterate through the matched detectors
for detector in detectors.values():
metadata = detector.detect(args_to_search)
if metadata and metadata.name:
CACHE[cache_key] = metadata.name
return metadata.name
try:
wconti27 marked this conversation as resolved.
Show resolved Hide resolved
# Check both the included command args as well as the executable being run
possible_commands = [*args, sys.executable]
executable_args = set()

# List of detectors to try in order
detectors = {}
for detector_class in detector_classes:
detector_instance = detector_class(dict(os.environ))

for i, command in enumerate(possible_commands):
detector_name = detector_instance.name

if detector_instance.matches(command):
detectors.update({detector_name: detector_instance})
# append to a list of arg indexes to ignore since they are executables
executable_args.add(i)
continue
elif _is_executable(command):
# append to a list of arg indexes to ignore since they are executables
executable_args.add(i)

args_to_search = []
for i, arg in enumerate(args):
# skip any executable args
if i not in executable_args:
args_to_search.append(arg)

# Iterate through the matched detectors
for detector in detectors.values():
metadata = detector.detect(args_to_search)
if metadata and metadata.name:
CACHE[cache_key] = metadata.name
return metadata.name

# Iterate through the matched detectors again, this time not skipping args preceded by flag args
for detector in detectors.values():
metadata = detector.detect(args_to_search, skip_args_preceded_by_flags=False)
if metadata and metadata.name:
CACHE[cache_key] = metadata.name
return metadata.name
except: # noqa: E722
# catch all exceptions to be extra safe to not crash anything
pass
wconti27 marked this conversation as resolved.
Show resolved Hide resolved

CACHE[cache_key] = None
return None
wconti27 marked this conversation as resolved.
Show resolved Hide resolved

Expand All @@ -195,3 +213,11 @@ def _is_executable(file_path: str) -> bool:
directory = os.path.dirname(directory)

return False


def _module_exists(module_name: str) -> bool:
"""Check if a module can be imported."""
try:
return importlib.util.find_spec(module_name) is not None
except ModuleNotFoundError:
return False
14 changes: 14 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,20 @@ def create_ddtrace_subprocess_dir_and_return_test_pyfile(tmpdir):
return pyfile


@pytest.fixture
def ddtrace_tmp_path(tmp_path):
# Create a test dir named `ddtrace_subprocess_dir` that will be used by the tracers
ddtrace_dir = tmp_path / DEFAULT_DDTRACE_SUBPROCESS_TEST_SERVICE_NAME
ddtrace_dir.mkdir(exist_ok=True) # Create the directory if it doesn't exist

# Check for __init__.py and create it if it doesn't exist
init_file = ddtrace_dir / "__init__.py"
if not init_file.exists():
init_file.write_text("") # Create an empty __init__.py file

return ddtrace_dir


@pytest.fixture
def run_python_code_in_subprocess(tmpdir):
def _run(code, **kwargs):
Expand Down
4 changes: 2 additions & 2 deletions tests/contrib/gunicorn/test_gunicorn.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def test_no_known_errors_occur(tmp_path):


@pytest.mark.skipif(sys.version_info >= (3, 11), reason="Gunicorn is only supported up to 3.10")
def test_span_schematization(tmp_path):
def test_span_schematization(ddtrace_tmp_path):
for schema_version in [None, "v0", "v1"]:
for service_name in [None, "mysvc"]:
gunicorn_settings = _gunicorn_settings_factory(
Expand All @@ -197,7 +197,7 @@ def test_span_schematization(tmp_path):
),
ignores=["meta.result_class"],
):
with gunicorn_server(gunicorn_settings, tmp_path) as context:
with gunicorn_server(gunicorn_settings, ddtrace_tmp_path) as context:
_, client = context
response = client.get("/")
assert response.status_code == 200
105 changes: 99 additions & 6 deletions tests/internal/service_name/test_inferred_base_service.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import pathlib
import shlex
import tempfile
from unittest.mock import patch

import pytest

from ddtrace.settings._inferred_base_service import _module_exists
from ddtrace.settings._inferred_base_service import detect_service


Expand All @@ -21,7 +23,9 @@ def mock_file_system():
(base_path / "venv" / "bin" / "gunicorn").mkdir(parents=True)

# add a test dir
(base_path / "tests" / "contrib" / "aiohttp").mkdir(parents=True)
(base_path / "tests" / "contrib" / "aiohttp" / "app").mkdir(parents=True)

# other cases

(base_path / "modules" / "m1" / "first" / "nice" / "package").mkdir(parents=True)
(base_path / "modules" / "m2").mkdir(parents=True)
Expand All @@ -35,15 +39,19 @@ def mock_file_system():
(base_path / "venv" / "bin" / "python3.11" / "gunicorn" / "__init__.py").mkdir(parents=True)
(base_path / "venv" / "bin" / "gunicorn" / "__init__.py").touch()

# Create `__init__.py` files that indicate packages
(base_path / "modules" / "m1" / "first" / "nice" / "package" / "__init__.py").touch()
(base_path / "modules" / "m1" / "first" / "nice" / "__init__.py").touch()
(base_path / "modules" / "m1" / "first" / "nice" / "something.py").touch()
(base_path / "modules" / "m1" / "first" / "nice" / "app.py").touch()
(base_path / "modules" / "m1" / "first" / "__init__.py").touch()
(base_path / "modules" / "m1" / "__init__.py").touch()
(base_path / "modules" / "m2" / "__init__.py").touch()
(base_path / "apps" / "app1" / "__main__.py").touch()
(base_path / "apps" / "app2" / "cmd" / "run.py").touch()
(base_path / "apps" / "app2" / "setup.py").touch()

(base_path / "tests" / "contrib" / "aiohttp" / "app" / "web.py").touch()
(base_path / "tests" / "contrib" / "aiohttp" / "app" / "__init__.py").touch()
(base_path / "tests" / "contrib" / "aiohttp" / "test.py").touch()
(base_path / "tests" / "contrib" / "aiohttp" / "__init__.py").touch()
(base_path / "tests" / "contrib" / "__init__.py").touch()
Expand All @@ -59,9 +67,13 @@ def mock_file_system():
@pytest.mark.parametrize(
"cmd,expected",
[
("python tests/contrib/aiohttp/app/web.py", "tests.contrib.aiohttp.app"),
("python tests/contrib/aiohttp", "tests.contrib.aiohttp"),
("python tests/contrib", "tests.contrib"),
("python tests", "tests"),
("python modules/m1/first/nice/package", "m1.first.nice.package"),
("python modules/m1/first/nice", "m1.first.nice"),
("python modules/m1/first/nice/something.py", "m1.first.nice"),
("python modules/m1/first/nice/app.py", "m1.first.nice"),
("python modules/m1/first", "m1.first"),
("python modules/m2", "m2"),
("python apps/app1", "app1"),
Expand All @@ -75,17 +87,98 @@ def mock_file_system():
("venv/bin/python3.11/ddtrace-run python apps/app2/setup.py", "app2"),
("ddtrace-run python apps/app2/setup.py", "app2"),
("python3.12 apps/app2/cmd/run.py", "app2"),
("python -m m1.first.nice.package", "m1.first.nice.package"),
("python -m tests.contrib.aiohttp.app.web", "tests.contrib.aiohttp.app.web"),
("python -m tests.contrib.aiohttp.app", "tests.contrib.aiohttp.app"),
("python -m tests.contrib.aiohttp", "tests.contrib.aiohttp"),
("python -m tests.contrib", "tests.contrib"),
("python -m tests", "tests"),
("python -m http.server 8000", "http.server"),
("python --some-flag apps/app1", "app1"),
# pytest
("pytest tests/contrib/aiohttp", "tests.contrib.aiohttp"),
("pytest --ddtrace tests/contrib/aiohttp", "tests.contrib.aiohttp"),
("pytest --no-cov tests/contrib/aiohttp", "tests.contrib.aiohttp"),
],
)
def test_python_detector(cmd, expected, mock_file_system):
def test_python_detector_service_name_should_exist_file_exists(cmd, expected, mock_file_system):
# Mock the current working directory to the test_modules path
with patch("os.getcwd", return_value=str(mock_file_system)):
cmd_parts = cmd.split(" ")
cmd_parts = shlex.split(cmd)
detected_name = detect_service(cmd_parts)

assert detected_name == expected, f"Test failed for command: [{cmd}]"


@pytest.mark.parametrize(
"cmd,expected",
[
# Commands that should not produce a service name
("", None), # Empty command
("python non_existing_file.py", None), # Non-existing Python script
("python invalid_script.py", None), # Invalid script that isn't found
("gunicorn app:app", None), # Non-Python command
("ls -la", None), # Non-Python random command
("cat README.md", None), # Another random command
("python -m non_existing_module", None), # Non-existing Python module
("python -c 'print([])'", None), # Python inline code not producing a service
("python -m -c 'print([]])'", None), # Inline code with module flag
("echo 'Hello, World!'", None), # Not a Python service
("python3.11 /path/to/some/non_python_file.txt", None), # Non-Python file
("/usr/bin/ls", None), # Another system command
("some_executable --ddtrace hello", None),
("python version", None),
("python -m -v --hello=maam", None),
# error produced from a test, ensure an arg that is very long doesn't break stuff
(
"ddtrace-run pytest -k 'not test_reloader and not test_reload_listeners and not "
+ "test_no_exceptions_when_cancel_pending_request and not test_add_signal and not "
wconti27 marked this conversation as resolved.
Show resolved Hide resolved
+ "test_ode_removes and not test_skip_touchup and not test_dispatch_signal_triggers"
+ " and not test_keep_alive_connection_context and not test_redirect_with_params and"
+ " not test_keep_alive_client_timeout and not test_logger_vhosts and not test_ssl_in_multiprocess_mode'",
None,
),
],
)
def test_no_service_name(cmd, expected, mock_file_system):
with patch("os.getcwd", return_value=str(mock_file_system)):
cmd_parts = shlex.split(cmd)
detected_name = detect_service(cmd_parts)

assert detected_name == expected, f"Test failed for command: [{cmd}]"


@pytest.mark.parametrize(
"cmd,expected",
[
# Command that is too long
("python " + " ".join(["arg"] * 1000), None), # Excessively long command
# Path with special characters
(r"python /path/with/special/characters/!@#$%^&*()_/some_script.py", None), # Special characters
# Path too deep
(f"python {'/'.join(['deep' * 50])}/script.py", None), # Excessively deep path
],
)
def test_chaos(cmd, expected, mock_file_system):
with patch("os.getcwd", return_value=str(mock_file_system)):
cmd_parts = shlex.split(cmd)
detected_name = detect_service(cmd_parts)

assert detected_name == expected, f"Chaos test failed for command: [{cmd}]"


@pytest.mark.parametrize(
"module_name,should_exist",
[
("tests.contrib.aiohttp.app.web", True),
("tests.contrib.aiohttp.app", True),
("tests.contrib.aiohttp", True),
("tests.contrib", True),
("tests", True),
("tests.releasenotes", False),
("non_existing_module", False),
],
)
def test_module_exists(module_name, should_exist):
exists = _module_exists(module_name)

assert exists == should_exist, f"Module {module_name} existence check failed."
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"type": "web",
"error": 0,
"meta": {
"_dd.base_service": "",
"_dd.base_service": "ddtrace_subprocess_dir",
"_dd.p.dm": "-0",
"_dd.p.tid": "654a694400000000",
"component": "wsgi",
Expand Down Expand Up @@ -40,7 +40,7 @@
"type": "",
"error": 0,
"meta": {
"_dd.base_service": "",
"_dd.base_service": "ddtrace_subprocess_dir",
"_dd.p.tid": "654a694400000000",
"component": "wsgi"
},
Expand All @@ -57,7 +57,7 @@
"type": "web",
"error": 0,
"meta": {
"_dd.base_service": "",
"_dd.base_service": "ddtrace_subprocess_dir",
"_dd.p.tid": "654a694400000000",
"component": "wsgi",
"span.kind": "server"
Expand All @@ -75,7 +75,7 @@
"type": "",
"error": 0,
"meta": {
"_dd.base_service": "",
"_dd.base_service": "ddtrace_subprocess_dir",
"_dd.p.tid": "654a694400000000",
"component": "wsgi",
"result_class": "listiterator"
Expand Down
Loading
Loading