diff --git a/README.md b/README.md index ba1c7ad..88f9408 100644 --- a/README.md +++ b/README.md @@ -41,11 +41,19 @@ $ python3 pyheap_dump.pyz -h ``` for additional options. -#### Containers and Namespaces +### Containers and Namespaces PyHeap can attach to targets that are running in Linux namespaces. Docker containers is the most common example of this situation. -To use PyHeap with a containerized/namespaced process, find the process ID in the host PID namespace. Normally Docker containers have their own PID namespace and the process is running under the PID 1. However, on the host they have another PID. Check the process list with `top` or other process listing tool or use `docker inspect` (in `State`). +**Note:** Some Docker setups don't have real processes running on the same (virtual) machine where `docker ...` control commands are executed. One example is WSL 2 + Docker Desktop on Windows. PyHeap doesn't work in such environments. + +If you want to use PyHeap on the root process in a Docker container, use `--docker-container` instead of `--pid/-p` and specify the name or ID: + +```bash +$ sudo python3 pyheap_dump.pyz --docker-container <container_name_or_id> --file heap.pyheap +``` + +If it's not the root process in the container, or you work with another container system (e.g. systemd-nspawn) or just generic Linux namespaces, you need to find the target PID. Please keep in mind that this must be the PID from the dumper's point of view: processes in namespaces can have their own PID numbers. For example, if you're about to run the dumper on a Linux host and the target process is running in a container, check the process list with `ps` or `top` on the host. Use `--pid/-p` for the dumper. Make sure the GDB executable is available in the target mount namespace. If the target is in a Docker container, you most likely need to install GDB inside it (please note this can be done in a running container as well).
diff --git a/integration_tests/manual_test_e2e.py b/integration_tests/manual_test_e2e.py index 10ffc83..c4f93b4 100644 --- a/integration_tests/manual_test_e2e.py +++ b/integration_tests/manual_test_e2e.py @@ -20,7 +20,7 @@ import sys import time from contextlib import contextmanager, closing -from typing import Iterator +from typing import Iterator, Union import pytest from _pytest.tmpdir import TempPathFactory from pyheap_ui.heap_reader import HeapReader @@ -28,10 +28,10 @@ @pytest.mark.parametrize("docker", [False, True]) def test_e2e(docker: bool, test_heap_path: str) -> None: - with _inferior_process(docker) as ip_pid, _dumper_process( - test_heap_path, ip_pid, sudo_required=docker + with _inferior_process(docker) as ip_pid_or_container, _dumper_process( + test_heap_path, ip_pid_or_container, docker ) as dp: - print(f"Inferior process {ip_pid}") + print(f"Inferior process/container {ip_pid_or_container}") print(f"Dumper process {dp.pid}") dp.wait(10) assert dp.returncode == 0 @@ -72,7 +72,7 @@ def _inferior_process_plain() -> Iterator[int]: @contextmanager -def _inferior_process_docker() -> Iterator[int]: +def _inferior_process_docker() -> Iterator[str]: python_version = f"{sys.version_info.major}.{sys.version_info.minor}" docker_proc = subprocess.run( [ @@ -92,25 +92,8 @@ def _inferior_process_docker() -> Iterator[int]: container_id = docker_proc.stdout.decode("utf-8").strip() - docker_proc = subprocess.run( - ["docker", "inspect", container_id], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - if docker_proc.returncode != 0: - print(docker_proc.stdout.decode("utf-8")) - print(docker_proc.stderr.decode("utf-8")) - assert docker_proc.returncode == 0 - - inspect_output = json.loads(docker_proc.stdout.decode("utf-8")) - state = inspect_output[0]["State"] - assert state["Status"] == "running" - assert state["Running"] is True - pid = state["Pid"] - time.sleep(1) - try: - yield pid + yield container_id finally: subprocess.run( ["docker", "kill", 
container_id], @@ -120,7 +103,7 @@ def _inferior_process_docker() -> Iterator[int]: @contextmanager -def _inferior_process(docker: bool) -> Iterator[int]: +def _inferior_process(docker: bool) -> Iterator[Union[int, str]]: if docker: with _inferior_process_docker() as r: yield r @@ -131,19 +114,20 @@ def _inferior_process(docker: bool) -> Iterator[int]: @contextmanager def _dumper_process( - test_heap_path: str, inferiod_pid: int, sudo_required: bool + test_heap_path: str, pid_or_container: Union[int, str], docker: bool ) -> Iterator[subprocess.Popen]: + sudo_required = docker cmd = [] if sudo_required: cmd = ["sudo"] - cmd += [ - sys.executable, - "dist/pyheap_dump.pyz", - "--pid", - str(inferiod_pid), - "--file", - test_heap_path, - ] + cmd += [sys.executable, "dist/pyheap_dump.pyz"] + + if docker: + cmd += ["--docker-container", str(pid_or_container)] + else: + cmd += ["--pid", str(pid_or_container)] + cmd += ["--file", test_heap_path] + dumper_proc = subprocess.Popen( cmd, stdout=subprocess.PIPE, diff --git a/pyheap/src/pyheap_dump.py b/pyheap/src/pyheap_dump.py index f836750..f7e441d 100644 --- a/pyheap/src/pyheap_dump.py +++ b/pyheap/src/pyheap_dump.py @@ -19,6 +19,7 @@ import json import os.path import shutil +import subprocess import uuid from contextlib import closing from dataclasses import dataclass @@ -31,18 +32,26 @@ def dump_heap(args: argparse.Namespace) -> None: + target_pid: int gdb_pid: int nsenter_needed: bool - if _pid_namespace(args.pid) == _pid_namespace(os.getpid()): + if args.docker_container is not None: + print("Target is Docker container") + target_pid = _get_container_pid(args.docker_container) + gdb_pid = 1 + nsenter_needed = True + elif _pid_namespace(args.pid) == _pid_namespace(os.getpid()): print("Dumper and target are in same PID namespace") - gdb_pid = args.pid + target_pid = args.pid + gdb_pid = target_pid nsenter_needed = False else: + target_pid = args.pid gdb_pid = _target_pid_in_own_namespace(args.pid) print(f"Target process 
PID in its namespace: {gdb_pid}") nsenter_needed = True - print(f"Dumping heap from process {args.pid} into {args.file}") + print(f"Dumping heap from process {target_pid} into {args.file}") print(f"Max length of string representation is {args.str_repr_len}") injector_code = _load_code("injector.py") @@ -51,14 +60,14 @@ def dump_heap(args: argparse.Namespace) -> None: tmp_dir: CrossNamespaceTmpDir progress_file: CrossNamespaceFile heap_file: CrossNamespaceFile - with closing(CrossNamespaceTmpDir(args.pid)) as tmp_dir, closing( + with closing(CrossNamespaceTmpDir(target_pid)) as tmp_dir, closing( tmp_dir.create_file("progress.json", 0o600) ) as progress_file, closing( tmp_dir.create_file(f"{uuid.uuid4()}.pyheap", 0o600) ) as heap_file: cmd = [] if nsenter_needed: - cmd += ["nsenter", "-t", str(args.pid), "-a"] + cmd += ["nsenter", "-t", str(target_pid), "-a"] cmd += [ "gdb", "--readnow", @@ -100,6 +109,39 @@ def dump_heap(args: argparse.Namespace) -> None: exit(p.returncode) +def _get_container_pid(container: str) -> int: + proc = subprocess.run( + ["docker", "inspect", container], + text=True, + encoding="utf-8", + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + if proc.returncode != 0: + print("Cannot determine target PID:") + print(f"`docker inspect {container}` returned:") + print(proc.stderr) + exit(1) + + inspect_obj = json.loads(proc.stdout) + if len(inspect_obj) != 1: + print("Cannot determine target PID:") + print( + f"Expected 1 object in `docker inspect {container}`, but got {len(inspect_obj)}" + ) + exit(1) + + state = inspect_obj[0]["State"] + if state["Status"] != "running": + print("Cannot determine target PID:") + print("Container is not running") + exit(1) + + pid = int(state["Pid"]) + print(f"Target PID: {pid}") + return pid + + def _pid_namespace(pid: Union[int, str]) -> str: try: return os.readlink(f"/proc/{pid}/ns/pid") @@ -275,9 +317,13 @@ def _move_heap_file(heap_file: CrossNamespaceFile, final_path: str) -> None: def main() -> None: 
parser = argparse.ArgumentParser(description="Dump heap.", allow_abbrev=False) - parser.add_argument( - "--pid", "-p", type=int, required=True, help="target process PID" + + target_group = parser.add_mutually_exclusive_group(required=True) + target_group.add_argument("--pid", "-p", type=int, help="target process PID") + target_group.add_argument( + "--docker-container", type=str, help="target Docker container" ) + parser.add_argument("--file", "-f", type=str, required=True, help="heap file name") parser.add_argument( "--str-repr-len",