Skip to content

Commit

Permalink
dumper: support Docker containers as target
Browse files Browse the repository at this point in the history
Closes #169
  • Loading branch information
ivanyu committed Nov 23, 2022
1 parent a0a1b46 commit d85451a
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 42 deletions.
12 changes: 10 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,19 @@ $ python3 pyheap_dump.pyz -h
```
for additional options.

#### Containers and Namespaces
### Containers and Namespaces

PyHeap can attach to targets that are running in Linux namespaces. Docker containers are the most common example of this situation.

To use PyHeap with a containerized/namespaced process, find the process ID in the host PID namespace. Normally Docker containers have their own PID namespace and the process is running under the PID 1. However, on the host they have another PID. Check the process list with `top` or other process listing tool or use `docker inspect` (in `State`).
**Note:** Some Docker setups don't have real processes running on the same (virtual) machine where `docker ...` control commands are executed. One example is WSL 2 + Docker Desktop on Windows. PyHeap doesn't work in such environments.

If you want to use PyHeap on the root process in a Docker container, use `--docker-container` instead of `--pid/-p` and specify the name or ID:

```bash
$ sudo python3 pyheap_dump.pyz --docker-container <container_name> --file heap.pyheap
```

If it's not the root process in the container, or you work with another container system (e.g. systemd-nspawn) or just generic Linux namespaces, you need to find the target PID. Please keep in mind that this must be the PID from the dumper's point of view: processes in namespaces can have their own PID numbers. For example, if you're about to run the dumper on a Linux host and the target process is running in a container, check the process list with `ps` or `top` on the host. Use `--pid/-p` for the dumper.

Make sure the GDB executable is available in the target mount namespace. If the target is in a Docker container, you most likely need to install GDB inside it (please note this can be done in a running container as well).

Expand Down
50 changes: 17 additions & 33 deletions integration_tests/manual_test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,18 @@
import sys
import time
from contextlib import contextmanager, closing
from typing import Iterator
from typing import Iterator, Union
import pytest
from _pytest.tmpdir import TempPathFactory
from pyheap_ui.heap_reader import HeapReader


@pytest.mark.parametrize("docker", [False, True])
def test_e2e(docker: bool, test_heap_path: str) -> None:
with _inferior_process(docker) as ip_pid, _dumper_process(
test_heap_path, ip_pid, sudo_required=docker
with _inferior_process(docker) as ip_pid_or_container, _dumper_process(
test_heap_path, ip_pid_or_container, docker
) as dp:
print(f"Inferior process {ip_pid}")
print(f"Inferior process/container {ip_pid_or_container}")
print(f"Dumper process {dp.pid}")
dp.wait(10)
assert dp.returncode == 0
Expand Down Expand Up @@ -72,7 +72,7 @@ def _inferior_process_plain() -> Iterator[int]:


@contextmanager
def _inferior_process_docker() -> Iterator[int]:
def _inferior_process_docker() -> Iterator[str]:
python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
docker_proc = subprocess.run(
[
Expand All @@ -92,25 +92,8 @@ def _inferior_process_docker() -> Iterator[int]:

container_id = docker_proc.stdout.decode("utf-8").strip()

docker_proc = subprocess.run(
["docker", "inspect", container_id],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
if docker_proc.returncode != 0:
print(docker_proc.stdout.decode("utf-8"))
print(docker_proc.stderr.decode("utf-8"))
assert docker_proc.returncode == 0

inspect_output = json.loads(docker_proc.stdout.decode("utf-8"))
state = inspect_output[0]["State"]
assert state["Status"] == "running"
assert state["Running"] is True
pid = state["Pid"]
time.sleep(1)

try:
yield pid
yield container_id
finally:
subprocess.run(
["docker", "kill", container_id],
Expand All @@ -120,7 +103,7 @@ def _inferior_process_docker() -> Iterator[int]:


@contextmanager
def _inferior_process(docker: bool) -> Iterator[int]:
def _inferior_process(docker: bool) -> Iterator[Union[int, str]]:
if docker:
with _inferior_process_docker() as r:
yield r
Expand All @@ -131,19 +114,20 @@ def _inferior_process(docker: bool) -> Iterator[int]:

@contextmanager
def _dumper_process(
test_heap_path: str, inferiod_pid: int, sudo_required: bool
test_heap_path: str, pid_or_container: Union[int, str], docker: bool
) -> Iterator[subprocess.Popen]:
sudo_required = docker
cmd = []
if sudo_required:
cmd = ["sudo"]
cmd += [
sys.executable,
"dist/pyheap_dump.pyz",
"--pid",
str(inferiod_pid),
"--file",
test_heap_path,
]
cmd += [sys.executable, "dist/pyheap_dump.pyz"]

if docker:
cmd += ["--docker-container", str(pid_or_container)]
else:
cmd += ["--pid", str(pid_or_container)]
cmd += ["--file", test_heap_path]

dumper_proc = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
Expand Down
60 changes: 53 additions & 7 deletions pyheap/src/pyheap_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import json
import os.path
import shutil
import subprocess
import uuid
from contextlib import closing
from dataclasses import dataclass
Expand All @@ -31,18 +32,26 @@


def dump_heap(args: argparse.Namespace) -> None:
target_pid: int
gdb_pid: int
nsenter_needed: bool
if _pid_namespace(args.pid) == _pid_namespace(os.getpid()):
if args.docker_container is not None:
print("Target is Docker container")
target_pid = _get_container_pid(args.docker_container)
gdb_pid = 1
nsenter_needed = True
elif _pid_namespace(args.pid) == _pid_namespace(os.getpid()):
print("Dumper and target are in same PID namespace")
gdb_pid = args.pid
target_pid = args.pid
gdb_pid = target_pid
nsenter_needed = False
else:
target_pid = args.pid
gdb_pid = _target_pid_in_own_namespace(args.pid)
print(f"Target process PID in its namespace: {gdb_pid}")
nsenter_needed = True

print(f"Dumping heap from process {args.pid} into {args.file}")
print(f"Dumping heap from process {target_pid} into {args.file}")
print(f"Max length of string representation is {args.str_repr_len}")

injector_code = _load_code("injector.py")
Expand All @@ -51,14 +60,14 @@ def dump_heap(args: argparse.Namespace) -> None:
tmp_dir: CrossNamespaceTmpDir
progress_file: CrossNamespaceFile
heap_file: CrossNamespaceFile
with closing(CrossNamespaceTmpDir(args.pid)) as tmp_dir, closing(
with closing(CrossNamespaceTmpDir(target_pid)) as tmp_dir, closing(
tmp_dir.create_file("progress.json", 0o600)
) as progress_file, closing(
tmp_dir.create_file(f"{uuid.uuid4()}.pyheap", 0o600)
) as heap_file:
cmd = []
if nsenter_needed:
cmd += ["nsenter", "-t", str(args.pid), "-a"]
cmd += ["nsenter", "-t", str(target_pid), "-a"]
cmd += [
"gdb",
"--readnow",
Expand Down Expand Up @@ -100,6 +109,39 @@ def dump_heap(args: argparse.Namespace) -> None:
exit(p.returncode)


def _get_container_pid(container: str) -> int:
proc = subprocess.run(
["docker", "inspect", container],
text=True,
encoding="utf-8",
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
if proc.returncode != 0:
print("Cannot determine target PID:")
print(f"`docker inspect {container}` returned:")
print(proc.stderr)
exit(1)

inspect_obj = json.loads(proc.stdout)
if len(inspect_obj) != 1:
print("Cannot determine target PID:")
print(
f"Expected 1 object in `docker inspect {container}`, but got {len(inspect_obj)}"
)
exit(1)

state = inspect_obj[0]["State"]
if state["Status"] != "running":
print("Cannot determine target PID:")
print(f"Container is not running")
exit(1)

pid = int(state["Pid"])
print(f"Target PID: {pid}")
return pid


def _pid_namespace(pid: Union[int, str]) -> str:
try:
return os.readlink(f"/proc/{pid}/ns/pid")
Expand Down Expand Up @@ -275,9 +317,13 @@ def _move_heap_file(heap_file: CrossNamespaceFile, final_path: str) -> None:

def main() -> None:
parser = argparse.ArgumentParser(description="Dump heap.", allow_abbrev=False)
parser.add_argument(
"--pid", "-p", type=int, required=True, help="target process PID"

target_group = parser.add_mutually_exclusive_group(required=True)
target_group.add_argument("--pid", "-p", type=int, help="target process PID")
target_group.add_argument(
"--docker-container", type=str, help="target Docker container"
)

parser.add_argument("--file", "-f", type=str, required=True, help="heap file name")
parser.add_argument(
"--str-repr-len",
Expand Down

0 comments on commit d85451a

Please sign in to comment.