Skip to content

Commit

Permalink
feat: support running docker runtime stresstest in CI (#6100)
Browse files Browse the repository at this point in the history
Co-authored-by: Boxuan Li <[email protected]>
  • Loading branch information
xingyaoww and li-boxuan authored Jan 7, 2025
1 parent affbc49 commit 77aa843
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 16 deletions.
5 changes: 5 additions & 0 deletions openhands/core/config/sandbox_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ class SandboxConfig:
remote_runtime_resource_factor: Factor to scale the resource allocation for remote runtime.
Must be one of [1, 2, 4, 8]. Will only be used if the runtime is remote.
enable_gpu: Whether to enable GPU.
docker_runtime_kwargs: Additional keyword arguments to pass to the Docker runtime when running containers.
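This should be a JSON string that will be parsed into a dictionary. Note that the Docker runtime
unpacks this value directly as keyword arguments (`**kwargs`) when starting the container, so it
must already be a dictionary (not a raw string) by the time the container is created.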
Example in config.toml:
docker_runtime_kwargs = '{"mem_limit": "4g", "cpu_quota": 100000}'
"""

remote_runtime_api_url: str = 'http://localhost:8000'
Expand All @@ -61,6 +65,7 @@ class SandboxConfig:
close_delay: int = 900
remote_runtime_resource_factor: int = 1
enable_gpu: bool = False
docker_runtime_kwargs: str | None = None

def defaults_to_dict(self) -> dict:
"""Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
Expand Down
6 changes: 2 additions & 4 deletions openhands/runtime/impl/docker/docker_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,13 +267,11 @@ def _init_container(self):
environment=environment,
volumes=volumes,
device_requests=(
[docker.types.DeviceRequest(
capabilities=[['gpu']],
count=-1
)]
[docker.types.DeviceRequest(capabilities=[['gpu']], count=-1)]
if self.config.sandbox.enable_gpu
else None
),
**(self.config.sandbox.docker_runtime_kwargs or {}),
)
self.log('debug', f'Container started. Server url: {self.api_url}')
self.send_status_message('STATUS$CONTAINER_STARTED')
Expand Down
2 changes: 2 additions & 0 deletions tests/runtime/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ def _load_runtime(
use_workspace: bool | None = None,
force_rebuild_runtime: bool = False,
runtime_startup_env_vars: dict[str, str] | None = None,
docker_runtime_kwargs: dict[str, str] | None = None,
) -> Runtime:
sid = 'rt_' + str(random.randint(100000, 999999))

Expand All @@ -226,6 +227,7 @@ def _load_runtime(
config.run_as_openhands = run_as_openhands
config.sandbox.force_rebuild_runtime = force_rebuild_runtime
config.sandbox.keep_runtime_alive = False
config.sandbox.docker_runtime_kwargs = docker_runtime_kwargs
# Folder where all tests create their own folder
global test_mount_path
if use_workspace:
Expand Down
25 changes: 13 additions & 12 deletions tests/runtime/test_stress_docker_runtime.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
"""Stress tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""

import pytest
from conftest import TEST_IN_CI, _close_test_runtime, _load_runtime
from conftest import _close_test_runtime, _load_runtime

from openhands.core.logger import openhands_logger as logger
from openhands.events.action import CmdRunAction


@pytest.mark.skipif(
TEST_IN_CI,
reason='This test should only be run locally, not in CI.',
)
def test_stress_docker_runtime(temp_dir, runtime_cls, repeat=1):
runtime = _load_runtime(temp_dir, runtime_cls)
runtime = _load_runtime(
temp_dir,
runtime_cls,
docker_runtime_kwargs={
'cpu_period': 100000, # 100ms
'cpu_quota': 100000, # Can use 100ms out of each 100ms period (1 CPU)
'mem_limit': '4G', # 4 GB of memory
},
)

action = CmdRunAction(
command='sudo apt-get update && sudo apt-get install -y stress-ng'
Expand All @@ -23,11 +26,9 @@ def test_stress_docker_runtime(temp_dir, runtime_cls, repeat=1):
assert obs.exit_code == 0

for _ in range(repeat):
# run stress-ng stress tests for 5 minutes
# FIXME: this would make Docker daemon die, even though running this
# command on its own in the same container is fine
action = CmdRunAction(command='stress-ng --all 1 -t 5m')
action.timeout = 600
# run stress-ng stress tests for 1 minute
action = CmdRunAction(command='stress-ng --all 1 -t 1m')
action.timeout = 120
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
Expand Down

0 comments on commit 77aa843

Please sign in to comment.