Skip to content

Commit

Permalink
Add test and fix incorrect assumption that project has a single worki…
Browse files Browse the repository at this point in the history
…ng dir. (#303)

* Add failing test and fix incorrect assumption that project
has a single working dir.

* ruff and mypy fixes

* correct logic in mockllm

* more mypy

* Moved tool_call_utils into test_helpers package; asserted on tool call read results

* remove working_dir from ComposeProject
  • Loading branch information
art-dsit authored Aug 30, 2024
1 parent 442f5a9 commit c019f1b
Show file tree
Hide file tree
Showing 10 changed files with 231 additions and 33 deletions.
16 changes: 10 additions & 6 deletions src/inspect_ai/model/_providers/mockllm.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Iterable, Iterator
from typing import Any, Generator, Iterable, Iterator

from inspect_ai.tool import ToolChoice, ToolInfo

Expand Down Expand Up @@ -27,19 +27,21 @@ def __init__(
base_url: str | None = None,
api_key: str | None = None,
config: GenerateConfig = GenerateConfig(),
custom_outputs: Iterable[ModelOutput] = [],
custom_outputs: Iterable[ModelOutput]
| Generator[ModelOutput, None, None]
| None = None,
**model_args: dict[str, Any],
) -> None:
super().__init__(model_name, base_url, api_key, [], config)
self.model_args = model_args
if model_name != "model":
raise ValueError(f"Invalid model name: {model_name}")
if custom_outputs:
if custom_outputs is not None:
# We cannot rely on the user of this model giving custom_outputs the correct type since they do not call this constructor
# Hence this type check and the one in generate.
if not isinstance(custom_outputs, Iterable):
if not isinstance(custom_outputs, Iterable | Generator):
raise ValueError(
f"model_args['custom_outputs'] must be an Iterable, got {custom_outputs}"
f"model_args['custom_outputs'] must be an Iterable or a Generator, got {custom_outputs}"
)
self.outputs = iter(custom_outputs)
else:
Expand All @@ -65,5 +67,7 @@ async def generate(
raise ValueError("custom_outputs ran out of values")

if not isinstance(output, ModelOutput):
raise ValueError("output must be an instance of ModelOutput")
raise ValueError(
f"output must be an instance of ModelOutput; got {type(output)}; content: {repr(output)}"
)
return output
32 changes: 16 additions & 16 deletions src/inspect_ai/util/_sandbox/docker/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,10 @@ async def sample_init(
environments: dict[str, SandboxEnvironment] = {}
for service, service_info in services.items():
# update the project w/ the working directory
project.working_dir = await container_working_dir(service, project)
working_dir = await container_working_dir(service, project)

# create the docker sandbox environemnt
docker_env = DockerSandboxEnvironment(service, project)
docker_env = DockerSandboxEnvironment(service, project, working_dir)

# save reference to environment (mark as default if requested)
is_default = service_info.get("x-default", False) is True
Expand Down Expand Up @@ -166,10 +166,11 @@ async def task_cleanup(
async def cli_cleanup(cls, id: str | None) -> None:
await cli_cleanup(id)

def __init__(self, service: str, project: ComposeProject) -> None:
def __init__(self, service: str, project: ComposeProject, working_dir: str) -> None:
super().__init__()
self._service = service
self._project = project
self._working_dir = working_dir

@override
async def exec(
Expand All @@ -184,9 +185,9 @@ async def exec(
# additional args
args = []

final_cwd = Path(self._project.working_dir if cwd is None else cwd)
final_cwd = Path(self._working_dir if cwd is None else cwd)
if not final_cwd.is_absolute():
final_cwd = self._project.working_dir / final_cwd
final_cwd = self._working_dir / final_cwd

args.append("--workdir")
args.append(str(final_cwd))
Expand Down Expand Up @@ -214,7 +215,7 @@ async def write_file(self, file: str, contents: str | bytes) -> None:
sandbox_log(f"write_file: {file}")

# resolve relative file paths
file = container_file(self._project, file)
file = self.container_file(file)

# ensure that the directory exists
parent = Path(file).parent.as_posix()
Expand All @@ -224,7 +225,7 @@ async def write_file(self, file: str, contents: str | bytes) -> None:
if "permission denied" in result.stderr.lower():
raise PermissionError(errno.EACCES, "Permission denied.", parent)
else:
msg = f"Failed to create container directory {parent}: {result.stderr}"
msg = f"Failed to create container directory {parent}: {result}"
raise RuntimeError(msg)

# We want to be able to write a file in the container,
Expand Down Expand Up @@ -259,7 +260,7 @@ async def write_file(self, file: str, contents: str | bytes) -> None:
# compose cp will leave the file owned by root
await compose_cp(
src=local_tmpfile.name,
dest=f"{self._service}:{container_file(self._project,container_tmpfile)}",
dest=f"{self._service}:{self.container_file(container_tmpfile)}",
project=self._project,
)

Expand Down Expand Up @@ -316,7 +317,7 @@ async def read_file(self, file: str, text: bool = True) -> Union[str, bytes]:
with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
# resolve relative file paths
original_file = file
file = container_file(self._project, file)
file = self.container_file(file)

# copy the file
dest_file = os.path.join(temp_dir, os.path.basename(file))
Expand Down Expand Up @@ -353,6 +354,12 @@ async def read_file(self, file: str, text: bool = True) -> Union[str, bytes]:
async with aiofiles.open(dest_file, "rb") as f:
return await f.read()

def container_file(self, file: str) -> str:
path = Path(file)
if not path.is_absolute():
path = Path(self._working_dir) / path
return path.as_posix()


async def container_working_dir(
service: str, project: ComposeProject, default: str = "/"
Expand All @@ -366,10 +373,3 @@ async def container_working_dir(
+ f"{result.stderr}"
)
return default


def container_file(project: ComposeProject, file: str) -> str:
path = Path(file)
if not path.is_absolute():
path = Path(project.working_dir) / path
return path.as_posix()
11 changes: 2 additions & 9 deletions src/inspect_ai/util/_sandbox/docker/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,10 @@ class ComposeProject:
name: str
config: str | None
env: dict[str, str] | None
working_dir: str

@classmethod
async def create(
cls,
name: str,
config: str | None,
env: dict[str, str] = {},
working_dir: str = "/",
cls, name: str, config: str | None, env: dict[str, str] = {}
) -> "ComposeProject":
# ensure we have an auto-compose file if we need one
config = (
Expand All @@ -36,19 +31,17 @@ async def create(
await ensure_auto_compose_file(config)

# return project
return ComposeProject(name, config, env, working_dir)
return ComposeProject(name, config, env)

def __init__(
self,
name: str,
config: str | None,
env: dict[str, str],
working_dir: str,
) -> None:
self.name = name
self.config = config
self.env = env
self.working_dir = working_dir

def __eq__(self, other: object) -> bool:
if not isinstance(other, ComposeProject):
Expand Down
2 changes: 2 additions & 0 deletions src/inspect_ai/util/_sandbox/self_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,8 @@ async def test_cwd_absolute(sandbox_env: SandboxEnvironment) -> None:
await _cleanup_file(sandbox_env, file_name)


# TODO: write a test for when cwd doesn't exist

# Generic type variable for exceptions
E = TypeVar("E", bound=BaseException)

Expand Down
10 changes: 10 additions & 0 deletions tests/model/test_mock_model_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,16 @@ async def test_mock_generate_custom_invalid_iterable_string() -> None:
assert "must be an instance of ModelOutput" in str(e_info.value)


@pytest.mark.asyncio
async def test_mock_generate_custom_invalid_iterable_number() -> None:
with pytest.raises(ValueError) as e_info:
get_model(
"mockllm/model",
custom_outputs=0,
)
assert "must be an Iterable or a Generator" in str(e_info.value)


@pytest.mark.asyncio
async def test_mock_generate_not_enough() -> None:
model = get_model(
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion tests/tools/test_sandbox_tool_eval.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import itertools

from test_helpers.tool_call_utils import get_tool_calls, get_tool_response
from test_helpers.tools import list_files, read_file
from tool_call_utils import get_tool_calls, get_tool_response

from inspect_ai import Task, eval
from inspect_ai.dataset import Sample
Expand Down
6 changes: 5 additions & 1 deletion tests/tools/test_tools.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
from random import randint
from typing import Literal

from test_helpers.tool_call_utils import (
get_tool_call,
get_tool_calls,
get_tool_response,
)
from test_helpers.tools import addition, raise_error, read_file
from test_helpers.utils import (
skip_if_no_anthropic,
Expand All @@ -10,7 +15,6 @@
skip_if_no_openai,
skip_if_no_vertex,
)
from tool_call_utils import get_tool_call, get_tool_calls, get_tool_response

from inspect_ai import Task, eval
from inspect_ai.dataset import Sample
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
services:
default:
image: "python:3.12-bookworm"
command: "tail -f /dev/null"
init: true
network_mode: none
stop_grace_period: 1s
service_1:
image: "python:3.12-bookworm"
command: "tail -f /dev/null"
working_dir: /usr/local/dir1
init: true
network_mode: none
stop_grace_period: 1s
service_2:
image: "python:3.12-bookworm"
command: "tail -f /dev/null"
working_dir: /usr/local/dir2
init: true
network_mode: none
stop_grace_period: 1s
Loading

0 comments on commit c019f1b

Please sign in to comment.