Add test and fix incorrect assumption that project has a single worki…

…ng dir. (#303) * Add failing test and fix incorrect assumption that project has a single working dir. * ruff and mypy fixes * correct logic in mockllm * more mypy * Moved tool_call_utils into test_helpers package; asserted on tool call read results * remove working_dir from ComposeProject
UKGovernmentBEIS · Aug 30, 2024 · c019f1b · c019f1b
1 parent 442f5a9
commit c019f1b
Show file tree

Hide file tree

Showing 10 changed files with 231 additions and 33 deletions.
diff --git a/src/inspect_ai/model/_providers/mockllm.py b/src/inspect_ai/model/_providers/mockllm.py
@@ -1,4 +1,4 @@
-from typing import Any, Iterable, Iterator
+from typing import Any, Generator, Iterable, Iterator
 
 from inspect_ai.tool import ToolChoice, ToolInfo
 
@@ -27,19 +27,21 @@ def __init__(
         base_url: str | None = None,
         api_key: str | None = None,
         config: GenerateConfig = GenerateConfig(),
-        custom_outputs: Iterable[ModelOutput] = [],
+        custom_outputs: Iterable[ModelOutput]
+        | Generator[ModelOutput, None, None]
+        | None = None,
         **model_args: dict[str, Any],
     ) -> None:
         super().__init__(model_name, base_url, api_key, [], config)
         self.model_args = model_args
         if model_name != "model":
             raise ValueError(f"Invalid model name: {model_name}")
-        if custom_outputs:
+        if custom_outputs is not None:
             # We cannot rely on the user of this model giving custom_outputs the correct type since they do not call this constructor
             # Hence this type check and the one in generate.
-            if not isinstance(custom_outputs, Iterable):
+            if not isinstance(custom_outputs, Iterable | Generator):
                 raise ValueError(
-                    f"model_args['custom_outputs'] must be an Iterable, got {custom_outputs}"
+                    f"model_args['custom_outputs'] must be an Iterable or a Generator, got {custom_outputs}"
                 )
             self.outputs = iter(custom_outputs)
         else:
@@ -65,5 +67,7 @@ async def generate(
             raise ValueError("custom_outputs ran out of values")
 
         if not isinstance(output, ModelOutput):
-            raise ValueError("output must be an instance of ModelOutput")
+            raise ValueError(
+                f"output must be an instance of ModelOutput; got {type(output)}; content: {repr(output)}"
+            )
         return output
diff --git a/src/inspect_ai/util/_sandbox/docker/docker.py b/src/inspect_ai/util/_sandbox/docker/docker.py
@@ -117,10 +117,10 @@ async def sample_init(
         environments: dict[str, SandboxEnvironment] = {}
         for service, service_info in services.items():
             # update the project w/ the working directory
-            project.working_dir = await container_working_dir(service, project)
+            working_dir = await container_working_dir(service, project)
 
             # create the docker sandbox environemnt
-            docker_env = DockerSandboxEnvironment(service, project)
+            docker_env = DockerSandboxEnvironment(service, project, working_dir)
 
             # save reference to environment (mark as default if requested)
             is_default = service_info.get("x-default", False) is True
@@ -166,10 +166,11 @@ async def task_cleanup(
     async def cli_cleanup(cls, id: str | None) -> None:
         await cli_cleanup(id)
 
-    def __init__(self, service: str, project: ComposeProject) -> None:
+    def __init__(self, service: str, project: ComposeProject, working_dir: str) -> None:
         super().__init__()
         self._service = service
         self._project = project
+        self._working_dir = working_dir
 
     @override
     async def exec(
@@ -184,9 +185,9 @@ async def exec(
         # additional args
         args = []
 
-        final_cwd = Path(self._project.working_dir if cwd is None else cwd)
+        final_cwd = Path(self._working_dir if cwd is None else cwd)
         if not final_cwd.is_absolute():
-            final_cwd = self._project.working_dir / final_cwd
+            final_cwd = self._working_dir / final_cwd
 
         args.append("--workdir")
         args.append(str(final_cwd))
@@ -214,7 +215,7 @@ async def write_file(self, file: str, contents: str | bytes) -> None:
         sandbox_log(f"write_file: {file}")
 
         # resolve relative file paths
-        file = container_file(self._project, file)
+        file = self.container_file(file)
 
         # ensure that the directory exists
         parent = Path(file).parent.as_posix()
@@ -224,7 +225,7 @@ async def write_file(self, file: str, contents: str | bytes) -> None:
                 if "permission denied" in result.stderr.lower():
                     raise PermissionError(errno.EACCES, "Permission denied.", parent)
                 else:
-                    msg = f"Failed to create container directory {parent}: {result.stderr}"
+                    msg = f"Failed to create container directory {parent}: {result}"
                     raise RuntimeError(msg)
 
         # We want to be able to write a file in the container,
@@ -259,7 +260,7 @@ async def write_file(self, file: str, contents: str | bytes) -> None:
         # compose cp will leave the file owned by root
         await compose_cp(
             src=local_tmpfile.name,
-            dest=f"{self._service}:{container_file(self._project,container_tmpfile)}",
+            dest=f"{self._service}:{self.container_file(container_tmpfile)}",
             project=self._project,
         )
 
@@ -316,7 +317,7 @@ async def read_file(self, file: str, text: bool = True) -> Union[str, bytes]:
         with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
             # resolve relative file paths
             original_file = file
-            file = container_file(self._project, file)
+            file = self.container_file(file)
 
             # copy the file
             dest_file = os.path.join(temp_dir, os.path.basename(file))
@@ -353,6 +354,12 @@ async def read_file(self, file: str, text: bool = True) -> Union[str, bytes]:
                 async with aiofiles.open(dest_file, "rb") as f:
                     return await f.read()
 
+    def container_file(self, file: str) -> str:
+        path = Path(file)
+        if not path.is_absolute():
+            path = Path(self._working_dir) / path
+        return path.as_posix()
+
 
 async def container_working_dir(
     service: str, project: ComposeProject, default: str = "/"
@@ -366,10 +373,3 @@ async def container_working_dir(
             + f"{result.stderr}"
         )
         return default
-
-
-def container_file(project: ComposeProject, file: str) -> str:
-    path = Path(file)
-    if not path.is_absolute():
-        path = Path(project.working_dir) / path
-    return path.as_posix()
diff --git a/src/inspect_ai/util/_sandbox/docker/util.py b/src/inspect_ai/util/_sandbox/docker/util.py
@@ -17,15 +17,10 @@ class ComposeProject:
     name: str
     config: str | None
     env: dict[str, str] | None
-    working_dir: str
 
     @classmethod
     async def create(
-        cls,
-        name: str,
-        config: str | None,
-        env: dict[str, str] = {},
-        working_dir: str = "/",
+        cls, name: str, config: str | None, env: dict[str, str] = {}
     ) -> "ComposeProject":
         # ensure we have an auto-compose file if we need one
         config = (
@@ -36,19 +31,17 @@ async def create(
         await ensure_auto_compose_file(config)
 
         # return project
-        return ComposeProject(name, config, env, working_dir)
+        return ComposeProject(name, config, env)
 
     def __init__(
         self,
         name: str,
         config: str | None,
         env: dict[str, str],
-        working_dir: str,
     ) -> None:
         self.name = name
         self.config = config
         self.env = env
-        self.working_dir = working_dir
 
     def __eq__(self, other: object) -> bool:
         if not isinstance(other, ComposeProject):

diff --git a/src/inspect_ai/util/_sandbox/self_check.py b/src/inspect_ai/util/_sandbox/self_check.py
@@ -205,6 +205,8 @@ async def test_cwd_absolute(sandbox_env: SandboxEnvironment) -> None:
     await _cleanup_file(sandbox_env, file_name)
 
 
+# TODO: write a test for when cwd doesn't exist
+
 # Generic type variable for exceptions
 E = TypeVar("E", bound=BaseException)
 

diff --git a/tests/model/test_mock_model_llm.py b/tests/model/test_mock_model_llm.py
@@ -72,6 +72,16 @@ async def test_mock_generate_custom_invalid_iterable_string() -> None:
     assert "must be an instance of ModelOutput" in str(e_info.value)
 
 
+@pytest.mark.asyncio
+async def test_mock_generate_custom_invalid_iterable_number() -> None:
+    with pytest.raises(ValueError) as e_info:
+        get_model(
+            "mockllm/model",
+            custom_outputs=0,
+        )
+    assert "must be an Iterable or a Generator" in str(e_info.value)
+
+
 @pytest.mark.asyncio
 async def test_mock_generate_not_enough() -> None:
     model = get_model(

diff --git a/tests/tools/tool_call_utils.py → tests/test_helpers/tool_call_utils.py b/tests/tools/tool_call_utils.py → tests/test_helpers/tool_call_utils.py
diff --git a/tests/tools/test_sandbox_tool_eval.py b/tests/tools/test_sandbox_tool_eval.py
@@ -1,7 +1,7 @@
 import itertools
 
+from test_helpers.tool_call_utils import get_tool_calls, get_tool_response
 from test_helpers.tools import list_files, read_file
-from tool_call_utils import get_tool_calls, get_tool_response
 
 from inspect_ai import Task, eval
 from inspect_ai.dataset import Sample

diff --git a/tests/tools/test_tools.py b/tests/tools/test_tools.py
@@ -1,6 +1,11 @@
 from random import randint
 from typing import Literal
 
+from test_helpers.tool_call_utils import (
+    get_tool_call,
+    get_tool_calls,
+    get_tool_response,
+)
 from test_helpers.tools import addition, raise_error, read_file
 from test_helpers.utils import (
     skip_if_no_anthropic,
@@ -10,7 +15,6 @@
     skip_if_no_openai,
     skip_if_no_vertex,
 )
-from tool_call_utils import get_tool_call, get_tool_calls, get_tool_response
 
 from inspect_ai import Task, eval
 from inspect_ai.dataset import Sample

diff --git a/tests/util/sandbox/docker_compose_multiple_services/docker-compose.yaml b/tests/util/sandbox/docker_compose_multiple_services/docker-compose.yaml
@@ -0,0 +1,21 @@
+services:
+  default:
+    image: "python:3.12-bookworm"
+    command: "tail -f /dev/null"
+    init: true
+    network_mode: none
+    stop_grace_period: 1s
+  service_1:
+    image: "python:3.12-bookworm"
+    command: "tail -f /dev/null"
+    working_dir: /usr/local/dir1
+    init: true
+    network_mode: none
+    stop_grace_period: 1s
+  service_2:
+    image: "python:3.12-bookworm"
+    command: "tail -f /dev/null"
+    working_dir: /usr/local/dir2
+    init: true
+    network_mode: none
+    stop_grace_period: 1s