From 063b320fd6864656e5f2894f0fec6a94f8c89f2c Mon Sep 17 00:00:00 2001
From: aisi-inspect <166920645+aisi-inspect@users.noreply.github.com>
Date: Thu, 3 Oct 2024 14:02:29 +0000
Subject: [PATCH] Built site for gh-pages

---
 .nojekyll       |   2 +-
 agents-api.html |   3 +-
 agents.html     | 167 ++++++++++++++++++++++++------------------------
 eval-logs.html  |   2 +-
 index.html      |   2 +-
 log-viewer.html |   2 +-
 search.json     |  19 ++++--
 sitemap.xml     |   8 +--
 tools.html      |  97 +++++++++++++---------------
 tutorial.html   |  26 ++++----
 vscode.html     |   2 +-
 workflow.html   |   2 +-
 12 files changed, 168 insertions(+), 164 deletions(-)
diff --git a/.nojekyll b/.nojekyll
index f95ee5a4d..00bc099a0 100644
--- a/.nojekyll
+++ b/.nojekyll
@@ -1 +1 @@
-e5d55f6d
\ No newline at end of file
+fb91b253
\ No newline at end of file
diff --git a/agents-api.html b/agents-api.html
index b1f3f8cf8..eb1ef82a1 100644
--- a/agents-api.html
+++ b/agents-api.html
@@ -421,7 +421,7 @@ <h3 class="anchored" data-anchor-id="custom-loop">Custom Loop</h3>
 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> agent_loop():</span>
 <span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a>    <span class="cf">async</span> <span class="kw">def</span> solve(state: TaskState, generate: Generate):</span>
 <span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a>        model <span class="op">=</span> get_model()</span>
-<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a>        <span class="cf">while</span> <span class="va">True</span>:</span>
+<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a>        <span class="cf">while</span> <span class="kw">not</span> state.completed:</span>
 <span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a>            <span class="co"># call model</span></span>
 <span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a>            output <span class="op">=</span> <span class="cf">await</span> model.generate(state.messages, state.tools)</span>
 <span id="cb5-8"><a href="#cb5-8" aria-hidden="true" tabindex="-1"></a></span>
@@ -438,6 +438,7 @@ <h3 class="anchored" data-anchor-id="custom-loop">Custom Loop</h3>
 <span id="cb5-19"><a href="#cb5-19" aria-hidden="true" tabindex="-1"></a>        <span class="cf">return</span> state</span>
 <span id="cb5-20"><a href="#cb5-20" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb5-21"><a href="#cb5-21" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> solve</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<p>The <code>state.completed</code> flag is automatically set to <code>True</code> if <code>max_messages</code> for the task is exceeded, so we check it at the top of the loop.</p>
 <p>You can imagine several ways you might want to customise this loop:</p>
 <ol type="1">
 <li>Adding another termination condition for the output satisfying some criteria.</li>
diff --git a/agents.html b/agents.html
index df268cea4..3d843df44 100644
--- a/agents.html
+++ b/agents.html
@@ -516,7 +516,7 @@ <h2 class="anchored" data-anchor-id="sec-custom-scaffolding">Custom Scaffold</h2
 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> agent_loop():</span>
 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a>    <span class="cf">async</span> <span class="kw">def</span> solve(state: TaskState, generate: Generate):</span>
 <span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a>        model <span class="op">=</span> get_model()</span>
-<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a>        <span class="cf">while</span> <span class="va">True</span>:</span>
+<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a>        <span class="cf">while</span> <span class="kw">not</span> state.completed:</span>
 <span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a>            <span class="co"># call model</span></span>
 <span id="cb2-7"><a href="#cb2-7" aria-hidden="true" tabindex="-1"></a>            output <span class="op">=</span> <span class="cf">await</span> model.generate(state.messages, state.tools)</span>
 <span id="cb2-8"><a href="#cb2-8" aria-hidden="true" tabindex="-1"></a></span>
@@ -533,6 +533,7 @@ <h2 class="anchored" data-anchor-id="sec-custom-scaffolding">Custom Scaffold</h2
 <span id="cb2-19"><a href="#cb2-19" aria-hidden="true" tabindex="-1"></a>        <span class="cf">return</span> state</span>
 <span id="cb2-20"><a href="#cb2-20" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb2-21"><a href="#cb2-21" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> solve</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<p>The <code>state.completed</code> flag is automatically set to <code>True</code> if <code>max_messages</code> for the task is exceeded, so we check it at the top of the loop.</p>
 <p>You can imagine several ways you might want to customise this loop:</p>
 <ol type="1">
 <li>Adding another termination condition for the output satisfying some criteria.</li>
@@ -924,11 +925,7 @@ <h4 class="anchored" data-anchor-id="files">Files</h4>
 </section>
 <section id="script" class="level4">
 <h4 class="anchored" data-anchor-id="script">Script</h4>
-<p>If there is a Sample <code>setup</code> script it will be executed within the default sandbox environment after any Sample <code>files</code> are copied into the environment. The <code>setup</code> field can be either the script contents, a file path containing the script, or a base64 encoded <a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs">Data URL</a>.</p>
-<p>The <code>setup</code> script is by default interpreted as a bash script, however you can have it executed by another interpreter using a shebang comment. For example, this will be executed as a Python script:</p>
-<div class="sourceCode" id="cb12"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="co">#!/usr/bin/env python3</span></span>
-<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a><span class="ex">print</span><span class="er">(</span><span class="st">'hello from python'</span><span class="kw">)</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<p>If there is a Sample <code>setup</code> bash script it will be executed within the default sandbox environment after any Sample <code>files</code> are copied into the environment. The <code>setup</code> field can be either the script contents, a file path containing the script, or a base64 encoded <a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs">Data URL</a>.</p>
 </section>
 </section>
 <section id="sec-docker-configuration" class="level3">
@@ -964,14 +961,14 @@ <h3 class="anchored" data-anchor-id="sec-docker-configuration">Docker Configurat
 <div class="code-with-filename-file">
 <pre><strong>compose.yaml</strong></pre>
 </div>
-<div class="sourceCode" id="cb13" data-filename="compose.yaml"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="fu">services</span><span class="kw">:</span></span>
-<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">default</span><span class="kw">:</span><span class="at"> </span></span>
-<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">build</span><span class="kw">:</span><span class="at"> .</span></span>
-<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">init</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
-<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">command</span><span class="kw">:</span><span class="at"> tail -f /dev/null</span></span>
-<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">cpus</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span></span>
-<span id="cb13-7"><a href="#cb13-7" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">mem_limit</span><span class="kw">:</span><span class="at"> 0.5gb</span></span>
-<span id="cb13-8"><a href="#cb13-8" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">network_mode</span><span class="kw">:</span><span class="at"> none</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode" id="cb12" data-filename="compose.yaml"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="fu">services</span><span class="kw">:</span></span>
+<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">default</span><span class="kw">:</span><span class="at"> </span></span>
+<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">build</span><span class="kw">:</span><span class="at"> .</span></span>
+<span id="cb12-4"><a href="#cb12-4" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">init</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
+<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">command</span><span class="kw">:</span><span class="at"> tail -f /dev/null</span></span>
+<span id="cb12-6"><a href="#cb12-6" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">cpus</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span></span>
+<span id="cb12-7"><a href="#cb12-7" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">mem_limit</span><span class="kw">:</span><span class="at"> 0.5gb</span></span>
+<span id="cb12-8"><a href="#cb12-8" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">network_mode</span><span class="kw">:</span><span class="at"> none</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 <p>The <code>init: true</code> entry enables the container to respond to shutdown requests. The <code>command</code> is provided to prevent the container from exiting after it starts.</p>
 <p>Here is what a simple <code>compose.yaml</code> would look like for a local pre-built image named <code>ctf-agent-environment</code> (resource and network limits excluded for brevity):</p>
@@ -979,34 +976,34 @@ <h3 class="anchored" data-anchor-id="sec-docker-configuration">Docker Configurat
 <div class="code-with-filename-file">
 <pre><strong>compose.yaml</strong></pre>
 </div>
-<div class="sourceCode" id="cb14" data-filename="compose.yaml"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="fu">services</span><span class="kw">:</span></span>
-<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">default</span><span class="kw">:</span><span class="at"> </span></span>
-<span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">image</span><span class="kw">:</span><span class="at"> ctf-agent-environment</span></span>
-<span id="cb14-4"><a href="#cb14-4" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">x-local</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
-<span id="cb14-5"><a href="#cb14-5" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">init</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
-<span id="cb14-6"><a href="#cb14-6" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">command</span><span class="kw">:</span><span class="at"> tail -f /dev/null</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode" id="cb13" data-filename="compose.yaml"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="fu">services</span><span class="kw">:</span></span>
+<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">default</span><span class="kw">:</span><span class="at"> </span></span>
+<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">image</span><span class="kw">:</span><span class="at"> ctf-agent-environment</span></span>
+<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">x-local</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
+<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">init</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
+<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">command</span><span class="kw">:</span><span class="at"> tail -f /dev/null</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 <p>The <code>ctf-agent-environment</code> is not an image that exists on a remote registry, so we add the <code>x-local: true</code> to indicate that it should not be pulled. If local images are tagged, they also will not be pulled by default (so <code>x-local: true</code> is not required). For example:</p>
 <div class="code-with-filename">
 <div class="code-with-filename-file">
 <pre><strong>compose.yaml</strong></pre>
 </div>
-<div class="sourceCode" id="cb15" data-filename="compose.yaml"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="fu">services</span><span class="kw">:</span></span>
-<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">default</span><span class="kw">:</span><span class="at"> </span></span>
-<span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">image</span><span class="kw">:</span><span class="at"> ctf-agent-environment:1.0.0</span></span>
-<span id="cb15-4"><a href="#cb15-4" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">init</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
-<span id="cb15-5"><a href="#cb15-5" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">command</span><span class="kw">:</span><span class="at"> tail -f /dev/null</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode" id="cb14" data-filename="compose.yaml"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="fu">services</span><span class="kw">:</span></span>
+<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">default</span><span class="kw">:</span><span class="at"> </span></span>
+<span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">image</span><span class="kw">:</span><span class="at"> ctf-agent-environment:1.0.0</span></span>
+<span id="cb14-4"><a href="#cb14-4" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">init</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
+<span id="cb14-5"><a href="#cb14-5" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">command</span><span class="kw">:</span><span class="at"> tail -f /dev/null</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 <p>If we are using an image from a remote registry we similarly don’t need to include <code>x-local</code>:</p>
 <div class="code-with-filename">
 <div class="code-with-filename-file">
 <pre><strong>compose.yaml</strong></pre>
 </div>
-<div class="sourceCode" id="cb16" data-filename="compose.yaml"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="fu">services</span><span class="kw">:</span></span>
-<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">default</span><span class="kw">:</span></span>
-<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">image</span><span class="kw">:</span><span class="at"> python:3.12-bookworm</span></span>
-<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">init</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
-<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">command</span><span class="kw">:</span><span class="at"> tail -f /dev/null</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode" id="cb15" data-filename="compose.yaml"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="fu">services</span><span class="kw">:</span></span>
+<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">default</span><span class="kw">:</span></span>
+<span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">image</span><span class="kw">:</span><span class="at"> python:3.12-bookworm</span></span>
+<span id="cb15-4"><a href="#cb15-4" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">init</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
+<span id="cb15-5"><a href="#cb15-5" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">command</span><span class="kw">:</span><span class="at"> tail -f /dev/null</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 <p>See the <a href="https://docs.docker.com/compose/compose-file/">Docker Compose</a> documentation for information on all available container options.</p>
 <section id="multiple-environments" class="level4">
@@ -1016,23 +1013,23 @@ <h4 class="anchored" data-anchor-id="multiple-environments">Multiple Environment
 <div class="code-with-filename-file">
 <pre><strong>compose.yaml</strong></pre>
 </div>
-<div class="sourceCode" id="cb17" data-filename="compose.yaml"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="fu">services</span><span class="kw">:</span></span>
-<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">default</span><span class="kw">:</span></span>
-<span id="cb17-3"><a href="#cb17-3" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">image</span><span class="kw">:</span><span class="at"> ctf-agent-environment</span></span>
-<span id="cb17-4"><a href="#cb17-4" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">x-local</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
-<span id="cb17-5"><a href="#cb17-5" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">init</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
-<span id="cb17-6"><a href="#cb17-6" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">cpus</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span></span>
-<span id="cb17-7"><a href="#cb17-7" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">mem_limit</span><span class="kw">:</span><span class="at"> 0.5gb</span></span>
-<span id="cb17-8"><a href="#cb17-8" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">victim</span><span class="kw">:</span></span>
-<span id="cb17-9"><a href="#cb17-9" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">image</span><span class="kw">:</span><span class="at"> ctf-victim-environment</span></span>
-<span id="cb17-10"><a href="#cb17-10" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">x-local</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
-<span id="cb17-11"><a href="#cb17-11" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">init</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
-<span id="cb17-12"><a href="#cb17-12" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">cpus</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span></span>
-<span id="cb17-13"><a href="#cb17-13" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">mem_limit</span><span class="kw">:</span><span class="at"> 1gb</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode" id="cb16" data-filename="compose.yaml"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="fu">services</span><span class="kw">:</span></span>
+<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">default</span><span class="kw">:</span></span>
+<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">image</span><span class="kw">:</span><span class="at"> ctf-agent-environment</span></span>
+<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">x-local</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
+<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">init</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
+<span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">cpus</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span></span>
+<span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">mem_limit</span><span class="kw">:</span><span class="at"> 0.5gb</span></span>
+<span id="cb16-8"><a href="#cb16-8" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">victim</span><span class="kw">:</span></span>
+<span id="cb16-9"><a href="#cb16-9" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">image</span><span class="kw">:</span><span class="at"> ctf-victim-environment</span></span>
+<span id="cb16-10"><a href="#cb16-10" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">x-local</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
+<span id="cb16-11"><a href="#cb16-11" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">init</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
+<span id="cb16-12"><a href="#cb16-12" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">cpus</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span></span>
+<span id="cb16-13"><a href="#cb16-13" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">mem_limit</span><span class="kw">:</span><span class="at"> 1gb</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 <p>The first environment listed is the “default” environment, and can be accessed from within a tool with a normal call to <code>sandbox()</code>. Other environments would be accessed by name, for example:</p>
-<div class="sourceCode" id="cb18"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a>sandbox()          <span class="co"># default sandbox environment</span></span>
-<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a>sandbox(<span class="st">"victim"</span>)  <span class="co"># named sandbox environment</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode" id="cb17"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a>sandbox()          <span class="co"># default sandbox environment</span></span>
+<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a>sandbox(<span class="st">"victim"</span>)  <span class="co"># named sandbox environment</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <div class="callout callout-style-default callout-note callout-titled" data-apperance="simple">
 <div class="callout-header d-flex align-content-center">
 <div class="callout-icon-container">
@@ -1050,53 +1047,53 @@ <h4 class="anchored" data-anchor-id="multiple-environments">Multiple Environment
 <section id="infrastructure" class="level4">
 <h4 class="anchored" data-anchor-id="infrastructure">Infrastructure</h4>
 <p>Note that in many cases you’ll want to provision additional infrastructure (e.g.&nbsp;other hosts or volumes). For example, here we define an additional container (“writer”) as well as a volume shared between the default container and the writer container:</p>
-<div class="sourceCode" id="cb19"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="fu">services</span><span class="kw">:</span></span>
-<span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">default</span><span class="kw">:</span><span class="at"> </span></span>
-<span id="cb19-3"><a href="#cb19-3" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">image</span><span class="kw">:</span><span class="at"> ctf-agent-environment</span></span>
-<span id="cb19-4"><a href="#cb19-4" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">x-local</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
-<span id="cb19-5"><a href="#cb19-5" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">init</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
-<span id="cb19-6"><a href="#cb19-6" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">volumes</span><span class="kw">:</span></span>
-<span id="cb19-7"><a href="#cb19-7" aria-hidden="true" tabindex="-1"></a><span class="at">      </span><span class="kw">-</span><span class="at"> ctf-challenge-volume:/shared-data</span></span>
-<span id="cb19-8"><a href="#cb19-8" aria-hidden="true" tabindex="-1"></a><span class="at">    </span></span>
-<span id="cb19-9"><a href="#cb19-9" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">writer</span><span class="kw">:</span></span>
-<span id="cb19-10"><a href="#cb19-10" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">image</span><span class="kw">:</span><span class="at"> ctf-challenge-writer</span></span>
-<span id="cb19-11"><a href="#cb19-11" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">x-local</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
-<span id="cb19-12"><a href="#cb19-12" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">init</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
-<span id="cb19-13"><a href="#cb19-13" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">volumes</span><span class="kw">:</span></span>
-<span id="cb19-14"><a href="#cb19-14" aria-hidden="true" tabindex="-1"></a><span class="at">      </span><span class="kw">-</span><span class="at"> ctf-challenge-volume:/shared-data</span></span>
-<span id="cb19-15"><a href="#cb19-15" aria-hidden="true" tabindex="-1"></a><span class="fu">volumes</span><span class="kw">:</span></span>
-<span id="cb19-16"><a href="#cb19-16" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">ctf-challenge-volume</span><span class="kw">:</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode" id="cb18"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="fu">services</span><span class="kw">:</span></span>
+<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">default</span><span class="kw">:</span><span class="at"> </span></span>
+<span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">image</span><span class="kw">:</span><span class="at"> ctf-agent-environment</span></span>
+<span id="cb18-4"><a href="#cb18-4" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">x-local</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
+<span id="cb18-5"><a href="#cb18-5" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">init</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
+<span id="cb18-6"><a href="#cb18-6" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">volumes</span><span class="kw">:</span></span>
+<span id="cb18-7"><a href="#cb18-7" aria-hidden="true" tabindex="-1"></a><span class="at">      </span><span class="kw">-</span><span class="at"> ctf-challenge-volume:/shared-data</span></span>
+<span id="cb18-8"><a href="#cb18-8" aria-hidden="true" tabindex="-1"></a><span class="at">    </span></span>
+<span id="cb18-9"><a href="#cb18-9" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">writer</span><span class="kw">:</span></span>
+<span id="cb18-10"><a href="#cb18-10" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">image</span><span class="kw">:</span><span class="at"> ctf-challenge-writer</span></span>
+<span id="cb18-11"><a href="#cb18-11" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">x-local</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
+<span id="cb18-12"><a href="#cb18-12" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">init</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
+<span id="cb18-13"><a href="#cb18-13" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">volumes</span><span class="kw">:</span></span>
+<span id="cb18-14"><a href="#cb18-14" aria-hidden="true" tabindex="-1"></a><span class="at">      </span><span class="kw">-</span><span class="at"> ctf-challenge-volume:/shared-data</span></span>
+<span id="cb18-15"><a href="#cb18-15" aria-hidden="true" tabindex="-1"></a><span class="fu">volumes</span><span class="kw">:</span></span>
+<span id="cb18-16"><a href="#cb18-16" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">ctf-challenge-volume</span><span class="kw">:</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <p>See the documentation on <a href="https://docs.docker.com/compose/compose-file/">Docker Compose</a> files for information on their full schema and feature set.</p>
 </section>
 <section id="sample-metadata" class="level4">
 <h4 class="anchored" data-anchor-id="sample-metadata">Sample Metadata</h4>
 <p>You might want to interpolate Sample metadata into your Docker compose files. You can do this using the standard compose environment variable syntax, where any metadata in the Sample is made available with a <code>SAMPLE_METADATA_</code> prefix. For example, you might have a per-sample memory limit (with a default value of 0.5gb if unspecified):</p>
-<div class="sourceCode" id="cb20"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="fu">services</span><span class="kw">:</span></span>
-<span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">default</span><span class="kw">:</span></span>
-<span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">image</span><span class="kw">:</span><span class="at"> ctf-agent-environment</span></span>
-<span id="cb20-4"><a href="#cb20-4" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">x-local</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
-<span id="cb20-5"><a href="#cb20-5" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">init</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
-<span id="cb20-6"><a href="#cb20-6" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">cpus</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span></span>
-<span id="cb20-7"><a href="#cb20-7" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">mem_limit</span><span class="kw">:</span><span class="at"> ${SAMPLE_METDATA_MEMORY_LIMIT-0.5gb}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode" id="cb19"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="fu">services</span><span class="kw">:</span></span>
+<span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">default</span><span class="kw">:</span></span>
+<span id="cb19-3"><a href="#cb19-3" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">image</span><span class="kw">:</span><span class="at"> ctf-agent-environment</span></span>
+<span id="cb19-4"><a href="#cb19-4" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">x-local</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
+<span id="cb19-5"><a href="#cb19-5" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">init</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
+<span id="cb19-6"><a href="#cb19-6" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">cpus</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span></span>
+<span id="cb19-7"><a href="#cb19-7" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">mem_limit</span><span class="kw">:</span><span class="at"> ${SAMPLE_METADATA_MEMORY_LIMIT-0.5gb}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <p>Note that <code>-</code> suffix that provides the default value of 0.5gb. This is important to include so that when the compose file is read <em>without</em> the context of a Sample (for example, when pulling/building images at startup) that a default value is available.</p>
 </section>
 </section>
 <section id="environment-cleanup" class="level3">
 <h3 class="anchored" data-anchor-id="environment-cleanup">Environment Cleanup</h3>
 <p>When a task is completed, Inspect will automatically cleanup resources associated with the sandbox environment (e.g.&nbsp;containers, images, and networks). If for any reason resources are not cleaned up (e.g.&nbsp;if the cleanup itself is interrupted via Ctrl+C) you can globally cleanup all environments with the <code>inspect sandbox cleanup</code> command. For example, here we cleanup all environments associated with the <code>docker</code> provider:</p>
-<div class="sourceCode" id="cb21"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="ex">$</span> inspect sandbox cleanup docker</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode" id="cb20"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="ex">$</span> inspect sandbox cleanup docker</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <p>In some cases you may <em>prefer</em> not to cleanup environments. For example, you might want to examine their state interactively from the shell in order to debug an agent. Use the <code>--no-sandbox-cleanup</code> argument to do this:</p>
-<div class="sourceCode" id="cb22"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="ex">$</span> inspect eval ctf.py <span class="at">--no-sandbox-cleanup</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode" id="cb21"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="ex">$</span> inspect eval ctf.py <span class="at">--no-sandbox-cleanup</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <p>You can also do this when using <code>eval(</code>):</p>
-<div class="sourceCode" id="cb23"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="bu">eval</span>(<span class="st">"ctf.py"</span>, sandbox_cleanup <span class="op">=</span> <span class="va">False</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode" id="cb22"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="bu">eval</span>(<span class="st">"ctf.py"</span>, sandbox_cleanup <span class="op">=</span> <span class="va">False</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <p>When you do this, you’ll see a list of sandbox containers printed out which includes the ID of each container. You can then use this ID to get a shell inside one of the containers:</p>
-<div class="sourceCode" id="cb24"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a><span class="ex">docker</span> exec <span class="at">-it</span> inspect-intercode_ctf-ipg9tbviycpvlgwja5anyvn-default-1 bash</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode" id="cb23"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="ex">docker</span> exec <span class="at">-it</span> inspect-intercode_ctf-ipg9tbviycpvlgwja5anyvn-default-1 bash</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <p>When you no longer need the environments, you can clean them up either all at once or individually:</p>
-<div class="sourceCode" id="cb25"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a><span class="co"># cleanup all environments</span></span>
-<span id="cb25-2"><a href="#cb25-2" aria-hidden="true" tabindex="-1"></a><span class="ex">inspect</span> sandbox cleanup docker</span>
-<span id="cb25-3"><a href="#cb25-3" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb25-4"><a href="#cb25-4" aria-hidden="true" tabindex="-1"></a><span class="co"># cleanup single environment</span></span>
-<span id="cb25-5"><a href="#cb25-5" aria-hidden="true" tabindex="-1"></a><span class="ex">inspect</span> sandbox cleanup docker inspect-intercode_ctf-ipg9tbviycpvlgwja5anyvn</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode" id="cb24"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a><span class="co"># cleanup all environments</span></span>
+<span id="cb24-2"><a href="#cb24-2" aria-hidden="true" tabindex="-1"></a><span class="ex">inspect</span> sandbox cleanup docker</span>
+<span id="cb24-3"><a href="#cb24-3" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb24-4"><a href="#cb24-4" aria-hidden="true" tabindex="-1"></a><span class="co"># cleanup single environment</span></span>
+<span id="cb24-5"><a href="#cb24-5" aria-hidden="true" tabindex="-1"></a><span class="ex">inspect</span> sandbox cleanup docker inspect-intercode_ctf-ipg9tbviycpvlgwja5anyvn</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </section>
 <section id="resource-management" class="level3">
 <h3 class="anchored" data-anchor-id="resource-management">Resource Management</h3>
@@ -1110,13 +1107,13 @@ <h4 class="anchored" data-anchor-id="running-containers">Running Containers</h4>
 <div class="code-with-filename-file">
 <pre><strong>compose.yaml</strong></pre>
 </div>
-<div class="sourceCode" id="cb26" data-filename="compose.yaml"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a><span class="fu">services</span><span class="kw">:</span></span>
-<span id="cb26-2"><a href="#cb26-2" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">default</span><span class="kw">:</span><span class="at"> </span></span>
-<span id="cb26-3"><a href="#cb26-3" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">image</span><span class="kw">:</span><span class="at"> ctf-agent-environment</span></span>
-<span id="cb26-4"><a href="#cb26-4" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">x-local</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
-<span id="cb26-5"><a href="#cb26-5" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">command</span><span class="kw">:</span><span class="at"> tail -f /dev/null</span></span>
-<span id="cb26-6"><a href="#cb26-6" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">cpus</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span></span>
-<span id="cb26-7"><a href="#cb26-7" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">mem_limit</span><span class="kw">:</span><span class="at"> 0.5gb</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode" id="cb25" data-filename="compose.yaml"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a><span class="fu">services</span><span class="kw">:</span></span>
+<span id="cb25-2"><a href="#cb25-2" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="fu">default</span><span class="kw">:</span><span class="at"> </span></span>
+<span id="cb25-3"><a href="#cb25-3" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">image</span><span class="kw">:</span><span class="at"> ctf-agent-environment</span></span>
+<span id="cb25-4"><a href="#cb25-4" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">x-local</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
+<span id="cb25-5"><a href="#cb25-5" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">command</span><span class="kw">:</span><span class="at"> tail -f /dev/null</span></span>
+<span id="cb25-6"><a href="#cb25-6" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">cpus</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span></span>
+<span id="cb25-7"><a href="#cb25-7" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">mem_limit</span><span class="kw">:</span><span class="at"> 0.5gb</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 </section>
 <section id="concurrent-execution" class="level4">
@@ -1128,7 +1125,7 @@ <h4 class="anchored" data-anchor-id="concurrent-execution">Concurrent Execution<
 <section id="troubleshooting" class="level3">
 <h3 class="anchored" data-anchor-id="troubleshooting">Troubleshooting</h3>
 <p>You can view more detailed logging around the creation and use of sandbox environments by using the <code>sandbox</code> log level. For example:</p>
-<div class="sourceCode" id="cb27"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb27-1"><a href="#cb27-1" aria-hidden="true" tabindex="-1"></a><span class="ex">$</span> inspect eval ctf.py <span class="at">--log-level</span> sandbox</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode" id="cb26"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a><span class="ex">$</span> inspect eval ctf.py <span class="at">--log-level</span> sandbox</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <p>The sandbox log level is just above <code>warning</code> (so it will not show <code>http</code> or <code>debug</code> level messages).</p>
 
 
diff --git a/eval-logs.html b/eval-logs.html
index d5f5f7d44..a1d72bb62 100644
--- a/eval-logs.html
+++ b/eval-logs.html
@@ -1102,7 +1102,7 @@ <h3 class="anchored" data-anchor-id="reading-logs">Reading Logs</h3>
     </div>
   </div>
 </footer>
-<script>var lightboxQuarto = GLightbox({"descPosition":"bottom","selector":".lightbox","loop":false,"openEffect":"zoom","closeEffect":"zoom"});
+<script>var lightboxQuarto = GLightbox({"closeEffect":"zoom","selector":".lightbox","openEffect":"zoom","loop":false,"descPosition":"bottom"});
 (function() {
   let previousOnload = window.onload;
   window.onload = () => {
diff --git a/index.html b/index.html
index 07c455845..0ac790a08 100644
--- a/index.html
+++ b/index.html
@@ -1114,7 +1114,7 @@ <h2 class="anchored" data-anchor-id="learning-more">Learning More</h2>
     </div>
   </div>
 </footer>
-<script>var lightboxQuarto = GLightbox({"loop":false,"selector":".lightbox","closeEffect":"zoom","openEffect":"zoom","descPosition":"bottom"});
+<script>var lightboxQuarto = GLightbox({"descPosition":"bottom","selector":".lightbox","loop":false,"openEffect":"zoom","closeEffect":"zoom"});
 (function() {
   let previousOnload = window.onload;
   window.onload = () => {
diff --git a/log-viewer.html b/log-viewer.html
index 79feee4ce..5eaa54c8d 100644
--- a/log-viewer.html
+++ b/log-viewer.html
@@ -1074,7 +1074,7 @@ <h3 class="unlisted anchored" data-anchor-id="other-notes">Other Notes</h3>
     </div>
   </div>
 </footer>
-<script>var lightboxQuarto = GLightbox({"selector":".lightbox","closeEffect":"zoom","openEffect":"zoom","loop":false,"descPosition":"bottom"});
+<script>var lightboxQuarto = GLightbox({"selector":".lightbox","openEffect":"zoom","loop":false,"closeEffect":"zoom","descPosition":"bottom"});
 (function() {
   let previousOnload = window.onload;
   window.onload = () => {
diff --git a/search.json b/search.json
index 21f0360a0..bcb76a295 100644
--- a/search.json
+++ b/search.json
@@ -542,6 +542,17 @@
       "<span class='chapter-number'>8</span>  <span class='chapter-title'>Tools</span>"
     ]
   },
+  {
+    "objectID": "tools.html#parallel-tool-calls",
+    "href": "tools.html#parallel-tool-calls",
+    "title": "Tools",
+    "section": "Parallel Tool Calls",
+    "text": "Parallel Tool Calls\nSome model APIs including OpenAI and Gemini support executing multiple tool calls in parallel. While this can provide a performance improvement, it might not be compatible with semantics of some tools (for example, if they manage some global state between calls).\nYou can opt-out of parallel tool calling by adding parallel=False to the @tool decorator. For example, the built in web browsing tools do this as follows:\n@tool(parallel=False)\ndef web_browser_go() -&gt; Tool:\n    ...",
+    "crumbs": [
+      "Components",
+      "<span class='chapter-number'>8</span>  <span class='chapter-title'>Tools</span>"
+    ]
+  },
   {
     "objectID": "tools.html#sec-bash-and-python",
     "href": "tools.html#sec-bash-and-python",
@@ -558,7 +569,7 @@
     "href": "tools.html#sec-web-browser",
     "title": "Tools",
     "section": "Web Browser",
-    "text": "Web Browser\n\n\n\n\n\n\nNote\n\n\n\nNote that the web browser tool described below is currently only available in the development version of Inspect. You can install the development version with:\npip install git+https://github.com/UKGovernmentBEIS/inspect_ai\n\n\nThe web browser tool provides models with the ability to browse the web using a headless Chromium browser. Navigation, history, and mouse/keyboard interactions are all supported.\n\nConfiguration\nUnder the hood, the web browser is an instance of Chromium orchestrated by Playwright, and runs in its own dedicated Docker container. Therefore, to use the web_browser tool you should reference the inspect_web_browser Docker image provided with Inspect in your compose.yaml. For example, here we use it as our default image:\n\n\ncompose.yaml\n\nservices:\n  default:\n    image: inspect_web_browser\n    init: true\n\nHere, we add a dedicated web_browser service:\n\n\ncompose.yaml\n\nservices:\n  default:\n    image: \"python:3.12-bookworm\"\n    init: true\n    command: \"tail -f /dev/null\"\n  web_browser:\n    image: inspect_web_browser\n    init: true\n\nRather than using the inspect_web_browser image, you can also just include the web browser service components in a custom image (see Custom Images below for details).\n\n\nTask\nA task configured to use the web browser tool might look like this:\nfrom inspect_ai import Task, task\nfrom inspect_ai.scorer import match\nfrom inspect_ai.solver import generate, use_tools\nfrom inspect_ai.tool import bash, web_browser_tools\n\n@task\ndef browser_task():\n    return Task(\n        dataset=read_dataset(),\n        solver=[\n            use_tools([bash()] + web_browser_tools()),\n            generate(),\n        ],\n        scorer=match(),\n        sandbox=(\"docker\", \"compose.yaml\"),\n    )\nNote that we pass web_browser_tools() to use_tools(), which provides a list of web browsing tools (e.g. 
web_browser_go(), web_browser_click(), etc.).\n\n\nBrowsing\nIf you review the transcripts of a sample with access to the web browser tool, you’ll notice that there are several distinct tools made available for control of the web browser. These tools include:\n\n\n\n\n\n\n\nTool\nDescription\n\n\n\n\nweb_browser_go()\nNavigate the web browser to a URL.\n\n\nweb_browser_click()\nClick an element on the page currently displayed by the web browser.\n\n\nweb_browser_scroll()\nScroll the web browser up or down by one page.\n\n\nweb_browser_forward()\nNavigate the web browser forward in the browser history.\n\n\nweb_browser_back()\nNavigate the web browser back in the browser history.\n\n\nweb_browser_refresh()\nRefresh the current page of the web browser.\n\n\nweb_browser_type()\nType text into an input on a web browser page.\n\n\nweb_browser_type_submit()\nType text into a form input on a web browser page and press ENTER to submit the form.\n\n\n\nIf you like, you can enable a subset of these tools rather than calling web_browser_tools() to use all of them.\nThe return value of each of these tools is a web accessibility tree for the page, which provides a clean view of the content, links, and form fields available on the page (you can look at the accessibility tree for any web page using Chrome Developer Tools).\n\n\nCustom Images\nAbove we demonstrated how to use the pre-configured Inspect web browser container. 
If you prefer to incorporate the headless web browser and its dependencies into another container that is also supported.\nTo do this, reference the Dockerfile used in the built-in web browser container and ensure that the dependencies, application files, and server run command it uses are also in your container definition:\n# Install playwright\nRUN pip install playwright \nRUN playwright install\nRUN playwright install-deps \n\n# Install other dependancies\nRUN pip install dm-env-rpc pillow bs4 lxml\n\n# Copy Python files alongside the Dockerfile\nCOPY *.py ./\n\n# Run the server\nCMD [\"python3\", \"/app/web_browser/web_server.py\"]",
+    "text": "Web Browser\nThe web browser tools provide models with the ability to browse the web using a headless Chromium browser. Navigation, history, and mouse/keyboard interactions are all supported.\n\nConfiguration\nUnder the hood, the web browser is an instance of Chromium orchestrated by Playwright, and runs in its own dedicated Docker container. Therefore, to use the web_browser tool you should reference the inspect_web_browser Docker image provided with Inspect in your compose.yaml. For example, here we use it as our default image:\n\n\ncompose.yaml\n\nservices:\n  default:\n    image: inspect_web_browser\n    init: true\n\nHere, we add a dedicated web_browser service:\n\n\ncompose.yaml\n\nservices:\n  default:\n    image: \"python:3.12-bookworm\"\n    init: true\n    command: \"tail -f /dev/null\"\n  web_browser:\n    image: inspect_web_browser\n    init: true\n\nRather than using the inspect_web_browser image, you can also just include the web browser service components in a custom image (see Custom Images below for details).\n\n\nTask\nA task configured to use the web browser tools might look like this:\nfrom inspect_ai import Task, task\nfrom inspect_ai.scorer import match\nfrom inspect_ai.solver import generate, use_tools\nfrom inspect_ai.tool import bash, python, web_browser\n\n@task\ndef browser_task():\n    return Task(\n        dataset=read_dataset(),\n        solver=[\n            use_tools([bash(), python()] + web_browser()),\n            generate(),\n        ],\n        scorer=match(),\n        sandbox=(\"docker\", \"compose.yaml\"),\n    )\nNote that unlike some other tool functions like bash(), the web_browser() function returns a list of tools. Therefore, we concatenate it with a list of the other tools we are using in the call to use_tools().\n\n\nBrowsing\nIf you review the transcripts of a sample with access to the web browser tool, you’ll notice that there are several distinct tools made available for control of the web browser. 
These tools include:\n\n\n\n\n\n\n\nTool\nDescription\n\n\n\n\nweb_browser_go(url)\nNavigate the web browser to a URL.\n\n\nweb_browser_click(element_id)\nClick an element on the page currently displayed by the web browser.\n\n\nweb_browser_type(element_id)\nType text into an input on a web browser page.\n\n\nweb_browser_type_submit(element_id, text)\nType text into a form input on a web browser page and press ENTER to submit the form.\n\n\nweb_browser_scroll(direction)\nScroll the web browser up or down by one page.\n\n\nweb_browser_forward()\nNavigate the web browser forward in the browser history.\n\n\nweb_browser_back()\nNavigate the web browser back in the browser history.\n\n\nweb_browser_refresh()\nRefresh the current page of the web browser.\n\n\n\nThe return value of each of these tools is a web accessibility tree for the page, which provides a clean view of the content, links, and form fields available on the page (you can look at the accessibility tree for any web page using Chrome Developer Tools).\n\n\nCustom Images\nAbove we demonstrated how to use the pre-configured Inspect web browser container. If you prefer to incorporate the headless web browser and its dependencies into another container that is also supported.\nTo do this, reference the Dockerfile used in the built-in web browser container and ensure that the dependencies, application files, and server run command it uses are also in your container definition:\n# Install playwright\nRUN pip install playwright \nRUN playwright install\nRUN playwright install-deps \n\n# Install other dependancies\nRUN pip install dm-env-rpc pillow bs4 lxml\n\n# Copy Python files alongside the Dockerfile\nCOPY *.py ./\n\n# Run the server\nCMD [\"python3\", \"/app/web_browser/web_server.py\"]\nNote that all of the Python files in the _resources directory alongside the Dockerfile need to be available for copying when building the container.",
     "crumbs": [
       "Components",
       "<span class='chapter-number'>8</span>  <span class='chapter-title'>Tools</span>"
@@ -613,7 +624,7 @@
     "href": "agents.html#sec-custom-scaffolding",
     "title": "Agents",
     "section": "Custom Scaffold",
-    "text": "Custom Scaffold\nThe basic agent demonstrated above will work well for some tasks, but in other cases you may want to provide more custom logic. For example, you might want to:\n\nRedirect the model to another trajectory if its not on a productive course.\nExercise more fine grained control over which, when, and how many tool calls are made, and how tool calling errors are handled.\nHave multiple generate() passes each with a distinct set of tools.\n\nTo do this, create a solver that emulates the default tool use loop and provides additional customisation as required. Here is the code at the core of Inspect tool use in generate():\n# call model\nmodel = get_model()\noutput = await model.generate(state.messages, state.tools)\n\n# update state with output\nstate.output = output\nstate.messages.append(output.message)\n\n# call tools and update state\nstate.messages.extend(call_tools(output.message, state.tools))\nThis does everything that default generate() does, save for an outer loop to continue calling the mode as long as it continues calling tools. 
This is a complete solver agent that implements the outer loop:\n@solver\ndef agent_loop():\n    async def solve(state: TaskState, generate: Generate):\n        model = get_model()\n        while True:\n            # call model\n            output = await model.generate(state.messages, state.tools)\n\n            # update state\n            state.output = output\n            state.messages.append(output.message)\n\n            # make tool calls or terminate if there are none\n            if output.message.tool_calls:\n                state.messages.extend(call_tools(output.message, state.tools))\n            else:\n                break\n\n        return state\n\n    return solve\nYou can imagine several ways you might want to customise this loop:\n\nAdding another termination condition for the output satisfying some criteria.\nUrging the model to keep going after it decides to stop calling tools.\nExamining and possibly filtering the tool calls before invoking call_tools()\nAdding a critique / reflection step between tool calling and generate.\nDeep copying the TaskState and exploring several trajectories.\n\n\nStop Reasons\nOne thing that a custom scaffold may do is try to recover from various conditions that cause the model to stop generating. You can find the reason that generation stopped in the stop_reason field of ModelOutput. For example:\noutput = await model.generate(state.messages, state.tools)\nif output.stop_reason == \"model_length\":\n    # do something to recover from context window overflow\nHere are the possible values for StopReason :\n\n\n\n\n\n\n\nStop Reason\nDescription\n\n\n\n\nstop\nThe model hit a natural stop point or a provided stop sequence\n\n\nmax_tokens\nThe maximum number of tokens specified in the request was reached.\n\n\nmodel_length\nThe model’s context length was exceeded.\n\n\ntool_calls\nThe model called a tool\n\n\ncontent_filter\nContent was omitted due to a content filter.\n\n\nunknown\nUnknown (e.g. 
unexpected runtime error)\n\n\n\n\n\nError Handling\nBy default expected errors (e.g. file not found, insufficient, permission , timeouts, etc.) are forwarded to the model for possible recovery. If you would like to intervene in the default error handling then rather than immediately appending the list of assistant messages returned from call_tools() to state.messages (as shown above), check the error property of these messages (which will be None in the case of no error) and proceed accordingly.\n\n\nTool Filtering\nWhile its possible to make tools globally available to the model via use_tools(), you may also want to filter the available tools either based on task stages or dynamically based on some other criteria.\nHere’s an example of a solver agent that filters the available tools between calls to generate():\n@solver\ndef ctf_agent():\n    async def solve(state: TaskState, generate: Generate):\n        \n        # first pass w/ core tools\n        state.tools = [decompile(), dissasemble(), bash()]\n        state = await generate(state)\n\n        # second pass w/ prompt and python tool only\n        state.tools = [python()]\n        state.messages.append(ChatMessageUser( \n            content = \"Use Python to extract the flag.\" \n        ))  \n        state = await generate(state)\n\n        # clear tools and return\n        state.tools = []\n        return state\n    \n    return solve\n\n\nAgents API\nFor more sophisticated agents, Inspect offers several additional advanced APIs for state management, sub-agents, and fine grained logging. See the Agents API article for additional details.",
+    "text": "Custom Scaffold\nThe basic agent demonstrated above will work well for some tasks, but in other cases you may want to provide more custom logic. For example, you might want to:\n\nRedirect the model to another trajectory if its not on a productive course.\nExercise more fine grained control over which, when, and how many tool calls are made, and how tool calling errors are handled.\nHave multiple generate() passes each with a distinct set of tools.\n\nTo do this, create a solver that emulates the default tool use loop and provides additional customisation as required. Here is the code at the core of Inspect tool use in generate():\n# call model\nmodel = get_model()\noutput = await model.generate(state.messages, state.tools)\n\n# update state with output\nstate.output = output\nstate.messages.append(output.message)\n\n# call tools and update state\nstate.messages.extend(call_tools(output.message, state.tools))\nThis does everything that default generate() does, save for an outer loop to continue calling the mode as long as it continues calling tools. 
This is a complete solver agent that implements the outer loop:\n@solver\ndef agent_loop():\n    async def solve(state: TaskState, generate: Generate):\n        model = get_model()\n        while not state.completed:\n            # call model\n            output = await model.generate(state.messages, state.tools)\n\n            # update state\n            state.output = output\n            state.messages.append(output.message)\n\n            # make tool calls or terminate if there are none\n            if output.message.tool_calls:\n                state.messages.extend(call_tools(output.message, state.tools))\n            else:\n                break\n\n        return state\n\n    return solve\nThe state.completed flag is automatically set to True if max_messages for the task is exceeded, so we check it at the top of the loop.\nYou can imagine several ways you might want to customise this loop:\n\nAdding another termination condition for the output satisfying some criteria.\nUrging the model to keep going after it decides to stop calling tools.\nExamining and possibly filtering the tool calls before invoking call_tools()\nAdding a critique / reflection step between tool calling and generate.\nDeep copying the TaskState and exploring several trajectories.\n\n\nStop Reasons\nOne thing that a custom scaffold may do is try to recover from various conditions that cause the model to stop generating. You can find the reason that generation stopped in the stop_reason field of ModelOutput. 
For example:\noutput = await model.generate(state.messages, state.tools)\nif output.stop_reason == \"model_length\":\n    # do something to recover from context window overflow\nHere are the possible values for StopReason :\n\n\n\n\n\n\n\nStop Reason\nDescription\n\n\n\n\nstop\nThe model hit a natural stop point or a provided stop sequence\n\n\nmax_tokens\nThe maximum number of tokens specified in the request was reached.\n\n\nmodel_length\nThe model’s context length was exceeded.\n\n\ntool_calls\nThe model called a tool\n\n\ncontent_filter\nContent was omitted due to a content filter.\n\n\nunknown\nUnknown (e.g. unexpected runtime error)\n\n\n\n\n\nError Handling\nBy default expected errors (e.g. file not found, insufficient, permission , timeouts, etc.) are forwarded to the model for possible recovery. If you would like to intervene in the default error handling then rather than immediately appending the list of assistant messages returned from call_tools() to state.messages (as shown above), check the error property of these messages (which will be None in the case of no error) and proceed accordingly.\n\n\nTool Filtering\nWhile its possible to make tools globally available to the model via use_tools(), you may also want to filter the available tools either based on task stages or dynamically based on some other criteria.\nHere’s an example of a solver agent that filters the available tools between calls to generate():\n@solver\ndef ctf_agent():\n    async def solve(state: TaskState, generate: Generate):\n        \n        # first pass w/ core tools\n        state.tools = [decompile(), dissasemble(), bash()]\n        state = await generate(state)\n\n        # second pass w/ prompt and python tool only\n        state.tools = [python()]\n        state.messages.append(ChatMessageUser( \n            content = \"Use Python to extract the flag.\" \n        ))  \n        state = await generate(state)\n\n        # clear tools and return\n        state.tools = []\n       
 return state\n    \n    return solve\n\n\nAgents API\nFor more sophisticated agents, Inspect offers several additional advanced APIs for state management, sub-agents, and fine grained logging. See the Agents API article for additional details.",
     "crumbs": [
       "Components",
       "<span class='chapter-number'>9</span>  <span class='chapter-title'>Agents</span>"
@@ -635,7 +646,7 @@
     "href": "agents.html#sec-sandbox-environments",
     "title": "Agents",
     "section": "Sandboxing",
-    "text": "Sandboxing\nThe examples shown above execute tool code within the main process running the evaluation task. In some cases however, you may require the provisioning of dedicated environments for running tool code. This might be the case if:\n\nYou are creating tools that enable execution of arbitrary code (e.g. a tool that executes shell commands or Python code).\nYou need to provision per-sample file system resources.\nYou want to provide access to a more sophisticated evaluation environment (e.g. creating network hosts for a cybersecurity eval).\n\n\nExample: File Listing\nLet’s take a look at a simple example to illustrate. First, we’ll define a list_files() tool. This tool need to access the ls command—it does so by calling the sandbox() function to get access to the SandboxEnvironment instance for the currently executing Sample:\nfrom inspect_ai.tool import ToolError, tool\nfrom inspect_ai.util import sandbox\n\n@tool\ndef list_files():\n    async def execute(dir: str):\n        \"\"\"List the files in a directory.\n\n        Args:\n            dir (str): Directory\n\n        Returns:\n            File listing of the directory\n        \"\"\"\n        result = await sandbox().exec([\"ls\", dir])\n        if result.success:\n            return result.stdout\n        else:\n            raise ToolError(result.stderr)\n\n    return execute\nThe exec() function is used to list the directory contents. 
Note that its not immediately clear where or how exec() is implemented (that will be described shortly!).\nHere’s an evaluation that makes use of this tool:\nfrom inspect_ai import task, Task\nfrom inspect_ai.dataset import Sample\nfrom inspect_ai.scorer import includes\nfrom inspect_ai.solver import generate, use_tools\n\ndataset = [\n    Sample(\n        input='Is there a file named \"bar.txt\" ' \n               + 'in the current directory?',\n        target=\"Yes\",\n        files={\"bar.txt\": \"hello\"},\n    )\n]\n\n@task\ndef file_probe()\n    return Task(\n        dataset=dataset,\n        solver=[\n            use_tools([list_files()]), \n            generate()\n        ],\n        sandbox=\"docker\",\n        scorer=includes(),\n    )\n)\nWe’ve included sandbox=\"docker\" to indicate that sandbox environment operations should be executed in a Docker container. Specifying a sandbox environment (either at the task or evaluation level) is required if your tools call the sandbox() function.\nNote that files are specified as part of the Sample. Files can be specified inline using plain text (as depicted above), inline using a base64-encoded data URI, or as a path to a file or remote resource (e.g. S3 bucket). 
Relative file paths are resolved according to the location of the underlying dataset file.\n\n\nEnvironment Interface\nThe following instance methods are available to tools that need to interact with a SandboxEnvironment:\nclass SandboxEnvironment:\n   \n    async def exec(\n        self,\n        cmd: list[str],\n        input: str | bytes | None = None,\n        cwd: str | None = None,\n        env: dict[str, str] = {},\n        user: str | None = None,\n        timeout: int | None = None,\n    ) -&gt; ExecResult[str]:\n        \"\"\"\n        Raises:\n          TimeoutError: If the specified `timeout` expires.\n          UnicodeDecodeError: If an error occurs while\n            decoding the command output.\n          PermissionError: If the user does not have\n            permission to execute the command.\n        \"\"\"\n        ...\n\n    async def write_file(\n        self, file: str, contents: str | bytes\n    ) -&gt; None:\n        \"\"\"\n        Raises:\n          PermissionError: If the user does not have\n            permission to write to the specified path.\n          IsADirectoryError: If the file exists already and \n            is a directory.\n        \"\"\"\n        ...\n\n    async def read_file(\n        self, file: str, text: bool = True\n    ) -&gt; Union[str | bytes]:\n        \"\"\"\n        Raises:\n          FileNotFoundError: If the file does not exist.\n          UnicodeDecodeError: If an encoding error occurs \n            while reading the file.\n            (only applicable when `text = True`)\n          PermissionError: If the user does not have\n            permission to read from the specified path.\n          IsADirectoryError: If the file is a directory.\n        \"\"\"\n        ...\nNote that write_file() automatically creates parent directories as required if they don’t exist.\nFor each method there is a documented set of errors that are raised: these are expected errors and can either be caught by tools or allowed to 
propagate in which case they will be reported to the model for potential recovery. In addition, unexpected errors may occur (e.g. a networking error connecting to a remote container): these errors are not reported to the model and fail the Sample with an error state.\nThe sandbox is also available to custom scorers.\n\n\nEnvironment Binding\nThere are two sandbox environments built in to Inspect:\n\n\n\nEnvironment Type\nDescription\n\n\n\n\nlocal\nRun sandbox() methods in the same file system as the running evaluation (should only be used if you are already running your evaluation in another sandbox).\n\n\ndocker\nRun sandbox() methods within a Docker container (see the Docker Configuration section below for additional details).\n\n\n\nSandbox environment definitions can be bound at the Sample, Task, or eval() level. Binding precedence goes from eval(), to Task to Sample, however sandbox config files defined on the Sample always take precedence when the sandbox type for the Sample is the same as the enclosing Task or eval().\nHere is a Task that defines a sandbox:\nTask(\n    dataset=dataset,\n    plan([\n        use_tools([read_file(), list_files()])), \n        generate()\n    ]),\n    scorer=match(),\n    sandbox=\"docker\"\n)\nBy default, any Dockerfile and/or compose.yaml file within the task directory will be automatically discovered and used. If your compose file has a different name then you can provide an override specification as follows:\nsandbox=(\"docker\", \"attacker-compose.yaml\")\nThe configuration file added to the sandbox spec should always be a compose file (rather than a Dockerfile, which is always discovered automatically).\n\n\nPer Sample Setup\nThe Sample class includes sandbox, files and setup fields that are used to specify per-sample sandbox config, file assets, and setup logic.\n\nSandbox\nYou can either define a default sandbox for an entire Task as illustrated abvove, or alternatively define a per-sample sandbox. 
For example, you might want to do this if each sample has its own Dockerfile and/or custom compose configuration file. (Note, each sample gets its own sandbox instance, even if the sandbox is defined at Task level. So samples do not interfere with each other’s sandboxes.)\nThe sandbox can be specified as a string (e.g. \"docker“) or a list of sandbox type and config file (e.g. [\"docker\", \"compose.yaml\"]).\n\n\nFiles\nSample files is a dict[str,str] that specifies files to copy into sandbox environments. The key of the dict specifies the name of the file to write. By default files are written into the default sandbox environment but they can optionally include a prefix indicating that they should be written into a specific sandbox environment (e.g. \"victim:flag.txt\": \"flag.txt\").\nThe value of the dict can be either the file contents, a file path, or a base64 encoded Data URL.\n\n\nScript\nIf there is a Sample setup script it will be executed within the default sandbox environment after any Sample files are copied into the environment. The setup field can be either the script contents, a file path containing the script, or a base64 encoded Data URL.\nThe setup script is by default interpreted as a bash script, however you can have it executed by another interpreter using a shebang comment. 
For example, this will be executed as a Python script:\n#!/usr/bin/env python3\n\nprint('hello from python')\n\n\n\nDocker Configuration\nBefore using Docker sandbox environments, please be sure to install Docker Engine (version 24.0.7 or greater).\nYou can use the Docker sandbox enviornment without any special configuration, however most commonly you’ll provide explicit configuration via either a Dockerfile or a Docker Compose configuration file (compose.yaml).\nHere is how Docker sandbox environments are created based on the presence of Dockerfile and/or compose.yml in the task directory:\n\n\n\nConfig Files\nBehavior\n\n\n\n\nNone\nCreates a sandbox environment based on the official python:3.12-bookworm image.\n\n\nDockerfile\nCreates a sandbox environment by building the image.\n\n\ncompose.yaml\nCreates sandbox environment(s) based on compose.yaml.\n\n\n\nProviding a compose.yaml is not strictly required, as Inspect will automatically generate one as needed. Note that the automatically generated compose file will restrict internet access by default, so if your evaluations require this you’ll need to provide your own compose.yaml file.\nHere’s an example of a compose.yaml file that sets container resource limits and isolates it from all network interactions including internet access:\n\n\ncompose.yaml\n\nservices:\n  default: \n    build: .\n    init: true\n    command: tail -f /dev/null\n    cpus: 1.0\n    mem_limit: 0.5gb\n    network_mode: none\n\nThe init: true entry enables the container to respond to shutdown requests. 
The command is provided to prevent the container from exiting after it starts.\nHere is what a simple compose.yaml would look like for a local pre-built image named ctf-agent-environment (resource and network limits excluded for brevity):\n\n\ncompose.yaml\n\nservices:\n  default: \n    image: ctf-agent-environment\n    x-local: true\n    init: true\n    command: tail -f /dev/null\n\nThe ctf-agent-environment is not an image that exists on a remote registry, so we add the x-local: true to indicate that it should not be pulled. If local images are tagged, they also will not be pulled by default (so x-local: true is not required). For example:\n\n\ncompose.yaml\n\nservices:\n  default: \n    image: ctf-agent-environment:1.0.0\n    init: true\n    command: tail -f /dev/null\n\nIf we are using an image from a remote registry we similarly don’t need to include x-local:\n\n\ncompose.yaml\n\nservices:\n  default:\n    image: python:3.12-bookworm\n    init: true\n    command: tail -f /dev/null\n\nSee the Docker Compose documentation for information on all available container options.\n\nMultiple Environments\nIn some cases you may want to create multiple sandbox environments (e.g. if one environment has complex dependencies that conflict with the dependencies of other environments). To do this specify multiple named services:\n\n\ncompose.yaml\n\nservices:\n  default:\n    image: ctf-agent-environment\n    x-local: true\n    init: true\n    cpus: 1.0\n    mem_limit: 0.5gb\n  victim:\n    image: ctf-victim-environment\n    x-local: true\n    init: true\n    cpus: 1.0\n    mem_limit: 1gb\n\nThe first environment listed is the “default” environment, and can be accessed from within a tool with a normal call to sandbox(). 
Other environments would be accessed by name, for example:\nsandbox()          # default sandbox environment\nsandbox(\"victim\")  # named sandbox environment\n\n\n\n\n\n\nNote\n\n\n\nIf you define multiple sandbox environments you are required to name one of them “default” so that Inspect knows which environment to resolve for calls to sandbox() without an argument. Alternatively, you can add the x-default key to a service not named “default” to designate it as the default sandbox.\n\n\n\n\nInfrastructure\nNote that in many cases you’ll want to provision additional infrastructure (e.g. other hosts or volumes). For example, here we define an additional container (“writer”) as well as a volume shared between the default container and the writer container:\nservices:\n  default: \n    image: ctf-agent-environment\n    x-local: true\n    init: true\n    volumes:\n      - ctf-challenge-volume:/shared-data\n    \n  writer:\n    image: ctf-challenge-writer\n    x-local: true\n    init: true\n    volumes:\n      - ctf-challenge-volume:/shared-data\nvolumes:\n  ctf-challenge-volume:\nSee the documentation on Docker Compose files for information on their full schema and feature set.\n\n\nSample Metadata\nYou might want to interpolate Sample metadata into your Docker compose files. You can do this using the standard compose environment variable syntax, where any metadata in the Sample is made available with a SAMPLE_METADATA_ prefix. For example, you might have a per-sample memory limit (with a default value of 0.5gb if unspecified):\nservices:\n  default:\n    image: ctf-agent-environment\n    x-local: true\n    init: true\n    cpus: 1.0\n    mem_limit: ${SAMPLE_METDATA_MEMORY_LIMIT-0.5gb}\nNote that - suffix that provides the default value of 0.5gb. 
This is important to include so that when the compose file is read without the context of a Sample (for example, when pulling/building images at startup) that a default value is available.\n\n\n\nEnvironment Cleanup\nWhen a task is completed, Inspect will automatically cleanup resources associated with the sandbox environment (e.g. containers, images, and networks). If for any reason resources are not cleaned up (e.g. if the cleanup itself is interrupted via Ctrl+C) you can globally cleanup all environments with the inspect sandbox cleanup command. For example, here we cleanup all environments associated with the docker provider:\n$ inspect sandbox cleanup docker\nIn some cases you may prefer not to cleanup environments. For example, you might want to examine their state interactively from the shell in order to debug an agent. Use the --no-sandbox-cleanup argument to do this:\n$ inspect eval ctf.py --no-sandbox-cleanup\nYou can also do this when using eval():\neval(\"ctf.py\", sandbox_cleanup = False)\nWhen you do this, you’ll see a list of sandbox containers printed out which includes the ID of each container. You can then use this ID to get a shell inside one of the containers:\ndocker exec -it inspect-intercode_ctf-ipg9tbviycpvlgwja5anyvn-default-1 bash\nWhen you no longer need the environments, you can clean them up either all at once or individually:\n# cleanup all environments\ninspect sandbox cleanup docker\n\n# cleanup single environment\ninspect sandbox cleanup docker inspect-intercode_ctf-ipg9tbviycpvlgwja5anyvn\n\n\nResource Management\nCreating and executing code within Docker containers can be expensive both in terms of memory and CPU utilisation. Inspect provides some automatic resource management to keep usage reasonable in the default case. This section describes that behaviour as well as how you can tune it for your use-cases.\n\nRunning Containers\nAs described above, each Sample is provisioned its own container. 
The number of running containers for an evaluation is therefore determined by the max_samples option (which is by default set to max_connections, typically 10 unless overridden).\nUse max_samples to dial up or down the number of containers running at any given time. Note that a running container does not necessarily use CPU resources unless it has active background processes.\nUse a compose.yaml file to limit the resources consumed by each running container. For example:\n\n\ncompose.yaml\n\nservices:\n  default: \n    image: ctf-agent-environment\n    x-local: true\n    command: tail -f /dev/null\n    cpus: 1.0\n    mem_limit: 0.5gb\n\n\n\nConcurrent Execution\nThe SandboxEnvironment.exec() method runs a command within a sandbox environment, typically consuming CPU resources. To protect against overwhelming the system’s CPUs, the implementation of exec() uses Inspect’s subprocess() function, which automatically limits concurrent child processes to the number of CPUs on your system (os.cpu_count()).\nYou can change the number of permitted concurrent subprocess executions using the max_subprocesses option. You might do this for example if you know that your exec() commands tend to use multiple CPU cores and thus should be executed with less concurrency.\n\n\n\nTroubleshooting\nYou can view more detailed logging around the creation and use of sandbox environments by using the sandbox log level. For example:\n$ inspect eval ctf.py --log-level sandbox\nThe sandbox log level is just above warning (so it will not show http or debug level messages).",
+    "text": "Sandboxing\nThe examples shown above execute tool code within the main process running the evaluation task. In some cases however, you may require the provisioning of dedicated environments for running tool code. This might be the case if:\n\nYou are creating tools that enable execution of arbitrary code (e.g. a tool that executes shell commands or Python code).\nYou need to provision per-sample file system resources.\nYou want to provide access to a more sophisticated evaluation environment (e.g. creating network hosts for a cybersecurity eval).\n\n\nExample: File Listing\nLet’s take a look at a simple example to illustrate. First, we’ll define a list_files() tool. This tool need to access the ls command—it does so by calling the sandbox() function to get access to the SandboxEnvironment instance for the currently executing Sample:\nfrom inspect_ai.tool import ToolError, tool\nfrom inspect_ai.util import sandbox\n\n@tool\ndef list_files():\n    async def execute(dir: str):\n        \"\"\"List the files in a directory.\n\n        Args:\n            dir (str): Directory\n\n        Returns:\n            File listing of the directory\n        \"\"\"\n        result = await sandbox().exec([\"ls\", dir])\n        if result.success:\n            return result.stdout\n        else:\n            raise ToolError(result.stderr)\n\n    return execute\nThe exec() function is used to list the directory contents. 
Note that its not immediately clear where or how exec() is implemented (that will be described shortly!).\nHere’s an evaluation that makes use of this tool:\nfrom inspect_ai import task, Task\nfrom inspect_ai.dataset import Sample\nfrom inspect_ai.scorer import includes\nfrom inspect_ai.solver import generate, use_tools\n\ndataset = [\n    Sample(\n        input='Is there a file named \"bar.txt\" ' \n               + 'in the current directory?',\n        target=\"Yes\",\n        files={\"bar.txt\": \"hello\"},\n    )\n]\n\n@task\ndef file_probe()\n    return Task(\n        dataset=dataset,\n        solver=[\n            use_tools([list_files()]), \n            generate()\n        ],\n        sandbox=\"docker\",\n        scorer=includes(),\n    )\n)\nWe’ve included sandbox=\"docker\" to indicate that sandbox environment operations should be executed in a Docker container. Specifying a sandbox environment (either at the task or evaluation level) is required if your tools call the sandbox() function.\nNote that files are specified as part of the Sample. Files can be specified inline using plain text (as depicted above), inline using a base64-encoded data URI, or as a path to a file or remote resource (e.g. S3 bucket). 
Relative file paths are resolved according to the location of the underlying dataset file.\n\n\nEnvironment Interface\nThe following instance methods are available to tools that need to interact with a SandboxEnvironment:\nclass SandboxEnvironment:\n   \n    async def exec(\n        self,\n        cmd: list[str],\n        input: str | bytes | None = None,\n        cwd: str | None = None,\n        env: dict[str, str] = {},\n        user: str | None = None,\n        timeout: int | None = None,\n    ) -&gt; ExecResult[str]:\n        \"\"\"\n        Raises:\n          TimeoutError: If the specified `timeout` expires.\n          UnicodeDecodeError: If an error occurs while\n            decoding the command output.\n          PermissionError: If the user does not have\n            permission to execute the command.\n        \"\"\"\n        ...\n\n    async def write_file(\n        self, file: str, contents: str | bytes\n    ) -&gt; None:\n        \"\"\"\n        Raises:\n          PermissionError: If the user does not have\n            permission to write to the specified path.\n          IsADirectoryError: If the file exists already and \n            is a directory.\n        \"\"\"\n        ...\n\n    async def read_file(\n        self, file: str, text: bool = True\n    ) -&gt; Union[str | bytes]:\n        \"\"\"\n        Raises:\n          FileNotFoundError: If the file does not exist.\n          UnicodeDecodeError: If an encoding error occurs \n            while reading the file.\n            (only applicable when `text = True`)\n          PermissionError: If the user does not have\n            permission to read from the specified path.\n          IsADirectoryError: If the file is a directory.\n        \"\"\"\n        ...\nNote that write_file() automatically creates parent directories as required if they don’t exist.\nFor each method there is a documented set of errors that are raised: these are expected errors and can either be caught by tools or allowed to 
propagate in which case they will be reported to the model for potential recovery. In addition, unexpected errors may occur (e.g. a networking error connecting to a remote container): these errors are not reported to the model and fail the Sample with an error state.\nThe sandbox is also available to custom scorers.\n\n\nEnvironment Binding\nThere are two sandbox environments built in to Inspect:\n\n\n\nEnvironment Type\nDescription\n\n\n\n\nlocal\nRun sandbox() methods in the same file system as the running evaluation (should only be used if you are already running your evaluation in another sandbox).\n\n\ndocker\nRun sandbox() methods within a Docker container (see the Docker Configuration section below for additional details).\n\n\n\nSandbox environment definitions can be bound at the Sample, Task, or eval() level. Binding precedence goes from eval(), to Task to Sample, however sandbox config files defined on the Sample always take precedence when the sandbox type for the Sample is the same as the enclosing Task or eval().\nHere is a Task that defines a sandbox:\nTask(\n    dataset=dataset,\n    plan([\n        use_tools([read_file(), list_files()])), \n        generate()\n    ]),\n    scorer=match(),\n    sandbox=\"docker\"\n)\nBy default, any Dockerfile and/or compose.yaml file within the task directory will be automatically discovered and used. If your compose file has a different name then you can provide an override specification as follows:\nsandbox=(\"docker\", \"attacker-compose.yaml\")\nThe configuration file added to the sandbox spec should always be a compose file (rather than a Dockerfile, which is always discovered automatically).\n\n\nPer Sample Setup\nThe Sample class includes sandbox, files and setup fields that are used to specify per-sample sandbox config, file assets, and setup logic.\n\nSandbox\nYou can either define a default sandbox for an entire Task as illustrated abvove, or alternatively define a per-sample sandbox. 
For example, you might want to do this if each sample has its own Dockerfile and/or custom compose configuration file. (Note, each sample gets its own sandbox instance, even if the sandbox is defined at Task level. So samples do not interfere with each other’s sandboxes.)\nThe sandbox can be specified as a string (e.g. \"docker\") or a list of sandbox type and config file (e.g. [\"docker\", \"compose.yaml\"]).\n\n\nFiles\nSample files is a dict[str,str] that specifies files to copy into sandbox environments. The key of the dict specifies the name of the file to write. By default files are written into the default sandbox environment but they can optionally include a prefix indicating that they should be written into a specific sandbox environment (e.g. \"victim:flag.txt\": \"flag.txt\").\nThe value of the dict can be either the file contents, a file path, or a base64 encoded Data URL.\n\n\nScript\nIf there is a Sample setup bash script it will be executed within the default sandbox environment after any Sample files are copied into the environment. 
The setup field can be either the script contents, a file path containing the script, or a base64 encoded Data URL.\n\n\n\nDocker Configuration\nBefore using Docker sandbox environments, please be sure to install Docker Engine (version 24.0.7 or greater).\nYou can use the Docker sandbox environment without any special configuration, however most commonly you’ll provide explicit configuration via either a Dockerfile or a Docker Compose configuration file (compose.yaml).\nHere is how Docker sandbox environments are created based on the presence of Dockerfile and/or compose.yaml in the task directory:\n\n\n\nConfig Files\nBehavior\n\n\n\n\nNone\nCreates a sandbox environment based on the official python:3.12-bookworm image.\n\n\nDockerfile\nCreates a sandbox environment by building the image.\n\n\ncompose.yaml\nCreates sandbox environment(s) based on compose.yaml.\n\n\n\nProviding a compose.yaml is not strictly required, as Inspect will automatically generate one as needed. Note that the automatically generated compose file will restrict internet access by default, so if your evaluations require this you’ll need to provide your own compose.yaml file.\nHere’s an example of a compose.yaml file that sets container resource limits and isolates it from all network interactions including internet access:\n\n\ncompose.yaml\n\nservices:\n  default: \n    build: .\n    init: true\n    command: tail -f /dev/null\n    cpus: 1.0\n    mem_limit: 0.5gb\n    network_mode: none\n\nThe init: true entry enables the container to respond to shutdown requests. 
The command is provided to prevent the container from exiting after it starts.\nHere is what a simple compose.yaml would look like for a local pre-built image named ctf-agent-environment (resource and network limits excluded for brevity):\n\n\ncompose.yaml\n\nservices:\n  default: \n    image: ctf-agent-environment\n    x-local: true\n    init: true\n    command: tail -f /dev/null\n\nThe ctf-agent-environment is not an image that exists on a remote registry, so we add the x-local: true to indicate that it should not be pulled. If local images are tagged, they also will not be pulled by default (so x-local: true is not required). For example:\n\n\ncompose.yaml\n\nservices:\n  default: \n    image: ctf-agent-environment:1.0.0\n    init: true\n    command: tail -f /dev/null\n\nIf we are using an image from a remote registry we similarly don’t need to include x-local:\n\n\ncompose.yaml\n\nservices:\n  default:\n    image: python:3.12-bookworm\n    init: true\n    command: tail -f /dev/null\n\nSee the Docker Compose documentation for information on all available container options.\n\nMultiple Environments\nIn some cases you may want to create multiple sandbox environments (e.g. if one environment has complex dependencies that conflict with the dependencies of other environments). To do this specify multiple named services:\n\n\ncompose.yaml\n\nservices:\n  default:\n    image: ctf-agent-environment\n    x-local: true\n    init: true\n    cpus: 1.0\n    mem_limit: 0.5gb\n  victim:\n    image: ctf-victim-environment\n    x-local: true\n    init: true\n    cpus: 1.0\n    mem_limit: 1gb\n\nThe first environment listed is the “default” environment, and can be accessed from within a tool with a normal call to sandbox(). 
Other environments would be accessed by name, for example:\nsandbox()          # default sandbox environment\nsandbox(\"victim\")  # named sandbox environment\n\n\n\n\n\n\nNote\n\n\n\nIf you define multiple sandbox environments you are required to name one of them “default” so that Inspect knows which environment to resolve for calls to sandbox() without an argument. Alternatively, you can add the x-default key to a service not named “default” to designate it as the default sandbox.\n\n\n\n\nInfrastructure\nNote that in many cases you’ll want to provision additional infrastructure (e.g. other hosts or volumes). For example, here we define an additional container (“writer”) as well as a volume shared between the default container and the writer container:\nservices:\n  default: \n    image: ctf-agent-environment\n    x-local: true\n    init: true\n    volumes:\n      - ctf-challenge-volume:/shared-data\n    \n  writer:\n    image: ctf-challenge-writer\n    x-local: true\n    init: true\n    volumes:\n      - ctf-challenge-volume:/shared-data\nvolumes:\n  ctf-challenge-volume:\nSee the documentation on Docker Compose files for information on their full schema and feature set.\n\n\nSample Metadata\nYou might want to interpolate Sample metadata into your Docker compose files. You can do this using the standard compose environment variable syntax, where any metadata in the Sample is made available with a SAMPLE_METADATA_ prefix. For example, you might have a per-sample memory limit (with a default value of 0.5gb if unspecified):\nservices:\n  default:\n    image: ctf-agent-environment\n    x-local: true\n    init: true\n    cpus: 1.0\n    mem_limit: ${SAMPLE_METADATA_MEMORY_LIMIT-0.5gb}\nNote the - suffix that provides the default value of 0.5gb. 
This is important to include so that when the compose file is read without the context of a Sample (for example, when pulling/building images at startup) that a default value is available.\n\n\n\nEnvironment Cleanup\nWhen a task is completed, Inspect will automatically cleanup resources associated with the sandbox environment (e.g. containers, images, and networks). If for any reason resources are not cleaned up (e.g. if the cleanup itself is interrupted via Ctrl+C) you can globally cleanup all environments with the inspect sandbox cleanup command. For example, here we cleanup all environments associated with the docker provider:\n$ inspect sandbox cleanup docker\nIn some cases you may prefer not to cleanup environments. For example, you might want to examine their state interactively from the shell in order to debug an agent. Use the --no-sandbox-cleanup argument to do this:\n$ inspect eval ctf.py --no-sandbox-cleanup\nYou can also do this when using eval():\neval(\"ctf.py\", sandbox_cleanup = False)\nWhen you do this, you’ll see a list of sandbox containers printed out which includes the ID of each container. You can then use this ID to get a shell inside one of the containers:\ndocker exec -it inspect-intercode_ctf-ipg9tbviycpvlgwja5anyvn-default-1 bash\nWhen you no longer need the environments, you can clean them up either all at once or individually:\n# cleanup all environments\ninspect sandbox cleanup docker\n\n# cleanup single environment\ninspect sandbox cleanup docker inspect-intercode_ctf-ipg9tbviycpvlgwja5anyvn\n\n\nResource Management\nCreating and executing code within Docker containers can be expensive both in terms of memory and CPU utilisation. Inspect provides some automatic resource management to keep usage reasonable in the default case. This section describes that behaviour as well as how you can tune it for your use-cases.\n\nRunning Containers\nAs described above, each Sample is provisioned its own container. 
The number of running containers for an evaluation is therefore determined by the max_samples option (which is by default set to max_connections, typically 10 unless overridden).\nUse max_samples to dial up or down the number of containers running at any given time. Note that a running container does not necessarily use CPU resources unless it has active background processes.\nUse a compose.yaml file to limit the resources consumed by each running container. For example:\n\n\ncompose.yaml\n\nservices:\n  default: \n    image: ctf-agent-environment\n    x-local: true\n    command: tail -f /dev/null\n    cpus: 1.0\n    mem_limit: 0.5gb\n\n\n\nConcurrent Execution\nThe SandboxEnvironment.exec() method runs a command within a sandbox environment, typically consuming CPU resources. To protect against overwhelming the system’s CPUs, the implementation of exec() uses Inspect’s subprocess() function, which automatically limits concurrent child processes to the number of CPUs on your system (os.cpu_count()).\nYou can change the number of permitted concurrent subprocess executions using the max_subprocesses option. You might do this for example if you know that your exec() commands tend to use multiple CPU cores and thus should be executed with less concurrency.\n\n\n\nTroubleshooting\nYou can view more detailed logging around the creation and use of sandbox environments by using the sandbox log level. For example:\n$ inspect eval ctf.py --log-level sandbox\nThe sandbox log level is just above warning (so it will not show http or debug level messages).",
     "crumbs": [
       "Components",
       "<span class='chapter-number'>9</span>  <span class='chapter-title'>Agents</span>"
@@ -1240,7 +1251,7 @@
     "href": "agents-api.html#tool-use",
     "title": "Agents API",
     "section": "Tool Use",
-    "text": "Tool Use\n\nCustom Loop\nThe higher level generate() function passed to solvers includes a built-in tool use loop—when the model calls a tool, Inspect calls the underlying Python function and reports the result to the model, proceeding until the model stops calling tools. However, for more advanced agents you may want to intervene in the tool use loop in a variety of ways:\n\nRedirect the model to another trajectory if its not on a productive course.\nExercise more fine grained control over which, when, and how many tool calls are made, and how tool calling errors are handled.\nHave multiple generate() passes each with a distinct set of tools.\n\nTo do this, create a solver that emulates the default tool use loop and provides additional customisation as required. Here is the code at the core of Inspect tool use in generate():\n# call model\nmodel = get_model()\noutput = await model.generate(state.messages, state.tools)\n\n# update state with output\nstate.output = output\nstate.messages.append(output.message)\n\n# call tools and update state\nstate.messages.extend(call_tools(output.message, state.tools))\nThis does everything that default generate() does, save for an outer loop to continue calling the mode as long as it continues calling tools. 
This is a complete solver agent that implements the outer loop:\n@solver\ndef agent_loop():\n    async def solve(state: TaskState, generate: Generate):\n        model = get_model()\n        while True:\n            # call model\n            output = await model.generate(state.messages, state.tools)\n\n            # update state\n            state.output = output\n            state.messages.append(output.message)\n\n            # make tool calls or terminate if there are none\n            if output.message.tool_calls:\n                state.messages.extend(call_tools(output.message, state.tools))\n            else:\n                break\n\n        return state\n\n    return solve\nYou can imagine several ways you might want to customise this loop:\n\nAdding another termination condition for the output satisfying some criteria.\nUrging the model to keep going after it decides to stop calling tools.\nExamining and possibly filtering the tool calls before invoking call_tools()\nAdding a critique / reflection step between tool calling and generate.\nDeep copying the TaskState and exploring several trajectories.\n\n\n\nStop Reasons\nOne thing that a custom scaffold may do is try to recover from various conditions that cause the model to stop generating. You can find the reason that generation stopped in the stop_reason field of ModelOutput. For example:\noutput = await model.generate(state.messages, state.tools)\nif output.stop_reason == \"model_length\":\n    # do something to recover from context window overflow\nHere are the possible values for StopReason :\n\n\n\n\n\n\n\nStop Reason\nDescription\n\n\n\n\nstop\nThe model hit a natural stop point or a provided stop sequence\n\n\nmax_tokens\nThe maximum number of tokens specified in the request was reached.\n\n\nmodel_length\nThe model’s context length was exceeded.\n\n\ntool_calls\nThe model called a tool\n\n\ncontent_filter\nContent was omitted due to a content filter.\n\n\nunknown\nUnknown (e.g. 
unexpected runtime error)\n\n\n\n\n\nError Handling\nBy default expected errors (e.g. file not found, insufficient, permission , timeouts, etc.) are forwarded to the model for possible recovery. If you would like to intervene in the default error handling then rather than immediately appending the list of assistant messages returned from call_tools() to state.messages (as shown above), check the error property of these messages (which will be None in the case of no error) and proceed accordingly.\nNote that you don’t necessarily even need to structure the agent using a loop. For example, you might have an inner function implementing the loop, while an outer function dynamically swaps out what tools are available. For example, imagine the above was implemented in a function named tool_use_loop(), you might have outer function like this:\n# first pass w/ core tools\nstate.tools = [decompile(), dissasemble(), bash()]\nstate = await tool_use_loop(state)\n\n# second pass w/ prompt and python tool only\nstate.tools = [python()]\nstate = await tool_use_loop(state)\nTaken together these APIs enable you to build a custom version of generate() with whatever structure and logic you need.\n\n\nTool Descriptions\nIn some cases you may want to change the default descriptions created by a tool author—for example you might want to provide better disambiguation between multiple similar tools that are used together. You also might have need to do this during development of tools (to explore what descriptions are most useful to models).\nThe tool_with() function enables you to take any tool and adapt its name and/or descriptions. 
For example:\nfrom inspect_ai.tool import tool_with\n\nmy_add = tool_with(\n  tool=add(), \n  name=\"my_add\",\n  description=\"a tool to add numbers\", \n  parameters={\n    \"x\": \"the x argument\",\n    \"y\": \"the y argument\"\n  })\nYou need not provide all of the parameters shown above, for example here are some examples where we modify just the main tool description or only a single parameter:\nmy_add = tool_with(add(), description=\"a tool to add numbers\")\nmy_add = tool_with(add(), parameters={\"x\": \"the x argument\"})\nNote that the tool_with() function returns a copy of the passed tool with modified descriptions (the passed tool retains its original descriptions)..",
+    "text": "Tool Use\n\nCustom Loop\nThe higher level generate() function passed to solvers includes a built-in tool use loop—when the model calls a tool, Inspect calls the underlying Python function and reports the result to the model, proceeding until the model stops calling tools. However, for more advanced agents you may want to intervene in the tool use loop in a variety of ways:\n\nRedirect the model to another trajectory if its not on a productive course.\nExercise more fine grained control over which, when, and how many tool calls are made, and how tool calling errors are handled.\nHave multiple generate() passes each with a distinct set of tools.\n\nTo do this, create a solver that emulates the default tool use loop and provides additional customisation as required. Here is the code at the core of Inspect tool use in generate():\n# call model\nmodel = get_model()\noutput = await model.generate(state.messages, state.tools)\n\n# update state with output\nstate.output = output\nstate.messages.append(output.message)\n\n# call tools and update state\nstate.messages.extend(call_tools(output.message, state.tools))\nThis does everything that default generate() does, save for an outer loop to continue calling the mode as long as it continues calling tools. 
This is a complete solver agent that implements the outer loop:\n@solver\ndef agent_loop():\n    async def solve(state: TaskState, generate: Generate):\n        model = get_model()\n        while not state.completed:\n            # call model\n            output = await model.generate(state.messages, state.tools)\n\n            # update state\n            state.output = output\n            state.messages.append(output.message)\n\n            # make tool calls or terminate if there are none\n            if output.message.tool_calls:\n                state.messages.extend(call_tools(output.message, state.tools))\n            else:\n                break\n\n        return state\n\n    return solve\nThe state.completed flag is automatically set to True if max_messages for the task is exceeded, so we check it at the top of the loop.\nYou can imagine several ways you might want to customise this loop:\n\nAdding another termination condition for the output satisfying some criteria.\nUrging the model to keep going after it decides to stop calling tools.\nExamining and possibly filtering the tool calls before invoking call_tools()\nAdding a critique / reflection step between tool calling and generate.\nDeep copying the TaskState and exploring several trajectories.\n\n\n\nStop Reasons\nOne thing that a custom scaffold may do is try to recover from various conditions that cause the model to stop generating. You can find the reason that generation stopped in the stop_reason field of ModelOutput. 
For example:\noutput = await model.generate(state.messages, state.tools)\nif output.stop_reason == \"model_length\":\n    # do something to recover from context window overflow\nHere are the possible values for StopReason :\n\n\n\n\n\n\n\nStop Reason\nDescription\n\n\n\n\nstop\nThe model hit a natural stop point or a provided stop sequence\n\n\nmax_tokens\nThe maximum number of tokens specified in the request was reached.\n\n\nmodel_length\nThe model’s context length was exceeded.\n\n\ntool_calls\nThe model called a tool\n\n\ncontent_filter\nContent was omitted due to a content filter.\n\n\nunknown\nUnknown (e.g. unexpected runtime error)\n\n\n\n\n\nError Handling\nBy default expected errors (e.g. file not found, insufficient permission, timeouts, etc.) are forwarded to the model for possible recovery. If you would like to intervene in the default error handling then rather than immediately appending the list of assistant messages returned from call_tools() to state.messages (as shown above), check the error property of these messages (which will be None in the case of no error) and proceed accordingly.\nNote that you don’t necessarily even need to structure the agent using a loop. For example, you might have an inner function implementing the loop, while an outer function dynamically swaps out what tools are available. 
For example, imagine the above was implemented in a function named tool_use_loop(), you might have outer function like this:\n# first pass w/ core tools\nstate.tools = [decompile(), dissasemble(), bash()]\nstate = await tool_use_loop(state)\n\n# second pass w/ prompt and python tool only\nstate.tools = [python()]\nstate = await tool_use_loop(state)\nTaken together these APIs enable you to build a custom version of generate() with whatever structure and logic you need.\n\n\nTool Descriptions\nIn some cases you may want to change the default descriptions created by a tool author—for example you might want to provide better disambiguation between multiple similar tools that are used together. You also might have need to do this during development of tools (to explore what descriptions are most useful to models).\nThe tool_with() function enables you to take any tool and adapt its name and/or descriptions. For example:\nfrom inspect_ai.tool import tool_with\n\nmy_add = tool_with(\n  tool=add(), \n  name=\"my_add\",\n  description=\"a tool to add numbers\", \n  parameters={\n    \"x\": \"the x argument\",\n    \"y\": \"the y argument\"\n  })\nYou need not provide all of the parameters shown above, for example here are some examples where we modify just the main tool description or only a single parameter:\nmy_add = tool_with(add(), description=\"a tool to add numbers\")\nmy_add = tool_with(add(), parameters={\"x\": \"the x argument\"})\nNote that the tool_with() function returns a copy of the passed tool with modified descriptions (the passed tool retains its original descriptions)..",
     "crumbs": [
       "Advanced",
       "<span class='chapter-number'>17</span>  <span class='chapter-title'>Agents API</span>"
diff --git a/sitemap.xml b/sitemap.xml
index 2a4c78f1f..e2e765256 100644
--- a/sitemap.xml
+++ b/sitemap.xml
@@ -2,11 +2,11 @@
 <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
   <url>
     <loc>https://inspect.ai-safety-institute.org.uk/index.html</loc>
-    <lastmod>2024-10-01T19:48:21.698Z</lastmod>
+    <lastmod>2024-10-02T17:04:20.184Z</lastmod>
   </url>
   <url>
     <loc>https://inspect.ai-safety-institute.org.uk/tutorial.html</loc>
-    <lastmod>2024-10-02T13:33:25.803Z</lastmod>
+    <lastmod>2024-10-02T17:04:20.188Z</lastmod>
   </url>
   <url>
     <loc>https://inspect.ai-safety-institute.org.uk/workflow.html</loc>
@@ -30,11 +30,11 @@
   </url>
   <url>
     <loc>https://inspect.ai-safety-institute.org.uk/tools.html</loc>
-    <lastmod>2024-10-01T14:48:34.664Z</lastmod>
+    <lastmod>2024-10-03T13:53:29.232Z</lastmod>
   </url>
   <url>
     <loc>https://inspect.ai-safety-institute.org.uk/agents.html</loc>
-    <lastmod>2024-10-02T13:33:25.803Z</lastmod>
+    <lastmod>2024-10-02T17:04:20.184Z</lastmod>
   </url>
   <url>
     <loc>https://inspect.ai-safety-institute.org.uk/scorers.html</loc>
diff --git a/tools.html b/tools.html
index f0599c537..7f817aae9 100644
--- a/tools.html
+++ b/tools.html
@@ -315,6 +315,7 @@ <h2 id="toc-title">Table of contents</h2>
   <li><a href="#sandboxing" id="toc-sandboxing" class="nav-link" data-scroll-target="#sandboxing">Sandboxing</a></li>
   <li><a href="#tool-choice" id="toc-tool-choice" class="nav-link" data-scroll-target="#tool-choice">Tool Choice</a></li>
   <li><a href="#tool-descriptions" id="toc-tool-descriptions" class="nav-link" data-scroll-target="#tool-descriptions">Tool Descriptions</a></li>
+  <li><a href="#parallel-tool-calls" id="toc-parallel-tool-calls" class="nav-link" data-scroll-target="#parallel-tool-calls">Parallel Tool Calls</a></li>
   <li><a href="#sec-bash-and-python" id="toc-sec-bash-and-python" class="nav-link" data-scroll-target="#sec-bash-and-python">Bash and Python</a></li>
   <li><a href="#sec-web-browser" id="toc-sec-web-browser" class="nav-link" data-scroll-target="#sec-web-browser">Web Browser</a>
   <ul class="collapse">
@@ -598,49 +599,43 @@ <h2 class="anchored" data-anchor-id="tool-descriptions">Tool Descriptions</h2>
 <span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a>my_add <span class="op">=</span> tool_with(add(), parameters<span class="op">=</span>{<span class="st">"x"</span>: <span class="st">"the x argument"</span>})</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <p>Note that the <code>tool_with()</code> function returns a copy of the passed tool with modified descriptions (the passed tool retains its original descriptions).</p>
 </section>
+<section id="parallel-tool-calls" class="level2">
+<h2 class="anchored" data-anchor-id="parallel-tool-calls">Parallel Tool Calls</h2>
+<p>Some model APIs including OpenAI and Gemini support executing multiple tool calls in parallel. While this can provide a performance improvement, it might not be compatible with semantics of some tools (for example, if they manage some global state between calls).</p>
+<p>You can opt-out of parallel tool calling by adding <code>parallel=False</code> to the <code>@tool</code> decorator. For example, the built in web browsing tools do this as follows:</p>
+<div class="sourceCode" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="at">@tool</span>(parallel<span class="op">=</span><span class="va">False</span>)</span>
+<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> web_browser_go() <span class="op">-&gt;</span> Tool:</span>
+<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a>    ...</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+</section>
 <section id="sec-bash-and-python" class="level2">
 <h2 class="anchored" data-anchor-id="sec-bash-and-python">Bash and Python</h2>
 <p>The <code>bash()</code> and <code>python()</code> tools enable execution of arbitrary shell commands and Python code, respectively. These tools require the use of a <a href="agents.html#sec-sandbox-environments">Sandbox Environment</a> for the execution of untrusted code. For example, here is how you might use them in an evaluation where the model is asked to write code in order to solve capture the flag (CTF) challenges:</p>
-<div class="sourceCode" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai.tool <span class="im">import</span> bash, python</span>
-<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a>CMD_TIMEOUT <span class="op">=</span> <span class="dv">180</span></span>
-<span id="cb12-4"><a href="#cb12-4" aria-hidden="true" tabindex="-1"></a></span>
-<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a><span class="at">@task</span></span>
-<span id="cb12-6"><a href="#cb12-6" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> intercode_ctf():</span>
-<span id="cb12-7"><a href="#cb12-7" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> Task(</span>
-<span id="cb12-8"><a href="#cb12-8" aria-hidden="true" tabindex="-1"></a>        dataset<span class="op">=</span>read_dataset(),</span>
-<span id="cb12-9"><a href="#cb12-9" aria-hidden="true" tabindex="-1"></a>        solver<span class="op">=</span>[</span>
-<span id="cb12-10"><a href="#cb12-10" aria-hidden="true" tabindex="-1"></a>            system_message(<span class="st">"system.txt"</span>),</span>
-<span id="cb12-11"><a href="#cb12-11" aria-hidden="true" tabindex="-1"></a>            use_tools([</span>
-<span id="cb12-12"><a href="#cb12-12" aria-hidden="true" tabindex="-1"></a>                bash(CMD_TIMEOUT), </span>
-<span id="cb12-13"><a href="#cb12-13" aria-hidden="true" tabindex="-1"></a>                python(CMD_TIMEOUT)</span>
-<span id="cb12-14"><a href="#cb12-14" aria-hidden="true" tabindex="-1"></a>            ]),</span>
-<span id="cb12-15"><a href="#cb12-15" aria-hidden="true" tabindex="-1"></a>            generate(),</span>
-<span id="cb12-16"><a href="#cb12-16" aria-hidden="true" tabindex="-1"></a>        ],</span>
-<span id="cb12-17"><a href="#cb12-17" aria-hidden="true" tabindex="-1"></a>        scorer<span class="op">=</span>includes(),</span>
-<span id="cb12-18"><a href="#cb12-18" aria-hidden="true" tabindex="-1"></a>        max_messages<span class="op">=</span><span class="dv">30</span>,</span>
-<span id="cb12-19"><a href="#cb12-19" aria-hidden="true" tabindex="-1"></a>        sandbox<span class="op">=</span><span class="st">"docker"</span>,</span>
-<span id="cb12-20"><a href="#cb12-20" aria-hidden="true" tabindex="-1"></a>    )</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode" id="cb13"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai.tool <span class="im">import</span> bash, python</span>
+<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a>CMD_TIMEOUT <span class="op">=</span> <span class="dv">180</span></span>
+<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a></span>
+<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a><span class="at">@task</span></span>
+<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> intercode_ctf():</span>
+<span id="cb13-7"><a href="#cb13-7" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> Task(</span>
+<span id="cb13-8"><a href="#cb13-8" aria-hidden="true" tabindex="-1"></a>        dataset<span class="op">=</span>read_dataset(),</span>
+<span id="cb13-9"><a href="#cb13-9" aria-hidden="true" tabindex="-1"></a>        solver<span class="op">=</span>[</span>
+<span id="cb13-10"><a href="#cb13-10" aria-hidden="true" tabindex="-1"></a>            system_message(<span class="st">"system.txt"</span>),</span>
+<span id="cb13-11"><a href="#cb13-11" aria-hidden="true" tabindex="-1"></a>            use_tools([</span>
+<span id="cb13-12"><a href="#cb13-12" aria-hidden="true" tabindex="-1"></a>                bash(CMD_TIMEOUT), </span>
+<span id="cb13-13"><a href="#cb13-13" aria-hidden="true" tabindex="-1"></a>                python(CMD_TIMEOUT)</span>
+<span id="cb13-14"><a href="#cb13-14" aria-hidden="true" tabindex="-1"></a>            ]),</span>
+<span id="cb13-15"><a href="#cb13-15" aria-hidden="true" tabindex="-1"></a>            generate(),</span>
+<span id="cb13-16"><a href="#cb13-16" aria-hidden="true" tabindex="-1"></a>        ],</span>
+<span id="cb13-17"><a href="#cb13-17" aria-hidden="true" tabindex="-1"></a>        scorer<span class="op">=</span>includes(),</span>
+<span id="cb13-18"><a href="#cb13-18" aria-hidden="true" tabindex="-1"></a>        max_messages<span class="op">=</span><span class="dv">30</span>,</span>
+<span id="cb13-19"><a href="#cb13-19" aria-hidden="true" tabindex="-1"></a>        sandbox<span class="op">=</span><span class="st">"docker"</span>,</span>
+<span id="cb13-20"><a href="#cb13-20" aria-hidden="true" tabindex="-1"></a>    )</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <p>We specify a 3-minute timeout for execution of the bash and python tools to ensure that they don’t perform extremely long running operations.</p>
 <p>See the <a href="agents.html">Agents</a> section for more details on how to build evaluations that allow models to take arbitrary actions over a longer time horizon.</p>
 </section>
 <section id="sec-web-browser" class="level2">
 <h2 class="anchored" data-anchor-id="sec-web-browser">Web Browser</h2>
-<div class="callout callout-style-default callout-note callout-titled" data-apperance="&quot;simple:">
-<div class="callout-header d-flex align-content-center">
-<div class="callout-icon-container">
-<i class="callout-icon"></i>
-</div>
-<div class="callout-title-container flex-fill">
-Note
-</div>
-</div>
-<div class="callout-body-container callout-body">
-<p>Note that the web browser tool described below is currently only available in the development version of Inspect. You can install the development version with:</p>
-<div class="sourceCode" id="cb13"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="ex">pip</span> install git+https://github.com/UKGovernmentBEIS/inspect_ai</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
-</div>
-</div>
-<p>The web browser tool provides models with the ability to browse the web using a headless Chromium browser. Navigation, history, and mouse/keyboard interactions are all supported.</p>
+<p>The web browser tools provide models with the ability to browse the web using a headless Chromium browser. Navigation, history, and mouse/keyboard interactions are all supported.</p>
 <section id="configuration" class="level3">
 <h3 class="anchored" data-anchor-id="configuration">Configuration</h3>
 <p>Under the hood, the web browser is an instance of <a href="https://www.chromium.org/chromium-projects/">Chromium</a> orchestrated by <a href="https://playwright.dev/">Playwright</a>, and runs in its own dedicated Docker container. Therefore, to use the web_browser tool you should reference the <code>inspect_web_browser</code> Docker image provided with Inspect in your <code>compose.yaml</code>. For example, here we use it as our default image:</p>
@@ -671,24 +666,24 @@ <h3 class="anchored" data-anchor-id="configuration">Configuration</h3>
 </section>
 <section id="task" class="level3">
 <h3 class="anchored" data-anchor-id="task">Task</h3>
-<p>A task configured to use the web browser tool might look like this:</p>
+<p>A task configured to use the web browser tools might look like this:</p>
 <div class="sourceCode" id="cb16"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai <span class="im">import</span> Task, task</span>
 <span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai.scorer <span class="im">import</span> match</span>
 <span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai.solver <span class="im">import</span> generate, use_tools</span>
-<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai.tool <span class="im">import</span> bash, web_browser_tools</span>
+<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai.tool <span class="im">import</span> bash, python, web_browser</span>
 <span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a><span class="at">@task</span></span>
 <span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> browser_task():</span>
 <span id="cb16-8"><a href="#cb16-8" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> Task(</span>
 <span id="cb16-9"><a href="#cb16-9" aria-hidden="true" tabindex="-1"></a>        dataset<span class="op">=</span>read_dataset(),</span>
 <span id="cb16-10"><a href="#cb16-10" aria-hidden="true" tabindex="-1"></a>        solver<span class="op">=</span>[</span>
-<span id="cb16-11"><a href="#cb16-11" aria-hidden="true" tabindex="-1"></a>            use_tools([bash()] <span class="op">+</span> web_browser_tools()),</span>
+<span id="cb16-11"><a href="#cb16-11" aria-hidden="true" tabindex="-1"></a>            use_tools([bash(), python()] <span class="op">+</span> web_browser()),</span>
 <span id="cb16-12"><a href="#cb16-12" aria-hidden="true" tabindex="-1"></a>            generate(),</span>
 <span id="cb16-13"><a href="#cb16-13" aria-hidden="true" tabindex="-1"></a>        ],</span>
 <span id="cb16-14"><a href="#cb16-14" aria-hidden="true" tabindex="-1"></a>        scorer<span class="op">=</span>match(),</span>
 <span id="cb16-15"><a href="#cb16-15" aria-hidden="true" tabindex="-1"></a>        sandbox<span class="op">=</span>(<span class="st">"docker"</span>, <span class="st">"compose.yaml"</span>),</span>
 <span id="cb16-16"><a href="#cb16-16" aria-hidden="true" tabindex="-1"></a>    )</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
-<p>Note that we pass <code>web_browser_tools()</code> to <code>use_tools()</code>, which provides a list of web browsing tools (e.g.&nbsp;<code>web_browser_go()</code>, <code>web_browser_click()</code>, etc.).</p>
+<p>Note that unlike some other tool functions like <code>bash()</code>, the <code>web_browser()</code> function returns a list of tools. Therefore, we concatenate it with a list of the other tools we are using in the call to <code>use_tools()</code>.</p>
 </section>
 <section id="browsing" class="level3">
 <h3 class="anchored" data-anchor-id="browsing">Browsing</h3>
@@ -706,15 +701,23 @@ <h3 class="anchored" data-anchor-id="browsing">Browsing</h3>
 </thead>
 <tbody>
 <tr class="odd">
-<td><code>web_browser_go()</code></td>
+<td><code>web_browser_go(url)</code></td>
 <td>Navigate the web browser to a URL.</td>
 </tr>
 <tr class="even">
-<td><code>web_browser_click()</code></td>
+<td><code>web_browser_click(element_id)</code></td>
 <td>Click an element on the page currently displayed by the web browser.</td>
 </tr>
 <tr class="odd">
-<td><code>web_browser_scroll()</code></td>
+<td><code>web_browser_type(element_id, text)</code></td>
+<td>Type text into an input on a web browser page.</td>
+</tr>
+<tr class="even">
+<td><code>web_browser_type_submit(element_id, text)</code></td>
+<td>Type text into a form input on a web browser page and press ENTER to submit the form.</td>
+</tr>
+<tr class="odd">
+<td><code>web_browser_scroll(direction)</code></td>
 <td>Scroll the web browser up or down by one page.</td>
 </tr>
 <tr class="even">
@@ -729,17 +732,8 @@ <h3 class="anchored" data-anchor-id="browsing">Browsing</h3>
 <td><code>web_browser_refresh()</code></td>
 <td>Refresh the current page of the web browser.</td>
 </tr>
-<tr class="odd">
-<td><code>web_browser_type()</code></td>
-<td>Type text into an input on a web browser page.</td>
-</tr>
-<tr class="even">
-<td><code>web_browser_type_submit()</code></td>
-<td>Type text into a form input on a web browser page and press ENTER to submit the form.</td>
-</tr>
 </tbody>
 </table>
-<p>If you like, you can enable a subset of these tools rather than calling <code>web_browser_tools()</code> to use all of them.</p>
 <p>The return value of each of these tools is a <a href="https://web.dev/articles/the-accessibility-tree">web accessibility tree</a> for the page, which provides a clean view of the content, links, and form fields available on the page (you can look at the accessibility tree for any web page using <a href="https://developer.chrome.com/blog/full-accessibility-tree">Chrome Developer Tools</a>).</p>
 </section>
 <section id="sec-custom-image" class="level3">
@@ -759,6 +753,7 @@ <h3 class="anchored" data-anchor-id="sec-custom-image">Custom Images</h3>
 <span id="cb17-11"><a href="#cb17-11" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb17-12"><a href="#cb17-12" aria-hidden="true" tabindex="-1"></a><span class="co"># Run the server</span></span>
 <span id="cb17-13"><a href="#cb17-13" aria-hidden="true" tabindex="-1"></a><span class="kw">CMD</span> [<span class="st">"python3"</span>, <span class="st">"/app/web_browser/web_server.py"</span>]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<p>Note that all of the Python files in the <a href="https://github.com/UKGovernmentBEIS/inspect_ai/blob/main/src/inspect_ai/tool/_tools/_web_browser/_resources/">_resources</a> directory alongside the <code>Dockerfile</code> need to be available for copying when building the container.</p>
 </section>
 </section>
 <section id="sec-web-search" class="level2">
diff --git a/tutorial.html b/tutorial.html
index a10f5c8c4..bd6b4fd28 100644
--- a/tutorial.html
+++ b/tutorial.html
@@ -442,7 +442,7 @@ <h2 class="anchored" data-anchor-id="sec-security-guide">Security Guide</h2>
 <section id="setup" class="level3 unlisted">
 <h3 class="unlisted anchored" data-anchor-id="setup">Setup</h3>
 <p>We’ll start by importing the functions we need from Inspect and defining a system message that orients the model to its role as a computer security expert.</p>
-<div id="bb8f6cfe" class="cell">
+<div id="dfe55147" class="cell">
 <div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai <span class="im">import</span> Task, <span class="bu">eval</span>, task</span>
 <span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai.dataset <span class="im">import</span> csv_dataset</span>
 <span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai.scorer <span class="im">import</span> model_graded_fact</span>
@@ -459,7 +459,7 @@ <h3 class="unlisted anchored" data-anchor-id="setup">Setup</h3>
 <section id="eval" class="level3 unlisted">
 <h3 class="unlisted anchored" data-anchor-id="eval">Eval</h3>
 <p>Discerning whether the correct security guidance was provided by the model might provide difficult using only text matching algorithms. Here we use a model to read the response and assess the quality of the answer.</p>
-<div id="e374b0c0" class="cell">
+<div id="87b47c24" class="cell">
 <div class="sourceCode cell-code" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="at">@task</span></span>
 <span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> security_guide():</span>
 <span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> Task(</span>
@@ -489,7 +489,7 @@ <h2 class="anchored" data-anchor-id="sec-hellaswag">HellaSwag</h2>
 <section id="setup-1" class="level3 unlisted">
 <h3 class="unlisted anchored" data-anchor-id="setup-1">Setup</h3>
 <p>We’ll start by importing the functions we need from Inspect, defining a system message, and writing a function to convert dataset records to samples (we need to do this to convert the index-based label in the dataset to a letter).</p>
-<div id="3f4a19c7" class="cell">
+<div id="d73eebb0" class="cell">
 <div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai <span class="im">import</span> Task, <span class="bu">eval</span>, task</span>
 <span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai.dataset <span class="im">import</span> Sample, hf_dataset</span>
 <span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai.scorer <span class="im">import</span> choice</span>
@@ -514,7 +514,7 @@ <h3 class="unlisted anchored" data-anchor-id="setup-1">Setup</h3>
 <section id="eval-1" class="level3 unlisted">
 <h3 class="unlisted anchored" data-anchor-id="eval-1">Eval</h3>
 <p>We’ll load the dataset from <a href="https://huggingface.co/datasets/Rowan/hellaswag">HuggingFace</a> using the <code>hf_dataset()</code> function. We’ll draw data from the validation split, and use the <code>record_to_sample()</code> function to parse the records (we’ll also pass <code>trust=True</code> to indicate that we are okay with Hugging Face executing the dataset loading code provided by hellaswag):</p>
-<div id="a7b79da4" class="cell">
+<div id="9c7e6985" class="cell">
 <div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="at">@task</span></span>
 <span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> hellaswag():</span>
 <span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a>   </span>
@@ -574,7 +574,7 @@ <h3 class="unlisted anchored" data-anchor-id="setup-2">Setup</h3>
 <li><code>record_to_sample()</code> to convert raw records to samples. Note that we need a function rather than just mapping field names with a <code>FieldSpec</code> because the <strong>answer</strong> field in the dataset needs to be divided into reasoning and the actual answer (which appears at the very end after <code>####</code>).</li>
 <li><code>sample_to_fewshot()</code> to generate fewshot examples from samples.</li>
 </ol>
-<div id="280bd686" class="cell">
+<div id="8ae85ed2" class="cell">
 <div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai <span class="im">import</span> Task, task</span>
 <span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai.dataset <span class="im">import</span> Sample, hf_dataset</span>
 <span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai.scorer <span class="im">import</span> match</span>
@@ -621,7 +621,7 @@ <h3 class="unlisted anchored" data-anchor-id="setup-2">Setup</h3>
 <section id="eval-2" class="level3 unlisted">
 <h3 class="unlisted anchored" data-anchor-id="eval-2">Eval</h3>
 <p>We’ll load the dataset from <a href="https://huggingface.co/datasets/gsm8k">HuggingFace</a> using the <code>hf_dataset()</code> function. By default we use 10 fewshot examples, but the <code>fewshot</code> task arg can be used to turn this up, down, or off. The <code>fewshot_seed</code> is provided for stability of fewshot examples across runs.</p>
-<div id="32f0187f" class="cell">
+<div id="5fc6eb2e" class="cell">
 <div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="at">@task</span></span>
 <span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> gsm8k(fewshot<span class="op">=</span><span class="dv">10</span>, fewshot_seed<span class="op">=</span><span class="dv">42</span>):</span>
 <span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a>    <span class="co"># build solver list dynamically (may or may not be doing fewshot)</span></span>
@@ -688,7 +688,7 @@ <h2 class="anchored" data-anchor-id="sec-mathematics">Mathematics</h2>
 <section id="setup-3" class="level3 unlisted">
 <h3 class="unlisted anchored" data-anchor-id="setup-3">Setup</h3>
 <p>We’ll start by importing the functions we need from Inspect and defining a prompt that asks the model to reason step by step and respond with its answer on a line at the end. It also nudges the model not to enclose its answer in <code>\boxed</code>, a LaTeX command for displaying equations that models often use in math output.</p>
-<div id="0428d072" class="cell">
+<div id="e2aaf1c1" class="cell">
 <div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> re</span>
 <span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai <span class="im">import</span> Task, task</span>
@@ -726,7 +726,7 @@ <h3 class="unlisted anchored" data-anchor-id="setup-3">Setup</h3>
 <section id="eval-3" class="level3 unlisted">
 <h3 class="unlisted anchored" data-anchor-id="eval-3">Eval</h3>
 <p>Here is the basic setup for our eval. We <code>shuffle</code> the dataset so that when we use <code>--limit</code> to develop on smaller slices we get some variety of inputs and results:</p>
-<div id="115be14f" class="cell">
+<div id="609889b3" class="cell">
 <div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="at">@task</span></span>
 <span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> math(shuffle<span class="op">=</span><span class="va">True</span>):</span>
 <span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> Task(</span>
@@ -749,7 +749,7 @@ <h3 class="unlisted anchored" data-anchor-id="eval-3">Eval</h3>
 <span id="cb12-20"><a href="#cb12-20" aria-hidden="true" tabindex="-1"></a>    )</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 </div>
 <p>The heart of this eval isn’t in the task definition though, rather it’s in how we grade the output. Math expressions can be logically equivalent but not literally the same. Consequently, we’ll use a model to assess whether the output and the target are logically equivalent. the <code>expression_equivalence()</code> custom scorer implements this:</p>
-<div id="a57dd6cc" class="cell">
+<div id="bd2fb77c" class="cell">
 <div class="sourceCode cell-code" id="cb13"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="at">@scorer</span>(metrics<span class="op">=</span>[accuracy(), stderr()])</span>
 <span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> expression_equivalence():</span>
 <span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a>    <span class="cf">async</span> <span class="kw">def</span> score(state: TaskState, target: Target):</span>
@@ -830,7 +830,7 @@ <h2 class="anchored" data-anchor-id="sec-tool-use">Tool Use</h2>
 <section id="addition" class="level3 unlisted">
 <h3 class="unlisted anchored" data-anchor-id="addition">Addition</h3>
 <p>We’ll demonstrate with a simple tool that adds two numbers, using the <code>@tool</code> decorator to register it with the system:</p>
-<div id="005d1262" class="cell">
+<div id="eefa6dff" class="cell">
 <div class="sourceCode cell-code" id="cb17"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai <span class="im">import</span> Task, <span class="bu">eval</span>, task</span>
 <span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai.dataset <span class="im">import</span> Sample</span>
 <span id="cb17-3"><a href="#cb17-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> inspect_ai.scorer <span class="im">import</span> includes, match</span>
@@ -865,7 +865,7 @@ <h3 class="unlisted anchored" data-anchor-id="addition">Addition</h3>
 <span id="cb19-3"><a href="#cb19-3" aria-hidden="true" tabindex="-1"></a>    y: Second number to add.</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <p>Type annotations and descriptions are <em>required</em> for tool declarations so that the model can be informed which types to pass back to the tool function and what the purpose of each parameter is.</p>
 <p>Now that we’ve defined the tool, we can use it in an evaluation by passing it to the <code>use_tools()</code> function.</p>
-<div id="fff52da6" class="cell">
+<div id="01db849e" class="cell">
 <div class="sourceCode cell-code" id="cb20"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="at">@task</span></span>
 <span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> addition_problem():</span>
 <span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a>    <span class="cf">return</span> Task(</span>
@@ -894,7 +894,7 @@ <h3 class="unlisted anchored" data-anchor-id="task">Task</h3>
 <ol start="2" type="1">
 <li><code>ctf_agent()</code>, which defines the agent’s solver. The solver consists principally of using <code>bash()</code> and <code>python()</code> tools in a loop until the flag is discovered. We’ll describe this function in more detail below.</li>
 </ol>
-<div id="8f61bd81" class="cell">
+<div id="e99fe194" class="cell">
 <div class="sourceCode cell-code" id="cb22"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> textwrap <span class="im">import</span> dedent</span>
 <span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a></span>
 <span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> dataset <span class="im">import</span> read_dataset</span>
@@ -920,7 +920,7 @@ <h3 class="unlisted anchored" data-anchor-id="task">Task</h3>
 </div>
 <p>Note that we specify <code>sandbox="docker"</code> to ensure that code generated from the model is run in a secure <a href="agents.html#sec-sandbox-environments">sandbox environment</a>.</p>
 <p>Here is the definition of the agent:</p>
-<div id="872d1875" class="cell">
+<div id="c4c02088" class="cell">
 <div class="sourceCode cell-code" id="cb23"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="at">@solver</span></span>
 <span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> ctf_agent(max_attempts<span class="op">=</span><span class="dv">3</span>):</span>
 <span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a>    SYSTEM_MESSAGE <span class="op">=</span> dedent(<span class="st">"""</span></span>
diff --git a/vscode.html b/vscode.html
index 8509a33ba..ea3b8f7c0 100644
--- a/vscode.html
+++ b/vscode.html
@@ -913,7 +913,7 @@ <h2 class="anchored" data-anchor-id="troubleshooting">Troubleshooting</h2>
     </div>
   </div>
 </footer>
-<script>var lightboxQuarto = GLightbox({"closeEffect":"zoom","descPosition":"bottom","selector":".lightbox","openEffect":"zoom","loop":false});
+<script>var lightboxQuarto = GLightbox({"openEffect":"zoom","closeEffect":"zoom","loop":false,"selector":".lightbox","descPosition":"bottom"});
 (function() {
   let previousOnload = window.onload;
   window.onload = () => {
diff --git a/workflow.html b/workflow.html
index 1f44dca9f..f53f31356 100644
--- a/workflow.html
+++ b/workflow.html
@@ -1185,7 +1185,7 @@ <h2 class="anchored" data-anchor-id="eval-suites">Eval Suites</h2>
     </div>
   </div>
 </footer>
-<script>var lightboxQuarto = GLightbox({"loop":false,"closeEffect":"zoom","selector":".lightbox","descPosition":"bottom","openEffect":"zoom"});
+<script>var lightboxQuarto = GLightbox({"openEffect":"zoom","descPosition":"bottom","closeEffect":"zoom","selector":".lightbox","loop":false});
 (function() {
   let previousOnload = window.onload;
   window.onload = () => {