Skip to content

Commit

Permalink
Built site for gh-pages
Browse files Browse the repository at this point in the history
  • Loading branch information
aisi-inspect committed Jun 7, 2024
1 parent 065507e commit a641706
Show file tree
Hide file tree
Showing 11 changed files with 216 additions and 170 deletions.
2 changes: 1 addition & 1 deletion .nojekyll
Original file line number Diff line number Diff line change
@@ -1 +1 @@
fd46338c
f4cc5e9f
2 changes: 1 addition & 1 deletion eval-logs.html
Original file line number Diff line number Diff line change
Expand Up @@ -1066,7 +1066,7 @@ <h3 class="anchored" data-anchor-id="reading-logs">Reading Logs</h3>
</div>
</div>
</footer>
<script>var lightboxQuarto = GLightbox({"loop":false,"descPosition":"bottom","selector":".lightbox","openEffect":"zoom","closeEffect":"zoom"});
<script>var lightboxQuarto = GLightbox({"selector":".lightbox","loop":false,"descPosition":"bottom","openEffect":"zoom","closeEffect":"zoom"});
window.onload = () => {
lightboxQuarto.on('slide_before_load', (data) => {
const { slideIndex, slideNode, slideConfig, player, trigger } = data;
Expand Down
34 changes: 17 additions & 17 deletions examples.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion index.html
Original file line number Diff line number Diff line change
Expand Up @@ -983,7 +983,7 @@ <h2 class="anchored" data-anchor-id="learning-more">Learning More</h2>
</div>
</div>
</footer>
<script>var lightboxQuarto = GLightbox({"closeEffect":"zoom","loop":false,"selector":".lightbox","descPosition":"bottom","openEffect":"zoom"});
<script>var lightboxQuarto = GLightbox({"closeEffect":"zoom","selector":".lightbox","openEffect":"zoom","loop":false,"descPosition":"bottom"});
window.onload = () => {
lightboxQuarto.on('slide_before_load', (data) => {
const { slideIndex, slideNode, slideConfig, player, trigger } = data;
Expand Down
2 changes: 1 addition & 1 deletion log-viewer.html
Original file line number Diff line number Diff line change
Expand Up @@ -932,7 +932,7 @@ <h2 class="anchored" data-anchor-id="task-information">Task Information</h2>
</div>
</div>
</footer>
<script>var lightboxQuarto = GLightbox({"closeEffect":"zoom","descPosition":"bottom","openEffect":"zoom","loop":false,"selector":".lightbox"});
<script>var lightboxQuarto = GLightbox({"closeEffect":"zoom","selector":".lightbox","descPosition":"bottom","loop":false,"openEffect":"zoom"});
window.onload = () => {
lightboxQuarto.on('slide_before_load', (data) => {
const { slideIndex, slideNode, slideConfig, player, trigger } = data;
Expand Down
24 changes: 12 additions & 12 deletions search.json

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions sitemap.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
</url>
<url>
<loc>https://UKGovernmentBEIS.github.io/inspect_ai/workflow.html</loc>
<lastmod>2024-06-05T18:05:49.769Z</lastmod>
<lastmod>2024-06-06T12:50:00.585Z</lastmod>
</url>
<url>
<loc>https://UKGovernmentBEIS.github.io/inspect_ai/log-viewer.html</loc>
Expand All @@ -18,23 +18,23 @@
</url>
<url>
<loc>https://UKGovernmentBEIS.github.io/inspect_ai/examples.html</loc>
<lastmod>2024-06-05T22:33:41.429Z</lastmod>
<lastmod>2024-06-07T15:22:10.708Z</lastmod>
</url>
<url>
<loc>https://UKGovernmentBEIS.github.io/inspect_ai/solvers.html</loc>
<lastmod>2024-06-03T00:17:12.730Z</lastmod>
<lastmod>2024-06-06T14:39:44.506Z</lastmod>
</url>
<url>
<loc>https://UKGovernmentBEIS.github.io/inspect_ai/tools.html</loc>
<lastmod>2024-06-05T22:33:22.657Z</lastmod>
<lastmod>2024-06-07T13:37:22.173Z</lastmod>
</url>
<url>
<loc>https://UKGovernmentBEIS.github.io/inspect_ai/scorers.html</loc>
<lastmod>2024-05-29T12:50:39.127Z</lastmod>
</url>
<url>
<loc>https://UKGovernmentBEIS.github.io/inspect_ai/datasets.html</loc>
<lastmod>2024-06-05T18:13:14.695Z</lastmod>
<lastmod>2024-06-06T12:50:00.581Z</lastmod>
</url>
<url>
<loc>https://UKGovernmentBEIS.github.io/inspect_ai/models.html</loc>
Expand Down
25 changes: 12 additions & 13 deletions solvers.html
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ <h2 class="anchored" data-anchor-id="task-states">Task States</h2>
<h2 class="anchored" data-anchor-id="solver-function">Solver Function</h2>
<p>We’ve covered the role of solvers in the system, but what exactly are solvers technically? A solver is a Python function that takes a <code>TaskState</code> and <code>generate</code> function, and then transforms and returns the <code>TaskState</code> (the <code>generate</code> function may or may not be called depending on the solver).</p>
<div class="sourceCode" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="cf">async</span> <span class="kw">def</span> solve(state: TaskState, generate: Generate):</span>
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a> <span class="co"># do something useful with state (possibly </span></span>
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a> <span class="co"># do something useful with state (possibly</span></span>
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a> <span class="co"># calling generate for more advanced solvers)</span></span>
<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a> <span class="co"># then return the state</span></span>
<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> state</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
Expand Down Expand Up @@ -402,15 +402,14 @@ <h2 class="anchored" data-anchor-id="built-in-solvers">Built-In Solvers</h2>
<h3 class="anchored" data-anchor-id="multiple-choice">Multiple Choice</h3>
<p>Here is the declaration for the <code>multiple_choice()</code> solver:</p>
<div class="sourceCode" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> multiple_choice(</span>
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a> cot: <span class="bu">bool</span> <span class="op">=</span> <span class="va">False</span>,</span>
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a> template: <span class="bu">str</span> <span class="op">|</span> <span class="va">None</span> <span class="op">=</span> <span class="va">None</span>,</span>
<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a> max_tokens: <span class="bu">int</span> <span class="op">|</span> <span class="va">None</span> <span class="op">=</span> <span class="va">None</span>,</span>
<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a> shuffle: <span class="bu">bool</span> <span class="op">|</span> Random <span class="op">=</span> <span class="va">False</span>,</span>
<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a> answer_pattern: <span class="bu">str</span> <span class="op">|</span> <span class="va">None</span> <span class="op">=</span> <span class="va">None</span>,</span>
<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a>) <span class="op">-&gt;</span> Solver:</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>The <code>cot</code> parameter determines whether the default template employs chain of thought reasoning or not (defaults to <code>False</code>). Note that using chain of thought will be slower and use more tokens, so you should assess carefully whether your eval benefits from it or not. When <code>cot</code> is <code>False</code>, <code>max_tokens</code> defaults to 32; when <code>True</code>, it defaults to 1024.</p>
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a> multiple_correct: <span class="bu">bool</span> <span class="op">=</span> <span class="va">False</span>,</span>
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a> shuffle: <span class="bu">bool</span> <span class="op">|</span> Random <span class="op">=</span> <span class="va">False</span>,</span>
<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a> template: <span class="bu">str</span> <span class="op">|</span> <span class="va">None</span> <span class="op">=</span> <span class="va">None</span>,</span>
<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a>) <span class="op">-&gt;</span> Solver:</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>By default, multiple choice questions have a single correct answer. Set <code>multiple_correct=True</code> if your target has defined multiple correct answers (for example, a <code>target</code> of <code>["B", "C"]</code>). In this case the model is prompted to provide one or more answers, and the sample is scored correct only if each of these answers is provided.</p>
<p>If you specify <code>shuffle=True</code>, then the order of the answers presented to the model will be randomised (this may or may not affect results, depending on the nature of the questions and the model being evaluated).</p>
<p>Generally when using the <code>multiple_choice()</code> solver you should pair it with the <code>answer("letter")</code> scorer.</p>
<p>Use <code>template</code> to provide an alternate prompt template (note that if you do this your template should handle prompting for <code>multiple_correct</code> directly if required).</p>
<p>When using the <code>multiple_choice()</code> solver you should always pair it with the <code>choice()</code> scorer.</p>
</section>
<section id="self-critique" class="level3">
<h3 class="anchored" data-anchor-id="self-critique">Self Critique</h3>
Expand Down Expand Up @@ -656,8 +655,8 @@ <h3 class="anchored" data-anchor-id="example-self-critique">Example: Self Critiq
<span id="cb9-28"><a href="#cb9-28" aria-hidden="true" tabindex="-1"></a><span class="vs">***</span></span>
<span id="cb9-29"><a href="#cb9-29" aria-hidden="true" tabindex="-1"></a><span class="vs">[END DATA]</span></span>
<span id="cb9-30"><a href="#cb9-30" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-31"><a href="#cb9-31" aria-hidden="true" tabindex="-1"></a><span class="vs">If the original answer is already correct, just repeat the </span></span>
<span id="cb9-32"><a href="#cb9-32" aria-hidden="true" tabindex="-1"></a><span class="vs">original answer exactly. You should just provide your answer to </span></span>
<span id="cb9-31"><a href="#cb9-31" aria-hidden="true" tabindex="-1"></a><span class="vs">If the original answer is already correct, just repeat the</span></span>
<span id="cb9-32"><a href="#cb9-32" aria-hidden="true" tabindex="-1"></a><span class="vs">original answer exactly. You should just provide your answer to</span></span>
<span id="cb9-33"><a href="#cb9-33" aria-hidden="true" tabindex="-1"></a><span class="vs">the question in exactly this format:</span></span>
<span id="cb9-34"><a href="#cb9-34" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-35"><a href="#cb9-35" aria-hidden="true" tabindex="-1"></a><span class="vs">Answer: &lt;your answer&gt; """</span></span>
Expand Down Expand Up @@ -727,7 +726,7 @@ <h2 class="anchored" data-anchor-id="early-termination">Early Termination</h2>
<div class="sourceCode" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a>Task(</span>
<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a> dataset<span class="op">=</span>json_dataset(<span class="st">"data.json"</span>),</span>
<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a> plan <span class="op">=</span> Plan(</span>
<span id="cb12-4"><a href="#cb12-4" aria-hidden="true" tabindex="-1"></a> steps <span class="op">=</span> [...], </span>
<span id="cb12-4"><a href="#cb12-4" aria-hidden="true" tabindex="-1"></a> steps <span class="op">=</span> [...],</span>
<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a> finish <span class="op">=</span> finish_up()</span>
<span id="cb12-6"><a href="#cb12-6" aria-hidden="true" tabindex="-1"></a> ),</span>
<span id="cb12-7"><a href="#cb12-7" aria-hidden="true" tabindex="-1"></a> scorer <span class="op">=</span> model_graded_fact()</span>
Expand All @@ -745,7 +744,7 @@ <h2 class="anchored" data-anchor-id="plan-cleanup">Plan Cleanup</h2>
<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a>Task(</span>
<span id="cb13-7"><a href="#cb13-7" aria-hidden="true" tabindex="-1"></a> dataset<span class="op">=</span>json_dataset(<span class="st">"data.json"</span>),</span>
<span id="cb13-8"><a href="#cb13-8" aria-hidden="true" tabindex="-1"></a> plan <span class="op">=</span> Plan(</span>
<span id="cb13-9"><a href="#cb13-9" aria-hidden="true" tabindex="-1"></a> steps <span class="op">=</span> [...], </span>
<span id="cb13-9"><a href="#cb13-9" aria-hidden="true" tabindex="-1"></a> steps <span class="op">=</span> [...],</span>
<span id="cb13-10"><a href="#cb13-10" aria-hidden="true" tabindex="-1"></a> cleanup <span class="op">=</span> cleanup</span>
<span id="cb13-11"><a href="#cb13-11" aria-hidden="true" tabindex="-1"></a> ),</span>
<span id="cb13-12"><a href="#cb13-12" aria-hidden="true" tabindex="-1"></a> scorer <span class="op">=</span> model_graded_fact()</span>
Expand Down
Loading

0 comments on commit a641706

Please sign in to comment.