add title to tool content (#857)

UKGovernmentBEIS · Nov 18, 2024 · 82afc6a
1 parent 560b2fa
commit 82afc6a
Show file tree

Hide file tree

Showing 6 changed files with 79 additions and 17 deletions.
diff --git a/src/inspect_ai/_view/www/log-schema.json b/src/inspect_ai/_view/www/log-schema.json
@@ -671,6 +671,18 @@
           "default": null,
           "title": "Token Limit"
         },
+        "time_limit": {
+          "anyOf": [
+            {
+              "type": "integer"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Time Limit"
+        },
         "max_samples": {
           "anyOf": [
             {
@@ -767,6 +779,7 @@
         "fail_on_error",
         "message_limit",
         "token_limit",
+        "time_limit",
         "max_samples",
         "max_tasks",
         "max_subprocesses",
@@ -2374,6 +2387,18 @@
           ],
           "default": null
         },
+        "metadata": {
+          "anyOf": [
+            {
+              "type": "object"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Metadata"
+        },
         "error": {
           "anyOf": [
             {
@@ -2393,6 +2418,7 @@
         "model",
         "choices",
         "usage",
+        "metadata",
         "error"
       ],
       "additionalProperties": false
@@ -3198,6 +3224,18 @@
     "ToolCallContent": {
       "description": "Content to include in tool call view.",
       "properties": {
+        "title": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Title"
+        },
         "format": {
           "enum": [
             "text",
@@ -3212,6 +3250,7 @@
         }
       },
       "required": [
+        "title",
         "format",
         "content"
       ],
@@ -3229,6 +3268,7 @@
             "permission",
             "file_not_found",
             "is_a_directory",
+            "output_limit",
             "approval",
             "unknown"
           ],

diff --git a/src/inspect_ai/_view/www/src/types/log.d.ts b/src/inspect_ai/_view/www/src/types/log.d.ts
@@ -38,6 +38,7 @@ export type Approvers = ApproverPolicyConfig[];
 export type FailOnError = boolean | number | null;
 export type MessageLimit = number | null;
 export type TokenLimit = number | null;
+export type TimeLimit = number | null;
 export type MaxSamples = number | null;
 export type MaxTasks = number | null;
 export type MaxSubprocesses = number | null;
@@ -138,6 +139,7 @@ export type Type4 =
   | "permission"
   | "file_not_found"
   | "is_a_directory"
+  | "output_limit"
   | "approval"
   | "unknown";
 export type Message1 = string;
@@ -168,6 +170,7 @@ export type Logprob2 = number;
 export type Bytes1 = number[] | null;
 export type Content4 = Logprob[];
 export type Choices1 = ChatCompletionChoice[];
+export type Metadata4 = {} | null;
 export type Error = string | null;
 export type Scores1 = {
   [k: string]: Score;
@@ -182,7 +185,7 @@ export type Value1 =
     };
 export type Answer = string | null;
 export type Explanation = string | null;
-export type Metadata4 = {} | null;
+export type Metadata5 = {} | null;
 export type Timestamp = string;
 export type Event = "sample_init";
 export type Input1 =
@@ -196,7 +199,7 @@ export type Input1 =
 export type Choices2 = string[] | null;
 export type Target1 = string | string[];
 export type Id2 = number | string | null;
-export type Metadata6 = {} | null;
+export type Metadata7 = {} | null;
 export type Files1 = {
   [k: string]: string;
 } | null;
@@ -251,6 +254,7 @@ export type Id3 = string;
 export type Function2 = string;
 export type Result = string | number | boolean | (ContentText | ContentImage)[];
 export type Truncated = [unknown, unknown] | null;
+export type Title = string | null;
 export type Format = "text" | "markdown";
 export type Content5 = string;
 export type Timestamp5 = string;
@@ -358,7 +362,7 @@ export type Value2 =
     };
 export type Answer1 = string | null;
 export type Explanation2 = string | null;
-export type Metadata7 = {} | null;
+export type Metadata8 = {} | null;
 export type SampleId = string | number | null;
 export type Samples2 = SampleScore[];
 
@@ -414,6 +418,7 @@ export interface EvalConfig {
   fail_on_error: FailOnError;
   message_limit: MessageLimit;
   token_limit: TokenLimit;
+  time_limit: TimeLimit;
   max_samples: MaxSamples;
   max_tasks: MaxTasks;
   max_subprocesses: MaxSubprocesses;
@@ -549,7 +554,7 @@ export interface EvalSample {
   messages: Messages;
   output: ModelOutput;
   scores: Scores1;
-  metadata: Metadata5;
+  metadata: Metadata6;
   store: Store;
   events: Events;
   model_usage: ModelUsage2;
@@ -605,6 +610,7 @@ export interface ModelOutput {
   model: Model1;
   choices: Choices1;
   usage: ModelUsage1 | null;
+  metadata: Metadata4;
   error: Error;
 }
 export interface ChatCompletionChoice {
@@ -648,9 +654,9 @@ export interface Score {
   value: Value1;
   answer: Answer;
   explanation: Explanation;
-  metadata: Metadata4;
+  metadata: Metadata5;
 }
-export interface Metadata5 {}
+export interface Metadata6 {}
 export interface Store {}
 /**
  * Beginning of processing a Sample.
@@ -666,7 +672,7 @@ export interface Sample {
   choices: Choices2;
   target: Target1;
   id: Id2;
-  metadata: Metadata6;
+  metadata: Metadata7;
   sandbox: SandboxEnvironmentSpec | null;
   files: Files1;
   setup: Setup1;
@@ -840,6 +846,7 @@ export interface Arguments1 {
  * Content to include in tool call view.
  */
 export interface ToolCallContent {
+  title: Title;
   format: Format;
   content: Content5;
 }
@@ -965,6 +972,6 @@ export interface SampleScore {
   value: Value2;
   answer: Answer1;
   explanation: Explanation2;
-  metadata: Metadata7;
+  metadata: Metadata8;
   sample_id: SampleId;
 }
diff --git a/src/inspect_ai/approval/_human.py b/src/inspect_ai/approval/_human.py
@@ -40,6 +40,10 @@ async def approve(
             message = message if not trace_enabled() else ""
 
             def add_view_content(view_content: ToolCallContent) -> None:
+                if view_content.title:
+                    renderables.append(
+                        Text.from_markup(f"[bold]{view_content.title}[/bold]\n")
+                    )
                 if view_content.format == "markdown":
                     renderables.append(Markdown(view_content.content))
                 else:

diff --git a/src/inspect_ai/tool/_tool_call.py b/src/inspect_ai/tool/_tool_call.py
@@ -42,6 +42,9 @@ class ToolCallError:
 class ToolCallContent(BaseModel):
     """Content to include in tool call view."""
 
+    title: str | None = Field(default=None)
+    """Optional (plain text) title for tool call content."""
+
     format: Literal["text", "markdown"]
     """Format."""
 

diff --git a/src/inspect_ai/tool/_tools/_execute.py b/src/inspect_ai/tool/_tools/_execute.py
@@ -10,8 +10,9 @@ def viewer(tool_call: ToolCall) -> ToolCallView:
         code = tool_call.arguments.get(code_param, None)
         code = (code or tool_call.function).strip()
         call = ToolCallContent(
+            title=language,
             format="markdown",
-            content=f"{language}\n\n```{language}\n" + code + "\n```\n",
+            content=f"```{language}\n" + code + "\n```\n",
         )
         return ToolCallView(call=call)
 

diff --git a/tools/vscode/src/@types/log.d.ts b/tools/vscode/src/@types/log.d.ts
@@ -38,6 +38,7 @@ export type Approvers = ApproverPolicyConfig[];
 export type FailOnError = boolean | number | null;
 export type MessageLimit = number | null;
 export type TokenLimit = number | null;
+export type TimeLimit = number | null;
 export type MaxSamples = number | null;
 export type MaxTasks = number | null;
 export type MaxSubprocesses = number | null;
@@ -138,6 +139,7 @@ export type Type4 =
   | "permission"
   | "file_not_found"
   | "is_a_directory"
+  | "output_limit"
   | "approval"
   | "unknown";
 export type Message1 = string;
@@ -168,6 +170,7 @@ export type Logprob2 = number;
 export type Bytes1 = number[] | null;
 export type Content4 = Logprob[];
 export type Choices1 = ChatCompletionChoice[];
+export type Metadata4 = {} | null;
 export type Error = string | null;
 export type Scores1 = {
   [k: string]: Score;
@@ -182,7 +185,7 @@ export type Value1 =
     };
 export type Answer = string | null;
 export type Explanation = string | null;
-export type Metadata4 = {} | null;
+export type Metadata5 = {} | null;
 export type Timestamp = string;
 export type Event = "sample_init";
 export type Input1 =
@@ -196,7 +199,7 @@ export type Input1 =
 export type Choices2 = string[] | null;
 export type Target1 = string | string[];
 export type Id2 = number | string | null;
-export type Metadata6 = {} | null;
+export type Metadata7 = {} | null;
 export type Files1 = {
   [k: string]: string;
 } | null;
@@ -251,6 +254,7 @@ export type Id3 = string;
 export type Function2 = string;
 export type Result = string | number | boolean | (ContentText | ContentImage)[];
 export type Truncated = [unknown, unknown] | null;
+export type Title = string | null;
 export type Format = "text" | "markdown";
 export type Content5 = string;
 export type Timestamp5 = string;
@@ -358,7 +362,7 @@ export type Value2 =
     };
 export type Answer1 = string | null;
 export type Explanation2 = string | null;
-export type Metadata7 = {} | null;
+export type Metadata8 = {} | null;
 export type SampleId = string | number | null;
 export type Samples2 = SampleScore[];
 
@@ -414,6 +418,7 @@ export interface EvalConfig {
   fail_on_error: FailOnError;
   message_limit: MessageLimit;
   token_limit: TokenLimit;
+  time_limit: TimeLimit;
   max_samples: MaxSamples;
   max_tasks: MaxTasks;
   max_subprocesses: MaxSubprocesses;
@@ -549,7 +554,7 @@ export interface EvalSample {
   messages: Messages;
   output: ModelOutput;
   scores: Scores1;
-  metadata: Metadata5;
+  metadata: Metadata6;
   store: Store;
   events: Events;
   model_usage: ModelUsage2;
@@ -605,6 +610,7 @@ export interface ModelOutput {
   model: Model1;
   choices: Choices1;
   usage: ModelUsage1 | null;
+  metadata: Metadata4;
   error: Error;
 }
 export interface ChatCompletionChoice {
@@ -648,9 +654,9 @@ export interface Score {
   value: Value1;
   answer: Answer;
   explanation: Explanation;
-  metadata: Metadata4;
+  metadata: Metadata5;
 }
-export interface Metadata5 {}
+export interface Metadata6 {}
 export interface Store {}
 /**
  * Beginning of processing a Sample.
@@ -666,7 +672,7 @@ export interface Sample {
   choices: Choices2;
   target: Target1;
   id: Id2;
-  metadata: Metadata6;
+  metadata: Metadata7;
   sandbox: SandboxEnvironmentSpec | null;
   files: Files1;
   setup: Setup1;
@@ -840,6 +846,7 @@ export interface Arguments1 {
  * Content to include in tool call view.
  */
 export interface ToolCallContent {
+  title: Title;
   format: Format;
   content: Content5;
 }
@@ -965,6 +972,6 @@ export interface SampleScore {
   value: Value2;
   answer: Answer1;
   explanation: Explanation2;
-  metadata: Metadata7;
+  metadata: Metadata8;
   sample_id: SampleId;
 }