Log the list of dataset sample IDs (#774)

* Log the list of dataset sample IDs
* relocate sample ids

---------

Co-authored-by: aisi-inspect <[email protected]>
UKGovernmentBEIS · Oct 29, 2024 · 6c96d92 · 6c96d92
1 parent 71517d0
commit 6c96d92
Show file tree

Hide file tree

Showing 8 changed files with 41 additions and 6 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,7 @@
 - Improve performance of write_file for Docker sandboxes.
 - Use user_data_dir rather than user_runtime_dir for view notifications.
 - Implement `read_eval_log_sample()` for JSON log files.
+- Log the list of dataset sample IDs.
 
 ## v0.3.42 (23 October 2024)
 

diff --git a/src/inspect_ai/_eval/run.py b/src/inspect_ai/_eval/run.py
@@ -133,6 +133,11 @@ async def eval_run(
                 else:
                     task.fail_on_error = task_eval_config.fail_on_error
 
+                # add sample ids to dataset if they aren't there (start at 1 not 0)
+                for id, sample in enumerate(task.dataset):
+                    if sample.id is None:
+                        sample.id = id + 1
+
                 # create and track the logger
                 logger = TaskLogger(
                     task_name=task.name,

diff --git a/src/inspect_ai/_eval/task/log.py b/src/inspect_ai/_eval/task/log.py
@@ -1,5 +1,5 @@
 from importlib import metadata as importlib_metadata
-from typing import Any, Literal
+from typing import Any, Literal, cast
 
 from shortuuid import uuid
 
@@ -79,6 +79,9 @@ def __init__(
                 sandbox.type, cwd_relative_path(sandbox.config)
             )
 
+        # record the dataset's sample ids (missing ids are assigned in
+        # eval_run before the logger is created; nothing is assigned here)
+        sample_ids = cast(list[int | str], [sample.id for sample in dataset])
+
         # create eval spec
         self.eval = EvalSpec(
             run_id=run_id,
@@ -98,6 +101,7 @@ def __init__(
                 name=dataset.name,
                 location=cwd_relative_path(dataset.location),
                 samples=len(dataset),
+                sample_ids=sample_ids,
                 shuffled=dataset.shuffled,
             ),
             sandbox=sandbox,

diff --git a/src/inspect_ai/_eval/task/run.py b/src/inspect_ai/_eval/task/run.py
@@ -545,11 +545,6 @@ async def resolve_dataset(
     )
     dataset = dataset[dataset_limit]
 
-    # add sample ids to dataset if they aren't there (start at 1 not 0)
-    for id, sample in zip(range(dataset_limit.start, dataset_limit.stop), dataset):
-        if sample.id is None:
-            sample.id = id + 1
-
     # apply epochs (deepcopy the samples so they remain independent)
     samples: list[Sample] = []
     for _ in range(0, epochs):

diff --git a/src/inspect_ai/_view/www/log-schema.json b/src/inspect_ai/_view/www/log-schema.json
@@ -815,6 +815,28 @@
           "default": null,
           "title": "Samples"
         },
+        "sample_ids": {
+          "anyOf": [
+            {
+              "items": {
+                "anyOf": [
+                  {
+                    "type": "integer"
+                  },
+                  {
+                    "type": "string"
+                  }
+                ]
+              },
+              "type": "array"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "title": "Sample Ids"
+        },
         "shuffled": {
           "anyOf": [
             {
@@ -834,6 +856,7 @@
         "name",
         "location",
         "samples",
+        "sample_ids",
         "shuffled"
       ],
       "additionalProperties": false

diff --git a/src/inspect_ai/_view/www/src/types/log.d.ts b/src/inspect_ai/_view/www/src/types/log.d.ts
@@ -19,6 +19,7 @@ export type Tags = string[] | null;
 export type Name = string | null;
 export type Location = string | null;
 export type Samples = number | null;
+export type SampleIds = (number | string)[] | null;
 export type Shuffled = boolean | null;
 /**
  * @minItems 1
@@ -400,6 +401,7 @@ export interface EvalDataset {
   name: Name;
   location: Location;
   samples: Samples;
+  sample_ids: SampleIds;
   shuffled: Shuffled;
 }
 export interface ModelArgs {}

diff --git a/src/inspect_ai/log/_log.py b/src/inspect_ai/log/_log.py
@@ -379,6 +379,9 @@ class EvalDataset(BaseModel):
     samples: int | None = Field(default=None)
     """Number of samples in the dataset."""
 
+    sample_ids: list[int | str] | None = Field(default=None)
+    """IDs of samples in the dataset."""
+
     shuffled: bool | None = Field(default=None)
     """Was the dataset shuffled after reading."""
 

diff --git a/tools/vscode/src/@types/log.d.ts b/tools/vscode/src/@types/log.d.ts
@@ -19,6 +19,7 @@ export type Tags = string[] | null;
 export type Name = string | null;
 export type Location = string | null;
 export type Samples = number | null;
+export type SampleIds = (number | string)[] | null;
 export type Shuffled = boolean | null;
 /**
  * @minItems 1
@@ -400,6 +401,7 @@ export interface EvalDataset {
   name: Name;
   location: Location;
   samples: Samples;
+  sample_ids: SampleIds;
   shuffled: Shuffled;
 }
 export interface ModelArgs {}