diff --git a/CHANGELOG.md b/CHANGELOG.md index cd085c540..ce28d341e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - Improve performance of write_file for Docker sandboxes. - Use user_data_dir rather than user_runtime_dir for view notifications. - Implement `read_eval_log_sample()` for JSON log files. +- Log the list of dataset sample IDs. ## v0.3.42 (23 October 2024) diff --git a/src/inspect_ai/_eval/run.py b/src/inspect_ai/_eval/run.py index 1452ce0d0..cccfb333b 100644 --- a/src/inspect_ai/_eval/run.py +++ b/src/inspect_ai/_eval/run.py @@ -133,6 +133,11 @@ async def eval_run( else: task.fail_on_error = task_eval_config.fail_on_error + # add sample ids to dataset if they aren't there (start at 1 not 0) + for id, sample in enumerate(task.dataset): + if sample.id is None: + sample.id = id + 1 + # create and track the logger logger = TaskLogger( task_name=task.name, diff --git a/src/inspect_ai/_eval/task/log.py b/src/inspect_ai/_eval/task/log.py index a9399c40c..43d21bee1 100644 --- a/src/inspect_ai/_eval/task/log.py +++ b/src/inspect_ai/_eval/task/log.py @@ -1,5 +1,5 @@ from importlib import metadata as importlib_metadata -from typing import Any, Literal +from typing import Any, Literal, cast from shortuuid import uuid @@ -79,6 +79,9 @@ def __init__( sandbox.type, cwd_relative_path(sandbox.config) ) + # ensure that the dataset has sample ids and record them + sample_ids = cast(list[int | str], [sample.id for sample in dataset]) + # create eval spec self.eval = EvalSpec( run_id=run_id, @@ -98,6 +101,7 @@ def __init__( name=dataset.name, location=cwd_relative_path(dataset.location), samples=len(dataset), + sample_ids=sample_ids, shuffled=dataset.shuffled, ), sandbox=sandbox, diff --git a/src/inspect_ai/_eval/task/run.py b/src/inspect_ai/_eval/task/run.py index 28ce6d115..7f5f60319 100644 --- a/src/inspect_ai/_eval/task/run.py +++ b/src/inspect_ai/_eval/task/run.py @@ -545,11 +545,6 @@ async def resolve_dataset( ) dataset = dataset[dataset_limit] - # add sample ids to dataset if they aren't there (start at 1 not 0) - for id, sample in zip(range(dataset_limit.start, dataset_limit.stop), dataset): - if sample.id is None: - sample.id = id + 1 - # apply epochs (deepcopy the samples so they remain independent) samples: list[Sample] = [] for _ in range(0, epochs): diff --git a/src/inspect_ai/_view/www/log-schema.json b/src/inspect_ai/_view/www/log-schema.json index 170e6f3ee..376f95210 100644 --- a/src/inspect_ai/_view/www/log-schema.json +++ b/src/inspect_ai/_view/www/log-schema.json @@ -815,6 +815,28 @@ "default": null, "title": "Samples" }, + "sample_ids": { + "anyOf": [ + { + "items": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Sample Ids" + }, "shuffled": { "anyOf": [ { @@ -834,6 +856,7 @@ "name", "location", "samples", + "sample_ids", "shuffled" ], "additionalProperties": false diff --git a/src/inspect_ai/_view/www/src/types/log.d.ts b/src/inspect_ai/_view/www/src/types/log.d.ts index e78f37f04..dbd0a068d 100644 --- a/src/inspect_ai/_view/www/src/types/log.d.ts +++ b/src/inspect_ai/_view/www/src/types/log.d.ts @@ -19,6 +19,7 @@ export type Tags = string[] | null; export type Name = string | null; export type Location = string | null; export type Samples = number | null; +export type SampleIds = (number | string)[] | null; export type Shuffled = boolean | null; /** * @minItems 1 @@ -400,6 +401,7 @@ export interface EvalDataset { name: Name; location: Location; samples: Samples; + sample_ids: SampleIds; shuffled: Shuffled; } export interface ModelArgs {} diff --git a/src/inspect_ai/log/_log.py b/src/inspect_ai/log/_log.py index 17f1b6936..af5c6ca52 100644 --- a/src/inspect_ai/log/_log.py +++ b/src/inspect_ai/log/_log.py @@ -379,6 +379,9 @@ class EvalDataset(BaseModel): samples: int | None = Field(default=None) """Number of samples in the dataset.""" + sample_ids: list[int | str] | None = Field(default=None) + """IDs of samples in the dataset.""" + shuffled: bool | None = Field(default=None) """Was the dataset shuffled after reading.""" diff --git a/tools/vscode/src/@types/log.d.ts b/tools/vscode/src/@types/log.d.ts index e78f37f04..dbd0a068d 100644 --- a/tools/vscode/src/@types/log.d.ts +++ b/tools/vscode/src/@types/log.d.ts @@ -19,6 +19,7 @@ export type Tags = string[] | null; export type Name = string | null; export type Location = string | null; export type Samples = number | null; +export type SampleIds = (number | string)[] | null; export type Shuffled = boolean | null; /** * @minItems 1 @@ -400,6 +401,7 @@ export interface EvalDataset { name: Name; location: Location; samples: Samples; + sample_ids: SampleIds; shuffled: Shuffled; } export interface ModelArgs {}