Skip to content

Commit

Permalink
Log the list of dataset sample IDs (#774)
Browse files (browse the repository at this point in the history)
* Log the list of dataset sample IDs

* Relocate sample IDs

---------

Co-authored-by: aisi-inspect <[email protected]>
  • Loading branch information
jjallaire-aisi and aisi-inspect authored Oct 29, 2024
1 parent 71517d0 commit 6c96d92
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
- Improve performance of write_file for Docker sandboxes.
- Use user_data_dir rather than user_runtime_dir for view notifications.
- Implement `read_eval_log_sample()` for JSON log files.
- Log the list of dataset sample IDs.

## v0.3.42 (23 October 2024)

Expand Down
5 changes: 5 additions & 0 deletions src/inspect_ai/_eval/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ async def eval_run(
else:
task.fail_on_error = task_eval_config.fail_on_error

# add sample ids to dataset if they aren't there (start at 1 not 0)
for id, sample in enumerate(task.dataset):
if sample.id is None:
sample.id = id + 1

# create and track the logger
logger = TaskLogger(
task_name=task.name,
Expand Down
6 changes: 5 additions & 1 deletion src/inspect_ai/_eval/task/log.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from importlib import metadata as importlib_metadata
-from typing import Any, Literal
+from typing import Any, Literal, cast

from shortuuid import uuid

Expand Down Expand Up @@ -79,6 +79,9 @@ def __init__(
sandbox.type, cwd_relative_path(sandbox.config)
)

# record the dataset's sample ids (missing ids are filled in beforehand, see eval_run in _eval/run.py)
sample_ids = cast(list[int | str], [sample.id for sample in dataset])

# create eval spec
self.eval = EvalSpec(
run_id=run_id,
Expand All @@ -98,6 +101,7 @@ def __init__(
name=dataset.name,
location=cwd_relative_path(dataset.location),
samples=len(dataset),
sample_ids=sample_ids,
shuffled=dataset.shuffled,
),
sandbox=sandbox,
Expand Down
5 changes: 0 additions & 5 deletions src/inspect_ai/_eval/task/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,11 +545,6 @@ async def resolve_dataset(
)
dataset = dataset[dataset_limit]

-# add sample ids to dataset if they aren't there (start at 1 not 0)
-for id, sample in zip(range(dataset_limit.start, dataset_limit.stop), dataset):
-if sample.id is None:
-sample.id = id + 1

# apply epochs (deepcopy the samples so they remain independent)
samples: list[Sample] = []
for _ in range(0, epochs):
Expand Down
23 changes: 23 additions & 0 deletions src/inspect_ai/_view/www/log-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,28 @@
"default": null,
"title": "Samples"
},
"sample_ids": {
"anyOf": [
{
"items": {
"anyOf": [
{
"type": "integer"
},
{
"type": "string"
}
]
},
"type": "array"
},
{
"type": "null"
}
],
"default": null,
"title": "Sample Ids"
},
"shuffled": {
"anyOf": [
{
Expand All @@ -834,6 +856,7 @@
"name",
"location",
"samples",
"sample_ids",
"shuffled"
],
"additionalProperties": false
Expand Down
2 changes: 2 additions & 0 deletions src/inspect_ai/_view/www/src/types/log.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ export type Tags = string[] | null;
export type Name = string | null;
export type Location = string | null;
export type Samples = number | null;
export type SampleIds = (number | string)[] | null;
export type Shuffled = boolean | null;
/**
* @minItems 1
Expand Down Expand Up @@ -400,6 +401,7 @@ export interface EvalDataset {
name: Name;
location: Location;
samples: Samples;
sample_ids: SampleIds;
shuffled: Shuffled;
}
export interface ModelArgs {}
Expand Down
3 changes: 3 additions & 0 deletions src/inspect_ai/log/_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,9 @@ class EvalDataset(BaseModel):
samples: int | None = Field(default=None)
"""Number of samples in the dataset."""

sample_ids: list[int | str] | None = Field(default=None)
"""IDs of samples in the dataset."""

shuffled: bool | None = Field(default=None)
"""Was the dataset shuffled after reading."""

Expand Down
2 changes: 2 additions & 0 deletions tools/vscode/src/@types/log.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ export type Tags = string[] | null;
export type Name = string | null;
export type Location = string | null;
export type Samples = number | null;
export type SampleIds = (number | string)[] | null;
export type Shuffled = boolean | null;
/**
* @minItems 1
Expand Down Expand Up @@ -400,6 +401,7 @@ export interface EvalDataset {
name: Name;
location: Location;
samples: Samples;
sample_ids: SampleIds;
shuffled: Shuffled;
}
export interface ModelArgs {}
Expand Down

0 comments on commit 6c96d92

Please sign in to comment.