diff --git a/src/inspect_ai/_view/www/.prettierrc.js b/src/inspect_ai/_view/www/.prettierrc.js new file mode 100644 index 000000000..3db5f3b75 --- /dev/null +++ b/src/inspect_ai/_view/www/.prettierrc.js @@ -0,0 +1,12 @@ +// Do not remove this file even if the config is empty! +// VSCode's "Format Document" will respect this config and use the default +// settings, which is what we want. Without prettierrc, VSCode falls back to +// users settings, which could be different. + +/** + * @see https://prettier.io/docs/en/configuration.html + * @type {import("prettier").Config} + */ +const config = {}; + +export default config; diff --git a/src/inspect_ai/_view/www/dist/assets/index.js b/src/inspect_ai/_view/www/dist/assets/index.js index 9db23a063..79db3a71a 100644 --- a/src/inspect_ai/_view/www/dist/assets/index.js +++ b/src/inspect_ai/_view/www/dist/assets/index.js @@ -8492,7 +8492,10 @@ const TabPanel = ({ children }) => { const tabContentsId = computeTabContentsId(id, index); - const tabContentsRef = A(); + const tabContentsRef = A( + /** @type {HTMLElement|null} */ + null + ); y(() => { setTimeout(() => { if (scrollPosition !== void 0 && tabContentsRef.current && tabContentsRef.current.scrollTop !== scrollPosition) { @@ -15131,8 +15134,14 @@ const ExpandablePanel = ({ }) => { const [collapsed, setCollapsed] = h(collapse); const [showToggle, setShowToggle] = h(false); - const contentsRef = A(); - const observerRef = A(); + const contentsRef = A( + /** @type {HTMLElement|null} */ + null + ); + const observerRef = A( + /** @type {IntersectionObserver|null} */ + null + ); y(() => { setCollapsed(collapse); }, [children, collapse]); @@ -15302,7 +15311,7 @@ const ToolInput = ({ type, contents, view, style }) => { } if (view) { const toolInputRef = A( - /** @type {HTMLElement|null} */ + /** @type {import("preact").Component & { base: Element }} */ null ); y(() => { @@ -16520,7 +16529,10 @@ const LargeModal = (props) => { setWarningHidden } = props; const modalFooter = footer ? m$1`` : ""; - const scrollRef = A(); + const scrollRef = A( + /** @type {HTMLElement|null} */ + null + ); y(() => { if (scrollRef.current) { setTimeout(() => { @@ -16670,7 +16682,7 @@ const isVscode = () => { }); }; const SampleScores = ({ sample, sampleDescriptor, scorer }) => { - const scores = scorer ? sampleDescriptor.scorer(sample, scorer).scores() : sampleDescriptor.selectedScorer(sample).scores(); + const scores = scorer ? sampleDescriptor.evalDescriptor.scorerDescriptor(sample, { scorer, name: scorer }).scores() : sampleDescriptor.selectedScorerDescriptor(sample).scores(); if (scores.length === 1) { return scores[0].rendered(); } else { @@ -16764,7 +16776,7 @@ const SampleScoreView = ({ scorer }) => { if (!sampleDescriptor) { - return ""; + return m$1``; } const scoreInput = inputString(sample.input); if (sample.choices && sample.choices.length > 0) { @@ -16775,7 +16787,10 @@ const SampleScoreView = ({ }) ); } - const scorerDescriptor = sampleDescriptor.scorer(sample, scorer); + const scorerDescriptor = sampleDescriptor.evalDescriptor.scorerDescriptor( + sample, + { scorer, name: scorer } + ); const explanation = scorerDescriptor.explanation() || "(No Explanation)"; const answer = scorerDescriptor.answer(); const metadata = scorerDescriptor.metadata(); @@ -20279,10 +20294,7 @@ const SampleSummary = ({ parent_id, sample, style, sampleDescriptor }) => { clamp: true }); } - const fullAnswer = sample && sampleDescriptor ? ( - // @ts-ignore - sampleDescriptor.selectedScorer(sample).answer() - ) : void 0; + const fullAnswer = sample && sampleDescriptor ? sampleDescriptor.selectedScorerDescriptor(sample).answer() : void 0; if (fullAnswer) { columns.push({ label: "Answer", @@ -20444,27 +20456,27 @@ const STYLE_CONTENT = "position:absolute; top:0; left:0; height:100%; width:100% class VirtualList extends x$1 { constructor(props) { super(props); + /** @type {HTMLElement} */ + __publicField(this, "base"); this.state = { height: 0, offset: 0 }; - this.resize = this.resize.bind(this); - this.handleScroll = throttle(this.handleScroll.bind(this), 100); + this.resize = () => { + if (this.state.height !== this.base.offsetHeight) { + this.setState({ height: this.base.offsetHeight }); + } + }; + this.handleScroll = throttle(() => { + if (this.base) { + this.setState({ offset: this.base.scrollTop }); + } + if (this.props.sync) { + this.forceUpdate(); + } + }, 100); this.containerRef = b(); } - resize() { - if (this.state.height !== this.base.offsetHeight) { - this.setState({ height: this.base.offsetHeight }); - } - } - handleScroll() { - if (this.base) { - this.setState({ offset: this.base.scrollTop }); - } - if (this.props.sync) { - this.forceUpdate(); - } - } componentDidUpdate() { this.resize(); } @@ -20790,7 +20802,7 @@ const SampleRow = ({ > ${sample ? m$1` <${MarkdownDiv} - markdown=${sampleDescriptor == null ? void 0 : sampleDescriptor.selectedScorer(sample).answer()} + markdown=${sampleDescriptor == null ? void 0 : sampleDescriptor.selectedScorerDescriptor(sample).answer()} style=${{ paddingLeft: "0" }} class="no-last-para-padding" /> @@ -21045,7 +21057,7 @@ const groupBySample = (samples, sampleDescriptor, order2) => { } } }); - const groupCount = samples.length / sampleDescriptor.epochs; + const groupCount = samples.length / sampleDescriptor.evalDescriptor.epochs; const itemCount = samples.length / groupCount; const counter = getCounter(itemCount, groupCount, order2); return (sample, index, previousSample) => { @@ -21074,7 +21086,7 @@ const groupBySample = (samples, sampleDescriptor, order2) => { }; }; const groupByEpoch = (samples, sampleDescriptor, order2) => { - const groupCount = sampleDescriptor.epochs; + const groupCount = sampleDescriptor.evalDescriptor.epochs; const itemCount = samples.length / groupCount; const counter = getCounter(itemCount, groupCount, order2); return (sample, index, previousSample) => { @@ -23222,7 +23234,8 @@ function simpleHttpAPI(logInfo) { }; }); return Promise.resolve({ - files: logs + files: logs, + log_dir }); } else if (log_file) { let evalLog = cache.get(); @@ -23237,7 +23250,8 @@ function simpleHttpAPI(logInfo) { task_id: evalLog.eval.task_id }; return { - files: [result] + files: [result], + log_dir }; } else { throw new Error( @@ -23867,7 +23881,7 @@ const SortFilter = ({ sampleDescriptor, sort, setSort, epochs }) => { val: kEpochDescVal }); } - if ((_a2 = sampleDescriptor == null ? void 0 : sampleDescriptor.scoreDescriptor) == null ? void 0 : _a2.compare) { + if ((_a2 = sampleDescriptor == null ? void 0 : sampleDescriptor.selectedScoreDescriptor) == null ? void 0 : _a2.compare) { options.push({ label: "score asc", val: kScoreAscVal @@ -23956,12 +23970,12 @@ const sortSamples = (sort, samples, samplesDescriptor) => { } } case kScoreAscVal: - return samplesDescriptor.scoreDescriptor.compare( + return samplesDescriptor.selectedScoreDescriptor.compare( samplesDescriptor.selectedScore(a2).value, samplesDescriptor.selectedScore(b2).value ); case kScoreDescVal: - return samplesDescriptor.scoreDescriptor.compare( + return samplesDescriptor.selectedScoreDescriptor.compare( samplesDescriptor.selectedScore(b2).value, samplesDescriptor.selectedScore(a2).value ); @@ -23985,11 +23999,11 @@ const SampleFilter = ({ descriptor, filter, filterChanged }) => { }); } }; - switch ((_a2 = descriptor == null ? void 0 : descriptor.scoreDescriptor) == null ? void 0 : _a2.scoreType) { + switch ((_a2 = descriptor == null ? void 0 : descriptor.selectedScoreDescriptor) == null ? void 0 : _a2.scoreType) { case kScoreTypePassFail: { const options = [{ text: "All", value: "all" }]; options.push( - ...descriptor.scoreDescriptor.categories.map((cat) => { + ...descriptor.selectedScoreDescriptor.categories.map((cat) => { return { text: cat.text, value: cat.val }; }) ); @@ -24002,7 +24016,7 @@ const SampleFilter = ({ descriptor, filter, filterChanged }) => { case kScoreTypeCategorical: { const options = [{ text: "All", value: "all" }]; options.push( - ...descriptor.scoreDescriptor.categories.map((cat) => { + ...descriptor.selectedScoreDescriptor.categories.map((cat) => { return { text: cat, value: cat }; }) ); @@ -24030,12 +24044,12 @@ const SampleFilter = ({ descriptor, filter, filterChanged }) => { `; } case kScoreTypeObject: { - if (!descriptor.scoreDescriptor.categories) { + if (!descriptor.selectedScoreDescriptor.categories) { return ""; } const options = [{ text: "All", value: "all" }]; options.push( - ...descriptor.scoreDescriptor.categories.map((cat) => { + ...descriptor.selectedScoreDescriptor.categories.map((cat) => { return { text: cat.text, value: cat.value }; }) ); @@ -25165,7 +25179,10 @@ const WorkspaceDisplay = ({ } }; const FindBand = ({ hideBand }) => { - const searchBoxRef = A(); + const searchBoxRef = A( + /** @type {HTMLInputElement|null} */ + null + ); y(() => { searchBoxRef.current.focus(); }, []); @@ -25185,13 +25202,16 @@ const FindBand = ({ hideBand }) => { } return expandablePanelEl; }; - const focusedElement = document.activeElement; + const focusedElement = ( + /** @type {HTMLElement} */ + document.activeElement + ); const result = window.find(term, false, !!back, false, false, true, false); const noResultEl = window.document.getElementById( "inspect-find-no-results" ); if (result) { - noResultEl.style.opacity = 0; + noResultEl.style.opacity = "0"; const selection = window.getSelection(); if (selection.rangeCount > 0) { const parentPanel = parentExpandablePanel(selection); @@ -25212,7 +25232,7 @@ const FindBand = ({ hideBand }) => { }, 100); } } else { - noResultEl.style.opacity = 1; + noResultEl.style.opacity = "1"; } if (focusedElement) { focusedElement.focus(); @@ -25305,32 +25325,25 @@ const FindBand = ({ hideBand }) => { `; }; -const createsSamplesDescriptor = (scorers, samples, epochs, selectedScore) => { +const createEvalDescriptor = (scores, samples, epochs) => { if (!samples) { return void 0; } - const score = (sample, scorer = selectedScore == null ? void 0 : selectedScore.scorer) => { - if (sample.scores[scorer]) { - return sample.scores[scorer]; - } else { + const scoreValue = (sample, scoreLabel) => { + if (Object.keys(sample.scores).length === 0 || !scoreLabel) { return void 0; } - }; - const scoreValue = (sample) => { - if (Object.keys(sample.scores).length === 0 || !selectedScore) { - return void 0; - } - if (selectedScore.scorer !== selectedScore.name && sample.scores[selectedScore.scorer] && sample.scores[selectedScore.scorer].value) { - return sample.scores[selectedScore.scorer].value[selectedScore.name]; - } else if (sample.scores[selectedScore.name]) { - return sample.scores[selectedScore.name].value; + if (scoreLabel.scorer !== scoreLabel.name && sample.scores[scoreLabel.scorer] && sample.scores[scoreLabel.scorer].value) { + return sample.scores[scoreLabel.scorer].value[scoreLabel.name]; + } else if (sample.scores[scoreLabel.name]) { + return sample.scores[scoreLabel.name].value; } else { return void 0; } }; const scoreAnswer = (sample, scorer) => { if (sample) { - const sampleScore = score(sample, scorer); + const sampleScore = sample.scores[scorer]; if (sampleScore && sampleScore.answer) { return sampleScore.answer; } @@ -25340,7 +25353,7 @@ const createsSamplesDescriptor = (scorers, samples, epochs, selectedScore) => { }; const scoreExplanation = (sample, scorer) => { if (sample) { - const sampleScore = score(sample, scorer); + const sampleScore = sample.scores[scorer]; if (sampleScore && sampleScore.explanation) { return sampleScore.explanation; } @@ -25349,48 +25362,155 @@ const createsSamplesDescriptor = (scorers, samples, epochs, selectedScore) => { }; const scoreMetadata = (sample, scorer) => { if (sample) { - const sampleScore = score(sample, scorer); + const sampleScore = sample.scores[scorer]; if (sampleScore && sampleScore.metadata) { return sampleScore.metadata; } } return void 0; }; - const uniqScoreValues = [ - ...new Set( - samples.filter((sample) => !!sample.scores).filter((sample) => { - if (!selectedScore) { - return true; + const scoreLabelKey = (scoreLabel) => { + return `${scoreLabel.scorer}.${scoreLabel.name}`; + }; + const scoreDescriptorMap = /* @__PURE__ */ new Map(); + for (const scoreLabel of scores) { + const uniqScoreValues = [ + ...new Set( + samples.filter((sample) => !!sample.scores).filter((sample) => { + if (!scoreLabel) { + return true; + } + if (scoreLabel.scorer !== scoreLabel.name) { + return Object.keys(sample.scores).includes(scoreLabel.scorer) && Object.keys(sample.scores[scoreLabel.scorer].value).includes( + scoreLabel.name + ); + } else { + return Object.keys(sample.scores).includes(scoreLabel.name); + } + }).map((sample) => { + return scoreValue(sample, scoreLabel); + }).filter((value) => { + return value !== null; + }) + ) + ]; + const uniqScoreTypes = [ + ...new Set(uniqScoreValues.map((scoreValue2) => typeof scoreValue2)) + ]; + for (const categorizer of scoreCategorizers) { + const scoreDescriptor2 = categorizer.describe( + uniqScoreValues, + uniqScoreTypes + ); + if (scoreDescriptor2) { + scoreDescriptorMap.set(scoreLabelKey(scoreLabel), scoreDescriptor2); + break; + } + } + } + const scoreDescriptor = (scoreLabel) => { + return scoreDescriptorMap.get(scoreLabelKey(scoreLabel)); + }; + const scoreRendered = (sample, scoreLabel) => { + const descriptor = scoreDescriptor(scoreLabel); + const score2 = scoreValue(sample, scoreLabel); + if (score2 === null || score2 === "undefined") { + return "null"; + } else if (descriptor.render) { + return descriptor.render(score2); + } else { + return score2; + } + }; + const scorerDescriptor = (sample, scoreLabel) => { + return { + metadata: () => { + return scoreMetadata(sample, scoreLabel.scorer); + }, + explanation: () => { + return scoreExplanation(sample, scoreLabel.scorer); + }, + answer: () => { + return scoreAnswer(sample, scoreLabel.scorer); + }, + scores: () => { + if (!sample || !sample.scores) { + return []; } - if (selectedScore.scorer !== selectedScore.name) { - return Object.keys(sample.scores).includes(selectedScore.scorer) && Object.keys(sample.scores[selectedScore.scorer].value).includes( - selectedScore.name - ); + const myScoreDescriptor = scoreDescriptor(scoreLabel); + if (!myScoreDescriptor) { + return []; + } + const scoreNames = scores.map((score2) => { + return score2.name; + }); + const sampleScorer = sample.scores[scoreLabel.scorer]; + const scoreVal = sampleScorer.value; + if (typeof scoreVal === "object") { + const names = Object.keys(scoreVal); + if (names.find((name) => { + return scoreNames.includes(name); + })) { + const scores2 = names.map((name) => { + return { + name, + rendered: () => { + return myScoreDescriptor.render(scoreVal[name]); + } + }; + }); + return scores2; + } else { + return [ + { + name: scoreLabel.scorer, + rendered: () => { + return myScoreDescriptor.render(scoreVal); + } + } + ]; + } } else { - return Object.keys(sample.scores).includes(selectedScore.name); + return [ + { + name: scoreLabel.scorer, + rendered: () => { + return myScoreDescriptor.render(scoreVal); + } + } + ]; } - }).map((sample) => { - return scoreValue(sample); - }).filter((value) => { - return value !== null; - }) - ) - ]; - const uniqScoreTypes = [ - ...new Set(uniqScoreValues.map((scoreValue2) => typeof scoreValue2)) - ]; - let scoreDescriptor; - for (const categorizer of scoreCategorizers) { - scoreDescriptor = categorizer.describe(uniqScoreValues, uniqScoreTypes); - if (scoreDescriptor) { - break; - } + } + }; + }; + const score = (sample, scoreLabel) => { + return { + value: scoreValue(sample, scoreLabel), + render: () => { + return scoreRendered(sample, scoreLabel); + } + }; + }; + return { + epochs, + samples, + scores, + scorerDescriptor, + scoreDescriptor, + score, + scoreAnswer + }; +}; +const createSamplesDescriptor = (evalDescriptor, selectedScore) => { + if (!evalDescriptor) { + return void 0; } - const sizes = samples.reduce( + const sizes = evalDescriptor.samples.reduce( (previous, current) => { var _a2; const text2 = inputString(current.input).join(" "); - const scoreText = scoreValue(current) ? String(scoreValue(current)) : ""; + const scoreValue = evalDescriptor.score(current, selectedScore).value; + const scoreText = scoreValue ? String(scoreValue) : ""; previous[0] = Math.min(Math.max(previous[0], text2.length), 300); previous[1] = Math.min( Math.max(previous[1], arrayToString(current.target).length), @@ -25399,7 +25519,7 @@ const createsSamplesDescriptor = (scorers, samples, epochs, selectedScore) => { previous[2] = Math.min( Math.max( previous[2], - ((_a2 = scoreAnswer(current, selectedScore == null ? void 0 : selectedScore.name)) == null ? void 0 : _a2.length) || 0 + ((_a2 = evalDescriptor.scoreAnswer(current, selectedScore == null ? void 0 : selectedScore.name)) == null ? void 0 : _a2.length) || 0 ), 300 ); @@ -25443,91 +25563,12 @@ const createsSamplesDescriptor = (scorers, samples, epochs, selectedScore) => { score: maxSizes.score / base2 } }; - const scoreRendered = (sample) => { - const score2 = scoreValue(sample); - if (score2 === null || score2 === "undefined") { - return "null"; - } else if (scoreDescriptor.render) { - return scoreDescriptor.render(score2); - } else { - return score2; - } - }; - const scorerDescriptor = (sample, scorer) => { - return { - metadata: () => { - return scoreMetadata(sample, scorer); - }, - explanation: () => { - return scoreExplanation(sample, scorer); - }, - answer: () => { - return scoreAnswer(sample, scorer); - }, - scores: () => { - if (!sample || !sample.scores) { - return []; - } - const scoreNames = scorers.map((score2) => { - return score2.name; - }); - const sampleScorer = sample.scores[scorer]; - const scoreVal = sampleScorer.value; - if (typeof scoreVal === "object") { - const names = Object.keys(scoreVal); - if (names.find((name) => { - return scoreNames.includes(name); - })) { - const scores = names.map((name) => { - return { - name, - rendered: () => { - return scoreDescriptor.render(scoreVal[name]); - } - }; - }); - return scores; - } else { - return [ - { - name: scorer, - rendered: () => { - return scoreDescriptor.render(scoreVal); - } - } - ]; - } - } else { - return [ - { - name: scorer, - rendered: () => { - return scoreDescriptor.render(scoreVal); - } - } - ]; - } - } - }; - }; return { - scoreDescriptor, - epochs, + evalDescriptor, messageShape, - selectedScore: (sample) => { - return { - value: scoreValue(sample), - render: () => { - return scoreRendered(sample); - } - }; - }, - scorer: (sample, scorer) => { - return scorerDescriptor(sample, scorer); - }, - selectedScorer: (sample) => { - return scorerDescriptor(sample, selectedScore == null ? void 0 : selectedScore.scorer); - } + selectedScoreDescriptor: evalDescriptor.scoreDescriptor(selectedScore), + selectedScore: (sample) => evalDescriptor.score(sample, selectedScore), + selectedScorerDescriptor: (sample) => evalDescriptor.scorerDescriptor(sample, selectedScore) }; }; const scoreCategorizers = [ @@ -26115,7 +26156,7 @@ function App({ ] ); y(() => { - var _a3; + var _a3, _b3; const samples = ((_a3 = selectedLog == null ? void 0 : selectedLog.contents) == null ? void 0 : _a3.sampleSummaries) || []; const filtered = samples.filter((sample) => { if (epoch && epoch !== "all") { @@ -26132,7 +26173,7 @@ function App({ }); const { sorted, order: order2 } = sortSamples(sort, filtered, samplesDescriptor); let grouping = "none"; - if ((samplesDescriptor == null ? void 0 : samplesDescriptor.epochs) > 1) { + if (((_b3 = samplesDescriptor == null ? void 0 : samplesDescriptor.evalDescriptor) == null ? void 0 : _b3.epochs) > 1) { if (byEpoch(sort) || epoch !== "all") { grouping = "epoch"; } else if (bySample(sort)) { @@ -26143,15 +26184,17 @@ function App({ setGroupBy(grouping); setGroupByOrder(order2); }, [selectedLog, filter, sort, epoch]); - const samplesDescriptor = T(() => { + const evalDescriptor = T(() => { var _a3, _b3, _c2, _d2; - return createsSamplesDescriptor( + return createEvalDescriptor( scores, (_a3 = selectedLog.contents) == null ? void 0 : _a3.sampleSummaries, - ((_d2 = (_c2 = (_b3 = selectedLog.contents) == null ? void 0 : _b3.eval) == null ? void 0 : _c2.config) == null ? void 0 : _d2.epochs) || 1, - score + ((_d2 = (_c2 = (_b3 = selectedLog.contents) == null ? void 0 : _b3.eval) == null ? void 0 : _c2.config) == null ? void 0 : _d2.epochs) || 1 ); - }, [selectedLog, scores, score]); + }, [selectedLog, scores]); + const samplesDescriptor = T(() => { + return createSamplesDescriptor(evalDescriptor, score); + }, [evalDescriptor, score]); const refreshSampleTab = q( (sample) => { if (selectedSampleTab === void 0) { diff --git a/src/inspect_ai/_view/www/src/App.mjs b/src/inspect_ai/_view/www/src/App.mjs index 6b39724d0..21a5d8aa0 100644 --- a/src/inspect_ai/_view/www/src/App.mjs +++ b/src/inspect_ai/_view/www/src/App.mjs @@ -31,7 +31,10 @@ import { FindBand } from "./components/FindBand.mjs"; import { isVscode } from "./utils/Html.mjs"; import { getVscodeApi } from "./utils/vscode.mjs"; import { kDefaultSort } from "./constants.mjs"; -import { createsSamplesDescriptor } from "./samples/SamplesDescriptor.mjs"; +import { + createEvalDescriptor, + createSamplesDescriptor, +} from "./samples/SamplesDescriptor.mjs"; import { byEpoch, bySample, sortSamples } from "./samples/tools/SortFilter.mjs"; import { resolveAttachments } from "./utils/attachments.mjs"; import { filterFnForType } from "./samples/tools/filters.mjs"; @@ -75,7 +78,7 @@ export function App({ initialState?.headersLoading || false, ); - // Selected Log + /** @type {[import("./Types.mjs").CurrentLog, function(import("./Types.mjs").CurrentLog): void]} */ const [selectedLog, setSelectedLog] = useState( initialState?.selectedLog || { contents: undefined, @@ -94,6 +97,7 @@ export function App({ ? initialState.selectedSampleIndex : -1, ); + /** @type {[import("./types/log").EvalSample, function(import("./types/log").EvalSample): void]} */ const [selectedSample, setSelectedSample] = useState( initialState?.selectedSample, ); @@ -325,7 +329,7 @@ export function App({ // Set the grouping let grouping = "none"; - if (samplesDescriptor?.epochs > 1) { + if (samplesDescriptor?.evalDescriptor?.epochs > 1) { if (byEpoch(sort) || epoch !== "all") { grouping = "epoch"; } else if (bySample(sort)) { @@ -338,14 +342,17 @@ export function App({ setGroupByOrder(order); }, [selectedLog, filter, sort, epoch]); - const samplesDescriptor = useMemo(() => { - return createsSamplesDescriptor( + const evalDescriptor = useMemo(() => { + return createEvalDescriptor( scores, selectedLog.contents?.sampleSummaries, selectedLog.contents?.eval?.config?.epochs || 1, - score, ); - }, [selectedLog, scores, score]); + }, [selectedLog, scores]); + + const samplesDescriptor = useMemo(() => { + return createSamplesDescriptor(evalDescriptor, score); + }, [evalDescriptor, score]); const refreshSampleTab = useCallback( (sample) => { diff --git a/src/inspect_ai/_view/www/src/Types.mjs b/src/inspect_ai/_view/www/src/Types.mjs index a8d71dd30..6b9ad2919 100644 --- a/src/inspect_ai/_view/www/src/Types.mjs +++ b/src/inspect_ai/_view/www/src/Types.mjs @@ -8,7 +8,6 @@ * @typedef {Object} CurrentLog * @property {string} name * @property {import("./api/Types.mjs").EvalSummary} contents - * @property {string} raw */ /** diff --git a/src/inspect_ai/_view/www/src/api/Types.mjs b/src/inspect_ai/_view/www/src/api/Types.mjs index 7b1bb9103..7bcbf04f3 100644 --- a/src/inspect_ai/_view/www/src/api/Types.mjs +++ b/src/inspect_ai/_view/www/src/api/Types.mjs @@ -30,15 +30,26 @@ * @property { import("../types/log").Input } input * @property { import("../types/log").Target } target * @property { import("../types/log").Scores1 } scores + * @property { string } [error] * @property { import("../types/log").Type11 } [limit] */ /** -* @typedef {Object} Capabilities -* @property {boolean} downloadFiles - Indicates if file downloads are supported. -* @property {boolean} webWorkers - Indicates if web workers are supported. -* + * Fields shared by EvalSample and SampleSummary. + * Contains only fields that are copied verbatim in src/inspect_ai/log/_recorders/eval.py. + * + * @typedef {Object} BasicSampleData + * @property { number | string } id + * @property { number } epoch + * @property { import("../types/log").Target } target + * @property { import("../types/log").Scores1 } scores + */ +/** + * @typedef {Object} Capabilities + * @property {boolean} downloadFiles - Indicates if file downloads are supported. + * @property {boolean} webWorkers - Indicates if web workers are supported. + */ /** * @typedef {Object} LogViewAPI diff --git a/src/inspect_ai/_view/www/src/api/api-http.mjs b/src/inspect_ai/_view/www/src/api/api-http.mjs index c351b51aa..493ed64da 100644 --- a/src/inspect_ai/_view/www/src/api/api-http.mjs +++ b/src/inspect_ai/_view/www/src/api/api-http.mjs @@ -56,6 +56,7 @@ function simpleHttpAPI(logInfo) { }); return Promise.resolve({ files: logs, + log_dir, }); } else if (log_file) { // Check the cache @@ -76,6 +77,7 @@ function simpleHttpAPI(logInfo) { return { files: [result], + log_dir, }; } else { // No log.json could be found, and there isn't a log file, diff --git a/src/inspect_ai/_view/www/src/components/ExpandablePanel.mjs b/src/inspect_ai/_view/www/src/components/ExpandablePanel.mjs index 07fe3847b..5287795d5 100644 --- a/src/inspect_ai/_view/www/src/components/ExpandablePanel.mjs +++ b/src/inspect_ai/_view/www/src/components/ExpandablePanel.mjs @@ -14,8 +14,8 @@ export const ExpandablePanel = ({ const [collapsed, setCollapsed] = useState(collapse); const [showToggle, setShowToggle] = useState(false); - const contentsRef = useRef(); - const observerRef = useRef(); + const contentsRef = useRef(/** @type {HTMLElement|null} */ (null)); + const observerRef = useRef(/** @type {IntersectionObserver|null} */ (null)); // Ensure that when content changes, we reset the collapse state. useEffect(() => { diff --git a/src/inspect_ai/_view/www/src/components/FindBand.mjs b/src/inspect_ai/_view/www/src/components/FindBand.mjs index ecd0e3013..9099306cf 100644 --- a/src/inspect_ai/_view/www/src/components/FindBand.mjs +++ b/src/inspect_ai/_view/www/src/components/FindBand.mjs @@ -4,7 +4,7 @@ import { ApplicationIcons } from "../appearance/Icons.mjs"; import { FontSize } from "../appearance/Fonts.mjs"; export const FindBand = ({ hideBand }) => { - const searchBoxRef = useRef(); + const searchBoxRef = useRef(/** @type {HTMLInputElement|null} */ (null)); useEffect(() => { searchBoxRef.current.focus(); }, []); @@ -31,13 +31,14 @@ export const FindBand = ({ hideBand }) => { }; // capture what is focused - const focusedElement = document.activeElement; + const focusedElement = /** @type {HTMLElement} */ (document.activeElement); + // @ts-expect-error: `Window.find` is non-standard const result = window.find(term, false, !!back, false, false, true, false); const noResultEl = window.document.getElementById( "inspect-find-no-results", ); if (result) { - noResultEl.style.opacity = 0; + noResultEl.style.opacity = "0"; const selection = window.getSelection(); if (selection.rangeCount > 0) { // See if the parent is an expandable panel and expand it @@ -58,7 +59,7 @@ export const FindBand = ({ hideBand }) => { }, 100); } } else { - noResultEl.style.opacity = 1; + noResultEl.style.opacity = "1"; } // Return focus to the previously focused element diff --git a/src/inspect_ai/_view/www/src/components/LargeModal.mjs b/src/inspect_ai/_view/www/src/components/LargeModal.mjs index aa0e0d983..49b02b569 100644 --- a/src/inspect_ai/_view/www/src/components/LargeModal.mjs +++ b/src/inspect_ai/_view/www/src/components/LargeModal.mjs @@ -31,7 +31,7 @@ export const LargeModal = (props) => { // Support restoring the scroll position // but only do this for the first time that the children are set - const scrollRef = useRef(); + const scrollRef = useRef(/** @type {HTMLElement|null} */ (null)); useEffect(() => { if (scrollRef.current) { setTimeout(() => { diff --git a/src/inspect_ai/_view/www/src/components/TabSet.mjs b/src/inspect_ai/_view/www/src/components/TabSet.mjs index 56f1c9b22..8ca977e2c 100644 --- a/src/inspect_ai/_view/www/src/components/TabSet.mjs +++ b/src/inspect_ai/_view/www/src/components/TabSet.mjs @@ -44,7 +44,7 @@ export const TabPanel = ({ children, }) => { const tabContentsId = computeTabContentsId(id, index); - const tabContentsRef = useRef(); + const tabContentsRef = useRef(/** @type {HTMLElement|null} */ (null)); useEffect(() => { setTimeout(() => { if ( diff --git a/src/inspect_ai/_view/www/src/components/Tools.mjs b/src/inspect_ai/_view/www/src/components/Tools.mjs index 540e5d88f..d4600e0dd 100644 --- a/src/inspect_ai/_view/www/src/components/Tools.mjs +++ b/src/inspect_ai/_view/www/src/components/Tools.mjs @@ -144,10 +144,13 @@ export const ToolInput = ({ type, contents, view, style }) => { } if (view) { - const toolInputRef = useRef(/** @type {HTMLElement|null} */ (null)); + const toolInputRef = useRef( + /** @type {import("preact").Component & { base: Element }} */ (null), + ); useEffect(() => { // Sniff around for code in the view that could be text highlighted if (toolInputRef.current) { + // @ts-expect-error: TS doesn't know that `HTMLCollection` is iterable. for (const child of toolInputRef.current.base.children) { if (child.tagName === "PRE") { const childChild = child.firstElementChild; diff --git a/src/inspect_ai/_view/www/src/components/VirtualList.mjs b/src/inspect_ai/_view/www/src/components/VirtualList.mjs index 48a8d1976..a6e0eeeb9 100644 --- a/src/inspect_ai/_view/www/src/components/VirtualList.mjs +++ b/src/inspect_ai/_view/www/src/components/VirtualList.mjs @@ -10,32 +10,30 @@ const STYLE_CONTENT = "position:absolute; top:0; left:0; height:100%; width:100%; overflow:visible;"; export class VirtualList extends Component { + /** @type {HTMLElement} */ base; + constructor(props) { super(props); this.state = { height: 0, offset: 0, }; - this.resize = this.resize.bind(this); - this.handleScroll = throttle(this.handleScroll.bind(this), 100); + this.resize = () => { + if (this.state.height !== this.base.offsetHeight) { + this.setState({ height: this.base.offsetHeight }); + } + }; + this.handleScroll = throttle(() => { + if (this.base) { + this.setState({ offset: this.base.scrollTop }); + } + if (this.props.sync) { + this.forceUpdate(); + } + }, 100); this.containerRef = createRef(); } - resize() { - if (this.state.height !== this.base.offsetHeight) { - this.setState({ height: this.base.offsetHeight }); - } - } - - handleScroll() { - if (this.base) { - this.setState({ offset: this.base.scrollTop }); - } - if (this.props.sync) { - this.forceUpdate(); - } - } - componentDidUpdate() { this.resize(); } diff --git a/src/inspect_ai/_view/www/src/samples/SampleDisplay.mjs b/src/inspect_ai/_view/www/src/samples/SampleDisplay.mjs index c9085e103..b294f138f 100644 --- a/src/inspect_ai/_view/www/src/samples/SampleDisplay.mjs +++ b/src/inspect_ai/_view/www/src/samples/SampleDisplay.mjs @@ -422,8 +422,7 @@ const SampleSummary = ({ parent_id, sample, style, sampleDescriptor }) => { const fullAnswer = sample && sampleDescriptor - ? // @ts-ignore - sampleDescriptor.selectedScorer(sample).answer() + ? sampleDescriptor.selectedScorerDescriptor(sample).answer() : undefined; if (fullAnswer) { columns.push({ diff --git a/src/inspect_ai/_view/www/src/samples/SampleList.mjs b/src/inspect_ai/_view/www/src/samples/SampleList.mjs index d085d95c1..a975c6a5d 100644 --- a/src/inspect_ai/_view/www/src/samples/SampleList.mjs +++ b/src/inspect_ai/_view/www/src/samples/SampleList.mjs @@ -17,7 +17,22 @@ import { inputString } from "../utils/Format.mjs"; const kSampleHeight = 88; const kSeparatorHeight = 24; -// Convert samples to a datastructure which contemplates grouping, etc... +/** + * Convert samples to a datastructure which contemplates grouping, etc... + * + * @param {Object} props - The parameters for the component. + * @param {Object} props.listRef - The ref for the list. + * @param {import("./SamplesTab.mjs").ListItem[]} props.items - The samples. + * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - The sample descriptor. + * @param {Object} props.style - The style for the element + * @param {number} props.selectedIndex - The index of the selected sample. + * @param {(index: number) => void} props.setSelectedIndex - The function to set the selected sample index. + * @param {import("../Types.mjs").ScoreLabel} props.selectedScore - The function to get the selected score. + * @param {() => void} props.nextSample - The function to move to the next sample. + * @param {() => void} props.prevSample - The function to move to the previous sample. + * @param {(index: number) => void} props.showSample - The function to show the sample. + * @returns {import("preact").JSX.Element} The SampleList component. + */ export const SampleList = (props) => { const { listRef, @@ -93,6 +108,7 @@ export const SampleList = (props) => { } }, [selectedIndex, rowMap, listRef]); + /** @param {import("./SamplesTab.mjs").ListItem} item */ const renderRow = (item) => { if (item.type === "sample") { return html` @@ -192,6 +208,7 @@ export const SampleList = (props) => { // Count any sample errors and display a bad alerting the user // to any errors const errorCount = items?.reduce((previous, item) => { + // @ts-expect-error if (item.data.error) { return previous + 1; } else { @@ -201,6 +218,7 @@ export const SampleList = (props) => { // Count limits const limitCount = items?.reduce((previous, item) => { + // @ts-expect-error if (item.data.limit) { return previous + 1; } else { @@ -260,6 +278,17 @@ const SeparatorRow = ({ id, title, height }) => { `; }; +/** + * @param {Object} props - The parameters for the component. + * @param {string} props.id - The unique identifier for the sample. + * @param {number} props.index - The index of the sample. + * @param {import("../api/Types.mjs").SampleSummary} props.sample - The sample. + * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - The sample descriptor. + * @param {number} props.height - The height of the sample row. + * @param {boolean} props.selected - Whether the sample is selected. + * @param {(index: number) => void} props.showSample - The function to show the sample. + * @returns {import("preact").JSX.Element} The SampleRow component. + */ const SampleRow = ({ id, index, @@ -339,7 +368,9 @@ const SampleRow = ({ ${sample ? html` <${MarkdownDiv} - markdown=${sampleDescriptor?.selectedScorer(sample).answer()} + markdown=${sampleDescriptor + ?.selectedScorerDescriptor(sample) + .answer()} style=${{ paddingLeft: "0" }} class="no-last-para-padding" /> diff --git a/src/inspect_ai/_view/www/src/samples/SampleScoreView.mjs b/src/inspect_ai/_view/www/src/samples/SampleScoreView.mjs index 568b2a8f5..0885fe1c3 100644 --- a/src/inspect_ai/_view/www/src/samples/SampleScoreView.mjs +++ b/src/inspect_ai/_view/www/src/samples/SampleScoreView.mjs @@ -14,6 +14,14 @@ const labelStyle = { ...TextStyle.secondary, }; +/** + * @param {Object} props - The component props. + * @param {import("../types/log").EvalSample} props.sample - The sample. + * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - The sample descriptor. + * @param {Object} props.style - The style for the element. + * @param {string} props.scorer - The scorer. + * @returns {import("preact").JSX.Element} The SampleScoreView component. + */ export const SampleScoreView = ({ sample, sampleDescriptor, @@ -21,7 +29,7 @@ export const SampleScoreView = ({ scorer, }) => { if (!sampleDescriptor) { - return ""; + return html``; } const scoreInput = inputString(sample.input); @@ -34,7 +42,10 @@ export const SampleScoreView = ({ ); } - const scorerDescriptor = sampleDescriptor.scorer(sample, scorer); + const scorerDescriptor = sampleDescriptor.evalDescriptor.scorerDescriptor( + sample, + { scorer, name: scorer }, + ); const explanation = scorerDescriptor.explanation() || "(No Explanation)"; const answer = scorerDescriptor.answer(); const metadata = scorerDescriptor.metadata(); diff --git a/src/inspect_ai/_view/www/src/samples/SampleScores.mjs b/src/inspect_ai/_view/www/src/samples/SampleScores.mjs index 11e8d8725..4f257540f 100644 --- a/src/inspect_ai/_view/www/src/samples/SampleScores.mjs +++ b/src/inspect_ai/_view/www/src/samples/SampleScores.mjs @@ -1,9 +1,18 @@ import { html } from "htm/preact"; +/** + * @param {Object} props + * @param {import("../api/Types.mjs").SampleSummary} props.sample + * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor + * @param {string} props.scorer + * @returns {import("preact").JSX.Element} + */ export const SampleScores = ({ sample, sampleDescriptor, scorer }) => { const scores = scorer - ? sampleDescriptor.scorer(sample, scorer).scores() - : sampleDescriptor.selectedScorer(sample).scores(); + ? sampleDescriptor.evalDescriptor + .scorerDescriptor(sample, { scorer, name: scorer }) + .scores() + : sampleDescriptor.selectedScorerDescriptor(sample).scores(); if (scores.length === 1) { return scores[0].rendered(); diff --git a/src/inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs b/src/inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs index 0b06e314a..7c34a4160 100644 --- a/src/inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +++ b/src/inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs @@ -17,15 +17,26 @@ import { kScoreTypePassFail, } from "../constants.mjs"; +/** + * Represents a utility summary of the samples that doesn't change with the selected score. + * @typedef {Object} EvalDescriptor + * @property {number} epochs - The number of epochs. + * @property {import("../api/Types.mjs").SampleSummary[]} samples - The list of sample summaries. + * @property {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores + * @property {(sample: import("../api/Types.mjs").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => ScorerDescriptor} scorerDescriptor - Returns the scorer descriptor for a sample and a specified scorer. + * @property {(scoreLabel: import("../Types.mjs").ScoreLabel) => ScoreDescriptor} scoreDescriptor - Provides information about the score types and how to render them. + * @property {(sample: import("../api/Types.mjs").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => SelectedScore} score - Returns information about a score for a sample. + * @property {(sample: import("../api/Types.mjs").BasicSampleData, scorer: string) => string} scoreAnswer - Returns the answer for a sample and a specified scorer. + */ + /** * Represents a utility summary of the samples. * @typedef {Object} SamplesDescriptor - * @property {ScoreDescriptor} scoreDescriptor - Provides information about the score types and how to render them. - * @property {number} epochs - The number of epochs. + * @property {EvalDescriptor} evalDescriptor - The EvalDescriptor. * @property {MessageShape} messageShape - The normalized sizes of input, target, and answer messages. - * @property {(sample: import("../api/Types.mjs").SampleSummary) => SelectedScore} selectedScore - Returns the selected score for a sample. - * @property {(sample: import("../api/Types.mjs").SampleSummary, scorer: string) => ScorerDescriptor} scorer - Returns the scorer descriptor for a sample and a specified scorer. - * @property {(sample: import("../api/Types.mjs").SampleSummary) => ScorerDescriptor} selectedScorer - Returns the scorer descriptor for a sample using the selected scorer. + * @property {ScoreDescriptor} selectedScoreDescriptor - Provides information about the score types and how to render them. + * @property {(sample: import("../api/Types.mjs").BasicSampleData) => SelectedScore} selectedScore - Returns the selected score for a sample. + * @property {(sample: import("../api/Types.mjs").BasicSampleData) => ScorerDescriptor} selectedScorerDescriptor - Returns the scorer descriptor for a sample using the selected scorer. */ /** @@ -42,13 +53,14 @@ import { /** * Provides descriptor functions for a scorer. * @typedef {Object} ScorerDescriptor + * @property {() => string} metadata - Function to retrieve the metadata of the score. * @property {() => string} explanation - Function to retrieve the explanation of the score. * @property {() => string} answer - Function to retrieve the answer associated with the score. * @property {function(): Array<{name: string, rendered: function(): any}>} scores - Function to retrieve scores with their render functions. */ /** - * Represents the selected score for a sample, including its value and render function. + * Represents a score for a sample, including its value and render function. * @typedef {Object} SelectedScore * @property {import("../types/log").Value2} value - The value of the selected score. * @property {function(): any} render - Function to render the selected score. @@ -72,69 +84,48 @@ import { */ /** - * Provides a utility summary of the samples - * - * @param {import("../Types.mjs").ScoreLabel[]} scorers - the list of available scores + * @param {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores * @param {import("../api/Types.mjs").SampleSummary[]} samples - the list of sample summaries * @param {number} epochs - The number of epochs - * @param {import("../Types.mjs").ScoreLabel} [selectedScore] - the currently selected score - * @returns {SamplesDescriptor} The SamplesDescriptor + * @returns {EvalDescriptor} The EvalDescriptor */ -export const createsSamplesDescriptor = ( - scorers, - samples, - epochs, - selectedScore, -) => { +export const createEvalDescriptor = (scores, samples, epochs) => { if (!samples) { return undefined; } /** - * @param {import("../api/Types.mjs").SampleSummary} sample - the currently selected score - * @param {string} scorer - the scorer name - * @returns {import("../types/log").Score} The Score - */ - const score = (sample, scorer = selectedScore?.scorer) => { - if (sample.scores[scorer]) { - return sample.scores[scorer]; - } else { - return undefined; - } - }; - - /** - * @param {import("../api/Types.mjs").SampleSummary} sample - the currently selected score + * @param {import("../api/Types.mjs").BasicSampleData} sample - the currently selected score + * @param {import("../Types.mjs").ScoreLabel} scoreLabel - the score label * @returns {import("../types/log").Value2} The Score */ - const scoreValue = (sample) => { + const scoreValue = (sample, scoreLabel) => { // no scores, no value - if (Object.keys(sample.scores).length === 0 || !selectedScore) { + if (Object.keys(sample.scores).length === 0 || !scoreLabel) { return undefined; } if ( - selectedScore.scorer !== selectedScore.name && - sample.scores[selectedScore.scorer] && - sample.scores[selectedScore.scorer].value + scoreLabel.scorer !== scoreLabel.name && + sample.scores[scoreLabel.scorer] && + sample.scores[scoreLabel.scorer].value ) { - return sample.scores[selectedScore.scorer].value[selectedScore.name]; - } else if (sample.scores[selectedScore.name]) { - return sample.scores[selectedScore.name].value; + return sample.scores[scoreLabel.scorer].value[scoreLabel.name]; + } else if (sample.scores[scoreLabel.name]) { + return sample.scores[scoreLabel.name].value; } else { return undefined; } }; - // Retrieve the answer for a sample /** - * @param {import("../api/Types.mjs").SampleSummary} sample - the currently selected score + * @param {import("../api/Types.mjs").BasicSampleData} sample - the currently selected score * @param {string} scorer - the scorer name * @returns {string} The answer */ const scoreAnswer = (sample, scorer) => { if (sample) { - const sampleScore = score(sample, scorer); + const sampleScore = sample.scores[scorer]; if (sampleScore && sampleScore.answer) { return sampleScore.answer; } @@ -143,15 +134,14 @@ export const createsSamplesDescriptor = ( } }; - // Retrieve the answer for a sample /** - * @param {import("../api/Types.mjs").SampleSummary} sample - the currently selected score + * @param {import("../api/Types.mjs").BasicSampleData} sample - the currently selected score * @param {string} scorer - the scorer name * @returns {string} The explanation */ const scoreExplanation = (sample, scorer) => { if (sample) { - const sampleScore = score(sample, scorer); + const sampleScore = sample.scores[scorer]; if (sampleScore && sampleScore.explanation) { return sampleScore.explanation; } @@ -161,13 +151,13 @@ export const createsSamplesDescriptor = ( // Retrieve the metadata for a sample /** - * @param {import("../api/Types.mjs").SampleSummary} sample - the currently selected score + * @param {import("../api/Types.mjs").BasicSampleData} sample - the currently selected score * @param {string} scorer - the scorer name * @returns {Object} The explanation */ const scoreMetadata = (sample, scorer) => { if (sample) { - const sampleScore = score(sample, scorer); + const sampleScore = sample.scores[scorer]; if (sampleScore && sampleScore.metadata) { return sampleScore.metadata; } @@ -175,150 +165,123 @@ export const createsSamplesDescriptor = ( return undefined; }; - const uniqScoreValues = [ - ...new Set( - samples - .filter((sample) => !!sample.scores) - .filter((sample) => { - // There is no selected scorer, so include this value - if (!selectedScore) { - return true; - } + /** + * @param {import("../Types.mjs").ScoreLabel} scoreLabel + * @returns {string} + */ + const scoreLabelKey = (scoreLabel) => { + return `${scoreLabel.scorer}.${scoreLabel.name}`; + }; - if (selectedScore.scorer !== selectedScore.name) { - return ( - Object.keys(sample.scores).includes(selectedScore.scorer) && - Object.keys(sample.scores[selectedScore.scorer].value).includes( - selectedScore.name, - ) - ); - } else { - return Object.keys(sample.scores).includes(selectedScore.name); - } - }) - .map((sample) => { - return scoreValue(sample); - }) - .filter((value) => { - return value !== null; - }), - ), - ]; - const uniqScoreTypes = [ - ...new Set(uniqScoreValues.map((scoreValue) => typeof scoreValue)), - ]; - - /** @type {ScoreDescriptor} */ - let scoreDescriptor; - for (const categorizer of scoreCategorizers) { - scoreDescriptor = categorizer.describe(uniqScoreValues, uniqScoreTypes); - if (scoreDescriptor) { - break; - } - } + /** + * The EvalDescriptor is memoized. Compute all descriptors now to avoid duplicate work. + * @type {Map} + */ + const scoreDescriptorMap = new Map(); + for (const scoreLabel of scores) { + const uniqScoreValues = [ + ...new Set( + samples + .filter((sample) => !!sample.scores) + .filter((sample) => { + // There is no selected scorer, so include this value + if (!scoreLabel) { + return true; + } - // Find the total length of the value so we can compute an average - const sizes = samples.reduce( - (previous, current) => { - const text = inputString(current.input).join(" "); - const scoreText = scoreValue(current) ? String(scoreValue(current)) : ""; - previous[0] = Math.min(Math.max(previous[0], text.length), 300); - previous[1] = Math.min( - Math.max(previous[1], arrayToString(current.target).length), - 300, - ); - previous[2] = Math.min( - Math.max( - previous[2], - scoreAnswer(current, selectedScore?.name)?.length || 0, - ), - 300, - ); - previous[3] = Math.min( - Math.max(previous[3], current.limit ? current.limit.length : 0), - 50, - ); - previous[4] = Math.min( - Math.max(previous[4], String(current.id).length), - 10, + if (scoreLabel.scorer !== scoreLabel.name) { + return ( + Object.keys(sample.scores).includes(scoreLabel.scorer) && + Object.keys(sample.scores[scoreLabel.scorer].value).includes( + scoreLabel.name, + ) + ); + } else { + return Object.keys(sample.scores).includes(scoreLabel.name); + } + }) + .map((sample) => { + return scoreValue(sample, scoreLabel); + }) + .filter((value) => { + return value !== null; + }), + ), + ]; + const uniqScoreTypes = [ + ...new Set(uniqScoreValues.map((scoreValue) => typeof scoreValue)), + ]; + + for (const categorizer of scoreCategorizers) { + const scoreDescriptor = categorizer.describe( + uniqScoreValues, + uniqScoreTypes, ); - previous[5] = Math.min(Math.max(previous[5], scoreText.length), 30); - - return previous; - }, - [0, 0, 0, 0, 0, 0], - ); + if (scoreDescriptor) { + scoreDescriptorMap.set(scoreLabelKey(scoreLabel), scoreDescriptor); + break; + } + } + } - // normalize to base 1 - const maxSizes = { - input: Math.min(sizes[0], 300), - target: Math.min(sizes[1], 300), - answer: Math.min(sizes[2], 300), - limit: Math.min(sizes[3], 50), - id: Math.min(sizes[4], 10), - score: Math.min(sizes[4], 30), - }; - const base = - maxSizes.input + - maxSizes.target + - maxSizes.answer + - maxSizes.limit + - maxSizes.id + - maxSizes.score || 1; - const messageShape = { - raw: { - input: sizes[0], - target: sizes[1], - answer: sizes[2], - limit: sizes[3], - id: sizes[4], - score: sizes[5], - }, - normalized: { - input: maxSizes.input / base, - target: maxSizes.target / base, - answer: maxSizes.answer / base, - limit: maxSizes.limit / base, - id: maxSizes.id / base, - score: maxSizes.score / base, - }, + /** + * @param {import("../Types.mjs").ScoreLabel} scoreLabel + * @returns {ScoreDescriptor} + */ + const scoreDescriptor = (scoreLabel) => { + return scoreDescriptorMap.get(scoreLabelKey(scoreLabel)); }; - const scoreRendered = (sample) => { - const score = scoreValue(sample); + /** + * @param {import("../api/Types.mjs").BasicSampleData} sample + * @param {import("../Types.mjs").ScoreLabel} scoreLabel + * @returns {any} + */ + const scoreRendered = (sample, scoreLabel) => { + const descriptor = scoreDescriptor(scoreLabel); + const score = scoreValue(sample, scoreLabel); if (score === null || score === "undefined") { return "null"; - } else if (scoreDescriptor.render) { - return scoreDescriptor.render(score); + } else if (descriptor.render) { + return descriptor.render(score); } else { return score; } }; - const scorerDescriptor = (sample, scorer) => { + /** + * @param {import("../api/Types.mjs").BasicSampleData} sample + * @param {import("../Types.mjs").ScoreLabel} scoreLabel + * @returns {ScorerDescriptor} + */ + const scorerDescriptor = (sample, scoreLabel) => { return { metadata: () => { - return scoreMetadata(sample, scorer); + return scoreMetadata(sample, scoreLabel.scorer); }, explanation: () => { - return scoreExplanation(sample, scorer); + return scoreExplanation(sample, scoreLabel.scorer); }, answer: () => { - return scoreAnswer(sample, scorer); + return scoreAnswer(sample, scoreLabel.scorer); }, scores: () => { if (!sample || !sample.scores) { return []; } + const myScoreDescriptor = scoreDescriptor(scoreLabel); + if (!myScoreDescriptor) { + return []; + } // Make a list of all the valid score names (this is // used to distinguish between dictionaries that contain // scores that should be treated as standlone scores and // dictionaries that just contain random values, which is allowed) - const scoreNames = scorers.map((score) => { + const scoreNames = scores.map((score) => { return score.name; }); - const sampleScorer = sample.scores[scorer]; + const sampleScorer = sample.scores[scoreLabel.scorer]; const scoreVal = sampleScorer.value; if (typeof scoreVal === "object") { @@ -338,7 +301,7 @@ export const createsSamplesDescriptor = ( return { name, rendered: () => { - return scoreDescriptor.render(scoreVal[name]); + return myScoreDescriptor.render(scoreVal[name]); }, }; }); @@ -348,9 +311,9 @@ export const createsSamplesDescriptor = ( // we just treat it like an opaque dictionary return [ { - name: scorer, + name: scoreLabel.scorer, rendered: () => { - return scoreDescriptor.render(scoreVal); + return myScoreDescriptor.render(scoreVal); }, }, ]; @@ -358,9 +321,9 @@ export const createsSamplesDescriptor = ( } else { return [ { - name: scorer, + name: scoreLabel.scorer, rendered: () => { - return scoreDescriptor.render(scoreVal); + return myScoreDescriptor.render(scoreVal); }, }, ]; @@ -369,25 +332,119 @@ export const createsSamplesDescriptor = ( }; }; + /** + * @param {import("../api/Types.mjs").BasicSampleData} sample + * @param {import("../Types.mjs").ScoreLabel} scoreLabel + * @returns {SelectedScore} + */ + const score = (sample, scoreLabel) => { + return { + value: scoreValue(sample, scoreLabel), + render: () => { + return scoreRendered(sample, scoreLabel); + }, + }; + }; + return { - scoreDescriptor, epochs, - messageShape, - selectedScore: (sample) => { - return { - value: scoreValue(sample), - render: () => { - return scoreRendered(sample); - }, - }; + samples, + scores, + scorerDescriptor, + scoreDescriptor, + score, + scoreAnswer, + }; +}; + +/** + * Provides a utility summary of the samples + * + * @param {EvalDescriptor} evalDescriptor - The EvalDescriptor. + * @param {import("../Types.mjs").ScoreLabel} selectedScore - Selected score. + * @returns {SamplesDescriptor} - The SamplesDescriptor. + */ +export const createSamplesDescriptor = (evalDescriptor, selectedScore) => { + if (!evalDescriptor) { + return undefined; + } + + // Find the total length of the value so we can compute an average + const sizes = evalDescriptor.samples.reduce( + (previous, current) => { + const text = inputString(current.input).join(" "); + const scoreValue = evalDescriptor.score(current, selectedScore).value; + const scoreText = scoreValue ? String(scoreValue) : ""; + previous[0] = Math.min(Math.max(previous[0], text.length), 300); + previous[1] = Math.min( + Math.max(previous[1], arrayToString(current.target).length), + 300, + ); + previous[2] = Math.min( + Math.max( + previous[2], + evalDescriptor.scoreAnswer(current, selectedScore?.name)?.length || 0, + ), + 300, + ); + previous[3] = Math.min( + Math.max(previous[3], current.limit ? current.limit.length : 0), + 50, + ); + previous[4] = Math.min( + Math.max(previous[4], String(current.id).length), + 10, + ); + previous[5] = Math.min(Math.max(previous[5], scoreText.length), 30); + + return previous; }, - scorer: (sample, scorer) => { - return scorerDescriptor(sample, scorer); + [0, 0, 0, 0, 0, 0], + ); + + // normalize to base 1 + const maxSizes = { + input: Math.min(sizes[0], 300), + target: Math.min(sizes[1], 300), + answer: Math.min(sizes[2], 300), + limit: Math.min(sizes[3], 50), + id: Math.min(sizes[4], 10), + score: Math.min(sizes[4], 30), + }; + const base = + maxSizes.input + + maxSizes.target + + maxSizes.answer + + maxSizes.limit + + maxSizes.id + + maxSizes.score || 1; + const messageShape = { + raw: { + input: sizes[0], + target: sizes[1], + answer: sizes[2], + limit: sizes[3], + id: sizes[4], + score: sizes[5], }, - selectedScorer: (sample) => { - return scorerDescriptor(sample, selectedScore?.scorer); + normalized: { + input: maxSizes.input / base, + target: maxSizes.target / base, + answer: maxSizes.answer / base, + limit: maxSizes.limit / base, + id: maxSizes.id / base, + score: maxSizes.score / base, }, }; + + return { + evalDescriptor, + messageShape, + selectedScoreDescriptor: evalDescriptor.scoreDescriptor(selectedScore), + selectedScore: (sample) => evalDescriptor.score(sample, selectedScore), + selectedScorerDescriptor: (sample) => + evalDescriptor.scorerDescriptor(sample, selectedScore), + }; }; /** diff --git a/src/inspect_ai/_view/www/src/samples/SamplesTab.mjs b/src/inspect_ai/_view/www/src/samples/SamplesTab.mjs index 8d9d17e98..39b8f6e9f 100644 --- a/src/inspect_ai/_view/www/src/samples/SamplesTab.mjs +++ b/src/inspect_ai/_view/www/src/samples/SamplesTab.mjs @@ -55,7 +55,9 @@ export const SamplesTab = ({ sampleScrollPositionRef, setSampleScrollPosition, }) => { + /** @type {[ListItem[], function(ListItem[]): void]} */ const [items, setItems] = useState([]); + /** @type {[ListItem[], function(ListItem[]): void]} */ const [sampleItems, setSampleItems] = useState([]); const sampleListRef = useRef(/** @type {HTMLElement|null} */ (null)); @@ -287,7 +289,7 @@ const groupBySample = (samples, sampleDescriptor, order) => { } } }); - const groupCount = samples.length / sampleDescriptor.epochs; + const groupCount = samples.length / sampleDescriptor.evalDescriptor.epochs; const itemCount = samples.length / groupCount; const counter = getCounter(itemCount, groupCount, order); return (sample, index, previousSample) => { @@ -328,7 +330,7 @@ const groupBySample = (samples, sampleDescriptor, order) => { * @returns {(sample: import("../api/Types.mjs").SampleSummary, index: number, previousSample: import("../api/Types.mjs").SampleSummary) => ListItem[]} The list */ const groupByEpoch = (samples, sampleDescriptor, order) => { - const groupCount = sampleDescriptor.epochs; + const groupCount = sampleDescriptor.evalDescriptor.epochs; const itemCount = samples.length / groupCount; const counter = getCounter(itemCount, groupCount, order); diff --git a/src/inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs b/src/inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs index e720d4404..06c10f0c2 100644 --- a/src/inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +++ b/src/inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs @@ -30,11 +30,11 @@ export const SampleFilter = ({ descriptor, filter, filterChanged }) => { } }; - switch (descriptor?.scoreDescriptor?.scoreType) { + switch (descriptor?.selectedScoreDescriptor?.scoreType) { case kScoreTypePassFail: { const options = [{ text: "All", value: "all" }]; options.push( - ...descriptor.scoreDescriptor.categories.map((cat) => { + ...descriptor.selectedScoreDescriptor.categories.map((cat) => { return { text: cat.text, value: cat.val }; }), ); @@ -48,7 +48,7 @@ export const SampleFilter = ({ descriptor, filter, filterChanged }) => { case kScoreTypeCategorical: { const options = [{ text: "All", value: "all" }]; options.push( - ...descriptor.scoreDescriptor.categories.map((cat) => { + ...descriptor.selectedScoreDescriptor.categories.map((cat) => { return { text: cat, value: cat }; }), ); @@ -79,12 +79,12 @@ export const SampleFilter = ({ descriptor, filter, filterChanged }) => { } case kScoreTypeObject: { - if (!descriptor.scoreDescriptor.categories) { + if (!descriptor.selectedScoreDescriptor.categories) { return ""; } const options = [{ text: "All", value: "all" }]; options.push( - ...descriptor.scoreDescriptor.categories.map((cat) => { + ...descriptor.selectedScoreDescriptor.categories.map((cat) => { return { text: cat.text, value: cat.value }; }), ); diff --git a/src/inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs b/src/inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs index b2ebd3acd..0369f6779 100644 --- a/src/inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +++ b/src/inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs @@ -1,6 +1,13 @@ import { html } from "htm/preact"; import { FontSize, TextStyle } from "../../appearance/Fonts.mjs"; +/** + * @param {Object} props + * @param {import("../../Types.mjs").ScoreLabel[]} props.scores + * @param {import("../../Types.mjs").ScoreLabel} props.score + * @param {(score: import("../../Types.mjs").ScoreLabel) => void} props.setScore + * @returns {import("preact").JSX.Element} + */ export const SelectScorer = ({ scores, score, setScore }) => { const scorers = scores.reduce((accum, scorer) => { if ( diff --git a/src/inspect_ai/_view/www/src/samples/tools/SortFilter.mjs b/src/inspect_ai/_view/www/src/samples/tools/SortFilter.mjs index 725cc8140..a13589f2e 100644 --- a/src/inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +++ b/src/inspect_ai/_view/www/src/samples/tools/SortFilter.mjs @@ -25,7 +25,7 @@ export const SortFilter = ({ sampleDescriptor, sort, setSort, epochs }) => { val: kEpochDescVal, }); } - if (sampleDescriptor?.scoreDescriptor?.compare) { + if (sampleDescriptor?.selectedScoreDescriptor?.compare) { options.push({ label: "score asc", val: kScoreAscVal, @@ -130,12 +130,12 @@ export const sortSamples = (sort, samples, samplesDescriptor) => { } case kScoreAscVal: - return samplesDescriptor.scoreDescriptor.compare( + return samplesDescriptor.selectedScoreDescriptor.compare( samplesDescriptor.selectedScore(a).value, samplesDescriptor.selectedScore(b).value, ); case kScoreDescVal: - return samplesDescriptor.scoreDescriptor.compare( + return samplesDescriptor.selectedScoreDescriptor.compare( samplesDescriptor.selectedScore(b).value, samplesDescriptor.selectedScore(a).value, );