diff --git a/CHANGELOG.md b/CHANGELOG.md index 45cc37cdd..417b77e67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ - Improve mime type detection for image content encoding (fixes issues w/ webp images). - Fix memory leak in Inspect View worker-based JSON parsing. - Add `fail_on_error` option for `eval_retry()` and `inspect eval-retry`. +- Defer resolving helper models in `self_critique()` and `model_graded_qa()`. ## v0.3.40 (6 October 2024) diff --git a/examples/theory_of_mind.py b/examples/theory_of_mind.py index 95fa9a541..a56b21118 100644 --- a/examples/theory_of_mind.py +++ b/examples/theory_of_mind.py @@ -1,4 +1,4 @@ -from inspect_ai import Task, task +from inspect_ai import Task, eval, task from inspect_ai.dataset import example_dataset from inspect_ai.scorer import model_graded_fact from inspect_ai.solver import chain_of_thought, generate, self_critique @@ -16,3 +16,7 @@ def theory_of_mind(critique=False): solver=solver, scorer=model_graded_fact(), ) + + +if __name__ == "__main__": + eval(theory_of_mind(critique=True), model="openai/gpt-4o") diff --git a/src/inspect_ai/_view/www/dist/assets/index.js b/src/inspect_ai/_view/www/dist/assets/index.js index e30ff0296..20a0b2c62 100644 --- a/src/inspect_ai/_view/www/dist/assets/index.js +++ b/src/inspect_ai/_view/www/dist/assets/index.js @@ -38,14 +38,13 @@ var __publicField = (obj, key2, value) => __defNormalProp(obj, typeof key2 !== " fetch(link2.href, fetchOpts); } })(); -var n$2, l$1, u$1, i$1, o$1, r$2, f$1, e$3, c$2, s$1, h$1 = {}, p$1 = [], v$1 = /acit|ex(?:s|g|n|p|$)|rph|grid|ows|mnc|ntw|ine[ch]|zoo|^ord|itera/i, y$1 = Array.isArray; +var n$2, l$1, u$1, i$1, o$1, r$2, f$1, e$3, c$2, s$1, h$1 = {}, v$1 = [], p$1 = /acit|ex(?:s|g|n|p|$)|rph|grid|ows|mnc|ntw|ine[ch]|zoo|^ord|itera/i, y$1 = Array.isArray; function d$1(n2, l2) { for (var u2 in l2) n2[u2] = l2[u2]; return n2; } function w$1(n2) { - var l2 = n2.parentNode; - l2 && l2.removeChild(n2); + n2 && n2.parentNode && n2.parentNode.removeChild(n2); } function _(l2, u2, t2) { var i2, o2, r2, f2 = {}; @@ -60,10 +59,10 @@ function g(n2, t2, i2, o2, r2) { function m$2() { return { current: null }; } -function k$1(n2) { +function b(n2) { return n2.children; } -function b(n2, l2) { +function k$1(n2, l2) { this.props = n2, this.context = l2; } function x(n2, l2) { @@ -90,13 +89,13 @@ function P() { P.__r = 0; } function S(n2, l2, u2, t2, i2, o2, r2, f2, e2, c2, s2) { - var a2, v2, y2, d2, w2, _2 = t2 && t2.__k || p$1, g2 = l2.length; - for (u2.__d = e2, $(u2, l2, _2), e2 = u2.__d, a2 = 0; a2 < g2; a2++) null != (y2 = u2.__k[a2]) && "boolean" != typeof y2 && "function" != typeof y2 && (v2 = -1 === y2.__i ? h$1 : _2[y2.__i] || h$1, y2.__i = a2, O(n2, y2, v2, i2, o2, r2, f2, e2, c2, s2), d2 = y2.__e, y2.ref && v2.ref != y2.ref && (v2.ref && N(v2.ref, null, y2), s2.push(y2.ref, y2.__c || d2, y2)), null == w2 && null != d2 && (w2 = d2), 65536 & y2.__u || v2.__k === y2.__k ? e2 = I(y2, e2, n2) : "function" == typeof y2.type && void 0 !== y2.__d ? e2 = y2.__d : d2 && (e2 = d2.nextSibling), y2.__d = void 0, y2.__u &= -196609); + var a2, p2, y2, d2, w2, _2 = t2 && t2.__k || v$1, g2 = l2.length; + for (u2.__d = e2, $(u2, l2, _2), e2 = u2.__d, a2 = 0; a2 < g2; a2++) null != (y2 = u2.__k[a2]) && (p2 = -1 === y2.__i ? h$1 : _2[y2.__i] || h$1, y2.__i = a2, O(n2, y2, p2, i2, o2, r2, f2, e2, c2, s2), d2 = y2.__e, y2.ref && p2.ref != y2.ref && (p2.ref && N(p2.ref, null, y2), s2.push(y2.ref, y2.__c || d2, y2)), null == w2 && null != d2 && (w2 = d2), 65536 & y2.__u || p2.__k === y2.__k ? e2 = I(y2, e2, n2) : "function" == typeof y2.type && void 0 !== y2.__d ? e2 = y2.__d : d2 && (e2 = d2.nextSibling), y2.__d = void 0, y2.__u &= -196609); u2.__d = e2, u2.__e = w2; } function $(n2, l2, u2) { var t2, i2, o2, r2, f2, e2 = l2.length, c2 = u2.length, s2 = c2, a2 = 0; - for (n2.__k = [], t2 = 0; t2 < e2; t2++) r2 = t2 + a2, null != (i2 = n2.__k[t2] = null == (i2 = l2[t2]) || "boolean" == typeof i2 || "function" == typeof i2 ? null : "string" == typeof i2 || "number" == typeof i2 || "bigint" == typeof i2 || i2.constructor == String ? g(null, i2, null, null, null) : y$1(i2) ? g(k$1, { children: i2 }, null, null, null) : void 0 === i2.constructor && i2.__b > 0 ? g(i2.type, i2.props, i2.key, i2.ref ? i2.ref : null, i2.__v) : i2) ? (i2.__ = n2, i2.__b = n2.__b + 1, f2 = L(i2, u2, r2, s2), i2.__i = f2, o2 = null, -1 !== f2 && (s2--, (o2 = u2[f2]) && (o2.__u |= 131072)), null == o2 || null === o2.__v ? (-1 == f2 && a2--, "function" != typeof i2.type && (i2.__u |= 65536)) : f2 !== r2 && (f2 == r2 - 1 ? a2-- : f2 == r2 + 1 ? a2++ : f2 > r2 ? s2 > e2 - r2 ? a2 += f2 - r2 : a2-- : f2 < r2 && (f2 == r2 - a2 ? a2 -= f2 - r2 : a2++), f2 !== t2 + a2 && (i2.__u |= 65536))) : (o2 = u2[r2]) && null == o2.key && o2.__e && 0 == (131072 & o2.__u) && (o2.__e == n2.__d && (n2.__d = x(o2)), V(o2, o2, false), u2[r2] = null, s2--); + for (n2.__k = [], t2 = 0; t2 < e2; t2++) null != (i2 = l2[t2]) && "boolean" != typeof i2 && "function" != typeof i2 ? (r2 = t2 + a2, (i2 = n2.__k[t2] = "string" == typeof i2 || "number" == typeof i2 || "bigint" == typeof i2 || i2.constructor == String ? g(null, i2, null, null, null) : y$1(i2) ? g(b, { children: i2 }, null, null, null) : void 0 === i2.constructor && i2.__b > 0 ? g(i2.type, i2.props, i2.key, i2.ref ? i2.ref : null, i2.__v) : i2).__ = n2, i2.__b = n2.__b + 1, o2 = null, -1 !== (f2 = i2.__i = L(i2, u2, r2, s2)) && (s2--, (o2 = u2[f2]) && (o2.__u |= 131072)), null == o2 || null === o2.__v ? (-1 == f2 && a2--, "function" != typeof i2.type && (i2.__u |= 65536)) : f2 !== r2 && (f2 == r2 - 1 ? a2-- : f2 == r2 + 1 ? a2++ : (f2 > r2 ? a2-- : a2++, i2.__u |= 65536))) : i2 = n2.__k[t2] = null; if (s2) for (t2 = 0; t2 < c2; t2++) null != (o2 = u2[t2]) && 0 == (131072 & o2.__u) && (o2.__e == n2.__d && (n2.__d = x(o2)), V(o2, o2)); } function I(n2, l2, u2) { @@ -127,7 +126,7 @@ function L(n2, l2, u2, t2) { return -1; } function T$1(n2, l2, u2) { - "-" === l2[0] ? n2.setProperty(l2, null == u2 ? "" : u2) : n2[l2] = null == u2 ? "" : "number" != typeof u2 || v$1.test(l2) ? u2 : u2 + "px"; + "-" === l2[0] ? n2.setProperty(l2, null == u2 ? "" : u2) : n2[l2] = null == u2 ? "" : "number" != typeof u2 || p$1.test(l2) ? u2 : u2 + "px"; } function A$1(n2, l2, u2, t2, i2) { var o2; @@ -158,21 +157,21 @@ function F(n2) { }; } function O(n2, u2, t2, i2, o2, r2, f2, e2, c2, s2) { - var a2, h2, p2, v2, w2, _2, g2, m2, x2, C2, M2, P2, $2, I2, H, L2, T2 = u2.type; + var a2, h2, v2, p2, w2, _2, g2, m2, x2, C2, M2, P2, $2, I2, H, L2, T2 = u2.type; if (void 0 !== u2.constructor) return null; 128 & t2.__u && (c2 = !!(32 & t2.__u), r2 = [e2 = u2.__e = t2.__e]), (a2 = l$1.__b) && a2(u2); n: if ("function" == typeof T2) try { - if (m2 = u2.props, x2 = "prototype" in T2 && T2.prototype.render, C2 = (a2 = T2.contextType) && i2[a2.__c], M2 = a2 ? C2 ? C2.props.value : a2.__ : i2, t2.__c ? g2 = (h2 = u2.__c = t2.__c).__ = h2.__E : (x2 ? u2.__c = h2 = new T2(m2, M2) : (u2.__c = h2 = new b(m2, M2), h2.constructor = T2, h2.render = q$1), C2 && C2.sub(h2), h2.props = m2, h2.state || (h2.state = {}), h2.context = M2, h2.__n = i2, p2 = h2.__d = true, h2.__h = [], h2._sb = []), x2 && null == h2.__s && (h2.__s = h2.state), x2 && null != T2.getDerivedStateFromProps && (h2.__s == h2.state && (h2.__s = d$1({}, h2.__s)), d$1(h2.__s, T2.getDerivedStateFromProps(m2, h2.__s))), v2 = h2.props, w2 = h2.state, h2.__v = u2, p2) x2 && null == T2.getDerivedStateFromProps && null != h2.componentWillMount && h2.componentWillMount(), x2 && null != h2.componentDidMount && h2.__h.push(h2.componentDidMount); + if (m2 = u2.props, x2 = "prototype" in T2 && T2.prototype.render, C2 = (a2 = T2.contextType) && i2[a2.__c], M2 = a2 ? C2 ? C2.props.value : a2.__ : i2, t2.__c ? g2 = (h2 = u2.__c = t2.__c).__ = h2.__E : (x2 ? u2.__c = h2 = new T2(m2, M2) : (u2.__c = h2 = new k$1(m2, M2), h2.constructor = T2, h2.render = q$1), C2 && C2.sub(h2), h2.props = m2, h2.state || (h2.state = {}), h2.context = M2, h2.__n = i2, v2 = h2.__d = true, h2.__h = [], h2._sb = []), x2 && null == h2.__s && (h2.__s = h2.state), x2 && null != T2.getDerivedStateFromProps && (h2.__s == h2.state && (h2.__s = d$1({}, h2.__s)), d$1(h2.__s, T2.getDerivedStateFromProps(m2, h2.__s))), p2 = h2.props, w2 = h2.state, h2.__v = u2, v2) x2 && null == T2.getDerivedStateFromProps && null != h2.componentWillMount && h2.componentWillMount(), x2 && null != h2.componentDidMount && h2.__h.push(h2.componentDidMount); else { - if (x2 && null == T2.getDerivedStateFromProps && m2 !== v2 && null != h2.componentWillReceiveProps && h2.componentWillReceiveProps(m2, M2), !h2.__e && (null != h2.shouldComponentUpdate && false === h2.shouldComponentUpdate(m2, h2.__s, M2) || u2.__v === t2.__v)) { - for (u2.__v !== t2.__v && (h2.props = m2, h2.state = h2.__s, h2.__d = false), u2.__e = t2.__e, u2.__k = t2.__k, u2.__k.forEach(function(n3) { + if (x2 && null == T2.getDerivedStateFromProps && m2 !== p2 && null != h2.componentWillReceiveProps && h2.componentWillReceiveProps(m2, M2), !h2.__e && (null != h2.shouldComponentUpdate && false === h2.shouldComponentUpdate(m2, h2.__s, M2) || u2.__v === t2.__v)) { + for (u2.__v !== t2.__v && (h2.props = m2, h2.state = h2.__s, h2.__d = false), u2.__e = t2.__e, u2.__k = t2.__k, u2.__k.some(function(n3) { n3 && (n3.__ = u2); }), P2 = 0; P2 < h2._sb.length; P2++) h2.__h.push(h2._sb[P2]); h2._sb = [], h2.__h.length && f2.push(h2); break n; } null != h2.componentWillUpdate && h2.componentWillUpdate(m2, h2.__s, M2), x2 && null != h2.componentDidUpdate && h2.__h.push(function() { - h2.componentDidUpdate(v2, w2, _2); + h2.componentDidUpdate(p2, w2, _2); }); } if (h2.context = M2, h2.props = m2, h2.__P = n2, h2.__e = false, $2 = l$1.__r, I2 = 0, x2) { @@ -181,7 +180,7 @@ function O(n2, u2, t2, i2, o2, r2, f2, e2, c2, s2) { } else do { h2.__d = false, $2 && $2(u2), a2 = h2.render(h2.props, h2.state, h2.context), h2.state = h2.__s; } while (h2.__d && ++I2 < 25); - h2.state = h2.__s, null != h2.getChildContext && (i2 = d$1(d$1({}, i2), h2.getChildContext())), x2 && !p2 && null != h2.getSnapshotBeforeUpdate && (_2 = h2.getSnapshotBeforeUpdate(v2, w2)), S(n2, y$1(L2 = null != a2 && a2.type === k$1 && null == a2.key ? a2.props.children : a2) ? L2 : [L2], u2, t2, i2, o2, r2, f2, e2, c2, s2), h2.base = u2.__e, u2.__u &= -161, h2.__h.length && f2.push(h2), g2 && (h2.__E = h2.__ = null); + h2.state = h2.__s, null != h2.getChildContext && (i2 = d$1(d$1({}, i2), h2.getChildContext())), x2 && !v2 && null != h2.getSnapshotBeforeUpdate && (_2 = h2.getSnapshotBeforeUpdate(p2, w2)), S(n2, y$1(L2 = null != a2 && a2.type === b && null == a2.key ? a2.props.children : a2) ? L2 : [L2], u2, t2, i2, o2, r2, f2, e2, c2, s2), h2.base = u2.__e, u2.__u &= -161, h2.__h.length && f2.push(h2), g2 && (h2.__E = h2.__ = null); } catch (n3) { if (u2.__v = null, c2 || null != r2) { for (u2.__u |= c2 ? 160 : 32; e2 && 8 === e2.nodeType && e2.nextSibling; ) e2 = e2.nextSibling; @@ -205,33 +204,33 @@ function j$1(n2, u2, t2) { } }); } -function z$1(l2, u2, t2, i2, o2, r2, f2, e2, c2) { - var s2, a2, p2, v2, d2, _2, g2, m2 = t2.props, k2 = u2.props, b2 = u2.type; - if ("svg" === b2 ? o2 = "http://www.w3.org/2000/svg" : "math" === b2 ? o2 = "http://www.w3.org/1998/Math/MathML" : o2 || (o2 = "http://www.w3.org/1999/xhtml"), null != r2) { - for (s2 = 0; s2 < r2.length; s2++) if ((d2 = r2[s2]) && "setAttribute" in d2 == !!b2 && (b2 ? d2.localName === b2 : 3 === d2.nodeType)) { - l2 = d2, r2[s2] = null; +function z$1(u2, t2, i2, o2, r2, f2, e2, c2, s2) { + var a2, v2, p2, d2, _2, g2, m2, b2 = i2.props, k2 = t2.props, C2 = t2.type; + if ("svg" === C2 ? r2 = "http://www.w3.org/2000/svg" : "math" === C2 ? r2 = "http://www.w3.org/1998/Math/MathML" : r2 || (r2 = "http://www.w3.org/1999/xhtml"), null != f2) { + for (a2 = 0; a2 < f2.length; a2++) if ((_2 = f2[a2]) && "setAttribute" in _2 == !!C2 && (C2 ? _2.localName === C2 : 3 === _2.nodeType)) { + u2 = _2, f2[a2] = null; break; } } - if (null == l2) { - if (null === b2) return document.createTextNode(k2); - l2 = document.createElementNS(o2, b2, k2.is && k2), r2 = null, e2 = false; + if (null == u2) { + if (null === C2) return document.createTextNode(k2); + u2 = document.createElementNS(r2, C2, k2.is && k2), c2 && (l$1.__m && l$1.__m(t2, f2), c2 = false), f2 = null; } - if (null === b2) m2 === k2 || e2 && l2.data === k2 || (l2.data = k2); + if (null === C2) b2 === k2 || c2 && u2.data === k2 || (u2.data = k2); else { - if (r2 = r2 && n$2.call(l2.childNodes), m2 = t2.props || h$1, !e2 && null != r2) for (m2 = {}, s2 = 0; s2 < l2.attributes.length; s2++) m2[(d2 = l2.attributes[s2]).name] = d2.value; - for (s2 in m2) if (d2 = m2[s2], "children" == s2) ; - else if ("dangerouslySetInnerHTML" == s2) p2 = d2; - else if ("key" !== s2 && !(s2 in k2)) { - if ("value" == s2 && "defaultValue" in k2 || "checked" == s2 && "defaultChecked" in k2) continue; - A$1(l2, s2, null, d2, o2); - } - for (s2 in k2) d2 = k2[s2], "children" == s2 ? v2 = d2 : "dangerouslySetInnerHTML" == s2 ? a2 = d2 : "value" == s2 ? _2 = d2 : "checked" == s2 ? g2 = d2 : "key" === s2 || e2 && "function" != typeof d2 || m2[s2] === d2 || A$1(l2, s2, d2, m2[s2], o2); - if (a2) e2 || p2 && (a2.__html === p2.__html || a2.__html === l2.innerHTML) || (l2.innerHTML = a2.__html), u2.__k = []; - else if (p2 && (l2.innerHTML = ""), S(l2, y$1(v2) ? v2 : [v2], u2, t2, i2, "foreignObject" === b2 ? "http://www.w3.org/1999/xhtml" : o2, r2, f2, r2 ? r2[0] : t2.__k && x(t2, 0), e2, c2), null != r2) for (s2 = r2.length; s2--; ) null != r2[s2] && w$1(r2[s2]); - e2 || (s2 = "value", void 0 !== _2 && (_2 !== l2[s2] || "progress" === b2 && !_2 || "option" === b2 && _2 !== m2[s2]) && A$1(l2, s2, _2, m2[s2], o2), s2 = "checked", void 0 !== g2 && g2 !== l2[s2] && A$1(l2, s2, g2, m2[s2], o2)); - } - return l2; + if (f2 = f2 && n$2.call(u2.childNodes), b2 = i2.props || h$1, !c2 && null != f2) for (b2 = {}, a2 = 0; a2 < u2.attributes.length; a2++) b2[(_2 = u2.attributes[a2]).name] = _2.value; + for (a2 in b2) if (_2 = b2[a2], "children" == a2) ; + else if ("dangerouslySetInnerHTML" == a2) p2 = _2; + else if (!(a2 in k2)) { + if ("value" == a2 && "defaultValue" in k2 || "checked" == a2 && "defaultChecked" in k2) continue; + A$1(u2, a2, null, _2, r2); + } + for (a2 in k2) _2 = k2[a2], "children" == a2 ? d2 = _2 : "dangerouslySetInnerHTML" == a2 ? v2 = _2 : "value" == a2 ? g2 = _2 : "checked" == a2 ? m2 = _2 : c2 && "function" != typeof _2 || b2[a2] === _2 || A$1(u2, a2, _2, b2[a2], r2); + if (v2) c2 || p2 && (v2.__html === p2.__html || v2.__html === u2.innerHTML) || (u2.innerHTML = v2.__html), t2.__k = []; + else if (p2 && (u2.innerHTML = ""), S(u2, y$1(d2) ? d2 : [d2], t2, i2, o2, "foreignObject" === C2 ? "http://www.w3.org/1999/xhtml" : r2, f2, e2, f2 ? f2[0] : i2.__k && x(i2, 0), c2, s2), null != f2) for (a2 = f2.length; a2--; ) w$1(f2[a2]); + c2 || (a2 = "value", "progress" === C2 && null == g2 ? u2.removeAttribute("value") : void 0 !== g2 && (g2 !== u2[a2] || "progress" === C2 && !g2 || "option" === C2 && g2 !== b2[a2]) && A$1(u2, a2, g2, b2[a2], r2), a2 = "checked", void 0 !== m2 && m2 !== u2[a2] && A$1(u2, a2, m2, b2[a2], r2)); + } + return u2; } function N(n2, u2, t2) { try { @@ -254,28 +253,28 @@ function V(n2, u2, t2) { i2.base = i2.__P = null; } if (i2 = n2.__k) for (o2 = 0; o2 < i2.length; o2++) i2[o2] && V(i2[o2], u2, t2 || "function" != typeof n2.type); - t2 || null == n2.__e || w$1(n2.__e), n2.__c = n2.__ = n2.__e = n2.__d = void 0; + t2 || w$1(n2.__e), n2.__c = n2.__ = n2.__e = n2.__d = void 0; } function q$1(n2, l2, u2) { return this.constructor(n2, u2); } function B$1(u2, t2, i2) { var o2, r2, f2, e2; - l$1.__ && l$1.__(u2, t2), r2 = (o2 = "function" == typeof i2) ? null : t2.__k, f2 = [], e2 = [], O(t2, u2 = (!o2 && i2 || t2).__k = _(k$1, null, [u2]), r2 || h$1, h$1, t2.namespaceURI, !o2 && i2 ? [i2] : r2 ? null : t2.firstChild ? n$2.call(t2.childNodes) : null, f2, !o2 && i2 ? i2 : r2 ? r2.__e : t2.firstChild, o2, e2), j$1(f2, u2, e2); + l$1.__ && l$1.__(u2, t2), r2 = (o2 = "function" == typeof i2) ? null : t2.__k, f2 = [], e2 = [], O(t2, u2 = (!o2 && i2 || t2).__k = _(b, null, [u2]), r2 || h$1, h$1, t2.namespaceURI, !o2 && i2 ? [i2] : r2 ? null : t2.firstChild ? n$2.call(t2.childNodes) : null, f2, !o2 && i2 ? i2 : r2 ? r2.__e : t2.firstChild, o2, e2), j$1(f2, u2, e2); } -n$2 = p$1.slice, l$1 = { __e: function(n2, l2, u2, t2) { +n$2 = v$1.slice, l$1 = { __e: function(n2, l2, u2, t2) { for (var i2, o2, r2; l2 = l2.__; ) if ((i2 = l2.__c) && !i2.__) try { if ((o2 = i2.constructor) && null != o2.getDerivedStateFromError && (i2.setState(o2.getDerivedStateFromError(n2)), r2 = i2.__d), null != i2.componentDidCatch && (i2.componentDidCatch(n2, t2 || {}), r2 = i2.__d), r2) return i2.__E = i2; } catch (l3) { n2 = l3; } throw n2; -} }, u$1 = 0, b.prototype.setState = function(n2, l2) { +} }, u$1 = 0, k$1.prototype.setState = function(n2, l2) { var u2; u2 = null != this.__s && this.__s !== this.state ? this.__s : this.__s = d$1({}, this.state), "function" == typeof n2 && (n2 = n2(d$1({}, u2), this.props)), n2 && d$1(u2, n2), null != n2 && this.__v && (l2 && this._sb.push(l2), M(this)); -}, b.prototype.forceUpdate = function(n2) { +}, k$1.prototype.forceUpdate = function(n2) { this.__v && (this.__e = true, n2 && this.__h.push(n2), M(this)); -}, b.prototype.render = k$1, i$1 = [], r$2 = "function" == typeof Promise ? Promise.prototype.then.bind(Promise.resolve()) : setTimeout, f$1 = function(n2, l2) { +}, k$1.prototype.render = b, i$1 = [], r$2 = "function" == typeof Promise ? Promise.prototype.then.bind(Promise.resolve()) : setTimeout, f$1 = function(n2, l2) { return n2.__v.__b - l2.__v.__b; }, P.__r = 0, e$3 = 0, c$2 = F(false), s$1 = F(true); var n$1 = function(t2, s2, r2, e2) { @@ -7729,7 +7728,9 @@ const ApplicationIcons = { approvals: { approve: "bi bi-shield-check", reject: "bi bi-shield-x", - terminate: "bi bi-shield-exclamation" + terminate: "bi bi-shield-exclamation", + escalate: "bi bi-box-arrow-up", + modify: "bi bi-pencil-square" }, arrows: { right: "bi bi-arrow-right", @@ -7893,7 +7894,7 @@ const ErrorPanel = ({ id, classes, title, error }) => { `; }; -class AppErrorBoundary extends b { +class AppErrorBoundary extends k$1 { constructor(props) { super(props); this.state = { hasError: false }; @@ -16555,6 +16556,10 @@ const decisionLabel = (decision) => { return "Rejected"; case "terminate": return "Terminated"; + case "escalate": + return "Escalated"; + case "modify": + return "Modified"; default: return decision; } @@ -16567,6 +16572,10 @@ const decisionIcon = (decision) => { return ApplicationIcons.approvals.reject; case "terminate": return ApplicationIcons.approvals.terminate; + case "escalate": + return ApplicationIcons.approvals.escalate; + case "modify": + return ApplicationIcons.approvals.modify; default: return ApplicationIcons.approve; } @@ -16592,10 +16601,10 @@ const ToolEventView = ({ id, event, style, depth }) => { /> `} ${approvalEvent ? m$1`<${ApprovalEventView} - id="${id}-approval" - event=${approvalEvent} - style=${{ border: "none", padding: 0, marginBottom: 0 }} - />` : ""} + id="${id}-approval" + event=${approvalEvent} + style=${{ border: "none", padding: 0, marginBottom: 0 }} + />` : ""} @@ -17356,7 +17365,7 @@ const SampleDialog = (props) => { }; const STYLE_INNER = "position:relative; overflow:hidden; width:100%; min-height:100%;"; const STYLE_CONTENT = "position:absolute; top:0; left:0; height:100%; width:100%; overflow:visible;"; -class VirtualList extends b { +class VirtualList extends k$1 { constructor(props) { super(props); this.state = { @@ -18888,12 +18897,12 @@ const MultiScorerMetric = ({ scorer, isFirst }) => { `; }; -const asyncJsonParse = (text) => { - return new Promise((resolve, reject) => { - const blob = new Blob([kWorkerCode], { type: "application/javascript" }); - const blobURL = URL.createObjectURL(blob); - const worker = new Worker(blobURL); - try { +const asyncJsonParse = async (text) => { + const blob = new Blob([kWorkerCode], { type: "application/javascript" }); + const blobURL = URL.createObjectURL(blob); + const worker = new Worker(blobURL); + try { + const result = new Promise((resolve, reject) => { worker.onmessage = function(e2) { if (e2.data.success) { resolve(e2.data.result); @@ -18904,13 +18913,13 @@ const asyncJsonParse = (text) => { worker.onerror = function(error) { reject(new Error(error.message)); }; - worker.postMessage({ scriptContent: kJson5ScriptBase64, text }); - } finally { - worker.onterminate = function() { - URL.revokeObjectURL(blobURL); - }; - } - }); + }); + worker.postMessage({ scriptContent: kJson5ScriptBase64, text }); + return await result; + } finally { + worker.terminate(); + URL.revokeObjectURL(blobURL); + } }; const kWorkerCode = ` self.onmessage = function (e) { diff --git a/src/inspect_ai/scorer/_model.py b/src/inspect_ai/scorer/_model.py index 134872d4d..5becc265c 100644 --- a/src/inspect_ai/scorer/_model.py +++ b/src/inspect_ai/scorer/_model.py @@ -110,9 +110,6 @@ def _model_graded_qa_single( ) -> Scorer: # returns a scorer that does model graded qa for a single model - # resolve model - grader_model = get_model(model) - # resolve grading template, instructions, and grade_pattern template = template if template else DEFAULT_MODEL_GRADED_QA_TEMPLATE grading_template = resource(template) @@ -121,6 +118,10 @@ def _model_graded_qa_single( ) async def score(state: TaskState, target: Target) -> Score: + # resolve model + nonlocal model + model = model if isinstance(model, Model) else get_model(model) + # metadata without grading template variables metadata = omit( state.metadata, ["question", "answer", "criterion", "instructions"] @@ -135,7 +136,7 @@ async def score(state: TaskState, target: Target) -> Score: ) # query the model for the score - result = await grader_model.generate(score_prompt) + result = await model.generate(score_prompt) # extract the grade match = re.search(grade_pattern or DEFAULT_GRADE_PATTERN, result.completion) diff --git a/src/inspect_ai/solver/_critique.py b/src/inspect_ai/solver/_critique.py index 02cbb53e7..cde9125f6 100644 --- a/src/inspect_ai/solver/_critique.py +++ b/src/inspect_ai/solver/_critique.py @@ -43,15 +43,16 @@ def self_critique( completion_template or DEFAULT_CRITIQUE_COMPLETION_TEMPLATE ) - # resolve model - critique_model = get_model(model) - async def solve(state: TaskState, generate: Generate) -> TaskState: + # resolve model + nonlocal model + model = model if isinstance(model, Model) else get_model(model) + # metadata without critique template variables metadata = omit(state.metadata, ["question", "completion", "critique"]) # run critique - critique = await critique_model.generate( + critique = await model.generate( critique_templ.format( question=state.input_text, completion=state.output.completion,