From 2df67a3fdc43318fb4ad230df4ad31e59f5376b4 Mon Sep 17 00:00:00 2001 From: Brian Warner Date: Sun, 11 Aug 2024 17:01:14 -0500 Subject: [PATCH] fix(swingset): improve runPolicy vat-cleanup/budget API --- packages/SwingSet/docs/run-policy.md | 56 ++++++++---- packages/SwingSet/src/kernel/kernel.js | 49 +++++++++-- .../SwingSet/src/kernel/state/kernelKeeper.js | 88 +++++++++---------- packages/SwingSet/src/lib/runPolicies.js | 10 +-- packages/SwingSet/src/types-external.js | 57 ++++++++++-- packages/SwingSet/src/types-internal.js | 3 +- .../slow-termination/slow-termination.test.js | 84 +++++++++--------- 7 files changed, 221 insertions(+), 126 deletions(-) diff --git a/packages/SwingSet/docs/run-policy.md b/packages/SwingSet/docs/run-policy.md index 6befcf2e54bf..55c5b7e7a6dd 100644 --- a/packages/SwingSet/docs/run-policy.md +++ b/packages/SwingSet/docs/run-policy.md @@ -66,26 +66,46 @@ The run policy should be provided as the first argument to `controller.run()`. I Some vats may grow very large (i.e. large c-lists with lots of imported/exported objects, or lots of vatstore entries). If/when these are terminated, the burst of cleanup work might overwhelm the kernel, especially when processing all the dropped imports (which trigger GC messages to other vats). -To protect the system against these bursts, the run policy can be configured to terminate vats slowly. Instead of doing all the cleanup work immediately, the policy allows the kernel to do a little bit of work each time `controller.run()` is called (e.g. once per block, for kernels hosted inside a blockchain). +To protect the system against these bursts, the run policy can be configured to terminate vats slowly. Instead of doing all the cleanup work immediately, the policy allows the kernel to do a little bit of work each time `controller.run()` is called (e.g. once per block, for kernels hosted inside a blockchain). Internally, before servicing the run-queue, the kernel checks to see if any vats are in the "terminated but not fully deleted" state, and executes a "vat-cleanup crank", to delete some state. Depending upon what the run-policy allows, it may do multiple vat-cleanup cranks in a single `controller.run()`, or just one, or none at all. And depending upon the budget provided to each one, it may only take one vat-cleanup crank to finish the job, or millions. If the policy limits the number of cranks in a single block, and limits the budget of the crank, then the cleanup process will be spread over multiple blocks. + +For each terminated vat, cleanup proceeds through five phases: + +* `exports`: delete c-list entries for objects/promises *exported* by the vat +* `imports`: same but for objects/promises *imported* by the vat +* `kv`: delete all other kv entries for this vat, mostly vatstore records +* `snapshots`: delete xsnap heap snapshots, typically one per 200 deliveries (`snapshotInterval`) +* `transcripts`: delete transcript spans, and their associated transcript items + +The first two phases, `exports` and `imports`, cause the most activity in other vats. Deleting `exports` can cause objects to be retired, which will deliver `dispatch.retireImports` GC messages to other vats which have imported those objects and used them as keys in a WeakMapStore. Deleting `imports` can cause refcounts to drop to zero, delivering `dispatch.dropImports` into vats which were exporting those objects. Both of these will add `gcKref` "dirt" to the other vat, eventually triggering a BringOutYourDead, which will cause more DB activity. These are generally the phases we must rate-limit to avoid overwhelming the system. + +The other phases cause DB activity (deleting rows), but do not interact with other vats, so it is easier to accomodate more cleanup steps. The budget can be tuned to allow more kv/snapshots/transcripts than exports/imports in a single cleanup run. There are two RunPolicy methods which control this. The first is `runPolicy.allowCleanup()`. This will be invoked many times during `controller.run()`, each time the kernel tries to decide what to do next (once per step). The return value will enable (or not) a fixed amount of cleanup work. The second is `runPolicy.didCleanup({ cleanups })`, which is called later, to inform the policy of how much cleanup work was actually done. The policy can count the cleanups and switch `allowCleanup()` to return `false` when it reaches a threshold. (We need the pre-check `allowCleanup` method because the simple act of looking for cleanup work is itself a cost that we might not be willing to pay). -If `allowCleanup()` exists, it must either return a falsy value or a `{ budget?: number }` object. +If `allowCleanup()` exists, it must either return `false`, `true`, or a budget record. + +A `false` return value (eg `allowCleanup: () => false`) prohibits all cleanup work. This can be useful in a "only clean up during idle blocks" approach (see below), but should not be the only policy used, otherwise vat cleanup would never happen. + +A budget record defines properties to set limits on each phase of cleanup. For example, if `budget.exports = 5`, then each cleanup crank is limited to deleting 5 c-list export records (each of which uses two kv records, one for the kref->vref direction, and another for the vref->kref direction). `budget.transcripts = 10` would allow 10 transcript spans to be deleted, along with all transcript items they referenced (typically 10*200=2000, depending upon `snapshotInterval`). -A falsy return value (eg `allowCleanup: () => false`) prohibits cleanup work. This can be useful in a "only clean up during idle blocks" approach (see below), but should not be the only policy used, otherwise vat cleanup would never happen. +The limit can be set to `Infinity` to allow unlimited deletion of that particular phase. -A numeric `budget` limits how many cleanups are allowed to happen (if any are needed). One "cleanup" will delete one vatstore row, or one c-list entry (note that c-list deletion may trigger GC work), or one heap snapshot record, or one transcript span (including its populated transcript items). Using `{ budget: 5 }` seems to be a reasonable limit on each call, balancing overhead against doing sufficiently small units of work that we can limit the total work performed. +Each budget record must include a `{ default }` property, which is used as the default for any phase that is not explicitly mentioned in the budget. This also provides forwards-compatibility for any phases that might be added in the future. So `budget = { default: 5 }` would provide a conservative budget for cleanup, `budget = { default: 5, kv: 50 }` would enable faster deletion of the non-c-list kvstore entries, and `budget = { default: Infinity }` allows unlimited cleanup for all phases. -If `budget` is missing or `undefined`, the kernel will perform unlimited cleanup work. This also happens if `allowCleanup()` is missing entirely, which maintains the old behavior for host applications that haven't been updated to make new policy objects. Note that cleanup is higher priority than any delivery, and is second only to acceptance queue routing. +Note that the cleanup crank ends when no more work is left to be done (which finally allows the vat to be forgotten entirely), or when an individual phase's budget is exceeded. That means multiple phases might see deletion work in a single crank, if the earlier phase finishes its work without exhausting its budget. For example, if the budget is `{ default: 5 }`, but the vat had 4 exports, 4 imports, 4 other kv entries, 4 snapshots, and 4 transcript spans, then all that work would be done in a single crank, because no individual phase would exhaust its budget. The only case that is even worth mentioning is when the end of the `exports` phase overlaps with the start of the `imports` phase, where we might do four more cleanups than usual. -`didCleanup({ cleanups })` is called when the kernel actually performed some vat-termination cleanup, and the `cleanups` property is a number with the count of cleanups that took place. Each query to `allowCleanup()` might (or might not) be followed by a call to `didCleanup`, with a `cleanups` value that does not exceed the specified budget. Like other policy methods, `didCleanup` should return `true` if the kernel should keep running or `false` if it should stop. +A `true` return value from `allowCleanup()` is equivalent to `{ default: Infinity }`, which allows unlimited cleanup work. This also happens if `allowCleanup()` is missing entirely, which maintains the old behavior for host applications that haven't been updated to make new policy objects. Note that cleanup is higher priority than any delivery, and is second only to acceptance queue routing. + +`didCleanup({ cleanups })` is called when the kernel actually performed some vat-termination cleanup, and the `cleanups` property is a "work record" that counts how much cleanup was performed. It contains one number for each phase that did work, plus a `total` property that is the sum of all phases. A cleanup crank which deleted two `exports` and five `imports` would yield a work record of `{ total: 7, exports: 2, imports: 5 }`. The work reported for each phase will not exceed the budget granted to it by `allowCleanup`. + +Like other post-run policy methods, `didCleanup` should return `true` if the kernel should keep running or `false` if it should stop. To limit the work done per block (for blockchain-based applications) the host's RunPolicy objects must keep track of how many cleanups were reported, and change the behavior of `allowCleanup()` when it reaches a per-block threshold. See below for examples. ## Typical Run Policies -A basic policy might simply limit the block to 100 cranks with deliveries and two vat creations: +A basic policy might simply limit the run/block to 100 cranks with deliveries and two vat creations, and not restrict cleanup at all (so terminated vats are completely deleted in a single step): ```js function make100CrankPolicy() { @@ -181,13 +201,13 @@ function makeSlowTerminationPolicy() { }, allowCleanup() { if (cleanups > 0) { - return { budget: cleanups }; + return { default: cleanups }; } else { return false; } }, - didCleanup(spent) { - cleanups -= spent.cleanups; + didCleanup(details) { + cleanups -= details.cleanups.total; }, }); return policy; @@ -212,17 +232,17 @@ function makeDeliveryOnlyPolicy() { } ``` -The second only performs cleanup, with a limited budget, stopping the run after any deliveries occur (such as GC actions): +The second performs a limited number of cleanups, along with any deliveries (like BOYD) that are caused by the cleanups: ```js function makeCleanupOnlyPolicy() { let cleanups = 5; - const stop: () => false; + const keepGoing: () => true; const policy = harden({ - vatCreated: stop, - crankComplete: stop, - crankFailed: stop, - emptyCrank: stop, + vatCreated: keepGoing, + crankComplete: keepGoing, + crankFailed: keepGoing, + emptyCrank: keepGoing, allowCleanup() { if (cleanups > 0) { return { budget: cleanups }; @@ -251,9 +271,9 @@ async function doBlock() { } ``` -Note that regardless of whatever computron/delivery budget is imposed by the first policy, the second policy will allow one additional delivery to be made (we do not yet have an `allowDelivery()` pre-check method that might inhibit this). The cleanup work, which may or may not happen, will sometimes trigger a GC delivery like `dispatch.dropExports`, but at most one such delivery will be made before the second policy returns `false` and stops `controller.run()`. If cleanup does not trigger such a delivery, or if no cleanup work needs to be done, then one normal run-queue delivery will be performed before the policy has a chance to say "stop". All other cleanup-triggered GC work will be deferred until the first run of the next block. +The second run, if performed, will both delete some vat records, and deliver any GC actions that result, which may trigger a BringOutYourDead delivery for one or more vats. -Also note that `budget` and `cleanups` are plain `Number`s, whereas `comptrons` is a `BigInt`. +Also note that `budget` and `cleanups` property values are plain `Number`s, whereas `comptrons` is a `BigInt`. ## Non-Consensus Wallclock Limits diff --git a/packages/SwingSet/src/kernel/kernel.js b/packages/SwingSet/src/kernel/kernel.js index 634a04f8da67..14ae23a6175f 100644 --- a/packages/SwingSet/src/kernel/kernel.js +++ b/packages/SwingSet/src/kernel/kernel.js @@ -4,6 +4,7 @@ import { assert, Fail } from '@endo/errors'; import { isNat } from '@endo/nat'; import { mustMatch, M } from '@endo/patterns'; import { importBundle } from '@endo/import-bundle'; +import { objectMetaMap } from '@agoric/internal'; import { makeUpgradeDisconnection } from '@agoric/internal/src/upgrade-api.js'; import { kser, kslot, makeError } from '@agoric/kmarshal'; import { assertKnownOptions } from '../lib/assertOptions.js'; @@ -39,6 +40,7 @@ import { notifyTermination } from './notifyTermination.js'; import { makeVatAdminHooks } from './vat-admin-hooks.js'; /** @import * as liveslots from '@agoric/swingset-liveslots' */ +/** @import {PolicyInputCleanupCounts} from '../types-external.js' */ function abbreviateReplacer(_, arg) { if (typeof arg === 'bigint') { @@ -388,12 +390,13 @@ export default function buildKernel( * illegalSyscall: { vatID: VatID, info: SwingSetCapData } | undefined, * vatRequestedTermination: { reject: boolean, info: SwingSetCapData } | undefined, * } } DeliveryStatus + * @import {PolicyInputCleanupCounts} from '../types-external.js'; * @typedef { { * abort?: boolean, // changes should be discarded, not committed * consumeMessage?: boolean, // discard the aborted delivery * didDelivery?: VatID, // we made a delivery to a vat, for run policy and save-snapshot * computrons?: bigint, // computron count for run policy - * cleanups?: number, // cleanup budget spent + * cleanups?: PolicyInputCleanupCounts, // cleanup budget spent * meterID?: string, // deduct those computrons from a meter * measureDirt?: { vatID: VatID, dirt: Dirt }, // dirt counters should increment * terminate?: { vatID: VatID, reject: boolean, info: SwingSetCapData }, // terminate vat, notify vat-admin @@ -669,17 +672,32 @@ export default function buildKernel( * Perform a small (budget-limited) amount of dead-vat cleanup work. * * @param {RunQueueEventCleanupTerminatedVat} message - * 'message' is the run-queue cleanup action, which includes a vatID and budget. - * A budget of 'undefined' allows unlimited work. Otherwise, the budget is a Number, - * and cleanup should not touch more than maybe 5*budget DB rows. + * 'message' is the run-queue cleanup action, which includes a + * vatID and budget. The budget contains work limits for each + * phase of cleanup (perhaps Infinity to allow unlimited + * work). Cleanup should not touch more than maybe 5*limit DB + * rows. * @returns {Promise} */ async function processCleanupTerminatedVat(message) { const { vatID, budget } = message; - const { done, cleanups } = kernelKeeper.cleanupAfterTerminatedVat( + const { done, work } = kernelKeeper.cleanupAfterTerminatedVat( vatID, budget, ); + const w = objectMetaMap(work, desc => (desc.value ? desc : undefined)); + kernelSlog.write({ type: 'vat-cleanup', vatID, work: w }); + + /** @type {PolicyInputCleanupCounts} */ + const cleanups = { + total: + work.exports + + work.imports + + work.kv + + work.snapshots + + work.transcripts, + ...work, + }; if (done) { kernelKeeper.forgetTerminatedVat(vatID); kernelSlog.write({ type: 'vat-cleanup-complete', vatID }); @@ -687,8 +705,7 @@ export default function buildKernel( // We don't perform any deliveries here, so there are no computrons to // report, but we do tell the runPolicy know how much kernel-side DB // work we did, so it can decide how much was too much. - const computrons = 0n; - return harden({ computrons, cleanups }); + return harden({ computrons: 0n, cleanups }); } /** @@ -1782,11 +1799,25 @@ export default function buildKernel( } } + // match return value of runPolicy.allowCleanup, which is + // PolicyOutputCleanupBudget | true | false const allowCleanupShape = M.or( // 'false' will prohibit cleanup false, + // 'true' will allow unlimited cleanup + true, // otherwise allow cleanup, optionally with a limiting budget - M.splitRecord({}, { budget: M.number() }, M.record()), + M.splitRecord( + { default: M.number() }, + { + exports: M.number(), + imports: M.number(), + kv: M.number(), + snapshots: M.number(), + transcripts: M.number(), + }, + M.record(), + ), ); /** @@ -1808,7 +1839,7 @@ export default function buildKernel( }; } // Absent specific configuration, allow unlimited cleanup. - const allowCleanup = policy?.allowCleanup?.() ?? {}; + const allowCleanup = policy?.allowCleanup?.() ?? true; mustMatch(harden(allowCleanup), allowCleanupShape); const message = diff --git a/packages/SwingSet/src/kernel/state/kernelKeeper.js b/packages/SwingSet/src/kernel/state/kernelKeeper.js index b28b808538d6..befe021d18a8 100644 --- a/packages/SwingSet/src/kernel/state/kernelKeeper.js +++ b/packages/SwingSet/src/kernel/state/kernelKeeper.js @@ -1,7 +1,6 @@ /* eslint-disable no-use-before-define */ import { Nat, isNat } from '@endo/nat'; import { assert, Fail } from '@endo/errors'; -import { objectMetaMap } from '@agoric/internal'; import { initializeVatState, makeVatKeeper, @@ -46,6 +45,8 @@ const enableKernelGC = true; * @typedef { import('../../types-external.js').VatKeeper } VatKeeper * @typedef { import('../../types-internal.js').InternalKernelOptions } InternalKernelOptions * @typedef { import('../../types-internal.js').ReapDirtThreshold } ReapDirtThreshold + * @import {CleanupBudget, CleanupWork, PolicyOutputCleanupBudget} from '../../types-external.js'; + * @import {RunQueueEventCleanupTerminatedVat} from '../../types-internal.js'; */ export { DEFAULT_REAP_DIRT_THRESHOLD_KEY }; @@ -947,11 +948,11 @@ export default function makeKernelKeeper( * (so the runPolicy can decide when to stop). * * @param {string} vatID - * @param {number} [budget] - * @returns {{ done: boolean, cleanups: number }} + * @param {CleanupBudget} budget + * @returns {{ done: boolean, work: CleanupWork }} * */ - function cleanupAfterTerminatedVat(vatID, budget = undefined) { + function cleanupAfterTerminatedVat(vatID, budget) { // this is called from terminateVat, which is called from either: // * end of processDeliveryMessage, if crankResults.terminate // * device-vat-admin (when vat-v-a does adminNode.terminateVat) @@ -961,7 +962,6 @@ export default function makeKernelKeeper( // crankResults.terminate insistVatID(vatID); - let cleanups = 0; const work = { exports: 0, imports: 0, @@ -969,20 +969,7 @@ export default function makeKernelKeeper( snapshots: 0, transcripts: 0, }; - let spend = _did => false; // returns "stop now" - if (budget !== undefined) { - assert.typeof(budget, 'number'); - spend = (did = 1) => { - assert(budget !== undefined); // hush TSC - cleanups += did; - budget -= did; - return budget <= 0; - }; - } - const logWork = () => { - const w = objectMetaMap(work, desc => (desc.value ? desc : undefined)); - kernelSlog?.write({ type: 'vat-cleanup', vatID, work: w }); - }; + let remaining = 0; // TODO: it would be slightly cheaper to walk all kvStore keys in // order, and act on each one according to its category (c-list @@ -1017,6 +1004,7 @@ export default function makeKernelKeeper( // used. // first, scan for exported objects, which must be orphaned + remaining = budget.exports ?? budget.default; for (const k of enumeratePrefixedKeys(kvStore, exportPrefix)) { // The void for an object exported by a vat will always be of the form // `o+NN`. The '+' means that the vat exported the object (rather than @@ -1031,13 +1019,14 @@ export default function makeKernelKeeper( // note: adds to maybeFreeKrefs, deletes c-list and .owner abandonKernelObject(kref, vatID); work.exports += 1; - if (spend()) { - logWork(); - return { done: false, cleanups }; + remaining -= 1; + if (remaining <= 0) { + return { done: false, work }; } } // then scan for imported objects, which must be decrefed + remaining = budget.imports ?? budget.default; for (const k of enumeratePrefixedKeys(kvStore, importPrefix)) { // abandoned imports: delete the clist entry as if the vat did a // drop+retire @@ -1046,9 +1035,9 @@ export default function makeKernelKeeper( vatKeeper.deleteCListEntry(kref, vref); // that will also delete both db keys work.imports += 1; - if (spend()) { - logWork(); - return { done: false, cleanups }; + remaining -= 1; + if (remaining <= 0) { + return { done: false, work }; } } @@ -1056,30 +1045,32 @@ export default function makeKernelKeeper( // so they already know the orphaned promises to reject // now loop back through everything and delete it all + remaining = budget.kv ?? budget.default; for (const k of enumeratePrefixedKeys(kvStore, `${vatID}.`)) { kvStore.delete(k); work.kv += 1; - if (spend()) { - logWork(); - return { done: false, cleanups }; + remaining -= 1; + if (remaining <= 0) { + return { done: false, work }; } } // this will internally loop through 'budget' deletions - const dsc = vatKeeper.deleteSnapshots(budget); + remaining = budget.snapshots ?? budget.default; + const dsc = vatKeeper.deleteSnapshots(remaining); work.snapshots += dsc.cleanups; - if (spend(dsc.cleanups)) { - logWork(); - return { done: false, cleanups }; + remaining -= dsc.cleanups; + if (remaining <= 0) { + return { done: false, work }; } // same - const dts = vatKeeper.deleteTranscripts(budget); + remaining = budget.transcripts ?? budget.default; + const dts = vatKeeper.deleteTranscripts(remaining); work.transcripts += dts.cleanups; + remaining -= dts.cleanups; // last task, so increment cleanups, but dc.done is authoritative - spend(dts.cleanups); - logWork(); - return { done: dts.done, cleanups }; + return { done: dts.done, work }; } function deleteVatID(vatID) { @@ -1386,20 +1377,23 @@ export default function makeKernelKeeper( kvStore.set(`vats.terminated`, JSON.stringify(terminatedVats)); } + /** + * @param {PolicyOutputCleanupBudget} allowCleanup + * @returns {RunQueueEventCleanupTerminatedVat | undefined} + */ function nextCleanupTerminatedVatAction(allowCleanup) { - if (!allowCleanup) { + if (allowCleanup === false) { + return undefined; + } else { + const unlimited = { default: Infinity }; + /** @type {CleanupBudget} */ + const budget = allowCleanup === true ? unlimited : allowCleanup; + const vatID = getFirstTerminatedVat(); + if (vatID) { + return { type: 'cleanup-terminated-vat', vatID, budget }; + } return undefined; } - // budget === undefined means "unlimited" - const { budget } = allowCleanup; - // if (getGCActions().size) { - // return undefined; - // } - const vatID = getFirstTerminatedVat(); - if (vatID) { - return { type: 'cleanup-terminated-vat', vatID, budget }; - } - return undefined; } // As refcounts are decremented, we accumulate a set of krefs for which diff --git a/packages/SwingSet/src/lib/runPolicies.js b/packages/SwingSet/src/lib/runPolicies.js index 8adf8ef9c18d..e0770c4b8d89 100644 --- a/packages/SwingSet/src/lib/runPolicies.js +++ b/packages/SwingSet/src/lib/runPolicies.js @@ -4,7 +4,7 @@ export function foreverPolicy() { /** @type { RunPolicy } */ return harden({ allowCleanup() { - return {}; // unlimited budget + return true; // unlimited budget }, vatCreated(_details) { return true; @@ -31,7 +31,7 @@ export function crankCounter( /** @type { RunPolicy } */ const policy = harden({ allowCleanup() { - return { budget: 100 }; // limited budget + return { default: 100 }; // limited budget }, vatCreated() { vats += 1; @@ -64,7 +64,7 @@ export function computronCounter(limit, options = {}) { /** @type { RunPolicy } */ const policy = harden({ allowCleanup() { - return { budget: cleanupBudget }; // limited budget + return { default: cleanupBudget }; // limited budget }, vatCreated() { total += vatCreatedComputrons; @@ -93,7 +93,7 @@ export function wallClockWaiter(seconds) { const timeout = Date.now() + 1000 * seconds; /** @type { RunPolicy } */ const policy = harden({ - allowCleanup: () => ({}), // unlimited budget + allowCleanup: () => true, // unlimited budget vatCreated: () => Date.now() < timeout, crankComplete: () => Date.now() < timeout, crankFailed: () => Date.now() < timeout, @@ -121,7 +121,7 @@ export function someCleanup(budget) { allowCleanup: () => { if (once) { once = false; - return { budget }; + return { default: budget }; } return false; }, diff --git a/packages/SwingSet/src/types-external.js b/packages/SwingSet/src/types-external.js index 364a1d6041f5..04101db8bb82 100644 --- a/packages/SwingSet/src/types-external.js +++ b/packages/SwingSet/src/types-external.js @@ -219,24 +219,71 @@ export {}; */ /** + * PolicyInput is used internally within kernel.js, returned by each + * message processor, and used to decide which of the host's runPolicy + * methods to invoke. The 'details' portion of PolicyInput is passed + * as an argument to those methods, so those types are + * externally-visible. + * + * @typedef { { exports: number, + * imports: number, + * kv: number, + * snapshots: number, + * transcripts: number, + * } } CleanupWork + * + * @typedef { { total: number } & CleanupWork } PolicyInputCleanupCounts + * @typedef { { cleanups: PolicyInputCleanupCounts, computrons?: bigint } } PolicyInputCleanupDetails * @typedef { { computrons?: bigint } } PolicyInputDetails + * * @typedef { [tag: 'none', details: PolicyInputDetails ] } PolicyInputNone * @typedef { [tag: 'create-vat', details: PolicyInputDetails ]} PolicyInputCreateVat * @typedef { [tag: 'crank', details: PolicyInputDetails ] } PolicyInputCrankComplete * @typedef { [tag: 'crank-failed', details: PolicyInputDetails ]} PolicyInputCrankFailed - * @typedef { [tag: 'cleanup', details: { cleanups: number, computrons?: number }] } PolicyInputCleanup + * @typedef { [tag: 'cleanup', details: PolicyInputCleanupDetails] } PolicyInputCleanup + * * @typedef { PolicyInputNone | PolicyInputCreateVat | PolicyInputCrankComplete | * PolicyInputCrankFailed | PolicyInputCleanup } PolicyInput - * @typedef { boolean } PolicyOutput + * + * CleanupBudget is the internal record used to limit the slow + * deletion of terminated vats. Each property limits the number of + * deletions per 'cleanup-terminated-vat' run-queue event, for a + * specific phase (imports, exports, snapshots, etc). It must always + * have a 'default' property, which is used for phases that aren't + * otherwise specified. + * + * @typedef { { default: number, + * exports?: number, + * imports?: number, + * kv?: number, + * snapshots?: number, + * transcripts?: number, + * } } CleanupBudget + * + * PolicyOutputCleanupBudget is the return value of + * runPolicy.allowCleanup(), and tells the kernel how much it is + * allowed to clean up. It is either a CleanupBudget, or 'true' to + * allow unlimited cleanup, or 'false' to forbid any cleanup. + * + * @typedef {CleanupBudget | true | false} PolicyOutputCleanupBudget + * + * PolicyOutput is the boolean returned by all the other runPolicy + * methods, where 'true' means "keep going", and 'false' means "stop + * now". + * + * @typedef {boolean} PolicyOutput + * * @typedef { { - * allowCleanup?: () => false | { budget?: number }, + * allowCleanup?: () => boolean | PolicyOutputCleanupBudget, * vatCreated: (details: {}) => PolicyOutput, * crankComplete: (details: { computrons?: bigint }) => PolicyOutput, * crankFailed: (details: {}) => PolicyOutput, * emptyCrank: () => PolicyOutput, - * didCleanup?: (details: { cleanups: number }) => PolicyOutput, + * didCleanup?: (details: PolicyInputCleanupDetails) => PolicyOutput, * } } RunPolicy - * + */ + +/** * @typedef {object} VatWarehousePolicy * @property { number } [maxVatsOnline] Limit the number of simultaneous workers * @property { boolean } [restartWorkerOnSnapshot] Reload worker immediately upon snapshot creation diff --git a/packages/SwingSet/src/types-internal.js b/packages/SwingSet/src/types-internal.js index 7665a8b2c7c8..1ef68a5735f5 100644 --- a/packages/SwingSet/src/types-internal.js +++ b/packages/SwingSet/src/types-internal.js @@ -143,8 +143,9 @@ export {}; * @typedef { { type: 'retireImports', vatID: VatID, krefs: string[] } } RunQueueEventRetireImports * @typedef { { type: 'negated-gc-action', vatID?: VatID } } RunQueueEventNegatedGCAction * @typedef { { type: 'bringOutYourDead', vatID: VatID } } RunQueueEventBringOutYourDead + * @import {CleanupBudget} from './types-external.js'; * @typedef { { type: 'cleanup-terminated-vat', vatID: VatID, - * budget: number | undefined } } RunQueueEventCleanupTerminatedVat + * budget: CleanupBudget } } RunQueueEventCleanupTerminatedVat * @typedef { RunQueueEventNotify | RunQueueEventSend | RunQueueEventCreateVat | * RunQueueEventUpgradeVat | RunQueueEventChangeVatOptions | RunQueueEventStartVat | * RunQueueEventDropExports | RunQueueEventRetireExports | RunQueueEventRetireImports | diff --git a/packages/SwingSet/test/vat-admin/slow-termination/slow-termination.test.js b/packages/SwingSet/test/vat-admin/slow-termination/slow-termination.test.js index 4092aae7ac0e..336176024c66 100644 --- a/packages/SwingSet/test/vat-admin/slow-termination/slow-termination.test.js +++ b/packages/SwingSet/test/vat-admin/slow-termination/slow-termination.test.js @@ -42,14 +42,14 @@ const makeCleanupPolicy = budget => { emptyCrank: stop, allowCleanup() { if (budget > 0) { - return { budget }; + return { default: budget }; } else { return false; } }, didCleanup(spent) { - budget -= spent.cleanups; - cleanups += spent.cleanups; + budget -= spent.cleanups.total; + cleanups += spent.cleanups.total; }, }); const getCleanups = () => cleanups; @@ -85,7 +85,7 @@ async function doSlowTerminate(t, mode) { crankFailed: () => true, emptyCrank: () => true, didCleanup: ({ cleanups }) => { - countCleanups += cleanups; + countCleanups += cleanups.total; return true; }, }; @@ -148,8 +148,10 @@ async function doSlowTerminate(t, mode) { .pluck() .get(vatID); - // 20*2 for imports, 21*2 for exports, 20*1 for vatstore = 102 - // plus 27 for usual liveslots stuff + // 20*2 for imports, 21*2 for exports, 20*1 for vatstore = 102. + // Plus 21 for the usual liveslots stuff, and 6 for kernel stuff + // like vNN.source/options + t.is(remainingKV().length, 129); t.false(JSON.parse(kvStore.get('vats.terminated')).includes(vatID)); // we get one span for snapshotInitial (=2), then a span every @@ -179,7 +181,7 @@ async function doSlowTerminate(t, mode) { t.true(JSON.parse(kvStore.get('vats.terminated')).includes(vatID)); // no cleanups were allowed, so nothing should be removed yet - t.truthy(kernelStorage.kvStore.get(`${vatID}.options`, undefined)); + t.truthy(kernelStorage.kvStore.get(`${vatID}.options`)); t.is(remainingKV().length, 129); // now do a series of cleanup runs, each with budget=5 @@ -190,8 +192,8 @@ async function doSlowTerminate(t, mode) { return getCleanups(); }; - // cleanup currently deletes c-list exports, then c-list imports, - // then all other kvStore entries, then snapshots, then transcripts + // cleanup deletes c-list exports, then c-list imports, then all + // other kvStore entries, then snapshots, then transcripts let leftKV = remainingKV().length; const cleanKV = async (expectedKV, expectedCleanups) => { @@ -209,77 +211,77 @@ async function doSlowTerminate(t, mode) { await cleanKV(10, 5); // 5 c-list exports await cleanKV(10, 5); // 5 c-list exports - // we have one export left, so this clean(budget=5) will delete the - // two kv for the export, then the first four of our 20 c-list - // imports, each of which also has 2 kv) + // now we have one export left, so this clean(budget.default=5) will + // delete the one export (= two kv), then the first five of our 20 + // c-list imports (each of which also has 2 kv, so 12 kv total) - await cleanKV(10, 5); // 1 c-list exports, 4 c-list imports - await cleanKV(10, 5); // 5 c-list imports + await cleanKV(12, 6); // 1 c-list exports, 5 c-list imports await cleanKV(10, 5); // 5 c-list imports await cleanKV(10, 5); // 5 c-list imports + await cleanKV(10, 5); // 5 c-list imports, leaving none + + // the non-clist kvstore keys should still be present + t.truthy(kernelStorage.kvStore.get(`${vatID}.options`)); - // we have one import left, so this clean(budget=5) will delete its - // two kv, then the first four of our 47 other kv entries (20 - // vatstore plus 27 liveslots overhead - await cleanKV(6, 5); // 1 c-list import, 4 other kv - // now there are 45 other kv entries left - t.is(remainingKV().length, 43); + // there are no imports, so this clean(budget.default=5) will delete + // the first five of our 47 other kv entries (20 vatstore plus 27 + // liveslots overhead await cleanKV(5, 5); // 5 other kv + // now there are 42 other kv entries left + t.is(remainingKV().length, 42); + + await cleanKV(5, 5); // 5 other kv + t.is(remainingKV().length, 37); await cleanKV(5, 5); // 5 other kv + t.is(remainingKV().length, 32); await cleanKV(5, 5); // 5 other kv + t.is(remainingKV().length, 27); await cleanKV(5, 5); // 5 other kv + t.is(remainingKV().length, 22); await cleanKV(5, 5); // 5 other kv + t.is(remainingKV().length, 17); t.is(remainingSnapshots(), 5); await cleanKV(5, 5); // 5 other kv + t.is(remainingKV().length, 12); await cleanKV(5, 5); // 5 other kv + t.is(remainingKV().length, 7); await cleanKV(5, 5); // 5 other kv - - // we have 3 kv entries left, so budget=5 will delete those three, - // then two snapshots + t.is(remainingKV().length, 2); t.is(remainingSnapshots(), 5); - let cleanups = await clean(); + + // there are two kv left, so this clean will delete those, then all 5 snapshots + await cleanKV(2, 7); // 2 final kv, and 5 snapshots t.deepEqual(remainingKV(), []); - t.is(kernelStorage.kvStore.get(`${vatID}.options`, undefined)); - t.is(remainingSnapshots(), 3); + t.is(kernelStorage.kvStore.get(`${vatID}.options`), undefined); + t.is(remainingSnapshots(), 0); + t.is(remainingTranscriptSpans(), 6); let ts = 59; if (mode === 'dieHappy') { ts = 60; } t.is(remainingTranscriptItems(), ts); - t.is(cleanups, 5); - - // there are three snapshots remaining. do a clean with budget=3, to - // exercise the bugfix where we'd call deleteVatTranscripts() with - // budget=0 by mistake - - cleanups = await clean(3); - t.is(cleanups, 3); - t.is(remainingSnapshots(), 0); - t.is(remainingTranscriptSpans(), 6); - t.is(remainingTranscriptItems(), ts); // the next clean (with the default budget of 5) will delete the // five most recent transcript spans, starting with the isCurrent=1 // one (which had 9 or 10 items), leaving just the earliest (which // had 4, due to `snapshotInitial`) - - cleanups = await clean(); + let cleanups = await clean(); t.is(cleanups, 5); t.is(remainingTranscriptSpans(), 1); t.is(remainingTranscriptItems(), 4); + // not quite done t.true(JSON.parse(kvStore.get('vats.terminated')).includes(vatID)); // the final clean deletes the remaining span, and finishes by // removing the "still being deleted" bookkeeping, and the .options cleanups = await clean(); + t.is(cleanups, 1); t.is(remainingTranscriptSpans(), 0); t.is(remainingTranscriptItems(), 0); t.is(remaining(), 0); - t.is(cleanups, 1); - t.false(JSON.parse(kvStore.get('vats.terminated')).includes(vatID)); }