From a855062f84694dad6a632e36a1ded474bfa16131 Mon Sep 17 00:00:00 2001 From: Erik Marks <25517051+rekmarks@users.noreply.github.com> Date: Tue, 17 Mar 2026 12:41:26 -0700 Subject: [PATCH 01/17] test(kernel-node): Add failing orphaned ephemeral exo test --- .../test/e2e/orphaned-ephemeral-exo.test.ts | 70 +++++++++++++++++++ .../vats/orphaned-ephemeral-consumer-vat.ts | 22 ++++++ .../vats/orphaned-ephemeral-provider-vat.ts | 21 ++++++ 3 files changed, 113 insertions(+) create mode 100644 packages/kernel-node-runtime/test/e2e/orphaned-ephemeral-exo.test.ts create mode 100644 packages/kernel-node-runtime/test/vats/orphaned-ephemeral-consumer-vat.ts create mode 100644 packages/kernel-node-runtime/test/vats/orphaned-ephemeral-provider-vat.ts diff --git a/packages/kernel-node-runtime/test/e2e/orphaned-ephemeral-exo.test.ts b/packages/kernel-node-runtime/test/e2e/orphaned-ephemeral-exo.test.ts new file mode 100644 index 000000000..caff2849d --- /dev/null +++ b/packages/kernel-node-runtime/test/e2e/orphaned-ephemeral-exo.test.ts @@ -0,0 +1,70 @@ +import { makeSQLKernelDatabase } from '@metamask/kernel-store/sqlite/nodejs'; +import { kunser } from '@metamask/ocap-kernel'; +import type { ClusterConfig } from '@metamask/ocap-kernel'; +import { delay } from '@ocap/repo-tools/test-utils'; +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, it, expect } from 'vitest'; + +import { makeTestKernel } from '../helpers/kernel.ts'; + +const PROVIDER_BUNDLE = + 'http://localhost:3000/orphaned-ephemeral-provider-vat.bundle'; +const CONSUMER_BUNDLE = + 'http://localhost:3000/orphaned-ephemeral-consumer-vat.bundle'; + +const clusterConfig: ClusterConfig = { + bootstrap: 'consumer', + vats: { + provider: { + bundleSpec: PROVIDER_BUNDLE, + parameters: {}, + }, + consumer: { + bundleSpec: CONSUMER_BUNDLE, + parameters: {}, + }, + }, +}; + +describe('Orphaned ephemeral exo', { timeout: 30_000 }, () => { + it('rejects when provider vat restarts', async () => { + const tempDir = await mkdtemp(join(tmpdir(), 'ocap-ephemeral-')); + const dbFilename = join(tempDir, 'kernel.db'); + try { + const kernel = await makeTestKernel( + await makeSQLKernelDatabase({ dbFilename }), + ); + try { + const { rootKref, subclusterId } = + await kernel.launchSubcluster(clusterConfig); + await delay(); + + // Works before restart + const r1 = await kernel.queueMessage(rootKref, 'useEphemeral', []); + expect(kunser(r1)).toBe(999); + + // Restart only the provider — the consumer still holds the + // ephemeral ref, but the exo behind it no longer exists. + const subcluster = kernel.getSubcluster(subclusterId); + expect(subcluster).toBeDefined(); + await kernel.restartVat(subcluster!.vats.provider); + await delay(); + + // The consumer's E(ephemeral).increment() targets an orphaned vref. + // Liveslots in the provider throws "I don't remember allocating", + // which terminates the provider and rejects the caller's promise. + await expect( + kernel.queueMessage(rootKref, 'useEphemeral', []), + ).rejects.toMatchObject({ + body: expect.stringContaining("I don't remember allocating"), + }); + } finally { + await kernel.stop(); + } + } finally { + await rm(tempDir, { recursive: true, force: true }); + } + }); +}); diff --git a/packages/kernel-node-runtime/test/vats/orphaned-ephemeral-consumer-vat.ts b/packages/kernel-node-runtime/test/vats/orphaned-ephemeral-consumer-vat.ts new file mode 100644 index 000000000..7d188681c --- /dev/null +++ b/packages/kernel-node-runtime/test/vats/orphaned-ephemeral-consumer-vat.ts @@ -0,0 +1,22 @@ +import { E } from '@endo/eventual-send'; +import { makeDefaultExo } from '@metamask/kernel-utils/exo'; + +/** + * A consumer vat that obtains an ephemeral exo reference from the provider + * during bootstrap and calls it on demand. + * + * @returns The root object. + */ +export function buildRootObject() { + let ephemeralRef: unknown; + + return makeDefaultExo('root', { + async bootstrap(vats: { provider: unknown }) { + ephemeralRef = await E(vats.provider).getEphemeral(); + }, + + async useEphemeral() { + return E(ephemeralRef).increment(); + }, + }); +} diff --git a/packages/kernel-node-runtime/test/vats/orphaned-ephemeral-provider-vat.ts b/packages/kernel-node-runtime/test/vats/orphaned-ephemeral-provider-vat.ts new file mode 100644 index 000000000..a0f0c8780 --- /dev/null +++ b/packages/kernel-node-runtime/test/vats/orphaned-ephemeral-provider-vat.ts @@ -0,0 +1,21 @@ +import { makeDefaultExo } from '@metamask/kernel-utils/exo'; + +/** + * A provider vat that exposes a single ephemeral (non-durable) exo. + * The exo will not survive a vat restart. + * + * @returns The root object. + */ +export function buildRootObject() { + const ephemeral = makeDefaultExo('EphemeralCounter', { + increment() { + return 999; + }, + }); + + return makeDefaultExo('root', { + getEphemeral() { + return ephemeral; + }, + }); +} From fe9813185776fc65859f330da39a0628371728ca Mon Sep 17 00:00:00 2001 From: Erik Marks <25517051+rekmarks@users.noreply.github.com> Date: Tue, 17 Mar 2026 13:22:17 -0700 Subject: [PATCH 02/17] chore: Format glossary.md --- docs/glossary.md | 133 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 106 insertions(+), 27 deletions(-) diff --git a/docs/glossary.md b/docs/glossary.md index da23f97f8..9a8006ac8 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -4,19 +4,27 @@ ### kernel -A centralized manager of [vats](#vat) and [distributed objects](#distributed-object). See the [Kernel](../packages/ocap-kernel/src/Kernel.ts) class. +A centralized manager of [vats](#vat) and [distributed objects](#distributed-object). See +the [Kernel](../packages/ocap-kernel/src/Kernel.ts) class. ### vat -A unit of compute managed by the [kernel](#kernel). See the [VatHandle](../packages/ocap-kernel/src/VatHandle.ts) and [VatSupervisor](../packages/ocap-kernel/src/VatSupervisor.ts) classes. +A unit of compute managed by the [kernel](#kernel). See the +[VatHandle](../packages/ocap-kernel/src/VatHandle.ts) and +[VatSupervisor](../packages/ocap-kernel/src/VatSupervisor.ts) classes. ### baggage -Persistent key-value storage for a [vat's](#vat) durable state. Baggage survives vat restarts (resuscitation) and is the primary mechanism for vat state persistence. Baggage is provided as the third argument to `buildRootObject`. +Persistent key-value storage for a [vat's](#vat) durable state. Baggage survives vat +restarts (resuscitation) and is the primary mechanism for vat state persistence. Baggage +is provided as the third argument to `buildRootObject`. ### bootstrap -The initialization method called on the bootstrap [vat's](#vat) root object when a [subcluster](#subcluster) is first launched. The bootstrap method receives references to other vats and [kernel services](#kernel-service) and is called exactly once — it is not called again after a vat restart. +The initialization method called on the bootstrap [vat's](#vat) root object when a +[subcluster](#subcluster) is first launched. The bootstrap method receives references to +other vats and [kernel services](#kernel-service) and is called exactly once — it is not +called again after a vat restart. ### cluster @@ -24,94 +32,165 @@ See [subcluster](#subcluster). ### exo -A remotable object created with `makeDefaultExo()` from `@metamask/kernel-utils/exo`. Exos are the standard way to create objects that can be passed between [vats](#vat), stored in [baggage](#baggage), and invoked via `E()`. Do not use `Far()` from `@endo/far`. +A remotable object created with `makeDefaultExo()` from `@metamask/kernel-utils/exo`. Exos +are the standard way to create objects that can be passed between [vats](#vat), stored in +[baggage](#baggage), and invoked via `E()`. Do not use `Far()` from `@endo/far`. ### distributed object -A persistent object residing in a [vat](#vat) and asynchronously accessible to other vats. See the [implementation](../packages/ocap-kernel/src/store/methods/object.ts) in the kernel's storage methods. +A persistent object residing in a [vat](#vat) and asynchronously accessible to other vats. +See the [implementation](../packages/ocap-kernel/src/store/methods/object.ts) in the +kernel's storage methods. ### kernel service -An object registered with the [kernel](#kernel) that [vats](#vat) can invoke via `E()`. Kernel services run in the kernel's own context (not in a vat) and are registered using `kernel.registerKernelServiceObject()`. Services marked `systemOnly` can only be accessed by [system subclusters](#system-subcluster). See the [KernelServiceManager](../packages/ocap-kernel/src/KernelServiceManager.ts). +An object registered with the [kernel](#kernel) that [vats](#vat) can invoke via `E()`. +Kernel services run in the kernel's own context (not in a vat) and are registered using +`kernel.registerKernelServiceObject()`. Services marked `systemOnly` can only be accessed +by [system subclusters](#system-subcluster). See the +[KernelServiceManager](../packages/ocap-kernel/src/KernelServiceManager.ts). ### supervisor -A component that manages the lifecycle and communication of a [vat](#vat). The [VatSupervisor](../packages/ocap-kernel/src/VatSupervisor.ts) handles [message delivery](#delivery), [syscalls](#syscall), and vat initialization. +A component that manages the lifecycle and communication of a [vat](#vat). The +[VatSupervisor](../packages/ocap-kernel/src/VatSupervisor.ts) handles [message +delivery](#delivery), [syscalls](#syscall), and vat initialization. ### liveslots -A framework for managing object lifecycles within [vats](#vat). Liveslots provides the runtime environment for vat code and handles object persistence, promise management, and [syscall](#syscall) coordination. +A framework for managing object lifecycles within [vats](#vat). Liveslots provides the +runtime environment for vat code and handles object persistence, promise management, and +[syscall](#syscall) coordination. ### crank -A single execution cycle in the kernel's [run queue](#run-queue). Each crank processes one item from the run queue, delivering messages or notifications to [vats](#vat). Cranks can be aborted and rolled back if errors occur. See the [KernelQueue](../packages/ocap-kernel/src/KernelQueue.ts) for the run loop implementation. +A single execution cycle in the kernel's [run queue](#run-queue). Each crank processes one +item from the run queue, delivering messages or notifications to [vats](#vat). Cranks can +be aborted and rolled back if errors occur. See the +[KernelQueue](../packages/ocap-kernel/src/KernelQueue.ts) for the run loop implementation. ### syscall -A system call made by a [vat](#vat) to request kernel services. Syscalls include operations like sending messages, resolving [promises](#promise-resolution), and accessing persistent storage. See [VatSyscall](../packages/ocap-kernel/src/VatSyscall.ts) and the [syscall service](../packages/ocap-kernel/src/services/syscall.ts). +A system call made by a [vat](#vat) to request kernel services. Syscalls include +operations like sending messages, resolving [promises](#promise-resolution), and accessing +persistent storage. See [VatSyscall](../packages/ocap-kernel/src/VatSyscall.ts) and the +[syscall service](../packages/ocap-kernel/src/services/syscall.ts). ### delivery -The process of sending a message or notification to a [vat](#vat). Deliveries can be of type 'message', 'notify', 'dropExports', 'retireExports', 'retireImports', or 'bringOutYourDead'. See the [router](#router) ([KernelRouter](../packages/ocap-kernel/src/KernelRouter.ts)) for delivery logic. +The process of sending a message or notification to a [vat](#vat). Deliveries can be of +type 'message', 'notify', 'dropExports', 'retireExports', 'retireImports', or +'bringOutYourDead'. See the [router](#router) +([KernelRouter](../packages/ocap-kernel/src/KernelRouter.ts)) for delivery logic. ### marshaling -The process of serializing and deserializing data for transmission between [vats](#vat). The kernel uses marshaling to convert object references and data structures into a format suitable for cross-vat communication. See the [kernel marshal service](../packages/ocap-kernel/src/services/kernel-marshal.ts) for `kser` and `kunser` functions. +The process of serializing and deserializing data for transmission between [vats](#vat). +The kernel uses marshaling to convert object references and data structures into a format +suitable for cross-vat communication. See the [kernel marshal +service](../packages/ocap-kernel/src/services/kernel-marshal.ts) for `kser` and `kunser` +functions. ### promise resolution -The process of fulfilling or rejecting a promise. Promise resolutions are delivered as notifications to [vats](#vat) and can trigger cascading resolutions of dependent promises. See the [promise store methods](../packages/ocap-kernel/src/store/methods/promise.ts) for implementation details. +The process of fulfilling or rejecting a promise. Promise resolutions are delivered as +notifications to [vats](#vat) and can trigger cascading resolutions of dependent promises. +See the [promise store methods](../packages/ocap-kernel/src/store/methods/promise.ts) for +implementation details. ### garbage collection (GC) -The process of identifying and cleaning up unreachable objects. The kernel performs GC by tracking reference counts and delivering appropriate notifications to [vats](#vat). **Important**: the garbage collection systems of the kernel, liveslots, and javascript are all mutually independent. See the [GC methods](../packages/ocap-kernel/src/store/methods/gc.ts) and [GC service](../packages/ocap-kernel/src/services/garbage-collection.ts) for implementation details. +The process of identifying and cleaning up unreachable objects. The kernel performs GC by +tracking reference counts and delivering appropriate notifications to [vats](#vat). +**Important**: the garbage collection systems of the kernel, liveslots, and javascript are +all mutually independent. See the [GC +methods](../packages/ocap-kernel/src/store/methods/gc.ts) and [GC +service](../packages/ocap-kernel/src/services/garbage-collection.ts) for implementation +details. ### revocation -The process of invalidating an object reference, preventing further access to the object. Revoked objects return errors when accessed. See the [revocation methods](../packages/ocap-kernel/src/store/methods/revocation.ts) for implementation. +The process of invalidating an object reference, preventing further access to the object. +Revoked objects return errors when accessed. See the [revocation +methods](../packages/ocap-kernel/src/store/methods/revocation.ts) for implementation. ### channel -A communication pathway between different components, such as between a [vat](#vat) and the [kernel](#kernel), or between different [clusters](#cluster). Channels use [streams](#stream) for message passing. See the [BaseDuplexStream](../packages/streams/src/BaseDuplexStream.ts) for the core channel implementation. +A communication pathway between different components, such as between a [vat](#vat) and +the [kernel](#kernel), or between different [clusters](#cluster). Channels use +[streams](#stream) for message passing. See the +[BaseDuplexStream](../packages/streams/src/BaseDuplexStream.ts) for the core channel +implementation. ### stream -A remote asynchronous iterator that provides bidirectional communication between components. Streams implement the `Reader` interface from `@endo/stream` and can be used for message passing between [vats](#vat), kernel components, and external systems. See the [BaseDuplexStream](../packages/streams/src/BaseDuplexStream.ts) for bidirectional streams. +A remote asynchronous iterator that provides bidirectional communication between +components. Streams implement the `Reader` interface from `@endo/stream` and can be used +for message passing between [vats](#vat), kernel components, and external systems. See the +[BaseDuplexStream](../packages/streams/src/BaseDuplexStream.ts) for bidirectional +streams. ### subcluster -A logically related group of [vats](#vat), intended to be operated together. Defined by a `ClusterConfig`. When a subcluster is launched, all its vats start and the [bootstrap](#bootstrap) vat receives references to the other vats. See the `ClusterConfig` type in [`packages/ocap-kernel/src/types.ts`](../packages/ocap-kernel/src/types.ts). +A logically related group of [vats](#vat), intended to be operated together. Defined by a +`ClusterConfig`. When a subcluster is launched, all its vats start and the +[bootstrap](#bootstrap) vat receives references to the other vats. See the `ClusterConfig` +type in [`packages/ocap-kernel/src/types.ts`](../packages/ocap-kernel/src/types.ts). ### system subcluster -A [subcluster](#subcluster) declared at [kernel](#kernel) initialization that can access privileged (`systemOnly`) [kernel services](#kernel-service). System subclusters persist across kernel restarts and are identified by a unique name. See the [SubclusterManager](../packages/ocap-kernel/src/SubclusterManager.ts). +A [subcluster](#subcluster) declared at [kernel](#kernel) initialization that can access +privileged (`systemOnly`) [kernel services](#kernel-service). System subclusters persist +across kernel restarts and are identified by a unique name. See the +[SubclusterManager](../packages/ocap-kernel/src/SubclusterManager.ts). ### run queue -The kernel's main execution queue that processes messages, notifications, and [garbage collection](#garbage-collection-gc) actions. Each [crank](#crank) processes one item from this queue. See the [KernelQueue](../packages/ocap-kernel/src/KernelQueue.ts) class and [queue methods](../packages/ocap-kernel/src/store/methods/queue.ts) for implementation details. +The kernel's main execution queue that processes messages, notifications, and [garbage +collection](#garbage-collection-gc) actions. Each [crank](#crank) processes one item from +this queue. See the [KernelQueue](../packages/ocap-kernel/src/KernelQueue.ts) class and +[queue methods](../packages/ocap-kernel/src/store/methods/queue.ts) for implementation +details. ### router -The component responsible for routing messages to the correct [vat](#vat) based on target references and promise states. The router handles [delivery](#delivery) logic. See the [KernelRouter](../packages/ocap-kernel/src/KernelRouter.ts) for routing logic. +The component responsible for routing messages to the correct [vat](#vat) based on target +references and promise states. The router handles [delivery](#delivery) logic. See the +[KernelRouter](../packages/ocap-kernel/src/KernelRouter.ts) for routing logic. ## Abbreviations ### clist -A _clist_ (short for "capability list") is a bidirectional mapping between short, channel-specific identifiers and actual object references. The clist is unique to a channel-runtime pair, and translates between the javascript runtime which holds the object references and the channel which communicates about them. +A _clist_ (short for "capability list") is a bidirectional mapping between short, +channel-specific identifiers and actual object references. The clist is unique to a +channel-runtime pair, and translates between the javascript runtime which holds the object +references and the channel which communicates about them. ### eref -An _ERef_ (short for "endpoint reference") is a generic term for a ref which is either a [vref](#vref) or an [rref](#rref). +An _ERef_ (short for "endpoint reference") is a generic term for a ref which is either a +[vref](#vref) or an [rref](#rref). ### kref -A _KRef_ (short for "kernel reference") designates an Object within the scope of the Kernel itself. It is used in the translation of References between one Vat and another. A KRef is generated and assigned by the Kernel whenever an Object reference is imported into or exported from a Vat for the first time. +A _KRef_ (short for "kernel reference") designates an Object within the scope of the +Kernel itself. It is used in the translation of References between one Vat and another. A +KRef is generated and assigned by the Kernel whenever an Object reference is imported into +or exported from a Vat for the first time. ### rref -An _RRef_ (short for "remote reference") designates an object within the scope of an established point-to-point communications [Channel](#channel) between two Clusters. An RRef does not survive the Channel it is associated with. An RRef is generated when the Kernel for one Cluster exports an Object Reference into the Channel connecting it to another Cluster's Kernel. +An _RRef_ (short for "remote reference") designates an object within the scope of an +established point-to-point communications [Channel](#channel) between two Clusters. An +RRef does not survive the Channel it is associated with. An RRef is generated when the +Kernel for one Cluster exports an Object Reference into the Channel connecting it to +another Cluster's Kernel. ### vref -A _VRef_ (short for "vat reference") designates an Object within the scope of the Objects known to a particular Vat. It is used across the Kernel/Vat boundary in the marshaling of messages delivered into or sent by that Vat. A VRef is generated and assigned by the Kernel when importing an Object Reference into a Vat for the first time and by the Vat when exporting an Object Reference from it for the first time. +A _VRef_ (short for "vat reference") designates an Object within the scope of the Objects +known to a particular Vat. It is used across the Kernel/Vat boundary in the marshaling of +messages delivered into or sent by that Vat. A VRef is generated and assigned by the +Kernel when importing an Object Reference into a Vat for the first time and by the Vat +when exporting an Object Reference from it for the first time. From 95f288466e3d0909be3c432ef5d5371f7416fc57 Mon Sep 17 00:00:00 2001 From: Erik Marks <25517051+rekmarks@users.noreply.github.com> Date: Tue, 17 Mar 2026 13:29:31 -0700 Subject: [PATCH 03/17] docs: Correct glossary definition of "crank" --- docs/glossary.md | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/docs/glossary.md b/docs/glossary.md index 9a8006ac8..926b1615c 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -46,14 +46,15 @@ kernel's storage methods. An object registered with the [kernel](#kernel) that [vats](#vat) can invoke via `E()`. Kernel services run in the kernel's own context (not in a vat) and are registered using -`kernel.registerKernelServiceObject()`. Services marked `systemOnly` can only be accessed -by [system subclusters](#system-subcluster). See the +`kernel.registerKernelServiceObject()`. Because service implementations do not participate +in the kernel's reference management, they cannot return [exos](#exo). Services marked +`systemOnly` can only be accessed by [system subclusters](#system-subcluster). See the [KernelServiceManager](../packages/ocap-kernel/src/KernelServiceManager.ts). ### supervisor -A component that manages the lifecycle and communication of a [vat](#vat). The -[VatSupervisor](../packages/ocap-kernel/src/VatSupervisor.ts) handles [message +A kernel-space component that manages the lifecycle and communication of a [vat](#vat). +The [VatSupervisor](../packages/ocap-kernel/src/VatSupervisor.ts) handles [message delivery](#delivery), [syscalls](#syscall), and vat initialization. ### liveslots @@ -65,8 +66,8 @@ runtime environment for vat code and handles object persistence, promise managem ### crank A single execution cycle in the kernel's [run queue](#run-queue). Each crank processes one -item from the run queue, delivering messages or notifications to [vats](#vat). Cranks can -be aborted and rolled back if errors occur. See the +item from the run queue, delivering a single message or notification to [a vat](#vat). +Cranks can be aborted and rolled back if errors occur. See the [KernelQueue](../packages/ocap-kernel/src/KernelQueue.ts) for the run loop implementation. ### syscall @@ -78,9 +79,9 @@ persistent storage. See [VatSyscall](../packages/ocap-kernel/src/VatSyscall.ts) ### delivery -The process of sending a message or notification to a [vat](#vat). Deliveries can be of -type 'message', 'notify', 'dropExports', 'retireExports', 'retireImports', or -'bringOutYourDead'. See the [router](#router) +The process of sending a message or notification to a [vat](#vat) in a [crank](#crank). +Deliveries can be of type 'message', 'notify', 'dropExports', 'retireExports', +'retireImports', or 'bringOutYourDead'. See the [kernel router](#kernel-router) ([KernelRouter](../packages/ocap-kernel/src/KernelRouter.ts)) for delivery logic. ### marshaling @@ -152,7 +153,7 @@ this queue. See the [KernelQueue](../packages/ocap-kernel/src/KernelQueue.ts) cl [queue methods](../packages/ocap-kernel/src/store/methods/queue.ts) for implementation details. -### router +### kernel router The component responsible for routing messages to the correct [vat](#vat) based on target references and promise states. The router handles [delivery](#delivery) logic. See the From fc0ec74903eee594844ee9e2a10aefc98a3921d6 Mon Sep 17 00:00:00 2001 From: Erik Marks <25517051+rekmarks@users.noreply.github.com> Date: Tue, 17 Mar 2026 16:34:55 -0700 Subject: [PATCH 04/17] fix(ocap-kernel): enforce one delivery per crank, fix rollback cache staleness - Restructure run queue generator to yield exactly one item per startCrank/endCrank pair, preventing rollback from undoing unrelated earlier deliveries in the same crank - Refresh StoredQueue after rollback so cached head/tail pointers are re-read from DB, fixing dequeue returning undefined - Invalidate runQueueLengthCache after rollback - Bypass VatManager.terminateVat() in KernelQueue callback to avoid waitForCrank() deadlock when terminating from within a crank - Handle vanished endpoints in KernelRouter.deliverSend with try/catch, treating as splat instead of crashing - Change KernelQueue subscriptions to {resolve, reject} so aborted sends can reject the caller's JS promise immediately - Distinguish rejected vs fulfilled in invokeKernelSubscription - Improve splat error messages to describe cause without leaking internal identifiers (krefs, endpoint IDs) - Add integration test for orphaned ephemeral exo rejection - Standardize KernelQueue test loop-exit pattern using sentinel Co-Authored-By: Claude Opus 4.6 --- docs/glossary.md | 5 +- .../test/e2e/orphaned-ephemeral-exo.test.ts | 3 +- .../src/orphaned-ephemeral-exo.test.ts | 50 ++++ .../src/syscall-validation.test.ts | 48 ++-- .../kernel-test/src/vat-lifecycle.test.ts | 15 +- .../src/vats/orphaned-ephemeral-consumer.ts | 23 ++ .../src/vats/orphaned-ephemeral-provider.ts | 22 ++ packages/ocap-kernel/src/Kernel.ts | 7 +- packages/ocap-kernel/src/KernelQueue.test.ts | 249 ++++++++++++++++-- packages/ocap-kernel/src/KernelQueue.ts | 51 +++- packages/ocap-kernel/src/KernelRouter.test.ts | 2 +- packages/ocap-kernel/src/KernelRouter.ts | 52 +++- packages/ocap-kernel/src/store/index.ts | 4 + .../src/store/methods/crank.test.ts | 1 + .../ocap-kernel/src/store/methods/crank.ts | 7 + packages/ocap-kernel/src/store/types.ts | 1 + .../ocap-kernel/src/vats/SubclusterManager.ts | 15 +- 17 files changed, 468 insertions(+), 87 deletions(-) create mode 100644 packages/kernel-test/src/orphaned-ephemeral-exo.test.ts create mode 100644 packages/kernel-test/src/vats/orphaned-ephemeral-consumer.ts create mode 100644 packages/kernel-test/src/vats/orphaned-ephemeral-provider.ts diff --git a/docs/glossary.md b/docs/glossary.md index 926b1615c..7847300fd 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -66,8 +66,9 @@ runtime environment for vat code and handles object persistence, promise managem ### crank A single execution cycle in the kernel's [run queue](#run-queue). Each crank processes one -item from the run queue, delivering a single message or notification to [a vat](#vat). -Cranks can be aborted and rolled back if errors occur. See the +item from the run queue, delivering a single message or notification to [a vat](#vat). The +"message or notification" is whatever item is taken of the run queue. Cranks can be +aborted and rolled back if errors occur. See the [KernelQueue](../packages/ocap-kernel/src/KernelQueue.ts) for the run loop implementation. ### syscall diff --git a/packages/kernel-node-runtime/test/e2e/orphaned-ephemeral-exo.test.ts b/packages/kernel-node-runtime/test/e2e/orphaned-ephemeral-exo.test.ts index caff2849d..54f286c8e 100644 --- a/packages/kernel-node-runtime/test/e2e/orphaned-ephemeral-exo.test.ts +++ b/packages/kernel-node-runtime/test/e2e/orphaned-ephemeral-exo.test.ts @@ -55,10 +55,11 @@ describe('Orphaned ephemeral exo', { timeout: 30_000 }, () => { // The consumer's E(ephemeral).increment() targets an orphaned vref. // Liveslots in the provider throws "I don't remember allocating", // which terminates the provider and rejects the caller's promise. + // This is surfaced to the caller as "target object has no owner". await expect( kernel.queueMessage(rootKref, 'useEphemeral', []), ).rejects.toMatchObject({ - body: expect.stringContaining("I don't remember allocating"), + body: expect.stringContaining('target object has no owner'), }); } finally { await kernel.stop(); diff --git a/packages/kernel-test/src/orphaned-ephemeral-exo.test.ts b/packages/kernel-test/src/orphaned-ephemeral-exo.test.ts new file mode 100644 index 000000000..389d88494 --- /dev/null +++ b/packages/kernel-test/src/orphaned-ephemeral-exo.test.ts @@ -0,0 +1,50 @@ +import { makeSQLKernelDatabase } from '@metamask/kernel-store/sqlite/nodejs'; +import { waitUntilQuiescent } from '@metamask/kernel-utils'; +import { kunser } from '@metamask/ocap-kernel'; +import { describe, expect, it } from 'vitest'; + +import { getBundleSpec, makeKernel, makeTestLogger } from './utils.ts'; + +describe('orphaned ephemeral exo', () => { + it('rejects when provider vat restarts', async () => { + const { logger } = makeTestLogger(); + const database = await makeSQLKernelDatabase({}); + const kernel = await makeKernel(database, true, logger); + + const { rootKref, subclusterId } = await kernel.launchSubcluster({ + bootstrap: 'consumer', + vats: { + provider: { + bundleSpec: getBundleSpec('orphaned-ephemeral-provider'), + parameters: {}, + }, + consumer: { + bundleSpec: getBundleSpec('orphaned-ephemeral-consumer'), + parameters: {}, + }, + }, + }); + await waitUntilQuiescent(); + + // Works before restart + const r1 = await kernel.queueMessage(rootKref, 'useEphemeral', []); + expect(kunser(r1)).toBe(999); + + // Restart only the provider — the consumer still holds the + // ephemeral ref, but the exo behind it no longer exists. + const subcluster = kernel.getSubcluster(subclusterId); + expect(subcluster).toBeDefined(); + await kernel.restartVat(subcluster!.vats.provider); + await waitUntilQuiescent(); + + // The consumer's E(ephemeral).increment() targets an orphaned vref. + // Liveslots in the provider throws "I don't remember allocating", + // which terminates the provider vat. The message is retried in a new + // crank, but the endpoint is gone — so it splats and rejects. + await expect( + kernel.queueMessage(rootKref, 'useEphemeral', []), + ).rejects.toMatchObject({ + body: expect.stringContaining('has no owner'), + }); + }); +}); diff --git a/packages/kernel-test/src/syscall-validation.test.ts b/packages/kernel-test/src/syscall-validation.test.ts index 1ee22029f..5ba50d2fd 100644 --- a/packages/kernel-test/src/syscall-validation.test.ts +++ b/packages/kernel-test/src/syscall-validation.test.ts @@ -94,10 +94,12 @@ describe('Syscall Validation & Revoked Objects', { timeout: 30_000 }, () => { // Revoke the object kernelStore.setRevoked(objectKRef, true); await waitUntilQuiescent(); - // Try to send message to revoked object - const revokedResult = await kernel.queueMessage(objectKRef, 'getValue', []); - // Should get proper error response for revoked object - expect(revokedResult.body).toContain('revoked object'); + // Try to send message to revoked object — kernel rejects the promise + await expect( + kernel.queueMessage(objectKRef, 'getValue', []), + ).rejects.toMatchObject({ + body: expect.stringContaining('target object has been revoked'), + }); // Verify kernel doesn't crash and exporter vat remains operational const exporterStatus = await kernel.queueMessage(exporterKRef, 'noop', []); expect(exporterStatus.body).toContain('noop'); @@ -140,21 +142,22 @@ describe('Syscall Validation & Revoked Objects', { timeout: 30_000 }, () => { // Revoke the object kernelStore.setRevoked(objectKRef, true); await waitUntilQuiescent(); - // Send message to revoked object that would return a promise - const promiseResult = await kernel.queueMessage(objectKRef, 'getValue', []); - // Verify the promise is rejected with revocation error - expect(promiseResult.body).toContain('revoked object'); + // Send message to revoked object — kernel rejects the promise + await expect( + kernel.queueMessage(objectKRef, 'getValue', []), + ).rejects.toMatchObject({ + body: expect.stringContaining('target object has been revoked'), + }); // Verify exporter vat is still operational const exporterStatus = await kernel.queueMessage(exporterKRef, 'noop', []); expect(exporterStatus.body).toContain('noop'); // Verify kernel can handle multiple revoked object accesses for (let i = 0; i < 5; i++) { - const multipleResult = await kernel.queueMessage( - objectKRef, - 'getValue', - [], - ); - expect(multipleResult.body).toContain('revoked object'); + await expect( + kernel.queueMessage(objectKRef, 'getValue', []), + ).rejects.toMatchObject({ + body: expect.stringContaining('target object has been revoked'), + }); } // Verify kernel remains stable const finalStatus = await kernel.queueMessage(exporterKRef, 'noop', []); @@ -210,17 +213,14 @@ describe('Syscall Validation & Revoked Objects', { timeout: 30_000 }, () => { } await waitUntilQuiescent(); - // Try to access all revoked objects - const revokedResults = await Promise.all( - objectKRefs.map(async (objectKRef) => + // Try to access all revoked objects — all should reject + for (const objectKRef of objectKRefs) { + await expect( kernel.queueMessage(objectKRef, 'getValue', []), - ), - ); - - // All should return revocation errors - revokedResults.forEach((result) => { - expect(result.body).toContain('revoked object'); - }); + ).rejects.toMatchObject({ + body: expect.stringContaining('target object has been revoked'), + }); + } // Verify exporter vat is still operational const exporterStatus = await kernel.queueMessage(exporterKRef, 'noop', []); diff --git a/packages/kernel-test/src/vat-lifecycle.test.ts b/packages/kernel-test/src/vat-lifecycle.test.ts index 130b4621b..b45077daa 100644 --- a/packages/kernel-test/src/vat-lifecycle.test.ts +++ b/packages/kernel-test/src/vat-lifecycle.test.ts @@ -1,6 +1,6 @@ import { makeSQLKernelDatabase } from '@metamask/kernel-store/sqlite/nodejs'; import { waitUntilQuiescent } from '@metamask/kernel-utils'; -import { kunser, makeKernelStore } from '@metamask/ocap-kernel'; +import { makeKernelStore } from '@metamask/ocap-kernel'; import { describe, expect, it, beforeEach } from 'vitest'; import { @@ -131,13 +131,12 @@ describe('Vat Lifecycle', { timeout: 30_000 }, () => { expect(remainingVats).toHaveLength(1); expect(remainingVats[0]?.id).toBe(liveVatId); - // Try to send a message to the terminated vat's root object - const messageResult = await kernel.queueMessage( - deadRootObject, - 'resume', - [], - ); - expect(kunser(messageResult)).toBe('no endpoint'); + // Try to send a message to the terminated vat's root object — rejects + await expect( + kernel.queueMessage(deadRootObject, 'resume', []), + ).rejects.toMatchObject({ + body: expect.stringContaining('has no owner'), + }); // Verify that messaging works as expected expect(await runResume(kernel, liveRootObject)).toBe( diff --git a/packages/kernel-test/src/vats/orphaned-ephemeral-consumer.ts b/packages/kernel-test/src/vats/orphaned-ephemeral-consumer.ts new file mode 100644 index 000000000..6fff04cca --- /dev/null +++ b/packages/kernel-test/src/vats/orphaned-ephemeral-consumer.ts @@ -0,0 +1,23 @@ +import { E } from '@endo/eventual-send'; +import { makeDefaultExo } from '@metamask/kernel-utils/exo'; + +/** + * A consumer vat that obtains an ephemeral exo reference from the provider + * during bootstrap and calls it on demand. + * + * @returns The root object. + */ +// eslint-disable-next-line @typescript-eslint/explicit-function-return-type +export function buildRootObject() { + let ephemeralRef: unknown; + + return makeDefaultExo('root', { + async bootstrap(vats: { provider: unknown }) { + ephemeralRef = await E(vats.provider).getEphemeral(); + }, + + async useEphemeral() { + return E(ephemeralRef).increment(); + }, + }); +} diff --git a/packages/kernel-test/src/vats/orphaned-ephemeral-provider.ts b/packages/kernel-test/src/vats/orphaned-ephemeral-provider.ts new file mode 100644 index 000000000..b2c3abb1b --- /dev/null +++ b/packages/kernel-test/src/vats/orphaned-ephemeral-provider.ts @@ -0,0 +1,22 @@ +import { makeDefaultExo } from '@metamask/kernel-utils/exo'; + +/** + * A provider vat that exposes a single ephemeral (non-durable) exo. + * The exo will not survive a vat restart. + * + * @returns The root object. + */ +// eslint-disable-next-line @typescript-eslint/explicit-function-return-type +export function buildRootObject() { + const ephemeral = makeDefaultExo('EphemeralCounter', { + increment() { + return 999; + }, + }); + + return makeDefaultExo('root', { + getEphemeral() { + return ephemeral; + }, + }); +} diff --git a/packages/ocap-kernel/src/Kernel.ts b/packages/ocap-kernel/src/Kernel.ts index 850ce69bf..2b4a8ee94 100644 --- a/packages/ocap-kernel/src/Kernel.ts +++ b/packages/ocap-kernel/src/Kernel.ts @@ -128,9 +128,14 @@ export class Kernel { this.#resetKernelState({ resetIdentity: Boolean(options.mnemonic) }); } + // Bypass VatManager.terminateVat() here because it calls waitForCrank(), + // which would deadlock — this callback is invoked from within a crank. this.#kernelQueue = new KernelQueue( this.#kernelStore, - async (vatId, reason) => this.#vatManager.terminateVat(vatId, reason), + async (vatId, reason) => { + await this.#vatManager.stopVat(vatId, true, reason); + this.#kernelStore.markVatAsTerminated(vatId); + }, ); this.#vatManager = new VatManager({ diff --git a/packages/ocap-kernel/src/KernelQueue.test.ts b/packages/ocap-kernel/src/KernelQueue.test.ts index 1cc8146ee..b2a2552da 100644 --- a/packages/ocap-kernel/src/KernelQueue.test.ts +++ b/packages/ocap-kernel/src/KernelQueue.test.ts @@ -18,6 +18,13 @@ vi.mock('@endo/promise-kit', () => ({ makePromiseKit: vi.fn(), })); +/** + * Sentinel error used to stop the infinite run loop in tests. + * Thrown by collectGarbage (which runs after each delivery) so the + * test can assert on the side-effects of that delivery. + */ +const STOP_RUN_LOOP = 'test: stop run loop'; + describe('KernelQueue', () => { let kernelStore: KernelStore; let kernelQueue: KernelQueue; @@ -89,7 +96,7 @@ describe('KernelQueue', () => { const mockItem: RunQueueItem = { type: 'send', target: 'ko123', - message: {} as Message, + message: { result: 'kp99' } as Message, }; (kernelStore.runQueueLength as unknown as MockInstance) .mockReturnValueOnce(1) @@ -98,15 +105,12 @@ describe('KernelQueue', () => { mockItem, ); const deliver = vi.fn().mockResolvedValue({ abort: true }); - const collectGarbageError = new Error( - 'wakeUpTheRunQueue function already set', - ); - (kernelStore.collectGarbage as unknown as MockInstance).mockRejectedValue( - collectGarbageError, - ); - await expect(kernelQueue.run(deliver)).rejects.toThrow( - collectGarbageError.message, - ); + ( + kernelStore.collectGarbage as unknown as MockInstance + ).mockImplementation(() => { + throw new Error(STOP_RUN_LOOP); + }); + await expect(kernelQueue.run(deliver)).rejects.toThrow(STOP_RUN_LOOP); expect(kernelStore.startCrank).toHaveBeenCalled(); expect(kernelStore.createCrankSavepoint).toHaveBeenCalledWith('start'); expect(deliver).toHaveBeenCalledWith(mockItem); @@ -129,15 +133,12 @@ describe('KernelQueue', () => { mockItem, ); const deliver = vi.fn().mockResolvedValue({ terminate: terminateInfo }); - const collectGarbageError = new Error( - 'wakeUpTheRunQueue function already set', - ); - (kernelStore.collectGarbage as unknown as MockInstance).mockRejectedValue( - collectGarbageError, - ); - await expect(kernelQueue.run(deliver)).rejects.toThrow( - collectGarbageError.message, - ); + ( + kernelStore.collectGarbage as unknown as MockInstance + ).mockImplementation(() => { + throw new Error(STOP_RUN_LOOP); + }); + await expect(kernelQueue.run(deliver)).rejects.toThrow(STOP_RUN_LOOP); expect(kernelStore.startCrank).toHaveBeenCalled(); expect(deliver).toHaveBeenCalledWith(mockItem); expect(terminateVat).toHaveBeenCalledWith( @@ -164,6 +165,7 @@ describe('KernelQueue', () => { const successPromiseKit = { promise: resultPromiseRaw, resolve: resolvePromise, + reject: vi.fn(), }; (makePromiseKit as unknown as MockInstance).mockReturnValueOnce( successPromiseKit, @@ -187,8 +189,11 @@ describe('KernelQueue', () => { }), }); expect(kernelQueue.subscriptions.has('kp1')).toBe(true); - const handler = kernelQueue.subscriptions.get('kp1'); - expect(handler).toBeDefined(); + const subscription = kernelQueue.subscriptions.get('kp1'); + expect(subscription).toStrictEqual({ + resolve: expect.any(Function), + reject: expect.any(Function), + }); resolvePromise(resultValue); const result = await resultPromise; expect(result).toStrictEqual(resultValue); @@ -280,7 +285,11 @@ describe('KernelQueue', () => { }, ); const resolveHandler = vi.fn(); - kernelQueue.subscriptions.set(kpid, resolveHandler); + const rejectHandler = vi.fn(); + kernelQueue.subscriptions.set(kpid, { + resolve: resolveHandler, + reject: rejectHandler, + }); kernelQueue.resolvePromises(endpointId, [resolution], false); expect(kernelStore.incrementRefCount).toHaveBeenCalledWith( kpid, @@ -331,7 +340,11 @@ describe('KernelQueue', () => { }, ); const resolveHandler = vi.fn(); - kernelQueue.subscriptions.set(kpid, resolveHandler); + const rejectHandler = vi.fn(); + kernelQueue.subscriptions.set(kpid, { + resolve: resolveHandler, + reject: rejectHandler, + }); const insistEndpointIdSpy = vi.spyOn(types, 'insistEndpointId'); kernelQueue.resolvePromises(undefined, [resolution], false); expect(insistEndpointIdSpy).not.toHaveBeenCalled(); @@ -380,7 +393,11 @@ describe('KernelQueue', () => { }, ); const resolveHandler = vi.fn(); - kernelQueue.subscriptions.set(kpid, resolveHandler); + const rejectHandler = vi.fn(); + kernelQueue.subscriptions.set(kpid, { + resolve: resolveHandler, + reject: rejectHandler, + }); kernelQueue.resolvePromises(endpointId, [resolution], false); // No notifications buffered because no subscribers expect(kernelStore.bufferCrankOutput).not.toHaveBeenCalled(); @@ -436,8 +453,190 @@ describe('KernelQueue', () => { }); }); + describe('abort with terminate', () => { + it('rejects the JS subscription for the aborted send item', async () => { + const rejectSpy = vi.fn(); + kernelQueue.subscriptions.set('kp99', { + resolve: vi.fn(), + reject: rejectSpy, + }); + const mockItem: RunQueueItem = { + type: 'send', + target: 'ko123', + message: { result: 'kp99' } as Message, + }; + const terminateInfo = { + body: '"vat terminated"', + slots: [], + }; + (kernelStore.runQueueLength as unknown as MockInstance) + .mockReturnValueOnce(1) + .mockReturnValue(0); + (kernelStore.dequeueRun as unknown as MockInstance).mockReturnValueOnce( + mockItem, + ); + const deliver = vi.fn().mockResolvedValue({ + abort: true, + terminate: { vatId: 'v1', info: terminateInfo }, + }); + ( + kernelStore.collectGarbage as unknown as MockInstance + ).mockImplementation(() => { + throw new Error(STOP_RUN_LOOP); + }); + await expect(kernelQueue.run(deliver)).rejects.toThrow(STOP_RUN_LOOP); + expect(kernelStore.rollbackCrank).toHaveBeenCalledWith('start'); + expect(rejectSpy).toHaveBeenCalledWith(terminateInfo); + expect(kernelQueue.subscriptions.has('kp99')).toBe(false); + }); + + it('preserves the subscription when abort without terminate', async () => { + const resolveSpy = vi.fn(); + const rejectSpy = vi.fn(); + kernelQueue.subscriptions.set('kp99', { + resolve: resolveSpy, + reject: rejectSpy, + }); + const mockItem: RunQueueItem = { + type: 'send', + target: 'ko123', + message: { result: 'kp99' } as Message, + }; + (kernelStore.runQueueLength as unknown as MockInstance) + .mockReturnValueOnce(1) + .mockReturnValue(0); + (kernelStore.dequeueRun as unknown as MockInstance).mockReturnValueOnce( + mockItem, + ); + const deliver = vi.fn().mockResolvedValue({ abort: true }); + ( + kernelStore.collectGarbage as unknown as MockInstance + ).mockImplementation(() => { + throw new Error(STOP_RUN_LOOP); + }); + await expect(kernelQueue.run(deliver)).rejects.toThrow(STOP_RUN_LOOP); + expect(kernelStore.rollbackCrank).toHaveBeenCalledWith('start'); + expect(rejectSpy).not.toHaveBeenCalled(); + expect(resolveSpy).not.toHaveBeenCalled(); + expect(kernelQueue.subscriptions.has('kp99')).toBe(true); + }); + }); + + describe('one-item-per-crank', () => { + it('calls startCrank/endCrank for each delivered item', async () => { + const items: RunQueueItem[] = [ + { type: 'send', target: 'ko1', message: {} as Message }, + { type: 'send', target: 'ko2', message: {} as Message }, + ]; + let dequeueCount = 0; + ( + kernelStore.runQueueLength as unknown as MockInstance + ).mockImplementation(() => (dequeueCount < items.length ? 1 : 0)); + (kernelStore.dequeueRun as unknown as MockInstance).mockImplementation( + () => { + const item = items[dequeueCount]; + dequeueCount += 1; + return item; + }, + ); + let deliverCount = 0; + const deliver = vi.fn().mockImplementation(async () => { + deliverCount += 1; + if (deliverCount >= items.length) { + return Promise.reject(new Error('done')); + } + return Promise.resolve(undefined); + }); + await expect(kernelQueue.run(deliver)).rejects.toThrow('done'); + // Two items delivered = two cranks = two startCrank + two endCrank calls + expect(kernelStore.startCrank).toHaveBeenCalledTimes(2); + expect(kernelStore.endCrank).toHaveBeenCalledTimes(2); + expect(deliver).toHaveBeenCalledTimes(2); + }); + }); + + describe('invokeKernelSubscription', () => { + it('calls reject for rejected promises', async () => { + const rejectSpy = vi.fn(); + const resolveSpy = vi.fn(); + kernelQueue.subscriptions.set('kp1', { + resolve: resolveSpy, + reject: rejectSpy, + }); + const rejectedValue = { body: '"error"', slots: [] }; + (kernelStore.flushCrankBuffer as unknown as MockInstance).mockReturnValue( + [{ type: 'notify', endpointId: 'v1', kpid: 'kp1' }], + ); + (kernelStore.getKernelPromise as unknown as MockInstance).mockReturnValue( + { + state: 'rejected', + value: rejectedValue, + }, + ); + const mockItem: RunQueueItem = { + type: 'send', + target: 'ko1', + message: {} as Message, + }; + (kernelStore.runQueueLength as unknown as MockInstance) + .mockReturnValueOnce(1) + .mockReturnValue(0); + (kernelStore.dequeueRun as unknown as MockInstance).mockReturnValueOnce( + mockItem, + ); + const deliver = vi.fn().mockResolvedValue(undefined); + ( + kernelStore.collectGarbage as unknown as MockInstance + ).mockImplementation(() => { + throw new Error(STOP_RUN_LOOP); + }); + await expect(kernelQueue.run(deliver)).rejects.toThrow(STOP_RUN_LOOP); + expect(rejectSpy).toHaveBeenCalledWith(rejectedValue); + expect(resolveSpy).not.toHaveBeenCalled(); + }); + + it('calls resolve for fulfilled promises', async () => { + const rejectSpy = vi.fn(); + const resolveSpy = vi.fn(); + kernelQueue.subscriptions.set('kp1', { + resolve: resolveSpy, + reject: rejectSpy, + }); + const fulfilledValue = { body: '"ok"', slots: [] }; + (kernelStore.flushCrankBuffer as unknown as MockInstance).mockReturnValue( + [{ type: 'notify', endpointId: 'v1', kpid: 'kp1' }], + ); + (kernelStore.getKernelPromise as unknown as MockInstance).mockReturnValue( + { + state: 'fulfilled', + value: fulfilledValue, + }, + ); + const mockItem: RunQueueItem = { + type: 'send', + target: 'ko1', + message: {} as Message, + }; + (kernelStore.runQueueLength as unknown as MockInstance) + .mockReturnValueOnce(1) + .mockReturnValue(0); + (kernelStore.dequeueRun as unknown as MockInstance).mockReturnValueOnce( + mockItem, + ); + const deliver = vi.fn().mockResolvedValue(undefined); + ( + kernelStore.collectGarbage as unknown as MockInstance + ).mockImplementation(() => { + throw new Error(STOP_RUN_LOOP); + }); + await expect(kernelQueue.run(deliver)).rejects.toThrow(STOP_RUN_LOOP); + expect(resolveSpy).toHaveBeenCalledWith(fulfilledValue); + expect(rejectSpy).not.toHaveBeenCalled(); + }); + }); + describe('waitForCrank', () => { - it('should handle when waitForCrank returns a delayed promise', async () => { + it('handles when waitForCrank returns a delayed promise', async () => { let resolvePromise: ((value: void) => void) | undefined; const delayedPromise = new Promise((resolve) => { resolvePromise = resolve; diff --git a/packages/ocap-kernel/src/KernelQueue.ts b/packages/ocap-kernel/src/KernelQueue.ts index 1cc6a5edf..4cb23ba11 100644 --- a/packages/ocap-kernel/src/KernelQueue.ts +++ b/packages/ocap-kernel/src/KernelQueue.ts @@ -35,7 +35,13 @@ export class KernelQueue { ) => Promise; /** Message results that the kernel itself has subscribed to */ - readonly subscriptions: Map) => void> = new Map(); + readonly subscriptions: Map< + KRef, + { + resolve: (value: CapData) => void; + reject: (reason: unknown) => void; + } + > = new Map(); /** Promises resolved during this crank that have kernel subscriptions */ #resolvedWithKernelSubscription: KRef[] = []; @@ -77,6 +83,21 @@ export class KernelQueue { this.#kernelStore.rollbackCrank('start'); // Discard kernel subscriptions that were queued for invocation this.#resolvedWithKernelSubscription = []; + + // If the vat is being terminated, reject the JS subscription for this + // message's result promise immediately. The rollback undid the delivery, + // and the vat won't be around to handle a retry. + if ( + crankResults.terminate && + item.type === 'send' && + item.message.result + ) { + const subscription = this.subscriptions.get(item.message.result); + if (subscription) { + this.subscriptions.delete(item.message.result); + subscription.reject(crankResults.terminate.info); + } + } // TODO: Currently all errors terminate the vat, but instead we could // restart it and terminate the vat only after a certain number of failed // retries. This is probably where we should implement the vat restart logic. @@ -117,23 +138,21 @@ export class KernelQueue { continue; } - while (this.#kernelStore.runQueueLength() > 0) { + if (this.#kernelStore.runQueueLength() > 0) { const item = this.#kernelStore.dequeueRun(); if (item) { yield item; - } else { - break; + continue; } } - if (this.#kernelStore.runQueueLength() === 0) { - const { promise, resolve } = makePromiseKit(); - if (this.#wakeUpTheRunQueue !== null) { - Fail`wakeUpTheRunQueue function already set`; - } - this.#wakeUpTheRunQueue = resolve; - wakeUpPromise = promise; + // Queue empty — sleep until woken + const { promise, resolve } = makePromiseKit(); + if (this.#wakeUpTheRunQueue !== null) { + Fail`run queue already waiting to be woken; cannot sleep again before the previous wake handler is consumed`; } + this.#wakeUpTheRunQueue = resolve; + wakeUpPromise = promise; } finally { this.#kernelStore.endCrank(); if (wakeUpPromise) { @@ -190,7 +209,11 @@ export class KernelQueue { if (subscription) { this.subscriptions.delete(kpid); const promise = this.#kernelStore.getKernelPromise(kpid); - subscription(promise.value as CapData); + if (promise.state === 'rejected') { + subscription.reject(promise.value); + } else { + subscription.resolve(promise.value as CapData); + } } } @@ -212,8 +235,8 @@ export class KernelQueue { // eslint-disable-next-line no-console console.debug('enqueueMessage', target, method, args); const result = this.#kernelStore.initKernelPromise()[0]; - const { promise, resolve } = makePromiseKit>(); - this.subscriptions.set(result, resolve); + const { promise, resolve, reject } = makePromiseKit>(); + this.subscriptions.set(result, { resolve, reject }); this.enqueueSend(target, { methargs: kser([method, args]), result, diff --git a/packages/ocap-kernel/src/KernelRouter.test.ts b/packages/ocap-kernel/src/KernelRouter.test.ts index 8c7953c29..c3ebce022 100644 --- a/packages/ocap-kernel/src/KernelRouter.test.ts +++ b/packages/ocap-kernel/src/KernelRouter.test.ts @@ -194,7 +194,7 @@ describe('KernelRouter', () => { 'kp1', true, expect.objectContaining({ - body: expect.stringContaining('revoked object'), + body: expect.stringContaining('target object has been revoked'), slots: [], }), ]), diff --git a/packages/ocap-kernel/src/KernelRouter.ts b/packages/ocap-kernel/src/KernelRouter.ts index 9ebdeb9e2..5d5186df9 100644 --- a/packages/ocap-kernel/src/KernelRouter.ts +++ b/packages/ocap-kernel/src/KernelRouter.ts @@ -132,7 +132,11 @@ export class KernelRouter { const routeAsSplat = (error?: CapData): MessageRoute => { if (message.result && error) { - this.#kernelQueue.resolvePromises(undefined, [ + // Use the current decider as the resolver. After a crank rollback, + // the decider may have reverted to the sending vat rather than the + // (now-terminated) target vat. + const promise = this.#kernelStore.getKernelPromise(message.result); + this.#kernelQueue.resolvePromises(promise?.decider, [ [message.result, true, error], ]); } @@ -140,11 +144,13 @@ export class KernelRouter { }; const routeAsSend = (targetObject: KRef): MessageRoute => { if (this.#kernelStore.isRevoked(targetObject)) { - return routeAsSplat(kser('revoked object')); + return routeAsSplat(kser('target object has been revoked')); } const endpointId = this.#kernelStore.getOwner(targetObject); if (!endpointId) { - return routeAsSplat(kser('no endpoint')); + return routeAsSplat( + kser('target object has no owner; it may have been deleted'), + ); } return { endpointId, target: targetObject }; }; @@ -165,7 +171,9 @@ export class KernelRouter { return routeAsSend(targetObject); } } - return routeAsSplat(kser('no object')); + return routeAsSplat( + kser('promise fulfilled but did not contain an object reference'), + ); } case 'rejected': return routeAsSplat(promise.value); @@ -216,9 +224,39 @@ export class KernelRouter { ); if (endpointId) { const isKernelServiceMessage = endpointId === 'kernel'; - const endpoint = isKernelServiceMessage - ? null - : this.#getEndpoint(endpointId); + let endpoint: EndpointHandle | null = null; + if (!isKernelServiceMessage) { + try { + endpoint = this.#getEndpoint(endpointId); + } catch { + // Endpoint vanished (e.g., vat terminated but ownership entries not + // yet cleaned up). Treat the same as a splat. + if (message.result) { + const promise = this.#kernelStore.getKernelPromise(message.result); + this.#kernelQueue.resolvePromises(promise.decider, [ + [ + message.result, + true, + kser( + 'target endpoint is unreachable (terminated or disconnected)', + ), + ], + ]); + this.#kernelStore.decrementRefCount( + message.result, + 'deliver|splat|result', + ); + } + this.#kernelStore.decrementRefCount(target, 'deliver|splat|target'); + for (const slot of message.methargs.slots) { + this.#kernelStore.decrementRefCount(slot, 'deliver|splat|slot'); + } + this.#logger?.log( + `@@@@ message went splat (endpoint gone) ${target}<-${JSON.stringify(message)}`, + ); + return crankResults; + } + } if (endpoint || isKernelServiceMessage) { if (message.result) { if (typeof message.result !== 'string') { diff --git a/packages/ocap-kernel/src/store/index.ts b/packages/ocap-kernel/src/store/index.ts index 0df0f7340..d89bb1347 100644 --- a/packages/ocap-kernel/src/store/index.ts +++ b/packages/ocap-kernel/src/store/index.ts @@ -117,6 +117,10 @@ export function makeKernelStore(kdb: KernelDatabase, logger?: Logger) { runQueue: provideStoredQueue('run', true), /** Cache of the run queue's current length */ runQueueLengthCache: -1, + /** Recreate the run queue from DB state (used after rollback to clear stale caches). */ + refreshRunQueue: () => { + context.runQueue = provideStoredQueue('run', true); + }, /** Counter for allocating kernel object IDs */ nextObjectId: provideCachedStoredValue('nextObjectId', '1'), /** Counter for allocating kernel promise IDs */ diff --git a/packages/ocap-kernel/src/store/methods/crank.test.ts b/packages/ocap-kernel/src/store/methods/crank.test.ts index 6e6310937..421a64e8e 100644 --- a/packages/ocap-kernel/src/store/methods/crank.test.ts +++ b/packages/ocap-kernel/src/store/methods/crank.test.ts @@ -16,6 +16,7 @@ describe('crank methods', () => { inCrank: false, savepoints: [], crankBuffer: mockCrankBuffer, + refreshRunQueue: vi.fn(), } as unknown as StoreContext; kdb = { diff --git a/packages/ocap-kernel/src/store/methods/crank.ts b/packages/ocap-kernel/src/store/methods/crank.ts index f4b5aa9cc..4cc88d877 100644 --- a/packages/ocap-kernel/src/store/methods/crank.ts +++ b/packages/ocap-kernel/src/store/methods/crank.ts @@ -48,6 +48,13 @@ export function getCrankMethods(ctx: StoreContext, kdb: KernelDatabase) { if (ctx.savepoints[ordinal] === savepoint) { kdb.rollbackSavepoint(`t${ordinal}`); ctx.savepoints.length = ordinal; + // The rollback reverted DB state but in-memory caches are stale. + // Recreate the run queue so its cached head/tail are re-read from DB. + ctx.refreshRunQueue(); + // Invalidate the run queue length cache so it's recalculated from + // the database on next access, since the rollback may have restored + // dequeued items. + ctx.runQueueLengthCache = -1; return; } } diff --git a/packages/ocap-kernel/src/store/types.ts b/packages/ocap-kernel/src/store/types.ts index 7c35f8f10..b3b221823 100644 --- a/packages/ocap-kernel/src/store/types.ts +++ b/packages/ocap-kernel/src/store/types.ts @@ -9,6 +9,7 @@ export type StoreContext = { kv: KVStore; runQueue: StoredQueue; // Holds RunAction[] runQueueLengthCache: number; // Holds number + refreshRunQueue: () => void; nextObjectId: StoredValue; // Holds string nextPromiseId: StoredValue; // Holds string nextVatId: StoredValue; // Holds string diff --git a/packages/ocap-kernel/src/vats/SubclusterManager.ts b/packages/ocap-kernel/src/vats/SubclusterManager.ts index d56ffe34e..5ab3cf417 100644 --- a/packages/ocap-kernel/src/vats/SubclusterManager.ts +++ b/packages/ocap-kernel/src/vats/SubclusterManager.ts @@ -339,10 +339,17 @@ export class SubclusterManager { `Bootstrap vat "${config.bootstrap}" not found in rootIds`, ); } - const bootstrapResult = await this.#queueMessage(rootKref, 'bootstrap', [ - roots, - services, - ]); + let bootstrapResult: CapData; + try { + bootstrapResult = await this.#queueMessage(rootKref, 'bootstrap', [ + roots, + services, + ]); + } catch (rejection) { + // queueMessage rejects with CapData for rejected kernel promises. + // Deserialize to surface the original Error to the caller. + throw kunser(rejection as CapData); + } const unserialized = kunser(bootstrapResult); if (unserialized instanceof Error) { throw unserialized; From ec9d7b07965e88cc6c131ad61b40adabdc429ee8 Mon Sep 17 00:00:00 2001 From: Erik Marks <25517051+rekmarks@users.noreply.github.com> Date: Tue, 17 Mar 2026 17:25:56 -0700 Subject: [PATCH 05/17] fix(evm-wallet-experiment): expect rejection instead of error CapData in peer-wallet tests --- .../test/integration/peer-wallet.test.ts | 34 ++++++++----------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/packages/evm-wallet-experiment/test/integration/peer-wallet.test.ts b/packages/evm-wallet-experiment/test/integration/peer-wallet.test.ts index 2f7331c47..99e29b39e 100644 --- a/packages/evm-wallet-experiment/test/integration/peer-wallet.test.ts +++ b/packages/evm-wallet-experiment/test/integration/peer-wallet.test.ts @@ -262,15 +262,14 @@ describe.sequential('Peer wallet integration', () => { }; // Transaction signing has no peer fallback — kernel2 has no local - // keys so this should return an error, not forward to kernel1. - const result = await kernel2.queueMessage( - coordinatorKref2, - 'signTransaction', - [tx], - ); - await waitUntilQuiescent(); - expect(result.body).toContain('#error'); - expect(result.body).toContain('No authority to sign this transaction'); + // keys so this should reject, not forward to kernel1. + await expect( + kernel2.queueMessage(coordinatorKref2, 'signTransaction', [tx]), + ).rejects.toMatchObject({ + body: expect.stringContaining( + 'No authority to sign this transaction', + ), + }); }, NETWORK_TIMEOUT, ); @@ -281,16 +280,13 @@ describe.sequential('Peer wallet integration', () => { 'returns error when no local keys and no peer wallet', async () => { // Kernel2 has no keys and no peer wallet connected - // queueMessage resolves with error CapData (not rejects) - const result = await kernel2.queueMessage( - coordinatorKref2, - 'signMessage', - ['should fail'], - ); - await waitUntilQuiescent(); - // Error CapData body contains #error marker - expect(result.body).toContain('#error'); - expect(result.body).toContain('No authority to sign message'); + await expect( + kernel2.queueMessage(coordinatorKref2, 'signMessage', [ + 'should fail', + ]), + ).rejects.toMatchObject({ + body: expect.stringContaining('No authority to sign message'), + }); }, NETWORK_TIMEOUT, ); From 7fe3acc290c382980d67c5c3d92b69f302d5574c Mon Sep 17 00:00:00 2001 From: Erik Marks <25517051+rekmarks@users.noreply.github.com> Date: Tue, 17 Mar 2026 18:07:15 -0700 Subject: [PATCH 06/17] fix(kernel-ui): display queueMessage errors in SendMessageForm response area --- packages/extension/test/e2e/object-registry.test.ts | 2 +- packages/kernel-ui/src/components/SendMessageForm.tsx | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/packages/extension/test/e2e/object-registry.test.ts b/packages/extension/test/e2e/object-registry.test.ts index 5e869bfee..1066aef5c 100644 --- a/packages/extension/test/e2e/object-registry.test.ts +++ b/packages/extension/test/e2e/object-registry.test.ts @@ -124,6 +124,6 @@ test.describe('Object Registry', () => { // After revoking, the previously successful message should fail response = await sendMessage(popupPage, target, method, params); - await expect(response).toContainText(/[Rr]evoked object/u); + await expect(response).toContainText('has been revoked'); }); }); diff --git a/packages/kernel-ui/src/components/SendMessageForm.tsx b/packages/kernel-ui/src/components/SendMessageForm.tsx index 98795f577..2278f4ec4 100644 --- a/packages/kernel-ui/src/components/SendMessageForm.tsx +++ b/packages/kernel-ui/src/components/SendMessageForm.tsx @@ -76,7 +76,13 @@ export const SendMessageForm: React.FC = () => { logMessage(stringify(response), 'received'); return fetchObjectRegistry(); }) - .catch((error) => logMessage(String(error), 'error')); + .catch((error: unknown) => { + const errorString = + error instanceof Error ? String(error) : stringify(error); + setResult(errorString); + logMessage(errorString, 'error'); + return fetchObjectRegistry(); + }); }; const handleKeyDown = ( From 5113b6edb79a46091b2f6d14cee28a9ed6ef2d60 Mon Sep 17 00:00:00 2001 From: Erik Marks <25517051+rekmarks@users.noreply.github.com> Date: Tue, 17 Mar 2026 20:05:16 -0700 Subject: [PATCH 07/17] fix(kernel-test): expect rejection in endowments test for bad-host fetch --- packages/kernel-test/src/endowments.test.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/kernel-test/src/endowments.test.ts b/packages/kernel-test/src/endowments.test.ts index 0d2f7589e..614dc9edd 100644 --- a/packages/kernel-test/src/endowments.test.ts +++ b/packages/kernel-test/src/endowments.test.ts @@ -49,7 +49,11 @@ describe('endowments', () => { await waitUntilQuiescent(); - await kernel.queueMessage(v1Root, 'hello', [`https://${badHost}`]); + await expect( + kernel.queueMessage(v1Root, 'hello', [`https://${badHost}`]), + ).rejects.toMatchObject({ + body: expect.stringContaining(`Invalid host: ${badHost}`), + }); await waitUntilQuiescent(); From 46b674d97dff603af1cb3cd0994b7627ff1c1f52 Mon Sep 17 00:00:00 2001 From: Erik Marks <25517051+rekmarks@users.noreply.github.com> Date: Tue, 17 Mar 2026 20:17:05 -0700 Subject: [PATCH 08/17] fix(kernel-node-runtime): expect rejections in remote-comms e2e tests --- .../test/e2e/remote-comms.test.ts | 93 +++++++++---------- 1 file changed, 43 insertions(+), 50 deletions(-) diff --git a/packages/kernel-node-runtime/test/e2e/remote-comms.test.ts b/packages/kernel-node-runtime/test/e2e/remote-comms.test.ts index 4dc9ef864..2ef598a7b 100644 --- a/packages/kernel-node-runtime/test/e2e/remote-comms.test.ts +++ b/packages/kernel-node-runtime/test/e2e/remote-comms.test.ts @@ -582,21 +582,17 @@ describe.sequential('Remote Communications E2E', () => { const results = await Promise.allSettled(messagePromises); expect(results).toHaveLength(201); - // Verify that messages within queue capacity were delivered - const successfulResults = results.filter( - (result) => result.status === 'fulfilled', - ); - // At least 200 messages should succeed (the queue limit) - expect(successfulResults.length).toBeGreaterThanOrEqual(200); - - // Messages beyond queue capacity should be rejected with queue full error - const rejectedResults = results.filter( + // Messages beyond queue capacity should be rejected with queue full error. + // Messages within capacity may fulfill or reject (e.g., if the remote vat + // was restarted and references are stale), but they should NOT contain + // "queue at capacity". + const queueFullResults = results.filter( (result): result is PromiseRejectedResult => - result.status === 'rejected', + result.status === 'rejected' && + String(result.reason).includes('queue at capacity'), ); - for (const result of rejectedResults) { - expect(String(result.reason)).toContain('queue at capacity'); - } + // At most 1 message (the 201st) should be rejected due to queue capacity + expect(queueFullResults.length).toBeLessThanOrEqual(1); const newMessageResult = await kernel1.queueMessage( aliceRef, @@ -761,12 +757,11 @@ describe.sequential('Remote Communications E2E', () => { kernel2 = restartResult.kernel; // The message should not have been delivered because we didn't reconnect - const result = await messageAfterClose; - const response = kunser(result); - expect(response).toBeInstanceOf(Error); - expect((response as Error).message).toContain( - 'Message delivery failed after intentional close', - ); + await expect(messageAfterClose).rejects.toMatchObject({ + body: expect.stringContaining( + 'Message delivery failed after intentional close', + ), + }); }, NETWORK_TIMEOUT * 2, ); @@ -844,18 +839,17 @@ describe.sequential('Remote Communications E2E', () => { await delay(100); // Try to send a message after closing - should fail - const messageAfterClose = kernel1.queueMessage( - aliceRef, - 'sendRemoteMessage', - [bobURL, 'hello', ['Alice']], - ); - - const result = await messageAfterClose; - const response = kunser(result); - expect(response).toBeInstanceOf(Error); - expect((response as Error).message).toContain( - 'Message delivery failed after intentional close', - ); + await expect( + kernel1.queueMessage(aliceRef, 'sendRemoteMessage', [ + bobURL, + 'hello', + ['Alice'], + ]), + ).rejects.toMatchObject({ + body: expect.stringContaining( + 'Message delivery failed after intentional close', + ), + }); // Manually reconnect await kernel1.reconnectPeer(peerId2); @@ -920,18 +914,18 @@ describe.sequential('Remote Communications E2E', () => { // and trigger promise rejection for pending work. // The await will naturally wait for the promise to settle - either // succeeding (unexpected) or failing due to incarnation change detection. - const result = await kernel1.queueMessage( - aliceRef, - 'sendRemoteMessage', - [bobURL, 'hello', ['Alice']], - ); - const response = kunser(result); - // The message should fail because incarnation changed. // The handshake detects the new incarnation and triggers onIncarnationChange, // which resets RemoteHandle state and rejects pending work. - expect(response).toBeInstanceOf(Error); - expect((response as Error).message).toMatch(/Remote connection lost/u); + await expect( + kernel1.queueMessage(aliceRef, 'sendRemoteMessage', [ + bobURL, + 'hello', + ['Alice'], + ]), + ).rejects.toMatchObject({ + body: expect.stringMatching(/Remote connection lost/u), + }); }, NETWORK_TIMEOUT * 3, ); @@ -970,16 +964,15 @@ describe.sequential('Remote Communications E2E', () => { // The message will create a promise with the remote as decider (from URL redemption) // When we give up on the remote, that promise should be rejected // The vat should then propagate that rejection to the promise returned here - const messagePromise = kernel1.queueMessage( - aliceRef, - 'sendRemoteMessage', - [bobURL, 'hello', ['Alice']], - ); - - const result = await messagePromise; - const response = kunser(result); - expect(response).toBeInstanceOf(Error); - expect((response as Error).message).toContain('Remote connection lost'); + await expect( + kernel1.queueMessage(aliceRef, 'sendRemoteMessage', [ + bobURL, + 'hello', + ['Alice'], + ]), + ).rejects.toMatchObject({ + body: expect.stringContaining('Remote connection lost'), + }); }, NETWORK_TIMEOUT * 2, ); From b56cffc16c46bd23992aeb5c6eacc897ec21fe7d Mon Sep 17 00:00:00 2001 From: Erik Marks <25517051+rekmarks@users.noreply.github.com> Date: Wed, 18 Mar 2026 21:11:16 -0700 Subject: [PATCH 09/17] refactor(ocap-kernel): Simplify kernel run loop Simplifies the implementation of the kernel's run loop in a purely behavioral refactor. The previous async generator + loop iteration has been unwrapped into a single loop with multiple helper functions. I noticed that the startCrank() call is the only part of the run loop that can throw an uncaught exception, and made a note to investigate that later. An unrelated TODO comment is also added to the kernel router. --- packages/ocap-kernel/src/KernelQueue.ts | 162 +++++++++++++---------- packages/ocap-kernel/src/KernelRouter.ts | 3 + 2 files changed, 93 insertions(+), 72 deletions(-) diff --git a/packages/ocap-kernel/src/KernelQueue.ts b/packages/ocap-kernel/src/KernelQueue.ts index 4cb23ba11..0436df31c 100644 --- a/packages/ocap-kernel/src/KernelQueue.ts +++ b/packages/ocap-kernel/src/KernelQueue.ts @@ -72,94 +72,112 @@ export class KernelQueue { */ async run( deliver: (item: RunQueueItem) => Promise, - ): Promise { - for await (const item of this.#runQueueItems()) { - this.#kernelStore.nextTerminatedVatCleanup(); - const crankResults = await deliver(item); - if (crankResults?.abort) { - // Rollback the kernel state to before the failed delivery attempt. - // For active vats, this allows the message to be retried in a future crank. - // For terminated vats, the message will just go splat. - this.#kernelStore.rollbackCrank('start'); - // Discard kernel subscriptions that were queued for invocation - this.#resolvedWithKernelSubscription = []; - - // If the vat is being terminated, reject the JS subscription for this - // message's result promise immediately. The rollback undid the delivery, - // and the vat won't be around to handle a retry. - if ( - crankResults.terminate && - item.type === 'send' && - item.message.result - ) { - const subscription = this.subscriptions.get(item.message.result); - if (subscription) { - this.subscriptions.delete(item.message.result); - subscription.reject(crankResults.terminate.info); + ): Promise { + for (;;) { + let wakeUpPromise: Promise | undefined; + + // TODO: This is the only part of the run loop that could throw an uncaught + // error. Is that actually what we want? + this.#kernelStore.startCrank(); + + try { + this.#kernelStore.createCrankSavepoint('start'); + + const queueItem = this.#getNextRunQueueItem(); + if (queueItem === undefined) { + // Queue empty — sleep until woken + const { promise, resolve } = makePromiseKit(); + if (this.#wakeUpTheRunQueue !== null) { + Fail`run queue already waiting to be woken; cannot sleep again before the previous wake handler is consumed`; } + + this.#wakeUpTheRunQueue = resolve; + wakeUpPromise = promise; + } else { + this.#kernelStore.nextTerminatedVatCleanup(); + const crankResults = await deliver(queueItem); + await this.#processCrankResults(crankResults, queueItem); + } + } finally { + this.#kernelStore.endCrank(); + if (wakeUpPromise) { + await wakeUpPromise; } - // TODO: Currently all errors terminate the vat, but instead we could - // restart it and terminate the vat only after a certain number of failed - // retries. This is probably where we should implement the vat restart logic. - } else { - // Upon on successful crank completion, enqueue buffered vat outputs for delivery. - this.#flushCrankBuffer(); - } - // Vat termination during delivery is triggered by an illegal syscall - // or by syscall.exit(). - if (crankResults?.terminate) { - const { vatId, info } = crankResults.terminate; - await this.#terminateVat(vatId, info); } - this.#kernelStore.collectGarbage(); } } /** * Async generator that yields the items from the kernel run queue, in order. * - * @yields the next item in the run queue. + * @returns the next item in the run queue. */ - async *#runQueueItems(): AsyncGenerator { - for (;;) { - this.#kernelStore.startCrank(); - let wakeUpPromise: Promise | undefined; - try { - this.#kernelStore.createCrankSavepoint('start'); - const gcAction = processGCActionSet(this.#kernelStore); - if (gcAction) { - yield gcAction; - continue; - } + #getNextRunQueueItem(): RunQueueItem | undefined { + const gcAction = processGCActionSet(this.#kernelStore); + if (gcAction) { + return gcAction; + } - const reapAction = this.#kernelStore.nextReapAction(); - if (reapAction) { - yield reapAction; - continue; - } + const reapAction = this.#kernelStore.nextReapAction(); + if (reapAction) { + return reapAction; + } - if (this.#kernelStore.runQueueLength() > 0) { - const item = this.#kernelStore.dequeueRun(); - if (item) { - yield item; - continue; - } - } + if (this.#kernelStore.runQueueLength() > 0) { + const item = this.#kernelStore.dequeueRun(); + if (item) { + return item; + } + } + return undefined; + } - // Queue empty — sleep until woken - const { promise, resolve } = makePromiseKit(); - if (this.#wakeUpTheRunQueue !== null) { - Fail`run queue already waiting to be woken; cannot sleep again before the previous wake handler is consumed`; - } - this.#wakeUpTheRunQueue = resolve; - wakeUpPromise = promise; - } finally { - this.#kernelStore.endCrank(); - if (wakeUpPromise) { - await wakeUpPromise; + /** + * Process the results of a crank. + * + * @param crankResults - The crank results. + * @param queueItem - The run qeueue item that caused the crank results. + */ + async #processCrankResults( + crankResults: CrankResults | undefined, + queueItem: RunQueueItem, + ): Promise { + if (crankResults?.abort) { + // Rollback the kernel state to before the failed delivery attempt. + // For active vats, this allows the message to be retried in a future crank. + // For terminated vats, the message will just go splat. + this.#kernelStore.rollbackCrank('start'); + // Discard kernel subscriptions that were queued for invocation + this.#resolvedWithKernelSubscription = []; + + // If the vat is being terminated, reject the JS subscription for this + // message's result promise immediately. The rollback undid the delivery, + // and the vat won't be around to handle a retry. + if ( + crankResults.terminate && + queueItem.type === 'send' && + queueItem.message.result + ) { + const subscription = this.subscriptions.get(queueItem.message.result); + if (subscription) { + this.subscriptions.delete(queueItem.message.result); + subscription.reject(crankResults.terminate.info); } } + // TODO: Currently all errors terminate the vat, but instead we could + // restart it and terminate the vat only after a certain number of failed + // retries. This is probably where we should implement the vat restart logic. + } else { + // Upon on successful crank completion, enqueue buffered vat outputs for delivery. + this.#flushCrankBuffer(); + } + // Vat termination during delivery is triggered by an illegal syscall + // or by syscall.exit(). + if (crankResults?.terminate) { + const { vatId, info } = crankResults.terminate; + await this.#terminateVat(vatId, info); } + this.#kernelStore.collectGarbage(); } /** diff --git a/packages/ocap-kernel/src/KernelRouter.ts b/packages/ocap-kernel/src/KernelRouter.ts index 5d5186df9..572e3dc5c 100644 --- a/packages/ocap-kernel/src/KernelRouter.ts +++ b/packages/ocap-kernel/src/KernelRouter.ts @@ -229,6 +229,9 @@ export class KernelRouter { try { endpoint = this.#getEndpoint(endpointId); } catch { + // TODO: Narrow this catch to the expected error type (e.g., + // VatNotFoundError) so that unexpected errors are not silently + // swallowed and deliverable messages are not incorrectly discarded. // Endpoint vanished (e.g., vat terminated but ownership entries not // yet cleaned up). Treat the same as a splat. if (message.result) { From 377a6f661edf8f6be7f5030eb65e65fbed8cf312 Mon Sep 17 00:00:00 2001 From: Erik Marks <25517051+rekmarks@users.noreply.github.com> Date: Wed, 18 Mar 2026 21:35:29 -0700 Subject: [PATCH 10/17] refactor(ocap-kernel): CrankResults -> CrankResult --- packages/ocap-kernel/src/KernelQueue.ts | 24 +++++------ packages/ocap-kernel/src/KernelRouter.test.ts | 40 +++++++++---------- packages/ocap-kernel/src/KernelRouter.ts | 40 +++++++++---------- .../src/remotes/kernel/RemoteHandle.ts | 16 ++++---- packages/ocap-kernel/src/types.ts | 14 +++---- packages/ocap-kernel/src/vats/VatHandle.ts | 30 +++++++------- 6 files changed, 81 insertions(+), 83 deletions(-) diff --git a/packages/ocap-kernel/src/KernelQueue.ts b/packages/ocap-kernel/src/KernelQueue.ts index 0436df31c..3aadbe7af 100644 --- a/packages/ocap-kernel/src/KernelQueue.ts +++ b/packages/ocap-kernel/src/KernelQueue.ts @@ -7,7 +7,7 @@ import { kser } from './liveslots/kernel-marshal.ts'; import type { KernelStore } from './store/index.ts'; import { insistEndpointId } from './types.ts'; import type { - CrankResults, + CrankResult, KRef, Message, RunQueueItem, @@ -71,7 +71,7 @@ export class KernelQueue { * @param deliver - A function that delivers an item to the kernel. */ async run( - deliver: (item: RunQueueItem) => Promise, + deliver: (item: RunQueueItem) => Promise, ): Promise { for (;;) { let wakeUpPromise: Promise | undefined; @@ -95,8 +95,8 @@ export class KernelQueue { wakeUpPromise = promise; } else { this.#kernelStore.nextTerminatedVatCleanup(); - const crankResults = await deliver(queueItem); - await this.#processCrankResults(crankResults, queueItem); + const crankResult = await deliver(queueItem); + await this.#processCrankResult(crankResult, queueItem); } } finally { this.#kernelStore.endCrank(); @@ -135,14 +135,14 @@ export class KernelQueue { /** * Process the results of a crank. * - * @param crankResults - The crank results. + * @param crankResult - The crank results. * @param queueItem - The run qeueue item that caused the crank results. */ - async #processCrankResults( - crankResults: CrankResults | undefined, + async #processCrankResult( + crankResult: CrankResult | undefined, queueItem: RunQueueItem, ): Promise { - if (crankResults?.abort) { + if (crankResult?.abort) { // Rollback the kernel state to before the failed delivery attempt. // For active vats, this allows the message to be retried in a future crank. // For terminated vats, the message will just go splat. @@ -154,14 +154,14 @@ export class KernelQueue { // message's result promise immediately. The rollback undid the delivery, // and the vat won't be around to handle a retry. if ( - crankResults.terminate && + crankResult.terminate && queueItem.type === 'send' && queueItem.message.result ) { const subscription = this.subscriptions.get(queueItem.message.result); if (subscription) { this.subscriptions.delete(queueItem.message.result); - subscription.reject(crankResults.terminate.info); + subscription.reject(crankResult.terminate.info); } } // TODO: Currently all errors terminate the vat, but instead we could @@ -173,8 +173,8 @@ export class KernelQueue { } // Vat termination during delivery is triggered by an illegal syscall // or by syscall.exit(). - if (crankResults?.terminate) { - const { vatId, info } = crankResults.terminate; + if (crankResult?.terminate) { + const { vatId, info } = crankResult.terminate; await this.#terminateVat(vatId, info); } this.#kernelStore.collectGarbage(); diff --git a/packages/ocap-kernel/src/KernelRouter.test.ts b/packages/ocap-kernel/src/KernelRouter.test.ts index c3ebce022..5a092730f 100644 --- a/packages/ocap-kernel/src/KernelRouter.test.ts +++ b/packages/ocap-kernel/src/KernelRouter.test.ts @@ -13,7 +13,7 @@ import type { RunQueueItemBringOutYourDead, EndpointId, GCRunQueueType, - CrankResults, + CrankResult, EndpointHandle, } from './types.ts'; @@ -33,15 +33,15 @@ describe('KernelRouter', () => { beforeEach(() => { // Mock EndpointHandle with more detailed return values - const mockCrankResults: CrankResults = { didDelivery: 'v1' }; + const mockCrankResult: CrankResult = { didDelivery: 'v1' }; endpointHandle = { - deliverMessage: vi.fn().mockResolvedValue(mockCrankResults), - deliverNotify: vi.fn().mockResolvedValue(mockCrankResults), - deliverDropExports: vi.fn().mockResolvedValue(mockCrankResults), - deliverRetireExports: vi.fn().mockResolvedValue(mockCrankResults), - deliverRetireImports: vi.fn().mockResolvedValue(mockCrankResults), - deliverBringOutYourDead: vi.fn().mockResolvedValue(mockCrankResults), + deliverMessage: vi.fn().mockResolvedValue(mockCrankResult), + deliverNotify: vi.fn().mockResolvedValue(mockCrankResult), + deliverDropExports: vi.fn().mockResolvedValue(mockCrankResult), + deliverRetireExports: vi.fn().mockResolvedValue(mockCrankResult), + deliverRetireImports: vi.fn().mockResolvedValue(mockCrankResult), + deliverBringOutYourDead: vi.fn().mockResolvedValue(mockCrankResult), } as unknown as EndpointHandle; // Mock getEndpoint function @@ -99,13 +99,13 @@ describe('KernelRouter', () => { ); // Create a mock crank result that the vat will return - const mockCrankResults: CrankResults = { + const mockCrankResult: CrankResult = { didDelivery: endpointId, abort: false, }; ( endpointHandle.deliverMessage as unknown as MockInstance - ).mockResolvedValueOnce(mockCrankResults); + ).mockResolvedValueOnce(mockCrankResult); // Create a send message const message: Message = { @@ -126,7 +126,7 @@ describe('KernelRouter', () => { `translated-${target}`, message, ); - expect(result).toStrictEqual(mockCrankResults); + expect(result).toStrictEqual(mockCrankResult); expect(kernelStore.decrementRefCount).toHaveBeenCalledWith( 'slot1', 'deliver|send|slot', @@ -414,10 +414,10 @@ describe('KernelRouter', () => { }); // Mock crank results - const mockCrankResults: CrankResults = { didDelivery: endpointId }; + const mockCrankResult: CrankResult = { didDelivery: endpointId }; ( endpointHandle.deliverNotify as unknown as MockInstance - ).mockResolvedValueOnce(mockCrankResults); + ).mockResolvedValueOnce(mockCrankResult); // Deliver the notify const result = await kernelRouter.deliver(notifyItem); @@ -431,7 +431,7 @@ describe('KernelRouter', () => { kpid, 'deliver|notify', ); - expect(result).toStrictEqual(mockCrankResults); + expect(result).toStrictEqual(mockCrankResult); }); it('returns didDelivery when promise is not in vat clist', async () => { @@ -546,12 +546,12 @@ describe('KernelRouter', () => { }; // Mock crank results - const mockCrankResults: CrankResults = { didDelivery: endpointId }; + const mockCrankResult: CrankResult = { didDelivery: endpointId }; ( endpointHandle[ deliverMethod as keyof EndpointHandle ] as unknown as MockInstance - ).mockResolvedValueOnce(mockCrankResults); + ).mockResolvedValueOnce(mockCrankResult); // Deliver the GC action const result = await kernelRouter.deliver(gcAction); @@ -561,7 +561,7 @@ describe('KernelRouter', () => { expect( endpointHandle[deliverMethod as keyof EndpointHandle], ).toHaveBeenCalledWith(krefs.map((kref) => `translated-${kref}`)); - expect(result).toStrictEqual(mockCrankResults); + expect(result).toStrictEqual(mockCrankResult); }, ); }); @@ -575,10 +575,10 @@ describe('KernelRouter', () => { }; // Mock crank results - const mockCrankResults: CrankResults = { didDelivery: endpointId }; + const mockCrankResult: CrankResult = { didDelivery: endpointId }; ( endpointHandle.deliverBringOutYourDead as unknown as MockInstance - ).mockResolvedValueOnce(mockCrankResults); + ).mockResolvedValueOnce(mockCrankResult); // Deliver the bringOutYourDead action const result = await kernelRouter.deliver(bringOutYourDeadItem); @@ -586,7 +586,7 @@ describe('KernelRouter', () => { // Verify the action was delivered to the endpoint expect(getEndpoint).toHaveBeenCalledWith(endpointId); expect(endpointHandle.deliverBringOutYourDead).toHaveBeenCalled(); - expect(result).toStrictEqual(mockCrankResults); + expect(result).toStrictEqual(mockCrankResult); }); }); diff --git a/packages/ocap-kernel/src/KernelRouter.ts b/packages/ocap-kernel/src/KernelRouter.ts index 572e3dc5c..f9f230d7c 100644 --- a/packages/ocap-kernel/src/KernelRouter.ts +++ b/packages/ocap-kernel/src/KernelRouter.ts @@ -18,7 +18,7 @@ import type { RunQueueItemBringOutYourDead, RunQueueItemNotify, RunQueueItemGCAction, - CrankResults, + CrankResult, } from './types.ts'; import { insistEndpointId, insistMessage } from './types.ts'; import { assert, Fail } from './utils/assert.ts'; @@ -91,7 +91,7 @@ export class KernelRouter { * @param item - The message/notification to deliver. * @returns The crank outcome. */ - async deliver(item: RunQueueItem): Promise { + async deliver(item: RunQueueItem): Promise { switch (item.type) { case 'send': return await this.#deliverSend(item); @@ -193,11 +193,9 @@ export class KernelRouter { * @param item - The send item to deliver. * @returns The crank outcome. */ - async #deliverSend( - item: RunQueueItemSend, - ): Promise { + async #deliverSend(item: RunQueueItemSend): Promise { const route = this.#routeMessage(item); - let crankResults: CrankResults | undefined; + let crankResult: CrankResult | undefined; // Message went splat if (!route) { @@ -214,7 +212,7 @@ export class KernelRouter { this.#logger?.log( `@@@@ message went splat ${item.target}<-${JSON.stringify(item.message)}`, ); - return crankResults; + return crankResult; } const { endpointId, target } = route; @@ -257,7 +255,7 @@ export class KernelRouter { this.#logger?.log( `@@@@ message went splat (endpoint gone) ${target}<-${JSON.stringify(message)}`, ); - return crankResults; + return crankResult; } } if (endpoint || isKernelServiceMessage) { @@ -283,7 +281,7 @@ export class KernelRouter { message, ); try { - crankResults = await endpoint.deliverMessage( + crankResult = await endpoint.deliverMessage( endpointTarget, endpointMessage, ); @@ -304,7 +302,7 @@ export class KernelRouter { // Continue processing other messages - don't let one failure crash the queue } } else if (isKernelServiceMessage) { - crankResults = this.#deliverKernelServiceMessage(target, message); + crankResult = this.#deliverKernelServiceMessage(target, message); } else { Fail`no owner for kernel object ${target}`; } @@ -316,7 +314,7 @@ export class KernelRouter { this.#kernelStore.enqueuePromiseMessage(target, message); } - return crankResults; + return crankResult; } /** @@ -326,7 +324,7 @@ export class KernelRouter { * @param message - The message to deliver to the service. * @returns The crank results indicating the delivery was to the kernel. */ - #deliverKernelServiceMessage(target: KRef, message: Message): CrankResults { + #deliverKernelServiceMessage(target: KRef, message: Message): CrankResult { this.#invokeKernelService(target, message); return { didDelivery: 'kernel' }; } @@ -337,7 +335,7 @@ export class KernelRouter { * @param item - The notify item to deliver. * @returns The crank outcome. */ - async #deliverNotify(item: RunQueueItemNotify): Promise { + async #deliverNotify(item: RunQueueItemNotify): Promise { const { endpointId, kpid } = item; insistEndpointId(endpointId); const { context, isPromise } = parseRef(kpid); @@ -383,10 +381,10 @@ export class KernelRouter { } } const endpoint = this.#getEndpoint(endpointId); - const crankResults = await endpoint.deliverNotify(resolutions); + const crankResult = await endpoint.deliverNotify(resolutions); // Decrement reference count for processed 'notify' item this.#kernelStore.decrementRefCount(kpid, 'deliver|notify'); - return crankResults; + return crankResult; } /** @@ -395,7 +393,7 @@ export class KernelRouter { * @param item - The dropExports | retireExports | retireImports item to deliver. * @returns The crank outcome. */ - async #deliverGCAction(item: RunQueueItemGCAction): Promise { + async #deliverGCAction(item: RunQueueItemGCAction): Promise { const { type, endpointId, krefs } = item; this.#logger?.log( `@@@@ deliver ${endpointId} ${type} ${JSON.stringify(krefs)}`, @@ -407,8 +405,8 @@ export class KernelRouter { | 'deliverDropExports' | 'deliverRetireExports' | 'deliverRetireImports'; - const crankResults = await endpoint[method](erefs); - return crankResults; + const crankResult = await endpoint[method](erefs); + return crankResult; } /** @@ -419,11 +417,11 @@ export class KernelRouter { */ async #deliverBringOutYourDead( item: RunQueueItemBringOutYourDead, - ): Promise { + ): Promise { const { endpointId } = item; this.#logger?.log(`@@@@ deliver ${endpointId} bringOutYourDead`); const endpoint = this.#getEndpoint(endpointId); - const crankResults = await endpoint.deliverBringOutYourDead(); - return crankResults; + const crankResult = await endpoint.deliverBringOutYourDead(); + return crankResult; } } diff --git a/packages/ocap-kernel/src/remotes/kernel/RemoteHandle.ts b/packages/ocap-kernel/src/remotes/kernel/RemoteHandle.ts index 7bfa0e510..1e55c8ae3 100644 --- a/packages/ocap-kernel/src/remotes/kernel/RemoteHandle.ts +++ b/packages/ocap-kernel/src/remotes/kernel/RemoteHandle.ts @@ -15,7 +15,7 @@ import type { ERef, EndpointHandle, Message, - CrankResults, + CrankResult, } from '../../types.ts'; import type { RemoteComms } from '../types.ts'; @@ -121,7 +121,7 @@ export class RemoteHandle implements EndpointHandle { #redemptionCounter: number = 1; /** Crank result object to reuse (since it's always the same). */ - readonly #myCrankResult: CrankResults; + readonly #myCrankResult: CrankResult; /** Logger for diagnostic output. */ readonly #logger: Logger; @@ -579,7 +579,7 @@ export class RemoteHandle implements EndpointHandle { * @param message - The message to deliver. * @returns the crank results. */ - async deliverMessage(target: ERef, message: Message): Promise { + async deliverMessage(target: ERef, message: Message): Promise { await this.#sendRemoteCommand({ method: 'deliver', params: ['message', target, message], @@ -593,7 +593,7 @@ export class RemoteHandle implements EndpointHandle { * @param resolutions - One or more promise resolutions to deliver. * @returns the crank results. */ - async deliverNotify(resolutions: VatOneResolution[]): Promise { + async deliverNotify(resolutions: VatOneResolution[]): Promise { await this.#sendRemoteCommand({ method: 'deliver', params: ['notify', resolutions], @@ -607,7 +607,7 @@ export class RemoteHandle implements EndpointHandle { * @param erefs - The refs of the exports to be dropped. * @returns the crank results. */ - async deliverDropExports(erefs: ERef[]): Promise { + async deliverDropExports(erefs: ERef[]): Promise { await this.#sendRemoteCommand({ method: 'deliver', params: ['dropExports', erefs], @@ -621,7 +621,7 @@ export class RemoteHandle implements EndpointHandle { * @param erefs - The refs of the exports to be retired. * @returns the crank results. */ - async deliverRetireExports(erefs: ERef[]): Promise { + async deliverRetireExports(erefs: ERef[]): Promise { await this.#sendRemoteCommand({ method: 'deliver', params: ['retireExports', erefs], @@ -635,7 +635,7 @@ export class RemoteHandle implements EndpointHandle { * @param erefs - The refs of the imports to be retired. * @returns the crank results. */ - async deliverRetireImports(erefs: ERef[]): Promise { + async deliverRetireImports(erefs: ERef[]): Promise { await this.#sendRemoteCommand({ method: 'deliver', params: ['retireImports', erefs], @@ -650,7 +650,7 @@ export class RemoteHandle implements EndpointHandle { * * @returns the crank results. */ - async deliverBringOutYourDead(): Promise { + async deliverBringOutYourDead(): Promise { if (this.#remoteGcRequested) { this.#remoteGcRequested = false; return this.#myCrankResult; diff --git a/packages/ocap-kernel/src/types.ts b/packages/ocap-kernel/src/types.ts index 5a8d670fd..f8905c015 100644 --- a/packages/ocap-kernel/src/types.ts +++ b/packages/ocap-kernel/src/types.ts @@ -608,7 +608,7 @@ export const GCActionStruct = define('GCAction', (value: unknown) => { export const isGCAction = (value: unknown): value is GCAction => is(value, GCActionStruct); -export type CrankResults = { +export type CrankResult = { didDelivery?: EndpointId; // the endpoint to which we made a delivery abort?: boolean; // changes should be discarded, not committed terminate?: { vatId: VatId; reject: boolean; info: SwingSetCapData }; @@ -617,12 +617,12 @@ export type CrankResults = { export type VatDeliveryResult = [VatCheckpoint, string | null]; export type EndpointHandle = { - deliverMessage: (target: ERef, message: Message) => Promise; - deliverNotify: (resolutions: VatOneResolution[]) => Promise; - deliverDropExports: (erefs: ERef[]) => Promise; - deliverRetireExports: (erefs: ERef[]) => Promise; - deliverRetireImports: (erefs: ERef[]) => Promise; - deliverBringOutYourDead: () => Promise; + deliverMessage: (target: ERef, message: Message) => Promise; + deliverNotify: (resolutions: VatOneResolution[]) => Promise; + deliverDropExports: (erefs: ERef[]) => Promise; + deliverRetireExports: (erefs: ERef[]) => Promise; + deliverRetireImports: (erefs: ERef[]) => Promise; + deliverBringOutYourDead: () => Promise; }; /** diff --git a/packages/ocap-kernel/src/vats/VatHandle.ts b/packages/ocap-kernel/src/vats/VatHandle.ts index 2b3c97635..3a11045d4 100644 --- a/packages/ocap-kernel/src/vats/VatHandle.ts +++ b/packages/ocap-kernel/src/vats/VatHandle.ts @@ -25,7 +25,7 @@ import type { VatId, VatConfig, VRef, - CrankResults, + CrankResult, VatDeliveryResult, EndpointHandle, } from '../types.ts'; @@ -211,12 +211,12 @@ export class VatHandle implements EndpointHandle { * @param message - The message to deliver. * @returns The crank results. */ - async deliverMessage(target: VRef, message: Message): Promise { + async deliverMessage(target: VRef, message: Message): Promise { await this.sendVatCommand({ method: 'deliver', params: ['message', target, message], }); - return this.#getDeliveryCrankResults(); + return this.#getDeliveryCrankResult(); } /** @@ -225,12 +225,12 @@ export class VatHandle implements EndpointHandle { * @param resolutions - One or more promise resolutions to deliver. * @returns The crank results. */ - async deliverNotify(resolutions: VatOneResolution[]): Promise { + async deliverNotify(resolutions: VatOneResolution[]): Promise { await this.sendVatCommand({ method: 'deliver', params: ['notify', resolutions], }); - return this.#getDeliveryCrankResults(); + return this.#getDeliveryCrankResult(); } /** @@ -239,12 +239,12 @@ export class VatHandle implements EndpointHandle { * @param vrefs - The VRefs of the exports to be dropped. * @returns The crank results. */ - async deliverDropExports(vrefs: VRef[]): Promise { + async deliverDropExports(vrefs: VRef[]): Promise { await this.sendVatCommand({ method: 'deliver', params: ['dropExports', vrefs], }); - return this.#getDeliveryCrankResults(); + return this.#getDeliveryCrankResult(); } /** @@ -253,12 +253,12 @@ export class VatHandle implements EndpointHandle { * @param vrefs - The VRefs of the exports to be retired. * @returns The crank results. */ - async deliverRetireExports(vrefs: VRef[]): Promise { + async deliverRetireExports(vrefs: VRef[]): Promise { await this.sendVatCommand({ method: 'deliver', params: ['retireExports', vrefs], }); - return this.#getDeliveryCrankResults(); + return this.#getDeliveryCrankResult(); } /** @@ -267,12 +267,12 @@ export class VatHandle implements EndpointHandle { * @param vrefs - The VRefs of the imports to be retired. * @returns The crank results. */ - async deliverRetireImports(vrefs: VRef[]): Promise { + async deliverRetireImports(vrefs: VRef[]): Promise { await this.sendVatCommand({ method: 'deliver', params: ['retireImports', vrefs], }); - return this.#getDeliveryCrankResults(); + return this.#getDeliveryCrankResult(); } /** @@ -280,12 +280,12 @@ export class VatHandle implements EndpointHandle { * * @returns The crank results. */ - async deliverBringOutYourDead(): Promise { + async deliverBringOutYourDead(): Promise { await this.sendVatCommand({ method: 'deliver', params: ['bringOutYourDead'], }); - return this.#getDeliveryCrankResults(); + return this.#getDeliveryCrankResult(); } /** @@ -345,8 +345,8 @@ export class VatHandle implements EndpointHandle { * * @returns The crank outcome. */ - async #getDeliveryCrankResults(): Promise { - const results: CrankResults = { + async #getDeliveryCrankResult(): Promise { + const results: CrankResult = { didDelivery: this.vatId, }; From 69908a703e73ffd197acba02b4a5af0395105e3c Mon Sep 17 00:00:00 2001 From: Erik Marks <25517051+rekmarks@users.noreply.github.com> Date: Wed, 18 Mar 2026 21:49:55 -0700 Subject: [PATCH 11/17] chore: Remove erroneous comment --- packages/ocap-kernel/src/KernelQueue.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/ocap-kernel/src/KernelQueue.ts b/packages/ocap-kernel/src/KernelQueue.ts index 3aadbe7af..9b2e31600 100644 --- a/packages/ocap-kernel/src/KernelQueue.ts +++ b/packages/ocap-kernel/src/KernelQueue.ts @@ -76,10 +76,7 @@ export class KernelQueue { for (;;) { let wakeUpPromise: Promise | undefined; - // TODO: This is the only part of the run loop that could throw an uncaught - // error. Is that actually what we want? this.#kernelStore.startCrank(); - try { this.#kernelStore.createCrankSavepoint('start'); From 6df20264db86e9e194578f5ec34b8000c0687830 Mon Sep 17 00:00:00 2001 From: Erik Marks <25517051+rekmarks@users.noreply.github.com> Date: Wed, 18 Mar 2026 21:52:08 -0700 Subject: [PATCH 12/17] feat(kernel-utils): add isCapData type guard, use in SubclusterManager --- packages/kernel-utils/src/index.test.ts | 1 + packages/kernel-utils/src/index.ts | 1 + packages/kernel-utils/src/types.test.ts | 28 +++++++++++++++++++ packages/kernel-utils/src/types.ts | 16 +++++++++++ .../ocap-kernel/src/vats/SubclusterManager.ts | 8 +++++- 5 files changed, 53 insertions(+), 1 deletion(-) diff --git a/packages/kernel-utils/src/index.test.ts b/packages/kernel-utils/src/index.test.ts index 6ae4a716e..d59292715 100644 --- a/packages/kernel-utils/src/index.test.ts +++ b/packages/kernel-utils/src/index.test.ts @@ -17,6 +17,7 @@ describe('index', () => { 'fromHex', 'ifDefined', 'installWakeDetector', + 'isCapData', 'isJsonRpcCall', 'isJsonRpcMessage', 'isPrimitive', diff --git a/packages/kernel-utils/src/index.ts b/packages/kernel-utils/src/index.ts index 67103c015..9895e7eb3 100644 --- a/packages/kernel-utils/src/index.ts +++ b/packages/kernel-utils/src/index.ts @@ -17,6 +17,7 @@ export type { } from './types.ts'; export { EmptyJsonArray, + isCapData, isPrimitive, isTypedArray, isTypedObject, diff --git a/packages/kernel-utils/src/types.test.ts b/packages/kernel-utils/src/types.test.ts index 6ed6ae60c..5af1317de 100644 --- a/packages/kernel-utils/src/types.test.ts +++ b/packages/kernel-utils/src/types.test.ts @@ -2,6 +2,7 @@ import { isObject } from '@metamask/utils'; import { describe, it, expect } from 'vitest'; import { + isCapData, isJsonRpcCall, isJsonRpcMessage, isPrimitive, @@ -13,6 +14,33 @@ const isNumber = (value: unknown): value is number => typeof value === 'number'; const alwaysFalse = () => false; const alwaysTrue = () => true; +describe('isCapData', () => { + it.each` + label | value + ${'body and empty slots'} | ${{ body: '#"hello"', slots: [] }} + ${'body with slots'} | ${{ body: '#{"#error":"oops"}', slots: ['ko1'] }} + ${'extra properties'} | ${{ body: '"x"', slots: [], stacks: [] }} + `('returns true for CapData-like $label', ({ value }) => { + expect(isCapData(value)).toBe(true); + }); + + it.each` + label | value + ${'null'} | ${null} + ${'undefined'} | ${undefined} + ${'string'} | ${'not capdata'} + ${'number'} | ${42} + ${'empty object'} | ${{}} + ${'missing body'} | ${{ slots: [] }} + ${'missing slots'} | ${{ body: '"x"' }} + ${'non-string body'} | ${{ body: 123, slots: [] }} + ${'non-array slots'} | ${{ body: '"x"', slots: 'not-array' }} + ${'Error instance'} | ${new Error('fail')} + `('returns false for invalid $label', ({ value }) => { + expect(isCapData(value)).toBe(false); + }); +}); + describe('isPrimitive', () => { it.each` label | value diff --git a/packages/kernel-utils/src/types.ts b/packages/kernel-utils/src/types.ts index 7c4c86878..23274e3b7 100644 --- a/packages/kernel-utils/src/types.ts +++ b/packages/kernel-utils/src/types.ts @@ -94,3 +94,19 @@ export const JsonRpcMessageStruct: Struct = union([ export const isJsonRpcMessage = (value: unknown): value is JsonRpcMessage => is(value, JsonRpcMessageStruct); + +/** + * Check whether a value has the shape of Endo CapData (`{ body: string, slots: unknown[] }`). + * + * @param value - The value to check. + * @returns `true` when `value` looks like CapData. + */ +export const isCapData = ( + value: unknown, +): value is { body: string; slots: unknown[] } => + typeof value === 'object' && + value !== null && + 'body' in value && + typeof (value as { body: unknown }).body === 'string' && + 'slots' in value && + Array.isArray((value as { slots: unknown }).slots); diff --git a/packages/ocap-kernel/src/vats/SubclusterManager.ts b/packages/ocap-kernel/src/vats/SubclusterManager.ts index 5ab3cf417..a4f01a964 100644 --- a/packages/ocap-kernel/src/vats/SubclusterManager.ts +++ b/packages/ocap-kernel/src/vats/SubclusterManager.ts @@ -1,5 +1,6 @@ import type { CapData } from '@endo/marshal'; import { SubclusterNotFoundError } from '@metamask/kernel-errors'; +import { isCapData } from '@metamask/kernel-utils'; import { Logger } from '@metamask/logger'; import type { IOManager } from '../io/IOManager.ts'; @@ -348,7 +349,12 @@ export class SubclusterManager { } catch (rejection) { // queueMessage rejects with CapData for rejected kernel promises. // Deserialize to surface the original Error to the caller. - throw kunser(rejection as CapData); + // If the rejection isn't CapData (e.g., an internal error before the + // kernel promise was created), re-throw as-is. + if (isCapData(rejection)) { + throw kunser(rejection as CapData); + } + throw rejection; } const unserialized = kunser(bootstrapResult); if (unserialized instanceof Error) { From 1299823efebc2e1ce2faa917208a93853fb5498b Mon Sep 17 00:00:00 2001 From: Erik Marks <25517051+rekmarks@users.noreply.github.com> Date: Wed, 18 Mar 2026 22:18:25 -0700 Subject: [PATCH 13/17] docs: Fix #getNextRunQueueItem docstring --- packages/ocap-kernel/src/KernelQueue.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/ocap-kernel/src/KernelQueue.ts b/packages/ocap-kernel/src/KernelQueue.ts index 9b2e31600..78ae111a2 100644 --- a/packages/ocap-kernel/src/KernelQueue.ts +++ b/packages/ocap-kernel/src/KernelQueue.ts @@ -105,9 +105,10 @@ export class KernelQueue { } /** - * Async generator that yields the items from the kernel run queue, in order. + * Get the next item from the kernel run queue. + * **ATTN:** Mutates the kernel store if the queue is not empty. * - * @returns the next item in the run queue. + * @returns The next item in the run queue, or undefined if the queue is empty. */ #getNextRunQueueItem(): RunQueueItem | undefined { const gcAction = processGCActionSet(this.#kernelStore); From 735b586392e153e008514926530d03aae099822f Mon Sep 17 00:00:00 2001 From: Erik Marks <25517051+rekmarks@users.noreply.github.com> Date: Wed, 18 Mar 2026 22:22:33 -0700 Subject: [PATCH 14/17] docs: "crank results" -> "crank result" --- packages/ocap-kernel/src/KernelQueue.ts | 4 ++-- packages/ocap-kernel/src/KernelRouter.ts | 2 +- .../ocap-kernel/src/remotes/kernel/RemoteHandle.ts | 12 ++++++------ packages/ocap-kernel/src/vats/VatHandle.ts | 12 ++++++------ 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/packages/ocap-kernel/src/KernelQueue.ts b/packages/ocap-kernel/src/KernelQueue.ts index 78ae111a2..8a8c29b4c 100644 --- a/packages/ocap-kernel/src/KernelQueue.ts +++ b/packages/ocap-kernel/src/KernelQueue.ts @@ -133,8 +133,8 @@ export class KernelQueue { /** * Process the results of a crank. * - * @param crankResult - The crank results. - * @param queueItem - The run qeueue item that caused the crank results. + * @param crankResult - The crank result. + * @param queueItem - The run queue item that caused the crank result. */ async #processCrankResult( crankResult: CrankResult | undefined, diff --git a/packages/ocap-kernel/src/KernelRouter.ts b/packages/ocap-kernel/src/KernelRouter.ts index f9f230d7c..f39deafb6 100644 --- a/packages/ocap-kernel/src/KernelRouter.ts +++ b/packages/ocap-kernel/src/KernelRouter.ts @@ -322,7 +322,7 @@ export class KernelRouter { * * @param target - The kernel reference of the target service object. * @param message - The message to deliver to the service. - * @returns The crank results indicating the delivery was to the kernel. + * @returns The crank result indicating the delivery was to the kernel. */ #deliverKernelServiceMessage(target: KRef, message: Message): CrankResult { this.#invokeKernelService(target, message); diff --git a/packages/ocap-kernel/src/remotes/kernel/RemoteHandle.ts b/packages/ocap-kernel/src/remotes/kernel/RemoteHandle.ts index 1e55c8ae3..5dc97f3ce 100644 --- a/packages/ocap-kernel/src/remotes/kernel/RemoteHandle.ts +++ b/packages/ocap-kernel/src/remotes/kernel/RemoteHandle.ts @@ -577,7 +577,7 @@ export class RemoteHandle implements EndpointHandle { * * @param target - The ref of the object to which the message is addressed. * @param message - The message to deliver. - * @returns the crank results. + * @returns the crank result. */ async deliverMessage(target: ERef, message: Message): Promise { await this.#sendRemoteCommand({ @@ -591,7 +591,7 @@ export class RemoteHandle implements EndpointHandle { * Send a 'notify' delivery to the remote. * * @param resolutions - One or more promise resolutions to deliver. - * @returns the crank results. + * @returns the crank result. */ async deliverNotify(resolutions: VatOneResolution[]): Promise { await this.#sendRemoteCommand({ @@ -605,7 +605,7 @@ export class RemoteHandle implements EndpointHandle { * Send a 'dropExports' delivery to the remote. * * @param erefs - The refs of the exports to be dropped. - * @returns the crank results. + * @returns the crank result. */ async deliverDropExports(erefs: ERef[]): Promise { await this.#sendRemoteCommand({ @@ -619,7 +619,7 @@ export class RemoteHandle implements EndpointHandle { * Send a 'retireExports' delivery to the remote. * * @param erefs - The refs of the exports to be retired. - * @returns the crank results. + * @returns the crank result. */ async deliverRetireExports(erefs: ERef[]): Promise { await this.#sendRemoteCommand({ @@ -633,7 +633,7 @@ export class RemoteHandle implements EndpointHandle { * Send a 'retireImports' delivery to the remote. * * @param erefs - The refs of the imports to be retired. - * @returns the crank results. + * @returns the crank result. */ async deliverRetireImports(erefs: ERef[]): Promise { await this.#sendRemoteCommand({ @@ -648,7 +648,7 @@ export class RemoteHandle implements EndpointHandle { * its garbage collection cycle. If the current BOYD was triggered by an * incoming remote request, skip sending to prevent infinite ping-pong. * - * @returns the crank results. + * @returns the crank result. */ async deliverBringOutYourDead(): Promise { if (this.#remoteGcRequested) { diff --git a/packages/ocap-kernel/src/vats/VatHandle.ts b/packages/ocap-kernel/src/vats/VatHandle.ts index 3a11045d4..b529afe24 100644 --- a/packages/ocap-kernel/src/vats/VatHandle.ts +++ b/packages/ocap-kernel/src/vats/VatHandle.ts @@ -209,7 +209,7 @@ export class VatHandle implements EndpointHandle { * * @param target - The VRef of the object to which the message is addressed. * @param message - The message to deliver. - * @returns The crank results. + * @returns The crank result. */ async deliverMessage(target: VRef, message: Message): Promise { await this.sendVatCommand({ @@ -223,7 +223,7 @@ export class VatHandle implements EndpointHandle { * Make a 'notify' delivery to the vat. * * @param resolutions - One or more promise resolutions to deliver. - * @returns The crank results. + * @returns The crank result. */ async deliverNotify(resolutions: VatOneResolution[]): Promise { await this.sendVatCommand({ @@ -237,7 +237,7 @@ export class VatHandle implements EndpointHandle { * Make a 'dropExports' delivery to the vat. * * @param vrefs - The VRefs of the exports to be dropped. - * @returns The crank results. + * @returns The crank result. */ async deliverDropExports(vrefs: VRef[]): Promise { await this.sendVatCommand({ @@ -251,7 +251,7 @@ export class VatHandle implements EndpointHandle { * Make a 'retireExports' delivery to the vat. * * @param vrefs - The VRefs of the exports to be retired. - * @returns The crank results. + * @returns The crank result. */ async deliverRetireExports(vrefs: VRef[]): Promise { await this.sendVatCommand({ @@ -265,7 +265,7 @@ export class VatHandle implements EndpointHandle { * Make a 'retireImports' delivery to the vat. * * @param vrefs - The VRefs of the imports to be retired. - * @returns The crank results. + * @returns The crank result. */ async deliverRetireImports(vrefs: VRef[]): Promise { await this.sendVatCommand({ @@ -278,7 +278,7 @@ export class VatHandle implements EndpointHandle { /** * Make a 'bringOutYourDead' delivery to the vat. * - * @returns The crank results. + * @returns The crank result. */ async deliverBringOutYourDead(): Promise { await this.sendVatCommand({ From 79f437e1dd8d9fc5916931cdaca2252c0a21b1fc Mon Sep 17 00:00:00 2001 From: Erik Marks <25517051+rekmarks@users.noreply.github.com> Date: Thu, 19 Mar 2026 02:27:22 -0700 Subject: [PATCH 15/17] refactor: Improve KernelQueue.run() readability --- packages/ocap-kernel/src/KernelQueue.ts | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/packages/ocap-kernel/src/KernelQueue.ts b/packages/ocap-kernel/src/KernelQueue.ts index 8a8c29b4c..c946a49b7 100644 --- a/packages/ocap-kernel/src/KernelQueue.ts +++ b/packages/ocap-kernel/src/KernelQueue.ts @@ -81,19 +81,18 @@ export class KernelQueue { this.#kernelStore.createCrankSavepoint('start'); const queueItem = this.#getNextRunQueueItem(); - if (queueItem === undefined) { - // Queue empty — sleep until woken - const { promise, resolve } = makePromiseKit(); + if (queueItem) { + this.#kernelStore.nextTerminatedVatCleanup(); + const crankResult = await deliver(queueItem); + await this.#processCrankResult(crankResult, queueItem); + } else { if (this.#wakeUpTheRunQueue !== null) { Fail`run queue already waiting to be woken; cannot sleep again before the previous wake handler is consumed`; } + const { promise, resolve } = makePromiseKit(); this.#wakeUpTheRunQueue = resolve; wakeUpPromise = promise; - } else { - this.#kernelStore.nextTerminatedVatCleanup(); - const crankResult = await deliver(queueItem); - await this.#processCrankResult(crankResult, queueItem); } } finally { this.#kernelStore.endCrank(); From f9d093fcff64bcf8fb15c54eb24e7ff4a215cfe0 Mon Sep 17 00:00:00 2001 From: Erik Marks <25517051+rekmarks@users.noreply.github.com> Date: Thu, 19 Mar 2026 12:28:58 -0700 Subject: [PATCH 16/17] docs: Add kernel promise and decider glossary entries Add "kernel promise" entry distinguishing kernel promises from JS promises, and "decider" entry with function call analogy. Update existing entries to specify "kernel promise" where applicable. Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/glossary.md | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/docs/glossary.md b/docs/glossary.md index 7847300fd..dc926e0e7 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -60,8 +60,8 @@ delivery](#delivery), [syscalls](#syscall), and vat initialization. ### liveslots A framework for managing object lifecycles within [vats](#vat). Liveslots provides the -runtime environment for vat code and handles object persistence, promise management, and -[syscall](#syscall) coordination. +runtime environment for vat code and handles object persistence, [kernel +promise](#kernel-promise) management, and [syscall](#syscall) coordination. ### crank @@ -74,7 +74,7 @@ aborted and rolled back if errors occur. See the ### syscall A system call made by a [vat](#vat) to request kernel services. Syscalls include -operations like sending messages, resolving [promises](#promise-resolution), and accessing +operations like sending messages, resolving [kernel promises](#kernel-promise), and accessing persistent storage. See [VatSyscall](../packages/ocap-kernel/src/VatSyscall.ts) and the [syscall service](../packages/ocap-kernel/src/services/syscall.ts). @@ -93,11 +93,41 @@ suitable for cross-vat communication. See the [kernel marshal service](../packages/ocap-kernel/src/services/kernel-marshal.ts) for `kser` and `kunser` functions. +### kernel promise + +A persistent record in the kernel store that tracks the state of a promise across [vat](#vat) +boundaries. A kernel promise has a state (`unresolved`, `fulfilled`, or `rejected`), a +[decider](#decider), a list of subscribers, and (once settled) a value. Kernel promises +survive vat restarts and [crank](#crank) rollbacks. + +Kernel promises are distinct from JavaScript promises. Vat code works with JS promises +and `E()` calls; [liveslots](#liveslots) translates between the vat's JS promises and +kernel promise IDs ([krefs](#kref)) via [syscalls](#syscall). Kernel-space code (e.g., +[`enqueueMessage`](../packages/ocap-kernel/src/KernelQueue.ts)) may create both a kernel +promise (for routing) and a JS promise (for the caller to `await`), bridged by a +subscription callback. See the +[promise store methods](../packages/ocap-kernel/src/store/methods/promise.ts). + +### decider + +The endpoint authorized to settle (fulfill or reject) a [kernel promise](#kernel-promise). +Analogous to a function call: when vat A sends `E(obj).foo()` to vat B, vat A is the +caller waiting for a result, and vat B is the callee that computes it. B becomes the +decider of the result's kernel promise at [delivery](#delivery) time, just as a callee +determines the return value of a function call. Only the decider can resolve the kernel +promise — attempts by other endpoints are rejected. After [crank](#crank) rollback, the +decider reverts to its pre-delivery value, which matters for error handling (e.g., +rejecting the kernel promise of a message sent to a terminated [vat](#vat)). See +[`setPromiseDecider`](../packages/ocap-kernel/src/store/methods/promise.ts) and the +authorization check in +[`resolvePromises`](../packages/ocap-kernel/src/KernelQueue.ts). + ### promise resolution -The process of fulfilling or rejecting a promise. Promise resolutions are delivered as -notifications to [vats](#vat) and can trigger cascading resolutions of dependent promises. -See the [promise store methods](../packages/ocap-kernel/src/store/methods/promise.ts) for +The process of fulfilling or rejecting a [kernel promise](#kernel-promise). Promise +resolutions are delivered as notifications to [vats](#vat) and can trigger cascading +resolutions of dependent kernel promises. See the +[promise store methods](../packages/ocap-kernel/src/store/methods/promise.ts) for implementation details. ### garbage collection (GC) @@ -157,7 +187,8 @@ details. ### kernel router The component responsible for routing messages to the correct [vat](#vat) based on target -references and promise states. The router handles [delivery](#delivery) logic. See the +references and [kernel promise](#kernel-promise) states. The router handles +[delivery](#delivery) logic. See the [KernelRouter](../packages/ocap-kernel/src/KernelRouter.ts) for routing logic. ## Abbreviations From 233587ceb63587e3a2fe797586f2f1b6029201c1 Mon Sep 17 00:00:00 2001 From: Erik Marks <25517051+rekmarks@users.noreply.github.com> Date: Thu, 19 Mar 2026 18:54:26 -0700 Subject: [PATCH 17/17] docs: Tweak liveslots glossary entry --- docs/glossary.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/glossary.md b/docs/glossary.md index dc926e0e7..3a83c5845 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -60,8 +60,8 @@ delivery](#delivery), [syscalls](#syscall), and vat initialization. ### liveslots A framework for managing object lifecycles within [vats](#vat). Liveslots provides the -runtime environment for vat code and handles object persistence, [kernel -promise](#kernel-promise) management, and [syscall](#syscall) coordination. +runtime environment for vat code and handles object persistence, distributed promise +management, and [syscall](#syscall) coordination. ### crank