diff --git a/.changeset/accumulate-tool-results.md b/.changeset/accumulate-tool-results.md new file mode 100644 index 000000000..c368ef7ed --- /dev/null +++ b/.changeset/accumulate-tool-results.md @@ -0,0 +1,11 @@ +--- +'@tanstack/ai-isolate-cloudflare': patch +--- + +fix(ai-isolate-cloudflare): accumulate `toolResults` across rounds in the driver round-trip + +The Cloudflare isolate driver was wiping `toolResults` between rounds. `wrap-code` uses sequential `tc_N` ids (`tc_0`, `tc_1`, ...) that are re-derived every round when the Worker re-executes user code, so prior-round results must remain in the cache. With the wipe, multi-tool programs (e.g. `await A(); await B();`) would ping-pong between `{tc_0}` and `{tc_1}` and exhaust `maxToolRounds`, surfacing as `MaxRoundsExceeded`. + +Single-tool code worked because only one cache entry was ever needed in a given round. Existing tests covered single-round flows only and did not exercise real `wrap-code` ids end-to-end, so the regression slipped through. + +Added a regression test using `tc_N`-shaped ids that fails on the prior implementation and passes now that results are accumulated across rounds. diff --git a/packages/typescript/ai-isolate-cloudflare/src/isolate-driver.ts b/packages/typescript/ai-isolate-cloudflare/src/isolate-driver.ts index 9cf596db5..69246098c 100644 --- a/packages/typescript/ai-isolate-cloudflare/src/isolate-driver.ts +++ b/packages/typescript/ai-isolate-cloudflare/src/isolate-driver.ts @@ -174,8 +174,12 @@ class CloudflareIsolateContext implements IsolateContext { // Collect logs from this round allLogs = [...allLogs, ...result.logs] - // Execute tool calls locally - toolResults = {} + // Execute tool calls locally. Accumulate across rounds so prior-round + // results stay cached when the Worker re-executes user code. + // wrap-code uses sequential `tc_` ids re-derived every round; if + // we wipe the cache, multi-tool programs ping-pong between missing + // ids and exhaust `maxToolRounds` (MaxRoundsExceeded). + toolResults = { ...(toolResults ?? 
{}) } for (const toolCall of result.toolCalls) { const binding = this.bindings[toolCall.name] as diff --git a/packages/typescript/ai-isolate-cloudflare/tests/isolate-driver.test.ts b/packages/typescript/ai-isolate-cloudflare/tests/isolate-driver.test.ts index 848f0244d..2c97c9a7b 100644 --- a/packages/typescript/ai-isolate-cloudflare/tests/isolate-driver.test.ts +++ b/packages/typescript/ai-isolate-cloudflare/tests/isolate-driver.test.ts @@ -233,6 +233,70 @@ describe('createCloudflareIsolateDriver', () => { expect(body2.toolResults!['add_1']).toEqual({ success: true, value: 5 }) }) + it('accumulates toolResults across rounds for sequential tool calls', async () => { + // Reproduces a real bug: when user code calls two tools sequentially + // (e.g. `await A(); await B();`), wrap-code re-runs from the start each + // round and re-derives sequential `tc_` ids. If the driver wipes + // toolResults between rounds, round 3 misses tc_0, the wrapper re-throws, + // and the loop ping-pongs between {tc_0} and {tc_1} until MaxRoundsExceeded. 
+ const a = makeBinding('A', async () => 'a') + const b = makeBinding('B', async () => 'b') + + // Round 1: code re-runs, throws on tc_0 + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => + ({ + status: 'need_tools', + toolCalls: [{ id: 'tc_0', name: 'A', args: {} }], + logs: [], + continuationId: 'c1', + }) as ExecuteResponse, + }) + + // Round 2: tc_0 cached, code re-runs, throws on tc_1 + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => + ({ + status: 'need_tools', + toolCalls: [{ id: 'tc_1', name: 'B', args: {} }], + logs: [], + continuationId: 'c2', + }) as ExecuteResponse, + }) + + // Round 3: tc_0 + tc_1 BOTH must be present, code completes + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => + ({ + status: 'done', + success: true, + value: 'ab', + logs: [], + }) as ExecuteResponse, + }) + + const driver = createCloudflareIsolateDriver({ workerUrl: WORKER_URL }) + const context = await driver.createContext({ + bindings: { A: a, B: b }, + }) + + const result = await context.execute( + 'const x = await A({}); const y = await B({}); return x + y', + ) + + expect(result.success).toBe(true) + expect(result.value).toBe('ab') + expect(fetchMock).toHaveBeenCalledTimes(3) + + // Round 3 body MUST include both tc_0 and tc_1 (regression guard) + const body3: ExecuteRequest = JSON.parse(fetchMock.mock.calls[2][1].body) + expect(body3.toolResults!['tc_0']).toEqual({ success: true, value: 'a' }) + expect(body3.toolResults!['tc_1']).toEqual({ success: true, value: 'b' }) + }) + it('handles multiple tool calls in one round', async () => { const getA = makeBinding('getA', async () => 'A') const getB = makeBinding('getB', async () => 'B')