diff --git a/packages/agent/src/adapters/claude/claude-agent.ts b/packages/agent/src/adapters/claude/claude-agent.ts index 6774b73a6..54842af00 100644 --- a/packages/agent/src/adapters/claude/claude-agent.ts +++ b/packages/agent/src/adapters/claude/claude-agent.ts @@ -371,8 +371,18 @@ export class ClaudeAcpAgent extends BaseAcpAgent { switch (message.type) { case "system": if (message.subtype === "compact_boundary") { + // Send used:0 immediately so the client doesn't keep showing + // the stale pre-compaction context size until the next turn. lastAssistantTotalUsage = 0; promptReplayed = true; + await this.client.sessionUpdate({ + sessionId: params.sessionId, + update: { + sessionUpdate: "usage_update", + used: 0, + size: lastContextWindowSize, + }, + }); } if (message.subtype === "local_command_output") { promptReplayed = true; @@ -530,6 +540,11 @@ export class ClaudeAcpAgent extends BaseAcpAgent { } // Store latest assistant usage (excluding subagents) + // Sum all token types as a proxy for post-turn context occupancy: + // current turn's output will become next turn's input. + // Note: per the Anthropic API, input_tokens excludes cache tokens — + // cache_read and cache_creation are reported separately, so summing + // all four fields is not double-counting. if ( "usage" in message.message && message.parent_tool_use_id === null @@ -544,6 +559,7 @@ export class ClaudeAcpAgent extends BaseAcpAgent { }; lastAssistantTotalUsage = usage.input_tokens + + usage.output_tokens + usage.cache_read_input_tokens + usage.cache_creation_input_tokens;