From efbf99abedee32c41d3d688019276b5c9638b52c Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Mon, 23 Mar 2026 10:44:49 -0500 Subject: [PATCH 1/3] feat(gastown): parse H1 headers from agent output as automatic status updates (#1374) * feat(gastown): parse H1 headers from agent output as automatic status updates Parse markdown H1 headers from message.part.updated events in broadcastEvent() and post them to the agent status API. This provides dashboard visibility into agent activity without requiring agents to call gt_status explicitly. - Uses last H1 match (most current) when multiple exist in one text part - Deduplicates via lastStatusForAgent Map to avoid redundant API calls - Truncates status to 120 characters - Cleans up lastStatusForAgent on agent exit/stop/failure/shutdown Refs: #1307 * fix(gastown): require trailing newline in H1 status regex to avoid partial matches The H1 regex was matching incomplete headings during streaming, causing dozens of /status writes per heading as each token delta produced a different partial match. Adding a trailing newline requirement ensures status is only posted once the heading line is fully streamed. 
--- .../container/src/process-manager.ts | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index befba0a05..872c2d3eb 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -33,6 +33,8 @@ const eventAbortControllers = new Map(); const eventSinks = new Set<(agentId: string, event: string, data: unknown) => void>(); // Per-agent idle timers — fires exit when no nudges arrive const idleTimers = new Map>(); +// Tracks last H1 status posted per agent to deduplicate status updates +const lastStatusForAgent = new Map(); let nextPort = 4096; const startTime = Date.now(); @@ -143,6 +145,61 @@ function broadcastEvent(agentId: string, event: string, data: unknown): void { // Best-effort persistence — don't block live streaming }); } + + // Parse H1 markdown headers from streaming text parts and post as agent status. + // This gives dashboard visibility into what an agent is doing without requiring + // the agent to call gt_status explicitly. + if (event === 'message.part.updated' || event === 'message_part.updated') { + const dataObj = data != null && typeof data === 'object' ? data : undefined; + const part = + dataObj && 'part' in dataObj && dataObj.part != null && typeof dataObj.part === 'object' + ? dataObj.part + : undefined; + if ( + part && + 'type' in part && + part.type === 'text' && + 'text' in part && + typeof part.text === 'string' + ) { + // Use last H1 match — most current status when agent writes multiple headers. + // Require a trailing newline so we only match completed headings; without it, + // every streaming delta would match the partial heading being typed and spam + // the /status endpoint with incremental fragments. + const matches = [...part.text.matchAll(/(?:^|\n)# (.+)\n/g)]; + const lastMatch = matches.length > 0 ? 
matches[matches.length - 1] : null; + if (lastMatch) { + const statusText = lastMatch[1].slice(0, 120); + if (statusText !== lastStatusForAgent.get(agentId)) { + lastStatusForAgent.set(agentId, statusText); + // Post to status API (fire-and-forget, same pattern as event persistence above) + const agentMeta = agents.get(agentId); + const statusAuthToken = + process.env.GASTOWN_CONTAINER_TOKEN ?? + agentMeta?.gastownContainerToken ?? + agentMeta?.gastownSessionToken; + if (agentMeta?.gastownApiUrl && statusAuthToken) { + const statusHeaders: Record = { + 'Content-Type': 'application/json', + Authorization: `Bearer ${statusAuthToken}`, + }; + if (process.env.GASTOWN_CONTAINER_TOKEN || agentMeta.gastownContainerToken) { + statusHeaders['X-Gastown-Agent-Id'] = agentId; + if (agentMeta.rigId) statusHeaders['X-Gastown-Rig-Id'] = agentMeta.rigId; + } + fetch( + `${agentMeta.gastownApiUrl}/api/towns/${agentMeta.townId ?? '_'}/rigs/${agentMeta.rigId ?? '_'}/agents/${agentId}/status`, + { + method: 'POST', + headers: statusHeaders, + body: JSON.stringify({ message: statusText }), + } + ).catch(() => {}); + } + } + } + } + } } /** @@ -423,6 +480,7 @@ async function subscribeToEvents( }); agent.status = 'exited'; agent.exitReason = 'completed'; + lastStatusForAgent.delete(agent.agentId); broadcastEvent(agent.agentId, 'agent.exited', { reason: 'completed' }); void reportAgentCompleted(agent, 'completed'); @@ -505,6 +563,7 @@ async function subscribeToEvents( }); if (agent.status === 'running') { clearIdleTimer(agent.agentId); + lastStatusForAgent.delete(agent.agentId); agent.status = 'failed'; agent.exitReason = 'Event stream error'; broadcastEvent(agent.agentId, 'agent.exited', { @@ -666,6 +725,7 @@ export async function stopAgent(agentId: string): Promise { // Cancel any pending idle timer clearIdleTimer(agentId); + lastStatusForAgent.delete(agentId); // Abort event subscription const controller = eventAbortControllers.get(agentId); @@ -762,6 +822,7 @@ export async function 
stopAll(): Promise { clearTimeout(timer); } idleTimers.clear(); + lastStatusForAgent.clear(); // Abort all event subscriptions for (const [, controller] of eventAbortControllers) { From cad7f7fdf0d190df101a0a8ff7ed3cf617d1e6a2 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Mon, 23 Mar 2026 17:54:09 +0000 Subject: [PATCH 2/3] refactor(gastown): replace gt_status with H1 header status in polecat and triage prompts Remove all gt_status tool references from polecat-system and triage-system prompts. Agents now derive dashboard status from H1 markdown headers in their output instead of calling a tool. --- cloudflare-gastown/src/prompts/polecat-system.prompt.ts | 9 ++++++--- cloudflare-gastown/src/prompts/triage-system.prompt.ts | 3 +-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/cloudflare-gastown/src/prompts/polecat-system.prompt.ts b/cloudflare-gastown/src/prompts/polecat-system.prompt.ts index ea57e1077..9a7557fbd 100644 --- a/cloudflare-gastown/src/prompts/polecat-system.prompt.ts +++ b/cloudflare-gastown/src/prompts/polecat-system.prompt.ts @@ -48,7 +48,6 @@ You have these tools available. Use them to coordinate with the Gastown orchestr - **gt_mail_check** — Check for new mail from other agents. Call this periodically or when you suspect coordination messages. - **gt_escalate** — Escalate a problem you cannot solve. Creates an escalation bead. Use this when you are stuck, blocked, or need human intervention. - **gt_checkpoint** — Write crash-recovery data. Call this after significant progress so work can be resumed if the container restarts. -- **gt_status** — Emit a plain-language status update visible on the dashboard. Call this at meaningful phase transitions. ## Workflow @@ -79,9 +78,13 @@ If you are stuck for more than a few attempts at the same problem: ## Status Updates -Periodically call gt_status with a brief, plain-language description of what you are doing. 
Write it for a teammate watching the dashboard — not a log line, not a stack trace. One or two sentences. Examples: "Installing dependencies and setting up the project structure.", "Writing unit tests for the API endpoints.", "Fixing 3 TypeScript errors before committing." +Your status is automatically derived from H1 markdown headers in your output. When you start a new phase of work, begin your response with an H1 header describing what you're doing. Examples: -Call gt_status when you START a new meaningful phase of work: beginning a new file, running tests, installing packages, pushing a branch. Do NOT call it on every tool use. +# Installing dependencies +# Writing unit tests for the API endpoints +# Fixing TypeScript errors before committing + +These headers appear as status bubbles on the dashboard. You don't need to call any tool — just write naturally with H1 headers at phase transitions. ## Important diff --git a/cloudflare-gastown/src/prompts/triage-system.prompt.ts b/cloudflare-gastown/src/prompts/triage-system.prompt.ts index dc4feb861..ea9d5a6e5 100644 --- a/cloudflare-gastown/src/prompts/triage-system.prompt.ts +++ b/cloudflare-gastown/src/prompts/triage-system.prompt.ts @@ -54,12 +54,11 @@ This will close the triage batch, unhook you, and return you to idle. - **Prefer least-disruptive actions.** RESTART over CLOSE_BEAD. NUDGE over ESCALATE. - **Escalate genuinely hard problems.** If a situation requires human context you don't have, escalate rather than guess. - **Never skip a triage request.** Every pending request must be resolved. -- **Post status updates.** Call gt_status before starting the batch (e.g. "Triaging 3 requests") and after finishing (e.g. "Triage complete — 2 restarted, 1 escalated"). This keeps the dashboard informed. +- **Post status updates.** Use H1 headers to indicate your current phase (e.g. \`# Triaging 3 requests\`, \`# Triage complete — 2 restarted, 1 escalated\`). These are automatically shown on the dashboard. 
## Available Tools - **gt_triage_resolve** — Resolve a triage request. Provide the triage_request_bead_id, chosen action, and brief notes. -- **gt_status** — Post a plain-language status update visible on the dashboard. Call this at the start and end of your triage batch. - **gt_mail_send** — Send guidance to a stuck agent. - **gt_escalate** — Forward a problem to the Mayor or human operators. - **gt_bead_close** — Close your hooked bead when all triage requests have been processed. From 08622339167d2243d005f643f5c8fb9147ecb7ed Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Mon, 23 Mar 2026 16:32:28 -0500 Subject: [PATCH 3/3] feat(gastown): replace gt_status tool with H1 header parsing for agent status updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the gt_status tool and its prompt references. Agents now report status via markdown H1 headers (e.g. "# Installing dependencies") which are automatically parsed from streaming output in broadcastEvent(). Fix the H1 parser to accumulate streaming deltas — the SDK sends part.text as empty during streaming with content in the delta field. Add diagnostic logging for status POST results. 
Closes #1307 --- cloudflare-gastown/container/plugin/client.ts | 12 +-- cloudflare-gastown/container/plugin/tools.ts | 17 ---- .../container/src/process-manager.ts | 84 +++++++++++++++---- .../src/prompts/polecat-system.prompt.ts | 8 +- .../src/prompts/triage-system.prompt.ts | 2 +- 5 files changed, 76 insertions(+), 47 deletions(-) diff --git a/cloudflare-gastown/container/plugin/client.ts b/cloudflare-gastown/container/plugin/client.ts index 93b972854..e90634ba5 100644 --- a/cloudflare-gastown/container/plugin/client.ts +++ b/cloudflare-gastown/container/plugin/client.ts @@ -137,13 +137,6 @@ export class GastownClient { }); } - async updateAgentStatusMessage(message: string): Promise { - await this.request(this.agentPath('/status'), { - method: 'POST', - body: JSON.stringify({ message }), - }); - } - // -- Rig-scoped endpoints -- async getBead(beadId: string): Promise { @@ -443,7 +436,10 @@ export class MayorGastownClient { async updateConvoy( convoyId: string, - input: { merge_mode?: 'review-then-land' | 'review-and-merge'; feature_branch?: string } + input: { + merge_mode?: 'review-then-land' | 'review-and-merge'; + feature_branch?: string; + } ): Promise { await this.request(this.mayorPath(`/convoys/${convoyId}`), { method: 'PATCH', diff --git a/cloudflare-gastown/container/plugin/tools.ts b/cloudflare-gastown/container/plugin/tools.ts index 1c360c0f2..1bc40ed2f 100644 --- a/cloudflare-gastown/container/plugin/tools.ts +++ b/cloudflare-gastown/container/plugin/tools.ts @@ -244,23 +244,6 @@ export function createTools(client: GastownClient) { }, }), - gt_status: tool({ - description: - 'Emit a plain-language status update visible on the dashboard. ' + - 'Call this when starting a new phase of work (e.g. "Installing dependencies", ' + - '"Writing tests", "Fixing lint errors"). Write it as a brief sentence for a teammate, ' + - 'not a log line. 
Do NOT call this on every tool use — only at meaningful phase transitions.', - args: { - message: tool.schema - .string() - .describe('A 1-2 sentence plain-language description of what you are currently doing.'), - }, - async execute(args) { - await client.updateAgentStatusMessage(args.message); - return 'Status updated.'; - }, - }), - gt_nudge: tool({ description: 'Send a real-time nudge to another agent. Unlike gt_mail_send (which queues a formal ' + diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index 872c2d3eb..4891b997d 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -35,6 +35,18 @@ const eventSinks = new Set<(agentId: string, event: string, data: unknown) => vo const idleTimers = new Map>(); // Tracks last H1 status posted per agent to deduplicate status updates const lastStatusForAgent = new Map(); +// Accumulates streaming text deltas per "agentId:partId" key so we can scan for +// H1 headers. SDK events send part.text as empty during delta streaming; the +// actual content arrives incrementally in the `delta` field. +const accumulatedPartText = new Map(); + +/** Remove all accumulated part text entries for a given agent. */ +function clearAccumulatedText(agentId: string): void { + const prefix = `${agentId}:`; + for (const key of accumulatedPartText.keys()) { + if (key.startsWith(prefix)) accumulatedPartText.delete(key); + } +} let nextPort = 4096; const startTime = Date.now(); @@ -147,8 +159,12 @@ function broadcastEvent(agentId: string, event: string, data: unknown): void { } // Parse H1 markdown headers from streaming text parts and post as agent status. - // This gives dashboard visibility into what an agent is doing without requiring - // the agent to call gt_status explicitly. 
+ // This gives dashboard visibility into what an agent is doing — agents write + // natural H1 headers like "# Installing dependencies" which become status updates. + // + // During streaming, the SDK sends part.text as empty and the actual content in + // the `delta` field. We accumulate deltas per part ID so we can scan the full + // text for completed H1 headers (those followed by a newline). if (event === 'message.part.updated' || event === 'message_part.updated') { const dataObj = data != null && typeof data === 'object' ? data : undefined; const part = @@ -159,26 +175,45 @@ function broadcastEvent(agentId: string, event: string, data: unknown): void { part && 'type' in part && part.type === 'text' && - 'text' in part && - typeof part.text === 'string' + 'id' in part && + typeof part.id === 'string' ) { + const partKey = `${agentId}:${part.id}`; + const delta = + dataObj && 'delta' in dataObj && typeof dataObj.delta === 'string' + ? dataObj.delta + : undefined; + // Accumulate text: if delta is present, append it; otherwise use part.text + // as the full snapshot (non-streaming mode). + let fullText: string; + if (delta !== undefined) { + const prev = accumulatedPartText.get(partKey) ?? ''; + fullText = prev + delta; + accumulatedPartText.set(partKey, fullText); + } else if ('text' in part && typeof part.text === 'string') { + fullText = part.text; + accumulatedPartText.set(partKey, fullText); + } else { + fullText = accumulatedPartText.get(partKey) ?? ''; + } + // Use last H1 match — most current status when agent writes multiple headers. // Require a trailing newline so we only match completed headings; without it, // every streaming delta would match the partial heading being typed and spam // the /status endpoint with incremental fragments. - const matches = [...part.text.matchAll(/(?:^|\n)# (.+)\n/g)]; + const matches = [...fullText.matchAll(/(?:^|\n)# (.+)\n/g)]; const lastMatch = matches.length > 0 ? 
matches[matches.length - 1] : null; if (lastMatch) { const statusText = lastMatch[1].slice(0, 120); if (statusText !== lastStatusForAgent.get(agentId)) { lastStatusForAgent.set(agentId, statusText); - // Post to status API (fire-and-forget, same pattern as event persistence above) const agentMeta = agents.get(agentId); const statusAuthToken = process.env.GASTOWN_CONTAINER_TOKEN ?? agentMeta?.gastownContainerToken ?? agentMeta?.gastownSessionToken; if (agentMeta?.gastownApiUrl && statusAuthToken) { + const statusUrl = `${agentMeta.gastownApiUrl}/api/towns/${agentMeta.townId ?? '_'}/rigs/${agentMeta.rigId ?? '_'}/agents/${agentId}/status`; const statusHeaders: Record = { 'Content-Type': 'application/json', Authorization: `Bearer ${statusAuthToken}`, @@ -187,14 +222,31 @@ function broadcastEvent(agentId: string, event: string, data: unknown): void { statusHeaders['X-Gastown-Agent-Id'] = agentId; if (agentMeta.rigId) statusHeaders['X-Gastown-Rig-Id'] = agentMeta.rigId; } - fetch( - `${agentMeta.gastownApiUrl}/api/towns/${agentMeta.townId ?? '_'}/rigs/${agentMeta.rigId ?? '_'}/agents/${agentId}/status`, - { - method: 'POST', - headers: statusHeaders, - body: JSON.stringify({ message: statusText }), - } - ).catch(() => {}); + console.log( + `${MANAGER_LOG} H1 status for agent ${agentId}: "${statusText}" → POST ${statusUrl}` + ); + fetch(statusUrl, { + method: 'POST', + headers: statusHeaders, + body: JSON.stringify({ message: statusText }), + }) + .then(resp => { + if (!resp.ok) { + console.warn( + `${MANAGER_LOG} H1 status POST failed: ${resp.status} ${resp.statusText}` + ); + } + }) + .catch(err => { + console.warn( + `${MANAGER_LOG} H1 status POST error:`, + err instanceof Error ? 
err.message : err + ); + }); + } else { + console.warn( + `${MANAGER_LOG} H1 status: cannot post for agent ${agentId} — missing apiUrl=${!!agentMeta?.gastownApiUrl} authToken=${!!statusAuthToken}` + ); } } } @@ -481,6 +533,7 @@ async function subscribeToEvents( agent.status = 'exited'; agent.exitReason = 'completed'; lastStatusForAgent.delete(agent.agentId); + clearAccumulatedText(agent.agentId); broadcastEvent(agent.agentId, 'agent.exited', { reason: 'completed' }); void reportAgentCompleted(agent, 'completed'); @@ -564,6 +617,7 @@ async function subscribeToEvents( if (agent.status === 'running') { clearIdleTimer(agent.agentId); lastStatusForAgent.delete(agent.agentId); + clearAccumulatedText(agent.agentId); agent.status = 'failed'; agent.exitReason = 'Event stream error'; broadcastEvent(agent.agentId, 'agent.exited', { @@ -726,6 +780,7 @@ export async function stopAgent(agentId: string): Promise { // Cancel any pending idle timer clearIdleTimer(agentId); lastStatusForAgent.delete(agentId); + clearAccumulatedText(agentId); // Abort event subscription const controller = eventAbortControllers.get(agentId); @@ -823,6 +878,7 @@ export async function stopAll(): Promise { } idleTimers.clear(); lastStatusForAgent.clear(); + accumulatedPartText.clear(); // Abort all event subscriptions for (const [, controller] of eventAbortControllers) { diff --git a/cloudflare-gastown/src/prompts/polecat-system.prompt.ts b/cloudflare-gastown/src/prompts/polecat-system.prompt.ts index 9a7557fbd..3a441d65b 100644 --- a/cloudflare-gastown/src/prompts/polecat-system.prompt.ts +++ b/cloudflare-gastown/src/prompts/polecat-system.prompt.ts @@ -78,13 +78,7 @@ If you are stuck for more than a few attempts at the same problem: ## Status Updates -Your status is automatically derived from H1 markdown headers in your output. When you start a new phase of work, begin your response with an H1 header describing what you're doing. 
Examples: - -# Installing dependencies -# Writing unit tests for the API endpoints -# Fixing TypeScript errors before committing - -These headers appear as status bubbles on the dashboard. You don't need to call any tool — just write naturally with H1 headers at phase transitions. +Use markdown H1 headers (e.g. \`# Installing dependencies\`) at the start of each new phase of work. These headers are automatically parsed from your output and displayed on the dashboard as status updates. Write them as brief, plain-language descriptions for a teammate — not log lines or stack traces. Examples: \`# Installing dependencies\`, \`# Writing unit tests for the API endpoints\`, \`# Fixing TypeScript errors\`. ## Important diff --git a/cloudflare-gastown/src/prompts/triage-system.prompt.ts b/cloudflare-gastown/src/prompts/triage-system.prompt.ts index ea9d5a6e5..a8f9d59aa 100644 --- a/cloudflare-gastown/src/prompts/triage-system.prompt.ts +++ b/cloudflare-gastown/src/prompts/triage-system.prompt.ts @@ -54,7 +54,7 @@ This will close the triage batch, unhook you, and return you to idle. - **Prefer least-disruptive actions.** RESTART over CLOSE_BEAD. NUDGE over ESCALATE. - **Escalate genuinely hard problems.** If a situation requires human context you don't have, escalate rather than guess. - **Never skip a triage request.** Every pending request must be resolved. -- **Post status updates.** Use H1 headers to indicate your current phase (e.g. \`# Triaging 3 requests\`, \`# Triage complete — 2 restarted, 1 escalated\`). These are automatically shown on the dashboard. +- **Post status updates.** Use markdown H1 headers (e.g. \`# Triaging 3 requests\`) to indicate your current phase. These are automatically parsed and displayed on the dashboard. ## Available Tools