diff --git a/.changeset/streaming-structured-output-chat.md b/.changeset/streaming-structured-output-chat.md new file mode 100644 index 000000000..4cdc22452 --- /dev/null +++ b/.changeset/streaming-structured-output-chat.md @@ -0,0 +1,5 @@ +--- +'@tanstack/ai': minor +--- + +feat: `chat({ outputSchema, stream: true })` returns `AsyncIterable<StreamChunk>` with raw JSON deltas plus a final `CUSTOM` `structured-output.complete` event carrying the validated parsed object. The existing `chat({ outputSchema })` (non-streaming) path is unchanged. Adapters expose this via a new optional `structuredOutputStream` method on `TextAdapter`. Adapters that omit the method fall back to the activity layer's `fallbackStructuredOutputStream`, which wraps the non-streaming `structuredOutput` call so adapters without native streaming JSON support still satisfy the new combination. diff --git a/.changeset/streaming-structured-output-grok.md b/.changeset/streaming-structured-output-grok.md new file mode 100644 index 000000000..48fd94c12 --- /dev/null +++ b/.changeset/streaming-structured-output-grok.md @@ -0,0 +1,5 @@ +--- +'@tanstack/ai-grok': minor +--- + +feat: native streaming structured output. `GrokTextAdapter.structuredOutputStream()` issues a single Chat Completions request with `stream: true` + `response_format: { type: 'json_schema', strict: true }`, surfacing JSON deltas as `TEXT_MESSAGE_CONTENT` chunks and a final `CUSTOM` `structured-output.complete` event with the parsed object — replacing the previous two-request (streamed text → non-streamed JSON) flow when used with `chat({ outputSchema, stream: true })`. diff --git a/.changeset/streaming-structured-output-groq.md b/.changeset/streaming-structured-output-groq.md new file mode 100644 index 000000000..7016872ec --- /dev/null +++ b/.changeset/streaming-structured-output-groq.md @@ -0,0 +1,5 @@ +--- +'@tanstack/ai-groq': minor +--- + +feat: native streaming structured output. 
`GroqTextAdapter.structuredOutputStream()` issues a single Chat Completions request with `stream: true` + `response_format: { type: 'json_schema', strict: true }`, surfacing JSON deltas as `TEXT_MESSAGE_CONTENT` chunks and a final `CUSTOM` `structured-output.complete` event with the parsed object — replacing the previous two-request (streamed text → non-streamed JSON) flow when used with `chat({ outputSchema, stream: true })`. diff --git a/.changeset/streaming-structured-output-openai.md b/.changeset/streaming-structured-output-openai.md new file mode 100644 index 000000000..666e24b49 --- /dev/null +++ b/.changeset/streaming-structured-output-openai.md @@ -0,0 +1,5 @@ +--- +'@tanstack/ai-openai': minor +--- + +feat: native streaming structured output. `OpenAITextAdapter.structuredOutputStream()` issues a single Responses API request with `stream: true` + `text.format: { type: 'json_schema', strict: true }`, surfacing JSON deltas as `TEXT_MESSAGE_CONTENT` chunks and a final `CUSTOM` `structured-output.complete` event with the parsed object — replacing the previous two-request (streamed text → non-streamed JSON) flow when used with `chat({ outputSchema, stream: true })`. diff --git a/.changeset/streaming-structured-output-openrouter.md b/.changeset/streaming-structured-output-openrouter.md new file mode 100644 index 000000000..2b0b7fe2b --- /dev/null +++ b/.changeset/streaming-structured-output-openrouter.md @@ -0,0 +1,5 @@ +--- +'@tanstack/ai-openrouter': minor +--- + +feat: native streaming structured output. `OpenRouterTextAdapter.structuredOutputStream()` issues a single request with `stream: true` + `response_format: { type: 'json_schema', strict: true }`, surfacing JSON deltas as `TEXT_MESSAGE_CONTENT` chunks and a final `CUSTOM` `structured-output.complete` event with the parsed object — replacing the previous two-request (streamed text → non-streamed JSON) flow when used with `chat({ outputSchema, stream: true })`. 
diff --git a/examples/ts-react-chat/src/routes/api.structured-output.ts b/examples/ts-react-chat/src/routes/api.structured-output.ts index aa1d045f2..57618576c 100644 --- a/examples/ts-react-chat/src/routes/api.structured-output.ts +++ b/examples/ts-react-chat/src/routes/api.structured-output.ts @@ -1,7 +1,11 @@ import { createFileRoute } from '@tanstack/react-router' -import { chat } from '@tanstack/ai' +import { chat, toServerSentEventsResponse } from '@tanstack/ai' +import { openaiText } from '@tanstack/ai-openai' +import { grokText } from '@tanstack/ai-grok' +import { groqText } from '@tanstack/ai-groq' import { openRouterText } from '@tanstack/ai-openrouter' import { z } from 'zod' +import type { AnyTextAdapter, StreamChunk } from '@tanstack/ai' const GuitarRecommendationSchema = z.object({ title: z.string().describe('Short headline for the recommendation'), @@ -21,23 +25,127 @@ const GuitarRecommendationSchema = z.object({ nextSteps: z.array(z.string()).describe('Practical follow-up actions'), }) +type Provider = 'openai' | 'grok' | 'groq' | 'openrouter' + +const StructuredOutputRequestSchema = z.object({ + prompt: z.string().min(1), + provider: z.enum(['openai', 'grok', 'groq', 'openrouter']).optional(), + model: z.string().optional(), + stream: z.boolean().optional(), +}) + +function adapterFor(provider: Provider, model?: string): AnyTextAdapter { + switch (provider) { + case 'openai': + return openaiText((model || 'gpt-5.2') as 'gpt-5.2') + case 'grok': + return grokText( + (model || 'grok-4-1-fast-reasoning') as 'grok-4-1-fast-reasoning', + ) + case 'groq': + return groqText( + (model || + 'meta-llama/llama-4-maverick-17b-128e-instruct') as 'meta-llama/llama-4-maverick-17b-128e-instruct', + ) + case 'openrouter': + return openRouterText( + (model || 'anthropic/claude-opus-4.7') as 'anthropic/claude-opus-4.7', + ) + } +} + +// Per-provider modelOptions to opt into reasoning surfacing. 
Without these, +// reasoning models reason silently and the UI never sees REASONING_* events. +function reasoningOptionsFor( + provider: Provider, + model: string | undefined, +): Record | undefined { + switch (provider) { + case 'openai': + // Responses API: `reasoning.summary: 'auto'` is what makes the API emit + // `response.reasoning_summary_text.delta` events. Only valid on + // reasoning models (gpt-5.x, o-series); older models (gpt-4o) reject it. + if ( + model?.startsWith('gpt-5') || + model?.startsWith('o3') || + model?.startsWith('o4') + ) { + return { reasoning: { summary: 'auto' } } + } + return undefined + case 'groq': + // Groq's Chat Completions only streams `delta.reasoning` when + // `reasoning_format: 'parsed'`. Required for gpt-oss / qwen3 / kimi-k2 + // to emit reasoning during structured output (json_schema mode). + if ( + model?.startsWith('openai/gpt-oss') || + model?.startsWith('qwen') || + model?.startsWith('moonshotai/kimi') + ) { + return { reasoning_format: 'parsed' } + } + return undefined + case 'openrouter': + // OpenRouter normalises across providers. `reasoning.effort` triggers + // the upstream model's reasoning + surfaces the deltas. + return { reasoning: { effort: 'medium' } } + case 'grok': + // xAI surfaces `delta.reasoning_content` automatically on reasoning + // models (grok-3-mini, grok-4-fast-reasoning, grok-4-1-fast-reasoning). + // No request param needed. 
+ return undefined + } +} + export const Route = createFileRoute('/api/structured-output')({ server: { handlers: { POST: async ({ request }) => { - const body = await request.json() - const { prompt, model } = body as { - prompt: string - model?: string - } - try { + const parsed = StructuredOutputRequestSchema.safeParse( + await request.json(), + ) + if (!parsed.success) { + return new Response( + JSON.stringify({ error: 'Invalid request body' }), + { + status: 400, + headers: { 'Content-Type': 'application/json' }, + }, + ) + } + const { prompt, provider, model, stream } = parsed.data + const resolvedProvider: Provider = provider || 'openrouter' + const modelOptions = reasoningOptionsFor(resolvedProvider, model) + + if (stream) { + const abortController = new AbortController() + request.signal.addEventListener('abort', () => + abortController.abort(), + ) + const streamIterable = chat({ + adapter: adapterFor(resolvedProvider, model), + modelOptions: modelOptions as never, + messages: [{ role: 'user', content: prompt }], + outputSchema: GuitarRecommendationSchema, + stream: true, + abortController, + }) as AsyncIterable + return toServerSentEventsResponse(streamIterable, { + abortController, + }) + } + + const abortController = new AbortController() + request.signal.addEventListener('abort', () => + abortController.abort(), + ) const result = await chat({ - adapter: openRouterText( - (model || 'openai/gpt-5.2') as 'openai/gpt-5.2', - ), + adapter: adapterFor(resolvedProvider, model), + modelOptions: modelOptions as never, messages: [{ role: 'user', content: prompt }], outputSchema: GuitarRecommendationSchema, + abortController, }) return new Response(JSON.stringify({ data: result }), { diff --git a/examples/ts-react-chat/src/routes/generations.structured-output.tsx b/examples/ts-react-chat/src/routes/generations.structured-output.tsx index 9b123308f..831bcebdd 100644 --- a/examples/ts-react-chat/src/routes/generations.structured-output.tsx +++ 
b/examples/ts-react-chat/src/routes/generations.structured-output.tsx @@ -1,98 +1,357 @@ -import { useState } from 'react' +import { useRef, useState } from 'react' import { createFileRoute } from '@tanstack/react-router' +import { parsePartialJSON } from '@tanstack/ai' const SAMPLE_PROMPT = 'I play indie rock and have a $1500 budget. Recommend two electric guitars and one acoustic to round out my rig.' -const OPENROUTER_MODELS = [ - { value: 'openai/gpt-5.2', label: 'OpenAI GPT-5.2' }, - { value: 'openai/gpt-5.2-pro', label: 'OpenAI GPT-5.2 Pro' }, - { value: 'openai/gpt-5.1', label: 'OpenAI GPT-5.1' }, - { value: 'anthropic/claude-opus-4.7', label: 'Claude Opus 4.7' }, - { value: 'anthropic/claude-sonnet-4.6', label: 'Claude Sonnet 4.6' }, - { value: 'google/gemini-3.1-pro-preview', label: 'Gemini 3.1 Pro (Preview)' }, - { value: 'x-ai/grok-4.1-fast', label: 'Grok 4.1 Fast' }, -] as const - -interface RecommendationResult { - title: string - summary: string - recommendations: Array<{ - name: string - brand: string - type: 'acoustic' | 'electric' | 'bass' | 'classical' - priceRangeUsd: { min: number; max: number } - reason: string - }> - nextSteps: Array +type Provider = 'openai' | 'grok' | 'groq' | 'openrouter' + +const PROVIDER_MODELS: Record< + Provider, + Array<{ value: string; label: string }> +> = { + openai: [ + { value: 'gpt-5.2', label: 'GPT-5.2 (frontier)' }, + { value: 'gpt-5.2-pro', label: 'GPT-5.2 Pro' }, + { value: 'gpt-5.1', label: 'GPT-5.1' }, + { value: 'gpt-5', label: 'GPT-5' }, + { value: 'gpt-5-mini', label: 'GPT-5 Mini' }, + { value: 'gpt-4o', label: 'GPT-4o' }, + ], + grok: [ + { value: 'grok-4-1-fast-reasoning', label: 'Grok 4.1 Fast (reasoning)' }, + { + value: 'grok-4-1-fast-non-reasoning', + label: 'Grok 4.1 Fast (non-reasoning)', + }, + { value: 'grok-4', label: 'Grok 4' }, + { value: 'grok-3', label: 'Grok 3' }, + ], + groq: [ + { + value: 'meta-llama/llama-4-maverick-17b-128e-instruct', + label: 'Llama 4 Maverick 17B', + }, + { + 
value: 'meta-llama/llama-4-scout-17b-16e-instruct', + label: 'Llama 4 Scout 17B', + }, + { + value: 'moonshotai/kimi-k2-instruct-0905', + label: 'Kimi K2 Instruct', + }, + { value: 'llama-3.3-70b-versatile', label: 'Llama 3.3 70B Versatile' }, + { value: 'openai/gpt-oss-120b', label: 'GPT-OSS 120B' }, + ], + openrouter: [ + { value: 'anthropic/claude-opus-4.7', label: 'Claude Opus 4.7' }, + { value: 'anthropic/claude-sonnet-4.6', label: 'Claude Sonnet 4.6' }, + { value: 'openai/gpt-5.2', label: 'GPT-5.2 (via OpenRouter)' }, + { value: 'x-ai/grok-4.1-fast', label: 'Grok 4.1 Fast (via OpenRouter)' }, + ], +} + +interface PartialRecommendation { + name?: string + brand?: string + type?: 'acoustic' | 'electric' | 'bass' | 'classical' | string + priceRangeUsd?: { min?: number; max?: number } + reason?: string +} + +interface PartialResult { + title?: string + summary?: string + recommendations?: Array + nextSteps?: Array +} + +interface StreamChunkPayload { + type: string + delta?: string + content?: string + name?: string + value?: { object?: unknown; raw?: string; reasoning?: string } + message?: string +} + +// Pick the last meaningful sentence/line out of an accumulating reasoning +// stream so the UI can render a single rolling line of "what it's thinking +// right now" rather than a growing wall of text. +function latestThought(reasoning: string): string { + const trimmed = reasoning.trimEnd() + if (!trimmed) return '' + // Prefer the last sentence; fall back to the last newline-delimited line. + const sentenceMatch = trimmed.match(/[^.!?\n]+[.!?]?\s*$/) + const candidate = sentenceMatch ? sentenceMatch[0] : trimmed + const last = candidate.split('\n').filter(Boolean).pop() ?? 
candidate + return last.trim() } function StructuredOutputPage() { + const providerId = 'structured-output-provider' + const modelId = 'structured-output-model' + const promptId = 'structured-output-prompt' const [prompt, setPrompt] = useState(SAMPLE_PROMPT) - const [model, setModel] = useState(OPENROUTER_MODELS[0].value) - const [result, setResult] = useState(null) + const [provider, setProvider] = useState('openai') + const [model, setModel] = useState(PROVIDER_MODELS.openai[0].value) + const [stream, setStream] = useState(true) + const [result, setResult] = useState(null) + const [rawJson, setRawJson] = useState('') + const [deltaCount, setDeltaCount] = useState(0) + const [isStreaming, setIsStreaming] = useState(false) + const [hasFinalResult, setHasFinalResult] = useState(false) + const [reasoningLine, setReasoningLine] = useState('') + const [reasoningFull, setReasoningFull] = useState('') const [error, setError] = useState(null) const [isLoading, setIsLoading] = useState(false) + const abortRef = useRef(null) + + const onProviderChange = (next: Provider) => { + setProvider(next) + setModel(PROVIDER_MODELS[next][0].value) + } + + const reset = () => { + setResult(null) + setRawJson('') + setDeltaCount(0) + setHasFinalResult(false) + setReasoningLine('') + setReasoningFull('') + setError(null) + } const handleGenerate = async () => { if (!prompt.trim()) return setIsLoading(true) - setError(null) - setResult(null) + reset() + setIsStreaming(stream) + + const controller = new AbortController() + abortRef.current = controller try { const response = await fetch('/api/structured-output', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ prompt: prompt.trim(), model }), + body: JSON.stringify({ + prompt: prompt.trim(), + provider, + model, + stream, + }), + signal: controller.signal, }) - const payload = await response.json() + if (!response.ok) { - throw new Error(payload.error || 'Request failed') + const errPayload = 
await response.json().catch(() => ({})) + throw new Error( + errPayload.error || `Request failed (${response.status})`, + ) + } + + if (!stream) { + const payload = await response.json() + setResult(payload.data as PartialResult) + setHasFinalResult(true) + return + } + + // Streaming path — parse SSE, accumulate raw JSON, render the partially + // parsed object live, snap to the validated terminal payload. + const reader = response.body!.getReader() + const decoder = new TextDecoder() + let buffer = '' + let accumulated = '' + let reasoning = '' + let deltas = 0 + let sawComplete = false + + const processBuffer = () => { + let sepIdx = buffer.indexOf('\n\n') + while (sepIdx !== -1) { + const frame = buffer.slice(0, sepIdx) + buffer = buffer.slice(sepIdx + 2) + sepIdx = buffer.indexOf('\n\n') + + for (const line of frame.split('\n')) { + if (!line.startsWith('data: ')) continue + const json = line.slice(6).trim() + if (!json) continue + let chunk: StreamChunkPayload + try { + chunk = JSON.parse(json) as StreamChunkPayload + } catch { + continue + } + + if (chunk.type === 'TEXT_MESSAGE_CONTENT' && chunk.delta) { + accumulated += chunk.delta + deltas += 1 + setRawJson(accumulated) + setDeltaCount(deltas) + // partial-json tolerates incomplete JSON — it returns whatever + // structure can be inferred. Render it directly so the UI fills + // in field by field as the model produces them. + const partial = parsePartialJSON(accumulated) as + | PartialResult + | undefined + if (partial && typeof partial === 'object') { + setResult(partial) + } + } else if ( + chunk.type === 'REASONING_MESSAGE_CONTENT' && + chunk.delta + ) { + reasoning += chunk.delta + setReasoningFull(reasoning) + // One-liner: take the last non-empty line/sentence so consumers + // see "what it's thinking right now" without a wall of text. 
+ setReasoningLine(latestThought(reasoning)) + } else if ( + chunk.type === 'CUSTOM' && + chunk.name === 'structured-output.complete' && + chunk.value?.object + ) { + sawComplete = true + setResult(chunk.value.object as PartialResult) + setHasFinalResult(true) + if ( + typeof (chunk.value as { reasoning?: string }).reasoning === + 'string' + ) { + const finalReasoning = (chunk.value as { reasoning: string }) + .reasoning + setReasoningFull(finalReasoning) + setReasoningLine(latestThought(finalReasoning)) + } + } else if (chunk.type === 'RUN_ERROR') { + throw new Error(chunk.message || 'Stream failed') + } + } + } + } + + while (true) { + const { done, value } = await reader.read() + if (done) break + buffer += decoder.decode(value, { stream: true }) + processBuffer() + } + + // Flush any buffered bytes from incomplete multi-byte UTF-8 sequences + // so the final SSE frame isn't dropped. + buffer += decoder.decode() + processBuffer() + + if (!sawComplete) { + throw new Error('Stream ended before structured-output.complete') } - setResult(payload.data as RecommendationResult) } catch (err) { - setError(err instanceof Error ? err.message : 'Unknown error') + if (err instanceof Error && err.name === 'AbortError') { + setError('Aborted') + } else { + setError(err instanceof Error ? err.message : 'Unknown error') + } } finally { setIsLoading(false) + setIsStreaming(false) + abortRef.current = null } } + const handleAbort = () => abortRef.current?.abort() + + const renderingPartial = isStreaming && !hasFinalResult + const recommendations = result?.recommendations ?? [] + const nextSteps = result?.nextSteps ?? [] + return (
-

- Structured Output (OpenRouter) -

+

Structured Output

Calls chat() with an{' '} - outputSchema via the{' '} - openRouterText adapter and - parses the JSON result. + outputSchema. Toggle{' '} + stream to exercise{' '} + structuredOutputStream on the + selected provider; the UI fills in progressively via{' '} + parsePartialJSON, then snaps + to the validated payload from the terminal{' '} + structured-output.complete{' '} + event. Reasoning models surface a live thinking strip from{' '} + REASONING_MESSAGE_CONTENT{' '} + deltas — openai (Responses API), openrouter, xAI ( + delta.reasoning_content), and + Groq (delta.reasoning) all + stream chain-of-thought.

-
- - +
+
+ + +
+
+ + +
+ +
- +