diff --git a/.changeset/gemini-text-interactions.md b/.changeset/gemini-text-interactions.md new file mode 100644 index 000000000..459624fbe --- /dev/null +++ b/.changeset/gemini-text-interactions.md @@ -0,0 +1,19 @@ +--- +'@tanstack/ai-gemini': minor +--- + +feat(ai-gemini): add experimental `geminiTextInteractions()` adapter for Gemini's stateful Interactions API (Beta) + +Routes through `client.interactions.create` instead of `client.models.generateContent`, so callers can pass `previous_interaction_id` via `modelOptions` and let the server retain conversation history. On each run, the returned interaction id is surfaced via an AG-UI `CUSTOM` event (`name: 'gemini.interactionId'`) emitted just before `RUN_FINISHED` — feed it back on the next turn via `modelOptions.previous_interaction_id`. + +Exported from a dedicated `@tanstack/ai-gemini/experimental` subpath so the experimental status is load-bearing in your editor and bundle: + +```ts +import { geminiTextInteractions } from '@tanstack/ai-gemini/experimental' +``` + +Scope: text/chat output with function tools, plus the built-in tools `google_search`, `code_execution`, `url_context`, `file_search`, and `computer_use`. Built-in tool activity is surfaced as AG-UI `CUSTOM` events named `gemini.googleSearchCall` / `gemini.googleSearchResult` (and the matching `codeExecutionCall`/`Result`, `urlContextCall`/`Result`, `fileSearchCall`/`Result` variants), carrying the raw Interactions delta payload. Function-tool `TOOL_CALL_*` events are unchanged, and `finishReason` stays `stop` when only built-in tools ran — the core chat loop has nothing to execute. + +`google_search_retrieval`, `google_maps`, and `mcp_server` are not supported on this adapter and throw a targeted error explaining the alternative. Image/audio output via Interactions is also not routed through this adapter — use `geminiText()`, `geminiImage`, or `geminiSpeech` for those. 
+ +Marked `@experimental` — the underlying Interactions API is Beta and Google explicitly flags possible breaking changes. diff --git a/docs/adapters/gemini.md b/docs/adapters/gemini.md index a3e4ff7e4..98476a76c 100644 --- a/docs/adapters/gemini.md +++ b/docs/adapters/gemini.md @@ -110,6 +110,110 @@ const stream = chat({ }); ``` +## Stateful Conversations — Interactions API (Experimental) + +Gemini's [Interactions API](https://ai.google.dev/gemini-api/docs/interactions) (currently in Beta) offers server-side conversation state — the Gemini equivalent of OpenAI's Responses API. Instead of replaying the full message history on every turn, you pass a `previous_interaction_id` and the server retains the transcript. This also improves cache hit rates for repeated prefixes. + +The `geminiTextInteractions` adapter routes through `client.interactions.create` and surfaces the server-assigned interaction id via an AG-UI `CUSTOM` event (`name: 'gemini.interactionId'`) emitted just before `RUN_FINISHED`, so you can chain turns. + +> **⚠️ Experimental.** Google marks the Interactions API as Beta and explicitly flags possible breaking changes until it reaches general availability. The adapter is exported from the `@tanstack/ai-gemini/experimental` subpath so the experimental status is load-bearing in your editor and bundle. Text output, function tools, and the built-in tools `google_search`, `code_execution`, `url_context`, `file_search`, and `computer_use` are supported. `google_search_retrieval`, `google_maps`, and `mcp_server` still throw on this adapter — use `geminiText()` for those or wait for follow-up work. + +### Basic Usage + +```typescript +import { chat } from "@tanstack/ai"; +import { geminiTextInteractions } from "@tanstack/ai-gemini/experimental"; + +// Turn 1: introduce yourself, capture the interaction id. 
+let interactionId: string | undefined; + +for await (const chunk of chat({ + adapter: geminiTextInteractions("gemini-2.5-flash"), + messages: [{ role: "user", content: "Hi, my name is Amir." }], +})) { + if (chunk.type === "CUSTOM" && chunk.name === "gemini.interactionId") { + interactionId = (chunk.value as { interactionId?: string }).interactionId; + } +} + +// Turn 2: only send the new turn's content — the server has the history. +for await (const chunk of chat({ + adapter: geminiTextInteractions("gemini-2.5-flash"), + messages: [{ role: "user", content: "What is my name?" }], + modelOptions: { + previous_interaction_id: interactionId, + }, +})) { + // ...stream "Your name is Amir." back to the client. +} +``` + +### How it differs from `geminiText` + +| Concern | `geminiText` | `geminiTextInteractions` | +| --- | --- | --- | +| Underlying endpoint | `models:generateContent` | `interactions:create` | +| Conversation state | Stateless — send full history each turn | Stateful — server retains transcript via `previous_interaction_id` | +| Provider options shape | camelCase (`generationConfig`, `safetySettings`) | snake_case (`generation_config`, `response_modalities`, `previous_interaction_id`) | +| Built-in tools | `google_search`, `code_execution`, `url_context`, `file_search`, `google_maps`, `google_search_retrieval`, `computer_use` | `google_search`, `code_execution`, `url_context`, `file_search`, `computer_use` (activity surfaced via `CUSTOM` events) | +| Stability | GA | Experimental (Google Beta) | + +### Provider Options + +The adapter exposes Interactions-specific options on `modelOptions`: + +```typescript +import { geminiTextInteractions } from "@tanstack/ai-gemini/experimental"; + +const stream = chat({ + adapter: geminiTextInteractions("gemini-2.5-flash"), + messages, + modelOptions: { + // Stateful chaining — passed only on turn 2+. + previous_interaction_id: "int_abc123", + + // Persist the interaction server-side (default true). 
Must be true for + // previous_interaction_id to work on the *next* turn. + store: true, + + // Per-request system instruction (interaction-scoped — re-specify each turn). + system_instruction: "You are a helpful assistant.", + + // snake_case generation config distinct from geminiText's camelCase one. + generation_config: { + thinking_level: "LOW", + thinking_summaries: "auto", + stop_sequences: ["END"], + }, + + response_modalities: ["text"], + }, +}); +``` + +### Reading the interaction id + +The server's interaction id arrives as an AG-UI `CUSTOM` event emitted just before `RUN_FINISHED`: + +```typescript +for await (const chunk of stream) { + if (chunk.type === "CUSTOM" && chunk.name === "gemini.interactionId") { + const id = (chunk.value as { interactionId: string }).interactionId; + // Persist `id` wherever you store per-user conversation pointers — + // pass it back on the next turn as `previous_interaction_id`. + } +} +``` + +### Caveats + +- **Tools, `system_instruction`, and `generation_config` are interaction-scoped.** Per Google's docs these are NOT inherited from a prior interaction via `previous_interaction_id` — pass them again on every turn you need them. +- `store: false` is incompatible with `previous_interaction_id` (no state to recall) and with `background: true`. +- Retention (as of the time of writing): **55 days on the Paid Tier, 1 day on the Free Tier.** See [Google's Interactions API docs](https://ai.google.dev/gemini-api/docs/interactions) for current retention policy. +- Built-in tools in scope (`google_search`, `code_execution`, `url_context`, `file_search`, `computer_use`) are wired through; activity streams back as AG-UI `CUSTOM` events — `gemini.googleSearchCall` / `gemini.googleSearchResult` (and the matching `codeExecutionCall`/`Result`, `urlContextCall`/`Result`, `fileSearchCall`/`Result`) — carrying the raw Interactions delta. 
Function-tool `TOOL_CALL_*` events are unchanged, and `finishReason` stays `stop` when only built-in tools ran. +- `google_search_retrieval`, `google_maps`, and `mcp_server` still throw a targeted error on this adapter. Use `geminiText()` for the first two, or wait for a dedicated follow-up for `mcp_server`. +- Image and audio output via Interactions aren't routed through this adapter yet — it's text-only. Use `geminiImage` / `geminiSpeech` for non-text generation for now. + ## Model Options Gemini supports various model-specific options: @@ -341,6 +445,22 @@ Creates a Gemini text/chat adapter with an explicit API key. **Returns:** A Gemini text adapter instance. +### `geminiTextInteractions(model, config?)` (experimental) + +Creates a Gemini Interactions API text adapter using environment variables. Backs the stateful conversation pattern via `previous_interaction_id`. + +**Returns:** A Gemini Interactions text adapter instance. + +### `createGeminiTextInteractions(model, apiKey, config?)` (experimental) + +Creates a Gemini Interactions API text adapter with an explicit API key. + +- `model` - The model name (e.g. `gemini-2.5-flash`) +- `apiKey` - Your Google API key +- `config.baseURL?` - Custom base URL (optional) + +**Returns:** A Gemini Interactions text adapter instance. + ### `geminiSummarize(config?)` Creates a Gemini summarization adapter using environment variables. 
diff --git a/examples/ts-react-chat/src/lib/model-selection.ts b/examples/ts-react-chat/src/lib/model-selection.ts index 95c122cd4..d8c31bcbd 100644 --- a/examples/ts-react-chat/src/lib/model-selection.ts +++ b/examples/ts-react-chat/src/lib/model-selection.ts @@ -2,6 +2,7 @@ export type Provider = | 'openai' | 'anthropic' | 'gemini' + | 'gemini-interactions' | 'ollama' | 'grok' | 'groq' @@ -15,72 +16,65 @@ export interface ModelOption { export const MODEL_OPTIONS: Array = [ // OpenAI - { provider: 'openai', model: 'gpt-4o', label: 'OpenAI - GPT-4o' }, - { provider: 'openai', model: 'gpt-4o-mini', label: 'OpenAI - GPT-4o Mini' }, - { provider: 'openai', model: 'gpt-5', label: 'OpenAI - GPT-5' }, + { provider: 'openai', model: 'gpt-5.2', label: 'OpenAI - GPT-5.2' }, + { provider: 'openai', model: 'gpt-5.2-pro', label: 'OpenAI - GPT-5.2 Pro' }, + { provider: 'openai', model: 'gpt-5.1', label: 'OpenAI - GPT-5.1' }, + { provider: 'openai', model: 'gpt-5-mini', label: 'OpenAI - GPT-5 Mini' }, // Anthropic { provider: 'anthropic', - model: 'claude-sonnet-4-6', - label: 'Anthropic - Claude Sonnet 4.6', - }, - { - provider: 'anthropic', - model: 'claude-sonnet-4-5-20250929', - label: 'Anthropic - Claude Sonnet 4.5', + model: 'claude-opus-4-6', + label: 'Anthropic - Claude Opus 4.6', }, { provider: 'anthropic', - model: 'claude-opus-4-5-20251101', - label: 'Anthropic - Claude Opus 4.5', + model: 'claude-sonnet-4-6', + label: 'Anthropic - Claude Sonnet 4.6', }, { provider: 'anthropic', - model: 'claude-haiku-4-0-20250514', - label: 'Anthropic - Claude Haiku 4.0', + model: 'claude-haiku-4-5', + label: 'Anthropic - Claude Haiku 4.5', }, - // Gemini + // Gemini (stateless `geminiText`) { provider: 'gemini', - model: 'gemini-2.0-flash', - label: 'Gemini - 2.0 Flash', + model: 'gemini-3.1-pro-preview', + label: 'Gemini - 3.1 Pro Preview', }, { provider: 'gemini', - model: 'gemini-2.5-flash', - label: 'Gemini - 2.5 Flash', + model: 'gemini-3.1-flash-lite-preview', + label: 'Gemini 
- 3.1 Flash Lite Preview', }, + + // Gemini Interactions (stateful, experimental — `@tanstack/ai-gemini/experimental`) { - provider: 'gemini', - model: 'gemini-2.5-pro', - label: 'Gemini - 2.5 Pro', + provider: 'gemini-interactions', + model: 'gemini-3.1-pro-preview', + label: 'Gemini Interactions - 3.1 Pro Preview (experimental)', + }, + { + provider: 'gemini-interactions', + model: 'gemini-3.1-flash-lite-preview', + label: 'Gemini Interactions - 3.1 Flash Lite Preview (experimental)', }, // Openrouter { provider: 'openrouter', - model: 'openai/chatgpt-4o-latest', - label: 'Openrouter - ChatGPT 4o Latest', + model: 'openai/gpt-5.2', + label: 'Openrouter - GPT-5.2', }, { provider: 'openrouter', - model: 'openai/chatgpt-4o-mini', - label: 'Openrouter - ChatGPT 4o Mini', + model: 'openai/gpt-5-mini', + label: 'Openrouter - GPT-5 Mini', }, // Ollama - { - provider: 'ollama', - model: 'mistral:7b', - label: 'Ollama - Mistral 7B', - }, - { - provider: 'ollama', - model: 'mistral', - label: 'Ollama - Mistral', - }, { provider: 'ollama', model: 'gpt-oss:20b', @@ -93,15 +87,20 @@ export const MODEL_OPTIONS: Array = [ }, { provider: 'ollama', - model: 'smollm', - label: 'Ollama - SmolLM', + model: 'mistral', + label: 'Ollama - Mistral', }, // Groq { provider: 'groq', - model: 'llama-3.3-70b-versatile', - label: 'Groq - Llama 3.3 70B', + model: 'openai/gpt-oss-120b', + label: 'Groq - GPT-OSS 120B', + }, + { + provider: 'groq', + model: 'moonshotai/kimi-k2-instruct-0905', + label: 'Groq - Kimi K2 Instruct', }, { provider: 'groq', @@ -110,30 +109,30 @@ export const MODEL_OPTIONS: Array = [ }, { provider: 'groq', - model: 'meta-llama/llama-4-scout-17b-16e-instruct', - label: 'Groq - Llama 4 Scout', + model: 'qwen/qwen3-32b', + label: 'Groq - Qwen3 32B', }, // Grok { provider: 'grok', - model: 'grok-4', - label: 'Grok - Grok 4', + model: 'grok-4.20', + label: 'Grok - Grok 4.20', }, { provider: 'grok', - model: 'grok-4-fast-non-reasoning', - label: 'Grok - Grok 4 Fast', + model: 
'grok-4-1-fast-reasoning', + label: 'Grok - Grok 4.1 Fast (Reasoning)', }, { provider: 'grok', - model: 'grok-3', - label: 'Grok - Grok 3', + model: 'grok-4-1-fast-non-reasoning', + label: 'Grok - Grok 4.1 Fast', }, { provider: 'grok', - model: 'grok-3-mini', - label: 'Grok - Grok 3 Mini', + model: 'grok-code-fast-1', + label: 'Grok - Grok Code Fast 1', }, ] diff --git a/examples/ts-react-chat/src/routes/api.tanchat.ts b/examples/ts-react-chat/src/routes/api.tanchat.ts index f571fd9c7..940b5015b 100644 --- a/examples/ts-react-chat/src/routes/api.tanchat.ts +++ b/examples/ts-react-chat/src/routes/api.tanchat.ts @@ -9,6 +9,7 @@ import { openaiText } from '@tanstack/ai-openai' import { ollamaText } from '@tanstack/ai-ollama' import { anthropicText } from '@tanstack/ai-anthropic' import { geminiText } from '@tanstack/ai-gemini' +import { geminiTextInteractions } from '@tanstack/ai-gemini/experimental' import { openRouterText } from '@tanstack/ai-openrouter' import { grokText } from '@tanstack/ai-grok' import { groqText } from '@tanstack/ai-groq' @@ -28,6 +29,7 @@ type Provider = | 'openai' | 'anthropic' | 'gemini' + | 'gemini-interactions' | 'ollama' | 'grok' | 'groq' @@ -127,8 +129,10 @@ export const Route = createFileRoute('/api/tanchat')({ // Extract provider and model from data const provider: Provider = data?.provider || 'openai' - const model: string = data?.model || 'gpt-4o' + const model: string = data?.model || 'gpt-5.2' const conversationId: string | undefined = data?.conversationId + const previousInteractionId: string | undefined = + data?.previousInteractionId // Pre-define typed adapter configurations with full type inference // Model is passed to the adapter factory function for type-safe autocomplete @@ -139,12 +143,14 @@ export const Route = createFileRoute('/api/tanchat')({ anthropic: () => createChatOptions({ adapter: anthropicText( - (model || 'claude-sonnet-4-5') as 'claude-sonnet-4-5', + (model || 'claude-sonnet-4-6') as 'claude-sonnet-4-6', ), 
}), openrouter: () => createChatOptions({ - adapter: openRouterText('openai/gpt-5.1'), + adapter: openRouterText( + (model || 'openai/gpt-5.2') as 'openai/gpt-5.2', + ), modelOptions: { reasoning: { effort: 'medium', @@ -154,7 +160,7 @@ export const Route = createFileRoute('/api/tanchat')({ gemini: () => createChatOptions({ adapter: geminiText( - (model || 'gemini-2.5-flash') as 'gemini-2.5-flash', + (model || 'gemini-3.1-pro-preview') as 'gemini-3.1-pro-preview', ), modelOptions: { thinkingConfig: { @@ -163,26 +169,35 @@ export const Route = createFileRoute('/api/tanchat')({ }, }, }), + 'gemini-interactions': () => + createChatOptions({ + adapter: geminiTextInteractions( + (model || 'gemini-3.1-pro-preview') as 'gemini-3.1-pro-preview', + ), + modelOptions: { + previous_interaction_id: previousInteractionId, + store: true, + }, + }), grok: () => createChatOptions({ - adapter: grokText((model || 'grok-3') as 'grok-3'), + adapter: grokText((model || 'grok-4.20') as 'grok-4.20'), modelOptions: {}, }), groq: () => createChatOptions({ adapter: groqText( - (model || - 'llama-3.3-70b-versatile') as 'llama-3.3-70b-versatile', + (model || 'openai/gpt-oss-120b') as 'openai/gpt-oss-120b', ), }), ollama: () => createChatOptions({ - adapter: ollamaText((model || 'gpt-oss:120b') as 'gpt-oss:120b'), + adapter: ollamaText((model || 'gpt-oss:20b') as 'gpt-oss:20b'), modelOptions: { think: 'low', options: { top_k: 1 } }, }), openai: () => createChatOptions({ - adapter: openaiText((model || 'gpt-4o') as 'gpt-4o'), + adapter: openaiText((model || 'gpt-5.2') as 'gpt-5.2'), modelOptions: {}, }), } @@ -193,20 +208,27 @@ export const Route = createFileRoute('/api/tanchat')({ // Note: We cast to AsyncIterable because all chat adapters // return streams, but TypeScript sees a union of all possible return types + // Gemini's Interactions API rejects tool parameter schemas that + // include `anyOf` (e.g. Zod unions), so the guitar tool suite + // isn't wired for that provider. 
Other providers get the full set. + const tools = + provider === 'gemini-interactions' + ? [] + : [ + getGuitars, // Server tool + recommendGuitarToolDef, // No server execute - client will handle + addToCartToolServer, + addToWishListToolDef, + getPersonalGuitarPreferenceToolDef, + // Lazy tools - discovered on demand + compareGuitars, + calculateFinancing, + searchGuitars, + ] + const stream = chat({ ...options, - - tools: [ - getGuitars, // Server tool - recommendGuitarToolDef, // No server execute - client will handle - addToCartToolServer, - addToWishListToolDef, - getPersonalGuitarPreferenceToolDef, - // Lazy tools - discovered on demand - compareGuitars, - calculateFinancing, - searchGuitars, - ], + tools, middleware: [loggingMiddleware], systemPrompts: [SYSTEM_PROMPT], agentLoopStrategy: maxIterations(20), diff --git a/examples/ts-react-chat/src/routes/index.tsx b/examples/ts-react-chat/src/routes/index.tsx index c91dbd746..c4340e6f5 100644 --- a/examples/ts-react-chat/src/routes/index.tsx +++ b/examples/ts-react-chat/src/routes/index.tsx @@ -354,13 +354,29 @@ function ChatPage() { Array<{ id: string; base64: string; mimeType: string; preview: string }> >([]) const fileInputRef = useRef(null) + // Session-scoped Gemini Interactions id — the server surfaces it via a + // `gemini.interactionId` CUSTOM event, and we send it back as + // `previous_interaction_id` on the next turn. State (not ref) so a body + // change triggers `useChat` to re-sync the updated body to the client. + const [interactionId, setInteractionId] = useState( + undefined, + ) + + // Reset the interaction id whenever the user switches model/provider so + // we don't chain against a stale or wrong-model interaction. + useEffect(() => { + setInteractionId(undefined) + }, [selectedModel.provider, selectedModel.model]) const body = useMemo( () => ({ provider: selectedModel.provider, model: selectedModel.model, + ...(selectedModel.provider === 'gemini-interactions' && interactionId + ? 
{ previousInteractionId: interactionId } + : {}), }), - [selectedModel.provider, selectedModel.model], + [selectedModel.provider, selectedModel.model, interactionId], ) const { @@ -380,6 +396,11 @@ function ChatPage() { data, context.toolCallId ? `(tool call: ${context.toolCallId})` : '', ) + if (eventType === 'gemini.interactionId') { + const id = (data as { interactionId?: string } | undefined) + ?.interactionId + if (id) setInteractionId(id) + } }, }) const [input, setInput] = useState('') diff --git a/packages/typescript/ai-gemini/package.json b/packages/typescript/ai-gemini/package.json index 565f89104..aa2ca983a 100644 --- a/packages/typescript/ai-gemini/package.json +++ b/packages/typescript/ai-gemini/package.json @@ -20,6 +20,10 @@ "./tools": { "types": "./dist/esm/tools/index.d.ts", "import": "./dist/esm/tools/index.js" + }, + "./experimental": { + "types": "./dist/esm/experimental/index.d.ts", + "import": "./dist/esm/experimental/index.js" } }, "files": [ diff --git a/packages/typescript/ai-gemini/src/experimental/index.ts b/packages/typescript/ai-gemini/src/experimental/index.ts new file mode 100644 index 000000000..a43f22ea7 --- /dev/null +++ b/packages/typescript/ai-gemini/src/experimental/index.ts @@ -0,0 +1,12 @@ +/** + * @experimental Gemini's Interactions API is in Beta per Google. Shapes and + * behavior may change between minor releases of `@tanstack/ai-gemini`. 
+ */ +export { + GeminiTextInteractionsAdapter, + createGeminiTextInteractions, + geminiTextInteractions, + type GeminiTextInteractionsConfig, + type GeminiTextInteractionsProviderOptions, +} from './text-interactions/adapter' +export type { ExternalTextInteractionsProviderOptions } from './text-interactions/provider-options' diff --git a/packages/typescript/ai-gemini/src/experimental/text-interactions/adapter.ts b/packages/typescript/ai-gemini/src/experimental/text-interactions/adapter.ts new file mode 100644 index 000000000..b15d39196 --- /dev/null +++ b/packages/typescript/ai-gemini/src/experimental/text-interactions/adapter.ts @@ -0,0 +1,911 @@ +import { BaseTextAdapter } from '@tanstack/ai/adapters' +import { + createGeminiClient, + generateId, + getGeminiApiKeyFromEnv, +} from '../../utils' +import type { InternalLogger } from '@tanstack/ai/adapter-internals' +import type { GeminiModels } from '../../model-meta' +import type { + StructuredOutputOptions, + StructuredOutputResult, +} from '@tanstack/ai/adapters' +import type { GoogleGenAI, Interactions } from '@google/genai' +import type { + ContentPart, + ModelMessage, + StreamChunk, + TextOptions, + Tool, +} from '@tanstack/ai' + +import type { ExternalTextInteractionsProviderOptions } from './provider-options' +import type { GeminiMessageMetadataByModality } from '../../message-types' +import type { GeminiClientConfig } from '../../utils' + +type Interaction = Interactions.Interaction +type InteractionSSEEvent = Interactions.InteractionSSEEvent + +/** Cast an event object to StreamChunk. Adapters construct events with string + * literal types which are structurally compatible with the EventType enum. 
*/ +const asChunk = (chunk: Record) => + chunk as unknown as StreamChunk + +export interface GeminiTextInteractionsConfig extends GeminiClientConfig {} + +export type GeminiTextInteractionsProviderOptions = + ExternalTextInteractionsProviderOptions + +type InteractionsInput = NonNullable + +type InteractionsTool = NonNullable< + Extract['tools'] +>[number] + +type TurnInput = Interactions.Turn +type ContentBlock = Interactions.Content + +type ToolCallState = { + name: string + args: string + index: number + started: boolean + ended: boolean +} + +/** + * Tree-shakeable adapter for Gemini's stateful Interactions API. Routes + * through `client.interactions.create` and surfaces the server-assigned + * `interactionId` via an AG-UI `CUSTOM` event with + * `name: 'gemini.interactionId'` emitted just before `RUN_FINISHED`; pass + * that id back on the next turn via `modelOptions.previous_interaction_id` + * to continue the conversation without resending history. + * + * Supports user-defined function tools and the built-in tools + * `google_search`, `code_execution`, `url_context`, `file_search`, and + * `computer_use`. Built-in tool activity is surfaced via `CUSTOM` events + * named `gemini.googleSearchCall`/`gemini.googleSearchResult` (and the + * corresponding per-tool variants) carrying the raw Interactions delta. + * `google_search_retrieval`, `google_maps`, and `mcp_server` are not + * supported on this adapter. + * + * @experimental Interactions API is in Beta per Google; shapes may change. 
+ * @see https://ai.google.dev/gemini-api/docs/interactions + */ +export class GeminiTextInteractionsAdapter< + TModel extends GeminiModels, +> extends BaseTextAdapter< + TModel, + GeminiTextInteractionsProviderOptions, + readonly ['text', 'image', 'audio', 'video', 'document'], + GeminiMessageMetadataByModality +> { + readonly kind = 'text' as const + readonly name = 'gemini-text-interactions' as const + + private client: GoogleGenAI + + constructor(config: GeminiTextInteractionsConfig, model: TModel) { + super({}, model) + this.client = createGeminiClient(config) + } + + async *chatStream( + options: TextOptions, + ): AsyncIterable { + const runId = generateId(this.name) + const timestamp = Date.now() + const { logger } = options + + try { + const request = buildInteractionsRequest(options) + logger.request( + `activity=chat provider=gemini-text-interactions model=${this.model} messages=${options.messages.length} tools=${options.tools?.length ?? 0} stream=true`, + { + provider: 'gemini-text-interactions', + model: this.model, + request, + }, + ) + const stream = await this.client.interactions.create({ + ...request, + stream: true, + }) + + yield* translateInteractionEvents( + stream as AsyncIterable, + options.model, + runId, + timestamp, + this.name, + logger, + ) + } catch (error) { + const message = + error instanceof Error + ? error.message + : 'An unknown error occurred during the interactions stream.' 
+ logger.errors('gemini-text-interactions.chatStream fatal', { + error, + source: 'gemini-text-interactions.chatStream', + }) + yield asChunk({ + type: 'RUN_ERROR', + runId, + model: options.model, + timestamp, + message, + error: { message }, + }) + } + } + + async structuredOutput( + options: StructuredOutputOptions, + ): Promise> { + const { chatOptions, outputSchema } = options + const { logger } = chatOptions + const baseRequest = buildInteractionsRequest(chatOptions) + + const request = { + ...baseRequest, + response_mime_type: 'application/json', + response_format: outputSchema, + } + + try { + logger.request( + `activity=chat provider=gemini-text-interactions model=${this.model} messages=${chatOptions.messages.length} tools=${chatOptions.tools?.length ?? 0} stream=false`, + { + provider: 'gemini-text-interactions', + model: this.model, + request, + }, + ) + const result = await this.client.interactions.create(request) + + const rawText = extractTextFromInteraction(result) + + let parsed: unknown + try { + parsed = JSON.parse(rawText) + } catch { + throw new Error( + `Failed to parse structured output as JSON. Content: ${rawText.slice(0, 200)}${rawText.length > 200 ? '...' : ''}`, + ) + } + + return { data: parsed, rawText } + } catch (error) { + logger.errors('gemini-text-interactions.structuredOutput fatal', { + error, + source: 'gemini-text-interactions.structuredOutput', + }) + throw new Error( + error instanceof Error + ? error.message + : 'An unknown error occurred during structured output generation.', + ) + } + } +} + +/** @experimental Interactions API is in Beta. */ +export function createGeminiTextInteractions( + model: TModel, + apiKey: string, + config?: Omit, +): GeminiTextInteractionsAdapter { + return new GeminiTextInteractionsAdapter({ apiKey, ...config }, model) +} + +/** @experimental Interactions API is in Beta. 
*/ +export function geminiTextInteractions( + model: TModel, + config?: Omit, +): GeminiTextInteractionsAdapter { + const apiKey = getGeminiApiKeyFromEnv() + return createGeminiTextInteractions(model, apiKey, config) +} + +function buildInteractionsRequest( + options: TextOptions, +) { + const modelOpts = options.modelOptions + + const systemInstruction = + modelOpts?.system_instruction ?? options.systemPrompts?.join('\n') + + const generationConfig: Interactions.GenerationConfig = { + ...modelOpts?.generation_config, + } + if (options.temperature !== undefined) { + generationConfig.temperature = options.temperature + } + if (options.topP !== undefined) { + generationConfig.top_p = options.topP + } + if (options.maxTokens !== undefined) { + generationConfig.max_output_tokens = options.maxTokens + } + + const hasGenerationConfig = Object.keys(generationConfig).length > 0 + + const input = convertMessagesToInteractionsInput( + options.messages, + modelOpts?.previous_interaction_id !== undefined, + ) + + return { + model: options.model, + input, + previous_interaction_id: modelOpts?.previous_interaction_id, + system_instruction: systemInstruction, + tools: convertToolsToInteractionsFormat(options.tools), + generation_config: hasGenerationConfig ? generationConfig : undefined, + store: modelOpts?.store, + background: modelOpts?.background, + response_modalities: modelOpts?.response_modalities, + response_format: modelOpts?.response_format, + response_mime_type: modelOpts?.response_mime_type, + } +} + +// When `hasPreviousInteraction` is true the server holds the transcript up +// through the last assistant turn, so we only send messages that come after +// it (a new user turn, a tool result continuing a function call, etc.). +// Otherwise we send the full conversation as `Turn[]`. 
/**
 * Converts chat messages into Interactions `Turn[]` input.
 *
 * @param messages - The full conversation as held by the caller.
 * @param hasPreviousInteraction - When true, only the messages after the
 *   last assistant message are converted; otherwise every message is.
 * @returns The turns to send as `input`. Messages that produce no content
 *   blocks are dropped.
 * @throws Error when `hasPreviousInteraction` is true and no turn remains
 *   to send.
 */
function convertMessagesToInteractionsInput(
  messages: Array<ModelMessage>,
  hasPreviousInteraction: boolean,
): Array<TurnInput> {
  // Index tool-call ids over the FULL history (not just the slice we send)
  // so a trailing tool result can still resolve the function name of a call
  // made in an earlier assistant message.
  const toolCallIdToName = new Map<string, string>()
  for (const msg of messages) {
    if (msg.role === 'assistant' && msg.toolCalls) {
      for (const tc of msg.toolCalls) {
        toolCallIdToName.set(tc.id, tc.function.name)
      }
    }
  }

  const source = hasPreviousInteraction
    ? messagesAfterLastAssistant(messages)
    : messages

  const turns: Array<TurnInput> = []
  for (const msg of source) {
    const turn = messageToTurn(msg, toolCallIdToName)
    if (turn) turns.push(turn)
  }

  if (hasPreviousInteraction && turns.length === 0) {
    throw new Error(
      'Gemini Interactions adapter: modelOptions.previous_interaction_id was provided but no new messages were found after the last assistant turn. Append at least one user or tool message before chaining.',
    )
  }

  return turns
}

/**
 * Returns the messages that follow the last assistant message.
 *
 * @returns A new array holding the tail after the last `assistant` message
 *   (empty when that message is the final one), or the input array itself
 *   when there is no assistant message at all.
 */
function messagesAfterLastAssistant(
  messages: Array<ModelMessage>,
): Array<ModelMessage> {
  for (let i = messages.length - 1; i >= 0; i--) {
    if (messages[i]?.role === 'assistant') {
      return messages.slice(i + 1)
    }
  }
  return messages
}

/**
 * Parses a tool call's JSON-encoded arguments into a plain object.
 *
 * Never throws: a missing/empty string, malformed JSON, or JSON that is not
 * an object (primitives, `null`, arrays) all yield `{}`.
 */
function safeParseToolArguments(
  raw: string | undefined,
): Record<string, unknown> {
  if (!raw) return {}
  try {
    const parsed: unknown = JSON.parse(raw)
    // `typeof [] === 'object'`, so arrays must be excluded explicitly —
    // otherwise an array would be returned as the arguments record.
    if (
      parsed !== null &&
      typeof parsed === 'object' &&
      !Array.isArray(parsed)
    ) {
      return parsed as Record<string, unknown>
    }
    return {}
  } catch {
    return {}
  }
}

/**
 * Converts one message into an Interactions turn.
 *
 * @param msg - The message to convert.
 * @param toolCallIdToName - Lookup from tool-call id to function name, used
 *   to label `function_result` blocks.
 * @returns The turn, or `undefined` when the message yields no content
 *   blocks (for example an assistant message with empty text and no tool
 *   calls).
 */
function messageToTurn(
  msg: ModelMessage,
  toolCallIdToName: Map<string, string>,
): TurnInput | undefined {
  const parts: Array<ContentBlock> = []

  if (Array.isArray(msg.content)) {
    for (const part of msg.content) {
      parts.push(contentPartToBlock(part))
    }
  } else if (
    typeof msg.content === 'string' &&
    msg.content &&
    msg.role !== 'tool'
  ) {
    // String content of a tool message is emitted below as the
    // `function_result` payload instead of as a text block.
    parts.push({ type: 'text', text: msg.content })
  }

  if (msg.role === 'assistant' && msg.toolCalls?.length) {
    for (const toolCall of msg.toolCalls) {
      parts.push({
        type: 'function_call',
        id: toolCall.id,
        name: toolCall.function.name,
        arguments: safeParseToolArguments(toolCall.function.arguments),
      })
    }
  }

  if (msg.role === 'tool' && msg.toolCallId) {
    parts.push({
      type: 'function_result',
      call_id: msg.toolCallId,
      name: toolCallIdToName.get(msg.toolCallId),
      result: typeof msg.content === 'string' ? msg.content : '',
    })
  }

  if (parts.length === 0) return undefined

  // Only assistant messages map to the `model` role; every other role
  // (including tool results) is sent as `user`.
  const role = msg.role === 'assistant' ? 'model' : 'user'

  return { role, content: parts }
}

// `satisfies` pins these arrays to the SDK's narrow mime-type unions: if
// Google removes a format the build breaks, and if they add one ours keeps
// working (we just won't accept the new one until added here).
+const IMAGE_MIME_TYPES = [ + 'image/png', + 'image/jpeg', + 'image/webp', + 'image/heic', + 'image/heif', +] as const satisfies ReadonlyArray< + NonNullable +> + +const AUDIO_MIME_TYPES = [ + 'audio/wav', + 'audio/mp3', + 'audio/aiff', + 'audio/aac', + 'audio/ogg', + 'audio/flac', +] as const satisfies ReadonlyArray< + NonNullable +> + +const VIDEO_MIME_TYPES = [ + 'video/mp4', + 'video/mpeg', + 'video/mpg', + 'video/mov', + 'video/avi', + 'video/x-flv', + 'video/webm', + 'video/wmv', + 'video/3gpp', +] as const satisfies ReadonlyArray< + NonNullable +> + +const DOCUMENT_MIME_TYPES = [ + 'application/pdf', +] as const satisfies ReadonlyArray< + NonNullable +> + +function validateMime( + allowed: ReadonlyArray, + value: string | undefined, + kind: string, +): T | undefined { + if (value === undefined) return undefined + if ((allowed as ReadonlyArray).includes(value)) { + return value as T + } + throw new Error( + `Unsupported ${kind} mime type "${value}" for the Gemini Interactions API. Allowed: ${allowed.join(', ')}.`, + ) +} + +function contentPartToBlock(part: ContentPart): ContentBlock { + if (part.type === 'text') { + return { type: 'text', text: part.content } + } + const isData = part.source.type === 'data' + switch (part.type) { + case 'image': { + const mime_type = validateMime( + IMAGE_MIME_TYPES, + part.source.mimeType, + 'image', + ) + return isData + ? { type: 'image', data: part.source.value, mime_type } + : { type: 'image', uri: part.source.value, mime_type } + } + case 'audio': { + const mime_type = validateMime( + AUDIO_MIME_TYPES, + part.source.mimeType, + 'audio', + ) + return isData + ? { type: 'audio', data: part.source.value, mime_type } + : { type: 'audio', uri: part.source.value, mime_type } + } + case 'video': { + const mime_type = validateMime( + VIDEO_MIME_TYPES, + part.source.mimeType, + 'video', + ) + return isData + ? 
{ type: 'video', data: part.source.value, mime_type } + : { type: 'video', uri: part.source.value, mime_type } + } + case 'document': { + const mime_type = validateMime( + DOCUMENT_MIME_TYPES, + part.source.mimeType, + 'document', + ) + return isData + ? { type: 'document', data: part.source.value, mime_type } + : { type: 'document', uri: part.source.value, mime_type } + } + } +} + +// Built-in Gemini tools use snake_case field names in the Interactions API +// that differ from the camelCase fields used on `client.models.generateContent` +// (e.g. `fileSearchStoreNames` vs `file_search_store_names`). Translate +// explicitly so callers keep using the same tool factories across adapters. +function convertToolsToInteractionsFormat( + tools: Array | undefined, +): Array | undefined { + if (!tools || tools.length === 0) return undefined + + const result: Array = [] + + for (const tool of tools) { + switch (tool.name) { + case 'google_search': { + const metadata = (tool.metadata ?? {}) as { + search_types?: Array<'web_search' | 'image_search'> + } + result.push({ + type: 'google_search', + ...(metadata.search_types + ? { search_types: metadata.search_types } + : {}), + }) + break + } + case 'code_execution': { + result.push({ type: 'code_execution' }) + break + } + case 'url_context': { + result.push({ type: 'url_context' }) + break + } + case 'file_search': { + const metadata = (tool.metadata ?? {}) as { + fileSearchStoreNames?: Array + topK?: number + metadataFilter?: string + } + result.push({ + type: 'file_search', + ...(metadata.fileSearchStoreNames + ? { file_search_store_names: metadata.fileSearchStoreNames } + : {}), + ...(metadata.topK !== undefined ? { top_k: metadata.topK } : {}), + ...(metadata.metadataFilter !== undefined + ? { metadata_filter: metadata.metadataFilter } + : {}), + }) + break + } + case 'computer_use': { + const metadata = (tool.metadata ?? 
{}) as { + environment?: string + excludedPredefinedFunctions?: Array + } + if (metadata.environment && metadata.environment !== 'browser') { + throw new Error( + `computer_use environment "${metadata.environment}" is not supported on the Gemini Interactions API. Only "browser" is accepted.`, + ) + } + result.push({ + type: 'computer_use', + ...(metadata.environment + ? { environment: metadata.environment as 'browser' } + : {}), + ...(metadata.excludedPredefinedFunctions + ? { + excludedPredefinedFunctions: + metadata.excludedPredefinedFunctions, + } + : {}), + }) + break + } + case 'google_search_retrieval': + throw new Error( + '`google_search_retrieval` is not supported on the Gemini Interactions API. Use `googleSearchTool()` (`google_search`) with `geminiTextInteractions()`, or call `geminiText()` for the legacy retrieval tool.', + ) + case 'google_maps': + throw new Error( + '`google_maps` is not yet supported on the Gemini Interactions API. Use `geminiText()` for Google Maps grounding.', + ) + case 'mcp_server': + throw new Error( + '`mcp_server` is not yet supported on the `geminiTextInteractions()` adapter.', + ) + default: { + if (!tool.description) { + throw new Error( + `Tool ${tool.name} requires a description for the Gemini Interactions adapter`, + ) + } + result.push({ + type: 'function', + name: tool.name, + description: tool.description, + parameters: tool.inputSchema ?? 
{ + type: 'object', + properties: {}, + required: [], + }, + }) + } + } + } + + return result +} + +async function* translateInteractionEvents( + stream: AsyncIterable, + model: string, + runId: string, + timestamp: number, + adapterName: string, + logger: InternalLogger, +): AsyncIterable { + const messageId = generateId(adapterName) + let hasEmittedRunStarted = false + let hasEmittedTextMessageStart = false + let textAccumulated = '' + let interactionId: string | undefined + let sawFunctionCall = false + const toolCalls = new Map() + let nextToolIndex = 0 + let thinkingStepId: string | null = null + let thinkingAccumulated = '' + let reasoningMessageId: string | null = null + let hasClosedReasoning = false + + const closeReasoningIfNeeded = function* (): Generator { + if (reasoningMessageId && !hasClosedReasoning) { + hasClosedReasoning = true + yield asChunk({ + type: 'REASONING_MESSAGE_END', + messageId: reasoningMessageId, + model, + timestamp, + }) + yield asChunk({ + type: 'REASONING_END', + messageId: reasoningMessageId, + model, + timestamp, + }) + } + } + + const emitRunStartedIfNeeded = function* (): Generator { + if (!hasEmittedRunStarted) { + hasEmittedRunStarted = true + yield asChunk({ + type: 'RUN_STARTED', + runId, + model, + timestamp, + }) + } + } + + for await (const event of stream) { + logger.provider(`provider=gemini-text-interactions`, { event }) + switch (event.event_type) { + case 'interaction.start': { + interactionId = event.interaction.id + yield* emitRunStartedIfNeeded() + break + } + + case 'content.start': { + yield* emitRunStartedIfNeeded() + break + } + + case 'content.delta': { + yield* emitRunStartedIfNeeded() + const delta = event.delta + switch (delta.type) { + case 'text': { + yield* closeReasoningIfNeeded() + if (!hasEmittedTextMessageStart) { + hasEmittedTextMessageStart = true + yield asChunk({ + type: 'TEXT_MESSAGE_START', + messageId, + model, + timestamp, + role: 'assistant', + }) + } + textAccumulated += delta.text + 
yield asChunk({ + type: 'TEXT_MESSAGE_CONTENT', + messageId, + model, + timestamp, + delta: delta.text, + content: textAccumulated, + }) + break + } + case 'function_call': { + yield* closeReasoningIfNeeded() + sawFunctionCall = true + const toolCallId = delta.id + const deltaArgs: Record = + typeof delta.arguments === 'string' + ? safeParseToolArguments(delta.arguments) + : delta.arguments + let state = toolCalls.get(toolCallId) + if (!state) { + state = { + name: delta.name, + args: JSON.stringify(deltaArgs), + index: nextToolIndex++, + started: false, + ended: false, + } + toolCalls.set(toolCallId, state) + } else { + // Merge incremental fragments at the object level — the SDK + // types args as an object per delta, so string concatenation + // would produce invalid JSON. + try { + const existing = JSON.parse(state.args) + state.args = JSON.stringify({ + ...(existing && typeof existing === 'object' ? existing : {}), + ...deltaArgs, + }) + } catch { + state.args = JSON.stringify(deltaArgs) + } + if (delta.name) state.name = delta.name + } + if (!state.started) { + state.started = true + yield asChunk({ + type: 'TOOL_CALL_START', + toolCallId, + toolName: state.name, + model, + timestamp, + index: state.index, + }) + } + yield asChunk({ + type: 'TOOL_CALL_ARGS', + toolCallId, + model, + timestamp, + delta: JSON.stringify(deltaArgs), + args: state.args, + }) + break + } + case 'google_search_call': + case 'code_execution_call': + case 'url_context_call': + case 'file_search_call': { + yield* closeReasoningIfNeeded() + yield asChunk({ + type: 'CUSTOM', + name: `gemini.${camelizeDeltaType(delta.type)}`, + value: delta, + model, + timestamp, + }) + break + } + case 'google_search_result': + case 'code_execution_result': + case 'url_context_result': + case 'file_search_result': { + yield* closeReasoningIfNeeded() + yield asChunk({ + type: 'CUSTOM', + name: `gemini.${camelizeDeltaType(delta.type)}`, + value: delta, + model, + timestamp, + }) + break + } + case 
'thought_summary': { + const thoughtText = + delta.content && 'text' in delta.content ? delta.content.text : '' + if (!thoughtText) break + if (thinkingStepId === null) { + thinkingStepId = generateId(adapterName) + reasoningMessageId = generateId(adapterName) + yield asChunk({ + type: 'REASONING_START', + messageId: reasoningMessageId, + model, + timestamp, + }) + yield asChunk({ + type: 'REASONING_MESSAGE_START', + messageId: reasoningMessageId, + role: 'reasoning', + model, + timestamp, + }) + yield asChunk({ + type: 'STEP_STARTED', + stepId: thinkingStepId, + model, + timestamp, + stepType: 'thinking', + }) + } + thinkingAccumulated += thoughtText + yield asChunk({ + type: 'REASONING_MESSAGE_CONTENT', + messageId: reasoningMessageId!, + delta: thoughtText, + model, + timestamp, + }) + yield asChunk({ + type: 'STEP_FINISHED', + stepId: thinkingStepId, + model, + timestamp, + delta: thoughtText, + content: thinkingAccumulated, + }) + break + } + default: + break + } + break + } + + case 'content.stop': + case 'interaction.status_update': { + break + } + + case 'interaction.complete': { + if (event.interaction.id) { + interactionId = event.interaction.id + } + + yield* closeReasoningIfNeeded() + + for (const [toolCallId, state] of toolCalls) { + if (state.ended) continue + state.ended = true + let parsedInput: unknown = {} + try { + const parsed = JSON.parse(state.args) + parsedInput = parsed && typeof parsed === 'object' ? parsed : {} + } catch { + parsedInput = {} + } + yield asChunk({ + type: 'TOOL_CALL_END', + toolCallId, + toolName: state.name, + model, + timestamp, + input: parsedInput, + }) + } + + if (hasEmittedTextMessageStart) { + yield asChunk({ + type: 'TEXT_MESSAGE_END', + messageId, + model, + timestamp, + }) + } + + const usage = event.interaction.usage + const finishReason: 'tool_calls' | 'stop' = sawFunctionCall + ? 
'tool_calls' + : 'stop' + + if (interactionId) { + yield asChunk({ + type: 'CUSTOM', + name: 'gemini.interactionId', + value: { interactionId }, + model, + timestamp, + }) + } + + yield asChunk({ + type: 'RUN_FINISHED', + runId, + model, + timestamp, + finishReason, + usage: usage + ? { + promptTokens: usage.total_input_tokens ?? 0, + completionTokens: usage.total_output_tokens ?? 0, + totalTokens: usage.total_tokens ?? 0, + } + : undefined, + }) + break + } + + case 'error': { + const message = event.error?.message ?? 'Unknown error' + const code = event.error?.code?.toString() + yield asChunk({ + type: 'RUN_ERROR', + runId, + model, + timestamp, + message, + code, + error: { message, code }, + }) + return + } + + default: + break + } + } +} + +function camelizeDeltaType(type: string): string { + const [first, ...rest] = type.split('_') + return ( + (first ?? '') + + rest.map((w) => w.charAt(0).toUpperCase() + w.slice(1)).join('') + ) +} + +function extractTextFromInteraction(interaction: Interaction): string { + let text = '' + for (const output of interaction.outputs ?? []) { + if (output.type === 'text') { + text += output.text + } + } + return text +} diff --git a/packages/typescript/ai-gemini/src/experimental/text-interactions/provider-options.ts b/packages/typescript/ai-gemini/src/experimental/text-interactions/provider-options.ts new file mode 100644 index 000000000..657cf8e69 --- /dev/null +++ b/packages/typescript/ai-gemini/src/experimental/text-interactions/provider-options.ts @@ -0,0 +1,26 @@ +import type { Interactions } from '@google/genai' + +/** + * Provider options for `geminiTextInteractions()`. Derived from the SDK's + * own `interactions.create` param shape so field types (including the + * allowed `response_modalities` values and the `generation_config` schema) + * stay in sync with `@google/genai` automatically. 
+ * + * `input`, `model`, `tools`, `stream`, and `api_version` are intentionally + * omitted — the adapter derives those from its constructor args and the + * chat options. + * + * @see https://ai.google.dev/gemini-api/docs/interactions + * @experimental Interactions API is in Beta. + */ +export type ExternalTextInteractionsProviderOptions = Pick< + Interactions.CreateModelInteractionParamsStreaming, + | 'previous_interaction_id' + | 'store' + | 'background' + | 'system_instruction' + | 'response_modalities' + | 'response_format' + | 'response_mime_type' + | 'generation_config' +> diff --git a/packages/typescript/ai-gemini/tests/image-adapter.test.ts b/packages/typescript/ai-gemini/tests/image-adapter.test.ts index 4eb4192f6..259d3ebea 100644 --- a/packages/typescript/ai-gemini/tests/image-adapter.test.ts +++ b/packages/typescript/ai-gemini/tests/image-adapter.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect, vi } from 'vitest' -import { resolveDebugOption } from '@tanstack/ai/adapter-internals' +import { generateImage } from '@tanstack/ai' import { GeminiImageAdapter, createGeminiImage } from '../src/adapters/image' import { parseNativeImageSize, @@ -9,8 +9,6 @@ import { validatePrompt, } from '../src/image/image-provider-options' -const testLogger = resolveDebugOption(false) - describe('Gemini Image Adapter', () => { describe('createGeminiImage', () => { it('creates an adapter with the provided API key', () => { @@ -190,12 +188,11 @@ describe('Gemini Image Adapter', () => { }, } - const result = await adapter.generateImages({ - model: 'imagen-3.0-generate-002', + const result = await generateImage({ + adapter, prompt: 'A cat wearing a hat', numberOfImages: 1, size: '1024x1024', - logger: testLogger, }) expect(mockGenerateImages).toHaveBeenCalledWith({ @@ -233,16 +230,14 @@ describe('Gemini Image Adapter', () => { }, } - const result1 = await adapter.generateImages({ - model: 'imagen-3.0-generate-002', + const result1 = await generateImage({ + adapter, 
prompt: 'Test prompt', - logger: testLogger, }) - const result2 = await adapter.generateImages({ - model: 'imagen-3.0-generate-002', + const result2 = await generateImage({ + adapter, prompt: 'Test prompt', - logger: testLogger, }) expect(result1.id).not.toBe(result2.id) @@ -284,11 +279,10 @@ describe('Gemini Image Adapter', () => { }, } - const result = await adapter.generateImages({ - model: 'gemini-3.1-flash-image-preview', + const result = await generateImage({ + adapter, prompt: 'A futuristic city', size: '16:9_4K', - logger: testLogger, }) expect(mockGenerateContent).toHaveBeenCalledWith({ @@ -342,10 +336,9 @@ describe('Gemini Image Adapter', () => { }, } - const result = await adapter.generateImages({ - model: 'gemini-3.1-flash-image-preview', + const result = await generateImage({ + adapter, prompt: 'A simple sketch', - logger: testLogger, }) expect(mockGenerateContent).toHaveBeenCalledWith({ @@ -390,10 +383,9 @@ describe('Gemini Image Adapter', () => { } await expect( - adapter.generateImages({ - model: 'gemini-3.1-flash-image-preview', + generateImage({ + adapter, prompt: 'A test prompt', - logger: testLogger, }), ).rejects.toThrow(/returned no images/) }) @@ -426,10 +418,9 @@ describe('Gemini Image Adapter', () => { } await expect( - adapter.generateImages({ - model: 'gemini-3.1-flash-image-preview', + generateImage({ + adapter, prompt: 'A test prompt', - logger: testLogger, }), ).rejects.toThrow(/I cannot generate that image/) }) @@ -464,14 +455,13 @@ describe('Gemini Image Adapter', () => { }, } - await adapter.generateImages({ - model: 'gemini-3.1-flash-image-preview', + await generateImage({ + adapter, prompt: 'A simple sketch', modelOptions: { // User tries to strip IMAGE from modalities — must be ignored. 
responseModalities: ['TEXT'], } as unknown as never, - logger: testLogger, }) const args = mockGenerateContent.mock.calls[0]![0] @@ -515,11 +505,10 @@ describe('Gemini Image Adapter', () => { }, } - const result = await adapter.generateImages({ - model: 'gemini-3.1-flash-image-preview', + const result = await generateImage({ + adapter, prompt: 'A futuristic city', numberOfImages: 3, - logger: testLogger, }) expect(mockGenerateContent).toHaveBeenCalledWith({ @@ -567,11 +556,10 @@ describe('Gemini Image Adapter', () => { }, } - await adapter.generateImages({ - model: 'gemini-3.1-flash-image-preview', + await generateImage({ + adapter, prompt: 'A simple sketch', numberOfImages: 1, - logger: testLogger, }) expect(mockGenerateContent).toHaveBeenCalledWith({ @@ -614,10 +602,9 @@ describe('Gemini Image Adapter', () => { }, } - await adapter.generateImages({ - model: 'gemini-3.1-flash-image-preview', + await generateImage({ + adapter, prompt: 'A simple sketch', - logger: testLogger, }) expect(mockGenerateContent).toHaveBeenCalledWith({ diff --git a/packages/typescript/ai-gemini/tests/text-interactions-adapter.test.ts b/packages/typescript/ai-gemini/tests/text-interactions-adapter.test.ts new file mode 100644 index 000000000..57f42b072 --- /dev/null +++ b/packages/typescript/ai-gemini/tests/text-interactions-adapter.test.ts @@ -0,0 +1,694 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest' +import { z } from 'zod' +import { chat } from '@tanstack/ai' +import type { StreamChunk, Tool } from '@tanstack/ai' +import { GeminiTextInteractionsAdapter } from '../src/experimental/text-interactions/adapter' +import type { GeminiTextInteractionsProviderOptions } from '../src/experimental/text-interactions/adapter' + +const mocks = vi.hoisted(() => { + return { + constructorSpy: vi.fn<(options: { apiKey: string }) => void>(), + interactionsCreateSpy: vi.fn(), + } +}) + +vi.mock('@google/genai', async () => { + const actual = await vi.importActual('@google/genai') + const 
{ constructorSpy, interactionsCreateSpy } = mocks + class MockGoogleGenAI { + get interactions() { + return { create: interactionsCreateSpy } + } + constructor(options: { apiKey: string }) { + constructorSpy(options) + } + } + + return { + ...actual, + GoogleGenAI: MockGoogleGenAI, + } +}) + +const createAdapter = () => + new GeminiTextInteractionsAdapter({ apiKey: 'test-key' }, 'gemini-2.5-flash') + +const mkStream = (events: Array>) => { + return (async function* () { + for (const event of events) { + yield event + } + })() +} + +const collectChunks = async (stream: AsyncIterable) => { + const chunks: Array = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + return chunks +} + +describe('GeminiTextInteractionsAdapter', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('translates a basic text stream into AG-UI chunks and surfaces the interaction id', async () => { + mocks.interactionsCreateSpy.mockResolvedValue( + mkStream([ + { + event_type: 'interaction.start', + interaction: { id: 'int_1', status: 'in_progress' }, + }, + { + event_type: 'content.start', + index: 0, + content: { type: 'text', text: '' }, + }, + { + event_type: 'content.delta', + index: 0, + delta: { type: 'text', text: 'Hello' }, + }, + { + event_type: 'content.delta', + index: 0, + delta: { type: 'text', text: ', world!' 
}, + }, + { event_type: 'content.stop', index: 0 }, + { + event_type: 'interaction.complete', + interaction: { + id: 'int_1', + status: 'completed', + usage: { + total_input_tokens: 3, + total_output_tokens: 2, + total_tokens: 5, + }, + }, + }, + ]), + ) + + const adapter = createAdapter() + const chunks = await collectChunks( + chat({ + adapter, + messages: [{ role: 'user', content: 'hi' }], + }), + ) + + const types = chunks.map((c) => c.type) + expect(types).toContain('RUN_STARTED') + expect(types).toContain('TEXT_MESSAGE_START') + expect(types).toContain('TEXT_MESSAGE_CONTENT') + expect(types).toContain('TEXT_MESSAGE_END') + expect(types).toContain('RUN_FINISHED') + + const contents = chunks.filter( + (c) => c.type === 'TEXT_MESSAGE_CONTENT', + ) as any[] + expect(contents.map((c) => c.delta).join('')).toBe('Hello, world!') + + const finished = chunks.find((c) => c.type === 'RUN_FINISHED') as any + expect(finished.finishReason).toBe('stop') + expect(finished.usage).toEqual({ + promptTokens: 3, + completionTokens: 2, + totalTokens: 5, + }) + + const interactionCustom = chunks.find( + (c) => c.type === 'CUSTOM' && (c as any).name === 'gemini.interactionId', + ) as any + expect(interactionCustom).toBeDefined() + expect(interactionCustom.value).toEqual({ interactionId: 'int_1' }) + }) + + it('forwards previous_interaction_id on the outgoing request and sends only the latest user turn', async () => { + mocks.interactionsCreateSpy.mockResolvedValue( + mkStream([ + { + event_type: 'interaction.start', + interaction: { id: 'int_2', status: 'in_progress' }, + }, + { + event_type: 'interaction.complete', + interaction: { id: 'int_2', status: 'completed' }, + }, + ]), + ) + + const adapter = createAdapter() + const providerOptions: GeminiTextInteractionsProviderOptions = { + previous_interaction_id: 'int_1', + } + + await collectChunks( + chat({ + adapter, + messages: [ + { role: 'user', content: 'Hi, my name is Amir.' 
}, + { role: 'assistant', content: 'Nice to meet you, Amir!' }, + { role: 'user', content: 'What is my name?' }, + ], + modelOptions: providerOptions, + }), + ) + + expect(mocks.interactionsCreateSpy).toHaveBeenCalledTimes(1) + const [payload] = mocks.interactionsCreateSpy.mock.calls[0] + expect(payload.previous_interaction_id).toBe('int_1') + expect(payload.model).toBe('gemini-2.5-flash') + expect(payload.stream).toBe(true) + expect(payload.input).toEqual([ + { + role: 'user', + content: [{ type: 'text', text: 'What is my name?' }], + }, + ]) + }) + + it('includes trailing tool result when chaining with previous_interaction_id', async () => { + mocks.interactionsCreateSpy.mockResolvedValue( + mkStream([ + { + event_type: 'interaction.start', + interaction: { id: 'int_followup', status: 'in_progress' }, + }, + { + event_type: 'interaction.complete', + interaction: { id: 'int_followup', status: 'completed' }, + }, + ]), + ) + + const adapter = createAdapter() + await collectChunks( + chat({ + adapter, + messages: [ + { role: 'user', content: 'Weather in Madrid?' 
}, + { + role: 'assistant', + content: '', + toolCalls: [ + { + id: 'call_1', + type: 'function', + function: { + name: 'lookup_weather', + arguments: '{"location":"Madrid"}', + }, + }, + ], + }, + { + role: 'tool', + toolCallId: 'call_1', + content: '{"tempC":22}', + }, + ], + modelOptions: { + previous_interaction_id: 'int_prev', + } as GeminiTextInteractionsProviderOptions, + }), + ) + + const [payload] = mocks.interactionsCreateSpy.mock.calls[0] + expect(payload.previous_interaction_id).toBe('int_prev') + expect(payload.input).toEqual([ + { + role: 'user', + content: [ + { + type: 'function_result', + call_id: 'call_1', + name: 'lookup_weather', + result: '{"tempC":22}', + }, + ], + }, + ]) + }) + + it('sends full conversation as Turn[] when previous_interaction_id is absent', async () => { + mocks.interactionsCreateSpy.mockResolvedValue( + mkStream([ + { + event_type: 'interaction.start', + interaction: { id: 'int_3', status: 'in_progress' }, + }, + { + event_type: 'interaction.complete', + interaction: { id: 'int_3', status: 'completed' }, + }, + ]), + ) + + const adapter = createAdapter() + await collectChunks( + chat({ + adapter, + messages: [ + { role: 'user', content: 'Hello' }, + { role: 'assistant', content: 'Hi there' }, + { role: 'user', content: 'How are you?' }, + ], + }), + ) + + const [payload] = mocks.interactionsCreateSpy.mock.calls[0] + expect(payload.input).toEqual([ + { role: 'user', content: [{ type: 'text', text: 'Hello' }] }, + { role: 'model', content: [{ type: 'text', text: 'Hi there' }] }, + { role: 'user', content: [{ type: 'text', text: 'How are you?' 
}] }, + ]) + }) + + it('translates function_call deltas into TOOL_CALL_* events and marks tool_calls finish reason', async () => { + mocks.interactionsCreateSpy.mockResolvedValue( + mkStream([ + { + event_type: 'interaction.start', + interaction: { id: 'int_tool', status: 'in_progress' }, + }, + { + event_type: 'content.start', + index: 0, + content: { type: 'function_call' }, + }, + { + event_type: 'content.delta', + index: 0, + delta: { + type: 'function_call', + id: 'call_1', + name: 'lookup_weather', + arguments: { location: 'Madrid' }, + }, + }, + { event_type: 'content.stop', index: 0 }, + { + event_type: 'interaction.complete', + interaction: { id: 'int_tool', status: 'completed' }, + }, + ]), + ) + + const weatherTool: Tool = { + name: 'lookup_weather', + description: 'Return the weather for a location', + } + + const adapter = createAdapter() + const chunks = await collectChunks( + chat({ + adapter, + messages: [{ role: 'user', content: 'Weather in Madrid?' }], + tools: [weatherTool], + }), + ) + + const [payload] = mocks.interactionsCreateSpy.mock.calls[0] + expect(payload.tools).toEqual([ + expect.objectContaining({ + type: 'function', + name: 'lookup_weather', + description: 'Return the weather for a location', + }), + ]) + + const startEvent = chunks.find((c) => c.type === 'TOOL_CALL_START') as any + expect(startEvent).toBeDefined() + expect(startEvent.toolCallId).toBe('call_1') + expect(startEvent.toolName).toBe('lookup_weather') + + const argsEvent = chunks.find((c) => c.type === 'TOOL_CALL_ARGS') as any + expect(argsEvent.args).toBe('{"location":"Madrid"}') + + const endEvent = chunks.find((c) => c.type === 'TOOL_CALL_END') as any + expect(endEvent.input).toEqual({ location: 'Madrid' }) + + const finished = chunks.find((c) => c.type === 'RUN_FINISHED') as any + expect(finished.finishReason).toBe('tool_calls') + }) + + it('serializes tool results as function_result content blocks', async () => { + mocks.interactionsCreateSpy.mockResolvedValue( + 
mkStream([ + { + event_type: 'interaction.start', + interaction: { id: 'int_followup', status: 'in_progress' }, + }, + { + event_type: 'interaction.complete', + interaction: { id: 'int_followup', status: 'completed' }, + }, + ]), + ) + + const adapter = createAdapter() + await collectChunks( + chat({ + adapter, + messages: [ + { role: 'user', content: 'Weather in Madrid?' }, + { + role: 'assistant', + content: '', + toolCalls: [ + { + id: 'call_1', + type: 'function', + function: { + name: 'lookup_weather', + arguments: '{"location":"Madrid"}', + }, + }, + ], + }, + { + role: 'tool', + toolCallId: 'call_1', + content: '{"tempC":22}', + }, + ], + }), + ) + + const [payload] = mocks.interactionsCreateSpy.mock.calls[0] + expect(payload.input).toContainEqual({ + role: 'user', + content: [ + expect.objectContaining({ + type: 'function_result', + call_id: 'call_1', + name: 'lookup_weather', + result: '{"tempC":22}', + }), + ], + }) + }) + + it('rejects unsupported image mime types with a clear error', async () => { + mocks.interactionsCreateSpy.mockResolvedValue(mkStream([])) + const adapter = createAdapter() + + const chunks = await collectChunks( + chat({ + adapter, + messages: [ + { + role: 'user', + content: [ + { + type: 'image', + source: { + type: 'data', + value: 'base64-data', + mimeType: 'image/bmp', + }, + }, + ], + }, + ], + }), + ) + + const err = chunks.find((c) => c.type === 'RUN_ERROR') as any + expect(err).toBeDefined() + expect(err.message).toMatch(/image\/bmp/) + expect(err.message).toMatch(/image\/png/) + }) + + it('sends built-in google_search, code_execution, url_context tools with snake_case shape', async () => { + mocks.interactionsCreateSpy.mockResolvedValue( + mkStream([ + { + event_type: 'interaction.start', + interaction: { id: 'int_builtins', status: 'in_progress' }, + }, + { + event_type: 'interaction.complete', + interaction: { id: 'int_builtins', status: 'completed' }, + }, + ]), + ) + + const adapter = createAdapter() + await 
collectChunks( + chat({ + adapter, + messages: [{ role: 'user', content: 'What happened yesterday?' }], + tools: [ + { + name: 'google_search', + description: '', + metadata: { search_types: ['web_search'] }, + }, + { name: 'code_execution', description: '', metadata: {} }, + { name: 'url_context', description: '', metadata: {} }, + ] as Array, + }), + ) + + const [payload] = mocks.interactionsCreateSpy.mock.calls[0] + expect(payload.tools).toEqual([ + { type: 'google_search', search_types: ['web_search'] }, + { type: 'code_execution' }, + { type: 'url_context' }, + ]) + }) + + it('translates file_search metadata fields into snake_case', async () => { + mocks.interactionsCreateSpy.mockResolvedValue( + mkStream([ + { + event_type: 'interaction.start', + interaction: { id: 'int_fs', status: 'in_progress' }, + }, + { + event_type: 'interaction.complete', + interaction: { id: 'int_fs', status: 'completed' }, + }, + ]), + ) + + const adapter = createAdapter() + await collectChunks( + chat({ + adapter, + messages: [{ role: 'user', content: 'Find it.' 
}], + tools: [ + { + name: 'file_search', + description: '', + metadata: { + fileSearchStoreNames: ['fileSearchStores/my-store'], + topK: 5, + metadataFilter: 'kind="faq"', + }, + }, + ] as Array, + }), + ) + + const [payload] = mocks.interactionsCreateSpy.mock.calls[0] + expect(payload.tools).toEqual([ + { + type: 'file_search', + file_search_store_names: ['fileSearchStores/my-store'], + top_k: 5, + metadata_filter: 'kind="faq"', + }, + ]) + }) + + it('surfaces built-in tool deltas as gemini.* CUSTOM events and keeps finish reason "stop"', async () => { + mocks.interactionsCreateSpy.mockResolvedValue( + mkStream([ + { + event_type: 'interaction.start', + interaction: { id: 'int_search', status: 'in_progress' }, + }, + { + event_type: 'content.start', + index: 0, + content: { type: 'google_search_call' }, + }, + { + event_type: 'content.delta', + index: 0, + delta: { + type: 'google_search_call', + id: 'call_gs_1', + arguments: { queries: ['weather madrid'] }, + }, + }, + { + event_type: 'content.delta', + index: 0, + delta: { + type: 'google_search_result', + call_id: 'call_gs_1', + result: [{ title: 'Madrid weather', uri: 'https://example.com' }], + }, + }, + { event_type: 'content.stop', index: 0 }, + { + event_type: 'content.delta', + index: 0, + delta: { type: 'text', text: 'It is sunny.' }, + }, + { + event_type: 'interaction.complete', + interaction: { id: 'int_search', status: 'completed' }, + }, + ]), + ) + + const adapter = createAdapter() + const chunks = await collectChunks( + chat({ + adapter, + messages: [{ role: 'user', content: 'Weather in Madrid?' 
}], + tools: [ + { name: 'google_search', description: '', metadata: {} }, + ] as Array, + }), + ) + + const callChunk = chunks.find( + (c) => + c.type === 'CUSTOM' && (c as any).name === 'gemini.googleSearchCall', + ) as any + expect(callChunk).toBeDefined() + expect(callChunk.value.id).toBe('call_gs_1') + expect(callChunk.value.arguments).toEqual({ queries: ['weather madrid'] }) + + const resultChunk = chunks.find( + (c) => + c.type === 'CUSTOM' && (c as any).name === 'gemini.googleSearchResult', + ) as any + expect(resultChunk).toBeDefined() + expect(resultChunk.value.call_id).toBe('call_gs_1') + + const finished = chunks.find((c) => c.type === 'RUN_FINISHED') as any + expect(finished.finishReason).toBe('stop') + }) + + it('rejects google_search_retrieval with a clear error', async () => { + mocks.interactionsCreateSpy.mockResolvedValue(mkStream([])) + const adapter = createAdapter() + + const chunks = await collectChunks( + chat({ + adapter, + messages: [{ role: 'user', content: 'Search' }], + tools: [ + { name: 'google_search_retrieval', description: '', metadata: {} }, + ] as Array, + }), + ) + + const err = chunks.find((c) => c.type === 'RUN_ERROR') as any + expect(err).toBeDefined() + expect(err.message).toMatch(/google_search_retrieval/) + expect(err.message).toMatch(/Interactions API/) + expect(err.message).toMatch(/google_search/) + }) + + it('rejects google_maps with a clear error', async () => { + mocks.interactionsCreateSpy.mockResolvedValue(mkStream([])) + const adapter = createAdapter() + + const chunks = await collectChunks( + chat({ + adapter, + messages: [{ role: 'user', content: 'Directions' }], + tools: [ + { name: 'google_maps', description: '', metadata: {} }, + ] as Array, + }), + ) + + const err = chunks.find((c) => c.type === 'RUN_ERROR') as any + expect(err).toBeDefined() + expect(err.message).toMatch(/google_maps/) + expect(err.message).toMatch(/Interactions API/) + }) + + it('emits RUN_ERROR on an upstream error event', async () => { + 
mocks.interactionsCreateSpy.mockResolvedValue( + mkStream([ + { + event_type: 'interaction.start', + interaction: { id: 'int_err', status: 'in_progress' }, + }, + { + event_type: 'error', + error: { code: 500, message: 'boom' }, + }, + ]), + ) + + const adapter = createAdapter() + const chunks = await collectChunks( + chat({ + adapter, + messages: [{ role: 'user', content: 'hi' }], + }), + ) + + const err = chunks.find((c) => c.type === 'RUN_ERROR') as any + expect(err).toBeDefined() + expect(err.message).toBe('boom') + expect(err.code).toBe('500') + }) + + it('structuredOutput parses JSON text from interaction.outputs', async () => { + // First call: agentic loop's streaming turn (no tools, short completion). + mocks.interactionsCreateSpy.mockResolvedValueOnce( + mkStream([ + { + event_type: 'interaction.start', + interaction: { id: 'int_struct_stream', status: 'in_progress' }, + }, + { + event_type: 'content.start', + index: 0, + content: { type: 'text', text: '' }, + }, + { event_type: 'content.stop', index: 0 }, + { + event_type: 'interaction.complete', + interaction: { id: 'int_struct_stream', status: 'completed' }, + }, + ]), + ) + // Second call: non-streaming structured output. 
+ mocks.interactionsCreateSpy.mockResolvedValueOnce({ + id: 'int_structured', + status: 'completed', + outputs: [{ type: 'text', text: '{"foo":"bar"}' }], + }) + + const adapter = createAdapter() + const result = await chat({ + adapter, + messages: [{ role: 'user', content: 'Give JSON' }], + outputSchema: z.object({ foo: z.string() }), + }) + + expect(result).toEqual({ foo: 'bar' }) + + expect(mocks.interactionsCreateSpy).toHaveBeenCalledTimes(2) + const structuredPayload = mocks.interactionsCreateSpy.mock.calls[1][0] + expect(structuredPayload.response_mime_type).toBe('application/json') + expect(structuredPayload.response_format).toBeDefined() + expect(structuredPayload.stream).toBeUndefined() + }) +}) diff --git a/packages/typescript/ai-gemini/vite.config.ts b/packages/typescript/ai-gemini/vite.config.ts index 0e7e7eaea..4faa2d344 100644 --- a/packages/typescript/ai-gemini/vite.config.ts +++ b/packages/typescript/ai-gemini/vite.config.ts @@ -29,7 +29,11 @@ const config = defineConfig({ export default mergeConfig( config, tanstackViteConfig({ - entry: ['./src/index.ts', './src/tools/index.ts'], + entry: [ + './src/index.ts', + './src/tools/index.ts', + './src/experimental/index.ts', + ], srcDir: './src', cjs: false, }), diff --git a/testing/e2e/fixtures/stateful-interactions/conversation.json b/testing/e2e/fixtures/stateful-interactions/conversation.json new file mode 100644 index 000000000..80ed4e0cb --- /dev/null +++ b/testing/e2e/fixtures/stateful-interactions/conversation.json @@ -0,0 +1,20 @@ +{ + "fixtures": [ + { + "match": { + "userMessage": "[stateful-1] what guitars do you have" + }, + "response": { + "content": "We have four guitars in stock: the Fender Stratocaster ($1,299), Gibson Les Paul ($2,499), Taylor 814ce ($3,299), and Martin D-28 ($2,999). What would you like to know more about?" 
+ } + }, + { + "match": { + "userMessage": "[stateful-2] tell me about the cheapest one" + }, + "response": { + "content": "The Fender Stratocaster at $1,299 is our most affordable guitar. It's a versatile electric guitar known for its bright, clear tone and comfortable playability." + } + } + ] +} diff --git a/testing/e2e/src/lib/feature-support.ts b/testing/e2e/src/lib/feature-support.ts index db0696e4e..62b272fbd 100644 --- a/testing/e2e/src/lib/feature-support.ts +++ b/testing/e2e/src/lib/feature-support.ts @@ -124,6 +124,10 @@ export const matrix: Record> = { tts: new Set(['openai', 'grok']), transcription: new Set(['openai', 'grok']), 'video-gen': new Set(['openai']), + // Only Gemini currently surfaces a first-class stateful conversation API via + // the adapter. aimock does not yet mock Gemini's interactions:create endpoint, + // so the stateful-interactions spec is skipped until fixture support lands. + 'stateful-interactions': new Set(['gemini']), } export function isSupported(provider: Provider, feature: Feature): boolean { diff --git a/testing/e2e/src/lib/features.ts b/testing/e2e/src/lib/features.ts index 15000cd7e..fe9d7b295 100644 --- a/testing/e2e/src/lib/features.ts +++ b/testing/e2e/src/lib/features.ts @@ -85,4 +85,8 @@ export const featureConfigs: Record = { tools: [], modelOptions: {}, }, + 'stateful-interactions': { + tools: [], + modelOptions: {}, + }, } diff --git a/testing/e2e/src/lib/providers.ts b/testing/e2e/src/lib/providers.ts index 35b720b61..1341d522c 100644 --- a/testing/e2e/src/lib/providers.ts +++ b/testing/e2e/src/lib/providers.ts @@ -3,11 +3,12 @@ import { createChatOptions } from '@tanstack/ai' import { createOpenaiChat } from '@tanstack/ai-openai' import { createAnthropicChat } from '@tanstack/ai-anthropic' import { createGeminiChat } from '@tanstack/ai-gemini' +import { createGeminiTextInteractions } from '@tanstack/ai-gemini/experimental' import { createOllamaChat } from '@tanstack/ai-ollama' import { createGroqText } from 
'@tanstack/ai-groq' import { createGrokText } from '@tanstack/ai-grok' import { createOpenRouterText } from '@tanstack/ai-openrouter' -import type { Provider } from '@/lib/types' +import type { Feature, Provider } from '@/lib/types' const LLMOCK_DEFAULT_BASE = process.env.LLMOCK_URL || 'http://127.0.0.1:4010' const DUMMY_KEY = 'sk-e2e-test-dummy-key' @@ -27,6 +28,7 @@ export function createTextAdapter( modelOverride?: string, aimockPort?: number, testId?: string, + feature?: Feature, ): { adapter: AnyTextAdapter } { const model = modelOverride ?? defaultModels[provider] @@ -38,6 +40,24 @@ export function createTextAdapter( // X-Test-Id header for per-test sequenceIndex isolation in aimock const testHeaders = testId ? { 'X-Test-Id': testId } : undefined + // The Gemini Interactions API lives at a different endpoint + // (POST /v1beta/interactions) and uses a different adapter than the + // standard Gemini chat path. + if (provider === 'gemini' && feature === 'stateful-interactions') { + return createChatOptions({ + adapter: createGeminiTextInteractions( + model as 'gemini-2.0-flash', + DUMMY_KEY, + { + httpOptions: { + baseUrl: base, + headers: testHeaders, + }, + }, + ), + }) + } + const factories: Record { adapter: AnyTextAdapter }> = { openai: () => createChatOptions({ diff --git a/testing/e2e/src/lib/types.ts b/testing/e2e/src/lib/types.ts index 00c848157..6d8bc0199 100644 --- a/testing/e2e/src/lib/types.ts +++ b/testing/e2e/src/lib/types.ts @@ -28,6 +28,7 @@ export type Feature = | 'tts' | 'transcription' | 'video-gen' + | 'stateful-interactions' export const ALL_PROVIDERS: Provider[] = [ 'openai', @@ -58,4 +59,5 @@ export const ALL_FEATURES: Feature[] = [ 'tts', 'transcription', 'video-gen', + 'stateful-interactions', ] diff --git a/testing/e2e/src/routes/$provider/$feature.tsx b/testing/e2e/src/routes/$provider/$feature.tsx index de76993a2..bcbd1f30a 100644 --- a/testing/e2e/src/routes/$provider/$feature.tsx +++ b/testing/e2e/src/routes/$provider/$feature.tsx 
@@ -1,3 +1,4 @@ +import { useState } from 'react' import { createFileRoute } from '@tanstack/react-router' import { fetchServerSentEvents, useChat } from '@tanstack/ai-react' import { clientTools } from '@tanstack/ai-client' @@ -137,49 +138,78 @@ function ChatFeature({ const { testId, aimockPort } = Route.useSearch() + // Track the last interactionId surfaced by the gemini.interactionId CUSTOM + // event so callers (and the e2e spec) can chain turns via + // modelOptions.previous_interaction_id on the next request. + const [interactionId, setInteractionId] = useState( + undefined, + ) + const { messages, sendMessage, isLoading, addToolApprovalResponse, stop } = useChat({ connection: fetchServerSentEvents('/api/chat'), tools, - body: { provider, feature, testId, aimockPort }, + body: { + provider, + feature, + testId, + aimockPort, + previousInteractionId: interactionId, + }, + onCustomEvent: (eventType, data) => { + if ( + eventType === 'gemini.interactionId' && + data && + typeof (data as any).interactionId === 'string' + ) { + setInteractionId((data as any).interactionId) + } + }, }) return ( - { - sendMessage(text) - }} - onSendMessageWithImage={ - showImageInput - ? (text, file) => { - const reader = new FileReader() - reader.onload = () => { - const base64 = (reader.result as string).split(',')[1] - sendMessage({ - content: [ - { type: 'text', content: text }, - { - type: 'image', - source: { - type: 'data', - value: base64, - mimeType: file.type, + <> + {interactionId && ( + + )} + { + sendMessage(text) + }} + onSendMessageWithImage={ + showImageInput + ? 
(text, file) => { + const reader = new FileReader() + reader.onload = () => { + const base64 = (reader.result as string).split(',')[1] + sendMessage({ + content: [ + { type: 'text', content: text }, + { + type: 'image', + source: { + type: 'data', + value: base64, + mimeType: file.type, + }, }, - }, - ], - }) + ], + }) + } + reader.readAsDataURL(file) } - reader.readAsDataURL(file) - } - : undefined - } - addToolApprovalResponse={ - needsApproval ? addToolApprovalResponse : undefined - } - showImageInput={showImageInput} - onStop={stop} - /> + : undefined + } + addToolApprovalResponse={ + needsApproval ? addToolApprovalResponse : undefined + } + showImageInput={showImageInput} + onStop={stop} + /> + ) } diff --git a/testing/e2e/src/routes/api.chat.ts b/testing/e2e/src/routes/api.chat.ts index 30a00f8cc..51448bb8f 100644 --- a/testing/e2e/src/routes/api.chat.ts +++ b/testing/e2e/src/routes/api.chat.ts @@ -22,6 +22,10 @@ export const Route = createFileRoute('/api/chat')({ typeof data?.testId === 'string' ? data.testId : undefined const aimockPort: number | undefined = data?.aimockPort != null ? Number(data.aimockPort) : undefined + const previousInteractionId: string | undefined = + typeof data?.previousInteractionId === 'string' + ? data.previousInteractionId + : undefined const config = featureConfigs[feature] const modelOverride = config.modelOverrides?.[provider] @@ -30,13 +34,21 @@ export const Route = createFileRoute('/api/chat')({ modelOverride, aimockPort, testId, + feature, ) + const modelOptions = previousInteractionId + ? 
{ + ...config.modelOptions, + previous_interaction_id: previousInteractionId, + } + : config.modelOptions + try { const stream = chat({ ...adapterOptions, tools: config.tools, - modelOptions: config.modelOptions, + modelOptions, systemPrompts: ['You are a helpful assistant for a guitar store.'], agentLoopStrategy: maxIterations(5), messages, diff --git a/testing/e2e/tests/stateful-interactions.spec.ts b/testing/e2e/tests/stateful-interactions.spec.ts new file mode 100644 index 000000000..c90aac094 --- /dev/null +++ b/testing/e2e/tests/stateful-interactions.spec.ts @@ -0,0 +1,50 @@ +import { test, expect } from './fixtures' +import { + sendMessage, + waitForResponse, + getLastAssistantMessage, + featureUrl, +} from './helpers' +import { providersFor } from './test-matrix' + +// E2E coverage for Gemini's stateful Interactions API (geminiTextInteractions). +// +// Two-turn flow: +// 1. Issue a first chat call. The aimock fixture returns a server-assigned +// interactionId in interaction.start. The adapter surfaces it via the +// `gemini.interactionId` CUSTOM event, which the route stores in state +// and renders into a hidden `gemini-interaction-id` element so this +// spec can read it. +// 2. The route also threads that id back through the request body as +// `previousInteractionId`, which api/chat translates into +// `modelOptions.previous_interaction_id`. The adapter then sends only the +// new user turn (not prior history), and aimock matches the second +// fixture by userMessage. 
+for (const providerName of providersFor('stateful-interactions')) {
+  test.describe(`${providerName} — stateful-interactions`, () => {
+    test('two-turn conversation chained via previous_interaction_id', async ({
+      page,
+      testId,
+      aimockPort,
+    }) => {
+      const feature = 'stateful-interactions'
+      await page.goto(featureUrl(providerName, feature, testId, aimockPort))
+
+      // Turn 1: the fixture reply should list the guitars in stock.
+      await sendMessage(page, '[stateful-1] what guitars do you have')
+      await waitForResponse(page)
+      const turnOneReply = await getLastAssistantMessage(page)
+      expect(turnOneReply).toContain('Fender Stratocaster')
+
+      // The route renders the surfaced interaction id into this element.
+      const idLocator = page.getByTestId('gemini-interaction-id')
+      await idLocator.waitFor({ state: 'attached' })
+      expect((await idLocator.textContent())?.trim()).toBeTruthy()
+
+      // Turn 2: follow-up whose fixture reply quotes the $1,299 price.
+      await sendMessage(page, '[stateful-2] tell me about the cheapest one')
+      await waitForResponse(page)
+      expect(await getLastAssistantMessage(page)).toContain('$1,299')
+    })
+  })
+}