diff --git a/.changeset/otel-middleware.md b/.changeset/otel-middleware.md new file mode 100644 index 000000000..aebbc8b6c --- /dev/null +++ b/.changeset/otel-middleware.md @@ -0,0 +1,13 @@ +--- +'@tanstack/ai': minor +--- + +**OpenTelemetry middleware.** `otelMiddleware({ tracer, meter?, captureContent?, redact?, ... })` emits GenAI-semantic-convention traces and metrics for every `chat()` call. + +- Root span per `chat()` + child span per agent-loop iteration (named `chat <model> #<n>`) + grandchild span per tool call. +- `gen_ai.client.operation.duration` (seconds) recorded **once per `chat()` call**; `gen_ai.client.token.usage` (tokens) recorded **per iteration** (one input + one output record). Metric attributes are kept low-cardinality — `gen_ai.response.model` and `gen_ai.response.id` are intentionally excluded. +- `captureContent: true` attaches prompt/completion content as `gen_ai.{user,system,assistant,tool}.message` and `gen_ai.choice` span events. Redactor failures fail closed to a `"[redaction_failed]"` sentinel — raw content never leaks. Assistant text is capped at `maxContentLength` (default 100 000). +- Four extension points for custom attributes, names, span-options, and end-of-span callbacks. Thrown callbacks are caught and logged to `console.warn` with a label so failures remain diagnosable. +- `@opentelemetry/api` is an optional peer dependency. The middleware is exported from the dedicated subpath `@tanstack/ai/middlewares/otel` so that importing `@tanstack/ai/middlewares` does not eagerly require OTel. + +See `docs/advanced/otel.md` for the full guide. diff --git a/docs/advanced/otel.md b/docs/advanced/otel.md new file mode 100644 index 000000000..a8c14673b --- /dev/null +++ b/docs/advanced/otel.md @@ -0,0 +1,171 @@ +--- +title: OpenTelemetry +id: otel +order: 4 +description: "Emit vendor-neutral OpenTelemetry traces and metrics from every TanStack AI chat() call, following the OTel GenAI semantic conventions." 
+keywords: + - tanstack ai + - opentelemetry + - otel + - observability + - tracing + - metrics + - gen_ai + - semantic conventions +--- + +The `otelMiddleware` factory wires TanStack AI into your existing OpenTelemetry setup. Every `chat()` call produces a root span, one child span per agent-loop iteration, and one grandchild span per tool call — all with [GenAI semantic-convention attributes](https://opentelemetry.io/docs/specs/semconv/gen-ai/). It also records GenAI token and duration histograms when a `Meter` is provided. + +## Setup + +Install `@opentelemetry/api` — it's an optional peer dependency of `@tanstack/ai`: + +```bash +pnpm add @opentelemetry/api +``` + +Wire up your OTel SDK however you already do (e.g. `@opentelemetry/sdk-node`). Then pass a `Tracer` (and optionally a `Meter`) into the middleware. The OTel middleware lives on its own subpath — importing it never affects users who don't need OTel: + +```ts +import { chat } from '@tanstack/ai' +import { otelMiddleware } from '@tanstack/ai/middlewares/otel' +import { openaiText } from '@tanstack/ai-openai/adapters' +import { trace, metrics } from '@opentelemetry/api' + +const otel = otelMiddleware({ + tracer: trace.getTracer('my-app'), + meter: metrics.getMeter('my-app'), +}) + +const result = await chat({ + adapter: openaiText('gpt-4o'), + messages: [{ role: 'user', content: 'hi' }], + middleware: [otel], + stream: false, +}) +``` + +## What gets emitted + +### Spans + +```text +chat gpt-4o (root, kind: INTERNAL) +├── chat gpt-4o #0 (iteration, kind: CLIENT) +│ ├── execute_tool get_weather +│ └── execute_tool get_time +└── chat gpt-4o #1 (iteration, kind: CLIENT) +``` + +Iteration spans are numbered (`#0`, `#1`, ...) so distinct iterations of the same chat are easy to pick apart in trace viewers. + +### Attribute reference + +| Level | Attribute | Value | +| --- | --- | --- | +| root / iteration | `gen_ai.system` | `openai`, `anthropic`, ... 
| +| iteration | `gen_ai.operation.name` | `chat` | +| root / iteration | `gen_ai.request.model` | requested model | +| iteration | `gen_ai.response.model` | actual model | +| iteration | `gen_ai.request.temperature` | from config | +| iteration | `gen_ai.request.top_p` | from config | +| iteration | `gen_ai.request.max_tokens` | from config | +| iteration | `gen_ai.usage.input_tokens` | per iteration | +| iteration | `gen_ai.usage.output_tokens` | per iteration | +| iteration | `gen_ai.response.finish_reasons` | `[stop]`, `[tool_calls]`, ... | +| root | `gen_ai.usage.input_tokens` | rolled up | +| root | `gen_ai.usage.output_tokens` | rolled up | +| root | `tanstack.ai.iterations` | iteration count | +| tool | `gen_ai.tool.name` | tool name | +| tool | `gen_ai.tool.call.id` | tool call id | +| tool | `gen_ai.tool.type` | `function` | +| tool | `tanstack.ai.tool.outcome` | `success` / `error` | + +### Metrics + +Two GenAI-standard histograms: + +- `gen_ai.client.operation.duration` (seconds) — recorded **once per `chat()` call**, covering all agent-loop iterations and tool execution. On error or abort the record carries an `error.type` attribute (the thrown error's `name`, or `"cancelled"` for aborts). +- `gen_ai.client.token.usage` (tokens) — recorded **once per iteration** (two records: input and output), tagged with `gen_ai.token.type`. + +Both `gen_ai.response.id` and `gen_ai.response.model` are deliberately excluded from metric attributes to keep cardinality low (per-request custom-model names and request IDs would blow up the series set). + +## Privacy: capturing prompts and completions + +By default, only metadata lands on spans. To record prompt and completion content, set `captureContent: true`. 
Content is captured as OTel span events following the GenAI convention: + +- `gen_ai.user.message`, `gen_ai.system.message`, `gen_ai.assistant.message`, `gen_ai.tool.message`, `gen_ai.choice` + +Pass a `redact` function to strip PII before anything is recorded: + +```ts +otelMiddleware({ + tracer, + captureContent: true, + redact: (text) => text.replace(/\b\d{3}-\d{2}-\d{4}\b/g, '[SSN]'), +}) +``` + +If `redact` throws, the middleware writes the literal sentinel `"[redaction_failed]"` into the span event and logs a warning — it never falls back to the raw content. This is the load-bearing invariant for users who ship traces to third-party backends: a broken redactor should shut off capture, not leak prompts. + +Accumulated assistant text (the `gen_ai.choice` event) is capped at `maxContentLength` characters (default `100 000`); longer completions are truncated with a trailing `"…"` marker. + +Multimodal content (images, audio, video, documents) is represented as placeholder strings (`[image]`, `[audio]`, ...) to preserve message order without dumping binary data onto spans. Use `onSpanEnd` if you need richer multimodal capture. + +Prompt/system/user message events fire from `onConfig` at the start of every iteration, which means the full conversation history (as the adapter will re-send it) is re-emitted on each iteration span. This mirrors what the provider actually sees on the wire. + +## Extension points + +All four extensions are optional. Each wraps user code in try/catch — a thrown callback becomes a log line, never a broken chat. + +### `spanNameFormatter(info)` + +Override default span names. `info.kind` is `'chat' | 'iteration' | 'tool'`. + +```ts +otelMiddleware({ + tracer, + spanNameFormatter: (info) => + info.kind === 'tool' ? `tool:${info.toolName}` : `chat:${info.ctx.model}`, +}) +``` + +### `attributeEnricher(info)` + +Add custom attributes to every span. Fires once per span. 
+ +```ts +otelMiddleware({ + tracer, + attributeEnricher: () => ({ + 'tenant.id': getCurrentTenant(), + }), +}) +``` + +### `onBeforeSpanStart(info, options)` + +Mutate `SpanOptions` immediately before `tracer.startSpan(...)`. Useful for adding links, custom start times, or extra default attributes. + +### `onSpanEnd(info, span)` + +Fires just before every `span.end()`. Common uses: record custom events, emit per-tool metrics via your own `Meter`. + +```ts +const toolDuration = meter.createHistogram('tool.duration') +otelMiddleware({ + tracer, + onSpanEnd: (info, span) => { + if (info.kind === 'tool') { + // span is still recording; read timestamps from your own store if needed + toolDuration.record(1, { 'tool.name': info.toolName }) + } + }, +}) +``` + +## Related + +- [Middleware](./middleware) — the lifecycle this middleware hooks into +- [Debug Logging](./debug-logging) — quick console-output diagnostics, complementary to OTel +- [Observability](./observability) — TanStack AI's built-in event client diff --git a/docs/config.json b/docs/config.json index f24a5fa0a..89d4f5abc 100644 --- a/docs/config.json +++ b/docs/config.json @@ -175,6 +175,10 @@ "label": "Debug Logging", "to": "advanced/debug-logging" }, + { + "label": "OpenTelemetry", + "to": "advanced/otel" + }, { "label": "Observability", "to": "advanced/observability" diff --git a/knip.json b/knip.json index 1fa06e311..7ece05b5b 100644 --- a/knip.json +++ b/knip.json @@ -22,6 +22,9 @@ "packages/react-ai": { "ignore": [] }, + "packages/typescript/ai": { + "ignoreDependencies": ["@opentelemetry/api"] + }, "packages/typescript/ai-anthropic": { "ignore": ["src/tools/**"] }, diff --git a/package.json b/package.json index d45a23b06..549ac3dcd 100644 --- a/package.json +++ b/package.json @@ -33,6 +33,8 @@ "test:types": "nx affected --targets=test:types --exclude=examples/**", "test:knip": "knip", "test:docs": "node scripts/verify-links.ts", + "test:e2e": "pnpm --filter @tanstack/ai-e2e test:e2e", + "test:e2e:ui": 
"pnpm --filter @tanstack/ai-e2e test:e2e:ui", "build": "nx affected --skip-nx-cache --targets=build --exclude=examples/**", "build:all": "nx run-many --targets=build --exclude=examples/**", "watch": "pnpm run build:all && env NX_DAEMON=true nx watch --all -- pnpm run build:all", diff --git a/packages/typescript/ai/package.json b/packages/typescript/ai/package.json index 2ad62f817..de46265b2 100644 --- a/packages/typescript/ai/package.json +++ b/packages/typescript/ai/package.json @@ -25,6 +25,10 @@ "types": "./dist/esm/middlewares/index.d.ts", "import": "./dist/esm/middlewares/index.js" }, + "./middlewares/otel": { + "types": "./dist/esm/middlewares/otel.d.ts", + "import": "./dist/esm/middlewares/otel.js" + }, "./adapter-internals": { "types": "./dist/esm/adapter-internals.d.ts", "import": "./dist/esm/adapter-internals.js" @@ -65,7 +69,16 @@ "@tanstack/ai-event-client": "workspace:*", "partial-json": "^0.1.7" }, + "peerDependencies": { + "@opentelemetry/api": ">=1.9.0" + }, + "peerDependenciesMeta": { + "@opentelemetry/api": { + "optional": true + } + }, "devDependencies": { + "@opentelemetry/api": "^1.9.0", "@standard-schema/spec": "^1.1.0", "@vitest/coverage-v8": "4.0.14", "zod": "^4.2.0" diff --git a/packages/typescript/ai/src/middlewares/index.ts b/packages/typescript/ai/src/middlewares/index.ts index 6ffacafcd..1470cdcff 100644 --- a/packages/typescript/ai/src/middlewares/index.ts +++ b/packages/typescript/ai/src/middlewares/index.ts @@ -11,3 +11,8 @@ export { type ContentGuardRule, type ContentFilteredInfo, } from './content-guard' + +// otelMiddleware is exported from the dedicated subpath +// `@tanstack/ai/middlewares/otel` so that importing the main middlewares barrel +// does not eagerly require `@opentelemetry/api` (which is an optional peer +// dependency). 
diff --git a/packages/typescript/ai/src/middlewares/otel.ts b/packages/typescript/ai/src/middlewares/otel.ts new file mode 100644 index 000000000..f4e39cd2f --- /dev/null +++ b/packages/typescript/ai/src/middlewares/otel.ts @@ -0,0 +1,861 @@ +import { + SpanKind, + SpanStatusCode, + context as otelContext, + trace as otelTrace, +} from '@opentelemetry/api' +import type { + AttributeValue, + Exception, + Meter, + Span, + SpanOptions, + Tracer, +} from '@opentelemetry/api' +import type { + ChatMiddleware, + ChatMiddlewareContext, +} from '../activities/chat/middleware/types' + +/** + * Scope (role) of an OTel span emitted by this middleware. + * + * - `chat` — the root span for a single `chat()` call + * - `iteration` — one per agent-loop iteration (one model call) + * - `tool` — one per tool execution inside an iteration + */ +export type OtelSpanScope = 'chat' | 'iteration' | 'tool' + +/** + * Alias retained for backwards compatibility. Prefer {@link OtelSpanScope}. + * + * @deprecated Use `OtelSpanScope` instead — the name shadows OTel's built-in + * `SpanKind` which is also imported by integrations of this middleware. + */ +export type OtelSpanKind = OtelSpanScope + +/** + * Span metadata passed to `spanNameFormatter`, `attributeEnricher`, + * `onBeforeSpanStart`, and `onSpanEnd`. Discriminated by `kind` so that + * tool-only fields narrow automatically inside callback bodies. + */ +export type OtelSpanInfo = + TScope extends 'chat' + ? { kind: 'chat'; ctx: ChatMiddlewareContext } + : TScope extends 'iteration' + ? { kind: 'iteration'; ctx: ChatMiddlewareContext; iteration: number } + : TScope extends 'tool' + ? { + kind: 'tool' + ctx: ChatMiddlewareContext + iteration: number + toolName: string + toolCallId: string + } + : never + +export interface OtelMiddlewareOptions { + /** OTel `Tracer` used to start root, iteration, and tool spans. */ + tracer: Tracer + /** + * Optional OTel `Meter`. 
When provided, the middleware records + * `gen_ai.client.operation.duration` and `gen_ai.client.token.usage` + * histograms. Omit to disable metrics without disabling tracing. + */ + meter?: Meter + /** + * When `true`, prompt and completion content is attached to iteration spans + * as `gen_ai.*.message` / `gen_ai.choice` events. Defaults to `false` so + * that PII never lands on a span by accident. + */ + captureContent?: boolean + /** + * Invoked on every captured content string before it lands on a span. + * Return a redacted version. If this function throws, the middleware emits + * the literal sentinel `"[redaction_failed]"` instead of the original text + * — it never falls back to raw content. + */ + redact?: (text: string) => string + /** + * Maximum characters kept in the per-iteration assistant text buffer used + * to emit `gen_ai.choice` events. Extra characters are truncated with a + * trailing `"…"` marker. Defaults to 100 000. Set to `0` to disable the + * cap. Exporters typically truncate long attribute values anyway. + */ + maxContentLength?: number + /** Override the default span name for each `kind`. */ + spanNameFormatter?: (info: OtelSpanInfo) => string + /** Add extra attributes to each span. */ + attributeEnricher?: (info: OtelSpanInfo) => Record + /** Mutate `SpanOptions` immediately before `tracer.startSpan(...)`. */ + onBeforeSpanStart?: (info: OtelSpanInfo, options: SpanOptions) => SpanOptions + /** Fires just before every `span.end()`. 
*/ + onSpanEnd?: (info: OtelSpanInfo, span: Span) => void +} + +interface RequestState { + rootSpan: Span + currentIterationSpan: Span | null + toolSpans: Map + iterationCount: number + assistantTextBuffer: string + assistantTextBufferTruncated: boolean + startTime: number +} + +const stateByCtx = new WeakMap() + +const DEFAULT_MAX_CONTENT_LENGTH = 100_000 +const REDACTION_FAILED_SENTINEL = '[redaction_failed]' + +function serializeContent(content: unknown): string { + if (typeof content === 'string') return content + if (!Array.isArray(content)) return '' + const parts: Array = [] + for (const part of content) { + if (!part || typeof part !== 'object') continue + const type = (part as { type?: string }).type + switch (type) { + case 'text': + parts.push( + ( + (part as { text?: string }).text ?? + (part as { content?: string }).content ?? + '' + ).toString(), + ) + break + case 'image': + parts.push('[image]') + break + case 'audio': + parts.push('[audio]') + break + case 'video': + parts.push('[video]') + break + case 'document': + parts.push('[document]') + break + default: + parts.push(`[${type ?? 
'unknown'}]`) + } + } + return parts.join(' ') +} + +function messageEventName(role: string): string { + switch (role) { + case 'user': + return 'gen_ai.user.message' + case 'assistant': + return 'gen_ai.assistant.message' + case 'tool': + return 'gen_ai.tool.message' + case 'system': + return 'gen_ai.system.message' + default: + return `gen_ai.${role}.message` + } +} + +function errorMessage(err: unknown): string | undefined { + if (err instanceof Error) return err.message + if (typeof err === 'string') return err + if (err && typeof err === 'object' && 'message' in err) { + const m = (err as { message?: unknown }).message + if (typeof m === 'string') return m + } + return undefined +} + +function errorTypeName(err: unknown): string { + if (err instanceof Error) return err.name || 'Error' + if (err && typeof err === 'object' && 'name' in err) { + const n = (err as { name?: unknown }).name + if (typeof n === 'string') return n + } + return 'Error' +} + +function safeCall(label: string, fn: () => T): T | undefined { + try { + return fn() + } catch (err) { + // Keep middleware non-fatal, but surface callback failures so that broken + // extension points (attributeEnricher, spanNameFormatter, onSpanEnd, ...) + // are observable. Matches the guarantee documented in docs/advanced/otel.md. 
+ console.warn(`[otelMiddleware] ${label} failed`, err) + return undefined + } +} + +export function otelMiddleware(options: OtelMiddlewareOptions): ChatMiddleware { + const { + tracer, + meter, + captureContent = false, + redact = (s) => s, + maxContentLength = DEFAULT_MAX_CONTENT_LENGTH, + spanNameFormatter, + attributeEnricher, + onBeforeSpanStart, + onSpanEnd, + } = options + + const durationHistogram = meter?.createHistogram( + 'gen_ai.client.operation.duration', + { + description: 'GenAI client operation duration', + unit: 's', + }, + ) + const tokenHistogram = meter?.createHistogram('gen_ai.client.token.usage', { + description: 'GenAI client token usage', + unit: '{token}', + }) + + // Redact user content, failing closed to a sentinel string instead of ever + // letting raw text through. Callers that pass `captureContent: true` with a + // third-party PII redactor depend on this invariant. + const redactContent = (text: string): string => { + try { + return redact(text) + } catch (err) { + console.warn('[otelMiddleware] otel.redact failed', err) + return REDACTION_FAILED_SENTINEL + } + } + + const appendAssistantText = (state: RequestState, delta: string): void => { + if (maxContentLength > 0) { + if (state.assistantTextBufferTruncated) return + const remaining = maxContentLength - state.assistantTextBuffer.length + if (remaining <= 0) { + state.assistantTextBufferTruncated = true + state.assistantTextBuffer += '…' + return + } + if (delta.length > remaining) { + state.assistantTextBuffer += delta.slice(0, remaining) + '…' + state.assistantTextBufferTruncated = true + return + } + } + state.assistantTextBuffer += delta + } + + const closeIterationSpan = ( + state: RequestState, + ctx: ChatMiddlewareContext, + ): void => { + if (!state.currentIterationSpan) return + const span = state.currentIterationSpan + const iteration = state.iterationCount - 1 + safeCall('otel.onSpanEnd', () => + onSpanEnd?.( + { kind: 'iteration', ctx, iteration } as 
OtelSpanInfo<'iteration'>, + span, + ), + ) + span.end() + state.currentIterationSpan = null + } + + return { + name: 'otel', + + onStart(ctx) { + safeCall('otel.onStart', () => { + const info: OtelSpanInfo<'chat'> = { kind: 'chat', ctx } + const name = + safeCall('otel.spanNameFormatter', () => spanNameFormatter?.(info)) ?? + `chat ${ctx.model}` + const baseOptions: SpanOptions = { + kind: SpanKind.INTERNAL, + attributes: { + 'gen_ai.system': ctx.provider, + 'gen_ai.request.model': ctx.model, + // NOTE: `gen_ai.operation.name` is deliberately NOT set on the + // root span. The root represents a `chat()` invocation that may + // span multiple model calls; only iteration spans correspond to + // a single chat operation. Backends that map `operation.name=chat` + // to a "generation" event (e.g. PostHog LLM Analytics) would + // otherwise emit a duplicate generation for the wrapper span. + }, + } + const spanOptions = + safeCall('otel.onBeforeSpanStart', () => + onBeforeSpanStart?.(info, baseOptions), + ) ?? baseOptions + const rootSpan = tracer.startSpan(name, spanOptions) + + const enriched = safeCall('otel.attributeEnricher', () => + attributeEnricher?.(info), + ) + if (enriched) rootSpan.setAttributes(enriched) + + stateByCtx.set(ctx, { + rootSpan, + currentIterationSpan: null, + toolSpans: new Map(), + iterationCount: 0, + assistantTextBuffer: '', + assistantTextBufferTruncated: false, + startTime: Date.now(), + }) + }) + }, + + onConfig(ctx, config) { + if (ctx.phase !== 'beforeModel') return + safeCall('otel.onConfig', () => { + const state = stateByCtx.get(ctx) + if (!state) return + + // The previous iteration's span stays open through tool execution and + // onUsage so that tool spans nest under it and token attributes land + // on it. Close it here, just before opening the next iteration. 
+ closeIterationSpan(state, ctx) + + const info: OtelSpanInfo<'iteration'> = { + kind: 'iteration', + ctx, + iteration: ctx.iteration, + } + const name = + safeCall('otel.spanNameFormatter', () => spanNameFormatter?.(info)) ?? + `chat ${ctx.model} #${ctx.iteration}` + + const baseAttrs: Record = { + 'gen_ai.system': ctx.provider, + 'gen_ai.operation.name': 'chat', + 'gen_ai.request.model': ctx.model, + 'tanstack.ai.iteration': ctx.iteration, + } + if (config.temperature !== undefined) + baseAttrs['gen_ai.request.temperature'] = config.temperature + if (config.topP !== undefined) + baseAttrs['gen_ai.request.top_p'] = config.topP + if (config.maxTokens !== undefined) + baseAttrs['gen_ai.request.max_tokens'] = config.maxTokens + + const baseOptions: SpanOptions = { + kind: SpanKind.CLIENT, + attributes: baseAttrs, + } + const spanOptions = + safeCall('otel.onBeforeSpanStart', () => + onBeforeSpanStart?.(info, baseOptions), + ) ?? baseOptions + + const parentCtx = otelTrace.setSpan( + otelContext.active(), + state.rootSpan, + ) + let iterSpan!: Span + otelContext.with(parentCtx, () => { + // Pass the parent context explicitly as the 3rd arg — this is a + // real-OTel-compatible way to ensure the span is parented to + // `rootSpan` even when the host app has not registered a context + // manager (e.g. in tests or minimal setups). + iterSpan = tracer.startSpan(name, spanOptions, parentCtx) + }) + + const enriched = safeCall('otel.attributeEnricher', () => + attributeEnricher?.(info), + ) + if (enriched) iterSpan.setAttributes(enriched) + + state.currentIterationSpan = iterSpan + state.assistantTextBuffer = '' + state.assistantTextBufferTruncated = false + + if (captureContent) { + // Span events follow the original GenAI semconv (one event per + // message). Backends that read events get content this way. 
+ for (const sys of config.systemPrompts) { + iterSpan.addEvent('gen_ai.system.message', { + content: redactContent(sys), + }) + } + for (const m of config.messages) { + const body = serializeContent(m.content) + if (body.length === 0) continue + iterSpan.addEvent(messageEventName(m.role), { + content: redactContent(body), + }) + } + + // Also emit the current GenAI-semconv attribute form + // (`gen_ai.input.messages`) — backends like PostHog read prompt + // content from this attribute, not from span events. + const inputMessages: Array<{ role: string; content: string }> = [] + for (const sys of config.systemPrompts) { + inputMessages.push({ + role: 'system', + content: redactContent(sys), + }) + } + for (const m of config.messages) { + const body = serializeContent(m.content) + if (body.length === 0) continue + inputMessages.push({ + role: m.role, + content: redactContent(body), + }) + } + if (inputMessages.length > 0) { + const inputJson = JSON.stringify(inputMessages) + // Current OTel GenAI semconv — Sentry / PostHog / Datadog read + // prompt content from this attribute. + iterSpan.setAttribute('gen_ai.input.messages', inputJson) + // Langfuse-native attribute. Highest priority in Langfuse's OTLP + // ingestion (checked before events and gen_ai.input.messages) so + // the Input panel populates reliably. Harmless to other backends — + // the attribute is namespaced and unrecognised keys are ignored. + iterSpan.setAttribute('langfuse.observation.input', inputJson) + + // Mirror the first iteration's input onto the root span and at + // trace level so Langfuse fills Input on the trace card and the + // chat-level observation. Later iterations append tool-call / + // assistant messages that are useful per-iteration but noise at + // the chat / trace level. 
+ if (state.iterationCount === 0) { + state.rootSpan.setAttribute( + 'langfuse.observation.input', + inputJson, + ) + state.rootSpan.setAttribute('langfuse.trace.input', inputJson) + } + } + } + + state.iterationCount += 1 + }) + return undefined + }, + + onChunk(ctx, chunk) { + safeCall('otel.onChunk', () => { + const state = stateByCtx.get(ctx) + if (!state) return + + if (captureContent && chunk.type === 'TEXT_MESSAGE_CONTENT') { + appendAssistantText(state, chunk.delta) + } + + if (chunk.type !== 'RUN_FINISHED') return + const span = state.currentIterationSpan + if (!span) return + + if (chunk.finishReason) { + span.setAttribute('gen_ai.response.finish_reasons', [ + chunk.finishReason, + ]) + } + if (chunk.model) span.setAttribute('gen_ai.response.model', chunk.model) + + // Set usage attributes on the iteration span directly from the chunk + // so they're available before `onUsage` fires. Histogram recording is + // deliberately NOT done here — the chat runner always invokes + // `runOnUsage` when `chunk.usage` is present, and `onUsage` is the + // canonical place for the metric. Recording in both would double-count. + if (chunk.usage) { + span.setAttributes({ + 'gen_ai.usage.input_tokens': chunk.usage.promptTokens, + 'gen_ai.usage.output_tokens': chunk.usage.completionTokens, + }) + } + + if (captureContent && state.assistantTextBuffer.length > 0) { + const completion = redactContent(state.assistantTextBuffer) + const outputJson = JSON.stringify([ + { role: 'assistant', content: completion }, + ]) + // Event form (older semconv) — kept for backends that consume it. + span.addEvent('gen_ai.choice', { content: completion }) + // Attribute form (current semconv) — required by backends like + // PostHog that read completion content from `gen_ai.output.messages`. + span.setAttribute('gen_ai.output.messages', outputJson) + // Langfuse-native attribute (highest priority in Langfuse mapping). 
+ span.setAttribute('langfuse.observation.output', outputJson) + // Mirror to the root span and trace card. Each iteration overwrites, + // so the final iteration's completion lands on the root — which is + // the final answer the user saw, not an intermediate tool-call turn. + state.rootSpan.setAttribute('langfuse.observation.output', outputJson) + state.rootSpan.setAttribute('langfuse.trace.output', outputJson) + state.assistantTextBuffer = '' + state.assistantTextBufferTruncated = false + } + + // Intentionally leave the iteration span open: tool spans started + // after `RUN_FINISHED` (tool_calls finishReason) must nest under it, + // and `onUsage` may still fire. The span is closed in `onConfig` when + // the next iteration starts, or in `onFinish` / `onError` / `onAbort`. + }) + return undefined + }, + + onUsage(ctx, usage) { + safeCall('otel.onUsage', () => { + const state = stateByCtx.get(ctx) + if (!state) return + + // Always record the token histogram — metrics don't depend on having + // an iteration span, and skipping here would drop metric data if an + // adapter emits `onUsage` outside the iteration window. + if (tokenHistogram) { + const metricAttrs = { + 'gen_ai.system': ctx.provider, + 'gen_ai.operation.name': 'chat', + 'gen_ai.request.model': ctx.model, + } + tokenHistogram.record(usage.promptTokens, { + ...metricAttrs, + 'gen_ai.token.type': 'input', + }) + tokenHistogram.record(usage.completionTokens, { + ...metricAttrs, + 'gen_ai.token.type': 'output', + }) + } + + const span = state.currentIterationSpan ?? state.rootSpan + span.setAttributes({ + 'gen_ai.usage.input_tokens': usage.promptTokens, + 'gen_ai.usage.output_tokens': usage.completionTokens, + }) + }) + }, + + onBeforeToolCall(ctx, hookCtx) { + safeCall('otel.onBeforeToolCall', () => { + const state = stateByCtx.get(ctx) + if (!state) return + const parent = state.currentIterationSpan ?? 
state.rootSpan + + const info: OtelSpanInfo<'tool'> = { + kind: 'tool', + ctx, + toolName: hookCtx.toolName, + toolCallId: hookCtx.toolCallId, + iteration: state.iterationCount - 1, + } + const name = + safeCall('otel.spanNameFormatter', () => spanNameFormatter?.(info)) ?? + `execute_tool ${hookCtx.toolName}` + + const baseAttrs: Record = { + 'gen_ai.tool.name': hookCtx.toolName, + 'gen_ai.tool.call.id': hookCtx.toolCallId, + 'gen_ai.tool.type': 'function', + } + const baseOptions: SpanOptions = { + kind: SpanKind.INTERNAL, + attributes: baseAttrs, + } + const spanOptions = + safeCall('otel.onBeforeSpanStart', () => + onBeforeSpanStart?.(info, baseOptions), + ) ?? baseOptions + + const parentCtx = otelTrace.setSpan(otelContext.active(), parent) + let toolSpan!: Span + otelContext.with(parentCtx, () => { + toolSpan = tracer.startSpan(name, spanOptions, parentCtx) + }) + + const enriched = safeCall('otel.attributeEnricher', () => + attributeEnricher?.(info), + ) + if (enriched) toolSpan.setAttributes(enriched) + + // Stamp the tool args onto the tool span so backends that render an + // input panel per span (e.g. PostHog) have something to show. + if (captureContent) { + const argsBody = + typeof hookCtx.args === 'string' + ? hookCtx.args + : (safeCall('otel.serializeToolArgs', () => + JSON.stringify(hookCtx.args ?? null), + ) ?? '[unserializable_tool_args]') + const redactedArgs = redactContent(argsBody) + const toolInputJson = JSON.stringify([ + { role: 'tool', content: redactedArgs }, + ]) + toolSpan.setAttribute('gen_ai.input.messages', toolInputJson) + // Langfuse-native (highest priority in Langfuse mapping). 
+ toolSpan.setAttribute('langfuse.observation.input', toolInputJson) + } + + state.toolSpans.set(hookCtx.toolCallId, { + span: toolSpan, + toolName: hookCtx.toolName, + }) + }) + return undefined + }, + + onAfterToolCall(ctx, info) { + safeCall('otel.onAfterToolCall', () => { + const state = stateByCtx.get(ctx) + if (!state) return + const entry = state.toolSpans.get(info.toolCallId) + if (!entry) return + const { span: toolSpan } = entry + + const outcome = info.ok ? 'success' : 'error' + toolSpan.setAttribute('tanstack.ai.tool.outcome', outcome) + + if (!info.ok && info.error !== undefined) { + toolSpan.recordException(info.error as Exception) + toolSpan.setStatus({ + code: SpanStatusCode.ERROR, + message: errorMessage(info.error), + }) + } + + if (captureContent) { + // Serialization can throw on circular refs or `BigInt` values. If it + // does, fall back to a sentinel so the rest of this handler (span + // end, onSpanEnd, toolSpans cleanup) still runs — otherwise the tool + // span would dangle until the onFinish/onError sweep. + const body = + typeof info.result === 'string' + ? info.result + : (safeCall('otel.serializeToolResult', () => + JSON.stringify(info.result ?? null), + ) ?? '[unserializable_tool_result]') + const redactedBody = redactContent(body) + if (state.currentIterationSpan) { + state.currentIterationSpan.addEvent('gen_ai.tool.message', { + content: redactedBody, + tool_call_id: info.toolCallId, + }) + } + // Output panel of the tool span itself — `gen_ai.output.messages` is + // what current GenAI semconv consumers (e.g. PostHog) read. + const toolOutputJson = JSON.stringify([ + { role: 'tool', content: redactedBody }, + ]) + toolSpan.setAttribute('gen_ai.output.messages', toolOutputJson) + // Langfuse-native (highest priority in Langfuse mapping). 
+ toolSpan.setAttribute('langfuse.observation.output', toolOutputJson) + } + + safeCall('otel.onSpanEnd', () => + onSpanEnd?.( + { + kind: 'tool', + ctx, + toolName: info.toolName, + toolCallId: info.toolCallId, + iteration: state.iterationCount - 1, + } as OtelSpanInfo<'tool'>, + toolSpan, + ), + ) + toolSpan.end() + state.toolSpans.delete(info.toolCallId) + }) + }, + + onError(ctx, info) { + safeCall('otel.onError', () => { + const state = stateByCtx.get(ctx) + if (!state) return + + const errType = errorTypeName(info.error) + const message = errorMessage(info.error) + const exception = info.error as Exception + + if (state.currentIterationSpan) { + state.currentIterationSpan.recordException(exception) + state.currentIterationSpan.setStatus({ + code: SpanStatusCode.ERROR, + message, + }) + safeCall('otel.onSpanEnd', () => + onSpanEnd?.( + { + kind: 'iteration', + ctx, + iteration: state.iterationCount - 1, + } as OtelSpanInfo<'iteration'>, + state.currentIterationSpan!, + ), + ) + state.currentIterationSpan.end() + state.currentIterationSpan = null + } + + for (const [id, entry] of state.toolSpans) { + const { span, toolName } = entry + span.recordException(exception) + span.setStatus({ code: SpanStatusCode.ERROR, message }) + safeCall('otel.onSpanEnd', () => + onSpanEnd?.( + { + kind: 'tool', + ctx, + toolCallId: id, + toolName, + iteration: state.iterationCount - 1, + } as OtelSpanInfo<'tool'>, + span, + ), + ) + span.end() + state.toolSpans.delete(id) + } + + state.rootSpan.recordException(exception) + state.rootSpan.setStatus({ code: SpanStatusCode.ERROR, message }) + + if (durationHistogram) { + durationHistogram.record(info.duration / 1000, { + 'gen_ai.system': ctx.provider, + 'gen_ai.operation.name': 'chat', + 'gen_ai.request.model': ctx.model, + 'error.type': errType, + }) + } + + safeCall('otel.onSpanEnd', () => + onSpanEnd?.({ kind: 'chat', ctx }, state.rootSpan), + ) + state.rootSpan.end() + stateByCtx.delete(ctx) + }) + }, + + onAbort(ctx, info) { + 
safeCall('otel.onAbort', () => { + const state = stateByCtx.get(ctx) + if (!state) return + + const closeCancelled = (span: Span): void => { + // `gen_ai.completion.reason` is not part of the GenAI semconv; use a + // TanStack-namespaced attribute so downstream exporters don't treat + // it as standard. The span status still carries the error code. + span.setAttribute('tanstack.ai.completion.reason', 'cancelled') + span.setStatus({ code: SpanStatusCode.ERROR, message: 'cancelled' }) + } + + if (state.currentIterationSpan) { + closeCancelled(state.currentIterationSpan) + safeCall('otel.onSpanEnd', () => + onSpanEnd?.( + { + kind: 'iteration', + ctx, + iteration: state.iterationCount - 1, + } as OtelSpanInfo<'iteration'>, + state.currentIterationSpan!, + ), + ) + state.currentIterationSpan.end() + state.currentIterationSpan = null + } + for (const [id, entry] of state.toolSpans) { + const { span, toolName } = entry + closeCancelled(span) + safeCall('otel.onSpanEnd', () => + onSpanEnd?.( + { + kind: 'tool', + ctx, + toolCallId: id, + toolName, + iteration: state.iterationCount - 1, + } as OtelSpanInfo<'tool'>, + span, + ), + ) + span.end() + state.toolSpans.delete(id) + } + closeCancelled(state.rootSpan) + + if (durationHistogram) { + durationHistogram.record(info.duration / 1000, { + 'gen_ai.system': ctx.provider, + 'gen_ai.operation.name': 'chat', + 'gen_ai.request.model': ctx.model, + 'error.type': 'cancelled', + }) + } + + safeCall('otel.onSpanEnd', () => + onSpanEnd?.({ kind: 'chat', ctx }, state.rootSpan), + ) + state.rootSpan.end() + stateByCtx.delete(ctx) + }) + }, + + onFinish(ctx, info) { + safeCall('otel.onFinish', () => { + const state = stateByCtx.get(ctx) + if (!state) return + + // Close any tool spans that never received `onAfterToolCall` (adapter + // quirk). Done before the iteration span so the hierarchy is closed + // in depth-first order. 
+ for (const [id, entry] of state.toolSpans) { + const { span, toolName } = entry + span.setAttribute('tanstack.ai.tool.outcome', 'unknown') + safeCall('otel.onSpanEnd', () => + onSpanEnd?.( + { + kind: 'tool', + ctx, + toolCallId: id, + toolName, + iteration: state.iterationCount - 1, + } as OtelSpanInfo<'tool'>, + span, + ), + ) + span.end() + state.toolSpans.delete(id) + } + + // The final iteration's span is still open because we keep it open + // through tool execution and `onUsage`. Close it now. + closeIterationSpan(state, ctx) + + if (durationHistogram) { + durationHistogram.record(info.duration / 1000, { + 'gen_ai.system': ctx.provider, + 'gen_ai.operation.name': 'chat', + 'gen_ai.request.model': ctx.model, + }) + } + + if (info.usage) { + state.rootSpan.setAttributes({ + 'gen_ai.usage.input_tokens': info.usage.promptTokens, + 'gen_ai.usage.output_tokens': info.usage.completionTokens, + }) + } + if (info.finishReason) { + state.rootSpan.setAttribute('gen_ai.response.finish_reasons', [ + info.finishReason, + ]) + } + state.rootSpan.setAttribute( + 'tanstack.ai.iterations', + state.iterationCount, + ) + + safeCall('otel.onSpanEnd', () => + onSpanEnd?.({ kind: 'chat', ctx }, state.rootSpan), + ) + state.rootSpan.end() + stateByCtx.delete(ctx) + }) + }, + } +} diff --git a/packages/typescript/ai/tests/middlewares/fake-otel.ts b/packages/typescript/ai/tests/middlewares/fake-otel.ts new file mode 100644 index 000000000..47989470d --- /dev/null +++ b/packages/typescript/ai/tests/middlewares/fake-otel.ts @@ -0,0 +1,272 @@ +import type { + Attributes, + AttributeValue, + Context, + Histogram, + MetricOptions, + Meter, + Span, + SpanContext, + SpanOptions, + SpanStatus, + TimeInput, + Tracer, +} from '@opentelemetry/api' +import { SpanStatusCode, trace as otelTrace } from '@opentelemetry/api' +import type { ChatMiddlewareContext } from '../../src/activities/chat/middleware/types' +import type { ToolCall } from '../../src/types' + +export interface RecordedEvent { + 
name: string
+ attributes?: Attributes
+}
+
+export interface RecordedException {
+ exception: unknown
+ attributes?: Attributes
+}
+
+export interface FakeSpan extends Span {
+ name: string
+ kind?: number
+ parent?: FakeSpan | null
+ startTimeMs: number
+ endTimeMs: number | null
+ attributes: Record<string, AttributeValue>
+ events: Array<RecordedEvent>
+ exceptions: Array<RecordedException>
+ status: SpanStatus
+ ended: boolean
+}
+
+export interface HistogramRecord {
+ name: string
+ value: number
+ attributes?: Attributes
+ options?: MetricOptions
+}
+
+export interface FakeMeter {
+ meter: Meter
+ records: Array<HistogramRecord>
+}
+
+export interface FakeTracer {
+ tracer: Tracer
+ spans: Array<FakeSpan>
+ activeStack: Array<FakeSpan>
+}
+
+function makeSpan(
+ name: string,
+ options: SpanOptions,
+ parent: FakeSpan | null,
+): FakeSpan {
+ const spanId = `fake-span-${Math.random().toString(36).slice(2, 10)}`
+ const span: FakeSpan = {
+ name,
+ kind: options.kind,
+ parent,
+ startTimeMs: Date.now(),
+ endTimeMs: null,
+ attributes: Object.fromEntries(
+ Object.entries(options.attributes ?? 
{}).filter(
+ ([, v]) => v !== undefined,
+ ),
+ ) as Record<string, AttributeValue>,
+ events: [],
+ exceptions: [],
+ status: { code: SpanStatusCode.UNSET },
+ ended: false,
+ spanContext(): SpanContext {
+ return {
+ traceId: 'fake-trace',
+ spanId,
+ traceFlags: 1,
+ }
+ },
+ setAttribute(key, value) {
+ this.attributes[key] = value as AttributeValue
+ return this
+ },
+ setAttributes(attrs) {
+ for (const [k, v] of Object.entries(attrs)) {
+ this.attributes[k] = v as AttributeValue
+ }
+ return this
+ },
+ addEvent(name, attrs) {
+ this.events.push({ name, attributes: attrs as Attributes | undefined })
+ return this
+ },
+ addLink() {
+ return this
+ },
+ addLinks() {
+ return this
+ },
+ setStatus(status) {
+ this.status = status
+ return this
+ },
+ updateName(n) {
+ this.name = n
+ return this
+ },
+ end(_endTime?: TimeInput) {
+ this.endTimeMs = Date.now()
+ this.ended = true
+ },
+ isRecording() {
+ return !this.ended
+ },
+ recordException(exception, attrs) {
+ this.exceptions.push({
+ exception,
+ attributes: attrs as Attributes | undefined,
+ })
+ },
+ }
+ return span
+}
+
+export function createFakeTracer(): FakeTracer {
+ const spans: Array<FakeSpan> = []
+ const activeStack: Array<FakeSpan> = []
+
+ const tracer: Tracer = {
+ startSpan(name, options = {}, ctx?: Context) {
+ // Resolve parent in this order:
+ // 1. Explicit `ctx` argument (how `otelMiddleware` passes the parent)
+ // 2. Fallback to the activeStack (for `startActiveSpan` callers)
+ let parent: FakeSpan | null = null
+ if (ctx) {
+ const fromCtx = otelTrace.getSpan(ctx)
+ if (fromCtx && (fromCtx as FakeSpan).startTimeMs !== undefined) {
+ parent = fromCtx as FakeSpan
+ }
+ }
+ if (!parent) parent = activeStack[activeStack.length - 1] ?? 
null
+ const span = makeSpan(name, options, parent)
+ spans.push(span)
+ return span
+ },
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ startActiveSpan(...args: any[]) {
+ const name = args[0] as string
+ const fn = args[args.length - 1] as (span: Span) => unknown
+ const options = (
+ typeof args[1] === 'object' &&
+ args[1] !== null &&
+ !('traceId' in args[1])
+ ? args[1]
+ : {}
+ ) as SpanOptions
+ const parent = activeStack[activeStack.length - 1] ?? null
+ const span = makeSpan(name, options, parent)
+ spans.push(span)
+ activeStack.push(span)
+ try {
+ return fn(span)
+ } finally {
+ activeStack.pop()
+ }
+ },
+ }
+
+ return { tracer, spans, activeStack }
+}
+
+export function createFakeMeter(): FakeMeter {
+ const records: Array<HistogramRecord> = []
+
+ const meter: Meter = {
+ createHistogram(name: string, options?: MetricOptions): Histogram {
+ return {
+ record(value: number, attributes?: Attributes) {
+ records.push({ name, value, attributes, options })
+ },
+ }
+ },
+ createCounter() {
+ throw new Error('not implemented in fake')
+ },
+ createUpDownCounter() {
+ throw new Error('not implemented in fake')
+ },
+ createObservableGauge() {
+ throw new Error('not implemented in fake')
+ },
+ createObservableCounter() {
+ throw new Error('not implemented in fake')
+ },
+ createObservableUpDownCounter() {
+ throw new Error('not implemented in fake')
+ },
+ createGauge() {
+ throw new Error('not implemented in fake')
+ },
+ addBatchObservableCallback() {},
+ removeBatchObservableCallback() {},
+ } as Meter
+
+ return { meter, records }
+}
+
+/**
+ * Build a minimal ToolCall for tests. Fills required fields with plausible
+ * defaults; overrides take precedence.
+ */
+export function makeToolCall(
+ overrides: { id: string } & Partial<Omit<ToolCall, 'id' | 'function'>> & {
+ function?: Partial<ToolCall['function']>
+ },
+): ToolCall {
+ return {
+ id: overrides.id,
+ type: overrides.type ?? 'function',
+ function: {
+ name: overrides.function?.name ?? 'test_tool',
+ arguments: overrides.function?.arguments ?? 
'{}',
+ },
+ ...(overrides.providerMetadata !== undefined
+ ? { providerMetadata: overrides.providerMetadata }
+ : {}),
+ }
+}
+
+/**
+ * Build a minimal ChatMiddlewareContext for unit tests. Only fields the
+ * otel middleware reads need realistic values; others can be placeholders.
+ */
+export function makeCtx(
+ overrides: Partial<ChatMiddlewareContext> = {},
+): ChatMiddlewareContext {
+ const base = {
+ requestId: 'req-1',
+ streamId: 'stream-1',
+ phase: 'init' as const,
+ iteration: 0,
+ chunkIndex: 0,
+ abort: () => {},
+ context: undefined,
+ defer: () => {},
+ provider: 'openai',
+ model: 'gpt-4o',
+ source: 'server' as const,
+ streaming: true,
+ systemPrompts: [],
+ options: {},
+ modelOptions: {},
+ messageCount: 1,
+ hasTools: false,
+ currentMessageId: null,
+ accumulatedContent: '',
+ messages: [],
+ createId: (prefix: string) => `${prefix}-1`,
+ }
+ return {
+ ...base,
+ ...overrides,
+ } as ChatMiddlewareContext
+}
diff --git a/packages/typescript/ai/tests/middlewares/otel.test.ts b/packages/typescript/ai/tests/middlewares/otel.test.ts
new file mode 100644
index 000000000..433b77fc1
--- /dev/null
+++ b/packages/typescript/ai/tests/middlewares/otel.test.ts
@@ -0,0 +1,899 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
+import { SpanKind, SpanStatusCode } from '@opentelemetry/api'
+import { otelMiddleware } from '../../src/middlewares/otel'
+import {
+ createFakeTracer,
+ createFakeMeter,
+ makeCtx,
+ makeToolCall,
+ type FakeSpan,
+} from './fake-otel'
+import type {
+ ChatMiddleware,
+ ChatMiddlewareContext,
+ ChatMiddlewareConfig,
+} from '../../src/activities/chat/middleware/types'
+import { ev } from '../test-utils'
+
+async function runToIterationStart(
+ mw: ChatMiddleware,
+ ctx: ChatMiddlewareContext,
+ config: Partial<ChatMiddlewareConfig> = {},
+) {
+ await mw.onStart?.(ctx)
+ ctx.phase = 'beforeModel'
+ await mw.onConfig?.(ctx, {
+ messages: [],
+ systemPrompts: [],
+ tools: [],
+ ...config,
+ })
+}
+
+class RateLimitError extends Error {
+ override 
name = 'RateLimitError' +} + +describe('otelMiddleware — root span lifecycle', () => { + it('creates a root span on onStart and closes it on onFinish', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ tracer }) + const ctx = makeCtx() + + await mw.onStart?.(ctx) + expect(spans).toHaveLength(1) + expect(spans[0]!.name).toBe('chat gpt-4o') + expect(spans[0]!.ended).toBe(false) + expect(spans[0]!.kind).toBe(SpanKind.INTERNAL) + expect(spans[0]!.attributes['gen_ai.system']).toBe('openai') + // `gen_ai.operation.name` is intentionally NOT set on the root span — + // see the matching comment in otel.ts. Only iteration spans carry it. + expect(spans[0]!.attributes['gen_ai.operation.name']).toBeUndefined() + expect(spans[0]!.attributes['gen_ai.request.model']).toBe('gpt-4o') + + await mw.onFinish?.(ctx, { + finishReason: 'stop', + duration: 10, + content: '', + }) + expect(spans[0]!.ended).toBe(true) + expect(spans[0]!.status.code).toBe(SpanStatusCode.UNSET) + }) +}) + +describe('otelMiddleware — iteration span lifecycle', () => { + it('opens an iteration span on onConfig(beforeModel) and keeps it open through RUN_FINISHED', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ tracer }) + const ctx = makeCtx() + ctx.phase = 'init' + + await runToIterationStart(mw, ctx, { + messages: [{ role: 'user', content: 'hi' }], + temperature: 0.7, + topP: 0.9, + maxTokens: 512, + }) + + const [rootSpan, iterSpan] = spans + expect(spans).toHaveLength(2) + expect(iterSpan!.parent).toBe(rootSpan) + expect(iterSpan!.name).toBe('chat gpt-4o #0') + expect(iterSpan!.kind).toBe(SpanKind.CLIENT) + expect(iterSpan!.ended).toBe(false) + + await mw.onChunk?.(ctx, { ...ev.runFinished('stop'), model: 'gpt-4o' }) + // The iteration span stays open across RUN_FINISHED so tool spans can + // nest under it and onUsage still has a target. It closes on onFinish. 
+ expect(iterSpan!.ended).toBe(false) + expect(iterSpan!.attributes['gen_ai.response.finish_reasons']).toEqual([ + 'stop', + ]) + expect(iterSpan!.attributes['gen_ai.response.model']).toBe('gpt-4o') + + await mw.onFinish?.(ctx, { + finishReason: 'stop', + duration: 10, + content: '', + }) + expect(iterSpan!.ended).toBe(true) + expect(rootSpan!.ended).toBe(true) + }) + + it('opens a fresh iteration span for each onConfig(beforeModel) and closes the previous one', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ tracer }) + const ctx = makeCtx() + + await runToIterationStart(mw, ctx) + await mw.onChunk?.(ctx, ev.runFinished('tool_calls')) + // First iteration span still open at this point. + expect(spans[1]!.ended).toBe(false) + ctx.iteration = 1 + await mw.onConfig?.(ctx, { messages: [], systemPrompts: [], tools: [] }) + // Opening the 2nd iteration closes the 1st. + expect(spans[1]!.ended).toBe(true) + await mw.onChunk?.(ctx, ev.runFinished('stop')) + await mw.onFinish?.(ctx, { + finishReason: 'stop', + duration: 10, + content: '', + }) + + expect(spans).toHaveLength(3) + expect(spans[1]!.ended).toBe(true) + expect(spans[2]!.ended).toBe(true) + expect(spans[1]!.name).toBe('chat gpt-4o #0') + expect(spans[2]!.name).toBe('chat gpt-4o #1') + }) +}) + +describe('otelMiddleware — token histogram', () => { + it('sets usage attributes on the iteration span from RUN_FINISHED chunk.usage without recording a histogram', async () => { + // The chat runner always follows onChunk(RUN_FINISHED) with runOnUsage, so + // histogram recording lives in onUsage alone to avoid double-counting. + // onChunk is responsible only for the per-iteration span attributes. 
+ const { tracer, spans } = createFakeTracer() + const { meter, records } = createFakeMeter() + const mw = otelMiddleware({ tracer, meter }) + const ctx = makeCtx() + + await runToIterationStart(mw, ctx) + await mw.onChunk?.(ctx, { + ...ev.runFinished('stop'), + model: 'gpt-4o', + usage: { promptTokens: 100, completionTokens: 50, totalTokens: 150 }, + }) + + expect(spans[1]!.attributes['gen_ai.usage.input_tokens']).toBe(100) + expect(spans[1]!.attributes['gen_ai.usage.output_tokens']).toBe(50) + expect( + records.filter((r) => r.name === 'gen_ai.client.token.usage'), + ).toHaveLength(0) + }) + + it('records token histograms from onUsage in production hook order (after RUN_FINISHED)', async () => { + const { tracer, spans } = createFakeTracer() + const { meter, records } = createFakeMeter() + const mw = otelMiddleware({ tracer, meter }) + const ctx = makeCtx() + + // Production order: onChunk(RUN_FINISHED) fires first, then runOnUsage. + // This test is the regression guard for the "onUsage no-op" bug. + await runToIterationStart(mw, ctx) + await mw.onChunk?.(ctx, { ...ev.runFinished('stop'), model: 'gpt-4o' }) + await mw.onUsage?.(ctx, { + promptTokens: 42, + completionTokens: 17, + totalTokens: 59, + }) + + const tokenRecords = records.filter( + (r) => r.name === 'gen_ai.client.token.usage', + ) + expect(tokenRecords).toHaveLength(2) + expect( + tokenRecords.find((r) => r.attributes!['gen_ai.token.type'] === 'input')! + .value, + ).toBe(42) + // Iteration span is still open, so usage attrs land on it. 
+ expect(spans[1]!.attributes['gen_ai.usage.input_tokens']).toBe(42) + expect(spans[1]!.attributes['gen_ai.usage.output_tokens']).toBe(17) + }) + + it('records token histogram even if onUsage fires after the iteration span is gone', async () => { + const { tracer, spans } = createFakeTracer() + const { meter, records } = createFakeMeter() + const mw = otelMiddleware({ tracer, meter }) + const ctx = makeCtx() + + await mw.onStart?.(ctx) + await mw.onUsage?.(ctx, { + promptTokens: 5, + completionTokens: 3, + totalTokens: 8, + }) + expect( + records.filter((r) => r.name === 'gen_ai.client.token.usage'), + ).toHaveLength(2) + // Falls back to the root span for attribute assignment. + expect(spans[0]!.attributes['gen_ai.usage.input_tokens']).toBe(5) + }) + + it('skips metrics when meter is not provided', async () => { + const { tracer } = createFakeTracer() + const mw = otelMiddleware({ tracer }) + const ctx = makeCtx() + + await runToIterationStart(mw, ctx) + // Should not throw: + await mw.onUsage?.(ctx, { + promptTokens: 100, + completionTokens: 50, + totalTokens: 150, + }) + }) +}) + +describe('otelMiddleware — duration histogram and rollup', () => { + it('records duration histogram on onFinish and rolls up tokens onto root', async () => { + const { tracer, spans } = createFakeTracer() + const { meter, records } = createFakeMeter() + const mw = otelMiddleware({ tracer, meter }) + const ctx = makeCtx() + + await runToIterationStart(mw, ctx) + await mw.onUsage?.(ctx, { + promptTokens: 100, + completionTokens: 50, + totalTokens: 150, + }) + await mw.onChunk?.(ctx, { ...ev.runFinished('stop'), model: 'gpt-4o' }) + await mw.onFinish?.(ctx, { + finishReason: 'stop', + duration: 1250, + content: '', + usage: { promptTokens: 100, completionTokens: 50, totalTokens: 150 }, + }) + + const durationRecords = records.filter( + (r) => r.name === 'gen_ai.client.operation.duration', + ) + expect(durationRecords).toHaveLength(1) + expect(durationRecords[0]!.value).toBe(1.25) + // 
`gen_ai.response.model` is intentionally absent from the duration + // histogram attrs (high-cardinality for per-request custom models). + expect( + durationRecords[0]!.attributes!['gen_ai.response.model'], + ).toBeUndefined() + expect(durationRecords[0]!.attributes!['error.type']).toBeUndefined() + // Histogram options are forwarded to the meter (unit/description). + expect(durationRecords[0]!.options?.unit).toBe('s') + const tokenRecord = records.find( + (r) => r.name === 'gen_ai.client.token.usage', + ) + expect(tokenRecord?.options?.unit).toBe('{token}') + + const root = spans[0]! + expect(root.attributes['gen_ai.usage.input_tokens']).toBe(100) + expect(root.attributes['gen_ai.usage.output_tokens']).toBe(50) + expect(root.attributes['tanstack.ai.iterations']).toBe(1) + expect(root.ended).toBe(true) + }) +}) + +describe('otelMiddleware — tool spans', () => { + it('creates a tool span as child of the iteration span (including after RUN_FINISHED)', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ tracer }) + const ctx = makeCtx({ hasTools: true, toolNames: ['get_weather'] }) + + await runToIterationStart(mw, ctx) + const iterSpan = spans[1]! + // Real flow: RUN_FINISHED(tool_calls) fires before onBeforeToolCall. + // Tool span must still nest under the iteration span. + await mw.onChunk?.(ctx, { + ...ev.runFinished('tool_calls'), + model: 'gpt-4o', + }) + await mw.onBeforeToolCall?.(ctx, { + toolCall: makeToolCall({ id: 'tc-1', function: { name: 'get_weather' } }), + tool: undefined, + args: { city: 'NYC' }, + toolName: 'get_weather', + toolCallId: 'tc-1', + }) + + const toolSpan = spans[2]! 
+ expect(toolSpan.name).toBe('execute_tool get_weather') + expect(toolSpan.parent).toBe(iterSpan) + expect(toolSpan.kind).toBe(SpanKind.INTERNAL) + expect(toolSpan.attributes['gen_ai.tool.name']).toBe('get_weather') + expect(toolSpan.attributes['gen_ai.tool.call.id']).toBe('tc-1') + expect(toolSpan.attributes['gen_ai.tool.type']).toBe('function') + expect(toolSpan.ended).toBe(false) + + await mw.onAfterToolCall?.(ctx, { + toolCall: makeToolCall({ id: 'tc-1' }), + tool: undefined, + toolName: 'get_weather', + toolCallId: 'tc-1', + ok: true, + duration: 42, + result: { temp: 72 }, + }) + + expect(toolSpan.ended).toBe(true) + expect(toolSpan.attributes['tanstack.ai.tool.outcome']).toBe('success') + }) + + it('records exception and error outcome on tool failure', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ tracer }) + const ctx = makeCtx({ hasTools: true }) + + await runToIterationStart(mw, ctx) + await mw.onBeforeToolCall?.(ctx, { + toolCall: makeToolCall({ id: 'tc-2', function: { name: 'broken' } }), + tool: undefined, + args: {}, + toolName: 'broken', + toolCallId: 'tc-2', + }) + const toolSpan = spans[2]! 
+ await mw.onAfterToolCall?.(ctx, { + toolCall: makeToolCall({ id: 'tc-2' }), + tool: undefined, + toolName: 'broken', + toolCallId: 'tc-2', + ok: false, + duration: 5, + error: new Error('boom'), + }) + + expect(toolSpan.attributes['tanstack.ai.tool.outcome']).toBe('error') + expect(toolSpan.exceptions).toHaveLength(1) + expect((toolSpan.exceptions[0]!.exception as Error).message).toBe('boom') + expect(toolSpan.status.code).toBe(SpanStatusCode.ERROR) + }) + + it('preserves non-Error exception shapes through recordException', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ tracer }) + const ctx = makeCtx({ hasTools: true }) + + await runToIterationStart(mw, ctx) + await mw.onBeforeToolCall?.(ctx, { + toolCall: makeToolCall({ id: 'tc-obj', function: { name: 'x' } }), + tool: undefined, + args: {}, + toolName: 'x', + toolCallId: 'tc-obj', + }) + const plainError = { code: 'E_SOMETHING', message: 'plain-object error' } + await mw.onAfterToolCall?.(ctx, { + toolCall: makeToolCall({ id: 'tc-obj' }), + tool: undefined, + toolName: 'x', + toolCallId: 'tc-obj', + ok: false, + duration: 1, + error: plainError, + }) + + const toolSpan = spans[2]! + expect(toolSpan.exceptions[0]!.exception).toBe(plainError) + expect(toolSpan.status.message).toBe('plain-object error') + }) + + it('finalizes the tool span even when the result fails to JSON.stringify', async () => { + const { tracer, spans } = createFakeTracer() + const warn = vi.spyOn(console, 'warn').mockImplementation(() => {}) + const mw = otelMiddleware({ tracer, captureContent: true }) + const ctx = makeCtx({ hasTools: true }) + + await runToIterationStart(mw, ctx) + await mw.onBeforeToolCall?.(ctx, { + toolCall: makeToolCall({ id: 'tc-cyc', function: { name: 'circular' } }), + tool: undefined, + args: {}, + toolName: 'circular', + toolCallId: 'tc-cyc', + }) + + // Craft a result that JSON.stringify cannot handle (circular ref). 
+ const circular: Record<string, unknown> = {}
+ circular.self = circular
+
+ await mw.onAfterToolCall?.(ctx, {
+ toolCall: makeToolCall({ id: 'tc-cyc' }),
+ tool: undefined,
+ toolName: 'circular',
+ toolCallId: 'tc-cyc',
+ ok: true,
+ duration: 3,
+ result: circular,
+ })
+
+ const iter = spans[1]!
+ const toolSpan = spans[2]!
+ // The span is still properly ended — no dangling state.
+ expect(toolSpan.ended).toBe(true)
+ // The tool-message event uses the sentinel instead of raw serialization.
+ const toolEvt = iter.events.find((e) => e.name === 'gen_ai.tool.message')!
+ expect(toolEvt.attributes!['content']).toBe('[unserializable_tool_result]')
+ warn.mockRestore()
+ })
+})
+
+describe('otelMiddleware — captureContent', () => {
+ it('captureContent=true emits gen_ai.*.message events with redact applied on iteration span', async () => {
+ const { tracer, spans } = createFakeTracer()
+ const mw = otelMiddleware({
+ tracer,
+ captureContent: true,
+ redact: (s) => s.replace(/\d+/g, '[NUM]'),
+ })
+ const ctx = makeCtx()
+
+ await runToIterationStart(mw, ctx, {
+ messages: [
+ { role: 'user', content: 'Hello 42 world' },
+ { role: 'assistant', content: 'Hi 7 there' },
+ ],
+ systemPrompts: ['Be helpful 99'],
+ })
+
+ const iter = spans[1]!
+ const userEvt = iter.events.find((e) => e.name === 'gen_ai.user.message')
+ const sysEvt = iter.events.find((e) => e.name === 'gen_ai.system.message')
+ const asstEvt = iter.events.find(
+ (e) => e.name === 'gen_ai.assistant.message',
+ )
+ expect(userEvt!.attributes!['content']).toBe('Hello [NUM] world')
+ expect(sysEvt!.attributes!['content']).toBe('Be helpful [NUM]')
+ expect(asstEvt!.attributes!['content']).toBe('Hi [NUM] there')
+ })
+
+ it('captureContent=false emits no message events', async () => {
+ const { tracer, spans } = createFakeTracer()
+ const mw = otelMiddleware({ tracer })
+ const ctx = makeCtx()
+
+ await runToIterationStart(mw, ctx, {
+ messages: [{ role: 'user', content: 'Hello' }],
+ })
+
+ const iter = spans[1]! 
+ expect( + iter.events.filter((e) => e.name.startsWith('gen_ai.')), + ).toHaveLength(0) + }) + + it('multimodal ContentPart arrays become placeholder-tagged strings', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ tracer, captureContent: true }) + const ctx = makeCtx() + + await runToIterationStart(mw, ctx, { + messages: [ + { + role: 'user', + content: [ + { type: 'text', content: 'look at this' }, + { type: 'image', source: { type: 'url', value: 'data:...' } }, + ] as const, + }, + ], + }) + + const userEvt = spans[1]!.events.find( + (e) => e.name === 'gen_ai.user.message', + )! + expect(userEvt.attributes!['content']).toBe('look at this [image]') + }) + + it('emits redaction sentinel and never raw content when redact throws', async () => { + const { tracer, spans } = createFakeTracer() + const warn = vi.spyOn(console, 'warn').mockImplementation(() => {}) + const mw = otelMiddleware({ + tracer, + captureContent: true, + redact: () => { + throw new Error('redactor blew up') + }, + }) + const ctx = makeCtx() + + await runToIterationStart(mw, ctx, { + messages: [{ role: 'user', content: 'secret-ssn-123-45-6789' }], + systemPrompts: ['also-secret'], + }) + + const iter = spans[1]! + const userEvt = iter.events.find((e) => e.name === 'gen_ai.user.message')! + const sysEvt = iter.events.find((e) => e.name === 'gen_ai.system.message')! + expect(userEvt.attributes!['content']).toBe('[redaction_failed]') + expect(sysEvt.attributes!['content']).toBe('[redaction_failed]') + // The redactor failure is surfaced via console.warn. 
+ expect(warn).toHaveBeenCalled() + warn.mockRestore() + }) + + it('caps assistantTextBuffer at maxContentLength', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ + tracer, + captureContent: true, + maxContentLength: 10, + }) + const ctx = makeCtx() + + await runToIterationStart(mw, ctx) + await mw.onChunk?.(ctx, ev.textContent('abcdefghij')) + // Over the cap — should truncate and stop accumulating. + await mw.onChunk?.(ctx, ev.textContent('klmnopqrstuvwxyz')) + await mw.onChunk?.(ctx, { ...ev.runFinished('stop'), model: 'gpt-4o' }) + + const iter = spans[1]! + const choice = iter.events.find((e) => e.name === 'gen_ai.choice')! + expect(choice.attributes!['content']).toBe('abcdefghij…') + }) +}) + +describe('otelMiddleware — error and abort paths', () => { + it('onError sets ERROR status, records exception, adds error.type to duration histogram', async () => { + const { tracer, spans } = createFakeTracer() + const { meter, records } = createFakeMeter() + const mw = otelMiddleware({ tracer, meter }) + const ctx = makeCtx() + + await runToIterationStart(mw, ctx) + const err = new RateLimitError('rate limited') + await mw.onError?.(ctx, { error: err, duration: 200 }) + + const root = spans[0]! + expect(root.status.code).toBe(SpanStatusCode.ERROR) + expect(root.exceptions).toHaveLength(1) + expect(root.ended).toBe(true) + + const iter = spans[1]! 
+ expect(iter.status.code).toBe(SpanStatusCode.ERROR) + expect(iter.ended).toBe(true) + expect(iter.exceptions).toHaveLength(1) + + const durationRecords = records.filter( + (r) => r.name === 'gen_ai.client.operation.duration', + ) + expect(durationRecords[0]!.attributes!['error.type']).toBe('RateLimitError') + }) + + it('onAbort sets ERROR status, marks cancellation, and records duration', async () => { + const { tracer, spans } = createFakeTracer() + const { meter, records } = createFakeMeter() + const mw = otelMiddleware({ tracer, meter }) + const ctx = makeCtx() + + await runToIterationStart(mw, ctx) + await mw.onAbort?.(ctx, { reason: 'user stop', duration: 80 }) + + expect(spans[0]!.status.code).toBe(SpanStatusCode.ERROR) + expect(spans[0]!.attributes['tanstack.ai.completion.reason']).toBe( + 'cancelled', + ) + expect(spans[0]!.ended).toBe(true) + const durationRecords = records.filter( + (r) => r.name === 'gen_ai.client.operation.duration', + ) + expect(durationRecords).toHaveLength(1) + expect(durationRecords[0]!.value).toBeCloseTo(0.08) + expect(durationRecords[0]!.attributes!['error.type']).toBe('cancelled') + }) + + it('onError fires onSpanEnd for iteration, open tool spans, then root — in depth-first order', async () => { + const { tracer } = createFakeTracer() + const seen: Array<{ + kind: string + toolName?: string + toolCallId?: string + ended: boolean + }> = [] + const mw = otelMiddleware({ + tracer, + onSpanEnd: (info, span) => { + seen.push({ + kind: info.kind, + toolName: info.kind === 'tool' ? info.toolName : undefined, + toolCallId: info.kind === 'tool' ? 
info.toolCallId : undefined, + ended: (span as FakeSpan).ended, + }) + }, + }) + const ctx = makeCtx({ hasTools: true }) + + await runToIterationStart(mw, ctx) + await mw.onBeforeToolCall?.(ctx, { + toolCall: makeToolCall({ id: 'tc-err', function: { name: 'my_tool' } }), + tool: undefined, + args: {}, + toolName: 'my_tool', + toolCallId: 'tc-err', + }) + + await mw.onError?.(ctx, { error: new Error('fatal'), duration: 100 }) + + expect(seen.map((s) => s.kind)).toEqual(['iteration', 'tool', 'chat']) + expect(seen.every((s) => s.ended === false)).toBe(true) + const toolCall = seen.find((s) => s.kind === 'tool')! + expect(toolCall.toolName).toBe('my_tool') + expect(toolCall.toolCallId).toBe('tc-err') + }) + + it('onAbort fires onSpanEnd for iteration, open tool spans, then root — in depth-first order', async () => { + const { tracer } = createFakeTracer() + const seen: Array<{ + kind: string + toolName?: string + toolCallId?: string + ended: boolean + }> = [] + const mw = otelMiddleware({ + tracer, + onSpanEnd: (info, span) => { + seen.push({ + kind: info.kind, + toolName: info.kind === 'tool' ? info.toolName : undefined, + toolCallId: info.kind === 'tool' ? 
info.toolCallId : undefined, + ended: (span as FakeSpan).ended, + }) + }, + }) + const ctx = makeCtx({ hasTools: true }) + + await runToIterationStart(mw, ctx) + await mw.onBeforeToolCall?.(ctx, { + toolCall: makeToolCall({ + id: 'tc-abort', + function: { name: 'slow_tool' }, + }), + tool: undefined, + args: {}, + toolName: 'slow_tool', + toolCallId: 'tc-abort', + }) + + await mw.onAbort?.(ctx, { reason: 'user stop', duration: 50 }) + + expect(seen.map((s) => s.kind)).toEqual(['iteration', 'tool', 'chat']) + expect(seen.every((s) => s.ended === false)).toBe(true) + }) + + it('onFinish sweeps dangling tool spans with outcome=unknown before closing the iteration span', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ tracer }) + const ctx = makeCtx({ hasTools: true }) + + await runToIterationStart(mw, ctx) + await mw.onBeforeToolCall?.(ctx, { + toolCall: makeToolCall({ + id: 'tc-leak', + function: { name: 'never_resolves' }, + }), + tool: undefined, + args: {}, + toolName: 'never_resolves', + toolCallId: 'tc-leak', + }) + // Never call onAfterToolCall — simulate an adapter that dropped the call. + + await mw.onFinish?.(ctx, { finishReason: 'stop', duration: 1, content: '' }) + + const toolSpan = spans[2]! 
+ expect(toolSpan.ended).toBe(true) + expect(toolSpan.attributes['tanstack.ai.tool.outcome']).toBe('unknown') + }) +}) + +describe('otelMiddleware — tool-message and choice events', () => { + it('onAfterToolCall emits gen_ai.tool.message on iteration span with redacted result', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ + tracer, + captureContent: true, + redact: (s) => s.replace(/\d+/g, '[NUM]'), + }) + const ctx = makeCtx({ hasTools: true }) + + await runToIterationStart(mw, ctx) + await mw.onBeforeToolCall?.(ctx, { + toolCall: makeToolCall({ id: 'tc-1', function: { name: 'x' } }), + tool: undefined, + args: {}, + toolName: 'x', + toolCallId: 'tc-1', + }) + await mw.onAfterToolCall?.(ctx, { + toolCall: makeToolCall({ id: 'tc-1' }), + tool: undefined, + toolName: 'x', + toolCallId: 'tc-1', + ok: true, + duration: 5, + result: { value: 42 }, + }) + + const iter = spans[1]! + const toolEvt = iter.events.find((e) => e.name === 'gen_ai.tool.message')! + expect(toolEvt.attributes!['content']).toContain('[NUM]') + expect(toolEvt.attributes!['tool_call_id']).toBe('tc-1') + }) + + it('emits gen_ai.choice event with accumulated assistant text on RUN_FINISHED', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ tracer, captureContent: true }) + const ctx = makeCtx() + + await runToIterationStart(mw, ctx) + await mw.onChunk?.(ctx, ev.textContent('Hello ')) + await mw.onChunk?.(ctx, ev.textContent('world')) + await mw.onChunk?.(ctx, { ...ev.runFinished('stop'), model: 'gpt-4o' }) + + const iter = spans[1]! + const choice = iter.events.find((e) => e.name === 'gen_ai.choice')! 
+ expect(choice.attributes!['content']).toBe('Hello world') + }) +}) + +describe('otelMiddleware — concurrent isolation', () => { + it('parallel chat() calls do not cross-contaminate state', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ tracer }) + + const ctxA = makeCtx({ requestId: 'A' }) + const ctxB = makeCtx({ requestId: 'B' }) + + await Promise.all([mw.onStart?.(ctxA), mw.onStart?.(ctxB)]) + ctxA.phase = 'beforeModel' + ctxB.phase = 'beforeModel' + await Promise.all([ + mw.onConfig?.(ctxA, { messages: [], systemPrompts: [], tools: [] }), + mw.onConfig?.(ctxB, { messages: [], systemPrompts: [], tools: [] }), + ]) + await Promise.all([ + mw.onChunk?.(ctxA, ev.runFinished('stop', 'A')), + mw.onChunk?.(ctxB, ev.runFinished('tool_calls', 'B')), + ]) + await Promise.all([ + mw.onFinish?.(ctxA, { finishReason: 'stop', duration: 1, content: '' }), + mw.onFinish?.(ctxB, { + finishReason: 'tool_calls', + duration: 1, + content: '', + }), + ]) + + // Total: 2 root spans + 2 iteration spans, all ended. + expect(spans.filter((s) => s.ended).length).toBe(4) + const iters = spans.filter((s) => s.parent !== null) + const reasons = iters.flatMap((s) => { + const v = s.attributes['gen_ai.response.finish_reasons'] + return Array.isArray(v) ? 
(v as Array<string>) : [] + }) + expect(reasons).toEqual(expect.arrayContaining(['stop', 'tool_calls'])) + }) +}) + +describe('otelMiddleware — extension points', () => { + it('spanNameFormatter overrides default names for chat, iteration, and tool', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ + tracer, + spanNameFormatter: (info) => { + if (info.kind === 'chat') return 'my-chat' + if (info.kind === 'iteration') return `iter-${info.iteration}` + return `tool-${info.toolName}` + }, + }) + const ctx = makeCtx({ hasTools: true }) + + await runToIterationStart(mw, ctx) + await mw.onBeforeToolCall?.(ctx, { + toolCall: makeToolCall({ id: 't-1', function: { name: 'lookup' } }), + tool: undefined, + args: {}, + toolName: 'lookup', + toolCallId: 't-1', + }) + + expect(spans[0]!.name).toBe('my-chat') + expect(spans[1]!.name).toBe('iter-0') + expect(spans[2]!.name).toBe('tool-lookup') + }) + + it('attributeEnricher merges attributes onto chat, iteration, and tool spans', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ + tracer, + attributeEnricher: (info) => ({ 'test.kind': info.kind }), + }) + const ctx = makeCtx({ hasTools: true }) + + await runToIterationStart(mw, ctx) + await mw.onBeforeToolCall?.(ctx, { + toolCall: makeToolCall({ id: 't-1', function: { name: 'lookup' } }), + tool: undefined, + args: {}, + toolName: 'lookup', + toolCallId: 't-1', + }) + + expect(spans[0]!.attributes['test.kind']).toBe('chat') + expect(spans[1]!.attributes['test.kind']).toBe('iteration') + expect(spans[2]!.attributes['test.kind']).toBe('tool') + }) + + it('onBeforeSpanStart can mutate SpanOptions before startSpan for every kind', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ + tracer, + onBeforeSpanStart: (info, options) => ({ + ...options, + attributes: { + ...(options.attributes ?? 
{}), + 'custom.kind': info.kind, + }, + }), + }) + const ctx = makeCtx({ hasTools: true }) + + await runToIterationStart(mw, ctx) + await mw.onBeforeToolCall?.(ctx, { + toolCall: makeToolCall({ id: 't-1', function: { name: 'x' } }), + tool: undefined, + args: {}, + toolName: 'x', + toolCallId: 't-1', + }) + + expect(spans[0]!.attributes['custom.kind']).toBe('chat') + expect(spans[1]!.attributes['custom.kind']).toBe('iteration') + expect(spans[2]!.attributes['custom.kind']).toBe('tool') + }) + + it('onSpanEnd fires before span.end()', async () => { + const { tracer } = createFakeTracer() + const seen: Array<{ kind: string; ended: boolean }> = [] + const mw = otelMiddleware({ + tracer, + onSpanEnd: (info, span) => { + seen.push({ kind: info.kind, ended: (span as FakeSpan).ended }) + }, + }) + const ctx = makeCtx() + + await runToIterationStart(mw, ctx) + await mw.onChunk?.(ctx, ev.runFinished('stop')) + await mw.onFinish?.(ctx, { finishReason: 'stop', duration: 1, content: '' }) + + expect(seen.map((s) => s.kind)).toEqual(['iteration', 'chat']) + expect(seen.every((s) => s.ended === false)).toBe(true) + }) + + describe('callback resilience', () => { + let warn: ReturnType<typeof vi.spyOn> + + beforeEach(() => { + warn = vi.spyOn(console, 'warn').mockImplementation(() => {}) + }) + + afterEach(() => { + warn.mockRestore() + }) + + it('a throwing user callback does not break the run and is logged', async () => { + const { tracer, spans } = createFakeTracer() + const mw = otelMiddleware({ + tracer, + attributeEnricher: () => { + throw new Error('boom') + }, + }) + const ctx = makeCtx() + + await mw.onStart?.(ctx) + await mw.onFinish?.(ctx, { + finishReason: 'stop', + duration: 1, + content: '', + }) + + expect(spans[0]!.ended).toBe(true) + expect(warn).toHaveBeenCalled() + // Label identifies the failing hook for diagnosis. 
+ const call = warn.mock.calls.find((args: Array<unknown>) => + String(args[0]).includes('otel.attributeEnricher'), + ) + expect(call).toBeDefined() + }) + }) +}) diff --git a/packages/typescript/ai/vite.config.ts b/packages/typescript/ai/vite.config.ts index cb2d342e3..580db682e 100644 --- a/packages/typescript/ai/vite.config.ts +++ b/packages/typescript/ai/vite.config.ts @@ -33,6 +33,7 @@ export default mergeConfig( './src/index.ts', './src/activities/index.ts', './src/middlewares/index.ts', + './src/middlewares/otel.ts', './src/adapter-internals.ts', ], srcDir: './src', diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f9192e0e4..beebfd9af 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -84,7 +84,7 @@ importers: version: 7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) vitest: specifier: ^4.0.14 - version: 4.0.15(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) + version: 4.0.15(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) examples/php-slim: devDependencies: @@ -317,7 +317,7 @@ importers: version: 7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) vitest: specifier: ^4.0.14 - version: 4.0.15(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) + version: 4.0.15(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) web-vitals: specifier: ^5.1.0 version: 5.1.0 @@ -465,7 +465,7 @@ importers: version: 7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) vitest: specifier: ^4.0.14 - version: 
4.0.15(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) + version: 4.0.15(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) web-vitals: specifier: ^5.1.0 version: 5.1.0 @@ -650,7 +650,7 @@ importers: version: 7.3.1(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) vitest: specifier: ^4.0.14 - version: 4.1.4(@types/node@24.10.3)(happy-dom@20.0.11)(jsdom@27.3.0(postcss@8.5.9))(vite@7.3.1(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + version: 4.1.4(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jsdom@27.3.0(postcss@8.5.9))(vite@7.3.1(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) web-vitals: specifier: ^5.1.0 version: 5.1.0 @@ -768,7 +768,7 @@ importers: version: 2.11.10(solid-js@1.9.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) vitest: specifier: ^4.0.14 - version: 4.0.15(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) + version: 4.0.15(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) web-vitals: specifier: ^5.1.0 version: 5.1.0 @@ -814,10 +814,10 @@ importers: devDependencies: '@sveltejs/adapter-auto': specifier: ^3.3.1 - version: 3.3.1(@sveltejs/kit@2.49.2(@sveltejs/vite-plugin-svelte@5.1.1(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)))(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2))) + 
version: 3.3.1(@sveltejs/kit@2.49.2(@opentelemetry/api@1.9.1)(@sveltejs/vite-plugin-svelte@5.1.1(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)))(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2))) '@sveltejs/kit': specifier: ^2.15.10 - version: 2.49.2(@sveltejs/vite-plugin-svelte@5.1.1(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)))(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + version: 2.49.2(@opentelemetry/api@1.9.1)(@sveltejs/vite-plugin-svelte@5.1.1(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)))(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) '@sveltejs/vite-plugin-svelte': specifier: ^5.1.1 version: 5.1.1(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) @@ -944,6 +944,9 @@ importers: specifier: ^0.1.7 version: 0.1.7 devDependencies: + '@opentelemetry/api': + specifier: ^1.9.0 + version: 1.9.1 '@standard-schema/spec': specifier: ^1.1.0 version: 1.1.0 @@ -1448,7 +1451,7 @@ importers: version: 24.10.3 '@vitest/coverage-v8': specifier: 4.0.14 - version: 4.0.14(vitest@4.0.15(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + version: 4.0.14(vitest@4.0.15(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) jsdom: specifier: ^27.2.0 version: 27.3.0(postcss@8.5.9) @@ -1463,7 +1466,7 @@ importers: version: 5.9.3 vitest: specifier: ^4.0.14 - version: 
4.0.15(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) + version: 4.0.15(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) packages/typescript/ai-solid-ui: dependencies: @@ -1550,7 +1553,7 @@ importers: version: 24.10.3 '@vitest/coverage-v8': specifier: 4.0.14 - version: 4.0.14(vitest@4.0.15(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + version: 4.0.14(vitest@4.0.15(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) '@vue/test-utils': specifier: ^2.4.6 version: 2.4.6 @@ -1565,7 +1568,7 @@ importers: version: 5.9.3 vitest: specifier: ^4.0.14 - version: 4.0.15(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) + version: 4.0.15(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) vue: specifier: ^3.5.25 version: 3.5.25(typescript@5.9.3) @@ -1674,7 +1677,10 @@ importers: dependencies: '@copilotkit/aimock': specifier: ^1.18.0 - version: 1.18.0(vitest@4.1.4(@types/node@24.10.3)(happy-dom@20.0.11)(jsdom@27.3.0(postcss@8.5.9))(vite@7.3.1(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2))) + version: 1.18.0(vitest@4.1.4(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jsdom@27.3.0(postcss@8.5.9))(vite@7.3.1(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2))) + '@opentelemetry/api': + specifier: ^1.9.0 + version: 1.9.1 '@tailwindcss/vite': specifier: 
^4.1.18 version: 4.1.18(vite@7.3.1(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) @@ -3526,6 +3532,10 @@ packages: '@openrouter/sdk@0.12.14': resolution: {integrity: sha512-G32CZ1IkmtsGfQF7/mzcvt7W0Lmd6HUHFGjDWv5knBvL6sJcMmX6i3VPSIpHQYSgEqRQSxFuDROP6iErTu7XcA==} + '@opentelemetry/api@1.9.1': + resolution: {integrity: sha512-gLyJlPHPZYdAk1JENA9LeHejZe1Ti77/pTeFm/nMXmQH/HFZlcS/O2XJB+L8fkbrNSqhdtlvjBVjxwUYanNH5Q==} + engines: {node: '>=8.0.0'} + '@oxc-minify/binding-android-arm-eabi@0.110.0': resolution: {integrity: sha512-43fMTO8/5bMlqfOiNSZNKUzIqeLIYuB9Hr1Ohyf58B1wU11S2dPGibTXOGNaWsfgHy99eeZ1bSgeIHy/fEYqbw==} engines: {node: ^20.19.0 || >=22.12.0} @@ -12454,9 +12464,9 @@ snapshots: '@cloudflare/workers-types@4.20260317.1': {} - '@copilotkit/aimock@1.18.0(vitest@4.1.4(@types/node@24.10.3)(happy-dom@20.0.11)(jsdom@27.3.0(postcss@8.5.9))(vite@7.3.1(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)))': + '@copilotkit/aimock@1.18.0(vitest@4.1.4(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jsdom@27.3.0(postcss@8.5.9))(vite@7.3.1(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)))': optionalDependencies: - vitest: 4.1.4(@types/node@24.10.3)(happy-dom@20.0.11)(jsdom@27.3.0(postcss@8.5.9))(vite@7.3.1(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + vitest: 4.1.4(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jsdom@27.3.0(postcss@8.5.9))(vite@7.3.1(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) '@crazydos/vue-markdown@1.1.4(vue@3.5.25(typescript@5.9.3))': dependencies: @@ -13470,6 +13480,8 @@ snapshots: dependencies: zod: 4.3.6 + '@opentelemetry/api@1.9.1': {} + '@oxc-minify/binding-android-arm-eabi@0.110.0': optional: true @@ -15170,12 +15182,12 @@ snapshots: dependencies: acorn: 8.15.0 - 
'@sveltejs/adapter-auto@3.3.1(@sveltejs/kit@2.49.2(@sveltejs/vite-plugin-svelte@5.1.1(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)))(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)))': + '@sveltejs/adapter-auto@3.3.1(@sveltejs/kit@2.49.2(@opentelemetry/api@1.9.1)(@sveltejs/vite-plugin-svelte@5.1.1(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)))(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)))': dependencies: - '@sveltejs/kit': 2.49.2(@sveltejs/vite-plugin-svelte@5.1.1(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)))(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + '@sveltejs/kit': 2.49.2(@opentelemetry/api@1.9.1)(@sveltejs/vite-plugin-svelte@5.1.1(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)))(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) import-meta-resolve: 4.2.0 - '@sveltejs/kit@2.49.2(@sveltejs/vite-plugin-svelte@5.1.1(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)))(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2))': + 
'@sveltejs/kit@2.49.2(@opentelemetry/api@1.9.1)(@sveltejs/vite-plugin-svelte@5.1.1(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)))(svelte@5.45.10)(vite@7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2))': dependencies: '@standard-schema/spec': 1.0.0 '@sveltejs/acorn-typescript': 1.0.8(acorn@8.15.0) @@ -15193,6 +15205,8 @@ snapshots: sirv: 3.0.2 svelte: 5.45.10 vite: 7.2.7(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) + optionalDependencies: + '@opentelemetry/api': 1.9.1 '@sveltejs/package@2.5.7(svelte@5.45.10)(typescript@5.9.3)': dependencies: @@ -17222,7 +17236,7 @@ snapshots: vite: 7.2.7(@types/node@25.0.1)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) vue: 3.5.25(typescript@5.9.3) - '@vitest/coverage-v8@4.0.14(vitest@4.0.15(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2))': + '@vitest/coverage-v8@4.0.14(vitest@4.0.15(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2))': dependencies: '@bcoe/v8-coverage': 1.0.2 '@vitest/utils': 4.0.14 @@ -17235,7 +17249,7 @@ snapshots: obug: 2.1.1 std-env: 3.10.0 tinyrainbow: 3.1.0 - vitest: 4.0.15(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) + vitest: 4.0.15(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) transitivePeerDependencies: - supports-color @@ -17252,7 +17266,7 @@ snapshots: obug: 2.1.1 std-env: 3.10.0 tinyrainbow: 3.1.0 - vitest: 
4.1.4(@types/node@25.0.1)(@vitest/coverage-v8@4.0.14)(happy-dom@20.0.11)(jsdom@27.3.0(postcss@8.5.9))(vite@7.3.1(@types/node@25.0.1)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) + vitest: 4.1.4(@opentelemetry/api@1.9.1)(@types/node@25.0.1)(@vitest/coverage-v8@4.0.14)(happy-dom@20.0.11)(jsdom@27.3.0(postcss@8.5.9))(vite@7.3.1(@types/node@25.0.1)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) transitivePeerDependencies: - supports-color @@ -23508,7 +23522,7 @@ snapshots: optionalDependencies: vite: 7.3.1(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) - vitest@4.0.15(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2): + vitest@4.0.15(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jiti@2.6.1)(jsdom@27.3.0(postcss@8.5.9))(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2): dependencies: '@vitest/expect': 4.0.15 '@vitest/mocker': 4.0.15(vite@7.3.1(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) @@ -23531,6 +23545,7 @@ snapshots: vite: 7.3.1(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) why-is-node-running: 2.3.0 optionalDependencies: + '@opentelemetry/api': 1.9.1 '@types/node': 24.10.3 happy-dom: 20.0.11 jsdom: 27.3.0(postcss@8.5.9) @@ -23547,7 +23562,7 @@ snapshots: - tsx - yaml - vitest@4.1.4(@types/node@24.10.3)(happy-dom@20.0.11)(jsdom@27.3.0(postcss@8.5.9))(vite@7.3.1(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)): + vitest@4.1.4(@opentelemetry/api@1.9.1)(@types/node@24.10.3)(happy-dom@20.0.11)(jsdom@27.3.0(postcss@8.5.9))(vite@7.3.1(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)): dependencies: '@vitest/expect': 4.1.4 '@vitest/mocker': 
4.1.4(vite@7.3.1(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) @@ -23570,13 +23585,14 @@ snapshots: vite: 7.3.1(@types/node@24.10.3)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) why-is-node-running: 2.3.0 optionalDependencies: + '@opentelemetry/api': 1.9.1 '@types/node': 24.10.3 happy-dom: 20.0.11 jsdom: 27.3.0(postcss@8.5.9) transitivePeerDependencies: - msw - vitest@4.1.4(@types/node@25.0.1)(@vitest/coverage-v8@4.0.14)(happy-dom@20.0.11)(jsdom@27.3.0(postcss@8.5.9))(vite@7.3.1(@types/node@25.0.1)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)): + vitest@4.1.4(@opentelemetry/api@1.9.1)(@types/node@25.0.1)(@vitest/coverage-v8@4.0.14)(happy-dom@20.0.11)(jsdom@27.3.0(postcss@8.5.9))(vite@7.3.1(@types/node@25.0.1)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)): dependencies: '@vitest/expect': 4.1.4 '@vitest/mocker': 4.1.4(vite@7.3.1(@types/node@25.0.1)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2)) @@ -23599,6 +23615,7 @@ snapshots: vite: 7.3.1(@types/node@25.0.1)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.1)(tsx@4.21.0)(yaml@2.8.2) why-is-node-running: 2.3.0 optionalDependencies: + '@opentelemetry/api': 1.9.1 '@types/node': 25.0.1 '@vitest/coverage-v8': 4.0.14(vitest@4.1.4) happy-dom: 20.0.11 diff --git a/testing/e2e/package.json b/testing/e2e/package.json index 2cf58826a..a33dbaae4 100644 --- a/testing/e2e/package.json +++ b/testing/e2e/package.json @@ -12,6 +12,7 @@ }, "dependencies": { "@copilotkit/aimock": "^1.18.0", + "@opentelemetry/api": "^1.9.0", "@tailwindcss/vite": "^4.1.18", "@tanstack/ai": "workspace:*", "@tanstack/ai-anthropic": "workspace:*", diff --git a/testing/e2e/src/lib/otel-capture.ts b/testing/e2e/src/lib/otel-capture.ts new file mode 100644 index 000000000..036b07824 --- /dev/null +++ b/testing/e2e/src/lib/otel-capture.ts @@ -0,0 +1,113 @@ +import type { + AttributeValue, + Attributes, + 
SpanStatusCode, +} from '@opentelemetry/api' + +export interface CapturedEvent { + name: string + attributes?: Attributes +} + +export interface CapturedException { + exception: string + attributes?: Attributes +} + +export interface CapturedSpan { + id: string + name: string + kind?: number + attributes: Record<string, AttributeValue> + status: SpanStatusCode + statusMessage?: string + events: Array<CapturedEvent> + exceptions: Array<CapturedException> + ended: boolean +} + +export interface CapturedHistogram { + name: string + value: number + attributes?: Attributes + unit?: string +} + +export interface OtelCapture { + spans: Array<CapturedSpan> + histograms: Array<CapturedHistogram> +} + +// Keyed by testId. Lives as a module-global — Nitro's dev server keeps a +// single JS context per process, which is all these tests need. +const captures: Map<string, OtelCapture> = new Map() + +function bucketFor(captureId: string): OtelCapture { + let bucket = captures.get(captureId) + if (!bucket) { + bucket = { spans: [], histograms: [] } + captures.set(captureId, bucket) + } + return bucket +} + +export function resetOtelCapture(captureId: string): void { + captures.set(captureId, { spans: [], histograms: [] }) +} + +export function getOtelCapture(captureId: string): OtelCapture { + return bucketFor(captureId) +} + +/** + * Append a new span or patch an existing one. + * + * Patches are applied with a shallow `Object.assign`, so nested objects and + * arrays (e.g. `attributes`, `events`, `exceptions`) are replaced, not merged. + * Callers that want to extend those fields must pre-merge before passing the + * patch (see how `api.middleware-test.ts` spreads `{ ...attrs }`). Prefer + * `recordOtelEvent` / `recordOtelException` for those fields — they append + * rather than replace. 
+ */ +export function recordOtelSpan( + captureId: string, + entry: + | CapturedSpan + | { id: string; patch: Partial<Omit<CapturedSpan, 'id'>> }, +): void { + const bucket = bucketFor(captureId) + if ('patch' in entry) { + const existing = bucket.spans.find((s) => s.id === entry.id) + if (!existing) return + Object.assign(existing, entry.patch) + return + } + bucket.spans.push(entry) +} + +export function recordOtelEvent( + captureId: string, + spanId: string, + event: CapturedEvent, +): void { + const existing = bucketFor(captureId).spans.find((s) => s.id === spanId) + if (!existing) return + existing.events.push(event) +} + +export function recordOtelException( + captureId: string, + spanId: string, + exception: CapturedException, +): void { + const existing = bucketFor(captureId).spans.find((s) => s.id === spanId) + if (!existing) return + existing.exceptions.push(exception) +} + +export function recordOtelHistogram( + captureId: string, + entry: CapturedHistogram, +): void { + bucketFor(captureId).histograms.push(entry) +} diff --git a/testing/e2e/src/routes/api.middleware-test.ts b/testing/e2e/src/routes/api.middleware-test.ts index a8c0def9b..6a817c265 100644 --- a/testing/e2e/src/routes/api.middleware-test.ts +++ b/testing/e2e/src/routes/api.middleware-test.ts @@ -6,8 +6,36 @@ import { toolDefinition, } from '@tanstack/ai' import type { ChatMiddleware } from '@tanstack/ai' +import { otelMiddleware } from '@tanstack/ai/middlewares/otel' +import { SpanStatusCode } from '@opentelemetry/api' +import type { + AttributeValue, + Attributes, + Context, + Histogram, + Meter, + MetricOptions, + Span, + SpanContext, + SpanStatus, + Tracer, +} from '@opentelemetry/api' import { z } from 'zod' import { createTextAdapter } from '@/lib/providers' +import { + getOtelCapture, + recordOtelEvent, + recordOtelException, + recordOtelHistogram, + recordOtelSpan, + resetOtelCapture, +} from '@/lib/otel-capture' + +// The otel capture endpoint is only useful during E2E runs. 
Gate both the +// POST 'otel' mode and the GET capture fetch behind this flag so the route +// cannot be used as an oracle in a production-like build. +const OTEL_TEST_ENABLED = + process.env.E2E_TEST === '1' || process.env.NODE_ENV !== 'production' const weatherTool = toolDefinition({ name: 'get_weather', @@ -44,6 +72,124 @@ const toolSkipMiddleware: ChatMiddleware = { }, } +// Minimal in-memory tracer/meter. Captures into a per-testId bucket so that +// the Playwright spec can fetch the recorded state via GET after the stream +// finishes. Not exported — only used to build otelMiddleware for the test. +function createCaptureTracer(captureId: string): Tracer { + let spanSeq = 0 + const tracer: Tracer = { + startSpan(name, options = {}, _ctx?: Context): Span { + const id = `span-${spanSeq++}` + const attrs: Record<string, AttributeValue> = {} + for (const [k, v] of Object.entries(options.attributes ?? {})) { + if (v !== undefined) attrs[k] = v as AttributeValue + } + recordOtelSpan(captureId, { + id, + name, + kind: options.kind, + attributes: attrs, + status: SpanStatusCode.UNSET, + events: [], + exceptions: [], + ended: false, + }) + const status: SpanStatus = { code: SpanStatusCode.UNSET } + let ended = false + const span: Span = { + spanContext(): SpanContext { + return { traceId: 'capture-trace', spanId: id, traceFlags: 1 } + }, + setAttribute(key, value) { + attrs[key] = value as AttributeValue + recordOtelSpan(captureId, { id, patch: { attributes: { ...attrs } } }) + return span + }, + setAttributes(next) { + for (const [k, v] of Object.entries(next)) { + attrs[k] = v as AttributeValue + } + recordOtelSpan(captureId, { id, patch: { attributes: { ...attrs } } }) + return span + }, + addEvent(eventName, eventAttrs) { + recordOtelEvent(captureId, id, { + name: eventName, + attributes: eventAttrs as Attributes | undefined, + }) + return span + }, + addLink() { + return span + }, + addLinks() { + return span + }, + setStatus(next) { + status.code = next.code + status.message = 
next.message + recordOtelSpan(captureId, { + id, + patch: { status: next.code, statusMessage: next.message }, + }) + return span + }, + updateName(next) { + recordOtelSpan(captureId, { id, patch: { name: next } }) + return span + }, + end() { + ended = true + recordOtelSpan(captureId, { id, patch: { ended: true } }) + }, + isRecording() { + return !ended + }, + recordException(exception, exceptionAttrs) { + recordOtelException(captureId, id, { + exception: String( + (exception as { message?: string } | undefined)?.message ?? + exception, + ), + attributes: exceptionAttrs as Attributes | undefined, + }) + }, + } + return span + }, + // Minimal implementation — our middleware never calls startActiveSpan. + // eslint-disable-next-line @typescript-eslint/no-explicit-any + startActiveSpan(...args: Array<any>) { + const fn = args[args.length - 1] as (span: Span) => unknown + const name = args[0] as string + const span = tracer.startSpan(name, {}) + try { + return fn(span) + } finally { + span.end() + } + }, + } + return tracer +} + +function createCaptureMeter(captureId: string): Meter { + const histogram = (name: string, options?: MetricOptions): Histogram => ({ + record(value: number, attributes?: Attributes) { + recordOtelHistogram(captureId, { + name, + value, + attributes, + unit: options?.unit, + }) + }, + }) + return { + createHistogram: histogram, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } as any as Meter +} + export const Route = createFileRoute('/api/middleware-test')({ server: { handlers: { @@ -70,12 +216,34 @@ export const Route = createFileRoute('/api/middleware-test')({ testId, ) - const middleware: ChatMiddleware[] = [] + const middleware: Array<ChatMiddleware> = [] if (middlewareMode === 'chunk-transform') middleware.push(chunkTransformMiddleware) if (middlewareMode === 'tool-skip') middleware.push(toolSkipMiddleware) + if (middlewareMode === 'otel') { + if (!OTEL_TEST_ENABLED) { + return new Response(null, { status: 404 }) + } + if (!testId) { 
return new Response( + JSON.stringify({ error: 'otel mode requires testId' }), + { + status: 400, + headers: { 'Content-Type': 'application/json' }, + }, + ) + } + resetOtelCapture(testId) + middleware.push( + otelMiddleware({ + tracer: createCaptureTracer(testId), + meter: createCaptureMeter(testId), + captureContent: true, + }), + ) + } const tools = scenario === 'with-tool' ? [weatherTool] : [] @@ -100,6 +268,25 @@ export const Route = createFileRoute('/api/middleware-test')({ }) } }, + GET: async ({ request }) => { + if (!OTEL_TEST_ENABLED) { + return new Response(null, { status: 404 }) + } + const url = new URL(request.url) + const testId = url.searchParams.get('testId') + if (!testId) { + return new Response( + JSON.stringify({ error: 'testId query param required' }), + { + status: 400, + headers: { 'Content-Type': 'application/json' }, + }, + ) + } + return new Response(JSON.stringify(getOtelCapture(testId)), { + headers: { 'Content-Type': 'application/json' }, + }) + }, }, }, }) diff --git a/testing/e2e/src/routes/middleware-test.tsx b/testing/e2e/src/routes/middleware-test.tsx index 20c2ba76e..92134835e 100644 --- a/testing/e2e/src/routes/middleware-test.tsx +++ b/testing/e2e/src/routes/middleware-test.tsx @@ -6,6 +6,7 @@ const MIDDLEWARE_MODES = [ { id: 'none', label: 'No Middleware' }, { id: 'chunk-transform', label: 'Chunk Transform (prefix text)' }, { id: 'tool-skip', label: 'Tool Skip (skip with custom result)' }, + { id: 'otel', label: 'OpenTelemetry (capture spans/metrics)' }, ] as const export const Route = createFileRoute('/middleware-test')({ diff --git a/testing/e2e/tests/middleware.spec.ts b/testing/e2e/tests/middleware.spec.ts index d6e1994f7..e039d78ab 100644 --- a/testing/e2e/tests/middleware.spec.ts +++ b/testing/e2e/tests/middleware.spec.ts @@ -1,5 +1,22 @@ +import { SpanKind } from '@opentelemetry/api' import { test, expect } from './fixtures' +async function fetchOtelCapture( + page: import('@playwright/test').Page, + baseURL: string | 
undefined, + testId: string | undefined, +) { + if (!testId) throw new Error('otel capture test requires a testId fixture') + const url = `${baseURL ?? ''}/api/middleware-test?testId=${encodeURIComponent(testId)}` + const response = await page.request.get(url) + if (!response.ok()) { + throw new Error( + `GET ${url} failed: ${response.status()} ${await response.text()}`, + ) + } + return response.json() +} + test.describe('Middleware Lifecycle', () => { test('onChunk transforms text content', async ({ page, @@ -65,6 +82,117 @@ test.describe('Middleware Lifecycle', () => { expect(toolResults[0].content).toContain('skipped') }) + test('otel middleware emits chat span + per-iteration token histograms', async ({ + page, + testId, + aimockPort, + baseURL, + }) => { + const params = new URLSearchParams() + if (testId) params.set('testId', testId) + if (aimockPort) params.set('aimockPort', String(aimockPort)) + const qs = params.toString() + await page.goto(`/middleware-test${qs ? '?' + qs : ''}`) + await page.waitForTimeout(2000) + await page.locator('#mw-scenario-select').selectOption('basic-text') + await page.locator('#mw-mode-select').selectOption('otel') + await page.locator('#mw-run-button').click() + + await page.waitForFunction( + () => + document + .querySelector('#mw-metadata') + ?.getAttribute('data-test-complete') === 'true', + { timeout: 10000 }, + ) + + const capture = await fetchOtelCapture(page, baseURL, testId) + + // Root span is kind=INTERNAL; iteration spans are kind=CLIENT. This is a + // structural discriminator, immune to accidental attribute renames on + // either span. + const chatSpans = capture.spans.filter( + (s: any) => s.kind === SpanKind.INTERNAL, + ) + expect(chatSpans).toHaveLength(1) + const chatSpan = chatSpans[0] + expect(chatSpan.ended).toBe(true) + // `gen_ai.operation.name` is intentionally NOT set on the root span — + // only iteration spans carry it (see otel.ts). 
+ expect(chatSpan.attributes['gen_ai.operation.name']).toBeUndefined() + + const iterationSpans = capture.spans.filter( + (s: any) => s.kind === SpanKind.CLIENT, + ) + expect(iterationSpans.length).toBeGreaterThanOrEqual(1) + for (const iter of iterationSpans) { + expect(iter.ended).toBe(true) + expect(iter.attributes['gen_ai.operation.name']).toBe('chat') + } + + // Token histogram records show up with correct unit and low-cardinality attrs. + const tokenRecords = capture.histograms.filter( + (h: any) => h.name === 'gen_ai.client.token.usage', + ) + // Guard against the C1 regression: onUsage used to no-op in production order, + // losing every token histogram record. If we ever regress, this assertion fails. + expect(tokenRecords.length).toBeGreaterThanOrEqual(2) + for (const r of tokenRecords) { + expect(r.unit).toBe('{token}') + expect(r.attributes['gen_ai.response.id']).toBeUndefined() + expect(r.attributes['gen_ai.response.model']).toBeUndefined() + } + + // Duration histogram is per-run. + const durationRecords = capture.histograms.filter( + (h: any) => h.name === 'gen_ai.client.operation.duration', + ) + expect(durationRecords.length).toBe(1) + expect(durationRecords[0].unit).toBe('s') + expect( + durationRecords[0].attributes['gen_ai.response.model'], + ).toBeUndefined() + }) + + test('otel middleware nests tool spans under the iteration span that triggered them', async ({ + page, + testId, + aimockPort, + baseURL, + }) => { + const params = new URLSearchParams() + if (testId) params.set('testId', testId) + if (aimockPort) params.set('aimockPort', String(aimockPort)) + const qs = params.toString() + await page.goto(`/middleware-test${qs ? '?' 
+ qs : ''}`) + await page.waitForTimeout(2000) + await page.locator('#mw-scenario-select').selectOption('with-tool') + await page.locator('#mw-mode-select').selectOption('otel') + await page.locator('#mw-run-button').click() + + await page.waitForFunction( + () => + document + .querySelector('#mw-metadata') + ?.getAttribute('data-test-complete') === 'true', + { timeout: 15000 }, + ) + + const capture = await fetchOtelCapture(page, baseURL, testId) + + // Every tool span carries gen_ai.tool.name + ended outcome. This also + // guards against the "iteration span closed before onBeforeToolCall" + // regression — if it regressed, onBeforeToolCall would skip span creation. + const toolSpans = capture.spans.filter( + (s: any) => 'gen_ai.tool.name' in s.attributes, + ) + expect(toolSpans.length).toBeGreaterThanOrEqual(1) + for (const tool of toolSpans) { + expect(tool.ended).toBe(true) + expect(tool.attributes['tanstack.ai.tool.outcome']).toBeDefined() + } + }) + test('no middleware passes content through unchanged', async ({ page, testId,