@tanstack/ai

import type { DefaultMessageMetadataByModality, JSONSchema, Modality, StreamChunk, TextOptions, TokenUsage, } from '../../types' import type { CapabilityHandle } from './middleware/capabilities' /** * Configuration for adapter instances */ export interface TextAdapterConfig { apiKey?: string baseUrl?: string timeout?: number maxRetries?: number headers?: Record<string, string> } /** * Options for structured output generation. * * The internal logger is threaded through `chatOptions.logger` (inherited from * `TextOptions`). Adapter implementations must call `logger.request()` before * SDK calls, `logger.provider()` for each chunk received, and `logger.errors()` * in catch blocks. */ export interface StructuredOutputOptions<TProviderOptions extends object> { /** Text options for the request */ chatOptions: TextOptions<TProviderOptions> /** JSON Schema for structured output - already converted from Zod in the ai layer */ outputSchema: JSONSchema } /** * Result from structured output generation */ export interface StructuredOutputResult<T = unknown> { /** The parsed data conforming to the schema */ data: T /** The raw text response from the model before parsing */ rawText: string /** Token usage information (if provided by the adapter) */ usage?: TokenUsage } /** * Text adapter interface with pre-resolved generics. * * An adapter is created by a provider function: `provider('model')` → `adapter` * All type resolution happens at the provider call site, not in this interface. * * Generic parameters: * - TModel: The specific model name (e.g., 'gpt-4o') * - TProviderOptions: Provider-specific options for this model (already resolved) * - TInputModalities: Supported input modalities for this model (already resolved) * - TMessageMetadata: Metadata types for content parts (already resolved) * - TToolCapabilities: Tuple of tool-kind strings supported by this model, resolved from `supports.tools` * - TToolCallMetadata: Metadata type that round-trips with tool calls (e.g. Gemini's `thoughtSignature`) * - TSystemPromptMetadata: Provider-typed metadata accepted on each * `systemPrompts[i]` entry (e.g. Anthropic `cache_control`). Defaults to * `never` — adapters without per-prompt metadata reject the `metadata` * field at the call site. */ export interface TextAdapter< TModel extends string, TProviderOptions extends Record<string, any>, TInputModalities extends ReadonlyArray<Modality>, TMessageMetadataByModality extends DefaultMessageMetadataByModality, TToolCapabilities extends ReadonlyArray<string> = ReadonlyArray<string>, TToolCallMetadata = unknown, TSystemPromptMetadata = never, > { /** Discriminator for adapter kind */ readonly kind: 'text' /** Provider name identifier (e.g., 'openai', 'anthropic') */ readonly name: string /** The model this adapter is configured for */ readonly model: TModel /** * Capabilities this adapter requires at runtime. `chat()` validates that the * configured middleware provides each one. Model adapters omit this; harness * adapters (e.g. a future `claudeCode()`) declare e.g. `[sandboxCapability]`. * Runtime access to capabilities from inside the adapter is not yet wired — * this is the declaration/validation surface only. */ readonly requires?: ReadonlyArray<CapabilityHandle> /** * @internal Type-only properties for inference. Not assigned at runtime. */ '~types': { providerOptions: TProviderOptions inputModalities: TInputModalities messageMetadataByModality: TMessageMetadataByModality toolCapabilities: TToolCapabilities toolCallMetadata: TToolCallMetadata systemPromptMetadata: TSystemPromptMetadata } /** * Stream text completions from the model */ chatStream: ( options: TextOptions<TProviderOptions>, ) => AsyncIterable<StreamChunk> /** * Generate structured output using the provider's native structured output API. * This method uses stream: false and sends the JSON schema to the provider * to ensure the response conforms to the expected structure. * * @param options - Structured output options containing chat options and JSON schema * @returns Promise with the raw data (validation is done in the chat function) */ structuredOutput: ( options: StructuredOutputOptions<TProviderOptions>, ) => Promise<StructuredOutputResult<unknown>> /** * Stream structured output using the provider's native streaming structured * output API (stream + response_format json_schema in a single request). * * Optional — adapters without native streaming JSON omit this method and the * activity layer synthesizes a stream around the non-streaming * `structuredOutput` call. * * Implementations must emit standard AG-UI lifecycle events (RUN_STARTED, * TEXT_MESSAGE_*, RUN_FINISHED) carrying raw JSON text deltas, plus a final * `CUSTOM` event named `structured-output.complete` whose `value` is * `{ object, raw, reasoning? }`. */ structuredOutputStream?: ( options: StructuredOutputOptions<TProviderOptions>, ) => AsyncIterable<StreamChunk> /** * Declares whether the adapter supports combining `tools` and a * schema-constrained final answer in a single streaming request. * * When `true`, the engine wires `outputSchema` into the regular * `chatStream()` call and skips the separate `runStructuredFinalization` * round-trip. The model's natural final turn carries the * schema-constrained JSON text and the engine harvests it from the agent * loop's accumulated content. * * When `false`, `undefined`, or the method is omitted, the engine runs * the agent loop without `outputSchema` and then issues a separate * `structuredOutput` / `structuredOutputStream` call against the JSON * schema for finalization (the legacy path). * * The method receives the per-call `modelOptions` so providers whose * support depends on the resolved upstream model (e.g. OpenRouter) can * answer per-request. Most adapters can return a constant. */ supportsCombinedToolsAndSchema?: ( modelOptions?: TProviderOptions | undefined, ) => boolean } /** * A TextAdapter with any/unknown type parameters. * Useful as a constraint in generic functions and interfaces. */ export type AnyTextAdapter = TextAdapter<any, any, any, any, any, any, any> /** * Abstract base class for text adapters. * Extend this class to implement a text adapter for a specific provider. * * Generic parameters match TextAdapter - all pre-resolved by the provider function. */ export abstract class BaseTextAdapter< TModel extends string, TProviderOptions extends Record<string, any>, TInputModalities extends ReadonlyArray<Modality>, TMessageMetadataByModality extends DefaultMessageMetadataByModality, TToolCapabilities extends ReadonlyArray<string> = ReadonlyArray<string>, TToolCallMetadata = unknown, TSystemPromptMetadata = never, > implements TextAdapter< TModel, TProviderOptions, TInputModalities, TMessageMetadataByModality, TToolCapabilities, TToolCallMetadata, TSystemPromptMetadata > { readonly kind = 'text' as const abstract readonly name: string readonly model: TModel readonly requires?: ReadonlyArray<CapabilityHandle> = undefined // Type-only property - never assigned at runtime declare '~types': { providerOptions: TProviderOptions inputModalities: TInputModalities messageMetadataByModality: TMessageMetadataByModality toolCapabilities: TToolCapabilities toolCallMetadata: TToolCallMetadata systemPromptMetadata: TSystemPromptMetadata } protected config: TextAdapterConfig constructor(config: TextAdapterConfig = {}, model: TModel) { this.config = config this.model = model } abstract chatStream( options: TextOptions<TProviderOptions>, ): AsyncIterable<StreamChunk> /** * Generate structured output using the provider's native structured output API. * Concrete implementations should override this to use provider-specific structured output. */ abstract structuredOutput( options: StructuredOutputOptions<TProviderOptions>, ): Promise<StructuredOutputResult<unknown>> protected generateId(): string { return `${this.name}-${Date.now()}-${Math.random().toString(36).substring(7)}` } }