@tanstack/ai

import {
  DefaultMessageMetadataByModality,
  JSONSchema,
  Modality,
  StreamChunk,
  TextOptions,
  TokenUsage,
} from '../../types.js';
import { CapabilityHandle } from './middleware/capabilities.js';

/**
 * Settings accepted when constructing an adapter instance.
 * Every field is optional.
 */
export interface TextAdapterConfig {
  apiKey?: string;
  baseUrl?: string;
  timeout?: number;
  maxRetries?: number;
  headers?: Record<string, string>;
}

/**
 * Input bundle for a structured output request.
 *
 * The internal logger travels on `chatOptions.logger` (inherited from
 * `TextOptions`). Adapter implementations are expected to:
 * - call `logger.request()` before SDK calls,
 * - call `logger.provider()` for each chunk received,
 * - call `logger.errors()` in catch blocks.
 */
export interface StructuredOutputOptions<TProviderOptions extends object> {
  /** Text options for the request. */
  chatOptions: TextOptions<TProviderOptions>;
  /** JSON Schema describing the output; already converted from Zod in the ai layer. */
  outputSchema: JSONSchema;
}

/**
 * Outcome of a structured output request.
 */
export interface StructuredOutputResult<T = unknown> {
  /** Parsed data conforming to the schema. */
  data: T;
  /** Raw model response text, before parsing. */
  rawText: string;
  /** Token usage information, when the adapter provides it. */
  usage?: TokenUsage;
}

/**
 * Contract for a text adapter whose generics are already resolved.
 *
 * A provider function produces the adapter: `provider('model')` → `adapter`.
 * Type resolution happens at that provider call site, not in this interface.
 *
 * Generic parameters:
 * - TModel: the specific model name (e.g., 'gpt-4o')
 * - TProviderOptions: provider-specific options for this model (already resolved)
 * - TInputModalities: input modalities this model supports (already resolved)
 * - TMessageMetadataByModality: metadata types for content parts (already resolved)
 * - TToolCapabilities: tuple of tool-kind strings supported by this model,
 *   resolved from `supports.tools`
 * - TToolCallMetadata: metadata type that round-trips with tool calls
 *   (e.g. Gemini's `thoughtSignature`)
 * - TSystemPromptMetadata: provider-typed metadata accepted on each
 *   `systemPrompts[i]` entry (e.g. Anthropic `cache_control`). Defaults to
 *   `never`, so adapters without per-prompt metadata reject the `metadata`
 *   field at the call site.
 */
export interface TextAdapter<
  TModel extends string,
  TProviderOptions extends Record<string, any>,
  TInputModalities extends readonly Modality[],
  TMessageMetadataByModality extends DefaultMessageMetadataByModality,
  TToolCapabilities extends readonly string[] = readonly string[],
  TToolCallMetadata = unknown,
  TSystemPromptMetadata = never,
> {
  /** Discriminator for the adapter kind. */
  readonly kind: 'text';

  /** Provider name identifier (e.g., 'openai', 'anthropic'). */
  readonly name: string;

  /** The model this adapter is configured for. */
  readonly model: TModel;

  /**
   * Capabilities this adapter needs at runtime. `chat()` validates that the
   * configured middleware provides each one. Model adapters leave this out;
   * harness adapters (e.g. a future `claudeCode()`) declare entries such as
   * `[sandboxCapability]`.
   *
   * Runtime access to capabilities from inside the adapter is not wired yet;
   * this is only the declaration/validation surface.
   */
  readonly requires?: readonly CapabilityHandle[];

  /**
   * @internal Type-only carrier used for inference. Not assigned at runtime.
   */
  '~types': {
    providerOptions: TProviderOptions;
    inputModalities: TInputModalities;
    messageMetadataByModality: TMessageMetadataByModality;
    toolCapabilities: TToolCapabilities;
    toolCallMetadata: TToolCallMetadata;
    systemPromptMetadata: TSystemPromptMetadata;
  };

  /**
   * Stream text completions from the model.
   */
  chatStream: (
    options: TextOptions<TProviderOptions>,
  ) => AsyncIterable<StreamChunk>;

  /**
   * Produce structured output through the provider's native structured
   * output API. The method uses stream: false and sends the JSON schema to
   * the provider so the response conforms to the expected structure.
   *
   * @param options - Structured output options holding the chat options and JSON schema
   * @returns Promise with the raw data (validation is done in the chat function)
   */
  structuredOutput: (
    options: StructuredOutputOptions<TProviderOptions>,
  ) => Promise<StructuredOutputResult<unknown>>;

  /**
   * Stream structured output through the provider's native streaming
   * structured output API (stream + response_format json_schema in a single
   * request).
   *
   * Optional: adapters without native streaming JSON omit this method, and
   * the activity layer synthesizes a stream around the non-streaming
   * `structuredOutput` call.
   *
   * Implementations must emit the standard AG-UI lifecycle events
   * (RUN_STARTED, TEXT_MESSAGE_*, RUN_FINISHED) carrying raw JSON text
   * deltas, followed by a final `CUSTOM` event named
   * `structured-output.complete` whose `value` is
   * `{ object, raw, reasoning? }`.
   */
  structuredOutputStream?: (
    options: StructuredOutputOptions<TProviderOptions>,
  ) => AsyncIterable<StreamChunk>;

  /**
   * Reports whether the adapter can combine `tools` with a
   * schema-constrained final answer in one streaming request.
   *
   * When it returns `true`, the engine wires `outputSchema` into the regular
   * `chatStream()` call and skips the separate `runStructuredFinalization`
   * round-trip. The model's natural final turn then carries the
   * schema-constrained JSON text, which the engine harvests from the agent
   * loop's accumulated content.
   *
   * When it returns `false` or `undefined`, or the method is omitted, the
   * engine runs the agent loop without `outputSchema` and afterwards issues
   * a separate `structuredOutput` / `structuredOutputStream` call against
   * the JSON schema for finalization (the legacy path).
   *
   * The per-call `modelOptions` are passed in so providers whose support
   * depends on the resolved upstream model (e.g. OpenRouter) can answer
   * per request. Most adapters can return a constant.
   */
  supportsCombinedToolsAndSchema?: (
    modelOptions?: TProviderOptions | undefined,
  ) => boolean;
}

/**
 * A `TextAdapter` with every type parameter set to `any`.
 * Handy as a constraint in generic functions and interfaces.
 */
export type AnyTextAdapter = TextAdapter<any, any, any, any, any, any, any>;

/**
 * Abstract base class for text adapters.
 * Extend it to implement a text adapter for a specific provider.
 *
 * The generic parameters mirror `TextAdapter`; all are pre-resolved by the
 * provider function.
 */
export declare abstract class BaseTextAdapter<
    TModel extends string,
    TProviderOptions extends Record<string, any>,
    TInputModalities extends readonly Modality[],
    TMessageMetadataByModality extends DefaultMessageMetadataByModality,
    TToolCapabilities extends readonly string[] = readonly string[],
    TToolCallMetadata = unknown,
    TSystemPromptMetadata = never,
  >
  implements
    TextAdapter<
      TModel,
      TProviderOptions,
      TInputModalities,
      TMessageMetadataByModality,
      TToolCapabilities,
      TToolCallMetadata,
      TSystemPromptMetadata
    >
{
  /** Discriminator for the adapter kind. */
  readonly kind: 'text';

  /** Provider name identifier; each concrete subclass must supply it. */
  abstract readonly name: string;

  /** The model this adapter is configured for. */
  readonly model: TModel;

  /** Capabilities the adapter declares as required, if any. */
  readonly requires?: readonly CapabilityHandle[];

  /** Type-level carrier mirroring `TextAdapter['~types']`. */
  '~types': {
    providerOptions: TProviderOptions;
    inputModalities: TInputModalities;
    messageMetadataByModality: TMessageMetadataByModality;
    toolCapabilities: TToolCapabilities;
    toolCallMetadata: TToolCallMetadata;
    systemPromptMetadata: TSystemPromptMetadata;
  };

  /** Adapter configuration, visible to subclasses. */
  protected config: TextAdapterConfig;

  /**
   * @param config - Adapter configuration; `undefined` is accepted
   * @param model - The model this adapter is configured for
   */
  constructor(config: TextAdapterConfig | undefined, model: TModel);

  /**
   * Stream text completions from the model. Subclasses must implement this.
   */
  abstract chatStream(
    options: TextOptions<TProviderOptions>,
  ): AsyncIterable<StreamChunk>;

  /**
   * Produce structured output through the provider's native structured
   * output API. Concrete implementations should override this with
   * provider-specific structured output.
   */
  abstract structuredOutput(
    options: StructuredOutputOptions<TProviderOptions>,
  ): Promise<StructuredOutputResult<unknown>>;

  /**
   * Returns a string identifier for use by subclasses. How it is generated
   * is defined by the implementation, which this declaration does not show.
   */
  protected generateId(): string;
}