@tanstack/ai
Version:
Type-safe TypeScript AI SDK for streaming chat, tool calling, agents, structured outputs, and multimodal generation.
231 lines (212 loc) • 8.4 kB
text/typescript
import type {
DefaultMessageMetadataByModality,
JSONSchema,
Modality,
StreamChunk,
TextOptions,
TokenUsage,
} from '../../types'
import type { CapabilityHandle } from './middleware/capabilities'
/**
* Configuration for adapter instances.
*
* Every field is optional. `BaseTextAdapter` only stores this object on its
* protected `config` property and does not read any individual field, so the
* interpretation of each value is left to the concrete provider adapter.
*/
export interface TextAdapterConfig {
/** Credential for the provider API — presumably attached to outgoing requests; confirm per adapter. */
apiKey?: string
/** Alternate endpoint root for the provider — NOTE(review): expected URL shape is adapter-specific. */
baseUrl?: string
/** Request timeout — NOTE(review): the unit (likely milliseconds) is not established in this file; confirm. */
timeout?: number
/** Retry budget for failed requests — NOTE(review): retry policy is decided by the adapter/SDK; confirm. */
maxRetries?: number
/** Additional string-valued headers — presumably merged into outgoing HTTP requests; confirm per adapter. */
headers?: Record<string, string>
}
/**
* Options for structured output generation.
*
* Bundles the regular chat request options with the JSON Schema the response
* must conform to. `TProviderOptions` is the provider-specific options type
* forwarded into `TextOptions`.
*
* The internal logger is threaded through `chatOptions.logger` (inherited from
* `TextOptions`). Adapter implementations must call `logger.request()` before
* SDK calls, `logger.provider()` for each chunk received, and `logger.errors()`
* in catch blocks.
*/
export interface StructuredOutputOptions<TProviderOptions extends object> {
/** Text options for the request (also carries the logger described above) */
chatOptions: TextOptions<TProviderOptions>
/** JSON Schema for structured output - already converted from Zod in the ai layer, so adapters receive plain JSON Schema */
outputSchema: JSONSchema
}
/**
* Result from structured output generation.
*
* `T` is the type of the parsed payload and defaults to `unknown`. The adapter
* methods declared in this file (`structuredOutput`) always return the
* `unknown` instantiation.
*/
export interface StructuredOutputResult<T = unknown> {
/** The parsed data conforming to the schema */
data: T
/** The raw text response from the model before parsing */
rawText: string
/** Token usage information (if provided by the adapter) */
usage?: TokenUsage
}
/**
* Text adapter interface with pre-resolved generics.
*
* An adapter is created by a provider function: `provider('model')` → `adapter`
* All type resolution happens at the provider call site, not in this interface.
*
* Generic parameters:
* - TModel: The specific model name (e.g., 'gpt-4o')
* - TProviderOptions: Provider-specific options for this model (already resolved)
* - TInputModalities: Supported input modalities for this model (already resolved)
* - TMessageMetadataByModality: Metadata types for content parts, keyed by modality (already resolved)
* - TToolCapabilities: Tuple of tool-kind strings supported by this model, resolved from `supports.tools`
* - TToolCallMetadata: Metadata type that round-trips with tool calls (e.g. Gemini's `thoughtSignature`)
* - TSystemPromptMetadata: Provider-typed metadata accepted on each
* `systemPrompts[i]` entry (e.g. Anthropic `cache_control`). Defaults to
* `never` — adapters without per-prompt metadata reject the `metadata`
* field at the call site.
*/
export interface TextAdapter<
TModel extends string,
TProviderOptions extends Record<string, any>,
TInputModalities extends ReadonlyArray<Modality>,
TMessageMetadataByModality extends DefaultMessageMetadataByModality,
TToolCapabilities extends ReadonlyArray<string> = ReadonlyArray<string>,
TToolCallMetadata = unknown,
TSystemPromptMetadata = never,
> {
/** Discriminator for adapter kind */
readonly kind: 'text'
/** Provider name identifier (e.g., 'openai', 'anthropic') */
readonly name: string
/** The model this adapter is configured for */
readonly model: TModel
/**
* Capabilities this adapter requires at runtime. `chat()` validates that the
* configured middleware provides each one. Model adapters omit this; harness
* adapters (e.g. a future `claudeCode()`) declare e.g. `[sandboxCapability]`.
* Runtime access to capabilities from inside the adapter is not yet wired —
* this is the declaration/validation surface only.
*/
readonly requires?: ReadonlyArray<CapabilityHandle>
/**
* @internal Type-only properties for inference. Not assigned at runtime.
*
* Each member simply mirrors one of the interface's generic parameters so the
* resolved type can be read back from an adapter type via indexed access
* (e.g. `A['~types']['providerOptions']`). Never read this at runtime.
*/
'~types': {
providerOptions: TProviderOptions
inputModalities: TInputModalities
messageMetadataByModality: TMessageMetadataByModality
toolCapabilities: TToolCapabilities
toolCallMetadata: TToolCallMetadata
systemPromptMetadata: TSystemPromptMetadata
}
/**
* Stream text completions from the model
*
* @param options - Text options typed with this adapter's provider options
* @returns An async iterable of stream chunks
*/
chatStream: (
options: TextOptions<TProviderOptions>,
) => AsyncIterable<StreamChunk>
/**
* Generate structured output using the provider's native structured output API.
* This method uses stream: false and sends the JSON schema to the provider
* to ensure the response conforms to the expected structure.
*
* @param options - Structured output options containing chat options and JSON schema
* @returns Promise with the raw data (validation is done in the chat function)
*/
structuredOutput: (
options: StructuredOutputOptions<TProviderOptions>,
) => Promise<StructuredOutputResult<unknown>>
/**
* Stream structured output using the provider's native streaming structured
* output API (stream + response_format json_schema in a single request).
*
* Optional — adapters without native streaming JSON omit this method and the
* activity layer synthesizes a stream around the non-streaming
* `structuredOutput` call.
*
* Implementations must emit standard AG-UI lifecycle events (RUN_STARTED,
* TEXT_MESSAGE_*, RUN_FINISHED) carrying raw JSON text deltas, plus a final
* `CUSTOM` event named `structured-output.complete` whose `value` is
* `{ object, raw, reasoning? }`.
*
* @param options - Same options object accepted by `structuredOutput`
* @returns An async iterable of stream chunks
*/
structuredOutputStream?: (
options: StructuredOutputOptions<TProviderOptions>,
) => AsyncIterable<StreamChunk>
/**
* Declares whether the adapter supports combining `tools` and a
* schema-constrained final answer in a single streaming request.
*
* When `true`, the engine wires `outputSchema` into the regular
* `chatStream()` call and skips the separate `runStructuredFinalization`
* round-trip. The model's natural final turn carries the
* schema-constrained JSON text and the engine harvests it from the agent
* loop's accumulated content.
*
* When `false`, `undefined`, or the method is omitted, the engine runs
* the agent loop without `outputSchema` and then issues a separate
* `structuredOutput` / `structuredOutputStream` call against the JSON
* schema for finalization (the legacy path).
*
* The method receives the per-call `modelOptions` so providers whose
* support depends on the resolved upstream model (e.g. OpenRouter) can
* answer per-request. Most adapters can return a constant.
*
* @param modelOptions - Per-call provider options; may be omitted or `undefined`
* @returns `true` when tools and schema can share one streaming request
*/
supportsCombinedToolsAndSchema?: (
modelOptions?: TProviderOptions | undefined,
) => boolean
}
/**
* A TextAdapter with every type parameter set to `any`.
* Useful as a constraint in generic functions and interfaces that must accept
* any concrete adapter regardless of its resolved generics.
*/
export type AnyTextAdapter = TextAdapter<any, any, any, any, any, any, any>
/**
 * Abstract base class for text adapters.
 * Extend this class to implement a text adapter for a specific provider.
 *
 * Generic parameters match TextAdapter - all pre-resolved by the provider function.
 *
 * Subclasses must supply `name`, `chatStream` and `structuredOutput`; the base
 * class stores the configuration and model and offers an ID helper.
 */
export abstract class BaseTextAdapter<
  TModel extends string,
  TProviderOptions extends Record<string, any>,
  TInputModalities extends ReadonlyArray<Modality>,
  TMessageMetadataByModality extends DefaultMessageMetadataByModality,
  TToolCapabilities extends ReadonlyArray<string> = ReadonlyArray<string>,
  TToolCallMetadata = unknown,
  TSystemPromptMetadata = never,
> implements TextAdapter<
  TModel,
  TProviderOptions,
  TInputModalities,
  TMessageMetadataByModality,
  TToolCapabilities,
  TToolCallMetadata,
  TSystemPromptMetadata
> {
  /** Discriminator for adapter kind; always the literal `'text'`. */
  readonly kind = 'text' as const
  /** Provider name identifier; also used as the prefix of generated IDs. */
  abstract readonly name: string
  /** The model this adapter is configured for. */
  readonly model: TModel
  /** No required capabilities by default. */
  readonly requires?: ReadonlyArray<CapabilityHandle> = undefined
  // Type-only property - never assigned at runtime
  declare '~types': {
    providerOptions: TProviderOptions
    inputModalities: TInputModalities
    messageMetadataByModality: TMessageMetadataByModality
    toolCapabilities: TToolCapabilities
    toolCallMetadata: TToolCallMetadata
    systemPromptMetadata: TSystemPromptMetadata
  }
  /** Adapter configuration as passed to the constructor; available to subclasses. */
  protected config: TextAdapterConfig

  /**
   * @param config - Adapter configuration. Defaults to `{}`; because it precedes
   *   the required `model` argument, callers pass `undefined` to get the default.
   * @param model - The model name this adapter instance is bound to.
   */
  constructor(config: TextAdapterConfig = {}, model: TModel) {
    this.config = config
    this.model = model
  }

  /**
   * Stream text completions from the model.
   *
   * @param options - Text options typed with this adapter's provider options
   * @returns An async iterable of stream chunks
   */
  abstract chatStream(
    options: TextOptions<TProviderOptions>,
  ): AsyncIterable<StreamChunk>

  /**
   * Generate structured output using the provider's native structured output API.
   * Concrete implementations should override this to use provider-specific structured output.
   *
   * @param options - Chat options plus the JSON Schema the output must follow
   * @returns Promise resolving to the parsed data, raw text and optional usage
   */
  abstract structuredOutput(
    options: StructuredOutputOptions<TProviderOptions>,
  ): Promise<StructuredOutputResult<unknown>>

  /**
   * Build an identifier of the form `<name>-<Date.now()>-<suffix>`, where
   * `suffix` is always exactly six base-36 characters (`0-9a-z`).
   *
   * Based on `Math.random()`, so it is not cryptographically secure and
   * uniqueness is probabilistic.
   *
   * @returns The generated identifier string
   */
  protected generateId(): string {
    // `toString(36)` yields "0." followed by a variable number of digits
    // ("0.i" for 0.5, just "0" for 0). Cutting at a fixed offset such as
    // `substring(7)` can therefore return a short or even empty suffix.
    // Take a fixed window right after the "0." prefix and pad it instead.
    const suffix = Math.random().toString(36).slice(2, 8).padEnd(6, '0')
    return `${this.name}-${Date.now()}-${suffix}`
  }
}