@tanstack/ai
Version:
Type-safe TypeScript AI SDK for streaming chat, tool calling, agents, structured outputs, and multimodal generation.
401 lines (366 loc) • 14.9 kB
text/typescript
import { EventType } from '@ag-ui/core'
import { toRunErrorPayload } from '../error-payload'
import { MAX_TOKENS_KEYS } from '../../utilities/sampling-keys'
import { BaseSummarizeAdapter } from './adapter'
import type {
StreamChunk,
SummarizationOptions,
SummarizationResult,
TextOptions,
} from '../../types'
/**
* Minimal contract for a text adapter that supports `chatStream`. Lets
* `ChatStreamSummarizeAdapter` work with any text adapter without coupling
* to a specific implementation.
*
* The provider-options shape is intentionally `any` here — the wrapper only
* forwards `modelOptions` straight through, so a text adapter with a richer
* per-model options type (e.g. `ResolveProviderOptions<TModel>`) is still
* acceptable. Summarize-level type safety is enforced via
* `SummarizationOptions<TProviderOptions>` on the wrapper itself.
*
* Only this one member is required; nothing else about the adapter is
* inspected by the wrapper.
*/
export interface ChatStreamCapable {
/**
* Starts a chat run for the given text options and returns the resulting
* chunks as an async iterable. The wrapper consumes it with `for await`.
*/
chatStream: (options: TextOptions<any>) => AsyncIterable<StreamChunk>
}
/**
 * Provider-native max-output-tokens key per summarize-adapter `name`. summarize
 * is provider-agnostic and forwards `modelOptions` opaquely to the wrapped text
 * adapter, so `maxLength` must be written under the exact key the underlying
 * provider reads — no adapter reads a generic `maxTokens`. Ollama is the one
 * exception: it nests sampling under `options`, so it has no entry here and is
 * handled as a special nested case in `applyMaxLength`/`applyDefaultTemperature`.
 *
 * Keep in sync with each adapter's wire mapping:
 * - OpenAI (Responses): `max_output_tokens`
 * - Anthropic / Grok: `max_tokens`
 * - Groq: `max_completion_tokens`
 * - Gemini: `maxOutputTokens`
 * - OpenRouter: `maxCompletionTokens`
 * - Ollama: nested `options.num_predict` (no entry — see `applyMaxLength`)
 *
 * The map is built on a null-prototype object because it is indexed with
 * arbitrary, caller-chosen adapter names. On a plain object literal a name
 * such as `constructor` or `toString` would resolve to an inherited
 * `Object.prototype` member instead of `undefined`, making an unknown adapter
 * look like a known one.
 */
const MAX_TOKENS_KEY_BY_ADAPTER: Record<string, string> = Object.assign(
  Object.create(null) as Record<string, string>,
  {
    openai: 'max_output_tokens',
    anthropic: 'max_tokens',
    grok: 'max_tokens',
    groq: 'max_completion_tokens',
    gemini: 'maxOutputTokens',
    openrouter: 'maxCompletionTokens',
  },
)
/**
* Every flat key any supported provider uses to cap output tokens (plus the
* generic `maxTokens` spelling no adapter reads). Used to detect a
* caller-supplied token limit so the summarize default never overrides an
* explicit caller value. Shared with the OTel middleware via
* `MAX_TOKENS_KEYS` so the two spelling sets cannot drift.
*
* This is an alias of the imported `MAX_TOKENS_KEYS` (from
* `utilities/sampling-keys`), not a copy. `applyMaxLength` scans it with
* `.some(...)` and uses each entry as a property name on `modelOptions`.
*/
const KNOWN_MAX_TOKENS_KEYS = MAX_TOKENS_KEYS
/**
 * Whether `applyMaxLength` knows how to place a token limit for this adapter
 * `name` (either the nested Ollama shape or a flat provider-native key).
 * Used to surface a warning when `maxLength` would otherwise be silently
 * dropped for an unrecognised adapter name.
 *
 * @param adapterName - The summarize adapter's own `name`.
 * @returns `true` for `'ollama'` or any name with a string entry in
 *   `MAX_TOKENS_KEY_BY_ADAPTER`; `false` otherwise.
 */
function isKnownMaxTokensAdapter(adapterName: string): boolean {
  if (adapterName === 'ollama') return true
  // Check for an actual string entry rather than `!== undefined`: indexing a
  // plain object with a name like `constructor` or `toString` yields an
  // inherited function, which must not count as a known provider key.
  return typeof MAX_TOKENS_KEY_BY_ADAPTER[adapterName] === 'string'
}
/**
 * Fill in the summarize default `temperature` on a shallow copy of the
 * caller's `modelOptions`, at the location the provider reads it: nested
 * under `options` when `adapterName` is `'ollama'`, top-level otherwise.
 * A `temperature` key already present at that location (whatever its value)
 * is left alone, so the caller always wins. The input object is never
 * mutated.
 *
 * @param adapterName - The summarize adapter's own `name`.
 * @param temperature - Default temperature to apply when none is present.
 * @param modelOptions - Caller-supplied provider options.
 * @returns A new options object with the default applied where needed.
 */
function applyDefaultTemperature(
  adapterName: string,
  temperature: number,
  modelOptions: Record<string, unknown>,
): Record<string, unknown> {
  const result: Record<string, unknown> = { ...modelOptions }

  // Flat providers: only add the key when the caller left it out entirely.
  if (adapterName !== 'ollama') {
    if (!('temperature' in result)) {
      result.temperature = temperature
    }
    return result
  }

  // Ollama: sampling lives under `options`; anything that is not a non-null
  // object there is treated as absent and replaced.
  const rawNested = result.options
  const nested =
    rawNested && typeof rawNested === 'object'
      ? (rawNested as Record<string, unknown>)
      : undefined
  const callerSetNested = nested !== undefined && 'temperature' in nested
  if (!callerSetNested) {
    result.options = { temperature, ...nested }
  }
  return result
}
/**
 * Resolve `maxLength` to the provider-native max-output-tokens key for the
 * given summarize-adapter `name` (this wrapper's OWN `name`, not the wrapped
 * text adapter's) and merge it into a working copy of the caller's
 * `modelOptions`. The caller always wins: if they already set any recognised
 * token-limit key (flat or, for Ollama, nested `options.num_predict`) to a
 * number, the default is not applied. Unknown/unrecognised adapter names fall
 * back to NOT setting a token key (the prompt hint still asks the model to
 * stay under `maxLength`) rather than writing a dead key no provider reads.
 *
 * Caveat (intentional): "caller wins" keys off ANY recognised spelling in
 * `KNOWN_MAX_TOKENS_KEYS`, but only the adapter's native key is read on the
 * wire. So a caller who sets a NON-native spelling for this provider — e.g.
 * `maxTokens`, or Anthropic's `max_tokens` against an OpenAI adapter — suppresses
 * the summarize default WITHOUT getting their own value applied either: neither
 * cap reaches the wire. This favours never clobbering a migration leftover over
 * guaranteeing a cap; the prompt-level hint still asks the model to stay under
 * `maxLength`. Rename the key to the provider-native spelling to forward it.
 *
 * @param adapterName - The summarize adapter's own `name`.
 * @param maxLength - Token cap to apply when the caller has not set one.
 * @param modelOptions - Caller-supplied provider options (never mutated).
 * @returns A new options object, with the cap placed where the provider
 *   reads it when applicable.
 */
function applyMaxLength(
  adapterName: string,
  maxLength: number,
  modelOptions: Record<string, unknown>,
): Record<string, unknown> {
  const merged: Record<string, unknown> = { ...modelOptions }
  // A numeric value under ANY recognised flat spelling counts as caller-set,
  // for both the Ollama and the flat-provider branch.
  const callerSetFlatLimit = KNOWN_MAX_TOKENS_KEYS.some(
    (k) => typeof merged[k] === 'number',
  )

  if (adapterName === 'ollama') {
    // Honor a caller-set limit in either shape: a recognised flat key (e.g.
    // left over from a migration) or the nested `options.num_predict`.
    const existing =
      merged.options && typeof merged.options === 'object'
        ? (merged.options as Record<string, unknown>)
        : undefined
    if (
      callerSetFlatLimit ||
      (existing && typeof existing.num_predict === 'number')
    ) {
      return merged
    }
    // Spread the caller's nested options FIRST so the default wins over a
    // non-numeric `num_predict` (e.g. an explicit `undefined`). The guard
    // above already decided the caller has not set a usable limit, and the
    // flat branch below overwrites in the same situation.
    merged.options = { ...existing, num_predict: maxLength }
    return merged
  }

  const key = MAX_TOKENS_KEY_BY_ADAPTER[adapterName]
  // `typeof` rather than `=== undefined`: a name such as `constructor` can
  // resolve to an inherited non-string member on a plain-object map, which
  // must never be used as an options key.
  if (typeof key !== 'string') return merged
  if (callerSetFlatLimit) return merged
  merged[key] = maxLength
  return merged
}
/**
* Extract the per-model `modelOptions` type a text adapter accepts. Used by
* provider summarize factories so their `modelOptions` IntelliSense matches
* what the underlying text adapter actually understands.
*
* Resolution:
* - If `TAdapter` has a `'~types'` member whose `providerOptions` type extends
*   `object`, that `providerOptions` type is the result.
* - In every other case (no `'~types'` member, or a non-object
*   `providerOptions`), the result falls back to `object`.
*/
export type InferTextProviderOptions<TAdapter> = TAdapter extends {
'~types': { providerOptions: infer P }
}
? P extends object
? P
: object
: object
/**
 * Summarize adapter that wraps any `ChatStreamCapable` text adapter and
 * prompts it for summarization. Not tied to any wire format.
 *
 * The text to summarize is sent as a single user message, the summarization
 * instructions as a system prompt, and the caller's `modelOptions` are
 * forwarded with summarize defaults (temperature, max-length cap) filled in
 * where the caller has not set them.
 */
export class ChatStreamSummarizeAdapter<
  TModel extends string,
  TProviderOptions extends object = Record<string, unknown>,
> extends BaseSummarizeAdapter<TModel, TProviderOptions> {
  /**
   * Adapter name. Besides labelling logs, it selects where sampling defaults
   * are placed in `modelOptions` (see `buildTextOptions`).
   */
  readonly name: string
  /** The wrapped text adapter every summarize call is delegated to. */
  private readonly textAdapter: ChatStreamCapable

  /**
   * @param textAdapter - Text adapter whose `chatStream` performs the run.
   * @param model - Model identifier passed to the base adapter.
   * @param name - Adapter name; defaults to `'chat-stream-summarize'`, which
   *   is not a recognised provider name for `maxLength` mapping.
   */
  constructor(
    textAdapter: ChatStreamCapable,
    model: TModel,
    name: string = 'chat-stream-summarize',
  ) {
    // NOTE(review): the first argument is an empty object — presumably the
    // base adapter's config; confirm against BaseSummarizeAdapter.
    super({}, model)
    this.name = name
    this.textAdapter = textAdapter
  }

  /**
   * Run a summarization to completion and return the collected result.
   *
   * Consumes the wrapped adapter's chunk stream, accumulating text from
   * `TEXT_MESSAGE_CONTENT` chunks and usage/model from `RUN_FINISHED`.
   *
   * @param options - Summarization request (text, model, style, logger, …).
   * @returns The generated id, the reported model, the summary text, and
   *   token usage (all zeros when the stream reports none).
   * @throws An `Error` built from a `RUN_ERROR` chunk (with `code` attached
   *   when the chunk carries a string code), or whatever the underlying
   *   stream throws. Either way the failure is logged and then rethrown.
   */
  async summarize(
    options: SummarizationOptions<TProviderOptions>,
  ): Promise<SummarizationResult> {
    const systemPrompt = this.buildSummarizationPrompt(options)
    let summary = ''
    const id = this.generateId()
    let model = options.model
    let usage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 }
    options.logger.request(
      `activity=summarize provider=${this.name} model=${options.model} text-length=${options.text.length} maxLength=${options.maxLength ?? 'unset'}`,
      { provider: this.name, model: options.model },
    )
    try {
      for await (const chunk of this.textAdapter.chatStream(
        this.buildTextOptions(options, systemPrompt),
      )) {
        if (chunk.type === 'TEXT_MESSAGE_CONTENT') {
          if (chunk.content) {
            // `content` replaces the running text rather than extending it.
            summary = chunk.content
          } else if (chunk.delta) {
            // Append delta only when present — a content-less chunk with no
            // delta would otherwise concat literal `'undefined'`.
            summary += chunk.delta
          }
          model = chunk.model || model
        }
        if (chunk.type === 'RUN_FINISHED') {
          if (chunk.usage) {
            usage = chunk.usage
          }
          // Parity with `summarizeStream`: a model reported on the terminal
          // chunk is reflected in the result, so both entry points report
          // the same `model` for the same run.
          if (chunk.model) model = chunk.model
        }
        // Surface failures: the underlying chatStream emits RUN_ERROR instead
        // of throwing, so without this branch summarize() would return an
        // empty summary and pretend a failed run succeeded.
        if (chunk.type === 'RUN_ERROR') {
          const message =
            (chunk.error && typeof chunk.error.message === 'string'
              ? chunk.error.message
              : null) ?? 'Summarization failed'
          const code =
            chunk.error && typeof chunk.error.code === 'string'
              ? chunk.error.code
              : undefined
          const err = new Error(message)
          if (code) {
            ;(err as Error & { code?: string }).code = code
          }
          throw err
        }
      }
    } catch (error: unknown) {
      // Narrow before logging: raw SDK errors can carry request metadata
      // (including auth headers) which we must never surface to user loggers.
      options.logger.errors(`${this.name}.summarize fatal`, {
        error: toRunErrorPayload(error, `${this.name}.summarize failed`),
        source: `${this.name}.summarize`,
      })
      throw error
    }
    return { id, model, summary, usage }
  }

  /**
   * Streaming variant of `summarize`. Re-yields every chunk from the wrapped
   * adapter unchanged and, immediately before each `RUN_FINISHED` chunk,
   * yields a custom `generation:result` event carrying the accumulated
   * `SummarizationResult`.
   *
   * `RUN_ERROR` chunks are passed through to the consumer rather than
   * converted to exceptions. Exceptions thrown by the underlying stream are
   * logged and rethrown.
   *
   * @param options - Summarization request (text, model, style, logger, …).
   */
  override async *summarizeStream(
    options: SummarizationOptions<TProviderOptions>,
  ): AsyncIterable<StreamChunk> {
    const systemPrompt = this.buildSummarizationPrompt(options)
    options.logger.request(
      `activity=summarizeStream provider=${this.name} model=${options.model} text-length=${options.text.length} maxLength=${options.maxLength ?? 'unset'}`,
      { provider: this.name, model: options.model },
    )
    const id = this.generateId()
    let summary = ''
    let model = options.model
    let usage: SummarizationResult['usage'] = {
      promptTokens: 0,
      completionTokens: 0,
      totalTokens: 0,
    }
    try {
      for await (const chunk of this.textAdapter.chatStream(
        this.buildTextOptions(options, systemPrompt),
      )) {
        // Accumulate the same way `summarize()` does so consumers see deltas
        // AND the terminal `generation:result` event below carries the same
        // final summary that non-streaming returns.
        if (chunk.type === 'TEXT_MESSAGE_CONTENT') {
          if (chunk.content) {
            summary = chunk.content
          } else if (chunk.delta) {
            summary += chunk.delta
          }
          if (chunk.model) model = chunk.model
        }
        // Emit the GenerationClient-shaped result event just before the
        // terminal RUN_FINISHED so subscribers (useSummarize) populate
        // `result` before flipping `status` to success.
        if (chunk.type === 'RUN_FINISHED') {
          if (chunk.usage) usage = chunk.usage
          if (chunk.model) model = chunk.model
          yield {
            type: EventType.CUSTOM,
            name: 'generation:result',
            value: { id, model, summary, usage } satisfies SummarizationResult,
            model,
            timestamp: Date.now(),
          }
        }
        yield chunk
      }
    } catch (error: unknown) {
      // Log a sanitised payload (never the raw error), then let the caller
      // see the original failure.
      options.logger.errors(`${this.name}.summarizeStream fatal`, {
        error: toRunErrorPayload(error, `${this.name}.summarizeStream failed`),
        source: `${this.name}.summarizeStream`,
      })
      throw error
    }
  }

  /**
   * Build the TextOptions passed to the underlying chatStream. Provider
   * `modelOptions` from the summarize call are forwarded so knobs like
   * Anthropic cache headers, Gemini safety settings, or Ollama tuning params
   * still reach the wire layer; summarize defaults (temperature `0.3`, and
   * the `maxLength` cap when given) are added only where the caller has not
   * set them.
   *
   * @param options - The summarize request being served.
   * @param systemPrompt - Prompt produced by `buildSummarizationPrompt`.
   * @returns Text options with one user message (the input text), the
   *   system prompt, the merged `modelOptions`, and the caller's logger.
   */
  protected buildTextOptions(
    options: SummarizationOptions<TProviderOptions>,
    systemPrompt: string,
  ): TextOptions<TProviderOptions> {
    // Sampling knobs now live in provider-native `modelOptions`. Apply the
    // low-temperature default where the wrapped provider actually reads it
    // (nested under `options` for Ollama, flat otherwise) so callers can still
    // override it. Resolving the placement from this summarize adapter's OWN
    // `name` keeps the default off the wire correctly per provider — a flat
    // `temperature` would be silently dropped by Ollama while still showing up
    // in OTel.
    let working: Record<string, unknown> = {
      ...(options.modelOptions as Record<string, unknown> | undefined),
    }
    working = applyDefaultTemperature(this.name, 0.3, working)
    // `maxLength` must reach the wire under the provider-native token key (it
    // differs per provider, and no adapter reads a generic `maxTokens`).
    // Resolve it from this summarize adapter's `name` (the constructor arg,
    // not the wrapped text adapter's name), never overriding a caller-supplied
    // token limit.
    if (options.maxLength !== undefined) {
      if (!isKnownMaxTokensAdapter(this.name)) {
        options.logger.warn(
          `summarize: maxLength=${options.maxLength} could not be mapped to a provider token key for adapter name "${this.name}" — it was dropped from modelOptions (the prompt still asks the model to stay under it). Construct ChatStreamSummarizeAdapter with a recognised provider name to forward the cap.`,
          { provider: this.name },
        )
      }
      working = applyMaxLength(this.name, options.maxLength, working)
    }
    // The helpers operate on a loose record; narrow back to the provider's
    // option type for the outgoing request.
    const modelOptions = working as TProviderOptions
    return {
      model: options.model,
      messages: [{ role: 'user', content: options.text }],
      systemPrompts: [systemPrompt],
      modelOptions,
      logger: options.logger,
    }
  }

  /**
   * Compose the system prompt for a summarize request from its `style`,
   * optional `focus` list, and optional `maxLength`.
   *
   * @param options - The summarize request being served.
   * @returns The prompt text; every sentence ends with a trailing space.
   */
  protected buildSummarizationPrompt(
    options: SummarizationOptions<TProviderOptions>,
  ): string {
    let prompt = 'You are a professional summarizer. '
    switch (options.style) {
      case 'bullet-points':
        prompt += 'Provide a summary in bullet point format. '
        break
      case 'paragraph':
        prompt += 'Provide a summary in paragraph format. '
        break
      case 'concise':
        prompt += 'Provide a very concise summary in 1-2 sentences. '
        break
      case undefined:
        prompt += 'Provide a clear and concise summary. '
        break
      default:
        // Unrecognised styles get the same wording as an unset style.
        prompt += 'Provide a clear and concise summary. '
    }
    if (options.focus && options.focus.length > 0) {
      prompt += `Focus on the following aspects: ${options.focus.join(', ')}. `
    }
    // Truthiness check: an unset (or zero) `maxLength` adds no length hint.
    if (options.maxLength) {
      prompt += `Keep the summary under ${options.maxLength} tokens. `
    }
    return prompt
  }
}