@llumiverse/drivers

Version:

LLM driver implementations. Currently supported drivers: openai, huggingface, bedrock, replicate.

89 lines (81 loc) • 3.77 kB

text/typescript

import type { OutputConfig, ThinkingConfigParam } from "@anthropic-ai/sdk/resources/messages.js";
import {
    hasSamplingParameterRestriction,
    isClaudeVersionGTE,
    supportsAdaptiveThinking,
} from "@llumiverse/core";

/**
 * Subset of Claude model options that drives thinking/effort resolution.
 * Shaped so that both VertexAIClaudeOptions and BedrockClaudeOptions can be passed in.
 */
export interface ClaudeThinkingInput {
    thinking_budget_tokens?: number;
    effort?: NonNullable<OutputConfig['effort']>;
    /** Display-only switch: whether thinking content is returned. It never turns thinking on. */
    include_thoughts?: boolean;
}

/**
 * Outcome of resolving the thinking and effort settings for a Claude model.
 */
export interface ClaudeThinkingResult {
    /** Thinking/reasoning config to place in the API payload (undefined = leave it out). */
    thinking: ThinkingConfigParam | undefined;
    /** Output config carrying the effort level for the API payload, when one was given. */
    outputConfig: OutputConfig | undefined;
    /** True when sampling parameters (temperature, top_p, top_k) should be stripped. */
    hasSamplingRestriction: boolean;
    /** True when the model supports thinking at all (Claude 3.7 or newer). */
    supportsThinking: boolean;
}

/**
 * Work out the thinking and effort configuration for a Claude model.
 *
 * Resolution order for `thinking`:
 * 1. Model older than Claude 3.7: `undefined` (no thinking support).
 * 2. `thinking_budget_tokens` set: extended thinking (`type: "enabled"`), even on
 *    adaptive-capable models — the explicit user budget takes priority over the
 *    adaptive path.
 * 3. Adaptive-capable model (Opus 4.6+, Sonnet 4.6+): `type: "adaptive"` when `effort`
 *    is set, otherwise `undefined` (thinking stays off by default). `include_thoughts`
 *    only selects the `display` mode ("summarized" vs "omitted").
 * 4. Remaining thinking-capable models: `type: "disabled"`.
 *
 * `outputConfig` is built whenever a truthy `effort` is supplied; no model capability
 * check is applied to it here.
 *
 * @param model - The model identifier string
 * @param options - User-provided Claude options (thinking_budget_tokens, effort, include_thoughts)
 * @returns The resolved thinking config, output config and model capability flags
 */
export function resolveClaudeThinking(model: string, options?: ClaudeThinkingInput): ClaudeThinkingResult {
    // Capability probes — evaluated up front, once per call.
    const adaptiveCapable = supportsAdaptiveThinking(model);
    const hasSamplingRestriction = hasSamplingParameterRestriction(model);
    const supportsThinking = isClaudeVersionGTE(model, 3, 7);

    const effort = options?.effort;
    const budget = options?.thinking_budget_tokens;

    const pickThinking = (): ThinkingConfigParam | undefined => {
        // Pre-3.7 models cannot think at all.
        if (!supportsThinking) {
            return undefined;
        }

        // An explicit budget always selects extended thinking.
        if (budget != null) {
            return { type: "enabled", budget_tokens: budget };
        }

        if (adaptiveCapable) {
            // Adaptive thinking is opt-in through `effort`; without it the field is omitted.
            if (effort == null) {
                return undefined;
            }
            return {
                type: "adaptive",
                display: options?.include_thoughts ? "summarized" : "omitted",
            };
        }

        // Thinking-capable but not adaptive, and no budget given: explicitly disabled.
        return { type: "disabled" };
    };

    return {
        thinking: pickThinking(),
        // Effort parameter (Opus 4.5+, Sonnet 4.6+, all 4.7+)
        outputConfig: effort ? { effort } : undefined,
        hasSamplingRestriction,
        supportsThinking,
    };
}