UNPKG

graphlit-client

Version:
1,014 lines • 156 kB
import * as Types from "../generated/graphql-types.js"; import { getModelName } from "../model-mapping.js"; import { ProviderError, isRetryableServerError, isRateLimitError, isNetworkError, extractRequestId, } from "../types/internal.js"; import { createHash } from "node:crypto"; /** * Helper to check if a string is valid JSON */ function isValidJSON(str) { try { JSON.parse(str); return true; } catch { return false; } } /** * Simplify schema for Groq by removing complex features that may cause issues */ function simplifySchemaForGroq(schema) { if (typeof schema !== "object" || schema === null) { return JSON.stringify(schema); } // Remove complex JSON Schema features that Groq might not support const simplified = { type: schema.type || "object", properties: {}, required: schema.required || [], }; // Only keep basic properties and types if (schema.properties) { for (const [key, value] of Object.entries(schema.properties)) { const prop = value; simplified.properties[key] = { type: prop.type || "string", description: prop.description || "", // Remove complex features like patterns, formats, etc. }; // Keep enum if present (but simplified) if (prop.enum && Array.isArray(prop.enum)) { simplified.properties[key].enum = prop.enum; } } } return JSON.stringify(simplified); } /** * Clean schema for Google Gemini by removing unsupported fields */ function cleanSchemaForGoogle(schema) { if (typeof schema !== "object" || schema === null) { return schema; } if (Array.isArray(schema)) { return schema.map((item) => cleanSchemaForGoogle(item)); } const cleaned = {}; for (const [key, value] of Object.entries(schema)) { // Skip fields that Google doesn't support if (key === "$schema" || key === "additionalProperties") { continue; } // Handle format field for string types - Google only supports 'enum' and 'date-time' if (key === "format" && typeof value === "string") { // Only keep supported formats if (value === "enum" || value === "date-time") { cleaned[key] = value; } // Skip unsupported formats like "date", "time", "email", etc. continue; } // Recursively clean nested objects cleaned[key] = cleanSchemaForGoogle(value); } return cleaned; } function shortHash(value) { return createHash("sha256").update(value).digest("hex").slice(0, 16); } function stablePromptCacheKey(specification, stablePrefix) { if (specification.serviceType !== Types.ModelServiceTypes.OpenAi) { return undefined; } const specId = specification.id; if (!specId) { return undefined; } return `spec:${specId}:${shortHash(JSON.stringify(stablePrefix ?? ""))}`; } function trimGooglePromptCache(cache) { const maxEntries = cache.maxEntries ?? 100; while (cache.entries.size > maxEntries) { const oldestKey = cache.entries.keys().next().value; if (!oldestKey) { break; } cache.entries.delete(oldestKey); } } function isGoogleCachedContentNotFound(error) { const candidate = error; const status = candidate?.status ?? candidate?.statusCode ?? candidate?.code; return (status === 404 || status === "404" || /cached content.*not found|not found.*cached content/i.test(candidate?.message || "")); } function getGoogleSystemInstructionParts(systemPrompt) { const prompts = Array.isArray(systemPrompt) ? systemPrompt : [systemPrompt]; return prompts .map((prompt) => prompt?.trim() || "") .filter((prompt) => prompt.length > 0); } /** * Stream with OpenAI SDK */ export async function streamWithOpenAI(specification, messages, tools, openaiClient, // OpenAI client instance onEvent, onComplete, abortSignal, reasoningEffort) { let fullMessage = ""; let toolCalls = []; let usageData = null; // Performance metrics const startTime = Date.now(); let firstTokenTime = 0; let firstMeaningfulContentTime = 0; let tokenCount = 0; let toolArgumentTokens = 0; let lastEventTime = 0; const interTokenDelays = []; // Tool calling metrics const toolMetrics = { totalTools: 0, successfulTools: 0, failedTools: 0, toolTimes: [], currentToolStart: 0, roundStartTime: startTime, rounds: [], currentRound: 1, }; try { const modelName = getModelName(specification); if (!modelName) { throw new Error(`No model name found for specification: ${specification.name} (service: ${specification.serviceType})`); } if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`🤖 [OpenAI] Model Config: Service=OpenAI | Model=${modelName} | Temperature=${specification.openAI?.temperature} | MaxTokens=${specification.openAI?.completionTokenLimit || "null"} | Tools=${tools?.length || 0} | ReasoningEffort=${reasoningEffort || "none"} | Spec="${specification.name}"`); } const streamConfig = { model: modelName, messages, stream: true, stream_options: { include_usage: true }, temperature: specification.openAI?.temperature, //top_p: specification.openAI?.probability, }; const promptCacheKey = stablePromptCacheKey(specification, { system: messages .filter((message) => message.role === "system") .map((message) => message.content), tools, model: modelName, }); if (promptCacheKey) { streamConfig.prompt_cache_key = promptCacheKey; } // Only add max_completion_tokens if it's defined if (specification.openAI?.completionTokenLimit) { streamConfig.max_completion_tokens = specification.openAI.completionTokenLimit; } // Add tools if provided if (tools && tools.length > 0) { streamConfig.tools = tools.map((tool) => ({ type: "function", function: { name: tool.name, description: tool.description, parameters: tool.schema ? JSON.parse(tool.schema) : {}, }, })); } // Add reasoning effort for o1 models if (reasoningEffort) { // OpenAI o1 models support reasoning_effort parameter streamConfig.reasoning_effort = reasoningEffort.toLowerCase(); if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`🧠 [OpenAI] Reasoning effort set to: ${reasoningEffort}`); } } if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`⏱️ [OpenAI] Starting LLM call at: ${new Date().toISOString()}`); } const stream = await openaiClient.chat.completions.create(streamConfig, { signal: abortSignal, }); for await (const chunk of stream) { const delta = chunk.choices[0]?.delta; // Capture usage data from final chunk if (chunk.usage || chunk.x_groq?.usage) { usageData = chunk.usage || chunk.x_groq?.usage; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[OpenAI] Usage data captured:`, usageData); } } // Debug log chunk details if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[OpenAI] Chunk:`, JSON.stringify(chunk, null, 2)); if (delta?.content) { console.log(`[OpenAI] Content delta: "${delta.content}" (${delta.content.length} chars)`); } if (delta?.tool_calls) { console.log(`[OpenAI] Tool calls:`, delta.tool_calls); } if (chunk.choices[0]?.finish_reason) { console.log(`[OpenAI] Finish reason: ${chunk.choices[0].finish_reason}`); } } if (delta?.content) { fullMessage += delta.content; tokenCount++; const currentTime = Date.now(); // Track TTFT (first token regardless of type) if (firstTokenTime === 0) { firstTokenTime = currentTime - startTime; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`\n⚡ [OpenAI] Time to First Token (TTFT): ${firstTokenTime}ms`); } } // Track first meaningful content (excludes tool calls) if (firstMeaningfulContentTime === 0 && delta.content.trim()) { firstMeaningfulContentTime = currentTime - startTime; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`\n🎯 [OpenAI] Time to First Meaningful Content: ${firstMeaningfulContentTime}ms`); } } // Track inter-token delays if (lastEventTime > 0) { const delay = currentTime - lastEventTime; interTokenDelays.push(delay); } lastEventTime = currentTime; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[OpenAI] Token #${tokenCount}: "${delta.content}" | Accumulated: ${fullMessage.length} chars`); } onEvent({ type: "token", token: delta.content, }); } // Handle tool calls if (delta?.tool_calls) { for (const toolCallDelta of delta.tool_calls) { const index = toolCallDelta.index; if (!toolCalls[index]) { toolCalls[index] = { id: toolCallDelta.id || `tool_${Date.now()}_${index}`, name: "", arguments: "", }; // Track tool metrics toolMetrics.totalTools++; toolMetrics.currentToolStart = Date.now(); toolMetrics.toolTimes.push({ name: toolCallDelta.function?.name || "unknown", startTime: toolMetrics.currentToolStart, argumentBuildTime: 0, totalTime: 0, }); // Track TTFT for first tool if no content yet if (firstTokenTime === 0) { firstTokenTime = Date.now() - startTime; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`\n⚡ [OpenAI] Time to First Token (Tool Call): ${firstTokenTime}ms`); } } if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[OpenAI] Starting new tool call: ${toolCalls[index].id}`); } onEvent({ type: "tool_call_start", toolCall: { id: toolCalls[index].id, name: toolCallDelta.function?.name || "", }, }); } if (toolCallDelta.function?.name) { toolCalls[index].name = toolCallDelta.function.name; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[OpenAI] Tool name: ${toolCallDelta.function.name}`); } } if (toolCallDelta.function?.arguments) { toolCalls[index].arguments += toolCallDelta.function.arguments; // Count tool argument tokens (rough estimate: ~4 chars per token) toolArgumentTokens += Math.ceil(toolCallDelta.function.arguments.length / 4); // Debug logging for partial JSON accumulation if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[OpenAI] Tool ${toolCalls[index].name} - Partial JSON chunk: "${toolCallDelta.function.arguments}"`); console.log(`[OpenAI] Tool ${toolCalls[index].name} - Total accumulated: ${toolCalls[index].arguments.length} chars`); } onEvent({ type: "tool_call_delta", toolCallId: toolCalls[index].id, argumentDelta: toolCallDelta.function.arguments, }); } } } } // Emit complete events for tool calls and finalize metrics for (let i = 0; i < toolCalls.length; i++) { const toolCall = toolCalls[i]; const currentTime = Date.now(); // Update tool metrics if (i < toolMetrics.toolTimes.length) { const toolTime = toolMetrics.toolTimes[i]; toolTime.argumentBuildTime = currentTime - toolTime.startTime; toolTime.totalTime = toolTime.argumentBuildTime; // For streaming, this is the same toolTime.name = toolCall.name; // Update with final name } // Track tool success/failure try { JSON.parse(toolCall.arguments); toolMetrics.successfulTools++; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[OpenAI] ✅ Valid JSON for ${toolCall.name}`); } } catch (e) { toolMetrics.failedTools++; console.error(`[OpenAI] ❌ Invalid JSON for ${toolCall.name}: ${e}`); } // Log the final JSON for debugging if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[OpenAI] Tool ${toolCall.name} complete with arguments (${toolCall.arguments.length} chars):`); console.log(toolCall.arguments); } onEvent({ type: "tool_call_parsed", toolCall: { id: toolCall.id, name: toolCall.name, arguments: toolCall.arguments, }, }); } // Final summary logging if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING && toolCalls.length > 0) { console.log(`[OpenAI] Successfully processed ${toolCalls.length} tool calls`); } // Calculate final metrics including tool calling insights const totalTime = Date.now() - startTime; const totalTokens = tokenCount + toolArgumentTokens; const tokensPerSecond = totalTokens > 0 ? totalTokens / (totalTime / 1000) : 0; // Finalize round metrics if (toolCalls.length > 0) { const roundEndTime = Date.now(); const totalToolTime = toolMetrics.toolTimes.reduce((sum, tool) => sum + tool.totalTime, 0); const llmTime = totalTime - totalToolTime; toolMetrics.rounds.push({ roundNumber: toolMetrics.currentRound, llmTime: llmTime, toolTime: totalToolTime, toolCount: toolCalls.length, }); } if (process.env.DEBUG_GRAPHLIT_SDK_METRICS) { const metricsData = { totalTime: `${totalTime}ms`, ttft: `${firstTokenTime}ms`, ttfmc: firstMeaningfulContentTime > 0 ? `${firstMeaningfulContentTime}ms` : null, contentTokens: tokenCount, toolTokens: toolArgumentTokens, totalTokens: totalTokens, tps: tokensPerSecond.toFixed(2), }; console.log(`📊 [OpenAI] Performance: Total=${metricsData.totalTime} | TTFT=${metricsData.ttft}${metricsData.ttfmc ? ` | TTFMC=${metricsData.ttfmc}` : ""} | Tokens(content/tool/total)=${metricsData.contentTokens}/${metricsData.toolTokens}/${metricsData.totalTokens} | TPS=${metricsData.tps}`); // Tool calling metrics if (toolCalls.length > 0) { const successRate = ((toolMetrics.successfulTools / toolMetrics.totalTools) * 100).toFixed(1); const avgToolTime = toolMetrics.toolTimes.reduce((sum, tool) => sum + tool.totalTime, 0) / toolMetrics.toolTimes.length; console.log(`🔧 [OpenAI] Tools: Total=${toolMetrics.totalTools} | Success=${toolMetrics.successfulTools} | Failed=${toolMetrics.failedTools} | SuccessRate=${successRate}% | AvgTime=${avgToolTime.toFixed(2)}ms`); // Tool timing details (consolidated) const toolTimings = toolMetrics.toolTimes .map((tool, idx) => `${tool.name}:${tool.argumentBuildTime}ms`) .join(" | "); if (toolTimings) { console.log(`🔨 [OpenAI] Tool Timings: ${toolTimings}`); } // Round metrics (consolidated) const roundMetrics = toolMetrics.rounds .map((round) => { const efficiency = round.toolCount > 0 ? ((round.llmTime / (round.llmTime + round.toolTime)) * 100).toFixed(1) : 100; return `R${round.roundNumber}(LLM:${round.llmTime}ms,Tools:${round.toolTime}ms,Eff:${efficiency}%)`; }) .join(" | "); if (roundMetrics) { console.log(`🔄 [OpenAI] Rounds: ${roundMetrics}`); } } if (interTokenDelays.length > 0) { const avgDelay = interTokenDelays.reduce((a, b) => a + b, 0) / interTokenDelays.length; const sortedDelays = [...interTokenDelays].sort((a, b) => a - b); const p50Delay = sortedDelays[Math.floor(sortedDelays.length * 0.5)]; const p95Delay = sortedDelays[Math.floor(sortedDelays.length * 0.95)]; const p99Delay = sortedDelays[Math.floor(sortedDelays.length * 0.99)]; console.log(`⏳ [OpenAI] Inter-Token: Avg=${avgDelay.toFixed(2)}ms | P50=${p50Delay}ms | P95=${p95Delay}ms | P99=${p99Delay}ms`); } console.log(`✅ [OpenAI] Final message (${fullMessage.length} chars): "${fullMessage}"`); } // Emit completion event so UIEventAdapter flushes the chunk buffer // before the SSE stream closes onEvent({ type: "complete", tokens: tokenCount, }); // Pass usage data if available onComplete(fullMessage, toolCalls, usageData); } catch (error) { // Normalize OpenAI errors into ProviderError const errorMessage = error.message || error.toString(); if (isRateLimitError(error)) { if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`⚠️ [OpenAI] Rate limit hit`); } throw new ProviderError(`OpenAI rate limit exceeded: ${errorMessage}`, { provider: "openai", statusCode: 429, retryable: true, requestId: extractRequestId(error), cause: error, }); } if (isNetworkError(error)) { if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`⚠️ [OpenAI] Network error: ${errorMessage}`); } throw new ProviderError(`OpenAI network error: ${errorMessage}`, { provider: "openai", statusCode: 503, retryable: true, requestId: extractRequestId(error), cause: error, }); } if (isRetryableServerError(error)) { if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`⚠️ [OpenAI] Server error: ${errorMessage}`); } throw new ProviderError(`OpenAI server error: ${errorMessage}`, { provider: "openai", statusCode: error.status || error.statusCode || 500, retryable: true, requestId: extractRequestId(error), cause: error, }); } // Don't emit error event here - let the client handle it to avoid duplicates throw error; } } export async function streamWithAnthropic(specification, messages, systemPrompt, tools, anthropicClient, // Properly typed Anthropic client onEvent, onComplete, abortSignal, thinkingConfig) { let fullMessage = ""; let toolCalls = []; let usageData = null; // Performance metrics const startTime = Date.now(); let firstTokenTime = 0; let firstMeaningfulContentTime = 0; let tokenCount = 0; let toolArgumentTokens = 0; let lastEventTime = 0; const interTokenDelays = []; // Tool calling metrics const toolMetrics = { totalTools: 0, successfulTools: 0, failedTools: 0, toolTimes: [], currentToolStart: 0, roundStartTime: startTime, rounds: [], currentRound: 1, }; try { const modelName = getModelName(specification); if (!modelName) { throw new Error(`No model name found for Anthropic specification: ${specification.name}`); } // Calculate smart default for max_tokens based on thinking mode const defaultMaxTokens = thinkingConfig ? 32768 : 8192; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`🤖 [Anthropic] Model Config: Service=Anthropic | Model=${modelName} | Temperature=${specification.anthropic?.temperature} | MaxTokens=${specification.anthropic?.completionTokenLimit || defaultMaxTokens} | SystemPrompt=${systemPrompt ? "Yes" : "No"} | Tools=${tools?.length || 0} | Thinking=${!!thinkingConfig} | Spec="${specification.name}"`); } // Use proper Anthropic SDK types for the config const streamConfig = { model: modelName, messages, stream: true, max_tokens: specification.anthropic?.completionTokenLimit || defaultMaxTokens, }; // Handle temperature based on thinking configuration and model // Claude 4.7 Opus (adaptive thinking) does not accept sampling parameters at all. const isAdaptiveThinking = thinkingConfig?.type === "adaptive"; if (thinkingConfig && !isAdaptiveThinking) { // When legacy thinking budget is enabled, temperature must be 1 streamConfig.temperature = 1; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`🧠 [Anthropic] Setting temperature to 1 (required for extended thinking)`); } } else if (!isAdaptiveThinking) { // Only add temperature if it's defined and valid for non-thinking requests if (specification.anthropic?.temperature !== undefined && specification.anthropic?.temperature !== null && typeof specification.anthropic?.temperature === "number") { streamConfig.temperature = specification.anthropic.temperature; } } if (systemPrompt?.length) { streamConfig.system = systemPrompt; } // Add tools if provided if (tools && tools.length > 0) { streamConfig.tools = tools.map((tool) => ({ name: tool.name, description: tool.description, input_schema: tool.schema ? JSON.parse(tool.schema) : {}, })); streamConfig.tools[streamConfig.tools.length - 1] = { ...streamConfig.tools[streamConfig.tools.length - 1], cache_control: { type: "ephemeral" }, }; } // Check if this is a 1M context model (beta flag, same underlying model ID) const is1MContext = specification.anthropic?.model === Types.AnthropicModels.Claude_4_6Opus_1M || specification.anthropic?.model === Types.AnthropicModels.Claude_4_6Opus_1M_20260205; // Add thinking config if provided if (thinkingConfig) { if (thinkingConfig.type === "adaptive") { // Claude 4.7 Opus: adaptive thinking, effort controls depth via output_config streamConfig.thinking = { type: "adaptive" }; if (thinkingConfig.effort) { streamConfig.output_config = { effort: thinkingConfig.effort }; } if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`🧠 [Anthropic] Adaptive thinking enabled | Effort: ${thinkingConfig.effort ?? "default"}`); } } else { streamConfig.thinking = thinkingConfig; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`🧠 [Anthropic] Extended thinking enabled | Budget: ${thinkingConfig.budget_tokens} tokens`); } // Adjust max_tokens to account for thinking budget // 1M context models have a 1,000,000 token window; standard models have 200,000 const contextWindowLimit = is1MContext ? 1000000 : 200000; const totalTokens = streamConfig.max_tokens + thinkingConfig.budget_tokens; if (totalTokens > contextWindowLimit) { console.warn(`⚠️ [Anthropic] Total tokens (${totalTokens}) exceeds ${is1MContext ? "1M" : "200K"} context window, adjusting completion tokens...`); streamConfig.max_tokens = Math.max(1000, contextWindowLimit - thinkingConfig.budget_tokens); } } } // Build request options with optional abort signal and 1M context beta header const requestOptions = {}; if (abortSignal) { requestOptions.signal = abortSignal; } if (is1MContext) { requestOptions.headers = { "anthropic-beta": "context-1m-2025-08-07", }; } if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`⏱️ [Anthropic] Starting LLM call at: ${new Date().toISOString()}${is1MContext ? " | 1M context beta enabled" : ""}`); } const stream = await anthropicClient.messages.create(streamConfig, Object.keys(requestOptions).length > 0 ? requestOptions : undefined); let activeContentBlock = false; let currentContentBlockIndex; let currentContentBlockType; let thinkingContent = ""; let thinkingSignature = ""; let completeThinkingContent = ""; // Accumulate all thinking content for conversation history let completeThinkingSignature = ""; // Accumulate signature for conversation history for await (const chunk of stream) { // Debug log all chunk types if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[Anthropic] Received chunk type: ${chunk.type}`); } // Capture usage data from various message events // Prioritize message_start.message usage data as it's more complete if (chunk.type === "message_start" && chunk.message?.usage) { usageData = chunk.message.usage; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[Anthropic] Usage data captured from message_start.message:`, usageData); } } else if (chunk.type === "message_delta" && chunk.usage && !usageData?.input_tokens) { // Only use message_delta if we don't have input_tokens yet usageData = chunk.usage; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[Anthropic] Usage data captured from ${chunk.type}:`, usageData); } } else if ((chunk.type === "message_delta" || chunk.type === "message_start") && chunk.usage) { // Merge usage data if we have partial data if (usageData) { usageData = { ...usageData, ...chunk.usage }; } else { usageData = chunk.usage; } if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[Anthropic] Usage data merged from ${chunk.type}:`, usageData); } } if (chunk.type === "content_block_start") { activeContentBlock = true; currentContentBlockIndex = chunk.index; currentContentBlockType = chunk.content_block.type; if (chunk.content_block.type === "thinking") { // Start of thinking block (native extended thinking) thinkingContent = ""; thinkingSignature = ""; onEvent({ type: "reasoning_start", format: "thinking_tag", }); if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log("[Anthropic] Extended thinking block started"); } } else if (chunk.content_block.type === "tool_use") { const toolCall = { id: chunk.content_block.id, name: chunk.content_block.name, arguments: "", }; toolCalls.push(toolCall); // Track tool metrics toolMetrics.totalTools++; toolMetrics.currentToolStart = Date.now(); toolMetrics.toolTimes.push({ name: toolCall.name, startTime: toolMetrics.currentToolStart, argumentBuildTime: 0, totalTime: 0, }); // Track TTFT for first tool if no content yet if (firstTokenTime === 0) { firstTokenTime = Date.now() - startTime; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`\n⚡ [Anthropic] Time to First Token (Tool Call): ${firstTokenTime}ms`); } } onEvent({ type: "tool_call_start", toolCall: { id: toolCall.id, name: toolCall.name, }, }); } } else if (chunk.type === "content_block_delta") { // Handle thinking blocks with native extended thinking if (chunk.delta.type === "thinking_delta" && "thinking" in chunk.delta) { // Accumulate thinking content thinkingContent += chunk.delta.thinking; // Track first token time if (firstTokenTime === 0) { firstTokenTime = Date.now() - startTime; } onEvent({ type: "reasoning_delta", content: chunk.delta.thinking, format: "thinking_tag", }); if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[Anthropic] Thinking delta: "${chunk.delta.thinking}"`); } } else if (chunk.delta.type === "signature_delta" && "signature" in chunk.delta) { // Handle signature for thinking blocks thinkingSignature += chunk.delta.signature; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[Anthropic] Signature delta: "${chunk.delta.signature}"`); } } else if (chunk.delta.type === "text_delta" && "text" in chunk.delta) { fullMessage += chunk.delta.text; tokenCount++; const currentTime = Date.now(); // Track TTFT (first token regardless of type) if (firstTokenTime === 0) { firstTokenTime = currentTime - startTime; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`\n⚡ [Anthropic] Time to First Token (TTFT): ${firstTokenTime}ms`); } } // Track first meaningful content (excludes tool calls) if (firstMeaningfulContentTime === 0 && chunk.delta.text.trim()) { firstMeaningfulContentTime = currentTime - startTime; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`\n🎯 [Anthropic] Time to First Meaningful Content: ${firstMeaningfulContentTime}ms`); } } // Track inter-token delays if (lastEventTime > 0) { const delay = currentTime - lastEventTime; interTokenDelays.push(delay); } lastEventTime = currentTime; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[Anthropic] Token #${tokenCount}: "${chunk.delta.text}" | Accumulated: ${fullMessage.length} chars`); } onEvent({ type: "token", token: chunk.delta.text, }); } else if (chunk.delta.type === "input_json_delta") { // Find the current tool call and append arguments const currentTool = toolCalls[toolCalls.length - 1]; if (currentTool) { currentTool.arguments += chunk.delta.partial_json; // Count tool argument tokens (rough estimate: ~4 chars per token) toolArgumentTokens += Math.ceil(chunk.delta.partial_json.length / 4); // Debug logging for partial JSON accumulation if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[Anthropic] Tool ${currentTool.name} - Partial JSON chunk: "${chunk.delta.partial_json}"`); console.log(`[Anthropic] Tool ${currentTool.name} - Total accumulated: ${currentTool.arguments.length} chars`); } onEvent({ type: "tool_call_delta", toolCallId: currentTool.id, argumentDelta: chunk.delta.partial_json, }); } } } else if (chunk.type === "content_block_stop") { activeContentBlock = false; // Check if we're stopping a thinking block if (currentContentBlockType === "thinking" && chunk.index === currentContentBlockIndex) { // Emit the complete thinking block with signature onEvent({ type: "reasoning_end", fullContent: thinkingContent, signature: thinkingSignature || undefined, }); // Accumulate thinking content and signature for conversation history preservation if (thinkingContent.trim()) { completeThinkingContent += thinkingContent; } if (thinkingSignature.trim()) { completeThinkingSignature = thinkingSignature; // Use the last signature } if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[Anthropic] Thinking block completed:`, { contentLength: thinkingContent.length, hasSignature: !!thinkingSignature, signature: thinkingSignature, totalThinkingLength: completeThinkingContent.length, }); } // Reset current thinking state (but keep completeThinkingContent) thinkingContent = ""; thinkingSignature = ""; } currentContentBlockType = undefined; currentContentBlockIndex = undefined; // Tool call complete const currentTool = toolCalls[toolCalls.length - 1]; if (currentTool && chunk.content_block?.type === "tool_use") { const currentTime = Date.now(); // Update tool metrics const toolIndex = toolCalls.length - 1; if (toolIndex < toolMetrics.toolTimes.length) { const toolTime = toolMetrics.toolTimes[toolIndex]; toolTime.argumentBuildTime = currentTime - toolTime.startTime; toolTime.totalTime = toolTime.argumentBuildTime; toolTime.name = currentTool.name; } // Track tool success/failure try { JSON.parse(currentTool.arguments); toolMetrics.successfulTools++; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[Anthropic] ✅ Valid JSON for ${currentTool.name}`); } } catch (e) { toolMetrics.failedTools++; console.error(`[Anthropic] ❌ Invalid JSON for ${currentTool.name}: ${e}`); } // Log the final JSON for debugging if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING || !isValidJSON(currentTool.arguments)) { console.log(`[Anthropic] Tool ${currentTool.name} complete with arguments (${currentTool.arguments.length} chars):`); console.log(currentTool.arguments); // Check if JSON appears truncated const lastChars = currentTool.arguments.slice(-10); if (!lastChars.includes("}") && currentTool.arguments.length > 100) { console.warn(`[Anthropic] WARNING: JSON may be truncated - doesn't end with '}': ...${lastChars}`); } } onEvent({ type: "tool_call_parsed", toolCall: { id: currentTool.id, name: currentTool.name, arguments: currentTool.arguments, }, }); } } else if (chunk.type === "message_stop" && activeContentBlock) { // Handle Anthropic bug: message_stop without content_block_stop console.warn(`[Anthropic] Received message_stop without content_block_stop - handling as implicit block stop`); activeContentBlock = false; // Emit synthetic content_block_stop for the current tool const currentTool = toolCalls[toolCalls.length - 1]; if (currentTool) { // Log the incomplete tool console.warn(`[Anthropic] Synthetic content_block_stop for incomplete tool ${currentTool.name} (${currentTool.arguments.length} chars)`); // Only emit tool_call_complete if we have valid JSON if (isValidJSON(currentTool.arguments)) { onEvent({ type: "tool_call_parsed", toolCall: { id: currentTool.id, name: currentTool.name, arguments: currentTool.arguments, }, }); } else { console.error(`[Anthropic] Tool ${currentTool.name} has incomplete JSON, skipping tool_call_complete event`); } } } } // Final check: normalize and validate tool calls const validToolCalls = toolCalls .map((tc, idx) => { // For tools with no parameters, Anthropic doesn't send input_json_delta // So we need to convert empty arguments to valid JSON if (tc.arguments === "") { tc.arguments = "{}"; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`[Anthropic] Normalized empty arguments to "{}" for tool ${tc.name}`); } } if (!isValidJSON(tc.arguments)) { console.warn(`[Anthropic] Filtering out incomplete tool call ${idx} (${tc.name}) with INVALID JSON (${tc.arguments.length} chars)`); return null; } return tc; }) .filter((tc) => tc !== null); if (toolCalls.length !== validToolCalls.length) { console.log(`[Anthropic] Filtered out ${toolCalls.length - validToolCalls.length} incomplete tool calls`); console.log(`[Anthropic] Successfully processed ${validToolCalls.length} valid tool calls`); } // Calculate final metrics including tool calling insights const totalTime = Date.now() - startTime; const totalTokens = tokenCount + toolArgumentTokens; const tokensPerSecond = totalTokens > 0 ? totalTokens / (totalTime / 1000) : 0; // Finalize round metrics if (validToolCalls.length > 0) { const roundEndTime = Date.now(); const totalToolTime = toolMetrics.toolTimes.reduce((sum, tool) => sum + tool.totalTime, 0); const llmTime = totalTime - totalToolTime; toolMetrics.rounds.push({ roundNumber: toolMetrics.currentRound, llmTime: llmTime, toolTime: totalToolTime, toolCount: validToolCalls.length, }); } if (process.env.DEBUG_GRAPHLIT_SDK_METRICS) { const metricsData = { totalTime: `${totalTime}ms`, ttft: `${firstTokenTime}ms`, ttfmc: firstMeaningfulContentTime > 0 ? `${firstMeaningfulContentTime}ms` : null, contentTokens: tokenCount, toolTokens: toolArgumentTokens, totalTokens: totalTokens, tps: tokensPerSecond.toFixed(2), }; console.log(`📊 [Anthropic] Performance: Total=${metricsData.totalTime} | TTFT=${metricsData.ttft}${metricsData.ttfmc ? ` | TTFMC=${metricsData.ttfmc}` : ""} | Tokens(content/tool/total)=${metricsData.contentTokens}/${metricsData.toolTokens}/${metricsData.totalTokens} | TPS=${metricsData.tps}`); // Tool calling metrics if (validToolCalls.length > 0) { const successRate = ((toolMetrics.successfulTools / toolMetrics.totalTools) * 100).toFixed(1); const avgToolTime = toolMetrics.toolTimes.reduce((sum, tool) => sum + tool.totalTime, 0) / toolMetrics.toolTimes.length; console.log(`🔧 [Anthropic] Tools: Total=${toolMetrics.totalTools} | Success=${toolMetrics.successfulTools} | Failed=${toolMetrics.failedTools} | SuccessRate=${successRate}% | AvgTime=${avgToolTime.toFixed(2)}ms`); // Tool timing details (consolidated) const toolTimings = toolMetrics.toolTimes .map((tool, idx) => `${tool.name}:${tool.argumentBuildTime}ms`) .join(" | "); if (toolTimings) { console.log(`🔨 [Anthropic] Tool Timings: ${toolTimings}`); } // Round metrics (consolidated) const roundMetrics = toolMetrics.rounds .map((round) => { const efficiency = round.toolCount > 0 ? ((round.llmTime / (round.llmTime + round.toolTime)) * 100).toFixed(1) : 100; return `R${round.roundNumber}(LLM:${round.llmTime}ms,Tools:${round.toolTime}ms,Eff:${efficiency}%)`; }) .join(" | "); if (roundMetrics) { console.log(`🔄 [Anthropic] Rounds: ${roundMetrics}`); } } if (interTokenDelays.length > 0) { const avgDelay = interTokenDelays.reduce((a, b) => a + b, 0) / interTokenDelays.length; const sortedDelays = [...interTokenDelays].sort((a, b) => a - b); const p50Delay = sortedDelays[Math.floor(sortedDelays.length * 0.5)]; const p95Delay = sortedDelays[Math.floor(sortedDelays.length * 0.95)]; const p99Delay = sortedDelays[Math.floor(sortedDelays.length * 0.99)]; console.log(`⏳ [Anthropic] Inter-Token: Avg=${avgDelay.toFixed(2)}ms | P50=${p50Delay}ms | P95=${p95Delay}ms | P99=${p99Delay}ms`); } console.log(`✅ [Anthropic] Final message (${fullMessage.length} chars): "${fullMessage}"`); } // Build structured reasoning metadata (replaces XML-in-message approach) let reasoningMetadata; if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`🧠 [Anthropic] Debug - validToolCalls: ${validToolCalls.length}, thinking content: ${completeThinkingContent.length} chars, fullMessage: ${fullMessage.length} chars`); } if (completeThinkingContent.trim()) { reasoningMetadata = { content: completeThinkingContent, format: "thinking_tag", }; if (completeThinkingSignature) { reasoningMetadata.signature = completeThinkingSignature; } if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`🧠 [Anthropic] Structured reasoning metadata: ${completeThinkingContent.length} chars, signature: ${completeThinkingSignature?.length || 0}`); } } // Emit completion event so UIEventAdapter flushes the chunk buffer // before the SSE stream closes onEvent({ type: "complete", tokens: tokenCount, }); onComplete(fullMessage, validToolCalls, usageData, reasoningMetadata); } catch (error) { // Normalize Anthropic errors into ProviderError const errorMessage = error.message || error.toString(); if (isRateLimitError(error)) { if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`⚠️ [Anthropic] Rate limit / overloaded`); } throw new ProviderError(`Anthropic rate limit exceeded: ${errorMessage}`, { provider: "anthropic", statusCode: error.status || error.statusCode || 429, retryable: true, requestId: extractRequestId(error), cause: error, }); } if (isRetryableServerError(error)) { if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`⚠️ [Anthropic] Server error: ${errorMessage}`); } throw new ProviderError(`Anthropic server error: ${errorMessage}`, { provider: "anthropic", statusCode: error.status || error.statusCode || 500, retryable: true, requestId: extractRequestId(error), cause: error, }); } if (isNetworkError(error)) { if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) { console.log(`⚠️ [Anthropic] Network error: ${errorMessage}`); } throw new ProviderError(`Anthropic network error: ${errorMessage}`, { provider: "anthropic", statusCode: 503, retryable: true, requestId: extractRequestId(error), c