graphlit-client
Version:
Graphlit API Client for TypeScript
1,014 lines • 156 kB
JavaScript
import * as Types from "../generated/graphql-types.js";
import { getModelName } from "../model-mapping.js";
import { ProviderError, isRetryableServerError, isRateLimitError, isNetworkError, extractRequestId, } from "../types/internal.js";
import { createHash } from "node:crypto";
/**
 * Reports whether a value can be parsed by `JSON.parse` without throwing.
 *
 * @param {string} str - Candidate JSON text.
 * @returns {boolean} `true` when parsing succeeds, `false` when it throws.
 */
function isValidJSON(str) {
  let parsesCleanly = true;
  try {
    JSON.parse(str);
  } catch {
    // Any parse failure simply means "not valid JSON"; the error itself is not needed.
    parsesCleanly = false;
  }
  return parsesCleanly;
}
/**
 * Reduces a JSON Schema object to a flat subset and returns it serialized.
 *
 * Only the top-level `type`, `required`, and each property's `type`,
 * `description` and (array-valued) `enum` are kept; every other keyword
 * (patterns, formats, nested schemas, ...) is dropped.
 *
 * @param {object|*} schema - Schema to simplify. Non-object values (and
 *   `null`) are not simplified; they are passed straight to `JSON.stringify`.
 * @returns {string} JSON text of the simplified schema.
 */
function simplifySchemaForGroq(schema) {
  if (typeof schema !== "object" || schema === null) {
    return JSON.stringify(schema);
  }
  const simplified = {
    type: schema.type || "object",
    properties: {},
    required: schema.required || [],
  };
  if (schema.properties) {
    for (const [key, value] of Object.entries(schema.properties)) {
      // A property may be declared with a null/undefined definition; treat it
      // as an empty definition instead of crashing on `null.type`.
      const prop = value ?? {};
      const simplifiedProp = {
        type: prop.type || "string",
        description: prop.description || "",
      };
      // Keep enum constraints, but only when they are a real list of values.
      if (Array.isArray(prop.enum)) {
        simplifiedProp.enum = prop.enum;
      }
      simplified.properties[key] = simplifiedProp;
    }
  }
  return JSON.stringify(simplified);
}
/**
 * Produces a deep copy of a schema with the fields this module strips for
 * Google Gemini removed.
 *
 * - `$schema` and `additionalProperties` keys are dropped at every depth.
 * - A string-valued `format` is kept only when it is `"enum"` or `"date-time"`.
 * - Arrays are cleaned element by element; primitives and `null` are returned
 *   unchanged.
 *
 * @param {*} schema - Schema (or schema fragment) to clean.
 * @returns {*} The cleaned copy, or the input itself for non-object values.
 */
function cleanSchemaForGoogle(schema) {
  const isObjectLike = schema !== null && typeof schema === "object";
  if (!isObjectLike) {
    return schema;
  }
  if (Array.isArray(schema)) {
    return schema.map((element) => cleanSchemaForGoogle(element));
  }
  const result = {};
  Object.entries(schema).forEach(([field, fieldValue]) => {
    const isUnsupportedField = field === "$schema" || field === "additionalProperties";
    if (isUnsupportedField) {
      return;
    }
    if (field === "format" && typeof fieldValue === "string") {
      // Unsupported formats ("date", "time", "email", ...) are omitted entirely.
      const isSupportedFormat = fieldValue === "enum" || fieldValue === "date-time";
      if (isSupportedFormat) {
        result[field] = fieldValue;
      }
      return;
    }
    result[field] = cleanSchemaForGoogle(fieldValue);
  });
  return result;
}
/**
 * Returns the first 16 hexadecimal characters of the SHA-256 digest of `value`.
 *
 * @param {string|Buffer|TypedArray|DataView} value - Data fed to the hash.
 * @returns {string} 16-character lowercase hex prefix of the digest.
 */
function shortHash(value) {
  const hasher = createHash("sha256");
  hasher.update(value);
  const hexDigest = hasher.digest("hex");
  return hexDigest.slice(0, 16);
}
/**
 * Derives a prompt-cache key for an OpenAI specification.
 *
 * The key has the form `spec:<specification id>:<short hash>`, where the hash
 * covers the JSON serialization of `stablePrefix` (a nullish prefix is hashed
 * as the empty string).
 *
 * @param {object} specification - Specification; its `serviceType` and `id` are read.
 * @param {*} stablePrefix - JSON-serializable value identifying the stable part of the prompt.
 * @returns {string|undefined} The cache key, or `undefined` when the
 *   specification is not an OpenAI one or has no id.
 */
function stablePromptCacheKey(specification, stablePrefix) {
  const isOpenAiSpecification = specification.serviceType === Types.ModelServiceTypes.OpenAi;
  if (!isOpenAiSpecification) {
    return undefined;
  }
  const { id } = specification;
  return id
    ? `spec:${id}:${shortHash(JSON.stringify(stablePrefix ?? ""))}`
    : undefined;
}
/**
 * Evicts entries from the front of `cache.entries` until it holds at most
 * `cache.maxEntries` items (100 when `maxEntries` is null/undefined).
 *
 * Keys are removed in the collection's own iteration order.
 * NOTE(review): `cache.entries` is assumed to be a `Map` (or `Set`), in which
 * case iteration order is insertion order and the oldest entries go first.
 *
 * @param {{ entries: Map<*, *>, maxEntries?: number }} cache - Cache to trim in place.
 * @returns {void}
 */
function trimGooglePromptCache(cache) {
  const maxEntries = cache.maxEntries ?? 100;
  while (cache.entries.size > maxEntries) {
    // Use the iterator's `done` flag rather than the key's truthiness, so
    // falsy keys such as "" or 0 are evicted instead of ending the loop early.
    const { done, value: oldestKey } = cache.entries.keys().next();
    if (done) {
      break;
    }
    cache.entries.delete(oldestKey);
  }
}
/**
 * Decides whether an error looks like a "cached content not found" failure.
 *
 * The error matches when its status (the first non-nullish of `status`,
 * `statusCode`, `code`) is `404` or `"404"`, or when its `message` mentions
 * both "cached content" and "not found" in either order (case-insensitive).
 *
 * @param {*} error - Thrown value to inspect; may be nullish.
 * @returns {boolean} `true` when the error matches.
 */
function isGoogleCachedContentNotFound(error) {
  const statusLike = error?.status ?? error?.statusCode ?? error?.code;
  if (statusLike === 404 || statusLike === "404") {
    return true;
  }
  const messageText = error?.message || "";
  return /cached content.*not found|not found.*cached content/i.test(messageText);
}
/**
 * Normalizes a system prompt into a list of non-empty, trimmed strings.
 *
 * @param {string|Array<string|null|undefined>|null|undefined} systemPrompt -
 *   A single prompt or an array of prompts; nullish and blank entries are dropped.
 * @returns {string[]} Trimmed prompts in their original order.
 */
function getGoogleSystemInstructionParts(systemPrompt) {
  const candidates = Array.isArray(systemPrompt) ? systemPrompt : [systemPrompt];
  const parts = [];
  for (const candidate of candidates) {
    const trimmed = candidate?.trim() || "";
    if (trimmed.length > 0) {
      parts.push(trimmed);
    }
  }
  return parts;
}
/**
 * Streams a chat completion through the OpenAI SDK and relays progress as events.
 *
 * Events passed to `onEvent`, in the order they can occur:
 * - `{ type: "token", token }` for every content delta;
 * - `{ type: "tool_call_start", toolCall: { id, name } }` the first time a tool-call index is seen;
 * - `{ type: "tool_call_delta", toolCallId, argumentDelta }` for every argument fragment;
 * - `{ type: "tool_call_parsed", toolCall: { id, name, arguments } }` once per tool call after the stream ends;
 * - `{ type: "complete", tokens }` right before `onComplete` is invoked.
 *
 * Diagnostic logging is controlled by the `DEBUG_GRAPHLIT_SDK_STREAMING` and
 * `DEBUG_GRAPHLIT_SDK_METRICS` environment variables.
 *
 * @param {object} specification - Model specification; `name`, `serviceType`, `id` and `openAI` settings are read.
 * @param {Array<object>} messages - Chat messages forwarded unchanged to the API.
 * @param {Array<{name: string, description?: string, schema?: string}>|undefined} tools -
 *   Tool definitions; `schema` is a JSON string that is parsed into the function parameters.
 * @param {object} openaiClient - Client exposing `chat.completions.create(config, options)`.
 * @param {(event: object) => void} onEvent - Receives the streaming events listed above.
 * @param {(message: string, toolCalls: Array<object>, usage: object|null) => void} onComplete -
 *   Called once with the accumulated text, the collected tool calls and the last usage payload seen.
 * @param {AbortSignal} [abortSignal] - Forwarded to the SDK request as `signal`.
 * @param {string} [reasoningEffort] - Sent lower-cased as `reasoning_effort` when provided.
 * @returns {Promise<void>}
 * @throws {ProviderError} For failures classified as rate-limit, network or retryable server errors.
 *   Any other error is rethrown unchanged.
 */
export async function streamWithOpenAI(
  specification,
  messages,
  tools,
  openaiClient, // OpenAI client instance
  onEvent,
  onComplete,
  abortSignal,
  reasoningEffort,
) {
  let fullMessage = "";
  // Both arrays are keyed by the provider-supplied `tool_calls[].index`, so
  // they may contain holes; they are compacted once the stream has finished.
  const toolCalls = [];
  const toolTimesByIndex = [];
  let usageData = null;
  // Performance metrics
  const startTime = Date.now();
  let firstTokenTime = 0;
  let firstMeaningfulContentTime = 0;
  let tokenCount = 0;
  let toolArgumentTokens = 0;
  let lastEventTime = 0;
  const interTokenDelays = [];
  // Tool calling metrics
  const toolMetrics = {
    totalTools: 0,
    successfulTools: 0,
    failedTools: 0,
    toolTimes: [],
    currentToolStart: 0,
    roundStartTime: startTime,
    rounds: [],
    currentRound: 1,
  };
  try {
    const modelName = getModelName(specification);
    if (!modelName) {
      throw new Error(`No model name found for specification: ${specification.name} (service: ${specification.serviceType})`);
    }
    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
      console.log(`đ¤ [OpenAI] Model Config: Service=OpenAI | Model=${modelName} | Temperature=${specification.openAI?.temperature} | MaxTokens=${specification.openAI?.completionTokenLimit || "null"} | Tools=${tools?.length || 0} | ReasoningEffort=${reasoningEffort || "none"} | Spec="${specification.name}"`);
    }
    const streamConfig = buildOpenAIChatStreamConfig(specification, messages, tools, modelName, reasoningEffort);
    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
      console.log(`âąď¸ [OpenAI] Starting LLM call at: ${new Date().toISOString()}`);
    }
    const stream = await openaiClient.chat.completions.create(streamConfig, {
      signal: abortSignal,
    });
    for await (const chunk of stream) {
      // Tolerate chunks without a `choices` array (e.g. a usage-only chunk)
      // instead of throwing on `undefined[0]`.
      const choice = chunk.choices?.[0];
      const delta = choice?.delta;
      // Capture usage data from final chunk
      if (chunk.usage || chunk.x_groq?.usage) {
        usageData = chunk.usage || chunk.x_groq?.usage;
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
          console.log(`[OpenAI] Usage data captured:`, usageData);
        }
      }
      // Debug log chunk details
      if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
        console.log(`[OpenAI] Chunk:`, JSON.stringify(chunk, null, 2));
        if (delta?.content) {
          console.log(`[OpenAI] Content delta: "${delta.content}" (${delta.content.length} chars)`);
        }
        if (delta?.tool_calls) {
          console.log(`[OpenAI] Tool calls:`, delta.tool_calls);
        }
        if (choice?.finish_reason) {
          console.log(`[OpenAI] Finish reason: ${choice.finish_reason}`);
        }
      }
      if (delta?.content) {
        fullMessage += delta.content;
        tokenCount++;
        const currentTime = Date.now();
        // Track TTFT (first token regardless of type)
        if (firstTokenTime === 0) {
          firstTokenTime = currentTime - startTime;
          if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.log(`\n⥠[OpenAI] Time to First Token (TTFT): ${firstTokenTime}ms`);
          }
        }
        // Track first meaningful content (excludes tool calls)
        if (firstMeaningfulContentTime === 0 && delta.content.trim()) {
          firstMeaningfulContentTime = currentTime - startTime;
          if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
            console.log(`\nđŻ [OpenAI] Time to First Meaningful Content: ${firstMeaningfulContentTime}ms`);
          }
        }
        // Track inter-token delays
        if (lastEventTime > 0) {
          interTokenDelays.push(currentTime - lastEventTime);
        }
        lastEventTime = currentTime;
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
          console.log(`[OpenAI] Token #${tokenCount}: "${delta.content}" | Accumulated: ${fullMessage.length} chars`);
        }
        onEvent({
          type: "token",
          token: delta.content,
        });
      }
      // Handle tool calls
      if (delta?.tool_calls) {
        for (const toolCallDelta of delta.tool_calls) {
          const index = toolCallDelta.index;
          if (!toolCalls[index]) {
            toolCalls[index] = {
              id: toolCallDelta.id || `tool_${Date.now()}_${index}`,
              name: "",
              arguments: "",
            };
            // Track tool metrics. The timing record lives at the same index as
            // the tool call so the two stay paired even when indices arrive
            // out of order or with gaps.
            toolMetrics.totalTools++;
            toolMetrics.currentToolStart = Date.now();
            toolTimesByIndex[index] = {
              name: toolCallDelta.function?.name || "unknown",
              startTime: toolMetrics.currentToolStart,
              argumentBuildTime: 0,
              totalTime: 0,
            };
            // Track TTFT for first tool if no content yet
            if (firstTokenTime === 0) {
              firstTokenTime = Date.now() - startTime;
              if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
                console.log(`\n⥠[OpenAI] Time to First Token (Tool Call): ${firstTokenTime}ms`);
              }
            }
            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
              console.log(`[OpenAI] Starting new tool call: ${toolCalls[index].id}`);
            }
            onEvent({
              type: "tool_call_start",
              toolCall: {
                id: toolCalls[index].id,
                name: toolCallDelta.function?.name || "",
              },
            });
          }
          const toolCall = toolCalls[index];
          if (toolCallDelta.function?.name) {
            toolCall.name = toolCallDelta.function.name;
            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
              console.log(`[OpenAI] Tool name: ${toolCallDelta.function.name}`);
            }
          }
          if (toolCallDelta.function?.arguments) {
            toolCall.arguments += toolCallDelta.function.arguments;
            // Count tool argument tokens (rough estimate: ~4 chars per token)
            toolArgumentTokens += Math.ceil(toolCallDelta.function.arguments.length / 4);
            // Debug logging for partial JSON accumulation
            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
              console.log(`[OpenAI] Tool ${toolCall.name} - Partial JSON chunk: "${toolCallDelta.function.arguments}"`);
              console.log(`[OpenAI] Tool ${toolCall.name} - Total accumulated: ${toolCall.arguments.length} chars`);
            }
            onEvent({
              type: "tool_call_delta",
              toolCallId: toolCall.id,
              argumentDelta: toolCallDelta.function.arguments,
            });
          }
        }
      }
    }
    // Emit complete events for tool calls and finalize metrics.
    // `forEach` skips holes, so unused indices neither crash the loop nor
    // reach the caller; the result is a dense list in index order.
    const completedToolCalls = [];
    toolCalls.forEach((toolCall, index) => {
      const toolTime = toolTimesByIndex[index];
      if (toolTime) {
        toolTime.argumentBuildTime = Date.now() - toolTime.startTime;
        toolTime.totalTime = toolTime.argumentBuildTime; // For streaming, this is the same
        toolTime.name = toolCall.name; // Update with final name
        toolMetrics.toolTimes.push(toolTime);
      }
      // Track tool success/failure
      try {
        JSON.parse(toolCall.arguments);
        toolMetrics.successfulTools++;
        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
          console.log(`[OpenAI] â\nValid JSON for ${toolCall.name}`);
        }
      } catch (e) {
        toolMetrics.failedTools++;
        console.error(`[OpenAI] â Invalid JSON for ${toolCall.name}: ${e}`);
      }
      // Log the final JSON for debugging
      if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
        console.log(`[OpenAI] Tool ${toolCall.name} complete with arguments (${toolCall.arguments.length} chars):`);
        console.log(toolCall.arguments);
      }
      onEvent({
        type: "tool_call_parsed",
        toolCall: {
          id: toolCall.id,
          name: toolCall.name,
          arguments: toolCall.arguments,
        },
      });
      completedToolCalls.push(toolCall);
    });
    // Final summary logging
    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING && completedToolCalls.length > 0) {
      console.log(`[OpenAI] Successfully processed ${completedToolCalls.length} tool calls`);
    }
    // Finalize round metrics
    const totalTime = Date.now() - startTime;
    if (completedToolCalls.length > 0) {
      const totalToolTime = toolMetrics.toolTimes.reduce((sum, tool) => sum + tool.totalTime, 0);
      toolMetrics.rounds.push({
        roundNumber: toolMetrics.currentRound,
        llmTime: totalTime - totalToolTime,
        toolTime: totalToolTime,
        toolCount: completedToolCalls.length,
      });
    }
    if (process.env.DEBUG_GRAPHLIT_SDK_METRICS) {
      logOpenAIStreamMetrics({
        totalTime,
        firstTokenTime,
        firstMeaningfulContentTime,
        tokenCount,
        toolArgumentTokens,
        toolMetrics,
        toolCallCount: completedToolCalls.length,
        interTokenDelays,
        fullMessage,
      });
    }
    // Emit completion event so UIEventAdapter flushes the chunk buffer
    // before the SSE stream closes
    onEvent({
      type: "complete",
      tokens: tokenCount,
    });
    // Pass usage data if available
    onComplete(fullMessage, completedToolCalls, usageData);
  } catch (error) {
    // Don't emit error event here - let the client handle it to avoid duplicates
    throw normalizeOpenAIStreamError(error);
  }
}

/**
 * Builds the request body for a streaming `chat.completions.create` call.
 *
 * @param {object} specification - Model specification (`openAI` settings are read).
 * @param {Array<object>} messages - Chat messages.
 * @param {Array<object>|undefined} tools - Tool definitions with a JSON-string `schema`.
 * @param {string} modelName - Resolved model name.
 * @param {string} [reasoningEffort] - Optional reasoning effort.
 * @returns {object} The request body.
 * @throws {SyntaxError} When a tool's `schema` is not valid JSON.
 */
function buildOpenAIChatStreamConfig(specification, messages, tools, modelName, reasoningEffort) {
  const streamConfig = {
    model: modelName,
    messages,
    stream: true,
    stream_options: { include_usage: true },
    temperature: specification.openAI?.temperature,
  };
  // The cache key is derived from the parts of the request that stay stable
  // between turns: system messages, tool definitions and the model.
  const promptCacheKey = stablePromptCacheKey(specification, {
    system: messages
      .filter((message) => message.role === "system")
      .map((message) => message.content),
    tools,
    model: modelName,
  });
  if (promptCacheKey) {
    streamConfig.prompt_cache_key = promptCacheKey;
  }
  // Only add max_completion_tokens if it's defined
  if (specification.openAI?.completionTokenLimit) {
    streamConfig.max_completion_tokens = specification.openAI.completionTokenLimit;
  }
  // Add tools if provided
  if (tools && tools.length > 0) {
    streamConfig.tools = tools.map((tool) => ({
      type: "function",
      function: {
        name: tool.name,
        description: tool.description,
        parameters: tool.schema ? JSON.parse(tool.schema) : {},
      },
    }));
  }
  // Add reasoning effort (sent lower-cased as `reasoning_effort`)
  if (reasoningEffort) {
    streamConfig.reasoning_effort = reasoningEffort.toLowerCase();
    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
      console.log(`đ§ [OpenAI] Reasoning effort set to: ${reasoningEffort}`);
    }
  }
  return streamConfig;
}

/**
 * Writes the end-of-stream performance summary to the console.
 *
 * @param {object} metrics - Counters and timings collected while streaming.
 * @returns {void}
 */
function logOpenAIStreamMetrics({
  totalTime,
  firstTokenTime,
  firstMeaningfulContentTime,
  tokenCount,
  toolArgumentTokens,
  toolMetrics,
  toolCallCount,
  interTokenDelays,
  fullMessage,
}) {
  const totalTokens = tokenCount + toolArgumentTokens;
  // Guard against a zero elapsed time so the rate is never reported as Infinity.
  const tokensPerSecond = totalTokens > 0 && totalTime > 0 ? totalTokens / (totalTime / 1000) : 0;
  const ttfmcSegment = firstMeaningfulContentTime > 0
    ? ` | TTFMC=${firstMeaningfulContentTime}ms`
    : "";
  console.log(`đ [OpenAI] Performance: Total=${totalTime}ms | TTFT=${firstTokenTime}ms${ttfmcSegment} | Tokens(content/tool/total)=${tokenCount}/${toolArgumentTokens}/${totalTokens} | TPS=${tokensPerSecond.toFixed(2)}`);
  // Tool calling metrics
  if (toolCallCount > 0) {
    const successRate = ((toolMetrics.successfulTools / toolMetrics.totalTools) * 100).toFixed(1);
    const avgToolTime = toolMetrics.toolTimes.reduce((sum, tool) => sum + tool.totalTime, 0) /
      toolMetrics.toolTimes.length;
    console.log(`đ§ [OpenAI] Tools: Total=${toolMetrics.totalTools} | Success=${toolMetrics.successfulTools} | Failed=${toolMetrics.failedTools} | SuccessRate=${successRate}% | AvgTime=${avgToolTime.toFixed(2)}ms`);
    // Tool timing details (consolidated)
    const toolTimings = toolMetrics.toolTimes
      .map((tool) => `${tool.name}:${tool.argumentBuildTime}ms`)
      .join(" | ");
    if (toolTimings) {
      console.log(`đ¨ [OpenAI] Tool Timings: ${toolTimings}`);
    }
    // Round metrics (consolidated)
    const roundMetrics = toolMetrics.rounds
      .map((round) => {
        const efficiency = round.toolCount > 0
          ? ((round.llmTime / (round.llmTime + round.toolTime)) * 100).toFixed(1)
          : 100;
        return `R${round.roundNumber}(LLM:${round.llmTime}ms,Tools:${round.toolTime}ms,Eff:${efficiency}%)`;
      })
      .join(" | ");
    if (roundMetrics) {
      console.log(`đ [OpenAI] Rounds: ${roundMetrics}`);
    }
  }
  if (interTokenDelays.length > 0) {
    const avgDelay = interTokenDelays.reduce((a, b) => a + b, 0) / interTokenDelays.length;
    const sortedDelays = [...interTokenDelays].sort((a, b) => a - b);
    const p50Delay = sortedDelays[Math.floor(sortedDelays.length * 0.5)];
    const p95Delay = sortedDelays[Math.floor(sortedDelays.length * 0.95)];
    const p99Delay = sortedDelays[Math.floor(sortedDelays.length * 0.99)];
    console.log(`âł [OpenAI] Inter-Token: Avg=${avgDelay.toFixed(2)}ms | P50=${p50Delay}ms | P95=${p95Delay}ms | P99=${p99Delay}ms`);
  }
  console.log(`â\n[OpenAI] Final message (${fullMessage.length} chars): "${fullMessage}"`);
}

/**
 * Maps a failure from the OpenAI call onto a retryable `ProviderError` when it
 * is classified as a rate-limit, network or retryable server error (checked in
 * that order); otherwise returns the original value so it can be rethrown as-is.
 *
 * @param {*} error - Value caught from the streaming call.
 * @returns {*} A `ProviderError`, or `error` itself when it is not classified.
 */
function normalizeOpenAIStreamError(error) {
  // `error` may be any thrown value, including null/undefined.
  const errorMessage = error?.message || String(error);
  if (isRateLimitError(error)) {
    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
      console.log(`â ď¸ [OpenAI] Rate limit hit`);
    }
    return new ProviderError(`OpenAI rate limit exceeded: ${errorMessage}`, {
      provider: "openai",
      statusCode: 429,
      retryable: true,
      requestId: extractRequestId(error),
      cause: error,
    });
  }
  if (isNetworkError(error)) {
    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
      console.log(`â ď¸ [OpenAI] Network error: ${errorMessage}`);
    }
    return new ProviderError(`OpenAI network error: ${errorMessage}`, {
      provider: "openai",
      statusCode: 503,
      retryable: true,
      requestId: extractRequestId(error),
      cause: error,
    });
  }
  if (isRetryableServerError(error)) {
    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
      console.log(`â ď¸ [OpenAI] Server error: ${errorMessage}`);
    }
    return new ProviderError(`OpenAI server error: ${errorMessage}`, {
      provider: "openai",
      statusCode: error?.status || error?.statusCode || 500,
      retryable: true,
      requestId: extractRequestId(error),
      cause: error,
    });
  }
  return error;
}
export async function streamWithAnthropic(specification, messages, systemPrompt, tools, anthropicClient, // Properly typed Anthropic client
onEvent, onComplete, abortSignal, thinkingConfig) {
let fullMessage = "";
let toolCalls = [];
let usageData = null;
// Performance metrics
const startTime = Date.now();
let firstTokenTime = 0;
let firstMeaningfulContentTime = 0;
let tokenCount = 0;
let toolArgumentTokens = 0;
let lastEventTime = 0;
const interTokenDelays = [];
// Tool calling metrics
const toolMetrics = {
totalTools: 0,
successfulTools: 0,
failedTools: 0,
toolTimes: [],
currentToolStart: 0,
roundStartTime: startTime,
rounds: [],
currentRound: 1,
};
try {
const modelName = getModelName(specification);
if (!modelName) {
throw new Error(`No model name found for Anthropic specification: ${specification.name}`);
}
// Calculate smart default for max_tokens based on thinking mode
const defaultMaxTokens = thinkingConfig ? 32768 : 8192;
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`đ¤ [Anthropic] Model Config: Service=Anthropic | Model=${modelName} | Temperature=${specification.anthropic?.temperature} | MaxTokens=${specification.anthropic?.completionTokenLimit || defaultMaxTokens} | SystemPrompt=${systemPrompt ? "Yes" : "No"} | Tools=${tools?.length || 0} | Thinking=${!!thinkingConfig} | Spec="${specification.name}"`);
}
// Use proper Anthropic SDK types for the config
const streamConfig = {
model: modelName,
messages,
stream: true,
max_tokens: specification.anthropic?.completionTokenLimit || defaultMaxTokens,
};
// Handle temperature based on thinking configuration and model
// Claude 4.7 Opus (adaptive thinking) does not accept sampling parameters at all.
const isAdaptiveThinking = thinkingConfig?.type === "adaptive";
if (thinkingConfig && !isAdaptiveThinking) {
// When legacy thinking budget is enabled, temperature must be 1
streamConfig.temperature = 1;
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`đ§ [Anthropic] Setting temperature to 1 (required for extended thinking)`);
}
}
else if (!isAdaptiveThinking) {
// Only add temperature if it's defined and valid for non-thinking requests
if (specification.anthropic?.temperature !== undefined &&
specification.anthropic?.temperature !== null &&
typeof specification.anthropic?.temperature === "number") {
streamConfig.temperature = specification.anthropic.temperature;
}
}
if (systemPrompt?.length) {
streamConfig.system = systemPrompt;
}
// Add tools if provided
if (tools && tools.length > 0) {
streamConfig.tools = tools.map((tool) => ({
name: tool.name,
description: tool.description,
input_schema: tool.schema ? JSON.parse(tool.schema) : {},
}));
streamConfig.tools[streamConfig.tools.length - 1] = {
...streamConfig.tools[streamConfig.tools.length - 1],
cache_control: { type: "ephemeral" },
};
}
// Check if this is a 1M context model (beta flag, same underlying model ID)
const is1MContext = specification.anthropic?.model ===
Types.AnthropicModels.Claude_4_6Opus_1M ||
specification.anthropic?.model ===
Types.AnthropicModels.Claude_4_6Opus_1M_20260205;
// Add thinking config if provided
if (thinkingConfig) {
if (thinkingConfig.type === "adaptive") {
// Claude 4.7 Opus: adaptive thinking, effort controls depth via output_config
streamConfig.thinking = { type: "adaptive" };
if (thinkingConfig.effort) {
streamConfig.output_config = { effort: thinkingConfig.effort };
}
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`đ§ [Anthropic] Adaptive thinking enabled | Effort: ${thinkingConfig.effort ?? "default"}`);
}
}
else {
streamConfig.thinking = thinkingConfig;
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`đ§ [Anthropic] Extended thinking enabled | Budget: ${thinkingConfig.budget_tokens} tokens`);
}
// Adjust max_tokens to account for thinking budget
// 1M context models have a 1,000,000 token window; standard models have 200,000
const contextWindowLimit = is1MContext ? 1000000 : 200000;
const totalTokens = streamConfig.max_tokens + thinkingConfig.budget_tokens;
if (totalTokens > contextWindowLimit) {
console.warn(`â ď¸ [Anthropic] Total tokens (${totalTokens}) exceeds ${is1MContext ? "1M" : "200K"} context window, adjusting completion tokens...`);
streamConfig.max_tokens = Math.max(1000, contextWindowLimit - thinkingConfig.budget_tokens);
}
}
}
// Build request options with optional abort signal and 1M context beta header
const requestOptions = {};
if (abortSignal) {
requestOptions.signal = abortSignal;
}
if (is1MContext) {
requestOptions.headers = {
"anthropic-beta": "context-1m-2025-08-07",
};
}
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`âąď¸ [Anthropic] Starting LLM call at: ${new Date().toISOString()}${is1MContext ? " | 1M context beta enabled" : ""}`);
}
const stream = await anthropicClient.messages.create(streamConfig, Object.keys(requestOptions).length > 0 ? requestOptions : undefined);
let activeContentBlock = false;
let currentContentBlockIndex;
let currentContentBlockType;
let thinkingContent = "";
let thinkingSignature = "";
let completeThinkingContent = ""; // Accumulate all thinking content for conversation history
let completeThinkingSignature = ""; // Accumulate signature for conversation history
for await (const chunk of stream) {
// Debug log all chunk types
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`[Anthropic] Received chunk type: ${chunk.type}`);
}
// Capture usage data from various message events
// Prioritize message_start.message usage data as it's more complete
if (chunk.type === "message_start" && chunk.message?.usage) {
usageData = chunk.message.usage;
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`[Anthropic] Usage data captured from message_start.message:`, usageData);
}
}
else if (chunk.type === "message_delta" &&
chunk.usage &&
!usageData?.input_tokens) {
// Only use message_delta if we don't have input_tokens yet
usageData = chunk.usage;
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`[Anthropic] Usage data captured from ${chunk.type}:`, usageData);
}
}
else if ((chunk.type === "message_delta" || chunk.type === "message_start") &&
chunk.usage) {
// Merge usage data if we have partial data
if (usageData) {
usageData = { ...usageData, ...chunk.usage };
}
else {
usageData = chunk.usage;
}
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`[Anthropic] Usage data merged from ${chunk.type}:`, usageData);
}
}
if (chunk.type === "content_block_start") {
activeContentBlock = true;
currentContentBlockIndex = chunk.index;
currentContentBlockType = chunk.content_block.type;
if (chunk.content_block.type === "thinking") {
// Start of thinking block (native extended thinking)
thinkingContent = "";
thinkingSignature = "";
onEvent({
type: "reasoning_start",
format: "thinking_tag",
});
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log("[Anthropic] Extended thinking block started");
}
}
else if (chunk.content_block.type === "tool_use") {
const toolCall = {
id: chunk.content_block.id,
name: chunk.content_block.name,
arguments: "",
};
toolCalls.push(toolCall);
// Track tool metrics
toolMetrics.totalTools++;
toolMetrics.currentToolStart = Date.now();
toolMetrics.toolTimes.push({
name: toolCall.name,
startTime: toolMetrics.currentToolStart,
argumentBuildTime: 0,
totalTime: 0,
});
// Track TTFT for first tool if no content yet
if (firstTokenTime === 0) {
firstTokenTime = Date.now() - startTime;
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`\n⥠[Anthropic] Time to First Token (Tool Call): ${firstTokenTime}ms`);
}
}
onEvent({
type: "tool_call_start",
toolCall: {
id: toolCall.id,
name: toolCall.name,
},
});
}
}
else if (chunk.type === "content_block_delta") {
// Handle thinking blocks with native extended thinking
if (chunk.delta.type === "thinking_delta" &&
"thinking" in chunk.delta) {
// Accumulate thinking content
thinkingContent += chunk.delta.thinking;
// Track first token time
if (firstTokenTime === 0) {
firstTokenTime = Date.now() - startTime;
}
onEvent({
type: "reasoning_delta",
content: chunk.delta.thinking,
format: "thinking_tag",
});
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`[Anthropic] Thinking delta: "${chunk.delta.thinking}"`);
}
}
else if (chunk.delta.type === "signature_delta" &&
"signature" in chunk.delta) {
// Handle signature for thinking blocks
thinkingSignature += chunk.delta.signature;
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`[Anthropic] Signature delta: "${chunk.delta.signature}"`);
}
}
else if (chunk.delta.type === "text_delta" && "text" in chunk.delta) {
fullMessage += chunk.delta.text;
tokenCount++;
const currentTime = Date.now();
// Track TTFT (first token regardless of type)
if (firstTokenTime === 0) {
firstTokenTime = currentTime - startTime;
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`\n⥠[Anthropic] Time to First Token (TTFT): ${firstTokenTime}ms`);
}
}
// Track first meaningful content (excludes tool calls)
if (firstMeaningfulContentTime === 0 && chunk.delta.text.trim()) {
firstMeaningfulContentTime = currentTime - startTime;
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`\nđŻ [Anthropic] Time to First Meaningful Content: ${firstMeaningfulContentTime}ms`);
}
}
// Track inter-token delays
if (lastEventTime > 0) {
const delay = currentTime - lastEventTime;
interTokenDelays.push(delay);
}
lastEventTime = currentTime;
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`[Anthropic] Token #${tokenCount}: "${chunk.delta.text}" | Accumulated: ${fullMessage.length} chars`);
}
onEvent({
type: "token",
token: chunk.delta.text,
});
}
else if (chunk.delta.type === "input_json_delta") {
// Find the current tool call and append arguments
const currentTool = toolCalls[toolCalls.length - 1];
if (currentTool) {
currentTool.arguments += chunk.delta.partial_json;
// Count tool argument tokens (rough estimate: ~4 chars per token)
toolArgumentTokens += Math.ceil(chunk.delta.partial_json.length / 4);
// Debug logging for partial JSON accumulation
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`[Anthropic] Tool ${currentTool.name} - Partial JSON chunk: "${chunk.delta.partial_json}"`);
console.log(`[Anthropic] Tool ${currentTool.name} - Total accumulated: ${currentTool.arguments.length} chars`);
}
onEvent({
type: "tool_call_delta",
toolCallId: currentTool.id,
argumentDelta: chunk.delta.partial_json,
});
}
}
}
else if (chunk.type === "content_block_stop") {
activeContentBlock = false;
// Check if we're stopping a thinking block
if (currentContentBlockType === "thinking" &&
chunk.index === currentContentBlockIndex) {
// Emit the complete thinking block with signature
onEvent({
type: "reasoning_end",
fullContent: thinkingContent,
signature: thinkingSignature || undefined,
});
// Accumulate thinking content and signature for conversation history preservation
if (thinkingContent.trim()) {
completeThinkingContent += thinkingContent;
}
if (thinkingSignature.trim()) {
completeThinkingSignature = thinkingSignature; // Use the last signature
}
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`[Anthropic] Thinking block completed:`, {
contentLength: thinkingContent.length,
hasSignature: !!thinkingSignature,
signature: thinkingSignature,
totalThinkingLength: completeThinkingContent.length,
});
}
// Reset current thinking state (but keep completeThinkingContent)
thinkingContent = "";
thinkingSignature = "";
}
currentContentBlockType = undefined;
currentContentBlockIndex = undefined;
// Tool call complete
const currentTool = toolCalls[toolCalls.length - 1];
if (currentTool && chunk.content_block?.type === "tool_use") {
const currentTime = Date.now();
// Update tool metrics
const toolIndex = toolCalls.length - 1;
if (toolIndex < toolMetrics.toolTimes.length) {
const toolTime = toolMetrics.toolTimes[toolIndex];
toolTime.argumentBuildTime = currentTime - toolTime.startTime;
toolTime.totalTime = toolTime.argumentBuildTime;
toolTime.name = currentTool.name;
}
// Track tool success/failure
try {
JSON.parse(currentTool.arguments);
toolMetrics.successfulTools++;
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`[Anthropic] â
Valid JSON for ${currentTool.name}`);
}
}
catch (e) {
toolMetrics.failedTools++;
console.error(`[Anthropic] â Invalid JSON for ${currentTool.name}: ${e}`);
}
// Log the final JSON for debugging
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING ||
!isValidJSON(currentTool.arguments)) {
console.log(`[Anthropic] Tool ${currentTool.name} complete with arguments (${currentTool.arguments.length} chars):`);
console.log(currentTool.arguments);
// Check if JSON appears truncated
const lastChars = currentTool.arguments.slice(-10);
if (!lastChars.includes("}") &&
currentTool.arguments.length > 100) {
console.warn(`[Anthropic] WARNING: JSON may be truncated - doesn't end with '}': ...${lastChars}`);
}
}
onEvent({
type: "tool_call_parsed",
toolCall: {
id: currentTool.id,
name: currentTool.name,
arguments: currentTool.arguments,
},
});
}
}
else if (chunk.type === "message_stop" && activeContentBlock) {
// Handle Anthropic bug: message_stop without content_block_stop
console.warn(`[Anthropic] Received message_stop without content_block_stop - handling as implicit block stop`);
activeContentBlock = false;
// Emit synthetic content_block_stop for the current tool
const currentTool = toolCalls[toolCalls.length - 1];
if (currentTool) {
// Log the incomplete tool
console.warn(`[Anthropic] Synthetic content_block_stop for incomplete tool ${currentTool.name} (${currentTool.arguments.length} chars)`);
// Only emit tool_call_complete if we have valid JSON
if (isValidJSON(currentTool.arguments)) {
onEvent({
type: "tool_call_parsed",
toolCall: {
id: currentTool.id,
name: currentTool.name,
arguments: currentTool.arguments,
},
});
}
else {
console.error(`[Anthropic] Tool ${currentTool.name} has incomplete JSON, skipping tool_call_complete event`);
}
}
}
}
// Final check: normalize and validate tool calls
const validToolCalls = toolCalls
.map((tc, idx) => {
// For tools with no parameters, Anthropic doesn't send input_json_delta
// So we need to convert empty arguments to valid JSON
if (tc.arguments === "") {
tc.arguments = "{}";
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`[Anthropic] Normalized empty arguments to "{}" for tool ${tc.name}`);
}
}
if (!isValidJSON(tc.arguments)) {
console.warn(`[Anthropic] Filtering out incomplete tool call ${idx} (${tc.name}) with INVALID JSON (${tc.arguments.length} chars)`);
return null;
}
return tc;
})
.filter((tc) => tc !== null);
if (toolCalls.length !== validToolCalls.length) {
console.log(`[Anthropic] Filtered out ${toolCalls.length - validToolCalls.length} incomplete tool calls`);
console.log(`[Anthropic] Successfully processed ${validToolCalls.length} valid tool calls`);
}
// Calculate final metrics including tool calling insights
const totalTime = Date.now() - startTime;
const totalTokens = tokenCount + toolArgumentTokens;
const tokensPerSecond = totalTokens > 0 ? totalTokens / (totalTime / 1000) : 0;
// Finalize round metrics
if (validToolCalls.length > 0) {
const roundEndTime = Date.now();
const totalToolTime = toolMetrics.toolTimes.reduce((sum, tool) => sum + tool.totalTime, 0);
const llmTime = totalTime - totalToolTime;
toolMetrics.rounds.push({
roundNumber: toolMetrics.currentRound,
llmTime: llmTime,
toolTime: totalToolTime,
toolCount: validToolCalls.length,
});
}
if (process.env.DEBUG_GRAPHLIT_SDK_METRICS) {
const metricsData = {
totalTime: `${totalTime}ms`,
ttft: `${firstTokenTime}ms`,
ttfmc: firstMeaningfulContentTime > 0
? `${firstMeaningfulContentTime}ms`
: null,
contentTokens: tokenCount,
toolTokens: toolArgumentTokens,
totalTokens: totalTokens,
tps: tokensPerSecond.toFixed(2),
};
console.log(`đ [Anthropic] Performance: Total=${metricsData.totalTime} | TTFT=${metricsData.ttft}${metricsData.ttfmc ? ` | TTFMC=${metricsData.ttfmc}` : ""} | Tokens(content/tool/total)=${metricsData.contentTokens}/${metricsData.toolTokens}/${metricsData.totalTokens} | TPS=${metricsData.tps}`);
// Tool calling metrics
if (validToolCalls.length > 0) {
const successRate = ((toolMetrics.successfulTools / toolMetrics.totalTools) *
100).toFixed(1);
const avgToolTime = toolMetrics.toolTimes.reduce((sum, tool) => sum + tool.totalTime, 0) /
toolMetrics.toolTimes.length;
console.log(`đ§ [Anthropic] Tools: Total=${toolMetrics.totalTools} | Success=${toolMetrics.successfulTools} | Failed=${toolMetrics.failedTools} | SuccessRate=${successRate}% | AvgTime=${avgToolTime.toFixed(2)}ms`);
// Tool timing details (consolidated)
const toolTimings = toolMetrics.toolTimes
.map((tool, idx) => `${tool.name}:${tool.argumentBuildTime}ms`)
.join(" | ");
if (toolTimings) {
console.log(`đ¨ [Anthropic] Tool Timings: ${toolTimings}`);
}
// Round metrics (consolidated)
const roundMetrics = toolMetrics.rounds
.map((round) => {
const efficiency = round.toolCount > 0
? ((round.llmTime / (round.llmTime + round.toolTime)) *
100).toFixed(1)
: 100;
return `R${round.roundNumber}(LLM:${round.llmTime}ms,Tools:${round.toolTime}ms,Eff:${efficiency}%)`;
})
.join(" | ");
if (roundMetrics) {
console.log(`đ [Anthropic] Rounds: ${roundMetrics}`);
}
}
if (interTokenDelays.length > 0) {
const avgDelay = interTokenDelays.reduce((a, b) => a + b, 0) / interTokenDelays.length;
const sortedDelays = [...interTokenDelays].sort((a, b) => a - b);
const p50Delay = sortedDelays[Math.floor(sortedDelays.length * 0.5)];
const p95Delay = sortedDelays[Math.floor(sortedDelays.length * 0.95)];
const p99Delay = sortedDelays[Math.floor(sortedDelays.length * 0.99)];
console.log(`âł [Anthropic] Inter-Token: Avg=${avgDelay.toFixed(2)}ms | P50=${p50Delay}ms | P95=${p95Delay}ms | P99=${p99Delay}ms`);
}
console.log(`â
[Anthropic] Final message (${fullMessage.length} chars): "${fullMessage}"`);
}
// Build structured reasoning metadata (replaces XML-in-message approach)
let reasoningMetadata;
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`đ§ [Anthropic] Debug - validToolCalls: ${validToolCalls.length}, thinking content: ${completeThinkingContent.length} chars, fullMessage: ${fullMessage.length} chars`);
}
if (completeThinkingContent.trim()) {
reasoningMetadata = {
content: completeThinkingContent,
format: "thinking_tag",
};
if (completeThinkingSignature) {
reasoningMetadata.signature = completeThinkingSignature;
}
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`đ§ [Anthropic] Structured reasoning metadata: ${completeThinkingContent.length} chars, signature: ${completeThinkingSignature?.length || 0}`);
}
}
// Emit completion event so UIEventAdapter flushes the chunk buffer
// before the SSE stream closes
onEvent({
type: "complete",
tokens: tokenCount,
});
onComplete(fullMessage, validToolCalls, usageData, reasoningMetadata);
}
catch (error) {
// Normalize Anthropic errors into ProviderError
const errorMessage = error.message || error.toString();
if (isRateLimitError(error)) {
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`â ď¸ [Anthropic] Rate limit / overloaded`);
}
throw new ProviderError(`Anthropic rate limit exceeded: ${errorMessage}`, {
provider: "anthropic",
statusCode: error.status || error.statusCode || 429,
retryable: true,
requestId: extractRequestId(error),
cause: error,
});
}
if (isRetryableServerError(error)) {
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`â ď¸ [Anthropic] Server error: ${errorMessage}`);
}
throw new ProviderError(`Anthropic server error: ${errorMessage}`, {
provider: "anthropic",
statusCode: error.status || error.statusCode || 500,
retryable: true,
requestId: extractRequestId(error),
cause: error,
});
}
if (isNetworkError(error)) {
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`â ď¸ [Anthropic] Network error: ${errorMessage}`);
}
throw new ProviderError(`Anthropic network error: ${errorMessage}`, {
provider: "anthropic",
statusCode: 503,
retryable: true,
requestId: extractRequestId(error),
c