graphlit-client
Version:
Graphlit API Client for TypeScript
1,023 lines • 89.8 kB
JavaScript
import { getModelName } from "../model-mapping.js";
/**
 * Report whether `str` can be parsed by `JSON.parse` without throwing.
 *
 * @param str Candidate JSON text.
 * @returns `true` when parsing succeeds, `false` when it throws.
 */
function isValidJSON(str) {
  let parsedCleanly = true;
  try {
    JSON.parse(str);
  } catch {
    // Any parse failure simply means "not valid JSON"; the error itself is not needed.
    parsedCleanly = false;
  }
  return parsedCleanly;
}
/**
 * Reduce a JSON Schema to a minimal subset for Groq and serialize it.
 *
 * Only the top-level `type`, `required`, and each property's `type`,
 * `description` and (array-valued) `enum` are kept; everything else
 * (patterns, formats, nested schemas, ...) is dropped.
 *
 * @param schema Parsed JSON Schema object. Non-object values (including
 *   `null`) are serialized as-is.
 * @returns The simplified schema as a JSON string.
 */
function simplifySchemaForGroq(schema) {
  if (typeof schema !== "object" || schema === null) {
    return JSON.stringify(schema);
  }
  const simplified = {
    type: schema.type || "object",
    properties: {},
    required: schema.required || [],
  };
  for (const [key, value] of Object.entries(schema.properties || {})) {
    // A null/undefined/primitive definition carries no usable metadata;
    // fall back to the defaults instead of throwing on property access.
    const prop = typeof value === "object" && value !== null ? value : {};
    const entry = {
      type: prop.type || "string",
      description: prop.description || "",
    };
    // Keep enum if present, but only when it is a real array.
    if (Array.isArray(prop.enum)) {
      entry.enum = prop.enum;
    }
    simplified.properties[key] = entry;
  }
  return JSON.stringify(simplified);
}
/**
 * Produce a copy of a JSON Schema with the fields Google Gemini rejects removed.
 *
 * Walks the value recursively: arrays are mapped element by element, objects
 * are rebuilt key by key, and primitives (and `null`) are returned untouched.
 * `$schema` and `additionalProperties` keys are dropped everywhere, and a
 * string-valued `format` is kept only when it is `enum` or `date-time`.
 *
 * @param schema Any JSON-compatible value.
 * @returns The cleaned value; the input is not modified.
 */
function cleanSchemaForGoogle(schema) {
  const isContainer = schema !== null && typeof schema === "object";
  if (!isContainer) {
    return schema;
  }
  if (Array.isArray(schema)) {
    return schema.map((element) => cleanSchemaForGoogle(element));
  }
  const droppedKeys = ["$schema", "additionalProperties"];
  const keptFormats = ["enum", "date-time"];
  const result = {};
  Object.entries(schema).forEach(([field, fieldValue]) => {
    if (droppedKeys.includes(field)) {
      return;
    }
    const isStringFormat = field === "format" && typeof fieldValue === "string";
    if (isStringFormat) {
      // Unsupported formats such as "date", "time" or "email" are omitted entirely.
      if (keptFormats.includes(fieldValue)) {
        result[field] = fieldValue;
      }
      return;
    }
    // Everything else is cleaned recursively (a non-string `format` lands here too).
    result[field] = cleanSchemaForGoogle(fieldValue);
  });
  return result;
}
/**
 * Stream a chat completion through the OpenAI SDK.
 *
 * Builds the request from `specification.openAI`, forwards each content and
 * tool-call fragment to `onEvent` as it arrives, and finally passes the
 * accumulated text and tool calls to `onComplete`.
 *
 * Events passed to `onEvent`:
 * - `{ type: "token", token }` for every non-empty content delta
 * - `{ type: "tool_call_start", toolCall: { id, name } }` when a tool-call index is first seen
 * - `{ type: "tool_call_delta", toolCallId, argumentDelta }` for every argument fragment
 * - `{ type: "tool_call_parsed", toolCall: { id, name, arguments } }` per tool call, after the stream ends
 *
 * Debug output is controlled by the `DEBUG_GRAPHLIT_SDK_STREAMING` and
 * `DEBUG_GRAPHLIT_SDK_METRICS` environment variables.
 *
 * @param specification Specification object; `name`, `serviceType` and `openAI` are read here.
 * @param messages Passed through unchanged as the request `messages`.
 * @param tools Optional tool definitions with `name`, `description` and a JSON-string `schema`.
 * @param openaiClient Client exposing `chat.completions.create`.
 * @param onEvent Callback receiving the streaming events listed above.
 * @param onComplete Called as `onComplete(fullMessage, toolCalls)` once the stream has ended.
 * @throws Error with `statusCode` 429 for rate limits and `statusCode` 503 for
 *   network failures (the original error is attached as `cause`); any other
 *   error is rethrown unchanged.
 */
export async function streamWithOpenAI(specification, messages, tools, openaiClient, // OpenAI client instance
onEvent, onComplete) {
  const debug = Boolean(process.env.DEBUG_GRAPHLIT_SDK_STREAMING);
  let fullMessage = "";
  // Slots are keyed by the tool-call index reported in the stream, so this
  // array may be sparse; each slot pairs the tool call with its own timing.
  const pendingTools = [];
  // Performance metrics
  const startTime = Date.now();
  let firstTokenTime = 0;
  let firstMeaningfulContentTime = 0;
  let tokenCount = 0;
  let toolArgumentTokens = 0;
  let lastEventTime = 0;
  const interTokenDelays = [];
  // Tool calling metrics
  const toolMetrics = {
    totalTools: 0,
    successfulTools: 0,
    failedTools: 0,
    rounds: [],
    currentRound: 1,
  };
  try {
    const modelName = getModelName(specification);
    if (!modelName) {
      throw new Error(`No model name found for specification: ${specification.name} (service: ${specification.serviceType})`);
    }
    if (debug) {
      console.log(`🤖 [OpenAI] Model Config: Service=OpenAI | Model=${modelName} | Temperature=${specification.openAI?.temperature} | MaxTokens=${specification.openAI?.completionTokenLimit || "null"} | Tools=${tools?.length || 0} | Spec="${specification.name}"`);
    }
    const streamConfig = {
      model: modelName,
      messages,
      stream: true,
      temperature: specification.openAI?.temperature,
    };
    // Only add max_completion_tokens if it's defined
    if (specification.openAI?.completionTokenLimit) {
      streamConfig.max_completion_tokens = specification.openAI.completionTokenLimit;
    }
    // Add tools if provided
    if (tools && tools.length > 0) {
      streamConfig.tools = tools.map((tool) => ({
        type: "function",
        function: {
          name: tool.name,
          description: tool.description,
          parameters: tool.schema ? JSON.parse(tool.schema) : {},
        },
      }));
    }
    if (debug) {
      console.log(`⏱️ [OpenAI] Starting LLM call at: ${new Date().toISOString()}`);
    }
    const stream = await openaiClient.chat.completions.create(streamConfig);
    for await (const chunk of stream) {
      // Some chunks carry no choices at all; treat them as empty deltas instead of throwing.
      const choice = chunk.choices?.[0];
      const delta = choice?.delta;
      if (debug) {
        console.log(`[OpenAI] Chunk:`, JSON.stringify(chunk, null, 2));
        if (delta?.content) {
          console.log(`[OpenAI] Content delta: "${delta.content}" (${delta.content.length} chars)`);
        }
        if (delta?.tool_calls) {
          console.log(`[OpenAI] Tool calls:`, delta.tool_calls);
        }
        if (choice?.finish_reason) {
          console.log(`[OpenAI] Finish reason: ${choice.finish_reason}`);
        }
      }
      if (delta?.content) {
        fullMessage += delta.content;
        tokenCount++;
        const currentTime = Date.now();
        // Track TTFT (first token regardless of type)
        if (firstTokenTime === 0) {
          firstTokenTime = currentTime - startTime;
          if (debug) {
            console.log(`\n⚡ [OpenAI] Time to First Token (TTFT): ${firstTokenTime}ms`);
          }
        }
        // Track first meaningful content (excludes tool calls and whitespace-only deltas)
        if (firstMeaningfulContentTime === 0 && delta.content.trim()) {
          firstMeaningfulContentTime = currentTime - startTime;
          if (debug) {
            console.log(`\n🎯 [OpenAI] Time to First Meaningful Content: ${firstMeaningfulContentTime}ms`);
          }
        }
        // Track inter-token delays
        if (lastEventTime > 0) {
          interTokenDelays.push(currentTime - lastEventTime);
        }
        lastEventTime = currentTime;
        if (debug) {
          console.log(`[OpenAI] Token #${tokenCount}: "${delta.content}" | Accumulated: ${fullMessage.length} chars`);
        }
        onEvent({
          type: "token",
          token: delta.content,
        });
      }
      // Handle tool calls
      for (const toolCallDelta of delta?.tool_calls || []) {
        const index = toolCallDelta.index;
        let slot = pendingTools[index];
        if (!slot) {
          const toolStart = Date.now();
          slot = {
            call: {
              id: toolCallDelta.id || `tool_${toolStart}_${index}`,
              name: "",
              arguments: "",
            },
            timing: {
              name: toolCallDelta.function?.name || "unknown",
              startTime: toolStart,
              argumentBuildTime: 0,
              totalTime: 0,
            },
          };
          pendingTools[index] = slot;
          toolMetrics.totalTools++;
          // Track TTFT for first tool if no content yet
          if (firstTokenTime === 0) {
            firstTokenTime = Date.now() - startTime;
            if (debug) {
              console.log(`\n⚡ [OpenAI] Time to First Token (Tool Call): ${firstTokenTime}ms`);
            }
          }
          if (debug) {
            console.log(`[OpenAI] Starting new tool call: ${slot.call.id}`);
          }
          onEvent({
            type: "tool_call_start",
            toolCall: {
              id: slot.call.id,
              name: toolCallDelta.function?.name || "",
            },
          });
        }
        const call = slot.call;
        if (toolCallDelta.function?.name) {
          call.name = toolCallDelta.function.name;
          if (debug) {
            console.log(`[OpenAI] Tool name: ${toolCallDelta.function.name}`);
          }
        }
        if (toolCallDelta.function?.arguments) {
          const fragment = toolCallDelta.function.arguments;
          call.arguments += fragment;
          // Count tool argument tokens (rough estimate: ~4 chars per token)
          toolArgumentTokens += Math.ceil(fragment.length / 4);
          if (debug) {
            console.log(`[OpenAI] Tool ${call.name} - Partial JSON chunk: "${fragment}"`);
            console.log(`[OpenAI] Tool ${call.name} - Total accumulated: ${call.arguments.length} chars`);
          }
          onEvent({
            type: "tool_call_delta",
            toolCallId: call.id,
            argumentDelta: fragment,
          });
        }
      }
    }
    // Compact the index-keyed slots: `filter` skips holes, so a gap in the
    // reported indices can no longer crash the finalization below.
    const finishedTools = pendingTools.filter(Boolean);
    const toolCalls = finishedTools.map((slot) => slot.call);
    const toolTimes = finishedTools.map((slot) => slot.timing);
    // Emit complete events for tool calls and finalize metrics
    for (const { call, timing } of finishedTools) {
      timing.argumentBuildTime = Date.now() - timing.startTime;
      timing.totalTime = timing.argumentBuildTime; // For streaming, this is the same
      timing.name = call.name; // Update with final name
      // Track tool success/failure; invalid JSON is reported but the call is still emitted
      try {
        JSON.parse(call.arguments);
        toolMetrics.successfulTools++;
        if (debug) {
          console.log(`[OpenAI] ✅ Valid JSON for ${call.name}`);
        }
      }
      catch (e) {
        toolMetrics.failedTools++;
        console.error(`[OpenAI] ❌ Invalid JSON for ${call.name}: ${e}`);
      }
      if (debug) {
        console.log(`[OpenAI] Tool ${call.name} complete with arguments (${call.arguments.length} chars):`);
        console.log(call.arguments);
      }
      onEvent({
        type: "tool_call_parsed",
        toolCall: {
          id: call.id,
          name: call.name,
          arguments: call.arguments,
        },
      });
    }
    if (debug && toolCalls.length > 0) {
      console.log(`[OpenAI] Successfully processed ${toolCalls.length} tool calls`);
    }
    // Calculate final metrics including tool calling insights
    const totalTime = Date.now() - startTime;
    const totalTokens = tokenCount + toolArgumentTokens;
    const tokensPerSecond = totalTokens > 0 ? totalTokens / (totalTime / 1000) : 0;
    // Finalize round metrics
    if (toolCalls.length > 0) {
      const totalToolTime = toolTimes.reduce((sum, tool) => sum + tool.totalTime, 0);
      toolMetrics.rounds.push({
        roundNumber: toolMetrics.currentRound,
        llmTime: totalTime - totalToolTime,
        toolTime: totalToolTime,
        toolCount: toolCalls.length,
      });
    }
    if (process.env.DEBUG_GRAPHLIT_SDK_METRICS) {
      const metricsData = {
        totalTime: `${totalTime}ms`,
        ttft: `${firstTokenTime}ms`,
        ttfmc: firstMeaningfulContentTime > 0
          ? `${firstMeaningfulContentTime}ms`
          : null,
        contentTokens: tokenCount,
        toolTokens: toolArgumentTokens,
        totalTokens: totalTokens,
        tps: tokensPerSecond.toFixed(2),
      };
      console.log(`📊 [OpenAI] Performance: Total=${metricsData.totalTime} | TTFT=${metricsData.ttft}${metricsData.ttfmc ? ` | TTFMC=${metricsData.ttfmc}` : ""} | Tokens(content/tool/total)=${metricsData.contentTokens}/${metricsData.toolTokens}/${metricsData.totalTokens} | TPS=${metricsData.tps}`);
      // Tool calling metrics
      if (toolCalls.length > 0) {
        const successRate = ((toolMetrics.successfulTools / toolMetrics.totalTools) * 100).toFixed(1);
        const avgToolTime = toolTimes.reduce((sum, tool) => sum + tool.totalTime, 0) / toolTimes.length;
        console.log(`🔧 [OpenAI] Tools: Total=${toolMetrics.totalTools} | Success=${toolMetrics.successfulTools} | Failed=${toolMetrics.failedTools} | SuccessRate=${successRate}% | AvgTime=${avgToolTime.toFixed(2)}ms`);
        // Tool timing details (consolidated)
        const toolTimings = toolTimes
          .map((tool) => `${tool.name}:${tool.argumentBuildTime}ms`)
          .join(" | ");
        if (toolTimings) {
          console.log(`🔨 [OpenAI] Tool Timings: ${toolTimings}`);
        }
        // Round metrics (consolidated)
        const roundMetrics = toolMetrics.rounds
          .map((round) => {
            const efficiency = round.toolCount > 0
              ? ((round.llmTime / (round.llmTime + round.toolTime)) * 100).toFixed(1)
              : 100;
            return `R${round.roundNumber}(LLM:${round.llmTime}ms,Tools:${round.toolTime}ms,Eff:${efficiency}%)`;
          })
          .join(" | ");
        if (roundMetrics) {
          console.log(`🔄 [OpenAI] Rounds: ${roundMetrics}`);
        }
      }
      if (interTokenDelays.length > 0) {
        const avgDelay = interTokenDelays.reduce((a, b) => a + b, 0) / interTokenDelays.length;
        const sortedDelays = [...interTokenDelays].sort((a, b) => a - b);
        const p50Delay = sortedDelays[Math.floor(sortedDelays.length * 0.5)];
        const p95Delay = sortedDelays[Math.floor(sortedDelays.length * 0.95)];
        const p99Delay = sortedDelays[Math.floor(sortedDelays.length * 0.99)];
        console.log(`⏳ [OpenAI] Inter-Token: Avg=${avgDelay.toFixed(2)}ms | P50=${p50Delay}ms | P95=${p95Delay}ms | P99=${p99Delay}ms`);
      }
      console.log(`✅ [OpenAI] Final message (${fullMessage.length} chars): "${fullMessage}"`);
    }
    onComplete(fullMessage, toolCalls);
  }
  catch (error) {
    // Handle OpenAI-specific errors; tolerate non-Error throwables.
    const errorMessage = String(error?.message || error);
    // Check for rate limit errors
    if (error?.status === 429 ||
      error?.statusCode === 429 ||
      error?.code === "rate_limit_exceeded") {
      if (debug) {
        console.log(`⚠️ [OpenAI] Rate limit hit`);
      }
      const rateLimitError = new Error("OpenAI rate limit exceeded", { cause: error });
      rateLimitError.statusCode = 429;
      throw rateLimitError;
    }
    // Check for network errors
    if (errorMessage.includes("fetch failed") ||
      error?.code === "ECONNRESET" ||
      error?.code === "ETIMEDOUT") {
      if (debug) {
        console.log(`⚠️ [OpenAI] Network error: ${errorMessage}`);
      }
      const networkError = new Error(`OpenAI network error: ${errorMessage}`, { cause: error });
      networkError.statusCode = 503; // Service unavailable
      throw networkError;
    }
    // Don't emit error event here - let the client handle it to avoid duplicates
    throw error;
  }
}
/**
 * Stream a message through the Anthropic SDK.
 *
 * Builds the request from `specification.anthropic` (defaulting `max_tokens`
 * to 8192), forwards text and tool-argument fragments to `onEvent` as they
 * arrive, and finally passes the accumulated text and the tool calls whose
 * arguments are valid JSON to `onComplete`.
 *
 * Events passed to `onEvent`:
 * - `{ type: "token", token }` for every `text_delta`
 * - `{ type: "tool_call_start", toolCall: { id, name } }` when a `tool_use` block opens
 * - `{ type: "tool_call_delta", toolCallId, argumentDelta }` for every `input_json_delta`
 * - `{ type: "tool_call_parsed", toolCall: { id, name, arguments } }` when that tool's block closes
 *
 * A tool call that received no argument text is completed with `"{}"` so
 * that zero-argument tools are not discarded as invalid JSON.
 *
 * Debug output is controlled by the `DEBUG_GRAPHLIT_SDK_STREAMING` and
 * `DEBUG_GRAPHLIT_SDK_METRICS` environment variables.
 *
 * @param specification Specification object; `name` and `anthropic` are read here.
 * @param messages Passed through unchanged as the request `messages`.
 * @param systemPrompt Optional system prompt, sent as `system` when truthy.
 * @param tools Optional tool definitions with `name`, `description` and a JSON-string `schema`.
 * @param anthropicClient Client exposing `messages.create`.
 * @param onEvent Callback receiving the streaming events listed above.
 * @param onComplete Called as `onComplete(fullMessage, validToolCalls)` once the stream has ended.
 * @throws Error with `statusCode` 503 when the service is overloaded and
 *   `statusCode` 429 for rate limits (the original error is attached as
 *   `cause`); any other error is rethrown unchanged.
 */
export async function streamWithAnthropic(specification, messages, systemPrompt, tools, anthropicClient, // Anthropic client instance
onEvent, onComplete) {
  const debug = Boolean(process.env.DEBUG_GRAPHLIT_SDK_STREAMING);
  let fullMessage = "";
  const toolCalls = [];
  // Parallel to `toolCalls`: timing record for the tool call at the same position.
  const toolTimes = [];
  // Performance metrics
  const startTime = Date.now();
  let firstTokenTime = 0;
  let firstMeaningfulContentTime = 0;
  let tokenCount = 0;
  let toolArgumentTokens = 0;
  let lastEventTime = 0;
  const interTokenDelays = [];
  // Tool calling metrics
  const toolMetrics = {
    totalTools: 0,
    successfulTools: 0,
    failedTools: 0,
    rounds: [],
    currentRound: 1,
  };
  // Records timing, validates the arguments and emits `tool_call_parsed`
  // for the tool call whose content block has just closed.
  const finalizeTool = (toolIndex) => {
    const tool = toolCalls[toolIndex];
    const timing = toolTimes[toolIndex];
    timing.argumentBuildTime = Date.now() - timing.startTime;
    timing.totalTime = timing.argumentBuildTime;
    timing.name = tool.name;
    // No argument text at all means the tool takes no input.
    if (tool.arguments.trim() === "") {
      tool.arguments = "{}";
    }
    let argumentsAreValid = true;
    try {
      JSON.parse(tool.arguments);
      toolMetrics.successfulTools++;
      if (debug) {
        console.log(`[Anthropic] ✅ Valid JSON for ${tool.name}`);
      }
    }
    catch (e) {
      argumentsAreValid = false;
      toolMetrics.failedTools++;
      console.error(`[Anthropic] ❌ Invalid JSON for ${tool.name}: ${e}`);
    }
    // Always dump the arguments when they are invalid, even outside debug mode.
    if (debug || !argumentsAreValid) {
      console.log(`[Anthropic] Tool ${tool.name} complete with arguments (${tool.arguments.length} chars):`);
      console.log(tool.arguments);
      // Check if JSON appears truncated
      const lastChars = tool.arguments.slice(-10);
      if (!lastChars.includes("}") && tool.arguments.length > 100) {
        console.warn(`[Anthropic] WARNING: JSON may be truncated - doesn't end with '}': ...${lastChars}`);
      }
    }
    onEvent({
      type: "tool_call_parsed",
      toolCall: {
        id: tool.id,
        name: tool.name,
        arguments: tool.arguments,
      },
    });
  };
  try {
    const modelName = getModelName(specification);
    if (!modelName) {
      throw new Error(`No model name found for Anthropic specification: ${specification.name}`);
    }
    if (debug) {
      console.log(`🤖 [Anthropic] Model Config: Service=Anthropic | Model=${modelName} | Temperature=${specification.anthropic?.temperature} | MaxTokens=${specification.anthropic?.completionTokenLimit || 8192} | SystemPrompt=${systemPrompt ? "Yes" : "No"} | Tools=${tools?.length || 0} | Spec="${specification.name}"`);
    }
    const streamConfig = {
      model: modelName,
      messages,
      stream: true,
      temperature: specification.anthropic?.temperature,
      max_tokens: specification.anthropic?.completionTokenLimit || 8192, // required
    };
    if (systemPrompt) {
      streamConfig.system = systemPrompt;
    }
    // Add tools if provided
    if (tools && tools.length > 0) {
      streamConfig.tools = tools.map((tool) => ({
        name: tool.name,
        description: tool.description,
        input_schema: tool.schema ? JSON.parse(tool.schema) : {},
      }));
    }
    if (debug) {
      console.log(`⏱️ [Anthropic] Starting LLM call at: ${new Date().toISOString()}`);
    }
    const stream = await anthropicClient.messages.create(streamConfig);
    let activeContentBlock = false;
    // Position in `toolCalls` of the tool whose block is currently open, or
    // -1 when the open block (if any) is not a tool_use block. Without this,
    // the stop of a text block would re-finalize the previous tool call.
    let activeToolIndex = -1;
    for await (const chunk of stream) {
      if (debug) {
        console.log(`[Anthropic] Received chunk type: ${chunk.type}`);
      }
      if (chunk.type === "content_block_start") {
        activeContentBlock = true;
        activeToolIndex = -1;
        if (chunk.content_block.type === "tool_use") {
          const toolCall = {
            id: chunk.content_block.id,
            name: chunk.content_block.name,
            arguments: "",
          };
          toolCalls.push(toolCall);
          activeToolIndex = toolCalls.length - 1;
          // Track tool metrics
          toolMetrics.totalTools++;
          toolTimes.push({
            name: toolCall.name,
            startTime: Date.now(),
            argumentBuildTime: 0,
            totalTime: 0,
          });
          // Track TTFT for first tool if no content yet
          if (firstTokenTime === 0) {
            firstTokenTime = Date.now() - startTime;
            if (debug) {
              console.log(`\n⚡ [Anthropic] Time to First Token (Tool Call): ${firstTokenTime}ms`);
            }
          }
          onEvent({
            type: "tool_call_start",
            toolCall: {
              id: toolCall.id,
              name: toolCall.name,
            },
          });
        }
      }
      else if (chunk.type === "content_block_delta") {
        if (chunk.delta.type === "text_delta") {
          fullMessage += chunk.delta.text;
          tokenCount++;
          const currentTime = Date.now();
          // Track TTFT (first token regardless of type)
          if (firstTokenTime === 0) {
            firstTokenTime = currentTime - startTime;
            if (debug) {
              console.log(`\n⚡ [Anthropic] Time to First Token (TTFT): ${firstTokenTime}ms`);
            }
          }
          // Track first meaningful content (excludes tool calls and whitespace-only deltas)
          if (firstMeaningfulContentTime === 0 && chunk.delta.text.trim()) {
            firstMeaningfulContentTime = currentTime - startTime;
            if (debug) {
              console.log(`\n🎯 [Anthropic] Time to First Meaningful Content: ${firstMeaningfulContentTime}ms`);
            }
          }
          // Track inter-token delays
          if (lastEventTime > 0) {
            interTokenDelays.push(currentTime - lastEventTime);
          }
          lastEventTime = currentTime;
          if (debug) {
            console.log(`[Anthropic] Token #${tokenCount}: "${chunk.delta.text}" | Accumulated: ${fullMessage.length} chars`);
          }
          onEvent({
            type: "token",
            token: chunk.delta.text,
          });
        }
        else if (chunk.delta.type === "input_json_delta") {
          // Append the fragment to the tool whose block is open.
          const currentTool = toolCalls[activeToolIndex];
          if (currentTool) {
            const fragment = chunk.delta.partial_json;
            currentTool.arguments += fragment;
            // Count tool argument tokens (rough estimate: ~4 chars per token)
            toolArgumentTokens += Math.ceil(fragment.length / 4);
            if (debug) {
              console.log(`[Anthropic] Tool ${currentTool.name} - Partial JSON chunk: "${fragment}"`);
              console.log(`[Anthropic] Tool ${currentTool.name} - Total accumulated: ${currentTool.arguments.length} chars`);
            }
            onEvent({
              type: "tool_call_delta",
              toolCallId: currentTool.id,
              argumentDelta: fragment,
            });
          }
        }
      }
      else if (chunk.type === "content_block_stop") {
        activeContentBlock = false;
        // Only a closing tool_use block completes a tool call.
        if (activeToolIndex >= 0) {
          finalizeTool(activeToolIndex);
          activeToolIndex = -1;
        }
      }
      else if (chunk.type === "message_stop" && activeContentBlock) {
        // message_stop arrived while a block was still open: treat it as an implicit block stop.
        console.warn(`[Anthropic] Received message_stop without content_block_stop - handling as implicit block stop`);
        activeContentBlock = false;
        const currentTool = toolCalls[activeToolIndex];
        activeToolIndex = -1;
        if (currentTool) {
          console.warn(`[Anthropic] Synthetic content_block_stop for incomplete tool ${currentTool.name} (${currentTool.arguments.length} chars)`);
          // The block never closed, so only report the tool when its JSON is already complete.
          if (isValidJSON(currentTool.arguments)) {
            onEvent({
              type: "tool_call_parsed",
              toolCall: {
                id: currentTool.id,
                name: currentTool.name,
                arguments: currentTool.arguments,
              },
            });
          }
          else {
            console.error(`[Anthropic] Tool ${currentTool.name} has incomplete JSON, skipping tool_call_complete event`);
          }
        }
      }
    }
    // Final check: filter out any remaining incomplete tool calls
    const validToolCalls = toolCalls.filter((tc, idx) => {
      if (!isValidJSON(tc.arguments)) {
        console.warn(`[Anthropic] Filtering out incomplete tool call ${idx} (${tc.name}) with INVALID JSON (${tc.arguments.length} chars)`);
        return false;
      }
      return true;
    });
    if (toolCalls.length !== validToolCalls.length) {
      console.log(`[Anthropic] Filtered out ${toolCalls.length - validToolCalls.length} incomplete tool calls`);
      console.log(`[Anthropic] Successfully processed ${validToolCalls.length} valid tool calls`);
    }
    // Calculate final metrics including tool calling insights
    const totalTime = Date.now() - startTime;
    const totalTokens = tokenCount + toolArgumentTokens;
    const tokensPerSecond = totalTokens > 0 ? totalTokens / (totalTime / 1000) : 0;
    // Finalize round metrics
    if (validToolCalls.length > 0) {
      const totalToolTime = toolTimes.reduce((sum, tool) => sum + tool.totalTime, 0);
      toolMetrics.rounds.push({
        roundNumber: toolMetrics.currentRound,
        llmTime: totalTime - totalToolTime,
        toolTime: totalToolTime,
        toolCount: validToolCalls.length,
      });
    }
    if (process.env.DEBUG_GRAPHLIT_SDK_METRICS) {
      const metricsData = {
        totalTime: `${totalTime}ms`,
        ttft: `${firstTokenTime}ms`,
        ttfmc: firstMeaningfulContentTime > 0
          ? `${firstMeaningfulContentTime}ms`
          : null,
        contentTokens: tokenCount,
        toolTokens: toolArgumentTokens,
        totalTokens: totalTokens,
        tps: tokensPerSecond.toFixed(2),
      };
      console.log(`📊 [Anthropic] Performance: Total=${metricsData.totalTime} | TTFT=${metricsData.ttft}${metricsData.ttfmc ? ` | TTFMC=${metricsData.ttfmc}` : ""} | Tokens(content/tool/total)=${metricsData.contentTokens}/${metricsData.toolTokens}/${metricsData.totalTokens} | TPS=${metricsData.tps}`);
      // Tool calling metrics
      if (validToolCalls.length > 0) {
        const successRate = ((toolMetrics.successfulTools / toolMetrics.totalTools) * 100).toFixed(1);
        const avgToolTime = toolTimes.reduce((sum, tool) => sum + tool.totalTime, 0) / toolTimes.length;
        console.log(`🔧 [Anthropic] Tools: Total=${toolMetrics.totalTools} | Success=${toolMetrics.successfulTools} | Failed=${toolMetrics.failedTools} | SuccessRate=${successRate}% | AvgTime=${avgToolTime.toFixed(2)}ms`);
        // Tool timing details (consolidated)
        const toolTimings = toolTimes
          .map((tool) => `${tool.name}:${tool.argumentBuildTime}ms`)
          .join(" | ");
        if (toolTimings) {
          console.log(`🔨 [Anthropic] Tool Timings: ${toolTimings}`);
        }
        // Round metrics (consolidated)
        const roundMetrics = toolMetrics.rounds
          .map((round) => {
            const efficiency = round.toolCount > 0
              ? ((round.llmTime / (round.llmTime + round.toolTime)) * 100).toFixed(1)
              : 100;
            return `R${round.roundNumber}(LLM:${round.llmTime}ms,Tools:${round.toolTime}ms,Eff:${efficiency}%)`;
          })
          .join(" | ");
        if (roundMetrics) {
          console.log(`🔄 [Anthropic] Rounds: ${roundMetrics}`);
        }
      }
      if (interTokenDelays.length > 0) {
        const avgDelay = interTokenDelays.reduce((a, b) => a + b, 0) / interTokenDelays.length;
        const sortedDelays = [...interTokenDelays].sort((a, b) => a - b);
        const p50Delay = sortedDelays[Math.floor(sortedDelays.length * 0.5)];
        const p95Delay = sortedDelays[Math.floor(sortedDelays.length * 0.95)];
        const p99Delay = sortedDelays[Math.floor(sortedDelays.length * 0.99)];
        console.log(`⏳ [Anthropic] Inter-Token: Avg=${avgDelay.toFixed(2)}ms | P50=${p50Delay}ms | P95=${p95Delay}ms | P99=${p99Delay}ms`);
      }
      console.log(`✅ [Anthropic] Final message (${fullMessage.length} chars): "${fullMessage}"`);
    }
    onComplete(fullMessage, validToolCalls);
  }
  catch (error) {
    // Handle Anthropic-specific errors; tolerate non-Error throwables.
    const errorMessage = String(error?.message || error);
    // Check for overloaded errors
    if (error?.type === "overloaded_error" ||
      errorMessage.includes("Overloaded")) {
      if (debug) {
        console.log(`⚠️ [Anthropic] Service overloaded`);
      }
      // Surfaced as 503 so callers can apply their retry logic
      const overloadError = new Error("Anthropic service overloaded", { cause: error });
      overloadError.statusCode = 503; // Service unavailable
      throw overloadError;
    }
    // Check for rate limit errors
    if (error?.status === 429 ||
      error?.statusCode === 429 ||
      error?.type === "rate_limit_error") {
      if (debug) {
        console.log(`⚠️ [Anthropic] Rate limit hit`);
      }
      const rateLimitError = new Error("Anthropic rate limit exceeded", { cause: error });
      rateLimitError.statusCode = 429;
      throw rateLimitError;
    }
    // Don't emit error event here - let the client handle it to avoid duplicates
    throw error;
  }
}
/**
* Stream with Google SDK
*/
export async function streamWithGoogle(specification, messages, systemPrompt, tools, googleClient, // Google GenerativeAI client instance
onEvent, onComplete) {
let fullMessage = "";
let toolCalls = [];
// Performance metrics
const startTime = Date.now();
let firstTokenTime = 0;
let firstMeaningfulContentTime = 0;
let tokenCount = 0;
let toolArgumentTokens = 0;
let lastEventTime = 0;
const interTokenDelays = [];
// Tool calling metrics
const toolMetrics = {
totalTools: 0,
successfulTools: 0,
failedTools: 0,
toolTimes: [],
currentToolStart: 0,
roundStartTime: startTime,
rounds: [],
currentRound: 1,
};
try {
const modelName = getModelName(specification);
if (!modelName) {
throw new Error(`No model name found for Google specification: ${specification.name}`);
}
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`š¤ [Google] Model Config: Service=Google | Model=${modelName} | Temperature=${specification.google?.temperature} | MaxTokens=${specification.google?.completionTokenLimit || "null"} | SystemPrompt=${systemPrompt ? "Yes" : "No"} | Tools=${tools?.length || 0} | Spec="${specification.name}"`);
}
const streamConfig = {
model: modelName,
messages,
stream: true,
temperature: specification.google?.temperature,
//top_p: specification.google?.probability,
};
// Only add max_tokens if it's defined
if (specification.google?.completionTokenLimit) {
streamConfig.max_tokens = specification.google.completionTokenLimit;
}
if (systemPrompt) {
streamConfig.system = systemPrompt;
}
// Add tools if provided
if (tools && tools.length > 0) {
streamConfig.tools = tools.map((tool) => ({
name: tool.name,
description: tool.description,
input_schema: tool.schema ? JSON.parse(tool.schema) : {},
}));
}
// Configure tools for Google - expects a single array of function declarations
const googleTools = tools && tools.length > 0
? [
{
functionDeclarations: tools.map((tool) => {
const rawSchema = tool.schema ? JSON.parse(tool.schema) : {};
const cleanedSchema = cleanSchemaForGoogle(rawSchema);
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
const hadCleanup = JSON.stringify(rawSchema) !== JSON.stringify(cleanedSchema);
if (hadCleanup) {
console.log(`[Google] Cleaned schema for tool ${tool.name} - removed unsupported fields`);
}
}
return {
name: tool.name,
description: tool.description,
parameters: cleanedSchema,
};
}),
},
]
: undefined;
const model = googleClient.getGenerativeModel({
model: modelName,
generationConfig: {
temperature: streamConfig.temperature,
maxOutputTokens: streamConfig.max_tokens,
},
tools: googleTools,
});
// Convert messages to Google chat format
const history = messages.slice(0, -1); // All but last message
const prompt = messages[messages.length - 1]?.parts[0]?.text || "";
const chat = model.startChat({ history });
const result = await chat.sendMessageStream(prompt);
for await (const chunk of result.stream) {
const text = chunk.text();
// Debug log chunk details
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`[Google] Raw chunk:`, JSON.stringify(chunk, null, 2));
if (text) {
console.log(`[Google] Text delta: "${text}" (${text.length} chars)`);
}
}
if (text) {
fullMessage += text;
tokenCount++;
const currentTime = Date.now();
// Track TTFT (first token regardless of type)
if (firstTokenTime === 0) {
firstTokenTime = currentTime - startTime;
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`\nā” [Google] Time to First Token (TTFT): ${firstTokenTime}ms`);
}
}
// Track first meaningful content
if (firstMeaningfulContentTime === 0 && text.trim()) {
firstMeaningfulContentTime = currentTime - startTime;
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`\nšÆ [Google] Time to First Meaningful Content: ${firstMeaningfulContentTime}ms`);
}
}
onEvent({
type: "token",
token: text,
});
}
// Google streams function calls as part of the candidates
// Check if this chunk contains function calls
try {
const candidate = chunk.candidates?.[0];
if (candidate?.content?.parts) {
for (const part of candidate.content.parts) {
if (part.functionCall) {
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`[Google] Received function call: ${part.functionCall.name}`);
console.log(`[Google] Function args:`, JSON.stringify(part.functionCall.args || {}));
}
const toolCall = {
id: `google_tool_${Date.now()}_${toolCalls.length}`,
name: part.functionCall.name,
arguments: JSON.stringify(part.functionCall.args || {}),
};
toolCalls.push(toolCall);
// Track tool metrics
toolMetrics.totalTools++;
const argumentString = JSON.stringify(part.functionCall.args || {});
toolArgumentTokens += Math.ceil(argumentString.length / 4);
toolMetrics.toolTimes.push({
name: part.functionCall.name,
startTime: Date.now(),
argumentBuildTime: 0, // Google returns complete args at once
totalTime: 0,
});
// Track TTFT for first tool if no content yet
if (firstTokenTime === 0) {
firstTokenTime = Date.now() - startTime;
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`\nā” [Google] Time to First Token (Tool Call): ${firstTokenTime}ms`);
}
}
// Emit tool call events
onEvent({
type: "tool_call_start",
toolCall: {
id: toolCall.id,
name: toolCall.name,
},
});
onEvent({
type: "tool_call_delta",
toolCallId: toolCall.id,
argumentDelta: toolCall.arguments,
});
// Update tool metrics and validate JSON
const toolIndex = toolCalls.length - 1;
if (toolIndex < toolMetrics.toolTimes.length) {
const toolTime = toolMetrics.toolTimes[toolIndex];
toolTime.totalTime = Date.now() - toolTime.startTime;
toolTime.argumentBuildTime = toolTime.totalTime; // Google returns complete args
}
try {
JSON.parse(toolCall.arguments);
toolMetrics.successfulTools++;
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`[Google] ā
Valid JSON for ${toolCall.name}`);
}
}
catch (e) {
toolMetrics.failedTools++;
console.error(`[Google] ā Invalid JSON for ${toolCall.name}: ${e}`);
}
// Log completion
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`[Google] Tool ${toolCall.name} complete with arguments (${toolCall.arguments.length} chars):`);
console.log(toolCall.arguments);
}
onEvent({
type: "tool_call_parsed",
toolCall: {
id: toolCall.id,
name: toolCall.name,
arguments: toolCall.arguments,
},
});
}
}
}
}
catch (error) {
// Silently ignore parsing errors
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.error(`[Google] Error parsing chunk for function calls:`, error);
}
}
}
// Google might also return function calls or additional text in the final response
try {
const response = await result.response;
const candidate = response.candidates?.[0];
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING &&
candidate?.content?.parts) {
console.log(`[Google] Processing final response with ${candidate.content.parts.length} parts`);
}
if (candidate?.content?.parts) {
for (const part of candidate.content.parts) {
// Check for any final text we might have missed
if (part.text) {
const finalText = part.text;
// Only add if it's not already included in fullMessage
if (!fullMessage.endsWith(finalText)) {
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`[Google] Adding final text: ${finalText.length} chars`);
}
fullMessage += finalText;
onEvent({
type: "token",
token: finalText,
});
}
}
// Check for function calls
if (part.functionCall &&
!toolCalls.some((tc) => tc.name === part.functionCall.name)) {
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.log(`[Google] Found function call in final response: ${part.functionCall.name}`);
}
const toolCall = {
id: `google_tool_${Date.now()}_${toolCalls.length}`,
name: part.functionCall.name,
arguments: JSON.stringify(part.functionCall.args || {}),
};
toolCalls.push(toolCall);
// Emit events for function calls found in final response
onEvent({
type: "tool_call_start",
toolCall: {
id: toolCall.id,
name: toolCall.name,
},
});
onEvent({
type: "tool_call_parsed",
toolCall: {
id: toolCall.id,
name: toolCall.name,
arguments: toolCall.arguments,
},
});
}
}
}
}
catch (error) {
// Log parsing errors when debugging
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
console.error(`[Google] Error processing final response:`, error);
}
}
// Final summary logging
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING && toolCalls.length > 0) {
console.log(`[Google] Successfully processed ${toolCalls.length} tool calls`);
}
// Calculate final metrics including tool calling insights
const totalTime = Date.now() - startTime;
const totalTokens = tokenCount + toolArgumentTokens;
const tokensPerSecond = totalTokens > 0 ? totalTokens / (totalTime / 1000) : 0;
// Finalize round metrics
if (toolCalls.length > 0) {
const roundEndTime = Date.now();
const totalToolTime = toolMetrics.toolTimes.reduce((sum, tool) => sum + tool.totalTime, 0);
const llmTime = totalTime - totalToolTime;
toolMetrics.rounds.push({
roundNumber: toolMetrics.currentRound,
llmTime: llmTime,
toolTime: totalToolTime,
toolCount: toolCalls.length,
});
}
if (process.env.DEBUG_GRAPHLIT_SDK_METRICS) {
const metricsData = {
totalTime: `${totalTime}ms`,
ttft: `${firstTokenTime}ms`,
ttfmc: firstMeaningfulContentTime > 0
? `${firstMeaningfulContentTime}ms`
: null,
contentTokens: tokenCount,
toolTokens: toolArgumentTokens,
totalTokens: totalTokens,
tps: tokensPerSecond.toFixed(2),
};
console