UNPKG

@llumiverse/core

Version:

Provides a universal API to LLMs. Support for existing LLMs can be added by writing a driver.

github.com/vertesia/llumiverse

vertesia/llumiverse

288 lines • 15.5 kB

JavaScript

import { LlumiverseError } from "@llumiverse/common";

/**
 * Convert a list of completion result parts into the text shown to stream consumers.
 *
 * - `text` parts contribute their value as-is.
 * - `json` parts are serialized with `JSON.stringify`.
 * - `image` parts become a placeholder containing the first 10 characters of the value.
 * - any other part type contributes `String(value || '')`.
 *
 * @param {Array<{type: string, value: any}>} results - result parts to render
 * @param {boolean} padImageWithNewlines - when true the image placeholder is wrapped in "\n"
 * @returns {string} the concatenated text
 */
function renderResultsAsText(results, padImageWithNewlines) {
    return results.map((part) => {
        switch (part.type) {
            case 'text':
                return part.value;
            case 'json':
                return JSON.stringify(part.value);
            case 'image': {
                // Show truncated image placeholder for streaming
                const truncatedValue = typeof part.value === 'string'
                    ? part.value.slice(0, 10)
                    : String(part.value).slice(0, 10);
                return padImageWithNewlines
                    ? `\n[Image: ${truncatedValue}...]\n`
                    : `[Image: ${truncatedValue}...]`;
            }
            default:
                return String(part.value || '');
        }
    }).join('');
}

/**
 * Return the error a stream should throw for `error`.
 * Errors already recognized by `LlumiverseError.isLlumiverseError` are returned untouched;
 * anything else is passed through the driver's `formatLlumiverseError`.
 *
 * @param {object} driver - the driver executing the request
 * @param {object} options - execution options (only `model` is read here)
 * @param {unknown} error - the caught error
 * @returns {unknown} the error to throw
 */
function toStreamError(driver, options, error) {
    // Don't wrap if already a LlumiverseError
    if (LlumiverseError.isLlumiverseError(error)) {
        return error;
    }
    return driver.formatLlumiverseError(error, {
        provider: driver.provider,
        model: options.model,
        operation: 'stream',
    });
}

/**
 * Merge a streamed tool_use fragment into the entry already accumulated for the same id.
 * Mutates `existing` (which is a copy owned by the stream, never the driver's chunk object).
 *
 * @param {object} existing - accumulated tool call
 * @param {object} tool - newly received fragment for the same tool id
 */
function mergeToolUseFragment(existing, tool) {
    // Merge tool input (for streaming where arguments come as string pieces)
    if (tool.tool_input !== null && tool.tool_input !== undefined) {
        const existingInput = existing.tool_input;
        const newInput = tool.tool_input;
        if (typeof existingInput === 'string' && typeof newInput === 'string') {
            // Concatenate string arguments
            existing.tool_input = existingInput + newInput;
        }
        else if (existingInput && typeof existingInput === 'object' && newInput && typeof newInput === 'object') {
            // Merge objects (later keys win)
            existing.tool_input = { ...existingInput, ...newInput };
        }
        else {
            existing.tool_input = tool.tool_input;
        }
    }
    // Update tool name if provided (might come in later chunk)
    if (tool.tool_name) {
        existing.tool_name = tool.tool_name;
    }
    // Update actual ID if provided (OpenAI sends id only in first chunk)
    if (tool._actual_id) {
        existing._actual_id = tool._actual_id;
    }
}

/**
 * Append one result part to the accumulated results, combining it with the previous
 * entry when both are `text` or both are `json`.
 *
 * New entries are stored as shallow copies so that later merges never mutate the
 * objects of the chunk being processed. Without the copy, a chunk containing several
 * consecutive text/json parts would have its first part rewritten in place and the
 * text rendered from that chunk would contain duplicated content.
 *
 * @param {Array<{type: string, value: any}>} accumulated - results gathered so far (mutated)
 * @param {{type: string, value: any}} result - the part to append
 */
function appendResult(accumulated, result) {
    const lastResult = accumulated[accumulated.length - 1];
    const canCombine = lastResult
        && ((lastResult.type === 'text' && result.type === 'text')
            || (lastResult.type === 'json' && result.type === 'json'));
    if (!canCombine) {
        // Add as new result (copy: see function comment)
        accumulated.push({ ...result });
        return;
    }
    if (result.type === 'text') {
        lastResult.value += result.value;
        return;
    }
    // For JSON, combine the parsed objects directly
    try {
        const lastParsed = lastResult.value;
        const currentParsed = result.value;
        if (lastParsed !== null && typeof lastParsed === 'object' && currentParsed !== null && typeof currentParsed === 'object') {
            lastResult.value = { ...lastParsed, ...currentParsed };
        }
        else {
            // If not objects, convert to string and concatenate
            const lastStr = typeof lastParsed === 'string' ? lastParsed : JSON.stringify(lastParsed);
            const currentStr = typeof currentParsed === 'string' ? currentParsed : JSON.stringify(currentParsed);
            lastResult.value = lastStr + currentStr;
        }
    }
    catch {
        // If anything fails, just concatenate string representations
        lastResult.value = String(lastResult.value) + String(result.value);
    }
}

/**
 * Turn the accumulated tool calls into the final `tool_use` array.
 * Restores `_actual_id` into `id`, parses string `tool_input` as JSON, and, when the
 * finish reason is "length", drops the tool calls whose input could not be parsed.
 *
 * @param {Map<any, object>} accumulatedToolUse - tool calls keyed by streaming id
 * @param {string | undefined} finishReason - finish reason reported by the stream
 * @returns {object[] | undefined} finalized tool calls, or undefined when there are none
 */
function finalizeToolUse(accumulatedToolUse, finishReason) {
    if (accumulatedToolUse.size === 0) {
        return undefined;
    }
    const toolUseArray = Array.from(accumulatedToolUse.values());
    const truncatedToolIds = new Set();
    for (const tool of toolUseArray) {
        // Restore actual ID from OpenAI (was stored in _actual_id during streaming)
        if (tool._actual_id) {
            tool.id = tool._actual_id;
            delete tool._actual_id;
        }
        // Parse tool_input strings as JSON if needed (streaming sends arguments as string chunks)
        if (typeof tool.tool_input === 'string') {
            try {
                tool.tool_input = JSON.parse(tool.tool_input);
            }
            catch {
                // JSON parse failed — tool_input was likely truncated by max_tokens.
                // Set to empty object to prevent string tool_input from corrupting the conversation.
                tool.tool_input = {};
                truncatedToolIds.add(tool.id);
            }
        }
    }
    // If finish_reason is "length" (max_tokens hit), drop truncated tool calls entirely —
    // they were cut off mid-generation and would produce invalid results.
    if (finishReason === 'length' && truncatedToolIds.size > 0) {
        const kept = toolUseArray.filter((t) => !truncatedToolIds.has(t.id));
        return kept.length === 0 ? undefined : kept;
    }
    return toolUseArray;
}

/**
 * Async-iterable wrapper around a driver's streaming completion.
 *
 * Iterating yields text chunks. Once iteration runs to completion, `completion`
 * holds the aggregated result (results, token usage, finish reason, tool calls,
 * and the conversation when the driver returns one).
 */
export class DefaultCompletionStream {
    driver;
    prompt;
    options;
    chunks; // Counter for number of chunks instead of storing strings
    completion;

    /**
     * @param {object} driver - driver providing `requestTextCompletionStream`, `logger`, etc.
     * @param {*} prompt - prompt passed through to the driver
     * @param {object} options - execution options (`model` is read for logging and errors)
     */
    constructor(driver, prompt, options) {
        this.driver = driver;
        this.prompt = prompt;
        this.options = options;
        this.chunks = 0;
    }

    /**
     * Stream the completion, yielding text as it arrives.
     *
     * @yields {string} raw string chunks, or the text rendering of structured chunk results
     * @throws errors raised while streaming or validating; those not already recognized by
     *   `LlumiverseError.isLlumiverseError` are passed through `driver.formatLlumiverseError`
     */
    async *[Symbol.asyncIterator]() {
        // reset state
        this.completion = undefined;
        this.chunks = 0;
        const accumulatedResults = []; // Accumulate CompletionResult[] from chunks
        const accumulatedToolUse = new Map(); // Accumulate tool_use by id
        this.driver.logger.debug(`[${this.driver.provider}] Streaming Execution of ${this.options.model} with prompt`);
        const start = Date.now();
        let finish_reason = undefined;
        let promptTokens = 0;
        let resultTokens = undefined;
        let promptCachedTokens = undefined;
        let promptCacheWriteTokens = undefined;
        let promptNewTokens = undefined;
        try {
            const stream = await this.driver.requestTextCompletionStream(this.prompt, this.options);
            for await (const chunk of stream) {
                if (!chunk) {
                    continue;
                }
                if (typeof chunk === 'string') {
                    this.chunks++;
                    yield chunk;
                    continue;
                }
                if (chunk.finish_reason) {
                    // Do not replace non-null values with null values:
                    // skips empty finish_reason chunks coming after "stop" or "length"
                    finish_reason = chunk.finish_reason;
                }
                const usage = chunk.token_usage;
                if (usage) {
                    // Tokens returned include prior parts of stream, so overwrite rather than accumulate.
                    // Math.max used as some models report final token count at beginning of stream
                    promptTokens = Math.max(promptTokens, usage.prompt ?? 0);
                    resultTokens = Math.max(resultTokens ?? 0, usage.result ?? 0);
                    if (usage.prompt_cached != null) {
                        promptCachedTokens = usage.prompt_cached;
                    }
                    if (usage.prompt_cache_write != null) {
                        promptCacheWriteTokens = usage.prompt_cache_write;
                    }
                    if (usage.prompt_new != null) {
                        promptNewTokens = usage.prompt_new;
                    }
                }
                // Accumulate tool_use from chunks.
                // Note: During streaming, tool_input comes as string chunks that need concatenation
                if (chunk.tool_use && chunk.tool_use.length > 0) {
                    for (const tool of chunk.tool_use) {
                        const existing = accumulatedToolUse.get(tool.id);
                        if (existing) {
                            mergeToolUseFragment(existing, tool);
                        }
                        else {
                            // New tool call
                            accumulatedToolUse.set(tool.id, { ...tool });
                        }
                    }
                }
                if (Array.isArray(chunk.result) && chunk.result.length > 0) {
                    // Process each result in the chunk, combining consecutive text/JSON
                    for (const result of chunk.result) {
                        appendResult(accumulatedResults, result);
                    }
                    // Convert CompletionResult[] to string for streaming.
                    // Only yield if we have results to show
                    const resultText = renderResultsAsText(chunk.result, true);
                    if (resultText) {
                        this.chunks++;
                        yield resultText;
                    }
                }
            }
        }
        catch (error) {
            throw toStreamError(this.driver, this.options, error);
        }
        // Return undefined only if we never received any token data from the provider.
        // Use !== undefined (not truthiness) because resultTokens === 0 is valid (e.g. empty output with stop).
        const tokens = resultTokens !== undefined
            ? {
                prompt: promptTokens,
                result: resultTokens,
                total: resultTokens + promptTokens,
                ...(promptCachedTokens != null && { prompt_cached: promptCachedTokens }),
                ...(promptCacheWriteTokens != null && { prompt_cache_write: promptCacheWriteTokens }),
                ...(promptNewTokens != null && { prompt_new: promptNewTokens }),
            }
            : undefined;
        // Finalize tool calls: restore actual IDs and parse JSON arguments
        const toolUseArray = finalizeToolUse(accumulatedToolUse, finish_reason);
        this.completion = {
            result: accumulatedResults, // Return the accumulated CompletionResult[] instead of text
            prompt: this.prompt,
            execution_time: Date.now() - start,
            token_usage: tokens,
            finish_reason: finish_reason,
            chunks: this.chunks,
            tool_use: toolUseArray,
        };
        // Build conversation context for multi-turn support
        const conversation = this.driver.buildStreamingConversation(this.prompt, accumulatedResults, toolUseArray, this.options);
        if (conversation !== undefined) {
            this.completion.conversation = conversation;
        }
        try {
            this.driver.validateResult(this.completion, this.options);
        }
        catch (error) {
            throw toStreamError(this.driver, this.options, error);
        }
    }
}

/**
 * Async-iterable used when the driver cannot stream: it runs the blocking
 * `driver._execute` call and yields the whole output as a single text chunk.
 */
export class FallbackCompletionStream {
    driver;
    prompt;
    options;
    completion;

    /**
     * @param {object} driver - driver providing `_execute`, `logger`, etc.
     * @param {*} prompt - prompt passed through to the driver
     * @param {object} options - execution options (`model` is read for error reporting)
     */
    constructor(driver, prompt, options) {
        this.driver = driver;
        this.prompt = prompt;
        this.options = options;
    }

    /**
     * Execute the request and yield its text rendering once.
     *
     * @yields {string} the text rendering of the completion's results
     * @throws errors raised during execution; those not already recognized by
     *   `LlumiverseError.isLlumiverseError` are passed through `driver.formatLlumiverseError`
     */
    async *[Symbol.asyncIterator]() {
        // reset state
        this.completion = undefined;
        this.driver.logger.debug(`[${this.driver.provider}] Streaming is not supported, falling back to blocking execution`);
        try {
            const completion = await this.driver._execute(this.prompt, this.options);
            // For fallback streaming, yield the text content but keep the original completion
            const content = renderResultsAsText(completion.result, false);
            // Publish the original completion (untouched CompletionResult[]) BEFORE yielding:
            // a consumer that stops iterating after this single chunk closes the generator
            // at the yield, so code placed after it would never run.
            this.completion = completion;
            yield content;
        }
        catch (error) {
            throw toStreamError(this.driver, this.options, error);
        }
    }
}
//# sourceMappingURL=CompletionStream.js.map