@llumiverse/core
Version:
Provides a universal API for LLMs. Support for existing LLMs can be added by writing a driver.
288 lines • 15.5 kB
JavaScript
import { LlumiverseError } from "@llumiverse/common";
/**
 * Fold one chunk's `tool_use` entries into the per-id accumulator.
 *
 * The first entry seen for an id is stored as a shallow copy. Later entries
 * with the same id extend it: string inputs are concatenated, object inputs
 * are shallow-merged, and any other combination replaces the previous input.
 *
 * @param {Map<any, object>} accumulated - Tool calls collected so far, keyed by `tool.id`.
 * @param {object[]} tools - The `tool_use` array of the current chunk.
 */
function mergeStreamedToolUse(accumulated, tools) {
    for (const tool of tools) {
        const existing = accumulated.get(tool.id);
        if (!existing) {
            // New tool call
            accumulated.set(tool.id, { ...tool });
            continue;
        }
        // Merge tool input (for streaming where arguments come as string pieces)
        if (tool.tool_input != null) {
            const existingInput = existing.tool_input;
            const newInput = tool.tool_input;
            if (typeof existingInput === 'string' && typeof newInput === 'string') {
                existing.tool_input = existingInput + newInput;
            }
            else if (existingInput && typeof existingInput === 'object' && newInput && typeof newInput === 'object') {
                existing.tool_input = { ...existingInput, ...newInput };
            }
            else {
                existing.tool_input = newInput;
            }
        }
        // Update tool name if provided (might come in later chunk)
        if (tool.tool_name) {
            existing.tool_name = tool.tool_name;
        }
        // Update actual ID if provided (OpenAI sends id only in first chunk)
        if (tool._actual_id) {
            existing._actual_id = tool._actual_id;
        }
    }
}

/**
 * Append one streamed result to the accumulated list, merging it into the
 * previous entry when both are `text` or both are `json`.
 *
 * New entries are stored as shallow copies. Merging therefore only ever
 * mutates objects owned by the accumulator and never the chunk objects handed
 * over by the driver (which are still needed, unmodified, to render the text
 * yielded for the current chunk).
 *
 * @param {object[]} accumulated - Results collected so far (mutated in place).
 * @param {object} result - A single entry of the current chunk's `result` array.
 */
function appendStreamedResult(accumulated, result) {
    const lastResult = accumulated[accumulated.length - 1];
    const canCombine = lastResult &&
        ((lastResult.type === 'text' && result.type === 'text') ||
            (lastResult.type === 'json' && result.type === 'json'));
    if (!canCombine) {
        // Copy so that later merges do not write into the driver's chunk object.
        accumulated.push({ ...result });
        return;
    }
    if (result.type === 'text') {
        lastResult.value += result.value;
        return;
    }
    // JSON: shallow-merge when both values are objects, otherwise concatenate as strings.
    try {
        const lastParsed = lastResult.value;
        const currentParsed = result.value;
        if (lastParsed !== null && typeof lastParsed === 'object' &&
            currentParsed !== null && typeof currentParsed === 'object') {
            lastResult.value = { ...lastParsed, ...currentParsed };
        }
        else {
            const lastStr = typeof lastParsed === 'string' ? lastParsed : JSON.stringify(lastParsed);
            const currentStr = typeof currentParsed === 'string' ? currentParsed : JSON.stringify(currentParsed);
            lastResult.value = lastStr + currentStr;
        }
    }
    catch {
        // If anything fails (e.g. JSON.stringify throws), just concatenate string representations
        lastResult.value = String(lastResult.value) + String(result.value);
    }
}

/**
 * Render a chunk's results as the text yielded to stream consumers.
 *
 * @param {object[]} results - The `result` array of the current chunk.
 * @returns {string} Concatenated text; image values are shown as a placeholder
 *   containing only their first 10 characters.
 */
function renderStreamedResults(results) {
    return results.map((r) => {
        switch (r.type) {
            case 'text':
                return r.value;
            case 'json':
                return JSON.stringify(r.value);
            case 'image': {
                // Show truncated image placeholder for streaming
                const truncatedValue = String(r.value).slice(0, 10);
                return `\n[Image: ${truncatedValue}...]\n`;
            }
            default:
                return String(r.value || '');
        }
    }).join('');
}

/**
 * Turn the accumulated tool calls into the final `tool_use` array.
 *
 * Restores `_actual_id` as the tool id and parses string inputs as JSON. An
 * input that fails to parse is replaced with `{}`; when the stream finished
 * with `"length"` such tool calls are dropped entirely, since they were cut
 * off mid-generation.
 *
 * @param {Map<any, object>} accumulated - Tool calls collected during streaming.
 * @param {string | undefined} finishReason - Last non-empty finish reason seen.
 * @returns {object[] | undefined} The finalized tool calls, or `undefined` when none remain.
 */
function finalizeStreamedToolUse(accumulated, finishReason) {
    if (accumulated.size === 0) {
        return undefined;
    }
    const tools = Array.from(accumulated.values());
    const truncatedToolIds = new Set();
    for (const tool of tools) {
        // Restore actual ID from OpenAI (was stored in _actual_id during streaming)
        if (tool._actual_id) {
            tool.id = tool._actual_id;
            delete tool._actual_id;
        }
        // Streaming sends arguments as string chunks; parse the concatenated string.
        if (typeof tool.tool_input === 'string') {
            try {
                tool.tool_input = JSON.parse(tool.tool_input);
            }
            catch {
                // JSON parse failed — tool_input was likely truncated by max_tokens.
                // Set to empty object to prevent string tool_input from corrupting the conversation.
                tool.tool_input = {};
                truncatedToolIds.add(tool.id);
            }
        }
    }
    if (finishReason !== 'length' || truncatedToolIds.size === 0) {
        return tools;
    }
    const complete = tools.filter((t) => !truncatedToolIds.has(t.id));
    return complete.length > 0 ? complete : undefined;
}

/**
 * Async-iterable wrapper around a driver's streaming completion.
 *
 * Iterating yields text pieces as they arrive. Once the driver's stream is
 * exhausted, the aggregated completion (results, token usage, finish reason,
 * tool calls and, if the driver provides one, the conversation) is stored on
 * `completion` and passed to the driver's `validateResult`.
 */
export class DefaultCompletionStream {
    driver;
    prompt;
    options;
    chunks; // Counter for number of chunks instead of storing strings
    completion;
    /**
     * @param driver - Driver used to request the stream, build the conversation,
     *   validate the result and format errors.
     * @param prompt - Prompt forwarded unchanged to the driver.
     * @param options - Execution options; `options.model` is used in logs and error context.
     */
    constructor(driver, prompt, options) {
        this.driver = driver;
        this.prompt = prompt;
        this.options = options;
        this.chunks = 0;
    }
    /**
     * Stream the completion, yielding one string per non-empty chunk.
     *
     * Errors that are not already a LlumiverseError are converted with the
     * driver's `formatLlumiverseError` before being rethrown.
     *
     * NOTE(review): bare string chunks are yielded and counted but are not
     * added to `completion.result` — confirm this is intended.
     */
    async *[Symbol.asyncIterator]() {
        // reset state
        this.completion = undefined;
        this.chunks = 0;
        const accumulatedResults = []; // Accumulate CompletionResult[] from chunks
        const accumulatedToolUse = new Map(); // Accumulate tool_use by id
        this.driver.logger.debug(`[${this.driver.provider}] Streaming Execution of ${this.options.model} with prompt`);
        const start = Date.now();
        let finish_reason = undefined;
        let promptTokens = 0;
        let resultTokens = undefined;
        let promptCachedTokens = undefined;
        let promptCacheWriteTokens = undefined;
        let promptNewTokens = undefined;
        try {
            const stream = await this.driver.requestTextCompletionStream(this.prompt, this.options);
            for await (const chunk of stream) {
                if (!chunk) {
                    continue;
                }
                if (typeof chunk === 'string') {
                    this.chunks++;
                    yield chunk;
                    continue;
                }
                // Do not replace a non-empty finish_reason with an empty one from a later chunk.
                if (chunk.finish_reason) {
                    finish_reason = chunk.finish_reason;
                }
                const usage = chunk.token_usage;
                if (usage) {
                    // Tokens returned include prior parts of stream, so overwrite rather than accumulate.
                    // Math.max used as some models report final token count at beginning of stream.
                    promptTokens = Math.max(promptTokens, usage.prompt ?? 0);
                    resultTokens = Math.max(resultTokens ?? 0, usage.result ?? 0);
                    if (usage.prompt_cached != null) {
                        promptCachedTokens = usage.prompt_cached;
                    }
                    if (usage.prompt_cache_write != null) {
                        promptCacheWriteTokens = usage.prompt_cache_write;
                    }
                    if (usage.prompt_new != null) {
                        promptNewTokens = usage.prompt_new;
                    }
                }
                if (chunk.tool_use && chunk.tool_use.length > 0) {
                    mergeStreamedToolUse(accumulatedToolUse, chunk.tool_use);
                }
                if (Array.isArray(chunk.result) && chunk.result.length > 0) {
                    for (const result of chunk.result) {
                        appendStreamedResult(accumulatedResults, result);
                    }
                    // Only yield if we have results to show
                    const resultText = renderStreamedResults(chunk.result);
                    if (resultText) {
                        this.chunks++;
                        yield resultText;
                    }
                }
            }
        }
        catch (error) {
            // Don't wrap if already a LlumiverseError
            if (LlumiverseError.isLlumiverseError(error)) {
                throw error;
            }
            throw this.driver.formatLlumiverseError(error, {
                provider: this.driver.provider,
                model: this.options.model,
                operation: 'stream',
            });
        }
        // Return undefined only if we never received any token data from the provider.
        // Use !== undefined (not truthiness) because resultTokens === 0 is valid (e.g. empty output with stop).
        const tokens = resultTokens !== undefined ? {
            prompt: promptTokens,
            result: resultTokens,
            total: resultTokens + promptTokens,
            ...(promptCachedTokens != null && { prompt_cached: promptCachedTokens }),
            ...(promptCacheWriteTokens != null && { prompt_cache_write: promptCacheWriteTokens }),
            ...(promptNewTokens != null && { prompt_new: promptNewTokens }),
        } : undefined;
        const toolUseArray = finalizeStreamedToolUse(accumulatedToolUse, finish_reason);
        this.completion = {
            result: accumulatedResults, // Return the accumulated CompletionResult[] instead of text
            prompt: this.prompt,
            execution_time: Date.now() - start,
            token_usage: tokens,
            finish_reason: finish_reason,
            chunks: this.chunks,
            tool_use: toolUseArray,
        };
        // Build conversation context for multi-turn support
        const conversation = this.driver.buildStreamingConversation(this.prompt, accumulatedResults, toolUseArray, this.options);
        if (conversation !== undefined) {
            this.completion.conversation = conversation;
        }
        try {
            this.driver.validateResult(this.completion, this.options);
        }
        catch (error) {
            // Don't wrap if already a LlumiverseError
            if (LlumiverseError.isLlumiverseError(error)) {
                throw error;
            }
            throw this.driver.formatLlumiverseError(error, {
                provider: this.driver.provider,
                model: this.options.model,
                operation: 'stream',
            });
        }
    }
}
/**
 * Completion stream used when the driver cannot stream: it runs a blocking
 * execution and yields the whole result as a single text chunk.
 */
export class FallbackCompletionStream {
    driver;
    prompt;
    options;
    completion;
    /**
     * @param driver - Driver whose `_execute` performs the blocking completion.
     * @param prompt - Prompt forwarded unchanged to the driver.
     * @param options - Execution options; `options.model` is used in the error context.
     */
    constructor(driver, prompt, options) {
        this.driver = driver;
        this.prompt = prompt;
        this.options = options;
    }
    /**
     * Execute the prompt and yield its rendered content exactly once.
     *
     * `completion` is set to the driver's original completion object before the
     * chunk is yielded, so it is available inside the consumer's loop body and
     * also when the consumer stops iterating after the single chunk (leaving a
     * `for await` loop early ends the generator, so code placed after the
     * `yield` would never run).
     *
     * Errors that are not already a LlumiverseError are converted with the
     * driver's `formatLlumiverseError` before being rethrown.
     */
    async *[Symbol.asyncIterator]() {
        // reset state
        this.completion = undefined;
        this.driver.logger.debug(`[${this.driver.provider}] Streaming is not supported, falling back to blocking execution`);
        try {
            const completion = await this.driver._execute(this.prompt, this.options);
            // For fallback streaming, yield the text content but keep the original completion
            const content = completion.result.map((r) => {
                switch (r.type) {
                    case 'text':
                        return r.value;
                    case 'json':
                        return JSON.stringify(r.value);
                    case 'image': {
                        // Show truncated image placeholder for streaming
                        const truncatedValue = String(r.value).slice(0, 10);
                        return `[Image: ${truncatedValue}...]`;
                    }
                    default:
                        return String(r.value || '');
                }
            }).join('');
            // Publish before yielding: the original completion with untouched CompletionResult[]
            this.completion = completion;
            yield content;
        }
        catch (error) {
            // Don't wrap if already a LlumiverseError
            if (LlumiverseError.isLlumiverseError(error)) {
                throw error;
            }
            throw this.driver.formatLlumiverseError(error, {
                provider: this.driver.provider,
                model: this.options.model,
                operation: 'stream',
            });
        }
    }
}
//# sourceMappingURL=CompletionStream.js.map