@llumiverse/core
Version:
Provide a universal API to LLMs. Support for existing LLMs can be added by writing a driver.
105 lines • 4.04 kB
JavaScript
/**
 * Async-iterable wrapper around a driver's streaming text completion.
 *
 * Iterating the instance yields the text fragments produced by
 * `driver.requestTextCompletionStream(prompt, options)`. Once the stream has
 * been fully consumed, `completion` holds the aggregated result (joined text,
 * token usage, finish reason, execution time, and number of chunks) after it
 * has been passed to `driver.validateResult`.
 *
 * Any error raised while streaming or validating is rethrown as-is; when the
 * thrown value is an extensible object, the prompt is attached to it as
 * `error.prompt`.
 */
export class DefaultCompletionStream {
    /** Driver used to request the stream, log, and validate the result. */
    driver;
    /** Prompt forwarded to the driver and echoed in the completion. */
    prompt;
    /** Execution options forwarded to the driver (`options.model` is logged). */
    options;
    /** Text fragments yielded during the current (or last) iteration. */
    chunks;
    /** Aggregated completion; `undefined` until an iteration runs to its end. */
    completion;

    /**
     * @param driver - object exposing `provider`, `logger.debug`,
     *   `requestTextCompletionStream` and `validateResult`.
     * @param prompt - prompt passed unchanged to the driver.
     * @param options - execution options passed unchanged to the driver.
     */
    constructor(driver, prompt, options) {
        this.driver = driver;
        this.prompt = prompt;
        this.options = options;
        this.chunks = [];
    }

    /**
     * Streams the completion, yielding each non-empty text fragment.
     *
     * @yields {string} text fragments in the order the driver produced them.
     * @throws whatever the driver stream or `validateResult` throws.
     */
    async *[Symbol.asyncIterator]() {
        // reset state
        this.completion = undefined;
        if (this.chunks.length > 0) {
            this.chunks = [];
        }
        const chunks = this.chunks;
        this.driver.logger.debug(`[${this.driver.provider}] Streaming Execution of ${this.options.model} with prompt`);
        const start = Date.now();
        let finish_reason = undefined;
        let promptTokens = 0;
        let resultTokens = 0;
        try {
            const stream = await this.driver.requestTextCompletionStream(this.prompt, this.options);
            for await (const chunk of stream) {
                if (!chunk) {
                    continue;
                }
                if (typeof chunk === 'string') {
                    chunks.push(chunk);
                    yield chunk;
                    continue;
                }
                // Do not replace non-null values with null values: empty
                // finish_reason chunks may follow "stop" or "length".
                if (chunk.finish_reason) {
                    finish_reason = chunk.finish_reason;
                }
                if (chunk.token_usage) {
                    // Reported counts include prior parts of the stream, so keep
                    // the maximum rather than accumulating. Math.max is used as
                    // some models report the final token count at the beginning.
                    promptTokens = Math.max(promptTokens, chunk.token_usage.prompt ?? 0);
                    resultTokens = Math.max(resultTokens, chunk.token_usage.result ?? 0);
                }
                if (chunk.result) {
                    chunks.push(chunk.result);
                    yield chunk.result;
                }
            }
            const content = chunks.join('');
            // Leave token_usage undefined when nothing was reported, so callers
            // can test it for truthiness. Prompt tokens alone are enough to
            // report usage (e.g. a completion that produced no result tokens).
            const hasUsage = promptTokens > 0 || resultTokens > 0;
            const tokens = hasUsage
                ? { prompt: promptTokens, result: resultTokens, total: resultTokens + promptTokens }
                : undefined;
            this.completion = {
                result: content,
                prompt: this.prompt,
                execution_time: Date.now() - start,
                token_usage: tokens,
                finish_reason: finish_reason,
                chunks: chunks.length,
            };
            this.driver.validateResult(this.completion, this.options);
        }
        catch (error) {
            // Only tag values that can accept a property: assigning to a
            // primitive, null, or a non-extensible object throws in strict mode
            // and would mask the original failure.
            if (error !== null && typeof error === 'object' && Object.isExtensible(error)) {
                error.prompt = this.prompt;
            }
            throw error;
        }
    }
}
/**
 * Async-iterable used when a driver cannot stream: it performs a blocking
 * `driver._execute(prompt, options)` call and yields the whole result as a
 * single chunk.
 *
 * `completion` is set to the object returned by `_execute` as soon as the
 * call resolves, before the chunk is yielded, so it is available even when
 * the consumer stops iterating after receiving that chunk.
 *
 * Errors are rethrown as-is; when the thrown value is an extensible object,
 * the prompt is attached to it as `error.prompt`.
 */
export class FallbackCompletionStream {
    /** Driver used to log and execute the request. */
    driver;
    /** Prompt forwarded to the driver. */
    prompt;
    /** Execution options forwarded to the driver. */
    options;
    /** Completion returned by the driver; `undefined` until execution resolves. */
    completion;

    /**
     * @param driver - object exposing `provider`, `logger.debug` and `_execute`.
     * @param prompt - prompt passed unchanged to the driver.
     * @param options - execution options passed unchanged to the driver.
     */
    constructor(driver, prompt, options) {
        this.driver = driver;
        this.prompt = prompt;
        this.options = options;
    }

    /**
     * Executes the request and yields its result once.
     *
     * @yields {string} the result itself when it is a string, otherwise its
     *   `JSON.stringify` serialization.
     * @throws whatever `driver._execute` throws.
     */
    async *[Symbol.asyncIterator]() {
        // reset state
        this.completion = undefined;
        this.driver.logger.debug(`[${this.driver.provider}] Streaming is not supported, falling back to blocking execution`);
        try {
            const completion = await this.driver._execute(this.prompt, this.options);
            const content = typeof completion.result === 'string' ? completion.result : JSON.stringify(completion.result);
            // Publish the completion before yielding: a generator that is
            // abandoned at the yield never resumes, which previously left
            // `completion` undefined although execution had finished.
            this.completion = completion;
            yield content;
        }
        catch (error) {
            // Only tag values that can accept a property: assigning to a
            // primitive, null, or a non-extensible object throws in strict mode
            // and would mask the original failure.
            if (error !== null && typeof error === 'object' && Object.isExtensible(error)) {
                error.prompt = this.prompt;
            }
            throw error;
        }
    }
}
//# sourceMappingURL=CompletionStream.js.map