UNPKG

@llumiverse/core

Version:

Provides a universal API to LLMs. Support for existing LLMs can be added by writing a driver.

105 lines 4.04 kB
/**
 * Best-effort annotation of a thrown value with the prompt that was being executed.
 *
 * Class bodies and ES modules are strict-mode code, so `value.prompt = ...` throws a
 * TypeError when `value` is a primitive, `null`/`undefined`, or a frozen /
 * non-extensible object. Letting that TypeError escape from a `catch` handler would
 * hide the failure the caller actually needs to see, so the annotation is skipped
 * whenever it cannot be applied.
 *
 * @param {*} error - The value that was thrown.
 * @param {*} prompt - The prompt to expose as `error.prompt`.
 * @returns {*} The very same `error` value, annotated when possible.
 */
function attachPromptToError(error, prompt) {
    const canHoldProperties = error !== null
        && (typeof error === 'object' || typeof error === 'function');
    if (canHoldProperties) {
        try {
            error.prompt = prompt;
        } catch {
            // Frozen or otherwise read-only error: the annotation is only a convenience,
            // so keep the original error intact rather than replacing it.
        }
    }
    return error;
}

/**
 * Async-iterable wrapper around a driver's streaming text completion.
 *
 * Iterating yields the text pieces produced by
 * `driver.requestTextCompletionStream(prompt, options)`. Once the underlying stream
 * is exhausted, `completion` is populated with the joined text, timing, token usage
 * and finish reason, and is then passed to `driver.validateResult`.
 */
export class DefaultCompletionStream {
    driver;
    prompt;
    options;
    chunks;
    completion;

    /**
     * @param {object} driver - Must provide `logger.debug`, `provider`,
     *   `requestTextCompletionStream(prompt, options)` and `validateResult(completion, options)`.
     * @param {*} prompt - Prompt forwarded untouched to the driver.
     * @param {object} options - Execution options; `options.model` is read for logging.
     */
    constructor(driver, prompt, options) {
        this.driver = driver;
        this.prompt = prompt;
        this.options = options;
        this.chunks = [];
    }

    /**
     * Streams the completion, yielding each non-empty text piece as it arrives.
     *
     * @yields {*} A string chunk, or the truthy `result` field of an object chunk.
     * @throws Whatever the driver, the stream or `validateResult` throws. The thrown
     *   value is rethrown as-is, with `prompt` attached when it can carry properties.
     */
    async *[Symbol.asyncIterator]() {
        // reset state
        this.completion = undefined;
        if (this.chunks.length > 0) {
            this.chunks = [];
        }
        const chunks = this.chunks;
        this.driver.logger.debug(`[${this.driver.provider}] Streaming Execution of ${this.options.model} with prompt`);
        const start = Date.now();
        let finishReason = undefined;
        let promptTokens = 0;
        let resultTokens = undefined;
        try {
            const stream = await this.driver.requestTextCompletionStream(this.prompt, this.options);
            for await (const chunk of stream) {
                if (!chunk) {
                    continue;
                }
                if (typeof chunk === 'string') {
                    chunks.push(chunk);
                    yield chunk;
                    continue;
                }
                if (chunk.finish_reason) {
                    // Do not replace non-null values with null values.
                    // Used to skip empty finish_reason chunks coming after "stop" or "length".
                    finishReason = chunk.finish_reason;
                }
                if (chunk.token_usage) {
                    // Tokens returned include prior parts of the stream, so overwrite rather
                    // than accumulate. Math.max is used as some models report the final token
                    // count at the beginning of the stream.
                    promptTokens = Math.max(promptTokens, chunk.token_usage.prompt ?? 0);
                    resultTokens = Math.max(resultTokens ?? 0, chunk.token_usage.result ?? 0);
                }
                if (chunk.result) {
                    chunks.push(chunk.result);
                    yield chunk.result;
                }
            }
        } catch (error) {
            throw attachPromptToError(error, this.prompt);
        }
        const content = chunks.join('');
        // Return undefined for the token usage object if there is nothing to fill it with.
        // Allows checking for truthiness on token_usage rather than on its internals,
        // for testing and downstream usage.
        const tokens = resultTokens
            ? {
                prompt: promptTokens,
                result: resultTokens,
                total: resultTokens + promptTokens,
            }
            : undefined;
        this.completion = {
            result: content,
            prompt: this.prompt,
            execution_time: Date.now() - start,
            token_usage: tokens,
            finish_reason: finishReason,
            chunks: chunks.length,
        };
        try {
            this.driver.validateResult(this.completion, this.options);
        } catch (error) {
            throw attachPromptToError(error, this.prompt);
        }
    }
}

/**
 * Async-iterable used when streaming is unavailable: performs one blocking
 * `driver._execute(prompt, options)` call and yields its result as a single chunk.
 */
export class FallbackCompletionStream {
    driver;
    prompt;
    options;
    completion;

    /**
     * @param {object} driver - Must provide `logger.debug`, `provider` and `_execute(prompt, options)`.
     * @param {*} prompt - Prompt forwarded untouched to the driver.
     * @param {object} options - Execution options forwarded untouched to the driver.
     */
    constructor(driver, prompt, options) {
        this.driver = driver;
        this.prompt = prompt;
        this.options = options;
    }

    /**
     * Executes the prompt and yields the whole result once.
     *
     * A string result is yielded as-is; any other result is yielded as its
     * `JSON.stringify` form. `completion` is assigned only after the consumer
     * resumes the generator past that single yield.
     *
     * @yields {string} The full completion text.
     * @throws Whatever `driver._execute` throws. The thrown value is rethrown as-is,
     *   with `prompt` attached when it can carry properties.
     */
    async *[Symbol.asyncIterator]() {
        // reset state
        this.completion = undefined;
        this.driver.logger.debug(`[${this.driver.provider}] Streaming is not supported, falling back to blocking execution`);
        try {
            const completion = await this.driver._execute(this.prompt, this.options);
            const content = typeof completion.result === 'string'
                ? completion.result
                : JSON.stringify(completion.result);
            yield content;
            this.completion = completion;
        } catch (error) {
            throw attachPromptToError(error, this.prompt);
        }
    }
}
//# sourceMappingURL=CompletionStream.js.map