node-llama-cpp

Run AI models locally on your machine with Node.js bindings for llama.cpp. Enforce a JSON schema on the model output at the generation level.
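
For context, a minimal sketch of what that generation-level JSON-schema enforcement looks like from the package's public API (a hedged example, assuming the v3 API surface of getLlama, LlamaChatSession, and createGrammarForJsonSchema; the model path and prompt are placeholders):

import { getLlama, LlamaChatSession } from "node-llama-cpp";

const llama = await getLlama();
const model = await llama.loadModel({ modelPath: "path/to/model.gguf" }); // placeholder path
const context = await model.createContext();
const session = new LlamaChatSession({ contextSequence: context.getSequence() });

// A grammar built from a JSON schema constrains every generated token,
// so the output is forced to match the schema's shape during generation
const grammar = await llama.createGrammarForJsonSchema({
    type: "object",
    properties: {
        sentiment: { enum: ["positive", "neutral", "negative"] },
        summary: { type: "string" }
    }
});

const answer = await session.prompt("Summarize this review: ...", { grammar });
const parsed = grammar.parse(answer); // object matching the schema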

import { DisposedError } from "lifecycle-utils";
import { maxRecentDetokenizerTokens } from "../consts.js";
import { pushAll } from "./pushAll.js";

/**
 * Buffers queued token chunks and releases them only once all of their locks are disposed.
 */
export class TokenStreamRegulator {
    /** @internal */ _queue = [];
    /** @internal */ _LastTokens = [];

    /** Queues a chunk and returns a release handle that can be locked to hold the chunk back */
    addChunk({ tokens, text }) {
        const queuedRelease = QueuedTokenRelease._create(tokens, text);

        this._queue.push(queuedRelease);

        return queuedRelease;
    }

    /** Removes fully free chunks from the head of the queue and returns their tokens */
    popFreeChunkTokens() {
        const res = [];

        while (this._queue.length > 0 && this._queue[0].isFree) {
            const tokens = this._queue.shift().tokens;
            pushAll(res, tokens);
            pushAll(this._LastTokens, tokens);
        }

        if (this._LastTokens.length > maxRecentDetokenizerTokens)
            this._LastTokens.splice(0, this._LastTokens.length - maxRecentDetokenizerTokens);

        return res;
    }

    /** Returns the unlocked prefix (tokens and text) of the first chunk when it is only partially locked */
    getPartiallyFreeChunk(tokenizer) {
        if (this._queue.length > 0 && this._queue[0].isPartiallyFree) {
            const queuedRelease = this._queue[0];

            if (queuedRelease.hasTextLocks && !queuedRelease.hasTokenLocks)
                return {
                    tokens: [],
                    text: queuedRelease.text.slice(0, queuedRelease.getFreeTextIndex())
                };
            else if (queuedRelease.hasTokenLocks && !queuedRelease.hasTextLocks) {
                const tokens = queuedRelease.tokens.slice(0, queuedRelease.getFreeTokenIndex());
                return {
                    tokens,
                    text: tokenizer.detokenize(tokens, false, this._LastTokens)
                };
            }

            const freeTokenIndex = queuedRelease.getFreeTokenIndex();
            const tokens = queuedRelease.tokens.slice(0, freeTokenIndex);
            const tokensText = tokenizer.detokenize(tokens, false, this._LastTokens);

            const freeTextIndex = queuedRelease.getFreeTextIndex();
            const text = queuedRelease.text.slice(0, freeTextIndex);

            if (text.length > tokensText.length) {
                return {
                    tokens,
                    text: tokensText
                };
            } else if (text.length < tokensText.length) {
                const resTokens = [];
                let resTokensText = "";
                const lastTokens = this._LastTokens.slice();

                for (const token of tokens) {
                    const tokenText = tokenizer.detokenize([token], false, lastTokens);
                    lastTokens.push(token);

                    // ensure partial tokens are detokenized correctly
                    if (resTokensText.length + tokenText.length > text.length)
                        resTokensText = tokenizer.detokenize(resTokens, false, this._LastTokens);

                    if (resTokensText.length + tokenText.length > text.length) {
                        const remainingText = text.slice(resTokensText.length);
                        const remainingTokens = tokenizer(remainingText, false, "trimLeadingSpace");
                        pushAll(resTokens, remainingTokens);
                        break;
                    }

                    resTokens.push(token);
                    resTokensText += tokenText;
                }

                return {
                    tokens: resTokens,
                    text
                };
            }

            return {
                tokens: queuedRelease.tokens.slice(0, freeTokenIndex),
                text: queuedRelease.text.slice(0, freeTextIndex)
            };
        }

        return {
            tokens: [],
            text: ""
        };
    }

    getAllQueuedChunkTokens() {
        return this._queue.flatMap((queuedRelease) => queuedRelease.tokens);
    }

    /** Returns up to `maxTokens` of the most recently queued tokens, in order */
    getLastQueuedChunkTokens(maxTokens = maxRecentDetokenizerTokens) {
        const res = [];

        for (let i = this._queue.length - 1; i >= 0 && res.length < maxTokens; i--) {
            const tokens = this._queue[i].tokens;

            for (let j = tokens.length - 1; j >= 0 && res.length < maxTokens; j--)
                res.unshift(tokens[j]);
        }

        return res;
    }

    clearQueue() {
        this._queue.length = 0;
    }

    reset() {
        this.clearQueue();
        this._LastTokens.length = 0;
    }

    removeChunkIfLast(queuedTokenRelease) {
        if (this._queue.at(-1) === queuedTokenRelease)
            return this._queue.pop() != null;

        return false;
    }
}

/**
 * A queued chunk of tokens and text that becomes free only after all of its locks are disposed.
 */
export class QueuedTokenRelease {
    /** @internal */ _textLocks = new Set();
    /** @internal */ _tokenLocks = new Set();
    /** @internal */ _tokens;
    /** @internal */ _text;

    constructor(tokens, text) {
        this._tokens = tokens;
        this._text = text;
    }

    get tokens() {
        return this._tokens;
    }

    get text() {
        return this._text;
    }

    get isFree() {
        return this._textLocks.size === 0 && this._tokenLocks.size === 0;
    }

    get hasTextLocks() {
        return this._textLocks.size > 0;
    }

    get hasTokenLocks() {
        return this._tokenLocks.size > 0;
    }

    get isPartiallyFree() {
        if (this.isFree)
            return true;

        const freeTextIndex = this.getFreeTextIndex();
        const freeTokenIndex = this.getFreeTokenIndex();
        return freeTextIndex > 0 && freeTokenIndex > 0;
    }

    getFreeTextIndex() {
        if (this._textLocks.size === 0)
            return this.text.length;

        return [...this._textLocks]
            .reduce((res, lock) => Math.min(res, lock.index), this.text.length);
    }

    getFreeTokenIndex() {
        if (this._tokenLocks.size === 0)
            return this.tokens.length;

        return [...this._tokenLocks]
            .reduce((res, lock) => Math.min(res, lock.index), this.tokens.length);
    }

    /** Holds back the text of this chunk from `startIndex` onwards until the returned lock is disposed */
    createTextIndexLock(startIndex) {
        const lock = QueuedTokenReleaseLock._create(startIndex, this._textLocks);

        if (startIndex >= 0 && startIndex < this.text.length)
            this._textLocks.add(lock);

        return lock;
    }

    /** Holds back the tokens of this chunk from `startIndex` onwards until the returned lock is disposed */
    createTokenIndexLock(startIndex) {
        const lock = QueuedTokenReleaseLock._create(startIndex, this._tokenLocks);

        if (startIndex >= 0 && startIndex < this.tokens.length)
            this._tokenLocks.add(lock);

        return lock;
    }

    modifyTokensAndText(tokens, text) {
        this._tokens = tokens;
        this._text = text;
    }

    /** @internal */
    static _create(tokens, text) {
        return new QueuedTokenRelease(tokens, text);
    }
}

/**
 * A disposable lock that holds back part of a queued chunk, starting at a given index.
 */
export class QueuedTokenReleaseLock {
    /** @internal */ _index;
    /** @internal */ _locks;

    constructor(index, locks) {
        this._index = index;
        this._locks = locks;
    }

    get index() {
        return this._index;
    }

    duplicate() {
        if (!this._locks.has(this))
            throw new DisposedError();

        const lock = QueuedTokenReleaseLock._create(this._index, this._locks);
        this._locks.add(lock);

        return lock;
    }

    dispose() {
        this._locks.delete(this);
    }

    [Symbol.dispose]() {
        this.dispose();
    }

    /** @internal */
    static _create(length, locks) {
        return new QueuedTokenReleaseLock(length, locks);
    }
}
//# sourceMappingURL=TokenStreamRegulator.js.map
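
A minimal usage sketch of the regulator itself, based only on the methods defined in the file above (not part of the file; the numeric tokens and the relative import path are assumptions for illustration):

import { TokenStreamRegulator } from "./TokenStreamRegulator.js";

const regulator = new TokenStreamRegulator();

// Queue a chunk and hold it back with a text lock starting at index 0
const release = regulator.addChunk({ tokens: [1, 2, 3], text: "Hello" });
const lock = release.createTextIndexLock(0);

console.log(regulator.popFreeChunkTokens()); // [] - the chunk is still locked

// Disposing the lock frees the chunk, so the next pop releases its tokens
lock.dispose();
console.log(regulator.popFreeChunkTokens()); // [1, 2, 3]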