UNPKG

llama-flow

Version:

The TypeScript-first prompt engineering toolkit for working with chat-based LLMs.

131 lines (130 loc) 4.25 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.RecursiveCharacterTextSplitter = exports.CharacterTextSplitter = void 0; class TextSplitter { chunkSize = 1000; chunkOverlap = 200; constructor(fields) { this.chunkSize = fields?.chunkSize ?? this.chunkSize; this.chunkOverlap = fields?.chunkOverlap ?? this.chunkOverlap; if (this.chunkOverlap >= this.chunkSize) { throw new Error('Cannot have chunkOverlap >= chunkSize'); } } createDocuments(texts) { const documents = []; for (let i = 0; i < texts.length; i += 1) { const text = texts[i]; for (const chunk of this.splitText(text)) { documents.push(chunk); } } return documents; } splitDocuments(documents) { return this.createDocuments(documents); } joinDocs(docs, separator) { const text = docs.join(separator).trim(); return text === '' ? null : text; } mergeSplits(splits, separator) { const docs = []; const currentDoc = []; let total = 0; for (const d of splits) { const _len = d.length; if (total + _len >= this.chunkSize) { if (total > this.chunkSize) { console.warn(`Created a chunk of size ${total}, + which is longer than the specified ${this.chunkSize}`); } if (currentDoc.length > 0) { const doc = this.joinDocs(currentDoc, separator); if (doc !== null) { docs.push(doc); } while (total > this.chunkOverlap || (total + _len > this.chunkSize && total > 0)) { total -= currentDoc[0].length; currentDoc.shift(); } } } currentDoc.push(d); total += _len; } const doc = this.joinDocs(currentDoc, separator); if (doc !== null) { docs.push(doc); } return docs; } } class CharacterTextSplitter extends TextSplitter { separator = '\n\n'; constructor(fields) { super(fields); this.separator = fields?.separator ?? 
this.separator; } splitText(text) { let splits; if (this.separator) { splits = text.split(this.separator); } else { splits = text.split(''); } return this.mergeSplits(splits, this.separator); } } exports.CharacterTextSplitter = CharacterTextSplitter; class RecursiveCharacterTextSplitter extends TextSplitter { separators = ['\n\n', '\n', '.', ',', ' ', '']; constructor(fields) { super(fields); this.separators = fields?.separators ?? this.separators; } splitText(text) { const finalChunks = []; let separator = this.separators[this.separators.length - 1]; for (const s of this.separators) { if (s === '') { separator = s; break; } if (text.includes(s)) { separator = s; break; } } let splits; if (separator) { splits = text.split(separator); } else { splits = text.split(''); } let goodSplits = []; for (const s of splits) { if (s.length < this.chunkSize) { goodSplits.push(s); } else { if (goodSplits.length) { const mergedText = this.mergeSplits(goodSplits, separator); finalChunks.push(...mergedText); goodSplits = []; } const otherInfo = this.splitText(s); finalChunks.push(...otherInfo); } } if (goodSplits.length) { const mergedText = this.mergeSplits(goodSplits, separator); finalChunks.push(...mergedText); } return finalChunks; } } exports.RecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter;