llama-flow
Version:
The TypeScript-first prompt engineering toolkit for working with chat-based LLMs.
131 lines (130 loc) • 4.25 kB
JavaScript
"use strict";
// Set the `__esModule` flag on the CommonJS exports object (presumably for
// ES-module interop by the importing side — confirm against the build setup).
Object.defineProperty(exports, "__esModule", { value: true });
// Initialise both named exports to `undefined`; they are assigned their
// classes further down, right after each class definition.
exports.RecursiveCharacterTextSplitter = exports.CharacterTextSplitter = void 0;
/**
 * Base class for splitters that cut text into chunks of bounded size with an
 * optional overlap between consecutive chunks.
 *
 * Subclasses are expected to provide `splitText(text)`, which
 * `createDocuments` calls for every input text.
 */
class TextSplitter {
    /** Target maximum chunk length, measured with `String.prototype.length`. */
    chunkSize = 1000;
    /** Amount of trailing content carried over into the next chunk. */
    chunkOverlap = 200;

    /**
     * @param {{chunkSize?: number, chunkOverlap?: number}} [fields] Optional
     *   overrides for the defaults above.
     * @throws {Error} When `chunkOverlap` is not strictly smaller than `chunkSize`.
     */
    constructor(fields) {
        this.chunkSize = fields?.chunkSize ?? this.chunkSize;
        this.chunkOverlap = fields?.chunkOverlap ?? this.chunkOverlap;
        if (this.chunkOverlap >= this.chunkSize) {
            throw new Error('Cannot have chunkOverlap >= chunkSize');
        }
    }

    /**
     * Splits every text with `splitText` and returns all chunks in one flat list.
     *
     * @param {string[]} texts Texts to split.
     * @returns {string[]} Chunks of all texts, in input order.
     */
    createDocuments(texts) {
        const documents = [];
        for (let i = 0; i < texts.length; i += 1) {
            for (const chunk of this.splitText(texts[i])) {
                documents.push(chunk);
            }
        }
        return documents;
    }

    /**
     * Alias of `createDocuments`.
     *
     * @param {string[]} documents Texts to split.
     * @returns {string[]} Chunks of all texts, in input order.
     */
    splitDocuments(documents) {
        return this.createDocuments(documents);
    }

    /**
     * Joins pieces with the separator and trims surrounding whitespace.
     *
     * @param {string[]} docs Pieces to join.
     * @param {string} separator String placed between pieces.
     * @returns {string|null} The joined text, or `null` when it is empty after trimming.
     */
    joinDocs(docs, separator) {
        const text = docs.join(separator).trim();
        return text === '' ? null : text;
    }

    /**
     * Greedily packs consecutive pieces into chunks whose joined length
     * (pieces plus the separators between them) does not exceed `chunkSize`,
     * keeping up to `chunkOverlap` characters of trailing pieces as the start
     * of the next chunk.
     *
     * A single piece longer than `chunkSize` cannot be shortened here; it is
     * emitted as its own chunk and a warning is logged.
     *
     * @param {string[]} splits Pieces in document order.
     * @param {string} separator String used to re-join pieces.
     * @returns {string[]} The merged chunks.
     */
    mergeSplits(splits, separator) {
        // Array.prototype.join falls back to ',' for an undefined separator,
        // so mirror that here to keep the length accounting truthful.
        const separatorLength = separator === undefined ? 1 : String(separator).length;
        // Length of `pieceCount` pieces totalling `pieceTotal` characters once joined.
        const joinedLength = (pieceCount, pieceTotal) =>
            pieceTotal + Math.max(pieceCount - 1, 0) * separatorLength;

        const docs = [];
        const currentDoc = [];
        // Sum of the lengths of the pieces in currentDoc (separators excluded).
        let total = 0;

        for (const piece of splits) {
            const pieceLength = piece.length;
            const overflowsWith = () =>
                joinedLength(currentDoc.length + 1, total + pieceLength) > this.chunkSize;

            if (overflowsWith()) {
                const currentLength = joinedLength(currentDoc.length, total);
                if (currentLength > this.chunkSize) {
                    console.warn(`Created a chunk of size ${currentLength}, which is longer than the specified ${this.chunkSize}`);
                }
                if (currentDoc.length > 0) {
                    const doc = this.joinDocs(currentDoc, separator);
                    if (doc !== null) {
                        docs.push(doc);
                    }
                    // Drop leading pieces while more than the overlap is
                    // retained, or while the incoming piece still would not
                    // fit. The length guard stops the loop once nothing is
                    // left (e.g. with a negative chunkOverlap).
                    while (currentDoc.length > 0 &&
                        (total > this.chunkOverlap || (overflowsWith() && total > 0))) {
                        total -= currentDoc[0].length;
                        currentDoc.shift();
                    }
                }
            }
            currentDoc.push(piece);
            total += pieceLength;
        }

        const lastDoc = this.joinDocs(currentDoc, separator);
        if (lastDoc !== null) {
            docs.push(lastDoc);
        }
        return docs;
    }
}
/**
 * Splits text on a single fixed separator and merges the resulting pieces
 * back into chunks via `mergeSplits`.
 */
class CharacterTextSplitter extends TextSplitter {
    /** Separator to split on; an empty string splits into individual characters. */
    separator = '\n\n';

    /**
     * @param {{separator?: string, chunkSize?: number, chunkOverlap?: number}} [fields]
     *   Optional overrides; the whole object is also passed to the base constructor.
     */
    constructor(fields) {
        super(fields);
        this.separator = fields?.separator ?? this.separator;
    }

    /**
     * @param {string} text Text to split.
     * @returns {string[]} Chunks produced by merging the separated pieces.
     */
    splitText(text) {
        // With an empty separator, iterate by code point: `split('')` cuts
        // between UTF-16 code units and can tear a surrogate pair apart,
        // leaving broken characters at chunk boundaries.
        const splits = this.separator
            ? text.split(this.separator)
            : Array.from(text);
        return this.mergeSplits(splits, this.separator);
    }
}
exports.CharacterTextSplitter = CharacterTextSplitter;
/**
 * Splits text using the first separator from an ordered list that occurs in
 * the text, then re-splits any piece that is still too large by calling
 * `splitText` on it again.
 */
class RecursiveCharacterTextSplitter extends TextSplitter {
    /** Separators in order of preference; `''` means "split into characters". */
    separators = ['\n\n', '\n', '.', ',', ' ', ''];

    /**
     * @param {{separators?: string[], chunkSize?: number, chunkOverlap?: number}} [fields]
     *   Optional overrides; the whole object is also passed to the base constructor.
     */
    constructor(fields) {
        super(fields);
        this.separators = fields?.separators ?? this.separators;
    }

    /**
     * @param {string} text Text to split.
     * @returns {string[]} Chunks in document order.
     */
    splitText(text) {
        // First separator present in the text wins; otherwise fall back to the
        // last one, or to '' when the list is empty (an undefined separator
        // would make the pieces be re-joined with ',').
        const fallback = this.separators[this.separators.length - 1] ?? '';
        const separator =
            this.separators.find((s) => s === '' || text.includes(s)) ?? fallback;

        // Iterate by code point for the empty separator so surrogate pairs
        // are never torn apart (`split('')` works on UTF-16 code units).
        const pieces = separator ? text.split(separator) : Array.from(text);

        const finalChunks = [];
        let pending = [];
        // Merge the small pieces collected so far and append them to the result.
        const flushPending = () => {
            if (pending.length > 0) {
                finalChunks.push(...this.mergeSplits(pending, separator));
                pending = [];
            }
        };

        for (const piece of pieces) {
            if (piece.length < this.chunkSize) {
                pending.push(piece);
                continue;
            }
            flushPending();
            if (piece.length === text.length) {
                // Splitting made no progress (the piece is the whole input),
                // so recursing would never terminate. Emit it as an oversized
                // chunk instead.
                const chunk = this.joinDocs([piece], separator);
                if (chunk !== null) {
                    finalChunks.push(chunk);
                }
            } else {
                finalChunks.push(...this.splitText(piece));
            }
        }
        flushPending();
        return finalChunks;
    }
}
exports.RecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter;