// ai-utils.js
// Version:
// Build AI applications, chatbots, and agents with JavaScript and TypeScript.
// 42 lines (41 loc) • 1.93 kB
// JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.splitRecursivelyAtTokenAsSplitFunction = exports.splitRecursivelyAtToken = exports.splitRecursivelyAtCharacterAsSplitFunction = exports.splitRecursivelyAtCharacter = exports.splitRecursively = void 0;
/**
 * Recursively halves `segments` until every piece fits within `maxChunkSize`,
 * then returns the pieces, in their original order, as strings.
 *
 * When `segments` is a string it is split by character (string index, i.e.
 * UTF-16 code unit); when it is an array it is split between elements and the
 * elements of each final piece are concatenated with `join("")`.
 *
 * @param {object} options
 * @param {number} options.maxChunkSize - Largest number of segments
 *   (characters or array elements) a single chunk may contain.
 * @param {string | string[]} options.segments - Text, or pre-split pieces of
 *   text, to divide into chunks.
 * @returns {string[]} The chunks. Empty input yields `[""]`.
 */
function splitRecursively({ maxChunkSize, segments, }) {
    // A piece that fits the limit is emitted whole (an exact fit included).
    // A piece of length <= 1 cannot be divided any further: halving it yields
    // (itself, empty), so without this guard a maxChunkSize of 1, 0, a negative
    // number or NaN would recurse until the stack overflows.
    if (segments.length <= maxChunkSize || segments.length <= 1) {
        return Array.isArray(segments) ? [segments.join("")] : [segments];
    }
    // The left half receives the extra element when the length is odd.
    const half = Math.ceil(segments.length / 2);
    return [
        ...splitRecursively({ segments: segments.slice(0, half), maxChunkSize }),
        ...splitRecursively({ segments: segments.slice(half), maxChunkSize }),
    ];
}
exports.splitRecursively = splitRecursively;
/**
 * Splits `text` into chunks by passing the string itself to
 * `splitRecursively` as its `segments`.
 *
 * @param {object} options
 * @param {number} options.maxChunkSize - Forwarded unchanged to `splitRecursively`.
 * @param {string} options.text - The text to split.
 * @returns {Promise<*>} Resolves to whatever `splitRecursively` returns.
 */
async function splitRecursivelyAtCharacter({ maxChunkSize, text }) {
    return splitRecursively({ segments: text, maxChunkSize });
}
exports.splitRecursivelyAtCharacter = splitRecursivelyAtCharacter;
/**
 * Creates a split function with `maxChunkSize` pre-bound: the returned async
 * function takes `{ text }` and forwards both values to
 * `exports.splitRecursivelyAtCharacter`.
 *
 * @param {object} options
 * @param {number} options.maxChunkSize - Captured and passed along on every call.
 * @returns {function({ text: string }): Promise<*>} Async split function.
 */
const splitRecursivelyAtCharacterAsSplitFunction = ({ maxChunkSize }) => {
    return async ({ text }) => {
        // Looked up on `exports` at call time and invoked without a receiver.
        const splitAtCharacter = exports.splitRecursivelyAtCharacter;
        return splitAtCharacter({ maxChunkSize, text });
    };
};
exports.splitRecursivelyAtCharacterAsSplitFunction = splitRecursivelyAtCharacterAsSplitFunction;
/**
 * Tokenizes `text` and splits it into chunks at token boundaries by passing
 * the token texts to `splitRecursively` as its `segments`.
 *
 * @param {object} options
 * @param {object} options.tokenizer - Must provide `tokenizeWithTexts(text)`,
 *   whose (awaited) result has a `tokenTexts` property.
 * @param {number} options.maxChunkSize - Forwarded unchanged to `splitRecursively`.
 * @param {string} options.text - The text to tokenize and split.
 * @returns {Promise<*>} Resolves to whatever `splitRecursively` returns.
 */
async function splitRecursivelyAtToken({ tokenizer, maxChunkSize, text }) {
    const tokenization = await tokenizer.tokenizeWithTexts(text);
    return splitRecursively({
        segments: tokenization.tokenTexts,
        maxChunkSize,
    });
}
exports.splitRecursivelyAtToken = splitRecursivelyAtToken;
/**
 * Creates a split function with `tokenizer` and `maxChunkSize` pre-bound: the
 * returned async function takes `{ text }` and forwards all three values to
 * `exports.splitRecursivelyAtToken`.
 *
 * @param {object} options
 * @param {object} options.tokenizer - Captured and passed along on every call.
 * @param {number} options.maxChunkSize - Captured and passed along on every call.
 * @returns {function({ text: string }): Promise<*>} Async split function.
 */
const splitRecursivelyAtTokenAsSplitFunction = ({ tokenizer, maxChunkSize }) => {
    return async ({ text }) => {
        // Looked up on `exports` at call time and invoked without a receiver.
        const splitAtToken = exports.splitRecursivelyAtToken;
        return splitAtToken({ tokenizer, maxChunkSize, text });
    };
};
exports.splitRecursivelyAtTokenAsSplitFunction = splitRecursivelyAtTokenAsSplitFunction;