// ai-utils.js
// Version:
// Build AI applications, chatbots, and agents with JavaScript and TypeScript.
// 34 lines (33 loc) • 1.3 kB
// JavaScript
/**
 * Recursively halves `segments` until each piece holds fewer than
 * `maxChunkSize` segments, then returns the pieces in their original order.
 *
 * When `segments` is a string, it is split by character (as counted by
 * `String.prototype.length` / `slice`, i.e. UTF-16 code units). Otherwise it
 * is split between the elements of the provided array, and the elements of
 * each final piece are concatenated with `join("")`.
 *
 * The size check is strict: a piece whose length equals `maxChunkSize` is
 * still halved. A piece of at most one segment is never divided further, so
 * when `maxChunkSize` is 1 or less the result is one chunk per segment.
 *
 * @param {object} options
 * @param {number} options.maxChunkSize - Exclusive upper bound on the number
 *   of segments per chunk (pieces that are already a single segment excepted).
 * @param {string | string[]} options.segments - Text, or pre-split segments.
 * @returns {string[]} The chunks, in input order.
 */
export function splitRecursively({ maxChunkSize, segments }) {
  // Base case. The `length <= 1` guard stops the recursion for inputs that can
  // never satisfy `length < maxChunkSize`: with a single segment,
  // Math.ceil(1 / 2) === 1, so the "left half" would be the whole input again
  // and the function would recurse until the call stack overflows.
  if (segments.length < maxChunkSize || segments.length <= 1) {
    return Array.isArray(segments) ? [segments.join("")] : [segments];
  }

  // The left half takes the extra segment when the length is odd.
  const half = Math.ceil(segments.length / 2);

  return [
    ...splitRecursively({ segments: segments.slice(0, half), maxChunkSize }),
    ...splitRecursively({ segments: segments.slice(half), maxChunkSize }),
  ];
}
/**
 * Splits `text` into chunks by passing the raw string to `splitRecursively`
 * as its `segments`, so the text is divided at character positions.
 *
 * @param {object} options
 * @param {number} options.maxChunkSize - Forwarded unchanged to `splitRecursively`.
 * @param {string} options.text - The text to split.
 * @returns {Promise<string[]>} Resolves with the result of `splitRecursively`.
 */
export const splitRecursivelyAtCharacter = async ({ maxChunkSize, text }) => {
  const chunks = splitRecursively({ segments: text, maxChunkSize });
  return chunks;
};
/**
 * Creates a split function with `maxChunkSize` bound in advance.
 *
 * @param {object} options
 * @param {number} options.maxChunkSize - Passed to `splitRecursivelyAtCharacter` on every call.
 * @returns {(input: { text: string }) => Promise<string[]>} An async function
 *   that takes `{ text }` and delegates to `splitRecursivelyAtCharacter`.
 */
export const splitRecursivelyAtCharacterAsSplitFunction = ({ maxChunkSize }) => {
  return async ({ text }) => {
    const request = { maxChunkSize, text };
    return splitRecursivelyAtCharacter(request);
  };
};
/**
 * Splits `text` into chunks at token boundaries: the text is tokenized first,
 * and the resulting token texts are handed to `splitRecursively` as segments.
 *
 * @param {object} options
 * @param {object} options.tokenizer - Must provide `tokenizeWithTexts(text)`;
 *   its awaited result is read for a `tokenTexts` property.
 * @param {number} options.maxChunkSize - Forwarded unchanged to `splitRecursively`.
 * @param {string} options.text - The text to tokenize and split.
 * @returns {Promise<string[]>} Resolves with the result of `splitRecursively`.
 */
export const splitRecursivelyAtToken = async ({ tokenizer, maxChunkSize, text }) => {
  const { tokenTexts } = await tokenizer.tokenizeWithTexts(text);
  return splitRecursively({ segments: tokenTexts, maxChunkSize });
};
/**
 * Creates a split function with `tokenizer` and `maxChunkSize` bound in advance.
 *
 * @param {object} options
 * @param {object} options.tokenizer - Passed to `splitRecursivelyAtToken` on every call.
 * @param {number} options.maxChunkSize - Passed to `splitRecursivelyAtToken` on every call.
 * @returns {(input: { text: string }) => Promise<string[]>} An async function
 *   that takes `{ text }` and delegates to `splitRecursivelyAtToken`.
 */
export const splitRecursivelyAtTokenAsSplitFunction = ({ tokenizer, maxChunkSize }) => {
  return async ({ text }) => {
    const request = { tokenizer, maxChunkSize, text };
    return splitRecursivelyAtToken(request);
  };
};