UNPKG

ai-utils.js

Version:

Build AI applications, chatbots, and agents with JavaScript and TypeScript.

34 lines (33 loc) 1.3 kB
/**
 * Recursively halves `segments` until every piece holds at most
 * `maxChunkSize` segments, then returns the pieces as strings in their
 * original order.
 *
 * When `segments` is a string it is split by character (UTF-16 code unit);
 * when it is an array it is split between elements and each resulting piece
 * is concatenated with `join("")`.
 *
 * @param {object} options
 * @param {number} options.maxChunkSize - Maximum number of segments
 *   (characters or array elements) allowed in a single chunk.
 * @param {string | string[]} options.segments - The text, or the pre-split
 *   pieces of the text, to chunk.
 * @returns {string[]} The chunks, in order. Empty input yields `[""]`.
 */
export function splitRecursively({ maxChunkSize, segments }) {
  const segmentCount = segments.length;

  // Base case: the piece fits (inclusive bound, since maxChunkSize is a
  // maximum), or it cannot be divided any further. The second condition
  // guarantees termination when maxChunkSize is < 1 or not a number;
  // without it a single segment would be re-split into itself forever.
  if (segmentCount <= maxChunkSize || segmentCount <= 1) {
    return Array.isArray(segments) ? [segments.join("")] : [segments];
  }

  // NOTE: for strings, slice() works on UTF-16 code units, so a split point
  // can fall between the two halves of a surrogate pair.
  const midpoint = Math.ceil(segmentCount / 2);

  return [
    ...splitRecursively({
      maxChunkSize,
      segments: segments.slice(0, midpoint),
    }),
    ...splitRecursively({
      maxChunkSize,
      segments: segments.slice(midpoint),
    }),
  ];
}

/**
 * Splits `text` recursively by character into chunks of at most
 * `maxChunkSize` characters.
 *
 * @param {object} options
 * @param {number} options.maxChunkSize - Maximum characters per chunk.
 * @param {string} options.text - The text to split.
 * @returns {Promise<string[]>} Resolves to the chunks, in order.
 */
export const splitRecursivelyAtCharacter = async ({ maxChunkSize, text }) =>
  splitRecursively({
    maxChunkSize,
    segments: text,
  });

/**
 * Creates a split function with `maxChunkSize` pre-bound that delegates to
 * `splitRecursivelyAtCharacter`.
 *
 * @param {object} options
 * @param {number} options.maxChunkSize - Maximum characters per chunk.
 * @returns {(input: { text: string }) => Promise<string[]>} Async split
 *   function taking `{ text }`.
 */
export const splitRecursivelyAtCharacterAsSplitFunction =
  ({ maxChunkSize }) =>
  async ({ text }) =>
    splitRecursivelyAtCharacter({ maxChunkSize, text });

/**
 * Splits `text` recursively at token boundaries into chunks of at most
 * `maxChunkSize` tokens.
 *
 * @param {object} options
 * @param {object} options.tokenizer - Object exposing
 *   `tokenizeWithTexts(text)`, whose awaited result has a `tokenTexts`
 *   property (presumably an array with the text of each token; verify
 *   against the tokenizer implementation).
 * @param {number} options.maxChunkSize - Maximum tokens per chunk.
 * @param {string} options.text - The text to split.
 * @returns {Promise<string[]>} Resolves to the chunks, in order.
 */
export const splitRecursivelyAtToken = async ({
  tokenizer,
  maxChunkSize,
  text,
}) => {
  const { tokenTexts } = await tokenizer.tokenizeWithTexts(text);

  return splitRecursively({
    maxChunkSize,
    segments: tokenTexts,
  });
};

/**
 * Creates a split function with `tokenizer` and `maxChunkSize` pre-bound
 * that delegates to `splitRecursivelyAtToken`.
 *
 * @param {object} options
 * @param {object} options.tokenizer - Tokenizer passed through to
 *   `splitRecursivelyAtToken`.
 * @param {number} options.maxChunkSize - Maximum tokens per chunk.
 * @returns {(input: { text: string }) => Promise<string[]>} Async split
 *   function taking `{ text }`.
 */
export const splitRecursivelyAtTokenAsSplitFunction =
  ({ tokenizer, maxChunkSize }) =>
  async ({ text }) =>
    splitRecursivelyAtToken({
      tokenizer,
      maxChunkSize,
      text,
    });