UNPKG

ai-utils.js

Version:

Build AI applications, chatbots, and agents with JavaScript and TypeScript.

42 lines (41 loc) 1.93 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.splitRecursivelyAtTokenAsSplitFunction = exports.splitRecursivelyAtToken = exports.splitRecursivelyAtCharacterAsSplitFunction = exports.splitRecursivelyAtCharacter = exports.splitRecursively = void 0; // when segments is a string, it splits by character, otherwise according to the provided segments function splitRecursively({ maxChunkSize, segments, }) { if (segments.length < maxChunkSize) { return Array.isArray(segments) ? [segments.join("")] : [segments]; } const half = Math.ceil(segments.length / 2); const left = segments.slice(0, half); const right = segments.slice(half); return [ ...splitRecursively({ segments: left, maxChunkSize, }), ...splitRecursively({ segments: right, maxChunkSize, }), ]; } exports.splitRecursively = splitRecursively; const splitRecursivelyAtCharacter = async ({ maxChunkSize, text, }) => splitRecursively({ maxChunkSize, segments: text, }); exports.splitRecursivelyAtCharacter = splitRecursivelyAtCharacter; const splitRecursivelyAtCharacterAsSplitFunction = ({ maxChunkSize }) => async ({ text }) => (0, exports.splitRecursivelyAtCharacter)({ maxChunkSize, text }); exports.splitRecursivelyAtCharacterAsSplitFunction = splitRecursivelyAtCharacterAsSplitFunction; const splitRecursivelyAtToken = async ({ tokenizer, maxChunkSize, text, }) => splitRecursively({ maxChunkSize, segments: (await tokenizer.tokenizeWithTexts(text)).tokenTexts, }); exports.splitRecursivelyAtToken = splitRecursivelyAtToken; const splitRecursivelyAtTokenAsSplitFunction = ({ tokenizer, maxChunkSize, }) => async ({ text }) => (0, exports.splitRecursivelyAtToken)({ tokenizer, maxChunkSize, text, }); exports.splitRecursivelyAtTokenAsSplitFunction = splitRecursivelyAtTokenAsSplitFunction;