UNPKG

llm-splitter

Version:

Efficient, configurable text chunking utility for LLM vectorization. Returns rich chunk metadata.

37 lines 1.65 kB
/** * Get the text of a chunk from positional parameters. * * Note that for arrays, the returned result will be an array and that the first and/or last * element of the array may be a substring of that array item's text. * * @param {string|string[]} input - The input (string or array of strings) to split. * @param {number} start - The start of the chunk. * @param {number} end - The end of the chunk. * @returns {string|string[]} The text or array of texts of the chunk. */ export function getChunk(input, start, end) { const matches = []; let offset = 0; const inputs = Array.isArray(input) ? input : [input]; for (const item of inputs) { // Error if not string. if (typeof item !== 'string') throw new TypeError(`Input must be a string or array of strings, got ${typeof item} for ${item}`); const itemLength = item.length; const itemStart = offset; const itemEnd = offset + itemLength; // Check if this item overlaps with the requested chunk if (start < itemEnd && itemStart < end) { // Calculate the actual start and end positions within this item const chunkStart = Math.max(0, start - itemStart); const chunkEnd = Math.min(itemLength, end - itemStart); // Extract the substring from this item const chunk = item.substring(chunkStart, chunkEnd); matches.push(chunk); } offset += itemLength; } // Return single string for single input, array for array input return Array.isArray(input) ? matches : matches[0] || ''; } //# sourceMappingURL=get-chunk.js.map