codetrix
Version:
A lightweight lodash-style utility library
41 lines (40 loc) • 1.11 kB
JavaScript
import { tokenize } from "./text";
/**
 * Splits text into chunks of at most `size` tokens each.
 * Useful for embedding storage, vector DBs, and AI context management.
 *
 * The text is split with `tokenize` from "./text" (presumably into words —
 * confirm against that module); the tokens of each chunk are re-joined with
 * a single space, so the original whitespace is not preserved.
 *
 * @param {string} str - The input text
 * @param {number} size - Maximum words per chunk; must be a number >= 1.
 *   Fractional values are rounded down so a chunk never exceeds `size`.
 * @returns {string[]} Array of text chunks (empty when there are no tokens)
 * @throws {RangeError} If `size` is not a number or is less than 1
 *
 * @example
 * chunkText("This is a long text...", 5);
 */
export function chunkText(str, size) {
  // A zero or negative step would never advance the loop index (infinite
  // loop), and NaN/undefined would silently produce a single empty chunk,
  // so reject anything that is not a number >= 1 up front.
  if (typeof size !== "number" || !(size >= 1)) {
    throw new RangeError(
      `chunkText: size must be a number >= 1, received ${String(size)}`,
    );
  }
  // Whole-number step keeps every chunk within the documented maximum.
  const step = Math.floor(size);
  const words = tokenize(str);
  const chunks = [];
  for (let i = 0; i < words.length; i += step) {
    chunks.push(words.slice(i, i + step).join(" "));
  }
  return chunks;
}
/**
 * Computes the element-wise average vector (centroid) of a list of vectors.
 * Useful for clustering embeddings or document representation.
 *
 * All vectors must have the same length as the first one; the result has
 * that same length.
 *
 * @param {number[][]} vectors - Array of equal-length vectors
 * @returns {number[]} The average vector, or `[]` when `vectors` is empty
 * @throws {RangeError} If any vector's length differs from the first vector's
 *
 * @example
 * vectorAverage([[1,2],[3,4]]); // [2,3]
 */
export function vectorAverage(vectors) {
  if (vectors.length === 0) {
    return [];
  }
  const dim = vectors[0].length;
  const sum = new Array(dim).fill(0);
  for (const v of vectors) {
    // A longer vector would add into undefined slots (NaN) and a shorter one
    // would be implicitly zero-padded; both give a meaningless centroid.
    if (v.length !== dim) {
      throw new RangeError(
        `vectorAverage: expected every vector to have length ${dim}, received length ${v.length}`,
      );
    }
    v.forEach((val, i) => {
      sum[i] += val;
    });
  }
  return sum.map((s) => s / vectors.length);
}