UNPKG

chonkie

Version:

🦛 CHONK your texts in TS with Chonkie! ✨ The no-nonsense, lightweight, and efficient chunking library.

84 lines (83 loc) • 2.79 kB
/** Custom base types for Chonkie. */

/**
 * Represents the data structure for a chunk object.
 *
 * This is the plain-object shape accepted by the `Chunk` constructor and
 * `Chunk.fromDict`, and returned by `Chunk.toDict`.
 *
 * @property {string} text - The text of the chunk.
 * @property {number} startIndex - The starting index of the chunk in the original text.
 * @property {number} endIndex - The ending index of the chunk in the original text.
 * @property {number} tokenCount - The number of tokens in the chunk.
 * @property {number[]} [embedding] - The embedding for the chunk.
 */
interface ChunkData {
  text: string;
  startIndex: number;
  endIndex: number;
  tokenCount: number;
  embedding?: number[];
}

/**
 * Represents a chunk of text with associated metadata.
 *
 * @property {string} text - The text of the chunk.
 * @property {number} startIndex - The starting index of the chunk in the original text.
 * @property {number} endIndex - The ending index of the chunk in the original text.
 * @property {number} tokenCount - The number of tokens in the chunk.
 * @property {number[]} [embedding] - The embedding for the chunk.
 */
export declare class Chunk {
  /** The text of the chunk. */
  text: string;

  /** The starting index of the chunk in the original text. */
  startIndex: number;

  /** The ending index of the chunk in the original text. */
  endIndex: number;

  /** The number of tokens in the chunk. */
  tokenCount: number;

  /** Optional embedding for the chunk. */
  embedding?: number[];

  /**
   * Constructs a new Chunk object.
   *
   * The parameter is typed as `ChunkData`, which has exactly the fields the
   * signature previously spelled out inline, so existing callers are unaffected.
   *
   * @param {ChunkData} data - The data to construct the Chunk from.
   */
  constructor(data: ChunkData);

  /**
   * Return a string representation of the Chunk.
   *
   * @returns {string} The text of the chunk.
   */
  toString(): string;

  /**
   * Return a detailed string representation of the Chunk.
   *
   * @returns {string} The detailed string representation of the Chunk.
   */
  toRepresentation(): string;

  /**
   * Return a slice of the chunk's text.
   *
   * @param {number} [start] - The starting index of the slice.
   * @param {number} [end] - The ending index of the slice.
   * @returns {string} The slice of the chunk's text.
   */
  slice(start?: number, end?: number): string;

  /**
   * Return the Chunk as a dictionary-like object.
   *
   * @returns {ChunkData} The dictionary-like object.
   */
  toDict(): ChunkData;

  /**
   * Create a Chunk object from a dictionary-like object.
   *
   * @param {ChunkData} data - The dictionary-like object.
   * @returns {Chunk} The Chunk object.
   */
  static fromDict(data: ChunkData): Chunk;

  /**
   * Return a deep copy of the chunk.
   *
   * @returns {Chunk} The deep copy of the chunk.
   */
  copy(): Chunk;
}

export {};