UNPKG

chonkie

Version:

🦛 CHONK your texts in TS with Chonkie! ✨ The no-nonsense, lightweight, and efficient chunking library.

95 lines • 3.13 kB
"use strict"; /** Custom base types for Chonkie. */ Object.defineProperty(exports, "__esModule", { value: true }); exports.Chunk = void 0; /** * Represents a chunk of text with associated metadata. * * @property {string} text - The text of the chunk. * @property {number} startIndex - The starting index of the chunk in the original text. * @property {number} endIndex - The ending index of the chunk in the original text. * @property {number} tokenCount - The number of tokens in the chunk. * @property {number[]} [embedding] - The embedding for the chunk. */ class Chunk { /** * Constructs a new Chunk object. * * @param {ChunkData} data - The data to construct the Chunk from. */ constructor(data) { this.text = data.text; this.startIndex = data.startIndex; this.endIndex = data.endIndex; this.tokenCount = data.tokenCount; this.embedding = data.embedding; // Basic validation, more can be added if needed if (this.startIndex > this.endIndex) { throw new Error("Start index must be less than or equal to end index."); } if (this.tokenCount < 0) { throw new Error("Token count must be a non-negative integer."); } } /** Return a string representation of the Chunk. * * @returns {string} The text of the chunk. */ toString() { return this.text; } /** Return a detailed string representation of the Chunk. * * @returns {string} The detailed string representation of the Chunk. */ toRepresentation() { let repr = `Chunk(text='${this.text}', tokenCount=${this.tokenCount}, startIndex=${this.startIndex}, endIndex=${this.endIndex}`; repr += ')'; return repr; } /** Return a slice of the chunk's text. * * @param {number} [start] - The starting index of the slice. * @param {number} [end] - The ending index of the slice. * @returns {string} The slice of the chunk's text. */ slice(start, end) { return this.text.slice(start, end); } /** Return the Chunk as a dictionary-like object. * * @returns {ChunkData} The dictionary-like object. 
*/ toDict() { return { text: this.text, startIndex: this.startIndex, endIndex: this.endIndex, tokenCount: this.tokenCount, embedding: this.embedding, }; } /** Create a Chunk object from a dictionary-like object. * * @param {ChunkData} data - The dictionary-like object. * @returns {Chunk} The Chunk object. */ static fromDict(data) { return new Chunk({ text: data.text, startIndex: data.startIndex, endIndex: data.endIndex, tokenCount: data.tokenCount, embedding: data.embedding, }); } /** Return a deep copy of the chunk. * * @returns {Chunk} The deep copy of the chunk. */ copy() { return Chunk.fromDict(this.toDict()); } } exports.Chunk = Chunk; //# sourceMappingURL=base.js.map