UNPKG

chonkie

Version:

🦛 CHONK your texts in TS with Chonkie! ✨ The no-nonsense, lightweight, and efficient chunking library.

110 lines • 5.03 kB
"use strict"; /** Recursive chunker client for Chonkie API. */ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? 
resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; Object.defineProperty(exports, "__esModule", { value: true }); exports.RecursiveChunker = void 0; const base_1 = require("./base"); const recursive_1 = require("../types/recursive"); const fs = __importStar(require("fs")); const path = __importStar(require("path")); class RecursiveChunker extends base_1.CloudClient { constructor(apiKey, config = {}) { super({ apiKey }); this.config = { tokenizerOrTokenCounter: config.tokenizerOrTokenCounter || "gpt2", chunkSize: config.chunkSize || 512, recipe: config.recipe || "default", lang: config.lang || "en", minCharactersPerChunk: config.minCharactersPerChunk || 12, }; } chunk(input) { return __awaiter(this, void 0, void 0, function* () { const formData = new FormData(); if (input.filepath) { const fileContent = fs.readFileSync(input.filepath); const fileName = path.basename(input.filepath) || 'file.txt'; formData.append("file", new Blob([fileContent]), fileName); } else if (input.text) { // JSON encode the text formData.append("text", JSON.stringify(input.text)); // Append empty file to ensure multipart form formData.append("file", new Blob(), "text_input.txt"); } else { throw new Error("Either text or filepath must be provided"); } formData.append("tokenizer_or_token_counter", this.config.tokenizerOrTokenCounter); formData.append("chunk_size", this.config.chunkSize.toString()); formData.append("recipe", this.config.recipe); formData.append("lang", this.config.lang); formData.append("min_characters_per_chunk", this.config.minCharactersPerChunk.toString()); formData.append("return_type", "chunks"); const data = yield this.request("/v1/chunk/recursive", { method: "POST", body: formData, }); // Convert from snake_case to camelCase const camelCaseData = data.map((chunk) => { return { text: chunk.text, startIndex: chunk.start_index, endIndex: chunk.end_index, tokenCount: 
chunk.token_count, embedding: chunk.embedding || undefined, level: chunk.level, }; }); return camelCaseData.map((chunk) => recursive_1.RecursiveChunk.fromDict(chunk)); }); } chunkBatch(inputs) { return __awaiter(this, void 0, void 0, function* () { return Promise.all(inputs.map(input => this.chunk(input))); }); } } exports.RecursiveChunker = RecursiveChunker; //# sourceMappingURL=recursive.js.map