UNPKG

@hpbyte/h-codex-core

Version:

Core indexing and search functionality for h-codex

122 lines 4.79 kB
"use strict";
// --- TypeScript-emitted CommonJS interop helpers (kept as generated) ---
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.recursiveChunker = exports.RecursiveChunker = void 0;
const fs = __importStar(require("node:fs/promises"));
const path = __importStar(require("path"));
const crypto_1 = __importDefault(require("crypto"));
const utils_1 = require("../../utils");

/**
 * Builds a function that maps a character offset in `content` to its
 * 1-based line number (1 + the number of '\n' characters strictly before
 * the offset).
 *
 * Newline positions are indexed once, so each lookup is a binary search.
 *
 * @param {string} content - Full text the offsets refer to.
 * @returns {(offset: number) => number} Offset-to-line lookup.
 */
function createLineLocator(content) {
    const newlineOffsets = [];
    for (let at = content.indexOf('\n'); at !== -1; at = content.indexOf('\n', at + 1)) {
        newlineOffsets.push(at);
    }
    return (offset) => {
        // Lower bound: count of newline positions that are < offset.
        let low = 0;
        let high = newlineOffsets.length;
        while (low < high) {
            const mid = (low + high) >>> 1;
            if (newlineOffsets[mid] < offset) {
                low = mid + 1;
            }
            else {
                high = mid;
            }
        }
        return low + 1;
    };
}

/**
 * Splits a text file into chunks by trying progressively finer separators
 * (blank line, newline, space) and finally fixed-size slices.
 *
 * The size limits (`maxChunkSize`, `coalesce`) and
 * `countLengthWithoutWhitespace` come from `../../utils`.
 */
class RecursiveChunker {
    /**
     * Reads `filePath` as UTF-8 and returns its processed chunks.
     *
     * Any error (read failure or processing failure) is logged with
     * `console.error` and results in an empty array; this method does not
     * throw.
     *
     * @param {{ filePath: string, projectId: * }} params
     * @returns {Promise<Array<object>>} Chunk records, see `processChunks`.
     */
    async chunk({ filePath, projectId }) {
        try {
            const content = await fs.readFile(filePath, 'utf8');
            // Collect the start offset of every piece so line numbers can be
            // derived from real positions instead of being estimated.
            const offsets = [];
            const chunks = this.splitRecursive(content, undefined, offsets);
            return this.processChunks(chunks, filePath, projectId, { content, offsets });
        }
        catch (error) {
            console.error(`Error processing file ${filePath}:`, error);
            return [];
        }
    }

    /**
     * Recursively splits `content` into pieces no longer than
     * `maxChunkSize`.
     *
     * The first separator that actually splits the text is used; pieces
     * that are still too long are split again with `separators.slice(1)`.
     * If no separator splits the text, it is cut into fixed-size slices.
     * Separators are removed from the returned pieces.
     *
     * @param {string} content - Text to split.
     * @param {string[]} [separators] - String separators, coarsest first.
     * @param {number[]} [offsets] - Optional output array. When given, the
     *   start offset of every returned piece (relative to the outermost
     *   `content`, shifted by `baseOffset`) is appended in order, so that
     *   `offsets[i]` belongs to the i-th returned piece.
     * @param {number} [baseOffset=0] - Offset of `content` within the
     *   outermost text; used by the recursion.
     * @returns {string[]} The pieces, in source order.
     */
    splitRecursive(content, separators = ['\n\n', '\n', ' '], offsets = undefined, baseOffset = 0) {
        const record = (start) => {
            if (offsets) {
                offsets.push(start);
            }
        };
        if (content.length <= utils_1.maxChunkSize) {
            record(baseOffset);
            return [content];
        }
        for (const separator of separators) {
            const splits = content.split(separator);
            if (splits.length > 1) {
                const result = [];
                let cursor = baseOffset;
                for (const split of splits) {
                    if (split.length > utils_1.maxChunkSize) {
                        result.push(...this.splitRecursive(split, separators.slice(1), offsets, cursor));
                    }
                    else {
                        result.push(split);
                        record(cursor);
                    }
                    // Step over this piece and the separator that followed it.
                    cursor += split.length + separator.length;
                }
                return result;
            }
        }
        // No separator applies: fall back to fixed-size slices.
        const chunks = [];
        for (let i = 0; i < content.length; i += utils_1.maxChunkSize) {
            chunks.push(content.slice(i, i + utils_1.maxChunkSize));
            record(baseOffset + i);
        }
        return chunks;
    }

    /**
     * Turns raw pieces into chunk records.
     *
     * A piece whose non-whitespace length is below `coalesce` is merged
     * (joined with '\n') with following pieces as long as the merged text
     * stays within `maxChunkSize`. Chunks that are empty after trimming are
     * skipped.
     *
     * Line numbers:
     * - When `source` supplies the original `content` and one offset per
     *   piece, `startLine`/`endLine` are computed from the real positions
     *   of the first and last merged piece in `content`.
     * - Otherwise they are estimated by counting '\n' in each chunk and
     *   assuming chunks sit on consecutive lines. That estimate drifts
     *   because the separators removed by splitting are not accounted for.
     *
     * `hash` and `size` are computed from the untrimmed chunk text, while
     * `content` is trimmed.
     *
     * @param {string[]} chunks - Pieces in source order.
     * @param {string} filePath - Stored on each record; also used to derive
     *   `language`.
     * @param {*} projectId - Stored on each record unchanged.
     * @param {{ content: string, offsets: number[] }} [source] - Optional
     *   position information as produced by `splitRecursive`.
     * @returns {Array<object>} Records with `content`, `filePath`,
     *   `projectId`, `startLine`, `endLine`, `nodeType`, `language`, `hash`
     *   and `size`.
     */
    processChunks(chunks, filePath, projectId, source = undefined) {
        if (chunks.length === 0)
            return [];
        // Only trust offsets that line up one-to-one with the pieces (an
        // overriding splitRecursive may not fill them in).
        const hasPositions = source != null
            && typeof source.content === 'string'
            && Array.isArray(source.offsets)
            && source.offsets.length === chunks.length;
        const lineAt = hasPositions ? createLineLocator(source.content) : null;
        const language = this.getLanguage(filePath);
        const processedChunks = [];
        let currentLineNumber = 1;
        for (let i = 0; i < chunks.length; i++) {
            const firstIndex = i;
            let chunk = chunks[i];
            while ((0, utils_1.countLengthWithoutWhitespace)(chunk) < utils_1.coalesce && i < chunks.length - 1) {
                const combinedChunk = `${chunk}\n${chunks[i + 1]}`;
                if (combinedChunk.length > utils_1.maxChunkSize) {
                    break;
                }
                chunk = combinedChunk;
                i++;
            }
            let startLine;
            let endLine;
            if (hasPositions) {
                startLine = lineAt(source.offsets[firstIndex]);
                // End position is just past the last merged piece.
                endLine = lineAt(source.offsets[i] + chunks[i].length);
            }
            else {
                const lineCount = (chunk.match(/\n/g) || []).length + 1;
                startLine = currentLineNumber;
                endLine = currentLineNumber + lineCount - 1;
                currentLineNumber = endLine + 1;
            }
            const trimmed = chunk.trim();
            if (trimmed.length === 0) {
                // Nothing but whitespace: not worth emitting as a chunk.
                continue;
            }
            processedChunks.push({
                content: trimmed,
                filePath,
                projectId,
                startLine,
                endLine,
                nodeType: 'text',
                language,
                hash: crypto_1.default.createHash('sha256').update(chunk).digest('hex'),
                size: chunk.length,
            });
        }
        return processedChunks;
    }

    /**
     * Returns the lower-cased file extension of `filePath` with its first
     * '.' removed (empty string when there is no extension).
     *
     * @param {string} filePath
     * @returns {string}
     */
    getLanguage(filePath) {
        return path.extname(filePath).toLowerCase().replace('.', '');
    }
}
exports.RecursiveChunker = RecursiveChunker;
exports.recursiveChunker = new RecursiveChunker();
//# sourceMappingURL=recursive-chunker.js.map