@hpbyte/h-codex-core
Version:
Core indexing and search functionality for h-codex
122 lines • 4.79 kB
JavaScript
;
/**
 * Compiler-emitted interop helper: exposes property `key` of `source` on
 * `target`, under the name `alias` when one is given and under `key` otherwise.
 *
 * A helper already present on `this` is reused. Where `Object.create` exists
 * the property is defined through a descriptor; otherwise the value is copied
 * with a plain assignment.
 */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            // Accessor property: reuse it only when the source is flagged __esModule.
            needsGetter = !source.__esModule;
        }
        else {
            // Data property that can still change: read it through a getter so
            // the target always reflects the current value on the source.
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportName, descriptor);
    }
    : function (target, source, key, alias) {
        target[alias === undefined ? key : alias] = source[key];
    });
/**
 * Compiler-emitted interop helper: stores `value` as the "default" property of
 * `target`. A helper already present on `this` is reused.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        // No `writable`/`configurable` given, so both take defineProperty's default of false.
        var descriptor = { enumerable: true, value: value };
        Object.defineProperty(target, "default", descriptor);
    }
    : function (target, value) {
        target["default"] = value;
    });
/**
 * Compiler-emitted interop helper: returns a namespace-style object for `mod`.
 *
 * A module flagged `__esModule` is returned unchanged. Anything else gets a new
 * object carrying every own property of `mod` except "default" (bound through
 * `__createBinding`), with `mod` itself installed as "default" through
 * `__setModuleDefault`. A helper already present on `this` is reused.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Key-listing function, chosen the first time it is needed.
    var resolvedLister = null;
    function ownNames(subject) {
        if (resolvedLister === null) {
            resolvedLister = Object.getOwnPropertyNames || function (obj) {
                var names = [];
                for (var key in obj) {
                    if (Object.prototype.hasOwnProperty.call(obj, key)) {
                        names.push(key);
                    }
                }
                return names;
            };
        }
        return resolvedLister(subject);
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var names = ownNames(mod);
            for (var idx = 0; idx < names.length; idx++) {
                var name = names[idx];
                if (name === "default") {
                    continue;
                }
                __createBinding(namespace, mod, name);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
/**
 * Compiler-emitted interop helper: returns `mod` unchanged when it is flagged
 * `__esModule`; otherwise wraps it as `{ default: mod }`. A helper already
 * present on `this` is reused.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.recursiveChunker = exports.RecursiveChunker = void 0;
const fs = __importStar(require("node:fs/promises"));
const path = __importStar(require("path"));
const crypto_1 = __importDefault(require("crypto"));
const utils_1 = require("../../utils");
/**
 * Plain-text chunker: splits a file's text on progressively finer separators
 * (blank line, newline, space) until every piece fits within
 * `utils_1.maxChunkSize`, then turns the pieces into chunk records.
 */
class RecursiveChunker {
    /**
     * Reads `filePath` as UTF-8 and returns its chunk records.
     *
     * Best effort: any failure is logged with `console.error` and an empty
     * array is returned instead of throwing.
     *
     * @param {{ filePath: string, projectId: * }} params
     * @returns {Promise<object[]>} Chunk records; `[]` on error.
     */
    async chunk({ filePath, projectId }) {
        try {
            const content = await fs.readFile(filePath, 'utf8');
            const chunks = this.splitRecursive(content);
            // Pass the full text along so line numbers come from the pieces'
            // real positions in the file rather than from an estimate.
            return this.processChunks(chunks, filePath, projectId, content);
        }
        catch (error) {
            console.error(`Error processing file ${filePath}:`, error);
            return [];
        }
    }
    /**
     * Splits `content` into pieces no longer than `utils_1.maxChunkSize`.
     *
     * The first separator that actually divides the text is used; any piece
     * that is still too long is split again. Text that no separator divides is
     * cut into fixed-size slices. Separators are NOT kept in the output.
     *
     * @param {string} content Text to split.
     * @param {string[]} [separators] Separators to try, in order.
     * @returns {string[]} Pieces in document order.
     */
    splitRecursive(content, separators = ['\n\n', '\n', ' ']) {
        if (content.length <= utils_1.maxChunkSize) {
            return [content];
        }
        for (const separator of separators) {
            const splits = content.split(separator);
            if (splits.length > 1) {
                const result = [];
                for (const split of splits) {
                    if (split.length > utils_1.maxChunkSize) {
                        result.push(...this.splitRecursive(split, separators.slice(1)));
                    }
                    else {
                        result.push(split);
                    }
                }
                return result;
            }
        }
        // No separator applies: fall back to fixed-size slices.
        const chunks = [];
        for (let i = 0; i < content.length; i += utils_1.maxChunkSize) {
            chunks.push(content.slice(i, i + utils_1.maxChunkSize));
        }
        return chunks;
    }
    /**
     * Builds chunk records from the pieces returned by `splitRecursive`.
     *
     * A piece whose non-whitespace length is below `utils_1.coalesce` is joined
     * (with a newline) to the pieces that follow it for as long as the result
     * still fits within `utils_1.maxChunkSize`. Pieces that contain only
     * whitespace produce no record.
     *
     * Line numbers: `splitRecursive` drops its separators, so the pieces alone
     * do not say how many lines lay between them. When `sourceText` is given,
     * each piece is located in it by a forward search and `startLine`/`endLine`
     * are the 1-based lines of the first and last non-whitespace characters of
     * the record's content. Without `sourceText` (or if a piece cannot be found
     * in it) the numbers are an estimate: newlines inside the chunk are counted
     * and each chunk is assumed to start on the line after the previous one.
     *
     * `hash` and `size` are taken from the joined text before trimming, while
     * `content` is the trimmed text.
     *
     * @param {string[]} chunks Pieces in document order.
     * @param {string} filePath Stored on each record; also determines `language`.
     * @param {*} projectId Stored on each record unchanged.
     * @param {string} [sourceText] Text the pieces were split from.
     * @returns {object[]} Chunk records.
     */
    processChunks(chunks, filePath, projectId, sourceText) {
        if (chunks.length === 0)
            return [];
        const processedChunks = [];
        const language = this.getLanguage(filePath);
        // Position tracking is switched off for good once a piece cannot be found.
        let canLocate = typeof sourceText === 'string';
        let searchFrom = 0;
        // Offsets only ever grow, so newlines are counted once, incrementally.
        let scannedTo = 0;
        let scannedLine = 1;
        const lineAt = (offset) => {
            for (; scannedTo < offset; scannedTo++) {
                if (sourceText.charCodeAt(scannedTo) === 10) {
                    scannedLine++;
                }
            }
            return scannedLine;
        };
        // Offsets of the non-whitespace text of the chunk being assembled.
        let textStart = -1;
        let textEnd = -1;
        const track = (piece) => {
            if (!canLocate) {
                return;
            }
            const at = sourceText.indexOf(piece, searchFrom);
            if (at === -1) {
                canLocate = false;
                return;
            }
            searchFrom = at + piece.length;
            const visibleLength = piece.trimEnd().length;
            if (visibleLength === 0) {
                return;
            }
            if (textStart === -1) {
                textStart = at + (piece.length - piece.trimStart().length);
            }
            textEnd = at + visibleLength;
        };
        // Estimated next line, used when real positions are unavailable.
        let nextLine = 1;
        for (let i = 0; i < chunks.length; i++) {
            let chunk = chunks[i];
            textStart = -1;
            textEnd = -1;
            track(chunk);
            while ((0, utils_1.countLengthWithoutWhitespace)(chunk) < utils_1.coalesce && i < chunks.length - 1) {
                const nextChunk = chunks[i + 1];
                const combinedChunk = chunk + '\n' + nextChunk;
                if (combinedChunk.length > utils_1.maxChunkSize) {
                    break;
                }
                chunk = combinedChunk;
                i++;
                track(nextChunk);
            }
            const lineCount = (chunk.match(/\n/g) || []).length + 1;
            let startLine = nextLine;
            let endLine = nextLine + lineCount - 1;
            if (canLocate && textStart !== -1) {
                startLine = lineAt(textStart);
                endLine = lineAt(textEnd - 1);
            }
            nextLine = endLine + 1;
            const content = chunk.trim();
            if (content.length === 0) {
                // Nothing to index (empty file, trailing separator, blank run).
                continue;
            }
            processedChunks.push({
                content,
                filePath,
                projectId,
                startLine,
                endLine,
                nodeType: 'text',
                language,
                hash: crypto_1.default.createHash('sha256').update(chunk).digest('hex'),
                size: chunk.length,
            });
        }
        return processedChunks;
    }
    /**
     * Returns the lower-cased extension of `filePath` without its leading dot,
     * or an empty string when the path has no extension.
     *
     * @param {string} filePath
     * @returns {string}
     */
    getLanguage(filePath) {
        return path.extname(filePath).toLowerCase().replace('.', '');
    }
}
// Export the class itself so callers can construct their own instances.
exports.RecursiveChunker = RecursiveChunker;
// Also export one instance, constructed when this module is first loaded.
exports.recursiveChunker = new RecursiveChunker();
//# sourceMappingURL=recursive-chunker.js.map