UNPKG

mcard-js

Version:

MCard - Content-addressable storage with cryptographic hashing, handle resolution, and vector search for Node.js and browsers

142 lines 6.2 kB
/**
 * Loader runtimes for file ingestion.
 *
 * - LoaderRuntime: Uses loadFileToCollection utility
 * - CollectionLoaderRuntime: Direct file ingestion into CardCollection
 */
import * as fs from 'fs';
import * as path from 'path';
import { MCard } from '../../../model/MCard.js';
import { SqliteNodeEngine } from '../../../storage/SqliteNodeEngine.js';
import { CardCollection } from '../../../model/CardCollection.js';
import { computeTimingMetrics, extractLoaderParams, findProjectRoot, listFiles } from '../FileSystemUtils.js';

/** Database path value that is passed to the engine untouched (never resolved against the project root). */
const IN_MEMORY_DB = ':memory:';

/** Converts any thrown value into the string reported in a failure result. */
function toErrorMessage(err) {
    return err instanceof Error ? err.message : String(err);
}

/** Returns `target` unchanged when absolute, otherwise joined onto `projectRoot`. */
function resolveAgainstRoot(projectRoot, target) {
    return path.isAbsolute(target) ? target : path.join(projectRoot, target);
}

/** Creates the directory that will contain `filePath` (no-op when it already exists). */
function ensureParentDirectory(filePath) {
    fs.mkdirSync(path.dirname(filePath), { recursive: true });
}

/** Best-effort removal of an existing database file plus its `-wal` / `-shm` companions. */
function removeExistingDatabase(dbFile) {
    if (!fs.existsSync(dbFile)) {
        return;
    }
    try {
        // The main file goes first: if it cannot be removed we must not delete
        // its WAL/SHM companions, so a single try block covers all three.
        fs.unlinkSync(dbFile);
        for (const suffix of ['-wal', '-shm']) {
            const companion = dbFile + suffix;
            if (fs.existsSync(companion)) {
                fs.unlinkSync(companion);
            }
        }
    } catch (e) {
        // Deliberately ignored: the clean slate is best-effort and a leftover
        // file must not abort the load.
    }
}

/**
 * Opens an engine on `dbPath`, runs `work` with a CardCollection built on it,
 * and closes the engine on both the success and the failure path.
 *
 * @param {string} dbPath - Path handed to SqliteNodeEngine.
 * @param {(collection: CardCollection) => Promise<*>} work - Callback that uses the collection.
 * @returns {Promise<*>} Whatever `work` resolves to.
 * @throws Whatever `work`, the constructors, or a close after successful work throws.
 */
async function withCollection(dbPath, work) {
    const engine = new SqliteNodeEngine(dbPath);
    let result;
    try {
        result = await work(new CardCollection(engine));
    } catch (err) {
        try {
            await engine.close();
        } catch {
            // A close failure here is secondary; the original error is the one
            // worth reporting.
        }
        throw err;
    }
    await engine.close();
    return result;
}

export class LoaderRuntime {
    /**
     * Loader runtime: load files using the standardized loadFileToCollection utility.
     * Returns unified metrics and results matching Python's loader builtin.
     *
     * @param {*} _code - Unused.
     * @param {*} context - Passed to extractLoaderParams to obtain `sourceDir`,
     *   `recursive` and `dbPath` (defaults: 'test_data' and ':memory:').
     * @param {*} config - Unused.
     * @returns {Promise<object>} `{ success: true, metrics, files }` on success,
     *   `{ success: false, error }` when the source is missing or loading fails.
     */
    async execute(_code, context, config) {
        const { loadFileToCollection } = await import('../../../Loader.js');
        const { sourceDir, recursive, dbPath } = extractLoaderParams(context, {
            sourceDir: 'test_data',
            dbPath: IN_MEMORY_DB
        });
        const projectRoot = findProjectRoot();
        const sourcePath = resolveAgainstRoot(projectRoot, sourceDir);
        if (!fs.existsSync(sourcePath)) {
            return { success: false, error: `Source directory not found: ${sourceDir}` };
        }
        try {
            let resolvedDbPath = dbPath || IN_MEMORY_DB;
            if (dbPath && dbPath !== IN_MEMORY_DB && !path.isAbsolute(dbPath)) {
                resolvedDbPath = path.join(projectRoot, dbPath);
                ensureParentDirectory(resolvedDbPath);
            }
            // Clean slate - remove existing db and any WAL/SHM files (matches Python behavior)
            if (resolvedDbPath !== IN_MEMORY_DB) {
                removeExistingDatabase(resolvedDbPath);
            }
            // withCollection guarantees the engine is closed even when loading throws.
            return await withCollection(resolvedDbPath, async (collection) => {
                const loaderResult = await loadFileToCollection(sourcePath, collection, {
                    recursive,
                    includeProblematic: false
                });
                const { metrics, results } = loaderResult;
                return {
                    success: true,
                    metrics: {
                        total_files: metrics.filesCount,
                        total_directories: metrics.directoriesCount,
                        directory_levels: metrics.directoryLevels,
                        total_size_bytes: results.reduce((acc, r) => acc + (r.size || 0), 0),
                        // NOTE(review): always reported as 0; presumably a placeholder
                        // kept for shape parity with the Python loader - confirm.
                        duration_ms: 0
                    },
                    files: results.map((r) => ({
                        hash: r.hash,
                        filename: r.filename,
                        content_type: r.contentType,
                        size_bytes: r.size,
                        path: r.filePath
                    }))
                };
            });
        } catch (err) {
            return { success: false, error: toErrorMessage(err) };
        }
    }
}

export class CollectionLoaderRuntime {
    /**
     * Collection loader runtime: ingest files into a CardCollection and return a normalized ingest report.
     *
     * Each file returned by listFiles is read and added as an MCard; a file that
     * fails is counted as skipped and its message recorded, without aborting the run.
     *
     * @param {*} _code - Unused.
     * @param {*} context - Passed to extractLoaderParams to obtain `sourceDir`,
     *   `recursive` and `dbPath` (default 'data/loader_clm.db').
     * @param {*} config - Unused.
     * @returns {Promise<object>} `{ success: true, report, ingest_metrics }` on success,
     *   `{ success: false, error }` when the source is missing or the run fails.
     */
    async execute(_code, context, config) {
        const loaderParams = extractLoaderParams(context, {
            sourceDir: 'chapters/chapter_04_load_dir/test_data',
            dbPath: 'data/loader_clm.db',
        });
        const { sourceDir, recursive, dbPath = 'data/loader_clm.db' } = loaderParams;
        const projectRoot = findProjectRoot();
        const sourcePath = resolveAgainstRoot(projectRoot, sourceDir);
        if (!fs.existsSync(sourcePath)) {
            return { success: false, error: `Source directory not found: ${sourceDir}` };
        }
        // ':memory:' must reach the engine verbatim; joining it onto the project
        // root would create an on-disk file literally named ':memory:'.
        const isInMemory = dbPath === IN_MEMORY_DB;
        const resolvedDbPath = isInMemory ? dbPath : resolveAgainstRoot(projectRoot, dbPath);
        try {
            if (!isInMemory) {
                // Same guarantee LoaderRuntime gives: the database's directory exists.
                ensureParentDirectory(resolvedDbPath);
            }
            // withCollection closes the engine once ingestion finishes or fails.
            return await withCollection(resolvedDbPath, async (collection) => {
                const files = listFiles(sourcePath, recursive);
                let ingested = 0;
                let skipped = 0;
                const errors = [];
                const startTime = Date.now();
                for (const filePath of files) {
                    try {
                        const content = fs.readFileSync(filePath);
                        const card = await MCard.create(new Uint8Array(content));
                        await collection.add(card);
                        ingested += 1;
                    } catch (err) {
                        skipped += 1;
                        errors.push(`${filePath}: ${toErrorMessage(err)}`);
                    }
                }
                const metrics = computeTimingMetrics(startTime, ingested);
                return {
                    success: true,
                    report: {
                        total_files: files.length,
                        ingested,
                        skipped,
                        errors,
                    },
                    ingest_metrics: {
                        db_path: resolvedDbPath,
                        time_s: metrics.time_s,
                        files_per_sec: metrics.files_per_sec,
                    },
                };
            });
        } catch (err) {
            return { success: false, error: toErrorMessage(err) };
        }
    }
}
//# sourceMappingURL=loader.js.map