mcard-js
Version:
MCard - Content-addressable storage with cryptographic hashing, handle resolution, and vector search for Node.js and browsers
142 lines • 6.2 kB
JavaScript
/**
* Loader runtimes for file ingestion.
*
* - LoaderRuntime: Uses loadFileToCollection utility
* - CollectionLoaderRuntime: Direct file ingestion into CardCollection
*/
import * as fs from 'fs';
import * as path from 'path';
import { MCard } from '../../../model/MCard.js';
import { SqliteNodeEngine } from '../../../storage/SqliteNodeEngine.js';
import { CardCollection } from '../../../model/CardCollection.js';
import { computeTimingMetrics, extractLoaderParams, findProjectRoot, listFiles } from '../FileSystemUtils.js';
export class LoaderRuntime {
    /**
     * Loader runtime: load files using the standardized loadFileToCollection utility.
     * Returns unified metrics and results matching Python's loader builtin.
     *
     * The target database file (plus its `-wal` / `-shm` companions) is removed
     * before loading, so every run starts from an empty database.
     *
     * @param {*} _code - Unused.
     * @param {*} context - Passed to `extractLoaderParams` to obtain `sourceDir`,
     *   `recursive` and `dbPath` (defaults: `'test_data'` and `':memory:'`).
     * @param {*} config - Unused.
     * @returns {Promise<object>} `{ success: true, metrics, files }` on success, or
     *   `{ success: false, error }` when the source directory is missing or loading fails.
     */
    async execute(_code, context, config) {
        const { loadFileToCollection } = await import('../../../Loader.js');
        const { sourceDir, recursive, dbPath } = extractLoaderParams(context, { sourceDir: 'test_data', dbPath: ':memory:' });
        const projectRoot = findProjectRoot();
        const sourcePath = path.isAbsolute(sourceDir) ? sourceDir : path.join(projectRoot, sourceDir);
        if (!fs.existsSync(sourcePath)) {
            return { success: false, error: `Source directory not found: ${sourceDir}` };
        }
        try {
            let resolvedDbPath = dbPath || ':memory:';
            // Relative database paths are anchored at the project root; make sure
            // the containing directory exists before the engine opens the file.
            if (dbPath && dbPath !== ':memory:' && !path.isAbsolute(dbPath)) {
                resolvedDbPath = path.join(projectRoot, dbPath);
                const dbDir = path.dirname(resolvedDbPath);
                if (!fs.existsSync(dbDir)) {
                    fs.mkdirSync(dbDir, { recursive: true });
                }
            }
            // Clean slate - remove existing db and any WAL/SHM files (matches Python behavior)
            if (resolvedDbPath !== ':memory:' && fs.existsSync(resolvedDbPath)) {
                try {
                    fs.unlinkSync(resolvedDbPath);
                    // Also remove WAL and SHM files if they exist
                    for (const suffix of ['-wal', '-shm']) {
                        const sidecarPath = resolvedDbPath + suffix;
                        if (fs.existsSync(sidecarPath)) {
                            fs.unlinkSync(sidecarPath);
                        }
                    }
                }
                catch (e) {
                    // Deliberate best-effort cleanup: a failed deletion must not abort the load.
                }
            }
            const engine = new SqliteNodeEngine(resolvedDbPath);
            try {
                const collection = new CardCollection(engine);
                const loaderResult = await loadFileToCollection(sourcePath, collection, {
                    recursive,
                    includeProblematic: false
                });
                return {
                    success: true,
                    metrics: {
                        total_files: loaderResult.metrics.filesCount,
                        total_directories: loaderResult.metrics.directoriesCount,
                        directory_levels: loaderResult.metrics.directoryLevels,
                        total_size_bytes: loaderResult.results.reduce((acc, r) => acc + (r.size || 0), 0),
                        // Duration is not measured by this runtime; the field is always 0.
                        duration_ms: 0
                    },
                    files: loaderResult.results.map(r => ({
                        hash: r.hash,
                        filename: r.filename,
                        content_type: r.contentType,
                        size_bytes: r.size,
                        path: r.filePath
                    }))
                };
            }
            finally {
                // Release the database handle on failure as well as on success.
                // If close() itself throws, the outer catch reports it.
                await engine.close();
            }
        }
        catch (err) {
            return { success: false, error: err instanceof Error ? err.message : String(err) };
        }
    }
}
export class CollectionLoaderRuntime {
    /**
     * Collection loader runtime: ingest files into a CardCollection and return a normalized ingest report.
     *
     * Each file returned by `listFiles` is read from disk, turned into an MCard and
     * added to the collection. A failure on a single file is recorded in
     * `report.errors` and counted as skipped; it does not abort the run.
     *
     * @param {*} _code - Unused.
     * @param {*} context - Passed to `extractLoaderParams` to obtain `sourceDir`,
     *   `recursive` and `dbPath` (default `'data/loader_clm.db'`). A `dbPath` of
     *   `':memory:'` is handed to the engine unchanged.
     * @param {*} config - Unused.
     * @returns {Promise<object>} `{ success: true, report, ingest_metrics }` on success, or
     *   `{ success: false, error }` when the source directory is missing or setup fails.
     */
    async execute(_code, context, config) {
        const { sourceDir, recursive, dbPath = 'data/loader_clm.db' } = extractLoaderParams(context, {
            sourceDir: 'chapters/chapter_04_load_dir/test_data',
            dbPath: 'data/loader_clm.db',
        });
        const projectRoot = findProjectRoot();
        const sourcePath = path.isAbsolute(sourceDir) ? sourceDir : path.join(projectRoot, sourceDir);
        if (!fs.existsSync(sourcePath)) {
            return { success: false, error: `Source directory not found: ${sourceDir}` };
        }
        // ':memory:' is a database marker, not a file name, so it must not be
        // joined onto the project root.
        const isInMemory = dbPath === ':memory:';
        const isRelativeFile = !isInMemory && !path.isAbsolute(dbPath);
        const resolvedDbPath = isRelativeFile ? path.join(projectRoot, dbPath) : dbPath;
        try {
            // Relative paths live under the project root; create the containing
            // directory (as LoaderRuntime does) so the engine can open the file.
            if (isRelativeFile) {
                const dbDir = path.dirname(resolvedDbPath);
                if (!fs.existsSync(dbDir)) {
                    fs.mkdirSync(dbDir, { recursive: true });
                }
            }
            const engine = new SqliteNodeEngine(resolvedDbPath);
            try {
                const collection = new CardCollection(engine);
                const files = listFiles(sourcePath, recursive);
                let ingested = 0;
                let skipped = 0;
                const errors = [];
                const startTime = Date.now();
                for (const filePath of files) {
                    try {
                        const content = fs.readFileSync(filePath);
                        const card = await MCard.create(new Uint8Array(content));
                        await collection.add(card);
                        ingested += 1;
                    }
                    catch (err) {
                        skipped += 1;
                        errors.push(`${filePath}: ${err instanceof Error ? err.message : String(err)}`);
                    }
                }
                const metrics = computeTimingMetrics(startTime, ingested);
                return {
                    success: true,
                    report: {
                        total_files: files.length,
                        ingested,
                        skipped,
                        errors,
                    },
                    ingest_metrics: {
                        db_path: resolvedDbPath,
                        time_s: metrics.time_s,
                        files_per_sec: metrics.files_per_sec,
                    },
                };
            }
            finally {
                // The engine is local to this call, so release it on every path.
                // If close() itself throws, the outer catch reports it.
                await engine.close();
            }
        }
        catch (err) {
            return { success: false, error: err instanceof Error ? err.message : String(err) };
        }
    }
}
//# sourceMappingURL=loader.js.map