tinycondor

import { z } from "zod";
import { createReadStream } from "node:fs";
import {
  open as openAsync,
  rename,
  stat,
  unlink,
  writeFile,
} from "node:fs/promises";
import { createInterface } from "node:readline";
import { lock } from "proper-lockfile";
import equal from "fast-deep-equal";

const CACHE: Map<string, Map<string, CondorRec>> = new Map();

interface FileMetrics {
  dbfile: string;
  // Performance
  loadCount: number;
  saveCount: number;
  totalLoadTimeMs: number;
  totalSaveTimeMs: number;
  avgLoadTimeMs: number;
  avgSaveTimeMs: number;
  // Cache
  cacheHits: number;
  cacheMisses: number;
  // Volume
  recordsLoaded: number;
  recordsSaved: number;
  recordsRejected: number;
  // Health
  errorCount: number;
  lastErrorTime?: number;
  lastOperationTime: number;
  // Size tracking
  currentFileSize: number;
  currentCacheSize: number;
}

const FILE_METRICS = new Map<string, FileMetrics>();

function getOrCreateMetrics(dbfile: string): FileMetrics {
  let metrics = FILE_METRICS.get(dbfile);
  if (!metrics) {
    metrics = {
      dbfile,
      loadCount: 0,
      saveCount: 0,
      totalLoadTimeMs: 0,
      totalSaveTimeMs: 0,
      avgLoadTimeMs: 0,
      avgSaveTimeMs: 0,
      cacheHits: 0,
      cacheMisses: 0,
      recordsLoaded: 0,
      recordsSaved: 0,
      recordsRejected: 0,
      errorCount: 0,
      lastOperationTime: Date.now(),
      currentFileSize: 0,
      currentCacheSize: 0,
    };
    FILE_METRICS.set(dbfile, metrics);
  }
  return metrics;
}

export const CondorRecSchema = z
  .object({
    id: z.string(),
    tm: z.number(),
  })
  .catchall(z.any());
export type CondorRec = z.infer<typeof CondorRecSchema>;

export const CondorErrSchema = z.object({
  message: z.string(),
  code: z.string().optional(),
  record: z.any().optional(),
  err: z.any().optional(),
});
export type CondorErr = z.infer<typeof CondorErrSchema>;

export type CondorErrHandler = (err: CondorErr) => void;

export function clearCache() {
  CACHE.clear();
}

/* way/
 * Helper function that wraps file operations with locking.
 * Prevents concurrent writes from corrupting the file.
 */
async function withLock<T>(dbfile: string, fn: () => Promise<T>): Promise<T> {
  const lockfile = `${dbfile}.lock`;
  let release: (() => Promise<void>) | null = null;
  try {
    // Try to acquire the lock, but gracefully handle mock-fs environments (tests)
    try {
      release = await lock(lockfile, {
        retries: {
          retries: 5,
          minTimeout: 100,
          maxTimeout: 2000,
        },
        stale: 10000, // 10 seconds
      });
    } catch (lockErr: any) {
      // If locking fails (e.g., in mock-fs), proceed without a lock.
      // This lets tests run while still providing locking in production.
      if (lockErr.code === "ENOENT" || lockErr.message?.includes("ENOENT")) {
        // Silently proceed without a lock in test environments
      } else {
        throw lockErr;
      }
    }
    return await fn();
  } finally {
    if (release) {
      await release();
    }
  }
}

/* way/
 * Validates a timestamp to detect fishy data.
 * Returns true if valid; reports via onErrors if invalid.
 * Skips validation for obvious test data (timestamps < 1000000).
 */
function validateTimestamp(
  tm: number,
  onErrors: CondorErrHandler,
  recId?: string
): boolean {
  // Skip validation for test data (timestamps < 1000000 are clearly not real
  // Unix ms timestamps; 1000000ms is ~16 minutes after the Jan 1, 1970 epoch).
  if (tm < 1000000) {
    return true;
  }

  const now = Date.now();
  const yearInMs = 365 * 24 * 60 * 60 * 1000;
  const year2020 = new Date("2020-01-01").getTime();

  // Check if the timestamp is too far in the future (> 1 year)
  if (tm > now + yearInMs) {
    onErrors({
      code: "INVALID_TIMESTAMP",
      message: `Timestamp ${tm} is too far in future (now: ${now})${
        recId ? ` for record: ${recId}` : ""
      }`,
    });
    return false;
  }

  // Check if the timestamp is before 2020 (sanity check)
  if (tm < year2020) {
    onErrors({
      code: "INVALID_TIMESTAMP",
      message: `Timestamp ${tm} is before 2020${recId ? ` for record: ${recId}` : ""}`,
    });
    return false;
  }

  return true;
}
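/* example/
 * A minimal sketch of a CondorErrHandler (illustrative, not part of the
 * library): route the advisory codes emitted above to a warning log and
 * treat everything else as a hard error. The console calls are assumptions;
 * substitute your own logger.
 *
 *   const onErrors: CondorErrHandler = (err) => {
 *     if (err.code === "INVALID_TIMESTAMP" || err.code === "FILE_SIZE_WARNING") {
 *       console.warn(err.message); // advisory: the operation continues
 *     } else {
 *       console.error(err.message, err.err); // hard error: the call returns null
 *     }
 *   };
 */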
/* way/
 * Enhances an error with operation context for better debugging.
 */
function enhanceError(err: any, operation: string, dbfile: string): CondorErr {
  const metrics = getOrCreateMetrics(dbfile);
  metrics.errorCount++;
  metrics.lastErrorTime = Date.now();
  return {
    message: `[${operation}] ${err.message || String(err)}`,
    code: err.code,
    err,
    record: {
      dbfile,
      operation,
      timestamp: Date.now(),
      metrics,
    },
  };
}

/* way/
 * Create a new database file atomically using the temp file + rename pattern.
 * Prevents corruption if power loss occurs during the write.
 */
export async function create(
  initialRecords: CondorRec[],
  dbfile: string,
  onErrors: CondorErrHandler
): Promise<CondorRec[] | null> {
  return await withLock(dbfile, async () => {
    const tempFile = `${dbfile}.tmp`;
    try {
      // Check if the file already exists
      try {
        await stat(dbfile);
        onErrors({
          code: "EEXIST",
          message: `${dbfile} already exists - cannot create!`,
        });
        return null;
      } catch (err: any) {
        // File doesn't exist - good, continue
        if (err.code !== "ENOENT") {
          onErrors(enhanceError(err, "create/stat", dbfile));
          return null;
        }
      }

      const cached = new Map<string, CondorRec>();
      CACHE.set(dbfile, cached);

      const data = updatedRecs(cached, initialRecords, onErrors);
      if (data.recs.length === 0) {
        // Fix: drop the cache entry so a failed create doesn't mask the
        // missing file from later load_() calls
        CACHE.delete(dbfile);
        onErrors({
          message: `failed to find any valid records to initialize file: ${dbfile}`,
        });
        return null;
      }

      // Write to a temp file, then atomically rename (replaces file if it exists)
      await writeFile(tempFile, data.str + "\n", "utf8");
      await rename(tempFile, dbfile);

      return data.recs;
    } catch (err) {
      // Clean up the stale cache entry and temp file on error
      CACHE.delete(dbfile);
      try {
        await unlink(tempFile);
      } catch {
        // Ignore cleanup errors
      }
      onErrors(enhanceError(err, "create", dbfile));
      return null;
    }
  });
}

type UpdatedRecs = {
  recs: CondorRec[];
  str: string;
};

/* way/
 * Check that each record is valid (has an id), ensure a timestamp,
 * then check that it is not already an existing record.
 */
function updatedRecs(
  cached: Map<string, CondorRec>,
  recs: CondorRec[],
  onErrors: CondorErrHandler
): UpdatedRecs {
  const outRecs: CondorRec[] = [];
  const outStr: string[] = [];
  recs.forEach((record) => {
    if (!record.id) {
      onErrors({
        message: `record missing id`,
        record,
      });
      return;
    }
    // Fix: don't mutate the input - create a new object when the timestamp is missing
    const recWithTm = record.tm ? record : { ...record, tm: Date.now() };
    // Validate the timestamp (lenient mode - log a warning but continue)
    validateTimestamp(recWithTm.tm, onErrors, recWithTm.id);
    const current = cached.get(recWithTm.id);
    if (
      !current ||
      current.tm < recWithTm.tm ||
      (current.tm === recWithTm.tm && notEq(current, recWithTm))
    ) {
      try {
        outStr.push(JSON.stringify(recWithTm));
        outRecs.push(recWithTm);
        cached.set(recWithTm.id, recWithTm);
      } catch (err) {
        onErrors({
          message: `failed to convert record: ${recWithTm.id} to JSON`,
          err,
        });
      }
    }
  });
  return {
    recs: outRecs,
    str: outStr.join("\n"),
  };
}
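/* example/
 * A sketch of initializing a database with create(). The file path and
 * record fields are hypothetical; the extra "name" field is allowed because
 * CondorRecSchema uses catchall(z.any()).
 *
 *   const recs = await create(
 *     [{ id: "user-1", tm: Date.now(), name: "Ada" }],
 *     "./users.db",
 *     onErrors
 *   );
 *   // recs is null on failure; create() reports the reason (e.g. code
 *   // "EEXIST" if the file already exists) through onErrors.
 */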
/* way/
 * Check whether two records differ, excluding the "tm" field.
 * Uses fast-deep-equal for an optimized comparison with early exit.
 */
function notEq(c: any, r: any): boolean {
  // Fast path: same reference
  if (c === r) return false;
  // Create shallow copies without the "tm" field for comparison
  const cWithoutTm = { ...c };
  const rWithoutTm = { ...r };
  delete cWithoutTm.tm;
  delete rWithoutTm.tm;
  // fast-deep-equal handles the edge cases and exits early
  return !equal(cWithoutTm, rWithoutTm);
}

export async function load(
  dbfile: string,
  onErrors: CondorErrHandler,
  maxFileSizeBytes: number = 100 * 1024 * 1024 // 100MB default
): Promise<CondorRec[] | null> {
  const recs = await load_(dbfile, onErrors, maxFileSizeBytes);
  if (!recs) return null;
  return unwrap(recs);
}

interface Ctx {
  closed: boolean;
  error: boolean;
  linenum: number;
  recs: Map<string, CondorRec>;
}

export async function load_(
  dbfile: string,
  onErrors: CondorErrHandler,
  maxFileSizeBytes: number = 100 * 1024 * 1024
): Promise<Map<string, CondorRec> | null> {
  return new Promise(async (resolve) => {
    const loaded = CACHE.get(dbfile);
    if (loaded) {
      const metrics = getOrCreateMetrics(dbfile);
      metrics.cacheHits++;
      resolve(loaded);
      return;
    }

    // Check the file size before loading to prevent OOM
    try {
      const fileStats = await stat(dbfile);
      const metrics = getOrCreateMetrics(dbfile);
      metrics.currentFileSize = fileStats.size;
      metrics.cacheMisses++;
      if (fileStats.size > maxFileSizeBytes) {
        onErrors({
          code: "FILE_TOO_LARGE",
          message: `File ${dbfile} is too large (${fileStats.size} bytes). Max allowed: ${maxFileSizeBytes} bytes`,
        });
        return resolve(null);
      }
    } catch (err) {
      onErrors(enhanceError(err, "load/stat", dbfile));
      return resolve(null);
    }

    const startTime = Date.now();
    try {
      const ctx: Ctx = {
        closed: false,
        error: false,
        linenum: 0,
        recs: new Map<string, CondorRec>(),
      };
      const rl = createInterface(createReadStream(dbfile));
      rl.on("line", (line) => {
        ctx.linenum++;
        try {
          const rec = CondorRecSchema.parse(JSON.parse(line));
          // Validate the timestamp (lenient mode - log a warning but continue)
          validateTimestamp(rec.tm, onErrors, rec.id);
          const current = ctx.recs.get(rec.id);
          if (
            !current ||
            current.tm < rec.tm ||
            (current.tm === rec.tm && notEq(current, rec))
          ) {
            ctx.recs.set(rec.id, rec);
          }
        } catch (err) {
          onErrors({
            message: `failed to get record from line: ${ctx.linenum}`,
            err,
            record: line,
          });
        }
      });
      rl.on("error", (err) => {
        onErrors(enhanceError(err, "load/read", dbfile));
        ctx.error = true;
        ctx.closed = true; // Fix: always set closed so the promise resolves
        resolve(null);
      });
      rl.on("close", () => {
        if (ctx.error) return;
        ctx.closed = true;

        // Update metrics
        const metrics = getOrCreateMetrics(dbfile);
        metrics.loadCount++;
        metrics.recordsLoaded += ctx.recs.size;
        const loadTime = Date.now() - startTime;
        metrics.totalLoadTimeMs += loadTime;
        metrics.avgLoadTimeMs = metrics.totalLoadTimeMs / metrics.loadCount;
        metrics.lastOperationTime = Date.now();
        metrics.currentCacheSize = ctx.recs.size;

        CACHE.set(dbfile, ctx.recs);
        resolve(ctx.recs);
      });
    } catch (err) {
      onErrors({
        message: "Unexpected error 5327820",
        err,
      });
      resolve(null);
    }
  });
}
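/* example/
 * A sketch of reading records back with load() (path hypothetical). The
 * file is append-only, so load() replays every line and keeps, per id, the
 * record with the highest tm - a later append wins:
 *
 *   const recs = await load("./users.db", onErrors); // default 100MB cap
 *   const capped = await load("./users.db", onErrors, 10 * 1024 * 1024); // 10MB cap
 *   // recs is null on failure (unreadable file, FILE_TOO_LARGE, ...);
 *   // repeated calls are served from the in-memory CACHE.
 */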
/* way/
 * Save records to the database file with file locking and fsync.
 * TODO: Consider adding compaction when file size > 2x cache size.
 */
export async function save(
  recordArray: CondorRec[],
  dbfile: string,
  onErrors: CondorErrHandler,
  maxFileSizeBytes: number = 100 * 1024 * 1024 // 100MB default
): Promise<CondorRec[] | null> {
  const startTime = Date.now();
  const metrics = getOrCreateMetrics(dbfile);

  return await withLock(dbfile, async () => {
    const current = await load_(dbfile, onErrors, maxFileSizeBytes);
    if (!current) {
      onErrors({
        message: `failed to get existing records from: ${dbfile}`,
      });
      return null;
    }

    const data = updatedRecs(current, recordArray, onErrors);
    if (data.recs.length === 0) {
      onErrors({
        message: `nothing new to save in: ${dbfile}`,
      });
      return unwrap(current);
    }

    // Check the file size and warn if it is getting large
    try {
      const fileStats = await stat(dbfile);
      metrics.currentFileSize = fileStats.size;
      // Warn at 50MB - suggests compaction is needed
      if (fileStats.size > 50 * 1024 * 1024) {
        onErrors({
          code: "FILE_SIZE_WARNING",
          message: `File ${dbfile} is large (${fileStats.size} bytes). Consider running compaction.`,
        });
      }
    } catch (err) {
      // Non-fatal - continue with the save
      onErrors(enhanceError(err, "save/stat", dbfile));
    }

    // Use explicit file operations with fsync to reduce partial-write risk
    try {
      const endsWithNewline = await fileEndsWithNewline(dbfile, onErrors);
      const toAppend = (endsWithNewline ? "" : "\n") + data.str + "\n";
      const fh = await openAsync(dbfile, "a");
      await fh.write(toAppend, null, "utf8");
      await fh.sync(); // Force data to disk before closing
      await fh.close();

      // Update metrics
      metrics.saveCount++;
      metrics.recordsSaved += data.recs.length;
      const saveTime = Date.now() - startTime;
      metrics.totalSaveTimeMs += saveTime;
      metrics.avgSaveTimeMs = metrics.totalSaveTimeMs / metrics.saveCount;
      metrics.lastOperationTime = Date.now();

      return unwrap(current);
    } catch (err) {
      onErrors(enhanceError(err, "save/write", dbfile));
      return null;
    }
  });
}
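/* example/
 * A sketch of appending an update with save() (record hypothetical). save()
 * merges the batch against the current state, appends only records that are
 * new or carry a newer tm, fsyncs, and returns the full merged record set:
 *
 *   const all = await save(
 *     [{ id: "user-1", tm: Date.now(), name: "Ada Lovelace" }],
 *     "./users.db",
 *     onErrors
 *   );
 */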
"" : "\n") + data.str + "\n"; const fh = await openAsync(dbfile, "a"); await fh.write(toAppend, null, "utf8"); await fh.sync(); // Force data to disk before closing await fh.close(); // Update metrics metrics.saveCount++; metrics.recordsSaved += data.recs.length; const saveTime = Date.now() - startTime; metrics.totalSaveTimeMs += saveTime; metrics.avgSaveTimeMs = metrics.totalSaveTimeMs / metrics.saveCount; metrics.lastOperationTime = Date.now(); return unwrap(current); } catch (err) { onErrors(enhanceError(err, "save/write", dbfile)); return null; } }); } function unwrap(recs: Map<string, CondorRec>): CondorRec[] { return Array.from(recs.values()); } async function fileEndsWithNewline( path: string, onErrors: CondorErrHandler ): Promise<boolean> { try { const fh = await openAsync(path, "r"); const stat = await fh.stat(); if (stat.size === 0) { await fh.close(); return true; } const buf = Buffer.alloc(1); await fh.read(buf, 0, 1, stat.size - 1); await fh.close(); return buf.toString() === "\n"; } catch (err: any) { onErrors({ message: `Failed checking fileEndsWithNewline` + err.message, }); return true; } } /* way/ * Get metrics for a specific file or all files */ export function getMetrics( dbfile?: string ): FileMetrics | Map<string, FileMetrics> | undefined { if (dbfile) { return FILE_METRICS.get(dbfile); } return FILE_METRICS; } /* way/ * Dump metrics to console or file */ export async function dumpMetrics( dbfile?: string, outputPath?: string ): Promise<void> { const metrics = getMetrics(dbfile); const data = JSON.stringify( dbfile && metrics instanceof Map ? Array.from(metrics.entries()) : metrics, null, 2 ); if (outputPath) { await writeFile(outputPath, data, "utf8"); } else { console.log("=== TinyCondor Metrics ==="); console.log(data); } }