tinycondor
a tiny but useful record-based db
import { z } from "zod";
import { createReadStream } from "node:fs";
import { open as openAsync, rename, stat, unlink, writeFile } from "node:fs/promises";
import { createInterface } from "node:readline";
import { lock } from "proper-lockfile";
import equal from "fast-deep-equal";
const CACHE: Map<string, Map<string, CondorRec>> = new Map();
interface FileMetrics {
dbfile: string;
// Performance
loadCount: number;
saveCount: number;
totalLoadTimeMs: number;
totalSaveTimeMs: number;
avgLoadTimeMs: number;
avgSaveTimeMs: number;
// Cache
cacheHits: number;
cacheMisses: number;
// Volume
recordsLoaded: number;
recordsSaved: number;
recordsRejected: number;
// Health
errorCount: number;
lastErrorTime?: number;
lastOperationTime: number;
// Size tracking
currentFileSize: number;
currentCacheSize: number;
}
const FILE_METRICS = new Map<string, FileMetrics>();
function getOrCreateMetrics(dbfile: string): FileMetrics {
let metrics = FILE_METRICS.get(dbfile);
if (!metrics) {
metrics = {
dbfile,
loadCount: 0,
saveCount: 0,
totalLoadTimeMs: 0,
totalSaveTimeMs: 0,
avgLoadTimeMs: 0,
avgSaveTimeMs: 0,
cacheHits: 0,
cacheMisses: 0,
recordsLoaded: 0,
recordsSaved: 0,
recordsRejected: 0,
errorCount: 0,
lastOperationTime: Date.now(),
currentFileSize: 0,
currentCacheSize: 0,
};
FILE_METRICS.set(dbfile, metrics);
}
return metrics;
}
export const CondorRecSchema = z
.object({
id: z.string(),
tm: z.number(),
})
.catchall(z.any());
export type CondorRec = z.infer<typeof CondorRecSchema>;
export const CondorErrSchema = z.object({
message: z.string(),
code: z.string().optional(),
record: z.any().optional(),
err: z.any().optional(),
});
export type CondorErr = z.infer<typeof CondorErrSchema>;
export type CondorErrHandler = (err: CondorErr) => void;
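// Illustrative sketch (the names below are made up, not part of the API):
// any object with a string `id` and numeric `tm` passes CondorRecSchema, and
// extra fields ride along via catchall. Errors are reported through a
// caller-supplied handler rather than thrown:
//
//   const rec: CondorRec = { id: "user:42", tm: Date.now(), name: "Ada" };
//   const onErr: CondorErrHandler = (e) =>
//     console.error(e.code ?? "ERR", e.message);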
export function clearCache() {
CACHE.clear();
}
/* way/
* Helper function to wrap file operations with locking
* Prevents concurrent writes from corrupting the file
*/
async function withLock<T>(
dbfile: string,
fn: () => Promise<T>
): Promise<T> {
  let release: (() => Promise<void>) | null = null;
  try {
    // Try to acquire lock, but gracefully handle mock-fs environments (tests)
    try {
      // proper-lockfile derives the lockfile path (`${dbfile}.lock`) itself,
      // so lock the db file directly; realpath: false allows locking a path
      // that does not exist yet (e.g. during create)
      release = await lock(dbfile, {
        realpath: false,
        retries: {
          retries: 5,
          minTimeout: 100,
          maxTimeout: 2000,
        },
        stale: 10000, // 10 seconds
      });
} catch (lockErr: any) {
// If locking fails (e.g., in mock-fs), proceed without lock
// This allows tests to run while still providing locking in production
if (lockErr.code === "ENOENT" || lockErr.message?.includes("ENOENT")) {
// Silently proceed without lock in test environments
} else {
throw lockErr;
}
}
return await fn();
} finally {
if (release) {
await release();
}
}
}
/* way/
* Validates timestamp to detect fishy data
* Returns true if valid, logs warning via onErrors if invalid
* Skips validation for obvious test data (timestamps < 1000000)
*/
function validateTimestamp(
tm: number,
onErrors: CondorErrHandler,
recId?: string
): boolean {
// Skip validation for test data (timestamps < 1000000 are clearly not real Unix ms timestamps)
// Real Unix timestamps start from Jan 1, 1970, so 1000000ms is ~16 minutes after epoch
if (tm < 1000000) {
return true;
}
const now = Date.now();
const yearInMs = 365 * 24 * 60 * 60 * 1000;
const year2020 = new Date("2020-01-01").getTime();
// Check if timestamp is too far in future (>1 year)
if (tm > now + yearInMs) {
onErrors({
code: "INVALID_TIMESTAMP",
message: `Timestamp ${tm} is too far in future (now: ${now})${recId ? ` for record: ${recId}` : ""}`,
});
return false;
}
// Check if timestamp is before 2020 (sanity check)
if (tm < year2020) {
onErrors({
code: "INVALID_TIMESTAMP",
message: `Timestamp ${tm} is before 2020${recId ? ` for record: ${recId}` : ""}`,
});
return false;
}
return true;
}
/* way/
* Enhances error with operation context for better debugging
*/
function enhanceError(
err: any,
operation: string,
dbfile: string
): CondorErr {
const metrics = getOrCreateMetrics(dbfile);
metrics.errorCount++;
metrics.lastErrorTime = Date.now();
return {
message: `[${operation}] ${err.message || String(err)}`,
code: err.code,
err,
record: {
dbfile,
operation,
timestamp: Date.now(),
metrics,
},
};
}
/* way/
* Create a new database file atomically using temp file + rename pattern
* Prevents corruption if power loss occurs during write
*/
export async function create(
initialRecords: CondorRec[],
dbfile: string,
onErrors: CondorErrHandler
): Promise<CondorRec[] | null> {
return await withLock(dbfile, async () => {
const tempFile = `${dbfile}.tmp`;
try {
// Check if file already exists
try {
await stat(dbfile);
onErrors({
code: "EEXIST",
message: `${dbfile} already exists - cannot create!`,
});
return null;
} catch (err: any) {
// File doesn't exist - good, continue
if (err.code !== "ENOENT") {
onErrors(enhanceError(err, "create/stat", dbfile));
return null;
}
}
      const cached = new Map<string, CondorRec>();
      const data = updatedRecs(cached, initialRecords, onErrors);
      if (data.recs.length === 0) {
        onErrors({
          message: `failed to find any valid records to initialize file: ${dbfile}`,
        });
        return null;
      }
      // Write to temp file
      await writeFile(tempFile, data.str + "\n", "utf8");
      // Atomic rename (replaces file if exists)
      await rename(tempFile, dbfile);
      // Publish to the cache only after the file exists on disk, so a failed
      // create cannot leave a stale cache entry behind
      CACHE.set(dbfile, cached);
      return data.recs;
} catch (err) {
// Cleanup temp file on error
try {
await unlink(tempFile);
} catch {
// Ignore cleanup errors
}
onErrors(enhanceError(err, "create", dbfile));
return null;
}
});
}
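// Usage sketch for create() (hypothetical path and records, assuming the
// imports above): initializes a fresh db file, or reports EEXIST and
// returns null if the file already exists.
//
//   const recs = await create(
//     [{ id: "user:1", tm: Date.now(), name: "Ada" }],
//     "./users.db",
//     (e) => console.error(e.message)
//   );
//   if (recs === null) { /* file existed or no valid records */ }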
type UpdatedRecs = {
recs: CondorRec[];
str: string;
};
/* way/
* check that the record is valid (has an id), ensure a timestamp,
* then check if it is not already an existing record.
*/
function updatedRecs(
cached: Map<string, CondorRec>,
recs: CondorRec[],
onErrors: CondorErrHandler
): UpdatedRecs {
  const updated: CondorRec[] = [];
  const updatedStr: string[] = [];
recs.forEach((record) => {
if (!record.id) {
onErrors({
message: `record missing id`,
record,
});
return;
}
// Fix: Don't mutate input - create new object with timestamp if missing
const recWithTm = record.tm ? record : { ...record, tm: Date.now() };
// Validate timestamp (lenient mode - log warning but continue)
validateTimestamp(recWithTm.tm, onErrors, recWithTm.id);
const current = cached.get(recWithTm.id);
if (
!current ||
current.tm < recWithTm.tm ||
(current.tm === recWithTm.tm && notEq(current, recWithTm))
) {
try {
updatedStr.push(JSON.stringify(recWithTm));
        updated.push(recWithTm);
cached.set(recWithTm.id, recWithTm);
} catch (err) {
        onErrors({
          message: `failed to convert record: ${recWithTm.id} to JSON`,
          err,
          record: recWithTm,
        });
}
}
});
  return {
    recs: updated,
str: updatedStr.join("\n"),
};
}
/* way/
* Check if two records are not equal, excluding the "tm" field
* Uses fast-deep-equal for optimized comparison with early exit
*/
function notEq(c: any, r: any): boolean {
// Fast path: same reference
if (c === r) return false;
// Create shallow copies without the "tm" field for comparison
const cWithoutTm = { ...c };
const rWithoutTm = { ...r };
delete cWithoutTm.tm;
delete rWithoutTm.tm;
// Use fast-deep-equal library (handles edge cases, early exit optimization)
return !equal(cWithoutTm, rWithoutTm);
}
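// Illustrative example of the merge rule built from updatedRecs() and
// notEq() (values are made up): an incoming record replaces the cached one
// when its tm is newer, or when tm ties but the other fields differ.
//
//   cached:   { id: "a", tm: 100, v: 1 }
//   incoming: { id: "a", tm: 100, v: 2 }  // same tm, notEq -> replaces
//   incoming: { id: "a", tm:  99, v: 3 }  // older tm       -> ignored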
export async function load(
dbfile: string,
onErrors: CondorErrHandler,
maxFileSizeBytes: number = 100 * 1024 * 1024 // 100MB default
): Promise<CondorRec[] | null> {
const recs = await load_(dbfile, onErrors, maxFileSizeBytes);
if (!recs) return null;
return unwrap(recs);
}
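// Usage sketch for load() (hypothetical path; assumes the file was created
// with create() above): returns the deduplicated records, served from the
// in-memory cache on repeat calls.
//
//   const all = await load("./users.db", (e) => console.error(e.message));
//   if (all) console.log(`loaded ${all.length} records`);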
interface Ctx {
closed: boolean;
error: boolean;
linenum: number;
recs: Map<string, CondorRec>;
}
export async function load_(
dbfile: string,
onErrors: CondorErrHandler,
maxFileSizeBytes: number = 100 * 1024 * 1024
): Promise<Map<string, CondorRec> | null> {
  const loaded = CACHE.get(dbfile);
  if (loaded) {
    const metrics = getOrCreateMetrics(dbfile);
    metrics.cacheHits++;
    return loaded;
  }
  // Check file size before loading to prevent OOM
  try {
    const fileStats = await stat(dbfile);
    const metrics = getOrCreateMetrics(dbfile);
    metrics.currentFileSize = fileStats.size;
    metrics.cacheMisses++;
    if (fileStats.size > maxFileSizeBytes) {
      onErrors({
        code: "FILE_TOO_LARGE",
        message: `File ${dbfile} is too large (${fileStats.size} bytes). Max allowed: ${maxFileSizeBytes} bytes`,
      });
      return null;
    }
  } catch (err) {
    onErrors(enhanceError(err, "load/stat", dbfile));
    return null;
  }
  const startTime = Date.now();
  // Wrap only the callback-driven readline phase in a Promise
  return new Promise((resolve) => {
    try {
      const ctx: Ctx = {
        closed: false,
        error: false,
        linenum: 0,
        recs: new Map<string, CondorRec>(),
      };
      const stream = createReadStream(dbfile);
      const rl = createInterface({ input: stream });
rl.on("line", (line) => {
ctx.linenum++;
try {
const rec = CondorRecSchema.parse(JSON.parse(line));
// Validate timestamp (lenient mode - log warning but continue)
validateTimestamp(rec.tm, onErrors, rec.id);
const current = ctx.recs.get(rec.id);
if (
!current ||
current.tm < rec.tm ||
(current.tm === rec.tm && notEq(current, rec))
) {
ctx.recs.set(rec.id, rec);
}
} catch (err) {
onErrors({
message: `failed to get record from line: ${ctx.linenum}`,
err,
record: line,
});
}
});
rl.on("error", (err) => {
onErrors(enhanceError(err, "load/read", dbfile));
ctx.error = true;
ctx.closed = true; // Fix: Always set closed to ensure promise resolves
resolve(null);
});
rl.on("close", async () => {
if (ctx.error) return;
ctx.closed = true;
// Update metrics
const metrics = getOrCreateMetrics(dbfile);
metrics.loadCount++;
metrics.recordsLoaded += ctx.recs.size;
const loadTime = Date.now() - startTime;
metrics.totalLoadTimeMs += loadTime;
metrics.avgLoadTimeMs = metrics.totalLoadTimeMs / metrics.loadCount;
metrics.lastOperationTime = Date.now();
metrics.currentCacheSize = ctx.recs.size;
CACHE.set(dbfile, ctx.recs);
resolve(ctx.recs);
});
    } catch (err) {
      onErrors({
        message: "Unexpected error 5327820",
        err,
      });
      resolve(null);
    }
  });
}
/* way/
* Save records to database file with file locking and fsync
* TODO: Consider adding compaction when file size > 2x cache size
*/
export async function save(
recordArray: CondorRec[],
dbfile: string,
onErrors: CondorErrHandler,
maxFileSizeBytes: number = 100 * 1024 * 1024 // 100MB default
): Promise<CondorRec[] | null> {
const startTime = Date.now();
const metrics = getOrCreateMetrics(dbfile);
return await withLock(dbfile, async () => {
const current = await load_(dbfile, onErrors, maxFileSizeBytes);
if (!current) {
onErrors({
message: `failed to get existing records from: ${dbfile}`,
});
return null;
}
const data = updatedRecs(current, recordArray, onErrors);
    if (data.recs.length === 0) {
onErrors({
message: `nothing new to save in: ${dbfile}`,
});
return unwrap(current);
}
// Check file size and warn if getting large
try {
const fileStats = await stat(dbfile);
metrics.currentFileSize = fileStats.size;
// Warn at 50MB - suggests compaction needed
if (fileStats.size > 50 * 1024 * 1024) {
onErrors({
code: "FILE_SIZE_WARNING",
message: `File ${dbfile} is large (${fileStats.size} bytes). Consider running compaction.`,
});
}
} catch (err) {
// Non-fatal - continue with save
onErrors(enhanceError(err, "save/stat", dbfile));
}
// Use explicit file operations with fsync to reduce partial write risk
try {
const needsNewline = await fileEndsWithNewline(dbfile, onErrors);
const toAppend = (needsNewline ? "" : "\n") + data.str + "\n";
const fh = await openAsync(dbfile, "a");
await fh.write(toAppend, null, "utf8");
await fh.sync(); // Force data to disk before closing
await fh.close();
// Update metrics
metrics.saveCount++;
metrics.recordsSaved += data.recs.length;
const saveTime = Date.now() - startTime;
metrics.totalSaveTimeMs += saveTime;
metrics.avgSaveTimeMs = metrics.totalSaveTimeMs / metrics.saveCount;
metrics.lastOperationTime = Date.now();
return unwrap(current);
    } catch (err) {
      // The cache map was already updated by updatedRecs() above; the disk
      // write failed, so drop the cached copy to force a reload from the file
      CACHE.delete(dbfile);
      onErrors(enhanceError(err, "save/write", dbfile));
      return null;
    }
});
}
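// Usage sketch for save() (hypothetical values): appends only records that
// are new or newer than the cached copy, and returns the merged set.
//
//   const merged = await save(
//     [{ id: "user:1", tm: Date.now(), name: "Ada Lovelace" }],
//     "./users.db",
//     (e) => console.error(e.message)
//   );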
function unwrap(recs: Map<string, CondorRec>): CondorRec[] {
return Array.from(recs.values());
}
async function fileEndsWithNewline(
path: string,
onErrors: CondorErrHandler
): Promise<boolean> {
  let fh: Awaited<ReturnType<typeof openAsync>> | undefined;
  try {
    fh = await openAsync(path, "r");
    const st = await fh.stat(); // local name avoids shadowing the imported stat()
    if (st.size === 0) {
      return true;
    }
    const buf = Buffer.alloc(1);
    await fh.read(buf, 0, 1, st.size - 1);
    return buf.toString() === "\n";
  } catch (err: any) {
    onErrors({
      message: `Failed checking fileEndsWithNewline: ${err.message}`,
    });
    return true;
  } finally {
    await fh?.close(); // always release the handle, even if stat/read throws
  }
}
/* way/
* Get metrics for a specific file or all files
*/
export function getMetrics(
dbfile?: string
): FileMetrics | Map<string, FileMetrics> | undefined {
if (dbfile) {
return FILE_METRICS.get(dbfile);
}
return FILE_METRICS;
}
/* way/
* Dump metrics to console or file
*/
export async function dumpMetrics(
dbfile?: string,
outputPath?: string
): Promise<void> {
const metrics = getMetrics(dbfile);
  // A Map does not serialize to JSON directly, so expand it to entries first
  const data = JSON.stringify(
    metrics instanceof Map ? Array.from(metrics.entries()) : metrics,
    null,
    2
  );
if (outputPath) {
await writeFile(outputPath, data, "utf8");
} else {
console.log("=== TinyCondor Metrics ===");
console.log(data);
}
}
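// Usage sketch (hypothetical paths): inspect one file's counters in-process,
// or dump everything to a JSON file for offline inspection.
//
//   const m = getMetrics("./users.db");
//   await dumpMetrics(undefined, "./condor-metrics.json");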