// @syngrisi/syngrisi
// Version:
// Syngrisi - Visual Testing Tool
// 464 lines (406 loc) • 21.1 kB
// text/typescript
/* eslint-disable @typescript-eslint/no-explicit-any */
import { promises as fsp } from 'fs';
import path from 'path';
import mongoose from 'mongoose';
import { config } from '@config';
import { subDays, dateToISO8601 } from '@utils';
import { createTable } from '@utils/stringTable';
import { IOutputWriter } from '../lib/output-writer';
import {
Snapshot,
Check,
Baseline,
} from '../lib';
/**
 * Converts a `process.hrtime()`-style tuple into a seconds string.
 *
 * @param hrtime - `[seconds, nanoseconds]` tuple
 * @returns total seconds formatted with exactly three decimal places
 */
function parseHrtimeToSeconds(hrtime: [number, number]): string {
    const [wholeSeconds, nanoseconds] = hrtime;
    const totalSeconds = wholeSeconds + nanoseconds / 1e9;
    return totalSeconds.toFixed(3);
}
/**
 * Turns an arbitrary id-like value into its string form.
 *
 * @param id - value to convert; any falsy value is treated as "no id"
 * @returns `id.toString()`, or `null` when `id` is falsy or `toString()` throws
 */
const normalizeId = (id: unknown): string | null => {
    if (id) {
        try {
            return id.toString();
        } catch {
            // A throwing toString() is reported as "no id" below.
        }
    }
    return null;
};
/**
 * Counts the non-directory entries directly inside `dirPath` whose name ends with `.png`.
 * The directory is streamed entry by entry rather than read into memory at once.
 *
 * @param dirPath - directory to scan (not recursive)
 * @returns number of matching entries
 */
async function countPngFiles(dirPath: string): Promise<number> {
    const handle = await fsp.opendir(dirPath);
    let pngTotal = 0;
    try {
        for await (const entry of handle) {
            const isPngFile = !entry.isDirectory() && entry.name.endsWith('.png');
            if (isPngFile) {
                pngTotal += 1;
            }
        }
    } finally {
        try {
            await handle.close();
        } catch (closeError) {
            // The handle may already be closed at this point; only that specific
            // error is tolerated, anything else is propagated.
            const alreadyClosed = (closeError as NodeJS.ErrnoException)?.code === 'ERR_DIR_CLOSED';
            if (!alreadyClosed) {
                throw closeError;
            }
        }
    }
    return pngTotal;
}
/**
 * Collects the distinct, non-null `snapshootId` values of all Baseline documents.
 * Uses a `$group` aggregation to obtain the unique values.
 *
 * @returns the unique ids in string form; values that cannot be normalized are skipped
 */
async function collectBaselineSnapshotIds(): Promise<string[]> {
    const groupedDocs = await Baseline.aggregate([
        { $match: { snapshootId: { $ne: null } } },
        { $group: { _id: '$snapshootId' } },
        { $project: { _id: 1 } }
    ]).exec();

    const snapshotIds: string[] = [];
    for (const doc of groupedDocs) {
        const id = normalizeId(doc._id);
        if (id) {
            snapshotIds.push(id);
        }
    }
    return snapshotIds;
}
/** Extra `$match` conditions applied to Check documents before ids are grouped. */
type CheckSnapshotMatch = Record<string, unknown>;

/**
 * Collects the distinct snapshot ids referenced by Check documents, separately for the
 * `baselineId`, `actualSnapshotId` and `diffId` fields. The three aggregations are
 * started together and awaited as a group.
 *
 * @param matchFilter - additional conditions a Check must satisfy (defaults to all Checks)
 * @returns unique ids per field, in string form; values that cannot be normalized are skipped
 */
async function collectCheckSnapshotIds(matchFilter: CheckSnapshotMatch = {}) {
    const distinctIdsOf = async (field: 'baselineId' | 'actualSnapshotId' | 'diffId'): Promise<string[]> => {
        const pipeline = [
            { $match: { ...matchFilter, [field]: { $ne: null } } },
            { $group: { _id: `$${field}` } },
            { $project: { _id: 1 } }
        ];
        const docs: { _id: unknown }[] = await Check.aggregate(pipeline).exec();
        const ids: string[] = [];
        for (const doc of docs) {
            const id = normalizeId(doc._id);
            if (id) {
                ids.push(id);
            }
        }
        return ids;
    };

    const [baselineIds, actualIds, diffIds] = await Promise.all([
        distinctIdsOf('baselineId'),
        distinctIdsOf('actualSnapshotId'),
        distinctIdsOf('diffId'),
    ]);
    return { baselineIds, actualIds, diffIds };
}
/**
 * Deletes files located in `config.defaultImagesPath`, running at most `limit`
 * unlink operations concurrently. Individual failures never abort the run; they
 * are collected and returned to the caller.
 *
 * @param files - file names relative to `config.defaultImagesPath`
 * @param limit - maximum number of concurrent deletions; values below 1 (or NaN)
 *                are treated as 1 so that a bad limit cannot silently skip all work
 * @returns `success` - number of files removed; `failures` - one entry per file
 *          that could not be removed, with the reason
 */
async function deleteFilesWithLimit(files: string[], limit: number) {
    const failures: { file: string; reason: unknown }[] = [];
    let success = 0;
    let nextIndex = 0;

    const requestedWorkers = limit >= 1 ? Math.floor(limit) : 1;
    const workerCount = Math.min(requestedWorkers, files.length);

    const worker = async (): Promise<void> => {
        // Termination is driven by the shared cursor only. An invalid entry must not
        // stop the worker, otherwise the files queued after it would be left on disk.
        while (nextIndex < files.length) {
            const current = files[nextIndex];
            nextIndex += 1;

            if (!current) {
                // An empty name would resolve to the images directory itself; report it
                // instead of handing the directory path to unlink().
                failures.push({ file: current ?? '', reason: new Error('empty filename') });
                continue;
            }
            try {
                await fsp.unlink(path.join(config.defaultImagesPath, current));
                success += 1;
            } catch (error) {
                failures.push({ file: current, reason: error });
            }
        }
    };

    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    return { success, failures };
}
/**
 * Options accepted by the old-checks handling task.
 */
export interface HandleOldChecksOptions {
/** Age threshold in days: checks whose `createdDate` is earlier than "now minus `days`" are treated as old. */
days: number;
/** When `true` the removal stage runs; when `false` only the statistics stage is executed and nothing is deleted. */
remove: boolean;
}
/**
 * Handle old checks task
 * Removes checks and related items that are older than the specified number of days.
 *
 * IMPORTANT: Baseline records are NEVER removed automatically by this task.
 * Baselines represent the reference/golden images and should not be lost.
 * Only Checks and their associated Snapshots (baseline, actual, diff) are removed.
 * Baseline and actual snapshots are preserved if they are still referenced by any
 * Baseline or by any Check that is not old.
 *
 * STAGES:
 * 1. Statistics: counts checks, snapshots and files, and reports how much would be removed.
 * 2. Removal (only when `options.remove` is true): deletes old Checks and unreferenced
 *    Snapshots, then deletes files whose name is no longer used by any Snapshot.
 * 3. Statistics after removal.
 *
 * TRANSACTIONS:
 * - When the server reports a replica set, the database deletions run inside a transaction.
 * - The transaction is rolled back only while it is still active; a failure after the
 *   commit (e.g. while deleting files) is reported as-is without attempting a rollback.
 * - The session is always ended, including when collecting references fails.
 *
 * PERFORMANCE OPTIMIZATIONS:
 * - Uses countDocuments() instead of loading full collections for statistics
 * - Uses aggregation pipelines with $group to collect unique IDs (avoids 16MB distinct limit)
 * - Only loads required fields (projections) when full documents are needed
 * - Processes file operations in batches to avoid memory spikes
 * - Reuses computed data to avoid redundant queries
 *
 * RECOMMENDED DATABASE INDEXES for optimal performance:
 * - Check.createdDate (for date-based queries)
 * - Check.baselineId, Check.actualSnapshotId, Check.diffId (for distinct queries)
 * - Snapshot.filename (for distinct filename queries)
 * - Baseline.snapshootId (for baseline preservation checks)
 *
 * @param options - Task options
 * @param output - Output writer for streaming results; it is always ended before returning
 * @throws rethrows any error after writing its message to `output`
 */
export async function handleOldChecksTask(
    options: HandleOldChecksOptions,
    output: IOutputWriter
): Promise<void> {
    try {
        const startTime = process.hrtime();
        output.write('- starting...\n');

        // Validate that the images directory exists
        try {
            await fsp.access(config.defaultImagesPath);
        } catch {
            throw new Error(`Images directory does not exist or is not accessible: ${config.defaultImagesPath}`);
        }
        output.write(`> validated images directory: ${config.defaultImagesPath}`);

        output.write('STAGE #1 Calculate common stats');
        const thresholdDate = subDays(new Date(), options.days);
        // A fresh filter object per query, so no query can observe another one's mutations.
        const oldChecksFilter = () => ({ createdDate: { $lt: thresholdDate } });

        output.write('> count all checks');
        const allChecksCountBefore = await Check.countDocuments().exec();
        output.write('> count snapshots');
        const allSnapshotsCountBefore = await Snapshot.countDocuments().exec();
        output.write('> get files data');
        const allFilesBefore = await countPngFiles(config.defaultImagesPath);
        output.write('> count old checks');
        const oldChecksCount = await Check.countDocuments(oldChecksFilter()).exec();

        // The three id sets are gathered by the shared helper (aggregation avoids the
        // 16MB distinct limit); the queries run concurrently.
        output.write('>>> collect all baselineId snapshot IDs from old Checks ');
        output.write('>>> collect all actualSnapshotId from old Checks ');
        output.write('>>> collect all diffId snapshot IDs from old Checks ');
        const {
            baselineIds: oldChecksBaselineSnapshotIds,
            actualIds: oldChecksActualSnapshotIds,
            diffIds: oldChecksDiffSnapshotIds,
        } = await collectCheckSnapshotIds(oldChecksFilter());

        output.write('>>> calculate all unique snapshots ids for old Checks ');
        const allOldSnapshotsUniqueIds = Array.from(new Set([
            ...oldChecksBaselineSnapshotIds,
            ...oldChecksActualSnapshotIds,
            ...oldChecksDiffSnapshotIds,
        ]));

        output.write('>>> collect filenames from old snapshots');
        // Only load filenames, not entire snapshot documents
        const oldSnapshotsData = await Snapshot.find(
            { _id: { $in: allOldSnapshotsUniqueIds } },
            { filename: 1 }
        ).lean().exec() as { filename?: string }[];

        output.write('>>> calculate total size of old snapshot files');
        const oldSnapshotsFilenames = Array.from(new Set(
            oldSnapshotsData.map((x) => x.filename).filter((f): f is string => !!f)
        ));

        let totalOldFilesSize = 0;
        // Process files in batches to avoid too many concurrent operations
        const BATCH_SIZE = 100;
        for (let i = 0; i < oldSnapshotsFilenames.length; i += BATCH_SIZE) {
            const batch = oldSnapshotsFilenames.slice(i, i + BATCH_SIZE);
            const batchResults = await Promise.allSettled(
                batch.map(async (filename) => {
                    const stats = await fsp.stat(path.join(config.defaultImagesPath, filename));
                    return stats.size;
                })
            );
            for (const result of batchResults) {
                // Files that cannot be stat'ed (e.g. already missing) are skipped on purpose
                if (result.status === 'fulfilled') {
                    totalOldFilesSize += result.value;
                }
            }
        }
        const totalOldFilesSizeGB = (totalOldFilesSize / (1024 * 1024 * 1024)).toFixed(3);

        const outTable = createTable([
            { item: 'all checks', count: allChecksCountBefore },
            { item: 'all snapshots', count: allSnapshotsCountBefore },
            { item: 'all files', count: allFilesBefore },
            { item: `checks older than: '${options.days}' days`, count: oldChecksCount },
            { item: 'old checks baseline snapshot ids', count: oldChecksBaselineSnapshotIds.length },
            { item: 'old checks actual snapshot ids', count: oldChecksActualSnapshotIds.length },
            { item: 'old checks diff snapshot ids', count: oldChecksDiffSnapshotIds.length },
            { item: 'all old snapshots unique Ids', count: allOldSnapshotsUniqueIds.length },
            { item: 'old snapshot filenames', count: oldSnapshotsFilenames.length },
            { item: 'total size of old files', count: `${totalOldFilesSizeGB} GB` },
        ]);
        output.write(outTable);

        if (options.remove) {
            output.write(`STAGE #2 Remove checks that older that: '${options.days}' days, '${dateToISO8601(thresholdDate)}'\n`);

            // Transactions require a replica set; `session` stays null when they are not used.
            let session: mongoose.ClientSession | null = null;
            try {
                const admin = mongoose.connection.db?.admin();
                const serverInfo = await admin?.serverStatus();
                const isReplicaSet = serverInfo?.repl?.setName !== undefined;
                if (isReplicaSet) {
                    session = await mongoose.startSession();
                    session.startTransaction();
                    output.write('> using transactions for data consistency (replica set detected)');
                } else {
                    output.write('> standalone MongoDB detected, proceeding without transactions');
                }
            } catch {
                output.write('> could not determine MongoDB topology, proceeding without transactions');
                if (session) {
                    // A session opened before the failure must not be leaked.
                    try {
                        await session.endSession();
                    } catch {
                        // best effort: we continue without a session either way
                    }
                }
                session = null;
            }
            const deleteOptions = session ? { session } : undefined;

            try {
                // Reference collection is inside this try so that a failure here
                // still rolls back and ends the session.
                output.write('> collect current snapshot references');
                let referencedSnapshotIds: Set<string>;
                try {
                    const currentCheckRefs = await collectCheckSnapshotIds({ createdDate: { $gte: thresholdDate } });
                    const baselineRefs = await collectBaselineSnapshotIds();
                    // Everything still used as baseline/actual by a non-old Check, or by any Baseline.
                    referencedSnapshotIds = new Set([
                        ...currentCheckRefs.baselineIds,
                        ...currentCheckRefs.actualIds,
                        ...baselineRefs,
                    ]);
                    output.write('>>> snapshot references collected');
                } catch (collectError) {
                    output.write('>>> failed to collect snapshot references');
                    throw collectError;
                }
                const isUnreferenced = (id: string): boolean => !referencedSnapshotIds.has(id);

                output.write('> remove checks');
                const checkRemovingResult = await Check.deleteMany(oldChecksFilter(), deleteOptions);
                output.write(`>>> removed: '${checkRemovingResult.deletedCount}'`);

                output.write('> remove snapshots');
                output.write('>> remove baselines snapshots');
                output.write('>> remove all old snapshots that not related to new baseline and check items');
                const deletableBaselineSnapshots = oldChecksBaselineSnapshotIds.filter(isUnreferenced);
                const removedByBaselineSnapshotsResult = await Snapshot.deleteMany(
                    { _id: { $in: deletableBaselineSnapshots } },
                    deleteOptions
                );
                output.write(`>>> removed: '${removedByBaselineSnapshotsResult.deletedCount}'`);

                output.write('>> remove actual snapshots');
                output.write('>> remove all old snapshots that not related to new baseline and check items');
                const deletableActualSnapshots = oldChecksActualSnapshotIds.filter(isUnreferenced);
                const removedByActualSnapshotsResult = await Snapshot.deleteMany(
                    { _id: { $in: deletableActualSnapshots } },
                    deleteOptions
                );
                output.write(`>>> removed: '${removedByActualSnapshotsResult.deletedCount}'`);

                output.write('>> remove all old diff snapshots');
                // NOTE: Diff snapshots are temporary comparison artifacts and are not referenced by Baselines.
                // Baselines only reference golden/baseline images via snapshootId field, never diff images.
                // Therefore, diff snapshots are deleted without checking Baseline references.
                const removedByDiffSnapshotsResult = await Snapshot.deleteMany(
                    { _id: { $in: oldChecksDiffSnapshotIds } },
                    deleteOptions
                );
                output.write(`>>> removed: '${removedByDiffSnapshotsResult.deletedCount}'`);

                // Commit the transaction after all DB operations (if using transactions)
                if (session) {
                    await session.commitTransaction();
                    output.write('>>> Database transaction committed successfully');
                }

                output.write('> remove files');
                output.write('>>> using previously collected old snapshots filenames');
                output.write(`>> found: ${oldSnapshotsFilenames.length}`);

                output.write('> get all current snapshots filenames');
                // Use aggregation to avoid 16MB distinct limit
                const currentFilenamesResults = await Snapshot.aggregate([
                    { $match: { filename: { $ne: null } } },
                    { $group: { _id: '$filename' } },
                    { $project: { _id: 1 } }
                ]).exec();
                const currentSnapshotsSet = new Set(currentFilenamesResults.map((doc) => doc._id as string));

                output.write('>> calculate intersection between all current snapshot filenames and old snapshots filenames');
                const filesIntersection = oldSnapshotsFilenames.filter((filename) => currentSnapshotsSet.has(filename));
                output.write(`>> found: ${filesIntersection.length}`);

                output.write('>> calculate filenames to remove');
                let filesToDelete = oldSnapshotsFilenames.filter((filename) => !currentSnapshotsSet.has(filename));
                output.write(`>> found: ${filesToDelete.length}`);

                // Re-check current snapshots right before deletion to prevent race condition
                output.write('>> re-validating files to delete to prevent race condition');
                const revalidateFilenamesResults = await Snapshot.aggregate([
                    { $match: { filename: { $ne: null } } },
                    { $group: { _id: '$filename' } },
                    { $project: { _id: 1 } }
                ]).exec();
                const currentSnapshotsBeforeDeletion = new Set(revalidateFilenamesResults.map((doc) => doc._id as string));
                filesToDelete = filesToDelete.filter((filename) => !currentSnapshotsBeforeDeletion.has(filename));
                output.write(`>> validated: ${filesToDelete.length} files safe to delete`);

                output.write(`>> remove these files: ${filesToDelete.length}`);
                const { success, failures } = await deleteFilesWithLimit(filesToDelete, 25);
                if (failures.length > 0) {
                    output.write(`>> warning: ${failures.length} files failed to delete:`);
                    failures.forEach((failure) => {
                        output.write(` - ${failure.file}: ${failure.reason}`);
                    });
                }
                output.write(`>> done: ${success} files deleted successfully, ${failures.length} failed`);

                output.write('STAGE #3 Calculate common stats after Removing');
                output.write('> count all checks');
                const allChecksCountAfter = await Check.countDocuments().exec();
                output.write('> count snapshots');
                const allSnapshotsCountAfter = await Snapshot.countDocuments().exec();
                output.write('> get files data');
                const allFilesAfter = await countPngFiles(config.defaultImagesPath);
                const outTableAfter = createTable([
                    { item: 'all checks', count: allChecksCountAfter },
                    { item: 'all snapshots', count: allSnapshotsCountAfter },
                    { item: 'all files', count: allFilesAfter },
                ]);
                output.write(outTableAfter);
            } catch (operationError) {
                output.write('>>> Error during operation...');
                // Roll back only while the transaction is still active: aborting after a
                // successful commit throws and would hide the real failure.
                if (session?.inTransaction()) {
                    output.write('>>> Rolling back transaction...');
                    try {
                        await session.abortTransaction();
                    } catch (abortError) {
                        const abortMsg = abortError instanceof Error ? abortError.message : String(abortError);
                        output.write(`>>> Transaction rollback failed: ${abortMsg}`);
                    }
                }
                throw operationError;
            } finally {
                if (session) {
                    await session.endSession();
                }
            }
        }

        const elapsedSeconds = parseHrtimeToSeconds(process.hrtime(startTime));
        const elapsedMinutes = (parseFloat(elapsedSeconds) / 60).toFixed(2);
        output.write(`> done in ${elapsedSeconds} seconds (${elapsedMinutes} min)`);
    } catch (e: unknown) {
        const errMsg = e instanceof Error ? e.message : String(e);
        output.write(errMsg);
        throw e;
    } finally {
        output.end();
    }
}