mongo-checker
Version:
CLI tool for searching duplicate values in a MongoDB collection by a chosen field.
126 lines (103 loc) • 3.66 kB
JavaScript
import { MongoClient } from "mongodb";
import { performance } from "perf_hooks";
/* --- core logic --- */
/**
 * Check a config object and describe the first problem found.
 *
 * @param {unknown} config - Candidate configuration passed to runMongoChecker.
 * @returns {string|null} Human-readable problem description, or null when the
 *   config is acceptable.
 */
function findConfigError(config) {
  if (config === null || typeof config !== "object") {
    return "config must be an object.";
  }

  const requiredKeys = [
    "uri",
    "db",
    "collection",
    "field",
    "maxDuplicatesToShow",
    "allowDiskUse",
  ];
  for (const key of requiredKeys) {
    const value = config[key];
    // Only truly absent values count as "missing"; 0 / NaN / false fall
    // through to the type-specific checks below so they get a precise message.
    if (value === undefined || value === null || value === "") {
      return `config missing required parameter - '${key}'.`;
    }
  }

  for (const key of ["uri", "db", "collection", "field"]) {
    if (typeof config[key] !== "string") {
      return `'${key}' must be a non-empty string.`;
    }
  }

  if (
    !Number.isInteger(config.maxDuplicatesToShow) ||
    config.maxDuplicatesToShow <= 0
  ) {
    return "'maxDuplicatesToShow' must be a positive integer.";
  }

  if (typeof config.allowDiskUse !== "boolean") {
    return "'allowDiskUse' must be a boolean (true/false).";
  }

  return null;
}

/**
 * Mask the password part of a `scheme://user:password@host` URI so the
 * connection string can be printed without leaking the secret.
 *
 * @param {string} uri - Connection string.
 * @returns {string} The URI with any password replaced by `***`.
 */
function redactUriPassword(uri) {
  return uri.replace(
    /^([a-z][a-z\d+.-]*:\/\/)([^/?#]*)@/i,
    (_match, scheme, userInfo) => {
      if (!userInfo.includes(":")) {
        // Username only: nothing secret to hide.
        return `${scheme}${userInfo}@`;
      }
      const [user] = userInfo.split(":", 1);
      return `${scheme}${user}:***@`;
    }
  );
}

/**
 * Render a grouped `_id` value for display: dates as ISO strings, other
 * non-null objects as JSON, everything else via String().
 *
 * @param {unknown} value - The `_id` of an aggregation result row.
 * @returns {string} Printable representation.
 */
function formatGroupValue(value) {
  if (value instanceof Date) {
    return value.toISOString();
  }
  if (typeof value === "object" && value !== null) {
    return JSON.stringify(value);
  }
  return String(value);
}

/**
 * Format a duration in milliseconds as "[M min ][S sec ]N ms".
 * Minute and second parts are omitted when they are zero.
 *
 * @param {number} durationMs - Elapsed time in milliseconds.
 * @returns {string} Formatted duration.
 */
function formatDuration(durationMs) {
  const minutes = Math.floor(durationMs / 60000);
  const seconds = Math.floor((durationMs % 60000) / 1000);
  const milliseconds = Math.floor(durationMs % 1000);

  let text = "";
  if (minutes > 0) text += `${minutes} min `;
  if (seconds > 0) text += `${seconds} sec `;
  text += `${milliseconds} ms`;
  return text;
}

/**
 * Search a MongoDB collection for values of `field` that occur more than once
 * and print a report to the console.
 *
 * The config is validated first; on a validation failure an error is printed
 * and the process exits with code 1. Otherwise the function connects, runs a
 * `$group` / `$match` / `$sort` aggregation, prints up to
 * `maxDuplicatesToShow` results (most frequent first) plus the elapsed time,
 * and always closes the client.
 *
 * NOTE(review): the status glyphs in the reviewed copy of this file were
 * mis-encoded. The "Connecting", "URI", "Collection", "Search field" and
 * per-result glyphs (including the arrow) could not be recovered with
 * certainty and were chosen to fit - confirm against the upstream source.
 *
 * @param {object} config
 * @param {string} config.uri - MongoDB connection string.
 * @param {string} config.db - Database name.
 * @param {string} config.collection - Collection name.
 * @param {string} config.field - Field whose values are grouped.
 * @param {number} config.maxDuplicatesToShow - Positive integer; how many
 *   duplicate groups to print.
 * @param {boolean} config.allowDiskUse - Passed through as the aggregation's
 *   `allowDiskUse` option.
 * @returns {Promise<void>} Resolves after the report is printed and the client
 *   is closed; rejects with whatever connecting or aggregating throws.
 */
export async function runMongoChecker(config) {
  const configError = findConfigError(config);
  if (configError !== null) {
    console.error(`❌ Error: ${configError}`);
    process.exit(1);
    return; // Only reachable when process.exit has been stubbed out.
  }

  const {
    uri,
    db,
    collection,
    field,
    maxDuplicatesToShow,
    allowDiskUse,
  } = config;

  // Constructed before any timer is started so a constructor failure cannot
  // leave a running interval behind.
  const client = new MongoClient(uri);

  console.log("🔌 Connecting to MongoDB...\n");
  // Never echo the password part of the connection string.
  console.log(`🔗 URI: ${redactUriPassword(uri)}`);
  console.log(`🗄️ Database: ${db}`);
  console.log(`📂 Collection: ${collection}`);
  console.log(`🔍 Search field: ${field}`);
  console.log(`💾 Allow disk use: ${allowDiskUse}`);
  console.log(`👁️ Max show duplicates: ${maxDuplicatesToShow}\n`);

  const searchMessage = `Searching for duplicates by field "${field}"...`;
  const start = performance.now();

  // Animate only on an interactive terminal; when stdout is piped the "\r"
  // frames would just pollute the captured output.
  let spinnerInterval = null;
  if (process.stdout.isTTY) {
    const spinnerFrames = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
    let spinnerIndex = 0;
    spinnerInterval = setInterval(() => {
      const frame = spinnerFrames[spinnerIndex % spinnerFrames.length];
      process.stdout.write(`\r${frame} ${searchMessage}`);
      spinnerIndex++;
    }, 100);
  } else {
    console.log(searchMessage);
  }

  // Idempotent: safe to call on the success path and again from `finally`.
  const stopSpinner = () => {
    if (spinnerInterval === null) return;
    clearInterval(spinnerInterval);
    spinnerInterval = null;
    if (process.stdout.clearLine) process.stdout.clearLine(0);
    if (process.stdout.cursorTo) process.stdout.cursorTo(0);
  };

  try {
    await client.connect();
    const coll = client.db(db).collection(collection);

    const duplicates = await coll
      .aggregate(
        [
          { $group: { _id: `$${field}`, count: { $sum: 1 } } },
          { $match: { count: { $gt: 1 } } },
          { $sort: { count: -1 } },
        ],
        { allowDiskUse }
      )
      .toArray();

    stopSpinner();

    console.log(`✅ Found ${duplicates.length} duplicates for field - "${field}"`);

    const shown = Math.min(maxDuplicatesToShow, duplicates.length);
    for (const { _id, count } of duplicates.slice(0, shown)) {
      const val = formatGroupValue(_id);
      console.log(`\n🔁 ${val} → ${count.toLocaleString("en-US")} times`);
    }

    if (duplicates.length > shown) {
      console.log(`\n...and ${duplicates.length - shown} more duplicates`);
    }

    const timeString = formatDuration(performance.now() - start);
    console.log(`\n⏱️ Lead time: ${timeString}`);
  } finally {
    stopSpinner();
    await client.close();
  }
}