UNPKG

codemodctl

Version:

CLI tool and utilities for workflow engine operations, file sharding, and codeowner analysis

112 lines (110 loc) 4.21 kB
import crypto from "node:crypto";

//#region src/utils/consistent-sharding.ts
/** Number of discrete positions on the hash ring; every position is in [0, HASH_RING_SIZE). */
const HASH_RING_SIZE = 1e6;

/**
 * Upper bound on the number of distinct shard counts whose sorted ring is kept in memory.
 * When the bound is reached the cache is simply cleared and rebuilt on demand.
 */
const MAX_CACHED_RINGS = 32;

/** Sorted shard rings keyed by shard count, so the ring is not rebuilt for every file. */
const shardRingCache = new Map();

/**
 * Generates a numeric hash from a filename using SHA1
 * Uses only the first 8 characters of the hex digest (32 bits) to avoid JavaScript number precision issues
 *
 * @param filename - The file path to hash
 * @returns Integer in the range 0 to 2^32 - 1
 */
function getNumericFileNameSha1(filename) {
  const digestPrefix = crypto.createHash("sha1").update(filename).digest("hex").substring(0, 8);
  return Number.parseInt(digestPrefix, 16);
}

/**
 * Maps a filename to a consistent position on the hash ring (0 to HASH_RING_SIZE-1)
 * This position remains constant regardless of shard count changes
 *
 * Note: 2^32 is not a multiple of HASH_RING_SIZE, so the modulo introduces a very slight bias
 * towards low positions. It is kept as is because changing it would move existing assignments.
 *
 * @param filename - The file path to hash
 * @returns Position on the hash ring
 */
function getFileHashPosition(filename) {
  return getNumericFileNameSha1(filename) % HASH_RING_SIZE;
}

/**
 * Get the position for a specific shard index on the hash ring
 * Shards get fixed positions that don't change when other shards are added:
 * the position depends only on the string `shard-<index>`.
 *
 * @param shardIndex - 0-based shard index
 * @returns Position on the hash ring
 */
function getShardPosition(shardIndex) {
  // Same SHA1-prefix-modulo scheme as for filenames, applied to a synthetic shard key.
  return getFileHashPosition(`shard-${shardIndex}`);
}

/**
 * Returns the shards 0..shardCount-1 ordered by their position on the hash ring.
 * The result is memoized per shard count because it only depends on that number.
 *
 * @param shardCount - Positive integer number of shards
 * @returns Array of `{ index, position }` sorted by ascending position
 */
function getSortedShardRing(shardCount) {
  const cached = shardRingCache.get(shardCount);
  if (cached !== undefined) return cached;

  const ring = Array.from({ length: shardCount }, (_, index) => ({
    index,
    position: getShardPosition(index),
  }));
  // Array.prototype.sort is stable, so shards sharing a position stay in index order.
  ring.sort((a, b) => a.position - b.position);

  if (shardRingCache.size >= MAX_CACHED_RINGS) shardRingCache.clear();
  shardRingCache.set(shardCount, ring);
  return ring;
}

/**
 * Gets the shard index for a filename using consistent hashing
 * Files are assigned to the next shard clockwise on the hash ring, i.e. the first shard
 * whose position is greater than or equal to the file's position; files located after
 * the last shard wrap around to the first shard of the ring.
 *
 * @param filename - The file path to hash
 * @param shardCount - Total number of shards (a fractional value is rounded up)
 * @returns Shard index (0-based)
 * @throws Error if shardCount is not a finite number greater than 0
 */
function getShardForFilename(filename, { shardCount }) {
  // Rounding up mirrors the historical `for (i = 0; i < shardCount; i++)` ring construction.
  const count = Math.ceil(shardCount);
  // Also rejects NaN/undefined/Infinity, which previously crashed obscurely or never returned.
  if (!Number.isFinite(count) || count <= 0) throw new Error("Shard count must be greater than 0");

  const filePosition = getFileHashPosition(filename);
  const ring = getSortedShardRing(count);
  const owner = ring.find((shard) => filePosition <= shard.position);
  return (owner ?? ring[0]).index;
}

/**
 * Checks if a file belongs to a specific shard by simply checking if it's in the shard's files list
 *
 * @param filename - The file path to check
 * @param shard - Shard object whose `_meta_files` array lists the files of the shard
 * @returns True if file is in the shard's files list
 */
function fitsInShard(filename, shard) { return shard._meta_files.includes(filename); } /** * Distributes files across shards using deterministic hashing * * @param filenames - Array of file paths * @param shardCount - Total number of shards * @returns Map of shard index to array of filenames */ function distributeFilesAcrossShards(filenames, shardCount) { if (shardCount <= 0) throw new Error("Shard count must be greater than 0"); const shardMap = /* @__PURE__ */ new Map(); for (let i = 0; i < shardCount; i++) shardMap.set(i, []); for (const filename of filenames) { const shardIndex = getShardForFilename(filename, { shardCount }); shardMap.get(shardIndex).push(filename); } return shardMap; } /** * Calculate optimal number of shards based on target shard size * * @param totalFiles - Total number of files * @param targetShardSize - Desired number of files per shard * @returns Number of shards needed */ function calculateOptimalShardCount(totalFiles, targetShardSize) { return Math.ceil(totalFiles / targetShardSize); } /** * Analyzes file reassignment when scaling from oldShardCount to newShardCount * Returns statistics about how many files would need to be reassigned * * @param filenames - Array of file paths to analyze * @param oldShardCount - Current number of shards * @param newShardCount - Target number of shards * @returns Object with reassignment statistics */ function analyzeShardScaling(filenames, oldShardCount, newShardCount) { let reassignedFiles = 0; for (const filename of filenames) { const oldShard = getShardForFilename(filename, { shardCount: oldShardCount }); const newShard = getShardForFilename(filename, { shardCount: newShardCount }); if (oldShard !== newShard) reassignedFiles++; } const stableFiles = filenames.length - reassignedFiles; const reassignmentPercentage = filenames.length > 0 ? 
reassignedFiles / filenames.length * 100 : 0; return { totalFiles: filenames.length, reassignedFiles, reassignmentPercentage, stableFiles }; } //#endregion export { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };