/*
 * codemodctl
 * Version:
 * CLI tool and utilities for workflow engine operations, file sharding, and codeowner analysis
 * 112 lines (110 loc) • 4.21 kB
 * JavaScript
 */
import crypto from "node:crypto";
//#region src/utils/consistent-sharding.ts
/** Number of discrete positions on the hash ring; hash values are reduced modulo this size. */
const HASH_RING_SIZE = 1000000;
/**
 * Derives an unsigned 32-bit integer from the SHA-1 digest of a filename.
 *
 * Only the leading 4 bytes of the digest (the same bits as the first 8 hex characters)
 * are used, so the result always stays well inside JavaScript's exact integer range.
 *
 * @param {string} filename - Text to hash
 * @returns {number} Integer between 0 and 0xffffffff inclusive
 */
function getNumericFileNameSha1(filename) {
  const digestBytes = crypto.createHash("sha1").update(filename).digest();
  // A big-endian read of bytes 0..3 equals parsing the first 8 hex characters as base 16.
  return digestBytes.readUInt32BE(0);
}
/**
 * Computes where a filename sits on the hash ring.
 *
 * The position depends only on the filename, never on how many shards exist,
 * so it stays the same when the shard count changes.
 *
 * @param {string} filename - File path to place on the ring
 * @returns {number} Ring position between 0 and HASH_RING_SIZE - 1
 */
function getFileHashPosition(filename) {
  const numericHash = getNumericFileNameSha1(filename);
  return numericHash % HASH_RING_SIZE;
}
/** Memoized ring positions keyed by shard label; a position is a pure function of its label. */
const shardPositionCache = /* @__PURE__ */ new Map();
/**
 * Gets the fixed hash-ring position of a shard.
 *
 * The position is derived only from the label `shard-<shardIndex>`, so a shard keeps
 * its position when other shards are added or removed. It uses the same SHA-1 based
 * hash as filenames (getNumericFileNameSha1) so files and shards share one ring.
 *
 * Results are cached because getShardForFilename asks for the position of every shard
 * on each lookup; without the cache each lookup would redo one SHA-1 per shard.
 *
 * @param {number} shardIndex - 0-based shard index
 * @returns {number} Ring position between 0 and HASH_RING_SIZE - 1
 */
function getShardPosition(shardIndex) {
  const label = `shard-${shardIndex}`;
  let position = shardPositionCache.get(label);
  if (position === undefined) {
    position = getNumericFileNameSha1(label) % HASH_RING_SIZE;
    shardPositionCache.set(label, position);
  }
  return position;
}
/**
 * Gets the shard index for a filename using consistent hashing.
 *
 * The file is assigned to the first shard found clockwise on the hash ring: the shard
 * with the smallest ring position that is greater than or equal to the file's position.
 * If no shard sits at or after the file, the lookup wraps around to the shard with the
 * smallest position overall. When several shards share a position, the lowest index wins.
 *
 * @param {string} filename - The file path to hash
 * @param {{ shardCount: number }} options - `shardCount` is the total number of shards
 * @returns {number} Shard index (0-based)
 * @throws {Error} If shardCount is not greater than 0 (including NaN, undefined and null)
 */
function getShardForFilename(filename, { shardCount }) {
  // `!(x > 0)` rather than `x <= 0`: NaN and undefined fail both comparisons, and would
  // otherwise fall through to an empty ring and an unrelated TypeError.
  if (!(shardCount > 0)) throw new Error("Shard count must be greater than 0");
  const filePosition = getFileHashPosition(filename);

  let successorIndex = -1; // nearest shard at or after the file position
  let successorPosition = Infinity;
  let firstIndex = -1; // shard with the smallest position (wrap-around target)
  let firstPosition = Infinity;

  // Single pass instead of build-and-sort. Strict `<` while scanning indices upward
  // keeps the lowest index on equal positions, matching a stable sort by position.
  for (let index = 0; index < shardCount; index++) {
    const position = getShardPosition(index);
    if (position < firstPosition) {
      firstPosition = position;
      firstIndex = index;
    }
    if (position >= filePosition && position < successorPosition) {
      successorPosition = position;
      successorIndex = index;
    }
  }
  return successorIndex !== -1 ? successorIndex : firstIndex;
}
/**
 * Tells whether a file is listed in a shard.
 *
 * This is a plain membership test against the shard's `_meta_files` list; no hashing is involved.
 *
 * @param filename - The file path to look for
 * @param shard - Shard object whose `_meta_files` property holds the shard's file paths
 * @returns True if `_meta_files` includes the filename
 */
function fitsInShard(filename, shard) {
  const { _meta_files: shardFiles } = shard;
  return shardFiles.includes(filename);
}
/**
 * Distributes files across shards using deterministic hashing.
 *
 * Every shard index from 0 up to shardCount gets an entry in the result, even when
 * no file maps to it, so callers can iterate all shards uniformly. Files keep their
 * input order within each shard.
 *
 * @param {Iterable<string>} filenames - File paths to distribute
 * @param {number} shardCount - Total number of shards
 * @returns {Map<number, string[]>} Map of shard index to array of filenames
 * @throws {Error} If shardCount is not greater than 0 (including NaN, undefined and null)
 */
function distributeFilesAcrossShards(filenames, shardCount) {
  // `!(x > 0)` rather than `x <= 0` so NaN and undefined are rejected here instead of
  // yielding an empty map (empty input) or an opaque failure further down (non-empty input).
  if (!(shardCount > 0)) throw new Error("Shard count must be greater than 0");
  const shardMap = /* @__PURE__ */ new Map();
  for (let i = 0; i < shardCount; i++) shardMap.set(i, []);
  for (const filename of filenames) {
    const shardIndex = getShardForFilename(filename, { shardCount });
    shardMap.get(shardIndex).push(filename);
  }
  return shardMap;
}
/**
 * Calculates how many shards are needed so that no shard exceeds the target size.
 *
 * The result is totalFiles / targetShardSize rounded up. With zero files the result
 * is 0, which the sharding functions in this module reject as a shard count, so
 * callers should handle the "nothing to shard" case themselves.
 *
 * @param {number} totalFiles - Total number of files (must be 0 or greater)
 * @param {number} targetShardSize - Desired number of files per shard (must be greater than 0)
 * @returns {number} Number of shards needed
 * @throws {Error} If targetShardSize is not greater than 0, or totalFiles is negative or not a number
 */
function calculateOptimalShardCount(totalFiles, targetShardSize) {
  // A zero size would yield Infinity (or NaN for 0 / 0) and a negative one a negative count;
  // an infinite shard count would make the shard-building loops never terminate.
  if (!(targetShardSize > 0)) throw new Error("Target shard size must be greater than 0");
  if (!(totalFiles >= 0)) throw new Error("Total files must be 0 or greater");
  return Math.ceil(totalFiles / targetShardSize);
}
/**
 * Measures how many files change shard when the shard count goes from
 * oldShardCount to newShardCount.
 *
 * Each file is looked up under both shard counts; a file counts as reassigned when
 * the two lookups return different shard indices.
 *
 * @param filenames - Array of file paths to analyze
 * @param oldShardCount - Current number of shards
 * @param newShardCount - Target number of shards
 * @returns Object with `totalFiles`, `reassignedFiles`, `reassignmentPercentage`
 *          (0 to 100, or 0 when there are no files) and `stableFiles`
 */
function analyzeShardScaling(filenames, oldShardCount, newShardCount) {
  let movedCount = 0;
  for (const file of filenames) {
    const shardBefore = getShardForFilename(file, { shardCount: oldShardCount });
    const shardAfter = getShardForFilename(file, { shardCount: newShardCount });
    movedCount += shardBefore === shardAfter ? 0 : 1;
  }

  const fileCount = filenames.length;
  let movedPercentage = 0;
  // Guard the division so an empty input reports 0% instead of NaN.
  if (fileCount > 0) {
    movedPercentage = movedCount / fileCount * 100;
  }

  return {
    totalFiles: fileCount,
    reassignedFiles: movedCount,
    reassignmentPercentage: movedPercentage,
    stableFiles: fileCount - movedCount
  };
}
//#endregion
export { analyzeShardScaling, calculateOptimalShardCount, distributeFilesAcrossShards, fitsInShard, getFileHashPosition, getNumericFileNameSha1, getShardForFilename };