codemodctl
Version:
CLI tool and utilities for workflow engine operations, file sharding, and codeowner analysis
99 lines (97 loc) • 4.87 kB
JavaScript
import { getApplicableFiles } from "./codemod-cli-DailrcEf.js";
import { calculateOptimalShardCount, distributeFilesAcrossShards } from "./consistent-sharding-BfgFDhwr.js";
import path from "node:path";
//#region src/utils/directory-analysis.ts
/**
 * Groups files by their immediate subdirectory within the target directory.
 *
 * Files that sit directly in the target (not inside one of its subdirectories)
 * and files located outside the target are skipped.
 *
 * @param files - Array of file paths to group (resolved against projectRoot)
 * @param target - Target directory to analyze subdirectories within
 * @param projectRoot - Root directory of the project for resolving relative paths
 * @returns Map of subdirectory paths to their file lists; both the keys and the
 *          listed files are expressed relative to projectRoot
 */
function groupFilesByDirectory(files, target, projectRoot) {
  const resolvedTarget = path.resolve(projectRoot, target);
  const filesByDirectory = /* @__PURE__ */ new Map();
  for (const filePath of files) {
    const resolvedFile = path.resolve(projectRoot, path.normalize(filePath));
    const relativeFromTarget = path.relative(resolvedTarget, resolvedFile);
    // Containment must be decided on whole path segments. A raw string-prefix
    // check would accept siblings such as "src-old" for target "src" and then
    // group them under the ".." segment.
    const isOutsideTarget =
      relativeFromTarget === ".." ||
      relativeFromTarget.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relativeFromTarget); // e.g. a different drive on Windows
    if (isOutsideTarget) continue;
    // No separator (or an empty leading segment) means the file is not inside
    // a subdirectory of the target, so there is nothing to group it under.
    const separatorIndex = relativeFromTarget.indexOf(path.sep);
    if (separatorIndex <= 0) continue;
    const firstDir = relativeFromTarget.slice(0, separatorIndex);
    const subdirectory = path.relative(projectRoot, path.join(resolvedTarget, firstDir));
    const relativePath = path.relative(projectRoot, resolvedFile);
    if (!filesByDirectory.has(subdirectory)) filesByDirectory.set(subdirectory, []);
    filesByDirectory.get(subdirectory).push(relativePath);
  }
  return filesByDirectory;
}
/**
 * Builds the shard list for every directory group.
 *
 * For each directory the shard count is taken from the first matching entry of
 * `existingState` when that entry carries a positive `shardCount`; otherwise it
 * is computed with `calculateOptimalShardCount`. Files are assigned to shards
 * through `distributeFilesAcrossShards`, and one record is emitted per shard
 * index, including indices that received no files.
 *
 * @param filesByDirectory - Map of directory paths to their file lists
 * @param shardSize - Target number of files per shard
 * @param existingState - Optional previously generated shards whose shard counts are reused
 * @returns Array of shard records (`directory`, 1-based `shard`, `shardCount`,
 *          sorted `_meta_files`, display `name`)
 */
function createDirectoryShards(filesByDirectory, shardSize, existingState) {
  // Index the previous shards by directory so each group can look up its history.
  const previousByDirectory = /* @__PURE__ */ new Map();
  for (const previous of existingState || []) {
    const bucket = previousByDirectory.get(previous.directory);
    if (bucket) bucket.push(previous);
    else previousByDirectory.set(previous.directory, [previous]);
  }
  const result = [];
  for (const [directory, files] of filesByDirectory.entries()) {
    const fileCount = files.length;
    const computedCount = calculateOptimalShardCount(fileCount, shardSize);
    // Only the first previous shard of a directory is consulted for the count.
    const previousCount = previousByDirectory.get(directory)?.[0]?.shardCount ?? 0;
    const keepPrevious = previousCount > 0;
    const shardCount = keepPrevious ? previousCount : computedCount;
    const plan = keepPrevious
      ? `maintaining ${shardCount} existing shards`
      : `creating ${shardCount} new shards`;
    console.log(`Directory "${directory}" contains ${fileCount} files, ${plan}`);
    const assignment = distributeFilesAcrossShards(files, shardCount);
    for (let index = 0; index < shardCount; index++) {
      const position = index + 1;
      const members = assignment.get(index) || [];
      members.sort();
      result.push({
        directory,
        shard: position,
        shardCount,
        _meta_files: members,
        name: `${directory} (${position}/${shardCount})`
      });
    }
  }
  return result;
}
/**
 * Entry point: collects the applicable files, groups them by the target's
 * immediate subdirectories and turns the groups into shard records.
 * When `existingState` is supplied it is forwarded to the shard builder.
 *
 * @param options - Configuration options for directory analysis
 *   (`shardSize`, `target`, `rulePath`, `language`, optional `projectRoot`
 *   defaulting to `process.cwd()`, optional `existingState`)
 * @returns Promise resolving to `{ shards, totalFiles }`
 * @throws Error if no files found in target subdirectories
 */
async function analyzeDirectories(options) {
  const {
    shardSize,
    target,
    rulePath,
    language,
    projectRoot = process.cwd(),
    existingState
  } = options;
  if (existingState) {
    console.debug(`Using existing state with ${existingState.length} shards`);
  }

  console.log("Analyzing files with CLI command...");
  const candidates = await getApplicableFiles(rulePath, language, projectRoot);

  console.log("Grouping files by directory...");
  const absoluteTarget = path.resolve(projectRoot, target);
  const filesByDirectory = groupFilesByDirectory(candidates, absoluteTarget, projectRoot);
  const directoryCount = filesByDirectory.size;
  if (directoryCount === 0) {
    throw new Error(`No files found in subdirectories of target: ${target}`);
  }
  console.log(`Found ${directoryCount} subdirectories in target`);

  console.log("Generating directory-based shards...");
  const shards = createDirectoryShards(filesByDirectory, shardSize, existingState);

  // Total is the sum of the per-directory group sizes, not the raw candidate count.
  let totalFiles = 0;
  for (const group of filesByDirectory.values()) {
    totalFiles += group.length;
  }
  console.log(`Generated ${shards.length} total shards for ${totalFiles} files across ${directoryCount} directories`);
  return {
    shards,
    totalFiles
  };
}
//#endregion
export { analyzeDirectories, createDirectoryShards, groupFilesByDirectory };