hikma-engine
Version:
Code Knowledge Graph Indexer - A sophisticated TypeScript-based indexer that transforms Git repositories into multi-dimensional knowledge stores for AI agents
239 lines (238 loc) • 9.94 kB
JavaScript
/**
* @file Responsible for discovering all relevant files within a project directory,
* respecting `.gitignore` rules. It supports incremental indexing by identifying
* files that have changed since a last known state.
*/
/**
 * Module-interop helper: exposes property `key` of `source` on `target`,
 * optionally under a different name (`alias`).
 *
 * An implementation already present on `this` is reused. Otherwise, when
 * `Object.create` exists the property is defined through a descriptor
 * (a getter that reads `source[key]` unless the source descriptor can be
 * reused as-is); without `Object.create` the value is simply copied.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    // Fallback for engines without Object.create: one-off value copy.
    function copyValue(target, source, key, alias) {
        target[alias === undefined ? key : alias] = source[key];
    }
    // Descriptor-based variant: keeps the binding live via a getter when needed.
    function defineBinding(target, source, key, alias) {
        var exportedName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            // An accessor is only reused when the source is flagged as an ES module.
            needsGetter = !source.__esModule;
        }
        else {
            // A data property that can still change must be read lazily.
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportedName, descriptor);
    }
    return Object.create ? defineBinding : copyValue;
})();
/**
 * Module-interop helper: stores `value` as the `default` member of `target`.
 *
 * An implementation already present on `this` is reused. Otherwise the
 * property is defined as an enumerable data property when `Object.create`
 * exists, and assigned directly when it does not.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (!Object.create) {
        // Fallback for engines without Object.create: ordinary assignment.
        return function (target, value) {
            target["default"] = value;
        };
    }
    return function (target, value) {
        var descriptor = { enumerable: true, value: value };
        Object.defineProperty(target, "default", descriptor);
    };
})();
// Module-interop helper: turns the result of `require(...)` into a namespace-style object.
// An `__importStar` already present on `this` is reused instead of defining a new one.
var __importStar = (this && this.__importStar) || (function () {
// Returns the own property names of `o`. On its first call it replaces itself with
// Object.getOwnPropertyNames, or with a for-in + hasOwnProperty fallback when that
// function is unavailable, and then delegates to the replacement.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
// Modules flagged with `__esModule` are returned unchanged.
if (mod && mod.__esModule) return mod;
var result = {};
// Expose every own key of `mod` except "default" on the result via __createBinding.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
// The module value itself becomes `result.default`.
__setModuleDefault(result, mod);
return result;
};
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.FileScanner = void 0;
const glob_1 = require("glob");
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const logger_1 = require("../utils/logger");
const error_handling_1 = require("../utils/error-handling");
const crypto = __importStar(require("crypto"));
/**
 * Lower-case file extension (without the dot) -> display language name.
 * A Map is used so that extensions such as "constructor" or "__proto__"
 * cannot resolve to members inherited from Object.prototype.
 */
const LANGUAGE_BY_EXTENSION = new Map([
    ['js', 'JavaScript'],
    ['ts', 'TypeScript'],
    ['py', 'Python'],
    ['java', 'Java'],
    ['go', 'Go'],
    // Add more as needed
]);
/**
 * Maximum number of files read and hashed at the same time. Bounding this keeps
 * the number of open file descriptors and in-memory file buffers predictable.
 */
const METADATA_BATCH_SIZE = 32;
/**
 * Manages file discovery within a given project: builds the ignore list,
 * runs the glob search, filters the matches and collects per-file metadata.
 */
class FileScanner {
    /**
     * @param {string} projectRoot - The absolute path to the root of the project.
     * @param {ConfigManager} config - Configuration manager instance.
     */
    constructor(projectRoot, config) {
        this.logger = (0, logger_1.getLogger)('FileScanner');
        this.projectRoot = projectRoot;
        this.config = config;
    }
    /**
     * Builds the ignore list for file discovery: the configured ignore patterns
     * followed by glob patterns derived from the project's .gitignore rules.
     * A missing .gitignore is not an error; any other read failure is logged as a
     * warning and only the configured patterns are returned.
     * @returns {Promise<string[]>} A promise that resolves to glob ignore patterns.
     */
    async getGitIgnorePatterns() {
        const gitignorePath = path.join(this.projectRoot, '.gitignore');
        const indexingConfig = this.config.getIndexingConfig();
        const patterns = [...indexingConfig.ignorePatterns];
        let content;
        try {
            content = await fs.promises.readFile(gitignorePath, 'utf-8');
        }
        catch (error) {
            // Reading directly (instead of existsSync + read) avoids a blocking call
            // and the check-then-use race; "file not there" simply means no rules.
            const code = error && error.code;
            if (code !== 'ENOENT' && code !== 'ENOTDIR') {
                this.logger.warn('Failed to read .gitignore file', {
                    error: (0, error_handling_1.getErrorMessage)(error),
                });
            }
            return patterns;
        }
        const rules = content
            .split('\n')
            .map((line) => line.trim())
            .filter((line) => line !== '' && !line.startsWith('#'));
        let skippedNegations = 0;
        for (const rule of rules) {
            if (rule.startsWith('!')) {
                skippedNegations += 1;
            }
            patterns.push(...this.gitignoreRuleToGlobs(rule));
        }
        this.logger.debug(`Loaded ${rules.length} patterns from .gitignore`);
        if (skippedNegations > 0) {
            this.logger.debug(`Skipped ${skippedNegations} negated .gitignore patterns (not expressible as ignore globs)`);
        }
        return patterns;
    }
    /**
     * Translates one non-empty, non-comment .gitignore rule into glob ignore patterns.
     *
     * - `!rule` (negation) cannot be expressed in an ignore list and yields nothing.
     * - A rule without a slash (other than a trailing one) applies at any depth,
     *   so it is prefixed with a globstar.
     * - A rule with a leading or inner slash is relative to the project root.
     * - A trailing slash restricts the rule to directories, so only the
     *   directory's contents pattern is produced.
     * - Each entry also gets a children pattern so that the contents of a
     *   matching directory are excluded, not just the entry itself.
     * @param {string} rule - A trimmed .gitignore line.
     * @returns {string[]} Zero, one or two glob patterns.
     */
    gitignoreRuleToGlobs(rule) {
        if (rule.startsWith('!')) {
            return [];
        }
        const directoriesOnly = rule.endsWith('/');
        let pattern = rule.replace(/\/+$/, '');
        // Checked after removing the trailing slash: only leading/inner slashes anchor a rule.
        const anchored = pattern.includes('/');
        pattern = pattern.replace(/^\/+/, '');
        if (pattern === '') {
            return [];
        }
        const base = anchored ? pattern : `**/${pattern}`;
        if (base.endsWith('/**')) {
            // Already covers everything below the directory.
            return [base];
        }
        const children = `${base}/**`;
        return directoriesOnly ? [children] : [base, children];
    }
    /**
     * Filters files based on size limits and readability. Files larger than the
     * configured `maxFileSize`, and files that cannot be stat'ed or read, are
     * skipped (and logged) rather than failing the whole scan.
     * @param {string[]} files - Array of file paths to filter.
     * @returns {Promise<string[]>} Filtered array of file paths, in input order.
     */
    async filterFiles(files) {
        const indexingConfig = this.config.getIndexingConfig();
        const filteredFiles = [];
        for (const file of files) {
            try {
                const stats = await fs.promises.stat(file);
                // Check file size
                if (stats.size > indexingConfig.maxFileSize) {
                    this.logger.debug(`Skipping large file: ${file} (${stats.size} bytes)`);
                    continue;
                }
                // Check if file is readable
                await fs.promises.access(file, fs.constants.R_OK);
                filteredFiles.push(file);
            }
            catch (error) {
                this.logger.warn(`Skipping inaccessible file: ${file}`, {
                    error: (0, error_handling_1.getErrorMessage)(error),
                });
            }
        }
        return filteredFiles;
    }
    /**
     * Discovers the project files matching `patterns`, drops ignored, oversized and
     * unreadable ones, and returns a metadata record for each remaining file.
     * @param {string[]} patterns - Glob patterns, evaluated relative to the project root.
     * @param {string[]} [changedFiles] - Optional list of changed paths (absolute, or
     *   relative to the project root). When it is non-empty, only discovered files
     *   contained in it are kept. A missing or empty list disables this filter.
     * @returns {Promise<object[]>} Metadata records as produced by `getFileMetadata`,
     *   in discovery order.
     * @throws Rethrows any error raised by globbing or metadata collection after logging it.
     */
    async findAllFiles(patterns, changedFiles) {
        const operation = this.logger.operation('File discovery');
        try {
            const ignorePatterns = await this.getGitIgnorePatterns();
            this.logger.info('Starting file discovery', {
                projectRoot: this.projectRoot,
                patterns: patterns.length,
                ignorePatterns: ignorePatterns.length,
                incrementalMode: !!changedFiles,
            });
            let files = await (0, glob_1.glob)(patterns, {
                cwd: this.projectRoot,
                ignore: ignorePatterns,
                nodir: true,
                absolute: true,
            });
            this.logger.debug(`Found ${files.length} files matching patterns`);
            // Filter for incremental indexing
            if (changedFiles && changedFiles.length > 0) {
                // path.resolve leaves absolute inputs absolute but normalises them
                // ("a/../b", trailing separators), so they compare equal to glob output.
                const changedSet = new Set(changedFiles.map((f) => path.resolve(this.projectRoot, f)));
                files = files.filter((file) => changedSet.has(file));
                this.logger.info(`Filtered to ${files.length} changed files for incremental indexing`);
            }
            // Apply additional filters (size, accessibility, etc.)
            const filteredFiles = await this.filterFiles(files);
            const metadata = await this.collectFileMetadata(filteredFiles);
            this.logger.info(`File discovery completed`, {
                totalFound: files.length,
                afterFiltering: filteredFiles.length,
                filtered: files.length - filteredFiles.length,
            });
            return metadata;
        }
        catch (error) {
            this.logger.error('File discovery failed', { error: (0, error_handling_1.getErrorMessage)(error) });
            throw error;
        }
        finally {
            // Close the logger's operation handle on success and on failure alike.
            operation();
        }
    }
    /**
     * Collects metadata for the given files in fixed-size batches so that only a
     * bounded number of files is open and buffered at any time. Output order
     * matches input order; the first failure rejects the returned promise.
     * @param {string[]} filePaths - Paths of the files to describe.
     * @returns {Promise<object[]>} One metadata record per input path.
     */
    async collectFileMetadata(filePaths) {
        const metadata = [];
        for (let start = 0; start < filePaths.length; start += METADATA_BATCH_SIZE) {
            const batch = filePaths.slice(start, start + METADATA_BATCH_SIZE);
            const batchMetadata = await Promise.all(batch.map((filePath) => this.getFileMetadata(filePath)));
            metadata.push(...batchMetadata);
        }
        return metadata;
    }
    /**
     * Gets file statistics for the project, based on the configured file patterns.
     * `totalSize` is derived from each file's `sizeKb` (already rounded up to a
     * whole KiB), so it is an upper estimate in bytes rather than an exact sum.
     * Files without an extension are counted under the empty-string key.
     * @returns {Promise<{totalFiles: number, totalSize: number, filesByExtension: Record<string, number>}>}
     */
    async getFileStats() {
        const indexingConfig = this.config.getIndexingConfig();
        const files = await this.findAllFiles(indexingConfig.filePatterns);
        let totalSize = 0;
        const filesByExtension = {};
        for (const file of files) {
            // Convert sizeKb back to bytes for totalSize calculation
            totalSize += file.sizeKb * 1024;
            const ext = file.extension ? `.${file.extension}` : '';
            filesByExtension[ext] = (filesByExtension[ext] || 0) + 1;
        }
        return {
            totalFiles: files.length,
            totalSize,
            filesByExtension,
        };
    }
    /**
     * Reads a file and describes it.
     * @param {string} filePath - Path of the file to describe.
     * @returns {Promise<{path: string, name: string, extension: string, language: string, sizeKb: number, contentHash: string, fileType: string}>}
     *   `extension` is lower-case without the dot, `sizeKb` is the size rounded up
     *   to whole KiB and `contentHash` is the hex SHA-256 of the file's bytes.
     * @throws Rethrows any stat/read error after logging it.
     */
    async getFileMetadata(filePath) {
        try {
            // The two calls are independent, so they are issued together.
            const [stats, content] = await Promise.all([
                fs.promises.stat(filePath),
                fs.promises.readFile(filePath),
            ]);
            const hash = crypto.createHash('sha256').update(content).digest('hex');
            const ext = path.extname(filePath).toLowerCase().slice(1);
            const language = this.detectLanguage(ext);
            const fileType = this.classifyFileType(filePath);
            return {
                path: filePath,
                name: path.basename(filePath),
                extension: ext,
                language,
                sizeKb: Math.ceil(stats.size / 1024),
                contentHash: hash,
                fileType,
            };
        }
        catch (error) {
            this.logger.error(`Failed to get metadata for file: ${filePath}`, {
                error: (0, error_handling_1.getErrorMessage)(error),
            });
            throw error;
        }
    }
    /**
     * Maps a file extension to a language name.
     * @param {string} extension - Lower-case extension without the leading dot.
     * @returns {string} The language name, or 'Unknown' for unmapped extensions.
     */
    detectLanguage(extension) {
        return LANGUAGE_BY_EXTENSION.get(extension) || 'Unknown';
    }
    /**
     * Classifies a file as 'test', 'config', 'vendor', 'dev' or 'source' using
     * case-insensitive substring checks, evaluated in that order.
     * Absolute paths inside the project root are reduced to their project-relative
     * form first, so the location of the checkout (for example a root under a
     * directory named "test" or "build") does not influence the result.
     * @param {string} filePath - Absolute or project-relative file path.
     * @returns {string} The file type label.
     */
    classifyFileType(filePath) {
        const normalizedPath = this.toProjectRelativePath(filePath).toLowerCase();
        const fileName = path.basename(normalizedPath);
        if (normalizedPath.includes('test') || normalizedPath.includes('spec') || fileName.includes('.test.') || fileName.includes('.spec.')) {
            return 'test';
        }
        if (normalizedPath.includes('config') || fileName.endsWith('.json') || fileName.endsWith('.yml') || fileName.endsWith('.yaml')) {
            return 'config';
        }
        if (normalizedPath.includes('node_modules') || normalizedPath.includes('vendor')) {
            return 'vendor';
        }
        if (normalizedPath.includes('scripts') || normalizedPath.includes('build') || normalizedPath.includes('dist')) {
            return 'dev';
        }
        return 'source';
    }
    /**
     * Returns `filePath` relative to the project root when it is an absolute path
     * located inside that root; otherwise returns it unchanged.
     * @param {string} filePath - Absolute or relative file path.
     * @returns {string} The project-relative path, or the input as given.
     */
    toProjectRelativePath(filePath) {
        if (typeof this.projectRoot !== 'string' || !path.isAbsolute(filePath)) {
            return filePath;
        }
        const relative = path.relative(this.projectRoot, filePath);
        const escapesRoot = relative === '..' || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
        return relative === '' || escapesRoot ? filePath : relative;
    }
}
exports.FileScanner = FileScanner;
;