@traversets/code-extractor
Version:
The TypeScript Code Extractor and Analyzer can be handy for RAG (Retrieval-Augmented Generation) systems for codebases. It provides a detailed and structured representation of the codebase that can be converted into embeddings, enabling more effective adv
479 lines • 19.5 kB
JavaScript
"use strict";
// Interop helper: re-exports property `key` of `source` on `target` (optionally
// under the name `alias`). Reuses an already-defined helper on `this` if present.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Decide whether the source descriptor can be copied verbatim or has to be
        // replaced by a forwarding getter that reads source[key] on every access.
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            needsGetter = !source.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportName, descriptor);
    }
    : function (target, source, key, alias) {
        // Fallback used when Object.create is unavailable: plain value copy.
        var exportName = alias === undefined ? key : alias;
        target[exportName] = source[key];
    });
// Interop helper: stores `value` as the "default" export of `target`.
// Reuses an already-defined helper on `this` if present.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        var descriptor = { enumerable: true, value: value };
        Object.defineProperty(target, "default", descriptor);
    }
    : function (target, value) {
        // Fallback used when Object.create is unavailable: plain assignment.
        target["default"] = value;
    });
// Interop helper: returns `mod` unchanged when it is flagged `__esModule`;
// otherwise builds a namespace object that re-exports every own enumerable key
// except "default" and exposes `mod` itself as the "default" export.
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        for (var key in mod) {
            if (key === "default") {
                continue;
            }
            if (!Object.prototype.hasOwnProperty.call(mod, key)) {
                continue;
            }
            __createBinding(namespace, mod, key);
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.TypeScriptCodeMapper = void 0;
const fs = __importStar(require("fs"));
const glob_1 = require("glob");
const path = __importStar(require("path"));
const ts = __importStar(require("typescript"));
const utils_1 = require("../utils");
const result_1 = require("../result");
/**
 * Walks the TypeScript project rooted at the current working directory and
 * produces plain-object descriptions of its modules: classes, functions,
 * interfaces, enums and import statements.
 *
 * Most extraction methods return their payload wrapped with `Result.ok(...)`
 * from "../result"; callers unwrap it with `getValue()`.
 */
class TypeScriptCodeMapper {
    /** Program created by `initializeTypescriptProgram()`. */
    program;
    /** Type checker obtained from `program`, or undefined when there is no program. */
    typeChecker;
    constructor() {
        this.initializeTypescriptProgram();
    }
    /**
     * Reads `tsconfig.json` from the current working directory, creates a
     * TypeScript program from it and stores the program and its type checker
     * on the instance.
     *
     * @throws {Error} When any step fails; the error is logged first and the
     * original error is attached as `cause`.
     */
    initializeTypescriptProgram() {
        try {
            const rootDir = process.cwd();
            const tsConfigPath = path.join(rootDir, "tsconfig.json");
            const configFile = ts.readConfigFile(tsConfigPath, ts.sys.readFile);
            const compilerOptions = ts.parseJsonConfigFileContent(configFile.config, ts.sys, rootDir);
            this.program = ts.createProgram(compilerOptions.fileNames, compilerOptions.options);
            this.typeChecker = this.getTypeChecker();
        }
        catch (error) {
            (0, utils_1.logError)(error, "initializeTypescriptProgram", "");
            // Same message as before (`String(error)`), but the original error
            // object and its stack stay reachable through `cause`.
            throw new Error(error, { cause: error });
        }
    }
    /**
     * Builds the description of a class declaration by feeding every class
     * member through `processClassMembers`.
     *
     * @param node The class declaration to describe.
     * @param sourceFile The source file containing the class declaration.
     * @returns A Result wrapping `{ name, functions, properties, interfaces, enums }`.
     * @throws {Error} When extraction fails; the original error is attached as `cause`.
     */
    extractClassMetaData(node, sourceFile) {
        try {
            const classInfo = {
                name: node?.name?.getText(sourceFile),
                functions: [],
                properties: [],
                interfaces: [],
                enums: [],
            };
            node.members.forEach((member) => {
                this.processClassMembers(node, sourceFile, classInfo, member);
            });
            return result_1.Result.ok(classInfo);
        }
        catch (error) {
            (0, utils_1.logError)(error, "extractClassInfo", { node, sourceFile });
            throw new Error(error, { cause: error });
        }
    }
    /**
     * Appends the details of a function or method declaration to
     * `info.functions`. Nodes without a name are skipped.
     *
     * @param node The function or method declaration.
     * @param sourceFile The source file containing the node.
     * @param info The class or module description being filled in.
     */
    aggregateFunctions(node, sourceFile, info) {
        const functionInfo = this.getFunctionDetails(node, sourceFile)?.getValue() ?? null;
        if (functionInfo) {
            info?.functions?.push(functionInfo);
        }
    }
    /**
     * Appends one entry to `info.functions` for every declaration of a variable
     * statement whose initializer is an arrow function (`const f = () => ...`).
     * The entry is named after the variable and its `content` is the printed
     * variable statement.
     *
     * @param node The variable statement to inspect.
     * @param sourceFile The source file containing the statement.
     * @param info The class or module description being filled in.
     */
    aggregateArrowFunctions(node, sourceFile, info) {
        const declarations = node.declarationList?.declarations ?? [];
        declarations.forEach((declaration) => {
            const initializer = declaration.initializer;
            if (!initializer || !ts.isArrowFunction(initializer)) {
                return;
            }
            const name = declaration.name.getText(sourceFile);
            const content = this.getFunctionNodeText(node, sourceFile);
            const parameters = this.extractArrowFunctionParameters(initializer, sourceFile).getValue();
            info?.functions?.push(this.functionDetailsMapper(name, content, parameters, initializer));
        });
    }
    /**
     * Appends the name/type description of a property declaration to
     * `info.properties`. (The method name keeps its historical spelling so
     * existing callers continue to work.)
     *
     * @param node The property declaration.
     * @param sourceFile The source file containing the node.
     * @param info The class or module description being filled in.
     */
    aggergateProperties(node, sourceFile, info) {
        const propertyInfo = this.extractPropertyParameters(node, sourceFile).getValue();
        if (propertyInfo) {
            info?.properties?.push(propertyInfo);
        }
    }
    /**
     * Appends the description of an interface declaration to `info.interfaces`.
     *
     * @param node The interface declaration.
     * @param sourceFile The source file containing the node.
     * @param info The class or module description being filled in.
     */
    aggregateInterfaces(node, sourceFile, info) {
        const interfaceInfo = this.extractInterfaceInfo(node, sourceFile).getValue();
        if (interfaceInfo) {
            info?.interfaces?.push(interfaceInfo);
        }
    }
    /**
     * Appends the description of an enum declaration to `info.enums`.
     *
     * @param node The enum declaration.
     * @param sourceFile The source file containing the node.
     * @param info The class or module description being filled in.
     */
    aggregateEnums(node, sourceFile, info) {
        const enumInfo = this.extractEnumInfo(node, sourceFile).getValue();
        if (enumInfo) {
            info?.enums?.push(enumInfo);
        }
    }
    /**
     * Classifies one element and routes it to the matching aggregate method.
     * The element examined is `member` when given, otherwise `node` itself.
     *
     * @param node The enclosing declaration (or the element itself when `member` is omitted).
     * @param sourceFile The source file containing the node.
     * @param info The class or module description being filled in.
     * @param member Optional member of `node` to classify instead of `node`.
     */
    processClassMembers(node, sourceFile, info, member) {
        const currentElement = member ? member : node;
        if (ts.isMethodDeclaration(currentElement) ||
            ts.isFunctionDeclaration(currentElement) ||
            ts.isArrowFunction(currentElement)) {
            this.aggregateFunctions(currentElement, sourceFile, info);
        }
        if (ts.isPropertyDeclaration(currentElement)) {
            this.aggergateProperties(currentElement, sourceFile, info);
        }
        // Interfaces and enums are dispatched on the same element as the checks
        // above, so every branch looks at one and the same node.
        if (ts.isInterfaceDeclaration(currentElement)) {
            this.aggregateInterfaces(currentElement, sourceFile, info);
        }
        if (ts.isEnumDeclaration(currentElement)) {
            this.aggregateEnums(currentElement, sourceFile, info);
        }
    }
    /**
     * Describes a property declaration by name and type.
     *
     * The type string comes from the type checker for both annotated and
     * un-annotated properties; it is undefined when no type checker is set.
     *
     * @param node The property declaration.
     * @param sourceFile The source file containing the node.
     * @returns A Result wrapping `{ name, type }`.
     * @throws {Error} When extraction fails; the original error is attached as `cause`.
     */
    extractPropertyParameters(node, sourceFile) {
        try {
            const property = {
                name: node.name.getText(sourceFile),
                type: this.getTypeAtLocation(node).getValue(),
            };
            return result_1.Result.ok(property);
        }
        catch (error) {
            (0, utils_1.logError)(error, "extractPropertyParameters", { node, sourceFile });
            throw new Error(error, { cause: error });
        }
    }
    /**
     * Describes the parameters of a function-like node.
     *
     * @param node A node exposing a `parameters` list.
     * @param sourceFile The source file containing the node.
     * @returns A Result wrapping an array of `{ name, type }`; `type` is
     * undefined for parameters without a type annotation.
     */
    extractFunctionParameters(node, sourceFile) {
        const properties = node.parameters.map((param) => ({
            name: param.name.getText(sourceFile),
            type: param.type ? this.getTypeAtLocation(param).getValue() : undefined,
        }));
        return result_1.Result.ok(properties);
    }
    /**
     * Describes the parameters of an arrow function. Arrow functions expose
     * their parameters the same way as other function-like nodes, so this
     * delegates to `extractFunctionParameters`.
     *
     * @param node The arrow function node.
     * @param sourceFile The source file containing the node.
     * @returns A Result wrapping an array of `{ name, type }`.
     */
    extractArrowFunctionParameters(node, sourceFile) {
        return this.extractFunctionParameters(node, sourceFile);
    }
    /**
     * Collects name, printed source, parameters, return type and JSDoc text of
     * a function or method declaration.
     *
     * @param node The function or method declaration.
     * @param sourceFile The source file containing the node.
     * @returns A Result wrapping the details, or `null` (not wrapped) when the
     * node has no name.
     * @throws {Error} When extraction fails; the original error is attached as `cause`.
     */
    getFunctionDetails(node, sourceFile) {
        try {
            if (!node.name) {
                return null;
            }
            const name = node.name.getText(sourceFile);
            const content = this.getFunctionNodeText(node, sourceFile);
            const parameters = this.extractFunctionParameters(node, sourceFile).getValue();
            const details = this.functionDetailsMapper(name, content, parameters, node);
            return result_1.Result.ok(details);
        }
        catch (error) {
            (0, utils_1.logError)(error, "extractFunctionInfo", { node, sourceFile });
            throw new Error(error, { cause: error });
        }
    }
    /**
     * Assembles the details object for a function-like node.
     *
     * @param name The name to report for the function.
     * @param content The printed source of the function.
     * @param parameters The `{ name, type }` descriptions of its parameters.
     * @param node The function-like node (read for `type` and JSDoc).
     * @returns `{ name, content, parameters, returnType, comments }`;
     * `returnType` is "any" when the node has no return type annotation.
     */
    functionDetailsMapper(name, content, parameters, node) {
        return {
            name,
            content,
            parameters,
            // Resolve the annotated return type node. Asking the checker about
            // the declaration itself would yield the whole function signature.
            returnType: node.type ? this.getTypeAtLocation(node.type).getValue() : "any",
            comments: this.getComment(node),
        };
    }
    /**
     * Asks the type checker for the type at `node` and renders it as a string.
     *
     * @param node The node to look up.
     * @returns A Result wrapping the type string, or wrapping undefined when
     * no type checker is set.
     */
    getTypeAtLocation(node) {
        const type = this.typeChecker?.typeToString(this.typeChecker.getTypeAtLocation(node));
        return result_1.Result.ok(type);
    }
    /**
     * Joins the text of all JSDoc comments and tags attached to a node, one
     * entry per line.
     *
     * @param node The node to read JSDoc from.
     * @returns The concatenated comment text; entries without text contribute
     * an empty line.
     */
    getComment(node) {
        return ts
            .getJSDocCommentsAndTags(node)
            .map((entry) => {
                const raw = entry.comment;
                if (raw == null || typeof raw === "string") {
                    return raw || "";
                }
                // A non-string comment is a list of comment parts (e.g. text
                // mixed with {@link ...}); let the compiler flatten it instead
                // of stringifying the node objects.
                return ts.getTextOfJSDocComment?.(raw) ?? "";
            })
            .join("\n");
    }
    /**
     * Prints a node back to source text with comments removed and "\n" line
     * endings.
     *
     * @param node The node to print.
     * @param sourceFile The source file that contains the node.
     * @returns The printed source text.
     */
    getFunctionNodeText(node, sourceFile) {
        const printer = ts.createPrinter({
            newLine: ts.NewLineKind.LineFeed,
            removeComments: true,
        });
        return printer.printNode(ts.EmitHint.Unspecified, node, sourceFile);
    }
    /**
     * Finds the closest directory, starting at `currentDir` and walking up
     * through its parents, that contains a `package.json`.
     *
     * The walk ends when `path.dirname` stops changing the path, which is the
     * filesystem root for absolute paths and "." for relative ones, so the
     * search always terminates.
     *
     * @param {string} [currentDir=process.cwd()] The directory to start from.
     * @returns {string} The first directory containing `package.json`, or
     * `process.cwd()` when none is found.
     */
    findProjectRoot(currentDir = process.cwd()) {
        let candidate = currentDir;
        for (;;) {
            if (fs.existsSync(path.join(candidate, "package.json"))) {
                return candidate;
            }
            const parent = path.dirname(candidate);
            if (parent === candidate) {
                break;
            }
            candidate = parent;
        }
        return process.cwd();
    }
    /**
     * Lists `.ts`/`.tsx` files via glob, skipping `*.d`, `*.spec`, `*.test`
     * and `*.mock` files as well as `node_modules` and `dist`.
     *
     * @returns A promise resolving to the matching file paths.
     */
    async getTsFiles() {
        return await (0, glob_1.glob)("**/!(*.d|*.spec|*.test|*.mock).ts?(x)", {
            ignore: ["node_modules/**", "dist/**", ".env"],
        });
    }
    /**
     * Creates the empty description of a module, pre-filled with its path and
     * import statements.
     *
     * @param sourceFile The module's source file.
     * @param relativePath The path to report for the module.
     * @returns `{ path, classes, functions, interfaces, enums, dependencies }`.
     */
    extractModuleInfo(sourceFile, relativePath) {
        return {
            path: relativePath,
            classes: [],
            functions: [],
            interfaces: [],
            enums: [],
            dependencies: this.buildDependencyGraph(sourceFile),
        };
    }
    /**
     * Looks up a source file in the program by file name.
     *
     * @param fileName The path of the source file.
     * @returns The source file, or undefined when it is unknown or no program is set.
     */
    getSourceFile(fileName) {
        return this.program?.getSourceFile(fileName);
    }
    /**
     * Returns the program's root file names.
     *
     * @returns The root file names, or undefined when no program is set.
     */
    getRootFileNames() {
        return this.program?.getRootFileNames();
    }
    /**
     * Returns the TypeScript program stored on this instance.
     *
     * @returns The program, or undefined when not initialized.
     */
    getProgram() {
        return this.program;
    }
    /**
     * Returns the type checker of the current program.
     *
     * @returns The result of `program.getTypeChecker()`, or undefined when no
     * program is set.
     */
    getTypeChecker() {
        const program = this.getProgram();
        return program ? program.getTypeChecker() : undefined;
    }
    /**
     * Builds the map of the whole codebase: for every file returned by
     * `getTsFiles()`, the top-level classes, functions (declarations and
     * arrow functions assigned to variables), interfaces and enums are
     * collected into a module description keyed by the file's path relative
     * to the current working directory.
     *
     * @returns A promise resolving to a Result wrapping
     * `{ [repoName]: { modules: { [relativePath]: moduleInfo } } }`, where
     * `repoName` is the base name of the current working directory.
     * @throws {Error} When a listed file is not part of the program.
     */
    async buildCodebaseMap() {
        const rootDir = process.cwd();
        const repoName = path.basename(rootDir);
        const codebaseMap = {};
        codebaseMap[repoName] = { modules: {} };
        const tsFiles = await this.getTsFiles();
        tsFiles.forEach((filePath) => {
            const moduleRelativePath = path.relative(rootDir, filePath);
            const sourceFile = this.getSourceFile(filePath);
            if (!sourceFile) {
                throw Error(`No source file found for ${filePath}`);
            }
            const moduleInfo = this.extractModuleInfo(sourceFile, moduleRelativePath);
            ts.forEachChild(sourceFile, (node) => {
                if (ts.isClassDeclaration(node)) {
                    const classInfo = this.extractClassMetaData(node, sourceFile).getValue();
                    if (classInfo) {
                        moduleInfo?.classes?.push(classInfo);
                    }
                }
                if (ts.isMethodDeclaration(node) || ts.isFunctionDeclaration(node)) {
                    this.aggregateFunctions(node, sourceFile, moduleInfo);
                }
                // Arrow functions are initializers of variable declarations
                // inside a variable statement, so they are detected there.
                if (ts.isVariableStatement(node)) {
                    this.aggregateArrowFunctions(node, sourceFile, moduleInfo);
                }
                if (ts.isPropertyDeclaration(node)) {
                    this.aggergateProperties(node, sourceFile, moduleInfo);
                }
                if (ts.isInterfaceDeclaration(node)) {
                    this.aggregateInterfaces(node, sourceFile, moduleInfo);
                }
                if (ts.isEnumDeclaration(node)) {
                    this.aggregateEnums(node, sourceFile, moduleInfo);
                }
            });
            // Register the module once per file instead of once per child node.
            codebaseMap[repoName].modules[moduleRelativePath] = moduleInfo;
        });
        return result_1.Result.ok(codebaseMap);
    }
    /**
     * Describes an interface declaration: its name, its property signatures
     * and its JSDoc text. Members that are not property signatures are ignored.
     *
     * @param node The interface declaration.
     * @param sourceFile The source file containing the node.
     * @returns A Result wrapping `{ name, properties, summary }`; a property
     * without a type annotation is reported with type "any".
     * @throws {Error} When extraction fails; the original error is attached as `cause`.
     */
    extractInterfaceInfo(node, sourceFile) {
        try {
            const interfaceName = node.name.getText(sourceFile);
            const properties = node.members
                .filter(ts.isPropertySignature)
                .map((prop) => ({
                    name: prop.name.getText(sourceFile),
                    type: prop.type ? this.getTypeAtLocation(prop).getValue() : "any",
                }));
            return result_1.Result.ok({
                name: interfaceName,
                properties,
                summary: this.getComment(node),
            });
        }
        catch (error) {
            (0, utils_1.logError)(error, "extractInterfaceInfo", {
                node,
                sourceFile,
            });
            throw new Error(error, { cause: error });
        }
    }
    /**
     * Describes an enum declaration: its name, its members and its JSDoc text.
     *
     * @param node The enum declaration.
     * @param sourceFile The source file containing the node.
     * @returns A Result wrapping `{ name, members, summary }`; each member is
     * `{ name, value }` where `value` is the initializer's source text or
     * undefined when the member has no initializer.
     */
    extractEnumInfo(node, sourceFile) {
        const enumName = node.name.getText(sourceFile);
        const members = node.members.map((member) => ({
            name: member.name.getText(sourceFile),
            value: member.initializer
                ? member.initializer.getText(sourceFile)
                : undefined,
        }));
        return result_1.Result.ok({
            name: enumName,
            members: members,
            summary: this.getComment(node),
        });
    }
    /**
     * Lists the import declarations of a source file as printed source text.
     *
     * @param sourceFile The source file to inspect.
     * @returns One string per top-level import declaration, in file order.
     */
    buildDependencyGraph(sourceFile) {
        const printer = ts.createPrinter();
        return sourceFile.statements
            .filter(ts.isImportDeclaration)
            .map((declaration) => printer.printNode(ts.EmitHint.Unspecified, declaration, sourceFile));
    }
}
exports.TypeScriptCodeMapper = TypeScriptCodeMapper;
//# sourceMappingURL=typescript-code-mapper.service.js.map