UNPKG

@traversets/code-extractor

Version:

The TypeScript Code Extractor and Analyzer can be handy for RAG (Retrieval-Augmented Generation) systems for codebases. It provides a detailed and structured representation of the codebase that can be converted into embeddings, enabling more effective adv…

479 lines 19.5 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); __setModuleDefault(result, mod); return result; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.TypeScriptCodeMapper = void 0; const fs = __importStar(require("fs")); const glob_1 = require("glob"); const path = __importStar(require("path")); const ts = __importStar(require("typescript")); const utils_1 = require("../utils"); const result_1 = require("../result"); class TypeScriptCodeMapper { program; typeChecker; constructor() { this.initializeTypescriptProgram(); } /** * Initializes a TypeScript program by reading the TS configuration file and creating a new program instance. * This method sets up the program and type checker for further compilation and analysis. 
*/ initializeTypescriptProgram() { try { const rootDir = process.cwd(); const tsConfigPath = path.join(rootDir, "tsconfig.json"); const configFile = ts.readConfigFile(tsConfigPath, ts.sys.readFile); const compilerOptions = ts.parseJsonConfigFileContent(configFile.config, ts.sys, rootDir); this.program = ts.createProgram(compilerOptions.fileNames, compilerOptions.options); this.typeChecker = this.getTypeChecker(); } catch (error) { (0, utils_1.logError)(error, "initializeTypescriptProgram", ""); throw Error(error); } } /** * Extracts information about a TypeScript class declaration. * This function iterates over the members of the class, identifying methods, * properties, interfaces, and enums, and compiles this information into an IClassInfo object. * * @param node The TypeScript class declaration to extract information from. * @param sourceFile The source file containing the class declaration. * @returns An IClassInfo object containing the name, methods, properties, interfaces, and enums of the class. */ extractClassMetaData(node, sourceFile) { try { const className = node?.name?.getText(sourceFile); const classInfo = { name: className, functions: [], properties: [], interfaces: [], enums: [], }; node.members.forEach((member) => { this.processClassMembers(node, sourceFile, classInfo, member); }); return result_1.Result.ok(classInfo); } catch (error) { (0, utils_1.logError)(error, "extractClassInfo", { node, sourceFile }); throw Error(error); } } aggregateFunctions(node, sourceFile, info) { const functionInfo = this.getFunctionDetails(node, sourceFile)?.getValue() ?? null; if (functionInfo) { info?.functions?.push(functionInfo); } } /** * Extracts property information from a TypeScript property declaration and adds it * to the class or module information object if valid. This aggregation helps build * a complete representation of the class/module structure. 
*/ aggergateProperties(node, sourceFile, info) { const propertyInfo = this.extractPropertyParameters(node, sourceFile).getValue(); if (propertyInfo) { info?.properties?.push(propertyInfo); } } /** * Processes interface declarations and aggregates them into the parent class or module * information object. Essential for maintaining the hierarchical structure of interfaces * within their containing scope. */ aggregateInterfaces(node, sourceFile, info) { const interfaceInfo = this.extractInterfaceInfo(node, sourceFile).getValue(); if (interfaceInfo) { info?.interfaces?.push(interfaceInfo); } } /** * Extracts and aggregates enum information from enum declarations into the parent * class or module information object. Helps maintain a complete type system * representation within the code structure. */ aggregateEnums(node, sourceFile, info) { const enumInfo = this.extractEnumInfo(node, sourceFile).getValue(); if (enumInfo) { info?.enums?.push(enumInfo); } } /** * Retrieves and processes child elements of a class declaration, extracting * relevant information about methods, properties, interfaces, and enums. * * @param node The class declaration node to process. * @param member The current class element being processed. * @param sourceFile The source file containing the class declaration. * @param index The current index within the class declaration. * @param classInfo The object to store extracted class information. */ processClassMembers(node, sourceFile, info, member) { const currentElement = member ? 
member : node; if (ts.isMethodDeclaration(currentElement) || ts.isFunctionDeclaration(currentElement) || ts.isArrowFunction(currentElement)) { this.aggregateFunctions(currentElement, sourceFile, info); } if (ts.isPropertyDeclaration(currentElement)) { this.aggergateProperties(currentElement, sourceFile, info); } if (ts.isInterfaceDeclaration(node)) { this.aggregateInterfaces(node, sourceFile, info); } if (ts.isEnumDeclaration(node)) { this.aggregateEnums(node, sourceFile, info); } } /** * Extracts property information from a TypeScript property declaration. * * This function takes a node representing a property declaration and its source file, * and returns an object containing the property's name and type. If the type is not * explicitly specified, it is inferred from the property declaration. * * @param node * @param sourceFile * @returns An object with 'name' and 'type' properties. */ extractPropertyParameters(node, sourceFile) { try { const name = node.name.getText(sourceFile); let type; if (node.type) { type = this.getTypeAtLocation(node).getValue(); } else { const inferredType = this.typeChecker?.getTypeAtLocation(node); type = inferredType ? this.typeChecker?.typeToString(inferredType) : undefined; } const property = { name, type, }; return result_1.Result.ok(property); } catch (error) { (0, utils_1.logError)(error, "extractPropertyParameters", { node, sourceFile }); throw Error(error); } } /** * Extracts the parameters of a function from a given node. * * @param node The node containing the function parameters. * @param sourceFile The source file containing the node. * @returns An array of function parameter objects. */ extractFunctionParameters(node, sourceFile) { const properties = node.parameters.map((param) => { const name = param.name.getText(sourceFile); const type = param.type ? 
this.getTypeAtLocation(param).getValue() : undefined; return { name, type, }; }); return result_1.Result.ok(properties); } extractArrowFunctionParameters(node, sourceFile) { const properties = node.parameters.map((param) => { const name = param.name.getText(sourceFile); const type = param.type ? this.getTypeAtLocation(param).getValue() : undefined; return { name, type, }; }); return result_1.Result.ok(properties); } /** * Extracts and returns function details from a given function declaration or method declaration node. * * @param node The function declaration or method declaration node to extract details from. * @param sourceFile The source file containing the node. * @returns An object containing function details, or null if the node has no name. */ getFunctionDetails(node, sourceFile) { try { if (!node.name) { return null; } const name = node.name.getText(sourceFile); const content = this.getFunctionNodeText(node, sourceFile); const parameters = this.extractFunctionParameters(node, sourceFile).getValue(); const details = this.functionDetailsMapper(name, content, parameters, node); return result_1.Result.ok(details); } catch (error) { (0, utils_1.logError)(error, "extractFunctionInfo", { node, sourceFile }); throw Error(error); } } /** * Maps a function declaration or method declaration to a details object, * extracting relevant information such as name, content, parameters, return type, and comments. * * @param name The name of the function. * @param content The content of the function. * @param parameters An array of property definitions for the function parameters. * @param node The TypeScript function or method declaration node. * @returns An object containing the function details. */ functionDetailsMapper(name, content, parameters, node) { return { name, content, parameters, returnType: node.type ? this.getTypeAtLocation(node).getValue() : "any", comments: this.getComment(node), }; } /** * Retrieves the type of a given function or method declaration. 
* * @param node A function or method declaration node. * @returns A string representation of the function or method type, or undefined if type checking is unavailable. */ getTypeAtLocation(node) { const type = this.typeChecker?.typeToString(this.typeChecker.getTypeAtLocation(node)); return result_1.Result.ok(type); } /** * Retrieves and concatenates JSDoc comments associated with a given TypeScript node. * * @param {TNode} node - The TypeScript node to extract comments from. * @returns {string} Concatenated JSDoc comments. */ getComment(node) { return ts .getJSDocCommentsAndTags(node) .map((comment) => comment.comment || "") .join("\n"); } /** * Generates a string representation of a given function or method declaration node. * This method leverages the TypeScript printer to produce a source code string, * removing any comments and using line feed as the new line character. * * @param node The function or method declaration node to be printed. * @param sourceFile The source file that contains the node to be printed. * @returns A string representation of the given node. */ getFunctionNodeText(node, sourceFile) { const printer = ts.createPrinter({ newLine: ts.NewLineKind.LineFeed, removeComments: true, }); return printer.printNode(ts.EmitHint.Unspecified, node, sourceFile); } /** * Finds the root directory of a project by searching for a 'package.json' file * starting from the given current directory and traversing up the directory tree. * * @param {string} [currentDir=process.cwd()] - The directory to start searching from. * @returns {string} The root directory of the project, or the current working directory if no 'package.json' file is found. 
*/ findProjectRoot(currentDir = process.cwd()) { while (currentDir !== path.parse(currentDir).root) { const packageJsonPath = path.join(currentDir, "package.json"); if (fs.existsSync(packageJsonPath)) { return currentDir; } currentDir = path.dirname(currentDir); } return process.cwd(); } /** * Retrieves a list of TypeScript files, excluding test and mock files. * @returns A promise that resolves with a list of TypeScript files. */ async getTsFiles() { return await (0, glob_1.glob)("**/!(*.d|*.spec|*.test|*.mock).ts?(x)", { ignore: ["node_modules/**", "dist/**", ".env"], }); } /** * Extracts module information from a TypeScript source file. * * @param sourceFile The TypeScript source file. * @param relativePath The relative path of the module. * @returns The module information. */ extractModuleInfo(sourceFile, relativePath) { return { path: relativePath, classes: [], functions: [], interfaces: [], enums: [], dependencies: this.buildDependencyGraph(sourceFile), }; } /** * Retrieves a source file from the TypeScript program by its filename. * * @param fileName - The path to the source file to retrieve * @returns The SourceFile object if found, undefined otherwise */ getSourceFile(fileName) { return this.program?.getSourceFile(fileName); } /** * Gets an array of all root file names in the TypeScript program. * Root files are the entry points specified in the tsconfig.json or passed to the compiler. * * @returns A readonly array of file paths, or undefined if the program is not initialized */ getRootFileNames() { return this.program?.getRootFileNames(); } /** * Returns the current TypeScript program instance. * The program object represents the entire TypeScript project and provides * access to the compiler's internal state. * * @returns The TypeScript Program object, or undefined if not initialized */ getProgram() { return this.program; } /** * Retrieves the TypeChecker instance from the current program. 
* The TypeChecker is responsible for type analysis and provides * APIs for querying type information. * * @returns The TypeScript TypeChecker object, or undefined if the program is not initialized * @remarks This method creates a new type checker instance each time it's called, * consider caching the result if multiple calls are needed */ getTypeChecker() { const program = this.getProgram(); return program ? program.getTypeChecker() : undefined; } /** * Builds a hierarchical map of the codebase by traversing TypeScript files * and extracting module and class information. */ async buildCodebaseMap() { const rootDir = process.cwd(); const codebaseMap = {}; const repoNames = path.basename(rootDir); codebaseMap[repoNames] = { modules: {} }; const tsFiles = await this.getTsFiles(); tsFiles.forEach((filePath) => { const moduleRalativePath = path.relative(rootDir, filePath); const sourceFile = this.getSourceFile(filePath); if (!sourceFile) { throw Error(`No source file found for ${filePath}`); } const moduleInfo = this.extractModuleInfo(sourceFile, moduleRalativePath); ts.forEachChild(sourceFile, (node) => { if (ts.isClassDeclaration(node)) { const classInfo = this.extractClassMetaData(node, sourceFile).getValue(); if (classInfo) { moduleInfo?.classes?.push(classInfo); } this.processClassMembers(node, sourceFile, moduleInfo); } if (ts.isMethodDeclaration(node) || ts.isFunctionDeclaration(node) || (ts.isVariableDeclaration(node) && ts.isArrowFunction(node))) { this.aggregateFunctions(node, sourceFile, moduleInfo); } if (ts.isPropertyDeclaration(node)) { this.aggergateProperties(node, sourceFile, moduleInfo); } if (ts.isInterfaceDeclaration(node)) { this.aggregateInterfaces(node, sourceFile, moduleInfo); } if (ts.isEnumDeclaration(node)) { this.aggregateEnums(node, sourceFile, moduleInfo); } codebaseMap[repoNames].modules[moduleRalativePath] = moduleInfo; }); }); return result_1.Result.ok(codebaseMap); } extractInterfaceInfo(node, sourceFile) { try { const interfaceName = 
node.name.getText(sourceFile); const properties = node.members .filter(ts.isPropertySignature) .map((prop) => { const name = prop.name.getText(sourceFile); const type = prop.type ? this.getTypeAtLocation(prop).getValue() : "any"; return { name, type }; }); return result_1.Result.ok({ name: interfaceName, properties, summary: this.getComment(node), }); } catch (error) { (0, utils_1.logError)(error, "extractInterfaceInfo", { node, sourceFile, }); throw Error(error); } } extractEnumInfo(node, sourceFile) { const enumName = node.name.getText(sourceFile); const members = node.members.map((member) => { const name = member.name.getText(sourceFile); const value = member.initializer ? member.initializer.getText(sourceFile) : undefined; return { name, value }; }); return result_1.Result.ok({ name: enumName, members: members, summary: this.getComment(node), }); } buildDependencyGraph(sourceFile) { const imports = sourceFile.statements.filter(ts.isImportDeclaration); return imports.map((i) => { return ts .createPrinter() .printNode(ts.EmitHint.Unspecified, i, sourceFile); }); } } exports.TypeScriptCodeMapper = TypeScriptCodeMapper; //# sourceMappingURL=typescript-code-mapper.service.js.map