@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.
309 lines • 10.3 kB
JavaScript
/**
* Source Code Processor
*
* Processes source code files for 50+ programming languages.
* Uses extension-based detection as primary method (more reliable than MIME types for code).
*
* Key features:
* - Supports 50+ programming languages via extension detection
* - Handles exact filename matches (Dockerfile, Makefile, etc.)
* - Line count truncation to prevent token overflow
* - Language detection for syntax highlighting metadata
*
* Priority: 120 (lower priority - text-based content, processed after binary/document formats)
*
* @module processors/code/SourceCodeProcessor
*
* @example
* ```typescript
* import { sourceCodeProcessor, processSourceCode, isSourceCodeFile } from "./code/index.js";
*
* // Check if a file is source code
* if (isSourceCodeFile("text/plain", "app.ts")) {
* const result = await processSourceCode({
* id: "file-123",
* name: "app.ts",
* mimetype: "text/plain",
* size: 1024,
* buffer: codeBuffer,
* });
*
* if (result.success) {
* console.log(`Language: ${result.data.language}`);
* console.log(`Lines: ${result.data.lineCount}`);
* }
* }
* ```
*/
import { basename as pathBasename } from "node:path";
import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
import { EXACT_FILENAME_MAP, SIZE_LIMITS, SOURCE_CODE_EXTENSIONS, } from "../config/index.js";
import { detectLanguageFromFilename } from "../config/languageMap.js";
// =============================================================================
// TYPES
// =============================================================================
// Re-import for local use within this file
// =============================================================================
// SOURCE CODE PROCESSOR
// =============================================================================
/**
* Source Code Processor - handles 50+ programming languages.
*
* Uses extension-based detection as the primary method since MIME types
* for source code are often unreliable (many are just "text/plain").
*
* Priority: 120 (lower priority than binary/document formats)
*
* @example
* ```typescript
* const processor = new SourceCodeProcessor();
*
* const result = await processor.processFile({
* id: "file-123",
* name: "main.py",
* mimetype: "text/plain",
* size: 2048,
* buffer: pythonCodeBuffer,
* });
*
* if (result.success) {
* console.log(`Language: ${result.data.language}`); // "Python"
* }
* ```
*/
export class SourceCodeProcessor extends BaseFileProcessor {
    /**
     * File extensions recognised as source code.
     * This is a copy of SOURCE_CODE_EXTENSIONS taken when the class is defined.
     */
    static supportedExtensions = [
        ...SOURCE_CODE_EXTENSIONS,
    ];
    /**
     * MIME types commonly reported for source code files.
     * isFileSupported() only consults the MIME type (via the base class) after
     * the exact-filename and extension checks have both failed.
     */
    static supportedMimeTypes = [
        "text/plain",
        "text/x-python",
        "text/javascript",
        "text/typescript",
        "application/javascript",
        "application/typescript",
        "application/x-javascript",
        "text/x-java",
        "text/x-java-source",
        "text/x-c",
        "text/x-csrc",
        "text/x-c++",
        "text/x-c++src",
        "text/x-csharp",
        "text/x-go",
        "text/x-rust",
        "text/x-ruby",
        "text/x-php",
        "text/x-sh",
        "text/x-shellscript",
        "application/x-sh",
        "text/x-perl",
        "text/x-lua",
        "text/x-sql",
        "text/x-swift",
        "text/x-kotlin",
        "text/x-scala",
        "text/x-haskell",
        "text/x-elixir",
        "text/x-erlang",
        "text/x-clojure",
        "text/x-fsharp",
        "text/x-ocaml",
        "text/x-lisp",
        "text/x-scheme",
        "text/x-groovy",
        "text/x-powershell",
        "text/x-r",
        "text/x-julia",
        "text/x-nim",
        "text/x-zig",
        "text/x-dart",
        "text/x-crystal",
        "text/x-d",
        "text/x-asm",
        "text/x-fortran",
        "text/x-cobol",
        "text/x-pascal",
        "text/x-ada",
        "text/css",
        "text/x-scss",
        "text/x-sass",
        "text/x-less",
        "application/x-httpd-php",
    ];
    /**
     * Configures the base processor with the source-code size limit from
     * SIZE_LIMITS, a timeoutMs of 30000, and this class's MIME/extension lists.
     */
    constructor() {
        super({
            maxSizeMB: SIZE_LIMITS.SOURCE_CODE_MAX_MB,
            timeoutMs: 30000,
            supportedMimeTypes: SourceCodeProcessor.supportedMimeTypes,
            supportedExtensions: SourceCodeProcessor.supportedExtensions,
            fileTypeName: "SourceCode",
            defaultFilename: "code.txt",
        });
    }
    /**
     * Decide whether a file should be handled as source code.
     *
     * Checks run in this order, returning true on the first hit:
     *   1. the filename as given is a key of EXACT_FILENAME_MAP,
     *   2. the basename of the filename is a key of EXACT_FILENAME_MAP,
     *   3. the lowercased extension is listed in supportedExtensions,
     *   4. the base class accepts the file (presumably a MIME type check
     *      against supportedMimeTypes - confirm in BaseFileProcessor).
     *
     * @param mimetype - MIME type of the file; only used by the base-class fallback
     * @param filename - Filename or path used for name/extension detection
     * @returns true if the file is a supported source code file; false when
     *          filename is empty or missing
     */
    isFileSupported(mimetype, filename) {
        if (!filename) {
            return false;
        }
        // Own-property lookup only: a plain `MAP[name]` read would also match
        // inherited Object.prototype members, so a file literally named
        // "constructor" or "toString" would be reported as source code.
        const isExactFilename = (name) => Object.prototype.hasOwnProperty.call(EXACT_FILENAME_MAP, name) &&
            Boolean(EXACT_FILENAME_MAP[name]);
        // Try the name as given first, then its basename in case a path was passed.
        if (isExactFilename(filename)) {
            return true;
        }
        if (isExactFilename(pathBasename(filename))) {
            return true;
        }
        // getExtension() already lowercases, so the result can be compared directly.
        const ext = this.getExtension(filename);
        if (ext && SourceCodeProcessor.supportedExtensions.includes(ext)) {
            return true;
        }
        return super.isFileSupported(mimetype, filename);
    }
    /**
     * Build the processed source code result.
     *
     * Decodes the buffer as UTF-8, detects the language from fileInfo.name and
     * keeps at most SIZE_LIMITS.MAX_SOURCE_CODE_LINES lines. When lines are
     * dropped, a trailing "// ... truncated ..." marker line is appended.
     *
     * @param buffer - Raw file content
     * @param fileInfo - Original file information (name, mimetype, size are read)
     * @returns Object with the (possibly truncated) content, detected language,
     *          lineCount, truncated flag, encoding, the original buffer,
     *          mimetype (defaulting to "text/plain"), size and filename
     */
    buildProcessedResult(buffer, fileInfo) {
        const content = buffer.toString("utf-8");
        // Lines are "\n"-separated segments, so content ending in a newline
        // contributes one extra empty trailing segment to the count.
        const lines = content.split("\n");
        const originalLineCount = lines.length;
        const maxLines = SIZE_LIMITS.MAX_SOURCE_CODE_LINES;
        const truncated = originalLineCount > maxLines;
        const finalContent = truncated
            ? `${lines.slice(0, maxLines).join("\n")}\n\n// ... truncated at ${maxLines} lines, total ${originalLineCount} lines ...`
            : content;
        return {
            content: finalContent,
            language: detectLanguageFromFilename(fileInfo.name || ""),
            // Number of source lines kept in `content` (the marker is not counted).
            lineCount: Math.min(originalLineCount, maxLines),
            truncated,
            encoding: "utf-8",
            buffer,
            mimetype: fileInfo.mimetype || "text/plain",
            size: fileInfo.size,
            filename: this.getFilename(fileInfo),
        };
    }
    /**
     * Extract the lowercased file extension from a filename or path.
     *
     * The extension is the final "." and everything after it, provided that
     * tail contains no further "." and no path separator. Excluding "/" and
     * "\" keeps a dot in a directory name (e.g. "pkg.v2/Makefile2") from being
     * mistaken for the start of an extension.
     *
     * @param filename - Filename to extract extension from
     * @returns Lowercased extension with leading dot (e.g., ".ts") or null if none
     */
    getExtension(filename) {
        const match = filename.toLowerCase().match(/\.[^./\\]+$/);
        return match ? match[0] : null;
    }
}
// =============================================================================
// SINGLETON INSTANCE
// =============================================================================
/**
 * Shared module-level SourceCodeProcessor instance.
 * It is constructed once, when this module is evaluated, and is the instance
 * that isSourceCodeFile() and processSourceCode() in this file delegate to.
 * Import it instead of creating new processors so all callers share one
 * configuration.
 */
export const sourceCodeProcessor = new SourceCodeProcessor();
// =============================================================================
// HELPER FUNCTIONS
// =============================================================================
/**
 * Report whether the shared source code processor accepts a file.
 *
 * Thin convenience wrapper: both arguments are forwarded unchanged to
 * `sourceCodeProcessor.isFileSupported` and its answer is returned as-is.
 *
 * @param mimetype - MIME type of the file
 * @param filename - Filename used for name/extension-based detection
 * @returns true if the file is a supported source code file
 *
 * @example
 * ```typescript
 * if (isSourceCodeFile("text/plain", "app.ts")) {
 *   console.log("This is a TypeScript file");
 * }
 * ```
 */
export function isSourceCodeFile(mimetype, filename) {
    const verdict = sourceCodeProcessor.isFileSupported(mimetype, filename);
    return verdict;
}
/**
 * Check a file size against the configured source code limit.
 *
 * The limit is SIZE_LIMITS.SOURCE_CODE_MAX_MB converted to bytes
 * (1 MB = 1024 * 1024 bytes); a size exactly equal to the limit passes.
 *
 * @param sizeBytes - File size in bytes
 * @returns true if the file size is within limits
 */
export function validateSourceCodeSize(sizeBytes) {
    const { SOURCE_CODE_MAX_MB: limitMB } = SIZE_LIMITS;
    const limitBytes = limitMB * 1024 * 1024;
    return limitBytes >= sizeBytes;
}
/**
 * Process a source code file with the shared processor instance.
 *
 * Forwards both arguments to `sourceCodeProcessor.processFile` and resolves
 * with whatever that call resolves with.
 *
 * @param fileInfo - File information (can include URL or buffer)
 * @param options - Optional processing options
 * @returns Processing result with success flag and either data or error
 *
 * @example
 * ```typescript
 * const result = await processSourceCode({
 *   id: "file-123",
 *   name: "main.py",
 *   mimetype: "text/plain",
 *   size: 2048,
 *   buffer: pythonCodeBuffer,
 * });
 *
 * if (result.success) {
 *   console.log(`Detected language: ${result.data.language}`);
 *   console.log(`Line count: ${result.data.lineCount}`);
 *   console.log(`Truncated: ${result.data.truncated}`);
 * }
 * ```
 */
export async function processSourceCode(fileInfo, options) {
    const outcome = await sourceCodeProcessor.processFile(fileInfo, options);
    return outcome;
}
/**
 * Backward-compatibility alias (per the original note, for the Curator
 * codebase): `detectLanguage` is the very same function as
 * `detectLanguageFromFilename` imported from "../config/languageMap.js".
 *
 * @param filename - The filename to detect language from
 * @returns The detected language name; the fallback for unrecognised names is
 *          defined in languageMap.js (documented here as 'Unknown' - TODO confirm)
 *
 * @example
 * ```typescript
 * // Expected values as previously documented; verify against languageMap.js.
 * detectLanguage("app.ts") // Returns "TypeScript"
 * detectLanguage("Dockerfile") // Returns "Dockerfile"
 * ```
 */
export const detectLanguage = detectLanguageFromFilename;
//# sourceMappingURL=SourceCodeProcessor.js.map