UNPKG

buroventures-harald-code-core

Version:

Harald Code Core - Core functionality for AI-powered coding assistant

github.com/haraldroine/harald-code

haraldroine/harald-code

300 lines • 12.2 kB

JavaScript

/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
import * as fs from 'fs/promises';
import * as path from 'path';
import { marked } from 'marked';

// Simple console logger for import processing. Every message is prefixed with
// its level and the "[ImportProcessor]" tag.
const logger = {
  debug: (...args) => console.debug('[DEBUG] [ImportProcessor]', ...args),
  warn: (...args) => console.warn('[WARN] [ImportProcessor]', ...args),
  error: (...args) => console.error('[ERROR] [ImportProcessor]', ...args),
};

/**
 * Finds the project root by walking up from `startDir` until a directory that
 * contains a `.git` directory is found.
 *
 * @param {string} startDir - Directory to start searching from.
 * @returns {Promise<string>} The first ancestor (or `startDir` itself) holding
 *   a `.git` directory, or the resolved `startDir` when none is found.
 */
async function findProjectRoot(startDir) {
  let currentDir = path.resolve(startDir);
  while (true) {
    const gitPath = path.join(currentDir, '.git');
    try {
      const stats = await fs.lstat(gitPath);
      if (stats.isDirectory()) {
        return currentDir;
      }
    } catch {
      // .git not found at this level; continue with the parent directory.
    }
    const parentDir = path.dirname(currentDir);
    if (parentDir === currentDir) {
      // Reached the filesystem root.
      break;
    }
    currentDir = parentDir;
  }
  // Fall back to startDir if no .git directory was found.
  return path.resolve(startDir);
}

/**
 * Type guard for error-like values.
 *
 * @param {unknown} err - Any caught value.
 * @returns {boolean} True when `err` is a non-null object with a string
 *   `message` property.
 */
function hasMessage(err) {
  return (
    typeof err === 'object' &&
    err !== null &&
    'message' in err &&
    typeof err.message === 'string'
  );
}

/**
 * Finds all import statements (`@path`) in content without using regex.
 *
 * An `@` only starts an import when it is the first character of the content
 * or is preceded by whitespace. The path runs up to the next whitespace
 * character and must start with `.`, `/` or an ASCII letter.
 *
 * @param {string} content - Text to scan.
 * @returns {Array<{start: number, _end: number, path: string}>} One entry per
 *   import: `start` is the index of the `@`, `_end` is the index just past
 *   the path, and `path` is the text between them (without the `@`).
 */
function findImports(content) {
  const imports = [];
  const len = content.length;
  let i = 0;
  while (i < len) {
    // Find the next @ symbol.
    i = content.indexOf('@', i);
    if (i === -1) {
      break;
    }
    // Require a word boundary so that e.g. "user@example.com" is ignored.
    if (i > 0 && !isWhitespace(content[i - 1])) {
      i++;
      continue;
    }
    // The import path ends at the next whitespace character (or end of text).
    let j = i + 1;
    while (j < len && !isWhitespace(content[j])) {
      j++;
    }
    const importPath = content.slice(i + 1, j);
    // Basic validation: the path starts with '.', '/' or a letter.
    if (
      importPath.length > 0 &&
      (importPath[0] === '.' || importPath[0] === '/' || isLetter(importPath[0]))
    ) {
      imports.push({ start: i, _end: j, path: importPath });
    }
    i = j + 1;
  }
  return imports;
}

/**
 * @param {string} char - A single character.
 * @returns {boolean} True for space, tab, line feed or carriage return.
 */
function isWhitespace(char) {
  return char === ' ' || char === '\t' || char === '\n' || char === '\r';
}

/**
 * @param {string} char - A single character.
 * @returns {boolean} True for ASCII letters A-Z and a-z.
 */
function isLetter(char) {
  const code = char.charCodeAt(0);
  return (
    (code >= 65 && code <= 90) || // A-Z
    (code >= 97 && code <= 122) // a-z
  );
}

/**
 * Finds all code block and inline code regions using the marked lexer.
 *
 * The lexer tokens carry only their raw text, so each `code` / `codespan`
 * token is mapped back to an offset by searching for its raw text in
 * `content`. Repeated raw texts are matched to successive occurrences.
 *
 * @param {string} content - Markdown text.
 * @returns {Array<[number, number]>} `[start, end)` index pairs into `content`.
 */
function findCodeRegions(content) {
  const regions = [];
  const tokens = marked.lexer(content);
  // Map from raw token text to a queue of its start indices in `content`.
  const rawContentIndices = new Map();

  function walk(token) {
    if (token.type === 'code' || token.type === 'codespan') {
      if (!rawContentIndices.has(token.raw)) {
        const occurrences = [];
        let lastIndex = -1;
        while ((lastIndex = content.indexOf(token.raw, lastIndex + 1)) !== -1) {
          occurrences.push(lastIndex);
        }
        rawContentIndices.set(token.raw, occurrences);
      }
      const indices = rawContentIndices.get(token.raw);
      if (indices && indices.length > 0) {
        // Assume tokens are visited in order of appearance and dequeue the
        // next unused occurrence of this raw text.
        const idx = indices.shift();
        regions.push([idx, idx + token.raw.length]);
      }
    }
    if ('tokens' in token && token.tokens) {
      for (const child of token.tokens) {
        walk(child);
      }
    }
  }

  for (const token of tokens) {
    walk(token);
  }
  return regions;
}

/**
 * @param {Array<[number, number]>} regions - `[start, end)` pairs.
 * @param {number} index - Position to test.
 * @returns {boolean} True when `index` lies inside any of the regions.
 */
function isInsideCodeRegion(regions, index) {
  return regions.some(([regionStart, regionEnd]) => index >= regionStart && index < regionEnd);
}

/**
 * Flat-format import processing: collects the root content and every file
 * reachable through imports exactly once, then concatenates them wrapped in
 * "--- File: ... ---" / "--- End of File: ... ---" markers.
 *
 * @param {string} content - Content of the root file.
 * @param {string} basePath - Directory of the root file.
 * @param {boolean} debugMode - When true, failed imports are logged as warnings.
 * @param {object} importState - Only `currentFile` is read, to label the root.
 * @param {string} projectRoot - Directory that imports must stay inside.
 * @returns {Promise<{content: string, importTree: {path: string}}>}
 */
async function processImportsFlat(content, basePath, debugMode, importState, projectRoot) {
  // Files in the order they were first reached.
  const flatFiles = [];
  // Normalized paths already collected; also guards against circular imports.
  const processedFiles = new Set();

  async function collect(fileContent, fileBasePath, filePath) {
    // Normalize the file path to ensure consistent comparison.
    const normalizedPath = path.normalize(filePath);
    if (processedFiles.has(normalizedPath)) {
      return;
    }
    // Mark as processed before recursing to prevent infinite recursion.
    processedFiles.add(normalizedPath);
    flatFiles.push({ path: normalizedPath, content: fileContent });

    const codeRegions = findCodeRegions(fileContent);
    const imports = findImports(fileContent);
    // Imports are visited from the last one in the file to the first, so
    // sibling imports are appended to the output in that order.
    for (let i = imports.length - 1; i >= 0; i--) {
      const { start, path: importPath } = imports[i];
      if (isInsideCodeRegion(codeRegions, start)) {
        continue;
      }
      if (!validateImportPath(importPath, fileBasePath, [projectRoot])) {
        continue;
      }
      const fullPath = path.resolve(fileBasePath, importPath);
      const normalizedFullPath = path.normalize(fullPath);
      if (processedFiles.has(normalizedFullPath)) {
        continue;
      }
      try {
        await fs.access(fullPath);
        const importedContent = await fs.readFile(fullPath, 'utf-8');
        await collect(importedContent, path.dirname(fullPath), normalizedFullPath);
      } catch (error) {
        // Continue with the other imports even if one fails.
        if (debugMode) {
          logger.warn(`Failed to import ${fullPath}: ${hasMessage(error) ? error.message : 'Unknown error'}`);
        }
      }
    }
  }

  // Start with the root file (current file).
  const rootPath = path.normalize(importState.currentFile || path.resolve(basePath));
  await collect(content, basePath, rootPath);

  // Concatenate all unique files in order, Claude-style.
  const flatContent = flatFiles
    .map((f) => `--- File: ${f.path} ---\n${f.content.trim()}\n--- End of File: ${f.path} ---`)
    .join('\n\n');
  return {
    content: flatContent,
    importTree: { path: rootPath }, // Tree not meaningful in flat mode
  };
}

/**
 * Processes import statements in GEMINI.md content.
 * Supports @path/to/file syntax for importing content from other files.
 *
 * In 'tree' format each import outside a code region is replaced in place by
 * the (recursively processed) content of the imported file. In 'flat' format
 * the root and all imported files are concatenated once each.
 *
 * @param {string} content - The content to process for imports.
 * @param {string} basePath - The directory path where the current file is located.
 * @param {boolean} [debugMode=false] - Whether to enable debug logging.
 * @param {object} [importState] - State tracking for circular import
 *   prevention: `processedFiles` (Set of resolved paths), `maxDepth`,
 *   `currentDepth` and optionally `currentFile`.
 * @param {string} [projectRoot] - The project root directory used as the only
 *   allowed directory; discovered via the nearest `.git` directory when omitted.
 * @param {'tree'|'flat'} [importFormat='tree'] - The format of the import tree.
 * @returns {Promise<{content: string, importTree: {path: string, imports?: object[]}}>}
 *   Processed content with imports resolved, and the import tree.
 */
export async function processImports(
  content,
  basePath,
  debugMode = false,
  importState = {
    processedFiles: new Set(),
    maxDepth: 5,
    currentDepth: 0,
  },
  projectRoot,
  importFormat = 'tree',
) {
  const root = projectRoot || (await findProjectRoot(basePath));

  if (importState.currentDepth >= importState.maxDepth) {
    if (debugMode) {
      logger.warn(`Maximum import depth (${importState.maxDepth}) reached. Stopping import processing.`);
    }
    return {
      content,
      importTree: { path: importState.currentFile || 'unknown' },
    };
  }

  // --- FLAT FORMAT LOGIC ---
  if (importFormat === 'flat') {
    return processImportsFlat(content, basePath, debugMode, importState, root);
  }

  // --- TREE FORMAT LOGIC ---
  const codeRegions = findCodeRegions(content);
  const imports = [];
  let result = '';
  let lastIndex = 0;

  for (const { start, _end, path: importPath } of findImports(content)) {
    // Copy the content that precedes this import.
    result += content.substring(lastIndex, start);
    lastIndex = _end;

    // Imports inside code blocks / code spans are kept verbatim.
    if (isInsideCodeRegion(codeRegions, start)) {
      result += `@${importPath}`;
      continue;
    }

    // Validate the import path to prevent path traversal attacks. A rejected
    // import is removed from the output without a replacement.
    // NOTE(review): the original appended an empty template literal here,
    // which looks like a placeholder whose text was lost - confirm whether a
    // marker was intended.
    if (!validateImportPath(importPath, basePath, [root])) {
      continue;
    }

    const fullPath = path.resolve(basePath, importPath);
    // A file already on the current import chain is dropped as well.
    // NOTE(review): same empty-placeholder remark as above.
    if (importState.processedFiles.has(fullPath)) {
      continue;
    }

    try {
      await fs.access(fullPath);
      const fileContent = await fs.readFile(fullPath, 'utf-8');
      // Mark this file as processed for this import chain only: the Set is
      // copied so sibling imports do not see each other's entries.
      const newImportState = {
        ...importState,
        processedFiles: new Set(importState.processedFiles),
        currentDepth: importState.currentDepth + 1,
        currentFile: fullPath,
      };
      newImportState.processedFiles.add(fullPath);
      const imported = await processImports(
        fileContent,
        path.dirname(fullPath),
        debugMode,
        newImportState,
        root,
        importFormat,
      );
      result += `\n${imported.content}\n`;
      imports.push(imported.importTree);
    } catch (err) {
      let message = 'Unknown error';
      if (hasMessage(err)) {
        message = err.message;
      } else if (typeof err === 'string') {
        message = err;
      }
      // The failure is logged; nothing is written in place of the import.
      // NOTE(review): same empty-placeholder remark as above.
      logger.error(`Failed to import ${importPath}: ${message}`);
    }
  }

  // Add any remaining content after the last import.
  result += content.substring(lastIndex);
  return {
    content: result,
    importTree: {
      path: importState.currentFile || 'unknown',
      imports: imports.length > 0 ? imports : undefined,
    },
  };
}

/**
 * Checks that an import path is not a URL and resolves to a location inside
 * (or equal to) one of the allowed directories.
 *
 * Containment is decided with `path.relative` rather than a string-prefix
 * comparison, so an allowed directory that is a filesystem root (where
 * appending a separator would produce e.g. "//") is handled correctly.
 *
 * @param {string} importPath - Path as written after the `@`.
 * @param {string} basePath - Directory that relative import paths resolve against.
 * @param {string[]} allowedDirectories - Directories imports may live in.
 * @returns {boolean} True when the import is allowed.
 */
export function validateImportPath(importPath, basePath, allowedDirectories) {
  // Reject URLs.
  if (/^(file|https?):\/\//.test(importPath)) {
    return false;
  }
  const resolvedPath = path.resolve(basePath, importPath);
  return allowedDirectories.some((allowedDir) => {
    const relative = path.relative(path.resolve(allowedDir), resolvedPath);
    if (relative === '') {
      // Same path as the allowed directory.
      return true;
    }
    // ".." or "../..." climbs out of the allowed directory; an absolute
    // result means the two paths share no root (different Windows drives).
    const climbsOut = relative === '..' || relative.startsWith(`..${path.sep}`);
    return !climbsOut && !path.isAbsolute(relative);
  });
}
//# sourceMappingURL=memoryImportProcessor.js.map