UNPKG

repomix

Version:

A tool to pack repository contents into a single file for AI consumption

github.com/yamadashy/repomix

yamadashy/repomix

265 lines • 12.1 kB

JavaScript

var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; import { createWriteStream } from 'node:fs'; import * as fs from 'node:fs/promises'; import * as path from 'node:path'; import { Readable, Transform } from 'node:stream'; import { pipeline } from 'node:stream/promises'; import { unzip } from 'fflate'; import { RepomixError } from '../../shared/errorHandle.js'; import { logger } from '../../shared/logger.js'; import { buildGitHubArchiveUrl, buildGitHubMasterArchiveUrl, buildGitHubTagArchiveUrl, checkGitHubResponse, getArchiveFilename, } from './gitHubArchiveApi.js'; /** * Downloads and extracts a GitHub repository archive */ export const downloadGitHubArchive = (repoInfo_1, targetDirectory_1, ...args_1) => __awaiter(void 0, [repoInfo_1, targetDirectory_1, ...args_1], void 0, function* (repoInfo, targetDirectory, options = {}, onProgress, deps = { fetch: globalThis.fetch, fs, pipeline, Transform, createWriteStream, }) { const { timeout = 30000, retries = 3 } = options; // Ensure target directory exists yield deps.fs.mkdir(targetDirectory, { recursive: true }); let lastError = null; // Try downloading with multiple URL formats: main branch, master branch (fallback), then tag format const archiveUrls = [ buildGitHubArchiveUrl(repoInfo), buildGitHubMasterArchiveUrl(repoInfo), buildGitHubTagArchiveUrl(repoInfo), ].filter(Boolean); for (const archiveUrl of archiveUrls) { for (let attempt = 1; 
attempt <= retries; attempt++) { try { logger.trace(`Downloading GitHub archive from: ${archiveUrl} (attempt ${attempt}/${retries})`); yield downloadAndExtractArchive(archiveUrl, targetDirectory, repoInfo, timeout, onProgress, deps); logger.trace('Successfully downloaded and extracted GitHub archive'); return; // Success - exit early } catch (error) { lastError = error; logger.trace(`Archive download attempt ${attempt} failed:`, lastError.message); // If it's a 404-like error and we have more URLs to try, don't retry this URL const isNotFoundError = lastError instanceof RepomixError && (lastError.message.includes('not found') || lastError.message.includes('404')); if (isNotFoundError && archiveUrls.length > 1) { break; } // If it's the last attempt, don't wait if (attempt < retries) { const delay = Math.min(1000 * 2 ** (attempt - 1), 5000); // Exponential backoff, max 5s logger.trace(`Retrying in ${delay}ms...`); yield new Promise((resolve) => setTimeout(resolve, delay)); } } } } // If we get here, all attempts failed throw new RepomixError(`Failed to download GitHub archive after ${retries} attempts. ${(lastError === null || lastError === void 0 ? 
void 0 : lastError.message) || 'Unknown error'}`); }); /** * Downloads and extracts archive from a single URL */ const downloadAndExtractArchive = (archiveUrl_1, targetDirectory_1, repoInfo_1, timeout_1, onProgress_1, ...args_1) => __awaiter(void 0, [archiveUrl_1, targetDirectory_1, repoInfo_1, timeout_1, onProgress_1, ...args_1], void 0, function* (archiveUrl, targetDirectory, repoInfo, timeout, onProgress, deps = { fetch: globalThis.fetch, fs, pipeline, Transform, createWriteStream, }) { // Download the archive const tempArchivePath = path.join(targetDirectory, getArchiveFilename(repoInfo)); yield downloadFile(archiveUrl, tempArchivePath, timeout, onProgress, deps); try { // Extract the archive yield extractZipArchive(tempArchivePath, targetDirectory, repoInfo, { fs: deps.fs }); } finally { // Clean up the downloaded archive file try { yield deps.fs.unlink(tempArchivePath); } catch (error) { logger.trace('Failed to cleanup archive file:', error.message); } } }); /** * Downloads a file from URL with progress tracking */ const downloadFile = (url_1, filePath_1, timeout_1, onProgress_1, ...args_1) => __awaiter(void 0, [url_1, filePath_1, timeout_1, onProgress_1, ...args_1], void 0, function* (url, filePath, timeout, onProgress, deps = { fetch: globalThis.fetch, fs, pipeline, Transform, createWriteStream, }) { const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), timeout); try { const response = yield deps.fetch(url, { signal: controller.signal, }); checkGitHubResponse(response); if (!response.body) { throw new RepomixError('No response body received'); } const totalSize = response.headers.get('content-length'); const total = totalSize ? 
Number.parseInt(totalSize, 10) : null; let downloaded = 0; let lastProgressUpdate = 0; // Use Readable.fromWeb for better stream handling const nodeStream = Readable.fromWeb(response.body); // Transform stream for progress tracking const progressStream = new deps.Transform({ transform(chunk, _encoding, callback) { downloaded += chunk.length; // Update progress at most every 100ms to avoid too frequent updates const now = Date.now(); if (onProgress && now - lastProgressUpdate > 100) { lastProgressUpdate = now; onProgress({ downloaded, total, percentage: total ? Math.round((downloaded / total) * 100) : null, }); } callback(null, chunk); }, flush(callback) { // Send final progress update if (onProgress) { onProgress({ downloaded, total, percentage: total ? 100 : null, }); } callback(); }, }); // Write to file const writeStream = deps.createWriteStream(filePath); yield deps.pipeline(nodeStream, progressStream, writeStream); } finally { clearTimeout(timeoutId); } }); /** * Extracts a ZIP archive using fflate library */ const extractZipArchive = (archivePath_1, targetDirectory_1, repoInfo_1, ...args_1) => __awaiter(void 0, [archivePath_1, targetDirectory_1, repoInfo_1, ...args_1], void 0, function* (archivePath, targetDirectory, repoInfo, deps = { fs, }) { try { // Always use in-memory extraction for simplicity and reliability yield extractZipArchiveInMemory(archivePath, targetDirectory, repoInfo, deps); } catch (error) { throw new RepomixError(`Failed to extract archive: ${error.message}`); } }); /** * Extracts ZIP archive by loading it entirely into memory (faster for small files) */ const extractZipArchiveInMemory = (archivePath_1, targetDirectory_1, repoInfo_1, ...args_1) => __awaiter(void 0, [archivePath_1, targetDirectory_1, repoInfo_1, ...args_1], void 0, function* (archivePath, targetDirectory, repoInfo, deps = { fs, }) { // Read the ZIP file as a buffer const zipBuffer = yield deps.fs.readFile(archivePath); const zipUint8Array = new Uint8Array(zipBuffer); // 
Extract ZIP using fflate yield new Promise((resolve, reject) => { unzip(zipUint8Array, (err, extracted) => { if (err) { reject(new RepomixError(`Failed to extract ZIP archive: ${err.message}`)); return; } // Process extracted files concurrently in the callback processExtractedFiles(extracted, targetDirectory, repoInfo, deps).then(resolve).catch(reject); }); }); }); /** * Process extracted files sequentially to avoid EMFILE errors */ const processExtractedFiles = (extracted_1, targetDirectory_1, repoInfo_1, ...args_1) => __awaiter(void 0, [extracted_1, targetDirectory_1, repoInfo_1, ...args_1], void 0, function* (extracted, targetDirectory, repoInfo, deps = { fs, }) { const repoPrefix = `${repoInfo.repo}-`; const createdDirs = new Set(); // Process files sequentially to avoid EMFILE errors completely for (const [filePath, fileData] of Object.entries(extracted)) { // GitHub archives have a top-level directory like "repo-branch/" // We need to remove this prefix from the file paths let relativePath = filePath; // Find and remove the repo prefix const pathParts = filePath.split('/'); if (pathParts.length > 0 && pathParts[0].startsWith(repoPrefix)) { // Remove the first directory (repo-branch/) relativePath = pathParts.slice(1).join('/'); } // Skip empty paths (root directory) if (!relativePath) { continue; } // Sanitize relativePath to prevent path traversal attacks const sanitized = path.normalize(relativePath).replace(/^(\.\.([\/\\]|$))+/, ''); // Reject absolute paths outright if (path.isAbsolute(sanitized)) { logger.trace(`Absolute path detected in archive, skipping: ${relativePath}`); continue; } const targetPath = path.resolve(targetDirectory, sanitized); if (!targetPath.startsWith(path.resolve(targetDirectory))) { logger.trace(`Unsafe path detected in archive, skipping: ${relativePath}`); continue; } // Check if this entry is a directory (ends with /) or empty file data indicates directory const isDirectory = filePath.endsWith('/') || (fileData.length === 0 && 
relativePath.endsWith('/')); if (isDirectory) { // Create directory immediately if (!createdDirs.has(targetPath)) { logger.trace(`Creating directory: ${targetPath}`); yield deps.fs.mkdir(targetPath, { recursive: true }); createdDirs.add(targetPath); } } else { // Create parent directory if needed and write file const parentDir = path.dirname(targetPath); if (!createdDirs.has(parentDir)) { logger.trace(`Creating parent directory for file: ${parentDir}`); yield deps.fs.mkdir(parentDir, { recursive: true }); createdDirs.add(parentDir); } // Write file sequentially logger.trace(`Writing file: ${targetPath}`); try { yield deps.fs.writeFile(targetPath, fileData); } catch (fileError) { logger.trace(`Failed to write file ${targetPath}: ${fileError.message}`); throw fileError; } } } }); /** * Checks if archive download is supported for the given repository info */ export const isArchiveDownloadSupported = (_repoInfo) => { // Archive download is supported for all GitHub repositories // In the future, we might add conditions here (e.g., size limits, private repos) return true; }; //# sourceMappingURL=gitHubArchive.js.map