/*
 * repomix
 * Version:
 * A tool to pack repository contents into a single file for AI consumption
 * 265 lines • 12.1 kB
 * JavaScript
 */
// Helper that runs a generator function as if it were an async function (this looks
// like the standard compiler-emitted `__awaiter` shim): every value the generator
// yields is waited on, and its settled result is fed back into the generator.
// An already-defined `this.__awaiter` is reused when present.
//   thisArg    - `this` binding used when invoking the generator function
//   _arguments - argument list applied to the generator function (defaults to [])
//   P          - promise constructor to use; falls back to the global Promise
//   generator  - generator function holding the asynchronous body
// Returns a promise that resolves with the generator's return value and rejects with
// any error thrown while stepping it.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wrap a yielded value in a P promise unless it already is an instance of P.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with a fulfilled value; a synchronous throw rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Resume the generator by throwing the rejection reason into it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Resolve when the generator is done; otherwise wait on the yielded value and continue.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator (replacing the `generator` binding) and process its first result.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
import { createWriteStream } from 'node:fs';
import * as fs from 'node:fs/promises';
import * as path from 'node:path';
import { Readable, Transform } from 'node:stream';
import { pipeline } from 'node:stream/promises';
import { unzip } from 'fflate';
import { RepomixError } from '../../shared/errorHandle.js';
import { logger } from '../../shared/logger.js';
import { buildGitHubArchiveUrl, buildGitHubMasterArchiveUrl, buildGitHubTagArchiveUrl, checkGitHubResponse, getArchiveFilename, } from './gitHubArchiveApi.js';
/**
 * Fetches a GitHub repository archive and unpacks it into `targetDirectory`.
 *
 * The candidate URLs produced by the archive URL builders (main branch, master
 * fallback, tag format) are tried in that order. Each URL gets up to `retries`
 * attempts with exponential backoff capped at five seconds. A "not found"-style
 * RepomixError abandons the current URL immediately when more than one candidate
 * URL exists.
 *
 * @param repoInfo repository descriptor handed to the URL builders
 * @param targetDirectory directory that receives the extracted files (created if missing)
 * @param options optional `{ timeout, retries }`; defaults are 30000 ms and 3
 * @param onProgress optional progress callback forwarded to the download step
 * @param deps injectable fetch / fs / stream helpers
 * @returns a promise that resolves with no value once one candidate succeeded
 * @throws RepomixError when every candidate URL failed
 */
export const downloadGitHubArchive = async (repoInfo, targetDirectory, options = {}, onProgress, deps = {
    fetch: globalThis.fetch,
    fs,
    pipeline,
    Transform,
    createWriteStream,
}) => {
    const { timeout = 30000, retries = 3 } = options;

    // The destination has to exist before anything is written into it.
    await deps.fs.mkdir(targetDirectory, { recursive: true });

    // Builders may return a falsy value for formats that do not apply; drop those.
    const candidateUrls = [
        buildGitHubArchiveUrl(repoInfo),
        buildGitHubMasterArchiveUrl(repoInfo),
        buildGitHubTagArchiveUrl(repoInfo),
    ].filter((candidate) => Boolean(candidate));
    const hasSeveralCandidates = candidateUrls.length > 1;
    let mostRecentFailure = null;

    for (const candidateUrl of candidateUrls) {
        let attempt = 1;
        while (attempt <= retries) {
            try {
                logger.trace(`Downloading GitHub archive from: ${candidateUrl} (attempt ${attempt}/${retries})`);
                await downloadAndExtractArchive(candidateUrl, targetDirectory, repoInfo, timeout, onProgress, deps);
                logger.trace('Successfully downloaded and extracted GitHub archive');
                return;
            }
            catch (failure) {
                mostRecentFailure = failure;
                logger.trace(`Archive download attempt ${attempt} failed:`, mostRecentFailure.message);

                // Retrying a missing archive is pointless; move on to the next URL format.
                const looksLikeNotFound = failure instanceof RepomixError &&
                    ['not found', '404'].some((marker) => failure.message.includes(marker));
                if (looksLikeNotFound && hasSeveralCandidates) {
                    break;
                }

                // Back off before the next attempt, but never after the final one.
                if (attempt < retries) {
                    const backoffMs = Math.min(1000 * 2 ** (attempt - 1), 5000);
                    logger.trace(`Retrying in ${backoffMs}ms...`);
                    await new Promise((wake) => setTimeout(wake, backoffMs));
                }
            }
            attempt += 1;
        }
    }

    // Every candidate URL was exhausted without success.
    throw new RepomixError(`Failed to download GitHub archive after ${retries} attempts. ${mostRecentFailure?.message || 'Unknown error'}`);
};
/**
 * Downloads the archive at `archiveUrl` into `targetDirectory`, extracts it there,
 * and removes the temporary archive file afterwards.
 *
 * The download runs inside the same guarded region as the extraction, so a
 * partially written archive left behind by a failed or aborted download is
 * removed as well instead of lingering in the target directory.
 *
 * @param archiveUrl URL of the ZIP archive
 * @param targetDirectory directory that receives both the temporary archive and the extracted files
 * @param repoInfo repository descriptor used to derive the archive filename and passed on to extraction
 * @param timeout download timeout in milliseconds, forwarded to the download step
 * @param onProgress optional progress callback, forwarded to the download step
 * @param deps injectable fetch / fs / stream helpers; only `deps.fs` is handed to extraction
 * @returns a promise that resolves with no value after extraction and cleanup
 * @throws whatever the download or extraction step throws; cleanup failures are only trace-logged
 */
const downloadAndExtractArchive = async (archiveUrl, targetDirectory, repoInfo, timeout, onProgress, deps = {
    fetch: globalThis.fetch,
    fs,
    pipeline,
    Transform,
    createWriteStream,
}) => {
    const tempArchivePath = path.join(targetDirectory, getArchiveFilename(repoInfo));
    try {
        await downloadFile(archiveUrl, tempArchivePath, timeout, onProgress, deps);
        await extractZipArchive(tempArchivePath, targetDirectory, repoInfo, { fs: deps.fs });
    }
    finally {
        // Best-effort cleanup: the file may not exist at all (e.g. the request failed
        // before anything was written), so a failing unlink must not mask the real error.
        try {
            await deps.fs.unlink(tempArchivePath);
        }
        catch (error) {
            logger.trace('Failed to cleanup archive file:', error.message);
        }
    }
};
/**
 * Streams the response body of `url` into `filePath`, reporting progress on the way.
 *
 * Progress reports have the shape `{ downloaded, total, percentage }`. `total` is
 * the parsed `content-length` header, or `null` when the header is missing or not
 * a non-negative integer. `percentage` is `null` whenever `total` is `null` or 0
 * and is otherwise capped at 100, so a body larger than the declared length never
 * reports more than 100%.
 *
 * @param url URL to fetch
 * @param filePath destination file path
 * @param timeout milliseconds after which the request's AbortController is aborted
 * @param onProgress optional callback receiving progress reports
 * @param deps injectable fetch / Transform / pipeline / createWriteStream
 * @returns a promise that resolves with no value once the pipeline has finished
 * @throws RepomixError when the response has no body; errors from
 *         `checkGitHubResponse`, fetch and the stream pipeline propagate unchanged
 */
const downloadFile = async (url, filePath, timeout, onProgress, deps = {
    fetch: globalThis.fetch,
    fs,
    pipeline,
    Transform,
    createWriteStream,
}) => {
    const controller = new AbortController();
    // The abort timer stays armed until the body has been written (see `finally`).
    const timeoutId = setTimeout(() => controller.abort(), timeout);
    try {
        const response = await deps.fetch(url, {
            signal: controller.signal,
        });
        checkGitHubResponse(response);
        if (!response.body) {
            throw new RepomixError('No response body received');
        }

        // A missing or malformed header parses to NaN; never hand that to the caller.
        const declaredSize = Number.parseInt(response.headers.get('content-length') ?? '', 10);
        const total = Number.isFinite(declaredSize) && declaredSize >= 0 ? declaredSize : null;
        let downloaded = 0;
        let lastProgressUpdate = 0;

        const report = (percentage) => {
            onProgress({ downloaded, total, percentage });
        };

        // Convert the web stream of the response into a Node.js readable stream.
        const nodeStream = Readable.fromWeb(response.body);

        // Pass-through stream that counts bytes and emits throttled progress reports.
        const progressStream = new deps.Transform({
            transform(chunk, _encoding, callback) {
                downloaded += chunk.length;
                // Report at most once every 100ms to avoid flooding the callback.
                const now = Date.now();
                if (onProgress && now - lastProgressUpdate > 100) {
                    lastProgressUpdate = now;
                    report(total ? Math.min(100, Math.round((downloaded / total) * 100)) : null);
                }
                callback(null, chunk);
            },
            flush(callback) {
                // Always send one final report when the stream ends.
                if (onProgress) {
                    report(total ? 100 : null);
                }
                callback();
            },
        });

        const writeStream = deps.createWriteStream(filePath);
        await deps.pipeline(nodeStream, progressStream, writeStream);
    }
    finally {
        clearTimeout(timeoutId);
    }
};
/**
 * Unpacks the ZIP archive at `archivePath` into `targetDirectory`.
 *
 * Delegates to the in-memory extraction routine and re-wraps any failure in a
 * RepomixError whose message starts with "Failed to extract archive: ".
 *
 * @param archivePath path of the ZIP file on disk
 * @param targetDirectory directory that receives the extracted files
 * @param repoInfo repository descriptor, passed through to the extraction routine
 * @param deps injectable `{ fs }`
 * @returns a promise that resolves with no value
 * @throws RepomixError when extraction fails
 */
const extractZipArchive = async (archivePath, targetDirectory, repoInfo, deps = {
    fs,
}) => {
    try {
        // In-memory extraction is the only strategy in use.
        await extractZipArchiveInMemory(archivePath, targetDirectory, repoInfo, deps);
    }
    catch (failure) {
        const wrapped = new RepomixError(`Failed to extract archive: ${failure.message}`);
        throw wrapped;
    }
};
/**
 * Extracts a ZIP archive by reading the whole file into memory and decoding it
 * with fflate's `unzip`.
 *
 * Once decoding succeeds, the resulting entries are handed to
 * `processExtractedFiles`, which writes them below `targetDirectory`.
 *
 * @param archivePath path of the ZIP file on disk
 * @param targetDirectory directory that receives the extracted files
 * @param repoInfo repository descriptor, passed through to `processExtractedFiles`
 * @param deps injectable `{ fs }`
 * @returns a promise that resolves with no value
 * @throws RepomixError when `unzip` reports an error; failures while reading the
 *         archive or writing the entries propagate unchanged
 */
const extractZipArchiveInMemory = async (archivePath, targetDirectory, repoInfo, deps = {
    fs,
}) => {
    // Load the complete archive and expose it as a plain Uint8Array for fflate.
    const archiveBytes = new Uint8Array(await deps.fs.readFile(archivePath));

    // Adapt fflate's callback API to a promise that yields the decoded entries.
    const entries = await new Promise((resolve, reject) => {
        unzip(archiveBytes, (unzipError, files) => {
            if (unzipError) {
                reject(new RepomixError(`Failed to extract ZIP archive: ${unzipError.message}`));
            }
            else {
                resolve(files);
            }
        });
    });

    await processExtractedFiles(entries, targetDirectory, repoInfo, deps);
};
/**
 * Writes the entries of a decoded archive below `targetDirectory`, one at a time
 * (sequential writes avoid EMFILE errors).
 *
 * For every entry:
 *  - a leading `<repo>-…/` directory (GitHub's top-level archive folder) is removed;
 *  - the remaining path is normalized and leading `..` segments are stripped;
 *  - absolute paths and paths that resolve outside `targetDirectory` are skipped;
 *  - file entries that resolve to `targetDirectory` itself (e.g. `..` or `.`) are
 *    skipped, because writing them would target a directory and abort the whole
 *    extraction;
 *  - directory entries (names ending in `/`) are created, file entries are written
 *    after their parent directory has been created.
 *
 * @param extracted object mapping archive entry names to their byte contents
 * @param targetDirectory directory that receives the files
 * @param repoInfo repository descriptor; `repoInfo.repo` determines the prefix to strip
 * @param deps injectable `{ fs }` providing `mkdir` and `writeFile`
 * @returns a promise that resolves with no value when all entries were handled
 * @throws the underlying fs error when a directory or file cannot be written
 */
const processExtractedFiles = async (extracted, targetDirectory, repoInfo, deps = {
    fs,
}) => {
    const repoPrefix = `${repoInfo.repo}-`;
    const rootDirectory = path.resolve(targetDirectory);
    const createdDirs = new Set();

    for (const [filePath, fileData] of Object.entries(extracted)) {
        // GitHub archives have a top-level directory like "repo-branch/"; drop it.
        const [topLevel, ...remainder] = filePath.split('/');
        const relativePath = topLevel.startsWith(repoPrefix) ? remainder.join('/') : filePath;

        // The top-level directory entry itself maps to an empty path.
        if (!relativePath) {
            continue;
        }

        // Normalize and strip leading ".." segments to defuse path traversal.
        const sanitized = path.normalize(relativePath).replace(/^(\.\.([\/\\]|$))+/, '');
        if (path.isAbsolute(sanitized)) {
            logger.trace(`Absolute path detected in archive, skipping: ${relativePath}`);
            continue;
        }

        // Containment is decided on path segments, not on a string prefix, so a
        // sibling such as "<target>-other" can never pass as being inside <target>.
        const targetPath = path.resolve(rootDirectory, sanitized);
        const offsetFromRoot = path.relative(rootDirectory, targetPath);
        const escapesRoot = offsetFromRoot === '..' ||
            offsetFromRoot.startsWith(`..${path.sep}`) ||
            path.isAbsolute(offsetFromRoot);
        if (escapesRoot) {
            logger.trace(`Unsafe path detected in archive, skipping: ${relativePath}`);
            continue;
        }

        // Directory entries are marked by a trailing slash in the archive.
        if (filePath.endsWith('/')) {
            if (!createdDirs.has(targetPath)) {
                logger.trace(`Creating directory: ${targetPath}`);
                await deps.fs.mkdir(targetPath, { recursive: true });
                createdDirs.add(targetPath);
            }
            continue;
        }

        // A file entry that collapses to the target directory cannot be written.
        if (offsetFromRoot === '') {
            logger.trace(`Unsafe path detected in archive, skipping: ${relativePath}`);
            continue;
        }

        const parentDir = path.dirname(targetPath);
        if (!createdDirs.has(parentDir)) {
            logger.trace(`Creating parent directory for file: ${parentDir}`);
            await deps.fs.mkdir(parentDir, { recursive: true });
            createdDirs.add(parentDir);
        }

        logger.trace(`Writing file: ${targetPath}`);
        try {
            await deps.fs.writeFile(targetPath, fileData);
        }
        catch (fileError) {
            logger.trace(`Failed to write file ${targetPath}: ${fileError.message}`);
            throw fileError;
        }
    }
};
/**
 * Tells whether the archive download path can be used for a repository.
 *
 * Every GitHub repository is currently accepted, so the argument is ignored;
 * conditions such as size limits or private repositories may be added later.
 *
 * @param _repoInfo repository descriptor (unused)
 * @returns always `true`
 */
export const isArchiveDownloadSupported = (_repoInfo) => true;
//# sourceMappingURL=gitHubArchive.js.map