UNPKG

llm-prepare

Version:

A utility designed to streamline the preparation of diverse text sources for Large Language Model (LLM) consumption. It intelligently flattens project structures, then truncates and formats the content for in-context learning (ICL) prompts.

236 lines (209 loc) • 8.69 kB

JavaScript

/**
 * Output module - Handles writing to different destinations
 */

import fs from 'fs/promises';
import path from 'path';

/**
 * Writes output to the specified destination.
 *
 * Behaviour by argument combination:
 * - No `outputPath`: `text` is written to stdout and the function returns
 *   (parameter validation below is not reached on this path).
 * - `outputPath` without `chunkSize`: `text` is written to that single file.
 * - `outputPath` with `chunkSize`: if the UTF-8 size of `text` fits in one
 *   chunk it is written to `outputPath`; otherwise it is split and written to
 *   `<base>_part1<ext>`, `<base>_part2<ext>`, ... next to `outputPath`.
 *
 * The parent directory of `outputPath` is created (recursively) if missing.
 *
 * @param {string} text - The text to write
 * @param {string|null} outputPath - Path to the output file (null for stdout)
 * @param {number|null} chunkSize - Size in KB for each output file chunk
 * @return {Promise<void>}
 * @throws {Error} If `text` is not a string, `chunkSize` is not a positive
 *   number, the directory cannot be created, or a file cannot be written
 */
export async function writeOutput(text, outputPath, chunkSize = null) {
  // Write to stdout if no output path specified
  if (!outputPath) {
    process.stdout.write(text);
    return;
  }

  // Validate parameters
  if (typeof text !== 'string') {
    throw new Error('Invalid text parameter: must be a string');
  }

  if (chunkSize !== null && (typeof chunkSize !== 'number' || chunkSize <= 0)) {
    throw new Error(`Invalid chunk size: ${chunkSize}. Must be a positive number.`);
  }

  // Ensure the output directory exists
  try {
    const dirPath = path.dirname(outputPath);
    await fs.mkdir(dirPath, { recursive: true });
  } catch (error) {
    // Handle specific error codes
    if (error.code === 'EEXIST') {
      // Directory already exists, continue
    } else if (error.code === 'EACCES') {
      throw new Error(
        `Permission denied: Cannot create directory for ${outputPath}. Check file permissions.`,
        { cause: error },
      );
    } else if (error.code === 'ENAMETOOLONG') {
      throw new Error(`Path too long: ${outputPath}. Try using a shorter output path.`, {
        cause: error,
      });
    } else {
      throw new Error(
        `Failed to create directory for ${outputPath}: ${error.message} (${error.code})`,
        { cause: error },
      );
    }
  }

  // If no chunking needed, write directly to the file
  if (!chunkSize) {
    try {
      await fs.writeFile(outputPath, text, 'utf8');
    } catch (error) {
      // Same code-to-message mapping as every other write in this module
      handleFileWriteError(error, outputPath);
    }
    return;
  }

  // Handle chunked output
  try {
    const chunkSizeBytes = chunkSize * 1024; // Convert KB to bytes
    const totalBytes = Buffer.byteLength(text, 'utf8');

    // If content is smaller than chunk size, write directly
    if (totalBytes <= chunkSizeBytes) {
      try {
        await fs.writeFile(outputPath, text, 'utf8');
      } catch (error) {
        handleFileWriteError(error, outputPath);
      }
      return;
    }

    // Create multiple chunks
    const chunks = splitTextIntoChunks(text, chunkSizeBytes);
    const fileExt = path.extname(outputPath);
    const baseName = outputPath.slice(0, outputPath.length - fileExt.length);

    // Write each chunk to a separate file, one after another so that a
    // failure is reported for the first chunk that could not be written
    for (const [index, chunk] of chunks.entries()) {
      const chunkFileName = `${baseName}_part${index + 1}${fileExt}`;
      try {
        await fs.writeFile(chunkFileName, chunk, 'utf8');
      } catch (error) {
        throw new Error(
          `Failed to write chunk ${index + 1} to ${chunkFileName}: ${handleFileWriteError(error, chunkFileName, true)}`,
          { cause: error },
        );
      }
    }
  } catch (error) {
    throw new Error(`Failed to write chunked output: ${error.message}`, { cause: error });
  }
}

/**
 * Helper function to handle file write errors with specific messages
 * @param {Error} error - The error object (its `code` selects the message)
 * @param {string} filePath - Path to the file being written
 * @param {boolean} returnMessage - Whether to return the error message instead of throwing
 * @throws {Error} - Throws an error with a specific message based on the error code
 *   (only when returnMessage is false); the original error is attached as `cause`
 * @returns {string} - Returns the error message if returnMessage is true
 */
function handleFileWriteError(error, filePath, returnMessage = false) {
  let message;

  switch (error.code) {
    case 'EACCES':
      message = `Permission denied: Cannot write to ${filePath}. Check file permissions.`;
      break;
    case 'ENOSPC':
      message = `No space left on device: Cannot write to ${filePath}. Free up disk space.`;
      break;
    case 'ENAMETOOLONG':
      message = `Path too long: ${filePath}. Try using a shorter output path.`;
      break;
    case 'EISDIR':
      message = `Cannot write to ${filePath} because it is a directory. Specify a file path instead.`;
      break;
    case 'ENOENT':
      message = `Cannot write to ${filePath} because a component of the path does not exist.`;
      break;
    default:
      message = `Failed to write to file ${filePath}: ${error.message} (${error.code})`;
  }

  if (returnMessage) {
    return message;
  }
  throw new Error(message, { cause: error });
}

/**
 * Number of bytes a single Unicode code point occupies when encoded as UTF-8.
 * A lone surrogate counts as 3 bytes, matching what Buffer writes for it (U+FFFD).
 * @param {number} codePoint - Value returned by String.prototype.codePointAt
 * @return {number} 1, 2, 3 or 4
 */
function utf8ByteWidth(codePoint) {
  if (codePoint < 0x80) {
    return 1;
  }
  if (codePoint < 0x800) {
    return 2;
  }
  if (codePoint < 0x10000) {
    return 3;
  }
  return 4;
}

/**
 * Cuts a run of text with no usable sentence boundary into pieces of at most
 * `maxBytes` UTF-8 bytes. Cuts are made between code points, never inside a
 * surrogate pair. Every piece holds at least one code point, so a piece can
 * exceed `maxBytes` only when a single character is itself larger than the limit.
 * @param {string} text - The text to cut
 * @param {number} maxBytes - Maximum byte size for each piece
 * @return {string[]} The pieces, in order; concatenating them yields `text`
 */
function splitOversizedText(text, maxBytes) {
  const pieces = [];
  let pieceStart = 0;
  let pieceBytes = 0;
  let index = 0;

  while (index < text.length) {
    const codePoint = text.codePointAt(index);
    const charBytes = utf8ByteWidth(codePoint);

    // Close the current piece before it would overflow; the `pieceBytes > 0`
    // guard guarantees forward progress even for a tiny maxBytes
    if (pieceBytes > 0 && pieceBytes + charBytes > maxBytes) {
      pieces.push(text.slice(pieceStart, index));
      pieceStart = index;
      pieceBytes = 0;
    }

    pieceBytes += charBytes;
    index += codePoint > 0xffff ? 2 : 1; // astral code points take two UTF-16 units
  }

  if (index > pieceStart) {
    pieces.push(text.slice(pieceStart));
  }

  return pieces;
}

/**
 * Splits text into chunks that don't exceed the specified byte size
 * Tries to make intelligent splits at paragraph or sentence boundaries when possible
 *
 * Paragraphs (separated by blank lines) are packed into a chunk joined by
 * "\n\n". A paragraph larger than `maxBytes` is split into sentences, which are
 * packed joined by a single space. A sentence larger than `maxBytes` is cut
 * between characters. Chunks are always emitted in source order and are never
 * empty. The separator at a chunk boundary is not carried into either chunk.
 *
 * @param {string} text - The text to split
 * @param {number} maxBytes - Maximum byte size for each chunk
 * @return {string[]} Array of text chunks
 */
function splitTextIntoChunks(text, maxBytes) {
  const chunks = [];
  let currentChunk = '';
  let currentChunkBytes = 0;

  // Emit the pending chunk (if it has content) and start a fresh one
  const flush = () => {
    if (currentChunkBytes > 0) {
      chunks.push(currentChunk);
    }
    currentChunk = '';
    currentChunkBytes = 0;
  };

  // Add a piece that is known to fit in a chunk by itself, starting a new
  // chunk when it does not fit behind the pending content
  const append = (piece, pieceBytes, separator) => {
    const separatorBytes = separator.length; // separators used here are ASCII
    if (currentChunkBytes > 0 && currentChunkBytes + separatorBytes + pieceBytes > maxBytes) {
      flush();
    }

    if (currentChunkBytes > 0) {
      currentChunk += separator + piece;
      currentChunkBytes += separatorBytes + pieceBytes;
    } else {
      currentChunk = piece;
      currentChunkBytes = pieceBytes;
    }
  };

  // Split by paragraphs first (empty lines)
  const paragraphs = text.split(/\n\s*\n/);

  for (const paragraph of paragraphs) {
    const paragraphBytes = Buffer.byteLength(paragraph, 'utf8');

    if (paragraphBytes <= maxBytes) {
      append(paragraph, paragraphBytes, '\n\n');
      continue;
    }

    // A single paragraph is larger than the max chunk size: keep it apart from
    // the preceding paragraphs and split it into sentences
    flush();
    const sentences = paragraph.split(/(?<=[.!?])\s+/);

    for (const sentence of sentences) {
      const sentenceBytes = Buffer.byteLength(sentence, 'utf8');

      if (sentenceBytes <= maxBytes) {
        append(sentence, sentenceBytes, ' ');
        continue;
      }

      // A single sentence is larger than the chunk size, so it has to be cut
      // arbitrarily. Flush first so earlier sentences stay ahead of it.
      flush();
      chunks.push(...splitOversizedText(sentence, maxBytes));
    }
  }

  // Add the last chunk if it has content
  flush();

  return chunks;
}