cursorifier
Transform GitHub repositories into Cursor rules instructions using multiple LLM providers (Anthropic, OpenAI, Ollama, etc.).
330 lines (290 loc) • 14.2 kB
JavaScript
import Anthropic from '@anthropic-ai/sdk';
import { getEncoding } from 'js-tiktoken';
import fs from 'node:fs/promises';
import path from 'node:path';
import pc from 'picocolors';
import readline from 'node:readline/promises';
import { stdin as input, stdout as output } from 'node:process';
// Environment variables for chunk configuration, with defaults.
// CHUNK_SIZE: maximum tokens sent to the model per request; override via env var.
const CHUNK_SIZE = Number(process.env.CHUNK_SIZE || '100000');
// Input-token price used for the pre-flight cost estimate shown to the user.
const costPerToken = 3e-6; // $3 per million tokens
/**
 * Entry point: produce a .cursorrules document from packed repository content.
 *
 * When ANTHROPIC_API_KEY is the literal string 'dummy-key' (test runs), a
 * canned mock response is returned without any network access; otherwise the
 * work is delegated to the Claude-backed generator.
 *
 * @param {string} repoContent - Packed repository text to analyze.
 * @param {string} guidelines - Guidelines text injected into the prompts.
 * @param {string} [outputDir='.'] - Directory for intermediate chunk outputs.
 * @param {string|undefined} description - Optional user focus for the rules.
 * @param {string|undefined} ruleType - Optional rule type to enforce.
 * @param {string} [provider='claude-3-7-sonnet-latest'] - Model identifier.
 * @param {number} [chunkSize=CHUNK_SIZE] - Tokens per processing chunk.
 * @returns {Promise<string>} The generated .cursorrules content.
 */
export async function generateWithLLM(repoContent, guidelines, outputDir = '.', description, ruleType, provider = 'claude-3-7-sonnet-latest', chunkSize = CHUNK_SIZE) {
    // A dummy key signals a test run — skip the API entirely.
    if (process.env.ANTHROPIC_API_KEY === 'dummy-key') {
        console.log('Using mock response for testing');
        return generateMockResponse(repoContent);
    }
    return generateWithClaude(repoContent, guidelines, outputDir, description, ruleType, provider, chunkSize);
}
/**
* Creates a visual progress bar
*/
/**
 * Render a fixed-width textual progress bar, e.g. "█████░░░░░  50%".
 *
 * @param {number} current - Completed units.
 * @param {number} total - Total units; values <= 0 render as 0%.
 * @param {number} [length=30] - Bar width in characters.
 * @returns {string} Filled/empty bar glyphs followed by a 3-char-padded percentage.
 */
function progressBar(current, total, length = 30) {
    // Clamp the ratio to [0, 1]: the original computed current / total
    // directly, so current > total produced a negative empty length
    // ('░'.repeat(negative) throws RangeError) and total === 0 printed "NaN%".
    const ratio = total > 0 ? Math.min(Math.max(current / total, 0), 1) : 0;
    const filledLength = Math.round(length * ratio);
    const filled = '█'.repeat(filledLength);
    const empty = '░'.repeat(length - filledLength);
    const percentageText = Math.round(ratio * 100).toString().padStart(3);
    return `${filled}${empty} ${percentageText}%`;
}
/**
 * Format a token count with locale thousands separators and colour-code it
 * by magnitude: green below 50k, yellow below 100k, red at 100k and above.
 *
 * @param {number} count - Token count to display.
 * @returns {string} Colourised, locale-formatted number.
 */
function formatTokenCount(count) {
    const text = count.toLocaleString();
    if (count >= 100000) {
        return pc.red(text);
    }
    return count >= 50000 ? pc.yellow(text) : pc.green(text);
}
/**
* Calculate the number of chunks needed for processing
*/
/**
 * Compute how many chunks are needed to cover `totalTokens` at
 * `chunkSize` tokens apiece. Always at least 1, even for empty input.
 *
 * @param {number} totalTokens - Token count of the whole document.
 * @param {number} chunkSize - Maximum tokens per chunk.
 * @returns {number} Number of chunks required.
 */
function calculateChunkCount(totalTokens, chunkSize) {
    return totalTokens <= chunkSize ? 1 : Math.ceil(totalTokens / chunkSize);
}
/**
* Iterator that yields one chunk at a time to save memory
*/
/**
 * Async generator that tokenizes `text` and yields it one chunk at a time,
 * so only a single decoded chunk is held in memory at once.
 *
 * Before yielding anything it prints a size/cost summary and asks for
 * confirmation on stdin; a negative answer exits the process with code 0.
 *
 * @param {string} text - Full document to split.
 * @param {number} [chunkSize] - Tokens per chunk; falls back to CHUNK_SIZE.
 * @yields {{chunk: string, index: number, tokenCount: number, totalChunks: number}}
 */
async function* chunkIterator(text, chunkSize) {
    console.log(pc.cyan('\n┌─────────────────────────────────────────┐'));
    console.log(pc.cyan('│ CONTENT CHUNKING │'));
    console.log(pc.cyan('└─────────────────────────────────────────┘\n'));
    // cl100k_base is used for sizing and cost estimation only; the exact
    // tokenizer of the target model may differ slightly.
    const encoding = getEncoding('cl100k_base');
    const tokens = encoding.encode(text);
    const totalTokens = tokens.length;
    // Resolve the effective chunk size once and use it everywhere below.
    const cSize = chunkSize || CHUNK_SIZE;
    console.log(`● Document size: ${formatTokenCount(totalTokens)} tokens`);
    console.log(`● Chunk size: ${formatTokenCount(cSize)} tokens`);
    // Rough input-cost estimate shown before asking the user to proceed.
    const estimatedCost = (totalTokens * costPerToken).toFixed(4);
    console.log(pc.yellow(`● Estimated input processing cost: $${estimatedCost} (${formatTokenCount(totalTokens)} tokens × $${costPerToken} per token)`));
    // Interactive confirmation; the interface is always closed, even if the
    // user aborts or the question throws.
    const rl = readline.createInterface({ input, output });
    try {
        const answer = await rl.question(pc.yellow('\nProceed with processing? (y/n): '));
        const proceed = answer.toLowerCase() === 'y' || answer.toLowerCase() === 'yes';
        if (!proceed) {
            console.log(pc.red('\nOperation cancelled by user.'));
            process.exit(0);
        }
    }
    finally {
        rl.close();
    }
    // Fix: reuse the already-resolved cSize. The original recomputed
    // `chunkSize || CHUNK_SIZE` here, so the reported chunk count and the
    // actual chunk stride were derived from two separate expressions that
    // could silently drift apart.
    const totalChunks = calculateChunkCount(totalTokens, cSize);
    console.log(pc.green(`✓ Will process ${totalChunks} chunks\n`));
    // Slice the token array and decode one chunk per iteration (no overlap).
    let i = 0;
    let chunkIndex = 0;
    while (i < tokens.length) {
        const chunkTokens = tokens.slice(i, Math.min(i + cSize, tokens.length));
        const chunk = encoding.decode(chunkTokens);
        yield {
            chunk,
            index: chunkIndex,
            tokenCount: chunkTokens.length,
            totalChunks
        };
        i += cSize;
        chunkIndex++;
    }
    process.stdout.write('\n\n');
}
/**
 * Generate a .cursorrules document using Anthropic's Claude Messages API.
 *
 * The repository content is streamed through chunkIterator: the first chunk
 * prompts the model to draft the rules from scratch, and every subsequent
 * chunk asks it to merge new insights into the previous draft, so
 * `currentSummary` always holds the latest full model response. Each chunk's
 * response is also written to an intermediate markdown file in `outputDir`.
 *
 * @param {string} repoContent - Packed repository text to analyze.
 * @param {string} guidelines - Guidelines text injected into every prompt.
 * @param {string} [outputDir='.'] - Directory for per-chunk intermediate files.
 * @param {string|undefined} description - Optional user focus for the rules.
 * @param {string|undefined} ruleType - Optional rule type to enforce.
 * @param {string} [provider='claude-3-7-sonnet-latest'] - Claude model id.
 * @param {number|undefined} chunkSize - Tokens per chunk (chunkIterator applies its default).
 * @returns {Promise<string>} Content extracted from the final <cursorrules> tags.
 * @throws {Error} If ANTHROPIC_API_KEY is unset, an API call fails, or the
 *   final response lacks <cursorrules> tags.
 */
async function generateWithClaude(repoContent, guidelines, outputDir = '.', description, ruleType, provider = 'claude-3-7-sonnet-latest', chunkSize) {
// Check for API key in environment
const apiKey = process.env.ANTHROPIC_API_KEY;
if (!apiKey) {
throw new Error('ANTHROPIC_API_KEY environment variable is not set. Please set it to use Claude.');
}
const client = new Anthropic({
apiKey,
});
// Process text chunk by chunk using the iterator
let currentSummary = ''; // This will store our progressively built summary
// Helper function to extract content between <cursorrules> tags.
// Non-greedy match returns the FIRST tagged section; throws when absent so
// a malformed final response fails loudly instead of returning prose.
function extractCursorrules(text) {
const regex = /<cursorrules>([\s\S]*?)<\/cursorrules>/;
const match = text.match(regex);
if (!match) {
throw new Error('Response does not contain <cursorrules> tags. Make sure the model includes the required tags in its response.');
}
return match[1].trim();
}
// Create a chunk iterator to process one chunk at a time
const chunkGen = chunkIterator(repoContent, chunkSize);
for await (const { chunk, index, tokenCount, totalChunks } of chunkGen) {
const chunkDisplay = `[${index + 1}/${totalChunks}]`;
console.log(`${pc.yellow('⟳')} Processing chunk ${pc.yellow(chunkDisplay)} ${progressBar(index + 1, totalChunks)}`);
// Display chunk information
console.log(pc.cyan(`┌${'─'.repeat(58)}┐`));
console.log(pc.cyan(`│ Chunk: ${String(index + 1).padEnd(10)} Token Count: ${formatTokenCount(tokenCount).padEnd(12)} │`));
console.log(pc.cyan(`└${'─'.repeat(58)}┘\n`));
const isFirstChunk = index === 0;
const systemPrompt = 'You are an expert AI system designed to analyze code repositories and generate Cursor AI rules. Your task is to create a .cursorrules file based on the provided repository content and guidelines.';
// Two prompt shapes: the first chunk creates the rules from scratch; later
// chunks receive the previous response verbatim and refine it. The inline
// ternaries renumber the prompt's steps depending on whether the optional
// description and/or ruleType sections are present.
let userPrompt;
if (isFirstChunk) {
// For the first chunk, start creating the rules
userPrompt = `I need your help to create a Cursor rule (.cursorrules) file for my project. Please follow this process:
1. First, carefully read and understand this codebase chunk:
<repository_chunk>
${chunk}
</repository_chunk>
2. Now, review these guidelines for creating effective Cursor rules:
<guidelines>
${guidelines}
</guidelines>
${description ? `3. I specifically want to create rules for: "${description}"` : ''}
${ruleType ? `4. The rule type should be: "${ruleType}"` : ''}
${description || ruleType ? '5' : '3'}. Analyze the repository content and structure, considering:
- Main technologies, frameworks, and languages used
- Coding patterns, naming conventions, and architectural decisions
- Overall codebase structure including key directories and file types
- Project-specific practices and testing guidelines
- Guidelines and standards documented in comments or markdown files by developers
Present your analysis inside <repository_analysis> tags.
${description || ruleType ? '6' : '4'}. Create a complete .cursorrules file that:
- Is specific to this repository's structure and technologies
- Includes best practices and guidelines from code, comments, and documentation
- Organizes rules to match the codebase structure
- Is concise and actionable
- Includes testing best practices and guidelines
- Uses valid Markdown format${ruleType ? `
- Follows the rule type: "${ruleType}"` : ''}${description ? `
- Addresses the specific request: "${description}"` : ''}
Include your final .cursorrules content inside <cursorrules> tags.
Be concise - the final cursorrules file text must be not more than one page long.
Example structure:
<cursorrules>
...markdown content of the .cursorrules file, following the guidelines and analysis...
</cursorrules>`;
}
else {
// For subsequent chunks, enhance the existing summary
userPrompt = `I need your help to update a Cursor rule (.cursorrules) file based on a new chunk of my project:
1. Here is the current .cursorrules file content:
<current_rules>
${currentSummary}
</current_rules>
2. Now, carefully review this new repository chunk:
<repository_chunk>
${chunk}
</repository_chunk>
3. Review these guidelines for creating effective Cursor rules:
<guidelines>
${guidelines}
</guidelines>
${description ? `4. Remember, I specifically want to create rules for: "${description}"` : ''}
${ruleType ? `${description ? '5' : '4'}. The rule type should be: "${ruleType}"` : ''}
${description || ruleType ? (description && ruleType ? '6' : '5') : '4'}. Analyze this new chunk for:
- New technologies, frameworks, or languages not previously covered
- Additional coding patterns, naming conventions, or architectural decisions
- Further insights into codebase structure
- Project-specific practices and testing guidelines
- Guidelines and standards documented in comments or markdown files by developers
Present your analysis inside <new_insights> tags.
${description || ruleType ? (description && ruleType ? '7' : '6') : '5'}. Update the existing rules by:
- Preserving all valuable information from existing rules
- Maintaining the same structure and organization
- Adding new information only for patterns not already covered
- Being specific about code structure and patterns
- Including testing-related insights and best practices
- Being concise but comprehensive${ruleType ? `
- Following the rule type: "${ruleType}"` : ''}${description ? `
- Addressing the specific request: "${description}"` : ''}
Include your final updated .cursorrules content inside <cursorrules> tags.
Be concise - the final cursorrules file text must be not more than one page long.`;
}
process.stdout.write(`${pc.blue('🔄')} Sending to Claude ${provider}... `);
try {
const startTime = Date.now();
const response = await client.messages.create({
model: provider,
max_tokens: 8000,
system: systemPrompt,
messages: [
{
role: 'user',
content: userPrompt
}
]
});
// NOTE(review): assumes the first content block of the response is a text
// block — confirm this holds for all configurations (e.g. tool use would
// break this indexing).
currentSummary = response.content[0].text;
const endTime = Date.now();
const processingTime = ((endTime - startTime) / 1000).toFixed(2);
process.stdout.write(pc.green('✓\n'));
// Save intermediate output to file in the specified directory so partial
// progress survives a failure on a later chunk.
const intermediateFileName = path.join(outputDir, `cursorrules_chunk_${index + 1}_of_${totalChunks}.md`);
await fs.writeFile(intermediateFileName, currentSummary);
console.log(`${pc.green('✓')} Saved intermediate output to ${pc.blue(intermediateFileName)} ${pc.gray(`(${processingTime}s)`)}\n`);
}
catch (error) {
process.stdout.write(pc.red('✗\n'));
// Wrap with the failing chunk number for context; any earlier
// intermediate files remain on disk.
if (error instanceof Error) {
throw new Error(`${pc.red('Error generating with Claude on chunk')} ${index + 1}: ${error.message}`);
}
throw new Error(`${pc.red('Unknown error occurred while generating with Claude on chunk')} ${index + 1}`);
}
}
console.log(pc.green('\n┌─────────────────────────────────────────┐'));
console.log(pc.green('│ PROCESSING COMPLETE │'));
console.log(pc.green('└─────────────────────────────────────────┘\n'));
// Only extract the cursorrules content at the very end
return extractCursorrules(currentSummary);
}
/**
 * Build a canned .cursorrules document for test runs (dummy API key).
 * The project name is taken from the first line containing "# Project:",
 * falling back to "Repository" when no such line (or an empty name) is found.
 *
 * @param {string} repoContent - Packed repository text.
 * @returns {string} Mock .cursorrules markdown document.
 */
function generateMockResponse(repoContent) {
    let repoName = 'Repository';
    for (const line of repoContent.split('\n')) {
        if (line.includes('# Project:')) {
            const candidate = line.replace('# Project:', '').trim();
            if (candidate !== '') {
                repoName = candidate;
            }
            break;
        }
    }
    return `# .cursorrules for ${repoName}
## Project Overview
This project appears to be a TypeScript/Node.js application that processes GitHub repositories.
## Coding Standards
- Follow TypeScript best practices with strict typing
- Use async/await for asynchronous operations
- Prefer functional programming patterns where appropriate
- Use descriptive variable and function names
## File Structure Guidelines
- Place core logic in the \`src/\` directory
- Organize code by feature or functionality
- Keep related functionality together
- Use index.ts files for clean exports
## Style Conventions
- Use camelCase for variables and functions
- Use PascalCase for classes and interfaces
- Use 2-space indentation
- End files with a newline
## Testing Standards
- Write unit tests for all functionality
- Use descriptive test names
- Follow AAA (Arrange-Act-Assert) pattern
- Mock external dependencies
## Error Handling
- Use try/catch blocks for error handling
- Provide descriptive error messages
- Handle edge cases appropriately
- Log errors with appropriate severity levels
## Comments and Documentation
- Document public APIs
- Add comments for complex logic
- Use JSDoc for function documentation
- Keep comments up-to-date with code changes
## Performance Considerations
- Optimize for speed and efficiency
- Use appropriate data structures
- Minimize unnecessary computations
- Consider memory usage for large operations
## Security Best Practices
- Validate all inputs
- Avoid hardcoded credentials
- Use proper error handling
- Follow secure coding practices`;
}
//# sourceMappingURL=llmGenerator.js.map