UNPKG

mongodb-rag

Version:

RAG (Retrieval Augmented Generation) library for MongoDB Vector Search

1,712 lines (1,426 loc) • 123 kB
# cli/asciiLogo.js ```js // src/cli/asciiLogo.js import chalk from 'chalk'; export function displayLogo() { // Using template literals for better readability const logo = ` ###%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%### ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%## ##%%%%%%%%%%%%%%%%%%##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%#%%%%%%%%%%%%%%%## #%%%%%%%%%%%%%%%%%%%*:-*#%%%%%%%%%%%%%##*+-::........:-=*##%%%%%%%%%%%%%##=.*%%%%%%%%%%%%%%%%# @#%%%%%%%%%%%%%%%%%%%%+...:=*#%%%%%%#+-:....................:+#%%%%%%%#+-:..=%%%%%%%%%%%%%%%%%#@ @#%%%%%%%%%%%%%%%%%%%%%#-.....:-*#%%%%#+-......:.:::::....-+#%%%%%*=:.....:*%%%%%%%%%%%%%%%%%%#@ @#%%%%%%%%%%%%%%%%%%%%%%%#=:::::::.-*%%%%%*=:::::::::::-*%%%%%*-:::::::::+#%%%%%%%%%%%%%%%%%%%#@ @#%%%%%%%%%%%%%%%%%%%%%%%%%%*=-::::::::=#@%%%+-::::::=#%%%#+-::::::::-+##%%%%%%%%%%%%%%%%%%%%%#@ @#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%#+-:::::::+%%%#+:::-#%%%+-:::::::=*#%%%%%%%%%%%%%%%%%%%%%%%%%%#@ @#%%%%%%%%%%%%%%%%%%%%%%%#==*%%@@@@%#+-:::::+%%%*-+%%%+:--::-=*%%%%%%%*==#%%%%%%%%%%%%%%%%%%%%#@ @#%%%%%%%%%%%%%%%%%%%%%%#-::::+%@@@@@@%#+-----#%%%%%#=----=*%%%%%%%%*-::::*%%%%%%%%%%%%%%%%%%%#@ @#%%%%%%%%%%%%%%%%%%%%%#=:::=%%@%+**#%@%%%+----*@%%#----=#%%%%#*++#@%%+-::-*%%%%%%%%%%%%%%%%%%#@ @#%%%%%%%%%%%%%%%%%%%%%+---=%%%+-====*%%@%%%=---*@*----*%%%%#*=----=%%%+--:=#%%%%%%%%%%%%%%%%%#@ @#%%%%%%%%%%%%%%%%%%%%#----#%%=-===+%#:.-#@%%+---=---+%%%#-..+%+----=%%#=---*%%%%%%%%%%%%%%%%%#@ @#%%%%%%%%%%%%%%%%%%%%*----%%#-====#%%-.:#@%%%*=---=#%%%%%+..+@%+----#%%=---+%%%%%%%%%%%%%%%%%#@ @#%%%%%%%%%%%%%%%%%%%%*----#%#=====#%%%@%%%%@%%%%%%%%%%#%%%@@%%%+====*%%=---+%%%%%%%%%%%%%%%%%#@ MongoDB RAG - Vector Search Magic @#%%%%%%%%%%%%%%%%%%%%#====*%%+====+*%%%%#**#%%%%%%%%%****#%%%#+====+%%#==--*%%%%%%%%%%%%%%%%%#@ @#%%%%%%%%%%%%%%%%%%%%%*====#%%++++++++++***#%%+-:-+%%#***++++++++++#%%+====#%%%%%%%%%%%%%%%%%#@ @#%%%%%%%%%%%%%%%%%%%%%#+====%%%#++++******#@%#::::.*%%#***+++++++*%%%+=+==#%%%%%%%%%%%%%%%%%%#@ @#%%%%%%%%%%%%%%%%%%%%%%#++++=*@@%%##**##%%@@@%+:::-%%%@%%##***##%%%#=++++*%%%%%%%%%%%%%%%%%%%#@ @#%%%%%%%%%%%%%%%%%%%%%%%%*++++=+%@@@@@@@@*=+@@%=:-%@@*=+%@@%@@@@@*=+++++#%%%%%%%%%%%%%%%%%%%%#@ @#%%%%%%%%%%%%%%%%%%%%%%%%%#*++***+======++++*@@%+#@%*+++++=====++*++++*%%%%%%%%%%%%%%%%%%%%%%#@ @#%%%%%%%%%%%%%%%%%%%%%%%#@%%%#*************#%@@@%%%%%#**++++++*+++**#%%%%#%%%%%%%%%%%%%%%%%%%#@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ `; // Create a banner with the version number const version = process.env.npm_package_version || '0.15.0'; const banner = ` ${chalk.blue(logo)} ${chalk.yellow('šŸš€ MongoDB RAG CLI')} ${chalk.green(`v${version}`)} ${chalk.cyan('Transform your data into intelligent insights with vector search\n')}`; console.log(banner); } ``` # cli/celebration.js ```js // src/cli/celebration.js import chalk from 'chalk'; const frames = [ ` šŸŽ‰ \\|/ | / \\ `, ` šŸŽŠ /|\\ | / \\ `, ` ✨ \\|/ | / \\ ` ]; // Create a multi-color border effect function colorBorder(message) { const border = '==============================='; return ` ${chalk.blue(border)} ${chalk.green(message)} ${chalk.magenta(border)} `; } export function celebrate(message) { let frameIndex = 0; const animation = setInterval(() => { process.stdout.write('\x1B[2J\x1B[3J\x1B[H'); console.log(colorBorder(message)); console.log(chalk.cyan(frames[frameIndex])); frameIndex = (frameIndex + 1) % frames.length; }, 200); // Stop after 2 seconds setTimeout(() => { clearInterval(animation); process.stdout.write('\x1B[2J\x1B[3J\x1B[H'); console.log(colorBorder(message)); }, 2000); } ``` # cli/createRagApp.js ```js // src/cli/createRagApp.js import fs from 'fs'; import path from 'path'; import { execSync } from 'child_process'; import chalk from 'chalk'; import { displayLogo } from './asciiLogo.js'; import MongoSpinner from './spinner.js'; import { celebrate } from './celebration.js'; import FunProgressBar from './progressBar.js'; function generateReadme(projectName) { return `# ${projectName} ![MongoDB-RAG](https://img.shields.io/badge/MongoDB--RAG-Enabled-brightgreen?style=flat&logo=https://raw.githubusercontent.com/mongodb-developer/mongodb-rag/main/static/logo-square.png) This RAG (Retrieval Augmented Generation) application was created with [mongodb-rag](https://npmjs.com/package/mongodb-rag). ## Getting Started 1. Configure your environment variables in \`.env\`: - Set your MongoDB connection string - Configure your embedding provider (OpenAI or Ollama) - Adjust other settings as needed 2. Install dependencies: \`\`\`bash npm install \`\`\` 3. Start the development server: \`\`\`bash npm run dev \`\`\` ## Available CLI Commands The mongodb-rag CLI provides several commands to manage your RAG application: ### Configuration - \`npx mongodb-rag init\` - Initialize configuration - \`npx mongodb-rag test-connection\` - Test provider connection - \`npx mongodb-rag show-config\` - Display current configuration - \`npx mongodb-rag edit-config\` - Modify configuration ### Vector Search Index Management - \`npx mongodb-rag create-index\` - Create vector search index - \`npx mongodb-rag show-indexes\` - List all indexes - \`npx mongodb-rag delete-index\` - Remove an index ### Document Management - \`npx mongodb-rag ingest --file <path>\` - Ingest a single file - \`npx mongodb-rag ingest --directory <path>\` - Ingest a directory of files - Options: - \`--recursive\` - Process subdirectories - \`--chunk-size <number>\` - Tokens per chunk - \`--chunk-overlap <number>\` - Overlap between chunks - \`--chunk-method <method>\` - Chunking strategy (fixed/recursive/semantic) ### Search - \`npx mongodb-rag search "your query"\` - Search documents - Options: - \`--maxResults <number>\` - Maximum results (default: 5) - \`--minScore <number>\` - Minimum similarity score (default: 0.7) ## API Endpoints - POST \`/ingest\` - Ingest documents - GET \`/search?query=<text>\` - Search documents - DELETE \`/delete/:id\` - Delete a document ## Documentation For detailed documentation, visit: - [MongoDB RAG Documentation](https://mongodb-developer.github.io/mongodb-rag) - [MongoDB Vector Search](https://www.mongodb.com/docs/atlas/atlas-vector-search/) ## License This project is licensed under the MIT License. `; } function createBackendFiles(projectPath) { // Create backend directory structure fs.mkdirSync(path.join(projectPath, 'backend'), { recursive: true }); fs.mkdirSync(path.join(projectPath, 'backend', 'config'), { recursive: true }); // Create backend package.json fs.writeFileSync(path.join(projectPath, 'backend', 'package.json'), JSON.stringify({ name: `${path.basename(projectPath)}-backend`, version: "1.0.0", type: "module", scripts: { "start": "node server.js", "dev": "nodemon server.js" }, dependencies: { "express": "^4.18.2", "mongodb": "^6.3.0", "dotenv": "^16.0.3", "cors": "^2.8.5", "mongodb-rag": "latest", "nodemon": "^3.0.2" } }, null, 2)); // Create backend config fs.writeFileSync(path.join(projectPath, 'backend', 'config', 'dbConfig.js'), ` import dotenv from 'dotenv'; dotenv.config(); export const config = { mongoUrl: process.env.MONGODB_URI, database: "mongorag", collection: "documents", embedding: { provider: process.env.EMBEDDING_PROVIDER || "openai", apiKey: process.env.EMBEDDING_API_KEY, model: process.env.EMBEDDING_MODEL || "text-embedding-3-small", dimensions: 1536 }, indexName: process.env.VECTOR_INDEX || "default" }; `); // Create backend server.js fs.writeFileSync(path.join(projectPath, 'backend', 'server.js'), ` import express from 'express'; import cors from 'cors'; import { MongoRAG } from 'mongodb-rag'; import { config } from './config/dbConfig.js'; const app = express(); app.use(express.json()); app.use(cors()); const rag = new MongoRAG(config); // Ingest Documents app.post('/api/ingest', async (req, res) => { try { const { documents } = req.body; const result = await rag.ingestBatch(documents); res.json(result); } catch (error) { res.status(500).json({ error: error.message }); } }); // Search Documents app.get('/api/search', async (req, res) => { try { const { query } = req.query; const results = await rag.search(query, { maxResults: 5 }); res.json(results); } catch (error) { res.status(500).json({ error: error.message }); } }); // Delete a document app.delete('/api/documents/:id', async (req, res) => { try { const col = await rag._getCollection(); await col.deleteOne({ documentId: req.params.id }); res.json({ message: 'Deleted successfully' }); } catch (error) { res.status(500).json({ error: error.message }); } }); const PORT = process.env.PORT || 5000; app.listen(PORT, () => { console.log(\`šŸš€ Server running on port \${PORT}\`); }); `); } function createFrontendFiles(projectPath) { // Create frontend directory structure fs.mkdirSync(path.join(projectPath, 'frontend'), { recursive: true }); fs.mkdirSync(path.join(projectPath, 'frontend', 'src'), { recursive: true }); fs.mkdirSync(path.join(projectPath, 'frontend', 'src', 'components'), { recursive: true }); fs.mkdirSync(path.join(projectPath, 'frontend', 'public'), { recursive: true }); // Create frontend package.json using Vite fs.writeFileSync(path.join(projectPath, 'frontend', 'package.json'), JSON.stringify({ name: `${path.basename(projectPath)}-frontend`, version: "1.0.0", type: "module", scripts: { "dev": "vite", "build": "vite build", "preview": "vite preview" }, dependencies: { "react": "^18.2.0", "react-dom": "^18.2.0", "axios": "^1.6.2" }, devDependencies: { "@vitejs/plugin-react": "^4.2.1", "vite": "^5.0.8" } }, null, 2)); // Create frontend components const componentFiles = { 'Header.jsx': ` export function Header() { return ( <header className="header"> <h1>MongoDB RAG Application</h1> </header> ); }`, 'Chatbot.jsx': ` import { useState } from 'react'; import axios from 'axios'; export function Chatbot() { const [query, setQuery] = useState(''); const [results, setResults] = useState([]); const [loading, setLoading] = useState(false); const handleSearch = async (e) => { e.preventDefault(); setLoading(true); try { const response = await axios.get(\`http://localhost:5000/api/search?query=\${query}\`); setResults(response.data); } catch (error) { console.error('Search failed:', error); } setLoading(false); }; return ( <div className="chatbot"> <form onSubmit={handleSearch}> <input type="text" value={query} onChange={(e) => setQuery(e.target.value)} placeholder="Ask a question..." /> <button type="submit" disabled={loading}> {loading ? 'Searching...' : 'Search'} </button> </form> <div className="results"> {results.map((result, index) => ( <div key={index} className="result"> <p>{result.content}</p> <small>Score: {result.score}</small> </div> ))} </div> </div> ); }`, 'Footer.jsx': ` export function Footer() { return ( <footer className="footer"> <p>Built with <a href="https://npmjs.com/package/mongodb-rag">mongodb-rag</a></p> </footer> ); }` }; // Create each component file Object.entries(componentFiles).forEach(([filename, content]) => { fs.writeFileSync( path.join(projectPath, 'frontend', 'src', 'components', filename), content ); }); // Create main App.jsx fs.writeFileSync(path.join(projectPath, 'frontend', 'src', 'App.jsx'), ` import { Header } from './components/Header'; import { Chatbot } from './components/Chatbot'; import { Footer } from './components/Footer'; import './styles.css'; function App() { return ( <div className="app"> <Header /> <main> <Chatbot /> </main> <Footer /> </div> ); } export default App; `); // Create styles.css fs.writeFileSync(path.join(projectPath, 'frontend', 'src', 'styles.css'), ` .app { max-width: 800px; margin: 0 auto; padding: 2rem; } .header { text-align: center; margin-bottom: 2rem; } .chatbot { margin: 2rem 0; } .chatbot form { display: flex; gap: 1rem; margin-bottom: 1rem; } .chatbot input { flex: 1; padding: 0.5rem; font-size: 1rem; } .chatbot button { padding: 0.5rem 1rem; background: #00684A; color: white; border: none; border-radius: 4px; cursor: pointer; } .chatbot button:disabled { background: #ccc; } .results { margin-top: 2rem; } .result { padding: 1rem; border: 1px solid #ddd; margin-bottom: 1rem; border-radius: 4px; } .footer { text-align: center; margin-top: 2rem; padding-top: 1rem; border-top: 1px solid #ddd; } `); // Create index.html fs.writeFileSync(path.join(projectPath, 'frontend', 'index.html'), ` <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" /> <title>MongoDB RAG App</title> </head> <body> <div id="root"></div> <script type="module" src="/src/main.jsx"></script> </body> </html> `); // Create main.jsx fs.writeFileSync(path.join(projectPath, 'frontend', 'src', 'main.jsx'), ` import React from 'react' import ReactDOM from 'react-dom/client' import App from './App' ReactDOM.createRoot(document.getElementById('root')).render( <React.StrictMode> <App /> </React.StrictMode>, ) `); } export async function createRagApp(projectName) { const spinner = new MongoSpinner(); const progressBar = new FunProgressBar(); // Display the ASCII logo displayLogo(); const projectPath = path.resolve(process.cwd(), projectName); if (fs.existsSync(projectPath)) { console.error(chalk.red(`Error: Directory "${projectName}" already exists.`)); process.exit(1); } console.log(chalk.green(`\nšŸš€ Creating a new RAG app in ${projectPath}\n`)); // Start the creation process with spinner spinner.start('Initializing your RAG app'); fs.mkdirSync(projectPath, { recursive: true }); await new Promise(resolve => setTimeout(resolve, 1000)); spinner.stop(true); // Show progress while creating files console.log(chalk.cyan('\nPreparing your MongoDB RAG environment...')); let currentProgress = 0; progressBar.update(currentProgress); // Create backend console.log(chalk.blue('\nšŸ“ Creating backend...')); createBackendFiles(projectPath); // Create frontend console.log(chalk.blue('\nšŸ“ Creating frontend...')); createFrontendFiles(projectPath); // Create root package.json for workspace fs.writeFileSync(path.join(projectPath, 'package.json'), JSON.stringify({ name: projectName, version: "1.0.0", private: true, workspaces: ["frontend", "backend"], scripts: { "dev": "concurrently \"npm run dev -w frontend\" \"npm run dev -w backend\"", "build": "npm run build -w frontend", "start": "npm run start -w backend" }, devDependencies: { "concurrently": "^8.2.2" } }, null, 2)); // Create root .env fs.writeFileSync(path.join(projectPath, '.env'), ` MONGODB_URI=mongodb+srv://your_user:your_password@your-cluster.mongodb.net/mongorag PORT=5000 # Embedding Configuration EMBEDDING_PROVIDER=openai EMBEDDING_API_KEY=your-embedding-api-key EMBEDDING_MODEL=text-embedding-3-small # MongoDB Vector Search Index VECTOR_INDEX=default `); // Install dependencies console.log(chalk.blue('\nšŸ“¦ Installing dependencies...')); execSync(`cd ${projectPath} && npm install`, { stdio: 'inherit' }); // Create README fs.writeFileSync( path.join(projectPath, 'README.md'), generateReadme(projectName) ); // Show success message celebrate('Full-Stack RAG App Created Successfully! šŸŽ‰'); console.log(chalk.green('\nāœ… Project created successfully!')); console.log(chalk.yellow('\nNext steps:')); console.log(chalk.cyan(` 1. cd ${projectName}`)); console.log(chalk.cyan(' 2. Update .env with your MongoDB and API credentials')); console.log(chalk.cyan(' 3. npm run dev # This will start both frontend and backend')); } ``` # cli/playground.js ```js // src/cli/playground.js import { execSync } from 'child_process'; import fs from 'fs'; import path from 'path'; import express from 'express'; import { fileURLToPath } from 'url'; import open from 'open'; import { MongoRAG } from '../index.js'; import { createServer } from 'http'; import { Server } from 'socket.io'; import multer from 'multer'; import cors from 'cors'; import dotenv from 'dotenv'; import pdfParse from 'pdf-parse/lib/pdf-parse.js'; import detect from 'detect-port'; // Library to find available ports dotenv.config(); const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); // Properly resolve playground UI paths based on how the package is installed const findPlaygroundUiPath = () => { // Possible locations for playground-ui based on different installation scenarios const possiblePaths = [ // When installed as a dependency in node_modules path.resolve(__dirname, '../../src/playground-ui'), // When running from source/development environment path.resolve(__dirname, '../playground-ui'), // When globally installed path.resolve(process.env.npm_config_prefix || '/usr/local', 'lib/node_modules/mongodb-rag/src/playground-ui') ]; for (const potentialPath of possiblePaths) { if (fs.existsSync(potentialPath)) { // console.log(`āœ… Found playground UI at: ${potentialPath}`); return potentialPath; } } console.warn('āš ļø Could not locate playground-ui directory'); return null; }; const PLAYGROUND_UI_PATH = findPlaygroundUiPath(); const DEFAULT_BACKEND_PORT = 4000; const DEFAULT_FRONTEND_PORT = 3000; // Use environment variables or fallback let backendPort = process.env.BACKEND_PORT || DEFAULT_BACKEND_PORT; let playgroundPort = process.env.PLAYGROUND_PORT || DEFAULT_FRONTEND_PORT; // Function to find an available port if the default is in use const findAvailablePort = async (preferredPort, defaultPort) => { const availablePort = await detect(preferredPort); return availablePort === preferredPort ? preferredPort : await detect(defaultPort); }; console.log('Current directory:', __dirname); console.log('Process working directory:', process.cwd()); const upload = multer({ storage: multer.memoryStorage() }); export async function startPlayground() { const app = express(); const server = createServer(app); const io = new Server(server, { cors: { origin: '*' } }); app.use(express.json()); app.use(cors()); let mongodbUrl = process.env.MONGODB_URI; let databaseName = 'playground'; let collectionName = 'documents'; const configPath = path.join(process.cwd(), '.mongodb-rag.json'); console.log("šŸ” Looking for config file at:", configPath); console.log("šŸ” Current Working Directory:", process.cwd()); if (fs.existsSync(configPath)) { try { const config = JSON.parse(fs.readFileSync(configPath, 'utf-8')); console.log("āœ… Loaded Configuration from File:", config); mongodbUrl = config.mongoUrl || mongodbUrl; databaseName = config.database || databaseName; collectionName = config.collection || collectionName; console.log("šŸ“Œ Loaded MongoDB URL:", mongodbUrl); console.log("šŸ“Œ Loaded Database Name:", databaseName); console.log("šŸ“Œ Loaded Collection Name:", collectionName); } catch (error) { console.error("āŒ Error reading .mongodb-rag.json:", error.message); } } else { console.warn("🚨 Config file not found at:", configPath); } console.log("šŸ“Œ Final MongoDB URL:", mongodbUrl); console.log("šŸ“Œ Final Database Name:", databaseName); console.log("šŸ“Œ Final Collection Name:", collectionName); // Declare rag here to ensure it's in scope for all routes let rag = null; // Initialize RAG with configuration try { console.log("🟢 Before initializing MongoRAG:"); console.log(" šŸ“Œ MongoDB URL:", mongodbUrl); console.log(" šŸ“Œ Database Name:", databaseName); console.log(" šŸ“Œ Collection Name:", collectionName); rag = new MongoRAG({ mongoUrl: mongodbUrl, database: databaseName, collection: collectionName, embedding: { provider: process.env.EMBEDDING_PROVIDER || 'ollama', apiKey: process.env.EMBEDDING_API_KEY, model: process.env.EMBEDDING_MODEL || 'llama3', dimensions: 1536, baseUrl: process.env.EMBEDDING_BASE_URL || 'http://localhost:11434' } }); console.log("āœ… MongoRAG Final Config:", JSON.stringify(rag.config, null, 2)); console.log("āœ… After initializing MongoRAG:"); console.log(" šŸ“Œ Database in rag.config:", rag.config.database); console.log(" šŸ“Œ Collection in rag.config:", rag.config.collection); await rag.connect(); console.log('āœ… Successfully connected to MongoDB'); } catch (error) { console.error("āš ļø MongoDB Connection Error:", error.message); console.info("ā„¹ļø Playground will start with limited functionality"); } // API endpoints... app.post('/api/save-config', (req, res) => { fs.writeFileSync('.mongodb-rag.json', JSON.stringify(req.body, null, 2)); res.json({ message: "Configuration saved successfully!" }); }); app.get("/api/config", (req, res) => { if (!rag) { return res.status(503).json({ error: "MongoDB connection not available" }); } res.json({ mongoUrl: rag.config.mongoUrl || "Unknown", database: rag.config.database || "Unknown", collection: rag.config.collection || "Unknown", provider: rag.config.embedding.provider || "Unknown", apiKey: rag.config.embedding.apiKey || "Unknown", model: rag.config.embedding.model || "Unknown", dimensions: rag.config.embedding.dimensions || 1536, batchSize: rag.config.embedding.batchSize || 100, maxResults: rag.config.search.maxResults || 5, minScore: rag.config.search.minScore || 0.7, indexName: rag.config.indexName || "vector_index", embeddingFieldPath: rag.config.embeddingFieldPath || "embedding" }); }); app.post("/api/config", (req, res) => { const newConfig = req.body; // Update rag.config rag.config.database = newConfig.database; rag.config.collection = newConfig.collection; rag.config.embeddingFieldPath = newConfig.embeddingFieldPath || "embedding"; rag.config.indexName = newConfig.indexName; // Save to file (so changes persist) fs.writeFileSync(".mongodb-rag.json", JSON.stringify(newConfig, null, 2)); res.json(newConfig); }); // Add this new endpoint for creating vector search indexes app.post('/api/indexes/create', async (req, res) => { const existingIndexes = await collection.listIndexes().toArray(); if (existingIndexes.some(idx => idx.name === name)) { return res.status(400).json({ error: "Index already exists" }); } if (!rag) { return res.status(503).json({ error: "MongoDB connection not available", message: "Please check your MongoDB connection settings and try again" }); } try { const { name, dimensions, path, similarity } = req.body; const client = await rag.getClient(); const collection = client.db(rag.database).collection(rag.collection); const indexConfig = { name: name || "vector_index", type: "vectorSearch", definition: { fields: [{ type: "vector", path: path || "embedding", numDimensions: dimensions || 1536, similarity: similarity || "cosine" }] } }; const result = await collection.createSearchIndex(indexConfig); res.json({ success: true, message: "Vector search index created successfully", indexName: indexConfig.name, result }); } catch (error) { console.error('Error creating index:', error); res.status(500).json({ error: error.message }); } }); // Add this endpoint to fetch documents app.get('/api/documents', async (req, res) => { if (!rag) { return res.status(503).json({ error: "MongoDB connection not available" }); } try { const client = await rag.getClient(); const collection = client.db(rag.config.database).collection(rag.config.collection); const documents = await collection.find({}).toArray(); res.json({ documents }); } catch (error) { console.error('Error fetching documents:', error); res.status(500).json({ error: error.message }); } }); // Add this endpoint to fetch indexes app.get('/api/indexes', async (req, res) => { if (!rag) { return res.status(503).json({ error: "MongoDB connection not available" }); } try { const client = await rag.getClient(); console.log("Using database for indexes:", rag.config.database); console.log("Using collection for indexes:", rag.config.collection); const collection = client.db(rag.config.database).collection(rag.config.collection); // Fetch regular indexes console.log("Fetching regular indexes..."); const regularIndexes = await collection.listIndexes().toArray(); console.log("Regular indexes fetched:", regularIndexes); // Fetch search indexes console.log("Fetching search indexes..."); const searchIndexes = await collection.aggregate([ { $listSearchIndexes: {} } ]).toArray(); console.log("Search indexes fetched:", searchIndexes); res.json({ regularIndexes, searchIndexes }); } catch (error) { console.error('Error fetching indexes:', error); res.status(500).json({ error: error.message }); } }); // Add this endpoint to handle search queries app.post('/api/search', async (req, res) => { if (!rag) { return res.status(503).json({ error: "MongoDB connection not available" }); } const { query } = req.body; try { const client = await rag.getClient(); const collection = client.db(rag.config.database).collection(rag.config.collection); // Perform a search using the query const results = await collection.find({ $text: { $search: query } }).toArray(); res.json({ results }); } catch (error) { console.error('Error performing search:', error); res.status(500).json({ error: error.message }); } }); io.on('connection', (socket) => { socket.on('disconnect', () => { console.log('User disconnected'); }); }); // Start the backend server backendPort = await findAvailablePort(backendPort, DEFAULT_BACKEND_PORT); server.listen(backendPort, () => { console.log(`šŸš€ Playground backend running at http://localhost:${backendPort}`); }); // Serve the React UI with proper path resolution and build process if (PLAYGROUND_UI_PATH) { const frontendBuildPath = path.join(PLAYGROUND_UI_PATH, 'build'); const frontendDistPath = path.join(PLAYGROUND_UI_PATH, 'dist'); // Determine which directory to use (build or dist) let uiBuildPath = fs.existsSync(frontendBuildPath) ? frontendBuildPath : (fs.existsSync(frontendDistPath) ? frontendDistPath : null); if (!uiBuildPath) { console.warn("āš ļø Frontend build not found. Attempting to build..."); try { // Check if package.json exists in the playground-ui directory const packageJsonPath = path.join(PLAYGROUND_UI_PATH, 'package.json'); if (!fs.existsSync(packageJsonPath)) { throw new Error("package.json not found in playground-ui directory"); } // Attempt to install dependencies and build execSync(`cd "${PLAYGROUND_UI_PATH}" && npm install && npm run build`, { stdio: 'inherit', timeout: 300000 // 5 minute timeout for build process }); // Re-check build paths after building uiBuildPath = fs.existsSync(frontendBuildPath) ? frontendBuildPath : (fs.existsSync(frontendDistPath) ? frontendDistPath : null); if (!uiBuildPath) { throw new Error("Build completed but build directory not found"); } } catch (error) { console.error(`āš ļø Failed to build frontend: ${error.message}`); console.info("ā„¹ļø Starting in API-only mode"); return; // Exit frontend setup but keep backend running } } const uiApp = express(); uiApp.use(express.static(uiBuildPath)); uiApp.get('*', (req, res) => { res.sendFile(path.join(uiBuildPath, 'index.html')); }); // Start the frontend server playgroundPort = await findAvailablePort(playgroundPort, DEFAULT_FRONTEND_PORT); uiApp.listen(playgroundPort, () => { console.log(`šŸš€ Playground UI running at http://localhost:${playgroundPort}`); open(`http://localhost:${playgroundPort}`); }); } else { console.warn("āš ļø Playground UI components not found. Running in API-only mode."); console.info("ā„¹ļø You can still use the API endpoints at http://localhost:" + backendPort); } } ``` # cli/progressBar.js ```js // src/cli/progressBar.js import chalk from 'chalk'; const funFacts = [ "Did you know? Vector search helps find similar items even if they use different words!", "MongoDB Atlas Vector Search uses cosine similarity by default šŸ“", "RAG helps combine the power of vector search with your own data šŸ”‹", "Vector embeddings can capture semantic meaning beyond keywords šŸŽÆ", "MongoDB can handle billions of vectors efficiently! šŸš€", "Vector search is like giving your database a human-like understanding 🧠" ]; class FunProgressBar { constructor() { this.width = 40; this.currentFact = 0; } update(progress) { const filled = Math.round(this.width * progress); const empty = this.width - filled; const filledBar = 'ā–ˆ'.repeat(filled); const emptyBar = 'ā–‘'.repeat(empty); process.stdout.clearLine(); process.stdout.cursorTo(0); const percentage = Math.round(progress * 100); process.stdout.write( chalk.blue(`[${filledBar}${emptyBar}] ${percentage}%\n`) + chalk.yellow(`Fun Fact: ${funFacts[this.currentFact]}\n`) ); this.currentFact = (this.currentFact + 1) % funFacts.length; } } export default FunProgressBar; ``` # cli/spinner.js ```js // src/cli/spinner.js import chalk from 'chalk'; class MongoSpinner { constructor() { this.frames = [ 'šŸ” ā—‹ ā—‹ ā—‹ ', 'šŸ” ā— ā—‹ ā—‹ ', 'šŸ” ā— ā— ā—‹ ', 'šŸ” ā— ā— ā— ' ]; this.messages = [ 'Preparing vector magic...', 'Calculating embeddings...', 'Optimizing search space...', 'Almost there...' ]; this.frameIndex = 0; this.interval = null; this.currentMessage = ''; this.isEnabled = this._checkEnabled(); } _checkEnabled() { // Check for required properties and methods const requiredMethods = { clearLine: process.stdout.clearLine, cursorTo: process.stdout.cursorTo, write: process.stdout.write }; // Verify all methods exist and are functions const hasRequiredMethods = Object.values(requiredMethods) .every(method => method && typeof method === 'function'); return Boolean( process.stdout.isTTY && !process.env.NO_SPINNER && !process.env.CI && hasRequiredMethods ); } start(text = '') { this.currentMessage = text; if (!this.isEnabled) { // In non-interactive mode, just log once this._fallbackLog(this.messages[0], text); return; } process.stdout.write('\n'); this.interval = setInterval(() => { const frame = this.frames[this.frameIndex]; const message = this.messages[this.frameIndex]; process.stdout.clearLine(); process.stdout.cursorTo(0); process.stdout.write( chalk.blue(frame) + ' ' + chalk.cyan(message) + ' ' + chalk.yellow(this.currentMessage) ); this.frameIndex = (this.frameIndex + 1) % this.frames.length; }, 800); } updateMessage(text) { this.currentMessage = text; if (!this.isEnabled) { this._fallbackLog(text); return; } } stop(success = true) { if (this.interval) { clearInterval(this.interval); this.interval = null; } if (this.isEnabled) { process.stdout.clearLine(); process.stdout.cursorTo(0); if (success) { process.stdout.write( chalk.green('✨ Vector magic complete! ') + chalk.blue('šŸŽ‰\n') ); } else { process.stdout.write( chalk.red('āœ– Operation failed ') + chalk.yellow('😢\n') ); } } } _fallbackLog(...messages) { // Use plain console.log for non-interactive environments console.log(...messages); } } export default MongoSpinner; ``` # core/BatchProcessor.js ```js import debug from 'debug'; import EventEmitter from 'events'; const log = debug('mongodb-rag:batch'); /** * Handles batch processing of items with retry logic and concurrency control * @extends EventEmitter * @fires BatchProcessor#progress * @fires BatchProcessor#batchError */ class BatchProcessor extends EventEmitter { /** * Creates a new batch processor instance * @param {Object} options - Configuration options * @param {number} [options.batchSize=100] - Number of items to process in each batch * @param {number} [options.concurrency=2] - Number of batches to process concurrently * @param {number} [options.retryAttempts=3] - Maximum number of retry attempts per batch * @param {number} [options.retryDelay=1000] - Delay between retries in milliseconds */ constructor(options = {}) { super(); this.options = { batchSize: options.batchSize || 100, concurrency: options.concurrency || 2, retryAttempts: options.retryAttempts || 3, retryDelay: options.retryDelay || 1000, ...options }; this.stats = { processed: 0, failed: 0, retried: 0, total: 0 }; } /** * Processes a single batch of items with retry logic * @param {Array} items - Items to process in this batch * @param {Function} processor - Function to process the items * @returns {Promise<{results: Array, errors: Array}>} Results and errors from processing * @fires BatchProcessor#batchError */ async processBatch(items, processor) { const results = []; const errors = []; try { log(`Processing batch of ${items.length} items`); for (let attempt = 1; attempt <= this.options.retryAttempts; attempt++) { try { const batchResults = await processor(items); results.push(...batchResults); this.stats.processed += items.length; break; } catch (error) { if (attempt === this.options.retryAttempts) { log(`Batch failed after ${attempt} attempts`); this.stats.failed += items.length; errors.push({ items, error }); throw error; } this.stats.retried += items.length; log(`Retry attempt ${attempt} after error: ${error.message}`); await this._sleep(this.options.retryDelay * attempt); } } } catch (error) { this.emit('batchError', { items, error }); } return { results, errors }; } /** * Processes all items in batches with concurrency control * @param {Array} items - All items to process * @param {Function} processor - Function to process each batch * @returns {Promise<{results: Array, errors: Array}>} Combined results and errors from all batches * @fires BatchProcessor#progress */ async process(items, processor) { this.stats.total = items.length; const batches = this._createBatches(items); const results = []; const errors = []; // Process batches with concurrency control for (let i = 0; i < batches.length; i += this.options.concurrency) { const batchPromises = batches .slice(i, i + this.options.concurrency) .map(batch => this.processBatch(batch, processor)); const batchResults = await Promise.allSettled(batchPromises); batchResults.forEach((result, index) => { if (result.status === 'fulfilled') { results.push(...result.value.results); errors.push(...result.value.errors); } else { const failedBatch = batches[i + index]; errors.push({ items: failedBatch, error: result.reason }); } }); /** * Progress event * @event BatchProcessor#progress * @type {Object} * @property {number} processed - Number of successfully processed items * @property {number} failed - Number of failed items * @property {number} total - Total number of items * @property {number} percent - Percentage of completion */ this.emit('progress', { processed: this.stats.processed, failed: this.stats.failed, total: this.stats.total, percent: Math.round((this.stats.processed + this.stats.failed) / this.stats.total * 100) }); } return { results, errors }; } /** * Splits array of items into batches * @private * @param {Array} items - Items to split into batches * @returns {Array<Array>} Array of batches */ _createBatches(items) { const batches = []; for (let i = 0; i < items.length; i += this.options.batchSize) { batches.push(items.slice(i, i + this.options.batchSize)); } return batches; } /** * Utility function to pause execution * @private * @param {number} ms - Milliseconds to sleep * @returns {Promise<void>} */ _sleep(ms) { return new Promise(resolve => setTimeout(resolve, ms)); } /** * Returns current processing statistics * @returns {Object} Current processing stats including success rate * @property {number} processed - Number of successfully processed items * @property {number} failed - Number of failed items * @property {number} retried - Number of retried items * @property {number} total - Total number of items * @property {string} successRate - Percentage of successful processing */ getStats() { return { ...this.stats, successRate: this.stats.total ? (this.stats.processed / this.stats.total * 100).toFixed(2) + '%' : '0%' }; } } export default BatchProcessor; ``` # core/CacheManager.js ```js import debug from 'debug'; const log = debug('mongodb-rag:cache'); /** * Manages an in-memory cache with TTL and size limits * Provides caching functionality with automatic expiration and eviction */ class CacheManager { /** * Creates a new cache manager instance * @param {Object} options - Configuration options * @param {number} [options.ttl=3600] - Time-to-live in seconds (default 1 hour) * @param {number} [options.maxSize=1000] - Maximum number of entries in cache */ constructor(options = {}) { this.options = { ttl: options.ttl || 3600, // 1 hour default TTL maxSize: options.maxSize || 1000, ...options }; this.cache = new Map(); this.keyTimestamps = new Map(); this.stats = { hits: 0, misses: 0, evictions: 0 }; } /** * Retrieves a value from cache * @param {string} key - Cache key to lookup * @returns {Promise<*|null>} Cached value or null if not found/expired */ async get(key) { const entry = this.cache.get(key); if (!entry) { this.stats.misses++; return null; } if (this._isExpired(key)) { this.delete(key); this.stats.misses++; return null; } this.stats.hits++; return entry.value; } /** * Stores a value in cache * @param {string} key - Cache key * @param {*} value - Value to store * @returns {Promise<boolean>} True if value was stored successfully */ async set(key, value) { if (this.cache.size >= this.options.maxSize) { this._evictOldest(); } this.cache.set(key, { value, timestamp: Date.now() }); this.keyTimestamps.set(key, Date.now()); return true; } /** * Removes an entry from cache * @param {string} key - Cache key to remove * @returns {Promise<void>} */ async delete(key) { this.cache.delete(key); this.keyTimestamps.delete(key); } /** * Clears all entries from cache and resets statistics * @returns {Promise<void>} */ async clear() { this.cache.clear(); this.keyTimestamps.clear(); this.stats = { hits: 0, misses: 0, evictions: 0 }; } /** * Returns current cache statistics * @returns {Object} Cache statistics * @property {number} hits - Number of cache hits * @property {number} misses - Number of cache misses * @property {number} evictions - Number of evicted entries * @property {number} size - Current number of entries * @property {number} maxSize - Maximum allowed entries * @property {number} hitRate - Cache hit rate (0-1) */ getStats() { return { ...this.stats, size: this.cache.size, maxSize: this.options.maxSize, hitRate: this._calculateHitRate() }; } /** * Checks if a cache entry has expired * @private * @param {string} key - Cache key to check * @returns {boolean} True if entry has expired */ _isExpired(key) { const timestamp = this.keyTimestamps.get(key); if (!timestamp) return true; const age = Date.now() - timestamp; return age > this.options.ttl * 1000; } /** * Removes the oldest entry from cache * @private */ _evictOldest() { const oldestKey = Array.from(this.keyTimestamps.entries()) .sort(([, a], [, b]) => a - b)[0]?.[0]; if (oldestKey) { this.delete(oldestKey); this.stats.evictions++; } } /** * Calculates the cache hit rate * @private * @returns {number} Hit rate between 0 and 1 */ _calculateHitRate() { const total = this.stats.hits + this.stats.misses; return total === 0 ? 0 : this.stats.hits / total; } /** * Creates a deterministic cache key from a value * @static * @param {string} type - Type prefix for the key * @param {*} value - Value to create key from * @returns {string} Base64-encoded cache key */ static createKey(type, value) { const hash = Buffer.from(JSON.stringify(value)) .toString('base64') .replace(/[/+=]/g, '_'); return `${type}:${hash}`; } } export default CacheManager; ``` # core/chunker.js ```js // chunker.js import debug from 'debug'; import natural from 'natural'; const log = debug('mongodb-rag:chunker'); class Chunker { constructor(options = {}) { this.options = { strategy: options.strategy || 'sliding', maxChunkSize: options.maxChunkSize || 500, overlap: options.overlap || 50, splitter: options.splitter || 'sentence' }; } async chunkDocument(document) { log(`Chunking document ${document.id} using ${this.options.strategy} strategy`); switch (this.options.strategy) { case 'sliding': return this.slidingWindowChunk(document); case 'semantic': return this.semanticChunk(document); case 'recursive': return this.recursiveChunk(document); default: throw new Error(`Unknown chunking strategy: ${this.options.strategy}`); } } slidingWindowChunk(document) { const text = document.content; const chunks = []; const sentences = this.splitIntoSentences(text); let currentChunk = []; let currentLength = 0; for (const sentence of sentences) { const sentenceLength = sentence.length; if (currentLength + sentenceLength > this.options.maxChunkSize && currentChunk.length > 0) { chunks.push(this.createChunk(document, currentChunk.join(' '))); const overlapSentences = this.calculateOverlap(currentChunk); currentChunk = overlapSentences; currentLength = overlapSentences.join(' ').length; } currentChunk.push(sentence); currentLength += sentenceLength; } if (currentChunk.length > 0) { chunks.push(this.createChunk(document, currentChunk.join(' '))); } log(`Created ${chunks.length} chunks for document ${document.id}`); return chunks; } semanticChunk(document) { const text = document.content; const tokenizer = new natural.SentenceTokenizer(); const sentences = tokenizer.tokenize(text); const chunks = []; let currentChunk = []; let currentLength = 0; for (const sentence of sentences) { const sentenceLength = sentence.length; if (currentLength + sentenceLength > this.options.maxChunkSize && currentChunk.length > 0) { chunks.push(this.createChunk(document, currentChunk.join(' '))); currentChunk = []; currentLength = 0; } currentChunk.push(sentence); currentLength += sentenceLength; } if (currentChunk.length > 0) { chunks.push(this.createChunk(document, currentChunk.join(' '))); } log(`Created ${chunks.length} semantic chunks for document ${document.id}`); return chunks; } recursiveChunk(document) { const text = document.content; const chunks = []; const sections = text.split(/\n\s*\n/); // Split based on paragraphs for (const section of sections) { if (section.length <= this.options.maxChunkSize) { chunks.push(this.createChunk(document, section)); } else { // If a section is too large, break it into sentences const sentences = this.splitIntoSentences(section); let currentChunk = []; let currentLength = 0; for (const sentence of sentences) { const sentenceLength = sentence.length; if (currentLength + sentenceLength > this.options.maxChunkSize && currentChunk.length > 0) { chunks.push(this.createChunk(document, currentChunk.join(' '))); currentChunk = []; currentLength = 0; } currentChunk.push(sentence); currentLength += sentenceLength; } if (currentChunk.length > 0) { chunks.push(this.createChunk(document, currentChunk.join(' '))); } } } log(`Created ${chunks.length} recursive chunks for document ${document.id}`); return chunks; } splitIntoSentences(text) { return text .replace(/([.!?])\s+/g, '$1\n') .split('\n') .map(s => s.trim()) .filter(s => s.length > 0); } calculateOverlap(sentences) { const overlapTokenCount = Math.ceil(sentences.length * (this.options.overlap / 100)); return sentences.slice(-overlapTokenCount); } createChunk(document, content) { return { documentId: document.id, content: content, metadata: { ...document.metadata, chunkIndex: Date.now(), strategy: this.options.strategy } }; } } export default Chunker; ``` # core/IndexManager.js ```js // src/core/IndexManager.js import debug from 'debug'; const log = debug('mongodb-rag:index'); class IndexManager { constructor(collection, config = {}) { this.collection = collection; this.options = { indexName: config.indexName || 'vector_index', dimensions: config.embedding?.dimensions || 1536, similarity: config.search?.similarity || 'cosine', embeddingPath: config.embeddingFieldPath || 'embedding', ...config }; } async ensureIndexes() { try { console.log('Checking existing indexes...'); const existingIndexes = await this.collection.listIndexes().toArray(); const hasVectorIndex = existingIndexes.some(index => index.name === this.options.indexName); if (!hasVectorIndex) { console.log('Creating missing vector search index...'); const indexDefinition = { name: this.options.indexName, type: 'vectorSearch', definition: { fields: [ { type: 'vector', path: this.options.embeddingPath, numDimensions: this.options.dimensions, similarity: this.options.similarity, quantization: 'none' } ] } }; await this.collection.createSearchIndex(indexDefinition); console.log(`Vector search index '${this.options.indexName}' created successfully.`); } else { console.log(`Vector search index '${this.options.indexName}' already exists.`); } // Ensure supporting metadata indexes exist await this.createSupportingIndexes();