mongodb-rag
RAG (Retrieval Augmented Generation) library for MongoDB Vector Search
1,712 lines (1,426 loc) • 123 kB
# cli/asciiLogo.js
```js
// src/cli/asciiLogo.js
import chalk from 'chalk';
export function displayLogo() {
// Using template literals for better readability
const logo = `
###%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%###
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%##
##%%%%%%%%%%%%%%%%%%##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%#%%%%%%%%%%%%%%%##
#%%%%%%%%%%%%%%%%%%%*:-*#%%%%%%%%%%%%%##*+-::........:-=*##%%%%%%%%%%%%%##=.*%%%%%%%%%%%%%%%%#
@#%%%%%%%%%%%%%%%%%%%%+...:=*#%%%%%%#+-:....................:+#%%%%%%%#+-:..=%%%%%%%%%%%%%%%%%#@
@#%%%%%%%%%%%%%%%%%%%%%#-.....:-*#%%%%#+-......:.:::::....-+#%%%%%*=:.....:*%%%%%%%%%%%%%%%%%%#@
@#%%%%%%%%%%%%%%%%%%%%%%%#=:::::::.-*%%%%%*=:::::::::::-*%%%%%*-:::::::::+#%%%%%%%%%%%%%%%%%%%#@
@#%%%%%%%%%%%%%%%%%%%%%%%%%%*=-::::::::=#@%%%+-::::::=#%%%#+-::::::::-+##%%%%%%%%%%%%%%%%%%%%%#@
@#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%#+-:::::::+%%%#+:::-#%%%+-:::::::=*#%%%%%%%%%%%%%%%%%%%%%%%%%%#@
@#%%%%%%%%%%%%%%%%%%%%%%%#==*%%@@@@%#+-:::::+%%%*-+%%%+:--::-=*%%%%%%%*==#%%%%%%%%%%%%%%%%%%%%#@
@#%%%%%%%%%%%%%%%%%%%%%%#-::::+%@@@@@@%#+-----#%%%%%#=----=*%%%%%%%%*-::::*%%%%%%%%%%%%%%%%%%%#@
@#%%%%%%%%%%%%%%%%%%%%%#=:::=%%@%+**#%@%%%+----*@%%#----=#%%%%#*++#@%%+-::-*%%%%%%%%%%%%%%%%%%#@
@#%%%%%%%%%%%%%%%%%%%%%+---=%%%+-====*%%@%%%=---*@*----*%%%%#*=----=%%%+--:=#%%%%%%%%%%%%%%%%%#@
@#%%%%%%%%%%%%%%%%%%%%#----#%%=-===+%#:.-#@%%+---=---+%%%#-..+%+----=%%#=---*%%%%%%%%%%%%%%%%%#@
@#%%%%%%%%%%%%%%%%%%%%*----%%#-====#%%-.:#@%%%*=---=#%%%%%+..+@%+----#%%=---+%%%%%%%%%%%%%%%%%#@
@#%%%%%%%%%%%%%%%%%%%%*----#%#=====#%%%@%%%%@%%%%%%%%%%#%%%@@%%%+====*%%=---+%%%%%%%%%%%%%%%%%#@
MongoDB RAG - Vector Search Magic
@#%%%%%%%%%%%%%%%%%%%%#====*%%+====+*%%%%#**#%%%%%%%%%****#%%%#+====+%%#==--*%%%%%%%%%%%%%%%%%#@
@#%%%%%%%%%%%%%%%%%%%%%*====#%%++++++++++***#%%+-:-+%%#***++++++++++#%%+====#%%%%%%%%%%%%%%%%%#@
@#%%%%%%%%%%%%%%%%%%%%%#+====%%%#++++******#@%#::::.*%%#***+++++++*%%%+=+==#%%%%%%%%%%%%%%%%%%#@
@#%%%%%%%%%%%%%%%%%%%%%%#++++=*@@%%##**##%%@@@%+:::-%%%@%%##***##%%%#=++++*%%%%%%%%%%%%%%%%%%%#@
@#%%%%%%%%%%%%%%%%%%%%%%%%*++++=+%@@@@@@@@*=+@@%=:-%@@*=+%@@%@@@@@*=+++++#%%%%%%%%%%%%%%%%%%%%#@
@#%%%%%%%%%%%%%%%%%%%%%%%%%#*++***+======++++*@@%+#@%*+++++=====++*++++*%%%%%%%%%%%%%%%%%%%%%%#@
@#%%%%%%%%%%%%%%%%%%%%%%%#@%%%#*************#%@@@%%%%%#**++++++*+++**#%%%%#%%%%%%%%%%%%%%%%%%%#@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
`;
// Create a banner with the version number
const version = process.env.npm_package_version || '0.15.0';
const banner = `
${chalk.blue(logo)}
${chalk.yellow('🚀 MongoDB RAG CLI')} ${chalk.green(`v${version}`)}
${chalk.cyan('Transform your data into intelligent insights with vector search\n')}`;
console.log(banner);
}
```
# cli/celebration.js
```js
// src/cli/celebration.js
import chalk from 'chalk';
const frames = [
`
🎉
\\|/
|
/ \\
`,
`
🎉
/|\\
|
/ \\
`,
`
✨
\\|/
|
/ \\
`
];
// Create a multi-color border effect
function colorBorder(message) {
const border = '===============================';
return `
${chalk.blue(border)}
${chalk.green(message)}
${chalk.magenta(border)}
`;
}
export function celebrate(message) {
let frameIndex = 0;
const animation = setInterval(() => {
process.stdout.write('\x1B[2J\x1B[3J\x1B[H');
console.log(colorBorder(message));
console.log(chalk.cyan(frames[frameIndex]));
frameIndex = (frameIndex + 1) % frames.length;
}, 200);
// Stop after 2 seconds
setTimeout(() => {
clearInterval(animation);
process.stdout.write('\x1B[2J\x1B[3J\x1B[H');
console.log(colorBorder(message));
}, 2000);
}
```
# cli/createRagApp.js
```js
// src/cli/createRagApp.js
import fs from 'fs';
import path from 'path';
import { execSync } from 'child_process';
import chalk from 'chalk';
import { displayLogo } from './asciiLogo.js';
import MongoSpinner from './spinner.js';
import { celebrate } from './celebration.js';
import FunProgressBar from './progressBar.js';
function generateReadme(projectName) {
return `# ${projectName}

This RAG (Retrieval Augmented Generation) application was created with [mongodb-rag](https://npmjs.com/package/mongodb-rag).
## Getting Started
1. Configure your environment variables in \`.env\`:
- Set your MongoDB connection string
- Configure your embedding provider (OpenAI or Ollama)
- Adjust other settings as needed
2. Install dependencies:
\`\`\`bash
npm install
\`\`\`
3. Start the development server:
\`\`\`bash
npm run dev
\`\`\`
## Available CLI Commands
The mongodb-rag CLI provides several commands to manage your RAG application:
### Configuration
- \`npx mongodb-rag init\` - Initialize configuration
- \`npx mongodb-rag test-connection\` - Test provider connection
- \`npx mongodb-rag show-config\` - Display current configuration
- \`npx mongodb-rag edit-config\` - Modify configuration
### Vector Search Index Management
- \`npx mongodb-rag create-index\` - Create vector search index
- \`npx mongodb-rag show-indexes\` - List all indexes
- \`npx mongodb-rag delete-index\` - Remove an index
### Document Management
- \`npx mongodb-rag ingest --file <path>\` - Ingest a single file
- \`npx mongodb-rag ingest --directory <path>\` - Ingest a directory of files
- Options:
- \`--recursive\` - Process subdirectories
- \`--chunk-size <number>\` - Tokens per chunk
- \`--chunk-overlap <number>\` - Overlap between chunks
- \`--chunk-method <method>\` - Chunking strategy (fixed/recursive/semantic)
### Search
- \`npx mongodb-rag search "your query"\` - Search documents
- Options:
- \`--maxResults <number>\` - Maximum results (default: 5)
- \`--minScore <number>\` - Minimum similarity score (default: 0.7)
## API Endpoints
- POST \`/ingest\` - Ingest documents
- GET \`/search?query=<text>\` - Search documents
- DELETE \`/delete/:id\` - Delete a document
## Documentation
For detailed documentation, visit:
- [MongoDB RAG Documentation](https://mongodb-developer.github.io/mongodb-rag)
- [MongoDB Vector Search](https://www.mongodb.com/docs/atlas/atlas-vector-search/)
## License
This project is licensed under the MIT License.
`;
}
function createBackendFiles(projectPath) {
// Create backend directory structure
fs.mkdirSync(path.join(projectPath, 'backend'), { recursive: true });
fs.mkdirSync(path.join(projectPath, 'backend', 'config'), { recursive: true });
// Create backend package.json
fs.writeFileSync(path.join(projectPath, 'backend', 'package.json'), JSON.stringify({
name: `${path.basename(projectPath)}-backend`,
version: "1.0.0",
type: "module",
scripts: {
"start": "node server.js",
"dev": "nodemon server.js"
},
dependencies: {
"express": "^4.18.2",
"mongodb": "^6.3.0",
"dotenv": "^16.0.3",
"cors": "^2.8.5",
"mongodb-rag": "latest",
"nodemon": "^3.0.2"
}
}, null, 2));
// Create backend config
fs.writeFileSync(path.join(projectPath, 'backend', 'config', 'dbConfig.js'), `
import dotenv from 'dotenv';
dotenv.config();
export const config = {
mongoUrl: process.env.MONGODB_URI,
database: "mongorag",
collection: "documents",
embedding: {
provider: process.env.EMBEDDING_PROVIDER || "openai",
apiKey: process.env.EMBEDDING_API_KEY,
model: process.env.EMBEDDING_MODEL || "text-embedding-3-small",
dimensions: 1536
},
indexName: process.env.VECTOR_INDEX || "default"
};
`);
// Create backend server.js
fs.writeFileSync(path.join(projectPath, 'backend', 'server.js'), `
import express from 'express';
import cors from 'cors';
import { MongoRAG } from 'mongodb-rag';
import { config } from './config/dbConfig.js';
const app = express();
app.use(express.json());
app.use(cors());
const rag = new MongoRAG(config);
// Ingest Documents
app.post('/api/ingest', async (req, res) => {
try {
const { documents } = req.body;
const result = await rag.ingestBatch(documents);
res.json(result);
} catch (error) {
res.status(500).json({ error: error.message });
}
});
// Search Documents
app.get('/api/search', async (req, res) => {
try {
const { query } = req.query;
const results = await rag.search(query, { maxResults: 5 });
res.json(results);
} catch (error) {
res.status(500).json({ error: error.message });
}
});
// Delete a document
app.delete('/api/documents/:id', async (req, res) => {
try {
const col = await rag._getCollection();
await col.deleteOne({ documentId: req.params.id });
res.json({ message: 'Deleted successfully' });
} catch (error) {
res.status(500).json({ error: error.message });
}
});
const PORT = process.env.PORT || 5000;
app.listen(PORT, () => {
console.log(\`🚀 Server running on port \${PORT}\`);
});
`);
}
function createFrontendFiles(projectPath) {
// Create frontend directory structure
fs.mkdirSync(path.join(projectPath, 'frontend'), { recursive: true });
fs.mkdirSync(path.join(projectPath, 'frontend', 'src'), { recursive: true });
fs.mkdirSync(path.join(projectPath, 'frontend', 'src', 'components'), { recursive: true });
fs.mkdirSync(path.join(projectPath, 'frontend', 'public'), { recursive: true });
// Create frontend package.json using Vite
fs.writeFileSync(path.join(projectPath, 'frontend', 'package.json'), JSON.stringify({
name: `${path.basename(projectPath)}-frontend`,
version: "1.0.0",
type: "module",
scripts: {
"dev": "vite",
"build": "vite build",
"preview": "vite preview"
},
dependencies: {
"react": "^18.2.0",
"react-dom": "^18.2.0",
"axios": "^1.6.2"
},
devDependencies: {
"@vitejs/plugin-react": "^4.2.1",
"vite": "^5.0.8"
}
}, null, 2));
// Create frontend components
const componentFiles = {
'Header.jsx': `
export function Header() {
return (
<header className="header">
<h1>MongoDB RAG Application</h1>
</header>
);
}`,
'Chatbot.jsx': `
import { useState } from 'react';
import axios from 'axios';
export function Chatbot() {
const [query, setQuery] = useState('');
const [results, setResults] = useState([]);
const [loading, setLoading] = useState(false);
const handleSearch = async (e) => {
e.preventDefault();
setLoading(true);
try {
const response = await axios.get(\`http://localhost:5000/api/search?query=\${query}\`);
setResults(response.data);
} catch (error) {
console.error('Search failed:', error);
}
setLoading(false);
};
return (
<div className="chatbot">
<form onSubmit={handleSearch}>
<input
type="text"
value={query}
onChange={(e) => setQuery(e.target.value)}
placeholder="Ask a question..."
/>
<button type="submit" disabled={loading}>
{loading ? 'Searching...' : 'Search'}
</button>
</form>
<div className="results">
{results.map((result, index) => (
<div key={index} className="result">
<p>{result.content}</p>
<small>Score: {result.score}</small>
</div>
))}
</div>
</div>
);
}`,
'Footer.jsx': `
export function Footer() {
return (
<footer className="footer">
<p>Built with <a href="https://npmjs.com/package/mongodb-rag">mongodb-rag</a></p>
</footer>
);
}`
};
// Create each component file
Object.entries(componentFiles).forEach(([filename, content]) => {
fs.writeFileSync(
path.join(projectPath, 'frontend', 'src', 'components', filename),
content
);
});
// Create main App.jsx
fs.writeFileSync(path.join(projectPath, 'frontend', 'src', 'App.jsx'), `
import { Header } from './components/Header';
import { Chatbot } from './components/Chatbot';
import { Footer } from './components/Footer';
import './styles.css';
function App() {
return (
<div className="app">
<Header />
<main>
<Chatbot />
</main>
<Footer />
</div>
);
}
export default App;
`);
// Create styles.css
fs.writeFileSync(path.join(projectPath, 'frontend', 'src', 'styles.css'), `
.app {
max-width: 800px;
margin: 0 auto;
padding: 2rem;
}
.header {
text-align: center;
margin-bottom: 2rem;
}
.chatbot {
margin: 2rem 0;
}
.chatbot form {
display: flex;
gap: 1rem;
margin-bottom: 1rem;
}
.chatbot input {
flex: 1;
padding: 0.5rem;
font-size: 1rem;
}
.chatbot button {
padding: 0.5rem 1rem;
background: #00684A;
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
}
.chatbot button:disabled {
background: #ccc;
}
.results {
margin-top: 2rem;
}
.result {
padding: 1rem;
border: 1px solid #ddd;
margin-bottom: 1rem;
border-radius: 4px;
}
.footer {
text-align: center;
margin-top: 2rem;
padding-top: 1rem;
border-top: 1px solid #ddd;
}
`);
// Create index.html
fs.writeFileSync(path.join(projectPath, 'frontend', 'index.html'), `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>MongoDB RAG App</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/main.jsx"></script>
</body>
</html>
`);
// Create main.jsx
fs.writeFileSync(path.join(projectPath, 'frontend', 'src', 'main.jsx'), `
import React from 'react'
import ReactDOM from 'react-dom/client'
import App from './App'
ReactDOM.createRoot(document.getElementById('root')).render(
<React.StrictMode>
<App />
</React.StrictMode>,
)
`);
}
export async function createRagApp(projectName) {
const spinner = new MongoSpinner();
const progressBar = new FunProgressBar();
// Display the ASCII logo
displayLogo();
const projectPath = path.resolve(process.cwd(), projectName);
if (fs.existsSync(projectPath)) {
console.error(chalk.red(`Error: Directory "${projectName}" already exists.`));
process.exit(1);
}
console.log(chalk.green(`\n🚀 Creating a new RAG app in ${projectPath}\n`));
// Start the creation process with spinner
spinner.start('Initializing your RAG app');
fs.mkdirSync(projectPath, { recursive: true });
await new Promise(resolve => setTimeout(resolve, 1000));
spinner.stop(true);
// Show progress while creating files
console.log(chalk.cyan('\nPreparing your MongoDB RAG environment...'));
let currentProgress = 0;
progressBar.update(currentProgress);
// Create backend
console.log(chalk.blue('\n📂 Creating backend...'));
createBackendFiles(projectPath);
// Create frontend
console.log(chalk.blue('\n📂 Creating frontend...'));
createFrontendFiles(projectPath);
// Create root package.json for workspace
fs.writeFileSync(path.join(projectPath, 'package.json'), JSON.stringify({
name: projectName,
version: "1.0.0",
private: true,
workspaces: ["frontend", "backend"],
scripts: {
"dev": "concurrently \"npm run dev -w frontend\" \"npm run dev -w backend\"",
"build": "npm run build -w frontend",
"start": "npm run start -w backend"
},
devDependencies: {
"concurrently": "^8.2.2"
}
}, null, 2));
// Create root .env
fs.writeFileSync(path.join(projectPath, '.env'), `
MONGODB_URI=mongodb+srv://your_user:your_password@your-cluster.mongodb.net/mongorag
PORT=5000
# Embedding Configuration
EMBEDDING_PROVIDER=openai
EMBEDDING_API_KEY=your-embedding-api-key
EMBEDDING_MODEL=text-embedding-3-small
# MongoDB Vector Search Index
VECTOR_INDEX=default
`);
// Install dependencies
console.log(chalk.blue('\n📦 Installing dependencies...'));
// Run npm install inside the project directory (handles paths containing spaces)
execSync('npm install', { cwd: projectPath, stdio: 'inherit' });
// Create README
fs.writeFileSync(
path.join(projectPath, 'README.md'),
generateReadme(projectName)
);
// Show success message
celebrate('Full-Stack RAG App Created Successfully! 🎉');
console.log(chalk.green('\n✅ Project created successfully!'));
console.log(chalk.yellow('\nNext steps:'));
console.log(chalk.cyan(` 1. cd ${projectName}`));
console.log(chalk.cyan(' 2. Update .env with your MongoDB and API credentials'));
console.log(chalk.cyan(' 3. npm run dev # This will start both frontend and backend'));
}
```
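Note: the generated frontend declares `@vitejs/plugin-react` in its devDependencies, but `createRagApp` above never writes a `vite.config.js`, so the JSX components may not resolve `React` until one is added. A minimal sketch of the config the scaffold would likely need (the filename and wiring here are assumptions, not produced by the generator above):
```js
// frontend/vite.config.js: assumed addition; createRagApp does not generate this file
import { defineConfig } from 'vite';
import react from '@vitejs/plugin-react';

export default defineConfig({
  // Enables the JSX transform and the automatic React runtime for the .jsx components
  plugins: [react()]
});
```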
# cli/playground.js
```js
// src/cli/playground.js
import { execSync } from 'child_process';
import fs from 'fs';
import path from 'path';
import express from 'express';
import { fileURLToPath } from 'url';
import open from 'open';
import { MongoRAG } from '../index.js';
import { createServer } from 'http';
import { Server } from 'socket.io';
import multer from 'multer';
import cors from 'cors';
import dotenv from 'dotenv';
import pdfParse from 'pdf-parse/lib/pdf-parse.js';
import detect from 'detect-port'; // Library to find available ports
dotenv.config();
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Properly resolve playground UI paths based on how the package is installed
const findPlaygroundUiPath = () => {
// Possible locations for playground-ui based on different installation scenarios
const possiblePaths = [
// When installed as a dependency in node_modules
path.resolve(__dirname, '../../src/playground-ui'),
// When running from source/development environment
path.resolve(__dirname, '../playground-ui'),
// When globally installed
path.resolve(process.env.npm_config_prefix || '/usr/local', 'lib/node_modules/mongodb-rag/src/playground-ui')
];
for (const potentialPath of possiblePaths) {
if (fs.existsSync(potentialPath)) {
// console.log(`✅ Found playground UI at: ${potentialPath}`);
return potentialPath;
}
}
console.warn('⚠️ Could not locate playground-ui directory');
return null;
};
const PLAYGROUND_UI_PATH = findPlaygroundUiPath();
const DEFAULT_BACKEND_PORT = 4000;
const DEFAULT_FRONTEND_PORT = 3000;
// Use environment variables or fallback
let backendPort = process.env.BACKEND_PORT || DEFAULT_BACKEND_PORT;
let playgroundPort = process.env.PLAYGROUND_PORT || DEFAULT_FRONTEND_PORT;
// Function to find an available port if the default is in use
const findAvailablePort = async (preferredPort, defaultPort) => {
const availablePort = await detect(preferredPort);
return availablePort === preferredPort ? preferredPort : await detect(defaultPort);
};
console.log('Current directory:', __dirname);
console.log('Process working directory:', process.cwd());
const upload = multer({ storage: multer.memoryStorage() });
export async function startPlayground() {
const app = express();
const server = createServer(app);
const io = new Server(server, { cors: { origin: '*' } });
app.use(express.json());
app.use(cors());
let mongodbUrl = process.env.MONGODB_URI;
let databaseName = 'playground';
let collectionName = 'documents';
const configPath = path.join(process.cwd(), '.mongodb-rag.json');
console.log("š Looking for config file at:", configPath);
console.log("š Current Working Directory:", process.cwd());
if (fs.existsSync(configPath)) {
try {
const config = JSON.parse(fs.readFileSync(configPath, 'utf-8'));
console.log("ā
Loaded Configuration from File:", config);
mongodbUrl = config.mongoUrl || mongodbUrl;
databaseName = config.database || databaseName;
collectionName = config.collection || collectionName;
console.log("š Loaded MongoDB URL:", mongodbUrl);
console.log("š Loaded Database Name:", databaseName);
console.log("š Loaded Collection Name:", collectionName);
} catch (error) {
console.error("ā Error reading .mongodb-rag.json:", error.message);
}
} else {
console.warn("šØ Config file not found at:", configPath);
}
console.log("š Final MongoDB URL:", mongodbUrl);
console.log("š Final Database Name:", databaseName);
console.log("š Final Collection Name:", collectionName);
// Declare rag here to ensure it's in scope for all routes
let rag = null;
// Initialize RAG with configuration
try {
console.log("š¢ Before initializing MongoRAG:");
console.log(" š MongoDB URL:", mongodbUrl);
console.log(" š Database Name:", databaseName);
console.log(" š Collection Name:", collectionName);
rag = new MongoRAG({
mongoUrl: mongodbUrl,
database: databaseName,
collection: collectionName,
embedding: {
provider: process.env.EMBEDDING_PROVIDER || 'ollama',
apiKey: process.env.EMBEDDING_API_KEY,
model: process.env.EMBEDDING_MODEL || 'llama3',
dimensions: 1536,
baseUrl: process.env.EMBEDDING_BASE_URL || 'http://localhost:11434'
}
});
console.log("ā
MongoRAG Final Config:", JSON.stringify(rag.config, null, 2));
console.log("ā
After initializing MongoRAG:");
console.log(" š Database in rag.config:", rag.config.database);
console.log(" š Collection in rag.config:", rag.config.collection);
await rag.connect();
console.log('✅ Successfully connected to MongoDB');
} catch (error) {
console.error("ā ļø MongoDB Connection Error:", error.message);
console.info("ā¹ļø Playground will start with limited functionality");
}
// API endpoints...
app.post('/api/save-config', (req, res) => {
fs.writeFileSync('.mongodb-rag.json', JSON.stringify(req.body, null, 2));
res.json({ message: "Configuration saved successfully!" });
});
app.get("/api/config", (req, res) => {
if (!rag) {
return res.status(503).json({ error: "MongoDB connection not available" });
}
res.json({
mongoUrl: rag.config.mongoUrl || "Unknown",
database: rag.config.database || "Unknown",
collection: rag.config.collection || "Unknown",
provider: rag.config.embedding.provider || "Unknown",
apiKey: rag.config.embedding.apiKey || "Unknown",
model: rag.config.embedding.model || "Unknown",
dimensions: rag.config.embedding.dimensions || 1536,
batchSize: rag.config.embedding.batchSize || 100,
maxResults: rag.config.search.maxResults || 5,
minScore: rag.config.search.minScore || 0.7,
indexName: rag.config.indexName || "vector_index",
embeddingFieldPath: rag.config.embeddingFieldPath || "embedding"
});
});
app.post("/api/config", (req, res) => {
const newConfig = req.body;
// Update rag.config
rag.config.database = newConfig.database;
rag.config.collection = newConfig.collection;
rag.config.embeddingFieldPath = newConfig.embeddingFieldPath || "embedding";
rag.config.indexName = newConfig.indexName;
// Save to file (so changes persist)
fs.writeFileSync(".mongodb-rag.json", JSON.stringify(newConfig, null, 2));
res.json(newConfig);
});
// Add this new endpoint for creating vector search indexes
app.post('/api/indexes/create', async (req, res) => {
if (!rag) {
return res.status(503).json({
error: "MongoDB connection not available",
message: "Please check your MongoDB connection settings and try again"
});
}
try {
// Rename the destructured `path` so it does not shadow the imported path module
const { name, dimensions, path: fieldPath, similarity } = req.body;
const client = await rag.getClient();
const collection = client.db(rag.config.database).collection(rag.config.collection);
// Guard against duplicate index names before attempting creation
const existingIndexes = await collection.listIndexes().toArray();
if (existingIndexes.some(idx => idx.name === name)) {
return res.status(400).json({ error: "Index already exists" });
}
const indexConfig = {
name: name || "vector_index",
type: "vectorSearch",
definition: {
fields: [{
type: "vector",
path: fieldPath || "embedding",
numDimensions: dimensions || 1536,
similarity: similarity || "cosine"
}]
}
};
const result = await collection.createSearchIndex(indexConfig);
res.json({
success: true,
message: "Vector search index created successfully",
indexName: indexConfig.name,
result
});
} catch (error) {
console.error('Error creating index:', error);
res.status(500).json({ error: error.message });
}
});
// Add this endpoint to fetch documents
app.get('/api/documents', async (req, res) => {
if (!rag) {
return res.status(503).json({ error: "MongoDB connection not available" });
}
try {
const client = await rag.getClient();
const collection = client.db(rag.config.database).collection(rag.config.collection);
const documents = await collection.find({}).toArray();
res.json({ documents });
} catch (error) {
console.error('Error fetching documents:', error);
res.status(500).json({ error: error.message });
}
});
// Add this endpoint to fetch indexes
app.get('/api/indexes', async (req, res) => {
if (!rag) {
return res.status(503).json({ error: "MongoDB connection not available" });
}
try {
const client = await rag.getClient();
console.log("Using database for indexes:", rag.config.database);
console.log("Using collection for indexes:", rag.config.collection);
const collection = client.db(rag.config.database).collection(rag.config.collection);
// Fetch regular indexes
console.log("Fetching regular indexes...");
const regularIndexes = await collection.listIndexes().toArray();
console.log("Regular indexes fetched:", regularIndexes);
// Fetch search indexes
console.log("Fetching search indexes...");
const searchIndexes = await collection.aggregate([
{ $listSearchIndexes: {} }
]).toArray();
console.log("Search indexes fetched:", searchIndexes);
res.json({ regularIndexes, searchIndexes });
} catch (error) {
console.error('Error fetching indexes:', error);
res.status(500).json({ error: error.message });
}
});
// Add this endpoint to handle search queries
app.post('/api/search', async (req, res) => {
if (!rag) {
return res.status(503).json({ error: "MongoDB connection not available" });
}
const { query } = req.body;
try {
const client = await rag.getClient();
const collection = client.db(rag.config.database).collection(rag.config.collection);
// Perform a search using the query
const results = await collection.find({ $text: { $search: query } }).toArray();
res.json({ results });
} catch (error) {
console.error('Error performing search:', error);
res.status(500).json({ error: error.message });
}
});
io.on('connection', (socket) => {
socket.on('disconnect', () => {
console.log('User disconnected');
});
});
// Start the backend server
backendPort = await findAvailablePort(backendPort, DEFAULT_BACKEND_PORT);
server.listen(backendPort, () => {
console.log(`🚀 Playground backend running at http://localhost:${backendPort}`);
});
// Serve the React UI with proper path resolution and build process
if (PLAYGROUND_UI_PATH) {
const frontendBuildPath = path.join(PLAYGROUND_UI_PATH, 'build');
const frontendDistPath = path.join(PLAYGROUND_UI_PATH, 'dist');
// Determine which directory to use (build or dist)
let uiBuildPath = fs.existsSync(frontendBuildPath) ? frontendBuildPath :
(fs.existsSync(frontendDistPath) ? frontendDistPath : null);
if (!uiBuildPath) {
console.warn("ā ļø Frontend build not found. Attempting to build...");
try {
// Check if package.json exists in the playground-ui directory
const packageJsonPath = path.join(PLAYGROUND_UI_PATH, 'package.json');
if (!fs.existsSync(packageJsonPath)) {
throw new Error("package.json not found in playground-ui directory");
}
// Attempt to install dependencies and build
execSync(`cd "${PLAYGROUND_UI_PATH}" && npm install && npm run build`, {
stdio: 'inherit',
timeout: 300000 // 5 minute timeout for build process
});
// Re-check build paths after building
uiBuildPath = fs.existsSync(frontendBuildPath) ? frontendBuildPath :
(fs.existsSync(frontendDistPath) ? frontendDistPath : null);
if (!uiBuildPath) {
throw new Error("Build completed but build directory not found");
}
} catch (error) {
console.error(`⚠️ Failed to build frontend: ${error.message}`);
console.info("ℹ️ Starting in API-only mode");
return; // Exit frontend setup but keep backend running
}
}
const uiApp = express();
uiApp.use(express.static(uiBuildPath));
uiApp.get('*', (req, res) => {
res.sendFile(path.join(uiBuildPath, 'index.html'));
});
// Start the frontend server
playgroundPort = await findAvailablePort(playgroundPort, DEFAULT_FRONTEND_PORT);
uiApp.listen(playgroundPort, () => {
console.log(`🚀 Playground UI running at http://localhost:${playgroundPort}`);
open(`http://localhost:${playgroundPort}`);
});
} else {
console.warn("ā ļø Playground UI components not found. Running in API-only mode.");
console.info("ā¹ļø You can still use the API endpoints at http://localhost:" + backendPort);
}
}
```
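`startPlayground()` resolves its MongoDB settings from a `.mongodb-rag.json` in the current working directory, falling back to `MONGODB_URI` and the `EMBEDDING_*` environment variables. A minimal sketch of a config file this version would pick up; only `mongoUrl`, `database`, and `collection` are read from the file here, and the values below are placeholders:
```js
// write-playground-config.js: hedged helper that writes the .mongodb-rag.json
// consumed by startPlayground(); connection string and names are placeholders.
import fs from 'fs';

const config = {
  mongoUrl: 'mongodb+srv://user:password@your-cluster.mongodb.net',
  database: 'playground',
  collection: 'documents'
};

fs.writeFileSync('.mongodb-rag.json', JSON.stringify(config, null, 2));
console.log('Wrote .mongodb-rag.json for the mongodb-rag playground');
```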
# cli/progressBar.js
```js
// src/cli/progressBar.js
import chalk from 'chalk';
const funFacts = [
"Did you know? Vector search helps find similar items even if they use different words!",
"MongoDB Atlas Vector Search uses cosine similarity by default š",
"RAG helps combine the power of vector search with your own data š",
"Vector embeddings can capture semantic meaning beyond keywords šÆ",
"MongoDB can handle billions of vectors efficiently! š",
"Vector search is like giving your database a human-like understanding š§ "
];
class FunProgressBar {
constructor() {
this.width = 40;
this.currentFact = 0;
}
update(progress) {
const filled = Math.round(this.width * progress);
const empty = this.width - filled;
const filledBar = '█'.repeat(filled);
const emptyBar = '░'.repeat(empty);
process.stdout.clearLine();
process.stdout.cursorTo(0);
const percentage = Math.round(progress * 100);
process.stdout.write(
chalk.blue(`[${filledBar}${emptyBar}] ${percentage}%\n`) +
chalk.yellow(`Fun Fact: ${funFacts[this.currentFact]}\n`)
);
this.currentFact = (this.currentFact + 1) % funFacts.length;
}
}
export default FunProgressBar;
```
# cli/spinner.js
```js
// src/cli/spinner.js
import chalk from 'chalk';
class MongoSpinner {
constructor() {
this.frames = [
'🍃 ⠋⠙⠹',
'🍃 ⠸⠴⠦',
'🍃 ⠧⠇⠏',
'🍃 ⠙⠹⠸'
];
this.messages = [
'Preparing vector magic...',
'Calculating embeddings...',
'Optimizing search space...',
'Almost there...'
];
this.frameIndex = 0;
this.interval = null;
this.currentMessage = '';
this.isEnabled = this._checkEnabled();
}
_checkEnabled() {
// Check for required properties and methods
const requiredMethods = {
clearLine: process.stdout.clearLine,
cursorTo: process.stdout.cursorTo,
write: process.stdout.write
};
// Verify all methods exist and are functions
const hasRequiredMethods = Object.values(requiredMethods)
.every(method => method && typeof method === 'function');
return Boolean(
process.stdout.isTTY &&
!process.env.NO_SPINNER &&
!process.env.CI &&
hasRequiredMethods
);
}
start(text = '') {
this.currentMessage = text;
if (!this.isEnabled) {
// In non-interactive mode, just log once
this._fallbackLog(this.messages[0], text);
return;
}
process.stdout.write('\n');
this.interval = setInterval(() => {
const frame = this.frames[this.frameIndex];
const message = this.messages[this.frameIndex];
process.stdout.clearLine();
process.stdout.cursorTo(0);
process.stdout.write(
chalk.blue(frame) + ' ' +
chalk.cyan(message) + ' ' +
chalk.yellow(this.currentMessage)
);
this.frameIndex = (this.frameIndex + 1) % this.frames.length;
}, 800);
}
updateMessage(text) {
this.currentMessage = text;
if (!this.isEnabled) {
this._fallbackLog(text);
return;
}
}
stop(success = true) {
if (this.interval) {
clearInterval(this.interval);
this.interval = null;
}
if (this.isEnabled) {
process.stdout.clearLine();
process.stdout.cursorTo(0);
if (success) {
process.stdout.write(
chalk.green('✨ Vector magic complete! ') +
chalk.blue('🎉\n')
);
} else {
process.stdout.write(
chalk.red('❌ Operation failed ') +
chalk.yellow('😢\n')
);
}
}
}
_fallbackLog(...messages) {
// Use plain console.log for non-interactive environments
console.log(...messages);
}
}
export default MongoSpinner;
```
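The two CLI helpers are plain classes with no shared state, so they can be exercised directly. A short sketch of how they compose, as `createRagApp` uses them (the loop and timings are illustrative; the import paths assume a script sitting next to the files above):
```js
// cli-demo.js: illustrative only, drives MongoSpinner and FunProgressBar together
import MongoSpinner from './spinner.js';
import FunProgressBar from './progressBar.js';

const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

const spinner = new MongoSpinner();
spinner.start('Connecting to Atlas');
await sleep(2000);
spinner.stop(true); // stop(false) prints the failure variant instead

const bar = new FunProgressBar();
for (let step = 0; step <= 10; step++) {
  bar.update(step / 10); // update() takes a 0-1 fraction and rotates the fun facts
  await sleep(300);
}
```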
# core/BatchProcessor.js
```js
import debug from 'debug';
import EventEmitter from 'events';
const log = debug('mongodb-rag:batch');
/**
* Handles batch processing of items with retry logic and concurrency control
* @extends EventEmitter
* @fires BatchProcessor#progress
* @fires BatchProcessor#batchError
*/
class BatchProcessor extends EventEmitter {
/**
* Creates a new batch processor instance
* @param {Object} options - Configuration options
* @param {number} [options.batchSize=100] - Number of items to process in each batch
* @param {number} [options.concurrency=2] - Number of batches to process concurrently
* @param {number} [options.retryAttempts=3] - Maximum number of retry attempts per batch
* @param {number} [options.retryDelay=1000] - Delay between retries in milliseconds
*/
constructor(options = {}) {
super();
this.options = {
batchSize: options.batchSize || 100,
concurrency: options.concurrency || 2,
retryAttempts: options.retryAttempts || 3,
retryDelay: options.retryDelay || 1000,
...options
};
this.stats = {
processed: 0,
failed: 0,
retried: 0,
total: 0
};
}
/**
* Processes a single batch of items with retry logic
* @param {Array} items - Items to process in this batch
* @param {Function} processor - Function to process the items
* @returns {Promise<{results: Array, errors: Array}>} Results and errors from processing
* @fires BatchProcessor#batchError
*/
async processBatch(items, processor) {
const results = [];
const errors = [];
try {
log(`Processing batch of ${items.length} items`);
for (let attempt = 1; attempt <= this.options.retryAttempts; attempt++) {
try {
const batchResults = await processor(items);
results.push(...batchResults);
this.stats.processed += items.length;
break;
} catch (error) {
if (attempt === this.options.retryAttempts) {
log(`Batch failed after ${attempt} attempts`);
this.stats.failed += items.length;
errors.push({ items, error });
throw error;
}
this.stats.retried += items.length;
log(`Retry attempt ${attempt} after error: ${error.message}`);
await this._sleep(this.options.retryDelay * attempt);
}
}
} catch (error) {
this.emit('batchError', { items, error });
}
return { results, errors };
}
/**
* Processes all items in batches with concurrency control
* @param {Array} items - All items to process
* @param {Function} processor - Function to process each batch
* @returns {Promise<{results: Array, errors: Array}>} Combined results and errors from all batches
* @fires BatchProcessor#progress
*/
async process(items, processor) {
this.stats.total = items.length;
const batches = this._createBatches(items);
const results = [];
const errors = [];
// Process batches with concurrency control
for (let i = 0; i < batches.length; i += this.options.concurrency) {
const batchPromises = batches
.slice(i, i + this.options.concurrency)
.map(batch => this.processBatch(batch, processor));
const batchResults = await Promise.allSettled(batchPromises);
batchResults.forEach((result, index) => {
if (result.status === 'fulfilled') {
results.push(...result.value.results);
errors.push(...result.value.errors);
} else {
const failedBatch = batches[i + index];
errors.push({
items: failedBatch,
error: result.reason
});
}
});
/**
* Progress event
* @event BatchProcessor#progress
* @type {Object}
* @property {number} processed - Number of successfully processed items
* @property {number} failed - Number of failed items
* @property {number} total - Total number of items
* @property {number} percent - Percentage of completion
*/
this.emit('progress', {
processed: this.stats.processed,
failed: this.stats.failed,
total: this.stats.total,
percent: Math.round((this.stats.processed + this.stats.failed) / this.stats.total * 100)
});
}
return { results, errors };
}
/**
* Splits array of items into batches
* @private
* @param {Array} items - Items to split into batches
* @returns {Array<Array>} Array of batches
*/
_createBatches(items) {
const batches = [];
for (let i = 0; i < items.length; i += this.options.batchSize) {
batches.push(items.slice(i, i + this.options.batchSize));
}
return batches;
}
/**
* Utility function to pause execution
* @private
* @param {number} ms - Milliseconds to sleep
* @returns {Promise<void>}
*/
_sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
/**
* Returns current processing statistics
* @returns {Object} Current processing stats including success rate
* @property {number} processed - Number of successfully processed items
* @property {number} failed - Number of failed items
* @property {number} retried - Number of retried items
* @property {number} total - Total number of items
* @property {string} successRate - Percentage of successful processing
*/
getStats() {
return {
...this.stats,
successRate: this.stats.total ?
(this.stats.processed / this.stats.total * 100).toFixed(2) + '%' :
'0%'
};
}
}
export default BatchProcessor;
```
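`BatchProcessor` only orchestrates batching, retries, and concurrency; the actual work is supplied as an async callback that receives one batch and returns an array of results. A minimal sketch, assuming the file layout shown above (the callback is a stand-in for real embedding or insert calls):
```js
// batch-demo.js: illustrative use of BatchProcessor
import BatchProcessor from './BatchProcessor.js';

const processor = new BatchProcessor({ batchSize: 50, concurrency: 2, retryAttempts: 3 });

processor.on('progress', ({ processed, failed, total, percent }) => {
  console.log(`progress: ${processed}/${total} processed, ${failed} failed (${percent}%)`);
});
processor.on('batchError', ({ items, error }) => {
  console.error(`batch of ${items.length} items failed: ${error.message}`);
});

const items = Array.from({ length: 500 }, (_, i) => ({ id: i }));

// The callback receives one batch (an array) and must return an array of results.
const { results, errors } = await processor.process(items, async (batch) =>
  batch.map((item) => ({ id: item.id, ok: true })) // stand-in for real per-batch work
);

console.log(processor.getStats()); // { processed, failed, retried, total, successRate }
```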
# core/CacheManager.js
```js
import debug from 'debug';
const log = debug('mongodb-rag:cache');
/**
* Manages an in-memory cache with TTL and size limits
* Provides caching functionality with automatic expiration and eviction
*/
class CacheManager {
/**
* Creates a new cache manager instance
* @param {Object} options - Configuration options
* @param {number} [options.ttl=3600] - Time-to-live in seconds (default 1 hour)
* @param {number} [options.maxSize=1000] - Maximum number of entries in cache
*/
constructor(options = {}) {
this.options = {
ttl: options.ttl || 3600, // 1 hour default TTL
maxSize: options.maxSize || 1000,
...options
};
this.cache = new Map();
this.keyTimestamps = new Map();
this.stats = {
hits: 0,
misses: 0,
evictions: 0
};
}
/**
* Retrieves a value from cache
* @param {string} key - Cache key to lookup
* @returns {Promise<*|null>} Cached value or null if not found/expired
*/
async get(key) {
const entry = this.cache.get(key);
if (!entry) {
this.stats.misses++;
return null;
}
if (this._isExpired(key)) {
this.delete(key);
this.stats.misses++;
return null;
}
this.stats.hits++;
return entry.value;
}
/**
* Stores a value in cache
* @param {string} key - Cache key
* @param {*} value - Value to store
* @returns {Promise<boolean>} True if value was stored successfully
*/
async set(key, value) {
if (this.cache.size >= this.options.maxSize) {
this._evictOldest();
}
this.cache.set(key, {
value,
timestamp: Date.now()
});
this.keyTimestamps.set(key, Date.now());
return true;
}
/**
* Removes an entry from cache
* @param {string} key - Cache key to remove
* @returns {Promise<void>}
*/
async delete(key) {
this.cache.delete(key);
this.keyTimestamps.delete(key);
}
/**
* Clears all entries from cache and resets statistics
* @returns {Promise<void>}
*/
async clear() {
this.cache.clear();
this.keyTimestamps.clear();
this.stats = {
hits: 0,
misses: 0,
evictions: 0
};
}
/**
* Returns current cache statistics
* @returns {Object} Cache statistics
* @property {number} hits - Number of cache hits
* @property {number} misses - Number of cache misses
* @property {number} evictions - Number of evicted entries
* @property {number} size - Current number of entries
* @property {number} maxSize - Maximum allowed entries
* @property {number} hitRate - Cache hit rate (0-1)
*/
getStats() {
return {
...this.stats,
size: this.cache.size,
maxSize: this.options.maxSize,
hitRate: this._calculateHitRate()
};
}
/**
* Checks if a cache entry has expired
* @private
* @param {string} key - Cache key to check
* @returns {boolean} True if entry has expired
*/
_isExpired(key) {
const timestamp = this.keyTimestamps.get(key);
if (!timestamp) return true;
const age = Date.now() - timestamp;
return age > this.options.ttl * 1000;
}
/**
* Removes the oldest entry from cache
* @private
*/
_evictOldest() {
const oldestKey = Array.from(this.keyTimestamps.entries())
.sort(([, a], [, b]) => a - b)[0]?.[0];
if (oldestKey) {
this.delete(oldestKey);
this.stats.evictions++;
}
}
/**
* Calculates the cache hit rate
* @private
* @returns {number} Hit rate between 0 and 1
*/
_calculateHitRate() {
const total = this.stats.hits + this.stats.misses;
return total === 0 ? 0 : this.stats.hits / total;
}
/**
* Creates a deterministic cache key from a value
* @static
* @param {string} type - Type prefix for the key
* @param {*} value - Value to create key from
* @returns {string} Base64-encoded cache key
*/
static createKey(type, value) {
const hash = Buffer.from(JSON.stringify(value))
.toString('base64')
.replace(/[/+=]/g, '_');
return `${type}:${hash}`;
}
}
export default CacheManager;
```
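`CacheManager` is a plain in-memory map with TTL-based expiry and oldest-first eviction, so a typical use is memoizing embedding lookups keyed by `CacheManager.createKey()`. A minimal sketch under those assumptions:
```js
// cache-demo.js: illustrative use of CacheManager for embedding lookups
import CacheManager from './CacheManager.js';

const cache = new CacheManager({ ttl: 600, maxSize: 500 }); // 10-minute TTL, 500 entries

// createKey() derives a deterministic, base64-based key from any JSON-serializable value
const key = CacheManager.createKey('embedding', 'What is vector search?');

let embedding = await cache.get(key); // resolves to null on a miss or after expiry
if (!embedding) {
  embedding = [0.12, -0.04, 0.33]; // stand-in for a real embedding-provider call
  await cache.set(key, embedding);
}

console.log(cache.getStats()); // { hits, misses, evictions, size, maxSize, hitRate }
```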
# core/chunker.js
```js
// chunker.js
import debug from 'debug';
import natural from 'natural';
const log = debug('mongodb-rag:chunker');
class Chunker {
constructor(options = {}) {
this.options = {
strategy: options.strategy || 'sliding',
maxChunkSize: options.maxChunkSize || 500,
overlap: options.overlap || 50,
splitter: options.splitter || 'sentence'
};
}
async chunkDocument(document) {
log(`Chunking document ${document.id} using ${this.options.strategy} strategy`);
switch (this.options.strategy) {
case 'sliding':
return this.slidingWindowChunk(document);
case 'semantic':
return this.semanticChunk(document);
case 'recursive':
return this.recursiveChunk(document);
default:
throw new Error(`Unknown chunking strategy: ${this.options.strategy}`);
}
}
slidingWindowChunk(document) {
const text = document.content;
const chunks = [];
const sentences = this.splitIntoSentences(text);
let currentChunk = [];
let currentLength = 0;
for (const sentence of sentences) {
const sentenceLength = sentence.length;
if (currentLength + sentenceLength > this.options.maxChunkSize && currentChunk.length > 0) {
chunks.push(this.createChunk(document, currentChunk.join(' ')));
const overlapSentences = this.calculateOverlap(currentChunk);
currentChunk = overlapSentences;
currentLength = overlapSentences.join(' ').length;
}
currentChunk.push(sentence);
currentLength += sentenceLength;
}
if (currentChunk.length > 0) {
chunks.push(this.createChunk(document, currentChunk.join(' ')));
}
log(`Created ${chunks.length} chunks for document ${document.id}`);
return chunks;
}
semanticChunk(document) {
const text = document.content;
const tokenizer = new natural.SentenceTokenizer();
const sentences = tokenizer.tokenize(text);
const chunks = [];
let currentChunk = [];
let currentLength = 0;
for (const sentence of sentences) {
const sentenceLength = sentence.length;
if (currentLength + sentenceLength > this.options.maxChunkSize && currentChunk.length > 0) {
chunks.push(this.createChunk(document, currentChunk.join(' ')));
currentChunk = [];
currentLength = 0;
}
currentChunk.push(sentence);
currentLength += sentenceLength;
}
if (currentChunk.length > 0) {
chunks.push(this.createChunk(document, currentChunk.join(' ')));
}
log(`Created ${chunks.length} semantic chunks for document ${document.id}`);
return chunks;
}
recursiveChunk(document) {
const text = document.content;
const chunks = [];
const sections = text.split(/\n\s*\n/); // Split based on paragraphs
for (const section of sections) {
if (section.length <= this.options.maxChunkSize) {
chunks.push(this.createChunk(document, section));
} else {
// If a section is too large, break it into sentences
const sentences = this.splitIntoSentences(section);
let currentChunk = [];
let currentLength = 0;
for (const sentence of sentences) {
const sentenceLength = sentence.length;
if (currentLength + sentenceLength > this.options.maxChunkSize && currentChunk.length > 0) {
chunks.push(this.createChunk(document, currentChunk.join(' ')));
currentChunk = [];
currentLength = 0;
}
currentChunk.push(sentence);
currentLength += sentenceLength;
}
if (currentChunk.length > 0) {
chunks.push(this.createChunk(document, currentChunk.join(' ')));
}
}
}
log(`Created ${chunks.length} recursive chunks for document ${document.id}`);
return chunks;
}
splitIntoSentences(text) {
return text
.replace(/([.!?])\s+/g, '$1\n')
.split('\n')
.map(s => s.trim())
.filter(s => s.length > 0);
}
calculateOverlap(sentences) {
// `overlap` is treated as a percentage of the sentences in the previous chunk
const overlapSentenceCount = Math.ceil(sentences.length * (this.options.overlap / 100));
return sentences.slice(-overlapSentenceCount);
}
createChunk(document, content) {
return {
documentId: document.id,
content: content,
metadata: {
...document.metadata,
chunkIndex: Date.now(),
strategy: this.options.strategy
}
};
}
}
export default Chunker;
```
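`Chunker.chunkDocument()` expects a document shaped as `{ id, content, metadata }` and returns an array of chunk objects tagged with the strategy used. A minimal sketch with the sliding-window strategy (the sample document is illustrative):
```js
// chunker-demo.js: illustrative use of Chunker with the sliding-window strategy
import Chunker from './chunker.js';

const chunker = new Chunker({
  strategy: 'sliding',  // 'sliding' | 'semantic' | 'recursive'
  maxChunkSize: 500,    // approximate characters per chunk
  overlap: 20           // percentage of sentences carried over between chunks
});

const document = {
  id: 'doc-1',
  content: 'MongoDB Atlas supports vector search. Embeddings capture semantic meaning. ' +
           'RAG pipelines retrieve the most relevant chunks before generation.',
  metadata: { source: 'example' }
};

const chunks = await chunker.chunkDocument(document);
// Each chunk: { documentId, content, metadata: { ...metadata, chunkIndex, strategy } }
console.log(`${chunks.length} chunk(s):`, chunks[0].content);
```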
# core/IndexManager.js
```js
// src/core/IndexManager.js
import debug from 'debug';
const log = debug('mongodb-rag:index');
class IndexManager {
constructor(collection, config = {}) {
this.collection = collection;
this.options = {
indexName: config.indexName || 'vector_index',
dimensions: config.embedding?.dimensions || 1536,
similarity: config.search?.similarity || 'cosine',
embeddingPath: config.embeddingFieldPath || 'embedding',
...config
};
}
async ensureIndexes() {
try {
console.log('Checking existing indexes...');
const existingIndexes = await this.collection.listIndexes().toArray();
const hasVectorIndex = existingIndexes.some(index => index.name === this.options.indexName);
if (!hasVectorIndex) {
console.log('Creating missing vector search index...');
const indexDefinition = {
name: this.options.indexName,
type: 'vectorSearch',
definition: {
fields: [
{
type: 'vector',
path: this.options.embeddingPath,
numDimensions: this.options.dimensions,
similarity: this.options.similarity,
quantization: 'none'
}
]
}
};
await this.collection.createSearchIndex(indexDefinition);
console.log(`Vector search index '${this.options.indexName}' created successfully.`);
} else {
console.log(`Vector search index '${this.options.indexName}' already exists.`);
}
// Ensure supporting metadata indexes exist
await this.createSupportingIndexes();