@taiyokimura/rag-mcp
Version:
RAG (Retrieval-Augmented Generation) MCP Server with Supabase and Cohere integration
1,150 lines (1,134 loc) • 45.3 kB
JavaScript
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
import { createClient } from '@supabase/supabase-js';
import { CohereClient } from 'cohere-ai';
import * as fs from 'fs';
import * as path from 'path';
import { glob } from 'glob';
import * as mimeTypes from 'mime-types';
import * as dotenv from 'dotenv';
import { execSync } from 'child_process';
// Load environment variables from a local .env file (no-op when absent)
dotenv.config();
// Environment variables
const SUPABASE_URL = process.env.SUPABASE_URL;
const SUPABASE_ANON_KEY = process.env.SUPABASE_ANON_KEY;
const COHERE_API_KEY = process.env.COHERE_API_KEY;
// Server name is overridable via MCP_NAME so multiple instances can coexist
const MCP_NAME = process.env.MCP_NAME ?? 'rag-mcp';
// Initialize clients
// Both clients stay null when their credentials are missing; every consumer
// below checks for null before use, and main() exits early when they are
// required but unconfigured.
let supabase = null;
let cohere = null;
if (SUPABASE_URL && SUPABASE_ANON_KEY) {
    supabase = createClient(SUPABASE_URL, SUPABASE_ANON_KEY);
}
if (COHERE_API_KEY) {
    cohere = new CohereClient({
        token: COHERE_API_KEY,
    });
}
// File processing utilities
// File extensions considered text/source and eligible for embedding.
const SUPPORTED_EXTENSIONS = [
    '.js', '.ts', '.jsx', '.tsx', '.py', '.java', '.cpp', '.c', '.h',
    '.cs', '.php', '.rb', '.go', '.rs', '.swift', '.kt', '.scala',
    '.html', '.css', '.scss', '.sass', '.less', '.xml', '.json',
    '.yaml', '.yml', '.md', '.txt', '.sql', '.sh', '.bash', '.zsh',
    '.ps1', '.bat', '.dockerfile', '.gitignore', '.env'
];
// Glob patterns excluded from indexing (build output, VCS metadata, logs).
const IGNORE_PATTERNS = [
    '**/node_modules/**',
    '**/build/**',
    '**/dist/**',
    '**/.git/**',
    '**/coverage/**',
    '**/*.log',
    '**/tmp/**',
    '**/temp/**'
];
// Per-repository configuration files live under <root>/.rag-mcp/.
const CONFIG_DIR = '.rag-mcp';
const CONFIG_FILE = 'repository.yaml'; // repository identity (simple YAML)
const SCHEMA_FILE = 'schema.sql'; // generated Supabase schema copy
const LOG_FILE = 'indexing.log'; // per-run indexing log
// Bundled Supabase schema, written verbatim to .rag-mcp/schema.sql by
// saveSchemaFile(). The embedded SQL is a runtime string and must not be
// edited casually: it is executed manually by the user in the Supabase SQL
// editor (the anon-key client cannot run DDL). Keep vector(1024) in sync
// with the Cohere embed-english-v3.0 model used by generateEmbedding().
const SCHEMA_SQL_CONTENT = `-- RAG MCP Database Schema for Supabase
-- This file contains the database schema for the RAG MCP server
-- Please execute these SQL commands in your Supabase SQL editor
-- Enable the vector extension (required for embeddings)
CREATE EXTENSION IF NOT EXISTS vector;
-- Create the repositories table
CREATE TABLE IF NOT EXISTS repositories (
id BIGSERIAL PRIMARY KEY,
repository_id TEXT UNIQUE NOT NULL, -- Git URL or local identifier
repository_root TEXT NOT NULL,
repository_name TEXT,
repository_type TEXT DEFAULT 'git', -- 'git' or 'local'
description TEXT,
last_indexed_at TIMESTAMP WITH TIME ZONE,
total_files INTEGER DEFAULT 0,
total_size_bytes BIGINT DEFAULT 0,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- Create the file_embeddings table
CREATE TABLE IF NOT EXISTS file_embeddings (
id BIGSERIAL PRIMARY KEY,
repository_id BIGINT NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
file_path TEXT NOT NULL, -- Relative path from repository root
absolute_path TEXT NOT NULL, -- Full absolute path to file
content TEXT NOT NULL,
mime_type TEXT,
file_size INTEGER,
embedding vector(1024), -- Cohere embed-english-v3.0 produces 1024-dimensional vectors
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
UNIQUE(repository_id, file_path) -- Prevent duplicates within same repository
);
-- Create indexes for better performance
CREATE INDEX IF NOT EXISTS idx_repositories_repository_id ON repositories(repository_id);
CREATE INDEX IF NOT EXISTS idx_repositories_repository_type ON repositories(repository_type);
CREATE INDEX IF NOT EXISTS idx_repositories_last_indexed_at ON repositories(last_indexed_at);
CREATE INDEX IF NOT EXISTS idx_file_embeddings_repository_id ON file_embeddings(repository_id);
CREATE INDEX IF NOT EXISTS idx_file_embeddings_file_path ON file_embeddings(file_path);
CREATE INDEX IF NOT EXISTS idx_file_embeddings_mime_type ON file_embeddings(mime_type);
CREATE INDEX IF NOT EXISTS idx_file_embeddings_created_at ON file_embeddings(created_at);
-- Create a vector similarity index using HNSW (Hierarchical Navigable Small World)
-- This index significantly improves the performance of similarity searches
CREATE INDEX IF NOT EXISTS idx_file_embeddings_embedding ON file_embeddings
USING hnsw (embedding vector_cosine_ops);
-- Create a function to search for similar files within a specific repository
CREATE OR REPLACE FUNCTION search_similar_files(
query_embedding vector(1024),
target_repository_id text,
match_threshold float DEFAULT 0.7,
match_count int DEFAULT 5
)
RETURNS TABLE (
id bigint,
repository_id bigint,
repository_name text,
repository_root text,
file_path text,
absolute_path text,
content text,
mime_type text,
file_size integer,
similarity float,
created_at timestamp with time zone
)
LANGUAGE plpgsql
AS $$
BEGIN
RETURN QUERY
SELECT
fe.id,
fe.repository_id,
r.repository_name,
r.repository_root,
fe.file_path,
fe.absolute_path,
fe.content,
fe.mime_type,
fe.file_size,
1 - (fe.embedding <=> query_embedding) AS similarity,
fe.created_at
FROM file_embeddings fe
JOIN repositories r ON fe.repository_id = r.id
WHERE r.repository_id = target_repository_id
AND 1 - (fe.embedding <=> query_embedding) > match_threshold
ORDER BY fe.embedding <=> query_embedding
LIMIT match_count;
END;
$$;
-- Create a function to update the updated_at timestamp
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ language 'plpgsql';
-- Create triggers to automatically update the updated_at column
CREATE TRIGGER update_repositories_updated_at
BEFORE UPDATE ON repositories
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_file_embeddings_updated_at
BEFORE UPDATE ON file_embeddings
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- Create RLS (Row Level Security) policies if needed
-- Uncomment the following lines if you want to enable RLS
-- ALTER TABLE repositories ENABLE ROW LEVEL SECURITY;
-- ALTER TABLE file_embeddings ENABLE ROW LEVEL SECURITY;
-- Example RLS policy (adjust according to your authentication needs)
-- CREATE POLICY "Users can view their own files" ON file_embeddings
-- FOR SELECT USING (auth.uid() = user_id);
-- Create a view for repository statistics
CREATE OR REPLACE VIEW repository_statistics AS
SELECT
r.id,
r.repository_id,
r.repository_name,
r.repository_root,
r.repository_type,
r.last_indexed_at,
COUNT(fe.id) AS total_files,
COUNT(DISTINCT fe.mime_type) AS unique_mime_types,
COALESCE(SUM(fe.file_size), 0) AS total_size_bytes,
COALESCE(AVG(fe.file_size), 0) AS average_file_size,
MIN(fe.created_at) AS oldest_file,
MAX(fe.created_at) AS newest_file,
r.created_at AS repository_created_at,
r.updated_at AS repository_updated_at
FROM repositories r
LEFT JOIN file_embeddings fe ON r.id = fe.repository_id
GROUP BY r.id, r.repository_id, r.repository_name, r.repository_root,
r.repository_type, r.last_indexed_at, r.created_at, r.updated_at;
-- Grant necessary permissions (adjust according to your needs)
-- GRANT SELECT, INSERT, UPDATE, DELETE ON repositories TO authenticated;
-- GRANT SELECT, INSERT, UPDATE, DELETE ON file_embeddings TO authenticated;
-- GRANT SELECT ON repository_statistics TO authenticated;
-- GRANT USAGE ON SEQUENCE repositories_id_seq TO authenticated;
-- GRANT USAGE ON SEQUENCE file_embeddings_id_seq TO authenticated;
`;
/**
 * Ensure the `.rag-mcp` configuration directory exists under `rootDir`,
 * creating it (including parents) when missing.
 * @param {string} rootDir - Repository root directory.
 * @returns {Promise<string>} Absolute path of the configuration directory.
 */
async function ensureConfigDirectory(rootDir) {
    const target = path.join(rootDir, CONFIG_DIR);
    // mkdirSync with `recursive: true` tolerates an existing directory, so
    // the existence check only avoids a redundant syscall.
    if (fs.existsSync(target)) {
        return target;
    }
    fs.mkdirSync(target, { recursive: true });
    return target;
}
/**
 * Load the repository configuration from `<rootDir>/.rag-mcp/repository.yaml`.
 *
 * Uses a minimal line-based parser tailored to the flat `key: "value"` YAML
 * this tool writes itself — it is not a general YAML parser.
 *
 * Fixes over the previous version:
 *  - values now keep everything after the FIRST ':' on the line; the old
 *    `split(':', 2)[1]` truncated any value containing a colon (e.g.
 *    "https://github.com/..." or "local:name:date:path"), because the second
 *    argument of String.prototype.split caps the number of pieces rather
 *    than stopping at the first separator.
 *  - reading the config no longer creates the config directory as a side
 *    effect (a pure read should not mutate the filesystem).
 *
 * @param {string} rootDir - Repository root directory.
 * @returns {Promise<object|null>} Parsed config, or null when absent, invalid,
 *   or missing a repositoryId.
 */
async function loadRepositoryConfig(rootDir) {
    try {
        const configPath = path.join(rootDir, CONFIG_DIR, CONFIG_FILE);
        if (!fs.existsSync(configPath)) {
            return null;
        }
        const configContent = fs.readFileSync(configPath, 'utf-8');
        const config = {
            repositoryId: '',
            repositoryName: '',
            repositoryType: 'local',
            createdAt: ''
        };
        // Keys we recognize; anything else (comments, unknown keys) is skipped.
        const knownKeys = new Set([
            'repositoryId',
            'repositoryName',
            'repositoryType',
            'description',
            'createdAt',
            'lastIndexedAt'
        ]);
        for (const line of configContent.split('\n')) {
            const trimmed = line.trim();
            // Skip blank lines and YAML comments.
            if (!trimmed || trimmed.startsWith('#')) {
                continue;
            }
            const sep = trimmed.indexOf(':');
            if (sep === -1) {
                continue;
            }
            const key = trimmed.slice(0, sep).trim();
            if (!knownKeys.has(key)) {
                continue;
            }
            // Everything after the first ':' is the value; strip the quotes
            // our writer adds (same quote-stripping as the original).
            config[key] = trimmed.slice(sep + 1).trim().replace(/['"]/g, '');
        }
        return config.repositoryId ? config : null;
    }
    catch (error) {
        console.error('Error loading repository config:', error);
        return null;
    }
}
/**
 * Write the bundled Supabase schema to `<rootDir>/.rag-mcp/schema.sql`.
 * @param {string} rootDir - Repository root directory.
 * @returns {Promise<boolean>} true on success, false on any I/O error.
 */
async function saveSchemaFile(rootDir) {
    try {
        const dir = await ensureConfigDirectory(rootDir);
        fs.writeFileSync(path.join(dir, SCHEMA_FILE), SCHEMA_SQL_CONTENT, 'utf-8');
        return true;
    }
    catch (error) {
        console.error('Error saving schema file:', error);
        return false;
    }
}
/**
 * Persist the repository configuration YAML and companion schema file, and
 * best-effort ensure `.rag-mcp/` is listed in the repository's .gitignore.
 * @param {string} rootDir - Repository root directory.
 * @param {object} config - Repository configuration to serialize.
 * @returns {Promise<boolean>} true on success, false on failure.
 */
async function saveRepositoryConfig(rootDir, config) {
    try {
        const configDir = await ensureConfigDirectory(rootDir);
        const configPath = path.join(configDir, CONFIG_FILE);
        // Optional fields render as commented-out placeholders when absent.
        const descriptionLine = config.description
            ? `description: "${config.description}"`
            : '# description: "Optional description of this repository"';
        const lastIndexedLine = config.lastIndexedAt
            ? `lastIndexedAt: "${config.lastIndexedAt}"`
            : '# lastIndexedAt: "Will be updated when files are indexed"';
        const yamlContent = `# RAG MCP Repository Configuration
# This file identifies this repository for the RAG MCP system
# Do not modify unless you know what you're doing
repositoryId: "${config.repositoryId}"
repositoryName: "${config.repositoryName}"
repositoryType: "${config.repositoryType}"
${descriptionLine}
createdAt: "${config.createdAt}"
${lastIndexedLine}
# Note: This file is automatically managed by RAG MCP
# Repository ID uniquely identifies this project in the database
`;
        fs.writeFileSync(configPath, yamlContent, 'utf-8');
        // Keep the companion schema file alongside the config.
        await saveSchemaFile(rootDir);
        // Best effort: add .rag-mcp/ to .gitignore; any failure is ignored.
        try {
            const gitignorePath = path.join(rootDir, '.gitignore');
            let existing = '';
            if (fs.existsSync(gitignorePath)) {
                existing = fs.readFileSync(gitignorePath, 'utf-8');
            }
            if (!existing.includes('.rag-mcp/')) {
                const separator = existing.endsWith('\n') ? '' : '\n';
                const updated = existing + separator
                    + '# RAG MCP configuration (optional to ignore)\n.rag-mcp/\n';
                fs.writeFileSync(gitignorePath, updated, 'utf-8');
            }
        }
        catch {
            // Ignore gitignore errors
        }
        return true;
    }
    catch (error) {
        console.error('Error saving repository config:', error);
        return false;
    }
}
// Logging utilities
/**
 * Writes indexing progress both to stderr and to
 * `<rootDir>/.rag-mcp/indexing.log`, and tries to open the log file in the
 * platform's default viewer so the user can follow progress live.
 */
class IndexingLogger {
    logPath;
    logStream = null;
    constructor(rootDir) {
        this.logPath = path.join(rootDir, CONFIG_DIR, LOG_FILE);
    }
    /** Create the log file with a header and start the append stream. */
    async init() {
        try {
            const dir = path.dirname(this.logPath);
            if (!fs.existsSync(dir)) {
                fs.mkdirSync(dir, { recursive: true });
            }
            const header = `=== RAG MCP Indexing Log ===\nStarted: ${new Date().toISOString()}\n\n`;
            fs.writeFileSync(this.logPath, header, 'utf-8');
            this.logStream = fs.createWriteStream(this.logPath, { flags: 'a' });
            this.openLogFile();
        }
        catch (error) {
            console.error('Failed to initialize logging:', error);
        }
    }
    /** Emit a message to stderr and, when initialized, to the log file. */
    log(message) {
        console.error(message);
        this.logStream?.write(`[${new Date().toISOString()}] ${message}\n`);
    }
    /** Append the footer and release the stream. Safe to call twice. */
    async close() {
        if (!this.logStream) {
            return;
        }
        this.logStream.write(`\nCompleted: ${new Date().toISOString()}\n=== End of Log ===\n`);
        this.logStream.end();
        this.logStream = null;
    }
    /**
     * Open the log file with the platform's default opener.
     * NOTE(review): the path is interpolated into a shell command; a rootDir
     * containing a double quote could break out of the quoting — confirm
     * rootDir is trusted before exposing this to untrusted callers.
     */
    openLogFile() {
        try {
            const openCommands = {
                darwin: `open "${this.logPath}"`, // macOS
                win32: `start "" "${this.logPath}"` // Windows
            };
            // Linux and everything else falls back to xdg-open.
            const command = openCommands[process.platform] ?? `xdg-open "${this.logPath}"`;
            execSync(command, { stdio: 'ignore' });
            console.error(`š Log file opened: ${this.logPath}`);
        }
        catch (error) {
            console.error(`Could not open log file automatically. View at: ${this.logPath}`);
        }
    }
    /** @returns {string} Absolute path of the log file. */
    getLogPath() {
        return this.logPath;
    }
}
/**
 * Derive a stable identifier for the repository at `rootDir`.
 *
 * Prefers the Git "origin" remote URL; when the directory is not a Git
 * repository (or `git`/the directory is unavailable), falls back to a local
 * identifier of the form `local:<dirName>:<YYYY-MM-DD>:<rootDir>`.
 *
 * The previous version wrapped this logic in a second, outer try/catch whose
 * catch block duplicated the fallback verbatim and was unreachable — removed.
 *
 * @param {string} rootDir - Repository root directory.
 * @returns {Promise<string>} Repository identifier.
 */
async function generateRepositoryId(rootDir) {
    try {
        // Throws outside a git repo, when no `origin` remote is configured,
        // or when `rootDir` does not exist.
        return execSync('git remote get-url origin', {
            cwd: rootDir,
            encoding: 'utf-8'
        }).trim();
    }
    catch {
        // Not a git repository: build a date-stamped local identifier.
        const dirName = path.basename(rootDir);
        const timestamp = new Date().toISOString().split('T')[0]; // YYYY-MM-DD
        return `local:${dirName}:${timestamp}:${rootDir}`;
    }
}
/**
 * Announce repository setup on stderr and build an auto-accepted
 * configuration. Interactive selection is not implemented: the suggested ID
 * is used and the user is told how to customize it after creation.
 * @param {string} rootDir - Repository root directory.
 * @returns {Promise<object>} The new repository configuration.
 */
async function promptUserForRepositorySetup(rootDir) {
    const suggestedId = await generateRepositoryId(rootDir);
    const dirName = path.basename(rootDir);
    const isGitRepo = !suggestedId.startsWith('local:');
    // Each entry is printed with its own console.error call, exactly as the
    // original sequence of calls did.
    const banner = [
        '\n=== RAG MCP Repository Setup ===',
        `Directory: ${rootDir}`,
        `Detected type: ${isGitRepo ? 'Git Repository' : 'Local Directory'}`,
        `Suggested Repository ID: ${suggestedId}`,
        '\nThis will create the following files in .rag-mcp/ directory:',
        '  - repository.yaml (repository configuration)',
        '  - schema.sql (Supabase database schema)',
        '\nThe Repository ID will be used to separate this project\'s data from other projects.',
        '\nOptions:',
        '1. Use suggested Repository ID',
        '2. Enter custom Repository ID',
        '3. Cancel setup',
        // In a real implementation, readline (or similar) would collect user
        // input here; for now the suggested ID is auto-accepted with a warning.
        '\nā ļø Auto-accepting suggested Repository ID for automated setup.',
        'To customize, manually edit .rag-mcp/repository.yaml after creation.'
    ];
    for (const line of banner) {
        console.error(line);
    }
    return {
        repositoryId: suggestedId,
        repositoryName: dirName,
        repositoryType: isGitRepo ? 'git' : 'local',
        description: `Auto-generated configuration for ${dirName}`,
        createdAt: new Date().toISOString()
    };
}
/**
 * Resolve the repository identity for `rootDir`, creating the configuration
 * files on first use. Falls back to a path-derived identifier when setup
 * fails for any reason.
 * @param {string} rootDir - Repository root directory.
 * @returns {Promise<{repositoryId: string, repositoryRoot: string}>}
 */
async function getRepositoryInfo(rootDir) {
    try {
        // Fast path: a configuration already exists on disk.
        const existing = await loadRepositoryConfig(rootDir);
        if (existing) {
            console.error(`ā Using existing repository configuration: ${existing.repositoryId}`);
            return { repositoryId: existing.repositoryId, repositoryRoot: rootDir };
        }
        console.error('No RAG MCP configuration found for this repository.');
        const config = await promptUserForRepositorySetup(rootDir);
        if (!config) {
            throw new Error('Repository setup cancelled by user');
        }
        if (!(await saveRepositoryConfig(rootDir, config))) {
            throw new Error('Failed to save repository configuration');
        }
        console.error(`ā Repository configuration saved to ${path.join(rootDir, CONFIG_DIR, CONFIG_FILE)}`);
        console.error(`ā Database schema saved to ${path.join(rootDir, CONFIG_DIR, SCHEMA_FILE)}`);
        console.error(`\nš Next steps:`);
        console.error(`1. Set up your Supabase project at https://supabase.com`);
        console.error(`2. Copy and execute the SQL from .rag-mcp/schema.sql in your Supabase SQL editor`);
        console.error(`3. Configure your environment variables (SUPABASE_URL, SUPABASE_ANON_KEY, COHERE_API_KEY)`);
        return { repositoryId: config.repositoryId, repositoryRoot: rootDir };
    }
    catch (error) {
        console.error('Error getting repository info:', error);
        // Legacy fallback identifier (no date component).
        const dirName = path.basename(rootDir);
        return { repositoryId: `local:${dirName}:${rootDir}`, repositoryRoot: rootDir };
    }
}
/**
 * Read a text file and package it for embedding.
 * Files larger than 1 MB are skipped, and unreadable files are reported;
 * both cases return null.
 * @param {string} filePath - Absolute path of the file.
 * @param {string} repositoryRoot - Root used to compute the relative path.
 * @returns {Promise<object|null>} File payload, or null when skipped/failed.
 */
async function readFileContent(filePath, repositoryRoot) {
    const MAX_FILE_SIZE = 1024 * 1024; // 1 MB
    try {
        const stats = fs.statSync(filePath);
        if (stats.size > MAX_FILE_SIZE) {
            console.error(`Skipping large file: ${filePath} (${stats.size} bytes)`);
            return null;
        }
        return {
            filePath,
            relativePath: path.relative(repositoryRoot, filePath),
            content: fs.readFileSync(filePath, 'utf-8'),
            mimeType: mimeTypes.lookup(filePath) || 'text/plain',
            size: stats.size
        };
    }
    catch (error) {
        console.error(`Error reading file ${filePath}:`, error);
        return null;
    }
}
/**
 * Collect all supported source files under `rootDir`, honouring the global
 * ignore patterns. Returns absolute, de-duplicated file paths.
 * @param {string} rootDir - Directory to scan.
 * @returns {Promise<string[]>} Absolute paths (empty array on error).
 */
async function getAllFiles(rootDir) {
    try {
        // A Set de-duplicates as we go (an extension pair like .yaml/.yml can
        // never overlap, but this keeps the original dedupe semantics).
        const found = new Set();
        for (const ext of SUPPORTED_EXTENSIONS) {
            const matches = await glob(`**/*${ext}`, {
                cwd: rootDir,
                ignore: IGNORE_PATTERNS,
                absolute: true,
                nodir: true
            });
            for (const file of matches) {
                found.add(file);
            }
        }
        return [...found];
    }
    catch (error) {
        console.error('Error getting files:', error);
        return [];
    }
}
/**
 * Embed `text` with Cohere's embed-english-v3.0 model (1024-dim vectors,
 * matching the `vector(1024)` column in the database schema).
 * @param {string} text - Document text to embed.
 * @returns {Promise<number[]|null>} Embedding vector, or null on failure.
 */
async function generateEmbedding(text) {
    if (!cohere) {
        console.error('Cohere client not initialized');
        return null;
    }
    try {
        // Cohere API v1: one input text, documents-side embedding.
        const response = await cohere.embed({
            texts: [text],
            model: 'embed-english-v3.0',
            inputType: 'search_document'
        });
        const { embeddings } = response;
        // The v1 API returns an array of vectors, one per input text.
        return Array.isArray(embeddings) && embeddings.length > 0
            ? embeddings[0]
            : null;
    }
    catch (error) {
        console.error('Error generating embedding:', error);
        return null;
    }
}
/**
 * Look up the row for `repositoryInfo` in the `repositories` table, creating
 * it on first sight. Returns the numeric primary key, or null on failure.
 * @param {{repositoryId: string, repositoryRoot: string}} repositoryInfo
 * @returns {Promise<number|null>} Database id of the repository row.
 */
async function ensureRepositoryExists(repositoryInfo) {
    if (!supabase) {
        console.error('Supabase client not initialized');
        return null;
    }
    try {
        const { repositoryId, repositoryRoot } = repositoryInfo;
        const { data: existingRepo, error: selectError } = await supabase
            .from('repositories')
            .select('id')
            .eq('repository_id', repositoryId)
            .single();
        // PGRST116 = "no rows returned", which is expected for a new repo.
        if (selectError && selectError.code !== 'PGRST116') {
            console.error('Error checking repository:', selectError);
            return null;
        }
        if (existingRepo) {
            return existingRepo.id;
        }
        // First time we see this repository: derive display name and type.
        const isLocal = repositoryId.startsWith('local:');
        const repositoryName = isLocal
            ? path.basename(repositoryRoot)
            : repositoryId.split('/').pop()?.replace('.git', '') || 'unknown';
        const { data: newRepo, error: insertError } = await supabase
            .from('repositories')
            .insert({
                repository_id: repositoryId,
                repository_root: repositoryRoot,
                repository_name: repositoryName,
                repository_type: isLocal ? 'local' : 'git',
                last_indexed_at: new Date().toISOString()
            })
            .select('id')
            .single();
        if (insertError) {
            console.error('Error creating repository:', insertError);
            return null;
        }
        return newRepo.id;
    }
    catch (error) {
        console.error('Error ensuring repository exists:', error);
        return null;
    }
}
/**
 * Upsert one file (content + embedding) into `file_embeddings`, keyed by
 * (repository_id, file_path) so re-indexing overwrites rather than duplicates.
 * @param {object} fileContent - Payload from readFileContent().
 * @param {number[]} embedding - Vector from generateEmbedding().
 * @param {{repositoryId: string, repositoryRoot: string}} repositoryInfo
 * @returns {Promise<boolean>} true on success.
 */
async function storeFileInDatabase(fileContent, embedding, repositoryInfo) {
    if (!supabase) {
        console.error('Supabase client not initialized');
        return false;
    }
    try {
        const repoId = await ensureRepositoryExists(repositoryInfo);
        if (!repoId) {
            console.error('Failed to ensure repository exists');
            return false;
        }
        const row = {
            repository_id: repoId,
            file_path: fileContent.relativePath,
            absolute_path: fileContent.filePath,
            content: fileContent.content,
            mime_type: fileContent.mimeType,
            file_size: fileContent.size,
            embedding,
            updated_at: new Date().toISOString()
        };
        const { error } = await supabase
            .from('file_embeddings')
            .upsert(row, { onConflict: 'repository_id,file_path' });
        if (error) {
            console.error('Error storing file in database:', error);
            return false;
        }
        return true;
    }
    catch (error) {
        console.error('Error storing file in database:', error);
        return false;
    }
}
/**
 * Vector-similarity search within a single repository via the
 * `search_similar_files` RPC.
 *
 * Generalized: the similarity threshold was previously hard-coded to 0.7;
 * it is now an optional parameter with the same default, so existing callers
 * are unaffected.
 *
 * @param {string} query - Search query to embed and match.
 * @param {{repositoryId: string}} repositoryInfo - Target repository.
 * @param {number} [limit=5] - Maximum number of rows to return.
 * @param {number} [matchThreshold=0.7] - Minimum cosine similarity; rows
 *   below this are filtered out inside the database function.
 * @returns {Promise<object[]>} Matching rows (empty array on any failure).
 */
async function searchSimilarFiles(query, repositoryInfo, limit = 5, matchThreshold = 0.7) {
    if (!supabase || !cohere) {
        console.error('Supabase or Cohere client not initialized');
        return [];
    }
    try {
        // Embed the query with the same model used for documents.
        const queryEmbedding = await generateEmbedding(query);
        if (!queryEmbedding) {
            return [];
        }
        const { data, error } = await supabase.rpc('search_similar_files', {
            query_embedding: queryEmbedding,
            target_repository_id: repositoryInfo.repositoryId,
            match_threshold: matchThreshold,
            match_count: limit
        });
        if (error) {
            console.error('Error searching similar files:', error);
            return [];
        }
        return data || [];
    }
    catch (error) {
        console.error('Error searching similar files:', error);
        return [];
    }
}
/**
 * Index every supported file under `repoPath`: read, embed, and store each
 * file, then update repository statistics in the database and the local
 * configuration file.
 *
 * Fixes over the previous version:
 *  - the "Processed" log message contained a stray literal newline inside
 *    the template string (residue of a mangled emoji); it is one line now.
 *  - the reduce callback declared a `stats` local that shadowed the outer
 *    `stats` summary object; renamed.
 *  - the statistics update now guards against a null `supabase` client
 *    instead of throwing into the inner catch.
 *
 * @param {string} repoPath - Root of the repository to index.
 * @returns {Promise<{success: boolean, message: string, stats?: object}>}
 */
async function initializeRepository(repoPath) {
    const logger = new IndexingLogger(repoPath);
    try {
        await logger.init();
        logger.log(`Initializing repository: ${repoPath}`);
        // Get repository information
        const repositoryInfo = await getRepositoryInfo(repoPath);
        logger.log(`Repository ID: ${repositoryInfo.repositoryId}`);
        const files = await getAllFiles(repoPath);
        logger.log(`Found ${files.length} files to process`);
        let processed = 0;
        let failed = 0;
        for (const filePath of files) {
            const fileContent = await readFileContent(filePath, repositoryInfo.repositoryRoot);
            if (!fileContent) {
                failed++;
                logger.log(`ā Failed to read: ${path.relative(repoPath, filePath)}`);
                continue;
            }
            const embedding = await generateEmbedding(fileContent.content);
            if (!embedding) {
                failed++;
                logger.log(`ā Failed to embed: ${fileContent.relativePath}`);
                continue;
            }
            const stored = await storeFileInDatabase(fileContent, embedding, repositoryInfo);
            if (stored) {
                processed++;
                logger.log(`ā Processed: ${fileContent.relativePath} (${fileContent.size} bytes)`);
            }
            else {
                failed++;
                logger.log(`ā Failed to store: ${fileContent.relativePath}`);
            }
            // Progress logging every 10 files.
            if ((processed + failed) % 10 === 0) {
                logger.log(`š Progress: ${processed + failed}/${files.length} files processed (${processed} success, ${failed} failed)`);
            }
        }
        const stats = {
            totalFiles: files.length,
            processed,
            failed,
            repositoryId: repositoryInfo.repositoryId
        };
        logger.log(`š Repository initialization completed: ${processed} files processed, ${failed} failed`);
        // Update repository statistics (best effort; failures only logged).
        try {
            const totalSize = files.reduce((sum, filePath) => {
                try {
                    // Renamed from `stats` to avoid shadowing the summary above.
                    const fileStats = fs.statSync(filePath);
                    return sum + fileStats.size;
                }
                catch {
                    // File vanished mid-run; contributes nothing.
                    return sum;
                }
            }, 0);
            if (supabase) {
                await supabase
                    .from('repositories')
                    .update({
                        total_files: processed,
                        total_size_bytes: totalSize,
                        last_indexed_at: new Date().toISOString()
                    })
                    .eq('repository_id', repositoryInfo.repositoryId);
                logger.log(`š Repository statistics updated: ${processed} files, ${totalSize} bytes`);
            }
            // Update local configuration file
            const config = await loadRepositoryConfig(repoPath);
            if (config) {
                config.lastIndexedAt = new Date().toISOString();
                await saveRepositoryConfig(repoPath, config);
                logger.log(`š Local configuration updated`);
            }
        }
        catch (error) {
            logger.log(`ā ļø Error updating repository statistics: ${error}`);
        }
        await logger.close();
        return {
            success: true,
            message: `Successfully processed ${processed} files out of ${files.length} total files for repository: ${repositoryInfo.repositoryId}`,
            stats: {
                ...stats,
                logPath: logger.getLogPath()
            }
        };
    }
    catch (error) {
        logger.log(`š„ Error initializing repository: ${error}`);
        await logger.close();
        return {
            success: false,
            message: `Error initializing repository: ${error}`
        };
    }
}
// Create MCP server
// Only tool capabilities are advertised (no resources/prompts); the tool
// list and dispatch are registered via setRequestHandler below.
const server = new Server({
    name: MCP_NAME,
    version: '1.0.0',
}, {
    capabilities: {
        tools: {},
    },
});
// List available tools
// Static tool catalogue returned to MCP clients on tools/list.
server.setRequestHandler(ListToolsRequestSchema, async () => {
    // Schema fragment shared by every tool that defaults to the current
    // directory; spread into each schema so the objects stay independent.
    const repoPathWithDefault = {
        type: 'string',
        description: 'Path to the repository (default: current directory)',
        default: '.'
    };
    const tools = [
        {
            name: 'initialize_repository',
            description: 'Initialize repository by embedding all files and storing them in the database',
            inputSchema: {
                type: 'object',
                properties: {
                    repository_path: {
                        type: 'string',
                        description: 'Path to the repository to initialize'
                    }
                },
                required: ['repository_path']
            }
        },
        {
            name: 'search_code',
            description: 'Search for code or files related to a query using vector similarity',
            inputSchema: {
                type: 'object',
                properties: {
                    query: {
                        type: 'string',
                        description: 'Search query to find related code or files'
                    },
                    limit: {
                        type: 'number',
                        description: 'Maximum number of results to return (default: 5)',
                        default: 5
                    }
                },
                required: ['query']
            }
        },
        {
            name: 'show_repository_config',
            description: 'Show the current repository configuration',
            inputSchema: {
                type: 'object',
                properties: {
                    repository_path: { ...repoPathWithDefault }
                }
            }
        },
        {
            name: 'update_repository_config',
            description: 'Update repository configuration settings',
            inputSchema: {
                type: 'object',
                properties: {
                    repository_path: { ...repoPathWithDefault },
                    repository_name: {
                        type: 'string',
                        description: 'New repository name'
                    },
                    description: {
                        type: 'string',
                        description: 'New repository description'
                    }
                }
            }
        },
        {
            name: 'regenerate_schema',
            description: 'Regenerate the database schema file (.rag-mcp/schema.sql)',
            inputSchema: {
                type: 'object',
                properties: {
                    repository_path: { ...repoPathWithDefault }
                }
            }
        }
    ];
    return { tools };
});
// Handle tool calls
// Dispatches each tools/call request to its handler. Every failure — thrown
// here or below — is converted into an `isError` text response rather than
// propagating to the transport.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
    const { name, arguments: args } = request.params;
    try {
        switch (name) {
            case 'initialize_repository': {
                const { repository_path } = args;
                if (!repository_path) {
                    throw new Error('Repository path is required');
                }
                if (!fs.existsSync(repository_path)) {
                    throw new Error(`Repository path does not exist: ${repository_path}`);
                }
                const result = await initializeRepository(repository_path);
                return {
                    content: [
                        {
                            type: 'text',
                            text: JSON.stringify(result, null, 2)
                        }
                    ]
                };
            }
            case 'search_code': {
                const { query, limit = 5 } = args;
                if (!query) {
                    throw new Error('Query is required');
                }
                // Get current repository information.
                // NOTE(review): searches are scoped to the server process's
                // CWD, not to any path argument — confirm this is intended.
                const currentDir = process.cwd();
                const repositoryInfo = await getRepositoryInfo(currentDir);
                const results = await searchSimilarFiles(query, repositoryInfo, limit);
                return {
                    content: [
                        {
                            type: 'text',
                            text: JSON.stringify({
                                query,
                                repository_id: repositoryInfo.repositoryId,
                                repository_name: results.length > 0 ? results[0].repository_name : null,
                                repository_root: repositoryInfo.repositoryRoot,
                                // Previews are capped at 500 chars to keep
                                // responses compact.
                                results: results.map(result => ({
                                    file_path: result.file_path,
                                    absolute_path: result.absolute_path,
                                    similarity: result.similarity,
                                    content_preview: result.content.substring(0, 500) + (result.content.length > 500 ? '...' : ''),
                                    mime_type: result.mime_type,
                                    file_size: result.file_size
                                }))
                            }, null, 2)
                        }
                    ]
                };
            }
            case 'show_repository_config': {
                const { repository_path = process.cwd() } = args;
                try {
                    const config = await loadRepositoryConfig(repository_path);
                    // Missing config is reported as data, not as an error.
                    if (!config) {
                        return {
                            content: [
                                {
                                    type: 'text',
                                    text: JSON.stringify({
                                        status: 'not_configured',
                                        message: 'No RAG MCP configuration found for this repository',
                                        path: repository_path,
                                        config_file_path: path.join(repository_path, CONFIG_DIR, CONFIG_FILE)
                                    }, null, 2)
                                }
                            ]
                        };
                    }
                    return {
                        content: [
                            {
                                type: 'text',
                                text: JSON.stringify({
                                    status: 'configured',
                                    repository_path,
                                    config_file_path: path.join(repository_path, CONFIG_DIR, CONFIG_FILE),
                                    config
                                }, null, 2)
                            }
                        ]
                    };
                }
                catch (error) {
                    throw new Error(`Error reading repository config: ${error}`);
                }
            }
            case 'update_repository_config': {
                const { repository_path = process.cwd(), repository_name, description } = args;
                try {
                    let config = await loadRepositoryConfig(repository_path);
                    if (!config) {
                        throw new Error('No repository configuration found. Please initialize the repository first.');
                    }
                    // Update fields if provided
                    if (repository_name) {
                        config.repositoryName = repository_name;
                    }
                    // `description` may legitimately be the empty string, so
                    // only `undefined` means "not provided".
                    if (description !== undefined) {
                        config.description = description;
                    }
                    const saved = await saveRepositoryConfig(repository_path, config);
                    if (!saved) {
                        throw new Error('Failed to save updated configuration');
                    }
                    return {
                        content: [
                            {
                                type: 'text',
                                text: JSON.stringify({
                                    status: 'updated',
                                    message: 'Repository configuration updated successfully',
                                    repository_path,
                                    config_file_path: path.join(repository_path, CONFIG_DIR, CONFIG_FILE),
                                    updated_config: config
                                }, null, 2)
                            }
                        ]
                    };
                }
                catch (error) {
                    throw new Error(`Error updating repository config: ${error}`);
                }
            }
            case 'regenerate_schema': {
                const { repository_path = process.cwd() } = args;
                try {
                    const saved = await saveSchemaFile(repository_path);
                    if (!saved) {
                        throw new Error('Failed to regenerate schema file');
                    }
                    return {
                        content: [
                            {
                                type: 'text',
                                text: JSON.stringify({
                                    status: 'regenerated',
                                    message: 'Database schema file regenerated successfully',
                                    repository_path,
                                    schema_file_path: path.join(repository_path, CONFIG_DIR, SCHEMA_FILE),
                                    next_steps: [
                                        'Copy the SQL content from .rag-mcp/schema.sql',
                                        'Execute it in your Supabase SQL editor',
                                        'Make sure to update any existing tables if needed'
                                    ]
                                }, null, 2)
                            }
                        ]
                    };
                }
                catch (error) {
                    throw new Error(`Error regenerating schema: ${error}`);
                }
            }
            default:
                throw new Error(`Unknown tool: ${name}`);
        }
    }
    catch (error) {
        return {
            content: [
                {
                    type: 'text',
                    text: `Error: ${error}`
                }
            ],
            isError: true
        };
    }
});
/**
 * Verify that the `file_embeddings` table exists in Supabase.
 *
 * Despite the historical name, this function only CHECKS the schema — the
 * anon-key client cannot run DDL, so nothing is created here. The name is
 * kept for interface compatibility.
 *
 * Fixed: the "table not found" message previously pointed at
 * `db/schema.sql`, but this tool generates the schema at
 * `.rag-mcp/schema.sql` (see saveSchemaFile / the regenerate_schema tool).
 *
 * @returns {Promise<boolean>} true when the schema is usable.
 */
async function checkAndCreateDatabaseSchema() {
    if (!supabase) {
        console.error('Supabase client not initialized');
        return false;
    }
    try {
        // Probe the table; Postgres error 42P01 = "undefined_table".
        const { error } = await supabase
            .from('file_embeddings')
            .select('count')
            .limit(1);
        if (error && error.code === '42P01') {
            console.error('Database table not found. Please execute the SQL schema from .rag-mcp/schema.sql in your Supabase SQL editor.');
            console.error('Use the regenerate_schema tool to (re)create .rag-mcp/schema.sql if it is missing.');
            return false;
        }
        if (error) {
            console.error('Error checking database schema:', error);
            return false;
        }
        console.error('Database schema verified ā');
        return true;
    }
    catch (error) {
        console.error('Error checking database schema:', error);
        return false;
    }
}
/**
 * On startup, index the current working directory unless it has already been
 * indexed (a repository row exists with total_files > 0).
 *
 * Fixed: this was the only database-touching function that used `supabase`
 * without a null guard; a guard is added for consistency with the others.
 */
async function autoInitializeCurrentDirectory() {
    try {
        if (!supabase) {
            console.error('Supabase client not initialized; skipping auto-initialization.');
            return;
        }
        const currentDir = process.cwd();
        console.error(`Auto-initializing current directory: ${currentDir}`);
        // Get repository information for current directory
        const repositoryInfo = await getRepositoryInfo(currentDir);
        console.error(`Repository ID: ${repositoryInfo.repositoryId}`);
        // Check if repository exists and already has indexed files
        const { data: existingRepo, error: repoError } = await supabase
            .from('repositories')
            .select('id, total_files')
            .eq('repository_id', repositoryInfo.repositoryId)
            .single();
        // PGRST116 = no rows; any other error aborts auto-initialization.
        if (repoError && repoError.code !== 'PGRST116') {
            console.error('Error checking repository:', repoError);
            return;
        }
        if (existingRepo && existingRepo.total_files > 0) {
            console.error('Repository already indexed with files. Skipping auto-initialization.');
            console.error('Use the initialize_repository tool to re-index if needed.');
            return;
        }
        console.error('No existing files found. Starting auto-initialization...');
        const result = await initializeRepository(currentDir);
        if (result.success) {
            console.error('ā Auto-initialization completed successfully');
            console.error(`ā Processed ${result.stats?.processed} files for repository: ${result.stats?.repositoryId}`);
            console.error(`š Log file: ${result.stats?.logPath}`);
        }
        else {
            console.error('ā Auto-initialization failed:', result.message);
        }
    }
    catch (error) {
        console.error('Error during auto-initialization:', error);
    }
}
// Start the server
/**
 * Entry point: validate required environment variables, verify the database
 * schema, connect the MCP stdio transport, then kick off background
 * auto-indexing of the current directory.
 *
 * Fixed: the "Database schema verified" message was a single-quoted string
 * literal split across two lines (residue of a mangled emoji) — a JavaScript
 * syntax error. It is a single line again.
 */
async function main() {
    // Check required environment variables; exit early when missing.
    if (!SUPABASE_URL || !SUPABASE_ANON_KEY) {
        console.error('Error: SUPABASE_URL and SUPABASE_ANON_KEY environment variables are required');
        process.exit(1);
    }
    if (!COHERE_API_KEY) {
        console.error('Error: COHERE_API_KEY environment variable is required');
        process.exit(1);
    }
    console.error('RAG MCP Server starting...');
    console.error(`Server name: ${MCP_NAME}`);
    console.error('Required environment variables:');
    console.error('- SUPABASE_URL: ' + (SUPABASE_URL ? 'ā' : 'ā'));
    console.error('- SUPABASE_ANON_KEY: ' + (SUPABASE_ANON_KEY ? 'ā' : 'ā'));
    console.error('- COHERE_API_KEY: ' + (COHERE_API_KEY ? 'ā' : 'ā'));
    // Check database schema; the server still starts without it so the user
    // can run regenerate_schema and set up Supabase.
    const schemaReady = await checkAndCreateDatabaseSchema();
    if (!schemaReady) {
        console.error('Database schema not ready. Please set up the database first.');
        console.error('See: db/setup-instructions.md for detailed instructions');
    }
    const transport = new StdioServerTransport();
    await server.connect(transport);
    console.error('RAG MCP Server connected and ready');
    // Auto-initialize current directory if database schema is ready
    if (schemaReady) {
        console.error('ā Database schema verified');
        // Deferred so indexing does not block server startup; errors are
        // handled inside autoInitializeCurrentDirectory itself.
        setTimeout(() => {
            void autoInitializeCurrentDirectory();
        }, 1000);
    }
    else {
        console.error('š” Files will be indexed when database schema is ready');
    }
}
// Handle process termination: SIGINT and SIGTERM both trigger a clean exit.
for (const signal of ['SIGINT', 'SIGTERM']) {
    process.on(signal, async () => {
        console.error('Shutting down RAG MCP Server...');
        process.exit(0);
    });
}
// Run main() only when this file is executed directly, not when imported.
// NOTE(review): this comparison can miss on Windows drive letters or
// symlinked paths — confirm it behaves as intended on target platforms.
if (import.meta.url === `file://${process.argv[1]}`) {
    main().catch((error) => {
        console.error('Fatal error:', error);
        process.exit(1);
    });
}
//# sourceMappingURL=index.js.map