@boundless-oss/atlas
Version:
Atlas - MCP Server for comprehensive startup project management
660 lines • 26.2 kB
JavaScript
import { randomUUID } from 'crypto';
import { createHash } from 'crypto';
import { createTool, createSuccessResult, createErrorResult } from '../../core/tool-framework.js';
import { promises as fs } from 'fs';
import path from 'path';
/**
 * Serialize an embedding vector into a Buffer for binary storage.
 *
 * The Buffer is a view (no copy) over exactly the bytes covered by the typed
 * array. The byte offset and length are passed explicitly because a typed
 * array may be a window onto a larger ArrayBuffer (e.g. the result of
 * `subarray`); wrapping `embedding.buffer` alone would serialize the whole
 * backing store instead of just this vector.
 *
 * @param {Float32Array} embedding - Embedding vector to serialize.
 * @returns {Buffer} Buffer sharing memory with `embedding`.
 */
function embeddingToBuffer(embedding) {
    return Buffer.from(embedding.buffer, embedding.byteOffset, embedding.byteLength);
}
/**
 * Deserialize a Buffer produced by binary storage back into an embedding.
 *
 * When the Buffer starts on a 4-byte boundary of its backing ArrayBuffer the
 * result is a zero-copy view. Otherwise the bytes are copied into a fresh,
 * aligned ArrayBuffer first, because the Float32Array constructor throws a
 * RangeError for a byte offset that is not a multiple of 4.
 *
 * Trailing bytes that do not form a complete 32-bit float are ignored.
 *
 * @param {Buffer} buffer - Raw bytes holding 32-bit floats in platform byte order.
 * @returns {Float32Array} The decoded embedding vector.
 */
function bufferToEmbedding(buffer) {
    const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
    const length = Math.floor(buffer.byteLength / bytesPerFloat);
    if (buffer.byteOffset % bytesPerFloat === 0) {
        return new Float32Array(buffer.buffer, buffer.byteOffset, length);
    }
    // Unaligned view: copy into a new buffer whose offset 0 is always aligned.
    const aligned = new Uint8Array(length * bytesPerFloat);
    aligned.set(buffer.subarray(0, aligned.byteLength));
    return new Float32Array(aligned.buffer);
}
/**
 * Produce a stable fingerprint of a piece of text, suitable as a cache key.
 *
 * @param {string} text - Text to fingerprint.
 * @returns {string} Hex-encoded SHA-256 digest of `text`.
 */
function calculateTextHash(text) {
    const hasher = createHash('sha256');
    hasher.update(text);
    return hasher.digest('hex');
}
/**
 * Search through indexed documents.
 *
 * Despite the tool description, no embedding model is wired in yet: matching
 * is a case-as-configured SQL `LIKE` substring match on chunk content, and
 * every hit is returned with a fixed placeholder score. The `threshold` and
 * `filters` inputs are only recorded in the search history; they do not
 * influence which rows are returned.
 *
 * Every search (successful query) is appended to `rag_search_history`.
 *
 * Returns a success result with `results`, `query`, `executionTime` and
 * `message`, or an error result with code `DATABASE_ERROR` /
 * `EXECUTION_ERROR`.
 */
const ragSearchTool = createTool({
    name: 'rag_search',
    description: 'Search through indexed documents using semantic search',
    category: 'rag-retrieval',
    readOnly: true,
    inputSchema: {
        type: 'object',
        properties: {
            query: {
                type: 'string',
                description: 'Search query text',
                minLength: 1,
                maxLength: 1000
            },
            limit: {
                type: 'integer',
                description: 'Maximum number of results (default: 10)',
                minimum: 1,
                maximum: 100,
                default: 10
            },
            threshold: {
                type: 'number',
                description: 'Minimum similarity threshold (0-1)',
                minimum: 0,
                maximum: 1,
                default: 0.5
            },
            collection: {
                type: 'string',
                description: 'Limit search to a specific collection',
                maxLength: 200
            },
            filters: {
                type: 'object',
                description: 'Metadata filters to apply',
                additionalProperties: true
            }
        },
        required: ['query'],
        additionalProperties: false
    },
    async execute(input, context) {
        try {
            const startTime = Date.now();
            const projectId = context.projectId || 'default';
            // `??` rather than `||`: a threshold of 0 is valid per the schema
            // and must not be silently replaced by the default.
            const limit = input.limit ?? 10;
            const threshold = input.threshold ?? 0.5;
            // This is a placeholder for the actual embedding model integration.
            // Until one exists, search is simulated with text matching.
            let query = `
SELECT
c.id,
c.content,
c.chunk_index,
c.chunk_type,
c.metadata,
d.id as document_id,
d.path as document_path,
d.title as document_title
FROM rag_chunks c
JOIN rag_documents d ON c.document_id = d.id
WHERE c.project_id = ?
`;
            const params = [projectId];
            // Add collection filter if specified
            if (input.collection) {
                query += `
AND EXISTS (
SELECT 1 FROM rag_collection_documents cd
JOIN rag_collections col ON cd.collection_id = col.id
WHERE cd.document_id = d.id
AND col.name = ?
AND col.project_id = ?
)
`;
                params.push(input.collection, projectId);
            }
            // Escape LIKE metacharacters so that `%`, `_` and `\` typed by the
            // user are matched literally instead of acting as wildcards.
            const likePattern = `%${input.query.replace(/[\\%_]/g, '\\$&')}%`;
            query += ` AND c.content LIKE ? ESCAPE '\\' ORDER BY c.id LIMIT ?`;
            params.push(likePattern, limit);
            const result = await context.db.query(query, params);
            if (!result.success) {
                return createErrorResult({
                    code: 'DATABASE_ERROR',
                    message: 'Failed to search documents',
                    details: { error: result.error },
                    category: 'system'
                });
            }
            const chunks = result.data || [];
            const executionTime = Date.now() - startTime;
            // Log search to history
            await context.db.run(`INSERT INTO rag_search_history
(id, project_id, query, limit_count, threshold, filters, result_count, execution_time)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, [
                randomUUID(),
                projectId,
                input.query,
                limit,
                threshold,
                JSON.stringify(input.filters || {}),
                chunks.length,
                executionTime
            ]);
            return createSuccessResult({
                results: chunks.map((chunk) => ({
                    chunk: {
                        id: chunk.id,
                        content: chunk.content,
                        index: chunk.chunk_index,
                        type: chunk.chunk_type,
                        metadata: JSON.parse(chunk.metadata || '{}')
                    },
                    document: {
                        id: chunk.document_id,
                        path: chunk.document_path,
                        title: chunk.document_title
                    },
                    score: 0.8 // Placeholder score
                })),
                query: input.query,
                executionTime,
                message: `Found ${chunks.length} relevant chunk${chunks.length !== 1 ? 's' : ''}`
            });
        }
        catch (error) {
            return createErrorResult({
                code: 'EXECUTION_ERROR',
                message: `Failed to search: ${error instanceof Error ? error.message : 'Unknown error'}`,
                category: 'execution'
            });
        }
    }
});
/**
 * Index a single document for RAG.
 *
 * Reads the file at `input.path` (resolved against the process working
 * directory), upserts its row in `rag_documents`, replaces its rows in
 * `rag_chunks` with fixed-size overlapping character windows, and finally
 * marks the document `completed`.
 *
 * Every database write is checked: if any write fails the tool returns a
 * `DATABASE_ERROR` and the document is left in the `pending` state rather
 * than being reported as indexed with missing chunks.
 *
 * Returns a success result with `document` ({ id, path, title, chunkCount }),
 * `message` and `details`, or an error result with code `FILE_NOT_FOUND`,
 * `DATABASE_ERROR` or `EXECUTION_ERROR`.
 */
const ragIndexDocumentTool = createTool({
    name: 'rag_index_document',
    description: 'Index a single document for RAG',
    category: 'rag-retrieval',
    inputSchema: {
        type: 'object',
        properties: {
            path: {
                type: 'string',
                description: 'Path to the document to index (relative to project root)',
                minLength: 1,
                maxLength: 500
            }
        },
        required: ['path'],
        additionalProperties: false
    },
    async execute(input, context) {
        // Shared shape for every failed database write in this tool.
        const databaseError = (result) => createErrorResult({
            code: 'DATABASE_ERROR',
            message: 'Failed to index document',
            details: { error: result.error },
            category: 'system'
        });
        try {
            const projectId = context.projectId || 'default';
            const projectPath = process.cwd();
            // NOTE(review): path.resolve lets an absolute or `..` path escape the
            // working directory. Left as-is because callers may rely on it —
            // confirm whether indexing should be confined to the project root.
            const documentPath = path.resolve(projectPath, input.path);
            // Check if file exists
            try {
                await fs.access(documentPath);
            }
            catch {
                return createErrorResult({
                    code: 'FILE_NOT_FOUND',
                    message: `Document not found: ${input.path}`,
                    category: 'validation'
                });
            }
            // Read file content
            const content = await fs.readFile(documentPath, 'utf-8');
            const stats = await fs.stat(documentPath);
            // Extract metadata
            const title = path.basename(documentPath, path.extname(documentPath));
            // Check if document already exists
            const existingDoc = await context.db.get('SELECT id FROM rag_documents WHERE project_id = ? AND path = ?', [projectId, input.path]);
            const isReindex = Boolean(existingDoc.success && existingDoc.data);
            // Resolve the id once so the document row, its chunks and the
            // response can never disagree about which document was written.
            const documentId = isReindex ? existingDoc.data.id : randomUUID();
            if (isReindex) {
                // Update existing document
                const updated = await context.db.run(`UPDATE rag_documents
SET content = ?, size = ?, last_modified = ?, updated_at = ?, embedding_status = ?
WHERE id = ?`, [
                    content,
                    stats.size,
                    stats.mtimeMs,
                    Date.now(),
                    'pending',
                    documentId
                ]);
                if (!updated.success) {
                    return databaseError(updated);
                }
                // Delete old chunks
                const purged = await context.db.run('DELETE FROM rag_chunks WHERE document_id = ?', [documentId]);
                if (!purged.success) {
                    return databaseError(purged);
                }
            }
            else {
                // Insert new document
                const inserted = await context.db.run(`INSERT INTO rag_documents
(id, project_id, path, content, title, size, last_modified, embedding_status)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, [
                    documentId,
                    projectId,
                    input.path,
                    content,
                    title,
                    stats.size,
                    stats.mtimeMs,
                    'pending'
                ]);
                if (!inserted.success) {
                    return databaseError(inserted);
                }
            }
            // Create chunks (simplified for now): fixed-size character windows
            // that overlap their predecessor by `chunkOverlap` characters.
            const chunkSize = 500;
            const chunkOverlap = 50;
            const stride = chunkSize - chunkOverlap;
            const chunks = [];
            for (let start = 0; start < content.length; start += stride) {
                const end = Math.min(start + chunkSize, content.length);
                const chunkContent = content.substring(start, end);
                if (chunkContent.trim()) {
                    chunks.push({
                        id: randomUUID(),
                        documentId,
                        content: chunkContent,
                        index: chunks.length,
                        startOffset: start,
                        endOffset: end,
                        type: 'paragraph' // Simplified
                    });
                }
                // Once a window reaches the end of the text, the next window
                // would consist solely of text already inside this one.
                if (end === content.length) {
                    break;
                }
            }
            // Insert chunks
            for (const chunk of chunks) {
                const stored = await context.db.run(`INSERT INTO rag_chunks
(id, project_id, document_id, content, chunk_index, start_offset, end_offset, chunk_type)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, [
                    chunk.id,
                    projectId,
                    chunk.documentId,
                    chunk.content,
                    chunk.index,
                    chunk.startOffset,
                    chunk.endOffset,
                    chunk.type
                ]);
                if (!stored.success) {
                    return databaseError(stored);
                }
            }
            // Update chunk count
            const finalized = await context.db.run('UPDATE rag_documents SET chunk_count = ?, embedding_status = ? WHERE id = ?', [chunks.length, 'completed', documentId]);
            if (!finalized.success) {
                return databaseError(finalized);
            }
            return createSuccessResult({
                document: {
                    id: documentId,
                    path: input.path,
                    title,
                    chunkCount: chunks.length
                },
                message: `Successfully indexed document: ${input.path}`,
                details: `Created ${chunks.length} chunks from ${stats.size} bytes`
            });
        }
        catch (error) {
            return createErrorResult({
                code: 'EXECUTION_ERROR',
                message: `Failed to index document: ${error instanceof Error ? error.message : 'Unknown error'}`,
                category: 'execution'
            });
        }
    }
});
/**
 * Index every markdown document found under a directory.
 *
 * The directory is resolved against the process working directory, walked
 * with `findMarkdownFiles`, and each file found is handed to
 * `ragIndexDocumentTool` one at a time. A file that fails to index is
 * recorded in `errors` and does not stop the run.
 *
 * Returns a success result with `summary` ({ totalFiles, indexed, failed }),
 * `errors` and `message`, or an error result with code `NOT_A_DIRECTORY`,
 * `DIRECTORY_NOT_FOUND` or `EXECUTION_ERROR`.
 */
const ragIndexDirectoryTool = createTool({
    name: 'rag_index_directory',
    description: 'Index all markdown documents in a directory',
    category: 'rag-retrieval',
    inputSchema: {
        type: 'object',
        properties: {
            path: {
                type: 'string',
                description: 'Path to the directory to index (relative to project root)',
                minLength: 1,
                maxLength: 500
            }
        },
        required: ['path'],
        additionalProperties: false
    },
    async execute(input, context) {
        try {
            const rootDir = process.cwd();
            const targetDir = path.resolve(rootDir, input.path);
            // A failing stat means the path does not exist (or is unreadable).
            let targetStats;
            try {
                targetStats = await fs.stat(targetDir);
            }
            catch {
                return createErrorResult({
                    code: 'DIRECTORY_NOT_FOUND',
                    message: `Directory not found: ${input.path}`,
                    category: 'validation'
                });
            }
            if (!targetStats.isDirectory()) {
                return createErrorResult({
                    code: 'NOT_A_DIRECTORY',
                    message: `Not a directory: ${input.path}`,
                    category: 'validation'
                });
            }
            const markdownFiles = await findMarkdownFiles(targetDir);
            let indexedCount = 0;
            let failedCount = 0;
            const failures = [];
            // Documents are indexed sequentially, in the order the walk found them.
            for (const absoluteFile of markdownFiles) {
                const relativePath = path.relative(rootDir, absoluteFile);
                const outcome = await ragIndexDocumentTool.execute({ path: relativePath }, context);
                if (!outcome.success) {
                    failedCount += 1;
                    failures.push(`${relativePath}: ${outcome.error?.message}`);
                    continue;
                }
                indexedCount += 1;
            }
            const pluralSuffix = indexedCount !== 1 ? 's' : '';
            return createSuccessResult({
                summary: {
                    totalFiles: markdownFiles.length,
                    indexed: indexedCount,
                    failed: failedCount
                },
                errors: failures,
                message: `Indexed ${indexedCount} document${pluralSuffix} from ${input.path}`
            });
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : 'Unknown error';
            return createErrorResult({
                code: 'EXECUTION_ERROR',
                message: `Failed to index directory: ${reason}`,
                category: 'execution'
            });
        }
    }
});
/**
 * Index a predefined collection of documents.
 *
 * Looks the collection up by name in `rag_collections`. The names `docs` and
 * `readme` are created on demand with built-in path lists when missing; any
 * other unknown name yields `COLLECTION_NOT_FOUND`.
 *
 * Each path of the collection is resolved against the process working
 * directory. Directories are walked with `findMarkdownFiles`; plain files are
 * indexed directly. Every successfully indexed document is linked to the
 * collection in `rag_collection_documents` — including documents reached
 * through a directory path, which is what makes collection-filtered search
 * able to find them. A path or file that fails is recorded in `errors` and
 * does not stop the run.
 *
 * Afterwards the collection's `document_count`, `chunk_count`,
 * `last_indexed` and `updated_at` are refreshed.
 *
 * Returns a success result with `collection` ({ name, documentsIndexed,
 * failed }), `errors` and `message`, or an error result with code
 * `COLLECTION_NOT_FOUND` or `EXECUTION_ERROR`.
 */
const ragIndexCollectionTool = createTool({
    name: 'rag_index_collection',
    description: 'Index a predefined collection of documents',
    category: 'rag-retrieval',
    inputSchema: {
        type: 'object',
        properties: {
            collection: {
                type: 'string',
                description: 'Name of the collection to index',
                minLength: 1,
                maxLength: 200
            }
        },
        required: ['collection'],
        additionalProperties: false
    },
    async execute(input, context) {
        try {
            const projectId = context.projectId || 'default';
            const projectPath = process.cwd();
            // Get collection
            const collectionResult = await context.db.get('SELECT * FROM rag_collections WHERE project_id = ? AND name = ?', [projectId, input.collection]);
            let collection = collectionResult.success ? collectionResult.data : null;
            if (!collection) {
                // Create default collections if needed
                if (input.collection === 'docs' || input.collection === 'readme') {
                    const collectionId = randomUUID();
                    const defaultPaths = input.collection === 'docs' ? ['./docs'] : ['./README.md', './docs/README.md'];
                    await context.db.run(`INSERT INTO rag_collections
(id, project_id, name, description, paths)
VALUES (?, ?, ?, ?, ?)`, [
                        collectionId,
                        projectId,
                        input.collection,
                        input.collection === 'docs' ? 'Documentation files' : 'README files',
                        JSON.stringify(defaultPaths)
                    ]);
                    // Re-fetch so a failed insert surfaces as "not found" below.
                    const newCollection = await context.db.get('SELECT * FROM rag_collections WHERE id = ?', [collectionId]);
                    collection = newCollection.data || null;
                }
                if (!collection) {
                    return createErrorResult({
                        code: 'COLLECTION_NOT_FOUND',
                        message: `Collection not found: ${input.collection}`,
                        category: 'validation'
                    });
                }
            }
            const paths = JSON.parse(collection.paths || '[]');
            const results = {
                indexed: 0,
                failed: 0,
                chunks: 0,
                errors: []
            };
            // Index one document and, on success, link it to this collection.
            const indexAndLink = async (documentPath) => {
                const fileResult = await ragIndexDocumentTool.execute({ path: documentPath }, context);
                if (!fileResult.success) {
                    results.failed++;
                    results.errors.push(`${documentPath}: ${fileResult.error?.message}`);
                    return;
                }
                results.indexed++;
                results.chunks += fileResult.data.document.chunkCount ?? 0;
                // Add to collection
                await context.db.run(`INSERT OR IGNORE INTO rag_collection_documents
(collection_id, document_id)
VALUES (?, ?)`, [collection.id, fileResult.data.document.id]);
            };
            // Index each path in the collection
            for (const collectionPath of paths) {
                try {
                    const absolutePath = path.resolve(projectPath, collectionPath);
                    const stats = await fs.stat(absolutePath);
                    if (stats.isDirectory()) {
                        // Walk the directory here rather than delegating to the
                        // directory tool: its result carries no document ids,
                        // so the documents could not be linked to the collection.
                        const files = await findMarkdownFiles(absolutePath);
                        for (const file of files) {
                            await indexAndLink(path.relative(projectPath, file));
                        }
                    }
                    else {
                        // Index single file
                        await indexAndLink(collectionPath);
                    }
                }
                catch (error) {
                    results.failed++;
                    results.errors.push(`${collectionPath}: ${error instanceof Error ? error.message : 'Unknown error'}`);
                }
            }
            // Update collection stats
            const now = Date.now();
            await context.db.run(`UPDATE rag_collections
SET document_count = ?, chunk_count = ?, last_indexed = ?, updated_at = ?
WHERE id = ?`, [results.indexed, results.chunks, now, now, collection.id]);
            return createSuccessResult({
                collection: {
                    name: input.collection,
                    documentsIndexed: results.indexed,
                    failed: results.failed
                },
                errors: results.errors,
                message: `Indexed collection "${input.collection}": ${results.indexed} documents`
            });
        }
        catch (error) {
            return createErrorResult({
                code: 'EXECUTION_ERROR',
                message: `Failed to index collection: ${error instanceof Error ? error.message : 'Unknown error'}`,
                category: 'execution'
            });
        }
    }
});
/**
 * Report aggregate statistics about the RAG index for the current project.
 *
 * Runs four read queries in sequence (document totals, collection count,
 * most recent document `created_at`, per-collection counters) and folds them
 * into a single `stats` object. Missing or empty query results fall back to
 * zero / 'Never' rather than producing an error.
 *
 * Returns a success result with `stats` and `message`, or an error result
 * with code `EXECUTION_ERROR` if anything throws.
 */
const ragGetStatsTool = createTool({
    name: 'rag_get_stats',
    description: 'Get statistics about the RAG index',
    category: 'rag-retrieval',
    readOnly: true,
    inputSchema: {
        type: 'object',
        properties: {},
        additionalProperties: false
    },
    async execute(input, context) {
        try {
            const projectId = context.projectId || 'default';
            // Document totals
            const documentTotals = await context.db.get(`SELECT
COUNT(*) as total_documents,
SUM(chunk_count) as total_chunks,
SUM(size) as total_size
FROM rag_documents
WHERE project_id = ?`, [projectId]);
            // Number of collections
            const collectionTotals = await context.db.query(`SELECT
COUNT(*) as total_collections
FROM rag_collections
WHERE project_id = ?`, [projectId]);
            // Newest document creation time
            const newestDocument = await context.db.get(`SELECT MAX(created_at) as last_indexed
FROM rag_documents
WHERE project_id = ?`, [projectId]);
            // Per-collection counters
            const collectionRows = await context.db.query(`SELECT
name,
document_count,
chunk_count
FROM rag_collections
WHERE project_id = ?`, [projectId]);
            let lastIndexedLabel = 'Never';
            if (newestDocument.data?.last_indexed) {
                lastIndexedLabel = new Date(newestDocument.data.last_indexed).toISOString();
            }
            // Keyed by collection name; a later row with the same name wins.
            const perCollection = {};
            for (const row of collectionRows.data || []) {
                perCollection[row.name] = {
                    documentCount: row.document_count || 0,
                    chunkCount: row.chunk_count || 0,
                    sizeBytes: 0 // Would need to calculate from documents
                };
            }
            const totals = documentTotals.data;
            const stats = {
                totalDocuments: totals?.total_documents || 0,
                totalChunks: totals?.total_chunks || 0,
                totalCollections: collectionTotals.data?.[0]?.total_collections || 0,
                indexSize: totals?.total_size || 0,
                lastIndexed: lastIndexedLabel,
                collections: perCollection
            };
            return createSuccessResult({
                stats,
                message: 'RAG index statistics retrieved successfully'
            });
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : 'Unknown error';
            return createErrorResult({
                code: 'EXECUTION_ERROR',
                message: `Failed to get statistics: ${reason}`,
                category: 'execution'
            });
        }
    }
});
/**
 * Clear indexed documents, either for one collection or for the whole project.
 *
 * With `input.collection`, removes every document linked to that collection;
 * without it, removes every document of the current project. In both cases
 * the rows that depend on those documents (`rag_chunks`,
 * `rag_collection_documents`) are deleted explicitly instead of relying on
 * the schema to cascade, and the affected collections' counters are reset.
 *
 * Statements run in order and stop at the first failed write, which is
 * reported as `DATABASE_ERROR`.
 * NOTE(review): the statements are not wrapped in a transaction here, so a
 * mid-way failure leaves earlier deletes applied — confirm whether
 * `context.db` offers a transaction helper.
 *
 * Returns a success result with `message` (and `collection` when scoped), or
 * an error result with code `COLLECTION_NOT_FOUND`, `DATABASE_ERROR` or
 * `EXECUTION_ERROR`.
 */
const ragClearIndexTool = createTool({
    name: 'rag_clear_index',
    description: 'Clear all indexed documents',
    category: 'rag-retrieval',
    inputSchema: {
        type: 'object',
        properties: {
            collection: {
                type: 'string',
                description: 'Clear only documents in this collection',
                maxLength: 200
            }
        },
        additionalProperties: false
    },
    async execute(input, context) {
        // Run [sql, params] pairs in order; return the first failed result, or
        // null when every statement succeeded.
        const runAll = async (statements) => {
            for (const [sql, params] of statements) {
                const outcome = await context.db.run(sql, params);
                if (!outcome.success) {
                    return outcome;
                }
            }
            return null;
        };
        const databaseError = (failure) => createErrorResult({
            code: 'DATABASE_ERROR',
            message: 'Failed to clear index',
            details: { error: failure.error },
            category: 'system'
        });
        try {
            const projectId = context.projectId || 'default';
            if (input.collection) {
                // Clear specific collection
                const collection = await context.db.get('SELECT id FROM rag_collections WHERE project_id = ? AND name = ?', [projectId, input.collection]);
                if (!collection.success || !collection.data) {
                    return createErrorResult({
                        code: 'COLLECTION_NOT_FOUND',
                        message: `Collection not found: ${input.collection}`,
                        category: 'validation'
                    });
                }
                const collectionId = collection.data.id;
                // The link table identifies which documents belong to the
                // collection, so it is consulted by every delete and emptied last.
                const failure = await runAll([
                    [`DELETE FROM rag_chunks
WHERE document_id IN (
SELECT document_id
FROM rag_collection_documents
WHERE collection_id = ?
)`, [collectionId]],
                    // Delete documents in collection
                    [`DELETE FROM rag_documents
WHERE id IN (
SELECT document_id
FROM rag_collection_documents
WHERE collection_id = ?
)`, [collectionId]],
                    // The documents are gone entirely, so drop their links from
                    // every collection, not only from this one.
                    [`DELETE FROM rag_collection_documents
WHERE document_id IN (
SELECT document_id
FROM rag_collection_documents
WHERE collection_id = ?
)`, [collectionId]],
                    // Clear collection stats
                    ['UPDATE rag_collections SET document_count = 0, chunk_count = 0 WHERE id = ?', [collectionId]]
                ]);
                if (failure) {
                    return databaseError(failure);
                }
                return createSuccessResult({
                    message: `Cleared collection "${input.collection}"`,
                    collection: input.collection
                });
            }
            // Clear all documents
            const failure = await runAll([
                ['DELETE FROM rag_chunks WHERE project_id = ?', [projectId]],
                [`DELETE FROM rag_collection_documents
WHERE collection_id IN (
SELECT id
FROM rag_collections
WHERE project_id = ?
)`, [projectId]],
                ['DELETE FROM rag_documents WHERE project_id = ?', [projectId]],
                // Clear all collection stats
                ['UPDATE rag_collections SET document_count = 0, chunk_count = 0 WHERE project_id = ?', [projectId]]
            ]);
            if (failure) {
                return databaseError(failure);
            }
            return createSuccessResult({
                message: 'Cleared all indexed documents'
            });
        }
        catch (error) {
            return createErrorResult({
                code: 'EXECUTION_ERROR',
                message: `Failed to clear index: ${error instanceof Error ? error.message : 'Unknown error'}`,
                category: 'execution'
            });
        }
    }
});
/**
 * Recursively collect the markdown files beneath a directory.
 *
 * Sub-directories whose name starts with a dot are not descended into. A file
 * counts as markdown when its name ends in `.md` or `.markdown`, compared
 * case-insensitively. Results follow the order in which `readdir` lists the
 * entries, with a sub-directory's files inserted at that directory's position.
 *
 * @param {string} dir - Directory to walk.
 * @returns {Promise<string[]>} Paths of the markdown files found, each formed
 *   by joining `dir` with the entry names beneath it.
 */
async function findMarkdownFiles(dir) {
    const markdownName = /\.(md|markdown)$/i;
    const collected = [];
    const dirents = await fs.readdir(dir, { withFileTypes: true });
    for (const dirent of dirents) {
        const location = path.join(dir, dirent.name);
        const isVisibleDirectory = dirent.isDirectory() && !dirent.name.startsWith('.');
        if (isVisibleDirectory) {
            const nested = await findMarkdownFiles(location);
            collected.push(...nested);
            continue;
        }
        if (dirent.isFile() && markdownName.test(dirent.name)) {
            collected.push(location);
        }
    }
    return collected;
}
/**
 * Assemble the RAG retrieval module descriptor.
 *
 * @returns {Promise<{module: string, tools: Array}>} The module name together
 *   with the six RAG tools defined in this file, in registration order.
 */
export async function setupRAGRetrievalTools() {
    const tools = [
        ragSearchTool,
        ragIndexDocumentTool,
        ragIndexDirectoryTool,
        ragIndexCollectionTool,
        ragGetStatsTool,
        ragClearIndexTool
    ];
    return { module: 'rag-retrieval', tools };
}
//# sourceMappingURL=tools.js.map