UNPKG

brain-mcp

Version:

Brain MCP Server - Semantic knowledge base access for Claude Code via Model Context Protocol. Provides intelligent search and navigation of files from multiple locations through native MCP tools.

github.com/samleeney/brain

samleeney/brain

320 lines • 12.8 kB

JavaScript

"use strict"; /** * Intelligent chunking service for markdown content * Creates semantic chunks optimized for embedding and retrieval */ Object.defineProperty(exports, "__esModule", { value: true }); exports.ChunkingService = void 0; const types_1 = require("../models/types"); class ChunkingService { static DEFAULT_OPTIONS = { maxChunkSize: 6000, // Maximum safe size (~7500 tokens) to stay under 8192 limit overlapSize: 600, // 10% overlap preserveHeadings: true, minChunkSize: 100 }; /** * Generate semantic chunks from markdown content */ static createChunks(content, title, headings, notePath, options = {}) { const opts = { ...this.DEFAULT_OPTIONS, ...options }; const chunks = []; // Simple chunking: split content by max size with overlap let chunkIndex = 0; let position = 0; while (position < content.length && chunkIndex < 10000) { // Safety limit // Calculate chunk end position let chunkEnd = Math.min(position + opts.maxChunkSize, content.length); // If not at the end, try to break at a word boundary if (chunkEnd < content.length) { const lastSpace = content.lastIndexOf(' ', chunkEnd); if (lastSpace > position + opts.minChunkSize) { chunkEnd = lastSpace; } } // Extract chunk content const chunkContent = content.substring(position, chunkEnd).trim(); if (chunkContent.length >= opts.minChunkSize) { chunks.push({ id: `${notePath}#chunk${chunkIndex}`, content: chunkContent, startLine: 0, // Line numbers not meaningful with this approach endLine: 0, headingContext: [], chunkType: types_1.ChunkType.PARAGRAPH }); chunkIndex++; } // Move position forward if (chunkEnd < content.length) { // Always move forward by at least maxChunkSize - overlapSize to avoid tiny steps const newPosition = Math.max(position + opts.maxChunkSize - opts.overlapSize, chunkEnd - opts.overlapSize); // Safety check - ensure we're moving forward if (newPosition <= position) { position = position + 100; // Force advancement } else { position = newPosition; } } else { break; } } return chunks; } /** * Create a title chunk with context */ static createTitleChunk(content, title, notePath) { const lines = content.split('\n'); let firstParagraph = ''; let endLine = 0; // Skip frontmatter and empty lines let startLine = 0; while (startLine < lines.length && (lines[startLine].trim() === '' || lines[startLine].trim() === '---')) { startLine++; } // Skip title line if it exists if (startLine < lines.length && lines[startLine].startsWith('#')) { startLine++; } // Collect first meaningful paragraph for (let i = startLine; i < lines.length; i++) { const line = lines[i].trim(); if (line === '') { if (firstParagraph.trim().length > 0) { endLine = i; break; } continue; } firstParagraph += line + ' '; // Stop if we have enough content if (firstParagraph.length > 300) { endLine = i + 1; break; } } if (firstParagraph.trim().length < 50) { return null; // Not enough meaningful content } const titleContent = `${title}\n\n${firstParagraph.trim()}`; return { id: `${notePath}#title`, content: titleContent, startLine: 0, endLine: endLine, headingContext: [title], chunkType: types_1.ChunkType.TITLE }; } /** * Create chunks based on heading structure */ static createHeadingChunks(lines, headings, notePath, options) { const chunks = []; for (let i = 0; i < headings.length; i++) { const heading = headings[i]; const nextHeading = i + 1 < headings.length ? headings[i + 1] : null; // Determine section boundaries const startLine = heading.lineNumber; const endLine = nextHeading ? nextHeading.lineNumber - 1 : lines.length - 1; // Extract section content const sectionLines = lines.slice(startLine, endLine + 1); const sectionContent = sectionLines.join('\n').trim(); if (sectionContent.length < options.minChunkSize) { continue; // Skip very small sections } // Build heading context (hierarchical path) const headingContext = this.buildHeadingContext(heading, headings); // If section is small enough, create single chunk if (sectionContent.length <= options.maxChunkSize) { chunks.push({ id: `${notePath}#${heading.slug}`, content: sectionContent, startLine, endLine, headingContext, chunkType: types_1.ChunkType.HEADING_SECTION }); } else { // Split large sections into sub-chunks const subChunks = this.splitLargeSection(sectionContent, startLine, headingContext, notePath, heading.slug, options); chunks.push(...subChunks); } } return chunks; } /** * Create chunks based on paragraph boundaries */ static createParagraphChunks(lines, notePath, options) { const chunks = []; let currentChunk = ''; let chunkStartLine = 0; let chunkIndex = 0; for (let i = 0; i < lines.length; i++) { const line = lines[i]; // Check if we hit a natural break (empty line + enough content) if (line.trim() === '' && currentChunk.trim().length >= options.minChunkSize) { if (currentChunk.length >= options.maxChunkSize) { // Save current chunk chunks.push({ id: `${notePath}#para${chunkIndex}`, content: currentChunk.trim(), startLine: chunkStartLine, endLine: i - 1, headingContext: [], chunkType: types_1.ChunkType.PARAGRAPH }); // Start new chunk with overlap const overlapContent = this.extractOverlap(currentChunk, options.overlapSize); currentChunk = overlapContent; chunkStartLine = Math.max(0, i - this.estimateOverlapLines(overlapContent)); chunkIndex++; } } else { currentChunk += line + '\n'; } } // Add final chunk if there's content if (currentChunk.trim().length >= options.minChunkSize) { chunks.push({ id: `${notePath}#para${chunkIndex}`, content: currentChunk.trim(), startLine: chunkStartLine, endLine: lines.length - 1, headingContext: [], chunkType: types_1.ChunkType.PARAGRAPH }); } return chunks; } /** * Split large sections into manageable chunks */ static splitLargeSection(content, startLine, headingContext, notePath, headingSlug, options) { const chunks = []; const paragraphs = content.split('\n\n'); let currentChunk = ''; let currentStartLine = startLine; let chunkIndex = 0; for (const paragraph of paragraphs) { if (currentChunk.length + paragraph.length + 2 > options.maxChunkSize) { if (currentChunk.trim().length >= options.minChunkSize) { chunks.push({ id: `${notePath}#${headingSlug}-${chunkIndex}`, content: currentChunk.trim(), startLine: currentStartLine, endLine: currentStartLine + this.estimateLines(currentChunk), headingContext, chunkType: types_1.ChunkType.HEADING_SECTION }); chunkIndex++; } // Start new chunk with overlap const overlapContent = this.extractOverlap(currentChunk, options.overlapSize); currentChunk = overlapContent + (overlapContent ? '\n\n' : '') + paragraph; currentStartLine += this.estimateLines(currentChunk) - this.estimateLines(overlapContent); } else { currentChunk += (currentChunk ? '\n\n' : '') + paragraph; } } // Add final chunk if (currentChunk.trim().length >= options.minChunkSize) { chunks.push({ id: `${notePath}#${headingSlug}-${chunkIndex}`, content: currentChunk.trim(), startLine: currentStartLine, endLine: startLine + this.estimateLines(content), headingContext, chunkType: types_1.ChunkType.HEADING_SECTION }); } return chunks; } /** * Build hierarchical heading context */ static buildHeadingContext(currentHeading, allHeadings) { const context = []; // Find parent headings for (let i = allHeadings.indexOf(currentHeading) - 1; i >= 0; i--) { const heading = allHeadings[i]; if (heading.level < currentHeading.level) { context.unshift(heading.text); // Stop at immediate parent for this level if (heading.level === currentHeading.level - 1) { break; } } } // Add current heading context.push(currentHeading.text); return context; } /** * Extract overlap content from end of chunk */ static extractOverlap(content, overlapSize) { if (content.length <= overlapSize) { return content; } // Try to break at sentence boundary within overlap const overlapText = content.slice(-overlapSize); const sentenceEnd = overlapText.lastIndexOf('. '); if (sentenceEnd > 0 && sentenceEnd > overlapSize * 0.5) { return overlapText.slice(sentenceEnd + 2); } return overlapText; } /** * Remove duplicate or very similar chunks */ static deduplicateChunks(chunks, options) { const uniqueChunks = []; for (const chunk of chunks) { const isDuplicate = uniqueChunks.some(existing => { // Skip title chunks in deduplication if (chunk.chunkType === types_1.ChunkType.TITLE || existing.chunkType === types_1.ChunkType.TITLE) { return false; } // Check for high similarity (simple string comparison) const similarity = this.calculateStringSimilarity(chunk.content, existing.content); return similarity > 0.85; }); if (!isDuplicate) { uniqueChunks.push(chunk); } } return uniqueChunks; } /** * Calculate simple string similarity (Jaccard index) */ static calculateStringSimilarity(str1, str2) { const words1 = new Set(str1.toLowerCase().split(/\s+/)); const words2 = new Set(str2.toLowerCase().split(/\s+/)); const intersection = new Set([...words1].filter(x => words2.has(x))); const union = new Set([...words1, ...words2]); return intersection.size / union.size; } /** * Estimate number of lines in text */ static estimateLines(text) { return text.split('\n').length; } /** * Estimate lines needed for overlap content */ static estimateOverlapLines(text) { return Math.max(1, text.split('\n').length); } } exports.ChunkingService = ChunkingService; //# sourceMappingURL=ChunkingService.js.map