UNPKG

mongodocs-mcp

Version:

Lightning-fast semantic search for MongoDB documentation via Model Context Protocol. 10,000+ documents, <500ms search.

143 lines 5.35 kB
/**
 * Content Quality Scorer for MongoDB Documentation
 * Prioritizes technical content over meta-documentation
 */

/**
 * Patterns that classify a document as technical. The inline labels say which
 * field each pattern was written for, but every pattern is tested against
 * path, content and title alike.
 * NOTE(review): testing "title" patterns against the full content makes the
 * technical class very broad — confirm this is intended before narrowing it.
 */
const TECHNICAL_INDICATORS = Object.freeze([
    // Path indicators
    /\/(reference|tutorial|examples?|operators?|methods?|aggregation|query|crud)\//i,
    // Content indicators
    /db\.\w+\.\w+\(/,
    /\$\w+/, // MongoDB operators
    /find\(|aggregate\(|insertOne\(|updateOne\(/,
    // Title indicators
    /method|operator|function|command/i,
]);

/** Patterns that classify a document as conceptual. */
const CONCEPTUAL_INDICATORS = Object.freeze([
    /\/(concepts?|fundamentals?|introduction|overview|guide)\//i,
    /what is|how to|understanding|concepts?/i,
]);

/** Patterns that classify a document as meta documentation. */
const META_INDICATORS = Object.freeze([
    /readme|contributing|license|changelog|authors/i,
    /pull.request|issue.template|code.of.conduct/i,
    /self.review|external.review|documentation.team/i,
]);

/** Directory patterns whose documents receive an additional boost. */
const HIGH_PRIORITY_PATHS = Object.freeze([
    /\/tutorial\//i,
    /\/reference\//i,
    /\/examples?\//i,
    /\/crud\//i,
    /\/aggregation\//i,
    /\/query\//i,
    /\/operators?\//i,
]);

/** Content shorter than this many characters is treated as low-value. */
const MIN_CONTENT_LENGTH = 200;
/** Content whose share of bracket/angle characters exceeds this is low-value. */
const MAX_MARKUP_RATIO = 0.3;
/** Content whose share of whitespace characters exceeds this is low-value. */
const MAX_WHITESPACE_RATIO = 0.8;

/**
 * Coerce a possibly-missing field to a string so that regex tests and
 * `.length` reads never throw. `null`/`undefined` become the empty string.
 * @param {*} value
 * @returns {string}
 */
function toText(value) {
    return value == null ? '' : String(value);
}

/**
 * True when at least one pattern matches at least one of the given fields.
 * @param {ReadonlyArray<RegExp>} patterns
 * @param {string[]} fields
 * @returns {boolean}
 */
function matchesAnyField(patterns, fields) {
    return patterns.some((pattern) => fields.some((field) => pattern.test(field)));
}

export class ContentQualityScorer {
    /**
     * Score document quality based on content and metadata.
     *
     * The document is classified as technical, conceptual or meta (checked in
     * that order, first match wins), then adjusted for high-priority paths and
     * low-value content.
     *
     * @param {{content?: string, metadata?: {path?: string, title?: string}}} doc
     *   Document to score. Missing fields are treated as empty strings.
     * @returns {{score: number, reasons: string[], contentType: string, boostFactor: number}}
     *   `score` is clamped to [0, 1]; `reasons` lists every rule that fired;
     *   `contentType` is 'technical', 'conceptual' or 'meta' ('conceptual' is
     *   also the default when nothing matches); `boostFactor` is a multiplier
     *   that is not clamped.
     */
    scoreDocument(doc) {
        const metadata = (doc && doc.metadata) || {};
        const path = toText(metadata.path);
        const title = toText(metadata.title);
        const content = toText(doc && doc.content);

        let score = 0.5; // Base score
        const reasons = [];
        let contentType = 'conceptual';
        let boostFactor = 1.0;

        if (this.isTechnicalContent(path, content, title)) {
            // HIGH PRIORITY: Technical documentation with examples
            score += 0.4;
            contentType = 'technical';
            boostFactor = 1.5;
            reasons.push('Technical documentation');

            // Each extra signal may only raise the boost. Plain assignment
            // here would let the 1.6 "code examples" boost overwrite the
            // 1.8 "method documentation" boost and rank richer pages lower.
            if (this.hasMethodDocumentation(content)) {
                score += 0.2;
                boostFactor = Math.max(boostFactor, 1.8);
                reasons.push('Contains method documentation');
            }
            if (this.hasCodeExamples(content)) {
                score += 0.2;
                boostFactor = Math.max(boostFactor, 1.6);
                reasons.push('Contains code examples');
            }
            if (this.hasQueryExamples(content)) {
                score += 0.3;
                boostFactor = Math.max(boostFactor, 2.0);
                reasons.push('Contains query examples');
            }
        } else if (this.isConceptualContent(path, content, title)) {
            // MEDIUM PRIORITY: Conceptual documentation
            score += 0.2;
            contentType = 'conceptual';
            boostFactor = 1.2;
            reasons.push('Conceptual documentation');
        } else if (this.isMetaContent(path, content, title)) {
            // LOW PRIORITY: Meta documentation
            score -= 0.3;
            contentType = 'meta';
            boostFactor = 0.3;
            reasons.push('Meta documentation');
        }

        // PRIORITY PATHS: Boost high-value directories
        if (this.isHighPriorityPath(path)) {
            score += 0.2;
            boostFactor *= 1.3;
            reasons.push('High-priority path');
        }

        // PENALIZE: Low-value content
        if (this.isLowValueContent(content)) {
            score -= 0.2;
            boostFactor *= 0.7;
            reasons.push('Low-value content');
        }

        // Ensure score is within bounds
        score = Math.max(0.0, Math.min(1.0, score));

        return { score, reasons, contentType, boostFactor };
    }

    /**
     * True when any technical indicator matches the path, content or title.
     * @param {string} path
     * @param {string} content
     * @param {string} title
     * @returns {boolean}
     */
    isTechnicalContent(path, content, title) {
        return matchesAnyField(TECHNICAL_INDICATORS, [path, content, title]);
    }

    /**
     * True when any conceptual indicator matches the path, content or title.
     * @param {string} path
     * @param {string} content
     * @param {string} title
     * @returns {boolean}
     */
    isConceptualContent(path, content, title) {
        return matchesAnyField(CONCEPTUAL_INDICATORS, [path, content, title]);
    }

    /**
     * True when any meta indicator matches the path, content or title.
     * @param {string} path
     * @param {string} content
     * @param {string} title
     * @returns {boolean}
     */
    isMetaContent(path, content, title) {
        return matchesAnyField(META_INDICATORS, [path, content, title]);
    }

    /**
     * True when the content contains a `db.<name>.<name>(` call and also
     * mentions parameters, returns or examples (case-insensitive).
     * @param {string} content
     * @returns {boolean}
     */
    hasMethodDocumentation(content) {
        return /db\.\w+\.\w+\(/.test(content) && /parameters?|returns?|examples?/i.test(content);
    }

    /**
     * True when the content contains a fenced code marker, a `code-block::`
     * directive, or starts with four or more whitespace characters followed
     * by a word character.
     *
     * NOTE(review): the `^` alternative has no `m` flag, so it only matches at
     * the very start of the content, and the two dots before `code-block::`
     * are unescaped (any two characters match). Kept as-is to avoid shifting
     * rankings; confirm intent before tightening.
     * @param {string} content
     * @returns {boolean}
     */
    hasCodeExamples(content) {
        return /```|.. code-block::|^\s{4,}\w+/.test(content);
    }

    /**
     * True when the content mentions a query keyword (`find(`, `aggregate(`,
     * match, group, sort, limit) and also has code examples.
     * @param {string} content
     * @returns {boolean}
     */
    hasQueryExamples(content) {
        return /find\(|aggregate\(|match|group|sort|limit/.test(content) && this.hasCodeExamples(content);
    }

    /**
     * True when the path contains one of the high-priority directory segments.
     * @param {string} path
     * @returns {boolean}
     */
    isHighPriorityPath(path) {
        return HIGH_PRIORITY_PATHS.some((pattern) => pattern.test(path));
    }

    /**
     * True when the content is very short, mostly markup characters, or
     * mostly whitespace. Missing content counts as empty and therefore short.
     * @param {string} content
     * @returns {boolean}
     */
    isLowValueContent(content) {
        const text = toText(content);

        // Very short content (also guards the divisions below against length 0)
        if (text.length < MIN_CONTENT_LENGTH) {
            return true;
        }

        // Mostly markup
        const markupRatio = (text.match(/[<>{}[\]()]/g) || []).length / text.length;
        if (markupRatio > MAX_MARKUP_RATIO) {
            return true;
        }

        // Mostly whitespace
        const whitespaceRatio = (text.match(/\s/g) || []).length / text.length;
        return whitespaceRatio > MAX_WHITESPACE_RATIO;
    }
}
//# sourceMappingURL=content-quality-scorer.js.map