@dollhousemcp/mcp-server
Version:
DollhouseMCP - A Model Context Protocol (MCP) server that enables dynamic AI persona management from markdown files, allowing Claude and other compatible AI assistants to activate and switch between different behavioral personas.
134 lines • 3.79 kB
TypeScript
/**
* NLP Scoring Manager - Jaccard similarity and Shannon entropy for semantic analysis
*
* Implements intelligent document similarity scoring using:
* - Jaccard similarity for vocabulary overlap
* - Shannon entropy for information density
* - Combined scoring for meaningful semantic relationships
*
* Key insights from analysis:
* - High Jaccard (>60%) + Moderate entropy (4.5-6.0) = Same technical domain
* - High Jaccard + Low entropy (<3.0) = Stop word pollution, superficial
* - Low Jaccard + Similar entropy = Different domains, equally complex
*
* Part of Enhanced Capability Index (#1085)
*/
import { IndexConfigManager } from './config/IndexConfig.js';
/**
* Scoring result with detailed metrics
*/
export interface ScoringResult {
/** Jaccard similarity of the two compared texts (vocabulary overlap). */
jaccard: number;
/** Shannon entropy value reported for the comparison (information density). */
entropy: number;
/** Single relevance score derived from the Jaccard and entropy metrics. */
combinedScore: number;
/** Human-readable reading of how similarity and complexity relate. */
interpretation: string;
/** NOTE(review): presumably the number of tokens considered after tokenization - confirm against the implementation. */
tokenCount: number;
/** NOTE(review): presumably the number of tokens shared by both texts - confirm against the implementation. */
overlapCount: number;
}
/**
* Pairwise similarity between two elements
*/
export interface PairwiseSimilarity {
/** Identifier of the first element of the pair (presumably its name - TODO confirm). */
element1: string;
/** Identifier of the second element of the pair (presumably its name - TODO confirm). */
element2: string;
/** Scoring metrics computed for this pair. */
similarity: ScoringResult;
/** When the similarity was recorded; the string format is not visible here (NOTE(review): likely ISO-8601 - confirm). */
timestamp: string;
}
/**
* Configuration for scoring algorithm
*/
export interface ScoringConfig {
/** NOTE(review): presumably the minimum length a token must have to be kept during tokenization - confirm. */
minTokenLength: number;
/** Lifetime of a cached result; the unit is not visible here (NOTE(review): likely milliseconds - confirm). */
cacheExpiry: number;
/** NOTE(review): presumably the maximum number of cached results before LRU eviction - confirm. */
maxCacheSize: number;
/**
* Boundaries used to classify a Shannon entropy value as low, moderate or high.
* Default values and comparison direction are defined by the implementation.
*/
entropyBands: {
low: number;
moderate: number;
high: number;
};
/**
* Boundaries used to classify a Jaccard similarity value as low, moderate or high.
* NOTE(review): presumably fractions in the 0-1 range, matching calculateJaccard - confirm.
*/
jaccardThresholds: {
low: number;
moderate: number;
high: number;
};
}
/**
* Scores textual similarity using Jaccard similarity and Shannon entropy.
*
* Exposes pairwise scoring, similarity-matrix construction, nearest-candidate
* lookup and key-term extraction, and keeps an internal result cache with LRU
* eviction. Call dispose() when the instance is no longer needed.
*/
export declare class NLPScoringManager {
/** Internal result cache (see addToCache / cleanExpiredCache). */
private cache;
/** Access-order bookkeeping used for LRU tracking of cache entries. */
private cacheAccessOrder;
/** Scoring configuration in effect for this instance. */
private config;
/** NOTE(review): presumably normalizes/validates Unicode input before tokenization - confirm. */
private unicodeValidator;
/** NOTE(review): presumably the handle of a periodic cache-cleanup timer, released by dispose() - confirm. */
private cleanupInterval?;
/**
* @param config - Optional partial overrides for the scoring configuration.
* @param indexConfigManager - Optional index configuration manager
*   (NOTE(review): how it interacts with `config` is not visible here - confirm).
*/
constructor(config?: Partial<ScoringConfig>, indexConfigManager?: IndexConfigManager);
/**
* Clean and tokenize text for analysis
* Works with any language - no hardcoded stop words
*/
private cleanAndTokenize;
/**
* Calculate Jaccard similarity between two text strings
*
* Jaccard = |A ∩ B| / |A ∪ B|
*
* Returns value between 0 (no overlap) and 1 (identical)
*
* @param text1 - First text to compare.
* @param text2 - Second text to compare.
* @returns Similarity in the range 0 to 1.
*/
calculateJaccard(text1: string, text2: string): number;
/**
* Calculate Shannon entropy for text
*
* H(X) = -Σ p(x) * log2(p(x))
*
* Measures information density/vocabulary richness
* Higher entropy = more diverse vocabulary
*
* @param text - Text to measure.
* @returns Entropy value; higher means a more diverse vocabulary.
*/
calculateEntropy(text: string): number;
/**
* Calculate combined relevance score using Jaccard and entropy
*
* Interprets the relationship between similarity and complexity
*
* @param text1 - First text to compare.
* @param text2 - Second text to compare.
* @returns Detailed metrics plus a textual interpretation.
*/
scoreRelevance(text1: string, text2: string): ScoringResult;
/**
* Build a pairwise similarity matrix for multiple texts
*
* Useful for clustering and relationship discovery
*
* @param elements - Map of texts to compare (NOTE(review): presumably element name -> text - confirm).
* @returns Nested map of scoring results keyed by two strings
*   (NOTE(review): presumably the two element keys; whether self-pairs or
*   both directions are included is not visible here - confirm).
*/
buildSimilarityMatrix(elements: Map<string, string>): Map<string, Map<string, ScoringResult>>;
/**
* Find most similar elements to a given text
*
* @param targetText - Text to compare every candidate against.
* @param candidates - Candidate texts (NOTE(review): presumably candidate name -> text - confirm).
* @param topK - Optional limit on the number of results; the default is defined by the implementation.
* @returns Candidate names paired with their scoring results
*   (NOTE(review): presumably ordered best-first - confirm).
*/
findSimilar(targetText: string, candidates: Map<string, string>, topK?: number): Array<{
name: string;
score: ScoringResult;
}>;
/**
* Extract key terms from text based on entropy contribution
*
* Terms that contribute most to entropy are likely important
*
* @param text - Text to extract terms from.
* @param topK - Optional limit on the number of terms; the default is defined by the implementation.
* @returns The selected terms.
*/
extractKeyTerms(text: string, topK?: number): string[];
/**
* Add result to cache with LRU eviction
*/
private addToCache;
/**
* Update access order for LRU tracking
*/
private updateAccessOrder;
/**
* Clean expired cache entries
*/
private cleanExpiredCache;
/**
* Clear the cache
*/
clearCache(): void;
/**
* Get cache statistics
*
* @returns `size` and `oldestEntry`, which may be `null`
*   (NOTE(review): presumably the entry count and the oldest entry's
*   timestamp, `null` when the cache is empty - confirm).
*/
getCacheStats(): {
size: number;
oldestEntry: number | null;
};
/**
* Release resources held by this instance.
* NOTE(review): presumably stops the cleanup timer and clears the cache - confirm against the implementation.
*/
dispose(): void;
}
//# sourceMappingURL=NLPScoringManager.d.ts.map