// devcontext
// Version:
// DevContext is a cutting-edge Model Context Protocol (MCP) server designed to provide developers with continuous, project-centric context awareness.
// 1,415 lines (1,211 loc) • 44.9 kB
// JavaScript
/**
* SemanticPatternRecognizerLogic.js
*
* Logic for recognizing semantic patterns in code entities.
* Uses both textual and structural analysis to identify patterns.
*/
import * as TextTokenizerLogic from "./TextTokenizerLogic.js";
import * as CodeStructureAnalyzerLogic from "./CodeStructureAnalyzerLogic.js";
import * as RelationshipContextManagerLogic from "./RelationshipContextManagerLogic.js";
import { executeQuery } from "../db.js";
import { v4 as uuidv4 } from "uuid";
/**
* @typedef {Object} Pattern
* @property {string} id - Unique identifier for the pattern
* @property {string} name - Human-readable name for the pattern
* @property {string} description - Description of what the pattern represents
* @property {string} language - Programming language this pattern applies to (e.g., 'javascript', 'python', or 'any' for language-agnostic patterns)
* @property {string} category - Category of the pattern (e.g., 'design_pattern', 'antipattern', 'common_idiom')
* @property {string} representation - Textual or structured representation of the pattern
* @property {string} detection_rules - JSON string of rules used to detect this pattern
* @property {number} importance - Importance score of this pattern (0-1)
* @property {string} created_at - When this pattern was created
* @property {string} updated_at - When this pattern was last updated
*/
/**
* @typedef {Object} PatternDefinition
* @property {string} pattern_type - Type of the pattern (e.g., 'derived_from_examples')
* @property {string} name - Human-readable name for the pattern
* @property {string} description - Description of what the pattern represents
* @property {string} language - Programming language this pattern applies to (e.g., 'javascript', 'python', or 'any' for language-agnostic patterns)
* @property {string} category - Category of the pattern
* @property {string|Object} representation - Textual or structured representation of the pattern
* @property {Object|string} detection_rules - Rules used to detect this pattern (an object, or its JSON string form)
* @property {number} importance - Importance score of this pattern (0-1)
*/
/**
* @typedef {Object} CodeEntity
* @property {string} id - Unique identifier for the code entity
* @property {string} path - File path of the code entity
* @property {string} type - Type of code entity ('file', 'function', 'class', etc.)
* @property {string} name - Name of the code entity
* @property {string} content - Content of the code entity
* @property {string} raw_content - Raw unprocessed content of the entity
* @property {string} language - Programming language of the entity
* @property {Object} custom_metadata - Optional metadata including structural information
*/
/**
 * Recognizes semantic patterns in a code entity.
 *
 * The entity's text (raw_content, falling back to content) is tokenized,
 * structural features are taken from custom_metadata.structuralFeatures or
 * derived by building an AST, and every known pattern for the entity's
 * language is scored with matchPattern. Patterns scoring above 0.1 are
 * returned, ordered by descending confidence.
 *
 * Any unexpected error is logged and reported as an empty result; this
 * function does not throw.
 *
 * @param {CodeEntity} entity - The code entity to analyze
 * @returns {Promise<{patterns: Pattern[], confidence: number}>} Matched patterns and
 *   the importance-weighted average of their confidences (0 when nothing matched)
 */
export async function recognizePatterns(entity) {
  try {
    // 1. Extract key information from the entity
    const { content, raw_content, language, type, custom_metadata } = entity;

    // Nothing to analyze without any content
    if (!content && !raw_content) {
      return { patterns: [], confidence: 0 };
    }
    const entityContent = raw_content || content;

    // 2. Get structural features - either from metadata or by analyzing
    let structuralFeatures = custom_metadata?.structuralFeatures;
    if (!structuralFeatures) {
      try {
        const ast = await CodeStructureAnalyzerLogic.buildAST(
          entityContent,
          language
        );
        structuralFeatures =
          await CodeStructureAnalyzerLogic.extractStructuralFeatures(ast);
      } catch (error) {
        // Structural analysis is best-effort (same policy as
        // findSimilarCodeByPattern): textual and type matching can still
        // produce useful results when the content cannot be parsed.
        console.warn(
          "Could not analyze structure for pattern recognition:",
          error
        );
      }
      structuralFeatures = structuralFeatures || {};
    }

    // 3. Get token-based features using TextTokenizerLogic
    const tokenizedContent = TextTokenizerLogic.tokenize(entityContent);
    const keywords = TextTokenizerLogic.extractKeywords(tokenizedContent);
    const codeNgrams = TextTokenizerLogic.extractNGrams(tokenizedContent, 3); // Extract up to 3-grams

    // 4. Retrieve known patterns from database
    const knownPatterns = await getKnownPatterns({
      language, // Filter by entity's language
      minConfidence: 0.3, // Only get reasonably confident patterns
    });
    if (knownPatterns.length === 0) {
      return { patterns: [], confidence: 0 };
    }

    // 5. Match patterns against the entity
    const matchResults = await Promise.all(
      knownPatterns.map((pattern) =>
        matchPattern(
          pattern,
          entityContent,
          structuralFeatures,
          keywords,
          codeNgrams,
          type
        )
      )
    );

    // 6. Keep patterns with reasonable confidence, best first
    const matchedPatterns = matchResults
      .filter((result) => result.confidence > 0.1)
      .sort((a, b) => b.confidence - a.confidence);

    // 7. Calculate overall confidence (weighted average based on pattern importance)
    let overallConfidence = 0;
    if (matchedPatterns.length > 0) {
      let weightedSum = 0;
      let totalImportance = 0;
      for (const match of matchedPatterns) {
        const importance = match.pattern.importance || 0.5; // Default importance if not specified
        weightedSum += match.confidence * importance;
        totalImportance += importance;
      }
      overallConfidence =
        totalImportance > 0
          ? weightedSum / totalImportance
          : matchedPatterns[0].confidence; // No usable importance: use highest confidence
    }

    // 8. Return matched patterns and overall confidence
    return {
      patterns: matchedPatterns.map((match) => match.pattern),
      confidence: overallConfidence,
    };
  } catch (error) {
    console.error("Error in pattern recognition:", error);
    return { patterns: [], confidence: 0 };
  }
}
/**
 * Retrieves known patterns from the project_patterns table with optional filtering.
 *
 * Rows are ordered by confidence_score, then frequency (both descending), and
 * each row's detection_rules JSON string is parsed into an object (an empty
 * or NULL value becomes {}).
 *
 * @param {Object} [filterOptions] - Options to filter the patterns
 * @param {string} [filterOptions.type] - Filter by pattern type
 * @param {number} [filterOptions.minConfidence] - Minimum confidence score;
 *   null, undefined and non-numeric values disable this filter
 * @param {string} [filterOptions.language] - Programming language; also matches
 *   patterns whose language is 'any' or NULL
 * @returns {Promise<Pattern[]>} Array of patterns matching the filters
 * @throws {Error} If the query fails or a row holds malformed detection_rules JSON
 */
export async function getKnownPatterns(filterOptions = {}) {
  try {
    // `?? {}` also covers an explicit null, which a default parameter does not
    const { type, minConfidence, language } = filterOptions ?? {};

    // Build the query; "WHERE 1=1" lets every filter be appended with AND
    let query = "SELECT * FROM project_patterns WHERE 1=1";
    const params = [];

    // Apply type filter
    if (type) {
      query += " AND pattern_type = ?";
      params.push(type);
    }

    // Apply confidence filter. `!= null` rejects both null and undefined:
    // binding NULL would make `confidence_score >= ?` false for every row.
    if (minConfidence != null && !Number.isNaN(Number(minConfidence))) {
      query += " AND confidence_score >= ?";
      params.push(minConfidence);
    }

    // Apply language filter
    if (language) {
      query += " AND (language = ? OR language = ? OR language IS NULL)";
      params.push(language, "any"); // Include language-specific, universal patterns, and legacy NULL values
    }

    // Order by confidence and frequency
    query += " ORDER BY confidence_score DESC, frequency DESC";

    const patterns = await executeQuery(query, params);

    // Parse detection_rules JSON for each pattern
    return patterns.map((pattern) => ({
      ...pattern,
      detection_rules: JSON.parse(pattern.detection_rules || "{}"),
    }));
  } catch (error) {
    console.error("Error retrieving patterns with filters:", error);
    throw new Error(`Failed to retrieve patterns: ${error.message}`, {
      cause: error,
    });
  }
}
/**
 * Loads patterns from the project_patterns table, optionally restricted to a
 * language.
 *
 * When a language is given, rows whose language equals it, equals 'any', or is
 * NULL are returned. Each row's detection_rules JSON string is parsed into an
 * object (empty/NULL becomes {}). Failures are logged and yield an empty
 * array instead of an exception.
 *
 * @param {string} language - Programming language to filter by (optional)
 * @returns {Promise<Pattern[]>} Array of known patterns ([] on error)
 * @private
 */
async function _getKnownPatternsInternal(language) {
  // Turns the stored JSON text into an object on a copy of the row
  const withParsedRules = (row) => ({
    ...row,
    detection_rules: JSON.parse(row.detection_rules || "{}"),
  });

  const baseSelect = "SELECT * FROM project_patterns";
  // Include language-specific, universal patterns, and legacy NULL values
  const sql = language
    ? baseSelect + " WHERE language = ? OR language = ? OR language IS NULL"
    : baseSelect;
  const bindings = language ? [language, "any"] : [];

  try {
    const rows = await executeQuery(sql, bindings);
    return rows.map(withParsedRules);
  } catch (error) {
    console.error("Error retrieving known patterns:", error);
    return [];
  }
}
/**
 * Scores how well a single pattern matches an entity.
 *
 * Three partial scores in [0, 1] are computed from pattern.detection_rules:
 *  - type: 1 when applicable_types is absent/empty or contains entityType,
 *    otherwise 0 (and the overall confidence is 0 when strict_type_matching
 *    is set);
 *  - textual: fraction of rule keywords present in `keywords`, averaged with
 *    the fraction of text_patterns (substrings or regexes) found in `content`;
 *  - structural: fraction of structural_rules satisfied by structuralFeatures.
 * The confidence is the weighted sum of the three, using detection_rules.weights
 * where given and the defaults (0.4 / 0.4 / 0.2) for any missing or
 * non-numeric weight.
 *
 * Errors (for example an invalid regex in the rules) are logged and reported
 * as confidence 0; this function does not throw.
 *
 * @param {Pattern} pattern - The pattern to match (detection_rules already parsed)
 * @param {string} content - The entity content
 * @param {Object} structuralFeatures - Structural features of the entity; a
 *   missing value is treated as "no features available"
 * @param {string[]} keywords - Extracted keywords from the entity
 * @param {Object[]} codeNgrams - N-grams extracted from the entity (accepted for
 *   signature compatibility; not used by the current scoring)
 * @param {string} entityType - Type of the entity (file, function, class, etc.)
 * @returns {Promise<{pattern: Pattern, confidence: number}>} Match result with confidence
 * @private
 */
async function matchPattern(
  pattern,
  content,
  structuralFeatures,
  keywords,
  codeNgrams,
  entityType
) {
  const DEFAULT_WEIGHTS = { textual: 0.4, structural: 0.4, type: 0.2 };

  try {
    const { detection_rules: rules } = pattern;

    // --- Type applicability -------------------------------------------------
    // An empty list is treated like a missing one (no restriction), matching
    // how findSimilarCodeByPattern only pre-filters on non-empty lists.
    let typeMatchScore = 1;
    const applicableTypes = rules.applicable_types;
    if (Array.isArray(applicableTypes) && applicableTypes.length > 0) {
      typeMatchScore = applicableTypes.includes(entityType) ? 1 : 0;
      // If pattern explicitly doesn't apply to this type, return zero confidence
      if (typeMatchScore === 0 && rules.strict_type_matching) {
        return { pattern, confidence: 0 };
      }
    }

    // --- Textual matching: keywords ----------------------------------------
    let textualMatchScore = 0;
    const ruleKeywords = rules.keywords;
    // Guard against an empty list: 0 / 0 would make the confidence NaN
    if (Array.isArray(ruleKeywords) && ruleKeywords.length > 0) {
      const entityKeywords = new Set(keywords);
      const hits = ruleKeywords.filter((keyword) =>
        entityKeywords.has(keyword)
      ).length;
      textualMatchScore = hits / ruleKeywords.length;
    }

    // --- Textual matching: substrings and regexes --------------------------
    const textPatterns = rules.text_patterns;
    if (Array.isArray(textPatterns)) {
      let patternMatchCount = 0;
      for (const textPattern of textPatterns) {
        if (typeof textPattern === "string") {
          if (content.includes(textPattern)) {
            patternMatchCount++;
          }
        } else if (
          textPattern instanceof RegExp ||
          (typeof textPattern === "object" && textPattern?.pattern)
        ) {
          // Regex given either as a RegExp or as { pattern, flags }
          const regex =
            textPattern instanceof RegExp
              ? textPattern
              : new RegExp(textPattern.pattern, textPattern.flags || "");
          // A shared /g or /y RegExp keeps state between test() calls
          regex.lastIndex = 0;
          if (regex.test(content)) {
            patternMatchCount++;
          }
        }
      }
      const textPatternScore =
        textPatterns.length > 0 ? patternMatchCount / textPatterns.length : 0;
      // Combine with keyword score (a zero keyword score defers to the text score)
      textualMatchScore =
        textualMatchScore > 0
          ? (textualMatchScore + textPatternScore) / 2
          : textPatternScore;
    }

    // --- Structural matching -------------------------------------------------
    let structuralMatchScore = 0;
    const structuralRules = rules.structural_rules;
    if (Array.isArray(structuralRules)) {
      // Without features no rule can match, but the textual and type scores
      // should still count instead of the whole match failing.
      const features = structuralFeatures ?? {};
      let structRuleMatchCount = 0;
      for (const rule of structuralRules) {
        const { feature, condition, value } = rule ?? {};
        // Skip invalid rules (they still count in the denominator below)
        if (!feature || !condition || value === undefined) continue;
        const featureValue = features[feature];
        // Skip if feature doesn't exist
        if (featureValue === undefined) continue;

        let matches = false;
        switch (condition) {
          case "equals":
            matches = featureValue === value;
            break;
          case "contains":
            matches = Array.isArray(featureValue)
              ? featureValue.includes(value)
              : String(featureValue).includes(String(value));
            break;
          case "greater_than":
            matches = Number(featureValue) > Number(value);
            break;
          case "less_than":
            matches = Number(featureValue) < Number(value);
            break;
          case "matches_regex":
            matches = new RegExp(value).test(String(featureValue));
            break;
          default:
            matches = false;
        }
        if (matches) {
          structRuleMatchCount++;
        }
      }
      structuralMatchScore =
        structuralRules.length > 0
          ? structRuleMatchCount / structuralRules.length
          : 0;
    }

    // --- Combined confidence -------------------------------------------------
    // Missing or non-numeric weights fall back per component so a partially
    // specified weights object cannot turn the result into NaN.
    const customWeights = rules.weights || {};
    const weightFor = (key) => {
      const candidate = Number(customWeights[key]);
      return Number.isFinite(candidate) ? candidate : DEFAULT_WEIGHTS[key];
    };

    const confidence =
      textualMatchScore * weightFor("textual") +
      structuralMatchScore * weightFor("structural") +
      typeMatchScore * weightFor("type");
    return { pattern, confidence };
  } catch (error) {
    console.error(`Error matching pattern ${pattern.name}:`, error);
    return { pattern, confidence: 0 };
  }
}
/**
 * Adds a new pattern to the pattern repository (project_patterns table).
 *
 * A fresh UUID is generated as pattern_id. Missing fields get defaults: name
 * becomes `Pattern_<first 8 chars of the id>`, description is empty, language
 * is 'any' and detection_rules is '{}'. Object-valued representation and
 * detection_rules are stored as JSON strings. New patterns start with
 * frequency 1, utility_score 0.1, confidence_score 0.5 and
 * reinforcement_count 1.
 *
 * @param {PatternDefinition} patternDefinition - The pattern definition to add
 * @returns {Promise<string>} The ID of the newly added pattern
 * @throws {Error} If the definition cannot be read or the insert fails
 */
export async function addPatternToRepository(patternDefinition) {
  try {
    // 1. Generate a unique ID for the pattern
    const pattern_id = uuidv4();

    // 2. Extract and prepare pattern data with defaults
    const {
      pattern_type,
      name = `Pattern_${pattern_id.substring(0, 8)}`,
      description = "",
      representation,
      detection_rules,
      language = "any",
    } = patternDefinition;

    // 3. Ensure representation and detection_rules are in string format for storage
    const representationStr =
      typeof representation === "object"
        ? JSON.stringify(representation)
        : representation;
    // `??` covers null as well as undefined: a destructuring default would let
    // null through, and typeof null === "object" would then store the string
    // "null", which parses back to null and breaks rule matching.
    const rulesValue = detection_rules ?? "{}";
    const detectionRulesStr =
      typeof rulesValue === "object" ? JSON.stringify(rulesValue) : rulesValue;

    // 4. Set default scores and counters
    const frequency = 1;
    const utility_score = 0.1;
    const confidence_score = 0.5;
    const reinforcement_count = 1;
    const created_at = new Date().toISOString();
    const updated_at = created_at;

    // 5. Insert the pattern into the database
    const query = `
      INSERT INTO project_patterns (
        pattern_id,
        pattern_type,
        name,
        description,
        representation,
        detection_rules,
        language,
        frequency,
        utility_score,
        confidence_score,
        reinforcement_count,
        created_at,
        updated_at
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `;
    // Order must mirror the column list above
    const params = [
      pattern_id,
      pattern_type,
      name,
      description,
      representationStr,
      detectionRulesStr,
      language,
      frequency,
      utility_score,
      confidence_score,
      reinforcement_count,
      created_at,
      updated_at,
    ];
    await executeQuery(query, params);
    console.log(`Added new pattern "${name}" (${pattern_id}) to repository`);

    // 6. Return the generated pattern ID
    return pattern_id;
  } catch (error) {
    console.error("Error adding pattern to repository:", error);
    throw new Error(`Failed to add pattern: ${error.message}`, {
      cause: error,
    });
  }
}
/**
 * Finds code entities that match a specific pattern.
 *
 * The pattern is loaded by ID, candidate entities are pre-filtered in SQL
 * (by the pattern's language, its applicable types and up to three of its
 * keywords), and each candidate is scored with matchPattern. Entities scoring
 * above 0.3 are returned, best match first.
 *
 * At most min(limit * 5, 100) candidates are examined. Errors are logged and
 * reported as an empty array; this function does not throw.
 *
 * @param {string} patternId - ID of the pattern to match against
 * @param {number} [limit=10] - Maximum number of matches to return. Fractions
 *   are rounded down, non-numeric values fall back to 10, and values below 1
 *   yield an empty result.
 * @returns {Promise<CodeEntity[]>} Array of code entities that match the pattern
 */
export async function findSimilarCodeByPattern(patternId, limit = 10) {
  try {
    // Normalize the limit up front: it is interpolated into the SQL LIMIT
    // clause and used for slice(), so it must be a positive integer.
    const requestedLimit = Number(limit);
    const maxResults = Number.isFinite(requestedLimit)
      ? Math.floor(requestedLimit)
      : 10;
    if (maxResults <= 0) {
      return [];
    }

    // 1. Retrieve the pattern from the database
    const patternQuery = "SELECT * FROM project_patterns WHERE pattern_id = ?";
    const patterns = await executeQuery(patternQuery, [patternId]);
    if (patterns.length === 0) {
      console.warn(`Pattern with ID ${patternId} not found`);
      return [];
    }
    const pattern = {
      ...patterns[0],
      detection_rules: JSON.parse(patterns[0].detection_rules || "{}"),
    };
    const rules = pattern.detection_rules;

    // 2. Collect SQL pre-filters that cheaply narrow the candidate set
    const conditions = [];
    const queryParams = [];

    // Filter by language if the pattern is language-specific
    if (pattern.language && pattern.language !== "any") {
      conditions.push("language = ?");
      queryParams.push(pattern.language);
    }

    // Filter by entity type if the pattern has applicable types
    if (
      Array.isArray(rules.applicable_types) &&
      rules.applicable_types.length > 0
    ) {
      const typePlaceholders = rules.applicable_types.map(() => "?").join(", ");
      conditions.push(`type IN (${typePlaceholders})`);
      queryParams.push(...rules.applicable_types);
    }

    // Keyword pre-filter: content must contain at least one of the first
    // three keywords (limited to avoid over-filtering). This is a simplified
    // substring search, not a full-text index.
    if (Array.isArray(rules.keywords) && rules.keywords.length > 0) {
      const keywordsToUse = rules.keywords.slice(0, 3);
      const keywordConditions = keywordsToUse
        .map(() => "content LIKE ?")
        .join(" OR ");
      conditions.push(`(${keywordConditions})`);
      queryParams.push(...keywordsToUse.map((keyword) => `%${keyword}%`));
    }

    // 3. Assemble the candidate query
    let entityQuery = "SELECT * FROM code_entities";
    if (conditions.length > 0) {
      entityQuery += " WHERE " + conditions.join(" AND ");
    }
    // Examine more candidates than requested since some will not match
    const processingLimit = Math.min(maxResults * 5, 100);
    entityQuery += ` LIMIT ${processingLimit}`;

    // 4. Get candidate entities
    const entities = await executeQuery(entityQuery, queryParams);

    // 5. Check each entity for pattern matches
    const matchResults = [];
    for (const entity of entities) {
      try {
        const entityContent = entity.raw_content || entity.content;
        if (!entityContent) continue;

        // Token-based features
        const tokenizedContent = TextTokenizerLogic.tokenize(entityContent);
        const keywords = TextTokenizerLogic.extractKeywords(tokenizedContent);
        const codeNgrams = TextTokenizerLogic.extractNGrams(
          tokenizedContent,
          3
        );

        // Only parse the AST if the pattern has structural rules and the
        // entity does not already carry structural features
        let structuralFeatures = entity.custom_metadata?.structuralFeatures;
        if (rules.structural_rules && !structuralFeatures) {
          try {
            const ast = await CodeStructureAnalyzerLogic.buildAST(
              entityContent,
              entity.language
            );
            structuralFeatures =
              await CodeStructureAnalyzerLogic.extractStructuralFeatures(ast);
          } catch (error) {
            console.warn(
              `Could not analyze structure for entity ${entity.id}:`,
              error
            );
            structuralFeatures = {};
          }
        }

        const matchResult = await matchPattern(
          pattern,
          entityContent,
          structuralFeatures || {},
          keywords,
          codeNgrams,
          entity.type
        );

        // Slightly higher threshold than recognizePatterns uses
        if (matchResult.confidence > 0.3) {
          matchResults.push({
            entity,
            confidence: matchResult.confidence,
          });
        }
      } catch (error) {
        // One bad entity must not abort the whole search
        console.warn(
          `Error matching entity ${entity.id} against pattern:`,
          error
        );
      }
    }

    // 6. Sort by confidence and return at most the requested number
    matchResults.sort((a, b) => b.confidence - a.confidence);
    return matchResults.slice(0, maxResults).map((result) => result.entity);
  } catch (error) {
    console.error("Error finding similar code by pattern:", error);
    return [];
  }
}
/**
 * Generates a pattern definition from example code entities.
 *
 * Keywords and n-grams shared by the examples become textual detection rules,
 * shared structural features become structural rules, and the example that
 * contains the most shared keywords/n-grams is stored as the representation
 * template.
 *
 * The type restriction (applicable_types) is only emitted when all typed
 * examples share one entity type; for mixed examples it is omitted so the
 * pattern applies to every entity type.
 *
 * @param {CodeEntity[]} examples - Code entities that exemplify the pattern
 * @param {string} name - Name to give the generated pattern
 * @param {string} [patternType='derived_from_examples'] - Type of pattern to create
 * @returns {PatternDefinition} Generated pattern definition (representation is
 *   a JSON string, detection_rules an object)
 * @throws {Error} If no examples are supplied
 */
export function generatePatternFromExamples(
  examples,
  name,
  patternType = "derived_from_examples"
) {
  if (!examples || examples.length === 0) {
    throw new Error("At least one example is required to generate a pattern");
  }

  // 1. Extract necessary information from examples
  const language = identifyCommonLanguage(examples);
  const entityType = identifyCommonEntityType(examples);

  // 2. Extract textual features from all examples
  const textualFeatures = extractTextualFeatures(examples);

  // 3. Extract structural features if possible
  const structuralFeatures = extractStructuralFeatures(examples);

  // 4. Generate a description based on examples
  const description = `Pattern derived from ${examples.length} examples related to ${name}`;

  // 5. Create detection rules based on commonalities.
  // "any" is only a sentinel for "no common type": no entity has that type, so
  // listing it would make the type score 0 for every entity and would turn
  // the `type IN (...)` pre-filter into one that matches nothing.
  const detectionRules = {
    keywords: textualFeatures.commonKeywords,
    text_patterns: textualFeatures.commonNgrams.map((ngram) => ngram.text),
    structural_rules: structuralFeatures.rules,
    ...(entityType !== "any" ? { applicable_types: [entityType] } : {}),
    weights: {
      textual: 0.5,
      structural: 0.4,
      type: 0.1,
    },
  };

  // 6. Pick the most representative example: the one containing the highest
  // number of common keywords and n-grams (first one wins on ties).
  let bestExampleIndex = 0;
  let bestMatchScore = -1;
  examples.forEach((example, index) => {
    const content = example.raw_content || example.content;
    if (!content) return;

    const tokenizedContent = TextTokenizerLogic.tokenize(content);
    const keywords = TextTokenizerLogic.extractKeywords(tokenizedContent);

    let matchScore = 0;
    textualFeatures.commonKeywords.forEach((keyword) => {
      if (keywords.includes(keyword)) matchScore++;
    });
    textualFeatures.commonNgrams.forEach((ngram) => {
      if (content.includes(ngram.text)) matchScore++;
    });

    if (matchScore > bestMatchScore) {
      bestMatchScore = matchScore;
      bestExampleIndex = index;
    }
  });

  // Use the best example as the representation template
  const representativeExample = examples[bestExampleIndex];
  const representation = {
    template:
      representativeExample.raw_content || representativeExample.content,
    variables: textualFeatures.variableTokens,
    structure: structuralFeatures.commonPattern,
  };

  // 7. Return the pattern definition
  return {
    pattern_type: patternType,
    name,
    description,
    language,
    representation: JSON.stringify(representation),
    detection_rules: detectionRules,
    importance: 0.5, // Default moderate importance
  };
}
/**
 * Determines the language shared by all examples.
 *
 * Examples without a (truthy) language are ignored. The result is 'any' when
 * no example declares a language or when the declared languages differ.
 *
 * @param {CodeEntity[]} examples - Code entities to analyze
 * @returns {string} Common language or 'any' if mixed
 * @private
 */
function identifyCommonLanguage(examples) {
  const declared = new Set();
  for (const { language } of examples) {
    if (language) {
      declared.add(language);
    }
  }
  // Exactly one distinct value means every declared language agrees
  if (declared.size !== 1) {
    return "any";
  }
  const [sharedLanguage] = declared;
  return sharedLanguage;
}
/**
 * Determines the entity type shared by all examples.
 *
 * Examples without a (truthy) type are ignored. The result is 'any' when no
 * example declares a type or when the declared types differ.
 *
 * @param {CodeEntity[]} examples - Code entities to analyze
 * @returns {string} Common entity type, or 'any'
 * @private
 */
function identifyCommonEntityType(examples) {
  let sharedType;
  for (const example of examples) {
    const currentType = example.type;
    if (!currentType) {
      continue;
    }
    if (sharedType === undefined) {
      // First declared type becomes the reference
      sharedType = currentType;
    } else if (currentType !== sharedType) {
      return "any";
    }
  }
  return sharedType === undefined ? "any" : sharedType;
}
/**
 * Extracts textual features shared by (or varying across) the examples.
 *
 * Examples without content are ignored. The result contains:
 *  - commonKeywords: keywords present in every analyzed example (max 10, in
 *    the order of the first example);
 *  - commonNgrams: n-grams (up to 3-grams) that occur in at least half of the
 *    examples, each example counted once (max 5, `{ text }` objects);
 *  - variableTokens: for the first 30 token positions of the first example,
 *    the positions whose token differs in some other example, together with
 *    the tokens observed at that position.
 *
 * @param {CodeEntity[]} examples - Code entities to analyze
 * @returns {{commonKeywords: string[], commonNgrams: Array<{text: string}>, variableTokens: Array<{position: number, examples: Array}>}} Extracted textual features
 * @private
 */
function extractTextualFeatures(examples) {
  // 1. Extract tokens, keywords, and n-grams from each example
  const allKeywords = [];
  const allNgrams = [];
  const allTokens = [];
  for (const example of examples) {
    const content = example.raw_content || example.content;
    if (!content) continue;
    const tokenizedContent = TextTokenizerLogic.tokenize(content);
    allKeywords.push(TextTokenizerLogic.extractKeywords(tokenizedContent));
    allNgrams.push(TextTokenizerLogic.extractNGrams(tokenizedContent, 3));
    allTokens.push(tokenizedContent);
  }

  // 2. Find common keywords: intersect the first example's keywords with
  // every other example's, then keep the first 10
  let commonKeywords = [];
  if (allKeywords.length > 0) {
    commonKeywords = allKeywords
      .slice(1)
      .reduce((shared, exampleKeywords) => {
        const lookup = new Set(exampleKeywords);
        return shared.filter((keyword) => lookup.has(keyword));
      }, [...allKeywords[0]])
      .slice(0, 10);
  }

  // 3. Find common n-grams
  let commonNgrams = [];
  if (allNgrams.length > 0) {
    // Count in how many examples each n-gram occurs. Deduplicating per
    // example keeps an n-gram repeated inside a single example from looking
    // like it is shared.
    const exampleCountByNgram = new Map();
    for (const exampleNgrams of allNgrams) {
      const distinctTexts = new Set(exampleNgrams.map((ngram) => ngram.text));
      for (const text of distinctTexts) {
        exampleCountByNgram.set(text, (exampleCountByNgram.get(text) || 0) + 1);
      }
    }
    // "At least half" must round up: with 3 examples an n-gram has to occur
    // in 2 of them, not 1
    const threshold = Math.max(1, Math.ceil(examples.length / 2));
    commonNgrams = Array.from(exampleCountByNgram.entries())
      .filter(([, count]) => count >= threshold)
      .map(([text]) => ({ text }))
      .slice(0, 5); // Limit to the first 5 common n-grams
  }

  // 4. Identify variable tokens (tokens that vary across examples)
  const variableTokens = [];
  if (allTokens.length > 1) {
    const [firstTokens, ...otherTokenLists] = allTokens;
    const positionsToCheck = Math.min(firstTokens.length, 30); // First 30 tokens only
    for (let position = 0; position < positionsToCheck; position++) {
      const token = firstTokens[position];
      // A position is variable when any other example is shorter or holds a
      // different token there
      const isVariable = otherTokenLists.some(
        (tokens) => position >= tokens.length || tokens[position] !== token
      );
      if (!isVariable) continue;
      variableTokens.push({
        position,
        // Reuse the token lists computed in step 1 instead of re-tokenizing
        // every example for every variable position
        examples: allTokens
          .map((tokens) => (position < tokens.length ? tokens[position] : null))
          .filter(Boolean),
      });
    }
  }

  return {
    commonKeywords,
    commonNgrams,
    variableTokens,
  };
}
/**
 * Extracts structural features shared by the examples and turns them into
 * detection rules.
 *
 * Features come from each example's custom_metadata.structuralFeatures when
 * present, otherwise from CodeStructureAnalyzerLogic. This function is
 * synchronous, so an analyzer result that is a Promise cannot be awaited
 * here: such examples are skipped (with a warning) instead of being mixed
 * into the comparison.
 *
 * Rules are derived from the primitive-valued features of the first analyzed
 * example: an "equals" rule when all examples agree, or a
 * greater_than/less_than pair (10% margins) for numeric features whose
 * observed spread is below half of the maximum.
 *
 * @param {CodeEntity[]} examples - Code entities to analyze
 * @returns {{rules: Object[], commonPattern: ?Object}} Structural rules and a
 *   summary (`nodeType`, first three rules, average complexity); empty rules
 *   and a null summary when nothing could be analyzed or an error occurred
 * @private
 */
function extractStructuralFeatures(examples) {
  // Default result with empty values
  const defaultResult = {
    rules: [],
    commonPattern: null,
  };

  const isThenable = (candidate) =>
    candidate != null && typeof candidate.then === "function";

  // A pending result cannot be used synchronously. Attach a no-op handler so
  // a later rejection does not surface as an unhandled rejection.
  const discardPending = (pending) => {
    Promise.resolve(pending).catch(() => {});
    console.warn(
      "Skipping example: structural analysis is asynchronous and cannot be awaited here"
    );
  };

  try {
    // 1. Collect structural features from each example if possible
    const allFeatures = [];
    for (const example of examples) {
      const content = example.raw_content || example.content;
      if (!content) continue;

      // Use existing structural features if available
      if (example.custom_metadata?.structuralFeatures) {
        allFeatures.push(example.custom_metadata.structuralFeatures);
        continue;
      }

      // Otherwise try to extract features synchronously
      try {
        const ast = CodeStructureAnalyzerLogic.buildAST(
          content,
          example.language
        );
        if (!ast) continue;
        if (isThenable(ast)) {
          discardPending(ast);
          continue;
        }
        const features =
          CodeStructureAnalyzerLogic.extractStructuralFeatures(ast);
        if (isThenable(features)) {
          discardPending(features);
          continue;
        }
        if (features) {
          allFeatures.push(features);
        }
      } catch (error) {
        console.warn(
          `Could not extract structural features for example: ${error.message}`
        );
      }
    }
    if (allFeatures.length === 0) {
      return defaultResult;
    }

    // 2. Find common structural properties, driven by the first example
    const structuralRules = [];
    const firstFeatures = allFeatures[0];
    for (const [feature, value] of Object.entries(firstFeatures)) {
      // Skip if the value is complex or undefined
      if (typeof value === "undefined" || typeof value === "object") continue;

      const isCommon = allFeatures.every(
        (features) => features[feature] === value
      );
      if (isCommon) {
        structuralRules.push({
          feature,
          condition: "equals",
          value,
        });
      } else if (typeof value === "number") {
        // Not identical, but possibly within a narrow numeric range
        const values = allFeatures
          .map((features) => features[feature])
          .filter((candidate) => typeof candidate === "number");
        const min = Math.min(...values);
        const max = Math.max(...values);
        // Spread is less than half of the maximum observed value
        if (max - min < max * 0.5) {
          structuralRules.push({
            feature,
            condition: "greater_than",
            value: min * 0.9, // 10% below minimum observed
          });
          structuralRules.push({
            feature,
            condition: "less_than",
            value: max * 1.1, // 10% above maximum observed
          });
        }
      }
    }

    // 3. Summarize the common structure
    const commonPattern = {
      nodeType: examples[0].type,
      structuralRules: structuralRules.slice(0, 3), // First 3 rules
      complexity:
        allFeatures.reduce(
          (sum, features) => sum + (features.complexity || 0),
          0
        ) / allFeatures.length,
    };
    return {
      rules: structuralRules,
      commonPattern,
    };
  } catch (error) {
    console.error("Error extracting structural features:", error);
    return defaultResult;
  }
}
/**
 * Runs every design-pattern detector over a set of code entities.
 *
 * Relationships between the entities are looked up first; if that lookup
 * fails, a warning is logged and the detectors run with an empty relationship
 * list. Detectors are executed one after another and their findings are
 * concatenated in detector order.
 *
 * @param {CodeEntity[]} entities - Code entities to analyze
 * @returns {Promise<Array<{patternType: string, entities: string[], confidence: number}>>} Detected design patterns
 */
export async function detectDesignPatterns(entities) {
  if (!entities || entities.length === 0) {
    return [];
  }

  // Relationship lookup is keyed by entity ID and is best-effort
  const entityIds = entities.map(({ id }) => id);
  let relationships = [];
  try {
    relationships = await RelationshipContextManagerLogic.getRelationships(
      entityIds
    );
  } catch (error) {
    console.warn("Error retrieving relationships between entities:", error);
  }

  // Add more pattern detectors here as needed
  const detectors = [
    detectSingletonPattern,
    detectFactoryPattern,
    detectObserverPattern,
  ];

  const findings = [];
  for (const runDetector of detectors) {
    const detected = await runDetector(entities, relationships);
    if (detected.length === 0) {
      continue;
    }
    findings.push(...detected);
  }
  return findings;
}
/**
 * Detects classes that look like Singletons.
 *
 * Every entity of type 'class' or 'interface' with content is scored by
 * regex-based signals on its source text: a private/protected constructor
 * (+0.3), a static field (+0.3), a static getInstance/instance accessor
 * (+0.4) and an `instance = this` style self-assignment (+0.2). An
 * 'instantiates' relationship targeting the class lowers the score by 0.2.
 * Classes scoring at least 0.6 are reported together with the signals found.
 *
 * @param {CodeEntity[]} entities - Code entities to analyze
 * @param {Array} relationships - Relationships between entities
 * @returns {Promise<Array<{patternType: string, entities: string[], confidence: number, evidence: string[]}>>} Detected patterns
 * @private
 */
async function detectSingletonPattern(entities, relationships) {
  // Positive signals, evaluated in this order so scores accumulate identically
  const positiveSignals = [
    {
      matcher: /private\s+constructor|protected\s+constructor/,
      weight: 0.3,
      label: "private/protected constructor",
    },
    {
      matcher: /static\s+(\w+)\s*:\s*\w+|static\s+(\w+)\s*=/,
      weight: 0.3,
      label: "static instance field",
    },
    {
      matcher:
        /static\s+getInstance\s*\(|static\s+instance\s*\(|static\s+get\s+instance\s*\(/,
      weight: 0.4,
      label: "getInstance method",
    },
    {
      matcher: /this\._instance\s*=\s*this|instance\s*=\s*this/,
      weight: 0.2,
      label: "self-assignment in constructor",
    },
  ];

  const detections = [];
  for (const candidate of entities) {
    if (candidate.type !== "class" && candidate.type !== "interface") {
      continue;
    }
    const source = candidate.raw_content || candidate.content;
    if (!source) {
      continue;
    }

    let confidence = 0;
    const evidence = [];
    for (const { matcher, weight, label } of positiveSignals) {
      if (matcher.test(source)) {
        confidence += weight;
        evidence.push(label);
      }
    }

    // Being instantiated by other code makes a Singleton less likely
    const instantiatedByOthers = relationships.some(
      (relationship) =>
        relationship.relationship_type === "instantiates" &&
        relationship.target_entity_id === candidate.id
    );
    if (instantiatedByOthers) {
      confidence -= 0.2;
      evidence.push("instantiated elsewhere (negative)");
    }

    if (confidence >= 0.6) {
      detections.push({
        patternType: "Singleton",
        entities: [candidate.id],
        confidence,
        evidence,
      });
    }
  }
  return detections;
}
/**
 * Detects Factory pattern
 *
 * Two passes are made:
 *  1. Each "class" entity is scored on its name, on the presence of
 *     creation-style methods, on outgoing "creates"/"instantiates"
 *     relationships, and on whether its source mentions another known class
 *     as a return type or returned value.
 *  2. Each "function"/"method" entity is scored on its name, on outgoing
 *     "creates"/"instantiates" relationships, and on a `return new X`
 *     statement in its source.
 * Candidates scoring at least 0.5 are reported. The reported `entities` list
 * starts with the candidate's id followed by the distinct targets of its
 * creation relationships.
 *
 * @param {CodeEntity[]} entities - Code entities to analyze
 * @param {Array} relationships - Relationships between entities
 * @returns {Promise<Array<{patternType: string, entities: string[], confidence: number, evidence: string[]}>>} Detected patterns
 * @private
 */
async function detectFactoryPattern(entities, relationships) {
  // Treat missing/non-array inputs as empty so detection does not throw.
  const allEntities = Array.isArray(entities) ? entities : [];
  const allRelationships = Array.isArray(relationships) ? relationships : [];
  const results = [];

  const classEntities = allEntities.filter((entity) => entity.type === "class");
  const functionEntities = allEntities.filter(
    (entity) => entity.type === "function" || entity.type === "method"
  );

  // Targets of every "creates"/"instantiates" relationship leaving sourceId
  // (one entry per relationship, so duplicates are possible).
  const creationTargetsOf = (sourceId) =>
    allRelationships
      .filter(
        (rel) =>
          rel.source_entity_id === sourceId &&
          (rel.relationship_type === "creates" ||
            rel.relationship_type === "instantiates")
      )
      .map((rel) => rel.target_entity_id);

  // One matcher per class, compiled once. The name is escaped before being
  // embedded in the pattern: identifiers may contain `$`, and names containing
  // other metacharacters would otherwise fail to match or make the RegExp
  // constructor throw. Entities without a usable name are skipped because an
  // empty name would produce a pattern that matches almost any source.
  const returnTypeMatchers = classEntities
    .filter((entity) => typeof entity.name === "string" && entity.name !== "")
    .map((entity) => {
      const escapedName = entity.name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
      return {
        id: entity.id,
        pattern: new RegExp(
          `:\\s*${escapedName}\\b|return\\s+(new\\s+)?${escapedName}\\b`
        ),
      };
    });

  // Pass 1: factory classes
  for (const classEntity of classEntities) {
    const content = classEntity.raw_content || classEntity.content;
    if (!content) continue;

    let confidence = 0;
    const evidence = [];

    // Class name suggests Factory
    if (/Factory|Builder|Creator|Producer/i.test(classEntity.name)) {
      confidence += 0.2;
      evidence.push("name suggests factory");
    }

    // Methods whose names start with create/make/build/produce/get
    if (/\b(create|make|build|produce|get)\w*\s*\([^)]*\)\s*{/.test(content)) {
      confidence += 0.3;
      evidence.push("has creation methods");
    }

    // Relationships showing this class creates other entities
    const createdTargets = creationTargetsOf(classEntity.id);
    if (createdTargets.length > 0) {
      confidence += 0.3;
      evidence.push(`creates ${createdTargets.length} other entities`);
    }

    // Source mentions other known classes as a return type / returned value
    const returnTypeMatches = returnTypeMatchers.filter(
      (matcher) =>
        matcher.id !== classEntity.id && matcher.pattern.test(content)
    ).length;
    if (returnTypeMatches > 0) {
      confidence += 0.2;
      evidence.push(`returns known types (${returnTypeMatches})`);
    }

    if (confidence >= 0.5) {
      results.push({
        patternType: "Factory",
        // Set keeps first-seen order and drops repeated targets.
        entities: [...new Set([classEntity.id, ...createdTargets])],
        confidence,
        evidence,
      });
    }
  }

  // Pass 2: standalone factory functions
  for (const functionEntity of functionEntities) {
    const content = functionEntity.raw_content || functionEntity.content;
    if (!content) continue;

    let confidence = 0;
    const evidence = [];

    // Function name suggests Factory
    if (/create|make|build|produce|factory|new/i.test(functionEntity.name)) {
      confidence += 0.3;
      evidence.push("name suggests factory function");
    }

    // Relationships showing this function creates entities
    const createdTargets = creationTargetsOf(functionEntity.id);
    if (createdTargets.length > 0) {
      confidence += 0.3;
      evidence.push(`creates ${createdTargets.length} entities`);
    }

    // Body returns a freshly constructed object
    if (/return\s+new\s+\w+/.test(content)) {
      confidence += 0.3;
      evidence.push("returns new instance");
    }

    if (confidence >= 0.5) {
      results.push({
        patternType: "Factory",
        entities: [...new Set([functionEntity.id, ...createdTargets])],
        confidence,
        evidence,
      });
    }
  }

  return results;
}
/**
 * Detects Observer pattern
 *
 * Every "class" entity is considered as a potential subject and scored on
 * textual hints in its source (observer/listener collection, add/remove
 * observer methods, notify-style methods) and on incoming "observes"
 * relationships. When no observer is obtained from those relationships, every
 * other class whose source contains an `update(`, `handle*(` or `on*(` call
 * or method is treated as a potential observer, each adding 0.1 to the score.
 * A subject is reported when its raw score is at least 0.5 and at least one
 * potential observer was found; the reported confidence is capped at 1.
 *
 * @param {CodeEntity[]} entities - Code entities to analyze
 * @param {Array} relationships - Relationships between entities
 * @returns {Promise<Array<{patternType: string, entities: string[], confidence: number, evidence: string[]}>>} Detected patterns
 * @private
 */
async function detectObserverPattern(entities, relationships) {
  // Treat missing/non-array inputs as empty so detection does not throw.
  const allEntities = Array.isArray(entities) ? entities : [];
  const allRelationships = Array.isArray(relationships) ? relationships : [];
  const results = [];

  const classEntities = allEntities.filter((entity) => entity.type === "class");

  // Classes that look like observers. This does not depend on the subject
  // being examined, so it is computed once instead of once per subject.
  const observerLikeClasses = classEntities.filter((candidate) => {
    const candidateContent = candidate.raw_content || candidate.content;
    return (
      Boolean(candidateContent) &&
      /\bupdate\s*\(|\bhandle\w+\s*\(|\bon\w+\s*\(/.test(candidateContent)
    );
  });

  for (const potentialSubject of classEntities) {
    const content = potentialSubject.raw_content || potentialSubject.content;
    if (!content) continue;

    let confidence = 0;
    const evidence = [];
    const involvedEntities = [potentialSubject.id];
    const potentialObservers = [];

    // Observer list/collection
    if (/(\w+)?\s*observers\s*=|(\w+)?\s*listeners\s*=/.test(content)) {
      confidence += 0.2;
      evidence.push("has observer collection");
    }

    // add/remove/notify observer methods
    if (/add(Observer|Listener|Subscriber|Handler)|subscribe/.test(content)) {
      confidence += 0.2;
      evidence.push("has add observer method");
    }
    if (
      /remove(Observer|Listener|Subscriber|Handler)|unsubscribe/.test(content)
    ) {
      confidence += 0.2;
      evidence.push("has remove observer method");
    }
    if (/notify|notifyObservers|emit|trigger|dispatch|fire/.test(content)) {
      confidence += 0.3;
      evidence.push("has notify method");
    }

    // Explicit observers: "observes" relationships targeting this subject
    const observerRelationships = allRelationships.filter(
      (rel) =>
        rel.target_entity_id === potentialSubject.id &&
        rel.relationship_type === "observes"
    );
    if (observerRelationships.length > 0) {
      confidence += 0.3;
      evidence.push(`has ${observerRelationships.length} explicit observers`);
      for (const rel of observerRelationships) {
        const observerId = rel.source_entity_id;
        if (!involvedEntities.includes(observerId)) {
          involvedEntities.push(observerId);
          potentialObservers.push(observerId);
        }
      }
    }

    // Fallback: no observer came from relationships, so use the classes that
    // merely look like observers.
    if (potentialObservers.length === 0) {
      for (const potentialObserver of observerLikeClasses) {
        if (potentialObserver.id === potentialSubject.id) continue;
        potentialObservers.push(potentialObserver.id);
        if (!involvedEntities.includes(potentialObserver.id)) {
          involvedEntities.push(potentialObserver.id);
        }
        confidence += 0.1;
        evidence.push(`found potential observer: ${potentialObserver.name}`);
      }
    }

    // The threshold is applied to the raw score. The fixed weights already sum
    // past 1 and every implicit observer adds another 0.1, so the reported
    // value is capped to stay on a 0-1 scale.
    if (confidence >= 0.5 && potentialObservers.length > 0) {
      results.push({
        patternType: "Observer",
        entities: involvedEntities,
        confidence: Math.min(1, confidence),
        evidence,
      });
    }
  }

  return results;
}