UNPKG

@gaiaverse/semantic-turning-point-detector

Version:

Detects key semantic turning points in conversations using recursive semantic distance analysis. Ideal for conversation analysis, dialogue segmentation, insight detection, and AI-assisted reasoning tasks.

github.com/gaiaverseltd/semantic-turning-point-detector

gaiaverseltd/semantic-turning-point-detector

855 lines (852 loc) • 79.6 kB

JavaScript

"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.SemanticTurningPointDetector = void 0; // file: semanticTurningPointDetector.ts const fs_extra_1 = __importDefault(require("fs-extra")); const winston_1 = __importDefault(require("winston")); // setup winston fs_extra_1.default.ensureDirSync('results'); // Ensure the results directory exists const logger = winston_1.default.createLogger({ level: 'info', format: winston_1.default.format.combine(winston_1.default.format.timestamp(), winston_1.default.format.json()), transports: [ new winston_1.default.transports.Console({ format: winston_1.default.format.combine(winston_1.default.format.colorize(), winston_1.default.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), winston_1.default.format.printf(({ timestamp, level, message }) => { return `${timestamp} ${level}: ${message}`; })) }), new winston_1.default.transports.File({ filename: 'results/semanticTurningPointDetector.log', format: winston_1.default.format.json() }) ] }); /***************************************************************************************** * SEMANTIC TURNING POINT DETECTOR * * A TypeScript implementation of the Adaptive Recursive Convergence (ARC) with * Cascading Re-Dimensional Attention (CRA) framework for conversation analysis. * * This detector identifies semantic "Turning Points" in conversations as a concrete * application of the ARC/CRA theoretical framework for multi-step reasoning * and dynamic dimensional expansion. * * Framework implementation: * 1. Analyze semantic relationships between messages using embeddings (dimension n) * 2. Calculate semantic distances that correspond to the contraction mapping * 3. Apply the complexity function χ to determine dimensional saturation * 4. Use the transition operator Ψ to determine whether to stay in dimension n or escalate * 5. Employ meta-messages and recursive analysis for dimensional expansion (n → n+1) * 6. Merge and prune results to demonstrate formal convergence *****************************************************************************************/ const async_1 = __importDefault(require("async")); const openai_1 = require("openai"); const lru_cache_1 = require("lru-cache"); const crypto_1 = __importDefault(require("crypto")); const tokensUtil_1 = require("./tokensUtil"); const conversation_1 = require("./conversation"); const Message_1 = require("./Message"); const stripContent_1 = require("./stripContent"); const prompt_1 = require("./prompt"); // Cache for token counts to avoid recalculating - implements atomic memory concept const tokenCountCache = new lru_cache_1.LRUCache({ max: 10000, ttl: 1000 * 60 * 60 * 24 }); // ----------------------------------------------------------------------------- // Main Detector Class // ----------------------------------------------------------------------------- class SemanticTurningPointDetector { config; openai; originalMessages = []; convergenceHistory = []; /** * Creates a new instance of the semantic turning point detector */ constructor(config = {}) { // Default configuration (from your provided code) this.config = { apiKey: config.apiKey || process.env.OPENAI_API_KEY || '', classificationModel: config.classificationModel || 'gpt-4o-mini', embeddingModel: config.embeddingModel || 'text-embedding-3-small', embeddingEndpoint: config.embeddingEndpoint, semanticShiftThreshold: config.semanticShiftThreshold || 0.22, minTokensPerChunk: config.minTokensPerChunk || 250, maxTokensPerChunk: config.maxTokensPerChunk || 2000, maxRecursionDepth: config.maxRecursionDepth || 3, onlySignificantTurningPoints: config.onlySignificantTurningPoints ?? true, significanceThreshold: config.significanceThreshold || 0.5, minMessagesPerChunk: config.minMessagesPerChunk || 3, maxTurningPoints: config.maxTurningPoints || 5, debug: config.debug || false, endpoint: config.endpoint, complexitySaturationThreshold: config.complexitySaturationThreshold || 4.5, measureConvergence: config.measureConvergence ?? true }; // Initialize OpenAI client this.openai = new openai_1.OpenAI({ apiKey: this.config.apiKey, baseURL: this.config.endpoint }); if (this.config.debug) { logger.info('[TurningPointDetector] Initialized with config:', { ...this.config, apiKey: '[REDACTED]' }); } } /** * Main entry point: Detect turning points in a conversation * Implements the full ARC/CRA framework */ async detectTurningPoints(messages) { logger.info('Starting turning point detection using ARC/CRA framework for conversation with', messages.length, 'messages'); this.convergenceHistory = []; // Store original messages for reference const totalTokens = await this.getMessageArrayTokenCount(messages); logger.info(`Total conversation tokens: ${totalTokens}`); // Ensure originalMessages is a fresh copy if messages might be mutated elsewhere this.originalMessages = messages.map(m => ({ ...m })); // Begin dimensional analysis at level 0 return this.multiLayerDetection(messages, 0); } /** * Multi-layer detection implementing the ARC/CRA dimensional processing * This is the primary implementation of the transition operator Ψ */ async multiLayerDetection(messages, dimension) { logger.info(`Starting dimensional analysis at n=${dimension}`); // Check recursion depth - hard limit on dimensional expansion if (dimension >= this.config.maxRecursionDepth) { logger.info(`Maximum dimension (n=${dimension}) reached, processing directly without further expansion`); // Pass originalMessages context only at dimension 0 if needed by detectTurningPointsInChunk->classifyTurningPoint return await this.detectTurningPointsInChunk(messages, dimension, 0, this.originalMessages); } // For very small conversations (or at deeper levels), use sliding window let localTurningPoints = []; // Adjusted condition to handle small message counts more directly if (messages.length < this.config.minMessagesPerChunk * 2 && dimension === 0) { logger.info(`Dimension ${dimension}: Small conversation (${messages.length} msgs), processing directly`); // Optionally adjust threshold for small conversations const originalThreshold = this.config.semanticShiftThreshold; this.config.semanticShiftThreshold = Math.max(0.3, originalThreshold * 1.1); // Slightly higher threshold localTurningPoints = await this.detectTurningPointsInChunk(messages, dimension, 0, this.originalMessages); // Restore config this.config.semanticShiftThreshold = originalThreshold; } else { // Chunk the conversation const { chunks } = await this.chunkConversation(messages, dimension); logger.info(`Dimension ${dimension}: Split into ${chunks.length} chunks`); if (chunks.length === 0) { logger.info(`Dimension ${dimension}: No valid chunks created, returning empty.`); return []; } // Process each chunk in parallel to find local turning points const chunkTurningPoints = new Array(chunks.length); const durationsSeconds = new Array(chunks.length).fill(-1); const limit = this.config.endpoint ? 1 : 5; // Limit API calls await async_1.default.eachOfLimit(chunks, limit, async (chunk, indexStr) => { const index = Number(indexStr); const startTime = Date.now(); if (index % 10 === 0 || limit === 1 || this.config.debug) { logger.info(` - Dimension ${dimension}: Processing chunk ${index + 1}/${chunks.length} (${chunk.length} messages)`); } // Pass originalMessages context only at dimension 0 chunkTurningPoints[index] = await this.detectTurningPointsInChunk(chunk, dimension, index, this.originalMessages); const durationSecs = (Date.now() - startTime) / 1000; durationsSeconds[index] = durationSecs; if (index % 10 === 0 || limit === 1 || this.config.debug) { const processedCount = durationsSeconds.filter(d => d > 0).length; if (processedCount > 0) { const averageDuration = durationsSeconds.filter(d => d > 0).reduce((a, b) => a + b, 0) / processedCount; const remainingChunks = durationsSeconds.length - processedCount; const remainingTime = (averageDuration * remainingChunks).toFixed(1); const percentageComplete = (processedCount / durationsSeconds.length * 100); logger.info(` - Chunk ${index + 1} processed in ${durationSecs.toFixed(1)}s. Est. remaining: ${remainingTime}s (${percentageComplete.toFixed(1)}% complete)`); } else { logger.info(` - Chunk ${index + 1} processed in ${durationSecs.toFixed(1)}s.`); } } }); // Flatten all turning points from all chunks localTurningPoints = chunkTurningPoints.flat(); } logger.info(`Dimension ${dimension}: Found ${localTurningPoints.length} raw turning points`); // If we found zero or one turning point at this level, return it directly (after potential filtering if needed) if (localTurningPoints.length <= 1) { // Apply filtering even for single points if configured return this.config.onlySignificantTurningPoints ? this.filterSignificantTurningPoints(localTurningPoints) : localTurningPoints; } // First merge any similar turning points at this level const mergedLocalTurningPoints = this.mergeSimilarTurningPoints(localTurningPoints); logger.info(`Dimension ${dimension}: Merged similar TPs to ${mergedLocalTurningPoints.length}`); // If merging resulted in 0 or 1 TP, return it (after filtering) if (mergedLocalTurningPoints.length <= 1) { return this.config.onlySignificantTurningPoints ? this.filterSignificantTurningPoints(mergedLocalTurningPoints) : mergedLocalTurningPoints; } // ------------------- CRITICAL ARC/CRA IMPLEMENTATION ------------------- // Determine whether to expand dimension based on complexity saturation // Calculate the maximum complexity in this dimension const maxComplexity = Math.max(0, ...mergedLocalTurningPoints.map(tp => tp.complexityScore)); // Ensure non-negative // Implement Transition Operator Ψ const needsDimensionalEscalation = maxComplexity >= this.config.complexitySaturationThreshold; logger.info(`Dimension ${dimension}: Max complexity = ${maxComplexity.toFixed(2)}, Saturation threshold = ${this.config.complexitySaturationThreshold}`); logger.info(`Dimension ${dimension}: Needs Escalation (Ψ)? ${needsDimensionalEscalation}`); // Conditions to STOP escalation and finalize at this dimension: // 1. Max recursion depth reached // 2. Too few turning points to warrant higher-level analysis // 3. Complexity hasn't saturated (no need to escalate) if (dimension >= this.config.maxRecursionDepth - 1 || mergedLocalTurningPoints.length <= 2 || // Adjusted slightly, maybe 2 TPs isn't enough to find meta-patterns !needsDimensionalEscalation) { logger.info(`Dimension ${dimension}: Finalizing at this level. Applying final filtering.`); // Track convergence for this dimension if (this.config.measureConvergence) { this.convergenceHistory.push({ previousTurningPoints: [], // No previous state at the final level of processing currentTurningPoints: mergedLocalTurningPoints, // TPs before final filtering dimension, distanceMeasure: 0, // No comparison needed at final step hasConverged: true, // Considered converged as processing stops here didEscalate: false }); } // Filter the merged points before returning return this.filterSignificantTurningPoints(mergedLocalTurningPoints); } // ----- DIMENSIONAL ESCALATION (n → n+1) ----- logger.info(`Dimension ${dimension}: Escalating to dimension ${dimension + 1}`); // Create meta-messages from the merged turning points at this level // Pass originalMessages for context if needed by createMetaMessagesFromTurningPoints const metaMessages = this.createMetaMessagesFromTurningPoints(mergedLocalTurningPoints, this.originalMessages); logger.info(`Dimension ${dimension}: Created ${metaMessages.length} meta-messages for dimension ${dimension + 1}`); if (metaMessages.length < 2) { logger.info(`Dimension ${dimension}: Not enough meta-messages (${metaMessages.length}) to perform higher-level analysis. Finalizing with current TPs.`); if (this.config.measureConvergence) { this.convergenceHistory.push({ previousTurningPoints: mergedLocalTurningPoints, // State before attempted escalation currentTurningPoints: mergedLocalTurningPoints, // State after failed escalation dimension: dimension + 1, // Represents the attempted next dimension distanceMeasure: 0, // No change hasConverged: true, // Converged because escalation failed didEscalate: false // Escalation attempted but yielded no processable result }); } return this.filterSignificantTurningPoints(mergedLocalTurningPoints); } // Recursively process the meta-messages to find higher-dimensional turning points const higherDimensionTurningPoints = await this.multiLayerDetection(metaMessages, dimension + 1); logger.info(`Dimension ${dimension + 1}: Found ${higherDimensionTurningPoints.length} higher-dimension TPs.`); // Track convergence and dimension escalation if (this.config.measureConvergence) { const convergenceState = { previousTurningPoints: mergedLocalTurningPoints, // TPs from dim n currentTurningPoints: higherDimensionTurningPoints, // TPs found in dim n+1 dimension: dimension + 1, distanceMeasure: this.calculateStateDifference(mergedLocalTurningPoints, higherDimensionTurningPoints), hasConverged: higherDimensionTurningPoints.length > 0, // Converged if TPs were found at higher level didEscalate: true }; this.convergenceHistory.push(convergenceState); logger.info(`Dimension ${dimension} → ${dimension + 1}: Convergence distance: ${convergenceState.distanceMeasure.toFixed(3)}. Converged: ${convergenceState.hasConverged}`); } // Combine turning points from local (n) and higher (n+1) dimensions // The combine function will handle merging, prioritizing higher-dim, and filtering return this.combineTurningPoints(mergedLocalTurningPoints, higherDimensionTurningPoints); } /** * Calculate a difference measure between two states (sets of turning points) * Used for convergence tracking. Considers significance and location. */ calculateStateDifference(state1, state2) { // Handle empty states if (state1.length === 0 && state2.length === 0) return 0.0; // No difference if (state1.length === 0 || state2.length === 0) return 1.0; // Maximum difference // 1. Average Significance Difference const avgSig1 = state1.reduce((sum, tp) => sum + tp.significance, 0) / state1.length; const avgSig2 = state2.reduce((sum, tp) => sum + tp.significance, 0) / state2.length; const sigDiff = Math.abs(avgSig1 - avgSig2); // Range [0, 1] // 2. Structural Difference (using Jaccard index on span ranges) const spans1 = new Set(state1.map(tp => `${tp.span.startIndex}-${tp.span.endIndex}`)); const spans2 = new Set(state2.map(tp => `${tp.span.startIndex}-${tp.span.endIndex}`)); const intersection = new Set([...spans1].filter(span => spans2.has(span))); const union = new Set([...spans1, ...spans2]); const jaccardDistance = union.size > 0 ? 1.0 - (intersection.size / union.size) : 0.0; // Range [0, 1] // Combine the measures (e.g., weighted average) const combinedDistance = (sigDiff * 0.5) + (jaccardDistance * 0.5); return Math.min(1.0, Math.max(0.0, combinedDistance)); // Ensure bounds [0, 1] } /** * Apply complexity function χ from the ARC/CRA framework */ calculateComplexityScore(significance, semanticShiftMagnitude) { // Base complexity from significance (maps [0,1] to [1, 5]) let complexity = 1 + significance * 4; // Adjust based on semantic shift magnitude (distance, scaled 0-1) // Larger shifts slightly increase complexity, centered around a baseline distance const baselineDistance = 0.3; // Assumes threshold is around here complexity += (semanticShiftMagnitude - baselineDistance) * 1.0; // Adjust sensitivity as needed // Ensure complexity is within the [1, 5] range return Math.max(1, Math.min(5, complexity)); } /** * Detect turning points within a single chunk of the conversation */ /** * Detect turning points within a single chunk of the conversation * This represents the local refinement process in the current dimension */ async detectTurningPointsInChunk(messages, dimension, chunkIndex, // Optional index for logging purposes originalMessages) { if (messages.length < 2) return []; // Generate embeddings for all messages in the chunk const embeddings = await this.generateMessageEmbeddings(messages, dimension); // Find significant semantic shifts between adjacent messages const turningPoints = []; const distances = []; // Store distances for logging const allDistances = []; // Store all distances for logging for (let i = 0; i < embeddings.length - 1; i++) { const current = embeddings[i]; const next = embeddings[i + 1]; // Calculate semantic distance between current and next message const distance = this.calculateSemanticDistance(current.embedding, next.embedding); const beforeMessage = messages.find((m) => m.id === current.id); const afterMessage = messages.find((m) => m.id === next.id); let thresholdScaleFactor; const baseThreshold = this.config.semanticShiftThreshold; if (baseThreshold > 0.7) { // For high initial thresholds (like 0.75), scale down more aggressively thresholdScaleFactor = Math.pow(0.25, dimension); // More aggressive (0.25 instead of 0.4) } else if (baseThreshold > 0.5) { // For medium thresholds thresholdScaleFactor = Math.pow(0.35, dimension); } else { // For already low thresholds thresholdScaleFactor = Math.pow(0.5, dimension); } const dimensionAdjustedThreshold = baseThreshold * thresholdScaleFactor; if (dimensionAdjustedThreshold <= distance) { distances.push({ current: current.index, next: next.index, distance: distance, }); // Store distance for logging } allDistances.push({ current: current.index, next: next.index, distance: distance, }); } logger.info(`For a total number of points: ${embeddings.length}, there were ${distances.length} distances found as being greater than the threshold of ${this.config.semanticShiftThreshold}. - The top 3 greatest distances are: ${allDistances.slice(0, 3).sort((a, b) => b.distance - a.distance).map(d => d.distance.toFixed(3)).join(', ')} This means there were ${distances.length} potential turning points detected ${dimension === 0 ? "with valid user-assistant turn pairs" : "with valid meta-messages"}`); if (distances.length === 0) { logger.info(`No significant semantic shifts detected in chunk ${chunkIndex}`); return []; } for (let d = 0; d < distances.length - 1; d++) { const distanceObj = distances[d]; const i = distanceObj.current; // Current message index const current = embeddings[i]; // Current message embedding const next = embeddings[distanceObj.next]; // Next message embedding // If the distance exceeds our threshold, we've found a turning point // Use direct array indices to get the messages const distance = distanceObj.distance; // Semantic distance between current and next message const beforeMessage = messages[i]; const afterMessage = messages[i + 1]; if (beforeMessage == undefined || afterMessage == undefined) { logger.info(`detectTurningPointsInChunk: warning beforeMessage or afterMessage is undefined, beforeMessage: ${beforeMessage}, afterMessage: ${afterMessage}`); continue; } // Classify the turning point using LLM const turningPoint = await this.classifyTurningPoint(beforeMessage, afterMessage, distance, dimension, originalMessages, d); logger.info(` ...${chunkIndex ? `[Chunk ${chunkIndex}] ` : ""}Potential turning point detected between messages ${current.id} and ${next.id} (distance: ${distance.toFixed(3)}, complexity: ${turningPoint.complexityScore.toFixed(1)}), signif: ${turningPoint.significance.toFixed(2)} category: ${turningPoint.category}`); if (turningPoint.significance > 1) { if (turningPoint.significance > 10) { turningPoint.significance = turningPoint.significance / 100; } else { turningPoint.significance = turningPoint.significance / 10; // Adjusting for scale } } turningPoints.push(turningPoint); } return turningPoints; } /** * Use LLM to classify a turning point and generate metadata. * *** MODIFIED to prioritize message.spanData over regex *** */ async classifyTurningPoint(beforeMessage, afterMessage, distance, dimension, originalMessages, index = 0) { let span; if (dimension > 0) { if (beforeMessage instanceof Message_1.MetaMessage === false || afterMessage instanceof Message_1.MetaMessage === false) { throw new Error("Before or after message is not a MetaMessage"); } const beforeMessageMeta = beforeMessage; const afterMessageMeta = afterMessage; // For higher dimensions, use meta-message and inner methods to get the the span ids for the start and end span = { startId: beforeMessageMeta.getMessagesInTurningPointSpanToMessagesArray()[0].id, endId: afterMessageMeta.getMessagesInTurningPointSpanToMessagesArray()[0].id, startIndex: this.originalMessages.findIndex((candidateM) => { return beforeMessageMeta.getMessagesInTurningPointSpanToMessagesArray()[0].id === candidateM.id; }), endIndex: this.originalMessages.findIndex((candidateM) => { return afterMessageMeta.getMessagesInTurningPointSpanToMessagesArray()[0].id === candidateM.id; }), originalSpan: { startId: beforeMessage.id, endId: afterMessage.id, startIndex: index, endIndex: index + 1, } }; } else { // For dimension 0, use original message IDs and find indices span = { startId: beforeMessage.id, endId: afterMessage.id, startIndex: Message_1.MetaMessage.findIndexOfMessageFromId({ id: beforeMessage.id, beforeMessage, afterMessage, messages: originalMessages, }), endIndex: Message_1.MetaMessage.findIndexOfMessageFromId({ id: afterMessage.id, beforeMessage, afterMessage, messages: originalMessages, }), }; } // --- REMOVED Regex block for extracting originalSpan from meta-message content --- // const originalSpan = { startIndex: 0, endIndex: 0, startMessageId: '', endMessageId: '' }; // if (beforeMessage.author === 'meta' || afterMessage.author === 'meta') { // ... regex matching logic ... // } // --- End Removal --- // --- LLM Prompt Setup (using original prompt structure) --- const systemPrompt = (0, prompt_1.formSystemMessage)({ dimension, distance }); const userMessage = (0, prompt_1.formUserMessage)({ config: this.config, afterMessage, beforeMessage, dimension, addUserInstructions: this.config.customUserInstruction && this.config.customUserInstruction.length > 0 ? true : false, }); const contextualAidText = this.prepareContextualInfoMeta(beforeMessage, afterMessage, span, originalMessages, dimension, 2, dimension > 0); try { // --- Call LLM (using original parameters and schema) --- const response = await this.openai.chat.completions.create({ model: this.config.classificationModel, messages: [ { role: 'system', content: `${this.config.customSystemInstruction ? this.config.customSystemInstruction : systemPrompt}\n\n${contextualAidText}\n------- end of contextual background info see below as reminder of instructions -------\n\n${this.config.customSystemInstruction ? this.config.customSystemInstruction : (0, prompt_1.formSystemPromptEnding)(dimension)}`, }, { role: 'user', content: this.config.customUserInstruction ? `${this.config.customUserInstruction}\n\n${userMessage}\n\n${this.config.customUserInstruction}` : userMessage }, ], temperature: 0.6, //@ts-ignore - Allow vendor-specific params if needed repeat_penalty: this.config.endpoint ? 1.005 : undefined, top_k: this.config.endpoint ? 20 : undefined, stop: ['<|im_end|>'], response_format: (0, prompt_1.formResponseFormatSchema)(dimension), top_p: 0.9, }); const content = response.choices[0]?.message?.content || '{}'; let classification = {}; try { classification = JSON.parse(content); console.info(` got classification: ${JSON.stringify(classification, null, 2)}`); } catch (err) { logger.info('Error parsing LLM response as JSON:', err.message); // Attempt to extract JSON from markdown code block if necessary const jsonMatch = content.match(/```json\s*([\s\S]*?)\s*```/); if (jsonMatch && jsonMatch[1]) { try { classification = JSON.parse(jsonMatch[1]); logger.info('Successfully extracted JSON from markdown block.'); } catch (parseErr) { logger.info('Failed to parse extracted JSON:', parseErr.message); classification = {}; // Reset on secondary failure } } else { const plainJsonMatch = content.match(/\{[\s\S]*\}/); // Fallback to find any JSON structure if (plainJsonMatch) { try { classification = JSON.parse(plainJsonMatch[0]); logger.info('Successfully extracted JSON using simple match.'); } catch (parseErr) { logger.info('Failed to parse simple JSON match:', parseErr.message); classification = {}; } } else { logger.info('Could not extract JSON from response:', content); classification = {}; } } // Provide default values if parsing failed completely if (Object.keys(classification).length === 0) { classification = { label: 'Parsing Error - Unclassified', category: 'Other', keywords: [], emotionalTone: 'neutral', sentiment: 'neutral', significance: 0.1, quotes: [], best_id: span.startId }; } } // --- Validate and Sanitize LLM Output --- const validatedClassification = { label: typeof classification.label === 'string' ? classification.label.substring(0, 50) : 'Unknown Turning Point', category: typeof classification.category === 'string' ? classification.category : 'Other', keywords: Array.isArray(classification.keywords) ? classification.keywords.map(String).slice(0, 4) : [], // Limit count emotionalTone: typeof classification.emotionalTone === 'string' ? classification.emotionalTone : 'neutral', sentiment: ['positive', 'negative', 'neutral'].includes(classification.sentiment) ? classification.sentiment : 'neutral', significance: typeof classification.significance === 'number' ? Math.max(0, Math.min(1, classification.significance)) : 0.5, quotes: Array.isArray(classification.quotes) ? classification.quotes.map(String).slice(0, 3) : [], // Limit count best_id: typeof classification.best_id === 'string' ? classification.best_id : span.startId, // Default to start of span }; // Calculate complexity score const complexityScore = this.calculateComplexityScore(validatedClassification.significance, distance // Use the raw distance (0-1) ); // --- Construct TurningPoint Object --- return { id: `tp-${dimension}-${span.startIndex}-${span.endIndex}`, label: validatedClassification.label, category: validatedClassification.category, span: span, // Use the span derived at the beginning // deprecatedSpan is no longer populated from regex results semanticShiftMagnitude: distance, keywords: validatedClassification.keywords, quotes: validatedClassification.quotes, emotionalTone: validatedClassification.emotionalTone, sentiment: validatedClassification.sentiment, detectionLevel: dimension, significance: validatedClassification.significance, complexityScore: complexityScore }; } catch (err) { logger.info(`Error during LLM call for turning point classification: ${err.message}`); // Fallback classification on API error if (this.config.throwOnError) { } else { return { id: `tp-err-${dimension}-${span.startId}`, label: 'LLM Error - Unclassified', category: 'Other', span: span, semanticShiftMagnitude: distance, keywords: [], quotes: [], emotionalTone: 'neutral', sentiment: 'neutral', detectionLevel: dimension, significance: 0.1, complexityScore: 1.0 // Minimum complexity }; } } } /** * Updated to utilize new classes of Message and MetaMessage for better structure and clarity * @param turningPoints * @param originalMessages * @returns */ createMetaMessagesFromTurningPoints(turningPoints, originalMessages) { if (turningPoints.length === 0) return []; // Group turning points by category (first-level abstraction) const groupedByCategory = {}; turningPoints.forEach((tp) => { const category = tp.category; if (!groupedByCategory[category]) { groupedByCategory[category] = []; } groupedByCategory[category].push(tp); }); logger.info(`Grouped categories: `, JSON.stringify(groupedByCategory, null, 2)); // Create meta-messages (one per category to find higher-level patterns) const metaMessages = []; // First create category messages - represents dimension n to n+1 transformation Object.entries(groupedByCategory).forEach(([category, points], index) => { // Use the factory method from MetaMessage class to create a properly typed meta-message const metaMessage = Message_1.MetaMessage.createCategoryMetaMessage(category, points, index, originalMessages); metaMessages.push(metaMessage); }); // Create timeline/section meta-messages const sortedPoints = [...turningPoints].sort((a, b) => a.span.startIndex - b.span.startIndex); const sectionCount = Math.min(4, Math.ceil(sortedPoints.length / 2)); const pointsPerSection = Math.ceil(sortedPoints.length / sectionCount); // Create chronological section meta-messages for (let i = 0; i < sectionCount; i++) { const sectionPoints = sortedPoints.slice(i * pointsPerSection, Math.min((i + 1) * pointsPerSection, sortedPoints.length)); if (sectionPoints.length === 0) continue; // Create a section meta-message using the factory method const sectionMetaMessage = Message_1.MetaMessage.createSectionMetaMessage(sectionPoints, i, this.originalMessages); console.info('created sectionMetageMessage'); metaMessages.push(sectionMetaMessage); } logger.info(`Created ${metaMessages.length} meta-messages for dimensional expansion: ${metaMessages .map((m) => m.id) .join(", ")}`); return metaMessages; } // --- Remaining methods are kept identical to your second provided version --- /** * Filter turning points to keep only significant ones * (Using original logic from the second code block) */ filterSignificantTurningPoints(turningPoints) { if (!this.config.onlySignificantTurningPoints || turningPoints.length === 0) { // Ensure sorted return even if not filtering return turningPoints.sort((a, b) => a.span.startIndex - b.span.startIndex); } logger.info(`Filtering ${turningPoints.length} TPs based on significance >= ${this.config.significanceThreshold} and maxPoints = ${this.config.maxTurningPoints}`); // Sort by significance, complexity, magnitude const sorted = [...turningPoints].sort((a, b) => { if (b.significance !== a.significance) return b.significance - a.significance; if (b.complexityScore !== a.complexityScore) return b.complexityScore - a.complexityScore; return b.semanticShiftMagnitude - a.semanticShiftMagnitude; }); const result = []; const coveredIndices = new Set(); // Use indices for overlap check const maxPoints = this.config.maxTurningPoints; for (const tp of sorted) { // Check significance threshold first if (tp.significance < this.config.significanceThreshold) { // Only consider points below threshold if we haven't found enough significant ones yet if (result.length >= Math.ceil(maxPoints / 2)) { // Heuristic: if we have half the max points, stop adding insignificant ones continue; } } // Check for significant overlap with already selected points let overlapRatio = 0; let isOverlapping = false; const tpSpanSize = tp.span.endIndex - tp.span.startIndex + 1; if (tpSpanSize > 0) { let overlapCount = 0; for (let i = tp.span.startIndex; i <= tp.span.endIndex; i++) { if (coveredIndices.has(i)) { overlapCount++; } } overlapRatio = overlapCount / tpSpanSize; } // Define significant overlap threshold (e.g., 40% from original code) const overlapThreshold = 0.4; isOverlapping = overlapRatio > overlapThreshold; if (!isOverlapping && result.length < maxPoints) { result.push(tp); // Mark indices covered by this TP for (let i = tp.span.startIndex; i <= tp.span.endIndex; i++) { coveredIndices.add(i); } } else if (isOverlapping) { logger.info(` TP ${tp.id} (Sig: ${tp.significance.toFixed(2)}) overlaps significantly (${(overlapRatio * 100).toFixed(0)}%) with existing TPs. Skipping.`); } else if (result.length >= maxPoints) { logger.info(` Reached max turning points (${maxPoints}). Skipping TP ${tp.id}.`); } } // Ensure at least one TP is returned if any were found initially if (result.length === 0 && sorted.length > 0) { logger.info("No TPs met significance/overlap criteria, returning the single most significant one."); result.push(sorted[0]); } // Add a second diverse TP if only one was kept and more exist (original logic) else if (result.length === 1 && sorted.length > 1) { for (let i = 1; i < sorted.length; i++) { const nextTp = sorted[i]; // Check if it's sufficiently far from the first one (e.g., > 3 messages gap) if (Math.abs(nextTp.span.startIndex - result[0].span.startIndex) > 3) { // Check minimal overlap with the first one let overlapsFirst = false; for (let j = nextTp.span.startIndex; j <= nextTp.span.endIndex; j++) { if (j >= result[0].span.startIndex && j <= result[0].span.endIndex) { overlapsFirst = true; break; } } if (!overlapsFirst) { logger.info("Adding a second, non-overlapping TP for diversity."); result.push(nextTp); break; } } } } logger.info(`Filtered down to ${result.length} significant turning points.`); // Final sort by position in conversation return result.sort((a, b) => a.span.startIndex - b.span.startIndex); } /** * Combine turning points from different dimensions * (Using original logic from the second code block) */ combineTurningPoints(localTurningPoints, higherDimensionTurningPoints) { logger.info(`Combining ${localTurningPoints.length} local (dim ${localTurningPoints[0]?.detectionLevel ?? 'N/A'}) and ${higherDimensionTurningPoints.length} higher (dim ${higherDimensionTurningPoints[0]?.detectionLevel ?? 'N/A'}) TPs.`); // Prioritize higher-dimensional turning points by boosting their significance (original logic) const boostedHigher = higherDimensionTurningPoints.map(tp => ({ ...tp, // Apply a boost, ensuring it doesn't exceed 1.0 significance: Math.min(1.0, tp.significance * 1.2), // Adjusted boost factor slightly // Keep original detectionLevel for merging logic })); // Combine all turning points const allTurningPoints = [...localTurningPoints, ...boostedHigher]; logger.info(`Total TPs before cross-level merge: ${allTurningPoints.length}`); // Merge overlapping turning points across dimensions, prioritizing higher dimensions/significance const mergedTurningPoints = this.mergeAcrossLevels(allTurningPoints); logger.info(`Merged across levels to ${mergedTurningPoints.length} TPs.`); // Filter the combined & merged list to keep the most significant ones overall const filteredTurningPoints = this.filterSignificantTurningPoints(mergedTurningPoints); logger.info(`Final combined and filtered TPs: ${filteredTurningPoints.length}`); // Sort by position in conversation before returning return filteredTurningPoints.sort((a, b) => a.span.startIndex - b.span.startIndex); } /** * Merge similar or overlapping turning points *within* the same dimension * (Using original logic from the second code block) */ mergeSimilarTurningPoints(turningPoints) { if (turningPoints.length <= 1) return turningPoints; // Sort turning points by start index const sorted = [...turningPoints].sort((a, b) => a.span.startIndex - b.span.startIndex); const merged = []; let currentTp = sorted[0]; // Use a more descriptive name for (let i = 1; i < sorted.length; i++) { const nextTp = sorted[i]; // Check conditions for merging (original logic) const isOverlapping = (nextTp.span.startIndex <= currentTp.span.endIndex + 2); // Allow small gap const isSimilarCategory = (nextTp.category === currentTp.category); // Added closeness check from original code const hasCloseIndices = (nextTp.span.startIndex - currentTp.span.endIndex) <= 3; // Merge if overlapping OR close, AND same category if ((isOverlapping || hasCloseIndices) && isSimilarCategory) { logger.info(` Merging similar TPs (Dim ${currentTp.detectionLevel}): ${currentTp.id} and ${nextTp.id}`); // Merge the turning points const newLabel = this.createMergedLabel(currentTp.label, nextTp.label); // Create merged span (min start, max end) const mergedSpan = this.ensureChronologicalSpan({ startId: currentTp.span.startIndex <= nextTp.span.startIndex ? currentTp.span.startId : nextTp.span.startId, endId: currentTp.span.endIndex >= nextTp.span.endIndex ? currentTp.span.endId : nextTp.span.endId, startIndex: Math.min(currentTp.span.startIndex, nextTp.span.startIndex), endIndex: Math.max(currentTp.span.endIndex, nextTp.span.endIndex) }); // Update the deprecated span too (original logic, though less relevant now) // Note: deprecatedSpan might not exist if TPs came from meta-messages const mergedDeprecatedSpan = (currentTp.deprecatedSpan && nextTp.deprecatedSpan) ? { startIndex: Math.min(currentTp.deprecatedSpan.startIndex, nextTp.deprecatedSpan.startIndex), endIndex: Math.max(currentTp.deprecatedSpan.endIndex, nextTp.deprecatedSpan.endIndex), startMessageId: mergedSpan.startIndex === currentTp.deprecatedSpan.startIndex ? currentTp.deprecatedSpan.startMessageId : nextTp.deprecatedSpan.startMessageId, endMessageId: mergedSpan.endIndex === currentTp.deprecatedSpan.endIndex ? currentTp.deprecatedSpan.endMessageId : nextTp.deprecatedSpan.endMessageId } : undefined; // Handle cases where deprecatedSpan might be missing // Combine keywords and quotes (unique, limited) const mergedKeywords = Array.from(new Set([...(currentTp.keywords || []), ...(nextTp.keywords || [])])).slice(0, 5); const mergedQuotes = Array.from(new Set([...(currentTp.quotes || []), ...(nextTp.quotes || [])])).slice(0, 3); // Limit quotes too // Update the current TP to be the merged version currentTp = { ...currentTp, // Keep most properties of the first TP id: `${currentTp.id}-merged-${nextTp.span.startIndex}`, // Indicate merge in ID label: newLabel, span: mergedSpan, // Only include deprecatedSpan if it was successfully merged ...(mergedDeprecatedSpan && { deprecatedSpan: mergedDeprecatedSpan }), semanticShiftMagnitude: (currentTp.semanticShiftMagnitude + nextTp.semanticShiftMagnitude) / 2, keywords: mergedKeywords, quotes: mergedQuotes, // Boost significance slightly, cap at 1.0 (original logic) significance: Math.min(1.0, ((currentTp.significance + nextTp.significance) / 2) * 1.1), // Take max complexity (original logic) complexityScore: Math.max(currentTp.complexityScore, nextTp.complexityScore), // Combine emotional tone/sentiment logically (e.g., take the one from the more significant TP) emotionalTone: currentTp.significance >= nextTp.significance ? currentTp.emotionalTone : nextTp.emotionalTone, sentiment: currentTp.significance >= nextTp.significance ? currentTp.sentiment : nextTp.sentiment, }; } else { // If not merging, push the completed current TP and move to the next merged.push(currentTp); currentTp = nextTp; } } // Add the last processed TP merged.push(currentTp); return merged; } /** * Merge turning points across different dimensions with priority to higher dimensions * (Using original logic from the second code block) */ mergeAcrossLevels(turningPoints) { if (turningPoints.length <= 1) return turningPoints; // Sort by dimension DESC (higher first), then by significance DESC, then by start index ASC const sorted = [...turningPoints].sort((a, b) => { if (b.detectionLevel !== a.detectionLevel) return b.detectionLevel - a.detectionLevel; // Add secondary sort by significance within the same level if (b.significance !== a.significance) return b.significance - a.significance; return a.span.startIndex - b.span.startIndex; // Tertiary sort by position }); const merged = []; // Use a Set of covered *indices* for more granular overlap checking const coveredIndices = new Set(); logger.info(` Merging across levels. Input count: ${sorted.length}. Prioritizing higher dimension/significance.`); for (const tp of sorted) { // Check how much of this TP's span is already covered let overlapCount = 0; const spanSize = tp.span.endIndex - tp.span.startIndex + 1; if (spanSize <= 0) continue; // Skip invalid spans for (let i = tp.span.startIndex; i <= tp.span.endIndex; i++) { if (coveredIndices.has(i)) { overlapCount++; } } const overlapRatio = overlapCount / spanSize; // Define significant overlap threshold (e.g., 50% - adjust as needed) const significantOverlapThreshold = 0.5; // Keep the TP if it's not significantly overlapped by higher-priority ones if (overlapRatio < significantOverlapThreshold) { merged.push(tp); // Mark its indices as covered *only if it wasn't already significantly covered* for (let i = tp.span.startIndex; i <= tp.span.endIndex; i++) { coveredIndices.add(i);