@gaiaverse/semantic-turning-point-detector

Version:

Detects key semantic turning points in conversations using recursive semantic distance analysis. Ideal for conversation analysis, dialogue segmentation, insight detection, and AI-assisted reasoning tasks.

github.com/gaiaverseltd/semantic-turning-point-detector

gaiaverseltd/semantic-turning-point-detector

1,310 lines (1,164 loc) • 143 kB

text/typescript

// file: semanticTurningPointDetector.ts import fs from "fs-extra"; import winston from "winston"; import { Ollama } from "ollama"; import dotenv from "dotenv"; dotenv.config(); import async from "async"; import { OpenAI } from "openai"; import { LRUCache } from "lru-cache"; import crypto from "crypto"; import { countTokens, createEmbeddingCache } from "./tokensUtil"; import { MetaMessage, Message, MessageSpan } from "./Message"; import { returnFormattedMessageContent } from "./stripContent"; import { circularToneSimilarity, computeSignificance, computeSignificanceWithChoquet, formAnalysisResponseFormat, formAnalysisSystemPromptEnding, formScoringResponseFormat, formScoringSystemPromptEnding, formSystemMessage, } from "./prompt"; import { ChunkingResult, ConvergenceState, EpistemicPrimitives, MessageEmbedding, TurningPoint, turningPointCategories, TurningPointCategory, TurningPointDetectorConfig, } from "./types"; import { CounterfactualAnalyzer } from "./counterfactual"; // Cache for token counts to avoid recalculating - implements atomic memory concept const tokenCountCache = new LRUCache<string, number>({ max: 10000, ttl: 1000 * 60 * 60 * 24, }); /** * Semantic Turning Point Detector using ARC/CRA/DAO Framework * * Identifies semantically significant moments in conversations where meaning shifts, * emotional ruptures occur, or intellectual pivots happen. Uses multi-dimensional * analysis with optional phi-field enhancement for improved accuracy. 
* * @example Basic Usage * ```typescript * const detector = new SemanticTurningPointDetector({ * apiKey: process.env.OPENAI_API_KEY, * classificationModel: "gpt-4o-mini", * semanticShiftThreshold: 0.4, * maxTurningPoints: 10 * }); * * const result = await detector.detectTurningPoints(messages); * console.log(`Found ${result.points.length} turning points`); * ``` * * @example Advanced Configuration * ```typescript * const detector = new SemanticTurningPointDetector({ * enableExperimentalPhi: true, // Enhanced significance scoring * enableCounterfactualAnalysis: true, // Additional validation * maxRecursionDepth: 4, // Multi-dimensional analysis depth * dynamicallyAdjustSemanticShiftThreshold: true * }); * ``` * * ## Key Parameters * - `semanticShiftThreshold` (0.2-0.7): Sensitivity control * - `maxRecursionDepth` (2-5): Analysis depth * - `enableExperimentalPhi`: Activates phi-field enhancement * - `maxTurningPoints`: Limit final results * * ## Scoring * - **Confidence**: How semantically distinct the turning point is * - **Necessity**: How epistemically essential it is (phi-aware) * * @see {@link TurningPointDetectorConfig} Configuration options * @see {@link TurningPoint} Result structure */ export class SemanticTurningPointDetector { private config: TurningPointDetectorConfig; /** * For ease of use in llm requests, openai's client is used as it allows configurable endpoints. Further expoloration might be reasonable in leveraging other libaries, such as ollama, llmstudio, genai, etc, for more direct compatibility with other LLM providers. Though at this time, the OpenAI client is sufficient for requests done by this detector. */ private openai: OpenAI; /** * This provides the array of the initial messages that were passed to the detector. This is noted as such as throughout the process, ARC involves analyzing subsets of the original messages, and the original messages are not modified. 
*/
  private originalMessages: Message[] = [];

  /**
   * An array of state changes across iterations, used for convergence measurement.
   * It tracks the evolution of turning points across iterations and dimensions and is
   * used when returning the final results to determine whether the turning points
   * have converged.
   */
  private convergenceHistory: ConvergenceState[] = [];

  /**
   * Used to help mitigate repeat embedding requests for the same message content.
   * Can be configured to avoid excessive RAM usage via `embeddingCacheRamLimitMB`.
   */
  private embeddingCache: LRUCache<string, Float32Array>;

  private endpointType: "ollama" | "openai" | "unknown" | "openrouter" =
    "unknown";
  private ollama: Ollama | null = null;
  readonly logger: winston.Logger | Console;
  private counterfactualAnalyzer?: CounterfactualAnalyzer;

  /**
   * Creates a new instance of the semantic turning point detector.
   *
   * Merges the supplied partial configuration with defaults, sets up logging
   * (a winston logger writing to the console and to
   * `results/semanticTurningPointDetector.log` when no logger is supplied),
   * creates the OpenAI client and the embedding cache, and instantiates the
   * counterfactual analyzer when `enableCounterfactualAnalysis` is set.
   *
   * @param config - Partial configuration; omitted fields fall back to defaults.
   */
  constructor(config: Partial<TurningPointDetectorConfig> = {}) {
    // An explicit, valid concurrency always wins. Without one, custom endpoints
    // default to 1 and the default endpoint to 4. (The previous expression
    // `(a ?? b) ? 1 : 4` forced every explicit concurrency to 1.)
    const defaultConcurrency = config.endpoint ? 1 : 4;
    const concurrency =
      typeof config.concurrency === "number" && config.concurrency >= 1
        ? config.concurrency
        : defaultConcurrency;

    const customCategories = config.turningPointCategories;

    this.config = {
      apiKey: config.apiKey || process.env.OPENAI_API_KEY || "",
      classificationModel: config.classificationModel || "gpt-4o-mini",
      embeddingModel: config.embeddingModel || "text-embedding-3-small",
      embeddingEndpoint: config.embeddingEndpoint,
      semanticShiftThreshold: config.semanticShiftThreshold || 0.22,
      minTokensPerChunk: config.minTokensPerChunk || 250,
      maxTokensPerChunk: config.maxTokensPerChunk || 2000,
      concurrency,
      embeddingConcurrency: config.embeddingConcurrency ?? 5,
      logger: config.logger ?? undefined,
      embeddingCacheRamLimitMB: config.embeddingCacheRamLimitMB || 256,
      maxRecursionDepth: config.maxRecursionDepth || 3,
      onlySignificantTurningPoints: config.onlySignificantTurningPoints ?? true,
      significanceThreshold: config.significanceThreshold || 0.0,
      minMessagesPerChunk: config.minMessagesPerChunk || 3,
      maxTurningPoints: config.maxTurningPoints || 5,
      debug: config.debug || false,
      turningPointCategories:
        customCategories && customCategories.length > 0
          ? customCategories
          : turningPointCategories,
      endpoint: config.endpoint,
      temperature: config.temperature ?? 0.6,
      top_p: config.top_p ?? 0.95,
      complexitySaturationThreshold:
        config.complexitySaturationThreshold || 4.5,
      measureConvergence: config.measureConvergence ?? true,
      enableExperimentalPhi: config.enableExperimentalPhi ?? false,
      dynamicallyAdjustSemanticShiftThreshold:
        config.dynamicallyAdjustSemanticShiftThreshold ?? false,
      phiMergeThresholdMultiplier: config.phiMergeThresholdMultiplier ?? 0.5,
      overlapThreshold: config.overlapThreshold ?? 0.4,
      enableDynamicComplexitySaturation:
        config.enableDynamicComplexitySaturation ?? false,
      dynamicSaturationTargetPercentile:
        config.dynamicSaturationTargetPercentile ?? 0.15,
      dynamicSaturationMinSamples: config.dynamicSaturationMinSamples ?? 10,
      epistemicThreshold: config.epistemicThreshold ?? 0.01,
      enableCounterfactualAnalysis:
        config.enableCounterfactualAnalysis ?? false,
    };

    // Initialize counterfactual analyzer if enabled
    if (config.enableCounterfactualAnalysis) {
      this.counterfactualAnalyzer = new CounterfactualAnalyzer();
    }

    // Classify the endpoint by host name; Ollama endpoints are detected later,
    // inside detectTurningPoints.
    const endpoint = config.endpoint;
    if (endpoint && endpoint.includes("api.openai.com")) {
      this.endpointType = "openai";
    } else if (endpoint && endpoint.includes("openrouter.ai")) {
      this.endpointType = "openrouter";
    } else {
      this.endpointType = "unknown";
    }

    if (this.config.logger === undefined) {
      // The file transport writes into ./results, so make sure it exists.
      fs.ensureDirSync("results");
      this.logger = winston.createLogger({
        level: "info",
        format: winston.format.combine(
          winston.format.timestamp(),
          winston.format.json(),
        ),
        transports: [
          new winston.transports.Console({
            format: winston.format.combine(
              winston.format.colorize(),
              winston.format.timestamp({ format: "YYYY-MM-DD HH:mm:ss" }),
              winston.format.printf(({ timestamp, level, message }) => {
                return `${timestamp} ${level}: ${message}`;
              }),
            ),
          }),
          new winston.transports.File({
            filename: "results/semanticTurningPointDetector.log",
            format: winston.format.json(),
          }),
        ],
      });
    } else {
      // A caller-supplied logger was previously never stored, leaving
      // `this.logger` undefined for every later logging call.
      this.logger = this.config.logger;
    }

    // Validate the turning point categories (this only logs warnings); it must
    // run after the logger has been set up above.
    if (customCategories && customCategories.length > 0) {
      this.validateTurningPointCategories(customCategories);
    }

    // Initialize OpenAI client. `this.config.apiKey` is never nullish (it is
    // defaulted to ""), so `||` is required for the env fallbacks to apply.
    // The trailing "" keeps key-less local endpoints working as before.
    this.openai = new OpenAI({
      apiKey:
        this.config.apiKey ||
        process.env.LLM_API_KEY ||
        process.env.OPENAI_API_KEY ||
        "",
      baseURL: this.config.endpoint,
    });

    // Initialize the embedding cache with the specified RAM limit.
    this.embeddingCache = createEmbeddingCache(
      this.config.embeddingCacheRamLimitMB,
    );

    if (this.config.debug) {
      this.logger.info(
        `[TurningPointDetector] Initialized with config:\n${JSON.stringify(
          {
            ...this.config,
            apiKey: "[REDACTED]",
            // Logger objects are not meaningfully serializable; summarise instead.
            logger: this.config.logger ? "[custom logger]" : undefined,
          },
          null,
          2,
        )}`,
      );
      this.logger.info(
        `[TurningPointDetector] Embedding cache initialized with ${this.embeddingCache.max} max entries (${this.config.embeddingCacheRamLimitMB}MB limit)`,
      );
    }
  }

  /** Returns the configured classification model name. */
  public getModelName(): string {
    return this.config.classificationModel;
  }

  /**
   * Calculates a thematic similarity score between two turning points based on their
   * emotional tone, sentiment, and LLM-assigned significance. This serves as the core
   * metric for φ-aware grouping and sectioning.
   *
   * @param tp1 - The first turning point.
   * @param tp2 - The second turning point.
* @returns A similarity score between 0 and 1.
   */
  private calculateThematicSimilarity(
    tp1: TurningPoint,
    tp2: TurningPoint,
  ): number {
    const weights = { tone: 0.5, sentiment: 0.3, significance: 0.2 };

    const toneSim = circularToneSimilarity(
      tp1.emotionalTone,
      tp2.emotionalTone,
    );
    const sentimentSim = tp1.sentiment === tp2.sentiment ? 1 : 0;

    // Significance is expected on a 0–1 scale, but unnormalized values can
    // occur. Clamp so a large gap cannot push the component (and the whole
    // score) below 0; a non-finite gap counts as "no similarity".
    const sigDiff = Math.abs(tp1.significance - tp2.significance);
    const sigSim = Number.isFinite(sigDiff) ? Math.max(0, 1 - sigDiff) : 0;

    return (
      toneSim * weights.tone +
      sentimentSim * weights.sentiment +
      sigSim * weights.significance
    );
  }

  /**
   * Recalculates the significance score of a turning point using phi-awareness.
   * The turning point's existing significance, complexity, emotional tone and
   * semantic shift magnitude are re-run through the Choquet-based scorer with
   * phi-amplified inputs.
   *
   * @param tp - The turning point whose significance is being recomputed.
   * @param phi - The φ score for this turning point.
   * @returns The value produced by `computeSignificanceWithChoquet`.
   */
  private recalculateSignificanceWithPhi(
    tp: TurningPoint,
    phi: number,
  ): number {
    // Emotion intensity lookup keyed by lower-cased emotional tone
    const emotionIntensity: { [key: string]: number } = {
      joyful: 0.9,
      excited: 0.8,
      surprised: 0.9,
      worried: 0.7,
      anxious: 0.7,
      angry: 0.9,
      furious: 0.95,
      skeptical: 0.6,
      disgusted: 0.8,
      sad: 0.8,
      discouraged: 0.7,
      hopeful: 0.4,
      neutral: 0.3,
    };
    // Unlisted tones fall back to the "neutral" intensity.
    const intensity = emotionIntensity[tp.emotionalTone.toLowerCase()] ?? 0.3;

    // Positive when phi > 0.5 (amplify), negative when phi < 0.5 (dampen).
    const phiOffset = phi - 0.5;

    // Re-run through Choquet but with phi-aware parameters
    return computeSignificanceWithChoquet(
      {
        // Reuse original significance as structural certainty but amplify with phi
        certainty: Math.min(1.0, tp.significance * (1.0 + phiOffset * 0.6)),
        // Use complexity as novelty credibility (normalized to 0-1)
        novelty: Math.min(1.0, tp.complexityScore / 5),
        // Amplify affective delta based on phi
        affectiveDelta: Math.min(1.0, intensity * (1.0 + phiOffset * 0.8)),
        // Scale semantic shift magnitude to 0-10 range
        impact: Math.min(10.0, tp.semanticShiftMagnitude * 10),
      },
      tp.emotionalTone,
      {
        enableExperimentalPhi: true,
        phiScore: phi,
        dimension: tp.detectionLevel,
        averageDistance: tp.semanticShiftMagnitude,
      },
    );
  }

  /**
   * Computes the φ (Significance) field by interpreting
LLM-derived emotional and
   * significance data from each turning point. This creates a rich, self-referential
   * measure of thematic importance.
   *
   * For each turning point: φ = 0.7 · normalized significance
   * + 0.3 · tone intensity · sentiment modifier, clamped to [0, 1].
   *
   * @param turningPoints - Turning points to score.
   * @returns Map from turning point id to its φ score (empty for empty input).
   */
  private computePhiSignificanceField(
    turningPoints: TurningPoint[],
  ): Map<string, number> {
    const phiMap = new Map<string, number>();
    if (turningPoints.length === 0) return phiMap;

    // Map emotional tones to intensity scores
    const emotionalIntensity: { [key: string]: number } = {
      // High intensity
      furious: 0.95,
      angry: 0.9,
      disgusted: 0.9,
      surprised: 0.8,
      // Medium intensity
      anxious: 0.7,
      worried: 0.7,
      sad: 0.7,
      discouraged: 0.7,
      // Low intensity
      joyful: 0.6,
      excited: 0.6,
      hopeful: 0.4,
      skeptical: 0.5,
      // Default/Neutral
      neutral: 0.1,
      unknown: 0.1,
    };

    for (const tp of turningPoints) {
      // Normalize the LLM significance to 0-1 by guessing its scale:
      //   [0, 1]                                  -> already normalized
      //   integer in [0, 100], or anything > 10   -> 0-100 scale
      //   everything else                         -> 0-10 scale
      // Non-integer values above 10 used to be divided by 10, which produced a
      // "normalized" significance greater than 1.
      const rawSignificance = tp.significance;
      const isAlreadyNormalized = rawSignificance >= 0 && rawSignificance <= 1;
      const isPercentScale =
        rawSignificance >= 0 &&
        rawSignificance <= 100 &&
        (Number.isInteger(rawSignificance) || rawSignificance > 10);

      let normSignificance: number;
      if (isAlreadyNormalized) {
        normSignificance = rawSignificance;
      } else if (isPercentScale) {
        normSignificance = rawSignificance / 100;
      } else {
        normSignificance = (rawSignificance || 0) / 10;
      }

      // Get the intensity from the emotional tone, defaulting to a low value
      const toneIntensity =
        emotionalIntensity[tp.emotionalTone.toLowerCase()] || 0.1;

      // Negative sentiment gives the emotional component a small boost
      const sentimentModifier = tp.sentiment === "negative" ? 1.1 : 1.0;

      // The weighted formula prioritizes the LLM's direct significance
      // assessment, but amplifies it with emotional intensity.
      const phi =
        normSignificance * 0.7 + toneIntensity * sentimentModifier * 0.3;

      // Clamp the final score to ensure it's within the [0, 1] range
      phiMap.set(tp.id, Math.max(0, Math.min(1, phi)));
    }

    return phiMap;
  }

  /**
   * Main entry point: detect turning points in a conversation.
   * Implements the full ARC/CRA framework.
   *
   * Runs multi-layer detection over `messages`, then scores each turning point
   * that has messages both before and after it:
   * - confidence: mean semantic distance between the turning span and the
   *   messages before / after it;
   * - necessity: a structural score amplified by φ when `enableExperimentalPhi`
   *   is set, otherwise `significance * 0.6`.
   *
   * @param messages - The conversation to analyse.
   * @returns Aggregate confidence, aggregate necessity (`null` when φ is
   *   disabled) and the detected turning points.
   */
  public async detectTurningPoints(messages: Message[]): Promise<{
    confidence: number;
    necessity: number; // The new Choquet-based score
    points: TurningPoint[];
  }> {
    this.logger.info(
      "Starting turning-point detection (ARC/CRA) on with provided " +
        messages.length +
        " messages",
    );

    // Log the key configuration values for this run
    this.logger.info(
      [
        ` Turning Point Detection Configuration:`,
        `dynamicallyAdjustSemanticShiftThreshold: ${this.config.dynamicallyAdjustSemanticShiftThreshold},`,
        `dynamicallyAdjustComplexitySaturation: ${this.config.enableDynamicComplexitySaturation},`,
        `enableExperimentalPhi: ${this.config.enableExperimentalPhi},`,
        `endpoint: ${this.config.endpoint},`,
        `maxTurningPoints: ${this.config.maxTurningPoints},`,
        `significanceThreshold: ${this.config.significanceThreshold},`,
        `semanticShiftThreshold: ${this.config.semanticShiftThreshold},`,
        `minTokensPerChunk: ${this.config.minTokensPerChunk},`,
        `maxTokensPerChunk: ${this.config.maxTokensPerChunk},`,
        `classificationModel: ${this.config.classificationModel},`,
        ``,
      ].join(" "),
    );

    this.convergenceHistory = [];

    const isEndpointOllamaBased = await this.isOllamaEndpoint(
      this.config.endpoint,
    );
    // The extra endpoint check keeps `new URL` from receiving undefined.
    if (isEndpointOllamaBased && this.config.endpoint) {
      this.endpointType = "ollama";
      const url = new URL(this.config.endpoint);
      const host = `${url.protocol}//${url.hostname}${url.port ? `:${url.port}` : ""}`;
      this.logger.info(
        `Detected Ollama endpoint: ${host}. Initializing Ollama client.`,
      );
      this.ollama = new Ollama({ host });
    }

    // ── cache original conversation for downstream helpers
    const totalTokens = await this.getMessageArrayTokenCount(messages);
    this.logger.info(`Total conversation tokens: ${totalTokens}`);
    this.originalMessages = messages.map((m) => ({ ...m }));

    // ── 1️⃣ full multi-layer detection (dim-0 entry)
    const turningPointsFound = await this.multiLayerDetection(messages, 0);
    this.logger.info(
      `Multi-layer detection returned ${turningPointsFound?.length} turning points`,
    );
    const phiScoresByPoint =
      this.computePhiSignificanceField(turningPointsFound);

    // ── 2️⃣ compute per-TP confidence and necessity scores
    const confidenceScoresByPoint: number[] = [];
    const necessityScoresByPoint: number[] = [];

    // Collapses per-message embeddings into a single vector: a weighted mean
    // whose weights are the softmax of each embedding's L2 norm.
    const meanEmbedding = (embs: MessageEmbedding[]): Float32Array => {
      // Take the dimension from the first non-empty embedding
      const dim = embs.find((emb) => emb.embedding.length > 0)?.embedding
        .length;
      if (dim === undefined) {
        this.logger.warn("No valid embeddings found, returning empty vector");
        return new Float32Array();
      }

      // Only embeddings of the detected dimension take part; an empty or
      // mis-sized entry would otherwise zero out or NaN-poison the result.
      const usable = embs.filter((emb) => emb.embedding.length === dim);

      const softMax = (values: number[]): number[] => {
        const maxVal = Math.max(...values);
        const exps = values.map((v) => Math.exp(v - maxVal));
        const sumExps = exps.reduce((sum, v) => sum + v, 0);
        return exps.map((v) => v / sumExps);
      };

      const magnitudes = usable.map(({ embedding }) =>
        Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0)),
      );
      const attnWeights = softMax(magnitudes);

      const acc = new Float32Array(dim);
      for (let idx = 0; idx < usable.length; idx++) {
        const { embedding } = usable[idx];
        const weight = attnWeights[idx];
        for (let i = 0; i < dim; i++) {
          acc[i] += embedding[i] * weight;
        }
      }
      return acc;
    };

    // Structural necessity of a turning point. Without φ this is simply
    // `significance * 0.6`; with φ it is a fixed-weight blend of centrality,
    // position, uniqueness and span coverage, multiplied by φ.
    const calculateStructuralNecessity = async (
      tp: TurningPoint,
      allTPs: TurningPoint[],
      conversationEmbeddings: MessageEmbedding[],
      phi?: number,
    ): Promise<number> => {
      if (!this.config.enableExperimentalPhi || phi === undefined) {
        return tp.significance * 0.6;
      }

      // How close the turning span sits to the semantic centre of the conversation
      const tpEmbedding = meanEmbedding(
        conversationEmbeddings.slice(tp.span.startIndex, tp.span.endIndex + 1),
      );
      const conversationCenterEmbedding = meanEmbedding(conversationEmbeddings);
      const centralityScore =
        1 -
        this.calculateSemanticDistance(
          tpEmbedding,
          conversationCenterEmbedding,
        );

      // Turning points near the middle of the conversation weigh more
      const relativePosition = tp.span.startIndex / messages.length;
      const positionWeight = 1 - Math.abs(relativePosition - 0.5) * 1.5;

      // Uniqueness: one minus the mean thematic similarity to every other TP.
      // Guarded on the filtered list so duplicate ids cannot divide by zero.
      let uniquenessScore = 1.0;
      const otherTPs = allTPs.filter((other) => other.id !== tp.id);
      if (otherTPs.length > 0) {
        const thematicSimilarities = otherTPs.map((other) =>
          this.calculateThematicSimilarity(tp, other),
        );
        const avgSimilarity =
          thematicSimilarities.reduce((sum, sim) => sum + sim, 0) /
          thematicSimilarities.length;
        uniquenessScore = 1 - avgSimilarity;
      }

      const spanCoverage =
        (tp.span.endIndex - tp.span.startIndex + 1) / messages.length;
      const coverageWeight = Math.min(1.0, spanCoverage * 3);

      // Fixed, interpretable weights
      const structuralComponents = [
        centralityScore * 0.3,
        positionWeight * 0.25,
        uniquenessScore * 0.25,
        coverageWeight * 0.2,
      ];
      const baseStructural = structuralComponents.reduce(
        (sum, comp) => sum + comp,
        0,
      );

      const necessity = baseStructural * phi; // Direct phi amplification
      return Math.min(1.0, Math.max(0.0, necessity));
    };

    // Whole-conversation embeddings are identical for every turning point, so
    // request them at most once, and only if a φ-aware score needs them.
    let conversationEmbeddingsPromise:
      | Promise<MessageEmbedding[]>
      | undefined;
    const getConversationEmbeddings = (): Promise<MessageEmbedding[]> => {
      if (conversationEmbeddingsPromise === undefined) {
        conversationEmbeddingsPromise = this.generateMessageEmbeddings(
          messages,
          0,
        );
      }
      return conversationEmbeddingsPromise;
    };

    await async.eachOfLimit(
      turningPointsFound,
      this.config.concurrency,
      async (tp) => {
        const pre = messages.slice(0, tp.span.startIndex);
        const turn = messages.slice(tp.span.startIndex, tp.span.endIndex + 1);
        const post = messages.slice(tp.span.endIndex + 1);

        // A turning point at the very start or end has nothing to compare against
        if (pre.length === 0 || post.length === 0) {
          this.logger.info(`TP ${tp.id} at edges of convo – skipping scores`);
          return;
        }

        const [preE, turnE, postE] = await Promise.all([
          this.generateMessageEmbeddings(pre, 0),
          this.generateMessageEmbeddings(turn, 0),
          this.generateMessageEmbeddings(post, 0),
        ]);

        const vPre = meanEmbedding(preE);
        const vTurn = meanEmbedding(turnE);
        const vPost = meanEmbedding(postE);

        const distPre = this.calculateSemanticDistance(vPre, vTurn);
        const distPost = this.calculateSemanticDistance(vTurn, vPost);

        // Confidence: mean of the distances into and out of the turning span
        const confidence = (distPre + distPost) / 2;
        confidenceScoresByPoint.push(confidence);

        // φ-aware necessity using structural analysis
        const phi = this.config.enableExperimentalPhi
          ? phiScoresByPoint.get(tp.id)
          : undefined;
        const allConversationEmbeddings =
          phi === undefined ? [] : await getConversationEmbeddings();
        const necessity = await calculateStructuralNecessity(
          tp,
          turningPointsFound,
          allConversationEmbeddings,
          phi,
        );
        necessityScoresByPoint.push(necessity);

        // `phi !== undefined` (not truthiness) so a legitimate φ of 0 is
        // still reported rather than shown as disabled.
        this.logger.info(
          `TP ${tp.id}: distPre=${distPre.toFixed(3)}, distPost=${distPost.toFixed(3)}, conf=${confidence.toFixed(3)}, necessity=${necessity.toFixed(3)}${phi !== undefined ? `, φ=${phi.toFixed(3)}` : " (φ-disabled)"}`,
        );
      },
    );

    // ── 3️⃣ Aggregate conversation-level scores (means over strictly positive values)
    const validConf = confidenceScoresByPoint.filter((v) => v > 0);
    const aggregateConfidence =
      validConf.length === 0
        ? 0
        : validConf.reduce((s, v) => s + v, 0) / validConf.length;

    const validNec = necessityScoresByPoint.filter((v) => v > 0);
    const aggregateNecessity =
      validNec.length === 0
        ? 0
        : validNec.reduce((s, v) => s + v, 0) / validNec.length;

    this.logger.info(
      `Aggregate scores: Confidence=${aggregateConfidence.toFixed(3)}, Necessity=${aggregateNecessity.toFixed(3)}, enabledExperimentalPhi=${this.config.enableExperimentalPhi}`,
    );

    return {
      confidence: aggregateConfidence,
      // Necessity is only reported when the φ field is enabled
      necessity: this.config.enableExperimentalPhi ? aggregateNecessity : null,
      points: turningPointsFound,
    };
  }

  /**
   * Multi-layer detection implementing the ARC/CRA dimensional processing.
   * This is the primary implementation of the transition operator Ψ.
   *
   * Detects turning points at the current dimension, merges similar ones,
   * optionally makes complexity/significance φ-aware, and, if the maximum
   * complexity reaches the effective saturation threshold, recurses on
   * meta-messages built from the merged turning points (dimension n → n+1).
   *
   * @param messages - Messages (or meta-messages) to analyse at this dimension.
   * @param dimension - Current recursion depth, starting at 0.
   * @returns The turning points found at this level, combined with any found
   *   at higher dimensions.
   */
  private async multiLayerDetection(
    messages: Message[],
    dimension: number,
  ): Promise<TurningPoint[]> {
    this.logger.info(`Starting dimensional analysis at n=${dimension}`);

    // Check recursion depth - hard limit on dimensional expansion
    if (dimension >= this.config.maxRecursionDepth) {
      this.logger.info(
        `Maximum dimension (n=${dimension}) reached, processing directly without further expansion`,
      );
      return await this.detectTurningPointsInChunk(
        messages,
        dimension,
        0,
        this.originalMessages,
      );
    }

    let localTurningPoints: TurningPoint[] = [];

    if (
      messages.length < this.config.minMessagesPerChunk * 2 &&
      dimension === 0
    ) {
      // Small conversations are processed as a single chunk
      this.logger.info(
        `Dimension ${dimension}: Small conversation (${messages.length} msgs), processing directly`,
      );

      // Temporarily raise the threshold slightly for small conversations.
      // `finally` guarantees the shared config is restored even if detection throws.
      const originalThreshold = this.config.semanticShiftThreshold;
      this.config.semanticShiftThreshold = Math.max(
        0.3,
        originalThreshold * 1.1,
      );
      try {
        localTurningPoints = await this.detectTurningPointsInChunk(
          messages,
          dimension,
          0,
          this.originalMessages,
        );
      } finally {
        this.config.semanticShiftThreshold = originalThreshold;
      }
    } else {
      // Chunk the conversation
      const { chunks } = await this.chunkConversation(messages, dimension);
      this.logger.info(
        `Dimension ${dimension}: Split into ${chunks.length} chunks`,
      );

      if (chunks.length === 0) {
        this.logger.info(
          `Dimension ${dimension}: No valid chunks created, returning empty.`,
        );
        return [];
      }

      // Process chunks with bounded concurrency to find local turning points
      const chunkTurningPoints: TurningPoint[][] = new Array(chunks.length);
      // -1 marks "not processed yet"; any value >= 0 is a real duration
      const durationsSeconds: number[] = new Array(chunks.length).fill(-1);
      const limit = this.config.concurrency;

      await async.eachOfLimit(chunks, limit, async (chunk, indexStr) => {
        const index = Number(indexStr);
        const startTime = Date.now();
        const shouldLogProgress =
          index % 10 === 0 || limit < 10 || this.config.debug;

        if (shouldLogProgress) {
          this.logger.info(
            ` - Dimension ${dimension}: Processing chunk ${index + 1}/${chunks.length} (${chunk.length} messages)`,
          );
        }

        chunkTurningPoints[index] = await this.detectTurningPointsInChunk(
          chunk,
          dimension,
          index,
          this.originalMessages,
        );

        const durationSecs = (Date.now() - startTime) / 1000;
        durationsSeconds[index] = durationSecs;

        if (shouldLogProgress) {
          // Use `>= 0` so a chunk that finished within the same millisecond
          // still counts as processed (the sentinel is -1, not 0).
          const finished = durationsSeconds.filter((d) => d >= 0);
          const processedCount = finished.length;
          const averageDuration =
            finished.reduce((a, b) => a + b, 0) / processedCount;
          const remainingChunks = durationsSeconds.length - processedCount;
          const remainingTime = (averageDuration * remainingChunks).toFixed(1);
          const percentageComplete =
            (processedCount / durationsSeconds.length) * 100;
          this.logger.info(
            ` - Chunk ${index + 1} processed in ${durationSecs.toFixed(1)}s. Est. remaining: ${remainingTime}s (${percentageComplete.toFixed(1)}% complete)`,
          );
        }
      });

      // Flatten all turning points from all chunks
      localTurningPoints = chunkTurningPoints.flat();
    }

    this.logger.info(
      `Dimension ${dimension}: Found ${localTurningPoints.length} raw turning points`,
    );

    // --- PHI-AWARE ARC STEP 1: Calculate initial Phi Field ---
    // Phi is computed from the raw, unmerged turning points and guides the
    // merging process itself. When phi is disabled this is an empty map.
    const initialPhiMap = this.config.enableExperimentalPhi
      ? this.computePhiSignificanceField(localTurningPoints)
      : new Map<string, number>();

    // Zero or one turning point: nothing to merge or escalate
    if (localTurningPoints.length <= 1) {
      return this.config.onlySignificantTurningPoints
        ? this.filterSignificantTurningPoints(localTurningPoints, initialPhiMap)
        : localTurningPoints;
    }

    // First merge any similar turning points at this level
    const mergedLocalTurningPoints = this.mergeSimilarTurningPoints(
      localTurningPoints,
      initialPhiMap,
    );
    this.logger.info(
      `Dimension ${dimension}: Merged similar TPs to ${mergedLocalTurningPoints.length}`,
    );

    // If merging resulted in 0 or 1 TP, return it (after filtering)
    if (mergedLocalTurningPoints.length <= 1) {
      return this.config.onlySignificantTurningPoints
        ? this.filterSignificantTurningPoints(
            mergedLocalTurningPoints,
            initialPhiMap,
          )
        : mergedLocalTurningPoints;
    }

    // --- CRITICAL ARC/CRA + PHI INTEGRATION ---
    // 1. Re-compute the Significance Field (φ) for the now-merged turning points.
    const phiMap = this.config.enableExperimentalPhi
      ? this.computePhiSignificanceField(mergedLocalTurningPoints)
      : new Map<string, number>();

    // 2. If phi is active, make complexity and significance φ-aware.
    if (this.config.enableExperimentalPhi) {
      this.logger.info(
        `Dimension ${dimension}: Enhancing significance scores with phi-field influence.`,
      );
      for (const tp of mergedLocalTurningPoints) {
        const phi = phiMap.get(tp.id);
        if (phi === undefined) continue;

        // Order matters: complexity is derived from the current significance,
        // and the new significance then reads the updated complexity.
        tp.complexityScore = this.calculateComplexityScoreWithPhi(tp, phi);
        tp.significance = this.recalculateSignificanceWithPhi(tp, phi);
        // Store phi on the turning point for reference
        tp.phi = phi;
      }
    }

    // 3. Determine dimensional escalation based on the (now potentially φ-aware) complexity.
    const effectiveThreshold = this.calculateDynamicComplexitySaturation(
      mergedLocalTurningPoints,
    );
    const maxComplexity = Math.max(
      0,
      ...mergedLocalTurningPoints.map((tp) => tp.complexityScore),
    );
    const needsDimensionalEscalation = maxComplexity >= effectiveThreshold;

    // Report the threshold actually used for the decision, which may differ
    // from the static `complexitySaturationThreshold` in the config.
    this.logger.info(
      `Dimension ${dimension}: Max complexity = ${maxComplexity.toFixed(2)}, Saturation threshold = ${effectiveThreshold}`,
    );
    this.logger.info(
      `Dimension ${dimension}: Needs Escalation (Ψ)? ${needsDimensionalEscalation}`,
    );

    if (
      dimension >= this.config.maxRecursionDepth - 1 ||
      mergedLocalTurningPoints.length <= 2 ||
      !needsDimensionalEscalation
    ) {
      this.logger.info(
        `Dimension ${dimension}: Finalizing at this level. Applying final filtering.`,
      );

      // Track convergence for this dimension
      if (this.config.measureConvergence) {
        this.convergenceHistory.push({
          previousTurningPoints: [], // No previous state at the final level of processing
          currentTurningPoints: mergedLocalTurningPoints, // TPs before final filtering
          dimension,
          distanceMeasure: 0, // No comparison needed at final step
          hasConverged: true, // Considered converged as processing stops here
          didEscalate: false,
        });
      }

      // Filter the merged points before returning
      return this.filterSignificantTurningPoints(
        mergedLocalTurningPoints,
        phiMap,
      );
    }

    // ----- DIMENSIONAL ESCALATION (n → n+1) -----
    this.logger.info(
      `Dimension ${dimension}: Escalating to dimension ${dimension + 1}`,
    );

    // Create meta-messages from the merged turning points at this level
    const metaMessages = this.createMetaMessagesFromTurningPoints(
      mergedLocalTurningPoints,
      this.originalMessages,
    );
    this.logger.info(
      `Dimension ${dimension}: Created ${metaMessages.length} meta-messages for dimension ${dimension + 1}`,
    );

    if (metaMessages.length < 2) {
      this.logger.info(
        `Dimension ${dimension}: Not enough meta-messages (${metaMessages.length}) to perform higher-level analysis. Finalizing with current TPs.`,
      );
      if (this.config.measureConvergence) {
        this.convergenceHistory.push({
          previousTurningPoints: mergedLocalTurningPoints, // State before attempted escalation
          currentTurningPoints: mergedLocalTurningPoints, // State after failed escalation
          dimension: dimension + 1, // Represents the attempted next dimension
          distanceMeasure: 0, // No change
          hasConverged: true, // Converged because escalation failed
          didEscalate: false, // Escalation attempted but yielded no processable result
        });
      }
      return this.filterSignificantTurningPoints(
        mergedLocalTurningPoints,
        phiMap,
      );
    }

    // Recursively process the meta-messages to find higher-dimensional turning points
    const higherDimensionTurningPoints = await this.multiLayerDetection(
      metaMessages,
      dimension + 1,
    );
    this.logger.info(
      `Dimension ${dimension + 1}: Found ${higherDimensionTurningPoints.length} higher-dimension TPs.`,
    );

    // Track convergence and dimension escalation
    if (this.config.measureConvergence) {
      const convergenceState: ConvergenceState = {
        previousTurningPoints: mergedLocalTurningPoints, // TPs from dim n
        currentTurningPoints: higherDimensionTurningPoints, // TPs found in dim n+1
        dimension: dimension + 1,
        distanceMeasure: this.calculateStateDifference(
          mergedLocalTurningPoints,
          higherDimensionTurningPoints,
          phiMap,
        ),
        hasConverged: higherDimensionTurningPoints.length > 0, // Converged if TPs were found at higher level
        didEscalate: true,
      };
      this.convergenceHistory.push(convergenceState);
      this.logger.info(
        `Dimension ${dimension} → ${dimension + 1}: Convergence distance: ${convergenceState.distanceMeasure.toFixed(3)}. Converged: ${convergenceState.hasConverged}`,
      );
    }

    // Combine turning points from local (n) and higher (n+1) dimensions
    return this.combineTurningPoints(
      mergedLocalTurningPoints,
      higherDimensionTurningPoints,
      phiMap,
    );
  }

  /**
   * Calculates a difference measure between two states (sets of turning points) for
   * convergence tracking. When the experimental phi feature is enabled, this metric
   * becomes φ-aware by blending the LLM-assigned significance with the emergent
   * phi score for a more holistic comparison.
   *
   * @param state1 - The first set of turning points.
   * @param state2 - The second set of turning points.
* @param phiMap - The map of phi scores for turning points in the states.
   * @returns A single numeric value representing the distance between the two states.
   */
  private calculateStateDifference(
    state1: TurningPoint[],
    state2: TurningPoint[],
    phiMap: Map<string, number>,
  ): number {
    // Two empty states are identical; exactly one empty state is maximally different
    const firstIsEmpty = state1.length === 0;
    const secondIsEmpty = state2.length === 0;
    if (firstIsEmpty && secondIsEmpty) return 0.0;
    if (firstIsEmpty || secondIsEmpty) return 1.0;

    // Mean significance of a state. With phi enabled, each point that has a
    // phi score contributes the average of its significance and its phi.
    const meanAdjustedSignificance = (points: TurningPoint[]): number => {
      if (points.length === 0) return 0;
      let total = 0;
      for (const point of points) {
        const blendWithPhi =
          this.config.enableExperimentalPhi && phiMap.has(point.id);
        total += blendWithPhi
          ? (point.significance + phiMap.get(point.id)!) / 2
          : point.significance;
      }
      return total / points.length;
    };

    // Component 1: gap between the two mean adjusted significances
    const significanceGap = Math.abs(
      meanAdjustedSignificance(state1) - meanAdjustedSignificance(state2),
    );

    // Component 2: Jaccard distance over the "start-end" span keys
    const toSpanKey = (point: TurningPoint): string =>
      `${point.span.startIndex}-${point.span.endIndex}`;
    const firstSpans = new Set(state1.map(toSpanKey));
    const secondSpans = new Set(state2.map(toSpanKey));

    let sharedCount = 0;
    for (const key of firstSpans) {
      if (secondSpans.has(key)) sharedCount += 1;
    }
    const unionCount = firstSpans.size + secondSpans.size - sharedCount;
    const spanDistance = unionCount > 0 ? 1.0 - sharedCount / unionCount : 0.0;

    // Equal-weight blend of both components, clamped to [0, 1]
    const blended = significanceGap * 0.5 + spanDistance * 0.5;
    return Math.min(1.0, Math.max(0.0, blended));
  }

  /**
   * Apply complexity function χ from the ARC/CRA framework.
   * Complexity is derived from significance alone as `1 + significance * 4`
   * and clamped to the range 1–5.
   *
   * @param significance - Significance of the turning point.
   * @param semanticShiftMagnitude - Accepted for interface compatibility; not
   *   used in the calculation.
   * @returns The complexity score, between 1 and 5.
   */
  private calculateComplexityScore(
    significance: number,
    semanticShiftMagnitude: number,
  ): number {
    // Complexity reflects content significance, not distance redundancy
    const unclamped = 1 + significance * 4;
    return Math.min(5, Math.max(1, unclamped));
  }

  /**
   * @experimental
   * Calculates the complexity score for a Turning Point, dynamically modulated by the
   * experimental φ (Significance) field. This function is only called when
   * `config.enableExperimentalPhi` is true.
   *
   * @param tp - The TurningPoint object being scored.
   * @param phi - The calculated φ score (emergent significance) for this turning point.
   * @returns A φ-aware complexity score, clamped between 1 and 5.
*/
  private calculateComplexityScoreWithPhi(
    tp: TurningPoint,
    phi: number,
  ): number {
    // Start from the significance-only complexity score.
    const baseComplexity = this.calculateComplexityScore(
      tp.significance,
      tp.semanticShiftMagnitude,
    );
    // phi = 0.5 is neutral; values above/below nudge the score up/down.
    // The 0.6 multiplier is the phi influence factor and can be tuned.
    const phiAdjustment = (phi - 0.5) * 0.6;
    // The adjustment is additive; it is intentionally not scaled by baseComplexity.
    const adjustedComplexity = baseComplexity + phiAdjustment;
    return Math.max(1, Math.min(5, adjustedComplexity));
  }

  /**
   * Detect turning points within a single chunk of the conversation.
   *
   * This is the local refinement step in the current dimension (the Ψ operator of the
   * ARC framework): adjacent messages are embedded, the semantic distance between each
   * adjacent pair is measured, and every pair whose distance exceeds the (optionally
   * dimension-adjusted) threshold is sent to `classifyTurningPoint`.
   *
   * Classification runs with at most `config.concurrency` calls in flight. The returned
   * array is ordered by position of the adjacent pair in the chunk, independent of the
   * order in which the concurrent classifications happen to finish.
   *
   * @param messages - The messages (dimension 0) or meta-messages (dimension > 0) of this chunk.
   * @param dimension - Current analysis dimension; also the exponent of the threshold decay.
   * @param chunkIndex - Index of the chunk, used only in log output.
   * @param originalMessages - Forwarded unchanged to `classifyTurningPoint`.
   * @returns The classified turning points; empty when the chunk has fewer than two
   *          messages or no adjacent pair exceeds the threshold.
   */
  private async detectTurningPointsInChunk(
    messages: MetaMessage[] | Message[],
    dimension: number,
    chunkIndex: number, // Optional index for logging purposes
    originalMessages: Message[],
  ): Promise<TurningPoint[]> {
    if (messages.length < 2) return [];

    /**
     * Scales the base threshold by `decayFactor ^ dimension`, so the effective threshold
     * shrinks as the dimension grows (and is unchanged at dimension 0). The decay factor
     * is chosen from the first band the base threshold exceeds; thresholds of 0.5 or
     * lower use 0.5.
     *
     * @param dimension - Exponent applied to the decay factor.
     * @param baseThreshold - The configured semantic shift threshold.
     * @returns The dimension-adjusted threshold.
     */
    const dynamicallyAdjustThresholdBasedOnDimension = (
      dimension: number,
      baseThreshold: number,
    ): number => {
      // Bands are checked in order; the first whose limit is exceeded wins.
      const decayFactors = [
        { limit: 0.9, factor: 0.4 }, // baseThreshold > 0.9
        { limit: 0.8, factor: 0.25 }, // baseThreshold > 0.8
        { limit: 0.5, factor: 0.35 }, // baseThreshold > 0.5
      ];
      const decayFactor =
        decayFactors.find((d) => baseThreshold > d.limit)?.factor ?? 0.5;

      // Apply exponential decay based on the dimension.
      const thresholdScaleFactor = Math.pow(decayFactor, dimension);
      return thresholdScaleFactor * baseThreshold;
    };

    // Generate embeddings for all messages in the chunk
    const embeddings = await this.generateMessageEmbeddings(
      messages,
      dimension,
    );

    // The threshold depends only on config and dimension, so compute it once
    // instead of once per adjacent pair (and again for the summary log).
    const useDynamicThreshold =
      this.config.dynamicallyAdjustSemanticShiftThreshold === true;
    const baseThreshold = this.config.semanticShiftThreshold;
    const dimensionAdjustedThreshold = useDynamicThreshold
      ? dynamicallyAdjustThresholdBasedOnDimension(dimension, baseThreshold)
      : baseThreshold;

    type AdjacentDistance = {
      current: number;
      next: number;
      distance: number;
    };
    // Pairs whose distance exceeds the threshold (candidates for classification)
    const distances: AdjacentDistance[] = [];
    // Every adjacent pair, kept only for the summary log
    const allDistances: AdjacentDistance[] = [];

    for (let i = 0; i < embeddings.length - 1; i++) {
      const current = embeddings[i];
      const next = embeddings[i + 1];

      // Calculate semantic distance between current and next message
      const distance = this.calculateSemanticDistance(
        current.embedding,
        next.embedding,
      );

      this.logger.debug(
        `Anlyzing with dimensionAdjustedThreshold: ${dimensionAdjustedThreshold.toFixed(3)}, compared to original threshold: ${baseThreshold.toFixed(3)}, with the difference in embeddings or distance of: ${distance.toFixed(3)}`,
      );

      if (distance > dimensionAdjustedThreshold) {
        distances.push({
          current: current.index,
          next: next.index,
          distance: distance,
        });
        this.logger.debug(
          ` - After analyzing, determined this distance is to be added to the list of distances to process: ${distance.toFixed(3)}`,
        );
      } else {
        this.logger.debug(
          ` - After analyzing, determined this distance is NOT significant enough to be added to the list of distances to process: ${distance.toFixed(3)}, from the difference of the two embeddings: ${current.embedding.length} and ${next.embedding.length}`,
        );
      }

      allDistances.push({
        current: current.index,
        next: next.index,
        distance: distance,
      });
    }

    const thresholdDescription = useDynamicThreshold
      ? `than the dynamically adjusted threshold of ${dimensionAdjustedThreshold.toFixed(3)}`
      : `than the threshold of ${baseThreshold.toFixed(3)}`;
    // Sort a copy so the bookkeeping array is not reordered as a side effect of logging.
    const topDistances = [...allDistances]
      .sort((a, b) => b.distance - a.distance)
      .slice(0, 3)
      .map((d) => d.distance.toFixed(3))
      .join(", ");
    const levelLabel = dimension === 0 ? "base messages" : "meta-messages";

    this.logger.info(
      `For a total number of points: ${embeddings.length}, there were ${distances.length} distances found as being greater ${thresholdDescription}. Across this span of messages of length ${messages.length}, the following distances were found: - The top 3 greatest distances are: ${topDistances} Found ${distances.length} potential turning points at this level (${levelLabel}).`,
    );

    if (distances.length === 0) {
      this.logger.info(
        `No significant semantic shifts detected in chunk ${chunkIndex}`,
      );
      return [];
    }

    // One slot per candidate pair. Writing by slot (instead of pushing on completion)
    // keeps the output order stable when classifications run concurrently.
    const classified = new Array<TurningPoint | undefined>(distances.length);

    await async.eachOfLimit(
      distances,
      this.config.concurrency,
      async (distanceObj, idxStr) => {
        const d = Number(idxStr);
        // NOTE(review): the lookups below assume MessageEmbedding.index equals the
        // message's position inside this chunk — confirm against generateMessageEmbeddings.
        const i = distanceObj.current;
        const current = embeddings[i];
        const next = embeddings[distanceObj.next];
        const distance = distanceObj.distance;

        const beforeMessage = messages[i];
        const afterMessage = messages[i + 1];
        if (beforeMessage == undefined || afterMessage == undefined) {
          // Skip this pair; its slot stays empty and is dropped from the result.
          this.logger.info(
            `detectTurningPointsInChunk: warning beforeMessage or afterMessage is undefined, beforeMessage: ${beforeMessage}, afterMessage: ${afterMessage}`,
          );
          return;
        }

        // Classify the turning point using LLM
        const turningPoint = await this.classifyTurningPoint(
          beforeMessage,
          afterMessage,
          distance,
          dimension,
          originalMessages,
          d,
        );

        if (d === 0) {
          this.logger.info(`Now proceeding to process every turning point`);
        }

        const chunkLabel = chunkIndex ? `[Chunk ${chunkIndex}] ` : "";
        this.logger.info(
          ` ...${chunkLabel}Potential turning point detected between messages ${current.id} and ${next.id} (distance: ${distance.toFixed(3)}, complexity: ${turningPoint.complexityScore.toFixed(1)}), signif: ${turningPoint.significance.toFixed(2)} category: ${turningPoint.category}, number of quotes: ${turningPoint.quotes.length}, emotionalTone: ${turningPoint.emotionalTone}`,
        );

        classified[d] = turningPoint;
      },
    );

    // Drop the slots of skipped pairs while preserving pair order.
    return classified.filter(
      (tp): tp is TurningPoint => tp !== undefined,
    );
  }

  /**
   * Use LLM to classify a turning point and generate metadata.
   * *** MODIFIED to prioritize message.spanData over regex ***
   */
  /**
   * Use LLM to classify a turning point and generate metadata.
   * This implementation uses a highly modular prompt architecture with
   * multiple distinct user messages to ensure clarity. The payload consists of:
   * - A system message that sets the core identity and universal constraints.
   * - A static context user message containing framework and evaluation criteria.
   * - A dynamic data user message that provides conversation context and the specific messages to analyze.
   * - A final user instruction message that tells the model what to do with all this information.
*/ private async classifyTurningPoint( beforeMessage: Message, afterMessage: Message, distance: number, dimension: number, originalMessages: Message[], index: number = 0, ): Promise<TurningPoint> { let span: MessageSpan; if (dimension > 0) { if ( !(beforeMessage instanceof MetaMessage) || !(afterMessage instanceof MetaMessage) ) { throw new Error( "Before or after message is not a MetaMessage at higher dimension", ); } const beforeMessageMeta = beforeMessage as MetaMessage; const afterMessageMeta = afterMessage as MetaMessage; // For higher dimensions, extract the starting and ending message from within the meta-message's inner list span = { startId: beforeMessageMeta.getMessagesInTurningPointSpanToMessagesArray()[0] .id, endId: afterMessageMeta.getMessagesInTurningPointSpanToMessagesArray()[0].id, startIndex: this.originalMessages.findIndex( (candidateM) => candidateM.id === beforeMessageMeta.getMessagesInTurningPointSpanToMessagesArray()[0] .id, ), endIndex: this.originalMessages.findIndex( (candidateM) => candidateM.id === afterMessageMeta.getMessagesInTurningPointSpanToMessagesArray()[0] .id, ), originalSpan: { startId: beforeMessage.id, endId: afterMessage.id, startIndex: index, endIndex: index + 1, }, }; } else { // For base-level conversations, use the original message IDs and find their indices. span = { startId: beforeMessage.id, endId: afterMessage.id, startIndex: MetaMessage.findIndexOfMessageFromId({ id: beforeMessage.id, beforeMessage, afterMessage,