@gaiaverse/semantic-turning-point-detector
Version:
Detects key semantic turning points in conversations using recursive semantic distance analysis. Ideal for conversation analysis, dialogue segmentation, insight detection, and AI-assisted reasoning tasks.
855 lines (852 loc) • 79.6 kB
JavaScript
"use strict";
// Module-interop helper (looks like the standard helper emitted by the TypeScript
// compiler - see the "file: semanticTurningPointDetector.ts" note below).
// Reuses a helper already present on `this` when there is one; otherwise wraps
// plain CommonJS exports so callers can always read the export via `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        // Already flagged as an ES module namespace: hand it back untouched.
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.SemanticTurningPointDetector = void 0;
// file: semanticTurningPointDetector.ts
const fs_extra_1 = __importDefault(require("fs-extra"));
const winston_1 = __importDefault(require("winston"));
// setup winston
// The file transport below writes into ./results, so make sure it exists first.
fs_extra_1.default.ensureDirSync('results');
// Module-wide logger: colourised "<timestamp> <level>: <message>" lines on the
// console, plus JSON records appended to results/semanticTurningPointDetector.log.
// Built inside an IIFE so the intermediate names do not leak into module scope.
const logger = (() => {
    const winston = winston_1.default;
    const fmt = winston.format;
    const renderConsoleLine = (entry) => `${entry.timestamp} ${entry.level}: ${entry.message}`;
    const consoleTransport = new winston.transports.Console({
        format: fmt.combine(fmt.colorize(), fmt.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), fmt.printf(renderConsoleLine))
    });
    const fileTransport = new winston.transports.File({
        filename: 'results/semanticTurningPointDetector.log',
        format: fmt.json()
    });
    return winston.createLogger({
        level: 'info',
        format: fmt.combine(fmt.timestamp(), fmt.json()),
        transports: [consoleTransport, fileTransport]
    });
})();
/*****************************************************************************************
* SEMANTIC TURNING POINT DETECTOR
*
* A TypeScript implementation of the Adaptive Recursive Convergence (ARC) with
* Cascading Re-Dimensional Attention (CRA) framework for conversation analysis.
*
* This detector identifies semantic "Turning Points" in conversations as a concrete
* application of the ARC/CRA theoretical framework for multi-step reasoning
* and dynamic dimensional expansion.
*
* Framework implementation:
* 1. Analyze semantic relationships between messages using embeddings (dimension n)
* 2. Calculate semantic distances that correspond to the contraction mapping
* 3. Apply the complexity function χ to determine dimensional saturation
* 4. Use the transition operator Ψ to determine whether to stay in dimension n or escalate
* 5. Employ meta-messages and recursive analysis for dimensional expansion (n → n+1)
* 6. Merge and prune results to demonstrate formal convergence
*****************************************************************************************/
const async_1 = __importDefault(require("async"));
const openai_1 = require("openai");
const lru_cache_1 = require("lru-cache");
const crypto_1 = __importDefault(require("crypto"));
const tokensUtil_1 = require("./tokensUtil");
const conversation_1 = require("./conversation");
const Message_1 = require("./Message");
const stripContent_1 = require("./stripContent");
const prompt_1 = require("./prompt");
// Cache for token counts to avoid recalculating - implements atomic memory concept
// LRU cache for token counts: at most 10,000 entries, each expiring after 24 hours.
const tokenCountCache = new lru_cache_1.LRUCache({
    ttl: 24 * 60 * 60 * 1000,
    max: 10000
});
// -----------------------------------------------------------------------------
// Main Detector Class
// -----------------------------------------------------------------------------
class SemanticTurningPointDetector {
// Effective configuration: caller-supplied options merged with defaults in the constructor.
config;
// OpenAI SDK client created in the constructor from config.apiKey / config.endpoint.
openai;
// Shallow per-message copies of the conversation passed to detectTurningPoints();
// used to map spans back to positions in the original conversation.
originalMessages = [];
// Convergence records pushed by multiLayerDetection() when config.measureConvergence
// is enabled; reset at the start of every detectTurningPoints() call.
convergenceHistory = [];
/**
* Creates a new instance of the semantic turning point detector
*/
constructor(config = {}) {
// Default configuration (from your provided code)
this.config = {
apiKey: config.apiKey || process.env.OPENAI_API_KEY || '',
classificationModel: config.classificationModel || 'gpt-4o-mini',
embeddingModel: config.embeddingModel || 'text-embedding-3-small',
embeddingEndpoint: config.embeddingEndpoint,
semanticShiftThreshold: config.semanticShiftThreshold || 0.22,
minTokensPerChunk: config.minTokensPerChunk || 250,
maxTokensPerChunk: config.maxTokensPerChunk || 2000,
maxRecursionDepth: config.maxRecursionDepth || 3,
onlySignificantTurningPoints: config.onlySignificantTurningPoints ?? true,
significanceThreshold: config.significanceThreshold || 0.5,
minMessagesPerChunk: config.minMessagesPerChunk || 3,
maxTurningPoints: config.maxTurningPoints || 5,
debug: config.debug || false,
endpoint: config.endpoint,
complexitySaturationThreshold: config.complexitySaturationThreshold || 4.5,
measureConvergence: config.measureConvergence ?? true
};
// Initialize OpenAI client
this.openai = new openai_1.OpenAI({
apiKey: this.config.apiKey,
baseURL: this.config.endpoint
});
if (this.config.debug) {
logger.info('[TurningPointDetector] Initialized with config:', {
...this.config,
apiKey: '[REDACTED]'
});
}
}
/**
* Main entry point: Detect turning points in a conversation
* Implements the full ARC/CRA framework
*/
async detectTurningPoints(messages) {
logger.info('Starting turning point detection using ARC/CRA framework for conversation with', messages.length, 'messages');
this.convergenceHistory = [];
// Store original messages for reference
const totalTokens = await this.getMessageArrayTokenCount(messages);
logger.info(`Total conversation tokens: ${totalTokens}`);
// Ensure originalMessages is a fresh copy if messages might be mutated elsewhere
this.originalMessages = messages.map(m => ({ ...m }));
// Begin dimensional analysis at level 0
return this.multiLayerDetection(messages, 0);
}
/**
* Multi-layer detection implementing the ARC/CRA dimensional processing
* This is the primary implementation of the transition operator Ψ
*/
async multiLayerDetection(messages, dimension) {
logger.info(`Starting dimensional analysis at n=${dimension}`);
// Check recursion depth - hard limit on dimensional expansion
if (dimension >= this.config.maxRecursionDepth) {
logger.info(`Maximum dimension (n=${dimension}) reached, processing directly without further expansion`);
// Pass originalMessages context only at dimension 0 if needed by detectTurningPointsInChunk->classifyTurningPoint
return await this.detectTurningPointsInChunk(messages, dimension, 0, this.originalMessages);
}
// For very small conversations (or at deeper levels), use sliding window
let localTurningPoints = [];
// Adjusted condition to handle small message counts more directly
if (messages.length < this.config.minMessagesPerChunk * 2 && dimension === 0) {
logger.info(`Dimension ${dimension}: Small conversation (${messages.length} msgs), processing directly`);
// Optionally adjust threshold for small conversations
const originalThreshold = this.config.semanticShiftThreshold;
this.config.semanticShiftThreshold = Math.max(0.3, originalThreshold * 1.1); // Slightly higher threshold
localTurningPoints = await this.detectTurningPointsInChunk(messages, dimension, 0, this.originalMessages);
// Restore config
this.config.semanticShiftThreshold = originalThreshold;
}
else {
// Chunk the conversation
const { chunks } = await this.chunkConversation(messages, dimension);
logger.info(`Dimension ${dimension}: Split into ${chunks.length} chunks`);
if (chunks.length === 0) {
logger.info(`Dimension ${dimension}: No valid chunks created, returning empty.`);
return [];
}
// Process each chunk in parallel to find local turning points
const chunkTurningPoints = new Array(chunks.length);
const durationsSeconds = new Array(chunks.length).fill(-1);
const limit = this.config.endpoint ? 1 : 5; // Limit API calls
await async_1.default.eachOfLimit(chunks, limit, async (chunk, indexStr) => {
const index = Number(indexStr);
const startTime = Date.now();
if (index % 10 === 0 || limit === 1 || this.config.debug) {
logger.info(` - Dimension ${dimension}: Processing chunk ${index + 1}/${chunks.length} (${chunk.length} messages)`);
}
// Pass originalMessages context only at dimension 0
chunkTurningPoints[index] = await this.detectTurningPointsInChunk(chunk, dimension, index, this.originalMessages);
const durationSecs = (Date.now() - startTime) / 1000;
durationsSeconds[index] = durationSecs;
if (index % 10 === 0 || limit === 1 || this.config.debug) {
const processedCount = durationsSeconds.filter(d => d > 0).length;
if (processedCount > 0) {
const averageDuration = durationsSeconds.filter(d => d > 0).reduce((a, b) => a + b, 0) / processedCount;
const remainingChunks = durationsSeconds.length - processedCount;
const remainingTime = (averageDuration * remainingChunks).toFixed(1);
const percentageComplete = (processedCount / durationsSeconds.length * 100);
logger.info(` - Chunk ${index + 1} processed in ${durationSecs.toFixed(1)}s. Est. remaining: ${remainingTime}s (${percentageComplete.toFixed(1)}% complete)`);
}
else {
logger.info(` - Chunk ${index + 1} processed in ${durationSecs.toFixed(1)}s.`);
}
}
});
// Flatten all turning points from all chunks
localTurningPoints = chunkTurningPoints.flat();
}
logger.info(`Dimension ${dimension}: Found ${localTurningPoints.length} raw turning points`);
// If we found zero or one turning point at this level, return it directly (after potential filtering if needed)
if (localTurningPoints.length <= 1) {
// Apply filtering even for single points if configured
return this.config.onlySignificantTurningPoints
? this.filterSignificantTurningPoints(localTurningPoints)
: localTurningPoints;
}
// First merge any similar turning points at this level
const mergedLocalTurningPoints = this.mergeSimilarTurningPoints(localTurningPoints);
logger.info(`Dimension ${dimension}: Merged similar TPs to ${mergedLocalTurningPoints.length}`);
// If merging resulted in 0 or 1 TP, return it (after filtering)
if (mergedLocalTurningPoints.length <= 1) {
return this.config.onlySignificantTurningPoints
? this.filterSignificantTurningPoints(mergedLocalTurningPoints)
: mergedLocalTurningPoints;
}
// ------------------- CRITICAL ARC/CRA IMPLEMENTATION -------------------
// Determine whether to expand dimension based on complexity saturation
// Calculate the maximum complexity in this dimension
const maxComplexity = Math.max(0, ...mergedLocalTurningPoints.map(tp => tp.complexityScore)); // Ensure non-negative
// Implement Transition Operator Ψ
const needsDimensionalEscalation = maxComplexity >= this.config.complexitySaturationThreshold;
logger.info(`Dimension ${dimension}: Max complexity = ${maxComplexity.toFixed(2)}, Saturation threshold = ${this.config.complexitySaturationThreshold}`);
logger.info(`Dimension ${dimension}: Needs Escalation (Ψ)? ${needsDimensionalEscalation}`);
// Conditions to STOP escalation and finalize at this dimension:
// 1. Max recursion depth reached
// 2. Too few turning points to warrant higher-level analysis
// 3. Complexity hasn't saturated (no need to escalate)
if (dimension >= this.config.maxRecursionDepth - 1 ||
mergedLocalTurningPoints.length <= 2 || // Adjusted slightly, maybe 2 TPs isn't enough to find meta-patterns
!needsDimensionalEscalation) {
logger.info(`Dimension ${dimension}: Finalizing at this level. Applying final filtering.`);
// Track convergence for this dimension
if (this.config.measureConvergence) {
this.convergenceHistory.push({
previousTurningPoints: [], // No previous state at the final level of processing
currentTurningPoints: mergedLocalTurningPoints, // TPs before final filtering
dimension,
distanceMeasure: 0, // No comparison needed at final step
hasConverged: true, // Considered converged as processing stops here
didEscalate: false
});
}
// Filter the merged points before returning
return this.filterSignificantTurningPoints(mergedLocalTurningPoints);
}
// ----- DIMENSIONAL ESCALATION (n → n+1) -----
logger.info(`Dimension ${dimension}: Escalating to dimension ${dimension + 1}`);
// Create meta-messages from the merged turning points at this level
// Pass originalMessages for context if needed by createMetaMessagesFromTurningPoints
const metaMessages = this.createMetaMessagesFromTurningPoints(mergedLocalTurningPoints, this.originalMessages);
logger.info(`Dimension ${dimension}: Created ${metaMessages.length} meta-messages for dimension ${dimension + 1}`);
if (metaMessages.length < 2) {
logger.info(`Dimension ${dimension}: Not enough meta-messages (${metaMessages.length}) to perform higher-level analysis. Finalizing with current TPs.`);
if (this.config.measureConvergence) {
this.convergenceHistory.push({
previousTurningPoints: mergedLocalTurningPoints, // State before attempted escalation
currentTurningPoints: mergedLocalTurningPoints, // State after failed escalation
dimension: dimension + 1, // Represents the attempted next dimension
distanceMeasure: 0, // No change
hasConverged: true, // Converged because escalation failed
didEscalate: false // Escalation attempted but yielded no processable result
});
}
return this.filterSignificantTurningPoints(mergedLocalTurningPoints);
}
// Recursively process the meta-messages to find higher-dimensional turning points
const higherDimensionTurningPoints = await this.multiLayerDetection(metaMessages, dimension + 1);
logger.info(`Dimension ${dimension + 1}: Found ${higherDimensionTurningPoints.length} higher-dimension TPs.`);
// Track convergence and dimension escalation
if (this.config.measureConvergence) {
const convergenceState = {
previousTurningPoints: mergedLocalTurningPoints, // TPs from dim n
currentTurningPoints: higherDimensionTurningPoints, // TPs found in dim n+1
dimension: dimension + 1,
distanceMeasure: this.calculateStateDifference(mergedLocalTurningPoints, higherDimensionTurningPoints),
hasConverged: higherDimensionTurningPoints.length > 0, // Converged if TPs were found at higher level
didEscalate: true
};
this.convergenceHistory.push(convergenceState);
logger.info(`Dimension ${dimension} → ${dimension + 1}: Convergence distance: ${convergenceState.distanceMeasure.toFixed(3)}. Converged: ${convergenceState.hasConverged}`);
}
// Combine turning points from local (n) and higher (n+1) dimensions
// The combine function will handle merging, prioritizing higher-dim, and filtering
return this.combineTurningPoints(mergedLocalTurningPoints, higherDimensionTurningPoints);
}
/**
* Calculate a difference measure between two states (sets of turning points)
* Used for convergence tracking. Considers significance and location.
*/
calculateStateDifference(state1, state2) {
// Handle empty states
if (state1.length === 0 && state2.length === 0)
return 0.0; // No difference
if (state1.length === 0 || state2.length === 0)
return 1.0; // Maximum difference
// 1. Average Significance Difference
const avgSig1 = state1.reduce((sum, tp) => sum + tp.significance, 0) / state1.length;
const avgSig2 = state2.reduce((sum, tp) => sum + tp.significance, 0) / state2.length;
const sigDiff = Math.abs(avgSig1 - avgSig2); // Range [0, 1]
// 2. Structural Difference (using Jaccard index on span ranges)
const spans1 = new Set(state1.map(tp => `${tp.span.startIndex}-${tp.span.endIndex}`));
const spans2 = new Set(state2.map(tp => `${tp.span.startIndex}-${tp.span.endIndex}`));
const intersection = new Set([...spans1].filter(span => spans2.has(span)));
const union = new Set([...spans1, ...spans2]);
const jaccardDistance = union.size > 0 ? 1.0 - (intersection.size / union.size) : 0.0; // Range [0, 1]
// Combine the measures (e.g., weighted average)
const combinedDistance = (sigDiff * 0.5) + (jaccardDistance * 0.5);
return Math.min(1.0, Math.max(0.0, combinedDistance)); // Ensure bounds [0, 1]
}
/**
* Apply complexity function χ from the ARC/CRA framework
*/
calculateComplexityScore(significance, semanticShiftMagnitude) {
// Base complexity from significance (maps [0,1] to [1, 5])
let complexity = 1 + significance * 4;
// Adjust based on semantic shift magnitude (distance, scaled 0-1)
// Larger shifts slightly increase complexity, centered around a baseline distance
const baselineDistance = 0.3; // Assumes threshold is around here
complexity += (semanticShiftMagnitude - baselineDistance) * 1.0; // Adjust sensitivity as needed
// Ensure complexity is within the [1, 5] range
return Math.max(1, Math.min(5, complexity));
}
/**
* Detect turning points within a single chunk of the conversation.
* This represents the local refinement process in the current dimension.
*/
async detectTurningPointsInChunk(messages, dimension, chunkIndex, // Optional index for logging purposes
originalMessages) {
if (messages.length < 2)
return [];
// Generate embeddings for all messages in the chunk
const embeddings = await this.generateMessageEmbeddings(messages, dimension);
// Find significant semantic shifts between adjacent messages
const turningPoints = [];
const distances = []; // Store distances for logging
const allDistances = []; // Store all distances for logging
for (let i = 0; i < embeddings.length - 1; i++) {
const current = embeddings[i];
const next = embeddings[i + 1];
// Calculate semantic distance between current and next message
const distance = this.calculateSemanticDistance(current.embedding, next.embedding);
const beforeMessage = messages.find((m) => m.id === current.id);
const afterMessage = messages.find((m) => m.id === next.id);
let thresholdScaleFactor;
const baseThreshold = this.config.semanticShiftThreshold;
if (baseThreshold > 0.7) {
// For high initial thresholds (like 0.75), scale down more aggressively
thresholdScaleFactor = Math.pow(0.25, dimension); // More aggressive (0.25 instead of 0.4)
}
else if (baseThreshold > 0.5) {
// For medium thresholds
thresholdScaleFactor = Math.pow(0.35, dimension);
}
else {
// For already low thresholds
thresholdScaleFactor = Math.pow(0.5, dimension);
}
const dimensionAdjustedThreshold = baseThreshold * thresholdScaleFactor;
if (dimensionAdjustedThreshold <= distance) {
distances.push({
current: current.index,
next: next.index,
distance: distance,
}); // Store distance for logging
}
allDistances.push({
current: current.index,
next: next.index,
distance: distance,
});
}
logger.info(`For a total number of points: ${embeddings.length}, there were ${distances.length} distances found as being greater than the threshold of ${this.config.semanticShiftThreshold}.
- The top 3 greatest distances are: ${allDistances.slice(0, 3).sort((a, b) => b.distance - a.distance).map(d => d.distance.toFixed(3)).join(', ')}
This means there were ${distances.length} potential turning points detected ${dimension === 0 ? "with valid user-assistant turn pairs" : "with valid meta-messages"}`);
if (distances.length === 0) {
logger.info(`No significant semantic shifts detected in chunk ${chunkIndex}`);
return [];
}
for (let d = 0; d < distances.length - 1; d++) {
const distanceObj = distances[d];
const i = distanceObj.current; // Current message index
const current = embeddings[i]; // Current message embedding
const next = embeddings[distanceObj.next]; // Next message embedding
// If the distance exceeds our threshold, we've found a turning point
// Use direct array indices to get the messages
const distance = distanceObj.distance; // Semantic distance between current and next message
const beforeMessage = messages[i];
const afterMessage = messages[i + 1];
if (beforeMessage == undefined || afterMessage == undefined) {
logger.info(`detectTurningPointsInChunk: warning beforeMessage or afterMessage is undefined, beforeMessage: ${beforeMessage}, afterMessage: ${afterMessage}`);
continue;
}
// Classify the turning point using LLM
const turningPoint = await this.classifyTurningPoint(beforeMessage, afterMessage, distance, dimension, originalMessages, d);
logger.info(` ...${chunkIndex ? `[Chunk ${chunkIndex}] ` : ""}Potential turning point detected between messages ${current.id} and ${next.id} (distance: ${distance.toFixed(3)}, complexity: ${turningPoint.complexityScore.toFixed(1)}), signif: ${turningPoint.significance.toFixed(2)} category: ${turningPoint.category}`);
if (turningPoint.significance > 1) {
if (turningPoint.significance > 10) {
turningPoint.significance = turningPoint.significance / 100;
}
else {
turningPoint.significance = turningPoint.significance / 10; // Adjusting for scale
}
}
turningPoints.push(turningPoint);
}
return turningPoints;
}
/**
* Use LLM to classify a turning point and generate metadata.
* *** MODIFIED to prioritize message.spanData over regex ***
*/
async classifyTurningPoint(beforeMessage, afterMessage, distance, dimension, originalMessages, index = 0) {
let span;
if (dimension > 0) {
if (beforeMessage instanceof Message_1.MetaMessage === false || afterMessage instanceof Message_1.MetaMessage === false) {
throw new Error("Before or after message is not a MetaMessage");
}
const beforeMessageMeta = beforeMessage;
const afterMessageMeta = afterMessage;
// For higher dimensions, use meta-message and inner methods to get the the span ids for the start and end
span = {
startId: beforeMessageMeta.getMessagesInTurningPointSpanToMessagesArray()[0].id,
endId: afterMessageMeta.getMessagesInTurningPointSpanToMessagesArray()[0].id,
startIndex: this.originalMessages.findIndex((candidateM) => {
return beforeMessageMeta.getMessagesInTurningPointSpanToMessagesArray()[0].id === candidateM.id;
}),
endIndex: this.originalMessages.findIndex((candidateM) => {
return afterMessageMeta.getMessagesInTurningPointSpanToMessagesArray()[0].id === candidateM.id;
}),
originalSpan: {
startId: beforeMessage.id,
endId: afterMessage.id,
startIndex: index,
endIndex: index + 1,
}
};
}
else {
// For dimension 0, use original message IDs and find indices
span = {
startId: beforeMessage.id,
endId: afterMessage.id,
startIndex: Message_1.MetaMessage.findIndexOfMessageFromId({
id: beforeMessage.id,
beforeMessage,
afterMessage,
messages: originalMessages,
}),
endIndex: Message_1.MetaMessage.findIndexOfMessageFromId({
id: afterMessage.id,
beforeMessage,
afterMessage,
messages: originalMessages,
}),
};
}
// --- REMOVED Regex block for extracting originalSpan from meta-message content ---
// const originalSpan = { startIndex: 0, endIndex: 0, startMessageId: '', endMessageId: '' };
// if (beforeMessage.author === 'meta' || afterMessage.author === 'meta') {
// ... regex matching logic ...
// }
// --- End Removal ---
// --- LLM Prompt Setup (using original prompt structure) ---
const systemPrompt = (0, prompt_1.formSystemMessage)({
dimension,
distance
});
const userMessage = (0, prompt_1.formUserMessage)({
config: this.config,
afterMessage,
beforeMessage,
dimension,
addUserInstructions: this.config.customUserInstruction && this.config.customUserInstruction.length > 0 ? true : false,
});
const contextualAidText = this.prepareContextualInfoMeta(beforeMessage, afterMessage, span, originalMessages, dimension, 2, dimension > 0);
try {
// --- Call LLM (using original parameters and schema) ---
const response = await this.openai.chat.completions.create({
model: this.config.classificationModel,
messages: [
{
role: 'system', content: `${this.config.customSystemInstruction ? this.config.customSystemInstruction : systemPrompt}\n\n${contextualAidText}\n------- end of contextual background info see below as reminder of instructions -------\n\n${this.config.customSystemInstruction ? this.config.customSystemInstruction : (0, prompt_1.formSystemPromptEnding)(dimension)}`,
},
{ role: 'user', content: this.config.customUserInstruction ? `${this.config.customUserInstruction}\n\n${userMessage}\n\n${this.config.customUserInstruction}` : userMessage },
],
temperature: 0.6,
//@ts-ignore - Allow vendor-specific params if needed
repeat_penalty: this.config.endpoint ? 1.005 : undefined,
top_k: this.config.endpoint ? 20 : undefined,
stop: ['<|im_end|>'],
response_format: (0, prompt_1.formResponseFormatSchema)(dimension),
top_p: 0.9,
});
const content = response.choices[0]?.message?.content || '{}';
let classification = {};
try {
classification = JSON.parse(content);
console.info(` got classification: ${JSON.stringify(classification, null, 2)}`);
}
catch (err) {
logger.info('Error parsing LLM response as JSON:', err.message);
// Attempt to extract JSON from markdown code block if necessary
const jsonMatch = content.match(/```json\s*([\s\S]*?)\s*```/);
if (jsonMatch && jsonMatch[1]) {
try {
classification = JSON.parse(jsonMatch[1]);
logger.info('Successfully extracted JSON from markdown block.');
}
catch (parseErr) {
logger.info('Failed to parse extracted JSON:', parseErr.message);
classification = {}; // Reset on secondary failure
}
}
else {
const plainJsonMatch = content.match(/\{[\s\S]*\}/); // Fallback to find any JSON structure
if (plainJsonMatch) {
try {
classification = JSON.parse(plainJsonMatch[0]);
logger.info('Successfully extracted JSON using simple match.');
}
catch (parseErr) {
logger.info('Failed to parse simple JSON match:', parseErr.message);
classification = {};
}
}
else {
logger.info('Could not extract JSON from response:', content);
classification = {};
}
}
// Provide default values if parsing failed completely
if (Object.keys(classification).length === 0) {
classification = {
label: 'Parsing Error - Unclassified', category: 'Other', keywords: [],
emotionalTone: 'neutral', sentiment: 'neutral', significance: 0.1,
quotes: [], best_id: span.startId
};
}
}
// --- Validate and Sanitize LLM Output ---
const validatedClassification = {
label: typeof classification.label === 'string' ? classification.label.substring(0, 50) : 'Unknown Turning Point',
category: typeof classification.category === 'string' ? classification.category : 'Other',
keywords: Array.isArray(classification.keywords) ? classification.keywords.map(String).slice(0, 4) : [], // Limit count
emotionalTone: typeof classification.emotionalTone === 'string' ? classification.emotionalTone : 'neutral',
sentiment: ['positive', 'negative', 'neutral'].includes(classification.sentiment) ? classification.sentiment : 'neutral',
significance: typeof classification.significance === 'number' ? Math.max(0, Math.min(1, classification.significance)) : 0.5,
quotes: Array.isArray(classification.quotes) ? classification.quotes.map(String).slice(0, 3) : [], // Limit count
best_id: typeof classification.best_id === 'string' ? classification.best_id : span.startId, // Default to start of span
};
// Calculate complexity score
const complexityScore = this.calculateComplexityScore(validatedClassification.significance, distance // Use the raw distance (0-1)
);
// --- Construct TurningPoint Object ---
return {
id: `tp-${dimension}-${span.startIndex}-${span.endIndex}`,
label: validatedClassification.label,
category: validatedClassification.category,
span: span, // Use the span derived at the beginning
// deprecatedSpan is no longer populated from regex results
semanticShiftMagnitude: distance,
keywords: validatedClassification.keywords,
quotes: validatedClassification.quotes,
emotionalTone: validatedClassification.emotionalTone,
sentiment: validatedClassification.sentiment,
detectionLevel: dimension,
significance: validatedClassification.significance,
complexityScore: complexityScore
};
}
catch (err) {
logger.info(`Error during LLM call for turning point classification: ${err.message}`);
// Fallback classification on API error
if (this.config.throwOnError) {
}
else {
return {
id: `tp-err-${dimension}-${span.startId}`,
label: 'LLM Error - Unclassified',
category: 'Other',
span: span,
semanticShiftMagnitude: distance,
keywords: [],
quotes: [],
emotionalTone: 'neutral',
sentiment: 'neutral',
detectionLevel: dimension,
significance: 0.1,
complexityScore: 1.0 // Minimum complexity
};
}
}
}
/**
* Updated to utilize new classes of Message and MetaMessage for better structure and clarity
* @param turningPoints
* @param originalMessages
* @returns
*/
createMetaMessagesFromTurningPoints(turningPoints, originalMessages) {
if (turningPoints.length === 0)
return [];
// Group turning points by category (first-level abstraction)
const groupedByCategory = {};
turningPoints.forEach((tp) => {
const category = tp.category;
if (!groupedByCategory[category]) {
groupedByCategory[category] = [];
}
groupedByCategory[category].push(tp);
});
logger.info(`Grouped categories: `, JSON.stringify(groupedByCategory, null, 2));
// Create meta-messages (one per category to find higher-level patterns)
const metaMessages = [];
// First create category messages - represents dimension n to n+1 transformation
Object.entries(groupedByCategory).forEach(([category, points], index) => {
// Use the factory method from MetaMessage class to create a properly typed meta-message
const metaMessage = Message_1.MetaMessage.createCategoryMetaMessage(category, points, index, originalMessages);
metaMessages.push(metaMessage);
});
// Create timeline/section meta-messages
const sortedPoints = [...turningPoints].sort((a, b) => a.span.startIndex - b.span.startIndex);
const sectionCount = Math.min(4, Math.ceil(sortedPoints.length / 2));
const pointsPerSection = Math.ceil(sortedPoints.length / sectionCount);
// Create chronological section meta-messages
for (let i = 0; i < sectionCount; i++) {
const sectionPoints = sortedPoints.slice(i * pointsPerSection, Math.min((i + 1) * pointsPerSection, sortedPoints.length));
if (sectionPoints.length === 0)
continue;
// Create a section meta-message using the factory method
const sectionMetaMessage = Message_1.MetaMessage.createSectionMetaMessage(sectionPoints, i, this.originalMessages);
console.info('created sectionMetageMessage');
metaMessages.push(sectionMetaMessage);
}
logger.info(`Created ${metaMessages.length} meta-messages for dimensional expansion: ${metaMessages
.map((m) => m.id)
.join(", ")}`);
return metaMessages;
}
// --- Filtering and cross-dimension combination helpers ---
/**
* Filter turning points to keep only significant ones,
* limited to config.maxTurningPoints and skipping heavily overlapping spans.
*/
filterSignificantTurningPoints(turningPoints) {
if (!this.config.onlySignificantTurningPoints || turningPoints.length === 0) {
// Ensure sorted return even if not filtering
return turningPoints.sort((a, b) => a.span.startIndex - b.span.startIndex);
}
logger.info(`Filtering ${turningPoints.length} TPs based on significance >= ${this.config.significanceThreshold} and maxPoints = ${this.config.maxTurningPoints}`);
// Sort by significance, complexity, magnitude
const sorted = [...turningPoints].sort((a, b) => {
if (b.significance !== a.significance)
return b.significance - a.significance;
if (b.complexityScore !== a.complexityScore)
return b.complexityScore - a.complexityScore;
return b.semanticShiftMagnitude - a.semanticShiftMagnitude;
});
const result = [];
const coveredIndices = new Set(); // Use indices for overlap check
const maxPoints = this.config.maxTurningPoints;
for (const tp of sorted) {
// Check significance threshold first
if (tp.significance < this.config.significanceThreshold) {
// Only consider points below threshold if we haven't found enough significant ones yet
if (result.length >= Math.ceil(maxPoints / 2)) { // Heuristic: if we have half the max points, stop adding insignificant ones
continue;
}
}
// Check for significant overlap with already selected points
let overlapRatio = 0;
let isOverlapping = false;
const tpSpanSize = tp.span.endIndex - tp.span.startIndex + 1;
if (tpSpanSize > 0) {
let overlapCount = 0;
for (let i = tp.span.startIndex; i <= tp.span.endIndex; i++) {
if (coveredIndices.has(i)) {
overlapCount++;
}
}
overlapRatio = overlapCount / tpSpanSize;
}
// Define significant overlap threshold (e.g., 40% from original code)
const overlapThreshold = 0.4;
isOverlapping = overlapRatio > overlapThreshold;
if (!isOverlapping && result.length < maxPoints) {
result.push(tp);
// Mark indices covered by this TP
for (let i = tp.span.startIndex; i <= tp.span.endIndex; i++) {
coveredIndices.add(i);
}
}
else if (isOverlapping) {
logger.info(` TP ${tp.id} (Sig: ${tp.significance.toFixed(2)}) overlaps significantly (${(overlapRatio * 100).toFixed(0)}%) with existing TPs. Skipping.`);
}
else if (result.length >= maxPoints) {
logger.info(` Reached max turning points (${maxPoints}). Skipping TP ${tp.id}.`);
}
}
// Ensure at least one TP is returned if any were found initially
if (result.length === 0 && sorted.length > 0) {
logger.info("No TPs met significance/overlap criteria, returning the single most significant one.");
result.push(sorted[0]);
}
// Add a second diverse TP if only one was kept and more exist (original logic)
else if (result.length === 1 && sorted.length > 1) {
for (let i = 1; i < sorted.length; i++) {
const nextTp = sorted[i];
// Check if it's sufficiently far from the first one (e.g., > 3 messages gap)
if (Math.abs(nextTp.span.startIndex - result[0].span.startIndex) > 3) {
// Check minimal overlap with the first one
let overlapsFirst = false;
for (let j = nextTp.span.startIndex; j <= nextTp.span.endIndex; j++) {
if (j >= result[0].span.startIndex && j <= result[0].span.endIndex) {
overlapsFirst = true;
break;
}
}
if (!overlapsFirst) {
logger.info("Adding a second, non-overlapping TP for diversity.");
result.push(nextTp);
break;
}
}
}
}
logger.info(`Filtered down to ${result.length} significant turning points.`);
// Final sort by position in conversation
return result.sort((a, b) => a.span.startIndex - b.span.startIndex);
}
/**
* Combine turning points from different dimensions
* (Using original logic from the second code block)
*/
combineTurningPoints(localTurningPoints, higherDimensionTurningPoints) {
logger.info(`Combining ${localTurningPoints.length} local (dim ${localTurningPoints[0]?.detectionLevel ?? 'N/A'}) and ${higherDimensionTurningPoints.length} higher (dim ${higherDimensionTurningPoints[0]?.detectionLevel ?? 'N/A'}) TPs.`);
// Prioritize higher-dimensional turning points by boosting their significance (original logic)
const boostedHigher = higherDimensionTurningPoints.map(tp => ({
...tp,
// Apply a boost, ensuring it doesn't exceed 1.0
significance: Math.min(1.0, tp.significance * 1.2), // Adjusted boost factor slightly
// Keep original detectionLevel for merging logic
}));
// Combine all turning points
const allTurningPoints = [...localTurningPoints, ...boostedHigher];
logger.info(`Total TPs before cross-level merge: ${allTurningPoints.length}`);
// Merge overlapping turning points across dimensions, prioritizing higher dimensions/significance
const mergedTurningPoints = this.mergeAcrossLevels(allTurningPoints);
logger.info(`Merged across levels to ${mergedTurningPoints.length} TPs.`);
// Filter the combined & merged list to keep the most significant ones overall
const filteredTurningPoints = this.filterSignificantTurningPoints(mergedTurningPoints);
logger.info(`Final combined and filtered TPs: ${filteredTurningPoints.length}`);
// Sort by position in conversation before returning
return filteredTurningPoints.sort((a, b) => a.span.startIndex - b.span.startIndex);
}
/**
* Merge similar or overlapping turning points *within* the same dimension
* (Using original logic from the second code block)
*/
mergeSimilarTurningPoints(turningPoints) {
if (turningPoints.length <= 1)
return turningPoints;
// Sort turning points by start index
const sorted = [...turningPoints].sort((a, b) => a.span.startIndex - b.span.startIndex);
const merged = [];
let currentTp = sorted[0]; // Use a more descriptive name
for (let i = 1; i < sorted.length; i++) {
const nextTp = sorted[i];
// Check conditions for merging (original logic)
const isOverlapping = (nextTp.span.startIndex <= currentTp.span.endIndex + 2); // Allow small gap
const isSimilarCategory = (nextTp.category === currentTp.category);
// Added closeness check from original code
const hasCloseIndices = (nextTp.span.startIndex - currentTp.span.endIndex) <= 3;
// Merge if overlapping OR close, AND same category
if ((isOverlapping || hasCloseIndices) && isSimilarCategory) {
logger.info(` Merging similar TPs (Dim ${currentTp.detectionLevel}): ${currentTp.id} and ${nextTp.id}`);
// Merge the turning points
const newLabel = this.createMergedLabel(currentTp.label, nextTp.label);
// Create merged span (min start, max end)
const mergedSpan = this.ensureChronologicalSpan({
startId: currentTp.span.startIndex <= nextTp.span.startIndex ? currentTp.span.startId : nextTp.span.startId,
endId: currentTp.span.endIndex >= nextTp.span.endIndex ? currentTp.span.endId : nextTp.span.endId,
startIndex: Math.min(currentTp.span.startIndex, nextTp.span.startIndex),
endIndex: Math.max(currentTp.span.endIndex, nextTp.span.endIndex)
});
// Update the deprecated span too (original logic, though less relevant now)
// Note: deprecatedSpan might not exist if TPs came from meta-messages
const mergedDeprecatedSpan = (currentTp.deprecatedSpan && nextTp.deprecatedSpan) ? {
startIndex: Math.min(currentTp.deprecatedSpan.startIndex, nextTp.deprecatedSpan.startIndex),
endIndex: Math.max(currentTp.deprecatedSpan.endIndex, nextTp.deprecatedSpan.endIndex),
startMessageId: mergedSpan.startIndex === currentTp.deprecatedSpan.startIndex ?
currentTp.deprecatedSpan.startMessageId : nextTp.deprecatedSpan.startMessageId,
endMessageId: mergedSpan.endIndex === currentTp.deprecatedSpan.endIndex ?
currentTp.deprecatedSpan.endMessageId : nextTp.deprecatedSpan.endMessageId
} : undefined; // Handle cases where deprecatedSpan might be missing
// Combine keywords and quotes (unique, limited)
const mergedKeywords = Array.from(new Set([...(currentTp.keywords || []), ...(nextTp.keywords || [])])).slice(0, 5);
const mergedQuotes = Array.from(new Set([...(currentTp.quotes || []), ...(nextTp.quotes || [])])).slice(0, 3); // Limit quotes too
// Update the current TP to be the merged version
currentTp = {
...currentTp, // Keep most properties of the first TP
id: `${currentTp.id}-merged-${nextTp.span.startIndex}`, // Indicate merge in ID
label: newLabel,
span: mergedSpan,
// Only include deprecatedSpan if it was successfully merged
...(mergedDeprecatedSpan && { deprecatedSpan: mergedDeprecatedSpan }),
semanticShiftMagnitude: (currentTp.semanticShiftMagnitude + nextTp.semanticShiftMagnitude) / 2,
keywords: mergedKeywords,
quotes: mergedQuotes,
// Boost significance slightly, cap at 1.0 (original logic)
significance: Math.min(1.0, ((currentTp.significance + nextTp.significance) / 2) * 1.1),
// Take max complexity (original logic)
complexityScore: Math.max(currentTp.complexityScore, nextTp.complexityScore),
// Combine emotional tone/sentiment logically (e.g., take the one from the more significant TP)
emotionalTone: currentTp.significance >= nextTp.significance ? currentTp.emotionalTone : nextTp.emotionalTone,
sentiment: currentTp.significance >= nextTp.significance ? currentTp.sentiment : nextTp.sentiment,
};
}
else {
// If not merging, push the completed current TP and move to the next
merged.push(currentTp);
currentTp = nextTp;
}
}
// Add the last processed TP
merged.push(currentTp);
return merged;
}
/**
* Merge turning points across different dimensions with priority to higher dimensions
* (Using original logic from the second code block)
*/
mergeAcrossLevels(turningPoints) {
if (turningPoints.length <= 1)
return turningPoints;
// Sort by dimension DESC (higher first), then by significance DESC, then by start index ASC
const sorted = [...turningPoints].sort((a, b) => {
if (b.detectionLevel !== a.detectionLevel)
return b.detectionLevel - a.detectionLevel;
// Add secondary sort by significance within the same level
if (b.significance !== a.significance)
return b.significance - a.significance;
return a.span.startIndex - b.span.startIndex; // Tertiary sort by position
});
const merged = [];
// Use a Set of covered *indices* for more granular overlap checking
const coveredIndices = new Set();
logger.info(` Merging across levels. Input count: ${sorted.length}. Prioritizing higher dimension/significance.`);
for (const tp of sorted) {
// Check how much of this TP's span is already covered
let overlapCount = 0;
const spanSize = tp.span.endIndex - tp.span.startIndex + 1;
if (spanSize <= 0)
continue; // Skip invalid spans
for (let i = tp.span.startIndex; i <= tp.span.endIndex; i++) {
if (coveredIndices.has(i)) {
overlapCount++;
}
}
const overlapRatio = overlapCount / spanSize;
// Define significant overlap threshold (e.g., 50% - adjust as needed)
const significantOverlapThreshold = 0.5;
// Keep the TP if it's not significantly overlapped by higher-priority ones
if (overlapRatio < significantOverlapThreshold) {
merged.push(tp);
// Mark its indices as covered *only if it wasn't already significantly covered*
for (let i = tp.span.startIndex; i <= tp.span.endIndex; i++) {
coveredIndices.add(i);