@gaiaverse/semantic-turning-point-detector
Version:
Detects key semantic turning points in conversations using recursive semantic distance analysis. Ideal for conversation analysis, dialogue segmentation, insight detection, and AI-assisted reasoning tasks.
1,310 lines (1,164 loc) • 143 kB
text/typescript
// file: semanticTurningPointDetector.ts
import fs from "fs-extra";
import winston from "winston";
import { Ollama } from "ollama";
import dotenv from "dotenv";
dotenv.config();
import async from "async";
import { OpenAI } from "openai";
import { LRUCache } from "lru-cache";
import crypto from "crypto";
import { countTokens, createEmbeddingCache } from "./tokensUtil";
import { MetaMessage, Message, MessageSpan } from "./Message";
import { returnFormattedMessageContent } from "./stripContent";
import {
circularToneSimilarity,
computeSignificance,
computeSignificanceWithChoquet,
formAnalysisResponseFormat,
formAnalysisSystemPromptEnding,
formScoringResponseFormat,
formScoringSystemPromptEnding,
formSystemMessage,
} from "./prompt";
import {
ChunkingResult,
ConvergenceState,
EpistemicPrimitives,
MessageEmbedding,
TurningPoint,
turningPointCategories,
TurningPointCategory,
TurningPointDetectorConfig,
} from "./types";
import { CounterfactualAnalyzer } from "./counterfactual";
// Module-level LRU cache of token counts (string key -> count) so the same text
// does not have to be counted twice - implements atomic memory concept.
// Capped at 10,000 entries; each entry expires 24 hours after it is stored.
const tokenCountCache = new LRUCache<string, number>({
  ttl: 24 * 60 * 60 * 1000,
  max: 10000,
});
/**
* Semantic Turning Point Detector using ARC/CRA/DAO Framework
*
* Identifies semantically significant moments in conversations where meaning shifts,
* emotional ruptures occur, or intellectual pivots happen. Uses multi-dimensional
* analysis with optional phi-field enhancement for improved accuracy.
*
* @example Basic Usage
* ```typescript
* const detector = new SemanticTurningPointDetector({
* apiKey: process.env.OPENAI_API_KEY,
* classificationModel: "gpt-4o-mini",
* semanticShiftThreshold: 0.4,
* maxTurningPoints: 10
* });
*
* const result = await detector.detectTurningPoints(messages);
* console.log(`Found ${result.points.length} turning points`);
* ```
*
* @example Advanced Configuration
* ```typescript
* const detector = new SemanticTurningPointDetector({
* enableExperimentalPhi: true, // Enhanced significance scoring
* enableCounterfactualAnalysis: true, // Additional validation
* maxRecursionDepth: 4, // Multi-dimensional analysis depth
* dynamicallyAdjustSemanticShiftThreshold: true
* });
* ```
*
* ## Key Parameters
* - `semanticShiftThreshold` (0.2-0.7): Sensitivity control
* - `maxRecursionDepth` (2-5): Analysis depth
* - `enableExperimentalPhi`: Activates phi-field enhancement
* - `maxTurningPoints`: Limit final results
*
* ## Scoring
* - **Confidence**: How semantically distinct the turning point is
* - **Necessity**: How epistemically essential it is (phi-aware)
*
* @see {@link TurningPointDetectorConfig} Configuration options
* @see {@link TurningPoint} Result structure
*/
export class SemanticTurningPointDetector {
/**
 * Configuration assembled by the constructor from the caller's partial config,
 * environment variables and built-in defaults.
 */
private config: TurningPointDetectorConfig;
/**
 * The OpenAI client is used for LLM requests because it allows a configurable
 * endpoint (`baseURL`). Further exploration of other libraries (ollama,
 * llmstudio, genai, ...) may be worthwhile for more direct compatibility with
 * other LLM providers, but the OpenAI client is sufficient for the requests
 * this detector makes today.
 */
private openai: OpenAI;
/**
 * Copy of the messages passed to `detectTurningPoints` (each message is
 * shallow-copied). ARC analysis works on subsets of the conversation, so this
 * array is handed to the chunk-level helpers as the unmodified reference.
 */
private originalMessages: Message[] = [];
/**
 * An array of state changes across iterations, used for convergence measurement.
 * It is reset at the start of every `detectTurningPoints` call and appended to by
 * `multiLayerDetection` (when `measureConvergence` is enabled) to track how the
 * set of turning points evolves across dimensions.
 */
private convergenceHistory: ConvergenceState[] = [];
/**
 * Helps avoid repeated embedding requests for the same message content. Its size
 * is bounded via the `embeddingCacheRamLimitMB` config option to avoid excessive
 * RAM usage.
 */
private embeddingCache: LRUCache<string, Float32Array>;
/**
 * Kind of LLM endpoint in use. The constructor derives "openai" / "openrouter"
 * from the endpoint URL; `detectTurningPoints` switches it to "ollama" when the
 * endpoint is detected as Ollama-based. Anything else stays "unknown".
 */
private endpointType: "ollama" | "openai" | "unknown" | "openrouter" =
"unknown";
/**
 * Ollama client; stays `null` until `detectTurningPoints` detects an
 * Ollama-based endpoint and creates one for that host.
 */
private ollama: Ollama | null = null;
/**
 * Logger used throughout the detector. The constructor builds a winston logger
 * (console transport plus `results/semanticTurningPointDetector.log`) when no
 * logger is supplied in the config.
 */
readonly logger: winston.Logger | Console;
/**
 * Only created when `enableCounterfactualAnalysis` is set in the config passed
 * to the constructor; otherwise left undefined.
 */
private counterfactualAnalyzer?: CounterfactualAnalyzer;
/**
 * Creates a new instance of the semantic turning point detector.
 *
 * Every option is optional. Missing options are filled from environment
 * variables (`OPENAI_API_KEY`, then `LLM_API_KEY` for the API key) or from the
 * defaults listed in the body.
 *
 * Side effects:
 * - When no logger is supplied, the `results` directory is created and a winston
 *   logger writing to the console and to
 *   `results/semanticTurningPointDetector.log` is installed.
 * - An OpenAI client and the embedding cache are created.
 *
 * @param config - Partial configuration; see {@link TurningPointDetectorConfig}.
 */
constructor(config: Partial<TurningPointDetectorConfig> = {}) {
  const customCategories =
    config.turningPointCategories && config.turningPointCategories.length > 0
      ? config.turningPointCategories
      : undefined;

  this.config = {
    apiKey: config.apiKey || process.env.OPENAI_API_KEY || "",
    classificationModel: config.classificationModel || "gpt-4o-mini",
    embeddingModel: config.embeddingModel || "text-embedding-3-small",
    embeddingEndpoint: config.embeddingEndpoint,
    semanticShiftThreshold: config.semanticShiftThreshold || 0.22,
    minTokensPerChunk: config.minTokensPerChunk || 250,
    maxTokensPerChunk: config.maxTokensPerChunk || 2000,
    // An explicit `concurrency` always wins. Only when it is omitted does the
    // default depend on the endpoint: 1 for a custom endpoint, 4 otherwise.
    // (Previously the ternary swallowed the `??`, forcing any explicit value to 1.)
    concurrency: config.concurrency ?? (config.endpoint ? 1 : 4),
    embeddingConcurrency: config.embeddingConcurrency ?? 5,
    logger: config.logger ?? undefined,
    embeddingCacheRamLimitMB: config.embeddingCacheRamLimitMB || 256,
    maxRecursionDepth: config.maxRecursionDepth || 3,
    onlySignificantTurningPoints: config.onlySignificantTurningPoints ?? true,
    significanceThreshold: config.significanceThreshold || 0.0,
    minMessagesPerChunk: config.minMessagesPerChunk || 3,
    maxTurningPoints: config.maxTurningPoints || 5,
    debug: config.debug || false,
    turningPointCategories: customCategories ?? turningPointCategories,
    endpoint: config.endpoint,
    temperature: config.temperature ?? 0.6,
    top_p: config.top_p ?? 0.95,
    complexitySaturationThreshold:
      config.complexitySaturationThreshold || 4.5,
    measureConvergence: config.measureConvergence ?? true,
    enableExperimentalPhi: config.enableExperimentalPhi ?? false,
    dynamicallyAdjustSemanticShiftThreshold:
      config.dynamicallyAdjustSemanticShiftThreshold ?? false,
    phiMergeThresholdMultiplier: config.phiMergeThresholdMultiplier ?? 0.5,
    overlapThreshold: config.overlapThreshold ?? 0.4,
    enableDynamicComplexitySaturation:
      config.enableDynamicComplexitySaturation ?? false,
    dynamicSaturationTargetPercentile:
      config.dynamicSaturationTargetPercentile ?? 0.15,
    dynamicSaturationMinSamples: config.dynamicSaturationMinSamples ?? 10,
    epistemicThreshold: config.epistemicThreshold ?? 0.01,
    enableCounterfactualAnalysis:
      config.enableCounterfactualAnalysis ?? false,
  };

  // The counterfactual analyzer is optional and only built on request.
  if (config.enableCounterfactualAnalysis) {
    this.counterfactualAnalyzer = new CounterfactualAnalyzer();
  }

  // Classify the endpoint by URL substring; anything unrecognised (or no
  // endpoint at all) stays "unknown".
  const endpoint = config.endpoint;
  if (endpoint && endpoint.includes("api.openai.com")) {
    this.endpointType = "openai";
  } else if (endpoint && endpoint.includes("openrouter.ai")) {
    this.endpointType = "openrouter";
  } else {
    this.endpointType = "unknown";
  }

  if (this.config.logger === undefined) {
    // No logger supplied: log to the console and to a JSON log file under ./results.
    fs.ensureDirSync("results");
    this.logger = winston.createLogger({
      level: "info",
      format: winston.format.combine(
        winston.format.timestamp(),
        winston.format.json(),
      ),
      transports: [
        new winston.transports.Console({
          format: winston.format.combine(
            winston.format.colorize(),
            winston.format.timestamp({ format: "YYYY-MM-DD HH:mm:ss" }),
            winston.format.printf(({ timestamp, level, message }) => {
              return `${timestamp} ${level}: ${message}`;
            }),
          ),
        }),
        new winston.transports.File({
          filename: "results/semanticTurningPointDetector.log",
          format: winston.format.json(),
        }),
      ],
    });
  } else {
    // Use the caller's logger. Without this branch `this.logger` stayed
    // unassigned and every later `this.logger.*` call threw.
    // NOTE(review): assumes the config's `logger` type is assignable to
    // `winston.Logger | Console` - confirm against ./types.
    this.logger = this.config.logger;
  }

  // Validate custom categories only now, because validation logs warnings and
  // therefore needs the logger set up above.
  if (customCategories) {
    this.validateTurningPointCategories(customCategories);
  }

  // Initialize the OpenAI client. `this.config.apiKey` is always a string (it
  // already folds in OPENAI_API_KEY), so `||` - not `??` - is required for the
  // LLM_API_KEY fallback to be reachable. The final "" keeps key-less local
  // endpoints working exactly as before.
  this.openai = new OpenAI({
    apiKey: this.config.apiKey || process.env.LLM_API_KEY || "",
    baseURL: this.config.endpoint,
  });

  // Initialize the embedding cache with the specified RAM limit.
  this.embeddingCache = createEmbeddingCache(
    this.config.embeddingCacheRamLimitMB,
  );

  if (this.config.debug) {
    // Never log the real API key.
    this.logger.info(
      `[TurningPointDetector] Initialized with config:\n${JSON.stringify(
        {
          ...this.config,
          apiKey: "[REDACTED]",
        },
        null,
        2,
      )}`,
    );
    this.logger.info(
      `[TurningPointDetector] Embedding cache initialized with ${this.embeddingCache.max} max entries (${this.config.embeddingCacheRamLimitMB}MB limit)`,
    );
  }
}
/**
 * Returns the name of the model configured for classification
 * (`config.classificationModel`).
 */
public getModelName(): string {
  const { classificationModel } = this.config;
  return classificationModel;
}
/**
 * Scores how thematically alike two turning points are. The score is a fixed
 * weighted sum of three terms and is the core metric for φ-aware grouping and
 * sectioning:
 *
 * - tone (weight 0.5): `circularToneSimilarity` of the two emotional tones
 * - sentiment (weight 0.3): 1 when both sentiments are identical, otherwise 0
 * - significance (weight 0.2): 1 minus the absolute significance difference
 *
 * @param tp1 - The first turning point.
 * @param tp2 - The second turning point.
 * @returns The weighted similarity. It lies in [0, 1] only if both
 *   significances are already on a 0–1 scale and the tone similarity is in
 *   [0, 1]; no clamping is applied here.
 */
private calculateThematicSimilarity(
  tp1: TurningPoint,
  tp2: TurningPoint,
): number {
  const TONE_WEIGHT = 0.5;
  const SENTIMENT_WEIGHT = 0.3;
  const SIGNIFICANCE_WEIGHT = 0.2;

  const toneTerm =
    circularToneSimilarity(tp1.emotionalTone, tp2.emotionalTone) * TONE_WEIGHT;

  const sameSentiment = tp1.sentiment === tp2.sentiment;
  const sentimentTerm = (sameSentiment ? 1 : 0) * SENTIMENT_WEIGHT;

  // Closeness of the two significance values (assumes significance already 0–1).
  const significanceGap = Math.abs(tp1.significance - tp2.significance);
  const significanceTerm = (1 - significanceGap) * SIGNIFICANCE_WEIGHT;

  return toneTerm + sentimentTerm + significanceTerm;
}
/**
 * Re-scores a turning point's significance with φ-awareness by feeding
 * φ-adjusted inputs back through `computeSignificanceWithChoquet`.
 *
 * Inputs handed to the Choquet computation:
 * - certainty: the current significance, scaled by `1 + (phi - 0.5) * 0.6`, capped at 1
 * - novelty: `complexityScore / 5`, capped at 1
 * - affectiveDelta: tone intensity scaled by `1 + (phi - 0.5) * 0.8`, capped at 1
 * - impact: `semanticShiftMagnitude * 10`, capped at 10
 *
 * @param tp - Turning point whose significance is being recomputed (not mutated here).
 * @param phi - φ score for this turning point.
 * @returns The value produced by `computeSignificanceWithChoquet`.
 */
private recalculateSignificanceWithPhi(
  tp: TurningPoint,
  phi: number,
): number {
  // Intensity per emotional tone; tones missing from the table fall back to 0.3.
  const intensityByTone: Record<string, number> = {
    joyful: 0.9,
    excited: 0.8,
    surprised: 0.9,
    worried: 0.7,
    anxious: 0.7,
    angry: 0.9,
    furious: 0.95,
    skeptical: 0.6,
    disgusted: 0.8,
    sad: 0.8,
    discouraged: 0.7,
    hopeful: 0.4,
    neutral: 0.3,
  };
  const toneKey = tp.emotionalTone.toLowerCase();
  const toneIntensity = intensityByTone[toneKey] || 0.3;

  // phi above 0.5 amplifies, phi below 0.5 dampens.
  const phiOffset = phi - 0.5;

  const certainty = Math.min(1.0, tp.significance * (1.0 + phiOffset * 0.6));
  const novelty = Math.min(1.0, tp.complexityScore / 5);
  const affectiveDelta = Math.min(1.0, toneIntensity * (1.0 + phiOffset * 0.8));
  const impact = Math.min(10.0, tp.semanticShiftMagnitude * 10);

  const phiContext = {
    enableExperimentalPhi: true,
    phiScore: phi,
    dimension: tp.detectionLevel,
    averageDistance: tp.semanticShiftMagnitude,
  };

  return computeSignificanceWithChoquet(
    { certainty, novelty, affectiveDelta, impact },
    tp.emotionalTone,
    phiContext,
  );
}
/**
 * Computes the φ (significance) field: one score in [0, 1] per turning point,
 * keyed by turning point id.
 *
 * For each turning point:
 *   φ = 0.7 * normalisedSignificance + 0.3 * toneIntensity * sentimentModifier
 * and the result is clamped to [0, 1]. `sentimentModifier` is 1.1 for
 * "negative" sentiment and 1.0 otherwise.
 *
 * Significance is normalised by guessing its scale:
 * - already within [0, 1]: used as is
 * - an integer within [0, 100]: divided by 100
 * - anything else: divided by 10 (a missing or NaN value counts as 0)
 *
 * @param turningPoints - Turning points to score.
 * @returns Map from turning point id to φ; empty when no turning points are given.
 */
private computePhiSignificanceField(
  turningPoints: TurningPoint[],
): Map<string, number> {
  const phiById = new Map<string, number>();
  if (turningPoints.length === 0) return phiById;

  // Emotional tone -> intensity (0–1), following the wheel-of-emotions tone set.
  const intensityByTone: Record<string, number> = {
    // High intensity
    furious: 0.95,
    angry: 0.9,
    disgusted: 0.9,
    surprised: 0.8,
    // Medium intensity
    anxious: 0.7,
    worried: 0.7,
    sad: 0.7,
    discouraged: 0.7,
    // Low intensity
    joyful: 0.6,
    excited: 0.6,
    hopeful: 0.4,
    skeptical: 0.5,
    // Default/Neutral
    neutral: 0.1,
    unknown: 0.1,
  };

  turningPoints.forEach((point) => {
    const raw = point.significance;

    // Bring the LLM-assigned significance onto a 0–1 scale.
    let normalised: number;
    if (raw >= 0 && raw <= 1) {
      normalised = raw;
    } else if (raw >= 0 && raw <= 100 && Number.isInteger(raw)) {
      normalised = raw / 100;
    } else {
      normalised = (raw || 0) / 10;
    }

    // Unlisted tones default to a low intensity.
    const toneIntensity =
      intensityByTone[point.emotionalTone.toLowerCase()] || 0.1;

    // Negative sentiment gives the emotional term a small boost.
    const sentimentModifier = point.sentiment === "negative" ? 1.1 : 1.0;

    // The LLM's own significance dominates; emotion amplifies it.
    const phi = normalised * 0.7 + toneIntensity * sentimentModifier * 0.3;

    phiById.set(point.id, Math.max(0, Math.min(1, phi)));
  });

  return phiById;
}
/**
 * Main entry point: detects turning points in a conversation using the full
 * ARC/CRA pipeline.
 *
 * Steps:
 * 1. Logs the key configuration, resets the convergence history and, when the
 *    configured endpoint is detected as Ollama-based, creates an Ollama client.
 * 2. Runs `multiLayerDetection` from dimension 0.
 * 3. Scores every turning point that has messages both before and after it:
 *    - confidence: mean of the semantic distances (before → turn) and
 *      (turn → after), computed on magnitude-weighted pooled embeddings.
 *    - necessity: `significance * 0.6` when φ is unavailable; otherwise a fixed
 *      weighted mix of centrality, position, uniqueness and span coverage,
 *      multiplied by φ and clamped to [0, 1].
 * 4. Averages the strictly positive per-point scores into conversation-level values.
 *
 * @param messages - The conversation to analyse; the array is not modified.
 * @returns `points` (detected turning points), `confidence` and `necessity`
 *   (conversation-level averages; 0 when no point could be scored).
 *   NOTE: when `enableExperimentalPhi` is off, `necessity` is `null` at runtime
 *   even though the declared type is `number`.
 */
public async detectTurningPoints(messages: Message[]): Promise<{
  confidence: number;
  necessity: number; // structural necessity (φ-scaled); null at runtime when φ is disabled
  points: TurningPoint[];
}> {
  this.logger.info(
    "Starting turning-point detection (ARC/CRA) on with provided " +
      messages.length +
      " messages",
  );
  // Log the key configuration values for this run.
  this.logger.info(` Turning Point Detection Configuration:
dynamicallyAdjustSemanticShiftThreshold: ${this.config.dynamicallyAdjustSemanticShiftThreshold},
dynamicallyAdjustComplexitySaturation: ${this.config.enableDynamicComplexitySaturation},
enableExperimentalPhi: ${this.config.enableExperimentalPhi},
endpoint: ${this.config.endpoint},
maxTurningPoints: ${this.config.maxTurningPoints},
significanceThreshold: ${this.config.significanceThreshold},
semanticShiftThreshold: ${this.config.semanticShiftThreshold},
minTokensPerChunk: ${this.config.minTokensPerChunk},
maxTokensPerChunk: ${this.config.maxTokensPerChunk},
classificationModel: ${this.config.classificationModel},
`);
  this.convergenceHistory = [];

  const isEndpointOllamaBased = await this.isOllamaEndpoint(
    this.config.endpoint,
  );
  if (isEndpointOllamaBased) {
    this.endpointType = "ollama";
    const url = new URL(this.config.endpoint);
    const host = `${url.protocol}//${url.hostname}${url.port ? `:${url.port}` : ""}`;
    this.logger.info(
      `Detected Ollama endpoint: ${host}. Initializing Ollama client.`,
    );
    this.ollama = new Ollama({ host });
  }

  // ── cache original conversation for downstream helpers
  const totalTokens = await this.getMessageArrayTokenCount(messages);
  this.logger.info(`Total conversation tokens: ${totalTokens}`);
  this.originalMessages = messages.map((m) => ({ ...m }));

  // ── 1️⃣ full multi-layer detection (dim-0 entry)
  const turningPointsFound = await this.multiLayerDetection(messages, 0);
  this.logger.info(
    `Multi-layer detection returned ${turningPointsFound?.length} turning points`,
  );
  const phiScoresByPoint =
    this.computePhiSignificanceField(turningPointsFound);

  // ── 2️⃣ per-TP confidence (mean semantic distance) and necessity scores
  const confidenceScoresByPoint: number[] = [];
  const necessityScoresByPoint: number[] = [];

  // Numerically stable softmax (subtracts the max before exponentiating).
  const softMax = (values: number[]): number[] => {
    const maxVal = Math.max(...values);
    const exps = values.map((v) => Math.exp(v - maxVal));
    const sumExps = exps.reduce((sum, v) => sum + v, 0);
    return exps.map((v) => v / sumExps);
  };

  // Collapses per-message embeddings into one vector, weighting each embedding
  // by the softmax of its L2 norm.
  const meanEmbedding = (embs: MessageEmbedding[]): Float32Array => {
    // The first non-empty embedding defines the reference dimensionality.
    const dim = embs.find((emb) => emb.embedding.length > 0)?.embedding.length;
    if (dim === undefined) {
      this.logger.warn("No valid embeddings found, returning empty vector");
      return new Float32Array();
    }
    // Pool only vectors of that dimensionality. Previously `embs[0]` dictated
    // the size, so an empty first vector produced an empty result and a
    // mismatched vector injected NaN.
    const usable = embs.filter(({ embedding }) => embedding.length === dim);
    const magnitudes = usable.map(({ embedding }) =>
      Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0)),
    );
    const attnWeights = softMax(magnitudes);
    const acc = new Float32Array(dim);
    usable.forEach(({ embedding }, idx) => {
      const weight = attnWeights[idx];
      for (let i = 0; i < dim; i++) {
        acc[i] += embedding[i] * weight;
      }
    });
    return acc;
  };

  // Whole-conversation embeddings and their pooled centre are identical for
  // every turning point, so they are computed at most once, and only when a
  // φ-aware necessity score actually needs them.
  let conversationContext:
    | Promise<{ embeddings: MessageEmbedding[]; center: Float32Array }>
    | undefined;
  const getConversationContext = (): Promise<{
    embeddings: MessageEmbedding[];
    center: Float32Array;
  }> => {
    if (conversationContext === undefined) {
      conversationContext = (async () => {
        const embeddings = await this.generateMessageEmbeddings(messages, 0);
        return { embeddings, center: meanEmbedding(embeddings) };
      })();
    }
    return conversationContext;
  };

  // φ-aware structural necessity: fixed, interpretable weights over four
  // structural signals, then direct φ amplification.
  const calculateStructuralNecessity = (
    tp: TurningPoint,
    allTPs: TurningPoint[],
    conversationEmbeddings: MessageEmbedding[],
    conversationCenterEmbedding: Float32Array,
    phi: number,
  ): number => {
    // How close the turning point sits to the conversation's semantic centre.
    const tpEmbedding = meanEmbedding(
      conversationEmbeddings.slice(tp.span.startIndex, tp.span.endIndex + 1),
    );
    const centralityScore =
      1 -
      this.calculateSemanticDistance(tpEmbedding, conversationCenterEmbedding);

    // Turning points near the middle of the conversation weigh more.
    const relativePosition = tp.span.startIndex / messages.length;
    const positionWeight = 1 - Math.abs(relativePosition - 0.5) * 1.5;

    // 1 minus the average thematic similarity to the other turning points.
    let uniquenessScore = 1.0;
    const otherTPs = allTPs.filter((other) => other.id !== tp.id);
    if (allTPs.length > 1 && otherTPs.length > 0) {
      const totalSimilarity = otherTPs.reduce(
        (sum, other) => sum + this.calculateThematicSimilarity(tp, other),
        0,
      );
      uniquenessScore = 1 - totalSimilarity / otherTPs.length;
    }

    // Share of the conversation covered by the span; saturates at one third.
    const spanCoverage =
      (tp.span.endIndex - tp.span.startIndex + 1) / messages.length;
    const coverageWeight = Math.min(1.0, spanCoverage * 3);

    const baseStructural =
      centralityScore * 0.3 +
      positionWeight * 0.25 +
      uniquenessScore * 0.25 +
      coverageWeight * 0.2;

    return Math.min(1.0, Math.max(0.0, baseStructural * phi));
  };

  await async.eachOfLimit(
    turningPointsFound,
    this.config.concurrency,
    async (tp) => {
      const pre = messages.slice(0, tp.span.startIndex);
      const turn = messages.slice(tp.span.startIndex, tp.span.endIndex + 1);
      const post = messages.slice(tp.span.endIndex + 1);
      // Both distances need context on either side of the turning point.
      if (pre.length === 0 || post.length === 0) {
        this.logger.info(`TP ${tp.id} at edges of convo – skipping scores`);
        return;
      }

      const [preE, turnE, postE] = await Promise.all([
        this.generateMessageEmbeddings(pre, 0),
        this.generateMessageEmbeddings(turn, 0),
        this.generateMessageEmbeddings(post, 0),
      ]);
      const vTurn = meanEmbedding(turnE);
      const distPre = this.calculateSemanticDistance(
        meanEmbedding(preE),
        vTurn,
      );
      const distPost = this.calculateSemanticDistance(
        vTurn,
        meanEmbedding(postE),
      );

      // Confidence: how far the turn is, on average, from what precedes and follows it.
      const confidence = (distPre + distPost) / 2;
      confidenceScoresByPoint.push(confidence);

      // Necessity: φ-aware structural score, or a plain significance-based
      // fallback when φ is disabled or missing for this point.
      const phi = this.config.enableExperimentalPhi
        ? phiScoresByPoint.get(tp.id)
        : undefined;
      let necessity: number;
      if (phi === undefined) {
        necessity = tp.significance * 0.6;
      } else {
        const { embeddings, center } = await getConversationContext();
        necessity = calculateStructuralNecessity(
          tp,
          turningPointsFound,
          embeddings,
          center,
          phi,
        );
      }
      necessityScoresByPoint.push(necessity);

      // `phi !== undefined` (not truthiness) so that φ = 0 is still reported as a value.
      this.logger.info(
        `TP ${tp.id}: distPre=${distPre.toFixed(3)}, distPost=${distPost.toFixed(3)}, conf=${confidence.toFixed(3)}, necessity=${necessity.toFixed(3)}${phi !== undefined ? `, φ=${phi.toFixed(3)}` : " (φ-disabled)"}`,
      );
    },
  );

  // ── 3️⃣ Aggregate conversation-level scores (mean of strictly positive scores)
  const meanOfPositive = (values: number[]): number => {
    const positive = values.filter((v) => v > 0);
    return positive.length === 0
      ? 0
      : positive.reduce((s, v) => s + v, 0) / positive.length;
  };
  const aggregateConfidence = meanOfPositive(confidenceScoresByPoint);
  const aggregateNecessity = meanOfPositive(necessityScoresByPoint);

  this.logger.info(
    `Aggregate scores: Confidence=${aggregateConfidence.toFixed(3)}, Necessity=${aggregateNecessity.toFixed(3)}, enabledExperimentalPhi=${this.config.enableExperimentalPhi}`,
  );

  return {
    confidence: aggregateConfidence,
    // Necessity is only meaningful with φ enabled; otherwise it is reported as null.
    necessity: this.config.enableExperimentalPhi ? aggregateNecessity : null,
    points: turningPointsFound,
  };
}
/**
 * Multi-layer detection implementing the ARC/CRA dimensional processing
 * (the transition operator Ψ).
 *
 * Flow for one dimension `n`:
 * 1. At or beyond `maxRecursionDepth`, the messages are analysed as one chunk.
 * 2. Otherwise turning points are detected either directly (small dimension-0
 *    conversations, with a temporarily raised semantic-shift threshold) or per
 *    chunk with bounded concurrency.
 * 3. Similar turning points are merged. With φ enabled, complexity and
 *    significance are recomputed φ-aware and φ is stored on each point.
 * 4. If the maximum complexity reaches the saturation threshold, more than two
 *    points remain and depth allows, the points are turned into meta-messages
 *    and analysed recursively at `n + 1`; results from both levels are combined.
 *    Otherwise the merged points are filtered and returned.
 *
 * Convergence states are appended to `convergenceHistory` when
 * `measureConvergence` is enabled.
 *
 * @param messages - Messages (or meta-messages) to analyse at this dimension.
 * @param dimension - Current dimension `n`, starting at 0.
 * @returns The turning points found at this dimension and above.
 */
private async multiLayerDetection(
  messages: Message[],
  dimension: number,
): Promise<TurningPoint[]> {
  this.logger.info(`Starting dimensional analysis at n=${dimension}`);

  // Hard limit on dimensional expansion.
  if (dimension >= this.config.maxRecursionDepth) {
    this.logger.info(
      `Maximum dimension (n=${dimension}) reached, processing directly without further expansion`,
    );
    return await this.detectTurningPointsInChunk(
      messages,
      dimension,
      0,
      this.originalMessages,
    );
  }

  let localTurningPoints: TurningPoint[] = [];

  if (
    messages.length < this.config.minMessagesPerChunk * 2 &&
    dimension === 0
  ) {
    this.logger.info(
      `Dimension ${dimension}: Small conversation (${messages.length} msgs), processing directly`,
    );
    // Small conversations use a slightly higher threshold. The override lives
    // on the shared config, so it is restored in `finally`: a failure inside
    // chunk detection must not leave the raised threshold behind.
    const originalThreshold = this.config.semanticShiftThreshold;
    this.config.semanticShiftThreshold = Math.max(
      0.3,
      originalThreshold * 1.1,
    );
    try {
      localTurningPoints = await this.detectTurningPointsInChunk(
        messages,
        dimension,
        0,
        this.originalMessages,
      );
    } finally {
      this.config.semanticShiftThreshold = originalThreshold;
    }
  } else {
    // Chunk the conversation
    const { chunks } = await this.chunkConversation(messages, dimension);
    this.logger.info(
      `Dimension ${dimension}: Split into ${chunks.length} chunks`,
    );
    if (chunks.length === 0) {
      this.logger.info(
        `Dimension ${dimension}: No valid chunks created, returning empty.`,
      );
      return [];
    }

    // Process chunks with bounded concurrency; results are stored by chunk
    // index so the flattened output keeps chunk order.
    const chunkTurningPoints: TurningPoint[][] = new Array(chunks.length);
    // -1 marks a chunk that has not finished yet.
    const durationsSeconds: number[] = new Array(chunks.length).fill(-1);
    const limit = this.config.concurrency;

    await async.eachOfLimit(chunks, limit, async (chunk, indexStr) => {
      const index = Number(indexStr);
      const startTime = Date.now();
      // Log every 10th chunk, or every chunk at low concurrency / in debug mode.
      const shouldLog = index % 10 === 0 || limit < 10 || this.config.debug;

      if (shouldLog) {
        this.logger.info(
          ` - Dimension ${dimension}: Processing chunk ${index + 1}/${chunks.length} (${chunk.length} messages)`,
        );
      }

      chunkTurningPoints[index] = await this.detectTurningPointsInChunk(
        chunk,
        dimension,
        index,
        this.originalMessages,
      );

      const durationSecs = (Date.now() - startTime) / 1000;
      durationsSeconds[index] = durationSecs;

      if (shouldLog) {
        // `>= 0` rather than `> 0`: a chunk finishing in under a millisecond
        // records 0 s and must still count as processed.
        const completed = durationsSeconds.filter((d) => d >= 0);
        const averageDuration =
          completed.reduce((a, b) => a + b, 0) / completed.length;
        const remainingChunks = durationsSeconds.length - completed.length;
        const remainingTime = (averageDuration * remainingChunks).toFixed(1);
        const percentageComplete =
          (completed.length / durationsSeconds.length) * 100;
        this.logger.info(
          ` - Chunk ${index + 1} processed in ${durationSecs.toFixed(1)}s. Est. remaining: ${remainingTime}s (${percentageComplete.toFixed(1)}% complete)`,
        );
      }
    });

    // Flatten all turning points from all chunks
    localTurningPoints = chunkTurningPoints.flat();
  }

  this.logger.info(
    `Dimension ${dimension}: Found ${localTurningPoints.length} raw turning points`,
  );

  // --- PHI-AWARE ARC STEP 1: initial φ field on the raw, unmerged points ---
  // It guides the merge below; it is an empty map when φ is disabled.
  const initialPhiMap = this.config.enableExperimentalPhi
    ? this.computePhiSignificanceField(localTurningPoints)
    : new Map<string, number>();

  // Zero or one turning point: nothing to merge or escalate.
  if (localTurningPoints.length <= 1) {
    return this.config.onlySignificantTurningPoints
      ? this.filterSignificantTurningPoints(localTurningPoints, initialPhiMap)
      : localTurningPoints;
  }

  // Merge similar turning points at this level.
  const mergedLocalTurningPoints = this.mergeSimilarTurningPoints(
    localTurningPoints,
    initialPhiMap,
  );
  this.logger.info(
    `Dimension ${dimension}: Merged similar TPs to ${mergedLocalTurningPoints.length}`,
  );

  // If merging left 0 or 1 TP, return it (after filtering).
  if (mergedLocalTurningPoints.length <= 1) {
    return this.config.onlySignificantTurningPoints
      ? this.filterSignificantTurningPoints(
          mergedLocalTurningPoints,
          initialPhiMap,
        )
      : mergedLocalTurningPoints;
  }

  // --- CRITICAL ARC/CRA + PHI INTEGRATION ---
  // 1. Re-compute φ for the merged turning points; this is the field used for
  //    the escalation decision and all later filtering.
  const phiMap = this.config.enableExperimentalPhi
    ? this.computePhiSignificanceField(mergedLocalTurningPoints)
    : new Map<string, number>();

  // 2. With φ active, make complexity and significance φ-aware.
  if (this.config.enableExperimentalPhi) {
    this.logger.info(
      `Dimension ${dimension}: Enhancing significance scores with phi-field influence.`,
    );
    for (const tp of mergedLocalTurningPoints) {
      const phi = phiMap.get(tp.id);
      if (phi === undefined) continue;
      // Order matters: the significance recalculation reads the
      // already-updated complexity score.
      tp.complexityScore = this.calculateComplexityScoreWithPhi(tp, phi);
      tp.significance = this.recalculateSignificanceWithPhi(tp, phi);
      // Keep φ on the turning point for reference.
      tp.phi = phi;
    }
  }

  // 3. Decide on dimensional escalation from the (possibly φ-aware) complexity.
  const effectiveThreshold = this.calculateDynamicComplexitySaturation(
    mergedLocalTurningPoints,
  );
  const maxComplexity = Math.max(
    0,
    ...mergedLocalTurningPoints.map((tp) => tp.complexityScore),
  );
  const needsDimensionalEscalation = maxComplexity >= effectiveThreshold;
  // Report the threshold that is actually used for the decision, which may
  // differ from the static config value.
  this.logger.info(
    `Dimension ${dimension}: Max complexity = ${maxComplexity.toFixed(2)}, Saturation threshold = ${effectiveThreshold}`,
  );
  this.logger.info(
    `Dimension ${dimension}: Needs Escalation (Ψ)? ${needsDimensionalEscalation}`,
  );

  const atDepthLimit = dimension >= this.config.maxRecursionDepth - 1;
  const tooFewPoints = mergedLocalTurningPoints.length <= 2;
  if (atDepthLimit || tooFewPoints || !needsDimensionalEscalation) {
    this.logger.info(
      `Dimension ${dimension}: Finalizing at this level. Applying final filtering.`,
    );
    if (this.config.measureConvergence) {
      this.convergenceHistory.push({
        previousTurningPoints: [], // No previous state at the final level of processing
        currentTurningPoints: mergedLocalTurningPoints, // TPs before final filtering
        dimension,
        distanceMeasure: 0, // No comparison needed at final step
        hasConverged: true, // Considered converged as processing stops here
        didEscalate: false,
      });
    }
    return this.filterSignificantTurningPoints(
      mergedLocalTurningPoints,
      phiMap,
    );
  }

  // ----- DIMENSIONAL ESCALATION (n → n+1) -----
  this.logger.info(
    `Dimension ${dimension}: Escalating to dimension ${dimension + 1}`,
  );
  // The merged turning points become the "messages" of the next dimension.
  const metaMessages = this.createMetaMessagesFromTurningPoints(
    mergedLocalTurningPoints,
    this.originalMessages,
  );
  this.logger.info(
    `Dimension ${dimension}: Created ${metaMessages.length} meta-messages for dimension ${dimension + 1}`,
  );

  if (metaMessages.length < 2) {
    this.logger.info(
      `Dimension ${dimension}: Not enough meta-messages (${metaMessages.length}) to perform higher-level analysis. Finalizing with current TPs.`,
    );
    if (this.config.measureConvergence) {
      this.convergenceHistory.push({
        previousTurningPoints: mergedLocalTurningPoints, // State before attempted escalation
        currentTurningPoints: mergedLocalTurningPoints, // State after failed escalation
        dimension: dimension + 1, // Represents the attempted next dimension
        distanceMeasure: 0, // No change
        hasConverged: true, // Converged because escalation failed
        didEscalate: false, // Escalation attempted but yielded no processable result
      });
    }
    return this.filterSignificantTurningPoints(
      mergedLocalTurningPoints,
      phiMap,
    );
  }

  // Recurse on the meta-messages to find higher-dimensional turning points.
  const higherDimensionTurningPoints = await this.multiLayerDetection(
    metaMessages,
    dimension + 1,
  );
  this.logger.info(
    `Dimension ${dimension + 1}: Found ${higherDimensionTurningPoints.length} higher-dimension TPs.`,
  );

  // Track convergence and dimension escalation.
  if (this.config.measureConvergence) {
    const convergenceState: ConvergenceState = {
      previousTurningPoints: mergedLocalTurningPoints, // TPs from dim n
      currentTurningPoints: higherDimensionTurningPoints, // TPs found in dim n+1
      dimension: dimension + 1,
      distanceMeasure: this.calculateStateDifference(
        mergedLocalTurningPoints,
        higherDimensionTurningPoints,
        phiMap,
      ),
      hasConverged: higherDimensionTurningPoints.length > 0, // Converged if TPs were found at higher level
      didEscalate: true,
    };
    this.convergenceHistory.push(convergenceState);
    this.logger.info(
      `Dimension ${dimension} → ${dimension + 1}: Convergence distance: ${convergenceState.distanceMeasure.toFixed(3)}. Converged: ${convergenceState.hasConverged}`,
    );
  }

  // Combine turning points from the local (n) and higher (n+1) dimensions.
  return this.combineTurningPoints(
    mergedLocalTurningPoints,
    higherDimensionTurningPoints,
    phiMap,
  );
}
/**
 * Measures how different two states (sets of turning points) are, for
 * convergence tracking. The result is the equal-weight blend of:
 *
 * 1. the absolute difference between the states' average significance, where a
 *    point's significance is averaged with its φ score when the experimental
 *    phi feature is enabled and `phiMap` has an entry for it; and
 * 2. the Jaccard distance between the states' sets of `start-end` message spans.
 *
 * @param state1 - The first set of turning points.
 * @param state2 - The second set of turning points.
 * @param phiMap - φ scores by turning point id; only consulted when
 *   `enableExperimentalPhi` is on.
 * @returns A distance clamped to [0, 1]: 0 when both states are empty, 1 when
 *   exactly one of them is empty.
 */
private calculateStateDifference(
  state1: TurningPoint[],
  state2: TurningPoint[],
  phiMap: Map<string, number>,
): number {
  const firstIsEmpty = state1.length === 0;
  const secondIsEmpty = state2.length === 0;
  if (firstIsEmpty && secondIsEmpty) return 0.0;
  if (firstIsEmpty || secondIsEmpty) return 1.0;

  const phiEnabled = this.config.enableExperimentalPhi;

  // Average significance of a state, blended 50/50 with φ where available.
  const averageAdjustedSignificance = (points: TurningPoint[]): number => {
    let total = 0;
    for (const point of points) {
      total +=
        phiEnabled && phiMap.has(point.id)
          ? (point.significance + phiMap.get(point.id)!) / 2
          : point.significance;
    }
    return points.length > 0 ? total / points.length : 0;
  };

  const significanceGap = Math.abs(
    averageAdjustedSignificance(state1) - averageAdjustedSignificance(state2),
  );

  // Structural difference: Jaccard distance over the distinct message spans.
  const spanKey = (point: TurningPoint): string =>
    `${point.span.startIndex}-${point.span.endIndex}`;
  const firstSpans = new Set(state1.map(spanKey));
  const secondSpans = new Set(state2.map(spanKey));

  let sharedCount = 0;
  for (const key of firstSpans) {
    if (secondSpans.has(key)) sharedCount += 1;
  }
  const unionCount = firstSpans.size + secondSpans.size - sharedCount;
  const jaccardDistance =
    unionCount > 0 ? 1.0 - sharedCount / unionCount : 0.0;

  const blended = significanceGap * 0.5 + jaccardDistance * 0.5;
  return Math.min(1.0, Math.max(0.0, blended));
}
/**
 * Applies the complexity function χ from the ARC/CRA framework (complexity belongs
 * to the CRA side of the ARC/CRA duality).
 *
 * The score is a linear mapping of significance: `1 + 4 * significance`, limited to
 * the range [1, 5]. Complexity is meant to reflect content significance only, so the
 * semantic shift magnitude does not enter the calculation.
 *
 * @param significance - Significance of the turning point.
 * @param semanticShiftMagnitude - Accepted for signature compatibility; not used.
 * @returns The complexity score, limited to the range [1, 5].
 */
private calculateComplexityScore(
  significance: number,
  semanticShiftMagnitude: number,
): number {
  const floor = 1;
  const ceiling = 5;
  const unclamped = floor + significance * 4;

  if (unclamped > ceiling) return ceiling;
  if (unclamped < floor) return floor;
  return unclamped;
}
/**
 * @experimental
 * Complexity score for a turning point, shifted by the experimental φ
 * (significance) field. Intended for use only when `config.enableExperimentalPhi`
 * is true.
 *
 * The base score comes from `calculateComplexityScore`; φ then adds a flat offset of
 * `(phi - 0.5) * 0.6`, so φ = 0.5 leaves the base score untouched, larger φ raises
 * it and smaller φ lowers it. The offset is additive and is not scaled by the base
 * score.
 *
 * @param tp - The turning point being scored.
 * @param phi - The φ score (emergent significance) computed for this turning point.
 * @returns The φ-adjusted complexity score, limited to the range [1, 5].
 */
private calculateComplexityScoreWithPhi(
  tp: TurningPoint,
  phi: number,
): number {
  const { significance, semanticShiftMagnitude } = tp;

  // Offset contributed by phi; 0.6 is the influence factor and may be tuned.
  const phiOffset = (phi - 0.5) * 0.6;
  const shifted =
    this.calculateComplexityScore(significance, semanticShiftMagnitude) +
    phiOffset;

  return Math.min(5, Math.max(1, shifted));
}
/**
 * Detects turning points within a single chunk of the conversation. This is the
 * local refinement process in the current dimension (the Ψ operator of the ARC
 * framework).
 *
 * Steps:
 *  1. Generate an embedding for every message in the chunk.
 *  2. Compute the semantic distance between each adjacent pair of embeddings and
 *     keep the pairs whose distance exceeds the semantic shift threshold. When
 *     `config.dynamicallyAdjustSemanticShiftThreshold` is `true`, the threshold is
 *     lowered exponentially with the dimension.
 *  3. Classify every retained pair via `classifyTurningPoint`, running at most
 *     `config.concurrency` classifications at the same time.
 *
 * @param messages - The messages (or meta-messages) of the chunk.
 * @param dimension - The current dimension; 0 means base messages.
 * @param chunkIndex - Index of the chunk, used for logging only.
 * @param originalMessages - Forwarded unchanged to `classifyTurningPoint`.
 * @returns The classified turning points in the order of the adjacent pairs they
 *   were derived from (independent of the order in which classifications finish).
 *   Empty when the chunk has fewer than two messages or no pair exceeds the
 *   threshold.
 */
private async detectTurningPointsInChunk(
  messages: MetaMessage[] | Message[],
  dimension: number,
  chunkIndex: number, // Index for logging purposes
  originalMessages: Message[],
): Promise<TurningPoint[]> {
  if (messages.length < 2) return [];

  /** An adjacent pair of embeddings (by their `index` field) and its distance. */
  type AdjacentShift = { current: number; next: number; distance: number };

  /**
   * Scales the base threshold by `decayFactor ^ dim`. Because every decay factor is
   * below 1, higher dimensions get a lower threshold; dimension 0 leaves the base
   * threshold unchanged. The decay factor itself depends on how high the base
   * threshold is.
   */
  const dynamicallyAdjustThresholdBasedOnDimension = (
    dim: number,
    base: number,
  ): number => {
    // Checked in order; the first entry whose limit is exceeded wins.
    const decayFactors = [
      { limit: 0.9, factor: 0.4 }, // Very high thresholds decay slower
      { limit: 0.8, factor: 0.25 }, // High thresholds decay aggressively
      { limit: 0.5, factor: 0.35 }, // Medium thresholds
    ];
    // Low thresholds (<= 0.5) fall back to a decay factor of 0.5.
    const decayFactor =
      decayFactors.find((entry) => base > entry.limit)?.factor ?? 0.5;
    return Math.pow(decayFactor, dim) * base;
  };

  // Generate embeddings for all messages in the chunk
  const embeddings = await this.generateMessageEmbeddings(messages, dimension);

  // The threshold depends only on the config and the dimension, so resolve it once
  // instead of once per adjacent pair.
  const baseThreshold = this.config.semanticShiftThreshold;
  const useDynamicThreshold =
    this.config.dynamicallyAdjustSemanticShiftThreshold === true;
  const dimensionAdjustedThreshold = useDynamicThreshold
    ? dynamicallyAdjustThresholdBasedOnDimension(dimension, baseThreshold)
    : baseThreshold;

  // Pairs that exceed the threshold (to be classified) and every measured distance
  // (for the summary log only).
  const distances: AdjacentShift[] = [];
  const allDistanceValues: number[] = [];

  for (let i = 0; i < embeddings.length - 1; i++) {
    const current = embeddings[i];
    const next = embeddings[i + 1];
    // Calculate semantic distance between current and next message
    const distance = this.calculateSemanticDistance(
      current.embedding,
      next.embedding,
    );
    this.logger.debug(
      `Anlyzing with dimensionAdjustedThreshold: ${dimensionAdjustedThreshold.toFixed(3)}, compared to original threshold: ${baseThreshold.toFixed(3)}, with the difference in embeddings or distance of: ${distance.toFixed(3)}`,
    );
    if (distance > dimensionAdjustedThreshold) {
      distances.push({
        current: current.index,
        next: next.index,
        distance,
      });
      this.logger.debug(
        ` - After analyzing, determined this distance is to be added to the list of distances to process: ${distance.toFixed(3)}`,
      );
    } else {
      this.logger.debug(
        ` - After analyzing, determined this distance is NOT significant enough to be added to the list of distances to process: ${distance.toFixed(3)}, from the difference of the two embeddings: ${current.embedding.length} and ${next.embedding.length}`,
      );
    }
    allDistanceValues.push(distance);
  }

  const thresholdDescription = useDynamicThreshold
    ? `than the dynamically adjusted threshold of ${dimensionAdjustedThreshold.toFixed(3)}`
    : `than the threshold of ${baseThreshold.toFixed(3)}`;
  // Sort a copy so the collected distances are not reordered by a log statement.
  const topThreeDistances = [...allDistanceValues]
    .sort((a, b) => b - a)
    .slice(0, 3)
    .map((value) => value.toFixed(3))
    .join(", ");
  this.logger.info(
    `For a total number of points: ${embeddings.length}, there were ${distances.length} distances found as being greater ${thresholdDescription}. Across this span of messages of length ${messages.length}, the following distances were found:\n- The top 3 greatest distances are: ${topThreeDistances}\nFound ${distances.length} potential turning points at this level (${dimension === 0 ? "base messages" : "meta-messages"}).`,
  );

  if (distances.length === 0) {
    this.logger.info(
      `No significant semantic shifts detected in chunk ${chunkIndex}`,
    );
    return [];
  }

  // One slot per candidate pair: classifications run concurrently and may finish in
  // any order, so each result is written to its own slot to keep the output order
  // deterministic. Slots of skipped pairs stay empty and are dropped at the end.
  const classified = new Array<TurningPoint | undefined>(distances.length);

  await async.eachOfLimit(
    distances,
    this.config.concurrency,
    async (distanceObj, idxStr) => {
      const d = Number(idxStr); // Position of this candidate in `distances`
      // NOTE(review): the embedding's `index` field is used as the position in both
      // `embeddings` and `messages` — presumably it is the chunk-local position;
      // confirm against generateMessageEmbeddings.
      const i = distanceObj.current;
      const current = embeddings[i];
      const next = embeddings[distanceObj.next];
      const distance = distanceObj.distance;
      const beforeMessage = messages[i];
      const afterMessage = messages[i + 1];
      if (beforeMessage == undefined || afterMessage == undefined) {
        this.logger.info(
          `detectTurningPointsInChunk: warning beforeMessage or afterMessage is undefined, beforeMessage: ${beforeMessage}, afterMessage: ${afterMessage}`,
        );
        return;
      }

      // Classify the turning point using LLM
      const turningPoint = await this.classifyTurningPoint(
        beforeMessage,
        afterMessage,
        distance,
        dimension,
        originalMessages,
        d,
      );

      if (d === 0) {
        this.logger.info(`Now proceeding to process every turning point`);
      }
      // NOTE(review): the truthiness check below omits the chunk label for chunk 0 —
      // confirm that is intended.
      const chunkLabel = chunkIndex ? `[Chunk ${chunkIndex}] ` : "";
      this.logger.info(
        ` ...${chunkLabel}Potential turning point detected between messages ${current.id} and ${next.id} (distance: ${distance.toFixed(3)}, complexity: ${turningPoint.complexityScore.toFixed(1)}), signif: ${turningPoint.significance.toFixed(2)} category: ${turningPoint.category}, number of quotes: ${turningPoint.quotes.length}, emotionalTone: ${turningPoint.emotionalTone}`,
      );

      classified[d] = turningPoint;
    },
  );

  return classified.filter(
    (turningPoint): turningPoint is TurningPoint => turningPoint !== undefined,
  );
}
/**
* Use LLM to classify a turning point and generate metadata.
* *** MODIFIED to prioritize message.spanData over regex ***
*/
/**
* Use LLM to classify a turning point and generate metadata.
* This implementation uses a highly modular prompt architecture with
* multiple distinct user messages to ensure clarity. The payload consists of:
* - A system message that sets the core identity and universal constraints.
* - A static context user message containing framework and evaluation criteria.
* - A dynamic data user message that provides conversation context and the specific messages to analyze.
* - A final user instruction message that tells the model what to do with all this information.
*/
private async classifyTurningPoint(
beforeMessage: Message,
afterMessage: Message,
distance: number,
dimension: number,
originalMessages: Message[],
index: number = 0,
): Promise<TurningPoint> {
let span: MessageSpan;
if (dimension > 0) {
if (
!(beforeMessage instanceof MetaMessage) ||
!(afterMessage instanceof MetaMessage)
) {
throw new Error(
"Before or after message is not a MetaMessage at higher dimension",
);
}
const beforeMessageMeta = beforeMessage as MetaMessage;
const afterMessageMeta = afterMessage as MetaMessage;
// For higher dimensions, extract the starting and ending message from within the meta-message's inner list
span = {
startId:
beforeMessageMeta.getMessagesInTurningPointSpanToMessagesArray()[0]
.id,
endId:
afterMessageMeta.getMessagesInTurningPointSpanToMessagesArray()[0].id,
startIndex: this.originalMessages.findIndex(
(candidateM) =>
candidateM.id ===
beforeMessageMeta.getMessagesInTurningPointSpanToMessagesArray()[0]
.id,
),
endIndex: this.originalMessages.findIndex(
(candidateM) =>
candidateM.id ===
afterMessageMeta.getMessagesInTurningPointSpanToMessagesArray()[0]
.id,
),
originalSpan: {
startId: beforeMessage.id,
endId: afterMessage.id,
startIndex: index,
endIndex: index + 1,
},
};
} else {
// For base-level conversations, use the original message IDs and find their indices.
span = {
startId: beforeMessage.id,
endId: afterMessage.id,
startIndex: MetaMessage.findIndexOfMessageFromId({
id: beforeMessage.id,
beforeMessage,
afterMessage,