UNPKG

@gaiaverse/semantic-turning-point-detector

Version:

Detects key semantic turning points in conversations using recursive semantic distance analysis. Ideal for conversation analysis, dialogue segmentation, insight detection, and AI-assisted reasoning tasks.

462 lines (459 loc) 27.2 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.MetaMessage = void 0; exports.isMetaMessage = isMetaMessage; const stripContent_1 = require("./stripContent"); /** * MetaMessage class provides structured representation of higher dimensional messages * with guaranteed span information and proper indexing */ class MetaMessage { id; author = 'meta'; message; spanData; representedTurningPoints; // maps a turning points id to the messages that are within the span of the turning point messagesByTurningPoint = new Map(); dimension; constructor(id, content, spanData, representedTurningPoints, originalMessages, dimension = 0) { this.id = id; this.message = content; this.spanData = spanData; this.representedTurningPoints = representedTurningPoints; this.dimension = dimension; for (const tp of representedTurningPoints || []) { // Store the original messages that are associated with this turning point // based on the turning point's indices which reference the ORIGINAL message array const messages = []; // Check if indices are valid - they might reference positions in a larger array // than what we currently have const startIndex = Math.min(tp.span.startIndex, originalMessages.length - 1); const endIndex = Math.min(tp.span.endIndex, originalMessages.length - 1); // If the indices are valid for our current array, use them if (startIndex >= 0 && startIndex < originalMessages.length && endIndex >= 0 && endIndex < originalMessages.length) { messages.push(...originalMessages.slice(startIndex, endIndex + 1)); } // Otherwise, try to find messages by ID else { const startMessage = originalMessages.find(m => m.id === tp.span.startId); const endMessage = originalMessages.find(m => m.id === tp.span.endId); if (startMessage) messages.push(startMessage); if (endMessage && startMessage !== endMessage) messages.push(endMessage); } if (messages.length === 0) { console.warn(`Warning: No messages found for turning point ${tp.id}. Using indirect reference.`); // As a last resort, just use the original messages directly // This at least prevents crashes, though contextual information might be limited this.messagesByTurningPoint.set(tp.id, originalMessages); } else { this.messagesByTurningPoint.set(tp.id, messages); } } } /** * Reliably returns the index of this meta-message in the original conversation * - Meta-messages always include spanData with reliable indices, making originalMessages unnecessary. Originally, originalMessages was used in a format that managed containing messages within a single interface. It relied on creating regex IDs to extract indices and determine if a message was a meta-message. However, this approach failed due to design flaws, as more complex origin message string IDs could not reliably be converted into a meta ID. As a result, this new class was developed to encapsulate a meta-message, which can encompass a group of turning points. This is distinct from a baseMessage, which represents a single turning point between two actual messages. */ getIndex(originalMessages, isStart = true) { return isStart ? this.spanData.startIndex : this.spanData.endIndex; } getMessagesInTurningPointSpan() { return this.messagesByTurningPoint; } getMessagesInTurningPointSpanToMessagesArray() { // convert the map values to a single array return Array.from(this.messagesByTurningPoint.values()).flat(); } /** * Creates a string representation with embedded span information for debugging */ toString() { return `MetaMessage(id=${this.id}, span=${this.spanData.startIndex}-${this.spanData.endIndex})`; } /** * Like `getMessagesContentContextualAid`, but for a single turning point rather than a meta message group * - does not include any header, only a 4thlevel header for the message(s) content * @param dimension - the dimension of the message * @param messagesToUse - the number of messages to use * @param maxContentLengthChar - the maximum content length in characters * @param beforeMessage */ static getMessagesContentContextualAidFromJustProvidedBeforeAndAfterMessages(beforeMessage, afterMessage, dimension = 0, messagesToUse = 2, maxContentLengthChar = 8000, originalMessages = [], type = 'within') { if (originalMessages.some(m => m instanceof MetaMessage)) { throw new Error(`Error: Original messages should not contain any meta-messages. Found: ${originalMessages.filter(m => m instanceof MetaMessage).map(m => m.id).join(', ')}`); } // create a span id for this before and after message (simialr to how the meta message is created) const prefix = beforeMessage instanceof MetaMessage && afterMessage instanceof MetaMessage ? 'meta-' : 'base-'; const spanId = `${prefix}${beforeMessage.id}-${afterMessage.id}`; if (originalMessages.length === 0) { throw new Error(`No messages found for turning point ${spanId} in span ${beforeMessage.spanData.startIndex}-${afterMessage.spanData.endIndex}, originalMessages length: ${originalMessages.length}\n- tp.span data: ${JSON.stringify(beforeMessage.spanData)}, tp. after span data: ${JSON.stringify(afterMessage.spanData)}`); } // const messageContent = MetaMessage.getMessagesContentContextualAid( // metaMessage.getTurningPoints(), // messages, // metaMessage, // dimension, // type, // 0, // maxContentLengthChar // ); // use new param props if (beforeMessage instanceof MetaMessage && afterMessage instanceof MetaMessage) { const beforeMessageContextual = beforeMessage.getMessagesContentContextualAid({ dimension, contextualType: type, messagesToUse: messagesToUse ?? 1, maxContentLengthChar }); const afterMessageContextual = afterMessage.getMessagesContentContextualAid({ dimension, contextualType: type, messagesToUse: messagesToUse ?? 1, maxContentLengthChar }); return `## Below is contextual content of the actual converation message content concerning the meta message group of turning points that are being analyzed, and the messages that are before and after the turning point to analyze\n\n` + `### These are messages of the conversation are ${type === 'within' ? 'are at the start of the messages within' : 'before'} the group of turning points that are being analyzed\n` + `${beforeMessageContextual.split('\n').map(line => ` ${line}`).join('\n')}` + `\n### These are messages that end at this group of turning points that are ${type === 'within' ? 'within' : 'after'} the group of turning points that are being analyzed\n` + `${afterMessageContextual.split('\n').map(line => ` ${line}`).join('\n')}` + `\n---- end of messages content before and after the turning point to analyze ----\n\n\n`; } else { throw new Error(`Before and after messages must be instances of MetaMessage`); } } /** * Finds the index of a specific message content element (baseMessage), from a given provided id string that is either a MetaMessage or a BaseMessage * - determines the instance of the message (meta or base) and returns the index of the message in the original messages array * @param param0 * @returns */ static findIndexOfMessageFromId = ({ id, beforeMessage, afterMessage, messages, consoleLogger = console, }) => { // Check if message has getIndex method (MetaMessage instances) // - if so, check if the beforeMessage (MetaMessage) has the same id as the one we are looking for, if so use that for faster lookup if (beforeMessage && typeof beforeMessage?.getIndex === "function" && beforeMessage.id === id) { return beforeMessage.getIndex(messages); } // Check if message has getIndex method (MetaMessage instances) // - if so, check if the afterMessage (MetaMessage) has the same id as the one we are looking for, if so use that for faster lookup if (afterMessage && typeof afterMessage.getIndex === "function" && afterMessage.id === id) { return afterMessage.getIndex(messages); } // IMPORTANT FIX: Check if ID is a numeric string (an index from meta-message parsing) // This handles the case where we extract "4" from "SpanIndices: 4-10" if (/^\d+$/.test(id)) { // It's a numeric index from meta-message content, use it directly const numericIndex = parseInt(id, 10); if (numericIndex >= 0 && numericIndex < messages.length) { return numericIndex; } // If it's outside valid range, log but continue to other checks consoleLogger.info(`Warning: Numeric ID ${id} is outside valid range for original messages originalMessages possible id list: ${messages .map((msg) => msg.id) .join(", ")}`); throw new Error(`Numeric ID ${id} is outside valid range for original messages`); } // Special handling for meta-message IDs if (id.startsWith("meta-")) { const messagesArray = beforeMessage && afterMessage ? [beforeMessage, afterMessage] : messages; // For meta-messages, use their spanData directly if available const metaMessage = messagesArray.find((msg) => msg.id === id); if (metaMessage?.spanData) { if (messages[metaMessage.spanData.startIndex] === undefined) { throw new Error(`Meta-message ${id} has spanData with startId ${metaMessage.spanData.startId} that is not found in original messages.`); } return metaMessage.spanData.startIndex; } // Still need the fallback parsing for legacy meta-messages const msgWithSpan = messagesArray.find((m) => m.id === id); if (msgWithSpan && msgWithSpan.author === "meta") { // const spanMatch = msgWithSpan.message.match(/SpanIndices: (\d+)-(\d+)/); // if (spanMatch && spanMatch.length >= 2) { // return parseInt(spanMatch[1], 10); // } throw new Error(`Incorrect meta-message format for ID ${id}. Expected spanData to be available but found none. Message: ${msgWithSpan.message}, some code is still using old messages, check to ensure new classes are being used.`); } consoleLogger.error(`Error: Meta-message ${id} missing required spanData metameasge:${JSON.stringify(metaMessage, null, 2)}`); throw new Error(`Meta-message ${id} missing required spanData. All meta-messages should have spanData.`); } // Regular lookup for non-meta messages const index = messages.findIndex((msg) => msg.id === id); if (index === -1) { console.log(`Error: Message ID ${id} not found in original messages`); throw new Error(`Message with ID ${id} not found in original messages array.`); } return index; }; /** * Retrieves and formats message content from turning points to provide contextual analysis. * * This method extracts messages from the first and last turning points in the group, * formats them according to the specified parameters, and returns a structured * representation that can be used for analysis or display. * * @param options Configuration options for content retrieval and formatting * @param options.dimension - Dimensional level of analysis (0 = base conversation, 1+ = meta-analysis of turning point groups) * @param options.contextualType - How to present message context: * - "within": Shows messages within the turning point group (first and last messages) * - "before-and-after": Shows messages that appear before and after the turning point group * @param options.messagesToUse - Number of messages to include in each context section (default: 2) * @param options.maxContentLengthChar - Maximum length in characters for each message content (default: 8000) * * @returns Formatted string containing structured message content with appropriate headers and context * * @example * // Get messages within a turning point group * const withinContent = metaMessage.getMessagesContentContextualAid({ * dimension: 1, * contextualType: "within" * }); * * @example * // Get messages before and after a turning point group with custom limits * const surroundingContent = metaMessage.getMessagesContentContextualAid({ * contextualType: "before-and-after", * messagesToUse: 3, * maxContentLengthChar: 5000 * }); */ getMessagesContentContextualAid({ dimension = 0, contextualType = "within", messagesToUse = 2, maxContentLengthChar = 8000 }) { // Get turning points and original messages const turningPoints = this.getTurningPoints(); const originalMessages = this.getMessagesInTurningPointSpanToMessagesArray(); console.info(`getMessagesContentContextualAid: ${this.id} - ${turningPoints.length} turning points, ` + `original messages length: ${originalMessages.length}, ` + `org ids: ${turningPoints.map(tp => tp.id).join(', ')}`); // Find turning points with extreme indices (first/last) const getTurningPointWithExtremeIndex = (turningPoints, isStart = true) => { let extremeIndex = isStart ? turningPoints[0].span.startIndex : turningPoints[0].span.endIndex; let extremeTurningPoint = turningPoints[0]; for (let i = 1; i < turningPoints.length; i++) { const currentTurningPoint = turningPoints[i]; const currentIndex = isStart ? currentTurningPoint.span.startIndex : currentTurningPoint.span.endIndex; const isMoreExtreme = isStart ? currentIndex < extremeIndex : currentIndex > extremeIndex; if (isMoreExtreme) { extremeIndex = currentIndex; extremeTurningPoint = currentTurningPoint; } } return extremeTurningPoint; }; // Get first and last turning points const firstTurningPoint = getTurningPointWithExtremeIndex(turningPoints, true); const lastTurningPoint = getTurningPointWithExtremeIndex(turningPoints, false); // Get associated messages const startMessagesContext = this.messagesByTurningPoint.get(firstTurningPoint.id) || []; const endMessagesContext = this.messagesByTurningPoint.get(lastTurningPoint.id) || []; // Validate we have messages if (startMessagesContext.length === 0 && endMessagesContext.length === 0) { throw new Error(`No messages found for turning point IDs ${firstTurningPoint.id}-${lastTurningPoint.id} ` + `in span ${firstTurningPoint.span.startIndex}-${lastTurningPoint.span.endIndex}. ` + `Original messages length: ${originalMessages?.length}.\n` + `- First turning point span data: ${JSON.stringify(firstTurningPoint.span)}\n` + `- Last turning point span data: ${JSON.stringify(lastTurningPoint.span)}`); } // Format the message content from start and end turning points const startMessages = startMessagesContext .slice(0, messagesToUse) .map(m => (0, stripContent_1.returnFormattedMessageContent)({ max_character_length: maxContentLengthChar, }, m, dimension)) .join('\n'); const endMessages = endMessagesContext .slice(-1 * messagesToUse) .map(m => (0, stripContent_1.returnFormattedMessageContent)({ max_character_length: maxContentLengthChar, }, m, dimension)) .join('\n'); // Format for "within" context type if (contextualType === 'within') { return this.formatWithinContextOutput(startMessages, endMessages); } // Format for "before-and-after" context type else { return this.formatBeforeAfterContextOutput({ firstTurningPoint, lastTurningPoint, originalMessages, dimension, messagesToUse, maxContentLengthChar }); } } /** * Formats the "within" context output */ formatWithinContextOutput(startMessages, endMessages) { return [ `## Messages Within This Turning Point Group (ID: "${this.id}")`, `------ Begin of messages within grouping of turning points id="${this.id}" ------`, `### First Messages in This Turning Point Group`, startMessages.split('\n').map(line => ` ${line}`).join('\n'), `### Last Messages in This Turning Point Group`, endMessages.split('\n').map(line => ` ${line}`).join('\n'), `------ End of messages within grouping of turning points id="${this.id}" ------\n\n`, ].join('\n'); } /** * Formats the "before-and-after" context output */ formatBeforeAfterContextOutput({ firstTurningPoint, lastTurningPoint, originalMessages, dimension, messagesToUse, maxContentLengthChar }) { // Get messages for these turning points const beforeTPMessages = this.messagesByTurningPoint.get(lastTurningPoint.id) || []; const afterTPMessages = this.messagesByTurningPoint.get(firstTurningPoint.id) || []; // Find messages that come before the first message in the turning points const beforeMessages = beforeTPMessages.length > 0 ? originalMessages .filter(m => originalMessages.indexOf(m) < originalMessages.indexOf(beforeTPMessages[0])) .slice(-messagesToUse) : []; // Find messages that come after the last message in the turning points const afterMessages = afterTPMessages.length > 0 ? originalMessages .filter(m => originalMessages.indexOf(m) > originalMessages.indexOf(afterTPMessages[afterTPMessages.length - 1])) .slice(0, messagesToUse) : []; // Format the content const dimensionDescription = dimension === 0 ? 'paired messages forming a potential turning point' : 'group of related turning points'; const beforeMessagesContent = beforeMessages.length > 0 ? beforeMessages .map(m => (0, stripContent_1.returnFormattedMessageContent)({ max_character_length: maxContentLengthChar, }, m, dimension)) .join('\n') : `No messages exist before this ${dimensionDescription}.`; const afterMessagesContent = afterMessages.length > 0 ? afterMessages .map(m => (0, stripContent_1.returnFormattedMessageContent)({ max_character_length: maxContentLengthChar, }, m, dimension)) .join('\n') : `No messages exist after this ${dimensionDescription}.`; return [ `## Context Surrounding This Turning Point`, `- These messages provide context for analyzing the turning point but are NOT part of the turning point itself.`, `- The turning point consists of ${dimensionDescription} that represent a significant shift in the conversation.`, `- This contextual information helps with analysis but should not be the primary basis for classification.`, `### Messages Before This Turning Point Group`, beforeMessagesContent.split('\n').map(line => ` ${line}`).join('\n'), `### Messages After This Turning Point Group`, afterMessagesContent.split('\n').map(line => ` ${line}`).join('\n'), `---- End of contextual messages surrounding this turning point ----\n\n` ].join('\n'); } /** * Factory method to create a category meta-message from turning points */ static createCategoryMetaMessage(category, points, index, originalMessages, dimension = 0) { if (originalMessages.some(m => m instanceof MetaMessage)) { throw new Error(`Error: Original messages should not contain any meta-messages. Found: ${originalMessages.filter(m => m instanceof MetaMessage).map(m => m.id).join(', ')}`); } // Find the overall span of all turning points in this category const minStartIndex = Math.min(...points.map(p => p.span.startIndex)); const maxEndIndex = Math.max(...points.map(p => p.span.endIndex)); // Find corresponding message IDs const startMsgId = points.find(p => p.span.startIndex === minStartIndex)?.span.startId || ''; const endMsgId = points.find(p => p.span.endIndex === maxEndIndex)?.span.endId || ''; // Generate content const quotes = points.flatMap(tp => tp.quotes || []).filter(Boolean).sort((a, b) => a.length - b.length).filter(q => q.length > 5 && q.length < 1000).slice(0, 3); const keywords = points.flatMap(tp => tp.keywords || []).filter(Boolean); const categoryContent = ` ### ${category} Turning Points (within this Meta Grouping) - The point here is to form a higher level Turning Point based on this list of turning points. Significance: ${Math.max(...points.map(p => p.significance)).toFixed(2)} Complexity: ${Math.max(...points.map(p => p.complexityScore)).toFixed(2)} Keywords: ${Array.from(new Set(keywords)).slice(0, 10).join(', ')} Quotes: ${quotes.map(q => `"${q.replace(/\n/g, ' ')}"`).join(', ')} SpanIndices: ${minStartIndex}-${maxEndIndex} SpanMessageIds: ${startMsgId}-${endMsgId} Emotional Tones: ${Array.from(new Set(points.flatMap(tp => tp.emotionalTone || []))).slice(0, 5).join(', ')} Sentimentality: ${Math.max(...points.map(p => (p.sentiment?.toLocaleLowerCase()?.includes('positive') ? 1 : -1) || 0)) >= 1 ? 'positive' : 'negative'} `; // Add contextual information let builtContext = ``; const startMessagesContext = originalMessages.slice(Math.max(0, minStartIndex - 3), minStartIndex).filter(Boolean); const endMessagesContext = originalMessages.slice(maxEndIndex, maxEndIndex + 3).filter(Boolean); if (startMessagesContext.length > 0 || endMessagesContext.length > 0) { builtContext = `\n\n## Contextual Aid\n- The following text provides broader context to showcase a truncated view of the messages within this span in the turning point.`; if (startMessagesContext.length > 0) { builtContext += `\n### Messages of the start of turning points of this grouping of turning point(s) that are within span as Context of the message content within this group of turning points\n` + startMessagesContext.map(m => `Author: ${m.author}\nID: "${m.spanData?.startId ?? m.id}"\nContent:\n\n${(0, stripContent_1.returnFormattedMessageContent)({ max_character_length: 5000, }, m, 0)}) }`).join('\n\n'); } if (endMessagesContext.length > 0) { builtContext += `\n### The messages in between the turning points have been omitted for brevity\n`; } if (endMessagesContext.length > 0) { builtContext += `\n### Messages near the end of the span of this grouping of turning point(s) span as Context of the message content within this group of turning points\n` + endMessagesContext.map(m => `Author: ${m.author}\nID: "${m.spanData?.startId ?? m.id}"\nContent:\n\n${(0, stripContent_1.returnFormattedMessageContent)({ max_character_length: 5000, }, m, 0)}`).join('\n\n'); } } // Create span data with guaranteed indices const span = { startId: startMsgId, endId: endMsgId, startIndex: minStartIndex, endIndex: maxEndIndex }; return new MetaMessage(`meta-cat-${index}`, categoryContent + builtContext, span, points, originalMessages, dimension); } /** * Factory method to create a section meta-message */ static createSectionMetaMessage(sectionPoints, sectionIndex, originalMessages) { // Find the overall span of all turning points in this section const minStartIndex = Math.min(...sectionPoints.map(p => p.span.startIndex)); const maxEndIndex = Math.max(...sectionPoints.map(p => p.span.endIndex)); // Find corresponding message IDs const startMsgId = sectionPoints.find(p => p.span.startIndex === minStartIndex)?.span.startId || ''; const endMsgId = sectionPoints.find(p => p.span.endIndex === maxEndIndex)?.span.endId || ''; // Create section meta-message content const sectionContent = ` # Conversation Section ${sectionIndex + 1} Span: ${sectionPoints[0].span.startId}${sectionPoints[sectionPoints.length - 1].span.endId} SpanIndices: ${minStartIndex}-${maxEndIndex} SpanMessageIds: ${startMsgId}-${endMsgId} Contains ${sectionPoints.length} turning points Max Complexity: ${Math.max(...sectionPoints.map(p => p.complexityScore)).toFixed(2)} ## Turning Points in this Section: ${sectionPoints.map(tp => `- ${tp.label} (${tp.category}) [${tp.span.startIndex}-${tp.span.endIndex}]`).join('\n')} ## Keywords: ${Array.from(new Set(sectionPoints.flatMap(tp => tp.keywords || []))).slice(0, 10).join(', ')} `; // Create with guaranteed span data const span = { startId: startMsgId, endId: endMsgId, startIndex: minStartIndex, endIndex: maxEndIndex }; return new MetaMessage(`meta-section-${sectionIndex}`, sectionContent, span, sectionPoints, originalMessages); } getTurningPoints() { return this.representedTurningPoints || []; } } exports.MetaMessage = MetaMessage; /** * Type guard to check if a message is a MetaMessage instance */ function isMetaMessage(message) { return message instanceof MetaMessage || (message && message.author === 'meta' && typeof message.getIndex === 'function'); } //# sourceMappingURL=Message.js.map