@gaiaverse/semantic-turning-point-detector

Version:

Detects key semantic turning points in conversations using recursive semantic distance analysis. Ideal for conversation analysis, dialogue segmentation, insight detection, and AI-assisted reasoning tasks.

github.com/gaiaverseltd/semantic-turning-point-detector

gaiaverseltd/semantic-turning-point-detector

133 lines (115 loc) • 4.75 kB

text/typescript

import type { Message } from "./Message";
import type { TurningPointDetectorConfig } from "./types";

/**
 * Defines the formatting style for replaced headings.
 * - 'plain': Just the heading text (removes '#' markers only).
 * - 'bold': Surrounds the heading text with '**'.
 * - 'italic': Surrounds the heading text with '*'.
 * - 'bold-italic': Surrounds the heading text with '***'.
 * - 'prefix': Prepends a specific string (defined in `headingPrefix`) to the heading text.
 */
export type HeadingStyle = 'plain' | 'bold' | 'italic' | 'bold-italic' | 'prefix';

/**
 * Configuration options for the `selectivelyStripMarkdown` function.
 */
export type StripMarkdownOptions = {
  /**
   * If true, removes list markers (*, -, +, 1.) while keeping the item text.
   * @default false
   */
  removeLists?: boolean;

  /**
   * Defines how heading syntax (#) should be replaced.
   * @default 'bold'
   */
  headingStyle?: HeadingStyle;

  /**
   * The prefix string to use when `headingStyle` is 'prefix'.
   * @default 'heading: '
   */
  headingPrefix?: string;
};

/**
 * Character class for whitespace that stays on the current line: everything
 * `\s` matches except line terminators. Plain `\s` must not be used in the
 * multi-line patterns below, because it also matches `\n` and would let a
 * single match swallow the line break and absorb the following line.
 */
const INLINE_SPACE = String.raw`[^\S\r\n\u2028\u2029]`;

/** ATX heading: 1-6 '#', same-line whitespace, then text starting with a non-space character. */
const HEADING_LINE = new RegExp(String.raw`^#{1,6}${INLINE_SPACE}+(\S.*)`, 'gm');

/** Unordered list item: optional indentation ($1), one of - * +, same-line whitespace, item text ($2). */
const UNORDERED_ITEM = new RegExp(String.raw`^(${INLINE_SPACE}*)[-*+]${INLINE_SPACE}+(.*)`, 'gm');

/** Ordered list item: optional indentation ($1), digits followed by '.', same-line whitespace, item text ($2). */
const ORDERED_ITEM = new RegExp(String.raw`^(${INLINE_SPACE}*)\d+\.${INLINE_SPACE}+(.*)`, 'gm');

/**
 * Renders heading text according to the requested style.
 * Any value outside the known styles is treated like 'bold'.
 */
function formatHeading(text: string, style: HeadingStyle, prefix: string): string {
  switch (style) {
    case 'plain':
      return text;
    case 'italic':
      return `*${text}*`;
    case 'bold-italic':
      return `***${text}***`;
    case 'prefix':
      return `${prefix}${text}`;
    case 'bold':
    default:
      return `**${text}**`;
  }
}

/**
 * Selectively removes or reformats Markdown elements like headings and optionally lists.
 * Headings (#) are replaced based on the specified `headingStyle`.
 * Lists (*, -, +, 1.) can optionally be stripped to plain text (controlled by `removeLists`).
 * Content remains on the same line, and the number and position of newlines is preserved:
 * a marker is only rewritten together with text on its own line, so a heading marker
 * without text (e.g. a lone "#") is left untouched instead of absorbing the next line.
 *
 * @param markdown The input Markdown string.
 * @param options Configuration options for stripping and formatting.
 * @returns The processed string.
 */
export function selectivelyStripMarkdown(
  markdown: string,
  options?: StripMarkdownOptions
): string {
  // --- Configuration defaults ---
  const removeLists = options?.removeLists ?? false;
  const headingStyle = options?.headingStyle ?? 'bold';
  const headingPrefix = options?.headingPrefix ?? 'heading: ';

  // --- Heading replacement ---
  // A replacer function is used so the heading text is inserted literally
  // (no `$` substitution patterns are interpreted in the result).
  let result = markdown.replace(
    HEADING_LINE,
    (_match: string, headingText: string) => formatHeading(headingText, headingStyle, headingPrefix)
  );

  // --- List removal (optional) ---
  if (removeLists) {
    // Unordered markers first, then ordered ones; indentation ($1) is kept in both cases.
    result = result.replace(UNORDERED_ITEM, '$1$2').replace(ORDERED_ITEM, '$1$2');
  }

  return result;
}

/**
 * Formats a message as a clearly delimited, length-limited text block so it can be
 * passed as contextual information to an LLM / NLP model.
 *
 * The message text is first run through `selectivelyStripMarkdown` with default options.
 * For `dimension === 0` the stripped text is cut to at most
 * `min(max_character_length / 2, 8000)` characters (8000 when the setting is absent);
 * for any other dimension the full stripped text is kept. Every content line is indented
 * by one space, and a note carrying the ORIGINAL (unstripped) message length is always appended.
 *
 * @param semanticSettings Detector settings; only `max_character_length` is read here.
 * @param m The message to format; `author`, `id` and `message` are read.
 * @param dimension 0 selects the "author" wording and truncation; any other value selects the "meta" / turning-point wording.
 * @param addHeader When true, a descriptive header (label, author, id) is emitted before the content block.
 * @param sliceId Currently unused; kept for interface compatibility.
 * @returns The formatted block, terminated by a newline.
 */
export function returnFormattedMessageContent(
  semanticSettings: Partial<TurningPointDetectorConfig>,
  m: Message,
  dimension: number = 0,
  addHeader = false,
  sliceId = false
): string {
  const isBaseDimension = dimension === 0;
  const strippedContent = selectivelyStripMarkdown(m.message);

  // NOTE(review): without a header the block starts with a lone ']' and the header itself
  // opens with '[' but closes with ')'. Both look unintended, but they are part of the
  // emitted prompt text and are therefore preserved byte-for-byte here.
  let header = ']';
  if (addHeader) {
    const label = isBaseDimension ? `"${m.author}"` : `Turning Point: "${m.author}"`;
    const labelKind = isBaseDimension
      ? 'Author\'s name'
      : 'Source of Turning Point (this is a turning point comprising of messages (2-or-more) that are part of a larger single conversation)';
    header = `${label} [${labelKind}): "${m.author}" \nID: "${m.id}"`;
  }

  // Only base-dimension messages are truncated. The 20000 fallback is always capped to 8000.
  const configuredLimit = semanticSettings?.max_character_length;
  const sliceEnd = isBaseDimension
    ? Math.min(configuredLimit != null ? configuredLimit / 2 : 20000, 8000)
    : strippedContent.length;

  const indentedBody = strippedContent
    .slice(0, sliceEnd)
    .split("\n")
    .map((line) => ` ${line}`)
    .join("\n");

  // Every character that is neither a word character nor whitespace becomes '-'.
  const sanitizedAuthor = m.author.replace(/[^\w\s]/g, '-');

  // NOTE(review): the closing delimiter contains a stray ' }"' after the id; preserved as-is.
  return (
    `${header}\n` +
    `------ start of message content from ${isBaseDimension ? 'author' : 'meta'}:"${sanitizedAuthor}"` +
    ` author="${m.author}" id="${m.id}" dimension="${dimension}"------\n\n` +
    `${indentedBody}\n[content may be truncated, original length: ${m.message.length}]\n\n` +
    `---------- end of message content for id="${m.id}" }" author="${m.author}" ----------\n`
  );
}