@gaiaverse/semantic-turning-point-detector
Version:
Detects key semantic turning points in conversations using recursive semantic distance analysis. Ideal for conversation analysis, dialogue segmentation, insight detection, and AI-assisted reasoning tasks.
77 lines (76 loc) • 3.89 kB
JavaScript
;
// CommonJS export wiring: sets `__esModule: true` on the exports object and
// publishes the two functions of this file. The assignments reference the
// function declarations further down, which are hoisted.
Object.defineProperty(exports, "__esModule", { value: true });
exports.selectivelyStripMarkdown = selectivelyStripMarkdown;
exports.returnFormattedMessageContent = returnFormattedMessageContent;
/**
 * Selectively removes or reformats Markdown elements: headings always, lists optionally.
 *
 * Headings (one to six `#` followed by whitespace) are rewritten according to
 * `options.headingStyle`:
 *   - 'bold' (default, also used for unrecognised values): text wrapped in `**`
 *   - 'italic': text wrapped in `*`
 *   - 'bold-italic': text wrapped in `***`
 *   - 'prefix': `options.headingPrefix` (default 'heading: ') followed by the text
 *   - 'plain': the text alone
 * List markers (`*`, `-`, `+`, `1.`) are removed only when `options.removeLists`
 * is true; the indentation in front of the marker is kept.
 *
 * Every rewrite is confined to a single line, so content remains on its own
 * line and the line breaks of the input are preserved.
 *
 * @param markdown The input Markdown string.
 * @param options Optional configuration: `removeLists`, `headingStyle`, `headingPrefix`.
 * @returns The processed string.
 */
function selectivelyStripMarkdown(markdown, options) {
    // --- Configuration Defaults ---
    const shouldRemoveLists = options?.removeLists ?? false;
    const headingStyle = options?.headingStyle ?? 'bold';
    const headingPrefix = options?.headingPrefix ?? 'heading: ';

    // The whitespace around a marker is matched with "any whitespace except line
    // terminators" (`[^\S\r\n\u2028\u2029]`) instead of `\s`. `\s` also matches
    // newlines, which allowed a marker with nothing after it on its line (a lone
    // "#", "-" or "1.") to consume the line break and absorb the following line.

    // --- Heading Replacement ---
    // 'headingText' is the captured remainder of the line, e.g. "Heading Title"
    // for the input line "## Heading Title".
    let result = markdown.replace(/^#{1,6}[^\S\r\n\u2028\u2029]+(.*)/gm, (match, headingText) => {
        switch (headingStyle) {
            case 'italic':
                return `*${headingText}*`;
            case 'bold-italic':
                return `***${headingText}***`;
            case 'prefix':
                return `${headingPrefix}${headingText}`;
            case 'plain':
                return headingText;
            case 'bold': // Fallthrough: 'bold' is also the fallback for unknown styles
            default:
                return `**${headingText}**`;
        }
    });

    // --- List Removal (Optional) ---
    if (shouldRemoveLists) {
        // Remove unordered list markers (*, -, +), preserving indentation
        result = result.replace(/^([^\S\r\n\u2028\u2029]*)(?:[-*+])[^\S\r\n\u2028\u2029]+(.*)/gm, '$1$2');
        // Remove ordered list markers (1., 2.), preserving indentation
        result = result.replace(/^([^\S\r\n\u2028\u2029]*)(?:\d+\.)[^\S\r\n\u2028\u2029]+(.*)/gm, '$1$2');
    }
    return result;
}
/**
 * Formats a message as a clearly delimited block of text so that it is easy to
 * recognise as contextual information when passed to an LLM or NLP model.
 *
 * The output consists of a header line, a "start of message content" line, the
 * message body (processed by `selectivelyStripMarkdown`, each line prefixed
 * with one space), a note giving the original message length, and an "end of
 * message content" footer.
 *
 * For `dimension === 0` the body is cut to at most
 * `min(semanticSettings.max_character_length / 2, 8000)` characters (8000 when
 * the setting is absent); for any other dimension the body is not cut.
 *
 * @param semanticSettings Optional settings object; only `max_character_length` is read.
 * @param m The item to format; its `author`, `id` and `message` properties are read.
 *   A `null`/`undefined` `message` is treated as an empty string.
 * @param dimension `0` labels the block as coming from an author; any other
 *   value labels it as a turning point ("meta").
 * @param addHeader When true, the header names the author/source and the ID.
 * @param sliceId Not read by this function; kept so existing call sites stay valid.
 * @returns The formatted block, ending with a newline.
 */
function returnFormattedMessageContent(semanticSettings, m, dimension = 0, addHeader = false, sliceId = false) {
    const isBaseDimension = dimension === 0;

    // Normalise once: a missing message yields an empty body instead of a
    // TypeError from the Markdown stripping or from the length lookup below.
    const originalMessage = m.message ?? '';
    const messageContent = selectivelyStripMarkdown(originalMessage);

    // NOTE(review): the delimiters look unbalanced -- a lone "]" is emitted when
    // no header is requested, and the header opens with "[" but closes with ")".
    // The text is kept byte-for-byte because it is part of the emitted prompt
    // format; confirm the intended shape before changing it.
    let header = ']';
    if (addHeader) {
        const title = isBaseDimension ? `"${m.author}"` : `Turning Point: "${m.author}"`;
        const label = isBaseDimension
            ? 'Author\'s name'
            : 'Source of Turning Point (this is a turning point comprising of messages (2-or-more) that are part of a larger single conversation)';
        header = `${title}\n[${label}): "${m.author}" \nID: "${m.id}"`;
    }

    // Only base-dimension content is shortened. The fallback of 20000 is always
    // capped by the hard limit, so the effective default is 8000 characters.
    const configuredMax = semanticSettings?.max_character_length;
    const characterLimit = isBaseDimension
        ? Math.min(configuredMax != null ? configuredMax / 2 : 20000, 8000)
        : messageContent.length;

    const indentedContent = messageContent
        .slice(0, characterLimit)
        .split("\n")
        .map((line) => ` ${line}`)
        .join("\n");

    // Every character that is neither a word character nor whitespace is
    // replaced with "-" in the author tag; whitespace is kept as-is.
    const authorTag = m.author.replace(/[^\w\s]/g, '-');

    // The "may be truncated" note is emitted unconditionally, even when nothing
    // was cut. NOTE(review): the footer contains a line break followed by a
    // stray `}"` before ` author=`; preserved as-is, confirm whether intended.
    return `${header}\n` +
        `------ start of message content from ${isBaseDimension ? 'author' : 'meta'}:"${authorTag}"` +
        ` author="${m.author}" id="${m.id}" dimension="${dimension}"------\n\n` +
        `${indentedContent}\n[content may be truncated, original length: ${originalMessage.length}]\n\n` +
        `---------- end of message content for id="${m.id}"\n}" author="${m.author}" ----------\n`;
}
//# sourceMappingURL=stripContent.js.map