create-ai-chat-context-experimental
Version:
Phase 2: TypeScript rewrite - AI Chat Context & Memory System with conversation extraction and AICF format support (powered by aicf-core v2.1.0).
205 lines • 7.8 kB
JavaScript
;
/**
* This file is part of create-ai-chat-context-experimental.
* Licensed under the GNU Affero General Public License v3.0 or later (AGPL-3.0-or-later).
* See LICENSE file for details.
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.GenericParser = void 0;
const index_js_1 = require("../types/index.js");
const index_js_2 = require("../types/index.js");
/**
* Parse generic/unknown conversation formats
* Attempts to extract messages from various text formats
*/
class GenericParser {
    /**
     * Parse raw conversation text of unknown format into messages.
     *
     * Blank or missing input produces an empty, successful result. Any
     * exception raised during extraction is wrapped in an ExtractionError and
     * returned as an Err value instead of being thrown.
     *
     * @param rawData - Raw data in unknown format
     * @param conversationId - Conversation ID copied onto every message
     * @returns Result with Message[] or error
     */
    parse(rawData, conversationId) {
        try {
            if (!rawData || rawData.trim().length === 0) {
                return (0, index_js_2.Ok)([]);
            }
            const messages = this.extractMessages(rawData, conversationId);
            return (0, index_js_2.Ok)(messages);
        }
        catch (error) {
            const message = error instanceof Error ? error.message : 'Unknown error';
            return (0, index_js_2.Err)(new index_js_1.ExtractionError(`Failed to parse generic data: ${message}`, error));
        }
    }
    /**
     * Extract messages by trying each supported format in order: JSON, then
     * "role: content" lines, then markdown "## Role" sections. The first
     * format that yields at least one message wins.
     *
     * If no format matches and the trimmed input is longer than 10
     * characters, the whole input becomes a single 'user' message.
     *
     * @param rawData - Raw data string
     * @param conversationId - Conversation ID
     * @returns Message[] (possibly empty)
     */
    extractMessages(rawData, conversationId) {
        const strategies = [
            () => this.tryParseJSON(rawData, conversationId),
            () => this.tryParseLineFormat(rawData, conversationId),
            () => this.tryParseMarkdownFormat(rawData, conversationId),
        ];
        for (const strategy of strategies) {
            const found = strategy();
            if (found.length > 0) {
                return found;
            }
        }
        // Fallback: treat entire content as single message
        const wholeText = rawData.trim();
        if (wholeText.length > 10) {
            return [
                {
                    id: 'generic-message-0',
                    conversationId,
                    timestamp: new Date().toISOString(),
                    role: 'user',
                    content: wholeText, // full content, not truncated
                },
            ];
        }
        return [];
    }
    /**
     * Try to parse JSON format: either an array of message-like objects or a
     * single message-like object.
     *
     * For each object the content is the first non-empty value among
     * `content`, `message` and `text` (converted with toString()); objects
     * with no non-empty value are skipped. The role is taken verbatim from
     * `role`, then `type`, defaulting to 'user' - it is NOT normalized here.
     * A missing timestamp is replaced by the current time.
     *
     * IDs are numbered by position among the object entries of the array, so
     * skipped entries leave gaps in the numbering.
     *
     * @param rawData - Raw data
     * @param conversationId - Conversation ID
     * @returns Message[] or empty array when the input is not usable JSON
     */
    tryParseJSON(rawData, conversationId) {
        try {
            const data = JSON.parse(rawData);
            // JSON primitives (null, numbers, strings, booleans) carry no messages.
            if (data === null || typeof data !== 'object') {
                return [];
            }
            const records = Array.isArray(data)
                ? data.filter((item) => item && typeof item === 'object')
                : [data];
            const messages = [];
            records.forEach((record, index) => {
                // Pick the first field that actually holds text, so an empty
                // `content` does not hide a populated `message` or `text`.
                let content = '';
                for (const candidate of [record.content, record.message, record.text]) {
                    if (candidate === null || candidate === undefined) {
                        continue;
                    }
                    content = candidate.toString();
                    if (content.length > 0) {
                        break;
                    }
                }
                if (content.length === 0) {
                    return;
                }
                messages.push({
                    id: `generic-json-${index}`,
                    conversationId,
                    timestamp: record.timestamp ?? new Date().toISOString(),
                    role: (record.role ?? record.type ?? 'user'),
                    content,
                });
            });
            return messages;
        }
        catch {
            // Not valid JSON (or a value that cannot be stringified):
            // report "no messages" so the caller moves on to the next format.
            return [];
        }
    }
    /**
     * Try to parse line-based format (role: content).
     *
     * Each line is examined on its own. A line counts as a message when it
     * starts with one of user/assistant/ai/human/me/you (case-insensitive)
     * followed by a colon, and the text after the colon is longer than 5
     * characters. Lines that do not match are skipped. 'assistant' and 'ai'
     * map to the 'assistant' role; every other prefix maps to 'user'.
     *
     * @param rawData - Raw data
     * @param conversationId - Conversation ID
     * @returns Message[] or empty array
     */
    tryParseLineFormat(rawData, conversationId) {
        const messages = [];
        for (const line of rawData.split('\n')) {
            const match = line.trim().match(/^(user|assistant|ai|human|me|you)\s*:\s*(.+)$/i);
            if (!match || !match[1] || !match[2]) {
                continue;
            }
            const role = match[1].toLowerCase();
            const content = match[2].trim();
            if (content.length <= 5) {
                continue;
            }
            messages.push({
                // messages.length is the count emitted so far, i.e. the next index
                id: `generic-line-${messages.length}`,
                conversationId,
                timestamp: new Date().toISOString(),
                role: role === 'assistant' || role === 'ai' ? 'assistant' : 'user',
                content, // full content, not truncated
            });
        }
        return messages;
    }
    /**
     * Try to parse markdown-style format (## User / ## Assistant).
     *
     * The input is split at every line starting with "## ". The first line of
     * each section is the role header and the remaining lines are the
     * content. A section becomes a message when its content is longer than 5
     * characters and the header names a known role:
     *   - assistant: contains "assistant", or "ai" as a standalone token
     *   - user: contains "user" or "human"
     * If a header matches both, the assistant role wins. Sections with any
     * other header are skipped.
     *
     * @param rawData - Raw data
     * @param conversationId - Conversation ID
     * @returns Message[] or empty array
     */
    tryParseMarkdownFormat(rawData, conversationId) {
        const messages = [];
        for (const section of rawData.split(/^##\s+/m)) {
            if (!section.trim()) {
                continue;
            }
            const lines = section.split('\n');
            const roleHeader = lines[0]?.trim().toLowerCase() ?? '';
            const content = lines.slice(1).join('\n').trim();
            if (content.length <= 5) {
                continue;
            }
            // "ai" must not be part of a longer alphabetic word; a plain
            // substring test would classify headers such as "Details",
            // "Main points" or "Email" as assistant messages.
            const isAssistant = /assistant|(?<![a-z])ai(?![a-z])/.test(roleHeader);
            const isUser = /user|human/.test(roleHeader);
            if (!isAssistant && !isUser) {
                continue;
            }
            messages.push({
                id: `generic-markdown-${messages.length}`,
                conversationId,
                timestamp: new Date().toISOString(),
                role: isAssistant ? 'assistant' : 'user',
                content, // full content, not truncated
            });
        }
        return messages;
    }
    /**
     * Detect if data is in generic format.
     *
     * This parser is the catch-all: any non-blank text is accepted (JSON,
     * "role: content" lines, markdown sections, or plain text), so the only
     * inputs rejected are missing, non-string, or whitespace-only values.
     *
     * @param rawData - Raw data
     * @returns true if data appears to be in a parseable format
     */
    isGenericData(rawData) {
        return typeof rawData === 'string' && rawData.trim().length > 0;
    }
}
exports.GenericParser = GenericParser;
//# sourceMappingURL=GenericParser.js.map