UNPKG

create-ai-chat-context-experimental

Version:

Phase 2: TypeScript rewrite - AI Chat Context & Memory System with conversation extraction and AICF format support (powered by aicf-core v2.1.0).

205 lines 7.8 kB
"use strict"; /** * This file is part of create-ai-chat-context-experimental. * Licensed under the GNU Affero General Public License v3.0 or later (AGPL-3.0-or-later). * See LICENSE file for details. */ Object.defineProperty(exports, "__esModule", { value: true }); exports.GenericParser = void 0; const index_js_1 = require("../types/index.js"); const index_js_2 = require("../types/index.js"); /** * Parse generic/unknown conversation formats * Attempts to extract messages from various text formats */ class GenericParser { /** * Parse generic data into messages * * @param rawData - Raw data in unknown format * @param conversationId - Conversation ID * @returns Result with Message[] or error */ parse(rawData, conversationId) { try { if (!rawData || rawData.trim().length === 0) { return (0, index_js_2.Ok)([]); } const messages = this.extractMessages(rawData, conversationId); return (0, index_js_2.Ok)(messages); } catch (error) { const message = error instanceof Error ? error.message : 'Unknown error'; return (0, index_js_2.Err)(new index_js_1.ExtractionError(`Failed to parse generic data: ${message}`, error)); } } /** * Extract messages from generic data * @param rawData - Raw data string * @param conversationId - Conversation ID * @returns Message[] */ extractMessages(rawData, conversationId) { const messages = []; const messageIndex = 0; // Try JSON format first const jsonMessages = this.tryParseJSON(rawData, conversationId); if (jsonMessages.length > 0) { return jsonMessages; } // Try line-based format (role: content) const lineMessages = this.tryParseLineFormat(rawData, conversationId); if (lineMessages.length > 0) { return lineMessages; } // Try markdown-style format (## User / ## Assistant) const markdownMessages = this.tryParseMarkdownFormat(rawData, conversationId); if (markdownMessages.length > 0) { return markdownMessages; } // Fallback: treat entire content as single message if (rawData.trim().length > 10) { messages.push({ id: `generic-message-${messageIndex}`, conversationId, timestamp: new Date().toISOString(), role: 'user', content: rawData.trim(), // ✅ FULL content, not truncated }); } return messages; } /** * Try to parse JSON format * @param rawData - Raw data * @param conversationId - Conversation ID * @returns Message[] or empty array */ tryParseJSON(rawData, conversationId) { try { const data = JSON.parse(rawData); // Check if it's an array of messages if (Array.isArray(data)) { return data .filter((item) => item && typeof item === 'object') .map((item, index) => ({ id: `generic-json-${index}`, conversationId, timestamp: item.timestamp ?? new Date().toISOString(), role: (item.role ?? item.type ?? 'user'), content: (item.content ?? item.message ?? item.text ?? '').toString(), })) .filter((msg) => msg.content.length > 0); } // Check if it's a single message object if (data.content || data.message || data.text) { return [ { id: 'generic-json-0', conversationId, timestamp: data.timestamp ?? new Date().toISOString(), role: (data.role ?? data.type ?? 'user'), content: (data.content ?? data.message ?? data.text ?? '').toString(), }, ]; } } catch { // Not valid JSON, continue to next format } return []; } /** * Try to parse line-based format (role: content) * @param rawData - Raw data * @param conversationId - Conversation ID * @returns Message[] or empty array */ tryParseLineFormat(rawData, conversationId) { const messages = []; const lines = rawData.split('\n'); let messageIndex = 0; for (const line of lines) { const trimmed = line.trim(); if (!trimmed) continue; // Look for "role: content" format const match = trimmed.match(/^(user|assistant|ai|human|me|you)\s*:\s*(.+)$/i); if (match && match[1] && match[2]) { const role = match[1].toLowerCase(); const content = match[2].trim(); if (content.length > 5) { messages.push({ id: `generic-line-${messageIndex}`, conversationId, timestamp: new Date().toISOString(), role: role === 'assistant' || role === 'ai' ? 'assistant' : 'user', content, // ✅ FULL content, not truncated }); messageIndex++; } } } return messages; } /** * Try to parse markdown-style format (## User / ## Assistant) * @param rawData - Raw data * @param conversationId - Conversation ID * @returns Message[] or empty array */ tryParseMarkdownFormat(rawData, conversationId) { const messages = []; const sections = rawData.split(/^##\s+/m); let messageIndex = 0; for (const section of sections) { if (!section.trim()) continue; // Extract role from first line const lines = section.split('\n'); const roleHeader = lines[0]?.trim().toLowerCase() ?? ''; const content = lines.slice(1).join('\n').trim(); if (content.length > 5) { const isAssistant = roleHeader.includes('assistant') || roleHeader.includes('ai'); const isUser = roleHeader.includes('user') || roleHeader.includes('human'); if (isAssistant || isUser) { messages.push({ id: `generic-markdown-${messageIndex}`, conversationId, timestamp: new Date().toISOString(), role: isAssistant ? 'assistant' : 'user', content, // ✅ FULL content, not truncated }); messageIndex++; } } } return messages; } /** * Detect if data is in generic format * @param rawData - Raw data * @returns true if data appears to be in a parseable format */ isGenericData(rawData) { if (!rawData || rawData.trim().length === 0) { return false; } // Check for JSON if (rawData.trim().startsWith('{') || rawData.trim().startsWith('[')) { return true; } // Check for line-based format if (/^(user|assistant|ai|human|me|you)\s*:/im.test(rawData)) { return true; } // Check for markdown format if (/^##\s+(user|assistant|ai|human)/im.test(rawData)) { return true; } // Generic text is always parseable return true; } } exports.GenericParser = GenericParser; //# sourceMappingURL=GenericParser.js.map