UNPKG

@aituber-onair/core

Version:

Core library for AITuber OnAir providing voice synthesis and chat processing

548 lines 20.9 kB
import { EventEmitter } from './EventEmitter';
import { ChatProcessor } from './ChatProcessor';
import { MemoryManager } from './MemoryManager';
import { ChatServiceFactory, } from '@aituber-onair/chat';
import { OpenAISummarizer } from '../services/chat/providers/openai/OpenAISummarizer';
import { GeminiSummarizer } from '../services/chat/providers/gemini/GeminiSummarizer';
import { ClaudeSummarizer } from '../services/chat/providers/claude/ClaudeSummarizer';
import { VoiceEngineAdapter, } from '@aituber-onair/voice';
import { textToScreenplay, screenplayToText, } from '@aituber-onair/chat';
import { ToolExecutor } from './ToolExecutor';
// Per-locale sentence/phrase delimiters used to split synthesized speech into
// smaller chunks. NOTE(review): some entries look like full-width/half-width
// duplicates (e.g. in `ko`); they are harmless because the active set is
// deduplicated in getActiveSpeechSeparators().
const SPEECH_CHUNK_SEPARATOR_PRESETS_BASE = {
    ja: ['。', '!', '?', '、', ',', '…'],
    en: ['.', '!', '?'],
    ko: ['.', '!', '?', '。', '!', '?'],
    zh: ['。', '!', '?', ',', '、'],
};
// Presets plus an 'all' locale that is the deduplicated union of every base preset.
const SPEECH_CHUNK_SEPARATOR_PRESETS = {
    ...SPEECH_CHUNK_SEPARATOR_PRESETS_BASE,
    all: Array.from(new Set(Object.values(SPEECH_CHUNK_SEPARATOR_PRESETS_BASE).flat())),
};
// Used when the configured locale has no preset and no custom separators are set.
const FALLBACK_SEPARATORS = ['.', '!', '?', '。', '!', '?'];
// Line breaks always force a chunk boundary regardless of locale.
const ALWAYS_SPLIT_CHARACTERS = ['\n', '\r'];
/**
 * Event types for AITuberOnAirCore
 *
 * (Transpiled TypeScript enum: the IIFE below populates the exported object
 * with NAME -> string-value mappings.)
 */
export var AITuberOnAirCoreEvent;
(function (AITuberOnAirCoreEvent) {
    /** Processing started */
    AITuberOnAirCoreEvent["PROCESSING_START"] = "processingStart";
    /** Processing ended */
    AITuberOnAirCoreEvent["PROCESSING_END"] = "processingEnd";
    /** Assistant (partial) response */
    AITuberOnAirCoreEvent["ASSISTANT_PARTIAL"] = "assistantPartial";
    /** Assistant response completed */
    AITuberOnAirCoreEvent["ASSISTANT_RESPONSE"] = "assistantResponse";
    /** Speech started */
    AITuberOnAirCoreEvent["SPEECH_START"] = "speechStart";
    /** Speech ended */
    AITuberOnAirCoreEvent["SPEECH_END"] = "speechEnd";
    /** Error occurred */
    AITuberOnAirCoreEvent["ERROR"] = "error";
    /** Tool use */
    AITuberOnAirCoreEvent["TOOL_USE"] = "toolUse";
    /** Tool result */
    AITuberOnAirCoreEvent["TOOL_RESULT"] = "toolResult";
    /** Chat history set */
    AITuberOnAirCoreEvent["CHAT_HISTORY_SET"] = "chatHistorySet";
    /** Chat history cleared */
    AITuberOnAirCoreEvent["CHAT_HISTORY_CLEARED"] = "chatHistoryCleared";
    /** Memory created */
    AITuberOnAirCoreEvent["MEMORY_CREATED"] = "memoryCreated";
    /** Memory removed */
    AITuberOnAirCoreEvent["MEMORY_REMOVED"] = "memoryRemoved";
    /** Memory loaded */
    AITuberOnAirCoreEvent["MEMORY_LOADED"] = "memoryLoaded";
    /** Memory saved */
    AITuberOnAirCoreEvent["MEMORY_SAVED"] = "memorySaved";
    /** Storage cleared */
    AITuberOnAirCoreEvent["STORAGE_CLEARED"] = "storageCleared";
})(AITuberOnAirCoreEvent || (AITuberOnAirCoreEvent = {}));
/**
 * AITuberOnAirCore is a core class that integrates the main features of AITuber
 * - Chat processing (ChatService, ChatProcessor)
 * - Speech synthesis (VoiceService)
 * - Memory management (MemoryManager)
 *
 * Emits the events listed in AITuberOnAirCoreEvent; most are forwarded from
 * the internal ChatProcessor (see setupEventForwarding()).
 */
export class AITuberOnAirCore extends EventEmitter {
    /**
     * Constructor
     * @param options Configuration options
     *   - apiKey / model / chatProvider ('openai' by default) / providerOptions
     *   - tools: tool definitions + handlers registered on the ToolExecutor
     *   - mcpServers: only forwarded for 'claude' | 'openai' | 'gemini' providers
     *   - memoryOptions.enableSummarization: turns on the MemoryManager
     *   - voiceOptions: when present, a VoiceEngineAdapter is created
     *   - speechChunking: { enabled, minWords, locale, separators }
     */
    constructor(options) {
        super();
        // Re-entrancy guard: processChat/processVisionChat refuse to run concurrently.
        this.isProcessing = false;
        this.toolExecutor = new ToolExecutor();
        this.debug = options.debug || false;
        const speechChunkingOptions = options.speechChunking ?? {};
        // Chunking is opt-in and defaults to Japanese separators.
        this.speechChunkEnabled = speechChunkingOptions.enabled ?? false;
        this.speechChunkMinWords = Math.max(0, speechChunkingOptions.minWords ?? 0);
        this.speechChunkLocale = speechChunkingOptions.locale ?? 'ja';
        this.speechChunkSeparators = speechChunkingOptions.separators;
        // Determine provider name (default is 'openai')
        const providerName = options.chatProvider || 'openai';
        // Register tools
        options.tools?.forEach((t) => this.toolExecutor.register(t.definition, t.handler));
        // Build chat service options
        const chatServiceOptions = {
            apiKey: options.apiKey,
            model: options.model,
            ...options.providerOptions,
            tools: this.toolExecutor.listDefinitions(),
        };
        // Add MCP servers for providers that support remote MCP
        if ((providerName === 'claude' ||
            providerName === 'openai' ||
            providerName === 'gemini') &&
            options.mcpServers) {
            chatServiceOptions.mcpServers = options.mcpServers;
            // Also set MCP servers in ToolExecutor for handling MCP tool calls
            this.toolExecutor.setMCPServers(options.mcpServers);
        }
        // Initialize ChatService
        this.chatService = ChatServiceFactory.createChatService(providerName, chatServiceOptions);
        // Initialize MemoryManager (optional) — each provider gets its matching
        // summarizer; anything other than gemini/claude falls back to OpenAI's.
        if (options.memoryOptions?.enableSummarization) {
            let summarizer;
            if (providerName === 'gemini') {
                summarizer = new GeminiSummarizer(options.apiKey, options.model, options.memoryOptions.summaryPromptTemplate);
            }
            else if (providerName === 'claude') {
                summarizer = new ClaudeSummarizer(options.apiKey, options.model, options.memoryOptions.summaryPromptTemplate);
            }
            else {
                summarizer = new OpenAISummarizer(options.apiKey, options.model, options.memoryOptions.summaryPromptTemplate);
            }
            this.memoryManager = new MemoryManager(options.memoryOptions, summarizer, options.memoryStorage);
        }
        // Initialize ChatProcessor; memory usage is implied by the manager's presence.
        this.chatProcessor = new ChatProcessor(this.chatService, {
            ...options.chatOptions,
            useMemory: !!this.memoryManager,
        }, this.memoryManager, this.handleToolUse.bind(this));
        // Forward events
        this.setupEventForwarding();
        // Initialize VoiceService (optional)
        if (options.voiceOptions) {
            this.voiceService = new VoiceEngineAdapter(options.voiceOptions);
        }
        this.log('AITuberOnAirCore initialized');
    }
    /**
     * Process text chat
     * @param text User input text
     * @returns Success or failure of processing (false when already busy or on error;
     *          errors are also re-emitted as ERROR events, never thrown)
     */
    async processChat(text) {
        if (this.isProcessing) {
            this.log('Already processing another chat');
            return false;
        }
        try {
            this.isProcessing = true;
            this.emit(AITuberOnAirCoreEvent.PROCESSING_START, { text });
            // Process text chat
            await this.chatProcessor.processTextChat(text);
            return true;
        }
        catch (error) {
            this.log('Error in processChat:', error);
            this.emit(AITuberOnAirCoreEvent.ERROR, error);
            return false;
        }
        finally {
            // Always release the guard and signal completion, success or not.
            this.isProcessing = false;
            this.emit(AITuberOnAirCoreEvent.PROCESSING_END);
        }
    }
    /**
     * Process image-based chat
     * @param imageDataUrl Image data URL
     * @param visionPrompt Custom prompt for describing the image (optional;
     *        note this persistently updates the ChatProcessor's option, it is
     *        not scoped to this single call)
     * @returns Success or failure of processing
     */
    async processVisionChat(imageDataUrl, visionPrompt) {
        if (this.isProcessing) {
            this.log('Already processing another chat');
            return false;
        }
        try {
            this.isProcessing = true;
            this.emit(AITuberOnAirCoreEvent.PROCESSING_START, { type: 'vision' });
            // Update vision prompt if provided
            if (visionPrompt) {
                this.chatProcessor.updateOptions({ visionPrompt });
            }
            // Process image in ChatProcessor
            await this.chatProcessor.processVisionChat(imageDataUrl);
            return true;
        }
        catch (error) {
            this.log('Error in processVisionChat:', error);
            this.emit(AITuberOnAirCoreEvent.ERROR, error);
            return false;
        }
        finally {
            this.isProcessing = false;
            this.emit(AITuberOnAirCoreEvent.PROCESSING_END);
        }
    }
    /**
     * Stop speech playback (no-op when no voice service is configured).
     * Emits SPEECH_END so UI listeners can reset animation state.
     */
    stopSpeech() {
        if (this.voiceService) {
            this.voiceService.stop();
            this.emit(AITuberOnAirCoreEvent.SPEECH_END);
        }
    }
    /**
     * Get chat history
     */
    getChatHistory() {
        return this.chatProcessor.getChatLog();
    }
    /**
     * Set chat history from external source
     * @param messages Message array to set as chat history
     */
    setChatHistory(messages) {
        this.chatProcessor.setChatLog(messages);
        this.emit(AITuberOnAirCoreEvent.CHAT_HISTORY_SET, messages);
    }
    /**
     * Clear chat history (and all stored memories, when memory is enabled).
     */
    clearChatHistory() {
        this.chatProcessor.clearChatLog();
        this.emit(AITuberOnAirCoreEvent.CHAT_HISTORY_CLEARED);
        if (this.memoryManager) {
            this.memoryManager.clearAllMemories();
        }
    }
    /**
     * Update voice service
     * @param options New voice service options
     *        (creates the adapter lazily if none exists yet)
     */
    updateVoiceService(options) {
        if (this.voiceService) {
            this.voiceService.updateOptions(options);
        }
        else {
            this.voiceService = new VoiceEngineAdapter(options);
        }
    }
    /**
     * Update speech chunking behaviour.
     * Each field is applied only when present; `'separators' in options` is
     * checked with `in` so callers can explicitly set separators to undefined
     * to revert to the locale preset.
     */
    updateSpeechChunking(options) {
        if (options.enabled !== undefined) {
            this.speechChunkEnabled = options.enabled;
        }
        if (options.minWords !== undefined) {
            this.speechChunkMinWords = Math.max(0, options.minWords);
        }
        if (options.locale) {
            this.speechChunkLocale = options.locale;
        }
        if ('separators' in options) {
            this.speechChunkSeparators = options.separators;
        }
    }
    /**
     * Speak text with custom voice options
     * @param text Text to speak
     * @param options Speech options
     * @returns Promise that resolves when speech is complete
     *
     * Temporary voice options are applied for the duration of the call and
     * restored in `finally`; keys that did not exist before are reset to
     * undefined rather than deleted.
     */
    async speakTextWithOptions(text, options) {
        if (!this.voiceService) {
            this.log('Voice service is not initialized');
            return;
        }
        this.log(`Speaking text with options: ${JSON.stringify(options)}`);
        // Store the original voice options
        let originalVoiceOptions;
        let temporaryVoiceOptionKeys;
        try {
            // Apply temporary voice options if provided
            if (options?.temporaryVoiceOptions) {
                const serviceWithOptions = this.voiceService;
                const currentOptions = serviceWithOptions.options || {};
                // Save a shallow copy of current options for restoration
                originalVoiceOptions = { ...currentOptions };
                // Track which keys are newly introduced so we can remove them later
                temporaryVoiceOptionKeys = Object.keys(options.temporaryVoiceOptions).filter((key) => !(key in currentOptions));
                this.voiceService.updateOptions(options.temporaryVoiceOptions);
            }
            // Set up audio options
            const audioOptions = {
                enableAnimation: options?.enableAnimation,
                audioElementId: options?.audioElementId,
            };
            const screenplay = textToScreenplay(text);
            // generate raw text(text with emotion tags)
            const rawText = screenplayToText(screenplay);
            // pass screenplay object as event data
            this.emit(AITuberOnAirCoreEvent.SPEECH_START, { screenplay, rawText });
            // Play the audio
            await this.voiceService.speakText(rawText, audioOptions);
            // Speech end event
            this.emit(AITuberOnAirCoreEvent.SPEECH_END);
        }
        catch (error) {
            // Errors are reported via the ERROR event, not rethrown.
            this.log('Error in speakTextWithOptions:', error);
            this.emit(AITuberOnAirCoreEvent.ERROR, error);
        }
        finally {
            // Restore original options if they were changed.
            // NOTE(review): this runs even when no temporary options were applied,
            // in which case updateOptions receives an empty object — presumably a
            // no-op in VoiceEngineAdapter; verify against its implementation.
            if (this.voiceService) {
                const resetOptions = {
                    ...(originalVoiceOptions ?? {}),
                };
                if (temporaryVoiceOptionKeys) {
                    for (const key of temporaryVoiceOptionKeys) {
                        resetOptions[key] = undefined;
                    }
                }
                this.voiceService.updateOptions(resetOptions);
            }
        }
    }
    /**
     * Setup forwarding of ChatProcessor events.
     * Also wires completed assistant responses into chunked voice playback.
     */
    setupEventForwarding() {
        this.chatProcessor.on('processingStart', (data) => {
            this.emit(AITuberOnAirCoreEvent.PROCESSING_START, data);
        });
        this.chatProcessor.on('processingEnd', () => {
            this.emit(AITuberOnAirCoreEvent.PROCESSING_END);
        });
        this.chatProcessor.on('assistantPartialResponse', (text) => {
            this.emit(AITuberOnAirCoreEvent.ASSISTANT_PARTIAL, text);
        });
        this.chatProcessor.on('assistantResponse', async (data) => {
            const { message, screenplay } = data;
            // Generate the raw text with emotion tags using utility function
            const rawText = screenplayToText(screenplay);
            // Fire assistant response event
            this.emit(AITuberOnAirCoreEvent.ASSISTANT_RESPONSE, {
                message,
                screenplay,
                rawText,
            });
            // Speech synthesis and playback (if VoiceService exists)
            if (this.voiceService) {
                try {
                    this.emit(AITuberOnAirCoreEvent.SPEECH_START, screenplay);
                    const chunks = this.splitTextForSpeech(screenplay.text);
                    const emotion = screenplay.emotion;
                    // All chunk playbacks are started together and awaited with
                    // Promise.all. NOTE(review): whether chunks play sequentially
                    // therefore depends on queueing inside voiceService.speak —
                    // confirm against VoiceEngineAdapter.
                    const playbackPromises = chunks
                        .filter((chunk) => chunk)
                        .map((chunk) => {
                        // Propagate the screenplay's emotion tag to every chunk.
                        const chunkScreenplay = emotion
                            ? { emotion, text: chunk }
                            : { text: chunk };
                        return this.voiceService.speak(chunkScreenplay, {
                            enableAnimation: true,
                        });
                    });
                    await Promise.all(playbackPromises);
                    this.emit(AITuberOnAirCoreEvent.SPEECH_END);
                }
                catch (error) {
                    this.log('Error in speech synthesis:', error);
                    this.emit(AITuberOnAirCoreEvent.ERROR, error);
                }
            }
        });
        this.chatProcessor.on('error', (error) => {
            this.emit(AITuberOnAirCoreEvent.ERROR, error);
        });
        if (this.memoryManager) {
            this.memoryManager.on('error', (error) => {
                this.emit(AITuberOnAirCoreEvent.ERROR, error);
            });
        }
    }
    /**
     * Handle tool use
     * @param blocks Tool use blocks
     * @returns Tool result blocks
     *
     * Emits TOOL_USE before execution and TOOL_RESULT after, so listeners can
     * observe the full tool round-trip.
     */
    async handleToolUse(blocks) {
        this.emit(AITuberOnAirCoreEvent.TOOL_USE, blocks);
        const results = await this.toolExecutor.run(blocks);
        this.emit(AITuberOnAirCoreEvent.TOOL_RESULT, results);
        return results;
    }
    /**
     * Split screenplay text into smaller chunks for sequential speech synthesis.
     * Falls back to the original text when no delimiters are present.
     *
     * When speechChunkMinWords > 1, adjacent separator-delimited chunks are
     * merged until each merged chunk reaches the minimum word count (per
     * countApproxWords); a trailing short chunk is still emitted.
     */
    splitTextForSpeech(text) {
        const normalized = text?.trim();
        if (!normalized) {
            return [];
        }
        if (!this.speechChunkEnabled) {
            // Chunking disabled: speak the whole text as one unit.
            return [normalized];
        }
        const activeSeparators = this.getActiveSpeechSeparators();
        const baseChunks = this.segmentTextBySeparators(normalized, activeSeparators);
        if (baseChunks.length === 0) {
            return [normalized];
        }
        const minWords = this.speechChunkMinWords;
        if (minWords <= 1) {
            // No merging required: every separator-delimited chunk stands alone.
            return baseChunks;
        }
        const merged = [];
        let buffer = '';
        // Flush the accumulation buffer into `merged` (trimmed, non-empty only).
        const pushBuffer = () => {
            const trimmed = buffer.trim();
            if (trimmed.length > 0) {
                merged.push(trimmed);
            }
            buffer = '';
        };
        baseChunks.forEach((chunk, index) => {
            if (!buffer) {
                if (this.countApproxWords(chunk) >= minWords) {
                    // Long enough on its own — emit directly.
                    merged.push(chunk);
                }
                else {
                    // Too short — start accumulating.
                    buffer = chunk;
                }
                return;
            }
            const candidate = `${buffer}${chunk}`;
            // Flush once the merged candidate is long enough, or unconditionally
            // on the final chunk so no text is dropped.
            if (this.countApproxWords(candidate) >= minWords ||
                index === baseChunks.length - 1) {
                buffer = candidate;
                pushBuffer();
            }
            else {
                buffer = candidate;
            }
        });
        pushBuffer();
        return merged.length > 0 ? merged : [normalized];
    }
    /**
     * Resolve the active separator set: custom separators when configured,
     * otherwise the locale preset (or FALLBACK_SEPARATORS for unknown locales),
     * always unioned with line-break characters and deduplicated.
     */
    getActiveSpeechSeparators() {
        const baseSeparators = this.speechChunkSeparators && this.speechChunkSeparators.length > 0
            ? this.speechChunkSeparators
            : SPEECH_CHUNK_SEPARATOR_PRESETS[this.speechChunkLocale] ||
                FALLBACK_SEPARATORS;
        const unique = new Set();
        [...baseSeparators, ...ALWAYS_SPLIT_CHARACTERS].forEach((char) => {
            if (char && char.length > 0) {
                unique.add(char);
            }
        });
        return Array.from(unique);
    }
    /**
     * Split `text` at every occurrence of a separator character, keeping the
     * separator at the end of each chunk. Chunks are trimmed; empty chunks are
     * dropped. Returns [text] when no chunk survives.
     * NOTE: iterates by code point (for...of), so separators are expected to be
     * single characters.
     */
    segmentTextBySeparators(text, separators) {
        if (separators.length === 0) {
            return [text];
        }
        const separatorSet = new Set(separators);
        const chunks = [];
        let buffer = '';
        for (const char of text) {
            buffer += char;
            if (separatorSet.has(char)) {
                const trimmed = buffer.trim();
                if (trimmed.length > 0) {
                    chunks.push(trimmed);
                }
                buffer = '';
            }
        }
        // Flush any trailing text that had no terminating separator.
        const tail = buffer.trim();
        if (tail.length > 0) {
            chunks.push(tail);
        }
        return chunks.length > 0 ? chunks : [text];
    }
    /**
     * Approximate word count: whitespace-separated token count when the text
     * contains spaces; otherwise the character count (heuristic for CJK text,
     * which has no word-delimiting spaces).
     */
    countApproxWords(text) {
        const trimmed = text.trim();
        if (!trimmed) {
            return 0;
        }
        const spaceSeparated = trimmed.split(/\s+/u).filter(Boolean);
        if (spaceSeparated.length > 1) {
            return spaceSeparated.length;
        }
        return trimmed.length;
    }
    /**
     * Output debug log (only in debug mode)
     */
    log(...args) {
        if (this.debug) {
            console.log('[AITuberOnAirCore]', ...args);
        }
    }
    /**
     * Generate new content based on the system prompt and the provided message history (one-shot).
     * The provided message history is used only for this generation and does not affect the internal chat history.
     * This is ideal for generating standalone content like blog posts, reports, or summaries from existing conversations.
     *
     * @param prompt The system prompt to guide the content generation
     * @param messageHistory The message history to use as context
     * @returns The generated content as a string (concatenation of all text blocks)
     */
    async generateOneShotContentFromHistory(prompt, messageHistory) {
        const messages = [{ role: 'system', content: prompt }];
        messages.push(...messageHistory);
        // chatOnce is called without streaming and with a no-op partial callback.
        const result = await this.chatService.chatOnce(messages, false, () => { });
        return result.blocks
            .filter((b) => b.type === 'text')
            .map((b) => b.text)
            .join('');
    }
    /**
     * Check if memory functionality is enabled
     */
    isMemoryEnabled() {
        return !!this.memoryManager;
    }
    /**
     * Remove all event listeners
     */
    offAll() {
        this.removeAllListeners();
    }
    /**
     * Get current provider information
     * @returns Provider information object ({ name, model }; name falls back
     *          to 'unknown' when the service exposes no provider)
     */
    getProviderInfo() {
        // Safe method to get internal information without depending on specific provider implementation
        // If only available in specific provider implementations, cast to any type
        const provider = this.chatService.provider;
        const model = this.chatService.model;
        return {
            name: provider ? provider : 'unknown',
            model: model ? model : undefined,
        };
    }
    /**
     * Get list of available providers
     * @returns Array of available provider names
     */
    static getAvailableProviders() {
        return ChatServiceFactory.getAvailableProviders();
    }
    /**
     * Get list of models supported by the specified provider
     * @param providerName Provider name
     * @returns Array of supported models
     */
    static getSupportedModels(providerName) {
        return ChatServiceFactory.getSupportedModels(providerName);
    }
}
//# sourceMappingURL=AITuberOnAirCore.js.map