// @aituber-onair/core
// Version:
// Core library for AITuber OnAir providing voice synthesis and chat processing.
// 548 lines • 20.9 kB
// JavaScript
import { EventEmitter } from './EventEmitter';
import { ChatProcessor } from './ChatProcessor';
import { MemoryManager } from './MemoryManager';
import { ChatServiceFactory, } from '@aituber-onair/chat';
import { OpenAISummarizer } from '../services/chat/providers/openai/OpenAISummarizer';
import { GeminiSummarizer } from '../services/chat/providers/gemini/GeminiSummarizer';
import { ClaudeSummarizer } from '../services/chat/providers/claude/ClaudeSummarizer';
import { VoiceEngineAdapter, } from '@aituber-onair/voice';
import { textToScreenplay, screenplayToText, } from '@aituber-onair/chat';
import { ToolExecutor } from './ToolExecutor';
/**
 * Sentence-boundary characters used to split speech text, keyed by locale.
 *
 * Full-width punctuation is written with \u escapes so the ASCII and
 * full-width forms cannot be confused with (or collapsed into) each other:
 *   \uFF01 = full-width exclamation mark
 *   \uFF1F = full-width question mark
 *   \uFF0C = full-width comma
 */
const SPEECH_CHUNK_SEPARATOR_PRESETS_BASE = {
    ja: ['。', '!', '?', '、', ',', '…', '\uFF01', '\uFF1F', '\uFF0C'],
    en: ['.', '!', '?'],
    ko: ['.', '!', '?', '。', '\uFF01', '\uFF1F'],
    zh: ['。', '!', '?', ',', '、', '\uFF01', '\uFF1F', '\uFF0C'],
};
/**
 * All locale presets plus `all`, the de-duplicated union of every locale's
 * separators.
 */
const SPEECH_CHUNK_SEPARATOR_PRESETS = {
    ...SPEECH_CHUNK_SEPARATOR_PRESETS_BASE,
    all: Array.from(new Set(Object.values(SPEECH_CHUNK_SEPARATOR_PRESETS_BASE).flat())),
};
/** Separators used when the configured locale has no preset. */
const FALLBACK_SEPARATORS = ['.', '!', '?', '。', '\uFF01', '\uFF1F'];
/** Line-break characters that split text regardless of the chosen separators. */
const ALWAYS_SPLIT_CHARACTERS = ['\n', '\r'];
/**
 * Names of the events emitted by AITuberOnAirCore.
 * Each member maps a symbolic key to the string used as the event name.
 */
export var AITuberOnAirCoreEvent = AITuberOnAirCoreEvent || {};
Object.assign(AITuberOnAirCoreEvent, {
    /** Processing started */
    PROCESSING_START: 'processingStart',
    /** Processing ended */
    PROCESSING_END: 'processingEnd',
    /** Assistant (partial) response */
    ASSISTANT_PARTIAL: 'assistantPartial',
    /** Assistant response completed */
    ASSISTANT_RESPONSE: 'assistantResponse',
    /** Speech started */
    SPEECH_START: 'speechStart',
    /** Speech ended */
    SPEECH_END: 'speechEnd',
    /** Error occurred */
    ERROR: 'error',
    /** Tool use */
    TOOL_USE: 'toolUse',
    /** Tool result */
    TOOL_RESULT: 'toolResult',
    /** Chat history set */
    CHAT_HISTORY_SET: 'chatHistorySet',
    /** Chat history cleared */
    CHAT_HISTORY_CLEARED: 'chatHistoryCleared',
    /** Memory created */
    MEMORY_CREATED: 'memoryCreated',
    /** Memory removed */
    MEMORY_REMOVED: 'memoryRemoved',
    /** Memory loaded */
    MEMORY_LOADED: 'memoryLoaded',
    /** Memory saved */
    MEMORY_SAVED: 'memorySaved',
    /** Storage cleared */
    STORAGE_CLEARED: 'storageCleared',
});
/**
 * AITuberOnAirCore is a core class that integrates the main features of AITuber
 * - Chat processing (ChatService, ChatProcessor)
 * - Speech synthesis (VoiceService)
 * - Memory management (MemoryManager)
 *
 * Events named in AITuberOnAirCoreEvent are emitted on the instance itself.
 */
export class AITuberOnAirCore extends EventEmitter {
    /**
     * Constructor
     * @param options Configuration options. Fields read here: debug,
     * speechChunking, chatProvider, tools, apiKey, model, providerOptions,
     * mcpServers, memoryOptions, memoryStorage, chatOptions, voiceOptions.
     */
    constructor(options) {
        super();
        this.isProcessing = false;
        this.toolExecutor = new ToolExecutor();
        this.debug = options.debug || false;
        // Speech chunking is disabled unless explicitly enabled
        const speechChunkingOptions = options.speechChunking ?? {};
        this.speechChunkEnabled = speechChunkingOptions.enabled ?? false;
        this.speechChunkMinWords = Math.max(0, speechChunkingOptions.minWords ?? 0);
        this.speechChunkLocale = speechChunkingOptions.locale ?? 'ja';
        this.speechChunkSeparators = speechChunkingOptions.separators;
        // Determine provider name (default is 'openai')
        const providerName = options.chatProvider || 'openai';
        // Register tools
        options.tools?.forEach((t) => this.toolExecutor.register(t.definition, t.handler));
        // Build chat service options; `tools` is set last so providerOptions cannot override it
        const chatServiceOptions = {
            apiKey: options.apiKey,
            model: options.model,
            ...options.providerOptions,
            tools: this.toolExecutor.listDefinitions(),
        };
        // Add MCP servers for providers that support remote MCP
        const supportsRemoteMcp = providerName === 'claude' ||
            providerName === 'openai' ||
            providerName === 'gemini';
        if (supportsRemoteMcp && options.mcpServers) {
            chatServiceOptions.mcpServers = options.mcpServers;
            // Also set MCP servers in ToolExecutor for handling MCP tool calls
            this.toolExecutor.setMCPServers(options.mcpServers);
        }
        // Initialize ChatService
        this.chatService = ChatServiceFactory.createChatService(providerName, chatServiceOptions);
        // Initialize MemoryManager (optional); the summarizer follows the chat provider
        if (options.memoryOptions?.enableSummarization) {
            let summarizer;
            if (providerName === 'gemini') {
                summarizer = new GeminiSummarizer(options.apiKey, options.model, options.memoryOptions.summaryPromptTemplate);
            }
            else if (providerName === 'claude') {
                summarizer = new ClaudeSummarizer(options.apiKey, options.model, options.memoryOptions.summaryPromptTemplate);
            }
            else {
                summarizer = new OpenAISummarizer(options.apiKey, options.model, options.memoryOptions.summaryPromptTemplate);
            }
            this.memoryManager = new MemoryManager(options.memoryOptions, summarizer, options.memoryStorage);
        }
        // Initialize ChatProcessor
        this.chatProcessor = new ChatProcessor(this.chatService, {
            ...options.chatOptions,
            useMemory: !!this.memoryManager,
        }, this.memoryManager, this.handleToolUse.bind(this));
        // Forward events
        this.setupEventForwarding();
        // Initialize VoiceService (optional)
        if (options.voiceOptions) {
            this.voiceService = new VoiceEngineAdapter(options.voiceOptions);
        }
        this.log('AITuberOnAirCore initialized');
    }
    /**
     * Process text chat
     * @param text User input text
     * @returns true when processing completed without throwing; false when
     * another request is already in progress or an error occurred (the error
     * is emitted as an ERROR event)
     */
    async processChat(text) {
        if (this.isProcessing) {
            this.log('Already processing another chat');
            return false;
        }
        try {
            this.isProcessing = true;
            this.emit(AITuberOnAirCoreEvent.PROCESSING_START, { text });
            // Process text chat
            await this.chatProcessor.processTextChat(text);
            return true;
        }
        catch (error) {
            this.log('Error in processChat:', error);
            this.emit(AITuberOnAirCoreEvent.ERROR, error);
            return false;
        }
        finally {
            // Always release the busy flag, whether processing succeeded or failed
            this.isProcessing = false;
            this.emit(AITuberOnAirCoreEvent.PROCESSING_END);
        }
    }
    /**
     * Process image-based chat
     * @param imageDataUrl Image data URL
     * @param visionPrompt Custom prompt for describing the image (optional)
     * @returns true when processing completed without throwing; false when
     * another request is already in progress or an error occurred (the error
     * is emitted as an ERROR event)
     */
    async processVisionChat(imageDataUrl, visionPrompt) {
        if (this.isProcessing) {
            this.log('Already processing another chat');
            return false;
        }
        try {
            this.isProcessing = true;
            this.emit(AITuberOnAirCoreEvent.PROCESSING_START, { type: 'vision' });
            // Update vision prompt if provided
            if (visionPrompt) {
                this.chatProcessor.updateOptions({ visionPrompt });
            }
            // Process image in ChatProcessor
            await this.chatProcessor.processVisionChat(imageDataUrl);
            return true;
        }
        catch (error) {
            this.log('Error in processVisionChat:', error);
            this.emit(AITuberOnAirCoreEvent.ERROR, error);
            return false;
        }
        finally {
            // Always release the busy flag, whether processing succeeded or failed
            this.isProcessing = false;
            this.emit(AITuberOnAirCoreEvent.PROCESSING_END);
        }
    }
    /**
     * Stop speech playback.
     * Does nothing when no voice service is configured; otherwise stops the
     * voice service and emits SPEECH_END.
     */
    stopSpeech() {
        if (this.voiceService) {
            this.voiceService.stop();
            this.emit(AITuberOnAirCoreEvent.SPEECH_END);
        }
    }
    /**
     * Get chat history
     * @returns The chat log held by the ChatProcessor
     */
    getChatHistory() {
        return this.chatProcessor.getChatLog();
    }
    /**
     * Set chat history from external source
     * @param messages Message array to set as chat history
     */
    setChatHistory(messages) {
        this.chatProcessor.setChatLog(messages);
        this.emit(AITuberOnAirCoreEvent.CHAT_HISTORY_SET, messages);
    }
    /**
     * Clear chat history.
     * Emits CHAT_HISTORY_CLEARED and, when memory is enabled, also clears all
     * memories.
     */
    clearChatHistory() {
        this.chatProcessor.clearChatLog();
        this.emit(AITuberOnAirCoreEvent.CHAT_HISTORY_CLEARED);
        if (this.memoryManager) {
            this.memoryManager.clearAllMemories();
        }
    }
    /**
     * Update voice service.
     * Updates the existing voice service in place, or creates one when none
     * has been configured yet.
     * @param options New voice service options
     */
    updateVoiceService(options) {
        if (this.voiceService) {
            this.voiceService.updateOptions(options);
        }
        else {
            this.voiceService = new VoiceEngineAdapter(options);
        }
    }
    /**
     * Update speech chunking behaviour.
     * Only the fields present on `options` are changed.
     * @param options Partial chunking settings (enabled, minWords, locale, separators)
     */
    updateSpeechChunking(options) {
        if (options.enabled !== undefined) {
            this.speechChunkEnabled = options.enabled;
        }
        if (options.minWords !== undefined) {
            // Negative values are clamped to 0
            this.speechChunkMinWords = Math.max(0, options.minWords);
        }
        if (options.locale) {
            this.speechChunkLocale = options.locale;
        }
        // `in` check so that passing `separators: undefined` resets to the locale preset
        if ('separators' in options) {
            this.speechChunkSeparators = options.separators;
        }
    }
    /**
     * Speak text with custom voice options
     * @param text Text to speak
     * @param options Speech options (temporaryVoiceOptions, enableAnimation, audioElementId)
     * @returns Promise that resolves when speech is complete. Errors are not
     * rethrown; they are emitted as ERROR events.
     */
    async speakTextWithOptions(text, options) {
        if (!this.voiceService) {
            this.log('Voice service is not initialized');
            return;
        }
        this.log(`Speaking text with options: ${JSON.stringify(options)}`);
        // Snapshot of the voice options taken before temporary ones are applied;
        // stays undefined when nothing was changed
        let originalVoiceOptions;
        let temporaryVoiceOptionKeys;
        try {
            // Apply temporary voice options if provided
            if (options?.temporaryVoiceOptions) {
                const serviceWithOptions = this.voiceService;
                const currentOptions = serviceWithOptions.options || {};
                // Save a shallow copy of current options for restoration
                originalVoiceOptions = { ...currentOptions };
                // Track which keys are newly introduced so we can remove them later
                temporaryVoiceOptionKeys = Object.keys(options.temporaryVoiceOptions).filter((key) => !(key in currentOptions));
                this.voiceService.updateOptions(options.temporaryVoiceOptions);
            }
            // Set up audio options
            const audioOptions = {
                enableAnimation: options?.enableAnimation,
                audioElementId: options?.audioElementId,
            };
            const screenplay = textToScreenplay(text);
            // generate raw text (text with emotion tags)
            const rawText = screenplayToText(screenplay);
            // pass screenplay object as event data
            this.emit(AITuberOnAirCoreEvent.SPEECH_START, { screenplay, rawText });
            // Play the audio
            await this.voiceService.speakText(rawText, audioOptions);
            // Speech end event
            this.emit(AITuberOnAirCoreEvent.SPEECH_END);
        }
        catch (error) {
            this.log('Error in speakTextWithOptions:', error);
            this.emit(AITuberOnAirCoreEvent.ERROR, error);
        }
        finally {
            // Restore original options only if temporary ones were applied;
            // otherwise the voice service is left untouched
            if (originalVoiceOptions && this.voiceService) {
                const resetOptions = { ...originalVoiceOptions };
                // Keys that did not exist before are reset to undefined
                for (const key of temporaryVoiceOptionKeys ?? []) {
                    resetOptions[key] = undefined;
                }
                this.voiceService.updateOptions(resetOptions);
            }
        }
    }
    /**
     * Setup forwarding of ChatProcessor events.
     * Re-emits ChatProcessor (and MemoryManager error) events on this
     * instance, and speaks each assistant response when a voice service exists.
     */
    setupEventForwarding() {
        this.chatProcessor.on('processingStart', (data) => {
            this.emit(AITuberOnAirCoreEvent.PROCESSING_START, data);
        });
        this.chatProcessor.on('processingEnd', () => {
            this.emit(AITuberOnAirCoreEvent.PROCESSING_END);
        });
        this.chatProcessor.on('assistantPartialResponse', (text) => {
            this.emit(AITuberOnAirCoreEvent.ASSISTANT_PARTIAL, text);
        });
        this.chatProcessor.on('assistantResponse', async (data) => {
            const { message, screenplay } = data;
            // Generate the raw text with emotion tags using utility function
            const rawText = screenplayToText(screenplay);
            // Fire assistant response event
            this.emit(AITuberOnAirCoreEvent.ASSISTANT_RESPONSE, {
                message,
                screenplay,
                rawText,
            });
            // Speech synthesis and playback (if VoiceService exists)
            if (!this.voiceService) {
                return;
            }
            try {
                this.emit(AITuberOnAirCoreEvent.SPEECH_START, screenplay);
                const chunks = this.splitTextForSpeech(screenplay.text);
                const emotion = screenplay.emotion;
                // Speak chunks strictly one after another. Starting every
                // speak() call at once would let chunks overlap or play out
                // of order.
                for (const chunk of chunks) {
                    if (!chunk) {
                        continue;
                    }
                    const chunkScreenplay = emotion
                        ? { emotion, text: chunk }
                        : { text: chunk };
                    await this.voiceService.speak(chunkScreenplay, {
                        enableAnimation: true,
                    });
                }
                this.emit(AITuberOnAirCoreEvent.SPEECH_END);
            }
            catch (error) {
                this.log('Error in speech synthesis:', error);
                this.emit(AITuberOnAirCoreEvent.ERROR, error);
            }
        });
        this.chatProcessor.on('error', (error) => {
            this.emit(AITuberOnAirCoreEvent.ERROR, error);
        });
        if (this.memoryManager) {
            this.memoryManager.on('error', (error) => {
                this.emit(AITuberOnAirCoreEvent.ERROR, error);
            });
        }
    }
    /**
     * Handle tool use.
     * Emits TOOL_USE before running the tools and TOOL_RESULT afterwards.
     * @param blocks Tool use blocks
     * @returns Tool result blocks
     */
    async handleToolUse(blocks) {
        this.emit(AITuberOnAirCoreEvent.TOOL_USE, blocks);
        const results = await this.toolExecutor.run(blocks);
        this.emit(AITuberOnAirCoreEvent.TOOL_RESULT, results);
        return results;
    }
    /**
     * Split screenplay text into smaller chunks for sequential speech synthesis.
     * Falls back to the original text when no delimiters are present.
     * @param text Text to split (may be null/undefined)
     * @returns Array of trimmed chunks; empty when the text is empty or
     * whitespace only, and a single-element array when chunking is disabled
     */
    splitTextForSpeech(text) {
        const normalized = text?.trim();
        if (!normalized) {
            return [];
        }
        if (!this.speechChunkEnabled) {
            return [normalized];
        }
        const activeSeparators = this.getActiveSpeechSeparators();
        const baseChunks = this.segmentTextBySeparators(normalized, activeSeparators);
        if (baseChunks.length === 0) {
            return [normalized];
        }
        const minWords = this.speechChunkMinWords;
        // With a minimum of 0 or 1 there is nothing to merge
        if (minWords <= 1) {
            return baseChunks;
        }
        // Merge consecutive short chunks until each merged chunk reaches minWords
        const merged = [];
        let buffer = '';
        const pushBuffer = () => {
            const trimmed = buffer.trim();
            if (trimmed.length > 0) {
                merged.push(trimmed);
            }
            buffer = '';
        };
        baseChunks.forEach((chunk, index) => {
            if (!buffer) {
                if (this.countApproxWords(chunk) >= minWords) {
                    merged.push(chunk);
                }
                else {
                    buffer = chunk;
                }
                return;
            }
            const candidate = `${buffer}${chunk}`;
            const isLastChunk = index === baseChunks.length - 1;
            buffer = candidate;
            // Flush once long enough, or when no more chunks can be appended
            if (this.countApproxWords(candidate) >= minWords || isLastChunk) {
                pushBuffer();
            }
        });
        // Flush a short trailing chunk that never reached minWords
        pushBuffer();
        return merged.length > 0 ? merged : [normalized];
    }
    /**
     * Resolve the separator characters currently in effect.
     * Custom separators take precedence over the locale preset; the fallback
     * list is used when the locale has no preset. Line-break characters are
     * always included.
     * @returns De-duplicated array of non-empty separator strings
     */
    getActiveSpeechSeparators() {
        const baseSeparators = this.speechChunkSeparators && this.speechChunkSeparators.length > 0
            ? this.speechChunkSeparators
            : SPEECH_CHUNK_SEPARATOR_PRESETS[this.speechChunkLocale] ||
                FALLBACK_SEPARATORS;
        const unique = new Set();
        [...baseSeparators, ...ALWAYS_SPLIT_CHARACTERS].forEach((char) => {
            if (char && char.length > 0) {
                unique.add(char);
            }
        });
        return Array.from(unique);
    }
    /**
     * Cut text after every separator character, keeping the separator at the
     * end of its chunk.
     * @param text Text to segment
     * @param separators Separator strings; each is matched against one
     * iterated character of the text
     * @returns Trimmed, non-empty chunks, or `[text]` when nothing was produced
     */
    segmentTextBySeparators(text, separators) {
        if (separators.length === 0) {
            return [text];
        }
        const separatorSet = new Set(separators);
        const chunks = [];
        let buffer = '';
        for (const char of text) {
            buffer += char;
            if (separatorSet.has(char)) {
                const trimmed = buffer.trim();
                if (trimmed.length > 0) {
                    chunks.push(trimmed);
                }
                buffer = '';
            }
        }
        // Whatever follows the last separator becomes the final chunk
        const tail = buffer.trim();
        if (tail.length > 0) {
            chunks.push(tail);
        }
        return chunks.length > 0 ? chunks : [text];
    }
    /**
     * Approximate the number of words in a text.
     * Text containing whitespace is counted by whitespace-separated tokens;
     * text without whitespace is counted by its string length instead.
     * @param text Text to measure
     * @returns Approximate word count (0 for empty or whitespace-only text)
     */
    countApproxWords(text) {
        const trimmed = text.trim();
        if (!trimmed) {
            return 0;
        }
        const spaceSeparated = trimmed.split(/\s+/u).filter(Boolean);
        if (spaceSeparated.length > 1) {
            return spaceSeparated.length;
        }
        return trimmed.length;
    }
    /**
     * Output debug log (only in debug mode)
     * @param args Values passed through to console.log
     */
    log(...args) {
        if (this.debug) {
            console.log('[AITuberOnAirCore]', ...args);
        }
    }
    /**
     * Generate new content based on the system prompt and the provided message history (one-shot).
     * The provided message history is used only for this generation and does not affect the internal chat history.
     * This is ideal for generating standalone content like blog posts, reports, or summaries from existing conversations.
     *
     * @param prompt The system prompt to guide the content generation
     * @param messageHistory The message history to use as context
     * @returns The generated content as a string (all text blocks concatenated)
     */
    async generateOneShotContentFromHistory(prompt, messageHistory) {
        const messages = [{ role: 'system', content: prompt }];
        messages.push(...messageHistory);
        const result = await this.chatService.chatOnce(messages, false, () => { });
        return result.blocks
            .filter((b) => b.type === 'text')
            .map((b) => b.text)
            .join('');
    }
    /**
     * Check if memory functionality is enabled
     * @returns true when a MemoryManager was created
     */
    isMemoryEnabled() {
        return !!this.memoryManager;
    }
    /**
     * Remove all event listeners
     */
    offAll() {
        this.removeAllListeners();
    }
    /**
     * Get current provider information
     * @returns Object with `name` ('unknown' when the chat service exposes no
     * provider) and `model` (undefined when the chat service exposes no model)
     */
    getProviderInfo() {
        // Read the properties directly so this does not depend on a specific
        // provider implementation
        const provider = this.chatService.provider;
        const model = this.chatService.model;
        return {
            name: provider ? provider : 'unknown',
            model: model ? model : undefined,
        };
    }
    /**
     * Get list of available providers
     * @returns Array of available provider names
     */
    static getAvailableProviders() {
        return ChatServiceFactory.getAvailableProviders();
    }
    /**
     * Get list of models supported by the specified provider
     * @param providerName Provider name
     * @returns Array of supported models
     */
    static getSupportedModels(providerName) {
        return ChatServiceFactory.getSupportedModels(providerName);
    }
}
//# sourceMappingURL=AITuberOnAirCore.js.map