UNPKG

echogarden

Version:

An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.

166 lines 7.66 kB
import { decodeToChannels, SampleFormat } from '../audio/AudioBufferConversion.js';
import { getShortLanguageCode, lcidToIsoLanguageCode } from '../utilities/Locale.js';
import { Logger } from '../utilities/Logger.js';
import { logToStderr } from '../utilities/Utilities.js';

const log = logToStderr;

// Value assigned to SpVoice.EventInterests.
// NOTE(review): 33790 appears to be SVEAllEvents in the SAPI SpeechVoiceEvents enum - confirm.
const SAPI_EVENT_INTERESTS = 33790;

// Format code for SAFT22kHz16BitMono (22050 Hz, 16-bit, mono PCM).
const SAPI_AUDIO_FORMAT_TYPE = 22;
const SAPI_SAMPLE_RATE = 22050;
const SAPI_BYTES_PER_SAMPLE = 2;

// How often (in milliseconds) pending COM messages are dispatched while synthesizing.
const MESSAGE_DISPATCH_INTERVAL_MS = 50;

/**
 * Synthesizes `text` to audio using a SAPI (or Microsoft Speech Platform) voice,
 * collecting word and phone events into a timeline.
 *
 * @param {string} text - Text to speak.
 * @param {string} [voiceName] - Voice description to select. When falsy, or when no
 *   installed voice has this exact description, the voice object's default voice is used.
 * @param {number} [rate=0] - Value assigned to the voice's `Rate` property.
 * @param {boolean} [useSpeechPlatform=false] - Use the `Speech.*` COM classes instead of `SAPI.*`
 *   for the voice and the phone converter.
 * @returns {Promise<{rawAudio: {audioChannels: Array, sampleRate: number}, timeline: Array}>}
 *   Resolves when the `EndStream` event fires. Rejects if initialization, synthesis,
 *   or decoding of the output fails.
 */
export function synthesize(text, voiceName, rate = 0, useSpeechPlatform = false) {
    return new Promise((resolve, reject) => {
        let WinAX;
        let sapiVoice;
        let sapiOutputStream;
        let dispatchMessagesInterval;

        const stopDispatchingMessages = () => {
            if (dispatchMessagesInterval !== undefined) {
                clearInterval(dispatchMessagesInterval);
                dispatchMessagesInterval = undefined;
            }
        };

        // Idempotent: each COM object is released at most once.
        const releaseComObjects = () => {
            if (sapiOutputStream !== undefined) {
                const stream = sapiOutputStream;
                sapiOutputStream = undefined;
                WinAX.release(stream);
            }

            if (sapiVoice !== undefined) {
                const voice = sapiVoice;
                sapiVoice = undefined;
                WinAX.release(voice);
            }
        };

        // Single failure path: stop the message pump, free COM objects, and reject.
        const fail = (error) => {
            stopDispatchingMessages();

            try {
                releaseComObjects();
            } catch (releaseError) {
                // Deliberately ignored so the original error is the one reported.
            }

            reject(error);
        };

        const run = async () => {
            const logger = new Logger();

            logger.start('Initialize winax module');
            ({ default: WinAX } = await import('winax'));
            const ActiveXObject = global.ActiveXObject;

            logger.start('Create SAPI COM object');
            sapiVoice = new ActiveXObject(useSpeechPlatform ? 'Speech.SPVoice' : 'SAPI.SPVoice');
            sapiVoice.EventInterests = SAPI_EVENT_INTERESTS;

            logger.start('Get SAPI voice list and select best match');

            if (voiceName) {
                const voiceObjects = sapiVoice.GetVoices();

                for (let i = 0; i < voiceObjects.Count; i++) {
                    const voiceObject = voiceObjects.Item(i);

                    if (voiceObject.GetDescription() === voiceName) {
                        sapiVoice.Voice = voiceObject;
                        break;
                    }
                }
            }

            sapiVoice.Rate = rate;

            // Create phone converter for language
            const sapiPhoneConverter = new ActiveXObject(useSpeechPlatform ? 'Speech.SpPhoneConverter' : 'SAPI.SpPhoneConverter');
            const sapiLanguageCodeHex = sapiVoice.Voice.GetAttribute('Language');
            const sapiLanguageCode = parseInt(sapiLanguageCodeHex, 16);
            sapiPhoneConverter.LanguageId = sapiLanguageCode;

            logger.start('Synthesize with SAPI');

            const sampleRate = SAPI_SAMPLE_RATE;
            const bytesPerSecond = sampleRate * SAPI_BYTES_PER_SAMPLE;

            sapiOutputStream = new ActiveXObject('SAPI.SpMemoryStream');
            sapiOutputStream.Format.Type = SAPI_AUDIO_FORMAT_TYPE;
            sapiVoice.AudioOutputStream = sapiOutputStream;

            dispatchMessagesInterval = setInterval(() => {
                WinAX.peekAndDispatchMessages();
            }, MESSAGE_DISPATCH_INTERVAL_MS);

            const connectionPoints = WinAX.getConnectionPoints(sapiVoice);
            const connectionPoint = connectionPoints[0];

            // NOTE(review): the result is unused; the call is kept in case it has
            // initialization side effects inside winax - confirm before removing.
            const methods = connectionPoint.getMethods();

            const events = [];
            let lastWordCharPos = -1;

            connectionPoint.advise({
                StartStream: () => { },

                Word: (streamId, streamPos, charPos, length) => {
                    // Ignore a repeated event for the same character position.
                    if (lastWordCharPos === charPos) {
                        return;
                    }

                    events.push({
                        type: 'word',
                        text: text.substring(charPos, charPos + length),
                        startTime: streamPos / bytesPerSecond,
                        endTime: -1,
                        timeline: []
                    });

                    lastWordCharPos = charPos;
                },

                Phoneme: (streamId, streamPos, duration, nextPhoneId, feature, currentPhoneId) => {
                    // Phones reported before the first word have no word entry to attach to.
                    if (events.length === 0) {
                        return;
                    }

                    const phoneText = sapiPhoneConverter.IdToPhone(currentPhoneId);

                    if (phoneText === ',' || phoneText === '_') {
                        return;
                    }

                    const startTime = streamPos / bytesPerSecond;
                    const endTime = startTime + (duration / 1000);

                    events.push({ type: 'phone', text: phoneText, startTime, endTime });
                },

                EndStream: (streamId, streamPos) => {
                    try {
                        stopDispatchingMessages();

                        const audioData = new Uint8Array(sapiOutputStream.GetData());
                        const audioChannels = decodeToChannels(audioData, 1, 16, SampleFormat.PCM);

                        releaseComObjects();
                        logger.end();

                        resolve({
                            rawAudio: { audioChannels, sampleRate },
                            timeline: eventsToTimeline(events, audioChannels[0].length / sampleRate)
                        });
                    } catch (e) {
                        fail(e);
                    }
                }
            });

            sapiVoice.Speak(text);
        };

        run().catch(fail);
    });
}

/**
 * Lists the installed SAPI (or Microsoft Speech Platform) voices.
 *
 * @param {boolean} [useSpeechPlatform=false] - Query `Speech.SPVoice` instead of `SAPI.SPVoice`.
 * @returns {Promise<Array<{name: string, languages: string[], gender: string}>>}
 *   One entry per voice. `languages` holds each ISO language code followed by its short form,
 *   without duplicates. `gender` is lowercased, or 'unknown' when the attribute is empty.
 * @throws {Error} If a voice's SAPI language code cannot be translated to ISO.
 */
export async function getVoiceList(useSpeechPlatform = false) {
    const { default: WinAX } = await import('winax');
    const ActiveXObject = global.ActiveXObject;

    const sapiVoice = new ActiveXObject(useSpeechPlatform ? 'Speech.SPVoice' : 'SAPI.SPVoice');

    try {
        const voiceObjects = sapiVoice.GetVoices();
        const voices = [];

        for (let i = 0; i < voiceObjects.Count; i++) {
            const voiceObject = voiceObjects.Item(i);

            const voiceName = voiceObject.GetDescription();
            const voiceGender = voiceObject.GetAttribute('Gender')?.toLowerCase();

            const sapiLanguageCodeHex = voiceObject.GetAttribute('Language');
            const sapiLanguageCode = parseInt(sapiLanguageCodeHex, 16);

            const languageCodes = await lcidToIsoLanguageCode(sapiLanguageCode);

            if (!languageCodes) {
                throw new Error(`Couldn't translate SAPI language code ${sapiLanguageCode} to ISO, for voice '${voiceName}'`);
            }

            // A Set keeps first-insertion order, so the result order matches
            // "full code, then short code" for each language in turn.
            const resultLanguageCodes = new Set();

            for (const languageCode of languageCodes) {
                resultLanguageCodes.add(languageCode);
                resultLanguageCodes.add(getShortLanguageCode(languageCode));
            }

            voices.push({
                name: voiceName,
                languages: [...resultLanguageCodes],
                gender: voiceGender || 'unknown'
            });
        }

        return voices;
    } finally {
        // Release the COM object even when the loop above throws.
        WinAX.release(sapiVoice);
    }
}

/**
 * Verifies that SAPI can be used in the current environment.
 *
 * Checks, in order: the platform is Windows, the `winax` package can be imported,
 * a `SAPI.SPVoice` object can be created, and - only when `testForSpeechPlatform`
 * is true - a `Speech.SPVoice` object can be created.
 *
 * @param {boolean} [testForSpeechPlatform=false] - Also require the Microsoft Speech Platform runtime.
 * @returns {Promise<void>}
 * @throws {Error} Describing the first check that failed.
 */
export async function AssertSAPIAvailable(testForSpeechPlatform = false) {
    if (process.platform !== 'win32') {
        throw new Error(`SAPI is not available on your platform. SAPI is a Microsoft Windows technology that is only runs on a Windows OS.`);
    }

    let WinAX;

    try {
        ({ default: WinAX } = await import('winax'));
    } catch (e) {
        throw new Error(`winax package, which is required for SAPI support, was not found. You can install it by running 'npm install winax -g'.`, { cause: e });
    }

    const ActiveXObject = global.ActiveXObject;

    let sapiProbeVoice;

    try {
        sapiProbeVoice = new ActiveXObject('SAPI.SPVoice');
    } catch (e) {
        throw new Error(`Failed creating a SAPI instance: ${e}`);
    }

    WinAX.release(sapiProbeVoice);

    if (!testForSpeechPlatform) {
        return;
    }

    let speechPlatformProbeVoice;

    try {
        speechPlatformProbeVoice = new ActiveXObject('Speech.SPVoice');
    } catch (e) {
        throw new Error(`Failed creating an msspeech instance. Please ensure you installed the Microsoft Speech Platform runtime correctly.`);
    }

    WinAX.release(speechPlatformProbeVoice);
}

/**
 * Groups a flat list of word and phone events into a word-level timeline.
 *
 * Each 'word' event becomes a timeline entry. Each 'phone' event is appended to the
 * `timeline` of the most recent word, and that word's `endTime` is set to the phone's
 * `endTime`. A word that received no phones ends where the next word starts, or at
 * `totalDuration` if it is the last word. Word event objects are updated in place.
 *
 * @param {Array<Object>} events - Events in the order they were emitted.
 * @param {number} totalDuration - Total audio duration, in seconds.
 * @returns {Array<Object>} The word entries, each holding its phone events in `timeline`.
 * @throws {Error} If a phone event appears before any word event.
 */
function eventsToTimeline(events, totalDuration) {
    const timeline = [];

    for (const event of events) {
        if (event.type === 'word') {
            const previousWordEntry = timeline[timeline.length - 1];

            // A previous word without phones would otherwise keep the -1 placeholder.
            if (previousWordEntry && previousWordEntry.endTime === -1) {
                previousWordEntry.endTime = event.startTime;
            }

            timeline.push(event);
        } else if (event.type === 'phone') {
            if (timeline.length === 0) {
                throw new Error('Unexpected: phone event preceded a word event');
            }

            const lastWordEntry = timeline[timeline.length - 1];

            lastWordEntry.endTime = event.endTime;
            lastWordEntry.timeline.push(event);
        }
    }

    const finalWordEntry = timeline[timeline.length - 1];

    if (finalWordEntry && finalWordEntry.endTime === -1) {
        finalWordEntry.endTime = totalDuration;
    }

    return timeline;
}
//# sourceMappingURL=SapiTTS.js.map