echogarden
Version:
An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.
166 lines • 7.66 kB
JavaScript
import { decodeToChannels, SampleFormat } from '../audio/AudioBufferConversion.js';
import { getShortLanguageCode, lcidToIsoLanguageCode } from '../utilities/Locale.js';
import { Logger } from '../utilities/Logger.js';
import { logToStderr } from '../utilities/Utilities.js';
// Short alias for the stderr logging helper imported above.
// It is not referenced anywhere in the visible part of this file.
const log = logToStderr;
/**
 * Synthesizes speech for `text` through the Windows SAPI COM automation
 * interface (accessed with the `winax` package) and collects word and phone
 * timing events emitted during synthesis.
 *
 * Any failure while loading `winax`, creating the COM objects, selecting the
 * voice, subscribing to events, starting the speech, or finalizing the result
 * rejects the returned promise.
 *
 * @param {string} text - Text to speak. Word events are cut out of this string
 *   using the character offset and length reported by SAPI.
 * @param {string} [voiceName] - Voice description to select. When falsy, or
 *   when no installed voice has this exact description, the voice already set
 *   on the SAPI object is kept.
 * @param {number} [rate=0] - Value assigned to the voice's `Rate` property.
 * @param {boolean} [useSpeechPlatform=false] - When true, the voice and phone
 *   converter are created from the `Speech.*` ProgIDs instead of `SAPI.*`.
 * @returns {Promise<{rawAudio: {audioChannels: any, sampleRate: number}, timeline: Array}>}
 *   Resolves once SAPI signals the end of the stream, with the decoded audio
 *   and the word/phone timeline.
 */
export async function synthesize(text, voiceName, rate = 0, useSpeechPlatform = false) {
    const logger = new Logger();

    logger.start('Initialize winax module');
    const { default: WinAX } = await import('winax');
    const ActiveXObject = global.ActiveXObject;

    logger.start('Create SAPI COM object');
    const sapiVoice = new ActiveXObject(useSpeechPlatform ? 'Speech.SPVoice' : 'SAPI.SPVoice');
    sapiVoice.EventInterests = 33790; // SpeechVoiceEvents.SVEAllEvents

    logger.start('Get SAPI voice list and select best match');
    if (voiceName) {
        const voiceObjects = sapiVoice.GetVoices();

        // No early exit: if several voices share a description, the last one wins.
        for (let i = 0; i < voiceObjects.Count; i++) {
            const voiceObject = voiceObjects.Item(i);

            if (voiceObject.GetDescription() === voiceName) {
                sapiVoice.Voice = voiceObject;
            }
        }
    }

    sapiVoice.Rate = rate;

    // Create a phone converter configured for the selected voice's language
    const sapiPhoneConverter = new ActiveXObject(useSpeechPlatform ? 'Speech.SpPhoneConverter' : 'SAPI.SpPhoneConverter');
    const sapiLanguageCodeHex = sapiVoice.Voice.GetAttribute('Language');
    sapiPhoneConverter.LanguageId = parseInt(sapiLanguageCodeHex, 16);

    logger.start('Synthesize with SAPI');
    const sampleRate = 22050;
    // 16-bit mono output: two bytes per sample. Used to convert stream byte offsets to seconds.
    const bytesPerSecond = sampleRate * 2;

    const sapiOutputStream = new ActiveXObject('SAPI.SpMemoryStream');
    sapiOutputStream.Format.Type = 22; // format code for SAFT22kHz16BitMono
    sapiVoice.AudioOutputStream = sapiOutputStream;

    // Only the event-driven part needs a manually constructed promise.
    // Everything above rejects naturally through the async function.
    return new Promise((resolve, reject) => {
        // Periodically pump the message queue so the COM event callbacks get delivered.
        const dispatchMessagesInterval = setInterval(() => {
            WinAX.peekAndDispatchMessages();
        }, 50);

        // Stops the message pump and rejects, so a failure never leaves the timer running.
        const fail = (error) => {
            clearInterval(dispatchMessagesInterval);
            reject(error);
        };

        try {
            const connectionPoints = WinAX.getConnectionPoints(sapiVoice);
            const connectionPoint = connectionPoints[0];

            // NOTE(review): the result was never used in the original code. The call is
            // kept in case winax relies on it before `advise` - confirm and remove if not.
            connectionPoint.getMethods();

            const events = [];
            let lastWordCharPos = -1;

            connectionPoint.advise({
                StartStream: () => {
                },

                Word: (streamId, streamPos, charPos, length) => {
                    // Skip repeated notifications for the same character position
                    if (lastWordCharPos === charPos) {
                        return;
                    }

                    events.push({
                        type: 'word',
                        text: text.substring(charPos, charPos + length),
                        startTime: streamPos / bytesPerSecond,
                        endTime: -1, // placeholder, filled in by eventsToTimeline
                        timeline: []
                    });

                    lastWordCharPos = charPos;
                },

                Phoneme: (streamId, streamPos, duration, nextPhoneId, feature, currentPhoneId) => {
                    // Phones reported before the first word cannot be attached to anything
                    if (events.length === 0) {
                        return;
                    }

                    const phoneText = sapiPhoneConverter.IdToPhone(currentPhoneId);

                    if (phoneText === ',' || phoneText === '_') {
                        return;
                    }

                    const startTime = streamPos / bytesPerSecond;
                    // `duration` is divided by 1000, i.e. treated as milliseconds
                    const endTime = startTime + (duration / 1000);

                    events.push({ type: 'phone', text: phoneText, startTime, endTime });
                },

                EndStream: (streamId, streamPos) => {
                    try {
                        clearInterval(dispatchMessagesInterval);

                        const audioData = new Uint8Array(sapiOutputStream.GetData());
                        const audioChannels = decodeToChannels(audioData, 1, 16, SampleFormat.PCM);

                        WinAX.release(sapiOutputStream);
                        WinAX.release(sapiVoice);

                        logger.end();

                        const totalDuration = audioChannels[0].length / sampleRate;

                        resolve({
                            rawAudio: { audioChannels, sampleRate },
                            timeline: eventsToTimeline(events, totalDuration)
                        });
                    }
                    catch (error) {
                        // Without this, a failure while finalizing would leave the promise pending
                        fail(error);
                    }
                }
            });

            sapiVoice.Speak(text);
        }
        catch (error) {
            fail(error);
        }
    });
}
/**
 * Lists the voices reported by the SAPI voice COM object.
 *
 * For each voice, the hexadecimal `Language` attribute is parsed and passed to
 * `lcidToIsoLanguageCode`. Every resulting code is included in the voice's
 * `languages` together with its short form from `getShortLanguageCode`,
 * without duplicates.
 *
 * @param {boolean} [useSpeechPlatform=false] - When true, the voice object is
 *   created from the `Speech.SPVoice` ProgID instead of `SAPI.SPVoice`.
 * @returns {Promise<Array<{name: string, languages: string[], gender: string}>>}
 *   One entry per voice; `gender` is the lowercased `Gender` attribute, or
 *   `'unknown'` when that attribute is missing or empty.
 * @throws {Error} When a voice's language code can't be translated.
 */
export async function getVoiceList(useSpeechPlatform = false) {
    const { default: WinAX } = await import('winax');
    const ActiveXObject = global.ActiveXObject;

    const sapiVoice = new ActiveXObject(useSpeechPlatform ? 'Speech.SPVoice' : 'SAPI.SPVoice');

    try {
        const voiceObjects = sapiVoice.GetVoices();
        const voices = [];

        for (let i = 0; i < voiceObjects.Count; i++) {
            const voiceObject = voiceObjects.Item(i);

            const voiceName = voiceObject.GetDescription();
            const voiceGender = voiceObject.GetAttribute('Gender')?.toLowerCase();

            const sapiLanguageCodeHex = voiceObject.GetAttribute('Language');
            const sapiLanguageCode = parseInt(sapiLanguageCodeHex, 16);

            const languageCodes = await lcidToIsoLanguageCode(sapiLanguageCode);

            if (!languageCodes) {
                throw new Error(`Couldn't translate SAPI language code ${sapiLanguageCode} to ISO, for voice '${voiceName}'`);
            }

            // A Set keeps first-insertion order, so the result order matches
            // pushing each full code followed by its short form, skipping repeats.
            const uniqueLanguageCodes = new Set();

            for (const languageCode of languageCodes) {
                uniqueLanguageCodes.add(languageCode);
                uniqueLanguageCodes.add(getShortLanguageCode(languageCode));
            }

            voices.push({
                name: voiceName,
                languages: [...uniqueLanguageCodes],
                gender: voiceGender || 'unknown'
            });
        }

        return voices;
    }
    finally {
        // Release the COM object even when enumeration or translation fails
        WinAX.release(sapiVoice);
    }
}
/**
 * Verifies that SAPI speech synthesis can be used in the current environment.
 *
 * Checks, in order: the process is running on Windows, the `winax` package can
 * be imported, and a `SAPI.SPVoice` COM object can be created. When
 * `testForSpeechPlatform` is true, it additionally checks that a
 * `Speech.SPVoice` object can be created.
 *
 * @param {boolean} [testForSpeechPlatform=false] - Also require the Microsoft
 *   Speech Platform (`Speech.SPVoice`) to be usable.
 * @returns {Promise<void>} Resolves when every requested check passes.
 * @throws {Error} Describing the first check that failed. Where an underlying
 *   error exists it is attached as `cause`.
 */
export async function AssertSAPIAvailable(testForSpeechPlatform = false) {
    if (process.platform !== 'win32') {
        throw new Error(`SAPI is not available on your platform. SAPI is a Microsoft Windows technology that is only runs on a Windows OS.`);
    }

    let WinAX;

    try {
        ({ default: WinAX } = await import('winax'));
    }
    catch (e) {
        throw new Error(`winax package, which is required for SAPI support, was not found. You can install it by running 'npm install winax -g'.`, { cause: e });
    }

    const ActiveXObject = global.ActiveXObject;

    let sapiProbeVoice;

    try {
        sapiProbeVoice = new ActiveXObject('SAPI.SPVoice');
    }
    catch (e) {
        throw new Error(`Failed creating a SAPI instance: ${e}`, { cause: e });
    }

    // The probe object is only needed to prove creation works
    WinAX.release(sapiProbeVoice);

    // Only probe the Speech Platform runtime when the caller asked for it. Previously this
    // ran unconditionally, failing on machines that have SAPI but not the Speech Platform.
    if (testForSpeechPlatform) {
        let speechPlatformProbeVoice;

        try {
            speechPlatformProbeVoice = new ActiveXObject('Speech.SPVoice');
        }
        catch (e) {
            throw new Error(`Failed creating an msspeech instance. Please ensure you installed the Microsoft Speech Platform runtime correctly.`, { cause: e });
        }

        WinAX.release(speechPlatformProbeVoice);
    }
}
/**
 * Groups a flat, ordered list of synthesis events into a word-level timeline.
 *
 * Word events become top-level entries. Each phone event is appended to the
 * `timeline` of the most recent word and moves that word's `endTime` to the
 * phone's `endTime`. The word event objects are reused and mutated in place.
 *
 * A word that received no phone events still carries the placeholder
 * `endTime` of -1. It is given the start time of the following word, or
 * `totalDuration` when it is the last word.
 *
 * @param {Array<object>} events - Events with `type` of `'word'` or `'phone'`.
 *   Events of any other type are ignored.
 * @param {number} totalDuration - Used as the end time of a final word that
 *   has no phones.
 * @returns {Array<object>} The word entries, in order of appearance.
 * @throws {Error} When a phone event appears before any word event.
 */
function eventsToTimeline(events, totalDuration) {
    const timeline = [];

    for (const event of events) {
        if (event.type === 'word') {
            timeline.push(event);
        }
        else if (event.type === 'phone') {
            if (timeline.length === 0) {
                throw new Error('Unexpected: phone event preceded a word event');
            }

            const currentWordEntry = timeline[timeline.length - 1];

            currentWordEntry.endTime = event.endTime;
            currentWordEntry.timeline.push(event);
        }
    }

    // Resolve every remaining placeholder end time, not just the last word's
    timeline.forEach((wordEntry, index) => {
        if (wordEntry.endTime !== -1) {
            return;
        }

        const nextWordEntry = timeline[index + 1];

        wordEntry.endTime = nextWordEntry ? nextWordEntry.startTime : totalDuration;
    });

    return timeline;
}
//# sourceMappingURL=SapiTTS.js.map