UNPKG

iobroker.sayit

Version:

Text to speech interface for ioBroker.

511 lines (460 loc) 20.6 kB
'use strict';

const fs = require('node:fs');
const mp3Duration = require('mp3-duration');
const engines = require('../admin/engines.js');
const sayitEngines = engines.sayitEngines;
const { URLSearchParams } = require('node:url');
const { PollyClient, SynthesizeSpeechCommand } = require('@aws-sdk/client-polly');

// These modules are required lazily, the first time a method needs them.
let cp;
let googleTTS;
let axios;

/**
 * Converts text into an audio file using one of several text-to-speech engines
 * (Google, Yandex, Yandex Cloud, AWS Polly, ioBroker cloud, Coqui TTS, PicoTTS).
 *
 * Every `sayItGetSpeech*` method writes its result to `this.MP3FILE`.
 */
class Text2Speech {
    /**
     * @param {object} adapter - adapter instance; this class uses its `config`, `log`,
     *        `readFileAsync`, `getForeignStateAsync` and `sendTo` members
     * @param {object} options - helper callbacks and settings
     * @param {Function} options.addToQueue - called as `(text, language, volume, testOptions)` for follow-up text parts
     * @param {Function} options.getCachedFileName - called as `(cacheDir, key, outFileExt)`; result is used as the cache file path
     * @param {Function} options.isCached - called as `(cacheDir, key, outFileExt, cacheExpiryDays)`; a truthy result is used as the cached file name
     * @param {string} options.MP3FILE - path of the file the generated audio is written to
     */
    constructor(adapter, options) {
        this.adapter = adapter;
        this.options = options;
        this.addToQueue = options.addToQueue;
        this.getCachedFileName = options.getCachedFileName;
        this.isCached = options.isCached;
        this.MP3FILE = options.MP3FILE;
        this.polly = null;
    }

    /**
     * Reads a file through `adapter.readFileAsync` when the name looks like
     * `<adapter>.<instance>/<path>` (an optional leading slash is ignored).
     *
     * @param {string} fileName - e.g. "sayit.0/tts.userfiles/gong.mp3"
     * @returns {Promise<*>} the `file` property of the read result, `undefined` when the
     *          read succeeded without a `file` property, or `null` when the name does not
     *          match the pattern or reading failed (a warning is logged in that case)
     */
    async getFileInStates(fileName) {
        if (fileName.match(/^\/?[-_\w]+\.\d+\//)) {
            if (fileName.startsWith('/')) {
                fileName = fileName.substring(1);
            }
            // maybe it is "sayit.0/tts.userfiles/gong.mp3"
            const parts = fileName.split('/');
            const id = parts[0];
            parts.splice(0, 1);
            const file = parts.join('/');
            try {
                const data = await this.adapter.readFileAsync(id, file);
                return data?.file;
            } catch (e) {
                this.adapter.log.warn(`Cannot read length of file ${fileName}: ${e}`);
            }
        }
        return null;
    }

    /**
     * Splits a long text into pieces of at most `max` characters.
     *
     * The text is first cut at `,`, `.`, `;` and `:` (the separators themselves are
     * dropped). Segments that are still too long are wrapped at spaces. A single word
     * longer than `max` is kept intact as its own piece.
     *
     * @param {string} text - text to split
     * @param {number} [max=70] - maximum length of one piece
     * @returns {string[]} non-empty, trimmed pieces; `[text]` if the text is short enough
     *          or contains nothing but separators/whitespace
     */
    static splitText(text, max) {
        max = max || 70;
        if (text.length <= max) {
            return [text];
        }

        const result = [];
        // The dot is placed inside a character class so it is matched literally;
        // a bare `.` in an alternation would match (and split on) every character.
        for (const rawSegment of text.split(/[,.;:]/)) {
            const segment = rawSegment.trim();
            if (!segment) {
                continue;
            }
            if (segment.length <= max) {
                result.push(segment);
                continue;
            }

            // Greedy word wrap: the chunk is local to this segment, so words are never
            // appended to pieces that belong to earlier segments.
            let chunk = '';
            for (const word of segment.split(' ')) {
                if (!word) {
                    continue;
                }
                if (chunk && `${chunk} ${word}`.length > max) {
                    result.push(chunk);
                    chunk = word;
                } else {
                    chunk = chunk ? `${chunk} ${word}` : word;
                }
            }
            if (chunk) {
                result.push(chunk);
            }
        }

        return result.length ? result : [text];
    }

    /**
     * Copies the freshly generated `this.MP3FILE` to the cache location, if caching is
     * enabled in the adapter configuration. Files smaller than 100 bytes are not cached;
     * their content is logged as a warning instead.
     *
     * @param {string} text - spoken text (used only for logging)
     * @param {string} language - language/engine id (used only for logging)
     * @param {string} md5filename - destination path of the cache file
     */
    cacheFile(text, language, md5filename) {
        if (!this.adapter.config.cache) {
            return;
        }
        const stat = fs.statSync(this.MP3FILE);
        if (stat.size < 100) {
            this.adapter.log.warn(`Received file is too short: ${fs.readFileSync(this.MP3FILE).toString()}`);
            return;
        }
        this.adapter.log.debug(`Caching File ${md5filename} for "${language};${text}" now`);
        try {
            fs.copyFileSync(this.MP3FILE, md5filename);
        } catch (error) {
            this.adapter.log.error(error);
        }
    }

    /**
     * Runs a command line through `child_process.exec`.
     *
     * @param {string} cmd - command (may already include its arguments)
     * @param {string[]} [args] - additional arguments, joined with spaces and appended
     * @param {string} [cwd] - working directory
     * @returns {Promise<void>} resolves when the command finished without error; rejects
     *          with a string otherwise
     */
    async exec(cmd, args, cwd) {
        cp = cp || require('node:child_process');
        return new Promise((resolve, reject) => {
            try {
                // The first callback argument of cp.exec is the error object (null on success).
                const ls = cp.exec(`${cmd}${args && args.length ? ` ${args.join(' ')}` : ''}`, {cwd}, code => {
                    if (!code) {
                        resolve && resolve();
                    } else {
                        reject && reject(`Exit code: ${code}`);
                    }
                    // make sure the promise is settled only once
                    reject = null;
                    resolve = null;
                });

                ls.on('error', e => this.adapter.log.error(`sayIt.play: there was an error while playing the file: ${e.toString()}`));
                ls.stdout.on('data', data => this.adapter.log.debug(`stdout: ${data}`));
                ls.stderr.on('data', data => this.adapter.log.error(`stderr: ${data}`));
            } catch (e) {
                reject && reject(e.toString());
                reject = null;
                resolve = null;
            }
        });
    }

    /**
     * Generates speech with `google-tts-api` and writes it to `this.MP3FILE`.
     *
     * Texts longer than 70 characters are split; only the first piece is synthesized
     * here, the remaining pieces are handed to `addToQueue`.
     *
     * @param {string} text - text to speak
     * @param {string} language - language code; falls back to `testOptions.engine`, then `config.engine`
     * @param {*} volume - passed through to `addToQueue`
     * @param {object} [testOptions] - overrides used for testing
     * @returns {Promise<void>}
     * @throws {Error} if the text is empty or the received data is unusable
     */
    async sayItGetSpeechGoogle(text, language, volume, testOptions) {
        if (!text.length) {
            throw new Error('No text to speak');
        }
        googleTTS = googleTTS || require('google-tts-api');

        if (text.length > 70) {
            const parts = Text2Speech.splitText(text);
            for (let t = 1; t < parts.length; t++) {
                this.addToQueue(parts[t], language, volume, testOptions);
            }
            text = parts[0];
        }

        language = language || (testOptions && testOptions.engine) || this.adapter.config.engine;

        // get base64 text
        const data = await googleTTS.getAudioBase64(text, {
            lang: language,
            slow: false,
            host: 'https://translate.google.com',
            timeout: 10000,
        });

        const buf = Buffer.from(data, 'base64');
        if (buf.length < 100) {
            throw new Error('received file is too short');
        } else if (buf.toString().includes('302 Moved')) {
            throw new Error(`https://translate.google.com\nCannot get file: ${buf.toString()}`);
        } else {
            fs.writeFileSync(this.MP3FILE, buf, 'binary');
        }
    }

    /**
     * Generates speech via the Yandex Cloud TTS HTTP API and writes it to `this.MP3FILE`.
     *
     * @param {string} text - text to speak
     * @param {string} language - 'ru', 'ru_YA' and 'ru_YA_CLOUD' are mapped to 'ru-RU'
     * @param {*} _ - unused
     * @param {object} [testOptions] - may override yandexCloudVoice, yandexKey, yandexEmotion, yandexFolderID
     * @returns {Promise<void>}
     * @throws {Error} if less than 100 bytes were received
     */
    async sayItGetSpeechYandexCloud(text, language, _, testOptions) {
        if (language === 'ru' || language === 'ru_YA' || language === 'ru_YA_CLOUD') {
            language = 'ru-RU';
        }

        axios = axios || require('axios');

        const yandexCloudVoice = (testOptions && testOptions.yandexCloudVoice) || this.adapter.config.yandexCloudVoice || '';
        const yandexKey = (testOptions && testOptions.yandexKey) || this.adapter.config.yandexKey;
        const yandexEmotion = (testOptions && testOptions.yandexEmotion) || this.adapter.config.yandexEmotion;
        const yandexFolderID = (testOptions && testOptions.yandexFolderID) || this.adapter.config.yandexFolderID;

        const params = new URLSearchParams();
        params.append('text', text);
        params.append('voice', yandexCloudVoice.replace(' Premium', ''));
        params.append('folderId', yandexFolderID || '');
        if (yandexEmotion !== 'none' && yandexEmotion) {
            params.append('emotion', yandexEmotion);
        }
        params.append('lang', language);

        const response = await axios.post(`https://tts.api.cloud.yandex.net/speech/v1/tts:synthesize`, params, {
            headers: {
                'Authorization': `Api-Key ${yandexKey}`,
            },
            responseType: 'arraybuffer',
        });

        if (!response.data || response.data.length < 100) {
            throw new Error('Cannot get file: received file is too short');
        }
        fs.writeFileSync(this.MP3FILE, response.data, 'binary');
    }

    /**
     * Generates speech via the Yandex "voicetech" HTTP API and writes it to `this.MP3FILE`.
     *
     * Supported modifiers (taken from `testOptions` or the adapter config):
     *   emotion: good, neutral, evil, mixed
     *   drunk:   true, false
     *   ill:     true, false
     *   robot:   true, false
     *
     * @param {string} text - text to speak
     * @param {string} language - 'ru' and 'ru_YA' are mapped to 'ru-RU'
     * @param {*} _ - unused
     * @param {object} [testOptions] - may override yandexVoice, yandexKey, yandexEmotion, yandexDrunk, yandexIll, yandexRobot
     * @returns {Promise<void>}
     * @throws {Error} if less than 100 bytes were received
     */
    async sayItGetSpeechYandex(text, language, _, testOptions) {
        if (language === 'ru' || language === 'ru_YA') {
            language = 'ru-RU';
        }

        const yandexVoice = (testOptions && testOptions.yandexVoice) || this.adapter.config.yandexVoice;
        const yandexKey = (testOptions && testOptions.yandexKey) || this.adapter.config.yandexKey;
        const yandexEmotion = (testOptions && testOptions.yandexEmotion) || this.adapter.config.yandexEmotion;

        // A flag explicitly present in testOptions (even `false`) wins over the config;
        // only an absent flag falls back to the configured value.
        const pickFlag = name =>
            testOptions && testOptions[name] !== undefined ? testOptions[name] : this.adapter.config[name];
        const yandexDrunk = pickFlag('yandexDrunk');
        const yandexIll = pickFlag('yandexIll');
        const yandexRobot = pickFlag('yandexRobot');

        // encodeURIComponent (not encodeURI) so that '&', '=', '#', '+' and '?' inside the
        // text cannot terminate or corrupt the `text` query parameter.
        let url = `https://tts.voicetech.yandex.net/generate?lang=${language}&format=mp3&speaker=${yandexVoice}&key=${yandexKey}&text=${encodeURIComponent(text.trim())}`;

        if (yandexEmotion && yandexEmotion !== 'none') {
            url += `&emotion=${yandexEmotion}`;
        }
        if (yandexDrunk === 'true' || yandexDrunk === true) {
            url += '&drunk=true';
        }
        if (yandexIll === 'true' || yandexIll === true) {
            url += '&ill=true';
        }
        if (yandexRobot === 'true' || yandexRobot === true) {
            url += '&robot=true';
        }

        axios = axios || require('axios');
        const response = await axios.get(url, {responseType: 'arraybuffer'});

        if (!response.data || response.data.length < 100) {
            throw new Error('Cannot get file: received file is too short');
        }
        fs.writeFileSync(this.MP3FILE, response.data, 'binary');
    }

    /**
     * Generates speech with AWS Polly and writes it to `this.MP3FILE`.
     *
     * With `testOptions` a fresh client is created for the call; otherwise one client
     * built from the adapter config is created on first use and reused.
     * Text containing a tag-like `<...>` sequence is sent as SSML (wrapped in
     * `<speak>` if it does not already start with it).
     *
     * @param {string} text - plain text or SSML
     * @param {string} language - key into `sayitEngines`; the entry must exist
     * @param {*} _ - unused
     * @param {object} [testOptions] - may override awsRegion, awsAccessKey, awsSecretKey
     * @returns {Promise<void>}
     * @throws {Error} if no audio data was received
     */
    async sayItGetSpeechPolly(text, language, _, testOptions) {
        let _polly;
        if (testOptions) {
            _polly = new PollyClient({
                region: (testOptions && testOptions.awsRegion) || this.adapter.config.awsRegion,
                credentials: {
                    accessKeyId: (testOptions && testOptions.awsAccessKey) || this.adapter.config.awsAccessKey,
                    secretAccessKey: (testOptions && testOptions.awsSecretKey) || this.adapter.config.awsSecretKey,
                },
            });
        } else {
            this.polly = this.polly || new PollyClient({
                region: this.adapter.config.awsRegion,
                credentials: {
                    accessKeyId: this.adapter.config.awsAccessKey,
                    secretAccessKey: this.adapter.config.awsSecretKey,
                },
            });
            _polly = this.polly;
        }

        let type = 'text';
        if (text.match(/<[-+\w\s'"=]+>/)) {
            if (!text.match(/^<speak>/)) {
                text = `<speak>${text}</speak>`;
            }
            type = 'ssml';
        }

        const pParams = {
            OutputFormat: 'mp3',
            Text: text,
            TextType: type || 'text',
            VoiceId: sayitEngines[language].ename || 'Marlene',
            Engine: sayitEngines[language].neural ? 'neural' : undefined,
        };

        const command = new SynthesizeSpeechCommand(pParams);
        const data = await _polly.send(command);
        const byteArray = data && data.AudioStream && (await data.AudioStream.transformToByteArray());

        // process data.
        if (!byteArray || !byteArray.length) {
            throw new Error('No data received');
        } else {
            fs.writeFileSync(this.MP3FILE, Buffer.from(byteArray), 'binary');
        }
    }

    /**
     * Generates speech with the `tts` command line tool (Coqui TTS), then converts the
     * resulting `say.wav` to `this.MP3FILE` with `lame`.
     *
     * NOTE(review): `text` is interpolated into a shell command line without escaping;
     * quotes or shell metacharacters in the text will break (or alter) the command.
     *
     * @param {string} text - text to speak
     * @param {string} language - only the first two characters are used
     * @param {*} _ - unused
     * @param {object} [testOptions] - may override coquiVocoder
     * @returns {Promise<void>}
     * @throws {Error} if either command fails
     */
    async sayItGetSpeechCoquiTTS(text, language, _, testOptions) {
        language = language.substring(0, 2);
        let cmd;
        const coquiVocoder = (testOptions && testOptions.coquiVocoder) || this.adapter.config.coquiVocoder;

        if (coquiVocoder === 'default') {
            cmd = `tts --text ". ${text} ." --model_name tts_models/${language}/${coquiVocoder.replace(' ', '/')} --out_path ${__dirname}/say.wav`;
        } else {
            if (coquiVocoder === 'libri-tts wavegrad' || coquiVocoder === 'libri-tts fullband-melgan') {
                language = 'universal';
            }
            cmd = `tts --text ". ${text} ." --model_name tts_models/${language}/${coquiVocoder.replace(' ', '/')} --vocoder_name vocoder_models/${language}/${coquiVocoder.replace(' ', '/')} --out_path ${__dirname}/say.wav`;
        }

        try {
            await this.exec(cmd);
        } catch (e) {
            throw new Error(`Cannot create (coqui) "say.wav": ${e}`);
        }

        try {
            await this.exec(`lame ${__dirname}/say.wav ${this.MP3FILE}`);
        } catch (e) {
            throw new Error(`Cannot create (lame) "say.mp3": ${e}`);
        }
    }

    /**
     * Sends a message to another adapter instance and waits for the answer.
     *
     * @param {string} adapter - target instance, e.g. "cloud.0"
     * @param {string} command - command name
     * @param {*} message - message payload
     * @param {number} [timeout=5000] - timeout in ms
     * @returns {Promise<object>} the response; rejects (with a string or `response.error`)
     *          if the instance is not alive, the response carries an `error`, or the
     *          timeout elapsed first
     */
    sendToPromise(adapter, command, message, timeout) {
        return this.adapter.getForeignStateAsync(`system.adapter.${adapter}.alive`)
            .then(state => {
                if (!state || !state.val) {
                    return Promise.reject(`Instance "${adapter}" is not running`);
                }
                return new Promise((resolve, reject) => {
                    let timer = setTimeout(() => {
                        timer = null;
                        reject(`Timeout (${timeout} ms) by sendTo "${adapter}"`);
                    }, timeout || 5000);

                    this.adapter.sendTo(adapter, command, message, response => {
                        // A cleared timer means the timeout already fired; ignore late answers.
                        if (timer) {
                            clearTimeout(timer);
                            timer = null;
                            if (response.error) {
                                reject(response.error);
                            } else {
                                resolve(response);
                            }
                        }
                    });
                });
            });
    }

    /**
     * Generates speech through the ioBroker cloud and writes it to `this.MP3FILE`.
     *
     * With an app key the HTTP API (iobroker.pro for keys starting with "@pro_",
     * otherwise iobroker.net) is called directly; without one the request is sent to
     * the configured cloud adapter instance.
     * Text containing a tag-like `<...>` sequence is sent as SSML (wrapped in
     * `<speak>` if it does not already start with it).
     *
     * @param {string} text - plain text or SSML
     * @param {string} language - key into `sayitEngines`; the entry must exist
     * @param {*} _ - unused
     * @param {object} [testOptions] - may provide cloudAppKey and cloudInstance
     * @returns {Promise<void>}
     * @throws {Error} if neither app key nor cloud instance is available, or no data was received
     */
    async sayItGetSpeechCloud(text, language, _, testOptions) {
        let type = 'text';
        if (text.match(/<[-+\w\s'"=]+>/)) {
            if (!text.match(/^<speak>/)) {
                text = `<speak>${text}</speak>`;
            }
            type = 'ssml';
        }

        const apiKey = testOptions ? testOptions.cloudAppKey : this.adapter.config.cloudAppKey;
        const cloudInstance = (testOptions && testOptions.cloudInstance) || this.adapter.config.cloudInstance;

        let response;
        if (apiKey) {
            axios = axios || require('axios');
            let cloudUrl;
            const params = {
                text,
                apiKey,
                textType: type,
                voiceId: sayitEngines[language].ename,
                engine: sayitEngines[language].neural ? 'neural' : undefined,
            };
            if (apiKey.startsWith('@pro_')) {
                cloudUrl = 'https://iobroker.pro:3001';
            } else {
                cloudUrl = 'https://iobroker.net:3001';
            }

            const _response = await axios.post(`${cloudUrl}/api/v1/polly`, params, {
                headers: {
                    'Content-Type': 'application/json',
                },
                responseType: 'arraybuffer',
            });

            if (_response.data) {
                response = {base64: Buffer.from(_response.data, 'binary').toString('base64')};
            } else {
                throw new Error('No data received');
            }
        } else if (cloudInstance) {
            // send message to cloud instance
            response = await this.sendToPromise(cloudInstance, 'tts', {
                text,
                voiceId: sayitEngines[language].ename,
                textType: type,
                engine: sayitEngines[language].neural ? 'neural' : undefined,
            }, 10000);
        } else {
            throw new Error('No cloud instance or app key defined');
        }

        fs.writeFileSync(this.MP3FILE, Buffer.from(response.base64, 'base64'), 'binary');
    }

    /**
     * Generates speech with `pico2wave`, then converts the resulting `say.wav` to
     * `this.MP3FILE` with `lame`.
     *
     * NOTE(review): `text` and `language` are interpolated into a shell command line
     * without escaping; a double quote or shell metacharacter in the text will break
     * (or alter) the command.
     *
     * @param {string} text - text to speak
     * @param {string} language - value passed to `pico2wave -l`
     * @returns {Promise<void>}
     * @throws {Error} if either command fails
     */
    async sayItGetSpeechPicoTTS(text, language) {
        try {
            await this.exec(`pico2wave -l ${language} -w ${__dirname}/say.wav "${text}"`);
        } catch (e) {
            throw new Error(`Cannot create (pico2wave) "say.wav": ${e}`);
        }

        try {
            await this.exec(`lame ${__dirname}/say.wav ${this.MP3FILE}`);
        } catch (e) {
            throw new Error(`Cannot create (lame) "say.mp3": ${e}`);
        }
    }

    /**
     * Rough duration estimate derived from a byte count: size / 4096, rounded up.
     * Used when the real duration cannot be determined.
     *
     * @param {number} size - number of bytes
     * @returns {number} estimated duration (0 if `size` is not a usable number)
     */
    _durationFromSize(size) {
        return Math.ceil((Number(size) || 0) / 4096);
    }

    /**
     * Determines the playing duration of an audio file.
     *
     * - The configured announce file with a configured `annoDuration` yields `annoDuration - 1`.
     * - Files on disk: `.mp3` files are measured with `mp3-duration`; other files, and
     *   mp3 files that cannot be parsed, are estimated from the file size.
     * - Files not on disk are looked up via `getFileInStates` and handled the same way,
     *   using the length of the loaded data for the size-based estimate.
     *
     * @param {string} fileName - path on disk or "<adapter>.<instance>/<path>"
     * @returns {Promise<number>} duration rounded up to an integer (presumably seconds -
     *          TODO confirm against the caller); 0 if the file cannot be read
     */
    async getDuration(fileName) {
        if (fileName === this.adapter.config.announce && this.adapter.config.annoDuration) {
            return this.adapter.config.annoDuration - 1;
        }

        const isMp3 = fileName.endsWith('.mp3');

        if (fs.existsSync(fileName)) {
            const estimateFromDisk = () => {
                try {
                    return this._durationFromSize(fs.statSync(fileName).size);
                } catch (e) {
                    this.adapter.log.warn(`Cannot read length of file ${fileName}`);
                    return 0;
                }
            };

            if (!isMp3) {
                return estimateFromDisk();
            }
            return new Promise(resolve =>
                mp3Duration(fileName, (err, duration) =>
                    resolve(err ? estimateFromDisk() : Math.ceil(duration))));
        }

        // `data` is already the file content (getFileInStates returns `result.file`),
        // so its own length is the size - there is no nested `file` property and no
        // file on disk to stat.
        const data = await this.getFileInStates(fileName);
        if (!data) {
            return 0;
        }

        const estimateFromData = () => this._durationFromSize(data.length);

        if (!isMp3) {
            return estimateFromData();
        }
        return new Promise(resolve =>
            mp3Duration(data, (err, duration) =>
                resolve(err ? estimateFromData() : Math.ceil(duration))));
    }

    /**
     * Produces the audio file for a text, using the cache when enabled.
     *
     * The engine is selected by `sayitEngines[language].engine`; unknown languages fall
     * back to Google. SSML-like tags are stripped from the text unless the selected
     * engine entry is flagged with `ssml`.
     *
     * @param {string} text - text to speak
     * @param {string} language - key into `sayitEngines`
     * @param {*} volume - forwarded to the Google engine
     * @param {object} [testOptions] - test overrides; when given, the cache lookup is skipped
     * @returns {Promise<string>} the cached file name on a cache hit, otherwise `this.MP3FILE`
     * @throws {Error} if the engine is not supported or the engine call fails
     */
    async sayItGetSpeech(text, language, volume, testOptions) {
        if (this.adapter.config.cache && !testOptions) {
            const md5filename = this.isCached(this.options.cacheDir, `${language};${text}`, this.options.outFileExt, this.adapter.config.cacheExpiryDays);
            if (md5filename) {
                return md5filename;
            }
            this.adapter.log.debug(`Cache File ${md5filename} for "${language};${text}" not found`);
        }

        if (sayitEngines[language] && sayitEngines[language].engine) {
            if (!sayitEngines[language].ssml) {
                // remove SSML
                text = text.replace(/<\/?[-+\w\s'"=]+>/g, '');
            }

            const engine = sayitEngines[language].engine;
            if (engine === 'google') {
                await this.sayItGetSpeechGoogle(text, language, volume, testOptions);
            } else if (engine === 'yandex') {
                await this.sayItGetSpeechYandex(text, language, null, testOptions);
            } else if (engine === 'yandexCloud') {
                await this.sayItGetSpeechYandexCloud(text, language, null, testOptions);
            } else if (engine === 'polly') {
                await this.sayItGetSpeechPolly(text, language, null, testOptions);
            } else if (engine === 'coquiTTS') {
                await this.sayItGetSpeechCoquiTTS(text, language, null, testOptions);
            } else if (engine === 'cloud') {
                await this.sayItGetSpeechCloud(text, language, null, testOptions);
            } else if (engine === 'PicoTTS') {
                await this.sayItGetSpeechPicoTTS(text, language, null, testOptions);
            } else {
                throw new Error(`Engine ${engine} not yet supported.`);
            }
        } else {
            // fallback to google
            await this.sayItGetSpeechGoogle(text, language, volume, testOptions);
        }

        if (this.adapter.config.cache) {
            this.cacheFile(text, language, this.getCachedFileName(this.options.cacheDir, `${language};${text}`, this.options.outFileExt));
        }

        return this.MP3FILE;
    }
}

module.exports = Text2Speech;