UNPKG

web-speech-profanity

Version:

Web Speech API adapter that uses Cognitive Services Speech Services for both speech-to-text and text-to-speech.

github.com/krrnk/web-speech-cognitive-services

krrnk/web-speech-cognitive-services

68 lines (58 loc) • 2.86 kB

JavaScript

import { decode } from 'base64-arraybuffer';

import buildSSML from './buildSSML';
import isSSML from './isSSML';

const DEFAULT_LANGUAGE = 'en-US';
const DEFAULT_OUTPUT_FORMAT = 'riff-16khz-16bit-mono-pcm';
const DEFAULT_VOICE = 'Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)';
const EMPTY_MP3_BASE64 =
  'SUQzBAAAAAAAI1RTU0UAAAAPAAADTGF2ZjU3LjU2LjEwMQAAAAAAAAAAAAAA//tAwAAAAAAAAAAAAAAAAAAAAAAASW5mbwAAAA8AAAACAAABhgC7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7u7//////////////////////////////////////////////////////////////////8AAAAATGF2YzU3LjY0AAAAAAAAAAAAAAAAJAUHAAAAAAAAAYYoRBqpAAAAAAD/+xDEAAPAAAGkAAAAIAAANIAAAARMQU1FMy45OS41VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVf/7EMQpg8AAAaQAAAAgAAA0gAAABFVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV';

/**
 * Requests synthesized speech for `text` from the speech synthesis REST endpoint and
 * resolves with the audio bytes.
 *
 * @param {object} options
 * @param {string} [options.deploymentId] - When set, the request goes to the
 *   `<region>.voice.speech.microsoft.com` host (unless `speechSynthesisHostname` is
 *   supplied) and the ID is sent as the `deploymentId` query parameter.
 * @param {Function} options.fetchCredentials - Called (and awaited) once per request; must
 *   yield an object carrying exactly one of `authorizationToken` / `subscriptionKey` and
 *   exactly one of `region` / `speechSynthesisHostname`.
 * @param {string} [options.lang] - Passed to `buildSSML`; defaults to `en-US`.
 * @param {string} [options.outputFormat] - Sent as the `X-Microsoft-OutputFormat` header.
 * @param {*} [options.pitch] - Passed through to `buildSSML` unchanged.
 * @param {*} [options.rate] - Passed through to `buildSSML` unchanged.
 * @param {string} options.text - Plain text, or a document for which `isSSML` returns
 *   truthy (sent as-is, in which case `lang`, `pitch`, `rate`, `voice` and `volume` are
 *   not applied).
 * @param {string} [options.voice] - Passed to `buildSSML`.
 * @param {*} [options.volume] - Passed through to `buildSSML` unchanged.
 * @returns {Promise<ArrayBuffer>} The response body, or the decoded built-in empty clip
 *   when `text` is falsy.
 * @throws {Error} When the credentials are ambiguous or missing, or when the server
 *   responds with a non-OK status.
 */
export default async function({
  deploymentId,
  fetchCredentials,
  lang = DEFAULT_LANGUAGE,
  outputFormat = DEFAULT_OUTPUT_FORMAT,
  pitch,
  rate,
  text,
  voice = DEFAULT_VOICE,
  volume
}) {
  if (!text) {
    // If text is empty, return a short built-in audio clip without calling the service.
    // This allows developers to easily prime the AudioContext object by playing an empty string.
    return decode(EMPTY_MP3_BASE64);
  }

  const { authorizationToken, region, speechSynthesisHostname, subscriptionKey } = await fetchCredentials();

  // Exactly one credential and exactly one endpoint selector must be provided.
  if ((authorizationToken && subscriptionKey) || (!authorizationToken && !subscriptionKey)) {
    throw new Error('Only "authorizationToken" or "subscriptionKey" should be set.');
  }

  if ((region && speechSynthesisHostname) || (!region && !speechSynthesisHostname)) {
    throw new Error('Only "region" or "speechSynthesisHostname" should be set.');
  }

  const ssml = isSSML(text) ? text : buildSSML({ lang, pitch, rate, text, voice, volume });

  // encodeURI does not truly sanitize a hostname, but it makes a malformed region fail
  // faster and more safely than interpolating it verbatim.
  const hostname =
    speechSynthesisHostname ||
    (deploymentId
      ? `${ encodeURI(region) }.voice.speech.microsoft.com`
      : `${ encodeURI(region) }.tts.speech.microsoft.com`);

  // encodeURIComponent (not encodeURI) so that "&", "=", "#", "?" and "+" inside the ID
  // cannot add or truncate query parameters.
  const search = deploymentId ? `?deploymentId=${ encodeURIComponent(deploymentId) }` : '';
  const url = `https://${ hostname }/cognitiveservices/v1${ search }`;

  const res = await fetch(url, {
    headers: {
      'Content-Type': 'application/ssml+xml',
      'X-Microsoft-OutputFormat': outputFormat,
      ...(authorizationToken
        ? { Authorization: `Bearer ${ authorizationToken }` }
        : { 'Ocp-Apim-Subscription-Key': subscriptionKey })
    },
    method: 'POST',
    body: ssml
  });

  if (!res.ok) {
    throw new Error(`web-speech-cognitive-services: Failed to synthesize speech, server returned ${ res.status }`);
  }

  return res.arrayBuffer();
}