UNPKG

watson-developer-cloud

Version:

Client library to use the IBM Watson Services and AlchemyAPI

1,130 lines (543 loc) 23.1 kB
/** * Copyright 2014 IBM Corp. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ 'use strict'; var pick = require('object.pick'); var extend = require('extend'); var requestFactory = require('../lib/requestwrapper'); var util = require('util'); var BaseService = require('../lib/base_service'); /** * * @param options * @constructor */ function TextToSpeechV1(options) { BaseService.call(this, options); } util.inherits(TextToSpeechV1, BaseService); TextToSpeechV1.prototype.name = 'text_to_speech'; TextToSpeechV1.prototype.version = 'v1'; TextToSpeechV1.URL = 'https://stream.watsonplatform.net/text-to-speech/api'; /** * Streaming speech synthesis of the text in a query parameter * * @param {Object} options * @param {String} options.text * @param {String} [options.voice] * @param {String} [options.accept] * @param {Boolean} [options.X-Watson-Learning-Opt-Out] * @param {Function} callback */ TextToSpeechV1.prototype.synthesize = function(params, callback) { params = extend({accept:'audio/ogg; codecs=opus'}, params); if (!params.text){ callback(new Error('Missing required parameters: text')); return; } var parameters = { options: { method: 'POST', url: '/v1/synthesize', body: JSON.stringify(pick(params, ['text'])), qs: pick(params, ['accept', 'voice']), headers: extend({ 'content-type': 'application/json' }, pick(params, ['X-Watson-Learning-Opt-Out'])), encoding: null }, defaultOptions: this._options }; return requestFactory(parameters, callback); }; /** * Retrieves the voices available for speech synthesis */ TextToSpeechV1.prototype.voices = function(params, callback) { var parameters = { options: { method: 'GET', url: '/v1/voices', json: true }, defaultOptions: this._options }; return requestFactory(parameters, callback); }; /** * new customization features Summary of API calls API Number Method API URL Description API-01 POST /api/v1/customizations Create new custom model API-02 GET /api/v1/customizations?language="en-US" List custom models for a language API-03 DELETE /api/v1/customizations/{customization_id} Delete custom model API-04 GET /api/v1/customizations/{customization_id}?<alphabet=spr> Query contents of custom model (optionally converts IPA to SPR phones) API-05 POST /api/v1/customizations/{customization_id} Update contents of custom model, including adding one or more words API-06 PUT /api/v1/customizations/{customization_id}/words/{word} Add a single word to custom model API-07 POST /api/v1/customizations/{customization_id}/words Add one or more words to custom model API-08 GET /api/v1/customizations/{customization_id}/words/{word} Query details for a word in custom model API-09 GET /api/v1/customizations/{customization_id}/words List contents of words in custom model API-10 DELETE /api/v1/customizations/{customization_id}/words/{word} Delete single word from custom model API-11 GET /api/v1/pronunciation?text=aword&voice=voiceModel&format=ipa|spr Gets the IPA (or optionally the SPR) pronunciation of a given word for a given voice model (default=en-US_MichaelVoice) TTS API Implementation */ /* Create Custom Voice Model Creates a new custom voice model for the specified language. Note: not all languages are supported (only US English for Beta release). API: POST /text-to-speech/api/v1/customizations example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations Request: { "name":"my custom voice", "language":"en-US", "description":"This is my first custom voice" } Input: name -- string, not null. Voice model name; language -- string, null/not null. If language is null, default is en-US . description -- string, null/not null. Description of the custom voice model. Output: If success -- { "customization_id":"74f4807e-b5ff-4866-824e-6bba1a84fe96" } Here 74f4807e-b5ff-4866-824e-6bba1a84fe96 is the UUID of new voice model. If failed -- { "code":400, "error":"Invalid value for 'language'.", "code_description":"Bad Request" } Http Status Code: HTTP Code Notes 200 Success 400 Invalid parameter 'parameter-name' in request. 400 Required parameter 'parameter-name' is missing. 400 This 'language' is currently not supported. 500 Service internal error */ TextToSpeechV1.prototype.createCustomizations = function(params, callback) { var parameters = { options: { method: 'POST', url: '/v1/customizations', body: JSON.stringify(pick(params, ['text'])), qs: pick(params, ['accept', 'voice']), headers: extend({ 'content-type': 'application/json' }, pick(params, ['X-Watson-Learning-Opt-Out'])), json: true }, defaultOptions: this._options }; return requestFactory(parameters, callback); }; /* List Custom Voices List all available custom voice models for a language (or all languages). API: GET /text-to-speech/api/v1/customizations?language="en-US" example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations?language=en-US https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations?language=en-US Request: empty body Input: language -- string, null/not null. If it is null, API returns all custom voice models owned by the requester (based on bluemix-instance-id); if it's not null, returns custom voice models for this specified language id owned by the requester; Output: If success -- { "customizations":[ { "customization_id":"1fa3b971-45ad-4da5-a2a0-019d6eb24227", "created":1432114231877, "language":"en-US", "owner":"GUID", "name":"Lorem sit amet", "description":"Lorem ipsum dolor sit amet, consectetur adipisicing elit. Beatae officiis similique consequuntur rem cumque rerum ex qui quo odio eligendi quia, consequatur quos suscipit id inventore itaque est aliquid! Temporibus reiciendis debitis tempore distinctio excepturi voluptate consequatur, iste illum dicta, voluptatibus alias doloribus quaerat fugiat eius odit magnam voluptatum officia." } ] } Here 'customizations' is a list of custom voice models, 'owner' is a GUID which associates the user with the custom model, 'created' is Unix-time of when the model was created, 'customization_id' is a GUID associated with the custom model and 'language' is the language of the model. If failed -- { "code":400, "error":"Invalid value for 'language'.", "code_description":"Bad Request" } Http Status Code: HTTP Code Notes 200 Success 400 Invalid value for 'language'. 500 Service internal error Testing case: http://9.186.107.61:9080/text-to-speech/test/tts/ListDictionaries.html Delete Custom Voice Model Delete a custom voice model. API: DELETE /text-to-speech/api/v1/customizations/{customization_id} example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96 https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96 Request: empty body Input: customization_id -- UUID string, not null. Output: If success -- Empty body If failed -- { "code":401, "error":"Invalid customization_id (XXX) for user.", "code_description":Unauthorized" } Http Status Code: HTTP Code Notes 204 Success 400 Invalid value for 'customization_id'. 401 Invalid customization_id (xxx) for user 500 Service internal error Testing case: http://9.186.107.61:9080/text-to-speech/test/tts/DeleteDictionary.html Query Custom Voice Model List contents of custom voice model. API: GET /text-to-speech/api/v1/customizations/{customization_id}?<alphabet=spr> example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96 https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96 Request: empty body Input: customization_id -- GUID string, not null. alphabet -- an optional query parameter which tells system to return original pronunciation or if set to "spr", pronunciations after conversion to SPR phoneme set (this is needed by TTS engine). Only words with phonetic pronunciations are converted to SPR (so sounds-like remain as sounds-like) Output: If success -- { "customization_id":"7524b592-aeb5-4ab3-a3a6-639dfc4eaf4b", "created":1432021460585, "language":"en-US", "owner":GUID, "name":"Lorem ipsum dolor", "last_modified":1439955282335, "description":"Lorem ipsum dolor sit amet, consectetur adipisicing elit. Repellendus amet assumenda nostrum ea, nesciunt veniam voluptates. Sapiente labore accusamus illum placeat voluptatem, architecto repellat fugit blanditiis velit in esse vitae, necessitatibus ducimus nobis? Natus accusantium maiores corporis ipsam praesentium, totam iusto minus temporibus sapiente, aperiam, id ullam vero adipisci accusamus.", "words": [ {"word":"gastroenteritis", "translation":"`[1gAstroEntxrYFXs]"}, {"word": "trinitroglycerin", "translation":"try nitro glycerin"}, {"word":"div.", "translation":"division"}, {"word": "proc.", "translation":"proceedings"] } Here 'words' is a list of the words in the custom model, 'owner' is a GUID which associates the user with the custom model, 'created' is Unix-time of when the model was created, 'last_modified' is Unix-time of when the model was last modified, 'customization_id' is a GUID associated with the custom model and 'language' is the language of the model. If failed -- { "code":401, "error":"Invalid customization_id (XXX) for user.", "code_description":Unauthorized" } Http Status Code: HTTP Code Notes 200 Success 400 Invalid value for 'customization_id'. 401 Invalid customization_id (xxx) for user. 500 Service internal error 304 Not modified Testing case: http://9.186.107.61:9080/text-to-speech/test/tts/QueryDictionary.html Update Custom Voice Update custom voice information (name or description). Can also be used to add one or more words to the dictionary. API: POST /text-to-speech/api/v1/customizations/{customization_id} example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96 https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96 Request: { "name":"new voice name", "description":"This is my new custom voice", "words": [ {"word":"gastroenteritis", "translation":"<phoneme alphabet="ibm" ph="1gAstroEntxrYFXs"></phoneme>"}, {"word":"tomato", "translation":"<phoneme alphabet="ipa" ph="təmˈɑto"></phoneme>"}, {"word": "trinitroglycerin", "translation":"try nitro glycerin"}, {"word":"div.", "translation":"division"}, {"word": "proc.", "translation":"proceedings"}] } Input: customization_id -- UUID string, not null. name -- string, null/not null. If it's null, voice name field will not be updated. description -- string, null/not null. If it's null, description field will not be updated. words -- a JSON array composed of 'word' objects. If array is empty, word entries will not be updated. word -- a JSON object composed of "word":"word_string" and "translation":"pronunciation|sounds-like string". The format for the pronunciation is based on the SSML format for representing the phonetic string of a word. That is, "<phoneme alphabet="ipa" ph="təmˈɑto"></phoneme>" for adding an IPA pronunciation or "<phoneme alphabet="ibm" ph="1gAstroEntxrYFXs"></phoneme>" for adding an SPR pronunciation. SPR is the native IBM phone set. Output: If success -- Empty body If failed -- { "code":401, "error":"Invalid customization_id (XXX) for user.", "code_description":Unauthorized" } Http Status Code: HTTP Code Notes 201 Success 400 Invalid parameter 'parameter-name' in request. 400 Invalid value for 'customization_id'. 400 In SSML: <phoneme alphabet="ipa" ph="XXX"></phoneme>, attribute 'ph' is not a standard IPA format. 401 Invalid customization_id (xxx) for user. 500 Service internal error Testing case: http://9.186.107.61:9080/text-to-speech/test/tts/UpdateDictionary.html Update Custom Voice Word Adds a single word to a given customization_id API: PUT /text-to-speech/api/v1/customizations/{customization_id}/words/{word} example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words/aword https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words/aword Request: { "translation":"pronunciation|sounds-like" } Input: word -- string, containing word to add translation - a valid IPA or SPR pronunciation and/or one or more words which when combined sound like the word being added - the pronunciation should be in SSML format, e.g. "<phoneme alphabet="ipa" ph="təmˈɑto"></phoneme>" for adding an IPA pronunciation or "<phoneme alphabet="ibm" ph="1gAstroEntxrYFXs"></phoneme>" for adding an SPR pronunciation. SPR is the native IBM phone set. Output: If success -- Empty body If failed -- { "code":401|400,400, "error":"Invalid customization_id (XXX) for user" | "Invalid request" | "In SSML: <phoneme alphabet="ipa" ph="XXX"></phoneme>, attribute 'ph' is not a standard IPA format.", "code_description":Unauthorized"|"Bad Request"|"Bad Request" } Http Status Code: HTTP Code Notes 201 Success 400 Invalid parameter 'parameter-name' in request. 400 Invalid value for 'customization_id'. 400 In SSML: <phoneme alphabet="ipa" ph="XXX"></phoneme>, attribute 'ph' is not a standard IPA format. 401 Invalid customization_id (xxx) for user. 500 Service internal error Testing case: http://9.186.107.61:9080/text-to-speech/test/tts/UpdateDictionaryWord.html Update Custom Voice Words Adds one or more words and their associated pronunciation to a given customization_id API: POST /text-to-speech/api/v1/customizations/{customization_id}/words example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words Request: { "words": [ {"word":"gastroenteritis", "translation":"<phoneme alphabet="ibm" ph="1gAstroEntxrYFXs"></phoneme>"}, {"word": "trinitroglycerin", "translation":"try nitro glycerin"}, {"word":"div.", "translation":"division"}, {"word": "proc.", "translation":"proceedings"] } Input: words -- JSON array of JSON tuples containing word and translation objects word -- string,containing word to add translation - valid IPA pronunciation and/or one or more words which when combined sound like the word being added Output: If success -- Empty body If failed -- { "code":401|400,400, "error":"Invalid customization_id (XXX) for user" | "Invalid request" | "In SSML: <phoneme alphabet="ipa" ph="XXX"></phoneme>, attribute 'ph' is not a standard IPA format.", "code_description":Unauthorized"|"Bad Request"|"Bad Request" } Http Status Code: HTTP Code Notes 201 Success 400 Invalid value for 'customization_id'. 400 Invalid parameter 'parameter-name' in request. 400 In SSML: <phoneme alphabet="ipa" ph="XXX"></phoneme>, attribute 'ph' is not a standard IPA format. 401 Invalid customization_id (xxx) for user. 500 Service internal error Testing case: http://9.186.107.61:9080/text-to-speech/test/tts/UpdateDictionaryWords.html Get Word Get details for a single word belonging to customization_id API: GET /text-to-speech/api/v1/customizations/{customization_id}/words/{word} example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words/aword https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words/aword Request: { } Input: word -- string,containing word to add Output: If success -- { "translation":"pronunciation|sounds-like" } If failed -- { "code":400, "error":"Word: <aword> not found in customization_id: XXX", "code_description":"Bad Request" } Http Status Code: HTTP Code Notes 200 Success 400 Invalid value for 'customization_id'. 400 Word: xxx not found in customization_id: xxx-xxx 401 Invalid customization_id (xxx) for user. 500 Service internal error Testing case: http://9.186.107.61:9080/text-to-speech/test/tts/GetWord.html Get Words List all the words associated with a customization_id API: GET /text-to-speech/api/v1/customizations/{customization_id}/words example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words Request: { } Input: Output: If success -- { "words": [ {"word": "aword", "translation":"pronunciation|sounds-like"}, {"word": "anotherword", "translation":"pronunciation|sounds-like"}, ... ] } If failed -- { "code":401, "error":"Invalid customization_id (XXX) for user.", "code_description":Unauthorized" } Http Status Code: HTTP Code Notes 200 Success 400 Invalid value for 'customization_id'. 401 Invalid customization_id (xxx) for user. 500 Service internal error Testing case: http://9.186.107.61:9080/text-to-speech/test/tts/GetWords.html Delete Word Deletes a single word belonging to customization_id API: DELETE /text-to-speech/api/v1/customizations/{customization_id}/words/{word} example: http://9.186.107.61:9080/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words/aword https://stream-d.watsonplatform.net/text-to-speech/api/v1/customizations/74f4807e-b5ff-4866-824e-6bba1a84fe96/words/aword Request: { } Input: word -- string,containing word to delete Output: If success -- Empty body If failed -- { "code":401, "error":"Invalid customization_id (XXX) for user.", "code_description":Unauthorized" } Http Status Code: HTTP Code Notes 204 No Content (means success but no content being returned) 400 Invalid value for 'customization_id'. 400 Word: xxx not found in customization_id: xxx-xxx 401 Invalid customization_id (xxx) for user. 500 Service internal error Testing case: http://9.186.107.61:9080/text-to-speech/test/tts/DeleteWord.html Get Pronunciation Get the pronunciation for a word. By default, the pronunciation is returned in IPA phone set. API: GET /text-to-speech/api/v1/pronunciation?voice=xx-XX_VoiceModel&text="aword"&format="ipa|ibm" example: http://9.186.107.61:9080/speechct/text-to-speech/api/v1/pronunciation?voice=en-US_LisaVoice&text=aword https://stream-d.watsonplatform.net/text-to-speech/api/v1/pronunciation?voice=en-US_LisaVoice&text=aword Request: { } Input: text -- string, contains word for which pronunciation is requested voice -- string, null or not null. If it is null, assumes the request is for the default voice; if it's not null, returns pronunciation using the phoneme set specified in the 'format' parameter for the given 'voice' format -- specifies which phoneme set to use to return the pronunciation, either "ipa" or "spr" (default is "ipa") Output: If success -- { "pronunciation":"pronunciation" } If failed -- { "code":404 "error":"Model <XXX> not found" "code_description":"Not Found" } Cassandra database structure Model Name: ttsmodel Column Name Data Type Key Type Comments userid varchar PARTITION KEY string comes from Bluemix dictid uuid PRIMARY KEY, INDEX an unique UUID string createtime timestamp PRIMARY KEY sort by desc dictname varchar created by user lang int language id description text dictionary list<text> the content words of dictionary lastmodified timestamp used for If-Modified-Since CQL: CREATE KEYSPACE speechct WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 2 }; use speechct; CREATE TABLE ttsmodel (userid varchar, dictid uuid, createtime timestamp, dictname varchar, lang int, description text, dictionary list<text>, lastmodified timestamp, PRIMARY KEY (userid,createtime,dictid)) WITH CLUSTERING ORDER BY (createtime DESC); create index dictid_index on ttsmodel(dictid); create index lang_index on ttsmodel(lang); */ module.exports = TextToSpeechV1;