// Package: watson-developer-cloud (SpeechToTextV1 type declarations, via UNPKG)
/**
 * Copyright 2018 IBM All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/// <reference types="node" />
import { AxiosResponse } from 'axios';
import { BaseService } from 'ibm-cloud-sdk-core';
import { FileObject } from 'ibm-cloud-sdk-core';
/**
 * The IBM&reg; Speech to Text service provides APIs that use IBM's speech-recognition capabilities to produce
 * transcripts of spoken audio. The service can transcribe speech from various languages and audio formats. In addition
 * to basic transcription, the service can produce detailed information about many different aspects of the audio. For
 * most languages, the service supports two sampling rates, broadband and narrowband. It returns all JSON response
 * content in the UTF-8 character set.
 *
 * For speech recognition, the service supports synchronous and asynchronous HTTP Representational State Transfer
 * (REST) interfaces. It also supports a WebSocket interface that provides a full-duplex, low-latency communication
 * channel: Clients send requests and audio to the service and receive results over a single connection asynchronously.
 *
 * The service also offers two customization interfaces. Use language model customization to expand the vocabulary of a
 * base model with domain-specific terminology. Use acoustic model customization to adapt a base model for the acoustic
 * characteristics of your audio. For language model customization, the service also supports grammars. A grammar is a
 * formal language specification that lets you restrict the phrases that the service can recognize.
 *
 * Language model customization is generally available for production use with most supported languages. Acoustic
 * model customization is beta functionality that is available for all supported languages.
 */
declare class SpeechToTextV1 extends BaseService {
    static URL: string;
    name: string;
    serviceVersion: string;
    /**
     * Construct a SpeechToTextV1 object.
     *
     * @param {Object} options - Options for the service.
     * @param {string} [options.url] - The base url to use when contacting the service (e.g.
     * 'https://gateway.watsonplatform.net/speech-to-text/api'). The base url may differ between Bluemix regions.
     * @param {string} [options.username] - The username used to authenticate with the service. Username and password
     * credentials are only required to run your application locally or outside of Bluemix. When running on Bluemix,
     * the credentials will be automatically loaded from the `VCAP_SERVICES` environment variable.
     * @param {string} [options.password] - The password used to authenticate with the service. Username and password
     * credentials are only required to run your application locally or outside of Bluemix. When running on Bluemix,
     * the credentials will be automatically loaded from the `VCAP_SERVICES` environment variable.
     * @param {string} [options.iam_access_token] - An IAM access token fully managed by the application.
     * Responsibility falls on the application to refresh the token, either before it expires or reactively upon
     * receiving a 401 from the service, as any requests made with an expired token will fail.
     * @param {string} [options.iam_apikey] - An API key that can be used to request IAM tokens. If this API key is
     * provided, the SDK will manage the token and handle the refreshing.
     * @param {string} [options.iam_url] - An optional URL for the IAM service API. Defaults to
     * 'https://iam.bluemix.net/identity/token'.
     * @param {boolean} [options.use_unauthenticated] - Set to `true` to avoid including an authorization header. This
     * option may be useful for requests that are proxied.
     * @param {Object} [options.headers] - Default headers that shall be included with every request to the service.
     * @param {boolean} [options.headers.X-Watson-Learning-Opt-Out] - Set to `true` to opt-out of data collection. By
     * default, all IBM Watson services log requests and their results. Logging is done only to improve the services
     * for future users. The logged data is not shared or made public. If you are concerned with protecting the privacy
     * of users' personal information or otherwise do not want your requests to be logged, you can opt out of logging.
     * @constructor
     * @returns {SpeechToTextV1}
     */
    constructor(options: SpeechToTextV1.Options);
    /*************************
     * models
     ************************/
    /**
     * Get a model.
     *
     * Gets information for a single specified language model that is available for use with the service. The
     * information includes the name of the model and its minimum sampling rate in Hertz, among other things.
     *
     * **See also:** [Languages and models](https://cloud.ibm.com/docs/services/speech-to-text/models.html).
     *
     * @param {Object} params - The parameters to send to the service.
     * @param {string} params.model_id - The identifier of the model in the form of its name from the output of the
     * **Get a model** method.
     * @param {Object} [params.headers] - Custom request headers
     * @param {Function} [callback] - The callback that handles the response.
     * @returns {Promise<any>|void}
     */
    getModel(params: SpeechToTextV1.GetModelParams, callback?: SpeechToTextV1.Callback<SpeechToTextV1.SpeechModel>): Promise<any> | void;
    /**
     * List models.
     *
     * Lists all language models that are available for use with the service. The information includes the name of the
     * model and its minimum sampling rate in Hertz, among other things.
     *
     * **See also:** [Languages and models](https://cloud.ibm.com/docs/services/speech-to-text/models.html).
     *
     * @param {Object} [params] - The parameters to send to the service.
     * @param {Object} [params.headers] - Custom request headers
     * @param {Function} [callback] - The callback that handles the response.
     * @returns {Promise<any>|void}
     */
    listModels(params?: SpeechToTextV1.ListModelsParams, callback?: SpeechToTextV1.Callback<SpeechToTextV1.SpeechModels>): Promise<any> | void;
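    // Usage sketch (comments only, since this file contains type declarations, not runnable code).
    // Constructing a client and querying the available models, assuming an IAM API key; the
    // placeholder credential and the shape of the resolved value are assumptions, not part of
    // these declarations.
    //
    //   import SpeechToTextV1 = require('watson-developer-cloud/speech-to-text/v1');
    //
    //   const speechToText = new SpeechToTextV1({
    //     iam_apikey: '{your_iam_api_key}',  // hypothetical placeholder
    //     url: 'https://gateway.watsonplatform.net/speech-to-text/api',
    //   });
    //
    //   // With no callback argument, the methods return a Promise (see the signatures above).
    //   speechToText.listModels()
    //     .then(models => console.log(JSON.stringify(models, null, 2)))
    //     .catch(err => console.error(err));
    //
    //   speechToText.getModel({ model_id: 'en-US_BroadbandModel' })  // assumed model name
    //     .then(model => console.log(model))
    //     .catch(err => console.error(err));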
    /*************************
     * synchronous
     ************************/
    /**
     * Recognize audio.
     *
     * Sends audio and returns transcription results for a recognition request. You can pass a maximum of 100 MB and a
     * minimum of 100 bytes of audio with a request. The service automatically detects the endianness of the incoming
     * audio and, for audio that includes multiple channels, downmixes the audio to one-channel mono during
     * transcoding. The method returns only final results; to enable interim results, use the WebSocket API.
     *
     * **See also:** [Making a basic HTTP
     * request](https://cloud.ibm.com/docs/services/speech-to-text/http.html#HTTP-basic).
     *
     * ### Streaming mode
     *
     * For requests to transcribe live audio as it becomes available, you must set the `Transfer-Encoding` header to
     * `chunked` to use streaming mode. In streaming mode, the service closes the connection (status code 408) if it
     * does not receive at least 15 seconds of audio (including silence) in any 30-second period. The service also
     * closes the connection (status code 400) if it detects no speech for `inactivity_timeout` seconds of streaming
     * audio; use the `inactivity_timeout` parameter to change the default of 30 seconds.
     *
     * **See also:**
     * * [Audio transmission](https://cloud.ibm.com/docs/services/speech-to-text/input.html#transmission)
     * * [Timeouts](https://cloud.ibm.com/docs/services/speech-to-text/input.html#timeouts)
     *
     * ### Audio formats (content types)
     *
     * The service accepts audio in the following formats (MIME types).
     * * For formats that are labeled **Required**, you must use the `Content-Type` header with the request to specify
     * the format of the audio.
     * * For all other formats, you can omit the `Content-Type` header or specify `application/octet-stream` with the
     * header to have the service automatically detect the format of the audio. (With the `curl` command, you can
     * specify either `"Content-Type:"` or `"Content-Type: application/octet-stream"`.)
     *
     * Where indicated, the format that you specify must include the sampling rate and can optionally include the
     * number of channels and the endianness of the audio.
     * * `audio/alaw` (**Required.** Specify the sampling rate (`rate`) of the audio.)
     * * `audio/basic` (**Required.** Use only with narrowband models.)
     * * `audio/flac`
     * * `audio/g729` (Use only with narrowband models.)
     * * `audio/l16` (**Required.** Specify the sampling rate (`rate`) and optionally the number of channels
     * (`channels`) and endianness (`endianness`) of the audio.)
     * * `audio/mp3`
     * * `audio/mpeg`
     * * `audio/mulaw` (**Required.** Specify the sampling rate (`rate`) of the audio.)
     * * `audio/ogg` (The service automatically detects the codec of the input audio.)
     * * `audio/ogg;codecs=opus`
     * * `audio/ogg;codecs=vorbis`
     * * `audio/wav` (Provide audio with a maximum of nine channels.)
     * * `audio/webm` (The service automatically detects the codec of the input audio.)
     * * `audio/webm;codecs=opus`
     * * `audio/webm;codecs=vorbis`
     *
     * The sampling rate of the audio must match the sampling rate of the model for the recognition request: for
     * broadband models, at least 16 kHz; for narrowband models, at least 8 kHz. If the sampling rate of the audio is
     * higher than the minimum required rate, the service down-samples the audio to the appropriate rate. If the
     * sampling rate of the audio is lower than the minimum required rate, the request fails.
     *
     * **See also:** [Audio formats](https://cloud.ibm.com/docs/services/speech-to-text/audio-formats.html).
     *
     * ### Multipart speech recognition
     *
     * **Note:** The Watson SDKs do not support multipart speech recognition.
     *
     * The HTTP `POST` method of the service also supports multipart speech recognition. With multipart requests, you
     * pass all audio data as multipart form data. You specify some parameters as request headers and query parameters,
     * but you pass JSON metadata as form data to control most aspects of the transcription.
     *
     * The multipart approach is intended for use with browsers for which JavaScript is disabled or when the parameters
     * used with the request are greater than the 8 KB limit imposed by most HTTP servers and proxies. You can
     * encounter this limit, for example, if you want to spot a very large number of keywords.
     *
     * **See also:** [Making a multipart HTTP
     * request](https://cloud.ibm.com/docs/services/speech-to-text/http.html#HTTP-multi).
     *
     * @param {Object} params - The parameters to send to the service.
     * @param {NodeJS.ReadableStream|FileObject|Buffer} params.audio - The audio to transcribe.
     * @param {string} [params.model] - The identifier of the model that is to be used for the recognition request. See
     * [Languages and models](https://cloud.ibm.com/docs/services/speech-to-text/models.html).
     * @param {string} [params.language_customization_id] - The customization ID (GUID) of a custom language model that
     * is to be used with the recognition request. The base model of the specified custom language model must match the
     * model specified with the `model` parameter. You must make the request with credentials for the instance of the
     * service that owns the custom model. By default, no custom language model is used. See [Custom
     * models](https://cloud.ibm.com/docs/services/speech-to-text/input.html#custom-input).
     *
     * **Note:** Use this parameter instead of the deprecated `customization_id` parameter.
     * @param {string} [params.acoustic_customization_id] - The customization ID (GUID) of a custom acoustic model that
     * is to be used with the recognition request. The base model of the specified custom acoustic model must match the
     * model specified with the `model` parameter. You must make the request with credentials for the instance of the
     * service that owns the custom model. By default, no custom acoustic model is used. See [Custom
     * models](https://cloud.ibm.com/docs/services/speech-to-text/input.html#custom-input).
     * @param {string} [params.base_model_version] - The version of the specified base model that is to be used with
     * the recognition request. Multiple versions of a base model can exist when a model is updated for internal
     * improvements. The parameter is intended primarily for use with custom models that have been upgraded for a new
     * base model. The default value depends on whether the parameter is used with or without a custom model. See [Base
     * model version](https://cloud.ibm.com/docs/services/speech-to-text/input.html#version).
     * @param {number} [params.customization_weight] - If you specify the customization ID (GUID) of a custom language
     * model with the recognition request, the customization weight tells the service how much weight to give to words
     * from the custom language model compared to those from the base model for the current request.
     *
     * Specify a value between 0.0 and 1.0. Unless a different customization weight was specified for the custom model
     * when it was trained, the default value is 0.3. A customization weight that you specify overrides a weight that
     * was specified when the custom model was trained.
     *
     * The default value yields the best performance in general. Assign a higher value if your audio makes frequent use
     * of OOV words from the custom model. Use caution when setting the weight: a higher value can improve the accuracy
     * of phrases from the custom model's domain, but it can negatively affect performance on non-domain phrases.
     *
     * See [Custom models](https://cloud.ibm.com/docs/services/speech-to-text/input.html#custom-input).
     * @param {number} [params.inactivity_timeout] - The time in seconds after which, if only silence (no speech) is
     * detected in streaming audio, the connection is closed with a 400 error. The parameter is useful for stopping
     * audio submission from a live microphone when a user simply walks away. Use `-1` for infinity. See [Inactivity
     * timeout](https://cloud.ibm.com/docs/services/speech-to-text/input.html#timeouts-inactivity).
     * @param {string[]} [params.keywords] - An array of keyword strings to spot in the audio. Each keyword string can
     * include one or more string tokens. Keywords are spotted only in the final results, not in interim hypotheses. If
     * you specify any keywords, you must also specify a keywords threshold. You can spot a maximum of 1000 keywords.
     * Omit the parameter or specify an empty array if you do not need to spot keywords. See [Keyword
     * spotting](https://cloud.ibm.com/docs/services/speech-to-text/output.html#keyword_spotting).
     * @param {number} [params.keywords_threshold] - A confidence value that is the lower bound for spotting a keyword.
     * A word is considered to match a keyword if its confidence is greater than or equal to the threshold. Specify a
     * probability between 0.0 and 1.0. If you specify a threshold, you must also specify one or more keywords. The
     * service performs no keyword spotting if you omit either parameter. See [Keyword
     * spotting](https://cloud.ibm.com/docs/services/speech-to-text/output.html#keyword_spotting).
     * @param {number} [params.max_alternatives] - The maximum number of alternative transcripts that the service is to
     * return. By default, the service returns a single transcript. If you specify a value of `0`, the service uses the
     * default value, `1`. See [Maximum
     * alternatives](https://cloud.ibm.com/docs/services/speech-to-text/output.html#max_alternatives).
     * @param {number} [params.word_alternatives_threshold] - A confidence value that is the lower bound for
     * identifying a hypothesis as a possible word alternative (also known as "Confusion Networks"). An alternative
     * word is considered if its confidence is greater than or equal to the threshold. Specify a probability between
     * 0.0 and 1.0. By default, the service computes no alternative words. See [Word
     * alternatives](https://cloud.ibm.com/docs/services/speech-to-text/output.html#word_alternatives).
     * @param {boolean} [params.word_confidence] - If `true`, the service returns a confidence measure in the range of
     * 0.0 to 1.0 for each word. By default, the service returns no word confidence scores. See [Word
     * confidence](https://cloud.ibm.com/docs/services/speech-to-text/output.html#word_confidence).
     * @param {boolean} [params.timestamps] - If `true`, the service returns time alignment for each word. By default,
     * no timestamps are returned. See [Word
     * timestamps](https://cloud.ibm.com/docs/services/speech-to-text/output.html#word_timestamps).
     * @param {boolean} [params.profanity_filter] - If `true`, the service filters profanity from all output except for
     * keyword results by replacing inappropriate words with a series of asterisks. Set the parameter to `false` to
     * return results with no censoring. Applies to US English transcription only. See [Profanity
     * filtering](https://cloud.ibm.com/docs/services/speech-to-text/output.html#profanity_filter).
     * @param {boolean} [params.smart_formatting] - If `true`, the service converts dates, times, series of digits and
     * numbers, phone numbers, currency values, and internet addresses into more readable, conventional representations
     * in the final transcript of a recognition request. For US English, the service also converts certain keyword
     * strings to punctuation symbols. By default, the service performs no smart formatting.
     *
     * **Note:** Applies to US English, Japanese, and Spanish transcription only.
     *
     * See [Smart formatting](https://cloud.ibm.com/docs/services/speech-to-text/output.html#smart_formatting).
     * @param {boolean} [params.speaker_labels] - If `true`, the response includes labels that identify which words
     * were spoken by which participants in a multi-person exchange. By default, the service returns no speaker labels.
     * Setting `speaker_labels` to `true` forces the `timestamps` parameter to be `true`, regardless of whether you
     * specify `false` for the parameter.
     *
     * **Note:** Applies to US English, Japanese, and Spanish transcription only. To determine whether a language model
     * supports speaker labels, you can also use the **Get a model** method and check that the attribute
     * `speaker_labels` is set to `true`.
     *
     * See [Speaker labels](https://cloud.ibm.com/docs/services/speech-to-text/output.html#speaker_labels).
     * @param {string} [params.customization_id] - **Deprecated.** Use the `language_customization_id` parameter to
     * specify the customization ID (GUID) of a custom language model that is to be used with the recognition request.
     * Do not specify both parameters with a request.
     * @param {string} [params.grammar_name] - The name of a grammar that is to be used with the recognition request.
     * If you specify a grammar, you must also use the `language_customization_id` parameter to specify the name of the
     * custom language model for which the grammar is defined. The service recognizes only strings that are recognized
     * by the specified grammar; it does not recognize other custom words from the model's words resource. See
     * [Grammars](https://cloud.ibm.com/docs/services/speech-to-text/input.html#grammars-input).
     * @param {boolean} [params.redaction] - If `true`, the service redacts, or masks, numeric data from final
     * transcripts. The feature redacts any number that has three or more consecutive digits by replacing each digit
     * with an `X` character. It is intended to redact sensitive numeric data, such as credit card numbers. By default,
     * the service performs no redaction.
     *
     * When you enable redaction, the service automatically enables smart formatting, regardless of whether you
     * explicitly disable that feature. To ensure maximum security, the service also disables keyword spotting (ignores
     * the `keywords` and `keywords_threshold` parameters) and returns only a single final transcript (forces the
     * `max_alternatives` parameter to be `1`).
     *
     * **Note:** Applies to US English, Japanese, and Korean transcription only.
     *
     * See [Numeric redaction](https://cloud.ibm.com/docs/services/speech-to-text/output.html#redaction).
     * @param {string} [params.content_type] - The format (MIME type) of the audio. For more information about
     * specifying an audio format, see **Audio formats (content types)** in the method description.
     * @param {Object} [params.headers] - Custom request headers
     * @param {Function} [callback] - The callback that handles the response.
     * @returns {Promise<any>|void}
     */
    recognize(params: SpeechToTextV1.RecognizeParams, callback?: SpeechToTextV1.Callback<SpeechToTextV1.SpeechRecognitionResults>): Promise<any> | void;
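    // Usage sketch (comments only): a promise-style `recognize` call with keyword spotting,
    // assuming the client constructed above. The file path, model identifier, and keyword
    // values are hypothetical.
    //
    //   import * as fs from 'fs';
    //
    //   speechToText.recognize({
    //     audio: fs.createReadStream('audio-file.flac'),  // hypothetical file
    //     content_type: 'audio/flac',            // FLAC does not require a `rate` parameter
    //     model: 'en-US_BroadbandModel',         // assumed model identifier
    //     keywords: ['colorado', 'tornado'],     // keyword spotting requires both the list...
    //     keywords_threshold: 0.5,               // ...and the threshold (see the doc above)
    //     timestamps: true,
    //   })
    //     .then(results => console.log(JSON.stringify(results, null, 2)))
    //     .catch(err => console.error(err));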
    /*************************
     * asynchronous
     ************************/
    /**
     * Check a job.
     *
     * Returns information about the specified job. The response always includes the status of the job and its creation
     * and update times. If the status is `completed`, the response includes the results of the recognition request.
     * You must use credentials for the instance of the service that owns a job to list information about it.
     *
     * You can use the method to retrieve the results of any job, regardless of whether it was submitted with a
     * callback URL and the `recognitions.completed_with_results` event, and you can retrieve the results multiple
     * times for as long as they remain available. Use the **Check jobs** method to request information about the most
     * recent jobs associated with the calling credentials.
     *
     * **See also:** [Checking the status and retrieving the results of a
     * job](https://cloud.ibm.com/docs/services/speech-to-text/async.html#job).
     *
     * @param {Object} params - The parameters to send to the service.
     * @param {string} params.id - The identifier of the asynchronous job that is to be used for the request. You must
     * make the request with credentials for the instance of the service that owns the job.
     * @param {Object} [params.headers] - Custom request headers
     * @param {Function} [callback] - The callback that handles the response.
     * @returns {Promise<any>|void}
     */
    checkJob(params: SpeechToTextV1.CheckJobParams, callback?: SpeechToTextV1.Callback<SpeechToTextV1.RecognitionJob>): Promise<any> | void;
    /**
     * Check jobs.
     *
     * Returns the ID and status of the latest 100 outstanding jobs associated with the credentials with which it is
     * called. The method also returns the creation and update times of each job, and, if a job was created with a
     * callback URL and a user token, the user token for the job. To obtain the results for a job whose status is
     * `completed` or not one of the latest 100 outstanding jobs, use the **Check a job** method. A job and its results
     * remain available until you delete them with the **Delete a job** method or until the job's time to live expires,
     * whichever comes first.
     *
     * **See also:** [Checking the status of the latest
     * jobs](https://cloud.ibm.com/docs/services/speech-to-text/async.html#jobs).
     *
     * @param {Object} [params] - The parameters to send to the service.
     * @param {Object} [params.headers] - Custom request headers
     * @param {Function} [callback] - The callback that handles the response.
     * @returns {Promise<any>|void}
     */
    checkJobs(params?: SpeechToTextV1.CheckJobsParams, callback?: SpeechToTextV1.Callback<SpeechToTextV1.RecognitionJobs>): Promise<any> | void;
    /**
     * Create a job.
     *
     * Creates a job for a new asynchronous recognition request. The job is owned by the instance of the service whose
     * credentials are used to create it. How you learn the status and results of a job depends on the parameters you
     * include with the job creation request:
     * * By callback notification: Include the `callback_url` parameter to specify a URL to which the service is to
     * send callback notifications when the status of the job changes. Optionally, you can also include the `events`
     * and `user_token` parameters to subscribe to specific events and to specify a string that is to be included with
     * each notification for the job.
     * * By polling the service: Omit the `callback_url`, `events`, and `user_token` parameters. You must then use the
     * **Check jobs** or **Check a job** methods to check the status of the job, using the latter to retrieve the
     * results when the job is complete.
     *
     * The two approaches are not mutually exclusive. You can poll the service for job status or obtain results from
     * the service manually even if you include a callback URL. In both cases, you can include the `results_ttl`
     * parameter to specify how long the results are to remain available after the job is complete. Using the HTTPS
     * **Check a job** method to retrieve results is more secure than receiving them via callback notification over
     * HTTP because it provides confidentiality in addition to authentication and data integrity.
     *
     * The method supports the same basic parameters as other HTTP and WebSocket recognition requests. It also supports
     * the following parameters specific to the asynchronous interface:
     * * `callback_url`
     * * `events`
     * * `user_token`
     * * `results_ttl`
     *
     * You can pass a maximum of 1 GB and a minimum of 100 bytes of audio with a request. The service automatically
     * detects the endianness of the incoming audio and, for audio that includes multiple channels, downmixes the audio
     * to one-channel mono during transcoding. The method returns only final results; to enable interim results, use
     * the WebSocket API.
     *
     * **See also:** [Creating a job](https://cloud.ibm.com/docs/services/speech-to-text/async.html#create).
     *
     * ### Streaming mode
     *
     * For requests to transcribe live audio as it becomes available, you must set the `Transfer-Encoding` header to
     * `chunked` to use streaming mode. In streaming mode, the service closes the connection (status code 408) if it
     * does not receive at least 15 seconds of audio (including silence) in any 30-second period. The service also
     * closes the connection (status code 400) if it detects no speech for `inactivity_timeout` seconds of streaming
     * audio; use the `inactivity_timeout` parameter to change the default of 30 seconds.
     *
     * **See also:**
     * * [Audio transmission](https://cloud.ibm.com/docs/services/speech-to-text/input.html#transmission)
     * * [Timeouts](https://cloud.ibm.com/docs/services/speech-to-text/input.html#timeouts)
     *
     * ### Audio formats (content types)
     *
     * The service accepts audio in the following formats (MIME types).
     * * For formats that are labeled **Required**, you must use the `Content-Type` header with the request to specify
     * the format of the audio.
     * * For all other formats, you can omit the `Content-Type` header or specify `application/octet-stream` with the
     * header to have the service automatically detect the format of the audio. (With the `curl` command, you can
     * specify either `"Content-Type:"` or `"Content-Type: application/octet-stream"`.)
     *
     * Where indicated, the format that you specify must include the sampling rate and can optionally include the
     * number of channels and the endianness of the audio.
     * * `audio/alaw` (**Required.** Specify the sampling rate (`rate`) of the audio.)
     * * `audio/basic` (**Required.** Use only with narrowband models.)
     * * `audio/flac`
     * * `audio/g729` (Use only with narrowband models.)
     * * `audio/l16` (**Required.** Specify the sampling rate (`rate`) and optionally the number of channels
     * (`channels`) and endianness (`endianness`) of the audio.)
     * * `audio/mp3`
     * * `audio/mpeg`
     * * `audio/mulaw` (**Required.** Specify the sampling rate (`rate`) of the audio.)
     * * `audio/ogg` (The service automatically detects the codec of the input audio.)
     * * `audio/ogg;codecs=opus`
     * * `audio/ogg;codecs=vorbis`
     * * `audio/wav` (Provide audio with a maximum of nine channels.)
     * * `audio/webm` (The service automatically detects the codec of the input audio.)
     * * `audio/webm;codecs=opus`
     * * `audio/webm;codecs=vorbis`
     *
     * The sampling rate of the audio must match the sampling rate of the model for the recognition request: for
     * broadband models, at least 16 kHz; for narrowband models, at least 8 kHz. If the sampling rate of the audio is
     * higher than the minimum required rate, the service down-samples the audio to the appropriate rate. If the
     * sampling rate of the audio is lower than the minimum required rate, the request fails.
     *
     * **See also:** [Audio formats](https://cloud.ibm.com/docs/services/speech-to-text/audio-formats.html).
     *
     * @param {Object} params - The parameters to send to the service.
     * @param {NodeJS.ReadableStream|FileObject|Buffer} params.audio - The audio to transcribe.
     * @param {string} [params.model] - The identifier of the model that is to be used for the recognition request. See
     * [Languages and models](https://cloud.ibm.com/docs/services/speech-to-text/models.html).
     * @param {string} [params.callback_url] - A URL to which callback notifications are to be sent. The URL must
     * already be successfully white-listed by using the **Register a callback** method. You can include the same
     * callback URL with any number of job creation requests. Omit the parameter to poll the service for job completion
     * and results.
     *
     * Use the `user_token` parameter to specify a unique user-specified string with each job to differentiate the
     * callback notifications for the jobs.
     * @param {string} [params.events] - If the job includes a callback URL, a comma-separated list of notification
     * events to which to subscribe. Valid events are
     * * `recognitions.started` generates a callback notification when the service begins to process the job.
     * * `recognitions.completed` generates a callback notification when the job is complete. You must use the **Check
     * a job** method to retrieve the results before they time out or are deleted.
     * * `recognitions.completed_with_results` generates a callback notification when the job is complete. The
     * notification includes the results of the request.
     * * `recognitions.failed` generates a callback notification if the service experiences an error while processing
     * the job.
     *
     * The `recognitions.completed` and `recognitions.completed_with_results` events are incompatible. You can specify
     * only one of the two events.
     *
     * If the job includes a callback URL, omit the parameter to subscribe to the default events:
     * `recognitions.started`, `recognitions.completed`, and `recognitions.failed`. If the job does not include a
     * callback URL, omit the parameter.
     * @param {string} [params.user_token] - If the job includes a callback URL, a user-specified string that the
     * service is to include with each callback notification for the job; the token allows the user to maintain an
     * internal mapping between jobs and notification events. If the job does not include a callback URL, omit the
     * parameter.
     * @param {number} [params.results_ttl] - The number of minutes for which the results are to be available after the
     * job has finished. If not delivered via a callback, the results must be retrieved within this time. Omit the
     * parameter to use a time to live of one week. The parameter is valid with or without a callback URL.
     * @param {string} [params.language_customization_id] - The customization ID (GUID) of a custom language model that
     * is to be used with the recognition request. The base model of the specified custom language model must match the
     * model specified with the `model` parameter. You must make the request with credentials for the instance of the
     * service that owns the custom model. By default, no custom language model is used. See [Custom
     * models](https://cloud.ibm.com/docs/services/speech-to-text/input.html#custom-input).
     *
     * **Note:** Use this parameter instead of the deprecated `customization_id` parameter.
     * @param {string} [params.acoustic_customization_id] - The customization ID (GUID) of a custom acoustic model that
     * is to be used with the recognition request. The base model of the specified custom acoustic model must match the
     * model specified with the `model` parameter. You must make the request with credentials for the instance of the
     * service that owns the custom model. By default, no custom acoustic model is used. See [Custom
     * models](https://cloud.ibm.com/docs/services/speech-to-text/input.html#custom-input).
     * @param {string} [params.base_model_version] - The version of the specified base model that is to be used with
     * the recognition request. Multiple versions of a base model can exist when a model is updated for internal
     * improvements. The parameter is intended primarily for use with custom models that have been upgraded for a new
     * base model. The default value depends on whether the parameter is used with or without a custom model. See [Base
     * model version](https://cloud.ibm.com/docs/services/speech-to-text/input.html#version).
     * @param {number} [params.customization_weight] - If you specify the customization ID (GUID) of a custom language
     * model with the recognition request, the customization weight tells the service how much weight to give to words
     * from the custom language model compared to those from the base model for the current request.
     *
     * Specify a value between 0.0 and 1.0. Unless a different customization weight was specified for the custom model
     * when it was trained, the default value is 0.3. A customization weight that you specify overrides a weight that
     * was specified when the custom model was trained.
     *
     * The default value yields the best performance in general. Assign a higher value if your audio makes frequent use
     * of OOV words from the custom model. Use caution when setting the weight: a higher value can improve the accuracy
     * of phrases from the custom model's domain, but it can negatively affect performance on non-domain phrases.
     *
     * See [Custom models](https://cloud.ibm.com/docs/services/speech-to-text/input.html#custom-input).
     * @param {number} [params.inactivity_timeout] - The time in seconds after which, if only silence (no speech) is
     * detected in streaming audio, the connection is closed with a 400 error. The parameter is useful for stopping
     * audio submission from a live microphone when a user simply walks away. Use `-1` for infinity. See [Inactivity
     * timeout](https://cloud.ibm.com/docs/services/speech-to-text/input.html#timeouts-inactivity).
     * @param {string[]} [params.keywords] - An array of keyword strings to spot in the audio. Each keyword string can
     * include one or more string tokens. Keywords are spotted only in the final results, not in interim hypotheses. If
     * you specify any keywords, you must also specify a keywords threshold. You can spot a maximum of 1000 keywords.
     * Omit the parameter or specify an empty array if you do not need to spot keywords. See [Keyword
     * spotting](https://cloud.ibm.com/docs/services/speech-to-text/output.html#keyword_spotting).
     * @param {number} [params.keywords_threshold] - A confidence value that is the lower bound for spotting a keyword.
     * A word is considered to match a keyword if its confidence is greater than or equal to the threshold. Specify a
     * probability between 0.0 and 1.0. If you specify a threshold, you must also specify one or more keywords. The
     * service performs no keyword spotting if you omit either parameter. See [Keyword
     * spotting](https://cloud.ibm.com/docs/services/speech-to-text/output.html#keyword_spotting).
     * @param {number} [params.max_alternatives] - The maximum number of alternative transcripts that the service is to
     * return. By default, the service returns a single transcript. If you specify a value of `0`, the service uses the
     * default value, `1`. See [Maximum
     * alternatives](https://cloud.ibm.com/docs/services/speech-to-text/output.html#max_alternatives).
     * @param {number} [params.word_alternatives_threshold] - A confidence value that is the lower bound for
     * identifying a hypothesis as a possible word alternative (also known as "Confusion Networks"). An alternative
     * word is considered if its confidence is greater than or equal to the threshold. Specify a probability between
     * 0.0 and 1.0. By default, the service computes no alternative words. See [Word
     * alternatives](https://cloud.ibm.com/docs/services/speech-to-text/output.html#word_alternatives).
     * @param {boolean} [params.word_confidence] - If `true`, the service returns a confidence measure in the range of
     * 0.0 to 1.0 for each word. By default, the service returns no word confidence scores. See [Word
     * confidence](https://cloud.ibm.com/docs/services/speech-to-text/output.html#word_confidence).
     * @param {boolean} [params.timestamps] - If `true`, the service returns time alignment for each word. By default,
     * no timestamps are returned. See [Word
     * timestamps](https://cloud.ibm.com/docs/services/speech-to-text/output.html#word_timestamps).
     * @param {boolean} [params.profanity_filter] - If `true`, the service filters profanity from all output except for
     * keyword results by replacing inappropriate words with a series of asterisks. Set the parameter to `false` to
     * return results with no censoring. Applies to US English transcription only. See [Profanity
     * filtering](https://cloud.ibm.com/docs/services/speech-to-text/output.html#profanity_filter).
     * @param {boolean} [params.smart_formatting] - If `true`, the service converts dates, times, series of digits and
     * numbers, phone numbers, currency values, and internet addresses into more readable, conventional representations
     * in the final transcript of a recognition request. For US English, the service also converts certain keyword
     * strings to punctuation symbols. By default, the service performs no smart formatting.
     *
     * **Note:** Applies to US English, Japanese, and Spanish transcription only.
     *
     * See [Smart formatting](https://cloud.ibm.com/docs/services/speech-to-text/output.html#smart_formatting).
     * @param {boolean} [params.speaker_labels] - If `true`, the response includes labels that identify which words
     * were spoken by which participants in a multi-person exchange. By default, the service returns no speaker labels.
     * Setting `speaker_labels` to `true` forces the `timestamps` parameter to be `true`, regardless of whether you
     * specify `false` for the parameter.
     *
     * **Note:** Applies to US English, Japanese, and Spanish transcription only. To determine whether a language model
     * supports speaker labels, you can also use the **Get a model** method and check that the attribute
     * `speaker_labels` is set to `true`.
     *
     * See [Speaker labels](https://cloud.ibm.com/docs/services/speech-to-text/output.html#speaker_labels).
     * @param {string} [params.customization_id] - **Deprecated.** Use the `language_customization_id` parameter to
     * specify the customization ID (GUID) of a custom language model that is to be used with the recognition request.
     * Do not specify both parameters with a request.
     * @param {string} [params.grammar_name] - The name of a grammar that is to be used with the recognition request.
     * If you specify a grammar, you must also use the `language_customization_id` parameter to specify the name of the
     * custom language model for which the grammar is defined. The service recognizes only strings that are recognized
     * by the specified grammar; it does not recognize other custom words from the model's words resource. See
     * [Grammars](https://cloud.ibm.com/docs/services/speech-to-text/input.html#grammars-input).
     * @param {boolean} [params.redaction] - If `true`, the service redacts, or masks, numeric data from final
     * transcripts. The feature redacts any number that has three or more consecutive digits by replacing each digit
     * with an `X` character. It is intended to redact sensitive numeric data, such as credit card numbers. By default,
     * the service performs no redaction.
     *
     * When you enable redaction, the service automatically enables smart formatting, regardless of whether you
     * explicitly disable that feature. To ensure maximum security, the service also disables keyword spotting (ignores
     * the `keywords` and `keywords_threshold` parameters) and returns only a single final transcript (forces the
     * `max_alternatives` parameter to be `1`).
     *
     * **Note:** Applies to US English, Japanese, and Korean transcription only.
     *
     * See [Numeric redaction](https://cloud.ibm.com/docs/services/speech-to-text/output.html#redaction).
     * @param {string} [params.content_type] - The format (MIME type) of the audio. For more information about
     * specifying an audio format, see **Audio formats (content types)** in the method description.
     * @param {Object} [params.headers] - Custom request headers
     * @param {Function} [callback] - The callback that handles the response.
     * @returns {Promise<any>|void}
     */
    createJob(params: SpeechToTextV1.CreateJobParams, callback?: SpeechToTextV1.Callback<SpeechToTextV1.RecognitionJob>): Promise<any> | void;
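    // Usage sketch (comments only): the polling pattern described above, with no callback URL.
    // Assumes the promise resolves with the `RecognitionJob` body; the file name, TTL, and
    // 5-second polling interval are hypothetical.
    //
    //   const { id } = await speechToText.createJob({
    //     audio: fs.createReadStream('meeting.wav'),  // hypothetical file
    //     content_type: 'audio/wav',
    //     results_ttl: 60,                            // keep results for 60 minutes
    //   });
    //
    //   let job = await speechToText.checkJob({ id });
    //   while (job.status !== 'completed' && job.status !== 'failed') {
    //     await new Promise(resolve => setTimeout(resolve, 5000));
    //     job = await speechToText.checkJob({ id });
    //   }
    //   if (job.status === 'completed') {
    //     console.log(JSON.stringify(job.results, null, 2));
    //   }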
    /**
     * Delete a job.
     *
     * Deletes the specified job. You cannot delete a job that the service is actively processing. Once you delete a
     * job, its results are no longer available. The service automatically deletes a job and its results when the time
     * to live for the results expires. You must use credentials for the instance of the service that owns a job to
     * delete it.
     *
     * **See also:** [Deleting a job](https://cloud.ibm.com/docs/services/speech-to-text/async.html#delete-async).
     *
     * @param {Object} params - The parameters to send to the service.
     * @param {string} params.id - The identifier of the asynchronous job that is to be used for the request. You must
     * make the request with credentials for the instance of the service that owns the job.
     * @param {Object} [params.headers] - Custom request headers
     * @param {Function} [callback] - The callback that handles the response.
     * @returns {Promise<any>|void}
     */
    deleteJob(params: SpeechToTextV1.DeleteJobParams, callback?: SpeechToTextV1.Callback<SpeechToTextV1.Empty>): Promise<any> | void;
    /**
     * Register a callback.
     *
     * Registers a callback URL with the service for use with subsequent asynchronous recognition requests. The service
     * attempts to register, or white-list, the callback URL if it is not already registered by sending a `GET` request
     * to the callback URL. The service passes a random alphanumeric challenge string via the `challenge_string`
     * parameter of the request. The request includes an `Accept` header that specifies `text/plain` as the required
     * response type.
     *
     * To be registered successfully, the callback URL must respond to the `GET` request from the service. The response
     * must send status code 200 and must include the challenge string in its body. Set the `Content-Type` response
     * header to `text/plain`. Upon receiving this response, the service responds to the original registration request
     * with response code 201.
     *
     * The service sends only a single `GET` request to the callback URL. If the service does not receive a reply with
     * a response code of 200 and a body that echoes the challenge string sent by the service within five seconds, it
     * does not white-list the URL; it instead sends status code 400 in response to the **Register a callback**
     * request. If the requested callback URL is already white-listed, the service responds to the initial registration
     * request with response code 200.
     *
     * If you specify a user secret with the request, the service uses it as a key to calculate an HMAC-SHA1 signature
     * of the challenge string in its response to the `POST` request. It sends this signature in the
     * `X-Callback-Signature` header of its `GET` request to the URL during registration. It also uses the secret to
     * calculate a signature over the payload of every callback notification that uses the URL. The signature provides
     * authentication and data integrity for HTTP communications.
     *
     * After you successfully register a callback URL, you can use it with an indefinite number of recognition
     * requests. You can register a maximum of 20 callback URLs in a one-hour span of time.
     *
     * **See also:** [Registering a callback
     * URL](https://cloud.ibm.com/docs/services/speech-to-text/async.html#register).
     *
     * @param {Object} params - The parameters to send to the service.
     * @param {string} params.callback_url - An HTTP or HTTPS URL to which callback notifications are to be sent. To be
     * white-listed, the URL must successfully echo the challenge string during URL verification. During verification,
     * the client can also check the signature that the service sends in the `X-Callback-Signature` header to verify
     * the origin of the request.
     * @param {string} [params.user_secret] - A user-specified string that the service uses to generate the HMAC-SHA1
     * signature that it sends via the `X-Callback-Signature` header. The service includes the header during URL
     * verification and with every notification sent to the callback URL. It calculates the signature over the payload
     * of the notification. If you omit the parameter, the service does not send the header.
     * @param {Object} [params.headers] - Custom request headers
     * @param {Function} [callback] - The callback that handles the response.
     * @returns {Promise<any>|void}
     */
    registerCallback(params: SpeechToTextV1.RegisterCallbackParams, callback?: SpeechToTextV1.Callback<SpeechToTextV1.RegisterStatus>): Promise<any> | void;
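    // Protocol sketch (comments only): a minimal endpoint that passes the white-listing
    // handshake described above by echoing `challenge_string` back as `text/plain` with
    // status 200. The port and callback URL are hypothetical; verifying the
    // `X-Callback-Signature` HMAC-SHA1 header is omitted for brevity.
    //
    //   import * as http from 'http';
    //
    //   http.createServer((req, res) => {
    //     const challenge = new URL(req.url || '/', 'http://localhost')
    //       .searchParams.get('challenge_string');
    //     if (challenge !== null) {
    //       res.writeHead(200, { 'Content-Type': 'text/plain' });
    //       res.end(challenge);  // echo the challenge so the service white-lists the URL
    //     } else {
    //       res.writeHead(200);  // subsequent callback notifications would be handled here
    //       res.end();
    //     }
    //   }).listen(8080);
    //
    //   await speechToText.registerCallback({ callback_url: 'http://example.com/stt-results' });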
    /**
     * Unregister a callback.
     *
     * Unregisters a callback URL that was previously white-listed with a **Register a callback** request for use with
     * the asynchronous interface. Once unregistered, the URL can no longer be used with asynchronous recognition
     * requests.
     *
     * **See also:** [Unregistering a callback
     * URL](https://cloud.ibm.com/docs/services/speech-to-text/async.html#unregister).
     *
     * @param {Object} params - The parameters to send to the service.
     * @param {string} params.callback_url - The callback URL that is to be unregistered.
     * @param {Object} [params.headers] - Custom request headers
     * @param {Function} [callback] - The callback that handles the response.
     * @returns {Promise<any>|void}
     */
    unregisterCallback(params: SpeechToTextV1.UnregisterCallbackParams, callback?: SpeechToTextV1.Callback<SpeechToTextV1.Empty>): Promise<any> | void;
    /*************************
     * customLanguageModels
     ************************/
    /**
     * Create a custom language model.
     *
     * Creates a new custom language model for a specified base model. The custom language model can be used only with
     * the base model for which it is created. The model is owned by the instance of the service whose credentials are
     * used to create it.
     *
     * **See also:** [Create a custom language
     * model](https://cloud.ibm.com/docs/services/speech-to-text/language-create.html#createModel-language).
     *
     * @param {Object} params - The parameters to send to the service.
     * @param {string} params.name - A user-defined name for the new custom language model. Use a name that is unique
     * among all custom language models that you own. Use a localized name that matches the language of the custom
     * model. Use a name that describes the domain of the custom model, such as `Medical custom model` or `Legal custom
     * model`.
     * @param {string} params.base_model_name - The name of the base language model that is to be customized by the new
     * custom language model. The new custom model can be