@speechmatics/real-time-client
Version:
Client for the Speechmatics real-time API
391 lines (363 loc) • 11.4 kB
JavaScript
'use strict';
var ws = require('ws');
var typescriptEventTarget = require('typescript-event-target');
// Bundler-emitted helpers that emulate class field initialisation.
const __defProp = Object.defineProperty;

/**
 * Sets `obj[key]` to `value`.
 * When the key is already visible on `obj` (own or inherited) the property is
 * (re)defined as a plain enumerable/configurable/writable data property and
 * `obj` is returned; otherwise a normal assignment is performed and `value`
 * is returned.
 */
function __defNormalProp(obj, key, value) {
  if (key in obj) {
    return __defProp(obj, key, {
      enumerable: true,
      configurable: true,
      writable: true,
      value
    });
  }
  obj[key] = value;
  return value;
}

/**
 * Initialises a public field on `obj`. Non-symbol keys are coerced to strings
 * before being handed to `__defNormalProp`.
 */
function __publicField(obj, key, value) {
  const propertyKey = typeof key === "symbol" ? key : key + "";
  return __defNormalProp(obj, propertyKey, value);
}
/**
 * Event of type `"socketStateChange"` carrying the socket state that was
 * current when the event was created.
 */
class SocketStateChangeEvent extends Event {
  /**
   * @param socketState State label exposed on the event as `socketState`
   *   (the client passes the value of its `socketState` getter, which may be
   *   `undefined`).
   */
  constructor(socketState) {
    const eventType = "socketStateChange";
    super(eventType);
    this.socketState = socketState;
  }
}
/**
 * Event of type `"receiveMessage"` wrapping a message received from the
 * server.
 */
class ReceiveMessageEvent extends Event {
  /**
   * @param data The received message; stored by reference on `data`.
   */
  constructor(data) {
    const eventType = "receiveMessage";
    super(eventType);
    this.data = data;
  }
}
/**
 * Event of type `"sendMessage"` wrapping a message that the client sent to
 * the server.
 */
class SendMessageEvent extends Event {
  /**
   * @param data The sent message; stored by reference on `data`.
   */
  constructor(data) {
    const eventType = "sendMessage";
    super(eventType);
    this.data = data;
  }
}
/**
 * WebSocket client for the Speechmatics real-time API.
 *
 * Dispatches three typed events:
 * - `socketStateChange` (SocketStateChangeEvent): when the socket is created,
 *   opens, errors or closes.
 * - `receiveMessage` (ReceiveMessageEvent): for every JSON message from the
 *   server that carries a string `message` field.
 * - `sendMessage` (SendMessageEvent): after a JSON message has been handed to
 *   the socket.
 */
class RealtimeClient extends typescriptEventTarget.TypedEventTarget {
  /**
   * @param {object} [config]
   * @param {string} [config.url] WebSocket endpoint. Defaults to
   *   `"wss://eu2.rt.speechmatics.com/v2"`.
   * @param {string} [config.appId] Sent as the `sm-app` query parameter when set.
   * @param {boolean} [config.enableLegacy] When true, `sm-enable-legacy-rt=true`
   *   is added to the connection URL. Defaults to `false`.
   */
  constructor(config = {}) {
    super();
    __publicField(this, "url");
    __publicField(this, "appId");
    __publicField(this, "enableLegacy");
    __publicField(this, "socket");
    // Track the last AudioAdded sequence number, used when stopping transcription to avoid missing audio
    // https://docs.speechmatics.com/rt-api-ref#audioadded
    __publicField(this, "lastAudioAddedSeqNo", 0);
    this.url = config.url ?? "wss://eu2.rt.speechmatics.com/v2";
    this.appId = config.appId;
    this.enableLegacy = config.enableLegacy ?? false;
  }

  /**
   * Human-readable state of the underlying socket: `"connecting"`, `"open"`,
   * `"closing"` or `"closed"`; `undefined` before the first `connect()`.
   */
  get socketState() {
    if (!this.socket) return void 0;
    return {
      [ws.WebSocket.CONNECTING]: "connecting",
      [ws.WebSocket.OPEN]: "open",
      [ws.WebSocket.CLOSING]: "closing",
      [ws.WebSocket.CLOSED]: "closed"
    }[this.socket.readyState];
  }

  /**
   * Opens the WebSocket and wires up socket listeners.
   *
   * @param {string} jwt Token appended to the URL as the `jwt` query parameter.
   * @returns {Promise<void>} Resolves once the socket is open; rejects with the
   *   socket's error event if an error is reported first.
   */
  async connect(jwt) {
    return new Promise((resolve, reject) => {
      const url = new URL(this.url);
      url.searchParams.append("jwt", jwt);
      if (this.appId) {
        url.searchParams.append("sm-app", this.appId);
      }
      if (this.enableLegacy) {
        url.searchParams.append("sm-enable-legacy-rt", "true");
      }

      // A new connection starts a new session: do not let a sequence number
      // from a previous session leak into this one's EndOfStream message.
      this.lastAudioAddedSeqNo = 0;

      this.socket = new ws.WebSocket(url.toString());

      const notifySocketState = () => {
        this.dispatchTypedEvent(
          "socketStateChange",
          new SocketStateChangeEvent(this.socketState)
        );
      };

      // Announce the initial ("connecting") state.
      notifySocketState();

      this.socket.addEventListener(
        "open",
        () => {
          // Listeners must also learn about the transition to "open".
          notifySocketState();
          resolve();
        },
        { once: true }
      );

      this.socket.addEventListener("error", (error) => {
        notifySocketState();
        // No-op if the promise already settled (error after a successful open).
        reject(error);
      });

      this.socket.addEventListener("close", () => {
        notifySocketState();
      });

      this.socket.addEventListener("message", (socketMessage) => {
        let data;
        try {
          data = JSON.parse(socketMessage.data);
        } catch (error) {
          // A malformed frame must not throw out of the socket listener;
          // treat it like any other unrecognised message.
          console.warn("message is not valid JSON: ", error);
          return;
        }
        if (!dataIsRealtimeTranscriptionMessage(data)) {
          console.warn(
            "message does not look like a valid message: ",
            JSON.stringify(data)
          );
          return;
        }
        if (data.message === "AudioAdded") {
          this.lastAudioAddedSeqNo = data.seq_no;
        }
        this.dispatchTypedEvent(
          "receiveMessage",
          new ReceiveMessageEvent(data)
        );
      });
    });
  }

  /**
   * Serialises `message` as JSON, sends it, then dispatches a `sendMessage` event.
   *
   * @param {object} message Message to send.
   * @throws {SpeechmaticsRealtimeError} If `connect()` has not created a socket yet.
   */
  sendMessage(message) {
    if (!this.socket) {
      throw new SpeechmaticsRealtimeError("Client socket not initialized");
    }
    this.socket.send(JSON.stringify(message));
    this.dispatchTypedEvent("sendMessage", new SendMessageEvent(message));
  }

  /**
   * Sends an audio chunk over the socket as-is (no JSON wrapping, no event).
   *
   * @param data Audio payload passed straight to the socket's `send`.
   * @throws {SpeechmaticsRealtimeError} If there is no socket or it is not open.
   */
  sendAudio(data) {
    if (!this.socket || this.socket.readyState !== this.socket.OPEN) {
      throw new SpeechmaticsRealtimeError("Socket not ready to receive audio");
    }
    this.socket.send(data);
  }

  /**
   * Connects, sends `StartRecognition` and waits for the server to acknowledge it.
   *
   * @param {string} jwt Token forwarded to `connect()`.
   * @param {object} [config] Fields merged into the `StartRecognition` message.
   *   `audio_format` defaults to `defaultAudioFormat`; `message` is always
   *   forced to `"StartRecognition"`.
   * @returns {Promise<object>} The `RecognitionStarted` message.
   * @throws {Error} With the server's error `type` as message if an `Error`
   *   message arrives first.
   * @throws {SpeechmaticsRealtimeError} If no answer arrives within
   *   `RT_CLIENT_RESPONSE_TIMEOUT_MS`.
   */
  async start(jwt, config) {
    await this.connect(jwt);

    let onMessage;
    const waitForRecognitionStarted = new Promise((resolve, reject) => {
      onMessage = ({ data }) => {
        if (data.message === "RecognitionStarted") {
          resolve(data);
        } else if (data.message === "Error") {
          reject(new Error(data.type));
        }
      };
      this.addEventListener("receiveMessage", onMessage);
      const startRecognitionMessage = {
        audio_format: defaultAudioFormat,
        ...config,
        message: "StartRecognition"
      };
      this.sendMessage(startRecognitionMessage);
    });

    try {
      return await Promise.race([
        waitForRecognitionStarted,
        rejectAfter(RT_CLIENT_RESPONSE_TIMEOUT_MS, "RecognitionStarted")
      ]);
    } finally {
      // Whatever the outcome, this listener has served its purpose; leaving it
      // attached would leak one listener per start() call.
      this.removeEventListener("receiveMessage", onMessage);
    }
  }

  /**
   * Sends an `"EndOfStream"` message, resolving if acknowledged by an
   * `"EndOfTranscript"` from server, rejecting if not received.
   *
   * The socket is closed when `"EndOfTranscript"` arrives, even if that
   * happens after this method has already returned or timed out.
   *
   * @param {object} [options]
   * @param {boolean} [options.noTimeout] When truthy, return as soon as
   *   `"EndOfStream"` has been sent, without waiting for the acknowledgement.
   * @returns {Promise<void>}
   * @throws {SpeechmaticsRealtimeError} If the socket is not initialised, or
   *   if no `"EndOfTranscript"` arrives within `RT_CLIENT_RESPONSE_TIMEOUT_MS`
   *   (unless `noTimeout` is set).
   */
  async stopRecognition({ noTimeout } = {}) {
    let onMessage;
    const waitForEndOfTranscript = new Promise((resolve) => {
      onMessage = ({ data }) => {
        if (data.message !== "EndOfTranscript") {
          return;
        }
        // One acknowledgement is all we need; detach to avoid a listener leak.
        this.removeEventListener("receiveMessage", onMessage);
        this.socket?.close();
        resolve();
      };
      this.addEventListener("receiveMessage", onMessage);
    });

    // Send outside the promise above so a failure reaches the caller directly
    // instead of becoming an unhandled rejection when `noTimeout` is set.
    try {
      this.sendMessage({
        message: "EndOfStream",
        last_seq_no: this.lastAudioAddedSeqNo
      });
    } catch (error) {
      this.removeEventListener("receiveMessage", onMessage);
      throw error;
    }

    if (noTimeout) {
      return;
    }
    return Promise.race([
      waitForEndOfTranscript,
      rejectAfter(RT_CLIENT_RESPONSE_TIMEOUT_MS, "EndOfTranscript")
    ]);
  }
}
/**
 * Shape check for incoming payloads: true only for a non-null object that
 * exposes a `message` property (own or inherited) whose value is a string.
 *
 * @param {unknown} data Parsed payload to inspect.
 * @returns {boolean}
 */
function dataIsRealtimeTranscriptionMessage(data) {
  return (
    typeof data === "object" &&
    data !== null &&
    "message" in data &&
    typeof data.message === "string"
  );
}
// `audio_format` used for StartRecognition when the caller's config does not
// provide one.
const defaultAudioFormat = { type: "file" };

// How long the client waits for a server acknowledgement (RecognitionStarted /
// EndOfTranscript) before giving up, in milliseconds (10 seconds).
const RT_CLIENT_RESPONSE_TIMEOUT_MS = 10000;
/**
 * Error type raised by the real-time client (socket not ready, timeouts, ...).
 * Behaves like a regular `Error` whose `name` is `"SpeechmaticsRealtimeError"`.
 */
class SpeechmaticsRealtimeError extends Error {
  /**
   * @param {string} [message] Human-readable description.
   * @param {ErrorOptions} [options] Forwarded to `Error` (e.g. `{ cause }`).
   */
  constructor(message, options) {
    super(message, options);
    const errorName = "SpeechmaticsRealtimeError";
    this.name = errorName;
  }
}
/**
 * Creates a promise that never fulfils and rejects once `timeoutMs`
 * milliseconds have elapsed. Intended to be raced against the real work.
 *
 * @param {number} timeoutMs Delay before rejecting, in milliseconds.
 * @param {string} key Name of what was being waited for; used in the message.
 * @returns {Promise<never>} Rejects with a `SpeechmaticsRealtimeError`.
 */
function rejectAfter(timeoutMs, key) {
  return new Promise((_resolve, reject) => {
    const onTimeout = () => {
      const timeoutError = new SpeechmaticsRealtimeError(
        `Timed out after ${timeoutMs}ms waiting for ${key}`
      );
      reject(timeoutError);
    };
    setTimeout(onTimeout, timeoutMs);
  });
}
// ---------------------------------------------------------------------------
// String-constant lookup tables exported by this module.
// NOTE(review): the names suggest these mirror the API schema's enums (the
// `*MessageEnum` tables hold `message` discriminators) — confirm against the
// API reference before relying on them being exhaustive.
// ---------------------------------------------------------------------------

// --- Single-value `message` discriminators: transcripts and translations ---
const AddPartialTranscriptMessageEnum = {
  AddPartialTranscript: "AddPartialTranscript",
};
const AddPartialTranslationMessageEnum = {
  AddPartialTranslation: "AddPartialTranslation",
};
const AddTranscriptMessageEnum = {
  AddTranscript: "AddTranscript",
};
const AddTranslationMessageEnum = {
  AddTranslation: "AddTranslation",
};

// --- Single-value `message` discriminators: audio ---
const AudioAddedMessageEnum = {
  AudioAdded: "AudioAdded",
};
const AudioEventEndedMessageEnum = {
  AudioEventEnded: "AudioEventEnded",
};
const AudioEventStartedMessageEnum = {
  AudioEventStarted: "AudioEventStarted",
};

// --- Audio format options ---
const AudioFormatFileTypeEnum = {
  File: "file",
};
const AudioFormatRawTypeEnum = {
  Raw: "raw",
};
const AudioFormatRawEncodingEnum = {
  PcmF32le: "pcm_f32le",
  PcmS16le: "pcm_s16le",
  Mulaw: "mulaw",
};

// --- Diarization options ---
const DiarizationConfig = {
  None: "none",
  Speaker: "speaker",
};

// --- Single-value `message` discriminators: end of session ---
const EndOfStreamMessageEnum = {
  EndOfStream: "EndOfStream",
};
const EndOfTranscriptMessageEnum = {
  EndOfTranscript: "EndOfTranscript",
};

// --- Info messages ---
const InfoMessageEnum = {
  Info: "Info",
};
const InfoTypeEnum = {
  RecognitionQuality: "recognition_quality",
  ModelRedirect: "model_redirect",
  Deprecated: "deprecated",
  ConcurrentSessionUsage: "concurrent_session_usage",
};

// --- Max-delay mode options ---
const MaxDelayModeConfig = {
  Flexible: "flexible",
  Fixed: "fixed",
};

// --- Error messages ---
const ModelErrorMessageEnum = {
  Error: "Error",
};
const ModelErrorTypeEnum = {
  InvalidMessage: "invalid_message",
  InvalidModel: "invalid_model",
  InvalidConfig: "invalid_config",
  InvalidAudioType: "invalid_audio_type",
  NotAuthorised: "not_authorised",
  InsufficientFunds: "insufficient_funds",
  NotAllowed: "not_allowed",
  JobError: "job_error",
  DataError: "data_error",
  BufferError: "buffer_error",
  ProtocolError: "protocol_error",
  TimelimitExceeded: "timelimit_exceeded",
  QuotaExceeded: "quota_exceeded",
  UnknownError: "unknown_error",
};

// --- Operating point options ---
const OperatingPoint = {
  Standard: "standard",
  Enhanced: "enhanced",
};

// --- Union of the `message` discriminators listed in this module ---
const RealtimeMessageMessageEnum = {
  StartRecognition: "StartRecognition",
  AddAudio: "AddAudio",
  EndOfStream: "EndOfStream",
  SetRecognitionConfig: "SetRecognitionConfig",
  RecognitionStarted: "RecognitionStarted",
  AudioAdded: "AudioAdded",
  AddPartialTranscript: "AddPartialTranscript",
  AddTranscript: "AddTranscript",
  AddPartialTranslation: "AddPartialTranslation",
  AddTranslation: "AddTranslation",
  EndOfTranscript: "EndOfTranscript",
  AudioEventStarted: "AudioEventStarted",
  AudioEventEnded: "AudioEventEnded",
  Info: "Info",
  Warning: "Warning",
  Error: "Error",
};

// --- Recognition result attributes ---
const RecognitionDisplayDirectionEnum = {
  Ltr: "ltr",
  Rtl: "rtl",
};
const RecognitionResultTypeEnum = {
  Word: "word",
  Punctuation: "punctuation",
};
const RecognitionResultAttachesToEnum = {
  Next: "next",
  Previous: "previous",
  None: "none",
  Both: "both",
};

// --- Single-value `message` discriminators: session control ---
const RecognitionStartedMessageEnum = {
  RecognitionStarted: "RecognitionStarted",
};
const SetRecognitionConfigMessageEnum = {
  SetRecognitionConfig: "SetRecognitionConfig",
};
const StartRecognitionMessageEnum = {
  StartRecognition: "StartRecognition",
};

// --- Warning messages ---
const WarningMessageEnum = {
  Warning: "Warning",
};
const WarningTypeEnum = {
  DurationLimitExceeded: "duration_limit_exceeded",
};
/**
 * Fetches the feature-discovery document for a real-time region.
 *
 * @param {string} [region="eu2"] Region label used as the host prefix
 *   (`https://<region>.rt.speechmatics.com`).
 * @returns {Promise<any>} The parsed JSON body of the response.
 * @throws {SpeechmaticsRealtimeError} If the server answers with a non-2xx
 *   status, so an error page is never mistaken for feature data.
 */
async function getFeatures(region = "eu2") {
  const endpoint = `https://${region}.rt.speechmatics.com/v1/discovery/features`;
  const resp = await fetch(endpoint);
  if (!resp.ok) {
    throw new SpeechmaticsRealtimeError(
      `Failed to fetch features from ${endpoint}: ${resp.status} ${resp.statusText}`
    );
  }
  return resp.json();
}
// Public CommonJS API of this module: the client class, its event and error
// classes, the string-constant tables, and the `getFeatures` helper.
// NOTE(review): kept as one `exports.<name> = <name>` assignment per binding;
// tooling that statically detects CommonJS named exports presumably relies on
// this exact form — confirm before restructuring.
exports.AddPartialTranscriptMessageEnum = AddPartialTranscriptMessageEnum;
exports.AddPartialTranslationMessageEnum = AddPartialTranslationMessageEnum;
exports.AddTranscriptMessageEnum = AddTranscriptMessageEnum;
exports.AddTranslationMessageEnum = AddTranslationMessageEnum;
exports.AudioAddedMessageEnum = AudioAddedMessageEnum;
exports.AudioEventEndedMessageEnum = AudioEventEndedMessageEnum;
exports.AudioEventStartedMessageEnum = AudioEventStartedMessageEnum;
exports.AudioFormatFileTypeEnum = AudioFormatFileTypeEnum;
exports.AudioFormatRawEncodingEnum = AudioFormatRawEncodingEnum;
exports.AudioFormatRawTypeEnum = AudioFormatRawTypeEnum;
exports.DiarizationConfig = DiarizationConfig;
exports.EndOfStreamMessageEnum = EndOfStreamMessageEnum;
exports.EndOfTranscriptMessageEnum = EndOfTranscriptMessageEnum;
exports.InfoMessageEnum = InfoMessageEnum;
exports.InfoTypeEnum = InfoTypeEnum;
exports.MaxDelayModeConfig = MaxDelayModeConfig;
exports.ModelErrorMessageEnum = ModelErrorMessageEnum;
exports.ModelErrorTypeEnum = ModelErrorTypeEnum;
exports.OperatingPoint = OperatingPoint;
exports.RealtimeClient = RealtimeClient;
exports.RealtimeMessageMessageEnum = RealtimeMessageMessageEnum;
exports.ReceiveMessageEvent = ReceiveMessageEvent;
exports.RecognitionDisplayDirectionEnum = RecognitionDisplayDirectionEnum;
exports.RecognitionResultAttachesToEnum = RecognitionResultAttachesToEnum;
exports.RecognitionResultTypeEnum = RecognitionResultTypeEnum;
exports.RecognitionStartedMessageEnum = RecognitionStartedMessageEnum;
exports.SendMessageEvent = SendMessageEvent;
exports.SetRecognitionConfigMessageEnum = SetRecognitionConfigMessageEnum;
exports.SocketStateChangeEvent = SocketStateChangeEvent;
exports.SpeechmaticsRealtimeError = SpeechmaticsRealtimeError;
exports.StartRecognitionMessageEnum = StartRecognitionMessageEnum;
exports.WarningMessageEnum = WarningMessageEnum;
exports.WarningTypeEnum = WarningTypeEnum;
exports.getFeatures = getFeatures;