assemblyai
Version:
The AssemblyAI JavaScript SDK provides an easy-to-use interface for interacting with the AssemblyAI API, which supports async and real-time transcription, as well as the latest LeMUR models.
755 lines (739 loc) • 27.6 kB
JavaScript
'use strict';
var web = require('stream/web');
var ws = require('ws');
var fs = require('fs');
var stream = require('stream');
/**
 * Fetch options applied to every request sent through BaseService.fetch.
 * Per-call options are spread on top of these, so a caller can override any of them.
 */
const DEFAULT_FETCH_INIT = {
// "no-store" is the Fetch API cache mode that bypasses the HTTP cache.
cache: "no-store",
};
/**
 * Build the User-Agent string sent with API requests.
 *
 * The result is the platform user agent (when one was detected) followed by an
 * `AssemblyAI/1.0 (key=name/version ...)` product token built from the default
 * entries merged with the caller-supplied ones.
 *
 * @param userAgent - Extra `{ key: { name, version } }` entries to merge over the defaults.
 *   A falsy entry removes that key from the output. Passing `false` omits the
 *   AssemblyAI product token entirely.
 * @returns The assembled User-Agent string, without leading or doubled spaces.
 */
const buildUserAgent = (userAgent) => {
  const segments = [];
  if (defaultUserAgentString) {
    segments.push(defaultUserAgentString);
  }
  if (userAgent !== false) {
    const products = Object.entries({ ...defaultUserAgent, ...userAgent })
      // Disabled entries are dropped so they do not leave empty slots behind.
      .filter(([, item]) => Boolean(item))
      .map(([key, item]) => `${key}=${item.name}/${item.version}`)
      .join(" ");
    segments.push(`AssemblyAI/1.0 (${products})`);
  }
  return segments.join(" ");
};
/**
 * Work out which JavaScript runtime is hosting the SDK.
 *
 * A runtime is only reported when its name is not already present in the
 * platform user agent string. Later checks win over earlier ones
 * (Node, then Bun, then Deno).
 *
 * @param userAgentString - The platform user agent string detected so far.
 * @param runtime - The `process` and `Deno` globals to inspect; defaults to the
 *   real globals and exists so the detection can be exercised in isolation.
 * @returns `{ name, version }` for the detected runtime, or `undefined`.
 */
function detectRuntimeEnv(userAgentString, runtime = {
  process: typeof process !== "undefined" ? process : undefined,
  Deno: typeof Deno !== "undefined" ? Deno : undefined,
}) {
  let runtimeEnv;
  // `process` shims may lack `versions`, so read it defensively.
  const versions = runtime.process?.versions;
  if (versions?.node && !userAgentString.includes("Node")) {
    runtimeEnv = { name: "Node", version: versions.node };
  }
  if (versions?.bun && !userAgentString.includes("Bun")) {
    runtimeEnv = { name: "Bun", version: versions.bun };
  }
  // Deno is identified by its own version object, not by `process`.
  const denoVersion = runtime.Deno?.version?.deno;
  if (denoVersion && !userAgentString.includes("Deno")) {
    runtimeEnv = { name: "Deno", version: denoVersion };
  }
  return runtimeEnv;
}
// Platform user agent, when the host exposes one through `navigator`.
let defaultUserAgentString = "";
if (typeof navigator !== "undefined" && navigator.userAgent) {
  defaultUserAgentString += navigator.userAgent;
}
// Entries that are always part of the AssemblyAI product token.
const defaultUserAgent = {
  sdk: { name: "JavaScript", version: "4.11.0" },
};
const detectedRuntimeEnv = detectRuntimeEnv(defaultUserAgentString);
if (detectedRuntimeEnv) {
  defaultUserAgent.runtime_env = detectedRuntimeEnv;
}
/**
 * Common foundation for every service that talks to the HTTP API.
 */
class BaseService {
  /**
   * Set up the service.
   * @param params - Service configuration; `apiKey`, `baseUrl` and `userAgent` are read by this class.
   */
  constructor(params) {
    this.params = params;
    // `userAgent: false` opts out of sending a User-Agent header altogether.
    this.userAgent =
      params.userAgent === false
        ? undefined
        : buildUserAgent(params.userAgent || {});
  }
  /**
   * Send an authenticated request.
   * @param input - Absolute URL, or a path that is appended to `params.baseUrl`.
   * @param init - Fetch options layered over the default fetch options.
   * @returns The fetch response when its status is below 400.
   * @throws Error carrying the API `error` field, the raw response text, or an
   *   `HTTP Error: <status> <statusText>` message when the status is 400 or above.
   */
  async fetch(input, init) {
    const requestInit = { ...DEFAULT_FETCH_INIT, ...init };
    // Later spreads win: built-in headers, then default headers, then per-call headers.
    const requestHeaders = {
      Authorization: this.params.apiKey,
      "Content-Type": "application/json",
      ...DEFAULT_FETCH_INIT?.headers,
      ...requestInit.headers,
    };
    if (this.userAgent) {
      requestHeaders["User-Agent"] = this.userAgent;
    }
    requestInit.headers = requestHeaders;
    const url = input.startsWith("http") ? input : this.params.baseUrl + input;
    const response = await fetch(url, requestInit);
    if (response.status < 400) {
      return response;
    }
    const text = await response.text();
    if (!text) {
      throw new Error(`HTTP Error: ${response.status} ${response.statusText}`);
    }
    let parsed;
    try {
      parsed = JSON.parse(text);
    }
    catch {
      /* body is not JSON; fall back to the raw text below */
    }
    throw new Error(parsed?.error ? parsed.error : text);
  }
  /**
   * Send an authenticated request and parse the response body as JSON.
   * @param input - Absolute URL, or a path that is appended to `params.baseUrl`.
   * @param init - Fetch options layered over the default fetch options.
   * @returns The parsed JSON body.
   */
  async fetchJson(input, init) {
    const response = await this.fetch(input, init);
    return response.json();
  }
}
/**
 * Client for the LeMUR endpoints under `/lemur/v3`.
 * Every method resolves to the parsed JSON body of the API response.
 */
class LemurService extends BaseService {
  /**
   * POST the given parameters to the summary endpoint.
   * @param params - Request payload, serialized as JSON.
   */
  summary(params) {
    const body = JSON.stringify(params);
    return this.fetchJson("/lemur/v3/generate/summary", { method: "POST", body });
  }
  /**
   * POST the given parameters to the question-answer endpoint.
   * @param params - Request payload, serialized as JSON.
   */
  questionAnswer(params) {
    const body = JSON.stringify(params);
    return this.fetchJson("/lemur/v3/generate/question-answer", { method: "POST", body });
  }
  /**
   * POST the given parameters to the action-items endpoint.
   * @param params - Request payload, serialized as JSON.
   */
  actionItems(params) {
    const body = JSON.stringify(params);
    return this.fetchJson("/lemur/v3/generate/action-items", { method: "POST", body });
  }
  /**
   * POST the given parameters to the task endpoint.
   * @param params - Request payload, serialized as JSON.
   */
  task(params) {
    const body = JSON.stringify(params);
    return this.fetchJson("/lemur/v3/generate/task", { method: "POST", body });
  }
  /**
   * GET a previously submitted LeMUR request.
   * @param id - ID of the LeMUR request
   */
  getResponse(id) {
    const path = `/lemur/v3/${id}`;
    return this.fetchJson(path);
  }
  /**
   * Delete the data for a previously submitted LeMUR request.
   * @param id - ID of the LeMUR request
   */
  purgeRequestData(id) {
    const path = `/lemur/v3/${id}`;
    return this.fetchJson(path, { method: "DELETE" });
  }
}
/**
 * Open a WebSocket using the `ws` package.
 * @param url - The URL to connect to.
 * @param params - Options forwarded unchanged to the `ws` constructor.
 * @returns The newly constructed socket.
 */
const factory = (url, params) => {
  const socket = new ws(url, params);
  return socket;
};
/**
 * Named numeric codes for realtime failures.
 * Each value is used as a key of RealtimeErrorMessages, which the transcriber
 * consults with the WebSocket close code when a close event carries no reason.
 */
const RealtimeErrorType = {
BadSampleRate: 4000,
AuthFailed: 4001,
/**
* @deprecated Use InsufficientFunds or FreeTierUser instead
*/
// Shares the value 4002 with InsufficientFunds below.
InsufficientFundsOrFreeAccount: 4002,
InsufficientFunds: 4002,
FreeTierUser: 4003,
NonexistentSessionId: 4004,
SessionExpired: 4008,
ClosedSession: 4010,
RateLimited: 4029,
UniqueSessionViolation: 4030,
SessionTimeout: 4031,
AudioTooShort: 4032,
AudioTooLong: 4033,
AudioTooSmallToTranscode: 4034,
/**
* @deprecated Don't use
*/
BadJson: 4100,
BadSchema: 4101,
TooManyStreams: 4102,
Reconnected: 4103,
/**
* @deprecated Don't use
*/
// The only code in this table outside the 4xxx range.
ReconnectAttemptsExhausted: 1013,
WordBoostParameterParsingFailed: 4104,
};
/**
 * Default human-readable message for each RealtimeErrorType code.
 * The transcriber uses these as the close reason when the socket's close event
 * arrives without a reason of its own.
 */
const RealtimeErrorMessages = {
[RealtimeErrorType.BadSampleRate]: "Sample rate must be a positive integer",
[RealtimeErrorType.AuthFailed]: "Not Authorized",
// InsufficientFundsOrFreeAccount has the same numeric value, so it resolves to this entry too.
[RealtimeErrorType.InsufficientFunds]: "Insufficient funds",
[RealtimeErrorType.FreeTierUser]: "This feature is paid-only and requires you to add a credit card. Please visit https://app.assemblyai.com/ to add a credit card to your account.",
[RealtimeErrorType.NonexistentSessionId]: "Session ID does not exist",
[RealtimeErrorType.SessionExpired]: "Session has expired",
[RealtimeErrorType.ClosedSession]: "Session is closed",
[RealtimeErrorType.RateLimited]: "Rate limited",
[RealtimeErrorType.UniqueSessionViolation]: "Unique session violation",
[RealtimeErrorType.SessionTimeout]: "Session Timeout",
[RealtimeErrorType.AudioTooShort]: "Audio too short",
[RealtimeErrorType.AudioTooLong]: "Audio too long",
[RealtimeErrorType.AudioTooSmallToTranscode]: "Audio too small to transcode",
[RealtimeErrorType.BadJson]: "Bad JSON",
[RealtimeErrorType.BadSchema]: "Bad schema",
[RealtimeErrorType.TooManyStreams]: "Too many streams",
[RealtimeErrorType.Reconnected]: "This session has been reconnected. This WebSocket is no longer valid.",
[RealtimeErrorType.ReconnectAttemptsExhausted]: "Reconnect attempts exhausted",
[RealtimeErrorType.WordBoostParameterParsingFailed]: "Could not parse word boost parameter",
};
/**
 * Error passed to the `error` listener when the realtime API sends a message
 * that contains an `error` field.
 */
class RealtimeError extends Error {
  /**
   * @param message - Description of the failure.
   * @param options - Standard `Error` options, e.g. `{ cause }`.
   */
  constructor(message, options) {
    super(message, options);
    // Identify the error type in logs and stack traces instead of a generic "Error".
    this.name = "RealtimeError";
  }
}
// WebSocket endpoint used when the caller does not supply `realtimeUrl`.
const defaultRealtimeUrl = "wss://api.assemblyai.com/v2/realtime/ws";
// Control messages, serialized once so they can be sent as-is over the socket.
const forceEndOfUtteranceMessage = JSON.stringify({ force_end_utterance: true });
const terminateSessionMessage = JSON.stringify({ terminate_session: true });
/**
 * RealtimeTranscriber connects to the Streaming Speech-to-Text API and lets you transcribe audio in real-time.
 */
class RealtimeTranscriber {
  /**
   * Create a new RealtimeTranscriber.
   * @param params - Parameters to configure the RealtimeTranscriber
   * @throws Error when neither a temporary token nor an API key is supplied.
   */
  constructor(params) {
    // One listener per event name; registering again replaces the previous one.
    this.listeners = {};
    this.realtimeUrl = params.realtimeUrl ?? defaultRealtimeUrl;
    this.sampleRate = params.sampleRate ?? 16_000;
    this.wordBoost = params.wordBoost;
    this.encoding = params.encoding;
    this.endUtteranceSilenceThreshold = params.endUtteranceSilenceThreshold;
    this.disablePartialTranscripts = params.disablePartialTranscripts;
    if ("token" in params && params.token)
      this.token = params.token;
    if ("apiKey" in params && params.apiKey)
      this.apiKey = params.apiKey;
    if (!(this.token || this.apiKey)) {
      throw new Error("API key or temporary token is required.");
    }
  }
  /**
   * Build the WebSocket URL, including the session options as query parameters.
   * @returns The URL to connect to.
   * @throws Error when the configured URL does not use the `wss:` protocol.
   */
  connectionUrl() {
    const url = new URL(this.realtimeUrl);
    if (url.protocol !== "wss:") {
      throw new Error("Invalid protocol, must be wss");
    }
    const searchParams = new URLSearchParams();
    if (this.token) {
      searchParams.set("token", this.token);
    }
    searchParams.set("sample_rate", this.sampleRate.toString());
    if (this.wordBoost && this.wordBoost.length > 0) {
      searchParams.set("word_boost", JSON.stringify(this.wordBoost));
    }
    if (this.encoding) {
      searchParams.set("encoding", this.encoding);
    }
    searchParams.set("enable_extra_session_information", "true");
    if (this.disablePartialTranscripts) {
      searchParams.set("disable_partial_transcripts", this.disablePartialTranscripts.toString());
    }
    url.search = searchParams.toString();
    return url;
  }
  /**
   * Add a listener for an event.
   * @param event - The event to listen for.
   * @param listener - The function to call when the event is emitted.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  on(event, listener) {
    this.listeners[event] = listener;
  }
  /**
   * Connect to the server and begin a new session.
   * @returns A promise that resolves when the connection is established and the session begins.
   *   It rejects with "Already connected" when a socket already exists.
   */
  connect() {
    return new Promise((resolve) => {
      if (this.socket) {
        throw new Error("Already connected");
      }
      const url = this.connectionUrl();
      if (this.token) {
        // The token travels in the query string, so no auth header is needed.
        this.socket = factory(url.toString());
      }
      else {
        this.socket = factory(url.toString(), {
          headers: { Authorization: this.apiKey },
        });
      }
      this.socket.binaryType = "arraybuffer";
      this.socket.onopen = () => {
        if (this.endUtteranceSilenceThreshold === undefined ||
          this.endUtteranceSilenceThreshold === null) {
          return;
        }
        this.configureEndUtteranceSilenceThreshold(this.endUtteranceSilenceThreshold);
      };
      this.socket.onclose = ({ code, reason }) => {
        if (!reason) {
          if (code in RealtimeErrorMessages) {
            reason = RealtimeErrorMessages[code];
          }
        }
        // A closed socket can never deliver "SessionTerminated", so release any
        // close() call that is still waiting for it instead of letting it hang.
        this.sessionTerminatedResolve?.();
        this.listeners.close?.(code, reason);
      };
      this.socket.onerror = (event) => {
        if (event.error)
          this.listeners.error?.(event.error);
        else
          this.listeners.error?.(new Error(event.message));
      };
      this.socket.onmessage = ({ data }) => {
        const message = JSON.parse(data.toString());
        if ("error" in message) {
          this.listeners.error?.(new RealtimeError(message.error));
          return;
        }
        switch (message.message_type) {
          case "SessionBegins": {
            const openObject = {
              sessionId: message.session_id,
              expiresAt: new Date(message.expires_at),
            };
            resolve(openObject);
            this.listeners.open?.(openObject);
            break;
          }
          case "PartialTranscript": {
            // message.created is actually a string when coming from the socket
            message.created = new Date(message.created);
            this.listeners.transcript?.(message);
            this.listeners["transcript.partial"]?.(message);
            break;
          }
          case "FinalTranscript": {
            // message.created is actually a string when coming from the socket
            message.created = new Date(message.created);
            this.listeners.transcript?.(message);
            this.listeners["transcript.final"]?.(message);
            break;
          }
          case "SessionInformation": {
            this.listeners.session_information?.(message);
            break;
          }
          case "SessionTerminated": {
            this.sessionTerminatedResolve?.();
            break;
          }
        }
      };
    });
  }
  /**
   * Send audio data to the server.
   * @param audio - The audio data to send to the server.
   * @throws Error when the socket is not open.
   */
  sendAudio(audio) {
    this.send(audio);
  }
  /**
   * Create a writable stream that can be used to send audio data to the server.
   * @returns A writable stream that can be used to send audio data to the server.
   */
  stream() {
    return new web.WritableStream({
      write: (chunk) => {
        this.sendAudio(chunk);
      },
    });
  }
  /**
   * Manually end an utterance
   * @throws Error when the socket is not open.
   */
  forceEndUtterance() {
    this.send(forceEndOfUtteranceMessage);
  }
  /**
   * Configure the threshold for how long to wait before ending an utterance. Default is 700ms.
   * @param threshold - The duration of the end utterance silence threshold in milliseconds.
   * This value must be an integer between 0 and 20_000.
   * @throws Error when the socket is not open.
   */
  configureEndUtteranceSilenceThreshold(threshold) {
    this.send(`{"end_utterance_silence_threshold":${threshold}}`);
  }
  /**
   * Send raw data over the socket.
   * @param data - The payload to send.
   * @throws Error when there is no socket or it is not in the OPEN state.
   */
  send(data) {
    if (!this.socket || this.socket.readyState !== this.socket.OPEN) {
      throw new Error("Socket is not open for communication");
    }
    this.socket.send(data);
  }
  /**
   * Close the connection to the server.
   * @param waitForSessionTermination - If true, the method will wait for the session to be terminated before closing the connection.
   * While waiting for the session to be terminated, you will receive the final transcript and session information.
   * The wait also ends if the socket closes before the termination message arrives.
   */
  async close(waitForSessionTermination = true) {
    // Keep a local reference: `this.socket` may be cleared while we are awaiting.
    const socket = this.socket;
    if (socket) {
      if (socket.readyState === socket.OPEN) {
        if (waitForSessionTermination) {
          const sessionTerminatedPromise = new Promise((resolve) => {
            this.sessionTerminatedResolve = resolve;
          });
          socket.send(terminateSessionMessage);
          await sessionTerminatedPromise;
        }
        else {
          socket.send(terminateSessionMessage);
        }
      }
      if (socket.removeAllListeners)
        socket.removeAllListeners();
      socket.close();
    }
    this.sessionTerminatedResolve = undefined;
    this.listeners = {};
    this.socket = undefined;
  }
}
/**
 * Empty subclass of RealtimeTranscriber; it adds no members of its own.
 * @deprecated Use RealtimeTranscriber instead
 */
class RealtimeService extends RealtimeTranscriber {
}
/**
 * Creates RealtimeTranscriber instances and temporary realtime tokens using
 * the credentials this factory was configured with.
 */
class RealtimeTranscriberFactory extends BaseService {
  /**
   * @param params - Service parameters; the `apiKey` is reused for transcribers
   *   created without credentials of their own.
   */
  constructor(params) {
    super(params);
    this.rtFactoryParams = params;
  }
  /**
   * @deprecated Use transcriber(...) instead
   */
  createService(params) {
    return this.transcriber(params);
  }
  /**
   * Create a RealtimeTranscriber.
   * @param params - Transcriber options. When neither `token` nor `apiKey` is
   *   set, the factory's API key is filled in on a copy of the options.
   * @returns The new transcriber.
   */
  transcriber(params) {
    const options = { ...params };
    const suppliedCredential = options.token || options.apiKey;
    if (suppliedCredential) {
      return new RealtimeTranscriber(options);
    }
    options.apiKey = this.rtFactoryParams.apiKey;
    return new RealtimeTranscriber(options);
  }
  /**
   * Request a temporary token from `/v2/realtime/token`.
   * @param params - Request payload, serialized as JSON.
   * @returns A promise that resolves to the `token` field of the response.
   */
  async createTemporaryToken(params) {
    const body = JSON.stringify(params);
    const response = await this.fetchJson("/v2/realtime/token", { method: "POST", body });
    return response.token;
  }
}
/**
 * Empty subclass of RealtimeTranscriberFactory; it adds no members of its own.
 * @deprecated Use RealtimeTranscriberFactory instead
 */
class RealtimeServiceFactory extends RealtimeTranscriberFactory {
}
/**
 * Resolve an audio reference to a local file path.
 *
 * @param path - A local path, a `file:` URL, an `http(s)://` URL, or a `data:` URI.
 * @returns The local path (with any `file://` or `file:` prefix removed), or
 *   `null` when the reference is a remote URL or a data URI and therefore not a
 *   file on disk.
 */
function getPath(path) {
  // Require the full scheme separator so that local files whose names merely
  // start with "http" (e.g. "http_call.mp3") are still treated as files.
  if (/^https?:\/\//i.test(path))
    return null;
  if (path.startsWith("data:"))
    return null;
  if (path.startsWith("file://"))
    return path.substring(7);
  if (path.startsWith("file:"))
    return path.substring(5);
  return path;
}
/**
 * Client for the transcript endpoints under `/v2/transcript`.
 */
class TranscriptService extends BaseService {
  /**
   * @param params - Service parameters forwarded to the base service.
   * @param files - File service used to upload local audio before transcribing it.
   */
  constructor(params, files) {
    super(params);
    this.files = files;
  }
  /**
   * Transcribe an audio file. This will create a transcript and wait until the transcript status is "completed" or "error".
   * @param params - The parameters to transcribe an audio file.
   * @param options - The options to transcribe an audio file.
   * @returns A promise that resolves to the transcript. The transcript status is "completed" or "error".
   */
  async transcribe(params, options) {
    const transcript = await this.submit(params);
    return await this.waitUntilReady(transcript.id, options);
  }
  /**
   * Submits a transcription job for an audio file. This will not wait until the transcript status is "completed" or "error".
   * @param params - The parameters to start the transcription of an audio file.
   * @returns A promise that resolves to the queued transcript.
   */
  async submit(params) {
    let audioUrl;
    let transcriptParams = undefined;
    if ("audio" in params) {
      const { audio, ...audioTranscriptParams } = params;
      if (typeof audio === "string") {
        const path = getPath(audio);
        if (path !== null) {
          // audio is local path, upload local file
          audioUrl = await this.files.upload(path);
        }
        else {
          if (audio.startsWith("data:")) {
            audioUrl = await this.files.upload(audio);
          }
          else {
            // audio is not a local path, and not a data-URI, assume it's a normal URL
            audioUrl = audio;
          }
        }
      }
      else {
        // audio is of uploadable type
        audioUrl = await this.files.upload(audio);
      }
      transcriptParams = { ...audioTranscriptParams, audio_url: audioUrl };
    }
    else {
      transcriptParams = params;
    }
    const data = await this.fetchJson("/v2/transcript", {
      method: "POST",
      body: JSON.stringify(transcriptParams),
    });
    return data;
  }
  /**
   * Create a transcript.
   * @param params - The parameters to create a transcript. The object is not modified.
   * @param options - The options used for creating the new transcript.
   * @returns A promise that resolves to the transcript.
   * @deprecated Use `transcribe` instead to transcribe a audio file that includes polling, or `submit` to transcribe a audio file without polling.
   */
  async create(params, options) {
    let requestParams = params;
    const path = getPath(params.audio_url);
    if (path !== null) {
      const uploadUrl = await this.files.upload(path);
      // Work on a copy so the caller's object keeps its original audio_url.
      requestParams = { ...params, audio_url: uploadUrl };
    }
    const data = await this.fetchJson("/v2/transcript", {
      method: "POST",
      body: JSON.stringify(requestParams),
    });
    if (options?.poll ?? true) {
      return await this.waitUntilReady(data.id, options);
    }
    return data;
  }
  /**
   * Wait until the transcript ready, either the status is "completed" or "error".
   * @param transcriptId - The ID of the transcript.
   * @param options - The options to wait until the transcript is ready.
   *   `pollingInterval` defaults to 3000 ms; `pollingTimeout` defaults to -1, and
   *   a timeout is only enforced when it is greater than 0.
   * @returns A promise that resolves to the transcript. The transcript status is "completed" or "error".
   * @throws Error with the message "Polling timeout" when the timeout elapses first.
   */
  async waitUntilReady(transcriptId, options) {
    const pollingInterval = options?.pollingInterval ?? 3_000;
    const pollingTimeout = options?.pollingTimeout ?? -1;
    const startTime = Date.now();
    // eslint-disable-next-line no-constant-condition
    while (true) {
      const transcript = await this.get(transcriptId);
      if (transcript.status === "completed" || transcript.status === "error") {
        return transcript;
      }
      else if (pollingTimeout > 0 &&
        Date.now() - startTime > pollingTimeout) {
        throw new Error("Polling timeout");
      }
      else {
        await new Promise((resolve) => setTimeout(resolve, pollingInterval));
      }
    }
  }
  /**
   * Retrieve a transcript.
   * @param id - The identifier of the transcript.
   * @returns A promise that resolves to the transcript.
   */
  get(id) {
    return this.fetchJson(`/v2/transcript/${id}`);
  }
  /**
   * Retrieves a page of transcript listings.
   * @param params - The parameters to filter the transcript list by, or the URL to retrieve the transcript list from.
   * @returns A promise that resolves to the page, with each item's `created` and
   *   (when present) `completed` converted to `Date` objects.
   */
  async list(params) {
    let url = "/v2/transcript";
    if (typeof params === "string") {
      url = params;
    }
    else if (params) {
      url = `${url}?${new URLSearchParams(Object.keys(params).map((key) => [
        key,
        params[key]?.toString() || "",
      ]))}`;
    }
    const data = await this.fetchJson(url);
    for (const transcriptListItem of data.transcripts) {
      transcriptListItem.created = new Date(transcriptListItem.created);
      if (transcriptListItem.completed) {
        transcriptListItem.completed = new Date(transcriptListItem.completed);
      }
    }
    return data;
  }
  /**
   * Delete a transcript
   * @param id - The identifier of the transcript.
   * @returns A promise that resolves to the transcript.
   */
  delete(id) {
    return this.fetchJson(`/v2/transcript/${id}`, { method: "DELETE" });
  }
  /**
   * Search through the transcript for a specific set of keywords.
   * You can search for individual words, numbers, or phrases containing up to five words or numbers.
   * @param id - The identifier of the transcript.
   * @param words - Keywords to search for.
   * @returns A promise that resolves to the sentences.
   */
  wordSearch(id, words) {
    const params = new URLSearchParams({ words: words.join(",") });
    return this.fetchJson(`/v2/transcript/${id}/word-search?${params.toString()}`);
  }
  /**
   * Retrieve all sentences of a transcript.
   * @param id - The identifier of the transcript.
   * @returns A promise that resolves to the sentences.
   */
  sentences(id) {
    return this.fetchJson(`/v2/transcript/${id}/sentences`);
  }
  /**
   * Retrieve all paragraphs of a transcript.
   * @param id - The identifier of the transcript.
   * @returns A promise that resolves to the paragraphs.
   */
  paragraphs(id) {
    return this.fetchJson(`/v2/transcript/${id}/paragraphs`);
  }
  /**
   * Retrieve subtitles of a transcript.
   * @param id - The identifier of the transcript.
   * @param format - The format of the subtitles.
   * @param chars_per_caption - The maximum number of characters per caption.
   * @returns A promise that resolves to the subtitles text.
   */
  async subtitles(id, format = "srt", chars_per_caption) {
    let url = `/v2/transcript/${id}/${format}`;
    if (chars_per_caption) {
      const params = new URLSearchParams();
      params.set("chars_per_caption", chars_per_caption.toString());
      url += `?${params.toString()}`;
    }
    const response = await this.fetch(url);
    return await response.text();
  }
  /**
   * Retrieve the redacted audio URL of a transcript.
   * @param id - The identifier of the transcript.
   * @returns A promise that resolves to the details of the redacted audio.
   * @deprecated Use `redactedAudio` instead.
   */
  redactions(id) {
    return this.redactedAudio(id);
  }
  /**
   * Retrieve the redacted audio URL of a transcript.
   * @param id - The identifier of the transcript.
   * @returns A promise that resolves to the details of the redacted audio.
   */
  redactedAudio(id) {
    return this.fetchJson(`/v2/transcript/${id}/redacted-audio`);
  }
  /**
   * Retrieve the redacted audio file of a transcript.
   * @param id - The identifier of the transcript.
   * @returns A promise that resolves to the fetch HTTP response of the redacted audio file.
   * @throws Error when the redacted audio status is not "redacted_audio_ready",
   *   or when downloading the file fails.
   */
  async redactedAudioFile(id) {
    const { redacted_audio_url, status } = await this.redactedAudio(id);
    if (status !== "redacted_audio_ready") {
      throw new Error(`Redacted audio status is ${status}`);
    }
    // The file is downloaded with the global fetch, not this service's fetch method.
    const response = await fetch(redacted_audio_url);
    if (!response.ok) {
      throw new Error(`Failed to fetch redacted audio: ${response.statusText}`);
    }
    return {
      arrayBuffer: response.arrayBuffer.bind(response),
      blob: response.blob.bind(response),
      body: response.body,
      bodyUsed: response.bodyUsed,
    };
  }
}
/**
 * Open a local file as a web ReadableStream.
 * @param path - Path of the file to read.
 * @returns A promise that resolves to a web stream over the file's contents.
 */
const readFile = async (path) => {
  const nodeStream = fs.createReadStream(path);
  return stream.Readable.toWeb(nodeStream);
};
/**
 * Client for the file upload endpoint.
 */
class FileService extends BaseService {
  /**
   * Upload a local file to AssemblyAI.
   * @param input - The local file path to upload, or a stream or buffer of the file to upload.
   *   A string starting with `data:` is converted to a Blob instead of being read from disk.
   * @returns A promise that resolves to the uploaded file URL.
   */
  async upload(input) {
    // Non-string input is sent as the request body unchanged.
    let fileData = input;
    if (typeof input === "string") {
      fileData = input.startsWith("data:")
        ? dataUrlToBlob(input)
        : await readFile(input);
    }
    const { upload_url } = await this.fetchJson("/v2/upload", {
      method: "POST",
      body: fileData,
      headers: {
        "Content-Type": "application/octet-stream",
      },
      duplex: "half",
    });
    return upload_url;
  }
}
/**
 * Convert a `data:` URL into a Blob.
 *
 * Both base64 payloads (`data:audio/wav;base64,...`) and percent-encoded
 * payloads (`data:text/plain,hello%20world`) are supported.
 *
 * @param dataUrl - The data URL to convert.
 * @returns A Blob holding the decoded bytes, typed with the media type that
 *   precedes the first `;` of the header (empty when the URL declares none).
 * @throws Error when the input is not a `data:` URL or has no `,` separator.
 */
function dataUrlToBlob(dataUrl) {
  // Only the first comma separates header from payload; later commas are data.
  const separatorIndex = dataUrl.indexOf(",");
  if (!dataUrl.startsWith("data:") || separatorIndex === -1) {
    throw new Error("Invalid data URL");
  }
  const header = dataUrl.slice("data:".length, separatorIndex);
  const payload = dataUrl.slice(separatorIndex + 1);
  const [mime, ...attributes] = header.split(";");
  const isBase64 = attributes.some((attribute) => attribute.trim().toLowerCase() === "base64");
  let bytes;
  if (isBase64) {
    const binary = atob(payload);
    bytes = Uint8Array.from(binary, (char) => char.charCodeAt(0));
  }
  else {
    // Percent-decode at the byte level so escaped binary data survives intact.
    const raw = new TextEncoder().encode(payload);
    const isHexDigit = (byte) => (byte >= 0x30 && byte <= 0x39) ||
      (byte >= 0x41 && byte <= 0x46) ||
      (byte >= 0x61 && byte <= 0x66);
    const decoded = [];
    for (let i = 0; i < raw.length; i++) {
      const isEscape = raw[i] === 0x25 &&
        i + 2 < raw.length &&
        isHexDigit(raw[i + 1]) &&
        isHexDigit(raw[i + 2]);
      if (isEscape) {
        decoded.push(Number.parseInt(String.fromCharCode(raw[i + 1], raw[i + 2]), 16));
        i += 2;
      }
      else {
        decoded.push(raw[i]);
      }
    }
    bytes = Uint8Array.from(decoded);
  }
  return new Blob([bytes], { type: mime });
}
// API origin used when the caller does not supply a `baseUrl`.
const defaultBaseUrl = "https://api.assemblyai.com";
/**
 * Entry point of the SDK: groups the file, transcript, LeMUR and realtime services.
 */
class AssemblyAI {
  /**
   * Create a new AssemblyAI client.
   * @param params - The parameters for the service, including the API key and base URL, if any.
   *   The object is not modified; the services receive a copy with the normalized base URL.
   */
  constructor(params) {
    let baseUrl = params.baseUrl || defaultBaseUrl;
    // Request paths start with "/", so drop one trailing slash to avoid "//".
    if (baseUrl.endsWith("/")) {
      baseUrl = baseUrl.slice(0, -1);
    }
    const serviceParams = { ...params, baseUrl };
    this.files = new FileService(serviceParams);
    this.transcripts = new TranscriptService(serviceParams, this.files);
    this.lemur = new LemurService(serviceParams);
    this.realtime = new RealtimeTranscriberFactory(serviceParams);
  }
}
exports.AssemblyAI = AssemblyAI;
exports.FileService = FileService;
exports.LemurService = LemurService;
exports.RealtimeService = RealtimeService;
exports.RealtimeServiceFactory = RealtimeServiceFactory;
exports.RealtimeTranscriber = RealtimeTranscriber;
exports.RealtimeTranscriberFactory = RealtimeTranscriberFactory;
exports.TranscriptService = TranscriptService;