/*
 * @coursebuilder/core
 * Version:
 * Core package for Course Builder
 * 224 lines (221 loc) • 7.25 kB
 * JavaScript
 */
import {
z
} from "./chunk-JLNB6NRA.js";
import {
__name
} from "./chunk-VLQXSCFN.js";
// src/providers/deepgram.ts
/**
 * Default builder for the callback URL that is handed to Deepgram.
 *
 * Serializes `params` with URLSearchParams and appends the encoded query to
 * `baseUrl` after a literal "?". No check is made for a query string that
 * may already be present in `baseUrl`.
 *
 * @param {{ baseUrl: string, params: object }} input - Base URL plus the
 *   key/value pairs to encode into the query string.
 * @returns {string} `baseUrl`, a "?", and the encoded parameters.
 */
var defaultGetCallbackUrl = /* @__PURE__ */ __name((input) => {
  const { baseUrl, params } = input;
  const encodedQuery = new URLSearchParams(params).toString();
  return `${baseUrl}?${encodedQuery}`;
}, "defaultGetCallbackUrl");
/**
 * Creates the Deepgram transcription provider descriptor.
 *
 * @param {object} options - Provider settings. This function reads `apiKey`,
 *   `callbackUrl` and the optional `getCallbackUrl` override; the object itself
 *   is also exposed unchanged as `options` on the returned provider.
 * @returns {object} Provider object with identity fields (`id`, `name`,
 *   `type`), the configured `callbackUrl` / `apiKey`, and the
 *   `initiateTranscription` / `handleCallback` functions.
 */
function Deepgram(options) {
  const listenEndpoint = `https://api.deepgram.com/v1/listen`;
  const utteranceSplitThreshold = 0.5;

  // Define how to initiate a transcription request to Deepgram.
  // Resolves with the parsed JSON body of Deepgram's response; the HTTP status
  // is not inspected here.
  const initiateTranscription = async (transcriptOptions) => {
    const buildCallbackUrl = options.getCallbackUrl || defaultGetCallbackUrl;
    const callback = buildCallbackUrl({
      baseUrl: `${options.callbackUrl}`,
      params: {
        videoResourceId: transcriptOptions.resourceId
      }
    });

    // Pair order is kept so the serialized query string stays the same.
    const query = new URLSearchParams([
      ["model", "whisper-large"],
      ["punctuate", "true"],
      ["paragraphs", "true"],
      ["utterances", "true"],
      ["utt_split", String(utteranceSplitThreshold)],
      ["callback", callback]
    ]);

    const requestUrl = `${listenEndpoint}?${query.toString()}`;
    const requestInit = {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Token ${options.apiKey}`
      },
      body: JSON.stringify({
        url: transcriptOptions.mediaUrl
      })
    };

    const response = await fetch(requestUrl, requestInit);
    return await response.json();
  };

  // Define how to handle the callback with the transcription result:
  // derive the cue-level SRT, the word-level SRT and the timestamped transcript.
  const handleCallback = (callbackData) => {
    const cueLevelSrt = srtFromTranscriptResult(callbackData);
    const perWordSrt = wordLevelSrtFromTranscriptResult(callbackData);
    const timestampedTranscript = transcriptAsParagraphsWithTimestamps(callbackData);
    return {
      srt: cueLevelSrt,
      transcript: timestampedTranscript,
      wordLevelSrt: perWordSrt
    };
  };

  return {
    id: "deepgram",
    name: "Deepgram",
    type: "transcription",
    callbackUrl: options.callbackUrl,
    apiKey: options.apiKey,
    // Additional configuration options can be added here based on Deepgram's API requirements
    options,
    initiateTranscription,
    handleCallback
  };
}
__name(Deepgram, "Deepgram");
// One sentence inside a paragraph: its text plus numeric `start` / `end` marks.
var SentenceSchema = z.object({
  end: z.number(),
  start: z.number(),
  text: z.string()
});
// A paragraph: its full text together with the list of sentences it contains.
var ParagraphSchema = z.object({
  text: z.string(),
  sentences: z.array(SentenceSchema)
});
// Schema for a single recognized word and its timing information.
// `start` and `end` are the values srtProcessor hands to convertTimeSrt, which
// multiplies them by 1e3 before building a Date, i.e. treats them as seconds.
var WordSchema = z.object({
word: z.string(),
start: z.number(),
end: z.number(),
confidence: z.number(),
// The text srtProcessor writes into subtitle cues.
punctuated_word: z.string()
});
// One transcription alternative: the flat transcript, the word list, and an
// optional paragraph breakdown.
var AlternativeSchema = z.object({
  transcript: z.string(),
  paragraphs: z.object({
    paragraphs: z.array(ParagraphSchema)
  }).optional(),
  words: z.array(WordSchema)
});
// A channel carries a list of alternatives.
var ChannelSchema = z.object({
  alternatives: z.array(AlternativeSchema)
});
// Top-level results shape consumed by the transcript/SRT helpers in this file,
// which only ever read `channels[0].alternatives[0]`.
var DeepgramResultsSchema = z.object({
  channels: z.array(ChannelSchema)
});
/**
 * Builds cue-level SRT text from the first alternative of the first channel.
 *
 * @param {object} results - Object with a `channels` array (see DeepgramResultsSchema).
 * @returns {string} Whatever srtProcessor returns for that alternative's
 *   `words`; `undefined` is passed on when the channel or alternative is absent.
 */
function srtFromTranscriptResult(results) {
  const firstChannel = results.channels[0];
  const firstAlternative = firstChannel?.alternatives[0];
  return srtProcessor(firstAlternative?.words);
}
__name(srtFromTranscriptResult, "srtFromTranscriptResult");
/**
 * Builds word-level SRT text (one cue per word) from the first alternative of
 * the first channel.
 *
 * @param {object} results - Object with a `channels` array (see DeepgramResultsSchema).
 * @returns {string} Whatever srtProcessor returns for that alternative's
 *   `words` with word-level timestamps enabled.
 */
function wordLevelSrtFromTranscriptResult(results) {
  const firstChannel = results.channels[0];
  const firstAlternative = firstChannel?.alternatives[0];
  const useWordLevelTimestamps = true;
  return srtProcessor(firstAlternative?.words, useWordLevelTimestamps);
}
__name(wordLevelSrtFromTranscriptResult, "wordLevelSrtFromTranscriptResult");
/**
 * Formats a number of seconds as a zero-padded `HH:MM:SS` string (UTC clock
 * fields of `new Date(seconds * 1000)`; fractional seconds are dropped).
 *
 * @param {number} inputSeconds - Offset in seconds. 0 is a valid offset.
 * @returns {string} `HH:MM:SS`, or the placeholder `--:--:--` when the input
 *   is not a finite number (undefined, null, NaN, Infinity, non-numbers).
 */
function convertTime(inputSeconds) {
  // A plain falsy check would also reject 0, which is a real timestamp (the
  // very start of the media), so test for "not a finite number" instead.
  if (typeof inputSeconds !== "number" || !Number.isFinite(inputSeconds)) {
    return "--:--:--";
  }
  const date = new Date(inputSeconds * 1e3);
  const hours = String(date.getUTCHours()).padStart(2, "0");
  const minutes = String(date.getUTCMinutes()).padStart(2, "0");
  const seconds = String(date.getUTCSeconds()).padStart(2, "0");
  return `${hours}:${minutes}:${seconds}`;
}
__name(convertTime, "convertTime");
/**
 * Shortens an `HH:MM:SS` style string by dropping the hours part when it is
 * exactly "00".
 *
 * @param {string} str - Colon-separated time string; only the first three
 *   segments are used.
 * @returns {string} `MM:SS` when the hours segment is "00", otherwise
 *   `HH:MM:SS` rebuilt from the first three segments.
 */
function formatTimeString(str) {
  const segments = str.split(":");
  const hours = segments[0];
  const minutes = segments[1];
  const seconds = segments[2];
  return hours === "00" ? `${minutes}:${seconds}` : `${hours}:${minutes}:${seconds}`;
}
__name(formatTimeString, "formatTimeString");
/**
 * Renders a transcript as one line per paragraph, each prefixed with the
 * paragraph's start time: `[<time>] <text>\n`.
 *
 * Only the first alternative of the first channel is used. When it carries a
 * `paragraphs` breakdown, those paragraphs are rendered; otherwise, if it has
 * a flat `transcript`, that text is rendered as a single paragraph starting
 * at 0 and ending at the last word's `end` (0 when there are no words).
 *
 * @param {object} results - Object with a `channels` array (see DeepgramResultsSchema).
 * @returns {string} The formatted transcript, or "" when there is nothing to render.
 */
function transcriptAsParagraphsWithTimestamps(results) {
  const alternative = results.channels[0]?.alternatives[0];
  let paragraphs = [];
  if (alternative?.paragraphs) {
    paragraphs = alternative.paragraphs.paragraphs;
  } else if (alternative?.transcript) {
    const text = alternative.transcript;
    // `words` may be absent on this path; guard the array itself, not just
    // its length, so a missing list yields end = 0 instead of a TypeError.
    const words = alternative.words;
    const lastWord = words?.[words.length - 1];
    paragraphs = [
      {
        text,
        sentences: [
          {
            text,
            start: 0,
            end: lastWord?.end ?? 0
          }
        ]
      }
    ];
  }
  const lines = paragraphs?.map((paragraph) => {
    const startTime = formatTimeString(convertTime(paragraph?.sentences?.[0]?.start));
    // Fall back to the paragraph's own text when it has no sentence list, the
    // same case the start-time lookup above already tolerates.
    const text = paragraph?.sentences
      ? paragraph.sentences.map((sentence) => sentence.text).join(" ")
      : paragraph?.text ?? "";
    return `[${startTime}] ${text}\n`;
  });
  return lines?.join("") ?? "";
}
__name(transcriptAsParagraphsWithTimestamps, "transcriptAsParagraphsWithTimestamps");
/**
 * Formats a number of seconds as an SRT timecode `HH:MM:SS,mmm` (UTC clock
 * fields of `new Date(seconds * 1000)`).
 *
 * @param {number} inputSeconds - Offset in seconds. 0 is a valid offset.
 * @returns {string} `HH:MM:SS,mmm`, or the placeholder `--:--:--` when the
 *   input is not a finite number (undefined, null, NaN, Infinity, non-numbers).
 */
function convertTimeSrt(inputSeconds) {
  // A plain falsy check would also reject 0 and emit the placeholder for a
  // cue that starts at the very beginning, which is not a valid SRT timecode.
  if (typeof inputSeconds !== "number" || !Number.isFinite(inputSeconds)) {
    return "--:--:--";
  }
  const date = new Date(inputSeconds * 1e3);
  const hours = String(date.getUTCHours()).padStart(2, "0");
  const minutes = String(date.getUTCMinutes()).padStart(2, "0");
  const seconds = String(date.getUTCSeconds()).padStart(2, "0");
  const milliseconds = String(date.getUTCMilliseconds()).padStart(3, "0");
  return `${hours}:${minutes}:${seconds},${milliseconds}`;
}
__name(convertTimeSrt, "convertTimeSrt");
/**
 * Renders a list of timed words as SRT subtitle text.
 *
 * With `toWordLevelTimestamps` every word becomes its own cue. Otherwise
 * consecutive words are grouped into cues; a new cue is started when adding
 * the next word would bring the cue's summed word durations to 5.5 s or more,
 * or its summed word lengths above 42 characters. Every input word ends up in
 * exactly one cue.
 *
 * Each cue is emitted as `<n>\n<start> --> <end>\n<text>\n` and cues are
 * joined with two further newlines.
 *
 * @param {Array<object>|undefined|null} words - Words with `start`, `end` and
 *   `punctuated_word` (see WordSchema); `word` is used when `punctuated_word`
 *   is missing.
 * @param {boolean} [toWordLevelTimestamps=false] - Emit one cue per word.
 * @returns {string} SRT text, or "" when `words` is missing or empty.
 */
function srtProcessor(words, toWordLevelTimestamps = false) {
  if (!words) {
    return "";
  }
  const cues = toWordLevelTimestamps
    ? words.map((word) => [word])
    : groupWordsIntoCues(words);
  return cues.map((cueWords, index) => formatSrtCue(cueWords, index)).join("\n\n");
}

// Splits words into consecutive cues bounded by spoken duration and text length.
function groupWordsIntoCues(words, timeLimitInSeconds = 5.5, charLimit = 42) {
  const cues = [];
  let currentCue = [];
  let currentDuration = 0;
  let currentCharCount = 0;
  for (const word of words) {
    const wordDuration = word.end - word.start;
    const wordLength = srtWordText(word).length;
    const timeExceeded = currentDuration + wordDuration >= timeLimitInSeconds;
    const charCountExceeded = currentCharCount + wordLength > charLimit;
    // Close the running cue first, then always place the word in the (new)
    // cue: a word that trips both limits must not be lost, and the final word
    // only gets its own cue when it genuinely does not fit.
    if ((timeExceeded || charCountExceeded) && currentCue.length > 0) {
      cues.push(currentCue);
      currentCue = [];
      currentDuration = 0;
      currentCharCount = 0;
    }
    currentCue.push(word);
    currentDuration += wordDuration;
    currentCharCount += wordLength;
  }
  if (currentCue.length > 0) {
    cues.push(currentCue);
  }
  return cues;
}

// Returns the display text of a word, preferring the punctuated form.
function srtWordText(word) {
  return word.punctuated_word ?? word.word ?? "";
}

// Formats one cue (a non-empty run of words) as a numbered SRT entry.
function formatSrtCue(cueWords, index) {
  const startTime = convertTimeSrt(cueWords[0]?.start);
  const endTime = convertTimeSrt(cueWords[cueWords.length - 1]?.end);
  const text = cueWords.map((word) => srtWordText(word)).join(" ");
  return `${index + 1}\n${startTime} --> ${endTime}\n${text}\n`;
}
__name(srtProcessor, "srtProcessor");
export {
Deepgram,
ParagraphSchema,
WordSchema,
DeepgramResultsSchema,
srtFromTranscriptResult,
wordLevelSrtFromTranscriptResult,
transcriptAsParagraphsWithTimestamps,
srtProcessor
};
//# sourceMappingURL=chunk-C7YJXK7J.js.map