UNPKG

chat-about-video

Version:

Chat about a video clip using ChatGPT hosted in OpenAI or Azure, or Gemini provided by Google

github.com/james-hu/chat-about-video

james-hu/chat-about-video

245 lines (244 loc) • 13.3 kB

JavaScript

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.buildImagesPromptFromVideo = exports.generateTempConversationId = exports.Conversation = exports.accumulateUsage = exports.ChatAboutVideo = void 0;
const tslib_1 = require("tslib");
const misc_utils_1 = require("@handy-common-utils/misc-utils");
const promise_utils_1 = require("@handy-common-utils/promise-utils");
const node_path_1 = tslib_1.__importDefault(require("node:path"));

/**
 * Completion options applied when the caller does not override them.
 * The `backoffOn*` arrays are passed to `withRetry` as the backoff argument
 * for the corresponding category of error.
 */
const defaultCompletionOptions = {
    systemPromptText: "You are an AI specialized in analyzing video content. The user will provide frames from a video and ask questions about that video. Your task is to provide objective, concise, and accurate answers based solely on the provided frames. Do not acknowledge or repeat the user's questions, and avoid any explanations. Provide only the necessary information and answer the questions directly.",
    backoffOnThrottling: [1000, 2000, 3000, 5000, 10000, 10000],
    backoffOnServerError: [2000, 5000, 10000, 20000, 30000],
    backoffOnConnectivityError: [1000, 2000, 5000, 10000],
    backoffOnDownloadError: [500, 800, 1000, 2000],
};

/**
 * Tell whether the options select the Gemini API.
 * @param options Options passed to the constructor of ChatAboutVideo.
 * @returns true if `options.clientSettings.modelParams` is neither null nor undefined.
 */
function isGeminiOptions(options) {
    return options?.clientSettings?.modelParams != null;
}

/**
 * Tell whether the options select the ChatGPT API.
 * @param options Options passed to the constructor of ChatAboutVideo.
 * @returns true if the options are not Gemini options and `options.storage` is neither null nor undefined.
 */
function isChatGptOptions(options) {
    return !isGeminiOptions(options) && options?.storage != null;
}

class ChatAboutVideo {
    /**
     * @param options Options that decide which API is used and how it is configured.
     * @param log Logger used for debug output. A falsy value disables logging.
     * @throws Error if neither Gemini nor ChatGPT can be selected based on the options.
     */
    constructor(options, log = (0, misc_utils_1.consoleWithoutColour)()) {
        this.log = log;
        const effectiveOptions = {
            ...options,
            completionOptions: { ...defaultCompletionOptions, ...options.completionOptions },
        };
        this.options = effectiveOptions;
        // The API implementation modules are loaded lazily, so only the selected one gets required.
        if (isGeminiOptions(effectiveOptions)) {
            if (this.log) {
                this.log.debug(`Using Gemini API (model=${effectiveOptions.clientSettings.modelParams.model})`);
            }
            this.apiPromise = Promise.resolve()
                .then(() => tslib_1.__importStar(require('./gemini')))
                .then((gemini) => new gemini.GeminiApi(effectiveOptions));
        }
        else if (isChatGptOptions(effectiveOptions)) {
            if (this.log) {
                this.log.debug(`Using ChatGpt API (endpoint=${effectiveOptions.endpoint}, apiVersion=${effectiveOptions.clientSettings?.apiVersion}, deployment=${effectiveOptions.clientSettings?.deployment}, model=${effectiveOptions.completionOptions?.model})`);
            }
            this.apiPromise = Promise.resolve()
                .then(() => tslib_1.__importStar(require('./chat-gpt')))
                .then((chatGpt) => new chatGpt.ChatGptApi(effectiveOptions));
        }
        else {
            throw new Error('Unable to determine which API to use, did you miss something in the options passed to the constructor of ChatAboutVideo?');
        }
    }

    /**
     * Get the underlying API instance.
     * @returns The underlying API instance.
     */
    async getApi() {
        return this.apiPromise;
    }

    /**
     * Start a conversation, optionally about a video or about several videos / groups of images.
     * If building the initial prompt fails, the cleanup callbacks collected so far are executed
     * before the original error is rethrown, so that nothing is left behind.
     * @param videoFileOrVideosOrOptions A string (treated as a video file), an array of
     * videos / groups of images, or an options object.
     * @param optionsOrUndefined Options, used when the first argument is not an options object.
     * @returns The conversation.
     */
    async startConversation(videoFileOrVideosOrOptions, optionsOrUndefined) {
        const videoFile = typeof videoFileOrVideosOrOptions === 'string' ? videoFileOrVideosOrOptions : undefined;
        const videosOrImages = Array.isArray(videoFileOrVideosOrOptions) ? videoFileOrVideosOrOptions : undefined;
        let options = {
            ...this.options.completionOptions,
            ...optionsOrUndefined,
            // The first argument is treated as options only when it is neither a video file nor an array
            ...(!videoFile && !videosOrImages ? videoFileOrVideosOrOptions : undefined),
        };
        const cleanupFuncs = [];
        const conversationId = (0, misc_utils_1.generateRandomString)(24); // equivalent to uuid
        const api = await this.apiPromise;
        let initialPrompt = undefined;

        const addText = async (promptText) => {
            const { prompt } = await api.buildTextPrompt(promptText, conversationId);
            initialPrompt = await api.appendToPrompt(prompt, initialPrompt);
        };
        const addMedia = async ({ prompt, options: additionalOptions, cleanup }) => {
            // Register the cleanup first, so that it still runs if appending to the prompt fails
            if (cleanup) {
                cleanupFuncs.push(cleanup);
            }
            initialPrompt = await api.appendToPrompt(prompt, initialPrompt);
            options = { ...options, ...additionalOptions };
        };

        try {
            if (options.startPromptText) {
                await addText(options.startPromptText);
            }
            // A single video
            if (videoFile) {
                await addMedia(await api.buildVideoPrompt(videoFile, conversationId));
            }
            // Multiple videos or groups of images
            if (videosOrImages) {
                for (const videoOrImages of videosOrImages) {
                    if (videoOrImages.promptText) {
                        await addText(videoOrImages.promptText);
                    }
                    if (videoOrImages.videoFile) {
                        await addMedia(await api.buildVideoPrompt(videoOrImages.videoFile, conversationId));
                    }
                    if (videoOrImages.images) {
                        await addMedia(await api.buildImagesPrompt(videoOrImages.images, conversationId));
                    }
                }
            }
        }
        catch (error) {
            // The conversation will never exist, so nobody else could trigger the cleanup.
            // Failures during cleanup are ignored in favour of the original error.
            await Promise.allSettled(cleanupFuncs.map(async (cleanup) => cleanup()));
            throw error;
        }

        return new Conversation(conversationId, api, initialPrompt, options, () => Promise.all(cleanupFuncs.map((cleanup) => cleanup())), this.log);
    }
}
exports.ChatAboutVideo = ChatAboutVideo;

/**
 * Add up usage.
 * @param totalUsage Existing usage that will be updated.
 * @param incrementalUsage New usage to add. If it is undefined, then there will be no change to totalUsage.
 * @returns The same totalUsage object, which is updated in place.
 */
function accumulateUsage(totalUsage, incrementalUsage) {
    if (!incrementalUsage) {
        return totalUsage;
    }
    // A missing count is treated as zero, otherwise the total would become NaN
    totalUsage.totalTokens = (totalUsage.totalTokens ?? 0) + (incrementalUsage.totalTokens ?? 0);
    if (incrementalUsage.promptTokens != null) {
        totalUsage.promptTokens = (totalUsage.promptTokens ?? 0) + incrementalUsage.promptTokens;
    }
    if (incrementalUsage.completionTokens != null) {
        totalUsage.completionTokens = (totalUsage.completionTokens ?? 0) + incrementalUsage.completionTokens;
    }
    return totalUsage;
}
exports.accumulateUsage = accumulateUsage;

class Conversation {
    /**
     * @param conversationId ID of the conversation.
     * @param api The underlying API instance.
     * @param prompt The initial prompt, could be undefined.
     * @param options Options used for every `say` call unless overridden.
     * @param cleanup Function to be called by `end()`, could be undefined.
     * @param log Logger used for debug output. A falsy value disables logging.
     */
    constructor(conversationId, api, prompt, options, cleanup, log = (0, misc_utils_1.consoleWithoutColour)()) {
        this.conversationId = conversationId;
        this.api = api;
        this.prompt = prompt;
        this.options = options;
        this.cleanup = cleanup;
        this.log = log;
        if (this.log) {
            this.log.debug(`Conversation ${this.conversationId} started`, { conversation: this.prompt, options });
        }
    }

    /**
     * Get the underlying API instance.
     * @returns The underlying API instance.
     */
    getApi() {
        return this.api;
    }

    /**
     * Get usage statistics of the conversation.
     * Please note that the usage statistics would be undefined before the first `say` call.
     * It could also be undefined if the underlying API does not support usage statistics.
     * The usage statistics may not cover those failed requests due to content filtering or other reasons.
     * Therefore, it could be less than the billable usage.
     * @returns The usage statistics of the conversation. Or undefined if not available.
     */
    getUsage() {
        return this.usage;
    }

    /**
     * Get the prompt for the current conversation.
     * The prompt is the accumulated messages in the conversation so far.
     * @returns The prompt which is the accumulated messages in the conversation so far.
     */
    getPrompt() {
        return this.prompt;
    }

    /**
     * Say something in the conversation, and get the response from AI
     * @param message The message to say in the conversation.
     * @param options Options for fine control.
     * @returns The response/completion
     */
    async say(message, options) {
        // Pass the conversation ID, the same way startConversation(...) does when building prompts
        const { prompt: newPromptPart } = await this.api.buildTextPrompt(message, this.conversationId);
        const updatedPrompt = await this.api.appendToPrompt(newPromptPart, this.prompt);
        const effectiveOptions = { ...this.options, ...options };

        // Retry layers ordered from the innermost to the outermost.
        // Each layer wraps the previous one in a withRetry(...) call.
        const retryLayers = [
            [effectiveOptions.backoffOnThrottling, (error) => this.api.isThrottlingError(error)],
            [effectiveOptions.backoffOnServerError, (error) => this.api.isServerError(error)],
            [effectiveOptions.backoffOnConnectivityError, (error) => this.api.isConnectivityError(error)],
            [effectiveOptions.backoffOnDownloadError, (error) => this.api.isDownloadError(error)],
        ];
        const generateWithRetries = retryLayers.reduce((operation, [backoff, shouldRetry]) => () => (0, promise_utils_1.withRetry)(operation, backoff ?? [], shouldRetry), () => this.api.generateContent(updatedPrompt, effectiveOptions));
        const response = await generateWithRetries();

        const incrementalUsage = await this.api.getUsageMetadata(response);
        if (incrementalUsage) {
            if (this.usage) {
                accumulateUsage(this.usage, incrementalUsage);
            }
            else {
                // Copy it, so that accumulating later does not modify the object owned by the API
                this.usage = { ...incrementalUsage };
            }
        }
        const responseText = await this.api.getResponseText(response);
        // The prompt is updated only after the response has been processed successfully
        this.prompt = await this.api.appendToPrompt(response, updatedPrompt);
        if (this.log) {
            this.log.debug(`Conversation ${this.conversationId} progressed`, {
                conversation: this.prompt,
                effectiveOptions,
                totalUsage: this.usage,
                incrementalUsage,
            });
        }
        return responseText;
    }

    /**
     * End the conversation by calling the cleanup function, if there is one.
     * @returns nothing
     */
    async end() {
        if (this.cleanup) {
            await this.cleanup();
            if (this.log) {
                this.log.debug(`Conversation ${this.conversationId} cleaned up`, { totalUsage: this.usage });
            }
        }
    }
}
exports.Conversation = Conversation;

/**
 * Convenient function to generate a temporary conversation ID.
 * @returns A temporary conversation ID.
 */
function generateTempConversationId() {
    return `tmp-${(0, misc_utils_1.generateRandomString)(24)}`;
}
exports.generateTempConversationId = generateTempConversationId;

/**
 * Build prompt for sending frame images of a video content to AI.
 * This function is usually used for implementing the `buildVideoPrompt` function of ChatApi by utilising already implemented `buildImagesPrompt` function.
 * It extracts frame images from the video and builds a prompt containing those images for the conversation.
 * If building the prompt fails and `extractVideoFrames.deleteFilesWhenConversationEnds` is set,
 * the extracted frames are removed before the error is rethrown.
 * @param api The API instance.
 * @param extractVideoFrames The options for extracting video frames.
 * @param tmpDir The temporary directory to store the extracted frames.
 * @param videoFile Path to a video file in local file system.
 * @param conversationId The conversation ID.
 * @returns The prompt and options for the conversation.
 */
async function buildImagesPromptFromVideo(api, extractVideoFrames, tmpDir, videoFile, conversationId = generateTempConversationId()) {
    const videoFramesDir = extractVideoFrames.framesDirectoryResolver(videoFile, tmpDir, conversationId);
    const { relativePaths, cleanup: cleanupExtractedFrames } = await extractVideoFrames.extractor(videoFile, videoFramesDir, extractVideoFrames.interval, undefined, extractVideoFrames.width, extractVideoFrames.height, undefined, undefined, extractVideoFrames.limit);
    const frames = relativePaths.map((relativePath) => ({
        imageFile: node_path_1.default.join(videoFramesDir, relativePath),
    }));

    let output;
    try {
        output = await api.buildImagesPrompt(frames, conversationId);
    }
    catch (error) {
        // No cleanup function would be returned to the caller, so the frames have to be removed here
        if (extractVideoFrames.deleteFilesWhenConversationEnds) {
            try {
                await cleanupExtractedFrames();
            }
            catch {
                // Ignored on purpose, the original error is more useful to the caller
            }
        }
        throw error;
    }

    return {
        ...output,
        cleanup: async () => {
            const tasks = [];
            if (extractVideoFrames.deleteFilesWhenConversationEnds) {
                tasks.push(cleanupExtractedFrames());
            }
            if (output.cleanup) {
                tasks.push(output.cleanup());
            }
            await Promise.all(tasks);
        },
    };
}
exports.buildImagesPromptFromVideo = buildImagesPromptFromVideo;