chat-about-video
Version:
Chat about a video clip using ChatGPT hosted in OpenAI or Azure, or Gemini provided by Google
175 lines (174 loc) • 7.1 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.GeminiApi = void 0;
const tslib_1 = require("tslib");
const generative_ai_1 = require("@google/generative-ai");
const promise_utils_1 = require("@handy-common-utils/promise-utils");
const promises_1 = tslib_1.__importDefault(require("node:fs/promises"));
const node_os_1 = tslib_1.__importDefault(require("node:os"));
const node_path_1 = tslib_1.__importDefault(require("node:path"));
const chat_1 = require("../chat");
const utils_1 = require("../utils");
/**
 * Adapter around the Google Generative AI SDK client: builds prompts (text and
 * inline images), sends them with merged completion options, and classifies
 * errors by their `status` / `message`.
 */
class GeminiApi {
    /**
     * Creates the adapter and the underlying generative model client.
     *
     * Note that `options` is stored by reference and normalised in place:
     * `clientSettings.requestOptions.baseUrl` is set when `endpoint` is given,
     * `completionOptions` is created when missing, and a truthy
     * `completionOptions.systemPromptText` is copied to `systemInstruction`.
     *
     * @param {object} options
     * @param {string} [options.endpoint] Overrides the base URL used for requests.
     * @param {object} options.clientSettings Provides `modelParams` and optional `requestOptions`.
     * @param {object} [options.completionOptions] Defaults applied to every `generateContent` call.
     * @param {object} [options.extractVideoFrames] Passed through `effectiveExtractVideoFramesOptions`.
     * @param {string} [options.tmpDir] Defaults to the operating system's temporary directory.
     * @param {{key: string}} options.credential API key holder.
     */
    constructor(options) {
        this.options = options;
        if (options.endpoint != null) {
            const clientSettings = options.clientSettings;
            if (clientSettings.requestOptions == null) {
                clientSettings.requestOptions = {};
            }
            clientSettings.requestOptions.baseUrl = options.endpoint;
        }
        if (!this.options.completionOptions) {
            this.options.completionOptions = {};
        }
        const completionOptions = this.options.completionOptions;
        if (completionOptions.systemPromptText) {
            completionOptions.systemInstruction = completionOptions.systemPromptText;
        }
        this.extractVideoFrames = (0, utils_1.effectiveExtractVideoFramesOptions)(options.extractVideoFrames);
        this.tmpDir = options.tmpDir != null ? options.tmpDir : node_os_1.default.tmpdir();
        const genAI = new generative_ai_1.GoogleGenerativeAI(options.credential.key);
        this.client = genAI.getGenerativeModel(options.clientSettings.modelParams, options.clientSettings.requestOptions);
    }
    /**
     * @returns {Promise<object>} The generative model client created in the constructor.
     */
    async getClient() {
        return this.client;
    }
    /**
     * Sends a prompt to the model.
     *
     * Per-call `options` override the instance's `completionOptions` key by key.
     * Safety settings are the exception: when both sides supply an array, the
     * per-call entries win and the defaults only contribute categories that the
     * call did not mention.
     *
     * @param {Array<object>} prompt Conversation contents to send.
     * @param {object} [options] Per-call completion options; may be omitted.
     * @returns {Promise<object>} Whatever the client's `generateContent` resolves to.
     */
    async generateContent(prompt, options) {
        const defaults = this.options.completionOptions;
        const effectiveOptions = Object.assign({}, defaults, options);
        let generationConfig = effectiveOptions.generationConfig;
        const jsonResponse = effectiveOptions.jsonResponse;
        let responseMimeType;
        let responseSchema;
        if (jsonResponse === true) {
            responseMimeType = 'application/json';
        }
        else if (jsonResponse != null && jsonResponse.schema) {
            responseMimeType = 'application/json';
            responseSchema = jsonResponse.schema;
        }
        if (responseMimeType) {
            generationConfig = Object.assign({}, generationConfig, { responseMimeType, responseSchema });
        }
        let safetySettings = effectiveOptions.safetySettings;
        // `options` is optional, so it must be guarded before reading from it.
        const perCallSettings = options == null ? undefined : options.safetySettings;
        const defaultSettings = defaults == null ? undefined : defaults.safetySettings;
        // Only need to prevent overwriting when both arrays exist
        if (Array.isArray(perCallSettings) && Array.isArray(defaultSettings)) {
            // Merge into a copy so the caller's array is not grown as a side effect.
            safetySettings = [...perCallSettings];
            const seenCategories = new Set(perCallSettings.map((s) => String(s.category)));
            for (const defaultSetting of defaultSettings) {
                const category = String(defaultSetting.category);
                if (!seenCategories.has(category)) {
                    seenCategories.add(category);
                    safetySettings.push(defaultSetting);
                }
            }
        }
        // Google does not allow unknown properties, so the request is built
        // from an explicit list of fields rather than by spreading the options.
        const request = {
            contents: prompt,
            tools: effectiveOptions.tools,
            toolConfig: effectiveOptions.toolConfig,
            systemInstruction: effectiveOptions.systemInstruction,
            cachedContent: effectiveOptions.cachedContent,
            safetySettings,
            generationConfig,
        };
        return this.client.generateContent(request);
    }
    /**
     * @param {object} result A result whose `response.text()` returns the generated text.
     * @returns {Promise<string>} The generated text with surrounding whitespace removed.
     */
    async getResponseText(result) {
        return result.response.text().trim();
    }
    /**
     * @param {object} result A result that may carry `response.usageMetadata`.
     * @returns {Promise<{totalTokens: number, promptTokens: number, completionTokens: number}|undefined>}
     *   Token counts, or `undefined` when the response has no usage metadata.
     */
    async getUsageMetadata(result) {
        const usage = result.response.usageMetadata;
        if (!usage) {
            return undefined;
        }
        return {
            totalTokens: usage.totalTokenCount,
            promptTokens: usage.promptTokenCount,
            completionTokens: usage.candidatesTokenCount,
        };
    }
    /**
     * @param {*} error
     * @returns {boolean} `true` when the error's `status` is 429.
     */
    isThrottlingError(error) {
        return error != null && error.status === 429;
    }
    /**
     * @param {*} error
     * @returns {boolean} `true` when the error's `status` is a number in the 5xx range.
     */
    isServerError(error) {
        const status = error == null ? undefined : error.status;
        return typeof status === 'number' && status >= 500 && status <= 599;
    }
    /**
     * @param {*} error
     * @returns {boolean} `true` when the error's `message` is exactly one of the known connectivity messages.
     */
    isConnectivityError(error) {
        const message = error == null ? undefined : error.message;
        return message === 'Request timed out.' || message === 'Connection error.';
    }
    /**
     * @param {*} _error Currently ignored.
     * @returns {boolean} Always `false` for now.
     */
    isDownloadError(_error) {
        // To be updated
        return false;
    }
    /**
     * Appends either a model response or further prompt entries to a prompt.
     *
     * The `prompt` array, when given, is extended in place and returned.
     *
     * @param {object|Array<object>} newPromptOrResponse A generation result, or prompt entries to append.
     * @param {Array<object>} [prompt] Existing prompt; a new array is started when omitted.
     * @returns {Promise<Array<object>>} The extended prompt.
     */
    async appendToPrompt(newPromptOrResponse, prompt) {
        const target = prompt != null ? prompt : [];
        if (isGeminiResponse(newPromptOrResponse)) {
            const extracted = await this.getResponseText(newPromptOrResponse);
            const responseText = extracted != null ? extracted : '';
            target.push({
                role: 'model',
                parts: [{ text: responseText }],
            });
        }
        else {
            target.push(...newPromptOrResponse);
        }
        return target;
    }
    /**
     * @param {string} text The user's message.
     * @param {string} [_conversationId] Unused.
     * @returns {Promise<{prompt: Array<object>}>} A prompt holding one user entry with one text part.
     */
    async buildTextPrompt(text, _conversationId) {
        const userEntry = { role: 'user', parts: [{ text }] };
        return { prompt: [userEntry] };
    }
    /**
     * Builds a prompt from a video file by delegating to `buildImagesPromptFromVideo`
     * with this instance's frame extraction options and temporary directory.
     *
     * @param {string} videoFile Path of the video file.
     * @param {string} [conversationId] Defaults to a generated temporary conversation id.
     * @returns {Promise<object>} Whatever `buildImagesPromptFromVideo` resolves to.
     */
    async buildVideoPrompt(videoFile, conversationId = (0, chat_1.generateTempConversationId)()) {
        return (0, chat_1.buildImagesPromptFromVideo)(this, this.extractVideoFrames, this.tmpDir, videoFile, conversationId);
    }
    /**
     * Builds a single user prompt entry from image files, sent as base64 inline data.
     * For each input, the optional `promptText` part precedes the image part.
     *
     * @param {Array<{imageFile: string, promptText?: string}>} imageInputs
     * @param {string} [_conversationId] Unused.
     * @returns {Promise<{prompt: Array<object>}>}
     */
    async buildImagesPrompt(imageInputs, _conversationId) {
        // Files are read with a concurrency of 5.
        const partsPerImage = await (0, promise_utils_1.withConcurrency)(5, imageInputs, async (imageInput) => {
            const parts = [];
            if (imageInput.promptText) {
                parts.push({ text: imageInput.promptText });
            }
            const imageContent = await promises_1.default.readFile(imageInput.imageFile);
            // Lower-case the extension so that e.g. "photo.JPG" resolves like "photo.jpg".
            // Extensions missing from the lookup table still yield an undefined mimeType.
            const extension = node_path_1.default.extname(imageInput.imageFile).slice(1).toLowerCase();
            parts.push({
                inlineData: {
                    data: imageContent.toString('base64'),
                    mimeType: fileExtToMimeType[extension],
                },
            });
            return parts;
        });
        return {
            prompt: [
                {
                    role: 'user',
                    parts: partsPerImage.flat(),
                },
            ],
        };
    }
}
exports.GeminiApi = GeminiApi;
/**
 * Tells a generation result apart from plain prompt entries.
 *
 * @param {*} obj Value to inspect.
 * @returns {boolean} `true` when `obj.response.text` is a function.
 */
function isGeminiResponse(obj) {
    if (obj === null || obj === undefined) {
        return false;
    }
    const response = obj.response;
    if (response === null || response === undefined) {
        return false;
    }
    return typeof response.text === 'function';
}
/**
 * Lookup from image file extension (lower-case, without the leading dot)
 * to the MIME type attached to inline image data.
 */
const fileExtToMimeType = Object.fromEntries([
    ['jpg', 'image/jpeg'],
    ['jpeg', 'image/jpeg'],
    ['png', 'image/png'],
    ['webp', 'image/webp'],
    ['gif', 'image/gif'],
]);