@aj-archipelago/cortex
Version:
Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.
155 lines (138 loc) • 6.54 kB
JavaScript
import Gemini15ChatPlugin from './gemini15ChatPlugin.js';
import mime from 'mime-types';
class Gemini15VisionPlugin extends Gemini15ChatPlugin {
constructor(pathway, model) {
super(pathway, model);
this.isMultiModal = true;
}
// Override the convertMessagesToGemini method to handle multimodal vision messages
// This function can operate on messages in Gemini native format or in OpenAI's format
// It will convert the messages to the Gemini format
convertMessagesToGemini(messages) {
let modifiedMessages = [];
let lastAuthor = '';
let systemParts = [];
// Check if the messages are already in the Gemini format
if (messages[0] && Object.prototype.hasOwnProperty.call(messages[0], 'parts')) {
modifiedMessages = messages;
} else {
messages.forEach(message => {
const { role, author, content } = message;
if (role === 'system') {
if (Array.isArray(content)) {
content.forEach(item => systemParts.push({ text: item }));
} else {
systemParts.push({ text: content });
}
return;
}
// Convert content to Gemini format, trying to maintain compatibility
const convertPartToGemini = (inputPart) => {
try {
// First try to parse as JSON if it's a string
const part = typeof inputPart === 'string' ? JSON.parse(inputPart) : inputPart;
const {type, text, image_url, gcs} = part;
let fileUrl = gcs || image_url?.url;
if (typeof part === 'string') {
return { text: inputPart };
} else if (type === 'text') {
return { text: text };
} else if (type === 'image_url') {
if (!fileUrl) {
return null;
}
if (fileUrl.startsWith('gs://')) {
// Validate GCS URL has at least a bucket name after gs://
const gcsPath = fileUrl.slice(5); // Remove 'gs://'
if (!gcsPath || gcsPath.length < 1) {
return null;
}
return {
fileData: {
mimeType: mime.lookup(fileUrl) || 'image/jpeg',
fileUri: fileUrl
}
};
} else if (fileUrl.includes('base64,')) {
const base64Data = fileUrl.split('base64,')[1];
if (!base64Data) {
return null;
}
return {
inlineData: {
mimeType: 'image/jpeg',
data: base64Data
}
};
} else if (fileUrl.includes('youtube.com/') || fileUrl.includes('youtu.be/')) {
return {
fileData: {
mimeType: 'video/youtube',
fileUri: fileUrl
}
};
}
return null;
}
} catch (e) {
// If JSON parsing fails or any other error, treat as plain text
return inputPart ? { text: inputPart } : null;
}
return inputPart ? { text: inputPart } : null;
};
const addPartToMessages = (geminiPart) => {
if (!geminiPart) { return; }
// Gemini requires alternating user: and model: messages
if ((role === lastAuthor || author === lastAuthor) && modifiedMessages.length > 0) {
modifiedMessages[modifiedMessages.length - 1].parts.push(geminiPart);
}
// Gemini only supports user: and model: roles
else if (role === 'user' || role === 'assistant' || author) {
modifiedMessages.push({
role: author || role,
parts: [geminiPart],
});
lastAuthor = author || role;
}
};
// Content can either be in the "vision" format (array) or in the "chat" format (string)
if (Array.isArray(content)) {
content.forEach(part => {
addPartToMessages(convertPartToGemini(part));
});
}
else {
addPartToMessages(convertPartToGemini(content));
}
});
}
// Gemini requires an odd number of messages
if (modifiedMessages.length % 2 === 0) {
modifiedMessages = modifiedMessages.slice(1);
}
let system = null;
if (systemParts.length > 0) {
system = { role: 'user', parts: systemParts };
}
return {
modifiedMessages,
system,
};
}
async execute(text, parameters, prompt, cortexRequest) {
let result = null;
try {
result = await super.execute(text, parameters, prompt, cortexRequest);
} catch (e) {
const { data } = e;
if (data && data.error) {
if (data.error.code === 400 && data.error.message === 'Precondition check failed.') {
throw new Error('One or more of the included files is too large to process. Please try again with a smaller file.');
}
}
throw e;
}
return result;
}
}
export default Gemini15VisionPlugin;