@just-every/ensemble
LLM provider abstraction layer with unified streaming interface
JavaScript
import { ensembleRequest, findModel } from '../index.js';
// In-memory cache of generated descriptions, keyed by a cheap content hash.
const imageDescriptionCache = {};

// Builds a lightweight cache key from the first 100 characters of the data
// URL plus its total length. Fast, but not collision-proof.
function generateImageHash(imageData) {
    const sample = imageData.substring(0, 100);
    const length = imageData.length;
    return `${sample}_${length}`;
}
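// A collision-resistant alternative would hash the full payload instead of a
// prefix sample. Sketch only, assuming a Node.js runtime (node:crypto is not
// imported by this module):
//
//     import { createHash } from 'node:crypto';
//
//     function generateImageHash(imageData) {
//         return createHash('sha256').update(imageData).digest('hex');
//     }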
export async function convertImageToText(imageData, modelId) {
    // Pass through anything that is not a base64 image data URL.
    if (!imageData.startsWith('data:image/')) {
        return imageData;
    }
    console.log(`Converting image to text description for model ${modelId}`);
    // Reuse a cached description when the same image was seen before.
    const imageHash = generateImageHash(imageData);
    if (imageDescriptionCache[imageHash]) {
        console.log(`Using cached image description for ${modelId}`);
        return imageDescriptionCache[imageHash];
    }
    try {
        // Ask a small vision-capable model to describe the image.
        const stream = ensembleRequest([
            {
                type: 'message',
                role: 'system',
                content: 'Please describe the following image in a couple of sentences. Focus on the main visual elements and key details that someone would need to understand what is shown in the image.',
            },
            {
                type: 'message',
                role: 'user',
                content: imageData,
            },
        ], {
            modelClass: 'vision_mini',
        });
        // Consume the event stream; cache and return the completed message.
        for await (const event of stream) {
            if (event.type === 'message_complete' && 'content' in event) {
                imageDescriptionCache[imageHash] = event.content.trim();
                return imageDescriptionCache[imageHash];
            }
        }
    } catch (error) {
        console.error('Error generating image description:', error);
    }
    // Fallback when the stream ends without a completion event or errors out.
    return 'Image found, but could not be converted to text';
}
export async function convertImageToTextIfNeeded(imageData, modelId) {
    // Not an image data URL: nothing to convert.
    if (!imageData.startsWith('data:image/')) {
        return false;
    }
    // The target model accepts image input natively, so keep the image as-is.
    if (modelId && findModel(modelId)?.features?.input_modality?.includes('image')) {
        return false;
    }
    // Otherwise replace the image with a generated text description.
    return await convertImageToText(imageData, modelId || 'unknown');
}
//# sourceMappingURL=image_to_text.js.map
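A minimal usage sketch, illustrative only: the import path and the model ID 'gpt-4o-mini' are assumptions, and the truncated base64 payload stands in for a real image.

import { convertImageToTextIfNeeded } from './image_to_text.js';

const imageData = 'data:image/png;base64,iVBORw0KGgo...'; // placeholder payload

// Returns false when the model can consume the image directly,
// otherwise a short text description to send in its place.
const description = await convertImageToTextIfNeeded(imageData, 'gpt-4o-mini');
if (description !== false) {
    console.log('Use this text instead of the image:', description);
}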