UNPKG

@daitanjs/senses

Version:

A library for AI-powered image generation and analysis using OpenAI.

399 lines (394 loc) 14.3 kB
// src/senses/src/index.js
import { getLogger as getLogger4 } from "@daitanjs/development";

// src/senses/src/imagegeneration.js
import { getLogger } from "@daitanjs/development";
import { getConfigManager } from "@daitanjs/config";
import {
  DaitanConfigurationError,
  DaitanApiError,
  DaitanFileOperationError,
  DaitanInvalidInputError,
  DaitanError
} from "@daitanjs/error";
import { query as apiQuery } from "@daitanjs/apiqueries";
import fs from "fs/promises";
import path from "path";
import { Buffer } from "buffer";

const logger = getLogger("daitan-senses-imagegen");
const OPENAI_IMAGE_GENERATION_API_URL = "https://api.openai.com/v1/images/generations";

// Sizes accepted per model; models not listed here are passed through
// without size validation.
const IMAGE_SIZES_BY_MODEL = new Map([
  ["dall-e-2", { label: "DALL-E 2", sizes: ["256x256", "512x512", "1024x1024"] }],
  ["dall-e-3", { label: "DALL-E 3", sizes: ["1024x1024", "1792x1024", "1024x1792"] }]
]);

/**
 * Generates one or more images through the OpenAI image generation endpoint.
 *
 * @param {object} params
 * @param {string} params.prompt - Non-empty text prompt.
 * @param {string} [params.outputPath] - Where to write the image(s). Only used
 *   when `response_format` is "b64_json" and `window` is undefined.
 * @param {number} [params.n=1] - Number of images; forced to 1 for "dall-e-3".
 * @param {string} [params.size="1024x1024"]
 * @param {string} [params.response_format="b64_json"] - Any other value is
 *   treated as a URL response.
 * @param {string} [params.model="dall-e-3"]
 * @param {string} [params.quality] - Sent only for "dall-e-3".
 * @param {string} [params.style] - Sent only for "dall-e-3".
 * @param {string} [params.user]
 * @returns {Promise<object>} `{ created, rawData }` plus, when available,
 *   `revisedPrompt`, `base64Data`, `outputPath` or `urls`. Single-element
 *   `revisedPrompt`, `base64Data` and `outputPath` are unwrapped to a scalar.
 * @throws {DaitanInvalidInputError} On an empty prompt or unsupported size.
 * @throws {DaitanConfigurationError} When no OpenAI API key is configured.
 * @throws {DaitanApiError} On request failure or an unexpected response shape.
 * @throws {DaitanFileOperationError} When writing an image to disk fails.
 */
const generateImage = async ({
  prompt,
  outputPath,
  n = 1,
  size = "1024x1024",
  response_format = "b64_json",
  model = "dall-e-3",
  quality,
  style,
  user
}) => {
  const callId = `imageGen-${Date.now().toString(36)}`;
  logger.info(`[${callId}] generateImage: Initiated.`, {
    model,
    n,
    size,
    promptPreview: `${String(prompt).substring(0, 50)}...`
  });

  if (!prompt || typeof prompt !== "string" || !prompt.trim()) {
    throw new DaitanInvalidInputError("Prompt must be a non-empty string.");
  }

  // Use a local instead of reassigning the `n` parameter.
  let imageCount = n;
  if (model === "dall-e-3" && n > 1) {
    logger.warn(`[${callId}] DALL-E 3 only supports n=1. Setting n to 1.`);
    imageCount = 1;
  }

  const sizeRule = IMAGE_SIZES_BY_MODEL.get(model);
  if (sizeRule && !sizeRule.sizes.includes(size)) {
    throw new DaitanInvalidInputError(
      `${sizeRule.label} does not support size ${size}.`
    );
  }

  const configManager = getConfigManager();
  const apiKey = configManager.getApiKeyForProvider("openai");
  if (!apiKey) {
    throw new DaitanConfigurationError("OpenAI API key is not configured.");
  }

  const requestBody = { prompt, n: imageCount, size, response_format, model };
  if (quality && model === "dall-e-3") requestBody.quality = quality;
  if (style && model === "dall-e-3") requestBody.style = style;
  if (user) requestBody.user = user;

  try {
    const responseData = await apiQuery({
      url: OPENAI_IMAGE_GENERATION_API_URL,
      method: "POST",
      headers: { Authorization: `Bearer ${apiKey}` },
      data: requestBody,
      summary: `OpenAI Image Generation: ${prompt.substring(0, 30)}`
    });

    if (!responseData?.data?.[0]) {
      throw new DaitanApiError(
        "OpenAI API returned an unexpected successful response structure.",
        "OpenAI Image Generation"
      );
    }

    const revisedPrompts = responseData.data
      .map((item) => item.revised_prompt)
      .filter(Boolean);

    const results = {
      // Omitted when empty; a single revised prompt is returned as a string.
      ...(revisedPrompts.length > 0 && {
        revisedPrompt:
          revisedPrompts.length === 1 ? revisedPrompts[0] : revisedPrompts
      }),
      created: responseData.created,
      rawData: responseData.data
    };

    if (response_format === "b64_json") {
      const encodedImages = responseData.data.map((item) => item.b64_json);
      results.base64Data =
        encodedImages.length === 1 ? encodedImages[0] : encodedImages;

      // Disk writes are skipped when a `window` global exists.
      if (outputPath && typeof window === "undefined") {
        const savedPaths = await saveBase64Images(outputPath, encodedImages);
        results.outputPath =
          savedPaths.length === 1 ? savedPaths[0] : savedPaths;
      }
    } else {
      if (outputPath) {
        // Previously ignored silently; nothing is downloaded for URL responses.
        logger.warn(
          `[${callId}] outputPath is ignored when response_format is not "b64_json".`
        );
      }
      results.urls = responseData.data.map((item) => item.url);
    }

    return results;
  } catch (error) {
    logger.error(
      `[${callId}] Error during OpenAI image generation: ${error.message}`
    );
    if (error instanceof DaitanError) throw error;
    throw new DaitanApiError(
      `OpenAI image generation failed: ${error.message}`,
      "OpenAI Image Generation",
      error.response?.status,
      { responseData: error.response?.data },
      error
    );
  }
};

/**
 * Decodes base64 image payloads and writes them to disk, one after another.
 * With several images, `_<index>` is inserted before the extension of
 * `outputPath` (".png" is assumed when `outputPath` has no extension).
 *
 * @param {string} outputPath - Target file path (or naming template).
 * @param {string|string[]} base64Data - One payload or an array of payloads.
 * @returns {Promise<string[]>} The paths written, in input order.
 * @throws {DaitanFileOperationError} When creating the directory or writing fails.
 */
async function saveBase64Images(outputPath, base64Data) {
  const imagesToSave = Array.isArray(base64Data) ? base64Data : [base64Data];
  const hasMultiple = imagesToSave.length > 1;

  // Loop-invariant path pieces, only needed when files are numbered.
  const ext = path.extname(outputPath) || ".png";
  const base = path.basename(outputPath, ext);
  const dir = path.dirname(outputPath);

  const savedPaths = [];
  for (const [index, encoded] of imagesToSave.entries()) {
    const imageBuffer = Buffer.from(encoded, "base64");
    const currentOutputPath = hasMultiple
      ? path.join(dir, `${base}_${index}${ext}`)
      : outputPath;
    try {
      await fs.mkdir(path.dirname(currentOutputPath), { recursive: true });
      await fs.writeFile(currentOutputPath, imageBuffer);
      savedPaths.push(currentOutputPath);
    } catch (fileError) {
      throw new DaitanFileOperationError(
        `Failed to save image to ${currentOutputPath}: ${fileError.message}`,
        { path: currentOutputPath },
        fileError
      );
    }
  }
  return savedPaths;
}

// src/senses/src/vision.js
import fs2 from "fs/promises";
import { getLogger as getLogger2 } from "@daitanjs/development";
import { getConfigManager as getConfigManager2 } from "@daitanjs/config";
import {
  DaitanConfigurationError as DaitanConfigurationError2,
  DaitanApiError as DaitanApiError2,
  DaitanFileOperationError as DaitanFileOperationError2,
  DaitanInvalidInputError as DaitanInvalidInputError2,
  DaitanError as DaitanError2
} from "@daitanjs/error";
import { query as apiQuery2 } from "@daitanjs/apiqueries";
import { isValidURL } from "@daitanjs/validation";

const logger2 = getLogger2("daitan-senses-vision");
const OPENAI_VISION_API_URL = "https://api.openai.com/v1/chat/completions";
const DEFAULT_OPENAI_VISION_MODEL = "gpt-4o-mini";

/**
 * Reads a local image file and returns it base64-encoded together with a
 * content type derived from the file name.
 *
 * @param {string} imagePath - Path of the file to read.
 * @returns {Promise<{base64: string, contentType: string}>} `contentType`
 *   falls back to "application/octet-stream" when the lookup yields nothing.
 * @throws {DaitanConfigurationError2} When a `window` global exists.
 * @throws {DaitanFileOperationError2} When reading or the MIME lookup fails.
 */
const encodeImageLocalFileToBase64WithMime = async (imagePath) => {
  if (typeof window !== "undefined") {
    throw new DaitanConfigurationError2(
      "encodeImageLocalFileToBase64 is for Node.js only."
    );
  }
  try {
    const buffer = await fs2.readFile(imagePath);
    const mimeTypes = await import("mime-types");
    // NOTE(review): `lookup` may only be exposed on the default export,
    // depending on how the loader/bundler maps the package - accept both.
    const lookup = mimeTypes.lookup ?? mimeTypes.default?.lookup;
    const contentType = lookup(imagePath) || "application/octet-stream";
    return { base64: buffer.toString("base64"), contentType };
  } catch (error) {
    throw new DaitanFileOperationError2(
      `Error reading image file "${imagePath}": ${error.message}`,
      { path: imagePath },
      error
    );
  }
};

/**
 * Sends an image plus a text prompt to the OpenAI chat completions endpoint
 * and returns the model's textual answer.
 *
 * @param {object} params
 * @param {string} params.imageSource - `data:image/...` URL, a URL accepted by
 *   `isValidURL`, or otherwise a local file path that is read and inlined.
 * @param {string} [params.prompt="Describe this image in detail."]
 * @param {string} [params.model] - Falls back to the "OPENAI_VISION_MODEL"
 *   config value, then to "gpt-4o-mini".
 * @param {number} [params.max_tokens=500]
 * @param {string} [params.detailLevel="auto"] - Sent as `image_url.detail`.
 * @param {object} [params.llmConfigOptions={}] - Accepted but unused.
 * @returns {Promise<{analysis: string, usage: object|null, modelUsed: string, rawResponse: object}>}
 * @throws {DaitanInvalidInputError2} On an empty image source or non-string prompt.
 * @throws {DaitanConfigurationError2} When no OpenAI API key is configured.
 * @throws {DaitanFileOperationError2} When a local image cannot be read.
 * @throws {DaitanApiError2} On request failure or an unexpected response shape.
 */
const analyzeImage = async ({
  imageSource,
  prompt = "Describe this image in detail.",
  model,
  max_tokens = 500,
  detailLevel = "auto",
  llmConfigOptions = {} // Kept for API consistency but not used in this direct call
}) => {
  const callId = `analyzeImage-${Date.now().toString(36)}`;
  logger2.info(`[${callId}] analyzeImage: Initiated.`);

  if (!imageSource || typeof imageSource !== "string" || !imageSource.trim()) {
    throw new DaitanInvalidInputError2(
      "Image source (URL, local path, or data URL) must be a non-empty string."
    );
  }
  // A non-string prompt (e.g. null) used to surface later as a TypeError
  // wrapped in a misleading "API request failed" error.
  if (typeof prompt !== "string") {
    throw new DaitanInvalidInputError2("Prompt must be a string.");
  }

  const configManager = getConfigManager2();
  const apiKey = configManager.get("OPENAI_API_KEY");
  if (!apiKey) {
    throw new DaitanConfigurationError2("OpenAI API key is not configured.");
  }

  const effectiveModel =
    model ||
    configManager.get("OPENAI_VISION_MODEL") ||
    DEFAULT_OPENAI_VISION_MODEL;

  // Data URLs and remote URLs are forwarded untouched; anything else is
  // treated as a local file and inlined as a data URL.
  let imageUrl = imageSource;
  if (!imageSource.startsWith("data:image/") && !isValidURL(imageSource)) {
    const { base64, contentType } =
      await encodeImageLocalFileToBase64WithMime(imageSource);
    imageUrl = `data:${contentType};base64,${base64}`;
  }
  const imageUrlObject = { url: imageUrl, detail: detailLevel };

  const requestBody = {
    model: effectiveModel,
    messages: [
      {
        role: "user",
        content: [
          { type: "text", text: prompt },
          { type: "image_url", image_url: imageUrlObject }
        ]
      }
    ],
    max_tokens
  };

  logger2.debug(
    `[${callId}] Sending vision analysis request to OpenAI. Model: ${effectiveModel}`
  );

  try {
    const responseData = await apiQuery2({
      url: OPENAI_VISION_API_URL,
      method: "POST",
      headers: { Authorization: `Bearer ${apiKey}` },
      data: requestBody,
      summary: `OpenAI Vision Analysis: "${prompt.substring(0, 30)}..."`
    });

    const analysisText = responseData?.choices?.[0]?.message?.content;
    if (typeof analysisText !== "string") {
      throw new DaitanApiError2(
        "Invalid response structure from OpenAI Vision API.",
        "OpenAI Vision",
        200,
        { responseData }
      );
    }

    return {
      analysis: analysisText,
      usage: responseData.usage || null,
      modelUsed: responseData.model || effectiveModel,
      rawResponse: responseData
    };
  } catch (error) {
    if (error instanceof DaitanError2) throw error;
    throw new DaitanApiError2(
      `OpenAI Vision API request failed: ${error.message}`,
      "OpenAI Vision",
      error.httpStatusCode || 500,
      { modelUsed: effectiveModel },
      error
    );
  }
};

// src/senses/src/capture.js
import { getLogger as getLogger3 } from "@daitanjs/development";
import { DaitanBrowserSpecificError, DaitanOperationError } from "@daitanjs/error";

const captureLogger = getLogger3("daitan-senses-capture");

/**
 * Throws unless `window` and `navigator.mediaDevices.getUserMedia` exist.
 *
 * @throws {DaitanBrowserSpecificError}
 */
function ensureBrowserMediaAPIs() {
  if (
    typeof window === "undefined" ||
    !navigator.mediaDevices ||
    !navigator.mediaDevices.getUserMedia
  ) {
    throw new DaitanBrowserSpecificError(
      "Media capture is only available in a browser environment with support for the MediaDevices API."
    );
  }
}

/** Stops every track of `stream`, releasing the underlying device(s). */
function stopAllTracks(stream) {
  stream.getTracks().forEach((track) => track.stop());
}

/**
 * Wraps `stream` in a MediaRecorder and exposes its controls plus a promise
 * that settles when recording stops (resolve) or errors (reject).
 *
 * @param {MediaStream} stream
 * @param {string} mimeType - Passed to the MediaRecorder constructor.
 * @returns {{recording: Promise<{blob: Blob, objectURL: string, mimeType: string}>, controls: object}}
 */
function createRecordingSession(stream, mimeType) {
  const mediaRecorder = new MediaRecorder(stream, { mimeType });
  const chunks = [];
  mediaRecorder.addEventListener("dataavailable", (event) => {
    chunks.push(event.data);
  });

  const recording = new Promise((resolve, reject) => {
    mediaRecorder.addEventListener("stop", () => {
      const blob = new Blob(chunks, { type: mediaRecorder.mimeType });
      const objectURL = URL.createObjectURL(blob);
      stopAllTracks(stream);
      resolve({ blob, objectURL, mimeType: mediaRecorder.mimeType });
    });
    mediaRecorder.addEventListener("error", (event) => {
      stopAllTracks(stream);
      // `event.error` is read defensively so a missing error object cannot
      // throw inside the handler and leave the promise pending forever.
      reject(
        new DaitanOperationError(
          `MediaRecorder error: ${event.error?.message ?? "unknown error"}`
        )
      );
    });
  });

  const controls = {
    start: () => mediaRecorder.start(),
    stop: () => mediaRecorder.stop(),
    pause: () => mediaRecorder.pause(),
    resume: () => mediaRecorder.resume(),
    state: () => mediaRecorder.state
  };
  return { recording, controls };
}

/**
 * Shared implementation of captureAudio/captureVideo: acquires the stream,
 * builds the recording session, and maps failures to DaitanOperationError.
 *
 * @param {object} params
 * @param {object} params.constraints - Passed to `getUserMedia`.
 * @param {string} params.mimeType - Passed to the MediaRecorder constructor.
 * @param {{accessFailed: string, denied: string, notFound: string}} params.messages
 * @param {string} params.logMessage - Message logged on failure.
 * @returns {Promise<{recording: Promise<object>, controls: object}>}
 * @throws {DaitanOperationError}
 */
async function startCapture({ constraints, mimeType, messages, logMessage }) {
  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia(constraints);
    return createRecordingSession(stream, mimeType);
  } catch (error) {
    let errorMessage;
    if (stream) {
      // The device was acquired but the recorder could not be created
      // (e.g. unsupported mimeType): release the device instead of leaking it.
      stopAllTracks(stream);
      errorMessage = `Failed to initialize MediaRecorder: ${error.message}`;
    } else if (
      error.name === "NotAllowedError" ||
      error.name === "PermissionDeniedError"
    ) {
      errorMessage = messages.denied;
    } else if (
      error.name === "NotFoundError" ||
      error.name === "DevicesNotFoundError"
    ) {
      errorMessage = messages.notFound;
    } else {
      errorMessage = `${messages.accessFailed}: ${error.message}`;
    }
    captureLogger.error(logMessage, {
      errorName: error.name,
      message: errorMessage
    });
    throw new DaitanOperationError(errorMessage, { originalError: error });
  }
}

/**
 * Requests microphone access and prepares an audio recording session.
 *
 * @param {object} [options]
 * @param {string} [options.mimeType="audio/webm;codecs=opus"]
 * @returns {Promise<{recording: Promise<{blob: Blob, objectURL: string, mimeType: string}>, controls: object}>}
 *   `controls` exposes start/stop/pause/resume/state of the recorder.
 * @throws {DaitanBrowserSpecificError} Outside a browser with MediaDevices support.
 * @throws {DaitanOperationError} When access or recorder setup fails.
 */
const captureAudio = async (options = {}) => {
  ensureBrowserMediaAPIs();
  const { mimeType = "audio/webm;codecs=opus" } = options;
  return startCapture({
    constraints: { audio: true, video: false },
    mimeType,
    messages: {
      accessFailed: "Failed to get microphone access",
      denied:
        "Permission to use the microphone was denied. Please allow microphone access in your browser settings.",
      notFound: "No microphone was found on this device."
    },
    logMessage: "Error initializing audio capture."
  });
};

/**
 * Requests camera (and optionally microphone) access and prepares a video
 * recording session.
 *
 * @param {object} [options]
 * @param {string} [options.mimeType="video/webm;codecs=vp8,opus"]
 * @param {boolean} [options.includeAudio=true] - Used only to build the
 *   default `constraints`.
 * @param {object} [options.constraints] - Passed to `getUserMedia`; defaults
 *   to `{ video: true, audio: includeAudio }`.
 * @returns {Promise<{recording: Promise<{blob: Blob, objectURL: string, mimeType: string}>, controls: object}>}
 *   `controls` exposes start/stop/pause/resume/state of the recorder.
 * @throws {DaitanBrowserSpecificError} Outside a browser with MediaDevices support.
 * @throws {DaitanOperationError} When access or recorder setup fails.
 */
const captureVideo = async (options = {}) => {
  ensureBrowserMediaAPIs();
  const {
    mimeType = "video/webm;codecs=vp8,opus",
    includeAudio = true,
    constraints = { video: true, audio: includeAudio }
  } = options;
  return startCapture({
    constraints,
    mimeType,
    messages: {
      accessFailed: "Failed to get camera/microphone access",
      denied: "Permission to use the camera and/or microphone was denied.",
      notFound: "No camera and/or microphone was found on this device."
    },
    logMessage: "Error initializing video capture."
  });
};

// src/senses/src/index.js
const sensesIndexLogger = getLogger4("daitan-senses-index");
sensesIndexLogger.debug("Exporting DaitanJS Senses module functionalities...");
sensesIndexLogger.info("DaitanJS Senses module exports ready.");

export { analyzeImage, captureAudio, captureVideo, generateImage };
//# sourceMappingURL=index.js.map