UNPKG

@daitanjs/senses

Version:

A library for AI-powered image generation and analysis using OpenAI.

421 lines (415 loc) 16.3 kB
// NOTE(review): This file is a generated CommonJS bundle (esbuild output) of
// @daitanjs/senses. The comments below were added during review; code tokens
// are unchanged from the build artifact. Do not hand-edit logic here — fix the
// ESM sources under src/senses/src/ and rebuild.

// ---- esbuild CJS/ESM interop helpers (generated) ----
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Defines a lazy, enumerable getter on `target` for every key in `all`.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Copies own properties of `from` onto `to` as getters, skipping `except`
// and any key `to` already owns; returns `to`.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, {
          get: () => from[key],
          enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
        });
  }
  return to;
};
var __toESM = (mod, isNodeMode, target) => (
  target = mod != null ? __create(__getProtoOf(mod)) : {},
  __copyProps(
    // If the importer is in node compatibility mode or this is not an ESM
    // file that has been converted to a CommonJS file using a Babel-
    // compatible transform (i.e. "__esModule" has not been set), then set
    // "default" to the CommonJS "module.exports" for node compatibility.
    isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
    mod
  )
);
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/senses/src/index.js
// Public surface of the package: the four functions registered here.
var src_exports = {};
__export(src_exports, {
  analyzeImage: () => analyzeImage,
  captureAudio: () => captureAudio,
  captureVideo: () => captureVideo,
  generateImage: () => generateImage
});
module.exports = __toCommonJS(src_exports);
var import_development4 = require("@daitanjs/development");

// src/senses/src/imagegeneration.js
var import_development = require("@daitanjs/development");
var import_config = require("@daitanjs/config");
var import_error = require("@daitanjs/error");
var import_apiqueries = require("@daitanjs/apiqueries");
var import_promises = __toESM(require("fs/promises"), 1);
var import_path = __toESM(require("path"), 1);
var import_buffer = require("buffer");
var logger = (0, import_development.getLogger)("daitan-senses-imagegen");
var OPENAI_IMAGE_GENERATION_API_URL = "https://api.openai.com/v1/images/generations";

/**
 * Generates image(s) via the OpenAI Images API (DALL-E 2 / DALL-E 3).
 *
 * Validates the prompt and size for the chosen model, clamps `n` to 1 for
 * DALL-E 3, POSTs to the images/generations endpoint, and returns an object
 * with `created`, `rawData`, optionally `revisedPrompt`, and either
 * `base64Data` (+ `outputPath` when files were written, Node only) or `urls`
 * depending on `response_format`.
 *
 * Throws DaitanInvalidInputError / DaitanConfigurationError on bad input or
 * missing API key, DaitanApiError on API failures.
 */
var generateImage = async ({ prompt, outputPath, n = 1, size = "1024x1024", response_format = "b64_json", model = "dall-e-3", quality, style, user }) => {
  const callId = `imageGen-${Date.now().toString(36)}`;
  logger.info(`[${callId}] generateImage: Initiated.`, {
    model,
    n,
    size,
    promptPreview: String(prompt).substring(0, 50) + "..."
  });
  if (!prompt || typeof prompt !== "string" || !prompt.trim()) {
    throw new import_error.DaitanInvalidInputError("Prompt must be a non-empty string.");
  }
  // DALL-E 3 only accepts n=1; clamp instead of failing the call.
  if (model === "dall-e-3" && n > 1) {
    logger.warn(`[${callId}] DALL-E 3 only supports n=1. 
Setting n to 1.`);
    n = 1;
  }
  // Per-model allowed sizes; anything else is rejected before hitting the API.
  const dalle2_sizes = ["256x256", "512x512", "1024x1024"];
  const dalle3_sizes = ["1024x1024", "1792x1024", "1024x1792"];
  if (model === "dall-e-2" && !dalle2_sizes.includes(size)) {
    throw new import_error.DaitanInvalidInputError(
      `DALL-E 2 does not support size ${size}.`
    );
  }
  if (model === "dall-e-3" && !dalle3_sizes.includes(size)) {
    throw new import_error.DaitanInvalidInputError(
      `DALL-E 3 does not support size ${size}.`
    );
  }
  const configManager = (0, import_config.getConfigManager)();
  // NOTE(review): key lookup differs from analyzeImage below, which uses
  // configManager.get("OPENAI_API_KEY") — presumably equivalent; confirm in
  // @daitanjs/config.
  const apiKey = configManager.getApiKeyForProvider("openai");
  if (!apiKey) {
    throw new import_error.DaitanConfigurationError("OpenAI API key is not configured.");
  }
  const requestBody = { prompt, n, size, response_format, model };
  // `quality` and `style` are DALL-E 3-only request options.
  if (quality && model === "dall-e-3") requestBody.quality = quality;
  if (style && model === "dall-e-3") requestBody.style = style;
  if (user) requestBody.user = user;
  try {
    const responseData = await (0, import_apiqueries.query)({
      url: OPENAI_IMAGE_GENERATION_API_URL,
      method: "POST",
      headers: { Authorization: `Bearer ${apiKey}` },
      data: requestBody,
      summary: `OpenAI Image Generation: ${prompt.substring(0, 30)}`
    });
    if (!responseData?.data?.[0]) {
      throw new import_error.DaitanApiError(
        "OpenAI API returned an unexpected successful response structure.",
        "OpenAI Image Generation"
      );
    }
    const results = {
      revisedPrompt: responseData.data.map((item) => item.revised_prompt).filter(Boolean),
      created: responseData.created,
      rawData: responseData.data
    };
    // Collapse a single revised prompt to a plain string; drop the key when
    // the API returned none.
    if (results.revisedPrompt.length === 1) results.revisedPrompt = results.revisedPrompt[0];
    if (results.revisedPrompt.length === 0) delete results.revisedPrompt;
    if (response_format === "b64_json") {
      results.base64Data = responseData.data.map((item) => item.b64_json);
      if (results.base64Data.length === 1) results.base64Data = results.base64Data[0];
      // File writes only happen in Node (no `window` global).
      if (outputPath && typeof window === "undefined") {
        const savedPaths = await saveBase64Images(
          outputPath,
          results.base64Data
        );
        results.outputPath = savedPaths.length === 1 ? savedPaths[0] : savedPaths;
      }
    } else {
      results.urls = responseData.data.map((item) => item.url);
    }
    return results;
  } catch (error) {
    logger.error(
      `[${callId}] Error during OpenAI image generation: ${error.message}`
    );
    // Re-throw library errors as-is; wrap anything else as a DaitanApiError.
    if (error instanceof import_error.DaitanError) throw error;
    throw new import_error.DaitanApiError(
      `OpenAI image generation failed: ${error.message}`,
      "OpenAI Image Generation",
      error.response?.status,
      { responseData: error.response?.data },
      error
    );
  }
};

/**
 * Writes one or more base64-encoded images to disk (Node only).
 *
 * For a single image, writes exactly to `outputPath`. For multiple images,
 * appends "_<index>" before the extension (defaulting to ".png" when
 * `outputPath` has none). Creates parent directories as needed. Returns the
 * array of paths written; throws DaitanFileOperationError on any FS failure.
 */
async function saveBase64Images(outputPath, base64Data) {
  const imagesToSave = Array.isArray(base64Data) ? base64Data : [base64Data];
  const savedPaths = [];
  for (let i = 0; i < imagesToSave.length; i++) {
    const imageBuffer = import_buffer.Buffer.from(imagesToSave[i], "base64");
    let currentOutputPath = outputPath;
    if (imagesToSave.length > 1) {
      const ext = import_path.default.extname(outputPath) || ".png";
      const base = import_path.default.basename(outputPath, ext);
      const dir = import_path.default.dirname(outputPath);
      currentOutputPath = import_path.default.join(dir, `${base}_${i}${ext}`);
    }
    try {
      await import_promises.default.mkdir(import_path.default.dirname(currentOutputPath), { recursive: true });
      await import_promises.default.writeFile(currentOutputPath, imageBuffer);
      savedPaths.push(currentOutputPath);
    } catch (fileError) {
      throw new import_error.DaitanFileOperationError(
        `Failed to save image to ${currentOutputPath}: ${fileError.message}`,
        { path: currentOutputPath },
        fileError
      );
    }
  }
  return savedPaths;
}

// src/senses/src/vision.js
var import_promises2 = __toESM(require("fs/promises"), 1);
var import_development2 = require("@daitanjs/development");
var import_config2 = require("@daitanjs/config");
var import_error2 = require("@daitanjs/error");
var import_apiqueries2 = require("@daitanjs/apiqueries");
var import_validation = require("@daitanjs/validation");
var logger2 = (0, import_development2.getLogger)("daitan-senses-vision");
var OPENAI_VISION_API_URL = "https://api.openai.com/v1/chat/completions";
var DEFAULT_OPENAI_VISION_MODEL = "gpt-4o-mini";

// Reads a local image file and returns { base64, contentType }. Node-only:
// throws DaitanConfigurationError in a browser, DaitanFileOperationError on
// read failure. MIME type is resolved lazily via dynamic import of
// "mime-types", falling back to application/octet-stream.
var encodeImageLocalFileToBase64WithMime = async (imagePath) => {
  if (typeof window !== "undefined") {
    throw new import_error2.DaitanConfigurationError(
      "encodeImageLocalFileToBase64 is for Node.js only."
    );
  }
  try {
    const buffer = await import_promises2.default.readFile(imagePath);
    const mimeTypes = await import("mime-types");
    const contentType = mimeTypes.lookup(imagePath) || "application/octet-stream";
    return { base64: buffer.toString("base64"), contentType };
  } catch (error) {
    throw new import_error2.DaitanFileOperationError(
      `Error reading image file "${imagePath}": ${error.message}`,
      { path: imagePath },
      error
    );
  }
};

/**
 * Analyzes an image with an OpenAI vision-capable chat model.
 *
 * `imageSource` may be a data URL, an http(s) URL, or (in Node) a local file
 * path — local files are inlined as base64 data URLs. Returns
 * { analysis, usage, modelUsed, rawResponse }. Throws
 * DaitanInvalidInputError / DaitanConfigurationError on bad input or missing
 * key, DaitanApiError on API failure or malformed response.
 */
var analyzeImage = async ({ imageSource, prompt = "Describe this image in detail.", model, max_tokens = 500, detailLevel = "auto", llmConfigOptions = {} // Kept for API consistency but not used in this direct call
}) => {
  const callId = `analyzeImage-${Date.now().toString(36)}`;
  logger2.info(`[${callId}] analyzeImage: Initiated.`);
  if (!imageSource || typeof imageSource !== "string" || !imageSource.trim()) {
    throw new import_error2.DaitanInvalidInputError(
      "Image source (URL, local path, or data URL) must be a non-empty string."
    );
  }
  const configManager = (0, import_config2.getConfigManager)();
  const apiKey = configManager.get("OPENAI_API_KEY");
  if (!apiKey) {
    throw new import_error2.DaitanConfigurationError("OpenAI API key is not configured.");
  }
  // Explicit arg > configured model > library default.
  const effectiveModel = model || configManager.get("OPENAI_VISION_MODEL") || DEFAULT_OPENAI_VISION_MODEL;
  let imageUrlObject;
  if (imageSource.startsWith("data:image/")) {
    imageUrlObject = { url: imageSource, detail: detailLevel };
  } else if ((0, import_validation.isValidURL)(imageSource)) {
    imageUrlObject = { url: imageSource, detail: detailLevel };
  } else {
    // Anything else is treated as a local file path and inlined.
    const { base64, contentType } = await encodeImageLocalFileToBase64WithMime(
      imageSource
    );
    imageUrlObject = { url: `data:${contentType};base64,${base64}`, detail: detailLevel };
  }
  const requestBody = {
    model: effectiveModel,
    messages: [
      {
        role: "user",
        content: [
          { type: "text", text: prompt },
          { type: "image_url", image_url: imageUrlObject }
        ]
      }
    ],
    max_tokens
  };
  logger2.debug(
    `[${callId}] Sending vision analysis request to OpenAI. 
Model: ${effectiveModel}`
  );
  try {
    const responseData = await (0, import_apiqueries2.query)({
      url: OPENAI_VISION_API_URL,
      method: "POST",
      headers: { Authorization: `Bearer ${apiKey}` },
      data: requestBody,
      summary: `OpenAI Vision Analysis: "${prompt.substring(0, 30)}..."`
    });
    const analysisText = responseData?.choices?.[0]?.message?.content;
    if (typeof analysisText !== "string") {
      throw new import_error2.DaitanApiError("Invalid response structure from OpenAI Vision API.", "OpenAI Vision", 200, { responseData });
    }
    return {
      analysis: analysisText,
      usage: responseData.usage || null,
      modelUsed: responseData.model || effectiveModel,
      rawResponse: responseData
    };
  } catch (error) {
    if (error instanceof import_error2.DaitanError) throw error;
    throw new import_error2.DaitanApiError(
      `OpenAI Vision API request failed: ${error.message}`,
      "OpenAI Vision",
      error.httpStatusCode || 500,
      { modelUsed: effectiveModel },
      error
    );
  }
};

// src/senses/src/capture.js
var import_development3 = require("@daitanjs/development");
var import_error3 = require("@daitanjs/error");
var captureLogger = (0, import_development3.getLogger)("daitan-senses-capture");

// Throws DaitanBrowserSpecificError unless running in a browser that exposes
// navigator.mediaDevices.getUserMedia.
function ensureBrowserMediaAPIs() {
  if (typeof window === "undefined" || !navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
    throw new import_error3.DaitanBrowserSpecificError(
      "Media capture is only available in a browser environment with support for the MediaDevices API."
    );
  }
}

/**
 * Starts microphone capture in the browser.
 *
 * Returns { recording, controls }: `controls` wraps the MediaRecorder
 * (start/stop/pause/resume/state — note `start()` must be called by the
 * caller), and `recording` resolves with { blob, objectURL, mimeType } after
 * `controls.stop()`, stopping all tracks either way. Permission/device errors
 * are mapped to friendlier messages and rethrown as DaitanOperationError.
 */
var captureAudio = async (options = {}) => {
  ensureBrowserMediaAPIs();
  const { mimeType = "audio/webm;codecs=opus" } = options;
  try {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true, video: false });
    const mediaRecorder = new MediaRecorder(stream, { mimeType });
    const audioChunks = [];
    mediaRecorder.addEventListener("dataavailable", (event) => {
      audioChunks.push(event.data);
    });
    const recordingPromise = new Promise((resolve, reject) => {
      mediaRecorder.addEventListener("stop", () => {
        const audioBlob = new Blob(audioChunks, { type: mediaRecorder.mimeType });
        const objectURL = URL.createObjectURL(audioBlob);
        // Release the microphone before resolving.
        stream.getTracks().forEach((track) => track.stop());
        resolve({ blob: audioBlob, objectURL, mimeType: mediaRecorder.mimeType });
      });
      mediaRecorder.addEventListener("error", (event) => {
        stream.getTracks().forEach((track) => track.stop());
        reject(
          new import_error3.DaitanOperationError(
            `MediaRecorder error: ${event.error.message}`
          )
        );
      });
    });
    const controls = {
      start: () => mediaRecorder.start(),
      stop: () => mediaRecorder.stop(),
      pause: () => mediaRecorder.pause(),
      resume: () => mediaRecorder.resume(),
      state: () => mediaRecorder.state
    };
    return { recording: recordingPromise, controls };
  } catch (error) {
    // Map the well-known getUserMedia rejection names to clearer messages.
    let errorMessage = `Failed to get microphone access: ${error.message}`;
    if (error.name === "NotAllowedError" || error.name === "PermissionDeniedError") {
      errorMessage = "Permission to use the microphone was denied. Please allow microphone access in your browser settings.";
    } else if (error.name === "NotFoundError" || error.name === "DevicesNotFoundError") {
      errorMessage = "No microphone was found on this device.";
    }
    captureLogger.error("Error initializing audio capture.", { errorName: error.name, message: errorMessage });
    throw new import_error3.DaitanOperationError(errorMessage, { originalError: error });
  }
};

/**
 * Starts camera (and optionally microphone) capture in the browser.
 *
 * Same contract as captureAudio: returns { recording, controls }, where
 * `recording` resolves with { blob, objectURL, mimeType } after
 * `controls.stop()`. A caller-supplied `constraints` object overrides the
 * default { video: true, audio: includeAudio }.
 */
var captureVideo = async (options = {}) => {
  ensureBrowserMediaAPIs();
  const { mimeType = "video/webm;codecs=vp8,opus", includeAudio = true, constraints = { video: true, audio: includeAudio } } = options;
  try {
    const stream = await navigator.mediaDevices.getUserMedia(constraints);
    const mediaRecorder = new MediaRecorder(stream, { mimeType });
    const videoChunks = [];
    mediaRecorder.addEventListener("dataavailable", (event) => {
      videoChunks.push(event.data);
    });
    const recordingPromise = new Promise((resolve, reject) => {
      mediaRecorder.addEventListener("stop", () => {
        const videoBlob = new Blob(videoChunks, { type: mediaRecorder.mimeType });
        const objectURL = URL.createObjectURL(videoBlob);
        // Release camera/microphone before resolving.
        stream.getTracks().forEach((track) => track.stop());
        resolve({ blob: videoBlob, objectURL, mimeType: mediaRecorder.mimeType });
      });
      mediaRecorder.addEventListener("error", (event) => {
        stream.getTracks().forEach((track) => track.stop());
        reject(
          new import_error3.DaitanOperationError(
            `MediaRecorder error: ${event.error.message}`
          )
        );
      });
    });
    const controls = {
      start: () => mediaRecorder.start(),
      stop: () => mediaRecorder.stop(),
      pause: () => mediaRecorder.pause(),
      resume: () => mediaRecorder.resume(),
      state: () => mediaRecorder.state
    };
    return { recording: recordingPromise, controls };
  } catch (error) {
    let errorMessage = `Failed to get camera/microphone access: ${error.message}`;
    if (error.name === "NotAllowedError" || error.name === "PermissionDeniedError") {
      errorMessage = "Permission to use the camera and/or microphone was denied.";
    } else if (error.name === "NotFoundError" || error.name === "DevicesNotFoundError") {
      errorMessage = "No camera and/or microphone was found on this device.";
    }
    captureLogger.error("Error initializing video capture.", { errorName: error.name, message: errorMessage });
    throw new import_error3.DaitanOperationError(errorMessage, { originalError: error });
  }
};

// src/senses/src/index.js
var sensesIndexLogger = (0, import_development4.getLogger)("daitan-senses-index");
sensesIndexLogger.debug("Exporting DaitanJS Senses module functionalities...");
sensesIndexLogger.info("DaitanJS Senses module exports ready.");
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  analyzeImage,
  captureAudio,
  captureVideo,
  generateImage
});
//# sourceMappingURL=index.cjs.map