@daitanjs/senses
Version:
A library for AI-powered image generation and analysis using OpenAI.
399 lines (394 loc) • 14.3 kB
JavaScript
// src/senses/src/index.js
import { getLogger as getLogger4 } from "@daitanjs/development";
// src/senses/src/imagegeneration.js
import { getLogger } from "@daitanjs/development";
import { getConfigManager } from "@daitanjs/config";
import {
DaitanConfigurationError,
DaitanApiError,
DaitanFileOperationError,
DaitanInvalidInputError,
DaitanError
} from "@daitanjs/error";
import { query as apiQuery } from "@daitanjs/apiqueries";
import fs from "fs/promises";
import path from "path";
import { Buffer } from "buffer";
// Logger scoped to the image-generation sub-module.
var logger = getLogger("daitan-senses-imagegen");
// OpenAI Images API endpoint used for DALL-E 2/3 generations.
var OPENAI_IMAGE_GENERATION_API_URL = "https://api.openai.com/v1/images/generations";
/**
 * Generate one or more images via the OpenAI Images API (DALL-E 2/3).
 *
 * @param {object} params
 * @param {string} params.prompt - Non-empty text prompt describing the image.
 * @param {string} [params.outputPath] - Node-only: where to save b64 results on disk.
 * @param {number} [params.n=1] - Number of images; clamped to 1 for DALL-E 3.
 * @param {string} [params.size="1024x1024"] - Must be valid for the chosen model.
 * @param {string} [params.response_format="b64_json"] - "b64_json" or "url".
 * @param {string} [params.model="dall-e-3"]
 * @param {string} [params.quality] - DALL-E 3 only ("standard" | "hd"); ignored otherwise.
 * @param {string} [params.style] - DALL-E 3 only ("vivid" | "natural"); ignored otherwise.
 * @param {string} [params.user] - Optional end-user identifier forwarded to OpenAI.
 * @returns {Promise<object>} Result with `created`, `rawData`, and depending on format
 *   either `base64Data` (+ `outputPath` when saved) or `urls`; `revisedPrompt` is set
 *   only when the API returned one (string when single, array when multiple).
 * @throws {DaitanInvalidInputError} On bad prompt or unsupported model/size combo.
 * @throws {DaitanConfigurationError} When no OpenAI API key is configured.
 * @throws {DaitanApiError} On API failure or unexpected response shape.
 */
var generateImage = async ({
  prompt,
  outputPath,
  n = 1,
  size = "1024x1024",
  response_format = "b64_json",
  model = "dall-e-3",
  quality,
  style,
  user
}) => {
  const callId = `imageGen-${Date.now().toString(36)}`;
  logger.info(`[${callId}] generateImage: Initiated.`, {
    model,
    n,
    size,
    promptPreview: String(prompt).substring(0, 50) + "..."
  });
  if (!prompt || typeof prompt !== "string" || !prompt.trim()) {
    throw new DaitanInvalidInputError("Prompt must be a non-empty string.");
  }
  // DALL-E 3 rejects n > 1; clamp via a local instead of mutating the parameter.
  let effectiveN = n;
  if (model === "dall-e-3" && effectiveN > 1) {
    logger.warn(`[${callId}] DALL-E 3 only supports n=1. Setting n to 1.`);
    effectiveN = 1;
  }
  const dalle2_sizes = ["256x256", "512x512", "1024x1024"];
  const dalle3_sizes = ["1024x1024", "1792x1024", "1024x1792"];
  if (model === "dall-e-2" && !dalle2_sizes.includes(size)) {
    throw new DaitanInvalidInputError(
      `DALL-E 2 does not support size ${size}.`
    );
  }
  if (model === "dall-e-3" && !dalle3_sizes.includes(size)) {
    throw new DaitanInvalidInputError(
      `DALL-E 3 does not support size ${size}.`
    );
  }
  const configManager = getConfigManager();
  const apiKey = configManager.getApiKeyForProvider("openai");
  if (!apiKey) {
    throw new DaitanConfigurationError("OpenAI API key is not configured.");
  }
  const requestBody = { prompt, n: effectiveN, size, response_format, model };
  // quality/style are DALL-E 3-only request fields.
  if (quality && model === "dall-e-3") requestBody.quality = quality;
  if (style && model === "dall-e-3") requestBody.style = style;
  if (user) requestBody.user = user;
  try {
    const responseData = await apiQuery({
      url: OPENAI_IMAGE_GENERATION_API_URL,
      method: "POST",
      headers: { Authorization: `Bearer ${apiKey}` },
      data: requestBody,
      summary: `OpenAI Image Generation: ${prompt.substring(0, 30)}`
    });
    if (!responseData?.data?.[0]) {
      throw new DaitanApiError(
        "OpenAI API returned an unexpected successful response structure.",
        "OpenAI Image Generation"
      );
    }
    const results = {
      created: responseData.created,
      rawData: responseData.data
    };
    // DALL-E 3 rewrites prompts; expose the rewrite(s) only when present,
    // collapsing a single entry to a plain string for caller convenience.
    const revisedPrompts = responseData.data.map((item) => item.revised_prompt).filter(Boolean);
    if (revisedPrompts.length > 0) {
      results.revisedPrompt = revisedPrompts.length === 1 ? revisedPrompts[0] : revisedPrompts;
    }
    if (response_format === "b64_json") {
      results.base64Data = responseData.data.map((item) => item.b64_json);
      if (results.base64Data.length === 1)
        results.base64Data = results.base64Data[0];
      // Saving to disk is only possible outside the browser.
      if (outputPath && typeof window === "undefined") {
        const savedPaths = await saveBase64Images(
          outputPath,
          results.base64Data
        );
        results.outputPath = savedPaths.length === 1 ? savedPaths[0] : savedPaths;
      }
    } else {
      results.urls = responseData.data.map((item) => item.url);
    }
    return results;
  } catch (error) {
    logger.error(
      `[${callId}] Error during OpenAI image generation: ${error.message}`
    );
    if (error instanceof DaitanError) throw error;
    throw new DaitanApiError(
      `OpenAI image generation failed: ${error.message}`,
      "OpenAI Image Generation",
      error.response?.status,
      { responseData: error.response?.data },
      error
    );
  }
};
/**
 * Persist one or more base64-encoded images to disk (Node only).
 *
 * A single payload is written exactly to `outputPath`; multiple payloads get
 * an `_<index>` suffix before the extension (defaulting to ".png" when the
 * path has none). Parent directories are created as needed.
 *
 * @param {string} outputPath - Target file path (used as a template for batches).
 * @param {string|string[]} base64Data - Base64 image payload(s).
 * @returns {Promise<string[]>} Paths actually written, in order.
 * @throws {DaitanFileOperationError} When mkdir/write fails for any image.
 */
async function saveBase64Images(outputPath, base64Data) {
  const payloads = Array.isArray(base64Data) ? base64Data : [base64Data];
  const writtenPaths = [];
  const needsSuffix = payloads.length > 1;
  for (const [index, payload] of payloads.entries()) {
    const imageBytes = Buffer.from(payload, "base64");
    let targetPath = outputPath;
    if (needsSuffix) {
      const extension = path.extname(outputPath) || ".png";
      const stem = path.basename(outputPath, extension);
      targetPath = path.join(
        path.dirname(outputPath),
        `${stem}_${index}${extension}`
      );
    }
    try {
      await fs.mkdir(path.dirname(targetPath), { recursive: true });
      await fs.writeFile(targetPath, imageBytes);
      writtenPaths.push(targetPath);
    } catch (fileError) {
      throw new DaitanFileOperationError(
        `Failed to save image to ${targetPath}: ${fileError.message}`,
        { path: targetPath },
        fileError
      );
    }
  }
  return writtenPaths;
}
// src/senses/src/vision.js
import fs2 from "fs/promises";
import { getLogger as getLogger2 } from "@daitanjs/development";
import { getConfigManager as getConfigManager2 } from "@daitanjs/config";
import {
DaitanConfigurationError as DaitanConfigurationError2,
DaitanApiError as DaitanApiError2,
DaitanFileOperationError as DaitanFileOperationError2,
DaitanInvalidInputError as DaitanInvalidInputError2,
DaitanError as DaitanError2
} from "@daitanjs/error";
import { query as apiQuery2 } from "@daitanjs/apiqueries";
import { isValidURL } from "@daitanjs/validation";
// Logger scoped to the vision-analysis sub-module.
var logger2 = getLogger2("daitan-senses-vision");
// Vision requests go through the Chat Completions endpoint.
var OPENAI_VISION_API_URL = "https://api.openai.com/v1/chat/completions";
// Fallback model when neither the `model` argument nor the
// OPENAI_VISION_MODEL config value is provided.
var DEFAULT_OPENAI_VISION_MODEL = "gpt-4o-mini";
/**
 * Read a local image file and return its base64 payload plus MIME type.
 * Node-only: browsers have no filesystem access.
 *
 * @param {string} imagePath - Path to the image on disk.
 * @returns {Promise<{base64: string, contentType: string}>} Encoded bytes and
 *   the MIME type looked up from the file extension (falls back to
 *   "application/octet-stream" when unknown).
 * @throws {DaitanConfigurationError2} When called in a browser context.
 * @throws {DaitanFileOperationError2} When the file cannot be read.
 */
var encodeImageLocalFileToBase64WithMime = async (imagePath) => {
  if (typeof window !== "undefined") {
    throw new DaitanConfigurationError2(
      "encodeImageLocalFileToBase64 is for Node.js only."
    );
  }
  try {
    const fileBuffer = await fs2.readFile(imagePath);
    // Lazy-load mime-types so browser bundles never pull it in.
    const mimeTypes = await import("mime-types");
    const contentType = mimeTypes.lookup(imagePath) || "application/octet-stream";
    const base64 = fileBuffer.toString("base64");
    return { base64, contentType };
  } catch (error) {
    throw new DaitanFileOperationError2(
      `Error reading image file "${imagePath}": ${error.message}`,
      { path: imagePath },
      error
    );
  }
};
/**
 * Analyze an image with an OpenAI vision-capable chat model.
 *
 * @param {object} params
 * @param {string} params.imageSource - Remote URL, local file path (Node only),
 *   or `data:image/...` data URL.
 * @param {string} [params.prompt="Describe this image in detail."]
 * @param {string} [params.model] - Overrides OPENAI_VISION_MODEL config and the
 *   built-in default (gpt-4o-mini).
 * @param {number} [params.max_tokens=500]
 * @param {string} [params.detailLevel="auto"] - OpenAI `detail` hint ("low"|"high"|"auto").
 * @param {object} [params.llmConfigOptions] - Kept for API consistency; unused here.
 * @returns {Promise<{analysis: string, usage: object|null, modelUsed: string, rawResponse: object}>}
 * @throws {DaitanInvalidInputError2} On an empty/non-string image source.
 * @throws {DaitanConfigurationError2} When no OpenAI API key is configured.
 * @throws {DaitanApiError2} On API failure or malformed response.
 */
var analyzeImage = async ({
  imageSource,
  prompt = "Describe this image in detail.",
  model,
  max_tokens = 500,
  detailLevel = "auto",
  llmConfigOptions = {}
  // Kept for API consistency but not used in this direct call
}) => {
  const callId = `analyzeImage-${Date.now().toString(36)}`;
  logger2.info(`[${callId}] analyzeImage: Initiated.`);
  if (!imageSource || typeof imageSource !== "string" || !imageSource.trim()) {
    throw new DaitanInvalidInputError2(
      "Image source (URL, local path, or data URL) must be a non-empty string."
    );
  }
  const configManager = getConfigManager2();
  // Use the provider-aware key lookup for consistency with generateImage
  // (previously read the raw OPENAI_API_KEY config entry directly).
  const apiKey = configManager.getApiKeyForProvider("openai");
  if (!apiKey) {
    throw new DaitanConfigurationError2("OpenAI API key is not configured.");
  }
  const effectiveModel = model || configManager.get("OPENAI_VISION_MODEL") || DEFAULT_OPENAI_VISION_MODEL;
  // Data URLs and remote URLs pass through as-is; anything else is treated as
  // a local file path and inlined as a base64 data URL.
  let imageUrlObject;
  if (imageSource.startsWith("data:image/") || isValidURL(imageSource)) {
    imageUrlObject = { url: imageSource, detail: detailLevel };
  } else {
    const { base64, contentType } = await encodeImageLocalFileToBase64WithMime(
      imageSource
    );
    imageUrlObject = { url: `data:${contentType};base64,${base64}`, detail: detailLevel };
  }
  const requestBody = {
    model: effectiveModel,
    messages: [
      {
        role: "user",
        content: [
          { type: "text", text: prompt },
          { type: "image_url", image_url: imageUrlObject }
        ]
      }
    ],
    max_tokens
  };
  logger2.debug(
    `[${callId}] Sending vision analysis request to OpenAI. Model: ${effectiveModel}`
  );
  try {
    const responseData = await apiQuery2({
      url: OPENAI_VISION_API_URL,
      method: "POST",
      headers: { Authorization: `Bearer ${apiKey}` },
      data: requestBody,
      summary: `OpenAI Vision Analysis: "${prompt.substring(0, 30)}..."`
    });
    const analysisText = responseData?.choices?.[0]?.message?.content;
    if (typeof analysisText !== "string") {
      throw new DaitanApiError2("Invalid response structure from OpenAI Vision API.", "OpenAI Vision", 200, { responseData });
    }
    return {
      analysis: analysisText,
      usage: responseData.usage || null,
      modelUsed: responseData.model || effectiveModel,
      rawResponse: responseData
    };
  } catch (error) {
    if (error instanceof DaitanError2) throw error;
    throw new DaitanApiError2(
      `OpenAI Vision API request failed: ${error.message}`,
      "OpenAI Vision",
      error.httpStatusCode || 500,
      { modelUsed: effectiveModel },
      error
    );
  }
};
// src/senses/src/capture.js
import { getLogger as getLogger3 } from "@daitanjs/development";
import {
DaitanBrowserSpecificError,
DaitanOperationError
} from "@daitanjs/error";
// Logger scoped to the media-capture sub-module.
var captureLogger = getLogger3("daitan-senses-capture");
/**
 * Guard: throw unless running in a browser that exposes
 * `navigator.mediaDevices.getUserMedia`.
 * @throws {DaitanBrowserSpecificError} Outside such an environment.
 */
function ensureBrowserMediaAPIs() {
  const mediaApiAvailable = typeof window !== "undefined" && Boolean(navigator.mediaDevices) && Boolean(navigator.mediaDevices.getUserMedia);
  if (!mediaApiAvailable) {
    throw new DaitanBrowserSpecificError(
      "Media capture is only available in a browser environment with support for the MediaDevices API."
    );
  }
}
/**
 * Start a microphone recording session in the browser.
 *
 * Resolves the MediaDevices/MediaRecorder pipeline and hands back:
 * - `recording`: a promise that settles when the recorder stops — resolving
 *   with `{ blob, objectURL, mimeType }` or rejecting on a recorder error.
 *   All stream tracks are stopped either way.
 * - `controls`: start/stop/pause/resume plus a `state()` accessor.
 *
 * @param {object} [options]
 * @param {string} [options.mimeType="audio/webm;codecs=opus"]
 * @returns {Promise<{recording: Promise<object>, controls: object}>}
 * @throws {DaitanBrowserSpecificError} Outside a capable browser.
 * @throws {DaitanOperationError} When microphone access cannot be obtained.
 */
var captureAudio = async (options = {}) => {
  ensureBrowserMediaAPIs();
  const { mimeType = "audio/webm;codecs=opus" } = options;
  try {
    const mediaStream = await navigator.mediaDevices.getUserMedia({
      audio: true,
      video: false
    });
    const recorder = new MediaRecorder(mediaStream, { mimeType });
    const chunks = [];
    recorder.addEventListener("dataavailable", (event) => {
      chunks.push(event.data);
    });
    const releaseStream = () => mediaStream.getTracks().forEach((track) => track.stop());
    const recording = new Promise((resolve, reject) => {
      recorder.addEventListener("stop", () => {
        const blob = new Blob(chunks, { type: recorder.mimeType });
        const objectURL = URL.createObjectURL(blob);
        releaseStream();
        resolve({ blob, objectURL, mimeType: recorder.mimeType });
      });
      recorder.addEventListener("error", (event) => {
        releaseStream();
        reject(
          new DaitanOperationError(
            `MediaRecorder error: ${event.error.message}`
          )
        );
      });
    });
    return {
      recording,
      controls: {
        start: () => recorder.start(),
        stop: () => recorder.stop(),
        pause: () => recorder.pause(),
        resume: () => recorder.resume(),
        state: () => recorder.state
      }
    };
  } catch (error) {
    // Translate the most common getUserMedia failures into friendlier text.
    let friendlyMessage = `Failed to get microphone access: ${error.message}`;
    if (error.name === "NotAllowedError" || error.name === "PermissionDeniedError") {
      friendlyMessage = "Permission to use the microphone was denied. Please allow microphone access in your browser settings.";
    } else if (error.name === "NotFoundError" || error.name === "DevicesNotFoundError") {
      friendlyMessage = "No microphone was found on this device.";
    }
    captureLogger.error("Error initializing audio capture.", {
      errorName: error.name,
      message: friendlyMessage
    });
    throw new DaitanOperationError(friendlyMessage, { originalError: error });
  }
};
/**
 * Start a camera (and optionally microphone) recording session in the browser.
 *
 * Mirrors captureAudio: returns a `recording` promise that resolves with
 * `{ blob, objectURL, mimeType }` when the recorder stops (or rejects on a
 * recorder error; tracks are stopped either way) plus a `controls` object
 * with start/stop/pause/resume and a `state()` accessor.
 *
 * @param {object} [options]
 * @param {string} [options.mimeType="video/webm;codecs=vp8,opus"]
 * @param {boolean} [options.includeAudio=true] - Feeds the default constraints.
 * @param {object} [options.constraints] - Full getUserMedia constraints;
 *   defaults to `{ video: true, audio: includeAudio }`.
 * @returns {Promise<{recording: Promise<object>, controls: object}>}
 * @throws {DaitanBrowserSpecificError} Outside a capable browser.
 * @throws {DaitanOperationError} When device access cannot be obtained.
 */
var captureVideo = async (options = {}) => {
  ensureBrowserMediaAPIs();
  // NOTE: `constraints`' default intentionally references `includeAudio`,
  // which is bound earlier in the same destructuring pattern.
  const {
    mimeType = "video/webm;codecs=vp8,opus",
    includeAudio = true,
    constraints = { video: true, audio: includeAudio }
  } = options;
  try {
    const mediaStream = await navigator.mediaDevices.getUserMedia(constraints);
    const recorder = new MediaRecorder(mediaStream, { mimeType });
    const chunks = [];
    recorder.addEventListener("dataavailable", (event) => {
      chunks.push(event.data);
    });
    const releaseStream = () => mediaStream.getTracks().forEach((track) => track.stop());
    const recording = new Promise((resolve, reject) => {
      recorder.addEventListener("stop", () => {
        const blob = new Blob(chunks, { type: recorder.mimeType });
        const objectURL = URL.createObjectURL(blob);
        releaseStream();
        resolve({ blob, objectURL, mimeType: recorder.mimeType });
      });
      recorder.addEventListener("error", (event) => {
        releaseStream();
        reject(
          new DaitanOperationError(
            `MediaRecorder error: ${event.error.message}`
          )
        );
      });
    });
    return {
      recording,
      controls: {
        start: () => recorder.start(),
        stop: () => recorder.stop(),
        pause: () => recorder.pause(),
        resume: () => recorder.resume(),
        state: () => recorder.state
      }
    };
  } catch (error) {
    // Translate the most common getUserMedia failures into friendlier text.
    let friendlyMessage = `Failed to get camera/microphone access: ${error.message}`;
    if (error.name === "NotAllowedError" || error.name === "PermissionDeniedError") {
      friendlyMessage = "Permission to use the camera and/or microphone was denied.";
    } else if (error.name === "NotFoundError" || error.name === "DevicesNotFoundError") {
      friendlyMessage = "No camera and/or microphone was found on this device.";
    }
    captureLogger.error("Error initializing video capture.", {
      errorName: error.name,
      message: friendlyMessage
    });
    throw new DaitanOperationError(friendlyMessage, { originalError: error });
  }
};
// src/senses/src/index.js
// Logger for the aggregated senses entry point.
var sensesIndexLogger = getLogger4("daitan-senses-index");
sensesIndexLogger.debug("Exporting DaitanJS Senses module functionalities...");
sensesIndexLogger.info("DaitanJS Senses module exports ready.");
// Public API: OpenAI image generation and vision analysis (Node or browser),
// plus browser-only audio/video capture helpers.
export {
  analyzeImage,
  captureAudio,
  captureVideo,
  generateImage
};
//# sourceMappingURL=index.js.map