// @daitanjs/senses
// Version: (unspecified)
// A library for AI-powered image generation and analysis using OpenAI.
// 421 lines (415 loc) • 16.3 kB
// JavaScript (CommonJS bundle)
// esbuild CommonJS-interop helpers: local aliases for the Object statics so
// the generated helper functions below stay compact and resilient to later
// monkey-patching of the globals.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Defines every entry of `all` (export name -> getter function) on `target`
// as an enumerable live getter, so re-exports always reflect current bindings.
var __export = (target, all) => {
  for (var exportName in all) {
    __defProp(target, exportName, {
      get: all[exportName],
      enumerable: true
    });
  }
};
// Copies every own property of `from` onto `to` as a live getter, skipping
// keys already present on `to` and the single `except` key. The unused 4th
// parameter `desc` is deliberately reused as a scratch slot so the source
// property's enumerability can be preserved without declaring a local.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
// Adapts a CommonJS module for ESM-style consumption: builds a fresh target
// object (preserving the module's prototype), mirrors all exports onto it via
// __copyProps, and synthesizes a `default` binding when appropriate.
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
  mod
));
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
// src/senses/src/index.js
// Public API of the bundle. Exports are wired onto module.exports as live
// getters so each name resolves to its final binding even though the
// implementations appear later in this file.
var src_exports = {};
__export(src_exports, {
  analyzeImage: () => analyzeImage,
  captureAudio: () => captureAudio,
  captureVideo: () => captureVideo,
  generateImage: () => generateImage
});
module.exports = __toCommonJS(src_exports);
var import_development4 = require("@daitanjs/development");
// src/senses/src/imagegeneration.js
// Dependencies for the image-generation sub-module: DaitanJS internals plus
// Node's fs/promises, path, and buffer (wrapped for ESM-style default access).
var import_development = require("@daitanjs/development");
var import_config = require("@daitanjs/config");
var import_error = require("@daitanjs/error");
var import_apiqueries = require("@daitanjs/apiqueries");
var import_promises = __toESM(require("fs/promises"), 1);
var import_path = __toESM(require("path"), 1);
var import_buffer = require("buffer");
// Scoped logger for this sub-module.
var logger = (0, import_development.getLogger)("daitan-senses-imagegen");
var OPENAI_IMAGE_GENERATION_API_URL = "https://api.openai.com/v1/images/generations";
/**
 * Generates one or more images from a text prompt using the OpenAI Images API
 * (DALL-E 2 / DALL-E 3).
 *
 * @param {object} params
 * @param {string} params.prompt - Non-empty text prompt describing the image.
 * @param {string} [params.outputPath] - Node-only: path to save b64_json results to disk.
 * @param {number} [params.n=1] - Number of images; DALL-E 3 is clamped to 1.
 * @param {string} [params.size='1024x1024'] - Image dimensions; validated per model.
 * @param {string} [params.response_format='b64_json'] - 'b64_json' or 'url'.
 * @param {string} [params.model='dall-e-3'] - 'dall-e-2' or 'dall-e-3'.
 * @param {string} [params.quality] - DALL-E 3 only; forwarded as-is.
 * @param {string} [params.style] - DALL-E 3 only; forwarded as-is.
 * @param {string} [params.user] - End-user identifier forwarded to OpenAI.
 * @returns {Promise<object>} Result containing `base64Data` or `urls`, plus
 *   optional `revisedPrompt`, optional `outputPath`, `created`, and `rawData`.
 * @throws {DaitanInvalidInputError} If the prompt is empty or size/model mismatch.
 * @throws {DaitanConfigurationError} If no OpenAI API key is configured.
 * @throws {DaitanApiError} On API failure or malformed response.
 */
var generateImage = async ({
  prompt,
  outputPath,
  n = 1,
  size = "1024x1024",
  response_format = "b64_json",
  model = "dall-e-3",
  quality,
  style,
  user
}) => {
  const callId = `imageGen-${Date.now().toString(36)}`;
  logger.info(`[${callId}] generateImage: Initiated.`, {
    model,
    n,
    size,
    promptPreview: String(prompt).substring(0, 50) + "..."
  });
  if (!prompt || typeof prompt !== "string" || !prompt.trim()) {
    throw new import_error.DaitanInvalidInputError("Prompt must be a non-empty string.");
  }
  // DALL-E 3 rejects n > 1; clamp via a local instead of mutating the parameter.
  let effectiveN = n;
  if (model === "dall-e-3" && effectiveN > 1) {
    logger.warn(`[${callId}] DALL-E 3 only supports n=1. Setting n to 1.`);
    effectiveN = 1;
  }
  const dalle2_sizes = ["256x256", "512x512", "1024x1024"];
  const dalle3_sizes = ["1024x1024", "1792x1024", "1024x1792"];
  if (model === "dall-e-2" && !dalle2_sizes.includes(size)) {
    throw new import_error.DaitanInvalidInputError(
      `DALL-E 2 does not support size ${size}.`
    );
  }
  if (model === "dall-e-3" && !dalle3_sizes.includes(size)) {
    throw new import_error.DaitanInvalidInputError(
      `DALL-E 3 does not support size ${size}.`
    );
  }
  const configManager = (0, import_config.getConfigManager)();
  const apiKey = configManager.getApiKeyForProvider("openai");
  if (!apiKey) {
    throw new import_error.DaitanConfigurationError("OpenAI API key is not configured.");
  }
  const requestBody = { prompt, n: effectiveN, size, response_format, model };
  // quality/style are DALL-E 3 specific; silently ignored for other models.
  if (quality && model === "dall-e-3") requestBody.quality = quality;
  if (style && model === "dall-e-3") requestBody.style = style;
  if (user) requestBody.user = user;
  try {
    const responseData = await (0, import_apiqueries.query)({
      url: OPENAI_IMAGE_GENERATION_API_URL,
      method: "POST",
      headers: { Authorization: `Bearer ${apiKey}` },
      data: requestBody,
      summary: `OpenAI Image Generation: ${prompt.substring(0, 30)}`
    });
    if (!responseData?.data?.[0]) {
      throw new import_error.DaitanApiError(
        "OpenAI API returned an unexpected successful response structure.",
        "OpenAI Image Generation"
      );
    }
    const results = {
      created: responseData.created,
      rawData: responseData.data
    };
    // Collapse single-element arrays to scalars for caller convenience; omit
    // revisedPrompt entirely when the API returned none (DALL-E 2 does not
    // revise prompts). Deciding before collapsing avoids checking `.length`
    // on a value that may already be a string.
    const revisedPrompts = responseData.data.map((item) => item.revised_prompt).filter(Boolean);
    if (revisedPrompts.length === 1) {
      results.revisedPrompt = revisedPrompts[0];
    } else if (revisedPrompts.length > 1) {
      results.revisedPrompt = revisedPrompts;
    }
    if (response_format === "b64_json") {
      results.base64Data = responseData.data.map((item) => item.b64_json);
      if (results.base64Data.length === 1)
        results.base64Data = results.base64Data[0];
      // Disk persistence is Node-only; silently skipped in browsers.
      if (outputPath && typeof window === "undefined") {
        const savedPaths = await saveBase64Images(
          outputPath,
          results.base64Data
        );
        results.outputPath = savedPaths.length === 1 ? savedPaths[0] : savedPaths;
      }
    } else {
      results.urls = responseData.data.map((item) => item.url);
    }
    return results;
  } catch (error) {
    logger.error(
      `[${callId}] Error during OpenAI image generation: ${error.message}`
    );
    if (error instanceof import_error.DaitanError) throw error;
    throw new import_error.DaitanApiError(
      `OpenAI image generation failed: ${error.message}`,
      "OpenAI Image Generation",
      error.response?.status,
      { responseData: error.response?.data },
      error
    );
  }
};
/**
 * Persists one or more base64-encoded images to disk (Node.js only).
 * A single payload is written exactly at `outputPath`; multiple payloads are
 * numbered `<basename>_<index><ext>` alongside it (".png" when no extension).
 *
 * @param {string} outputPath - Target file path (directories created as needed).
 * @param {string|string[]} base64Data - One or more base64 image payloads.
 * @returns {Promise<string[]>} Paths actually written, in input order.
 * @throws {DaitanFileOperationError} If any mkdir/write fails.
 */
async function saveBase64Images(outputPath, base64Data) {
  const payloads = Array.isArray(base64Data) ? base64Data : [base64Data];
  const multiple = payloads.length > 1;
  const writtenPaths = [];
  for (let index = 0; index < payloads.length; index++) {
    let targetPath = outputPath;
    if (multiple) {
      const ext = import_path.default.extname(outputPath) || ".png";
      const stem = import_path.default.basename(outputPath, ext);
      const parentDir = import_path.default.dirname(outputPath);
      targetPath = import_path.default.join(parentDir, `${stem}_${index}${ext}`);
    }
    try {
      await import_promises.default.mkdir(import_path.default.dirname(targetPath), { recursive: true });
      await import_promises.default.writeFile(targetPath, import_buffer.Buffer.from(payloads[index], "base64"));
      writtenPaths.push(targetPath);
    } catch (fileError) {
      throw new import_error.DaitanFileOperationError(
        `Failed to save image to ${targetPath}: ${fileError.message}`,
        { path: targetPath },
        fileError
      );
    }
  }
  return writtenPaths;
}
// src/senses/src/vision.js
// Dependencies for the vision (image analysis) sub-module.
var import_promises2 = __toESM(require("fs/promises"), 1);
var import_development2 = require("@daitanjs/development");
var import_config2 = require("@daitanjs/config");
var import_error2 = require("@daitanjs/error");
var import_apiqueries2 = require("@daitanjs/apiqueries");
var import_validation = require("@daitanjs/validation");
var logger2 = (0, import_development2.getLogger)("daitan-senses-vision");
// Vision requests go through the standard chat-completions endpoint.
var OPENAI_VISION_API_URL = "https://api.openai.com/v1/chat/completions";
var DEFAULT_OPENAI_VISION_MODEL = "gpt-4o-mini";
/**
 * Reads a local image file (Node.js only) and returns its base64 payload plus
 * a best-effort MIME type derived from the file extension.
 *
 * @param {string} imagePath - Path to the image file.
 * @returns {Promise<{base64: string, contentType: string}>}
 * @throws {DaitanConfigurationError} When invoked in a browser context.
 * @throws {DaitanFileOperationError} When the file cannot be read.
 */
var encodeImageLocalFileToBase64WithMime = async (imagePath) => {
  if (typeof window !== "undefined") {
    throw new import_error2.DaitanConfigurationError(
      "encodeImageLocalFileToBase64 is for Node.js only."
    );
  }
  // Scope the try/catch to the actual file read so only I/O failures are
  // reported as file-operation errors.
  let buffer;
  try {
    buffer = await import_promises2.default.readFile(imagePath);
  } catch (error) {
    throw new import_error2.DaitanFileOperationError(
      `Error reading image file "${imagePath}": ${error.message}`,
      { path: imagePath },
      error
    );
  }
  // MIME resolution is best-effort: a missing/broken "mime-types" package
  // should not be misreported as a file read error, so fall back to a
  // generic content type instead of failing the whole call.
  let contentType = "application/octet-stream";
  try {
    const mimeTypes = await import("mime-types");
    contentType = mimeTypes.lookup(imagePath) || "application/octet-stream";
  } catch (mimeError) {
    logger2.warn(
      `Could not resolve MIME type for "${imagePath}": ${mimeError.message}. Defaulting to application/octet-stream.`
    );
  }
  return { base64: buffer.toString("base64"), contentType };
};
/**
 * Analyzes an image with an OpenAI vision-capable chat model.
 *
 * @param {object} params
 * @param {string} params.imageSource - Remote URL, local file path (Node only),
 *   or `data:image/...` data URL.
 * @param {string} [params.prompt='Describe this image in detail.'] - Question/instruction for the model.
 * @param {string} [params.model] - Overrides OPENAI_VISION_MODEL config and the built-in default.
 * @param {number} [params.max_tokens=500] - Completion token cap.
 * @param {string} [params.detailLevel='auto'] - Vision detail hint ('low' | 'high' | 'auto').
 * @param {object} [params.llmConfigOptions={}] - Kept for API consistency; not used in this direct call.
 * @returns {Promise<{analysis: string, usage: object|null, modelUsed: string, rawResponse: object}>}
 * @throws {DaitanInvalidInputError} If imageSource is missing/blank.
 * @throws {DaitanConfigurationError} If no OpenAI API key is configured.
 * @throws {DaitanFileOperationError} If a local image file cannot be read.
 * @throws {DaitanApiError} On API failure or malformed response.
 */
var analyzeImage = async ({
  imageSource,
  prompt = "Describe this image in detail.",
  model,
  max_tokens = 500,
  detailLevel = "auto",
  llmConfigOptions = {}
  // Kept for API consistency but not used in this direct call
}) => {
  const callId = `analyzeImage-${Date.now().toString(36)}`;
  logger2.info(`[${callId}] analyzeImage: Initiated.`);
  if (!imageSource || typeof imageSource !== "string" || !imageSource.trim()) {
    throw new import_error2.DaitanInvalidInputError(
      "Image source (URL, local path, or data URL) must be a non-empty string."
    );
  }
  const configManager = (0, import_config2.getConfigManager)();
  // Use the same provider-aware key lookup as generateImage so key resolution
  // stays consistent across this package.
  const apiKey = configManager.getApiKeyForProvider("openai");
  if (!apiKey) {
    throw new import_error2.DaitanConfigurationError("OpenAI API key is not configured.");
  }
  const effectiveModel = model || configManager.get("OPENAI_VISION_MODEL") || DEFAULT_OPENAI_VISION_MODEL;
  // Data URLs and remote URLs are passed through untouched; only local paths
  // need reading + base64 encoding.
  let imageUrlObject;
  if (imageSource.startsWith("data:image/") || (0, import_validation.isValidURL)(imageSource)) {
    imageUrlObject = { url: imageSource, detail: detailLevel };
  } else {
    const { base64, contentType } = await encodeImageLocalFileToBase64WithMime(
      imageSource
    );
    imageUrlObject = { url: `data:${contentType};base64,${base64}`, detail: detailLevel };
  }
  const requestBody = {
    model: effectiveModel,
    messages: [
      {
        role: "user",
        content: [
          { type: "text", text: prompt },
          { type: "image_url", image_url: imageUrlObject }
        ]
      }
    ],
    max_tokens
  };
  logger2.debug(
    `[${callId}] Sending vision analysis request to OpenAI. Model: ${effectiveModel}`
  );
  try {
    const responseData = await (0, import_apiqueries2.query)({
      url: OPENAI_VISION_API_URL,
      method: "POST",
      headers: { Authorization: `Bearer ${apiKey}` },
      data: requestBody,
      summary: `OpenAI Vision Analysis: "${prompt.substring(0, 30)}..."`
    });
    const analysisText = responseData?.choices?.[0]?.message?.content;
    if (typeof analysisText !== "string") {
      throw new import_error2.DaitanApiError("Invalid response structure from OpenAI Vision API.", "OpenAI Vision", 200, { responseData });
    }
    return {
      analysis: analysisText,
      usage: responseData.usage || null,
      modelUsed: responseData.model || effectiveModel,
      rawResponse: responseData
    };
  } catch (error) {
    if (error instanceof import_error2.DaitanError) throw error;
    throw new import_error2.DaitanApiError(
      `OpenAI Vision API request failed: ${error.message}`,
      "OpenAI Vision",
      error.httpStatusCode || 500,
      { modelUsed: effectiveModel },
      error
    );
  }
};
// src/senses/src/capture.js
// Browser-only media capture helpers (microphone / camera via MediaRecorder).
var import_development3 = require("@daitanjs/development");
var import_error3 = require("@daitanjs/error");
var captureLogger = (0, import_development3.getLogger)("daitan-senses-capture");
/**
 * Guard used by the capture functions: throws unless running in a browser
 * that exposes navigator.mediaDevices.getUserMedia.
 * @throws {DaitanBrowserSpecificError} Outside a capable browser environment.
 */
function ensureBrowserMediaAPIs() {
  const inBrowser = typeof window !== "undefined";
  // Short-circuit keeps `navigator` unevaluated outside a browser.
  const hasGetUserMedia = inBrowser && Boolean(navigator.mediaDevices && navigator.mediaDevices.getUserMedia);
  if (!hasGetUserMedia) {
    throw new import_error3.DaitanBrowserSpecificError(
      "Media capture is only available in a browser environment with support for the MediaDevices API."
    );
  }
}
/**
 * Starts microphone capture in the browser via MediaRecorder.
 *
 * @param {object} [options]
 * @param {string} [options.mimeType='audio/webm;codecs=opus'] - Recorder container/codec.
 * @returns {Promise<{recording: Promise<{blob: Blob, objectURL: string, mimeType: string}>, controls: object}>}
 *   `controls` drives the recorder (start/stop/pause/resume/state); `recording`
 *   settles once `controls.stop()` is called (or on recorder error).
 * @throws {DaitanBrowserSpecificError|DaitanOperationError}
 */
var captureAudio = async (options = {}) => {
  ensureBrowserMediaAPIs();
  const { mimeType = "audio/webm;codecs=opus" } = options;
  try {
    const mediaStream = await navigator.mediaDevices.getUserMedia({
      audio: true,
      video: false
    });
    const recorder = new MediaRecorder(mediaStream, { mimeType });
    const chunks = [];
    recorder.addEventListener("dataavailable", (event) => {
      chunks.push(event.data);
    });
    const recording = new Promise((resolve, reject) => {
      recorder.addEventListener("stop", () => {
        const blob = new Blob(chunks, { type: recorder.mimeType });
        const objectURL = URL.createObjectURL(blob);
        // Release the hardware as soon as recording ends.
        mediaStream.getTracks().forEach((track) => track.stop());
        resolve({ blob, objectURL, mimeType: recorder.mimeType });
      });
      recorder.addEventListener("error", (event) => {
        mediaStream.getTracks().forEach((track) => track.stop());
        reject(
          new import_error3.DaitanOperationError(
            `MediaRecorder error: ${event.error.message}`
          )
        );
      });
    });
    return {
      recording,
      controls: {
        start: () => recorder.start(),
        stop: () => recorder.stop(),
        pause: () => recorder.pause(),
        resume: () => recorder.resume(),
        state: () => recorder.state
      }
    };
  } catch (error) {
    // Map well-known getUserMedia failures to friendlier messages.
    let errorMessage;
    switch (error.name) {
      case "NotAllowedError":
      case "PermissionDeniedError":
        errorMessage = "Permission to use the microphone was denied. Please allow microphone access in your browser settings.";
        break;
      case "NotFoundError":
      case "DevicesNotFoundError":
        errorMessage = "No microphone was found on this device.";
        break;
      default:
        errorMessage = `Failed to get microphone access: ${error.message}`;
    }
    captureLogger.error("Error initializing audio capture.", {
      errorName: error.name,
      message: errorMessage
    });
    throw new import_error3.DaitanOperationError(errorMessage, { originalError: error });
  }
};
/**
 * Starts camera (and optionally microphone) capture in the browser via
 * MediaRecorder.
 *
 * @param {object} [options]
 * @param {string} [options.mimeType='video/webm;codecs=vp8,opus'] - Recorder container/codec.
 * @param {boolean} [options.includeAudio=true] - Request audio track in the default constraints.
 * @param {object} [options.constraints] - Full getUserMedia constraints; when
 *   provided it takes precedence and `includeAudio` is ignored.
 * @returns {Promise<{recording: Promise<{blob: Blob, objectURL: string, mimeType: string}>, controls: object}>}
 *   `controls` drives the recorder (start/stop/pause/resume/state); `recording`
 *   settles once `controls.stop()` is called (or on recorder error).
 * @throws {DaitanBrowserSpecificError|DaitanOperationError}
 */
var captureVideo = async (options = {}) => {
  ensureBrowserMediaAPIs();
  const {
    mimeType = "video/webm;codecs=vp8,opus",
    includeAudio = true,
    constraints = { video: true, audio: includeAudio }
  } = options;
  try {
    const mediaStream = await navigator.mediaDevices.getUserMedia(constraints);
    const recorder = new MediaRecorder(mediaStream, { mimeType });
    const chunks = [];
    recorder.addEventListener("dataavailable", (event) => {
      chunks.push(event.data);
    });
    const recording = new Promise((resolve, reject) => {
      recorder.addEventListener("stop", () => {
        const blob = new Blob(chunks, { type: recorder.mimeType });
        const objectURL = URL.createObjectURL(blob);
        // Release the hardware as soon as recording ends.
        mediaStream.getTracks().forEach((track) => track.stop());
        resolve({ blob, objectURL, mimeType: recorder.mimeType });
      });
      recorder.addEventListener("error", (event) => {
        mediaStream.getTracks().forEach((track) => track.stop());
        reject(
          new import_error3.DaitanOperationError(
            `MediaRecorder error: ${event.error.message}`
          )
        );
      });
    });
    return {
      recording,
      controls: {
        start: () => recorder.start(),
        stop: () => recorder.stop(),
        pause: () => recorder.pause(),
        resume: () => recorder.resume(),
        state: () => recorder.state
      }
    };
  } catch (error) {
    // Map well-known getUserMedia failures to friendlier messages.
    let errorMessage;
    switch (error.name) {
      case "NotAllowedError":
      case "PermissionDeniedError":
        errorMessage = "Permission to use the camera and/or microphone was denied.";
        break;
      case "NotFoundError":
      case "DevicesNotFoundError":
        errorMessage = "No camera and/or microphone was found on this device.";
        break;
      default:
        errorMessage = `Failed to get camera/microphone access: ${error.message}`;
    }
    captureLogger.error("Error initializing video capture.", {
      errorName: error.name,
      message: errorMessage
    });
    throw new import_error3.DaitanOperationError(errorMessage, { originalError: error });
  }
};
// src/senses/src/index.js
var sensesIndexLogger = (0, import_development4.getLogger)("daitan-senses-index");
sensesIndexLogger.debug("Exporting DaitanJS Senses module functionalities...");
sensesIndexLogger.info("DaitanJS Senses module exports ready.");
// Annotate the CommonJS export names for ESM import in node:
// (dead `0 && ...` expression emitted by esbuild so Node's static CJS
// named-export detection can discover the export list; never executed).
0 && (module.exports = {
  analyzeImage,
  captureAudio,
  captureVideo,
  generateImage
});
//# sourceMappingURL=index.cjs.map