@ai-sdk/openai

The **[OpenAI provider](https://ai-sdk.dev/providers/ai-sdk-providers/openai)** for the [AI SDK](https://ai-sdk.dev/docs) contains language model support for the OpenAI chat and completion APIs, embedding model support for the OpenAI embeddings API, and image model support for the OpenAI image generation and edit APIs. The source file below is the provider's image model implementation.
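
Before reading the implementation, here is a minimal sketch of how the image model is typically used through the AI SDK's `experimental_generateImage` helper together with the provider's `openai.image()` factory (both are the SDK's documented entry points at the time of writing); the model id and prompt are illustrative values:

```ts
import { openai } from '@ai-sdk/openai';
import { experimental_generateImage as generateImage } from 'ai';

async function main() {
  // Generate a single image; 'gpt-image-1' and the prompt are illustrative.
  const { image } = await generateImage({
    model: openai.image('gpt-image-1'),
    prompt: 'A watercolor painting of a lighthouse at dusk',
    size: '1024x1024',
  });

  // Image data comes back base64-encoded (see `b64_json` in the model below).
  console.log(image.base64.slice(0, 32), '…');
}

main().catch(console.error);
```

Internally, when image `files` are provided, `doGenerate` posts multipart form data to `/images/edits`; otherwise it sends a JSON request to `/images/generations`.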

import {
  ImageModelV3,
  ImageModelV3File,
  SharedV3Warning,
} from '@ai-sdk/provider';
import {
  combineHeaders,
  convertBase64ToUint8Array,
  convertToFormData,
  createJsonResponseHandler,
  downloadBlob,
  postFormDataToApi,
  postJsonToApi,
} from '@ai-sdk/provider-utils';
import { OpenAIConfig } from '../openai-config';
import { openaiFailedResponseHandler } from '../openai-error';
import { openaiImageResponseSchema } from './openai-image-api';
import {
  OpenAIImageModelId,
  hasDefaultResponseFormat,
  modelMaxImagesPerCall,
} from './openai-image-options';

interface OpenAIImageModelConfig extends OpenAIConfig {
  _internal?: {
    currentDate?: () => Date;
  };
}

export class OpenAIImageModel implements ImageModelV3 {
  readonly specificationVersion = 'v3';

  get maxImagesPerCall(): number {
    return modelMaxImagesPerCall[this.modelId] ?? 1;
  }

  get provider(): string {
    return this.config.provider;
  }

  constructor(
    readonly modelId: OpenAIImageModelId,
    private readonly config: OpenAIImageModelConfig,
  ) {}

  async doGenerate({
    prompt,
    files,
    mask,
    n,
    size,
    aspectRatio,
    seed,
    providerOptions,
    headers,
    abortSignal,
  }: Parameters<ImageModelV3['doGenerate']>[0]): Promise<
    Awaited<ReturnType<ImageModelV3['doGenerate']>>
  > {
    const warnings: Array<SharedV3Warning> = [];

    if (aspectRatio != null) {
      warnings.push({
        type: 'unsupported',
        feature: 'aspectRatio',
        details:
          'This model does not support aspect ratio. Use `size` instead.',
      });
    }

    if (seed != null) {
      warnings.push({ type: 'unsupported', feature: 'seed' });
    }

    const currentDate = this.config._internal?.currentDate?.() ?? new Date();

    if (files != null) {
      const { value: response, responseHeaders } = await postFormDataToApi({
        url: this.config.url({
          path: '/images/edits',
          modelId: this.modelId,
        }),
        headers: combineHeaders(this.config.headers(), headers),
        formData: convertToFormData<OpenAIImageEditInput>({
          model: this.modelId,
          prompt,
          image: await Promise.all(
            files.map(file =>
              file.type === 'file'
                ? new Blob(
                    [
                      file.data instanceof Uint8Array
                        ? new Blob([file.data as BlobPart], {
                            type: file.mediaType,
                          })
                        : new Blob([convertBase64ToUint8Array(file.data)], {
                            type: file.mediaType,
                          }),
                    ],
                    { type: file.mediaType },
                  )
                : downloadBlob(file.url),
            ),
          ),
          mask: mask != null ? await fileToBlob(mask) : undefined,
          n,
          size,
          ...(providerOptions.openai ?? {}),
        }),
        failedResponseHandler: openaiFailedResponseHandler,
        successfulResponseHandler: createJsonResponseHandler(
          openaiImageResponseSchema,
        ),
        abortSignal,
        fetch: this.config.fetch,
      });

      return {
        images: response.data.map(item => item.b64_json),
        warnings,
        usage:
          response.usage != null
            ? {
                inputTokens: response.usage.input_tokens ?? undefined,
                outputTokens: response.usage.output_tokens ?? undefined,
                totalTokens: response.usage.total_tokens ?? undefined,
              }
            : undefined,
        response: {
          timestamp: currentDate,
          modelId: this.modelId,
          headers: responseHeaders,
        },
        providerMetadata: {
          openai: {
            images: response.data.map((item, index) => ({
              ...(item.revised_prompt
                ? { revisedPrompt: item.revised_prompt }
                : {}),
              created: response.created ?? undefined,
              size: response.size ?? undefined,
              quality: response.quality ?? undefined,
              background: response.background ?? undefined,
              outputFormat: response.output_format ?? undefined,
              ...distributeTokenDetails(
                response.usage?.input_tokens_details,
                index,
                response.data.length,
              ),
            })),
          },
        },
      };
    }

    const { value: response, responseHeaders } = await postJsonToApi({
      url: this.config.url({
        path: '/images/generations',
        modelId: this.modelId,
      }),
      headers: combineHeaders(this.config.headers(), headers),
      body: {
        model: this.modelId,
        prompt,
        n,
        size,
        ...(providerOptions.openai ?? {}),
        ...(!hasDefaultResponseFormat(this.modelId)
          ? { response_format: 'b64_json' }
          : {}),
      },
      failedResponseHandler: openaiFailedResponseHandler,
      successfulResponseHandler: createJsonResponseHandler(
        openaiImageResponseSchema,
      ),
      abortSignal,
      fetch: this.config.fetch,
    });

    return {
      images: response.data.map(item => item.b64_json),
      warnings,
      usage:
        response.usage != null
          ? {
              inputTokens: response.usage.input_tokens ?? undefined,
              outputTokens: response.usage.output_tokens ?? undefined,
              totalTokens: response.usage.total_tokens ?? undefined,
            }
          : undefined,
      response: {
        timestamp: currentDate,
        modelId: this.modelId,
        headers: responseHeaders,
      },
      providerMetadata: {
        openai: {
          images: response.data.map((item, index) => ({
            ...(item.revised_prompt
              ? { revisedPrompt: item.revised_prompt }
              : {}),
            created: response.created ?? undefined,
            size: response.size ?? undefined,
            quality: response.quality ?? undefined,
            background: response.background ?? undefined,
            outputFormat: response.output_format ?? undefined,
            ...distributeTokenDetails(
              response.usage?.input_tokens_details,
              index,
              response.data.length,
            ),
          })),
        },
      },
    };
  }
}

/**
 * Distributes input token details evenly across images, with the remainder
 * assigned to the last image so that summing across all entries gives the
 * exact total.
 */
function distributeTokenDetails(
  details:
    | { image_tokens?: number | null; text_tokens?: number | null }
    | null
    | undefined,
  index: number,
  total: number,
): { imageTokens?: number; textTokens?: number } {
  if (details == null) {
    return {};
  }

  const result: { imageTokens?: number; textTokens?: number } = {};

  if (details.image_tokens != null) {
    const base = Math.floor(details.image_tokens / total);
    const remainder = details.image_tokens - base * (total - 1);
    result.imageTokens = index === total - 1 ? remainder : base;
  }

  if (details.text_tokens != null) {
    const base = Math.floor(details.text_tokens / total);
    const remainder = details.text_tokens - base * (total - 1);
    result.textTokens = index === total - 1 ? remainder : base;
  }

  return result;
}

type OpenAIImageEditInput = {
  /**
   * Allows to set transparency for the background of the generated image(s).
   * This parameter is only supported for `gpt-image-1`. Must be one of
   * `transparent`, `opaque` or `auto` (default value). When `auto` is used, the
   * model will automatically determine the best background for the image.
   *
   * If `transparent`, the output format needs to support transparency, so it
   * should be set to either `png` (default value) or `webp`.
   */
  background?: 'transparent' | 'opaque' | 'auto';

  /**
   * The image(s) to edit. Must be a supported image file or an array of images.
   *
   * For `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less
   * than 50MB. You can provide up to 16 images.
   *
   * For `dall-e-2`, you can only provide one image, and it should be a square
   * `png` file less than 4MB.
   */
  image: Blob | Blob[];

  input_fidelity?: ('high' | 'low') | null;

  /**
   * An additional image whose fully transparent areas (e.g. where alpha is
   * zero) indicate where `image` should be edited. If there are multiple
   * images provided, the mask will be applied on the first image. Must be a
   * valid PNG file, less than 4MB, and have the same dimensions as `image`.
   */
  mask?: Blob;

  /**
   * The model to use for image generation. Only `dall-e-2` and `gpt-image-1`
   * are supported. Defaults to `dall-e-2` unless a parameter specific to
   * `gpt-image-1` is used.
   */
  model?: 'dall-e-2' | 'gpt-image-1' | 'gpt-image-1-mini' | (string & {});

  /**
   * The number of images to generate. Must be between 1 and 10.
   */
  n?: number;

  /**
   * The compression level (0-100%) for the generated images. This parameter
   * is only supported for `gpt-image-1` with the `webp` or `jpeg` output
   * formats, and defaults to 100.
   */
  output_compression?: number;

  /**
   * The format in which the generated images are returned. This parameter is
   * only supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`.
   * The default value is `png`.
   */
  output_format?: 'png' | 'jpeg' | 'webp';

  partial_images?: number | null;

  /**
   * A text description of the desired image(s). The maximum length is 1000
   * characters for `dall-e-2`, and 32000 characters for `gpt-image-1`.
   */
  prompt?: string;

  /**
   * The quality of the image that will be generated. `high`, `medium` and
   * `low` are only supported for `gpt-image-1`. `dall-e-2` only supports
   * `standard` quality. Defaults to `auto`.
   */
  quality?: 'standard' | 'low' | 'medium' | 'high' | 'auto';

  /**
   * The format in which the generated images are returned. Must be one of
   * `url` or `b64_json`. URLs are only valid for 60 minutes after the image
   * has been generated. This parameter is only supported for `dall-e-2`, as
   * `gpt-image-1` will always return base64-encoded images.
   */
  response_format?: 'url' | 'b64_json';

  /**
   * The size of the generated images. Must be one of `1024x1024`, `1536x1024`
   * (landscape), `1024x1536` (portrait), or `auto` (default value) for
   * `gpt-image-1`, and one of `256x256`, `512x512`, or `1024x1024` for
   * `dall-e-2`.
   */
  size?: `${number}x${number}`;

  /**
   * Edit the image in streaming mode. Defaults to `false`. See the
   * [Image generation guide](https://platform.openai.com/docs/guides/image-generation)
   * for more information.
   */
  stream?: boolean;

  /**
   * A unique identifier representing your end-user, which can help OpenAI to
   * monitor and detect abuse.
   * [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
   */
  user?: string;
};

async function fileToBlob(
  file: ImageModelV3File | undefined,
): Promise<Blob | undefined> {
  if (!file) return undefined;

  if (file.type === 'url') {
    return downloadBlob(file.url);
  }

  const data =
    file.data instanceof Uint8Array
      ? file.data
      : convertBase64ToUint8Array(file.data);

  return new Blob([data as BlobPart], { type: file.mediaType });
}
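
To make the token-distribution arithmetic in `distributeTokenDetails` concrete: with 10 image tokens spread across 3 images, `base = floor(10 / 3) = 3` and `remainder = 10 - 3 * 2 = 4`, so the images are attributed 3, 3, and 4 tokens, which sums back to the exact total of 10. A standalone sketch of the same logic (the function itself is not exported from this file):

```ts
// Mirrors the per-image split in distributeTokenDetails above.
const imageTokens = 10;
const total = 3;

const perImage = Array.from({ length: total }, (_, index) => {
  const base = Math.floor(imageTokens / total);
  const remainder = imageTokens - base * (total - 1);
  // Every image gets the floored share except the last, which absorbs
  // the leftover so the shares sum to the exact total.
  return index === total - 1 ? remainder : base;
});

console.log(perImage); // [3, 3, 4]
```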