@llumiverse/common
Version:
Public types, enums and options used by Llumiverse API.
521 lines (473 loc) • 20.3 kB
text/typescript
import { ModelOptionsInfo, ModelOptionInfoItem, OptionType, SharedOptions, ModelOptions } from "../types.js";
import { textOptionsFallback } from "./fallback.js";
// Union of every VertexAI option shape declared in this file: Imagen, Claude and Gemini.
// Each member is tagged with its own `_option_id` string literal.
export type VertexAIOptions = ImagenOptions | VertexAIClaudeOptions | VertexAIGeminiOptions;
/**
 * Task identifiers for Imagen models; this enum is the type of `ImagenOptions.edit_mode`.
 * Every member's value is the same string as its name.
 */
export enum ImagenTaskType {
// Not listed among the selectable `edit_mode` values built by getImagenOptions.
TEXT_IMAGE = "TEXT_IMAGE",
// EDIT_MODE_* members: getImagenOptions treats any value containing "EDIT" as a
// mask-based edit and exposes the mask and edit-step options for it.
EDIT_MODE_INPAINT_REMOVAL = "EDIT_MODE_INPAINT_REMOVAL",
EDIT_MODE_INPAINT_INSERTION = "EDIT_MODE_INPAINT_INSERTION",
EDIT_MODE_BGSWAP = "EDIT_MODE_BGSWAP",
EDIT_MODE_OUTPAINT = "EDIT_MODE_OUTPAINT",
// CUSTOMIZATION_* members: described in the `edit_mode` option text as using
// few-shot learning to generate images based on a few examples.
CUSTOMIZATION_SUBJECT = "CUSTOMIZATION_SUBJECT",
CUSTOMIZATION_STYLE = "CUSTOMIZATION_STYLE",
CUSTOMIZATION_CONTROLLED = "CUSTOMIZATION_CONTROLLED",
CUSTOMIZATION_INSTRUCT = "CUSTOMIZATION_INSTRUCT",
}
/**
 * Ways the edit mask can be provided; this enum is the type of `ImagenOptions.mask_mode`.
 * Every member's value is the same string as its name.
 */
export enum ImagenMaskMode {
// Default value of the `mask_mode` option built by getImagenOptions.
MASK_MODE_USER_PROVIDED = "MASK_MODE_USER_PROVIDED",
MASK_MODE_BACKGROUND = "MASK_MODE_BACKGROUND",
MASK_MODE_FOREGROUND = "MASK_MODE_FOREGROUND",
// Selecting this mode is what makes getImagenOptions expose the `mask_class` option.
MASK_MODE_SEMANTIC = "MASK_MODE_SEMANTIC",
}
/**
 * Options for VertexAI Imagen models, tagged with `_option_id: "vertexai-imagen"`.
 * Every field other than the tag is optional.
 */
export interface ImagenOptions {
    _option_id: "vertexai-imagen";

    // General and generate options
    number_of_images?: number;
    seed?: number;
    /**
     * Whether people may appear in generated images.
     * "allow_adult" is the value offered (and used as default) by the option metadata in this
     * file; "allow_adults" is kept in the union only so existing callers keep compiling.
     */
    person_generation?: "dont_allow" | "allow_adult" | "allow_adults" | "allow_all";
    // The "off" option does not seem to work for Imagen 3, might be only for text models
    safety_setting?: "block_none" | "block_only_high" | "block_medium_and_above" | "block_low_and_above";
    image_file_type?: "image/jpeg" | "image/png";
    // Only offered by the option metadata when image_file_type is "image/jpeg".
    jpeg_compression_quality?: number;
    aspect_ratio?: "1:1" | "4:3" | "3:4" | "16:9" | "9:16";
    add_watermark?: boolean;
    enhance_prompt?: boolean;

    // Capability options
    edit_mode?: ImagenTaskType;
    guidance_scale?: number;
    edit_steps?: number;
    mask_mode?: ImagenMaskMode;
    mask_dilation?: number;
    mask_class?: number[];

    // Customization options
    controlType?: "CONTROL_TYPE_FACE_MESH" | "CONTROL_TYPE_CANNY" | "CONTROL_TYPE_SCRIBBLE";
    controlImageComputation?: boolean;
    subjectType?: "SUBJECT_TYPE_PERSON" | "SUBJECT_TYPE_ANIMAL" | "SUBJECT_TYPE_PRODUCT" | "SUBJECT_TYPE_DEFAULT";
}
/**
 * Options for Claude models served through VertexAI, tagged with `_option_id: "vertexai-claude"`.
 * Every field other than the tag is optional.
 */
export interface VertexAIClaudeOptions {
_option_id: "vertexai-claude"
max_tokens?: number;
temperature?: number;
top_p?: number;
top_k?: number;
stop_sequence?: string[];
// Extended reasoning toggle; getClaudeOptions only lists the two fields below when this is truthy.
thinking_mode?: boolean;
// Described in the option metadata as a target for reasoning tokens, not a hard limit.
thinking_budget_tokens?: number;
// Whether the model's reasoning process is included in the response.
include_thoughts?: boolean;
}
/**
 * Options for Gemini models served through VertexAI, tagged with `_option_id: "vertexai-gemini"`.
 * Every field other than the tag is optional.
 */
export interface VertexAIGeminiOptions {
_option_id: "vertexai-gemini"
max_tokens?: number;
temperature?: number;
top_p?: number;
top_k?: number;
stop_sequence?: string[];
presence_penalty?: number;
frequency_penalty?: number;
seed?: number;
// The two thinking fields are offered by getGeminiOptions for model names containing "-2.5-"
// (other than the "gemini-2.5-flash-image" special case).
include_thoughts?: boolean;
thinking_budget_tokens?: number;
// Offered by getGeminiOptions only for model names containing "gemini-2.5-flash-image".
image_aspect_ratio?: "1:1" | "2:3" | "3:2" | "3:4" | "4:3" | "9:16" | "16:9" | "21:9";
}
/**
 * Resolves the option metadata for a VertexAI model from its name.
 *
 * The model name is matched against a list of substrings in a fixed order
 * ("imagen-", "gemini", "claude", "llama"); the first match decides which
 * family-specific builder is used. Names matching none of them get the
 * generic text fallback.
 *
 * @param model  Model name or identifier; only substring matching is applied.
 * @param option Currently selected options, forwarded to the builders that adapt to them
 *               (the Llama builder does not take them).
 * @returns The option metadata for the detected model family.
 */
export function getVertexAiOptions(model: string, option?: ModelOptions): ModelOptionsInfo {
    // Order matters: entries are tried top to bottom and the first hit wins.
    const familyBuilders: Array<[string, () => ModelOptionsInfo]> = [
        ["imagen-", () => getImagenOptions(model, option)],
        ["gemini", () => getGeminiOptions(model, option)],
        ["claude", () => getClaudeOptions(model, option)],
        ["llama", () => getLlamaOptions(model)],
    ];

    for (const [marker, build] of familyBuilders) {
        if (model.includes(marker)) {
            return build();
        }
    }

    return textOptionsFallback;
}
/**
 * Builds the option metadata for VertexAI Imagen models.
 *
 * Two model kinds are recognised by substring of the model name:
 *  - "generate": text-to-image models (aspect ratio, watermark, prompt enhancement).
 *  - "capability": edit / customization models (edit mode, guidance scale, masks, ...).
 * Any other Imagen name falls back to `textOptionsFallback`.
 *
 * Several option groups depend on the currently selected options: JPEG quality is only
 * listed for JPEG output, mask and edit-step options only for EDIT modes, `mask_class`
 * only for the semantic mask mode, and control options only for the controlled / subject
 * customization modes.
 *
 * @param model  Model name; only substring matching is applied.
 * @param option Currently selected options, read as ImagenOptions.
 * @returns The option metadata for the model.
 */
function getImagenOptions(model: string, option?: ModelOptions): ModelOptionsInfo {
    // The caller hands in the generic options type; Imagen-specific fields are read through this view.
    const selected = option as ImagenOptions | undefined;
    const editMode = selected?.edit_mode;

    const commonOptions: ModelOptionInfoItem[] = [
        {
            name: SharedOptions.number_of_images, type: OptionType.numeric, min: 1, max: 4, default: 1,
            integer: true, description: "Number of Images to generate",
        },
        {
            name: SharedOptions.seed, type: OptionType.numeric, min: 0, max: 4294967295, default: 12,
            integer: true, description: "The seed of the generated image"
        },
        {
            name: "person_generation", type: OptionType.enum, enum: { "Disallow the inclusion of people or faces in images": "dont_allow", "Allow generation of adults only": "allow_adult", "Allow generation of people of all ages": "allow_all" },
            default: "allow_adult", description: "The safety setting for allowing the generation of people in the image"
        },
        {
            name: "safety_setting", type: OptionType.enum, enum: { "Block very few problematic prompts and responses": "block_none", "Block only few problematic prompts and responses": "block_only_high", "Block some problematic prompts and responses": "block_medium_and_above", "Strictest filtering": "block_low_and_above" },
            default: "block_medium_and_above", description: "The overall safety setting"
        },
    ];

    const outputOptions: ModelOptionInfoItem[] = [
        {
            name: "image_file_type", type: OptionType.enum, enum: { "JPEG": "image/jpeg", "PNG": "image/png" },
            default: "image/png", description: "The file type of the generated image",
            refresh: true,
        },
    ];
    // The compression quality is only meaningful, and only listed, for JPEG output.
    if (selected?.image_file_type === "image/jpeg") {
        outputOptions.push({
            name: "jpeg_compression_quality", type: OptionType.numeric, min: 0, max: 100, default: 75,
            integer: true, description: "The compression quality of the JPEG image",
        });
    }

    if (model.includes("generate")) {
        // Generate models
        const modeOptions: ModelOptionInfoItem[] = [
            {
                name: "aspect_ratio", type: OptionType.enum, enum: { "1:1": "1:1", "4:3": "4:3", "3:4": "3:4", "16:9": "16:9", "9:16": "9:16" },
                default: "1:1", description: "The aspect ratio of the generated image"
            },
            {
                name: "add_watermark", type: OptionType.boolean, default: false, description: "Add an invisible watermark to the generated image, useful for detection of AI images"
            },
        ];
        // Prompt enhancement is not listed for model names containing "generate-001".
        const enhanceOptions: ModelOptionInfoItem[] = model.includes("generate-001") ? [] : [
            {
                name: "enhance_prompt", type: OptionType.boolean, default: true, description: "VertexAI automatically rewrites the prompt to better reflect the prompt's intent."
            },
        ];

        return {
            _option_id: "vertexai-imagen",
            options: [
                ...commonOptions,
                ...modeOptions,
                ...outputOptions,
                ...enhanceOptions,
            ]
        };
    }

    if (model.includes("capability")) {
        // Edit models
        const isMaskEdit = editMode?.includes("EDIT") ?? false;
        const isSemanticMask = selected?.mask_mode === ImagenMaskMode.MASK_MODE_SEMANTIC;
        const usesControlImage = editMode === ImagenTaskType.CUSTOMIZATION_CONTROLLED
            || editMode === ImagenTaskType.CUSTOMIZATION_SUBJECT;
        // Inpaint insertion uses a lower default guidance scale than the other modes.
        const guidanceScaleDefault = editMode === ImagenTaskType.EDIT_MODE_INPAINT_INSERTION ? 60 : 75;

        const modeOptions: ModelOptionInfoItem[] = [
            {
                name: "edit_mode", type: OptionType.enum,
                enum: {
                    "EDIT_MODE_INPAINT_REMOVAL": "EDIT_MODE_INPAINT_REMOVAL",
                    "EDIT_MODE_INPAINT_INSERTION": "EDIT_MODE_INPAINT_INSERTION",
                    "EDIT_MODE_BGSWAP": "EDIT_MODE_BGSWAP",
                    "EDIT_MODE_OUTPAINT": "EDIT_MODE_OUTPAINT",
                    "CUSTOMIZATION_SUBJECT": "CUSTOMIZATION_SUBJECT",
                    "CUSTOMIZATION_STYLE": "CUSTOMIZATION_STYLE",
                    "CUSTOMIZATION_CONTROLLED": "CUSTOMIZATION_CONTROLLED",
                    "CUSTOMIZATION_INSTRUCT": "CUSTOMIZATION_INSTRUCT",
                },
                description: "The editing mode. CUSTOMIZATION options use few-shot learning to generate images based on a few examples."
            },
            {
                name: "guidance_scale", type: OptionType.numeric, min: 0, max: 500, default: guidanceScaleDefault,
                integer: true, description: "How closely the generation follows the prompt"
            }
        ];

        const maskOptions: ModelOptionInfoItem[] = isMaskEdit ? [
            {
                name: "mask_mode", type: OptionType.enum,
                enum: {
                    "MASK_MODE_USER_PROVIDED": "MASK_MODE_USER_PROVIDED",
                    "MASK_MODE_BACKGROUND": "MASK_MODE_BACKGROUND",
                    "MASK_MODE_FOREGROUND": "MASK_MODE_FOREGROUND",
                    "MASK_MODE_SEMANTIC": "MASK_MODE_SEMANTIC",
                },
                default: "MASK_MODE_USER_PROVIDED",
                description: "How should the mask for the generation be provided"
            },
            {
                // The dilation is a fraction of the image width in [0, 1]. Declaring it as an
                // integer would only allow 0 or 1, so it is exposed as a fractional value.
                name: "mask_dilation", type: OptionType.numeric, min: 0, max: 1,
                integer: false, step: 0.01, description: "The mask dilation, grows the mask by a percentage of image width to compensate for imprecise masks."
            },
        ] : [];

        const maskClassOptions: ModelOptionInfoItem[] = isSemanticMask ? [
            {
                name: "mask_class", type: OptionType.string_list, default: [],
                description: "Input Class IDs. Create a mask based on image class, based on https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/imagen-api-customization#segment-ids"
            }
        ] : [];

        const editOptions: ModelOptionInfoItem[] = isMaskEdit ? [
            {
                name: "edit_steps", type: OptionType.numeric, default: 75,
                integer: true, description: "The number of steps for the base image generation, more steps means more time and better quality"
            },
        ] : [];

        const customizationOptions: ModelOptionInfoItem[] = usesControlImage ? [
            {
                name: "controlType", type: OptionType.enum, enum: { "Face Mesh": "CONTROL_TYPE_FACE_MESH", "Canny": "CONTROL_TYPE_CANNY", "Scribble": "CONTROL_TYPE_SCRIBBLE" },
                default: "CONTROL_TYPE_CANNY", description: "Method used to generate the control image"
            },
            {
                name: "controlImageComputation", type: OptionType.boolean, default: true, description: "Should the control image be computed from the input image, or is it provided"
            }
        ] : [];

        return {
            _option_id: "vertexai-imagen",
            options: [
                ...modeOptions,
                ...commonOptions,
                ...maskOptions,
                ...maskClassOptions,
                ...editOptions,
                ...customizationOptions,
                ...outputOptions,
            ]
        };
    }

    // Imagen model that is neither a "generate" nor a "capability" model.
    return textOptionsFallback;
}
/**
 * Builds the option metadata for VertexAI Gemini models.
 *
 * - Names containing "gemini-2.5-flash-image" get a reduced text option set (temperature
 *   capped at 2.0, max_tokens up to 32768) plus an image aspect-ratio selector.
 * - Other names containing "-2.5-" get the text options, a seed and the thinking options,
 *   with the thinking budget range depending on the flash-lite / flash / pro variant.
 * - Everything else gets the text options and a seed.
 *
 * @param model   Model name; only substring matching is applied.
 * @param _option Currently selected options; not used by this builder.
 * @returns The option metadata, tagged "vertexai-gemini".
 */
function getGeminiOptions(model: string, _option?: ModelOptions): ModelOptionsInfo {
    // Shared text options minus the ones this model family replaces or does not support.
    const fallbackWithout = (hidden: string[]) =>
        textOptionsFallback.options.filter((item) => !hidden.includes(item.name));

    // Special handling for gemini-2.5-flash-image
    if (model.includes("gemini-2.5-flash-image")) {
        const imageModelTextOptions = fallbackWithout(
            ["max_tokens", "presence_penalty", "frequency_penalty", "seed", "top_k"]
        ).map((item) =>
            // Set max temperature to 2.0 for gemini-2.5-flash-image
            item.name === SharedOptions.temperature && item.type === OptionType.numeric
                ? { ...item, max: 2.0 }
                : item
        );

        const imageMaxTokens: ModelOptionInfoItem = {
            name: SharedOptions.max_tokens,
            type: OptionType.numeric,
            min: 1,
            max: 32768,
            integer: true,
            step: 200,
            description: "Maximum output tokens"
        };

        const aspectRatio: ModelOptionInfoItem = {
            name: "image_aspect_ratio",
            type: OptionType.enum,
            enum: {
                "1:1": "1:1",
                "2:3": "2:3",
                "3:2": "3:2",
                "3:4": "3:4",
                "4:3": "4:3",
                "9:16": "9:16",
                "16:9": "16:9",
                "21:9": "21:9"
            },
            description: "Aspect ratio of the generated images"
        };

        return {
            _option_id: "vertexai-gemini",
            options: [imageMaxTokens, ...imageModelTextOptions, aspectRatio]
        };
    }

    const maxTokens: ModelOptionInfoItem = {
        name: SharedOptions.max_tokens, type: OptionType.numeric, min: 1, max: getGeminiMaxTokensLimit(model),
        integer: true, step: 200, description: "The maximum number of tokens to generate"
    };
    const seed: ModelOptionInfoItem = {
        name: SharedOptions.seed, type: OptionType.numeric, integer: true, description: "The seed for the generation, useful for reproducibility"
    };
    const options: ModelOptionInfoItem[] = [maxTokens, ...fallbackWithout(["max_tokens"]), seed];

    if (!model.includes("-2.5-")) {
        return { _option_id: "vertexai-gemini", options };
    }

    // Gemini 2.5 thinking models: the budget range and help text depend on the variant.
    // The lower bound is -1 for every variant; only the upper bound and the text differ.
    const budgetMin = -1;
    let budgetMax = 24576;
    let budgetDescription = "";
    const intro = "The target number of tokens to use for reasoning. ";
    const disableHint = "Set to 0 to disable thinking, -1 for dynamic thinking.";

    // "flash-lite" has to be tested before "flash", which it also contains.
    if (model.includes("flash-lite")) {
        budgetDescription = intro +
            "Flash Lite default: Model does not think. " +
            "Range: 512-24576 tokens. " +
            disableHint;
    } else if (model.includes("flash")) {
        budgetDescription = intro +
            "Flash default: Dynamic thinking (model decides when and how much to think). " +
            "Range: 0-24576 tokens. " +
            disableHint;
    } else if (model.includes("pro")) {
        budgetMax = 32768;
        budgetDescription = intro +
            "Pro default: Dynamic thinking (model decides when and how much to think). " +
            "Range: 128-32768 tokens. " +
            "Cannot disable thinking - minimum 128 tokens. Set to -1 for dynamic thinking.";
    }

    options.push(
        {
            name: "include_thoughts",
            type: OptionType.boolean,
            default: false,
            description: "Include the model's reasoning process in the response"
        },
        {
            name: "thinking_budget_tokens",
            type: OptionType.numeric,
            min: budgetMin,
            max: budgetMax,
            default: undefined,
            integer: true,
            step: 100,
            description: budgetDescription,
        }
    );

    return { _option_id: "vertexai-gemini", options };
}
/**
 * Builds the option metadata for Claude models on VertexAI.
 *
 * All models get a max_tokens option (bounded by getClaudeMaxTokensLimit) followed by the
 * shared text options without the penalty options. Names containing "-3-7" or "-4"
 * additionally get a `thinking_mode` toggle, and, while that toggle is on in the
 * currently selected options, the thinking budget and include-thoughts options.
 *
 * @param model  Model name; only substring matching is applied.
 * @param option Currently selected options, read as VertexAIClaudeOptions.
 * @returns The option metadata, tagged "vertexai-claude".
 */
function getClaudeOptions(model: string, option?: ModelOptions): ModelOptionsInfo {
    const hidden = ["max_tokens", "presence_penalty", "frequency_penalty"];

    const options: ModelOptionInfoItem[] = [
        {
            name: SharedOptions.max_tokens, type: OptionType.numeric, min: 1, max: getClaudeMaxTokensLimit(model),
            integer: true, step: 200, description: "The maximum number of tokens to generate"
        },
        ...textOptionsFallback.options.filter((item) => !hidden.includes(item.name)),
    ];

    const supportsThinking = model.includes("-3-7") || model.includes("-4");
    if (!supportsThinking) {
        return { _option_id: "vertexai-claude", options };
    }

    options.push({
        name: "thinking_mode",
        type: OptionType.boolean,
        default: false,
        description: "If true, use the extended reasoning mode"
    });

    // The budget and visibility options only appear once thinking mode is switched on.
    if ((option as VertexAIClaudeOptions)?.thinking_mode) {
        options.push(
            {
                name: "thinking_budget_tokens",
                type: OptionType.numeric,
                min: 1024,
                default: 1024,
                integer: true,
                step: 100,
                description: "The target number of tokens to use for reasoning, not a hard limit."
            },
            {
                name: "include_thoughts",
                type: OptionType.boolean,
                default: false,
                description: "Include the model's reasoning process in the response"
            }
        );
    }

    return { _option_id: "vertexai-claude", options };
}
/**
 * Builds the option metadata for Llama models on VertexAI.
 *
 * Produces a max_tokens option (bounded by getLlamaMaxTokensLimit) followed by the shared
 * text options without the penalty and stop-sequence options, with the numeric
 * temperature option capped at 1.0.
 *
 * @param model Model name, forwarded to getLlamaMaxTokensLimit.
 * @returns The option metadata, tagged "text-fallback".
 */
function getLlamaOptions(model: string): ModelOptionsInfo {
    const hidden = ["max_tokens", "presence_penalty", "frequency_penalty", "stop_sequence"];

    const maxTokens: ModelOptionInfoItem = {
        name: SharedOptions.max_tokens, type: OptionType.numeric, min: 1, max: getLlamaMaxTokensLimit(model),
        integer: true, step: 200, description: "The maximum number of tokens to generate"
    };

    const sharedOptions = textOptionsFallback.options
        .filter((item) => !hidden.includes(item.name))
        .map((item) =>
            // Set max temperature to 1.0 for Llama models
            item.name === SharedOptions.temperature && item.type === OptionType.numeric
                ? { ...item, max: 1.0 }
                : item
        );

    return {
        _option_id: "text-fallback",
        options: [maxTokens, ...sharedOptions]
    };
}
/**
 * Returns the maximum output-token limit used for a Gemini model, based on substrings
 * of its name.
 *
 * @param model Model name; only substring matching is applied.
 * @returns 32768 for "gemini-2.5-flash-image", 65536 for "thinking" or "-2.5-" names,
 *          2048 for "ultra" or "vision" names, and 8192 otherwise.
 */
function getGeminiMaxTokensLimit(model: string): number {
    const nameHas = (fragment: string): boolean => model.includes(fragment);

    // Must be tested before the "-2.5-" rule, which this name also satisfies.
    if (nameHas("gemini-2.5-flash-image")) {
        return 32768;
    }
    if (nameHas("thinking") || nameHas("-2.5-")) {
        return 65536;
    }
    return nameHas("ultra") || nameHas("vision") ? 2048 : 8192;
}
/**
 * Returns the maximum output-token limit used for a Claude model, based on its name.
 *
 * @param model Model name or identifier.
 * @returns 32768 for version-4 "opus-" models, 65536 for other version-4 models,
 *          128000 for "-3-7-" models, 8192 for "-3-5-" models, and 4096 otherwise.
 */
function getClaudeMaxTokensLimit(model: string): number {
    // A version-4 marker is "-4" followed by "-", "@", "." or the end of the name.
    // Matching only "-4-" would miss identifiers such as "claude-sonnet-4@20250514" or
    // "claude-opus-4", which would then wrongly receive the 4096 default.
    const isVersion4 = /-4(?:[-@.]|$)/.test(model);

    if (isVersion4) {
        return model.includes("opus-") ? 32768 : 65536;
    }
    if (model.includes("-3-7-")) {
        return 128000;
    }
    if (model.includes("-3-5-")) {
        return 8192;
    }
    return 4096;
}
/**
 * Returns the maximum output-token limit used for Llama models.
 *
 * @param _model Model name; ignored, every Llama model gets the same limit.
 * @returns Always 8192.
 */
function getLlamaMaxTokensLimit(_model: string): number {
    const llamaMaxOutputTokens = 8192;
    return llamaMaxOutputTokens;
}
/**
 * Returns the maximum output-token limit for a VertexAI model, chosen by substrings of
 * its name. Checks run in the order "imagen-", "claude", "gemini", "llama".
 *
 * @param model Model name; only substring matching is applied.
 * @returns 0 for Imagen models, the family-specific limit for Claude, Gemini and Llama
 *          models, and 8192 for any other name.
 */
export function getMaxTokensLimitVertexAi(model: string): number {
    if (model.includes("imagen-")) {
        // Imagen models do not have a max tokens limit in the same way as text models
        return 0;
    }
    if (model.includes("claude")) {
        return getClaudeMaxTokensLimit(model);
    }
    if (model.includes("gemini")) {
        return getGeminiMaxTokensLimit(model);
    }
    if (model.includes("llama")) {
        return getLlamaMaxTokensLimit(model);
    }
    // Default fallback limit
    return 8192;
}