UNPKG

@pulumi/databricks

Version:

A Pulumi package for creating and managing Databricks cloud resources.

336 lines (335 loc) 14.3 kB
import * as pulumi from "@pulumi/pulumi";
import * as inputs from "./types/input";
import * as outputs from "./types/output";

/**
 * This resource allows you to manage [Model Serving](https://docs.databricks.com/machine-learning/model-serving/index.html)
 * endpoints in Databricks, including custom models, external models, and foundation models. For newer
 * foundation models, including Llama 4, please use the databricks.ModelServingProvisionedThroughput resource.
 *
 * > This resource can only be used with a workspace-level provider!
 *
 * > If you replace `servedModels` with `servedEntities` in an existing serving endpoint, the serving
 * > endpoint will briefly go into an update state (~30 seconds) and increment the config version.
 *
 * ## Example Usage
 *
 * Creating a CPU serving endpoint
 *
 * ```typescript
 * import * as pulumi from "@pulumi/pulumi";
 * import * as databricks from "@pulumi/databricks";
 *
 * const _this = new databricks.ModelServing("this", {
 *     name: "ads-serving-endpoint",
 *     config: {
 *         servedEntities: [
 *             {
 *                 name: "prod_model",
 *                 entityName: "ads-model",
 *                 entityVersion: "2",
 *                 workloadSize: "Small",
 *                 scaleToZeroEnabled: true,
 *             },
 *             {
 *                 name: "candidate_model",
 *                 entityName: "ads-model",
 *                 entityVersion: "4",
 *                 workloadSize: "Small",
 *                 scaleToZeroEnabled: false,
 *             },
 *         ],
 *         trafficConfig: {
 *             routes: [
 *                 {
 *                     servedModelName: "prod_model",
 *                     trafficPercentage: 90,
 *                 },
 *                 {
 *                     servedModelName: "candidate_model",
 *                     trafficPercentage: 10,
 *                 },
 *             ],
 *         },
 *     },
 * });
 * ```
 *
 * Creating a Foundation Model endpoint
 *
 * ```typescript
 * import * as pulumi from "@pulumi/pulumi";
 * import * as databricks from "@pulumi/databricks";
 *
 * const llama = new databricks.ModelServing("llama", {
 *     name: "llama_3_2_3b_instruct",
 *     aiGateway: {
 *         usageTrackingConfig: {
 *             enabled: true,
 *         },
 *     },
 *     config: {
 *         servedEntities: [{
 *             name: "meta_llama_v3_2_3b_instruct-3",
 *             entityName: "system.ai.llama_v3_2_3b_instruct",
 *             entityVersion: "2",
 *             scaleToZeroEnabled: true,
 *             maxProvisionedThroughput: 44000,
 *         }],
 *     },
 * });
 * ```
 *
 * Creating an External Model endpoint
 *
 * ```typescript
 * import * as pulumi from "@pulumi/pulumi";
 * import * as databricks from "@pulumi/databricks";
 *
 * const gpt4o = new databricks.ModelServing("gpt_4o", {
 *     name: "gpt-4o-mini",
 *     aiGateway: {
 *         usageTrackingConfig: {
 *             enabled: true,
 *         },
 *         rateLimits: [{
 *             calls: 10,
 *             key: "endpoint",
 *             renewalPeriod: "minute",
 *         }],
 *         inferenceTableConfig: {
 *             enabled: true,
 *             tableNamePrefix: "gpt-4o-mini",
 *             catalogName: "ml",
 *             schemaName: "ai_gateway",
 *         },
 *         guardrails: {
 *             input: {
 *                 invalidKeywords: ["SuperSecretProject"],
 *                 pii: {
 *                     behavior: "BLOCK",
 *                 },
 *             },
 *             output: {
 *                 pii: {
 *                     behavior: "BLOCK",
 *                 },
 *             },
 *         },
 *     },
 *     config: {
 *         servedEntities: [{
 *             name: "gpt-4o-mini",
 *             externalModel: {
 *                 name: "gpt-4o-mini",
 *                 provider: "openai",
 *                 task: "llm/v1/chat",
 *                 openaiConfig: {
 *                     openaiApiKey: "{{secrets/llm_scope/openai_api_key}}",
 *                 },
 *             },
 *         }],
 *     },
 * });
 * ```
 *
 * ## Access Control
 *
 * * databricks.Permissions can control which groups or individual users can *Manage*, *Query* or
 *   *View* individual serving endpoints.
 *
 * ## Related Resources
 *
 * The following resources are often used in the same context:
 *
 * * databricks.ModelServingProvisionedThroughput to create [Foundation Model provisioned throughput](https://docs.databricks.com/aws/en/machine-learning/foundation-model-apis/deploy-prov-throughput-foundation-model-apis) endpoints in Databricks.
 * * databricks.RegisteredModel to create [Models in Unity Catalog](https://docs.databricks.com/en/mlflow/models-in-uc.html) in Databricks.
 * * End to end workspace management guide.
 * * databricks.Directory to manage directories in [Databricks Workspace](https://docs.databricks.com/workspace/workspace-objects.html).
 * * databricks.MlflowModel to create models in the [workspace model registry](https://docs.databricks.com/en/mlflow/model-registry.html) in Databricks.
 * * databricks.Notebook to manage [Databricks Notebooks](https://docs.databricks.com/notebooks/index.html).
 * * databricks.Notebook data to export a notebook from Databricks Workspace.
 * * databricks.Repo to manage [Databricks Repos](https://docs.databricks.com/repos.html).
 *
 * ## Import
 *
 * The model serving resource can be imported using the name of the endpoint.
 *
 * ```hcl
 * import {
 *   to = databricks_model_serving.this
 *   id = "<model-serving-endpoint-name>"
 * }
 * ```
 *
 * Alternatively, when using `terraform` version 1.4 or earlier, import using the `pulumi import` command:
 *
 * ```sh
 * $ pulumi import databricks:index/modelServing:ModelServing this <model-serving-endpoint-name>
 * ```
 */
export declare class ModelServing extends pulumi.CustomResource {
    /**
     * Get an existing ModelServing resource's state with the given name, ID, and optional extra
     * properties used to qualify the lookup.
     *
     * @param name The _unique_ name of the resulting resource.
     * @param id The _unique_ provider ID of the resource to lookup.
     * @param state Any extra arguments used during the lookup.
     * @param opts Optional settings to control the behavior of the CustomResource.
     */
    static get(name: string, id: pulumi.Input<pulumi.ID>, state?: ModelServingState, opts?: pulumi.CustomResourceOptions): ModelServing;
    /**
     * Returns true if the given object is an instance of ModelServing. This is designed to work even
     * when multiple copies of the Pulumi SDK have been loaded into the same process.
     */
    static isInstance(obj: any): obj is ModelServing;
    /**
     * A block with AI Gateway configuration for the serving endpoint. *Note: only external model
     * endpoints are supported as of now.*
     */
    readonly aiGateway: pulumi.Output<outputs.ModelServingAiGateway | undefined>;
    /**
     * The Budget Policy ID set for this serving endpoint.
     */
    readonly budgetPolicyId: pulumi.Output<string | undefined>;
    /**
     * The model serving endpoint configuration. This is optional and can be added and modified after
     * creation. If `config` was provided in a previous apply but is not provided in the current apply,
     * no change to the model serving endpoint will occur. To recreate the model serving endpoint
     * without the `config` block, the model serving endpoint must be destroyed and recreated.
     */
    readonly config: pulumi.Output<outputs.ModelServingConfig>;
    /**
     * The description of the model serving endpoint.
     */
    readonly description: pulumi.Output<string | undefined>;
    /**
     * A block with Email notification setting.
     */
    readonly emailNotifications: pulumi.Output<outputs.ModelServingEmailNotifications | undefined>;
    /**
     * Invocation url of the endpoint.
     */
    readonly endpointUrl: pulumi.Output<string>;
    /**
     * The name of the model serving endpoint. This field is required and must be unique across a
     * workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores.
     * NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the
     * updated name.
     */
    readonly name: pulumi.Output<string>;
    /**
     * A list of rate limit blocks to be applied to the serving endpoint. *Note: only external and
     * foundation model endpoints are supported as of now.*
     *
     * @deprecated Please use AI Gateway to manage rate limits.
     */
    readonly rateLimits: pulumi.Output<outputs.ModelServingRateLimit[] | undefined>;
    /**
     * A boolean enabling route optimization for the endpoint. *Note: only available for custom models.*
     */
    readonly routeOptimized: pulumi.Output<boolean | undefined>;
    /**
     * Unique identifier of the serving endpoint primarily used to set permissions and refer to this
     * instance for other operations.
     */
    readonly servingEndpointId: pulumi.Output<string>;
    /**
     * Tags to be attached to the serving endpoint and automatically propagated to billing logs.
     */
    readonly tags: pulumi.Output<outputs.ModelServingTag[] | undefined>;
    /**
     * Create a ModelServing resource with the given unique name, arguments, and options.
     *
     * @param name The _unique_ name of the resource.
     * @param args The arguments to use to populate this resource's properties.
     * @param opts A bag of options that control this resource's behavior.
     */
    constructor(name: string, args?: ModelServingArgs, opts?: pulumi.CustomResourceOptions);
}
/**
 * Input properties used for looking up and filtering ModelServing resources.
 */
export interface ModelServingState {
    /**
     * A block with AI Gateway configuration for the serving endpoint. *Note: only external model
     * endpoints are supported as of now.*
     */
    aiGateway?: pulumi.Input<inputs.ModelServingAiGateway>;
    /**
     * The Budget Policy ID set for this serving endpoint.
     */
    budgetPolicyId?: pulumi.Input<string>;
    /**
     * The model serving endpoint configuration. This is optional and can be added and modified after
     * creation. If `config` was provided in a previous apply but is not provided in the current apply,
     * no change to the model serving endpoint will occur. To recreate the model serving endpoint
     * without the `config` block, the model serving endpoint must be destroyed and recreated.
     */
    config?: pulumi.Input<inputs.ModelServingConfig>;
    /**
     * The description of the model serving endpoint.
     */
    description?: pulumi.Input<string>;
    /**
     * A block with Email notification setting.
     */
    emailNotifications?: pulumi.Input<inputs.ModelServingEmailNotifications>;
    /**
     * Invocation url of the endpoint.
     */
    endpointUrl?: pulumi.Input<string>;
    /**
     * The name of the model serving endpoint. This field is required and must be unique across a
     * workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores.
     * NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the
     * updated name.
     */
    name?: pulumi.Input<string>;
    /**
     * A list of rate limit blocks to be applied to the serving endpoint. *Note: only external and
     * foundation model endpoints are supported as of now.*
     *
     * @deprecated Please use AI Gateway to manage rate limits.
     */
    rateLimits?: pulumi.Input<pulumi.Input<inputs.ModelServingRateLimit>[]>;
    /**
     * A boolean enabling route optimization for the endpoint. *Note: only available for custom models.*
     */
    routeOptimized?: pulumi.Input<boolean>;
    /**
     * Unique identifier of the serving endpoint primarily used to set permissions and refer to this
     * instance for other operations.
     */
    servingEndpointId?: pulumi.Input<string>;
    /**
     * Tags to be attached to the serving endpoint and automatically propagated to billing logs.
     */
    tags?: pulumi.Input<pulumi.Input<inputs.ModelServingTag>[]>;
}
/**
 * The set of arguments for constructing a ModelServing resource.
 */
export interface ModelServingArgs {
    /**
     * A block with AI Gateway configuration for the serving endpoint. *Note: only external model
     * endpoints are supported as of now.*
     */
    aiGateway?: pulumi.Input<inputs.ModelServingAiGateway>;
    /**
     * The Budget Policy ID set for this serving endpoint.
     */
    budgetPolicyId?: pulumi.Input<string>;
    /**
     * The model serving endpoint configuration. This is optional and can be added and modified after
     * creation. If `config` was provided in a previous apply but is not provided in the current apply,
     * no change to the model serving endpoint will occur. To recreate the model serving endpoint
     * without the `config` block, the model serving endpoint must be destroyed and recreated.
     */
    config?: pulumi.Input<inputs.ModelServingConfig>;
    /**
     * The description of the model serving endpoint.
     */
    description?: pulumi.Input<string>;
    /**
     * A block with Email notification setting.
     */
    emailNotifications?: pulumi.Input<inputs.ModelServingEmailNotifications>;
    /**
     * The name of the model serving endpoint. This field is required and must be unique across a
     * workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores.
     * NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the
     * updated name.
     */
    name?: pulumi.Input<string>;
    /**
     * A list of rate limit blocks to be applied to the serving endpoint. *Note: only external and
     * foundation model endpoints are supported as of now.*
     *
     * @deprecated Please use AI Gateway to manage rate limits.
     */
    rateLimits?: pulumi.Input<pulumi.Input<inputs.ModelServingRateLimit>[]>;
    /**
     * A boolean enabling route optimization for the endpoint. *Note: only available for custom models.*
     */
    routeOptimized?: pulumi.Input<boolean>;
    /**
     * Tags to be attached to the serving endpoint and automatically propagated to billing logs.
     */
    tags?: pulumi.Input<pulumi.Input<inputs.ModelServingTag>[]>;
}