// @pulumi/databricks
// A Pulumi package for creating and managing databricks cloud resources.
"use strict";
// *** WARNING: this file was generated by pulumi-language-nodejs. ***
// *** Do not edit by hand unless you're certain you know what you are doing! ***
Object.defineProperty(exports, "__esModule", { value: true });
exports.ModelServing = void 0;
const pulumi = require("@pulumi/pulumi");
const utilities = require("./utilities");
/**
* This resource allows you to manage [Model Serving](https://docs.databricks.com/machine-learning/model-serving/index.html) endpoints in Databricks, including custom models, external models, and foundation models. For newer foundation models, including Llama 4, please use the databricks.ModelServingProvisionedThroughput resource.
*
* > This resource can only be used with a workspace-level provider!
*
* > If you replace `servedModels` with `servedEntities` in an existing serving endpoint, the serving endpoint will briefly go into an update state (~30 seconds) and increment the config version.
*
* ## Example Usage
*
* Creating a CPU serving endpoint
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as databricks from "@pulumi/databricks";
*
* const _this = new databricks.ModelServing("this", {
* name: "ads-serving-endpoint",
* config: {
* servedEntities: [
* {
* name: "prod_model",
* entityName: "ads-model",
* entityVersion: "2",
* workloadSize: "Small",
* scaleToZeroEnabled: true,
* },
* {
* name: "candidate_model",
* entityName: "ads-model",
* entityVersion: "4",
* workloadSize: "Small",
* scaleToZeroEnabled: false,
* },
* ],
* trafficConfig: {
* routes: [
* {
* servedModelName: "prod_model",
* trafficPercentage: 90,
* },
* {
* servedModelName: "candidate_model",
* trafficPercentage: 10,
* },
* ],
* },
* },
* });
* ```
*
* Creating a Foundation Model endpoint
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as databricks from "@pulumi/databricks";
*
* const llama = new databricks.ModelServing("llama", {
* name: "llama_3_2_3b_instruct",
* aiGateway: {
* usageTrackingConfig: {
* enabled: true,
* },
* },
* config: {
* servedEntities: [{
* name: "meta_llama_v3_2_3b_instruct-3",
* entityName: "system.ai.llama_v3_2_3b_instruct",
* entityVersion: "2",
* scaleToZeroEnabled: true,
* maxProvisionedThroughput: 44000,
* }],
* },
* });
* ```
*
* Creating an External Model endpoint
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as databricks from "@pulumi/databricks";
*
* const gpt4o = new databricks.ModelServing("gpt_4o", {
* name: "gpt-4o-mini",
* aiGateway: {
* usageTrackingConfig: {
* enabled: true,
* },
* rateLimits: [{
* calls: 10,
* key: "endpoint",
* renewalPeriod: "minute",
* }],
* inferenceTableConfig: {
* enabled: true,
* tableNamePrefix: "gpt-4o-mini",
* catalogName: "ml",
* schemaName: "ai_gateway",
* },
* guardrails: {
* input: {
* invalidKeywords: ["SuperSecretProject"],
* pii: {
* behavior: "BLOCK",
* },
* },
* output: {
* pii: {
* behavior: "BLOCK",
* },
* },
* },
* },
* config: {
* servedEntities: [{
* name: "gpt-4o-mini",
* externalModel: {
* name: "gpt-4o-mini",
* provider: "openai",
* task: "llm/v1/chat",
* openaiConfig: {
* openaiApiKey: "{{secrets/llm_scope/openai_api_key}}",
* },
* },
* }],
* },
* });
* ```
*
* ## Access Control
*
* * databricks.Permissions can control which groups or individual users can *Manage*, *Query* or *View* individual serving endpoints.
*
* ## Related Resources
*
* The following resources are often used in the same context:
*
* * databricks.ModelServingProvisionedThroughput to create [Foundation Model provisioned throughput](https://docs.databricks.com/aws/en/machine-learning/foundation-model-apis/deploy-prov-throughput-foundation-model-apis) endpoints in Databricks.
* * databricks.RegisteredModel to create [Models in Unity Catalog](https://docs.databricks.com/en/mlflow/models-in-uc.html) in Databricks.
* * End to end workspace management guide.
* * databricks.Directory to manage directories in [Databricks Workspace](https://docs.databricks.com/workspace/workspace-objects.html).
* * databricks.MlflowModel to create models in the [workspace model registry](https://docs.databricks.com/en/mlflow/model-registry.html) in Databricks.
* * databricks.Notebook to manage [Databricks Notebooks](https://docs.databricks.com/notebooks/index.html).
* * databricks.Notebook data to export a notebook from Databricks Workspace.
* * databricks.Repo to manage [Databricks Repos](https://docs.databricks.com/repos.html).
*
* ## Import
*
* The model serving resource can be imported using the name of the endpoint.
*
* hcl
*
* import {
*
* to = databricks_model_serving.this
*
* id = "<model-serving-endpoint-name>"
*
* }
*
* Alternatively, when using `terraform` version 1.4 or earlier, import using the `pulumi import` command:
*
* bash
*
* ```sh
* $ pulumi import databricks:index/modelServing:ModelServing this <model-serving-endpoint-name>
* ```
*/
class ModelServing extends pulumi.CustomResource {
    /**
     * Look up an existing ModelServing resource by name and provider ID, optionally
     * seeding the lookup with known state values.
     *
     * @param name The _unique_ name of the resulting resource.
     * @param id The _unique_ provider ID of the resource to lookup.
     * @param state Any extra arguments used during the lookup.
     * @param opts Optional settings to control the behavior of the CustomResource.
     */
    static get(name, id, state, opts) {
        return new ModelServing(name, state, { ...opts, id: id });
    }
    /**
     * Returns true if the given object is an instance of ModelServing. This is designed to work even
     * when multiple copies of the Pulumi SDK have been loaded into the same process.
     */
    static isInstance(obj) {
        // `== null` matches both null and undefined; the type-token comparison below
        // is cross-SDK-copy safe because it checks a string tag, not the constructor.
        if (obj == null) {
            return false;
        }
        return obj['__pulumiType'] === ModelServing.__pulumiType;
    }
    /**
     * Create a ModelServing resource with the given unique name, arguments, and options,
     * or (when `opts.id` is set, as done by `get`) rehydrate one from existing state.
     *
     * @param name The _unique_ name of the resource.
     * @param argsOrState The arguments to use to populate this resource's properties,
     *        or the prior state when rehydrating via `opts.id`.
     * @param opts A bag of options that control this resource's behavior.
     */
    constructor(name, argsOrState, opts) {
        const resourceInputs = {};
        opts = opts || {};
        if (opts.id) {
            // Rehydration path: copy every known property (inputs and outputs) from prior state.
            const state = argsOrState;
            const stateKeys = [
                "aiGateway",
                "budgetPolicyId",
                "config",
                "description",
                "emailNotifications",
                "endpointUrl",
                "name",
                "rateLimits",
                "routeOptimized",
                "servingEndpointId",
                "tags",
            ];
            for (const key of stateKeys) {
                resourceInputs[key] = state?.[key];
            }
        }
        else {
            // Creation path: copy user-supplied inputs; output-only properties start undefined.
            const args = argsOrState;
            const argKeys = [
                "aiGateway",
                "budgetPolicyId",
                "config",
                "description",
                "emailNotifications",
                "name",
                "rateLimits",
                "routeOptimized",
                "tags",
            ];
            for (const key of argKeys) {
                resourceInputs[key] = args?.[key];
            }
            resourceInputs["endpointUrl"] = undefined /*out*/;
            resourceInputs["servingEndpointId"] = undefined /*out*/;
        }
        opts = pulumi.mergeOptions(utilities.resourceOptsDefaults(), opts);
        super(ModelServing.__pulumiType, name, resourceInputs, opts);
    }
}
// Replace the `void 0` placeholder assigned in the module preamble with the real class.
exports.ModelServing = ModelServing;
/** @internal The Pulumi type token identifying this resource kind to the engine. */
ModelServing.__pulumiType = 'databricks:index/modelServing:ModelServing';
//# sourceMappingURL=modelServing.js.map