UNPKG

@pulumi/databricks

Version:

A Pulumi package for creating and managing Databricks cloud resources.

236 lines 9.02 kB
"use strict"; // *** WARNING: this file was generated by pulumi-language-nodejs. *** // *** Do not edit by hand unless you're certain you know what you are doing! *** Object.defineProperty(exports, "__esModule", { value: true }); exports.ModelServing = void 0; const pulumi = require("@pulumi/pulumi"); const utilities = require("./utilities"); /** * This resource allows you to manage [Model Serving](https://docs.databricks.com/machine-learning/model-serving/index.html) endpoints in Databricks, including custom models, external models, and foundation models. For newer foundation models, including Llama 4, please use the databricks.ModelServingProvisionedThroughput resource. * * > This resource can only be used with a workspace-level provider! * * > If you replace `servedModels` with `servedEntities` in an existing serving endpoint, the serving endpoint will briefly go into an update state (~30 seconds) and increment the config version. * * ## Example Usage * * Creating a CPU serving endpoint * * ```typescript * import * as pulumi from "@pulumi/pulumi"; * import * as databricks from "@pulumi/databricks"; * * const _this = new databricks.ModelServing("this", { * name: "ads-serving-endpoint", * config: { * servedEntities: [ * { * name: "prod_model", * entityName: "ads-model", * entityVersion: "2", * workloadSize: "Small", * scaleToZeroEnabled: true, * }, * { * name: "candidate_model", * entityName: "ads-model", * entityVersion: "4", * workloadSize: "Small", * scaleToZeroEnabled: false, * }, * ], * trafficConfig: { * routes: [ * { * servedModelName: "prod_model", * trafficPercentage: 90, * }, * { * servedModelName: "candidate_model", * trafficPercentage: 10, * }, * ], * }, * }, * }); * ``` * * Creating a Foundation Model endpoint * * ```typescript * import * as pulumi from "@pulumi/pulumi"; * import * as databricks from "@pulumi/databricks"; * * const llama = new databricks.ModelServing("llama", { * name: "llama_3_2_3b_instruct", * aiGateway: { * usageTrackingConfig: { * 
enabled: true, * }, * }, * config: { * servedEntities: [{ * name: "meta_llama_v3_2_3b_instruct-3", * entityName: "system.ai.llama_v3_2_3b_instruct", * entityVersion: "2", * scaleToZeroEnabled: true, * maxProvisionedThroughput: 44000, * }], * }, * }); * ``` * * Creating an External Model endpoint * * ```typescript * import * as pulumi from "@pulumi/pulumi"; * import * as databricks from "@pulumi/databricks"; * * const gpt4o = new databricks.ModelServing("gpt_4o", { * name: "gpt-4o-mini", * aiGateway: { * usageTrackingConfig: { * enabled: true, * }, * rateLimits: [{ * calls: 10, * key: "endpoint", * renewalPeriod: "minute", * }], * inferenceTableConfig: { * enabled: true, * tableNamePrefix: "gpt-4o-mini", * catalogName: "ml", * schemaName: "ai_gateway", * }, * guardrails: { * input: { * invalidKeywords: ["SuperSecretProject"], * pii: { * behavior: "BLOCK", * }, * }, * output: { * pii: { * behavior: "BLOCK", * }, * }, * }, * }, * config: { * servedEntities: [{ * name: "gpt-4o-mini", * externalModel: { * name: "gpt-4o-mini", * provider: "openai", * task: "llm/v1/chat", * openaiConfig: { * openaiApiKey: "{{secrets/llm_scope/openai_api_key}}", * }, * }, * }], * }, * }); * ``` * * ## Access Control * * * databricks.Permissions can control which groups or individual users can *Manage*, *Query* or *View* individual serving endpoints. * * ## Related Resources * * The following resources are often used in the same context: * * * databricks.ModelServingProvisionedThroughput to create [Foundation Model provisioned throughput](https://docs.databricks.com/aws/en/machine-learning/foundation-model-apis/deploy-prov-throughput-foundation-model-apis) endpoints in Databricks. * * databricks.RegisteredModel to create [Models in Unity Catalog](https://docs.databricks.com/en/mlflow/models-in-uc.html) in Databricks. * * End to end workspace management guide. 
* * databricks.Directory to manage directories in [Databricks Workspace](https://docs.databricks.com/workspace/workspace-objects.html). * * databricks.MlflowModel to create models in the [workspace model registry](https://docs.databricks.com/en/mlflow/model-registry.html) in Databricks. * * databricks.Notebook to manage [Databricks Notebooks](https://docs.databricks.com/notebooks/index.html). * * databricks.Notebook data to export a notebook from Databricks Workspace. * * databricks.Repo to manage [Databricks Repos](https://docs.databricks.com/repos.html). * * ## Import * * The model serving resource can be imported using the name of the endpoint. * * hcl * * import { * * to = databricks_model_serving.this * * id = "<model-serving-endpoint-name>" * * } * * Alternatively, when using `terraform` version 1.4 or earlier, import using the `pulumi import` command: * * bash * * ```sh * $ pulumi import databricks:index/modelServing:ModelServing this <model-serving-endpoint-name> * ``` */ class ModelServing extends pulumi.CustomResource { /** * Get an existing ModelServing resource's state with the given name, ID, and optional extra * properties used to qualify the lookup. * * @param name The _unique_ name of the resulting resource. * @param id The _unique_ provider ID of the resource to lookup. * @param state Any extra arguments used during the lookup. * @param opts Optional settings to control the behavior of the CustomResource. */ static get(name, id, state, opts) { return new ModelServing(name, state, { ...opts, id: id }); } /** * Returns true if the given object is an instance of ModelServing. This is designed to work even * when multiple copies of the Pulumi SDK have been loaded into the same process. 
*/ static isInstance(obj) { if (obj === undefined || obj === null) { return false; } return obj['__pulumiType'] === ModelServing.__pulumiType; } constructor(name, argsOrState, opts) { let resourceInputs = {}; opts = opts || {}; if (opts.id) { const state = argsOrState; resourceInputs["aiGateway"] = state?.aiGateway; resourceInputs["budgetPolicyId"] = state?.budgetPolicyId; resourceInputs["config"] = state?.config; resourceInputs["description"] = state?.description; resourceInputs["emailNotifications"] = state?.emailNotifications; resourceInputs["endpointUrl"] = state?.endpointUrl; resourceInputs["name"] = state?.name; resourceInputs["rateLimits"] = state?.rateLimits; resourceInputs["routeOptimized"] = state?.routeOptimized; resourceInputs["servingEndpointId"] = state?.servingEndpointId; resourceInputs["tags"] = state?.tags; } else { const args = argsOrState; resourceInputs["aiGateway"] = args?.aiGateway; resourceInputs["budgetPolicyId"] = args?.budgetPolicyId; resourceInputs["config"] = args?.config; resourceInputs["description"] = args?.description; resourceInputs["emailNotifications"] = args?.emailNotifications; resourceInputs["name"] = args?.name; resourceInputs["rateLimits"] = args?.rateLimits; resourceInputs["routeOptimized"] = args?.routeOptimized; resourceInputs["tags"] = args?.tags; resourceInputs["endpointUrl"] = undefined /*out*/; resourceInputs["servingEndpointId"] = undefined /*out*/; } opts = pulumi.mergeOptions(utilities.resourceOptsDefaults(), opts); super(ModelServing.__pulumiType, name, resourceInputs, opts); } } exports.ModelServing = ModelServing; /** @internal */ ModelServing.__pulumiType = 'databricks:index/modelServing:ModelServing'; //# sourceMappingURL=modelServing.js.map