// @pulumi/gcp
// A Pulumi package for creating and managing Google Cloud Platform resources.
import * as pulumi from "@pulumi/pulumi";
import * as inputs from "../types/input";
import * as outputs from "../types/output";
/**
* Create an Endpoint and deploy a Model Garden model to it.
*
* To get more information about EndpointWithModelGardenDeployment, see:
*
* * [API documentation](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations/deploy)
* * How-to Guides
* * [Overview of Model Garden](https://cloud.google.com/vertex-ai/generative-ai/docs/model-garden/explore-models)
* * [Overview of self-deployed models](https://cloud.google.com/vertex-ai/generative-ai/docs/model-garden/self-deployed-models)
* * [Use models in Model Garden](https://cloud.google.com/vertex-ai/generative-ai/docs/model-garden/use-models)
*
* ## Example Usage
*
* ### Vertex Ai Deploy Basic
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as gcp from "@pulumi/gcp";
*
* const deploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy", {
* publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
* location: "us-central1",
* modelConfig: {
* acceptEula: true,
* },
* });
* ```
* ### Vertex Ai Deploy Huggingface Model
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as gcp from "@pulumi/gcp";
*
* const deploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy", {
* huggingFaceModelId: "Qwen/Qwen3-0.6B",
* location: "us-central1",
* modelConfig: {
* acceptEula: true,
* },
* });
* ```
* ### Vertex Ai Deploy With Configs
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as gcp from "@pulumi/gcp";
*
* const deploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy", {
* publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
* location: "us-central1",
* modelConfig: {
* acceptEula: true,
* },
* deployConfig: {
* dedicatedResources: {
* machineSpec: {
* machineType: "g2-standard-16",
* acceleratorType: "NVIDIA_L4",
* acceleratorCount: 1,
* },
* minReplicaCount: 1,
* },
* },
* });
* ```
* ### Vertex Ai Deploy Multiple Models In Parallel
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as gcp from "@pulumi/gcp";
*
* const deploy_gemma_11_2b_it = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it", {
* publisherModelName: "publishers/google/models/gemma@gemma-1.1-2b-it",
* location: "us-central1",
* modelConfig: {
* acceptEula: true,
* },
* deployConfig: {
* dedicatedResources: {
* machineSpec: {
* machineType: "g2-standard-12",
* acceleratorType: "NVIDIA_L4",
* acceleratorCount: 1,
* },
* minReplicaCount: 1,
* },
* },
* });
* const deploy_qwen3_06b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b", {
* huggingFaceModelId: "Qwen/Qwen3-0.6B",
* location: "us-central1",
* modelConfig: {
* acceptEula: true,
* },
* deployConfig: {
* dedicatedResources: {
* machineSpec: {
* machineType: "g2-standard-12",
* acceleratorType: "NVIDIA_L4",
* acceleratorCount: 1,
* },
* minReplicaCount: 1,
* },
* },
* });
* const deploy_llama_32_1b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b", {
* publisherModelName: "publishers/meta/models/llama3-2@llama-3.2-1b",
* location: "us-central1",
* modelConfig: {
* acceptEula: true,
* },
* deployConfig: {
* dedicatedResources: {
* machineSpec: {
* machineType: "g2-standard-12",
* acceleratorType: "NVIDIA_L4",
* acceleratorCount: 1,
* },
* minReplicaCount: 1,
* },
* },
* });
* ```
* ### Vertex Ai Deploy Multiple Models In Sequence
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as gcp from "@pulumi/gcp";
*
* const deploy_gemma_11_2b_it = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it", {
* publisherModelName: "publishers/google/models/gemma@gemma-1.1-2b-it",
* location: "us-central1",
* modelConfig: {
* acceptEula: true,
* },
* deployConfig: {
* dedicatedResources: {
* machineSpec: {
* machineType: "g2-standard-12",
* acceleratorType: "NVIDIA_L4",
* acceleratorCount: 1,
* },
* minReplicaCount: 1,
* },
* },
* });
* const deploy_qwen3_06b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b", {
* huggingFaceModelId: "Qwen/Qwen3-0.6B",
* location: "us-central1",
* modelConfig: {
* acceptEula: true,
* },
* deployConfig: {
* dedicatedResources: {
* machineSpec: {
* machineType: "g2-standard-12",
* acceleratorType: "NVIDIA_L4",
* acceleratorCount: 1,
* },
* minReplicaCount: 1,
* },
* },
* }, {
* dependsOn: [deploy_gemma_11_2b_it],
* });
* const deploy_llama_32_1b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b", {
* publisherModelName: "publishers/meta/models/llama3-2@llama-3.2-1b",
* location: "us-central1",
* modelConfig: {
* acceptEula: true,
* },
* deployConfig: {
* dedicatedResources: {
* machineSpec: {
* machineType: "g2-standard-12",
* acceleratorType: "NVIDIA_L4",
* acceleratorCount: 1,
* },
* minReplicaCount: 1,
* },
* },
* }, {
* dependsOn: [deploy_qwen3_06b],
* });
* ```
*
* ## Import
*
* This resource does not support import.
*/
export declare class AiEndpointWithModelGardenDeployment extends pulumi.CustomResource {
/**
 * Get an existing AiEndpointWithModelGardenDeployment resource's state with the given name, ID, and optional extra
 * properties used to qualify the lookup.
 *
 * @param name The _unique_ name of the resulting resource.
 * @param id The _unique_ provider ID of the resource to lookup.
 * @param state Any extra arguments used during the lookup.
 * @param opts Optional settings to control the behavior of the CustomResource.
 */
static get(name: string, id: pulumi.Input<pulumi.ID>, state?: AiEndpointWithModelGardenDeploymentState, opts?: pulumi.CustomResourceOptions): AiEndpointWithModelGardenDeployment;
/**
 * Returns true if the given object is an instance of AiEndpointWithModelGardenDeployment. This is designed to work even
 * when multiple copies of the Pulumi SDK have been loaded into the same process.
 */
static isInstance(obj: any): obj is AiEndpointWithModelGardenDeployment;
/**
 * The deploy config to use for the deployment.
 * Structure is documented below.
 */
readonly deployConfig: pulumi.Output<outputs.vertex.AiEndpointWithModelGardenDeploymentDeployConfig | undefined>;
/**
 * Output only. The display name assigned to the model deployed to the endpoint.
 * This is not required to delete the resource but is used for debug logging.
 */
readonly deployedModelDisplayName: pulumi.Output<string>;
/**
 * Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint.
 * It is required to undeploy the model from the endpoint during resource deletion as described in
 * https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
 */
readonly deployedModelId: pulumi.Output<string>;
/**
 * Resource ID segment making up resource `endpoint`. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
 */
readonly endpoint: pulumi.Output<string>;
/**
 * The endpoint config to use for the deployment.
 * Structure is documented below.
 */
readonly endpointConfig: pulumi.Output<outputs.vertex.AiEndpointWithModelGardenDeploymentEndpointConfig | undefined>;
/**
 * The Hugging Face model to deploy.
 * Format: Hugging Face model ID like `google/gemma-2-2b-it`.
 * NOTE(review): appears mutually exclusive with `publisherModelName` — the usage
 * examples set exactly one model source per deployment; confirm against the
 * Vertex AI deploy API docs.
 */
readonly huggingFaceModelId: pulumi.Output<string | undefined>;
/**
 * Resource ID segment making up resource `location`. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
 */
readonly location: pulumi.Output<string>;
/**
 * The model config to use for the deployment.
 * Structure is documented below.
 */
readonly modelConfig: pulumi.Output<outputs.vertex.AiEndpointWithModelGardenDeploymentModelConfig | undefined>;
/**
 * The ID of the project in which the resource belongs.
 * If it is not provided, the provider project is used.
 */
readonly project: pulumi.Output<string>;
/**
 * The Model Garden model to deploy.
 * Format:
 * `publishers/{publisher}/models/{publisher_model}@{version_id}`, or
 * `publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001`.
 * NOTE(review): appears mutually exclusive with `huggingFaceModelId` — see note there.
 */
readonly publisherModelName: pulumi.Output<string | undefined>;
/**
 * Create a AiEndpointWithModelGardenDeployment resource with the given unique name, arguments, and options.
 *
 * @param name The _unique_ name of the resource.
 * @param args The arguments to use to populate this resource's properties.
 * @param opts A bag of options that control this resource's behavior.
 */
constructor(name: string, args: AiEndpointWithModelGardenDeploymentArgs, opts?: pulumi.CustomResourceOptions);
}
/**
 * Input properties used for looking up and filtering AiEndpointWithModelGardenDeployment resources.
 *
 * All properties are optional: a state lookup (via `AiEndpointWithModelGardenDeployment.get`)
 * may supply any subset of fields to qualify the match.
 */
export interface AiEndpointWithModelGardenDeploymentState {
/**
 * The deploy config to use for the deployment.
 * Structure is documented below.
 */
deployConfig?: pulumi.Input<inputs.vertex.AiEndpointWithModelGardenDeploymentDeployConfig>;
/**
 * Output only. The display name assigned to the model deployed to the endpoint.
 * This is not required to delete the resource but is used for debug logging.
 */
deployedModelDisplayName?: pulumi.Input<string>;
/**
 * Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint.
 * It is required to undeploy the model from the endpoint during resource deletion as described in
 * https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
 */
deployedModelId?: pulumi.Input<string>;
/**
 * Resource ID segment making up resource `endpoint`. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
 */
endpoint?: pulumi.Input<string>;
/**
 * The endpoint config to use for the deployment.
 * Structure is documented below.
 */
endpointConfig?: pulumi.Input<inputs.vertex.AiEndpointWithModelGardenDeploymentEndpointConfig>;
/**
 * The Hugging Face model to deploy.
 * Format: Hugging Face model ID like `google/gemma-2-2b-it`.
 */
huggingFaceModelId?: pulumi.Input<string>;
/**
 * Resource ID segment making up resource `location`. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
 */
location?: pulumi.Input<string>;
/**
 * The model config to use for the deployment.
 * Structure is documented below.
 */
modelConfig?: pulumi.Input<inputs.vertex.AiEndpointWithModelGardenDeploymentModelConfig>;
/**
 * The ID of the project in which the resource belongs.
 * If it is not provided, the provider project is used.
 */
project?: pulumi.Input<string>;
/**
 * The Model Garden model to deploy.
 * Format:
 * `publishers/{publisher}/models/{publisher_model}@{version_id}`, or
 * `publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001`.
 */
publisherModelName?: pulumi.Input<string>;
}
/**
 * The set of arguments for constructing a AiEndpointWithModelGardenDeployment resource.
 *
 * `location` is the only required property. NOTE(review): the usage examples
 * always supply exactly one of `publisherModelName` or `huggingFaceModelId`;
 * presumably one model source is required — confirm against the Vertex AI
 * deploy API docs.
 */
export interface AiEndpointWithModelGardenDeploymentArgs {
/**
 * The deploy config to use for the deployment.
 * Structure is documented below.
 */
deployConfig?: pulumi.Input<inputs.vertex.AiEndpointWithModelGardenDeploymentDeployConfig>;
/**
 * The endpoint config to use for the deployment.
 * Structure is documented below.
 */
endpointConfig?: pulumi.Input<inputs.vertex.AiEndpointWithModelGardenDeploymentEndpointConfig>;
/**
 * The Hugging Face model to deploy.
 * Format: Hugging Face model ID like `google/gemma-2-2b-it`.
 */
huggingFaceModelId?: pulumi.Input<string>;
/**
 * Resource ID segment making up resource `location`. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
 */
location: pulumi.Input<string>;
/**
 * The model config to use for the deployment.
 * Structure is documented below.
 */
modelConfig?: pulumi.Input<inputs.vertex.AiEndpointWithModelGardenDeploymentModelConfig>;
/**
 * The ID of the project in which the resource belongs.
 * If it is not provided, the provider project is used.
 */
project?: pulumi.Input<string>;
/**
 * The Model Garden model to deploy.
 * Format:
 * `publishers/{publisher}/models/{publisher_model}@{version_id}`, or
 * `publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001`.
 */
publisherModelName?: pulumi.Input<string>;
}