UNPKG

@pulumi/gcp

Version:

A Pulumi package for creating and managing Google Cloud Platform resources.

367 lines (366 loc) • 14.2 kB
import * as pulumi from "@pulumi/pulumi";
import * as inputs from "../types/input";
import * as outputs from "../types/output";
/**
 * Create an Endpoint and deploy a Model Garden model to it.
 *
 * To get more information about EndpointWithModelGardenDeployment, see:
 *
 * * [API documentation](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations/deploy)
 * * How-to Guides
 *     * [Overview of Model Garden](https://cloud.google.com/vertex-ai/generative-ai/docs/model-garden/explore-models)
 *     * [Overview of self-deployed models](https://cloud.google.com/vertex-ai/generative-ai/docs/model-garden/self-deployed-models)
 *     * [Use models in Model Garden](https://cloud.google.com/vertex-ai/generative-ai/docs/model-garden/use-models)
 *
 * ## Example Usage
 *
 * ### Vertex Ai Deploy Basic
 *
 * ```typescript
 * import * as pulumi from "@pulumi/pulumi";
 * import * as gcp from "@pulumi/gcp";
 *
 * const deploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy", {
 *     publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
 *     location: "us-central1",
 *     modelConfig: {
 *         acceptEula: true,
 *     },
 * });
 * ```
 * ### Vertex Ai Deploy Huggingface Model
 *
 * ```typescript
 * import * as pulumi from "@pulumi/pulumi";
 * import * as gcp from "@pulumi/gcp";
 *
 * const deploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy", {
 *     huggingFaceModelId: "Qwen/Qwen3-0.6B",
 *     location: "us-central1",
 *     modelConfig: {
 *         acceptEula: true,
 *     },
 * });
 * ```
 * ### Vertex Ai Deploy With Configs
 *
 * ```typescript
 * import * as pulumi from "@pulumi/pulumi";
 * import * as gcp from "@pulumi/gcp";
 *
 * const deploy = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy", {
 *     publisherModelName: "publishers/google/models/paligemma@paligemma-224-float32",
 *     location: "us-central1",
 *     modelConfig: {
 *         acceptEula: true,
 *     },
 *     deployConfig: {
 *         dedicatedResources: {
 *             machineSpec: {
 *                 machineType: "g2-standard-16",
 *                 acceleratorType: "NVIDIA_L4",
 *                 acceleratorCount: 1,
 *             },
 *             minReplicaCount: 1,
 *         },
 *     },
 * });
 * ```
 * ### Vertex Ai Deploy Multiple Models In Parallel
 *
 * ```typescript
 * import * as pulumi from "@pulumi/pulumi";
 * import * as gcp from "@pulumi/gcp";
 *
 * const deploy_gemma_11_2b_it = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it", {
 *     publisherModelName: "publishers/google/models/gemma@gemma-1.1-2b-it",
 *     location: "us-central1",
 *     modelConfig: {
 *         acceptEula: true,
 *     },
 *     deployConfig: {
 *         dedicatedResources: {
 *             machineSpec: {
 *                 machineType: "g2-standard-12",
 *                 acceleratorType: "NVIDIA_L4",
 *                 acceleratorCount: 1,
 *             },
 *             minReplicaCount: 1,
 *         },
 *     },
 * });
 * const deploy_qwen3_06b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b", {
 *     huggingFaceModelId: "Qwen/Qwen3-0.6B",
 *     location: "us-central1",
 *     modelConfig: {
 *         acceptEula: true,
 *     },
 *     deployConfig: {
 *         dedicatedResources: {
 *             machineSpec: {
 *                 machineType: "g2-standard-12",
 *                 acceleratorType: "NVIDIA_L4",
 *                 acceleratorCount: 1,
 *             },
 *             minReplicaCount: 1,
 *         },
 *     },
 * });
 * const deploy_llama_32_1b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b", {
 *     publisherModelName: "publishers/meta/models/llama3-2@llama-3.2-1b",
 *     location: "us-central1",
 *     modelConfig: {
 *         acceptEula: true,
 *     },
 *     deployConfig: {
 *         dedicatedResources: {
 *             machineSpec: {
 *                 machineType: "g2-standard-12",
 *                 acceleratorType: "NVIDIA_L4",
 *                 acceleratorCount: 1,
 *             },
 *             minReplicaCount: 1,
 *         },
 *     },
 * });
 * ```
 * ### Vertex Ai Deploy Multiple Models In Sequence
 *
 * ```typescript
 * import * as pulumi from "@pulumi/pulumi";
 * import * as gcp from "@pulumi/gcp";
 *
 * const deploy_gemma_11_2b_it = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-gemma-1_1-2b-it", {
 *     publisherModelName: "publishers/google/models/gemma@gemma-1.1-2b-it",
 *     location: "us-central1",
 *     modelConfig: {
 *         acceptEula: true,
 *     },
 *     deployConfig: {
 *         dedicatedResources: {
 *             machineSpec: {
 *                 machineType: "g2-standard-12",
 *                 acceleratorType: "NVIDIA_L4",
 *                 acceleratorCount: 1,
 *             },
 *             minReplicaCount: 1,
 *         },
 *     },
 * });
 * const deploy_qwen3_06b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-qwen3-0_6b", {
 *     huggingFaceModelId: "Qwen/Qwen3-0.6B",
 *     location: "us-central1",
 *     modelConfig: {
 *         acceptEula: true,
 *     },
 *     deployConfig: {
 *         dedicatedResources: {
 *             machineSpec: {
 *                 machineType: "g2-standard-12",
 *                 acceleratorType: "NVIDIA_L4",
 *                 acceleratorCount: 1,
 *             },
 *             minReplicaCount: 1,
 *         },
 *     },
 * }, {
 *     dependsOn: [deploy_gemma_11_2b_it],
 * });
 * const deploy_llama_32_1b = new gcp.vertex.AiEndpointWithModelGardenDeployment("deploy-llama-3_2-1b", {
 *     publisherModelName: "publishers/meta/models/llama3-2@llama-3.2-1b",
 *     location: "us-central1",
 *     modelConfig: {
 *         acceptEula: true,
 *     },
 *     deployConfig: {
 *         dedicatedResources: {
 *             machineSpec: {
 *                 machineType: "g2-standard-12",
 *                 acceleratorType: "NVIDIA_L4",
 *                 acceleratorCount: 1,
 *             },
 *             minReplicaCount: 1,
 *         },
 *     },
 * }, {
 *     dependsOn: [deploy_qwen3_06b],
 * });
 * ```
 *
 * ## Import
 *
 * This resource does not support import.
 */
export declare class AiEndpointWithModelGardenDeployment extends pulumi.CustomResource {
    /**
     * Get an existing AiEndpointWithModelGardenDeployment resource's state with the given name, ID, and optional extra
     * properties used to qualify the lookup.
     *
     * @param name The _unique_ name of the resulting resource.
     * @param id The _unique_ provider ID of the resource to lookup.
     * @param state Any extra arguments used during the lookup.
     * @param opts Optional settings to control the behavior of the CustomResource.
     */
    static get(name: string, id: pulumi.Input<pulumi.ID>, state?: AiEndpointWithModelGardenDeploymentState, opts?: pulumi.CustomResourceOptions): AiEndpointWithModelGardenDeployment;
    /**
     * Returns true if the given object is an instance of AiEndpointWithModelGardenDeployment. This is designed to work even
     * when multiple copies of the Pulumi SDK have been loaded into the same process.
     */
    static isInstance(obj: any): obj is AiEndpointWithModelGardenDeployment;
    /**
     * The deploy config to use for the deployment.
     * Structure is documented below.
     */
    readonly deployConfig: pulumi.Output<outputs.vertex.AiEndpointWithModelGardenDeploymentDeployConfig | undefined>;
    /**
     * Output only. The display name assigned to the model deployed to the endpoint.
     * This is not required to delete the resource but is used for debug logging.
     */
    readonly deployedModelDisplayName: pulumi.Output<string>;
    /**
     * Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint.
     * It is required to undeploy the model from the endpoint during resource deletion as described in
     * https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
     */
    readonly deployedModelId: pulumi.Output<string>;
    /**
     * Resource ID segment making up resource `endpoint`. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
     */
    readonly endpoint: pulumi.Output<string>;
    /**
     * The endpoint config to use for the deployment.
     * Structure is documented below.
     */
    readonly endpointConfig: pulumi.Output<outputs.vertex.AiEndpointWithModelGardenDeploymentEndpointConfig | undefined>;
    /**
     * The Hugging Face model to deploy.
     * Format: Hugging Face model ID like `google/gemma-2-2b-it`.
     */
    readonly huggingFaceModelId: pulumi.Output<string | undefined>;
    /**
     * Resource ID segment making up resource `location`. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
     */
    readonly location: pulumi.Output<string>;
    /**
     * The model config to use for the deployment.
     * Structure is documented below.
     */
    readonly modelConfig: pulumi.Output<outputs.vertex.AiEndpointWithModelGardenDeploymentModelConfig | undefined>;
    /**
     * The ID of the project in which the resource belongs.
     * If it is not provided, the provider project is used.
     */
    readonly project: pulumi.Output<string>;
    /**
     * The Model Garden model to deploy.
     * Format:
     * `publishers/{publisher}/models/{publisher_model}@{version_id}`, or
     * `publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001`.
     */
    readonly publisherModelName: pulumi.Output<string | undefined>;
    /**
     * Create a AiEndpointWithModelGardenDeployment resource with the given unique name, arguments, and options.
     *
     * @param name The _unique_ name of the resource.
     * @param args The arguments to use to populate this resource's properties.
     * @param opts A bag of options that control this resource's behavior.
     */
    constructor(name: string, args: AiEndpointWithModelGardenDeploymentArgs, opts?: pulumi.CustomResourceOptions);
}
/**
 * Input properties used for looking up and filtering AiEndpointWithModelGardenDeployment resources.
 */
export interface AiEndpointWithModelGardenDeploymentState {
    /**
     * The deploy config to use for the deployment.
     * Structure is documented below.
     */
    deployConfig?: pulumi.Input<inputs.vertex.AiEndpointWithModelGardenDeploymentDeployConfig>;
    /**
     * Output only. The display name assigned to the model deployed to the endpoint.
     * This is not required to delete the resource but is used for debug logging.
     */
    deployedModelDisplayName?: pulumi.Input<string>;
    /**
     * Output only. The unique numeric ID that Vertex AI assigns to the model at the time it is deployed to the endpoint.
     * It is required to undeploy the model from the endpoint during resource deletion as described in
     * https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel.
     */
    deployedModelId?: pulumi.Input<string>;
    /**
     * Resource ID segment making up resource `endpoint`. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
     */
    endpoint?: pulumi.Input<string>;
    /**
     * The endpoint config to use for the deployment.
     * Structure is documented below.
     */
    endpointConfig?: pulumi.Input<inputs.vertex.AiEndpointWithModelGardenDeploymentEndpointConfig>;
    /**
     * The Hugging Face model to deploy.
     * Format: Hugging Face model ID like `google/gemma-2-2b-it`.
     */
    huggingFaceModelId?: pulumi.Input<string>;
    /**
     * Resource ID segment making up resource `location`. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
     */
    location?: pulumi.Input<string>;
    /**
     * The model config to use for the deployment.
     * Structure is documented below.
     */
    modelConfig?: pulumi.Input<inputs.vertex.AiEndpointWithModelGardenDeploymentModelConfig>;
    /**
     * The ID of the project in which the resource belongs.
     * If it is not provided, the provider project is used.
     */
    project?: pulumi.Input<string>;
    /**
     * The Model Garden model to deploy.
     * Format:
     * `publishers/{publisher}/models/{publisher_model}@{version_id}`, or
     * `publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001`.
     */
    publisherModelName?: pulumi.Input<string>;
}
/**
 * The set of arguments for constructing a AiEndpointWithModelGardenDeployment resource.
 */
export interface AiEndpointWithModelGardenDeploymentArgs {
    /**
     * The deploy config to use for the deployment.
     * Structure is documented below.
     */
    deployConfig?: pulumi.Input<inputs.vertex.AiEndpointWithModelGardenDeploymentDeployConfig>;
    /**
     * The endpoint config to use for the deployment.
     * Structure is documented below.
     */
    endpointConfig?: pulumi.Input<inputs.vertex.AiEndpointWithModelGardenDeploymentEndpointConfig>;
    /**
     * The Hugging Face model to deploy.
     * Format: Hugging Face model ID like `google/gemma-2-2b-it`.
     */
    huggingFaceModelId?: pulumi.Input<string>;
    /**
     * Resource ID segment making up resource `location`. It identifies the resource within its parent collection as described in https://google.aip.dev/122.
     */
    location: pulumi.Input<string>;
    /**
     * The model config to use for the deployment.
     * Structure is documented below.
     */
    modelConfig?: pulumi.Input<inputs.vertex.AiEndpointWithModelGardenDeploymentModelConfig>;
    /**
     * The ID of the project in which the resource belongs.
     * If it is not provided, the provider project is used.
     */
    project?: pulumi.Input<string>;
    /**
     * The Model Garden model to deploy.
     * Format:
     * `publishers/{publisher}/models/{publisher_model}@{version_id}`, or
     * `publishers/hf-{hugging-face-author}/models/{hugging-face-model-name}@001`.
     */
    publisherModelName?: pulumi.Input<string>;
}