@catladder/pipeline
Panter workflow for cloud CI/CD and DevOps
import type { Config } from "../src";

const config = {
  appName: "my-ai-app",
  customerName: "pan",
  components: {
    llm: {
      dir: "llm",
      build: false, // nothing to build; the deploy uses a prebuilt TGI image
      vars: {
        secret: ["HF_TOKEN"], // Hugging Face token, required to pull gated models such as Llama
      },
      deploy: {
        type: "google-cloudrun",
        projectId: "google-project-id",
        region: "europe-west4", // verify GPU availability per region; europe-west6 was not supported at the time of writing
        service: {
          // Prebuilt Hugging Face text-generation-inference (TGI) image from
          // Google's deep learning containers registry
          image:
            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.2-3.ubuntu2204.py311",
          args: [
            "--model-id=meta-llama/Llama-3.2-1B-Instruct",
            "--max-concurrent-requests=1",
          ],
          cpu: 8,
          noCpuThrottling: true, // must be true for GPU services (CPU always allocated)
          gpu: 1,
          gpuType: "nvidia-l4",
          memory: "32Gi",
        },
      },
    },
  },
} satisfies Config;

export default config;
export const information = {
  title: "Cloud Run: llama example",
};
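Once deployed, Cloud Run assigns the service its own URL. A minimal sketch of querying the container's TGI /generate endpoint from TypeScript; the service URL below is a placeholder (the real one comes from Cloud Run after deploy), not something this config produces:

// Placeholder: substitute the URL Cloud Run reports for the "llm" service.
const SERVICE_URL = "https://llm-xxxxxxxxxx-ew.a.run.app";

async function generate(prompt: string): Promise<string> {
  // TGI accepts POST /generate with { inputs, parameters }.
  const res = await fetch(`${SERVICE_URL}/generate`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      inputs: prompt,
      parameters: { max_new_tokens: 64 },
    }),
  });
  if (!res.ok) throw new Error(`TGI request failed: ${res.status}`);
  const data = (await res.json()) as { generated_text: string };
  return data.generated_text;
}

generate("What is Cloud Run?").then(console.log);

With --max-concurrent-requests=1 set in the config above, requests beyond the first are queued or rejected by TGI, so a client like this should expect occasional 429-style failures under load.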