# @geek-fun/serverlessinsight
# Version:
# Full life-cycle, cross-provider serverless application management for your fast-growing business.
# 56 lines (51 loc) • 1.69 kB
# YAML
# Version declared by this file; quoted so it is always read as a string.
version: '0.0.1'
# Cloud provider settings. `name` and `region` are indented under `provider`
# so they belong to this mapping instead of being read as top-level keys.
provider:
  name: aliyun
  region: cn-hangzhou
# Service name for this file.
service: 'insight-poc-gpu'
# Tags as key/value pairs; entries are indented so they belong to `tags`.
tags:
  owner: geek-fun
# Function definitions. Child keys are indented under their parents so that
# repeated key names (e.g. `name`) stay in separate mappings.
# NOTE(review): the nesting below is reconstructed from the key names —
# confirm it against the ServerlessInsight configuration schema.
functions:
  insight_poc_fn:
    name: insight-poc-gpu-fn

    # Container image, start command and port.
    container:
      image: registry.cn-hangzhou.aliyuncs.com/serverless_devs/modelscope:ubuntu20.04-cuda11.3.0-py37-torch1.11.0-tf1.15.5-1.5.0
      cmd: "ollama run modelscope.cn/DeepSeek-R1-Distill-Qwen-1.5B-GGUF"
      # NOTE(review): OLLAMA_HOST below uses port 11434, not 9000 — confirm
      # which port the container is expected to serve on.
      port: 9000

    memory: 4096
    gpu: TESLA_16
    timeout: 900
    log: true

    network:
      vpc_id: vpc-bp1jpzq0807mrww7i8uko
      subnet_ids:
        - vsw-bp1tp32pxn8c3mm07e9s2
        - vsw-bp1gs2ggjq85apbf5yd9d
      security_group:
        name: insight-poc-gpu-fn-sg
        # NOTE(review): every ingress rule is open to 0.0.0.0/0, including
        # port 22 — confirm this exposure is intended.
        ingress:
          - TCP:0.0.0.0/0:80
          - TCP:0.0.0.0/0:443
          - TCP:0.0.0.0/0:22/22
          - ICMP:0.0.0.0/0:ALL
        egress:
          - ALL:0.0.0.0/0:ALL

    storage:
      nas:
        - mount_path: /mnt/nas
          storage_class: STANDARD_CAPACITY

    # Every value is quoted so boolean- and number-looking entries are kept
    # as the literal strings written here.
    environment:
      OLLAMA_MODELS: '/mnt/nas/models'
      # Key spelled with the same OLLAMA_ prefix as every other entry.
      OLLAMA_GPU_OVERHEAD: '0'
      OLLAMA_HOST: 'http://0.0.0.0:11434'
      OLLAMA_INTEL_GPU: 'false'
      OLLAMA_KEEP_ALIVE: '2562047h47m16.854775807s'
      OLLAMA_LOAD_TIMEOUT: '5m0s'
      OLLAMA_MAX_LOADED_MODELS: '1'
      OLLAMA_MAX_QUEUE: '1'
      OLLAMA_MULTIUSER_CACHE: 'false'
      OLLAMA_NOHISTORY: 'false'
      OLLAMA_NOPRUNE: 'false'
      OLLAMA_NUM_PARALLEL: '1'
      # One quoted string: an unquoted value starting with `[ *` is parsed as
      # a flow sequence whose first item is an alias with no name.
      # NOTE(review): comma-separated form assumed from Ollama's documented
      # OLLAMA_ORIGINS format — confirm.
      OLLAMA_ORIGINS: '*,http://localhost,https://localhost,http://localhost:*,https://localhost:*,http://127.0.0.1,https://127.0.0.1,http://127.0.0.1:*,https://127.0.0.1:*,http://0.0.0.0,https://0.0.0.0,http://0.0.0.0:*,https://0.0.0.0:*,app://*,file://*,tauri://*,vscode-webview://*'
      OLLAMA_SCHED_SPREAD: 'false'