From 5cf797405ba193ebf482d5daf42342a1a0110585 Mon Sep 17 00:00:00 2001
From: Fang Han
Date: Tue, 11 Nov 2025 22:55:03 -0800
Subject: [PATCH] [Feat] Add per-model runtimeClassName configuration support

Signed-off-by: Fang Han
---
 helm/templates/deployment-vllm-multi.yaml |   4 +-
 helm/templates/ray-cluster.yaml           |   8 +-
 helm/tests/runtimeClassName_test.yaml     | 136 ++++++++++++++++++++++
 helm/values-example.yaml                  |   5 +
 helm/values.schema.json                   |   3 +
 helm/values.yaml                          |   1 +
 6 files changed, 151 insertions(+), 6 deletions(-)
 create mode 100644 helm/tests/runtimeClassName_test.yaml

diff --git a/helm/templates/deployment-vllm-multi.yaml b/helm/templates/deployment-vllm-multi.yaml
index 207be3297..a4b770e58 100644
--- a/helm/templates/deployment-vllm-multi.yaml
+++ b/helm/templates/deployment-vllm-multi.yaml
@@ -446,8 +446,8 @@ spec:
         {{- end }}
       {{- end }}

-      {{- if .Values.servingEngineSpec.runtimeClassName }}
-      runtimeClassName: {{ .Values.servingEngineSpec.runtimeClassName }}
+      {{- with (ternary $modelSpec.runtimeClassName .Values.servingEngineSpec.runtimeClassName (hasKey $modelSpec "runtimeClassName")) }}
+      runtimeClassName: {{ . }}
       {{- end }}
       {{- if .Values.servingEngineSpec.schedulerName }}
       schedulerName: {{ .Values.servingEngineSpec.schedulerName }}
diff --git a/helm/templates/ray-cluster.yaml b/helm/templates/ray-cluster.yaml
index 41f45bf38..8319b5892 100644
--- a/helm/templates/ray-cluster.yaml
+++ b/helm/templates/ray-cluster.yaml
@@ -231,8 +231,8 @@ spec:
           {{- toYaml . | nindent 10 }}
           {{- end }}
         {{- end }}
-        {{- if .Values.servingEngineSpec.runtimeClassName }}
-        runtimeClassName: {{ .Values.servingEngineSpec.runtimeClassName }}
+        {{- with (ternary $modelSpec.runtimeClassName .Values.servingEngineSpec.runtimeClassName (hasKey $modelSpec "runtimeClassName")) }}
+        runtimeClassName: {{ . }}
         {{- end }}
         {{- if .Values.servingEngineSpec.schedulerName }}
         schedulerName: {{ .Values.servingEngineSpec.schedulerName }}
@@ -441,8 +441,8 @@ spec:
           {{- end }}
         {{- end }}

-        {{- if .Values.servingEngineSpec.runtimeClassName }}
-        runtimeClassName: {{ .Values.servingEngineSpec.runtimeClassName }}
+        {{- with (ternary $modelSpec.runtimeClassName .Values.servingEngineSpec.runtimeClassName (hasKey $modelSpec "runtimeClassName")) }}
+        runtimeClassName: {{ . }}
         {{- end }}
         {{- if .Values.servingEngineSpec.schedulerName }}
         schedulerName: {{ .Values.servingEngineSpec.schedulerName }}
diff --git a/helm/tests/runtimeClassName_test.yaml b/helm/tests/runtimeClassName_test.yaml
new file mode 100644
index 000000000..77a83a61a
--- /dev/null
+++ b/helm/tests/runtimeClassName_test.yaml
@@ -0,0 +1,136 @@
+suite: test runtimeClassName configuration
+templates:
+  - deployment-vllm-multi.yaml
+  - ray-cluster.yaml
+tests:
+  - it: should use global runtimeClassName when no model override is set
+    set:
+      servingEngineSpec:
+        enableEngine: true
+        runtimeClassName: "nvidia"
+        modelSpec:
+          - name: "test-model"
+            repository: "vllm/vllm-openai"
+            tag: "latest"
+            modelURL: "facebook/opt-125m"
+            replicaCount: 1
+            requestCPU: 1
+            requestMemory: "1Gi"
+            requestGPU: 1
+    asserts:
+      - template: deployment-vllm-multi.yaml
+        equal:
+          path: spec.template.spec.runtimeClassName
+          value: "nvidia"
+
+  - it: should use model-specific runtimeClassName when set
+    set:
+      servingEngineSpec:
+        enableEngine: true
+        runtimeClassName: "nvidia"
+        modelSpec:
+          - name: "test-model-custom"
+            repository: "vllm/vllm-openai"
+            tag: "latest"
+            modelURL: "facebook/opt-125m"
+            runtimeClassName: "custom-runtime"
+            replicaCount: 1
+            requestCPU: 1
+            requestMemory: "1Gi"
+            requestGPU: 1
+    asserts:
+      - template: deployment-vllm-multi.yaml
+        equal:
+          path: spec.template.spec.runtimeClassName
+          value: "custom-runtime"
+
+  - it: should use model-specific runtimeClassName in Ray cluster head and worker nodes
+    set:
+      servingEngineSpec:
+        enableEngine: true
+        runtimeClassName: "nvidia"
+        modelSpec:
+          - name: "ray-model-custom"
+            repository: "vllm/vllm-openai"
+            tag: "latest"
+            modelURL: "facebook/opt-125m"
+            runtimeClassName: "custom-ray"
+            replicaCount: 2
+            requestCPU: 1
+            requestMemory: "1Gi"
+            requestGPU: 1
+            raySpec:
+              enabled: true
+              headNode:
+                requestCPU: 1
+                requestMemory: "1Gi"
+                requestGPU: 1
+    asserts:
+      - template: ray-cluster.yaml
+        documentIndex: 0
+        equal:
+          path: spec.headGroupSpec.template.spec.runtimeClassName
+          value: "custom-ray"
+      - template: ray-cluster.yaml
+        documentIndex: 0
+        equal:
+          path: spec.workerGroupSpecs[0].template.spec.runtimeClassName
+          value: "custom-ray"
+
+  - it: should default to nvidia if runtimeClassName is not provided
+    set:
+      servingEngineSpec:
+        enableEngine: true
+        modelSpec:
+          - name: "test-model-no-runtime"
+            repository: "vllm/vllm-openai"
+            tag: "latest"
+            modelURL: "facebook/opt-125m"
+            replicaCount: 1
+            requestCPU: 1
+            requestMemory: "1Gi"
+            requestGPU: 1
+    asserts:
+      - template: deployment-vllm-multi.yaml
+        equal:
+          path: spec.template.spec.runtimeClassName
+          value: "nvidia"
+
+  - it: should use model-specific runtimeClassName when no global is set
+    set:
+      servingEngineSpec:
+        enableEngine: true
+        modelSpec:
+          - name: "test-model-only-model-runtime"
+            repository: "vllm/vllm-openai"
+            tag: "latest"
+            modelURL: "facebook/opt-125m"
+            runtimeClassName: "model-only-runtime"
+            replicaCount: 1
+            requestCPU: 1
+            requestMemory: "1Gi"
+            requestGPU: 1
+    asserts:
+      - template: deployment-vllm-multi.yaml
+        equal:
+          path: spec.template.spec.runtimeClassName
+          value: "model-only-runtime"
+
+  - it: should not set runtimeClassName when global is explicitly set to empty string
+    set:
+      servingEngineSpec:
+        enableEngine: true
+        runtimeClassName: ""
+        modelSpec:
+          - name: "test-model-empty-runtime"
+            repository: "vllm/vllm-openai"
+            tag: "latest"
+            modelURL: "facebook/opt-125m"
+            replicaCount: 1
+            requestCPU: 1
+            requestMemory: "1Gi"
+            requestGPU: 1
+    asserts:
+      - template: deployment-vllm-multi.yaml
+        notExists:
+          path: spec.template.spec.runtimeClassName
diff --git a/helm/values-example.yaml b/helm/values-example.yaml
index 004880fbe..3809276ae 100644
--- a/helm/values-example.yaml
+++ b/helm/values-example.yaml
@@ -1,9 +1,14 @@
 servingEngineSpec:
+  # Global runtime class for all models (can be overridden per model)
+  runtimeClassName: "nvidia"
+
   modelSpec:
   - name: "opt125m"
     repository: "lmcache/vllm-openai"
     tag: "latest"
     modelURL: "facebook/opt-125m"
+    # Override global runtimeClassName for this specific model
+    runtimeClassName: "custom-runtime"

     replicaCount: 1

diff --git a/helm/values.schema.json b/helm/values.schema.json
index a7fb2a332..cead7f133 100644
--- a/helm/values.schema.json
+++ b/helm/values.schema.json
@@ -109,6 +109,9 @@
         "priorityClassName": {
           "type": "string"
         },
+        "runtimeClassName": {
+          "type": "string"
+        },
         "pvcStorage": {
           "type": "string"
         },
diff --git a/helm/values.yaml b/helm/values.yaml
index f1680db68..6414ed8cc 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -21,6 +21,7 @@ servingEngineSpec:
   # - annotations: (Optional, map) The annotations to add to the deployment, e.g., {model: "opt125m"}
   # - serviceAccountName: (Optional, string) The name of the service account to use for the deployment, e.g., "vllm-service-account"
   # - priorityClassName: (Optional, string) The name of the priority class name for the deployment, e.g., "high-priority"
+  # - runtimeClassName: (Optional, string) Runtime class for the pod, e.g., "nvidia". If not specified, falls back to servingEngineSpec.runtimeClassName
   # - podAnnotations: (Optional, map) The annotations to add to the pod, e.g., {model: "opt125m"}
   # - name: (string) The name of the model, e.g., "example-model"
   # - repository: (string) The repository of the model, e.g., "vllm/vllm-openai"
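
Note on the fallback semantics: the ternary/hasKey expression used in both templates picks the per-model value whenever the runtimeClassName key is present on the model entry and the global servingEngineSpec.runtimeClassName otherwise; because the result is wrapped in a "with" block, an empty string at either level causes the field to be omitted from the pod spec entirely. Below is a minimal values sketch of the three cases; the model names and the "kata" runtime class are illustrative placeholders rather than chart defaults, and the other required modelSpec fields (repository, modelURL, resource requests, and so on) are omitted for brevity:

    servingEngineSpec:
      runtimeClassName: "nvidia"    # global value, inherited by any model that sets nothing
      modelSpec:
      - name: "model-a"             # no per-model key -> pod gets runtimeClassName: nvidia
      - name: "model-b"
        runtimeClassName: "kata"    # per-model override -> pod gets runtimeClassName: kata
      - name: "model-c"
        runtimeClassName: ""        # key present but empty -> runtimeClassName omitted from the pod spec

The suite added in helm/tests/runtimeClassName_test.yaml can be run with the helm-unittest plugin, assuming it is installed, for example by running "helm unittest helm" from the repository root.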