4 changes: 2 additions & 2 deletions helm/templates/deployment-vllm-multi.yaml
@@ -465,8 +465,8 @@ spec:
 {{- end }}
 {{- end }}

-{{- if .Values.servingEngineSpec.runtimeClassName }}
-runtimeClassName: {{ .Values.servingEngineSpec.runtimeClassName }}
+{{- with (ternary $modelSpec.runtimeClassName .Values.servingEngineSpec.runtimeClassName (hasKey $modelSpec "runtimeClassName")) }}
+runtimeClassName: {{ . }}
 {{- end }}
 {{- if .Values.servingEngineSpec.schedulerName }}
 schedulerName: {{ .Values.servingEngineSpec.schedulerName }}
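The new expression leans on Sprig's hasKey and ternary helpers plus a "with" block: if the per-model map defines runtimeClassName, that value wins; otherwise the global servingEngineSpec.runtimeClassName is used, and because "with" skips empty values an empty result emits no field at all. A minimal sketch of the same fallback in isolation (illustrative only, not part of the chart):

{{- /* sketch: how the fallback resolves for a given $modelSpec */ -}}
{{- with (ternary $modelSpec.runtimeClassName .Values.servingEngineSpec.runtimeClassName (hasKey $modelSpec "runtimeClassName")) }}
runtimeClassName: {{ . }}
{{- end }}
{{- /*
  model sets runtimeClassName: "custom-runtime"  -> runtimeClassName: custom-runtime
  model omits it, global is "nvidia"             -> runtimeClassName: nvidia
  model omits it, global is ""                   -> field omitted ("with" skips empty strings)
*/ -}}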
8 changes: 4 additions & 4 deletions helm/templates/ray-cluster.yaml
@@ -231,8 +231,8 @@ spec:
 {{- toYaml . | nindent 10 }}
 {{- end }}
 {{- end }}
-{{- if .Values.servingEngineSpec.runtimeClassName }}
-runtimeClassName: {{ .Values.servingEngineSpec.runtimeClassName }}
+{{- with (ternary $modelSpec.runtimeClassName .Values.servingEngineSpec.runtimeClassName (hasKey $modelSpec "runtimeClassName")) }}
+runtimeClassName: {{ . }}
 {{- end }}
 {{- if .Values.servingEngineSpec.schedulerName }}
 schedulerName: {{ .Values.servingEngineSpec.schedulerName }}
@@ -441,8 +441,8 @@ spec:
 {{- end }}
 {{- end }}

-{{- if .Values.servingEngineSpec.runtimeClassName }}
-runtimeClassName: {{ .Values.servingEngineSpec.runtimeClassName }}
+{{- with (ternary $modelSpec.runtimeClassName .Values.servingEngineSpec.runtimeClassName (hasKey $modelSpec "runtimeClassName")) }}
+runtimeClassName: {{ . }}
 {{- end }}
 {{- if .Values.servingEngineSpec.schedulerName }}
 schedulerName: {{ .Values.servingEngineSpec.schedulerName }}
136 changes: 136 additions & 0 deletions helm/tests/runtimeClassName_test.yaml
@@ -0,0 +1,136 @@
suite: test runtimeClassName configuration
templates:
  - deployment-vllm-multi.yaml
  - ray-cluster.yaml
tests:
  - it: should use global runtimeClassName when no model override is set
    set:
      servingEngineSpec:
        enableEngine: true
        runtimeClassName: "nvidia"
        modelSpec:
          - name: "test-model"
            repository: "vllm/vllm-openai"
            tag: "latest"
            modelURL: "facebook/opt-125m"
            replicaCount: 1
            requestCPU: 1
            requestMemory: "1Gi"
            requestGPU: 1
    asserts:
      - template: deployment-vllm-multi.yaml
        equal:
          path: spec.template.spec.runtimeClassName
          value: "nvidia"

  - it: should use model-specific runtimeClassName when set
    set:
      servingEngineSpec:
        enableEngine: true
        runtimeClassName: "nvidia"
        modelSpec:
          - name: "test-model-custom"
            repository: "vllm/vllm-openai"
            tag: "latest"
            modelURL: "facebook/opt-125m"
            runtimeClassName: "custom-runtime"
            replicaCount: 1
            requestCPU: 1
            requestMemory: "1Gi"
            requestGPU: 1
    asserts:
      - template: deployment-vllm-multi.yaml
        equal:
          path: spec.template.spec.runtimeClassName
          value: "custom-runtime"

  - it: should use model-specific runtimeClassName in Ray cluster head and worker nodes
    set:
      servingEngineSpec:
        enableEngine: true
        runtimeClassName: "nvidia"
        modelSpec:
          - name: "ray-model-custom"
            repository: "vllm/vllm-openai"
            tag: "latest"
            modelURL: "facebook/opt-125m"
            runtimeClassName: "custom-ray"
            replicaCount: 2
            requestCPU: 1
            requestMemory: "1Gi"
            requestGPU: 1
            raySpec:
              enabled: true
              headNode:
                requestCPU: 1
                requestMemory: "1Gi"
                requestGPU: 1
    asserts:
      - template: ray-cluster.yaml
        documentIndex: 0
        equal:
          path: spec.headGroupSpec.template.spec.runtimeClassName
          value: "custom-ray"
      - template: ray-cluster.yaml
        documentIndex: 0
        equal:
          path: spec.workerGroupSpecs[0].template.spec.runtimeClassName
          value: "custom-ray"

  - it: should default to nvidia if runtimeClassName is not provided
    set:
      servingEngineSpec:
        enableEngine: true
        modelSpec:
          - name: "test-model-no-runtime"
            repository: "vllm/vllm-openai"
            tag: "latest"
            modelURL: "facebook/opt-125m"
            replicaCount: 1
            requestCPU: 1
            requestMemory: "1Gi"
            requestGPU: 1
    asserts:
      - template: deployment-vllm-multi.yaml
        equal:
          path: spec.template.spec.runtimeClassName
          value: "nvidia"

  - it: should use model-specific runtimeClassName when no global is set
    set:
      servingEngineSpec:
        enableEngine: true
        modelSpec:
          - name: "test-model-only-model-runtime"
            repository: "vllm/vllm-openai"
            tag: "latest"
            modelURL: "facebook/opt-125m"
            runtimeClassName: "model-only-runtime"
            replicaCount: 1
            requestCPU: 1
            requestMemory: "1Gi"
            requestGPU: 1
    asserts:
      - template: deployment-vllm-multi.yaml
        equal:
          path: spec.template.spec.runtimeClassName
          value: "model-only-runtime"

  - it: should not set runtimeClassName when global is explicitly set to empty string
    set:
      servingEngineSpec:
        enableEngine: true
        runtimeClassName: ""
        modelSpec:
          - name: "test-model-empty-runtime"
            repository: "vllm/vllm-openai"
            tag: "latest"
            modelURL: "facebook/opt-125m"
            replicaCount: 1
            requestCPU: 1
            requestMemory: "1Gi"
            requestGPU: 1
    asserts:
      - template: deployment-vllm-multi.yaml
        notExists:
          path: spec.template.spec.runtimeClassName
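
The suite follows the helm-unittest plugin conventions (templates plus tests with set/asserts). Assuming that plugin is installed and the chart sits under helm/, the file should be picked up with something like:

helm plugin install https://github.com/helm-unittest/helm-unittest   # one-time install
helm unittest helm                                                    # discovers tests/*_test.yaml inside the chart by default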
5 changes: 5 additions & 0 deletions helm/values-example.yaml
@@ -1,9 +1,14 @@
 servingEngineSpec:
+  # Global runtime class for all models (can be overridden per model)
+  runtimeClassName: "nvidia"
+
   modelSpec:
   - name: "opt125m"
     repository: "lmcache/vllm-openai"
     tag: "latest"
     modelURL: "facebook/opt-125m"
+    # Override global runtimeClassName for this specific model
+    runtimeClassName: "custom-runtime"

     replicaCount: 1

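To spot-check the resolved value outside the unit tests, rendering the chart with these example values and grepping for the field is usually enough; the paths below assume the repository layout shown in this diff:

helm template vllm ./helm -f helm/values-example.yaml | grep runtimeClassName
# the opt125m pod should render "custom-runtime", since the per-model value overrides the global "nvidia"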
3 changes: 3 additions & 0 deletions helm/values.schema.json
@@ -109,6 +109,9 @@
"priorityClassName": {
"type": "string"
},
"runtimeClassName": {
"type": "string"
},
"pvcStorage": {
"type": "string"
},
1 change: 1 addition & 0 deletions helm/values.yaml
@@ -21,6 +21,7 @@ servingEngineSpec:
 # - annotations: (Optional, map) The annotations to add to the deployment, e.g., {model: "opt125m"}
 # - serviceAccountName: (Optional, string) The name of the service account to use for the deployment, e.g., "vllm-service-account"
 # - priorityClassName: (Optional, string) The name of the priority class name for the deployment, e.g., "high-priority"
+# - runtimeClassName: (Optional, string) Runtime class for the pod, e.g., "nvidia". If not specified, falls back to servingEngineSpec.runtimeClassName
 # - podAnnotations: (Optional, map) The annotations to add to the pod, e.g., {model: "opt125m"}
 # - name: (string) The name of the model, e.g., "example-model"
 # - repository: (string) The repository of the model, e.g., "vllm/vllm-openai"