4 changes: 2 additions & 2 deletions helm/templates/deployment-vllm-multi.yaml
@@ -465,8 +465,8 @@ spec:
 {{- end }}
 {{- end }}

-{{- if .Values.servingEngineSpec.runtimeClassName }}
-runtimeClassName: {{ .Values.servingEngineSpec.runtimeClassName }}
+{{- with (ternary $modelSpec.runtimeClassName .Values.servingEngineSpec.runtimeClassName (hasKey $modelSpec "runtimeClassName")) }}
+runtimeClassName: {{ . }}
 {{- end }}
 {{- if .Values.servingEngineSpec.schedulerName }}
 schedulerName: {{ .Values.servingEngineSpec.schedulerName }}
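The new expression leans on Sprig's hasKey and ternary helpers plus a "with" block: if the per-model map defines runtimeClassName, that value wins; otherwise the global servingEngineSpec.runtimeClassName is used, and because "with" skips empty values an empty result emits no field at all. A minimal sketch of the same fallback in isolation (illustrative only, not part of the chart):

{{- /* sketch: how the fallback resolves for a given $modelSpec */ -}}
{{- with (ternary $modelSpec.runtimeClassName .Values.servingEngineSpec.runtimeClassName (hasKey $modelSpec "runtimeClassName")) }}
runtimeClassName: {{ . }}
{{- end }}
{{- /*
  model sets runtimeClassName: "custom-runtime"  -> runtimeClassName: custom-runtime
  model omits it, global is "nvidia"             -> runtimeClassName: nvidia
  model omits it, global is ""                   -> field omitted ("with" skips empty strings)
*/ -}}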
8 changes: 4 additions & 4 deletions helm/templates/ray-cluster.yaml
@@ -231,8 +231,8 @@ spec:
 {{- toYaml . | nindent 10 }}
 {{- end }}
 {{- end }}
-{{- if .Values.servingEngineSpec.runtimeClassName }}
-runtimeClassName: {{ .Values.servingEngineSpec.runtimeClassName }}
+{{- with (ternary $modelSpec.runtimeClassName .Values.servingEngineSpec.runtimeClassName (hasKey $modelSpec "runtimeClassName")) }}
+runtimeClassName: {{ . }}
 {{- end }}
 {{- if .Values.servingEngineSpec.schedulerName }}
 schedulerName: {{ .Values.servingEngineSpec.schedulerName }}
@@ -441,8 +441,8 @@ spec:
 {{- end }}
 {{- end }}

-{{- if .Values.servingEngineSpec.runtimeClassName }}
-runtimeClassName: {{ .Values.servingEngineSpec.runtimeClassName }}
+{{- with (ternary $modelSpec.runtimeClassName .Values.servingEngineSpec.runtimeClassName (hasKey $modelSpec "runtimeClassName")) }}
+runtimeClassName: {{ . }}
 {{- end }}
 {{- if .Values.servingEngineSpec.schedulerName }}
 schedulerName: {{ .Values.servingEngineSpec.schedulerName }}
136 changes: 136 additions & 0 deletions helm/tests/runtimeClassName_test.yaml
@@ -0,0 +1,136 @@
suite: test runtimeClassName configuration
templates:
  - deployment-vllm-multi.yaml
  - ray-cluster.yaml
tests:
  - it: should use global runtimeClassName when no model override is set
    set:
      servingEngineSpec:
        enableEngine: true
        runtimeClassName: "nvidia"
        modelSpec:
          - name: "test-model"
            repository: "vllm/vllm-openai"
            tag: "latest"
            modelURL: "facebook/opt-125m"
            replicaCount: 1
            requestCPU: 1
            requestMemory: "1Gi"
            requestGPU: 1
    asserts:
      - template: deployment-vllm-multi.yaml
        equal:
          path: spec.template.spec.runtimeClassName
          value: "nvidia"

  - it: should use model-specific runtimeClassName when set
    set:
      servingEngineSpec:
        enableEngine: true
        runtimeClassName: "nvidia"
        modelSpec:
          - name: "test-model-custom"
            repository: "vllm/vllm-openai"
            tag: "latest"
            modelURL: "facebook/opt-125m"
            runtimeClassName: "custom-runtime"
            replicaCount: 1
            requestCPU: 1
            requestMemory: "1Gi"
            requestGPU: 1
    asserts:
      - template: deployment-vllm-multi.yaml
        equal:
          path: spec.template.spec.runtimeClassName
          value: "custom-runtime"

  - it: should use model-specific runtimeClassName in Ray cluster head and worker nodes
    set:
      servingEngineSpec:
        enableEngine: true
        runtimeClassName: "nvidia"
        modelSpec:
          - name: "ray-model-custom"
            repository: "vllm/vllm-openai"
            tag: "latest"
            modelURL: "facebook/opt-125m"
            runtimeClassName: "custom-ray"
            replicaCount: 2
            requestCPU: 1
            requestMemory: "1Gi"
            requestGPU: 1
            raySpec:
              enabled: true
              headNode:
                requestCPU: 1
                requestMemory: "1Gi"
                requestGPU: 1
    asserts:
      - template: ray-cluster.yaml
        documentIndex: 0
        equal:
          path: spec.headGroupSpec.template.spec.runtimeClassName
          value: "custom-ray"
      - template: ray-cluster.yaml
        documentIndex: 0
        equal:
          path: spec.workerGroupSpecs[0].template.spec.runtimeClassName
          value: "custom-ray"

  - it: should default to nvidia if runtimeClassName is not provided
    set:
      servingEngineSpec:
        enableEngine: true
        modelSpec:
          - name: "test-model-no-runtime"
            repository: "vllm/vllm-openai"
            tag: "latest"
            modelURL: "facebook/opt-125m"
            replicaCount: 1
            requestCPU: 1
            requestMemory: "1Gi"
            requestGPU: 1
    asserts:
      - template: deployment-vllm-multi.yaml
        equal:
          path: spec.template.spec.runtimeClassName
          value: "nvidia"

  - it: should use model-specific runtimeClassName when no global is set
    set:
      servingEngineSpec:
        enableEngine: true
        modelSpec:
          - name: "test-model-only-model-runtime"
            repository: "vllm/vllm-openai"
            tag: "latest"
            modelURL: "facebook/opt-125m"
            runtimeClassName: "model-only-runtime"
            replicaCount: 1
            requestCPU: 1
            requestMemory: "1Gi"
            requestGPU: 1
    asserts:
      - template: deployment-vllm-multi.yaml
        equal:
          path: spec.template.spec.runtimeClassName
          value: "model-only-runtime"

  - it: should not set runtimeClassName when global is explicitly set to empty string
    set:
      servingEngineSpec:
        enableEngine: true
        runtimeClassName: ""
        modelSpec:
          - name: "test-model-empty-runtime"
            repository: "vllm/vllm-openai"
            tag: "latest"
            modelURL: "facebook/opt-125m"
            replicaCount: 1
            requestCPU: 1
            requestMemory: "1Gi"
            requestGPU: 1
    asserts:
      - template: deployment-vllm-multi.yaml
        notExists:
          path: spec.template.spec.runtimeClassName
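
The suite follows the helm-unittest plugin conventions (templates plus tests with set/asserts). Assuming that plugin is installed and the chart sits under helm/, the file should be picked up with something like:

helm plugin install https://github.com/helm-unittest/helm-unittest   # one-time install
helm unittest helm                                                    # discovers tests/*_test.yaml inside the chart by default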
5 changes: 5 additions & 0 deletions helm/values-example.yaml
@@ -1,9 +1,14 @@
 servingEngineSpec:
+  # Global runtime class for all models (can be overridden per model)
+  runtimeClassName: "nvidia"
+
   modelSpec:
   - name: "opt125m"
     repository: "lmcache/vllm-openai"
     tag: "latest"
     modelURL: "facebook/opt-125m"
+    # Override global runtimeClassName for this specific model
+    runtimeClassName: "custom-runtime"

     replicaCount: 1

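To spot-check the resolved value outside the unit tests, rendering the chart with these example values and grepping for the field is usually enough; the paths below assume the repository layout shown in this diff:

helm template vllm ./helm -f helm/values-example.yaml | grep runtimeClassName
# the opt125m pod should render "custom-runtime", since the per-model value overrides the global "nvidia"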
3 changes: 3 additions & 0 deletions helm/values.schema.json
@@ -109,6 +109,9 @@
"priorityClassName": {
"type": "string"
},
"runtimeClassName": {
"type": "string"
},
"pvcStorage": {
"type": "string"
},
1 change: 1 addition & 0 deletions helm/values.yaml
@@ -21,6 +21,7 @@ servingEngineSpec:
 # - annotations: (Optional, map) The annotations to add to the deployment, e.g., {model: "opt125m"}
 # - serviceAccountName: (Optional, string) The name of the service account to use for the deployment, e.g., "vllm-service-account"
 # - priorityClassName: (Optional, string) The name of the priority class name for the deployment, e.g., "high-priority"
+# - runtimeClassName: (Optional, string) Runtime class for the pod, e.g., "nvidia". If not specified, falls back to servingEngineSpec.runtimeClassName
 # - podAnnotations: (Optional, map) The annotations to add to the pod, e.g., {model: "opt125m"}
 # - name: (string) The name of the model, e.g., "example-model"
 # - repository: (string) The repository of the model, e.g., "vllm/vllm-openai"