Skip to content

Commit bb15bba

Browse files
[Feat][lora] add lora operator and modify vllm router to support (#446)
* [FEAT][lora] lora controller with create and delete Signed-off-by: Rui Zhang <[email protected]> * [FEAT][lora] add pod watch Signed-off-by: Rui Zhang <[email protected]> * [Feat][lora] add router api support Signed-off-by: Rui Zhang <[email protected]> * pre-commit Signed-off-by: Rui Zhang <[email protected]> * fix conflict Signed-off-by: Rui Zhang <[email protected]> * fix lora for static discovery and pd Signed-off-by: Rui Zhang <[email protected]> * add tutorial Signed-off-by: Rui Zhang <[email protected]> --------- Signed-off-by: Rui Zhang <[email protected]> Co-authored-by: Rui Zhang <[email protected]>
1 parent 802250a commit bb15bba

25 files changed

+2215
-142
lines changed

operator/PROJECT

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,13 @@ resources:
3535
kind: CacheServer
3636
path: production-stack/api/v1alpha1
3737
version: v1alpha1
38+
- api:
39+
crdVersion: v1
40+
namespaced: true
41+
controller: true
42+
domain: vllm.ai
43+
group: production-stack
44+
kind: LoraAdapter
45+
path: production-stack/api/v1alpha1
46+
version: v1alpha1
3847
version: "3"
Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
/*
2+
Copyright 2025.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package v1alpha1
18+
19+
import (
20+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21+
)
22+
23+
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
24+
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
25+
26+
// LoraAdapterSpec defines the desired state of LoraAdapter.
27+
type LoraAdapterSpec struct {
28+
// AdapterSource defines where to get the LoRA adapter from.
29+
// +kubebuilder:validation:Required
30+
AdapterSource AdapterSource `json:"adapterSource"`
31+
// BaseModel is the name of the base model this adapter is for.
32+
// +kubebuilder:validation:Required
33+
BaseModel string `json:"baseModel"`
34+
// DeploymentConfig defines how the adapter should be deployed
35+
DeploymentConfig DeploymentConfig `json:"deploymentConfig,omitempty"`
36+
// VLLMApiKey defines the configuration for vLLM API key authentication
37+
VLLMApiKey *VLLMApiKeyConfig `json:"vllmApiKey,omitempty"`
38+
}
39+
40+
type AdapterSource struct {
41+
// AdapterName is the name of the adapter to apply.
42+
// +kubebuilder:validation:Required
43+
AdapterName string `json:"adapterName"`
44+
// AdapterPath is the path to the LoRA adapter weights. For local sources: required, specifies the path to the adapter For remote sources: optional, will be updated by the controller with the download path
45+
AdapterPath string `json:"adapterPath,omitempty"`
46+
// CredentialsSecretRef references a secret containing storage credentials.
47+
CredentialsSecretRef *SecretRef `json:"credentialsSecretRef,omitempty"`
48+
// MaxAdapters is the maximum number of adapters to load.
49+
MaxAdapters int32 `json:"maxAdapters,omitempty"`
50+
// Pattern is the pattern to use for the adapter name.
51+
Pattern string `json:"pattern,omitempty"`
52+
// Repository is the repository to get the LoRA adapter from.
53+
Repository *string `json:"repository,omitempty"`
54+
// Type is the type of the adapter source.
55+
// +kubebuilder:validation:Required
56+
// +kubebuilder:validation:Enum=local;s3;http;huggingface
57+
Type string `json:"type"`
58+
}
59+
60+
// SecretRef identifies a secret by name.
// +mapType=atomic
type SecretRef struct {
	// Name of the referent.
	// More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
	// TODO: Add other useful fields. apiVersion, kind, uid?
	Name string `json:"name,omitempty"`
}
65+
66+
// DeploymentConfig defines how the adapter should be deployed.
type DeploymentConfig struct {
	// Algorithm specifies which placement algorithm to use.
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:Enum=default;ordered;equalized
	// +kubebuilder:default=default
	Algorithm string `json:"algorithm"`
	// Replicas is the number of replicas that should load this adapter.
	// +kubebuilder:validation:Minimum=0
	Replicas *int32 `json:"replicas,omitempty"`
}
76+
77+
// VLLMApiKeyConfig defines how to obtain the vLLM API key
78+
type VLLMApiKeyConfig struct {
79+
// Direct API key value
80+
// +optional
81+
Value string `json:"value,omitempty"`
82+
// Reference to a secret containing the API key
83+
// +optional
84+
SecretRef *VLLMApiKeySecretRef `json:"secretRef,omitempty"`
85+
}
86+
87+
// VLLMApiKeySecretRef defines the reference to a secret containing the API key.
type VLLMApiKeySecretRef struct {
	// SecretName is the name of the secret.
	// +kubebuilder:validation:Required
	SecretName string `json:"secretName"`
	// SecretKey is the key in the secret containing the API key.
	// +kubebuilder:validation:Required
	SecretKey string `json:"secretKey"`
}
96+
97+
// LoraAdapterStatus defines the observed state of LoraAdapter.
98+
type LoraAdapterStatus struct {
99+
// Condition contains details for one aspect of the current state of this API Resource.
100+
Conditions []Condition `json:"conditions,omitempty"`
101+
// LoadedAdapters tracks the loading status of adapters and their pod assignments.
102+
LoadedAdapters []LoadedAdapter `json:"loadedAdapters,omitempty"`
103+
// Message provides additional information about the current phase.
104+
Message string `json:"message,omitempty"`
105+
// Phase represents the current phase of the adapter deployment.
106+
Phase string `json:"phase,omitempty"`
107+
// ObservedGeneration represents the .metadata.generation that the condition was set based upon.
108+
// +kubebuilder:validation:Minimum=0
109+
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
110+
}
111+
112+
// Condition contains details for one aspect of the current state of this API Resource.
113+
type Condition struct {
114+
// LastTransitionTime is the last time the condition transitioned from one status to another.
115+
// +kubebuilder:validation:Format=date-time
116+
// +kubebuilder:validation:Required
117+
LastTransitionTime metav1.Time `json:"lastTransitionTime"`
118+
// Message is a human-readable message indicating details about why the current state is set.
119+
// +kubebuilder:validation:MaxLength=32768
120+
// +kubebuilder:validation:Required
121+
Message string `json:"message"`
122+
// Reason is a brief reason for the condition's current status.
123+
// +kubebuilder:validation:MaxLength=1024
124+
// +kubebuilder:validation:MinLength=1
125+
// +kubebuilder:validation:Pattern=`^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$`
126+
// +kubebuilder:validation:Required
127+
Reason string `json:"reason"`
128+
// Status is the status of the condition.
129+
// +kubebuilder:validation:Enum=True;False;Unknown
130+
// +kubebuilder:validation:Required
131+
Status string `json:"status"`
132+
// type of condition in CamelCase.
133+
// +kubebuilder:validation:MaxLength=316
134+
// +kubebuilder:validation:Pattern= ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
135+
// +kubebuilder:validation:Required
136+
Type string `json:"type"`
137+
}
138+
139+
// LoadedAdapter represents an adapter that has been loaded into a pod
140+
type LoadedAdapter struct {
141+
// LoadTime is when the adapter was loaded
142+
// +kubebuilder:validation:Format=date-time
143+
LoadTime metav1.Time `json:"loadTime,omitempty"`
144+
// Name is the name of the adapter
145+
// +kubebuilder:validation:Required
146+
Name string `json:"name"`
147+
// Path is the path where the adapter is loaded
148+
// +kubebuilder:validation:Required
149+
Path string `json:"path"`
150+
// PodAssignments represents the pods this adapter has been assigned to
151+
PodAssignments PodAssignment `json:"podAssignments"`
152+
// Status is the status of the adapter
153+
// +kubebuilder:validation:Required
154+
Status string `json:"status"`
155+
}
156+
157+
// PodAssignment represents a pod that has been assigned to load this adapter.
type PodAssignment struct {
	// PodName is the name of the pod.
	// +kubebuilder:validation:Required
	PodName string `json:"podName"`
	// Namespace is the namespace of the pod.
	// +kubebuilder:validation:Required
	Namespace string `json:"namespace"`
}
166+
167+
// // +mapType=atomic
168+
// type ObjectReference struct {
169+
// // APIVersion is the API version of the referent.
170+
// APIVersion string `json:"apiVersion,omitempty"`
171+
// // If referring to a piece of an object instead of an entire object.
172+
// FieldPath string `json:"fieldPath,omitempty"`
173+
// // Kind is the kind of the referent.
174+
// Kind string `json:"kind,omitempty"`
175+
// // Name is the name of the referent.
176+
// Name string `json:"name,omitempty"`
177+
// // Namespace is the namespace of the referent.
178+
// Namespace string `json:"namespace,omitempty"`
179+
// // ResourceVersion is the resource version of the referent.
180+
// ResourceVersion string `json:"resourceVersion,omitempty"`
181+
// // UID is the unique identifier of the referent.
182+
// UID string `json:"uid,omitempty"`
183+
// }
184+
185+
// +kubebuilder:object:root=true
186+
// +kubebuilder:subresource:status
187+
188+
// LoraAdapter is the Schema for the loraadapters API.
189+
// +kubebuilder:printcolumn:name="Phase",type=string,JSONPath=`.status.phase`
190+
// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`
191+
type LoraAdapter struct {
192+
metav1.TypeMeta `json:",inline"`
193+
metav1.ObjectMeta `json:"metadata,omitempty"`
194+
195+
Spec LoraAdapterSpec `json:"spec,omitempty"`
196+
Status LoraAdapterStatus `json:"status,omitempty"`
197+
}
198+
199+
// +kubebuilder:object:root=true
200+
201+
// LoraAdapterList contains a list of LoraAdapter.
202+
type LoraAdapterList struct {
203+
metav1.TypeMeta `json:",inline"`
204+
metav1.ListMeta `json:"metadata,omitempty"`
205+
Items []LoraAdapter `json:"items"`
206+
}
207+
208+
func init() {
209+
SchemeBuilder.Register(&LoraAdapter{}, &LoraAdapterList{})
210+
}

0 commit comments

Comments
 (0)