Skip to content

Commit 556b7ed

Browse files
committed
DRA: add DRAControlPlaneController feature gate for "classic DRA"
In the API, the effect of the feature gate is that alpha fields get dropped on create. They get preserved during updates if already set. The PodSchedulingContext registration is *not* restricted by the feature gate. This enables deleting stale PodSchedulingContext objects after disabling the feature gate. The scheduler checks the new feature gate before setting up an informer for PodSchedulingContext objects and when deciding whether it can schedule a pod. If any claim depends on a control plane controller, the scheduler bails out, leading to: Status: Pending ... Warning FailedScheduling 73s default-scheduler 0/1 nodes are available: resourceclaim depends on disabled DRAControlPlaneController feature. no new claims to deallocate, preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling. The rest of the changes prepare for testing the new feature separately from "structured parameters". The goal is to have base "dra" jobs which just enable and test those, then "classic-dra" jobs which add DRAControlPlaneController.
1 parent c6867b0 commit 556b7ed

File tree

15 files changed

+649
-98
lines changed

15 files changed

+649
-98
lines changed

pkg/features/kube_features.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,17 @@ const (
220220
// alpha: v1.26
221221
//
222222
// Enables support for resources with custom parameters and a lifecycle
223-
// that is independent of a Pod.
223+
// that is independent of a Pod. Resource allocation is done by a DRA driver's
224+
// "control plane controller" in cooperation with the scheduler.
225+
DRAControlPlaneController featuregate.Feature = "DRAControlPlaneController"
226+
227+
// owner: @pohly
228+
// kep: http://kep.k8s.io/4381
229+
// alpha: v1.29
230+
//
231+
// Enables support for resources with custom parameters and a lifecycle
232+
// that is independent of a Pod. Resource allocation is done by the scheduler
233+
// based on "structured parameters".
224234
DynamicResourceAllocation featuregate.Feature = "DynamicResourceAllocation"
225235

226236
// owner: @harche
@@ -1044,6 +1054,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
10441054

10451055
DevicePluginCDIDevices: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.33
10461056

1057+
DRAControlPlaneController: {Default: false, PreRelease: featuregate.Alpha},
1058+
10471059
DynamicResourceAllocation: {Default: false, PreRelease: featuregate.Alpha},
10481060

10491061
EventedPLEG: {Default: false, PreRelease: featuregate.Alpha},

pkg/registry/resource/deviceclass/strategy.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,11 @@ import (
2323
"k8s.io/apimachinery/pkg/runtime"
2424
"k8s.io/apimachinery/pkg/util/validation/field"
2525
"k8s.io/apiserver/pkg/storage/names"
26+
utilfeature "k8s.io/apiserver/pkg/util/feature"
2627
"k8s.io/kubernetes/pkg/api/legacyscheme"
2728
"k8s.io/kubernetes/pkg/apis/resource"
2829
"k8s.io/kubernetes/pkg/apis/resource/validation"
30+
"k8s.io/kubernetes/pkg/features"
2931
)
3032

3133
// deviceClassStrategy implements behavior for DeviceClass objects
@@ -43,6 +45,7 @@ func (deviceClassStrategy) NamespaceScoped() bool {
4345
func (deviceClassStrategy) PrepareForCreate(ctx context.Context, obj runtime.Object) {
4446
class := obj.(*resource.DeviceClass)
4547
class.Generation = 1
48+
dropDisabledFields(class, nil)
4649
}
4750

4851
func (deviceClassStrategy) Validate(ctx context.Context, obj runtime.Object) field.ErrorList {
@@ -65,6 +68,8 @@ func (deviceClassStrategy) PrepareForUpdate(ctx context.Context, obj, old runtim
6568
class := obj.(*resource.DeviceClass)
6669
oldClass := old.(*resource.DeviceClass)
6770

71+
dropDisabledFields(class, oldClass)
72+
6873
// Any changes to the spec increment the generation number.
6974
if !apiequality.Semantic.DeepEqual(oldClass.Spec, class.Spec) {
7075
class.Generation = oldClass.Generation + 1
@@ -83,3 +88,22 @@ func (deviceClassStrategy) WarningsOnUpdate(ctx context.Context, obj, old runtim
8388
func (deviceClassStrategy) AllowUnconditionalUpdate() bool {
8489
return true
8590
}
91+
92+
// dropDisabledFields removes fields which are covered by the optional DRAControlPlaneController feature gate.
93+
func dropDisabledFields(newClass, oldClass *resource.DeviceClass) {
94+
if utilfeature.DefaultFeatureGate.Enabled(features.DRAControlPlaneController) {
95+
// No need to drop anything.
96+
return
97+
}
98+
99+
if oldClass == nil {
100+
// Always drop on create.
101+
newClass.Spec.SuitableNodes = nil
102+
return
103+
}
104+
105+
// Drop on update only if not already set.
106+
if oldClass.Spec.SuitableNodes == nil {
107+
newClass.Spec.SuitableNodes = nil
108+
}
109+
}

pkg/registry/resource/deviceclass/strategy_test.go

Lines changed: 153 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,42 @@ package deviceclass
1919
import (
2020
"testing"
2121

22+
"github.com/stretchr/testify/assert"
23+
2224
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2325
genericapirequest "k8s.io/apiserver/pkg/endpoints/request"
26+
utilfeature "k8s.io/apiserver/pkg/util/feature"
27+
featuregatetesting "k8s.io/component-base/featuregate/testing"
28+
"k8s.io/kubernetes/pkg/apis/core"
2429
"k8s.io/kubernetes/pkg/apis/resource"
30+
"k8s.io/kubernetes/pkg/features"
2531
)
2632

27-
var deviceClass = &resource.DeviceClass{
33+
var obj = &resource.DeviceClass{
34+
ObjectMeta: metav1.ObjectMeta{
35+
Name: "valid-class",
36+
Generation: 1,
37+
},
38+
}
39+
40+
var objWithGatedFields = &resource.DeviceClass{
2841
ObjectMeta: metav1.ObjectMeta{
29-
Name: "valid-class",
42+
Name: "valid-class",
43+
Generation: 1,
44+
},
45+
Spec: resource.DeviceClassSpec{
46+
SuitableNodes: &core.NodeSelector{
47+
NodeSelectorTerms: []core.NodeSelectorTerm{{
48+
MatchExpressions: []core.NodeSelectorRequirement{{
49+
Key: "foo",
50+
Operator: core.NodeSelectorOpExists,
51+
}},
52+
}},
53+
},
3054
},
3155
}
3256

33-
func TestClassStrategy(t *testing.T) {
57+
func TestStrategy(t *testing.T) {
3458
if Strategy.NamespaceScoped() {
3559
t.Errorf("DeviceClass must not be namespace scoped")
3660
}
@@ -39,42 +63,135 @@ func TestClassStrategy(t *testing.T) {
3963
}
4064
}
4165

42-
func TestClassStrategyCreate(t *testing.T) {
66+
func TestStrategyCreate(t *testing.T) {
4367
ctx := genericapirequest.NewDefaultContext()
44-
deviceClass := deviceClass.DeepCopy()
4568

46-
Strategy.PrepareForCreate(ctx, deviceClass)
47-
errs := Strategy.Validate(ctx, deviceClass)
48-
if len(errs) != 0 {
49-
t.Errorf("unexpected error validating for create %v", errs)
69+
testcases := map[string]struct {
70+
obj *resource.DeviceClass
71+
controlPlaneController bool
72+
expectValidationError bool
73+
expectObj *resource.DeviceClass
74+
}{
75+
"simple": {
76+
obj: obj,
77+
expectObj: obj,
78+
},
79+
"validation-error": {
80+
obj: func() *resource.DeviceClass {
81+
obj := obj.DeepCopy()
82+
obj.Name = "%#@$%$"
83+
return obj
84+
}(),
85+
expectValidationError: true,
86+
},
87+
"drop-fields": {
88+
obj: objWithGatedFields,
89+
controlPlaneController: false,
90+
expectObj: obj,
91+
},
92+
"keep-fields": {
93+
obj: objWithGatedFields,
94+
controlPlaneController: true,
95+
expectObj: objWithGatedFields,
96+
},
97+
}
98+
99+
for name, tc := range testcases {
100+
t.Run(name, func(t *testing.T) {
101+
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DRAControlPlaneController, tc.controlPlaneController)
102+
103+
obj := tc.obj.DeepCopy()
104+
Strategy.PrepareForCreate(ctx, obj)
105+
if errs := Strategy.Validate(ctx, obj); len(errs) != 0 {
106+
if !tc.expectValidationError {
107+
t.Fatalf("unexpected validation errors: %q", errs)
108+
}
109+
return
110+
} else if tc.expectValidationError {
111+
t.Fatal("expected validation error(s), got none")
112+
}
113+
if warnings := Strategy.WarningsOnCreate(ctx, obj); len(warnings) != 0 {
114+
t.Fatalf("unexpected warnings: %q", warnings)
115+
}
116+
Strategy.Canonicalize(obj)
117+
assert.Equal(t, tc.expectObj, obj)
118+
})
50119
}
51120
}
52121

53-
func TestClassStrategyUpdate(t *testing.T) {
54-
t.Run("no-changes-okay", func(t *testing.T) {
55-
ctx := genericapirequest.NewDefaultContext()
56-
deviceClass := deviceClass.DeepCopy()
57-
newClass := deviceClass.DeepCopy()
58-
newClass.ResourceVersion = "4"
59-
60-
Strategy.PrepareForUpdate(ctx, newClass, deviceClass)
61-
errs := Strategy.ValidateUpdate(ctx, newClass, deviceClass)
62-
if len(errs) != 0 {
63-
t.Errorf("unexpected validation errors: %v", errs)
64-
}
65-
})
66-
67-
t.Run("name-change-not-allowed", func(t *testing.T) {
68-
ctx := genericapirequest.NewDefaultContext()
69-
deviceClass := deviceClass.DeepCopy()
70-
newClass := deviceClass.DeepCopy()
71-
newClass.Name = "valid-class-2"
72-
newClass.ResourceVersion = "4"
73-
74-
Strategy.PrepareForUpdate(ctx, newClass, deviceClass)
75-
errs := Strategy.ValidateUpdate(ctx, newClass, deviceClass)
76-
if len(errs) == 0 {
77-
t.Errorf("expected a validation error")
78-
}
79-
})
122+
func TestStrategyUpdate(t *testing.T) {
123+
ctx := genericapirequest.NewDefaultContext()
124+
125+
testcases := map[string]struct {
126+
oldObj *resource.DeviceClass
127+
newObj *resource.DeviceClass
128+
controlPlaneController bool
129+
expectValidationError bool
130+
expectObj *resource.DeviceClass
131+
}{
132+
"no-changes-okay": {
133+
oldObj: obj,
134+
newObj: obj,
135+
expectObj: obj,
136+
},
137+
"name-change-not-allowed": {
138+
oldObj: obj,
139+
newObj: func() *resource.DeviceClass {
140+
obj := obj.DeepCopy()
141+
obj.Name += "-2"
142+
return obj
143+
}(),
144+
expectValidationError: true,
145+
},
146+
"drop-fields": {
147+
oldObj: obj,
148+
newObj: objWithGatedFields,
149+
controlPlaneController: false,
150+
expectObj: obj,
151+
},
152+
"keep-fields": {
153+
oldObj: obj,
154+
newObj: objWithGatedFields,
155+
controlPlaneController: true,
156+
expectObj: func() *resource.DeviceClass {
157+
obj := objWithGatedFields.DeepCopy()
158+
// Spec changes -> generation gets bumped.
159+
obj.Generation++
160+
return obj
161+
}(),
162+
},
163+
"keep-existing-fields": {
164+
oldObj: objWithGatedFields,
165+
newObj: objWithGatedFields,
166+
controlPlaneController: false,
167+
expectObj: objWithGatedFields,
168+
},
169+
}
170+
171+
for name, tc := range testcases {
172+
t.Run(name, func(t *testing.T) {
173+
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.DRAControlPlaneController, tc.controlPlaneController)
174+
oldObj := tc.oldObj.DeepCopy()
175+
newObj := tc.newObj.DeepCopy()
176+
newObj.ResourceVersion = "4"
177+
178+
Strategy.PrepareForUpdate(ctx, newObj, oldObj)
179+
if errs := Strategy.ValidateUpdate(ctx, newObj, oldObj); len(errs) != 0 {
180+
if !tc.expectValidationError {
181+
t.Fatalf("unexpected validation errors: %q", errs)
182+
}
183+
return
184+
} else if tc.expectValidationError {
185+
t.Fatal("expected validation error(s), got none")
186+
}
187+
if warnings := Strategy.WarningsOnUpdate(ctx, newObj, oldObj); len(warnings) != 0 {
188+
t.Fatalf("unexpected warnings: %q", warnings)
189+
}
190+
Strategy.Canonicalize(newObj)
191+
192+
expectObj := tc.expectObj.DeepCopy()
193+
expectObj.ResourceVersion = "4"
194+
assert.Equal(t, expectObj, newObj)
195+
})
196+
}
80197
}

pkg/registry/resource/resourceclaim/strategy.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,11 @@ import (
2828
"k8s.io/apiserver/pkg/registry/generic"
2929
"k8s.io/apiserver/pkg/storage"
3030
"k8s.io/apiserver/pkg/storage/names"
31+
utilfeature "k8s.io/apiserver/pkg/util/feature"
3132
"k8s.io/kubernetes/pkg/api/legacyscheme"
3233
"k8s.io/kubernetes/pkg/apis/resource"
3334
"k8s.io/kubernetes/pkg/apis/resource/validation"
35+
"k8s.io/kubernetes/pkg/features"
3436
"sigs.k8s.io/structured-merge-diff/v4/fieldpath"
3537
)
3638

@@ -65,6 +67,8 @@ func (resourceclaimStrategy) PrepareForCreate(ctx context.Context, obj runtime.O
6567
claim := obj.(*resource.ResourceClaim)
6668
// Status must not be set by user on create.
6769
claim.Status = resource.ResourceClaimStatus{}
70+
71+
dropDisabledFields(claim, nil)
6872
}
6973

7074
func (resourceclaimStrategy) Validate(ctx context.Context, obj runtime.Object) field.ErrorList {
@@ -87,6 +91,8 @@ func (resourceclaimStrategy) PrepareForUpdate(ctx context.Context, obj, old runt
8791
newClaim := obj.(*resource.ResourceClaim)
8892
oldClaim := old.(*resource.ResourceClaim)
8993
newClaim.Status = oldClaim.Status
94+
95+
dropDisabledFields(newClaim, oldClaim)
9096
}
9197

9298
func (resourceclaimStrategy) ValidateUpdate(ctx context.Context, obj, old runtime.Object) field.ErrorList {
@@ -127,6 +133,8 @@ func (resourceclaimStatusStrategy) PrepareForUpdate(ctx context.Context, obj, ol
127133
oldClaim := old.(*resource.ResourceClaim)
128134
newClaim.Spec = oldClaim.Spec
129135
metav1.ResetObjectMetaForStatus(&newClaim.ObjectMeta, &oldClaim.ObjectMeta)
136+
137+
dropDisabledFields(newClaim, oldClaim)
130138
}
131139

132140
func (resourceclaimStatusStrategy) ValidateUpdate(ctx context.Context, obj, old runtime.Object) field.ErrorList {
@@ -163,3 +171,36 @@ func toSelectableFields(claim *resource.ResourceClaim) fields.Set {
163171
fields := generic.ObjectMetaFieldsSet(&claim.ObjectMeta, true)
164172
return fields
165173
}
174+
175+
// dropDisabledFields removes fields which are covered by the optional DRAControlPlaneController feature gate.
176+
func dropDisabledFields(newClaim, oldClaim *resource.ResourceClaim) {
177+
if utilfeature.DefaultFeatureGate.Enabled(features.DRAControlPlaneController) {
178+
// No need to drop anything.
179+
return
180+
}
181+
182+
if oldClaim == nil {
183+
// Always drop on create. There's no status yet, so nothing to do there.
184+
newClaim.Spec.Controller = ""
185+
return
186+
}
187+
188+
// Drop on (status) update only if not already set.
189+
if oldClaim.Spec.Controller == "" {
190+
newClaim.Spec.Controller = ""
191+
}
192+
// If the claim is handled by a control plane controller, allow
193+
// setting it also in the status. Stripping that field would be bad.
194+
if oldClaim.Spec.Controller == "" &&
195+
newClaim.Status.Allocation != nil &&
196+
oldClaim.Status.Allocation == nil &&
197+
(oldClaim.Status.Allocation == nil || oldClaim.Status.Allocation.Controller == "") {
198+
newClaim.Status.Allocation.Controller = ""
199+
}
200+
// If there is an existing allocation which used a control plane controller, then
201+
// allow requesting its deallocation.
202+
if !oldClaim.Status.DeallocationRequested &&
203+
(newClaim.Status.Allocation == nil || newClaim.Status.Allocation.Controller == "") {
204+
newClaim.Status.DeallocationRequested = false
205+
}
206+
}

0 commit comments

Comments
 (0)