Skip to content

Commit 46d9e08

Browse files
🐛 Fix e2e test flakes when webhooks are scaffolded
Projects with webhooks may experience flaky e2e tests because the webhook server is not yet ready when the metrics test creates the curl-metrics pod.
1 parent 9eee7a7 commit 46d9e08

File tree

9 files changed

+214
-79
lines changed

9 files changed

+214
-79
lines changed

docs/book/src/cronjob-tutorial/testdata/project/test/e2e/e2e_test.go

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -197,24 +197,38 @@ var _ = Describe("Manager", Ordered, func() {
197197
Expect(err).NotTo(HaveOccurred())
198198
Expect(token).NotTo(BeEmpty())
199199

200-
By("waiting for the metrics endpoint to be ready")
201-
verifyMetricsEndpointReady := func(g Gomega) {
202-
cmd := exec.Command("kubectl", "get", "endpoints", metricsServiceName, "-n", namespace)
200+
By("ensuring the controller pod is ready")
201+
verifyControllerPodReady := func(g Gomega) {
202+
cmd := exec.Command("kubectl", "get", "pod", controllerPodName, "-n", namespace,
203+
"-o", "jsonpath={.status.conditions[?(@.type=='Ready')].status}")
203204
output, err := utils.Run(cmd)
204205
g.Expect(err).NotTo(HaveOccurred())
205-
g.Expect(output).To(ContainSubstring("8443"), "Metrics endpoint is not ready")
206+
g.Expect(output).To(Equal("True"), "Controller pod not ready")
206207
}
207-
Eventually(verifyMetricsEndpointReady).Should(Succeed())
208+
Eventually(verifyControllerPodReady, 3*time.Minute, time.Second).Should(Succeed())
208209

209210
By("verifying that the controller manager is serving the metrics server")
210211
verifyMetricsServerStarted := func(g Gomega) {
211212
cmd := exec.Command("kubectl", "logs", controllerPodName, "-n", namespace)
212213
output, err := utils.Run(cmd)
213214
g.Expect(err).NotTo(HaveOccurred())
214-
g.Expect(output).To(ContainSubstring("controller-runtime.metrics\tServing metrics server"),
215+
g.Expect(output).To(ContainSubstring("Serving metrics server"),
215216
"Metrics server not yet started")
216217
}
217-
Eventually(verifyMetricsServerStarted).Should(Succeed())
218+
Eventually(verifyMetricsServerStarted, 3*time.Minute, time.Second).Should(Succeed())
219+
220+
By("waiting for the webhook service endpoints to be ready")
221+
verifyWebhookEndpointsReady := func(g Gomega) {
222+
cmd := exec.Command("kubectl", "get", "endpointslices.discovery.k8s.io", "-n", namespace,
223+
"-l", "kubernetes.io/service-name=project-webhook-service",
224+
"-o", "jsonpath={range .items[*]}{range .endpoints[*]}{.addresses[*]}{end}{end}")
225+
output, err := utils.Run(cmd)
226+
g.Expect(err).NotTo(HaveOccurred(), "Webhook endpoints should exist")
227+
g.Expect(output).ShouldNot(BeEmpty(), "Webhook endpoints not yet ready")
228+
}
229+
Eventually(verifyWebhookEndpointsReady, 3*time.Minute, time.Second).Should(Succeed())
230+
231+
// +kubebuilder:scaffold:e2e-metrics-webhooks-readiness
218232

219233
By("creating the curl-metrics pod to access the metrics endpoint")
220234
cmd = exec.Command("kubectl", "run", "curl-metrics", "--restart=Never",

docs/book/src/getting-started/testdata/project/test/e2e/e2e_test.go

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -192,24 +192,27 @@ var _ = Describe("Manager", Ordered, func() {
192192
Expect(err).NotTo(HaveOccurred())
193193
Expect(token).NotTo(BeEmpty())
194194

195-
By("waiting for the metrics endpoint to be ready")
196-
verifyMetricsEndpointReady := func(g Gomega) {
197-
cmd := exec.Command("kubectl", "get", "endpoints", metricsServiceName, "-n", namespace)
195+
By("ensuring the controller pod is ready")
196+
verifyControllerPodReady := func(g Gomega) {
197+
cmd := exec.Command("kubectl", "get", "pod", controllerPodName, "-n", namespace,
198+
"-o", "jsonpath={.status.conditions[?(@.type=='Ready')].status}")
198199
output, err := utils.Run(cmd)
199200
g.Expect(err).NotTo(HaveOccurred())
200-
g.Expect(output).To(ContainSubstring("8443"), "Metrics endpoint is not ready")
201+
g.Expect(output).To(Equal("True"), "Controller pod not ready")
201202
}
202-
Eventually(verifyMetricsEndpointReady).Should(Succeed())
203+
Eventually(verifyControllerPodReady, 3*time.Minute, time.Second).Should(Succeed())
203204

204205
By("verifying that the controller manager is serving the metrics server")
205206
verifyMetricsServerStarted := func(g Gomega) {
206207
cmd := exec.Command("kubectl", "logs", controllerPodName, "-n", namespace)
207208
output, err := utils.Run(cmd)
208209
g.Expect(err).NotTo(HaveOccurred())
209-
g.Expect(output).To(ContainSubstring("controller-runtime.metrics\tServing metrics server"),
210+
g.Expect(output).To(ContainSubstring("Serving metrics server"),
210211
"Metrics server not yet started")
211212
}
212-
Eventually(verifyMetricsServerStarted).Should(Succeed())
213+
Eventually(verifyMetricsServerStarted, 3*time.Minute, time.Second).Should(Succeed())
214+
215+
// +kubebuilder:scaffold:e2e-metrics-webhooks-readiness
213216

214217
By("creating the curl-metrics pod to access the metrics endpoint")
215218
cmd = exec.Command("kubectl", "run", "curl-metrics", "--restart=Never",

docs/book/src/multiversion-tutorial/testdata/project/test/e2e/e2e_test.go

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -204,24 +204,38 @@ var _ = Describe("Manager", Ordered, func() {
204204
Expect(err).NotTo(HaveOccurred())
205205
Expect(token).NotTo(BeEmpty())
206206

207-
By("waiting for the metrics endpoint to be ready")
208-
verifyMetricsEndpointReady := func(g Gomega) {
209-
cmd := exec.Command("kubectl", "get", "endpoints", metricsServiceName, "-n", namespace)
207+
By("ensuring the controller pod is ready")
208+
verifyControllerPodReady := func(g Gomega) {
209+
cmd := exec.Command("kubectl", "get", "pod", controllerPodName, "-n", namespace,
210+
"-o", "jsonpath={.status.conditions[?(@.type=='Ready')].status}")
210211
output, err := utils.Run(cmd)
211212
g.Expect(err).NotTo(HaveOccurred())
212-
g.Expect(output).To(ContainSubstring("8443"), "Metrics endpoint is not ready")
213+
g.Expect(output).To(Equal("True"), "Controller pod not ready")
213214
}
214-
Eventually(verifyMetricsEndpointReady).Should(Succeed())
215+
Eventually(verifyControllerPodReady, 3*time.Minute, time.Second).Should(Succeed())
215216

216217
By("verifying that the controller manager is serving the metrics server")
217218
verifyMetricsServerStarted := func(g Gomega) {
218219
cmd := exec.Command("kubectl", "logs", controllerPodName, "-n", namespace)
219220
output, err := utils.Run(cmd)
220221
g.Expect(err).NotTo(HaveOccurred())
221-
g.Expect(output).To(ContainSubstring("controller-runtime.metrics\tServing metrics server"),
222+
g.Expect(output).To(ContainSubstring("Serving metrics server"),
222223
"Metrics server not yet started")
223224
}
224-
Eventually(verifyMetricsServerStarted).Should(Succeed())
225+
Eventually(verifyMetricsServerStarted, 3*time.Minute, time.Second).Should(Succeed())
226+
227+
By("waiting for the webhook service endpoints to be ready")
228+
verifyWebhookEndpointsReady := func(g Gomega) {
229+
cmd := exec.Command("kubectl", "get", "endpointslices.discovery.k8s.io", "-n", namespace,
230+
"-l", "kubernetes.io/service-name=project-webhook-service",
231+
"-o", "jsonpath={range .items[*]}{range .endpoints[*]}{.addresses[*]}{end}{end}")
232+
output, err := utils.Run(cmd)
233+
g.Expect(err).NotTo(HaveOccurred(), "Webhook endpoints should exist")
234+
g.Expect(output).ShouldNot(BeEmpty(), "Webhook endpoints not yet ready")
235+
}
236+
Eventually(verifyWebhookEndpointsReady, 3*time.Minute, time.Second).Should(Succeed())
237+
238+
// +kubebuilder:scaffold:e2e-metrics-webhooks-readiness
225239

226240
By("creating the curl-metrics pod to access the metrics endpoint")
227241
cmd = exec.Command("kubectl", "run", "curl-metrics", "--restart=Never",

docs/book/src/reference/markers/scaffold.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ properly registered with the manager, so that the controller can reconcile the r
108108
| **(No longer supported)** `+kubebuilder:scaffold:crdkustomizecainjectionpatch` | `config/crd` | Marks where CA injection patches are added for the webhooks. Replaced by `+kubebuilder:scaffold:crdkustomizecainjectionns` and `+kubebuilder:scaffold:crdkustomizecainjectioname` |
109109
| `+kubebuilder:scaffold:manifestskustomizesamples` | `config/samples` | Marks where Kustomize sample manifests are injected. |
110110
| `+kubebuilder:scaffold:e2e-webhooks-checks` | `test/e2e` | Adds e2e checks for webhooks depending on the types of webhooks scaffolded. |
111+
| `+kubebuilder:scaffold:e2e-metrics-webhooks-readiness` | `test/e2e` | Adds readiness logic so metrics e2e tests wait for webhook service endpoints before creating pods. |
112+
| `+kubebuilder:scaffold:e2e-metrics-webhooks-readiness` | `test/e2e` | Adds readiness logic so metrics e2e tests wait for webhook service endpoints before creating pods. |
111113

112114
<aside class="warning">
113115
<h3> **(No longer supported)** `+kubebuilder:scaffold:crdkustomizecainjectionpatch` </h3>

pkg/plugins/golang/v4/scaffolds/internal/templates/test/e2e/test.go

Lines changed: 60 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
log "log/slog"
2323
"os"
2424
"path/filepath"
25+
"strings"
2526

2627
"sigs.k8s.io/kubebuilder/v4/pkg/machinery"
2728
)
@@ -31,7 +32,10 @@ var (
3132
_ machinery.Inserter = &WebhookTestUpdater{}
3233
)
3334

34-
const webhookChecksMarker = "e2e-webhooks-checks"
35+
const (
36+
webhookChecksMarker = "e2e-webhooks-checks"
37+
metricsWebhookReadinessMarker = "e2e-metrics-webhooks-readiness"
38+
)
3539

3640
// Test defines the basic setup for the e2e test
3741
type Test struct {
@@ -75,6 +79,7 @@ func (*WebhookTestUpdater) GetIfExistsAction() machinery.IfExistsAction {
7579
func (f *WebhookTestUpdater) GetMarkers() []machinery.Marker {
7680
return []machinery.Marker{
7781
machinery.NewMarkerFor(f.GetPath(), webhookChecksMarker),
82+
machinery.NewMarkerFor(f.GetPath(), metricsWebhookReadinessMarker),
7883
}
7984
}
8085

@@ -99,36 +104,46 @@ func (f *WebhookTestUpdater) GetCodeFragments() machinery.CodeFragmentsMap {
99104
markers := f.GetMarkers()
100105

101106
for _, marker := range markers {
102-
if !bytes.Contains(content, []byte(marker.String())) {
107+
markerStr := marker.String()
108+
if !bytes.Contains(content, []byte(markerStr)) {
103109
log.Warn("Marker not found in file, skipping webhook test code injection",
104-
"marker", marker.String(),
110+
"marker", markerStr,
105111
"file_path", filePath)
106112
continue // skip this marker
107113
}
108114

109-
var fragments []string
110-
fragments = append(fragments, webhookChecksFragment)
115+
switch {
116+
case strings.Contains(markerStr, webhookChecksMarker):
117+
var fragments []string
118+
fragments = append(fragments, webhookChecksFragment)
111119

112-
if f.Resource != nil && f.Resource.HasDefaultingWebhook() {
113-
mutatingWebhookCode := fmt.Sprintf(mutatingWebhookChecksFragment, f.ProjectName)
114-
fragments = append(fragments, mutatingWebhookCode)
115-
}
120+
if f.Resource != nil && f.Resource.HasDefaultingWebhook() {
121+
mutatingWebhookCode := fmt.Sprintf(mutatingWebhookChecksFragment, f.ProjectName)
122+
fragments = append(fragments, mutatingWebhookCode)
123+
}
116124

117-
if f.Resource != nil && f.Resource.HasValidationWebhook() {
118-
validatingWebhookCode := fmt.Sprintf(validatingWebhookChecksFragment, f.ProjectName)
119-
fragments = append(fragments, validatingWebhookCode)
120-
}
125+
if f.Resource != nil && f.Resource.HasValidationWebhook() {
126+
validatingWebhookCode := fmt.Sprintf(validatingWebhookChecksFragment, f.ProjectName)
127+
fragments = append(fragments, validatingWebhookCode)
128+
}
121129

122-
if f.Resource != nil && f.Resource.HasConversionWebhook() {
123-
conversionWebhookCode := fmt.Sprintf(
124-
conversionWebhookChecksFragment,
125-
f.Resource.Kind,
126-
f.Resource.Plural+"."+f.Resource.Group+"."+f.Resource.Domain,
127-
)
128-
fragments = append(fragments, conversionWebhookCode)
129-
}
130+
if f.Resource != nil && f.Resource.HasConversionWebhook() {
131+
conversionWebhookCode := fmt.Sprintf(
132+
conversionWebhookChecksFragment,
133+
f.Resource.Kind,
134+
f.Resource.Plural+"."+f.Resource.Group+"."+f.Resource.Domain,
135+
)
136+
fragments = append(fragments, conversionWebhookCode)
137+
}
130138

131-
codeFragments[marker] = fragments
139+
if len(fragments) > 0 {
140+
codeFragments[marker] = fragments
141+
}
142+
case strings.Contains(markerStr, metricsWebhookReadinessMarker):
143+
webhookServiceName := fmt.Sprintf("%s-webhook-service", f.ProjectName)
144+
fragments := []string{fmt.Sprintf(metricsWebhookReadinessFragment, webhookServiceName)}
145+
codeFragments[marker] = fragments
146+
}
132147
}
133148

134149
if len(codeFragments) == 0 {
@@ -198,6 +213,19 @@ const conversionWebhookChecksFragment = `It("should have CA injection for %[1]s
198213
199214
`
200215

216+
const metricsWebhookReadinessFragment = `By("waiting for the webhook service endpoints to be ready")
217+
verifyWebhookEndpointsReady := func(g Gomega) {
218+
cmd := exec.Command("kubectl", "get", "endpointslices.discovery.k8s.io", "-n", namespace,
219+
"-l", "kubernetes.io/service-name=%s",
220+
"-o", "jsonpath={range .items[*]}{range .endpoints[*]}{.addresses[*]}{end}{end}")
221+
output, err := utils.Run(cmd)
222+
g.Expect(err).NotTo(HaveOccurred(), "Webhook endpoints should exist")
223+
g.Expect(output).ShouldNot(BeEmpty(), "Webhook endpoints not yet ready")
224+
}
225+
Eventually(verifyWebhookEndpointsReady, 3*time.Minute, time.Second).Should(Succeed())
226+
227+
`
228+
201229
var testCodeTemplate = `//go:build e2e
202230
// +build e2e
203231
@@ -375,24 +403,27 @@ var _ = Describe("Manager", Ordered, func() {
375403
Expect(err).NotTo(HaveOccurred())
376404
Expect(token).NotTo(BeEmpty())
377405
378-
By("waiting for the metrics endpoint to be ready")
379-
verifyMetricsEndpointReady := func(g Gomega) {
380-
cmd := exec.Command("kubectl", "get", "endpoints", metricsServiceName, "-n", namespace)
406+
By("ensuring the controller pod is ready")
407+
verifyControllerPodReady := func(g Gomega) {
408+
cmd := exec.Command("kubectl", "get", "pod", controllerPodName, "-n", namespace,
409+
"-o", "jsonpath={.status.conditions[?(@.type=='Ready')].status}")
381410
output, err := utils.Run(cmd)
382411
g.Expect(err).NotTo(HaveOccurred())
383-
g.Expect(output).To(ContainSubstring("8443"), "Metrics endpoint is not ready")
412+
g.Expect(output).To(Equal("True"), "Controller pod not ready")
384413
}
385-
Eventually(verifyMetricsEndpointReady).Should(Succeed())
414+
Eventually(verifyControllerPodReady, 3*time.Minute, time.Second).Should(Succeed())
386415
387416
By("verifying that the controller manager is serving the metrics server")
388417
verifyMetricsServerStarted := func(g Gomega) {
389418
cmd := exec.Command("kubectl", "logs", controllerPodName, "-n", namespace)
390419
output, err := utils.Run(cmd)
391420
g.Expect(err).NotTo(HaveOccurred())
392-
g.Expect(output).To(ContainSubstring("controller-runtime.metrics\tServing metrics server"),
421+
g.Expect(output).To(ContainSubstring("Serving metrics server"),
393422
"Metrics server not yet started")
394423
}
395-
Eventually(verifyMetricsServerStarted).Should(Succeed())
424+
Eventually(verifyMetricsServerStarted, 3*time.Minute, time.Second).Should(Succeed())
425+
426+
// +kubebuilder:scaffold:e2e-metrics-webhooks-readiness
396427
397428
By("creating the curl-metrics pod to access the metrics endpoint")
398429
cmd = exec.Command("kubectl", "run", "curl-metrics", "--restart=Never",

test/e2e/v4/plugin_cluster_test.go

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ func Run(kbc *utils.TestContext, hasWebhook, isToUseInstaller, isToUseHelmChart,
299299

300300
if hasMetrics {
301301
By("checking the metrics values to validate that the created resource object gets reconciled")
302-
metricsOutput := getMetricsOutput(kbc)
302+
metricsOutput := getMetricsOutput(controllerPodName, kbc)
303303
Expect(metricsOutput).To(ContainSubstring(fmt.Sprintf(
304304
`controller_runtime_reconcile_total{controller="%s",result="success"} 1`,
305305
strings.ToLower(kbc.Kind),
@@ -392,7 +392,7 @@ func Run(kbc *utils.TestContext, hasWebhook, isToUseInstaller, isToUseHelmChart,
392392

393393
if hasMetrics {
394394
By("validating conversion metrics to confirm conversion operations")
395-
metricsOutput := getMetricsOutput(kbc)
395+
metricsOutput := getMetricsOutput(controllerPodName, kbc)
396396
conversionMetric := `controller_runtime_reconcile_total{controller="conversiontest",result="success"} 1`
397397
Expect(metricsOutput).To(ContainSubstring(conversionMetric),
398398
"Expected metric for successful ConversionTest reconciliation")
@@ -438,7 +438,7 @@ func getControllerName(kbc *utils.TestContext) string {
438438
}
439439

440440
// getMetricsOutput returns the metrics output from the curl pod
441-
func getMetricsOutput(kbc *utils.TestContext) string {
441+
func getMetricsOutput(controllerPodName string, kbc *utils.TestContext) string {
442442
_, err := kbc.Kubectl.Command(
443443
"get", "clusterrolebinding", fmt.Sprintf("metrics-%s", kbc.TestSuffix),
444444
)
@@ -484,11 +484,40 @@ func getMetricsOutput(kbc *utils.TestContext) string {
484484
// when using controller-runtime's WithAuthenticationAndAuthorization() with self-signed certificates.
485485
// This delay appears to stem from Kubernetes itself, potentially due to changes in how it initializes
486486
// service account tokens or handles TLS/service readiness.
487-
//
488-
// Without this delay, tests that curl the /metrics endpoint using a token can fail from k8s 1.33+.
489-
// As a temporary workaround, we wait briefly before attempting to access metrics.
490-
By("waiting briefly to ensure that the certs are provisioned and metrics are available")
491-
time.Sleep(15 * time.Second)
487+
// Instead of using a fixed sleep, we check that the controller pod is fully ready, which ensures
488+
// all containers are running, volumes are mounted, and readiness probes pass. This is especially
489+
// important when webhooks are configured, as the webhook server must be serving before we can
490+
// create the curl-metrics pod (otherwise validating webhooks will reject the pod creation).
491+
By("ensuring the controller pod is fully ready before creating test pods")
492+
verifyControllerPodReady := func(g Gomega) {
493+
var output string
494+
output, err = kbc.Kubectl.Get(
495+
true,
496+
"pod", controllerPodName,
497+
"-o", "jsonpath={.status.conditions[?(@.type=='Ready')].status}",
498+
)
499+
g.Expect(err).NotTo(HaveOccurred())
500+
g.Expect(output).To(Equal("True"), "Controller pod not ready")
501+
}
502+
Eventually(verifyControllerPodReady, 3*time.Minute, time.Second).Should(Succeed())
503+
504+
webhookServiceName := fmt.Sprintf("e2e-%s-webhook-service", kbc.TestSuffix)
505+
if _, err = kbc.Kubectl.Get(false, "service", webhookServiceName); err == nil {
506+
By("waiting for the webhook service endpoints to be ready")
507+
checkWebhookEndpoint := func(g Gomega) {
508+
var output string
509+
output, err = kbc.Kubectl.Command(
510+
"get", "endpointslices.discovery.k8s.io",
511+
"-n", kbc.Kubectl.Namespace,
512+
"-l", fmt.Sprintf("kubernetes.io/service-name=%s", webhookServiceName),
513+
"-o", "jsonpath={range .items[*]}{range .endpoints[*]}{.addresses[*]}{end}{end}",
514+
)
515+
g.Expect(err).NotTo(HaveOccurred(), "webhook endpoints should exist")
516+
g.Expect(output).ShouldNot(BeEmpty(), "webhook endpoints not yet ready")
517+
}
518+
Eventually(checkWebhookEndpoint, 3*time.Minute, time.Second).Should(Succeed(),
519+
"Webhook service endpoints should be ready")
520+
}
492521

493522
By("creating a curl pod to access the metrics endpoint")
494523
cmdOpts := cmdOptsToCreateCurlPod(kbc, token)

0 commit comments

Comments
 (0)