Skip to content

Commit 4d19a89

Browse files
authored
Merge branch 'main' into dependabot/docker/dot-devcontainer/devcontainer-deps-7e5472b6aa
2 parents 5095ef0 + 95646e0 commit 4d19a89

File tree

15 files changed

+243
-34
lines changed

15 files changed

+243
-34
lines changed

.github/actions/e2e/create-cluster/action.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,22 +6,22 @@ inputs:
66
# required: false
77
# default: "1.27"
88
client-id:
9-
description:
9+
description: "ID of the client to create the cluster with"
1010
required: true
1111
tenant-id:
12-
description:
12+
description: "ID of the tenant to create the cluster in"
1313
required: true
1414
subscription-id:
15-
description:
15+
description: "ID of the subscription to create the cluster in"
1616
required: true
1717
resource_group:
1818
description: "Name of the resource group to create the cluster within"
1919
required: true
2020
cluster_name:
21-
description: 'Name of the cluster to be created'
21+
description: "Name of the cluster to be created"
2222
required: true
2323
acr_name:
24-
description: "Name of the acr holding the karpenter image"
24+
description: "Name of the acr holding the Karpenter image"
2525
required: true
2626
git_ref:
2727
description: "The git commit, tag, or branch to check out"

.github/actions/e2e/install-karpenter/action.yaml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,25 +2,25 @@ name: InstallKarpenter
22
description: 'Installs Karpenter on the aks cluster'
33
inputs:
44
client-id:
5-
description:
5+
description: "ID of the client to install Karpenter with"
66
required: true
77
tenant-id:
8-
description:
8+
description: "ID of the tenant containing the cluster to install Karpenter into"
99
required: true
1010
subscription-id:
11-
description:
11+
description: "ID of the subscription containing the cluster to install Karpenter into"
1212
required: true
1313
# region:
1414
# description: "Region to create aks cluster"
1515
# required: true
1616
resource_group:
17-
description: "Name of the resource group to create the cluster within"
17+
description: "Name of the resource group containing the cluster to install Karpenter into"
1818
required: true
1919
cluster_name:
20-
description: 'Name of the cluster to be created'
20+
description: "Name of the cluster to install Karpenter into."
2121
required: true
2222
acr_name:
23-
description: "Name of the acr holding the karpenter image"
23+
description: "Name of the acr holding the Karpenter image"
2424
required: true
2525
git_ref:
2626
description: "The git commit, tag, or branch to check out"
@@ -29,7 +29,7 @@ inputs:
2929
description: "the azure location to run the e2e test in"
3030
default: "eastus"
3131
provisionmode:
32-
description: "the karpenter provisioning mode to run the e2e test in"
32+
description: "the Karpenter provisioning mode to run the e2e test in"
3333
default: "aksscriptless"
3434
runs:
3535
using: "composite"

Makefile-az.mk

Lines changed: 62 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ az-mkacr: az-mkrg ## Create test ACR
6262
az acr login --name $(AZURE_ACR_NAME)
6363

6464
az-acrimport: ## Imports an image to an acr registry
65-
az acr import --name $(AZURE_ACR_NAME) --source "mcr.microsoft.com/oss/kubernetes/pause:3.6" --image "pause:3.6"
65+
az acr import --name $(AZURE_ACR_NAME) --source "mcr.microsoft.com/oss/kubernetes/pause:3.6" --image "pause:3.6" --force
6666

6767
az-cleanenv: az-rmnodeclaims-fin az-rmnodeclasses-fin ## Deletes a few common karpenter testing resources(pods, nodepools, nodeclaims, aksnodeclasses)
6868
kubectl delete deployments -n default --all
@@ -72,38 +72,72 @@ az-cleanenv: az-rmnodeclaims-fin az-rmnodeclasses-fin ## Deletes a few common ka
7272
kubectl delete aksnodeclasses --all
7373

7474
az-mkaks: az-mkacr ## Create test AKS cluster (with --vm-set-type AvailabilitySet for compatibility with standalone VMs)
75-
az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) --location $(AZURE_LOCATION) \
76-
--enable-managed-identity --node-count 3 --generate-ssh-keys --vm-set-type AvailabilitySet -o none $(if $(AZURE_VM_SIZE),--node-vm-size $(AZURE_VM_SIZE),)
75+
@hack/deploy/check-cluster-exists.sh $(AZURE_CLUSTER_NAME) $(AZURE_RESOURCE_GROUP) az-mkaks; \
76+
EXIT_CODE=$$?; \
77+
if [ $$EXIT_CODE -eq 1 ]; then \
78+
az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) --location $(AZURE_LOCATION) \
79+
--enable-managed-identity --node-count 3 --generate-ssh-keys --vm-set-type AvailabilitySet -o none $(if $(AZURE_VM_SIZE),--node-vm-size $(AZURE_VM_SIZE),) \
80+
--tags "make-command=az-mkaks"; \
81+
elif [ $$EXIT_CODE -eq 2 ]; then \
82+
exit 1; \
83+
fi
7784
az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing
7885
skaffold config set default-repo $(AZURE_ACR_NAME).$(AZURE_ACR_SUFFIX)/karpenter
7986

8087
az-mkaks-cniv1: az-mkacr ## Create test AKS cluster (with --network-plugin azure)
81-
az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) \
82-
--enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-plugin azure \
83-
--enable-oidc-issuer --enable-workload-identity $(if $(AZURE_VM_SIZE),--node-vm-size $(AZURE_VM_SIZE),)
88+
@hack/deploy/check-cluster-exists.sh $(AZURE_CLUSTER_NAME) $(AZURE_RESOURCE_GROUP) az-mkaks-cniv1; \
89+
EXIT_CODE=$$?; \
90+
if [ $$EXIT_CODE -eq 1 ]; then \
91+
az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) \
92+
--enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-plugin azure \
93+
--enable-oidc-issuer --enable-workload-identity $(if $(AZURE_VM_SIZE),--node-vm-size $(AZURE_VM_SIZE),) \
94+
--tags "make-command=az-mkaks-cniv1"; \
95+
elif [ $$EXIT_CODE -eq 2 ]; then \
96+
exit 1; \
97+
fi
8498
az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing
8599
skaffold config set default-repo $(AZURE_ACR_NAME).$(AZURE_ACR_SUFFIX)/karpenter
86100

87-
88101
az-mkaks-cilium: az-mkacr ## Create test AKS cluster (with --network-dataplane cilium, --network-plugin azure, and --network-plugin-mode overlay)
89-
az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) \
90-
--enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-dataplane cilium --network-plugin azure --network-plugin-mode overlay \
91-
--enable-oidc-issuer --enable-workload-identity $(if $(AZURE_VM_SIZE),--node-vm-size $(AZURE_VM_SIZE),)
102+
@hack/deploy/check-cluster-exists.sh $(AZURE_CLUSTER_NAME) $(AZURE_RESOURCE_GROUP) az-mkaks-cilium; \
103+
EXIT_CODE=$$?; \
104+
if [ $$EXIT_CODE -eq 1 ]; then \
105+
az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) \
106+
--enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-dataplane cilium --network-plugin azure --network-plugin-mode overlay \
107+
--enable-oidc-issuer --enable-workload-identity $(if $(AZURE_VM_SIZE),--node-vm-size $(AZURE_VM_SIZE),) \
108+
--tags "make-command=az-mkaks-cilium"; \
109+
elif [ $$EXIT_CODE -eq 2 ]; then \
110+
exit 1; \
111+
fi
92112
az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing
93113
skaffold config set default-repo $(AZURE_ACR_NAME).$(AZURE_ACR_SUFFIX)/karpenter
94114

95115
az-mkaks-overlay: az-mkacr ## Create test AKS cluster (with --network-plugin-mode overlay)
96-
az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) \
97-
--enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-plugin azure --network-plugin-mode overlay \
98-
--enable-oidc-issuer --enable-workload-identity $(if $(AZURE_VM_SIZE),--node-vm-size $(AZURE_VM_SIZE),)
116+
@hack/deploy/check-cluster-exists.sh $(AZURE_CLUSTER_NAME) $(AZURE_RESOURCE_GROUP) az-mkaks-overlay; \
117+
EXIT_CODE=$$?; \
118+
if [ $$EXIT_CODE -eq 1 ]; then \
119+
az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) \
120+
--enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-plugin azure --network-plugin-mode overlay \
121+
--enable-oidc-issuer --enable-workload-identity $(if $(AZURE_VM_SIZE),--node-vm-size $(AZURE_VM_SIZE),) \
122+
--tags "make-command=az-mkaks-overlay"; \
123+
elif [ $$EXIT_CODE -eq 2 ]; then \
124+
exit 1; \
125+
fi
99126
az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing
100127
skaffold config set default-repo $(AZURE_ACR_NAME).$(AZURE_ACR_SUFFIX)/karpenter
101128

102129
az-mkaks-perftest: az-mkacr ## Create test AKS cluster (with Azure Overlay, larger system pool VMs and larger pod-cidr)
103-
az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) \
104-
--enable-managed-identity --node-count 2 --generate-ssh-keys -o none --network-plugin azure --network-plugin-mode overlay \
105-
--enable-oidc-issuer --enable-workload-identity \
106-
--node-vm-size $(if $(AZURE_VM_SIZE),$(AZURE_VM_SIZE),Standard_D16s_v6) --pod-cidr "10.128.0.0/11"
130+
@hack/deploy/check-cluster-exists.sh $(AZURE_CLUSTER_NAME) $(AZURE_RESOURCE_GROUP) az-mkaks-perftest; \
131+
EXIT_CODE=$$?; \
132+
if [ $$EXIT_CODE -eq 1 ]; then \
133+
az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) \
134+
--enable-managed-identity --node-count 2 --generate-ssh-keys -o none --network-plugin azure --network-plugin-mode overlay \
135+
--enable-oidc-issuer --enable-workload-identity \
136+
--node-vm-size $(if $(AZURE_VM_SIZE),$(AZURE_VM_SIZE),Standard_D16s_v6) --pod-cidr "10.128.0.0/11" \
137+
--tags "make-command=az-mkaks-perftest"; \
138+
elif [ $$EXIT_CODE -eq 2 ]; then \
139+
exit 1; \
140+
fi
107141
az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing
108142
skaffold config set default-repo $(AZURE_ACR_NAME).$(AZURE_ACR_SUFFIX)/karpenter
109143

@@ -114,10 +148,17 @@ az-mksubnet: ## Create a subnet with address range of 10.1.0.0/24
114148
az network vnet subnet create --name $(CUSTOM_SUBNET_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --vnet-name $(CUSTOM_VNET_NAME) --address-prefixes "10.1.0.0/24"
115149

116150
az-mkaks-custom-vnet: az-mkacr az-mkvnet az-mksubnet ## Create test AKS cluster with custom VNet
117-
az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) \
118-
--enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-dataplane cilium --network-plugin azure --network-plugin-mode overlay \
119-
--enable-oidc-issuer --enable-workload-identity $(if $(AZURE_VM_SIZE),--node-vm-size $(AZURE_VM_SIZE),) \
120-
--vnet-subnet-id "/subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP)/providers/Microsoft.Network/virtualNetworks/$(CUSTOM_VNET_NAME)/subnets/$(CUSTOM_SUBNET_NAME)"
151+
@hack/deploy/check-cluster-exists.sh $(AZURE_CLUSTER_NAME) $(AZURE_RESOURCE_GROUP) az-mkaks-custom-vnet; \
152+
EXIT_CODE=$$?; \
153+
if [ $$EXIT_CODE -eq 1 ]; then \
154+
az aks create --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --attach-acr $(AZURE_ACR_NAME) \
155+
--enable-managed-identity --node-count 3 --generate-ssh-keys -o none --network-dataplane cilium --network-plugin azure --network-plugin-mode overlay \
156+
--enable-oidc-issuer --enable-workload-identity $(if $(AZURE_VM_SIZE),--node-vm-size $(AZURE_VM_SIZE),) \
157+
--vnet-subnet-id "/subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP)/providers/Microsoft.Network/virtualNetworks/$(CUSTOM_VNET_NAME)/subnets/$(CUSTOM_SUBNET_NAME)" \
158+
--tags "make-command=az-mkaks-custom-vnet"; \
159+
elif [ $$EXIT_CODE -eq 2 ]; then \
160+
exit 1; \
161+
fi
121162
az aks get-credentials --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) --overwrite-existing
122163
skaffold config set default-repo $(AZURE_ACR_NAME).$(AZURE_ACR_SUFFIX)/karpenter
123164

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
#!/bin/bash

# Check whether an AKS cluster already exists and carries the expected
# "make-command" tag, so the az-mkaks* Makefile targets can be idempotent.
#
# Usage: check-cluster-exists.sh <cluster-name> <resource-group> <expected-tag-value>
#
# Exit codes:
#   0 - Cluster exists with correct tag (caller should skip creation)
#   1 - Cluster doesn't exist (caller should proceed with creation)
#   2 - Cluster exists but has wrong/missing tag, or bad usage (error)

set -e

CLUSTER_NAME="$1"
RESOURCE_GROUP="$2"
EXPECTED_TAG="$3"

if [ -z "$CLUSTER_NAME" ] || [ -z "$RESOURCE_GROUP" ] || [ -z "$EXPECTED_TAG" ]; then
    echo "Usage: $0 <cluster-name> <resource-group> <expected-tag-value>"
    exit 2
fi

# Fetch the cluster's make-command tag in a single API call (the original
# issued two `az aks show` calls; a transient failure of the second after a
# successful first would misreport "wrong tag"). If the cluster does not
# exist, `az aks show` fails and we fall through to the else branch.
if EXISTING_TAG=$(az aks show --name "$CLUSTER_NAME" --resource-group "$RESOURCE_GROUP" --query "tags.\"make-command\"" -o tsv 2>/dev/null); then
    # Cluster exists; compare its tag against the expected value
    if [ "$EXISTING_TAG" = "$EXPECTED_TAG" ]; then
        echo "Cluster $CLUSTER_NAME already exists with correct tag, skipping creation"
        exit 0 # Skip creation
    else
        echo "Error: Cluster $CLUSTER_NAME exists but does not have the required tag 'make-command: $EXPECTED_TAG'"
        echo "Current tag value: $EXISTING_TAG"
        exit 2 # Error condition
    fi
else
    # Cluster doesn't exist
    exit 1 # Proceed with creation
fi

pkg/operator/operator.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,11 @@ import (
2020
"context"
2121
"encoding/base64"
2222
"fmt"
23+
"net"
2324
"time"
2425

26+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
27+
2528
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
2629
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
2730
"github.com/go-logr/logr"
@@ -90,6 +93,21 @@ type Operator struct {
9093
AZClient *instance.AZClient
9194
}
9295

96+
func kubeDNSIP(ctx context.Context, kubernetesInterface kubernetes.Interface) (net.IP, error) {
97+
if kubernetesInterface == nil {
98+
return nil, fmt.Errorf("no K8s client provided")
99+
}
100+
dnsService, err := kubernetesInterface.CoreV1().Services("kube-system").Get(ctx, "kube-dns", metav1.GetOptions{})
101+
if err != nil {
102+
return nil, err
103+
}
104+
kubeDNSIP := net.ParseIP(dnsService.Spec.ClusterIP)
105+
if kubeDNSIP == nil {
106+
return nil, fmt.Errorf("parsing cluster IP")
107+
}
108+
return kubeDNSIP, nil
109+
}
110+
93111
func NewOperator(ctx context.Context, operator *operator.Operator) (context.Context, *Operator) {
94112
azConfig, err := GetAZConfig()
95113
lo.Must0(err, "creating Azure config") // NOTE: we prefer this over the cleaner azConfig := lo.Must(GetAzConfig()), as when initializing the client there are helpful error messages in initializing clients and the azure config
@@ -119,6 +137,17 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont
119137
inClusterConfig.UserAgent = auth.GetUserAgentExtension()
120138
inClusterClient := kubernetes.NewForConfigOrDie(inClusterConfig)
121139

140+
if options.FromContext(ctx).DNSServiceIP == "" {
141+
kubeDNSIP, err := kubeDNSIP(ctx, operator.KubernetesInterface)
142+
if err != nil { // fall back to default
143+
log.FromContext(ctx).V(1).Info("unable to detect the IP of the kube-dns service, using default 10.0.0.10", "error", err)
144+
options.FromContext(ctx).DNSServiceIP = "10.0.0.10"
145+
} else {
146+
log.FromContext(ctx).V(1).Info("discovered DNS service IP", "dns-service-ip", kubeDNSIP.String())
147+
options.FromContext(ctx).DNSServiceIP = kubeDNSIP.String()
148+
}
149+
}
150+
122151
unavailableOfferingsCache := azurecache.NewUnavailableOfferings()
123152
pricingProvider := pricing.NewProvider(
124153
ctx,

pkg/operator/options/options.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ type Options struct {
7575
NetworkPolicy string `json:"networkPolicy,omitempty"` // => NetworkPolicy in bootstrap
7676
NetworkPluginMode string `json:"networkPluginMode,omitempty"` // => Network Plugin Mode is used to control the mode the network plugin should operate in. For example, "overlay" used with --network-plugin=azure will use an overlay network (non-VNET IPs) for pods in the cluster. Learn more about overlay networking here: https://learn.microsoft.com/en-us/azure/aks/azure-cni-overlay?tabs=kubectl#overview-of-overlay-networking
7777
NetworkDataplane string `json:"networkDataplane,omitempty"`
78+
DNSServiceIP string `json:"dnsServiceIP,omitempty"`
7879

7980
NodeIdentities []string `json:"nodeIdentities,omitempty"` // => Applied onto each VM
8081
KubeletIdentityClientID string `json:"kubeletIdentityClientID,omitempty"` // => Flows to bootstrap and used in drift
@@ -101,6 +102,7 @@ func (o *Options) AddFlags(fs *coreoptions.FlagSet) {
101102
fs.StringVar(&o.LinuxAdminUsername, "linux-admin-username", env.WithDefaultString("LINUX_ADMIN_USERNAME", "azureuser"), "The admin username for Linux VMs.")
102103
fs.StringVar(&o.SSHPublicKey, "ssh-public-key", env.WithDefaultString("SSH_PUBLIC_KEY", ""), "[REQUIRED] VM SSH public key.")
103104
fs.StringVar(&o.NetworkPlugin, "network-plugin", env.WithDefaultString("NETWORK_PLUGIN", consts.NetworkPluginAzure), "The network plugin used by the cluster.")
105+
fs.StringVar(&o.DNSServiceIP, "dns-service-ip", env.WithDefaultString("DNS_SERVICE_IP", ""), "The IP address of cluster DNS service.")
104106
fs.StringVar(&o.NetworkPluginMode, "network-plugin-mode", env.WithDefaultString("NETWORK_PLUGIN_MODE", consts.NetworkPluginModeOverlay), "network plugin mode of the cluster.")
105107
fs.StringVar(&o.NetworkPolicy, "network-policy", env.WithDefaultString("NETWORK_POLICY", ""), "The network policy used by the cluster.")
106108
fs.StringVar(&o.NetworkDataplane, "network-dataplane", env.WithDefaultString("NETWORK_DATAPLANE", "cilium"), "The network dataplane used by the cluster.")

pkg/operator/options/options_validation.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package options
1818

1919
import (
2020
"fmt"
21+
"net/netip"
2122
"net/url"
2223
"regexp"
2324
"strings"
@@ -44,10 +45,20 @@ func (o *Options) Validate() error {
4445
o.validateAdminUsername(),
4546
o.validateAdditionalTags(),
4647
o.validateDiskEncryptionSetID(),
48+
o.validateClusterDNSIP(),
4749
validate.Struct(o),
4850
)
4951
}
5052

53+
func (o *Options) validateClusterDNSIP() error {
54+
if o.DNSServiceIP != "" {
55+
if _, err := netip.ParseAddr(o.DNSServiceIP); err != nil {
56+
return fmt.Errorf("dns-service-ip is invalid %w", err)
57+
}
58+
}
59+
return nil
60+
}
61+
5162
func (o *Options) validateVNETGUID() error {
5263
if o.VnetGUID != "" && uuid.Validate(o.VnetGUID) != nil {
5364
return fmt.Errorf("vnet-guid %s is malformed", o.VnetGUID)

0 commit comments

Comments
 (0)