@@ -6,9 +6,9 @@ permissions:
66
77on :
88 push :
9- branches : [main, dev, ' release/*' ]
9+ branches : [main, dev, " release/*" ]
1010 pull_request :
11- branches : [main, dev, ' release/*' ]
11+ branches : [main, dev, " release/*" ]
1212
1313env :
1414 TAG : ${{ github.run_number }}
8383 if : steps.check.outputs.has_changed == 'true'
8484 run : python -m pip install --break-system-packages pyyaml jinja2 paramiko etcd3 protobuf==3.20.3 kubernetes gitpython
8585
86- - name : Install Special libs
87- if : steps.check.outputs.has_changed == 'true'
88- run : |
89- # check whether steps.changes.outputs.folders contains 'alert-manager'
90- if echo "${{ steps.changes.outputs.folders }}" | grep -q "alert-manager"; then
91- echo "Installing python-icm alertmanager dependencies..."
92- echo "${{ secrets.ICM_PACKAGE_B64 }}" | base64 -d > python-icm.zip
93- unzip -o python-icm.zip -d $GITHUB_WORKSPACE/src/alert-manager/src/node-recycler/python-icm
94- ls -l $GITHUB_WORKSPACE/src/alert-manager/src/node-recycler/python-icm
95- # XXX
96- cat $GITHUB_WORKSPACE/src/alert-manager/src/node-recycler/python-icm/README.md
97- fi
98-
9986 - name : Decode and unzip config file
10087 if : steps.check.outputs.has_changed == 'true'
10188 run : |
@@ -134,6 +121,23 @@ jobs:
134121 run : |
135122 changed_services="${{ steps.changes.outputs.folders }}"
136123 echo "Pushing: $changed_services"
124+ # alert-manager is pushed separately with an explicit image list, so strip it from the generic push list below
125+ echo "Changed services before removing alert-manager: $changed_services"
126+ if [[ "$changed_services" == *"alert-manager"* ]]; then
127+ echo "alert-manager is in the changed services"
128+ changed_services=$(echo $changed_services | sed 's/alert-manager//g')
129+ # push only the listed alert-manager images (-i) to GHCR; NOTE(review): confirm the generic push tolerates an empty service list when alert-manager was the only change
130+ echo "Pushing specific alert-manager images to GHCR"
131+ $GITHUB_WORKSPACE/build/pai_build.py push \
132+ -c $GITHUB_WORKSPACE/config/cluster-configuration \
133+ -s alert-manager \
134+ -i abnormal-detector,alert-handler,alert-parser,cert-expiration-checker,cluster-utilization,job-data-recorder,job-status-change-notification,node-failure-detection,node-issue-classifier,nvidia-gpu-low-perf-fixer,redis-monitoring \
135+ --docker-registry ghcr.io \
136+ --docker-namespace ${GITHUB_REPOSITORY_OWNER} \
137+ --docker-username ${{ github.actor }} \
138+ --docker-password ${{ secrets.GITHUB_TOKEN }}
139+ fi
140+
137141 $GITHUB_WORKSPACE/build/pai_build.py push \
138142 -c $GITHUB_WORKSPACE/config/cluster-configuration \
139143 -s $changed_services \
@@ -145,24 +149,24 @@ jobs:
145149 - name : Azure CLI get credentials and deploy
146150 if : steps.check.outputs.has_changed == 'true'
147151 run : |
148- az version
149- az login --identity --client-id ${{ secrets.AZURE_MANAGED_IDENTITY_CLIENT_ID }}
150- az aks install-cli
151- az aks get-credentials \
152- --resource-group ${{ secrets.AZURE_RESOURCE_GROUP }} \
153- --name ${{ secrets.KUBERNETES_CLUSTER }} \
154- --overwrite-existing
155- kubelogin convert-kubeconfig -l azurecli
156- kubectl config use-context ${{ secrets.KUBERNETES_CLUSTER }}
157- echo "${{ secrets.PAI_CLUSTER_NAME }}" > cluster_id
158- echo "Stopping changed pai services \"${{ steps.changes.outputs.folders }}\" on ${{ secrets.PAI_CLUSTER_NAME }} ..."
159- $GITHUB_WORKSPACE/paictl.py service stop -n ${{ steps.changes.outputs.folders }} < cluster_id
160- echo "Pushing config to cluster \"${{ secrets.PAI_CLUSTER_NAME }}\" ..."
161- $GITHUB_WORKSPACE/paictl.py config push -m service -p $GITHUB_WORKSPACE/config/cluster-configuration < cluster_id
162- echo "Starting to update \"${{ steps.changes.outputs.folders }}\" on ${{ secrets.PAI_CLUSTER_NAME }} ..."
163- $GITHUB_WORKSPACE/paictl.py service start -n ${{ steps.changes.outputs.folders }} < cluster_id
164- kubectl get pod
165- kubectl get service
152+ az version
153+ az login --identity --client-id ${{ secrets.AZURE_MANAGED_IDENTITY_CLIENT_ID }}
154+ az aks install-cli
155+ az aks get-credentials \
156+ --resource-group ${{ secrets.AZURE_RESOURCE_GROUP }} \
157+ --name ${{ secrets.KUBERNETES_CLUSTER }} \
158+ --overwrite-existing
159+ kubelogin convert-kubeconfig -l azurecli
160+ kubectl config use-context ${{ secrets.KUBERNETES_CLUSTER }}
161+ echo "${{ secrets.PAI_CLUSTER_NAME }}" > cluster_id
162+ echo "Stopping changed pai services \"${{ steps.changes.outputs.folders }}\" on ${{ secrets.PAI_CLUSTER_NAME }} ..."
163+ $GITHUB_WORKSPACE/paictl.py service stop -n ${{ steps.changes.outputs.folders }} < cluster_id
164+ echo "Pushing config to cluster \"${{ secrets.PAI_CLUSTER_NAME }}\" ..."
165+ $GITHUB_WORKSPACE/paictl.py config push -m service -p $GITHUB_WORKSPACE/config/cluster-configuration < cluster_id
166+ echo "Starting to update \"${{ steps.changes.outputs.folders }}\" on ${{ secrets.PAI_CLUSTER_NAME }} ..."
167+ $GITHUB_WORKSPACE/paictl.py service start -n ${{ steps.changes.outputs.folders }} < cluster_id
168+ kubectl get pod
169+ kubectl get service
166170
167171 test :
168172 name : Test rest-server
0 commit comments