@@ -885,9 +885,10 @@ func (c *MPIJobController) getOrCreateService(job *kubeflow.MPIJob, newSvc *core
885885 }
886886
887887 // If the Service selector or publishNotReadyAddresses is changed, update it.
888- if ! equality .Semantic .DeepEqual (svc .Spec .Selector , newSvc .Spec .Selector ) {
888+ if ! equality .Semantic .DeepEqual (svc .Spec .Selector , newSvc .Spec .Selector ) || svc . Spec . PublishNotReadyAddresses != newSvc . Spec . PublishNotReadyAddresses {
889889 svc = svc .DeepCopy ()
890890 svc .Spec .Selector = newSvc .Spec .Selector
891+ svc .Spec .PublishNotReadyAddresses = newSvc .Spec .PublishNotReadyAddresses
891892 return c .kubeClient .CoreV1 ().Services (svc .Namespace ).Update (context .TODO (), svc , metav1.UpdateOptions {})
892893 }
893894
@@ -1343,10 +1344,10 @@ func newJobService(job *kubeflow.MPIJob) *corev1.Service {
13431344 kubeflow .OperatorNameLabel : kubeflow .OperatorName ,
13441345 kubeflow .JobNameLabel : job .Name ,
13451346 }
1346- return newService (job , job .Name , labels )
1347+ return newService (job , job .Name , labels , ptr . Deref ( job . Spec . RunLauncherAsWorker , false ) )
13471348}
13481349
1349- func newService (job * kubeflow.MPIJob , name string , selector map [string ]string ) * corev1.Service {
1350+ func newService (job * kubeflow.MPIJob , name string , selector map [string ]string , isRunLauncherAsWorker bool ) * corev1.Service {
13501351 return & corev1.Service {
13511352 ObjectMeta : metav1.ObjectMeta {
13521353 Name : name ,
@@ -1361,6 +1362,9 @@ func newService(job *kubeflow.MPIJob, name string, selector map[string]string) *
13611362 Spec : corev1.ServiceSpec {
13621363 ClusterIP : corev1 .ClusterIPNone ,
13631364 Selector : selector ,
1365+ // publishNotReadyAddresses must be true only for an MPIJob with runLauncherAsWorker enabled,
1366+ // to avoid a deadlock while waiting for the Launcher to become ready.
1367+ PublishNotReadyAddresses : isRunLauncherAsWorker ,
13641368 },
13651369 }
13661370}
0 commit comments