Skip to content

Commit 986168a

Browse files
committed
disconnect: fixes scheduler logic to respect replace field
These changes refactor some scheduler code to respect the disconnect.replace field which was currently being ignored and update some logic/method names for code readability.
1 parent c648074 commit 986168a

File tree

7 files changed

+105
-115
lines changed

7 files changed

+105
-115
lines changed

client/client.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2901,7 +2901,7 @@ func (c *Client) updateAlloc(update *structs.Allocation) {
29012901
// Reconnect unknown allocations if they were updated and are not terminal.
29022902
reconnect := update.ClientStatus == structs.AllocClientStatusUnknown &&
29032903
update.AllocModifyIndex > alloc.AllocModifyIndex &&
2904-
(!update.ServerTerminalStatus() || !alloc.PreventReplaceOnDisconnect())
2904+
!update.ServerTerminalStatus()
29052905
if reconnect {
29062906
err = ar.Reconnect(update)
29072907
if err != nil {

nomad/plan_apply.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -806,7 +806,7 @@ func isValidForDisconnectedNode(plan *structs.Plan, nodeID string) bool {
806806
// as non reschedulables when lost or if the allocs are being updated to lost.
807807
func isValidForDownNode(plan *structs.Plan, nodeID string) bool {
808808
for _, alloc := range plan.NodeAllocation[nodeID] {
809-
if !(alloc.ClientStatus == structs.AllocClientStatusUnknown && alloc.PreventReplaceOnDisconnect()) &&
809+
if !(alloc.ClientStatus == structs.AllocClientStatusUnknown && !alloc.ReplaceOnDisconnect()) &&
810810
(alloc.ClientStatus != structs.AllocClientStatusLost) {
811811
return false
812812
}

nomad/structs/alloc.go

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -550,14 +550,26 @@ func (a *Allocation) DisconnectTimeout(now time.Time) time.Time {
550550

551551
tg := a.Job.LookupTaskGroup(a.TaskGroup)
552552

553-
timeout := tg.GetDisconnectLostTimeout()
553+
timeout := tg.GetDisconnectLostAfter()
554554
if timeout == 0 {
555555
return now
556556
}
557557

558558
return now.Add(timeout)
559559
}
560560

561+
// DisconnectLostAfter is a helper to get the Disconnect.LostAfter for an allocation
562+
func (a *Allocation) DisconnectLostAfter() time.Duration {
563+
if a.Job != nil {
564+
tg := a.Job.LookupTaskGroup(a.TaskGroup)
565+
if tg != nil {
566+
return tg.GetDisconnectLostAfter()
567+
}
568+
}
569+
570+
return 0
571+
}
572+
561573
// SupportsDisconnectedClients determines whether both the server and the task group
562574
// are configured to allow the allocation to reconnect after network connectivity
563575
// has been lost and then restored.
@@ -569,24 +581,24 @@ func (a *Allocation) SupportsDisconnectedClients(serverSupportsDisconnectedClien
569581
if a.Job != nil {
570582
tg := a.Job.LookupTaskGroup(a.TaskGroup)
571583
if tg != nil {
572-
return tg.GetDisconnectLostTimeout() != 0
584+
return tg.GetDisconnectLostAfter() != 0
573585
}
574586
}
575587

576588
return false
577589
}
578590

579-
// PreventReplaceOnDisconnect determines if an alloc allows to have a replacement
591+
// ReplaceOnDisconnect determines if an alloc can be replaced
580592
// when Disconnected.
581-
func (a *Allocation) PreventReplaceOnDisconnect() bool {
593+
func (a *Allocation) ReplaceOnDisconnect() bool {
582594
if a.Job != nil {
583595
tg := a.Job.LookupTaskGroup(a.TaskGroup)
584596
if tg != nil {
585-
return !tg.Replace()
597+
return tg.Replace()
586598
}
587599
}
588600

589-
return false
601+
return true
590602
}
591603

592604
// NextDelay returns a duration after which the allocation can be rescheduled.
@@ -802,8 +814,8 @@ func (a *Allocation) Expired(now time.Time) bool {
802814
return false
803815
}
804816

805-
timeout := tg.GetDisconnectLostTimeout()
806-
if timeout == 0 && tg.Replace() {
817+
timeout := tg.GetDisconnectLostAfter()
818+
if timeout == 0 || !tg.Replace() {
807819
return false
808820
}
809821

nomad/structs/structs.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7631,9 +7631,9 @@ func (tg *TaskGroup) Replace() bool {
76317631
return *tg.Disconnect.Replace
76327632
}
76337633

7634-
// GetDisconnectLostTimeout is a helper meant to simplify the logic for
7634+
// GetDisconnectLostAfter is a helper meant to simplify the logic for
76357635
// getting the Disconnect.LostAfter field of a task group.
7636-
func (tg *TaskGroup) GetDisconnectLostTimeout() time.Duration {
7636+
func (tg *TaskGroup) GetDisconnectLostAfter() time.Duration {
76377637
if tg.Disconnect != nil {
76387638
return tg.Disconnect.LostAfter
76397639
}

scheduler/reconciler/allocs_test.go

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ func TestAllocSet_filterByTainted(t *testing.T) {
6060
ID: "draining",
6161
DrainStrategy: mock.DrainNode().DrainStrategy,
6262
},
63-
"lost": {
64-
ID: "lost",
63+
"down": {
64+
ID: "down",
6565
Status: structs.NodeStatusDown,
6666
},
6767
"nil": nil,
@@ -162,12 +162,12 @@ func TestAllocSet_filterByTainted(t *testing.T) {
162162
Job: testJob,
163163
NodeID: "draining",
164164
},
165-
// Terminal allocs are always untainted, even on lost nodes
165+
// Terminal allocs are always untainted, even on down nodes
166166
"untainted4": {
167167
ID: "untainted4",
168168
ClientStatus: structs.AllocClientStatusComplete,
169169
Job: testJob,
170-
NodeID: "lost",
170+
NodeID: "down",
171171
},
172172
// Non-terminal alloc with migrate=true should migrate on a draining node
173173
"migrating1": {
@@ -207,12 +207,12 @@ func TestAllocSet_filterByTainted(t *testing.T) {
207207
Job: testJob,
208208
NodeID: "draining",
209209
},
210-
// Terminal allocs are always untainted, even on lost nodes
210+
// Terminal allocs are always untainted, even on down nodes
211211
"untainted4": {
212212
ID: "untainted4",
213213
ClientStatus: structs.AllocClientStatusComplete,
214214
Job: testJob,
215-
NodeID: "lost",
215+
NodeID: "down",
216216
},
217217
},
218218
migrate: allocSet{
@@ -247,19 +247,19 @@ func TestAllocSet_filterByTainted(t *testing.T) {
247247
// false failures, so don't perform that test if in this case.
248248
skipNilNodeTest: true,
249249
all: allocSet{
250-
// Non-terminal allocs on lost nodes are lost
250+
// Non-terminal allocs on lost nodes are down
251251
"lost1": {
252252
ID: "lost1",
253253
ClientStatus: structs.AllocClientStatusPending,
254254
Job: testJob,
255-
NodeID: "lost",
255+
NodeID: "down",
256256
},
257-
// Non-terminal allocs on lost nodes are lost
257+
// Non-terminal allocs on lost nodes are down
258258
"lost2": {
259259
ID: "lost2",
260260
ClientStatus: structs.AllocClientStatusRunning,
261261
Job: testJob,
262-
NodeID: "lost",
262+
NodeID: "down",
263263
},
264264
},
265265
untainted: allocSet{},
@@ -268,19 +268,19 @@ func TestAllocSet_filterByTainted(t *testing.T) {
268268
reconnecting: allocSet{},
269269
ignore: allocSet{},
270270
lost: allocSet{
271-
// Non-terminal allocs on lost nodes are lost
271+
// Non-terminal allocs on lost nodes are down
272272
"lost1": {
273273
ID: "lost1",
274274
ClientStatus: structs.AllocClientStatusPending,
275275
Job: testJob,
276-
NodeID: "lost",
276+
NodeID: "down",
277277
},
278-
// Non-terminal allocs on lost nodes are lost
278+
// Non-terminal allocs on lost nodes are down
279279
"lost2": {
280280
ID: "lost2",
281281
ClientStatus: structs.AllocClientStatusRunning,
282282
Job: testJob,
283-
NodeID: "lost",
283+
NodeID: "down",
284284
},
285285
},
286286
expiring: allocSet{},
@@ -864,12 +864,12 @@ func TestAllocSet_filterByTainted(t *testing.T) {
864864
Job: testJobSingle,
865865
NodeID: "draining",
866866
},
867-
// Terminal allocs are always untainted, even on lost nodes
867+
// Terminal allocs are always untainted, even on down nodes
868868
"untainted4": {
869869
ID: "untainted4",
870870
ClientStatus: structs.AllocClientStatusComplete,
871871
Job: testJobSingle,
872-
NodeID: "lost",
872+
NodeID: "down",
873873
},
874874
// Non-terminal alloc with migrate=true should migrate on a draining node
875875
"migrating1": {
@@ -909,12 +909,12 @@ func TestAllocSet_filterByTainted(t *testing.T) {
909909
Job: testJobSingle,
910910
NodeID: "draining",
911911
},
912-
// Terminal allocs are always untainted, even on lost nodes
912+
// Terminal allocs are always untainted, even on down nodes
913913
"untainted4": {
914914
ID: "untainted4",
915915
ClientStatus: structs.AllocClientStatusComplete,
916916
Job: testJobSingle,
917-
NodeID: "lost",
917+
NodeID: "down",
918918
},
919919
},
920920
migrate: allocSet{
@@ -949,19 +949,19 @@ func TestAllocSet_filterByTainted(t *testing.T) {
949949
// false failures, so don't perform that test if in this case.
950950
skipNilNodeTest: true,
951951
all: allocSet{
952-
// Non-terminal allocs on lost nodes are lost
952+
// Non-terminal allocs on lost nodes are down
953953
"lost1": {
954954
ID: "lost1",
955955
ClientStatus: structs.AllocClientStatusPending,
956956
Job: testJobSingle,
957-
NodeID: "lost",
957+
NodeID: "down",
958958
},
959-
// Non-terminal allocs on lost nodes are lost
959+
// Non-terminal allocs on lost nodes are down
960960
"lost2": {
961961
ID: "lost2",
962962
ClientStatus: structs.AllocClientStatusRunning,
963963
Job: testJobSingle,
964-
NodeID: "lost",
964+
NodeID: "down",
965965
},
966966
},
967967
untainted: allocSet{},
@@ -970,19 +970,19 @@ func TestAllocSet_filterByTainted(t *testing.T) {
970970
reconnecting: allocSet{},
971971
ignore: allocSet{},
972972
lost: allocSet{
973-
// Non-terminal allocs on lost nodes are lost
973+
// Non-terminal allocs on lost nodes are down
974974
"lost1": {
975975
ID: "lost1",
976976
ClientStatus: structs.AllocClientStatusPending,
977977
Job: testJobSingle,
978-
NodeID: "lost",
978+
NodeID: "down",
979979
},
980-
// Non-terminal allocs on lost nodes are lost
980+
// Non-terminal allocs on lost nodes are down
981981
"lost2": {
982982
ID: "lost2",
983983
ClientStatus: structs.AllocClientStatusRunning,
984984
Job: testJobSingle,
985-
NodeID: "lost",
985+
NodeID: "down",
986986
},
987987
},
988988
expiring: allocSet{},

0 commit comments

Comments
 (0)