2 changes: 1 addition & 1 deletion client/client.go
@@ -2901,7 +2901,7 @@ func (c *Client) updateAlloc(update *structs.Allocation) {
 	// Reconnect unknown allocations if they were updated and are not terminal.
 	reconnect := update.ClientStatus == structs.AllocClientStatusUnknown &&
 		update.AllocModifyIndex > alloc.AllocModifyIndex &&
-		(!update.ServerTerminalStatus() || !alloc.PreventReplaceOnDisconnect())
+		!update.ServerTerminalStatus()
 	if reconnect {
 		err = ar.Reconnect(update)
 		if err != nil {
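The gate above now reduces to a pure predicate over the updated and tracked allocations. A minimal sketch of the post-change logic, assuming this branch's structs package; shouldReconnect is a hypothetical name for illustration, the real check lives inline in Client.updateAlloc:

package main

import (
	"fmt"

	"github.com/hashicorp/nomad/nomad/structs"
)

// shouldReconnect mirrors the simplified gate: reconnect only when the
// server marked the alloc unknown, the update is newer than the copy the
// client tracks, and the update is not server-terminal.
func shouldReconnect(update, tracked *structs.Allocation) bool {
	return update.ClientStatus == structs.AllocClientStatusUnknown &&
		update.AllocModifyIndex > tracked.AllocModifyIndex &&
		!update.ServerTerminalStatus()
}

func main() {
	tracked := &structs.Allocation{AllocModifyIndex: 10}
	update := &structs.Allocation{
		ClientStatus:     structs.AllocClientStatusUnknown,
		DesiredStatus:    structs.AllocDesiredStatusRun,
		AllocModifyIndex: 11,
	}
	fmt.Println(shouldReconnect(update, tracked)) // true: unknown, newer, non-terminal
}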
6 changes: 6 additions & 0 deletions command/job_status.go
@@ -11,6 +11,7 @@ import (

 	"github.com/hashicorp/nomad/api"
 	"github.com/hashicorp/nomad/api/contexts"
+	"github.com/hashicorp/nomad/nomad/structs"
 	"github.com/posener/complete"
 )

@@ -752,6 +753,11 @@ func (c *JobStatusCommand) outputReschedulingEvals(client *api.Client, job *api.
 	for _, taskGroup := range taskGroups {
 		evalID := followUpEvalIds[taskGroup]
 		evaluation, _, err := client.Evaluations().Info(evalID, nil)
+		// We don't want to show a reschedule if it's some other delayed eval
+		// not related to a reschedule (skip only when the lookup succeeded).
+		if evaluation != nil && evaluation.TriggeredBy != structs.EvalTriggerAllocReschedule {
+			continue
+		}
 		// Eval time is not critical output,
 		// so don't return it on errors, if it's not set, or if it's already in the past
 		if err != nil || evaluation.WaitUntil.IsZero() || time.Now().After(evaluation.WaitUntil) {
2 changes: 1 addition & 1 deletion nomad/plan_apply.go
@@ -806,7 +806,7 @@ func isValidForDisconnectedNode(plan *structs.Plan, nodeID string) bool {
 // as non reschedulables when lost or if the allocs are being updated to lost.
 func isValidForDownNode(plan *structs.Plan, nodeID string) bool {
 	for _, alloc := range plan.NodeAllocation[nodeID] {
-		if !(alloc.ClientStatus == structs.AllocClientStatusUnknown && alloc.PreventReplaceOnDisconnect()) &&
+		if !(alloc.ClientStatus == structs.AllocClientStatusUnknown && !alloc.ReplaceOnDisconnect()) &&
			(alloc.ClientStatus != structs.AllocClientStatusLost) {
 			return false
 		}
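Unwinding the double negation: a planned alloc is valid for a down node only when it is an unknown alloc whose group opts out of replacement, or when it is being marked lost. A hypothetical equivalent predicate, for illustration only; validForDownNode is not part of the diff:

package sketch

import "github.com/hashicorp/nomad/nomad/structs"

// validForDownNode restates the loop condition positively: accept the
// alloc when it is an unknown alloc that must not be replaced, or when
// its client status is lost (or being set to lost).
func validForDownNode(alloc *structs.Allocation) bool {
	keptUnknown := alloc.ClientStatus == structs.AllocClientStatusUnknown &&
		!alloc.ReplaceOnDisconnect()
	return keptUnknown || alloc.ClientStatus == structs.AllocClientStatusLost
}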
31 changes: 19 additions & 12 deletions nomad/structs/alloc.go
@@ -550,14 +550,26 @@ func (a *Allocation) DisconnectTimeout(now time.Time) time.Time {

 	tg := a.Job.LookupTaskGroup(a.TaskGroup)

-	timeout := tg.GetDisconnectLostTimeout()
+	timeout := tg.GetDisconnectLostAfter()
 	if timeout == 0 {
 		return now
 	}

 	return now.Add(timeout)
 }

+// DisconnectLostAfter is a helper to get the Disconnect.LostAfter for an allocation.
+func (a *Allocation) DisconnectLostAfter() time.Duration {
+	if a.Job != nil {
+		tg := a.Job.LookupTaskGroup(a.TaskGroup)
+		if tg != nil {
+			return tg.GetDisconnectLostAfter()
+		}
+	}
+
+	return 0
+}
+
 // SupportsDisconnectedClients determines whether both the server and the task group
 // are configured to allow the allocation to reconnect after network connectivity
 // has been lost and then restored.
@@ -569,24 +581,24 @@ func (a *Allocation) SupportsDisconnectedClients(serverSupportsDisconnectedClien
 	if a.Job != nil {
 		tg := a.Job.LookupTaskGroup(a.TaskGroup)
 		if tg != nil {
-			return tg.GetDisconnectLostTimeout() != 0
+			return tg.GetDisconnectLostAfter() != 0
 		}
 	}

 	return false
 }

-// PreventReplaceOnDisconnect determines if an alloc allows to have a replacement
+// ReplaceOnDisconnect determines if an alloc can be replaced
 // when Disconnected.
-func (a *Allocation) PreventReplaceOnDisconnect() bool {
+func (a *Allocation) ReplaceOnDisconnect() bool {
 	if a.Job != nil {
 		tg := a.Job.LookupTaskGroup(a.TaskGroup)
 		if tg != nil {
-			return !tg.Replace()
+			return tg.Replace()
 		}
 	}

-	return false
+	return true
 }

 // NextDelay returns a duration after which the allocation can be rescheduled.
@@ -802,12 +814,7 @@ func (a *Allocation) Expired(now time.Time) bool {
 		return false
 	}

-	timeout := tg.GetDisconnectLostTimeout()
-	if timeout == 0 && tg.Replace() {
-		return false
-	}
-
-	expiry := lastUnknown.Add(timeout)
+	expiry := lastUnknown.Add(tg.GetDisconnectLostAfter())
 	return expiry.Sub(now) <= 0
 }
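Note that the rename from PreventReplaceOnDisconnect to ReplaceOnDisconnect also flips the fallback: with no job, task group, or disconnect block, the helper now answers true (replaceable). A minimal sketch of the new default, assuming TaskGroup.Replace() returns true when no disconnect block is set, as its nil-handling in structs.go suggests:

package main

import (
	"fmt"

	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	// Task group with no disconnect block configured.
	job := &structs.Job{
		TaskGroups: []*structs.TaskGroup{{Name: "web"}},
	}
	alloc := &structs.Allocation{Job: job, TaskGroup: "web"}

	// Callers that previously wrote !alloc.PreventReplaceOnDisconnect()
	// now read the same answer directly.
	fmt.Println(alloc.ReplaceOnDisconnect()) // true: replaceable by default
}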
4 changes: 2 additions & 2 deletions nomad/structs/structs.go
@@ -7631,9 +7631,9 @@ func (tg *TaskGroup) Replace() bool {
 	return *tg.Disconnect.Replace
 }

-// GetDisconnectLostTimeout is a helper meant to simplify the logic for
+// GetDisconnectLostAfter is a helper meant to simplify the logic for
 // getting the Disconnect.LostAfter field of a task group.
-func (tg *TaskGroup) GetDisconnectLostTimeout() time.Duration {
+func (tg *TaskGroup) GetDisconnectLostAfter() time.Duration {
 	if tg.Disconnect != nil {
 		return tg.Disconnect.LostAfter
 	}
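A short usage sketch of the renamed accessor; the DisconnectStrategy literal below is illustrative and assumes the field names shown in this diff:

package main

import (
	"fmt"
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	tg := &structs.TaskGroup{
		Name: "web",
		Disconnect: &structs.DisconnectStrategy{
			LostAfter: 5 * time.Minute,
		},
	}
	fmt.Println(tg.GetDisconnectLostAfter()) // 5m0s

	// Nil-safe: a group without a disconnect block reports zero.
	fmt.Println((&structs.TaskGroup{Name: "db"}).GetDisconnectLostAfter()) // 0s
}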
52 changes: 26 additions & 26 deletions scheduler/reconciler/allocs_test.go
@@ -60,8 +60,8 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 			ID:            "draining",
 			DrainStrategy: mock.DrainNode().DrainStrategy,
 		},
-		"lost": {
-			ID:     "lost",
+		"down": {
+			ID:     "down",
 			Status: structs.NodeStatusDown,
 		},
 		"nil": nil,
@@ -162,12 +162,12 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 				Job:    testJob,
 				NodeID: "draining",
 			},
-			// Terminal allocs are always untainted, even on lost nodes
+			// Terminal allocs are always untainted, even on down nodes
 			"untainted4": {
 				ID:           "untainted4",
 				ClientStatus: structs.AllocClientStatusComplete,
 				Job:          testJob,
-				NodeID:       "lost",
+				NodeID:       "down",
 			},
 			// Non-terminal alloc with migrate=true should migrate on a draining node
 			"migrating1": {
@@ -207,12 +207,12 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 				Job:    testJob,
 				NodeID: "draining",
 			},
-			// Terminal allocs are always untainted, even on lost nodes
+			// Terminal allocs are always untainted, even on down nodes
 			"untainted4": {
 				ID:           "untainted4",
 				ClientStatus: structs.AllocClientStatusComplete,
 				Job:          testJob,
-				NodeID:       "lost",
+				NodeID:       "down",
 			},
 		},
 		migrate: allocSet{
@@ -247,19 +247,19 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 		// false failures, so don't perform that test if in this case.
 		skipNilNodeTest: true,
 		all: allocSet{
-			// Non-terminal allocs on lost nodes are lost
+			// Non-terminal allocs on down nodes are lost
 			"lost1": {
 				ID:           "lost1",
 				ClientStatus: structs.AllocClientStatusPending,
 				Job:          testJob,
-				NodeID:       "lost",
+				NodeID:       "down",
 			},
-			// Non-terminal allocs on lost nodes are lost
+			// Non-terminal allocs on down nodes are lost
 			"lost2": {
 				ID:           "lost2",
 				ClientStatus: structs.AllocClientStatusRunning,
 				Job:          testJob,
-				NodeID:       "lost",
+				NodeID:       "down",
 			},
 		},
 		untainted: allocSet{},
@@ -268,19 +268,19 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 		reconnecting: allocSet{},
 		ignore:       allocSet{},
 		lost: allocSet{
-			// Non-terminal allocs on lost nodes are lost
+			// Non-terminal allocs on down nodes are lost
 			"lost1": {
 				ID:           "lost1",
 				ClientStatus: structs.AllocClientStatusPending,
 				Job:          testJob,
-				NodeID:       "lost",
+				NodeID:       "down",
 			},
-			// Non-terminal allocs on lost nodes are lost
+			// Non-terminal allocs on down nodes are lost
 			"lost2": {
 				ID:           "lost2",
 				ClientStatus: structs.AllocClientStatusRunning,
 				Job:          testJob,
-				NodeID:       "lost",
+				NodeID:       "down",
 			},
 		},
 		expiring: allocSet{},
@@ -864,12 +864,12 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 				Job:    testJobSingle,
 				NodeID: "draining",
 			},
-			// Terminal allocs are always untainted, even on lost nodes
+			// Terminal allocs are always untainted, even on down nodes
 			"untainted4": {
 				ID:           "untainted4",
 				ClientStatus: structs.AllocClientStatusComplete,
 				Job:          testJobSingle,
-				NodeID:       "lost",
+				NodeID:       "down",
 			},
 			// Non-terminal alloc with migrate=true should migrate on a draining node
 			"migrating1": {
@@ -909,12 +909,12 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 				Job:    testJobSingle,
 				NodeID: "draining",
 			},
-			// Terminal allocs are always untainted, even on lost nodes
+			// Terminal allocs are always untainted, even on down nodes
 			"untainted4": {
 				ID:           "untainted4",
 				ClientStatus: structs.AllocClientStatusComplete,
 				Job:          testJobSingle,
-				NodeID:       "lost",
+				NodeID:       "down",
 			},
 		},
 		migrate: allocSet{
@@ -949,19 +949,19 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 		// false failures, so don't perform that test if in this case.
 		skipNilNodeTest: true,
 		all: allocSet{
-			// Non-terminal allocs on lost nodes are lost
+			// Non-terminal allocs on down nodes are lost
 			"lost1": {
 				ID:           "lost1",
 				ClientStatus: structs.AllocClientStatusPending,
 				Job:          testJobSingle,
-				NodeID:       "lost",
+				NodeID:       "down",
 			},
-			// Non-terminal allocs on lost nodes are lost
+			// Non-terminal allocs on down nodes are lost
 			"lost2": {
 				ID:           "lost2",
 				ClientStatus: structs.AllocClientStatusRunning,
 				Job:          testJobSingle,
-				NodeID:       "lost",
+				NodeID:       "down",
 			},
 		},
 		untainted: allocSet{},
@@ -970,19 +970,19 @@ func TestAllocSet_filterByTainted(t *testing.T) {
 		reconnecting: allocSet{},
 		ignore:       allocSet{},
 		lost: allocSet{
-			// Non-terminal allocs on lost nodes are lost
+			// Non-terminal allocs on down nodes are lost
 			"lost1": {
 				ID:           "lost1",
 				ClientStatus: structs.AllocClientStatusPending,
 				Job:          testJobSingle,
-				NodeID:       "lost",
+				NodeID:       "down",
 			},
-			// Non-terminal allocs on lost nodes are lost
+			// Non-terminal allocs on down nodes are lost
 			"lost2": {
 				ID:           "lost2",
 				ClientStatus: structs.AllocClientStatusRunning,
 				Job:          testJobSingle,
-				NodeID:       "lost",
+				NodeID:       "down",
 			},
 		},
 		expiring: allocSet{},