Skip to content

Commit 263f89e

Browse files
committed
Merge pull request #6792 from hashicorp/b-propose-panic
scheduler: fix panic when preempting and evicting allocs
1 parent 4488559 commit 263f89e

File tree

8 files changed

+500
-376
lines changed

8 files changed

+500
-376
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ BUG FIXES:
2626

2727
* core: Ignore `server` config values if `server` is disabled [[GH-6047](https://github.com/hashicorp/nomad/issues/6047)]
2828
* core: Added `semver` constraint for strict Semver 2.0 version comparisons [[GH-6699](https://github.com/hashicorp/nomad/issues/6699)]
29+
* core: Fixed server panic caused by a plan evicting and preempting allocs on a node [[GH-6792](https://github.com/hashicorp/nomad/issues/6792)]
2930
* api: Return a 404 if endpoint not found instead of redirecting to /ui/ [[GH-6658](https://github.com/hashicorp/nomad/issues/6658)]
3031
* api: Decompress web socket response body if gzipped on error responses [[GH-6650](https://github.com/hashicorp/nomad/issues/6650)]
3132
* api: Fixed a bug where some FS/Allocation API endpoints didn't return error messages [[GH-6427](https://github.com/hashicorp/nomad/issues/6427)]

scheduler/context.go

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@ type Context interface {
2525
// Reset is invoked after making a placement
2626
Reset()
2727

28-
// ProposedAllocs returns the proposed allocations for a node
29-
// which is the existing allocations, removing evictions, and
30-
// adding any planned placements.
28+
// ProposedAllocs returns the proposed allocations for a node which are
29+
// the existing allocations, removing evictions, and adding any planned
30+
// placements.
3131
ProposedAllocs(nodeID string) ([]*structs.Allocation, error)
3232

3333
// RegexpCache is a cache of regular expressions
@@ -120,22 +120,21 @@ func (e *EvalContext) Reset() {
120120
func (e *EvalContext) ProposedAllocs(nodeID string) ([]*structs.Allocation, error) {
121121
// Get the existing allocations that are non-terminal
122122
ws := memdb.NewWatchSet()
123-
existingAlloc, err := e.state.AllocsByNodeTerminal(ws, nodeID, false)
123+
proposed, err := e.state.AllocsByNodeTerminal(ws, nodeID, false)
124124
if err != nil {
125125
return nil, err
126126
}
127127

128128
// Determine the proposed allocation by first removing allocations
129129
// that are planned evictions and adding the new allocations.
130-
proposed := existingAlloc
131130
if update := e.plan.NodeUpdate[nodeID]; len(update) > 0 {
132-
proposed = structs.RemoveAllocs(existingAlloc, update)
131+
proposed = structs.RemoveAllocs(proposed, update)
133132
}
134133

135134
// Remove any allocs that are being preempted
136135
nodePreemptedAllocs := e.plan.NodePreemptions[nodeID]
137136
if len(nodePreemptedAllocs) > 0 {
138-
proposed = structs.RemoveAllocs(existingAlloc, nodePreemptedAllocs)
137+
proposed = structs.RemoveAllocs(proposed, nodePreemptedAllocs)
139138
}
140139

141140
// We create an index of the existing allocations so that if an inplace

scheduler/context_test.go

Lines changed: 113 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,9 @@ func TestEvalContext_ProposedAlloc(t *testing.T) {
106106
ClientStatus: structs.AllocClientStatusPending,
107107
TaskGroup: "web",
108108
}
109-
noErr(t, state.UpsertJobSummary(998, mock.JobSummary(alloc1.JobID)))
110-
noErr(t, state.UpsertJobSummary(999, mock.JobSummary(alloc2.JobID)))
111-
noErr(t, state.UpsertAllocs(1000, []*structs.Allocation{alloc1, alloc2}))
109+
require.NoError(t, state.UpsertJobSummary(998, mock.JobSummary(alloc1.JobID)))
110+
require.NoError(t, state.UpsertJobSummary(999, mock.JobSummary(alloc2.JobID)))
111+
require.NoError(t, state.UpsertAllocs(1000, []*structs.Allocation{alloc1, alloc2}))
112112

113113
// Add a planned eviction to alloc1
114114
plan := ctx.Plan()
@@ -149,6 +149,116 @@ func TestEvalContext_ProposedAlloc(t *testing.T) {
149149
}
150150
}
151151

152+
// TestEvalContext_ProposedAlloc_EvictPreempt asserts both Evicted and
153+
// Preempted allocs are removed from the allocs proposed for a node.
154+
//
155+
// See https://github.com/hashicorp/nomad/issues/6787
156+
//
157+
func TestEvalContext_ProposedAlloc_EvictPreempt(t *testing.T) {
158+
t.Parallel()
159+
state, ctx := testContext(t)
160+
nodes := []*RankedNode{
161+
{
162+
Node: &structs.Node{
163+
ID: uuid.Generate(),
164+
NodeResources: &structs.NodeResources{
165+
Cpu: structs.NodeCpuResources{
166+
CpuShares: 1024 * 3,
167+
},
168+
Memory: structs.NodeMemoryResources{
169+
MemoryMB: 1024 * 3,
170+
},
171+
},
172+
},
173+
},
174+
}
175+
176+
// Add existing allocations
177+
j1, j2, j3 := mock.Job(), mock.Job(), mock.Job()
178+
allocEvict := &structs.Allocation{
179+
ID: uuid.Generate(),
180+
Namespace: structs.DefaultNamespace,
181+
EvalID: uuid.Generate(),
182+
NodeID: nodes[0].Node.ID,
183+
JobID: j1.ID,
184+
Job: j1,
185+
AllocatedResources: &structs.AllocatedResources{
186+
Tasks: map[string]*structs.AllocatedTaskResources{
187+
"web": {
188+
Cpu: structs.AllocatedCpuResources{
189+
CpuShares: 1024,
190+
},
191+
Memory: structs.AllocatedMemoryResources{
192+
MemoryMB: 1024,
193+
},
194+
},
195+
},
196+
},
197+
DesiredStatus: structs.AllocDesiredStatusRun,
198+
ClientStatus: structs.AllocClientStatusPending,
199+
TaskGroup: "web",
200+
}
201+
allocPreempt := &structs.Allocation{
202+
ID: uuid.Generate(),
203+
Namespace: structs.DefaultNamespace,
204+
EvalID: uuid.Generate(),
205+
NodeID: nodes[0].Node.ID,
206+
JobID: j2.ID,
207+
Job: j2,
208+
AllocatedResources: &structs.AllocatedResources{
209+
Tasks: map[string]*structs.AllocatedTaskResources{
210+
"web": {
211+
Cpu: structs.AllocatedCpuResources{
212+
CpuShares: 1024,
213+
},
214+
Memory: structs.AllocatedMemoryResources{
215+
MemoryMB: 1024,
216+
},
217+
},
218+
},
219+
},
220+
DesiredStatus: structs.AllocDesiredStatusRun,
221+
ClientStatus: structs.AllocClientStatusPending,
222+
TaskGroup: "web",
223+
}
224+
allocPropose := &structs.Allocation{
225+
ID: uuid.Generate(),
226+
Namespace: structs.DefaultNamespace,
227+
EvalID: uuid.Generate(),
228+
NodeID: nodes[0].Node.ID,
229+
JobID: j3.ID,
230+
Job: j3,
231+
AllocatedResources: &structs.AllocatedResources{
232+
Tasks: map[string]*structs.AllocatedTaskResources{
233+
"web": {
234+
Cpu: structs.AllocatedCpuResources{
235+
CpuShares: 1024,
236+
},
237+
Memory: structs.AllocatedMemoryResources{
238+
MemoryMB: 1024,
239+
},
240+
},
241+
},
242+
},
243+
DesiredStatus: structs.AllocDesiredStatusRun,
244+
ClientStatus: structs.AllocClientStatusPending,
245+
TaskGroup: "web",
246+
}
247+
require.NoError(t, state.UpsertJobSummary(998, mock.JobSummary(allocEvict.JobID)))
248+
require.NoError(t, state.UpsertJobSummary(999, mock.JobSummary(allocPreempt.JobID)))
249+
require.NoError(t, state.UpsertJobSummary(999, mock.JobSummary(allocPropose.JobID)))
250+
require.NoError(t, state.UpsertAllocs(1000, []*structs.Allocation{allocEvict, allocPreempt, allocPropose}))
251+
252+
// Plan to evict one alloc and preempt another
253+
plan := ctx.Plan()
254+
plan.NodePreemptions[nodes[0].Node.ID] = []*structs.Allocation{allocEvict}
255+
plan.NodeUpdate[nodes[0].Node.ID] = []*structs.Allocation{allocPreempt}
256+
257+
proposed, err := ctx.ProposedAllocs(nodes[0].Node.ID)
258+
require.NoError(t, err)
259+
require.Len(t, proposed, 1)
260+
}
261+
152262
func TestEvalEligibility_JobStatus(t *testing.T) {
153263
e := NewEvalEligibility()
154264
cc := "v1:100"

0 commit comments

Comments
 (0)