Skip to content

Commit af960e6

Browse files
authored
Merge pull request #72 from JuliaCollections/kms/fix-key-corruption
Store maximum probe length in OrderedDicts
2 parents 346f55a + d3facfd commit af960e6

File tree

2 files changed

+63
-14
lines changed

2 files changed

+63
-14
lines changed

src/ordered_dict.jl

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# These can be changed, to trade off better performance for space
2+
const global maxallowedprobe = isdefined(Base, :maxallowedprobe) ? Base.maxallowedprobe : 16
3+
const global maxprobeshift = isdefined(Base, :maxprobeshift) ? Base.maxprobeshift : 6
4+
15
# OrderedDict
26

37
"""
@@ -11,10 +15,11 @@ mutable struct OrderedDict{K,V} <: AbstractDict{K,V}
1115
keys::Array{K,1}
1216
vals::Array{V,1}
1317
ndel::Int
18+
maxprobe::Int
1419
dirty::Bool
1520

1621
function OrderedDict{K,V}() where {K,V}
17-
new{K,V}(zeros(Int32,16), Vector{K}(), Vector{V}(), 0, false)
22+
new{K,V}(zeros(Int32,16), Vector{K}(), Vector{V}(), 0, 0, false)
1823
end
1924
function OrderedDict{K,V}(kv) where {K,V}
2025
h = OrderedDict{K,V}()
@@ -37,7 +42,7 @@ mutable struct OrderedDict{K,V} <: AbstractDict{K,V}
3742
rehash!(d)
3843
end
3944
@assert d.ndel == 0
40-
new{K,V}(copy(d.slots), copy(d.keys), copy(d.vals), 0)
45+
new{K,V}(copy(d.slots), copy(d.keys), copy(d.vals), 0, d.maxprobe, false)
4146
end
4247
end
4348
OrderedDict() = OrderedDict{Any,Any}()
@@ -104,6 +109,10 @@ function convert(::Type{OrderedDict{K,V}}, d::AbstractDict) where {K,V}
104109
end
105110
convert(::Type{OrderedDict{K,V}},d::OrderedDict{K,V}) where {K,V} = d
106111

112+
isslotempty(slot_value::Integer) = slot_value == 0
113+
isslotfilled(slot_value::Integer) = slot_value > 0
114+
isslotmissing(slot_value::Integer) = slot_value < 0
115+
107116
function rehash!(h::OrderedDict{K,V}, newsz = length(h.slots)) where {K,V}
108117
olds = h.slots
109118
keys = h.keys
@@ -122,6 +131,7 @@ function rehash!(h::OrderedDict{K,V}, newsz = length(h.slots)) where {K,V}
122131
end
123132

124133
slots = zeros(Int32, newsz)
134+
maxprobe = 0
125135

126136
if h.ndel > 0
127137
ndel0 = h.ndel
@@ -139,22 +149,24 @@ function rehash!(h::OrderedDict{K,V}, newsz = length(h.slots)) where {K,V}
139149
isdeleted = false
140150
if !ptrs
141151
iter = 0
142-
maxprobe = max(16, sz>>6)
143152
index = (hashk & (sz-1)) + 1
144-
while iter <= maxprobe
153+
while iter <= h.maxprobe
145154
si = olds[index]
146-
#si == 0 && break # shouldn't happen
147155
si == from && break
148-
si == -from && (isdeleted=true; break)
156+
# if we find si == 0, then the key was deleted and it's slot was reused/overwritten
157+
(si == -from || si == 0) && (isdeleted = true; break)
149158
index = (index & (sz-1)) + 1
150159
iter += 1
151160
end
161+
iter > h.maxprobe && (isdeleted = true) # Another case where the slot was reused/overwritten
152162
end
153163
if !isdeleted
154-
index = (hashk & (newsz-1)) + 1
164+
index0 = index = (hashk & (newsz-1)) + 1
155165
while slots[index] != 0
156166
index = (index & (newsz-1)) + 1
157167
end
168+
probe = (index - index0) & (newsz-1)
169+
probe > maxprobe && (maxprobe = probe)
158170
slots[index] = to
159171
newkeys[to] = k
160172
newvals[to] = vals[from]
@@ -172,10 +184,12 @@ function rehash!(h::OrderedDict{K,V}, newsz = length(h.slots)) where {K,V}
172184
else
173185
@inbounds for i = 1:count0
174186
k = keys[i]
175-
index = hashindex(k, newsz)
187+
index0 = index = hashindex(k, newsz)
176188
while slots[index] != 0
177189
index = (index & (newsz-1)) + 1
178190
end
191+
probe = (index - index0) & (newsz-1)
192+
probe > maxprobe && (maxprobe = probe)
179193
slots[index] = i
180194
if h.ndel > 0
181195
# if items are removed by finalizers, retry
@@ -185,6 +199,7 @@ function rehash!(h::OrderedDict{K,V}, newsz = length(h.slots)) where {K,V}
185199
end
186200

187201
h.slots = slots
202+
h.maxprobe = maxprobe
188203
return h
189204
end
190205

@@ -216,14 +231,14 @@ function ht_keyindex(h::OrderedDict{K,V}, key, direct) where {K,V}
216231
slots = h.slots
217232
sz = length(slots)
218233
iter = 0
219-
maxprobe = max(16, sz>>6)
234+
maxprobe = h.maxprobe
220235
index = hashindex(key, sz)
221236
keys = h.keys
222237

223238
@inbounds while iter <= maxprobe
224239
si = slots[index]
225-
si == 0 && break
226-
if si > 0 && isequal(key, keys[si])
240+
isslotempty(si) && break
241+
if isslotfilled(si) && isequal(key, keys[si])
227242
return ifelse(direct, oftype(index, si), index)
228243
end
229244

@@ -241,22 +256,41 @@ function ht_keyindex2(h::OrderedDict{K,V}, key) where {K,V}
241256
slots = h.slots
242257
sz = length(slots)
243258
iter = 0
244-
maxprobe = max(16, sz>>6)
259+
maxprobe = h.maxprobe
245260
index = hashindex(key, sz)
246261
keys = h.keys
262+
avail = 0
247263

248264
@inbounds while iter <= maxprobe
249265
si = slots[index]
250-
if si == 0
266+
if isslotempty(si)
267+
avail < 0 && return avail
251268
return -index
252-
elseif si > 0 && isequal(key, keys[si])
269+
end
270+
271+
if isslotmissing(si)
272+
avail == 0 && (avail = -index)
273+
elseif isequal(key, keys[si])
253274
return oftype(index, si)
254275
end
255276

256277
index = (index & (sz-1)) + 1
257278
iter += 1
258279
end
259280

281+
avail < 0 && return avail
282+
283+
# If key is not present, may need to keep searching to find slot
284+
maxallowed = max(maxallowedprobe, sz>>maxprobeshift)
285+
@inbounds while iter < maxallowed
286+
if !isslotfilled(slots[index])
287+
h.maxprobe = iter
288+
return -index
289+
end
290+
index = (index & (sz-1)) + 1
291+
iter += 1
292+
end
293+
260294
rehash!(h, length(h) > 64000 ? sz*2 : sz*4)
261295

262296
return ht_keyindex2(h, key)

test/test_ordered_dict.jl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,4 +435,19 @@ using OrderedCollections, Test
435435
@test eltype(OrderedDict(tuple(String => String, SubString => SubString))) == Pair{Type,Type}
436436
end
437437

438+
@testset "Issue #71" begin
439+
od = OrderedDict(Dict(i=>0 for i=1:158))
440+
sort!(od)
441+
@test od[158] == 0
442+
end
443+
444+
@testset "Issue #71b" begin
445+
# This is actually a simplified version of #60, which was triggered while fixing #71
446+
# It doesn't actually fail on previous versions of OrderedCollections
447+
od = OrderedDict{Int,Int}(13=>13)
448+
delete!( od, 13 )
449+
od[14]=14
450+
@test od[14] == 14
451+
end
452+
438453
end # @testset OrderedDict

0 commit comments

Comments
 (0)