Skip to content

Commit d3facfd

Browse files
committed
Store maximum probe length in OrderedDicts
* When sorting an OrderedDict, it's possible that the probe length will exceed the maximum probe length previously defined. To prevent this, store and use the maximum probe length (and let it grow beyond the previously fixed length if needed). * Also now reuses deleted slots instead of skipping them. Previously, deleted slots were not reclaimed until the dictionary was rehashed.
1 parent 346f55a commit d3facfd

File tree

2 files changed

+63
-14
lines changed

2 files changed

+63
-14
lines changed

src/ordered_dict.jl

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# These can be changed, to trade off better performance for space
2+
const global maxallowedprobe = isdefined(Base, :maxallowedprobe) ? Base.maxallowedprobe : 16
3+
const global maxprobeshift = isdefined(Base, :maxprobeshift) ? Base.maxprobeshift : 6
4+
15
# OrderedDict
26

37
"""
@@ -11,10 +15,11 @@ mutable struct OrderedDict{K,V} <: AbstractDict{K,V}
1115
keys::Array{K,1}
1216
vals::Array{V,1}
1317
ndel::Int
18+
maxprobe::Int
1419
dirty::Bool
1520

1621
function OrderedDict{K,V}() where {K,V}
17-
new{K,V}(zeros(Int32,16), Vector{K}(), Vector{V}(), 0, false)
22+
new{K,V}(zeros(Int32,16), Vector{K}(), Vector{V}(), 0, 0, false)
1823
end
1924
function OrderedDict{K,V}(kv) where {K,V}
2025
h = OrderedDict{K,V}()
@@ -37,7 +42,7 @@ mutable struct OrderedDict{K,V} <: AbstractDict{K,V}
3742
rehash!(d)
3843
end
3944
@assert d.ndel == 0
40-
new{K,V}(copy(d.slots), copy(d.keys), copy(d.vals), 0)
45+
new{K,V}(copy(d.slots), copy(d.keys), copy(d.vals), 0, d.maxprobe, false)
4146
end
4247
end
4348
OrderedDict() = OrderedDict{Any,Any}()
@@ -104,6 +109,10 @@ function convert(::Type{OrderedDict{K,V}}, d::AbstractDict) where {K,V}
104109
end
105110
convert(::Type{OrderedDict{K,V}},d::OrderedDict{K,V}) where {K,V} = d
106111

112+
isslotempty(slot_value::Integer) = slot_value == 0
113+
isslotfilled(slot_value::Integer) = slot_value > 0
114+
isslotmissing(slot_value::Integer) = slot_value < 0
115+
107116
function rehash!(h::OrderedDict{K,V}, newsz = length(h.slots)) where {K,V}
108117
olds = h.slots
109118
keys = h.keys
@@ -122,6 +131,7 @@ function rehash!(h::OrderedDict{K,V}, newsz = length(h.slots)) where {K,V}
122131
end
123132

124133
slots = zeros(Int32, newsz)
134+
maxprobe = 0
125135

126136
if h.ndel > 0
127137
ndel0 = h.ndel
@@ -139,22 +149,24 @@ function rehash!(h::OrderedDict{K,V}, newsz = length(h.slots)) where {K,V}
139149
isdeleted = false
140150
if !ptrs
141151
iter = 0
142-
maxprobe = max(16, sz>>6)
143152
index = (hashk & (sz-1)) + 1
144-
while iter <= maxprobe
153+
while iter <= h.maxprobe
145154
si = olds[index]
146-
#si == 0 && break # shouldn't happen
147155
si == from && break
148-
si == -from && (isdeleted=true; break)
156+
# if we find si == 0, then the key was deleted and it's slot was reused/overwritten
157+
(si == -from || si == 0) && (isdeleted = true; break)
149158
index = (index & (sz-1)) + 1
150159
iter += 1
151160
end
161+
iter > h.maxprobe && (isdeleted = true) # Another case where the slot was reused/overwritten
152162
end
153163
if !isdeleted
154-
index = (hashk & (newsz-1)) + 1
164+
index0 = index = (hashk & (newsz-1)) + 1
155165
while slots[index] != 0
156166
index = (index & (newsz-1)) + 1
157167
end
168+
probe = (index - index0) & (newsz-1)
169+
probe > maxprobe && (maxprobe = probe)
158170
slots[index] = to
159171
newkeys[to] = k
160172
newvals[to] = vals[from]
@@ -172,10 +184,12 @@ function rehash!(h::OrderedDict{K,V}, newsz = length(h.slots)) where {K,V}
172184
else
173185
@inbounds for i = 1:count0
174186
k = keys[i]
175-
index = hashindex(k, newsz)
187+
index0 = index = hashindex(k, newsz)
176188
while slots[index] != 0
177189
index = (index & (newsz-1)) + 1
178190
end
191+
probe = (index - index0) & (newsz-1)
192+
probe > maxprobe && (maxprobe = probe)
179193
slots[index] = i
180194
if h.ndel > 0
181195
# if items are removed by finalizers, retry
@@ -185,6 +199,7 @@ function rehash!(h::OrderedDict{K,V}, newsz = length(h.slots)) where {K,V}
185199
end
186200

187201
h.slots = slots
202+
h.maxprobe = maxprobe
188203
return h
189204
end
190205

@@ -216,14 +231,14 @@ function ht_keyindex(h::OrderedDict{K,V}, key, direct) where {K,V}
216231
slots = h.slots
217232
sz = length(slots)
218233
iter = 0
219-
maxprobe = max(16, sz>>6)
234+
maxprobe = h.maxprobe
220235
index = hashindex(key, sz)
221236
keys = h.keys
222237

223238
@inbounds while iter <= maxprobe
224239
si = slots[index]
225-
si == 0 && break
226-
if si > 0 && isequal(key, keys[si])
240+
isslotempty(si) && break
241+
if isslotfilled(si) && isequal(key, keys[si])
227242
return ifelse(direct, oftype(index, si), index)
228243
end
229244

@@ -241,22 +256,41 @@ function ht_keyindex2(h::OrderedDict{K,V}, key) where {K,V}
241256
slots = h.slots
242257
sz = length(slots)
243258
iter = 0
244-
maxprobe = max(16, sz>>6)
259+
maxprobe = h.maxprobe
245260
index = hashindex(key, sz)
246261
keys = h.keys
262+
avail = 0
247263

248264
@inbounds while iter <= maxprobe
249265
si = slots[index]
250-
if si == 0
266+
if isslotempty(si)
267+
avail < 0 && return avail
251268
return -index
252-
elseif si > 0 && isequal(key, keys[si])
269+
end
270+
271+
if isslotmissing(si)
272+
avail == 0 && (avail = -index)
273+
elseif isequal(key, keys[si])
253274
return oftype(index, si)
254275
end
255276

256277
index = (index & (sz-1)) + 1
257278
iter += 1
258279
end
259280

281+
avail < 0 && return avail
282+
283+
# If key is not present, may need to keep searching to find slot
284+
maxallowed = max(maxallowedprobe, sz>>maxprobeshift)
285+
@inbounds while iter < maxallowed
286+
if !isslotfilled(slots[index])
287+
h.maxprobe = iter
288+
return -index
289+
end
290+
index = (index & (sz-1)) + 1
291+
iter += 1
292+
end
293+
260294
rehash!(h, length(h) > 64000 ? sz*2 : sz*4)
261295

262296
return ht_keyindex2(h, key)

test/test_ordered_dict.jl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,4 +435,19 @@ using OrderedCollections, Test
435435
@test eltype(OrderedDict(tuple(String => String, SubString => SubString))) == Pair{Type,Type}
436436
end
437437

438+
@testset "Issue #71" begin
439+
od = OrderedDict(Dict(i=>0 for i=1:158))
440+
sort!(od)
441+
@test od[158] == 0
442+
end
443+
444+
@testset "Issue #71b" begin
445+
# This is actually a simplified version of #60, which was triggered while fixing #71
446+
# It doesn't actually fail on previous versions of OrderedCollections
447+
od = OrderedDict{Int,Int}(13=>13)
448+
delete!( od, 13 )
449+
od[14]=14
450+
@test od[14] == 14
451+
end
452+
438453
end # @testset OrderedDict

0 commit comments

Comments
 (0)