Skip to content

Commit 2b655fc

Browse files
Merge pull request #8872 from ThomasWaldmann/chunker-params-reject-even-window-size
ChunkerParams: reject even window size for buzhash, fixes #8868
2 parents 6fc0cd5 + 7072f62 commit 2b655fc

File tree

5 files changed: 8 additions and 6 deletions.

docs/internals/data-structures.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,7 @@ can be used to tune the chunker parameters, the default is:
462462
- CHUNK_MIN_EXP = 19 (minimum chunk size = 2^19 B = 512 kiB)
463463
- CHUNK_MAX_EXP = 23 (maximum chunk size = 2^23 B = 8 MiB)
464464
- HASH_MASK_BITS = 21 (target chunk size ~= 2^21 B = 2 MiB)
465-
- HASH_WINDOW_SIZE = 4095 [B] (`0xFFF`)
465+
- HASH_WINDOW_SIZE = 4095 [B] (`0xFFF`) (must be an odd number)
466466

467467
The buzhash table is altered by XORing it with a seed randomly generated once
468468
for the repository, and stored encrypted in the keyfile. This is to prevent

docs/misc/create_chunker-params.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ determined by the window's contents rather than the min/max. chunk size).
1818
Default: 21 (statistically, chunks will be about 2^21 == 2MiB in size)
1919

2020
HASH_WINDOW_SIZE: the size of the window used for the rolling hash computation.
21-
Default: 4095B
21+
Must be an odd number. Default: 4095B
2222

2323

2424
Trying it out
@@ -114,4 +114,3 @@ $ ls -l /extra/repo-xl/index*
114114

115115
$ du -sk /extra/repo-xl/
116116
14253464 /extra/repo-xl/
117-

src/borg/helpers/parseformat.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,8 @@ def ChunkerParams(s):
201201
raise argparse.ArgumentTypeError(
202202
"max. chunk size exponent must not be more than 23 (2^23 = 8MiB max. chunk size)"
203203
)
204+
if window_size % 2 == 0:
205+
raise argparse.ArgumentTypeError("window_size must be an uneven (odd) number")
204206
return CH_BUZHASH, chunk_min, chunk_max, chunk_mask, window_size
205207
raise argparse.ArgumentTypeError("invalid chunker params")
206208

src/borg/testsuite/archiver/recreate_cmd_test.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ def test_recreate_rechunkify(archivers, request):
138138
fd.write(b"a" * 280)
139139
fd.write(b"b" * 280)
140140
cmd(archiver, "repo-create", RK_ENCRYPTION)
141-
cmd(archiver, "create", "test1", "input", "--chunker-params", "7,9,8,128")
141+
cmd(archiver, "create", "test1", "input", "--chunker-params", "7,9,8,127")
142142
cmd(archiver, "create", "test2", "input", "--files-cache=disabled")
143143
num_chunks1 = int(cmd(archiver, "list", "test1", "input/large_file", "--format", "{num_chunks}"))
144144
num_chunks2 = int(cmd(archiver, "list", "test2", "input/large_file", "--format", "{num_chunks}"))
@@ -159,7 +159,7 @@ def test_recreate_fixed_rechunkify(archivers, request):
159159
with open(os.path.join(archiver.input_path, "file"), "wb") as fd:
160160
fd.write(b"a" * 8192)
161161
cmd(archiver, "repo-create", RK_ENCRYPTION)
162-
cmd(archiver, "create", "test", "input", "--chunker-params", "7,9,8,128")
162+
cmd(archiver, "create", "test", "input", "--chunker-params", "7,9,8,127")
163163
output = cmd(archiver, "list", "test", "input/file", "--format", "{num_chunks}")
164164
num_chunks = int(output)
165165
assert num_chunks > 2
@@ -175,7 +175,7 @@ def test_recreate_no_rechunkify(archivers, request):
175175
fd.write(b"a" * 8192)
176176
cmd(archiver, "repo-create", RK_ENCRYPTION)
177177
# first create an archive with non-default chunker params:
178-
cmd(archiver, "create", "test", "input", "--chunker-params", "7,9,8,128")
178+
cmd(archiver, "create", "test", "input", "--chunker-params", "7,9,8,127")
179179
output = cmd(archiver, "list", "test", "input/file", "--format", "{num_chunks}")
180180
num_chunks = int(output)
181181
# now recreate the archive and do NOT specify chunker params:

src/borg/testsuite/helpers/parseformat_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,7 @@ def test_valid_chunkerparams(chunker_params, expected_return):
596596
"buzhash,5,7,6,4095", # too small min. size
597597
"buzhash,19,24,21,4095", # too big max. size
598598
"buzhash,23,19,21,4095", # violates min <= mask <= max
599+
"buzhash,19,23,21,4096", # even window size
599600
"fixed,63", # too small block size
600601
"fixed,%d,%d" % (MAX_DATA_SIZE + 1, 4096), # too big block size
601602
"fixed,%d,%d" % (4096, MAX_DATA_SIZE + 1), # too big header size

0 commit comments

Comments
 (0)