Skip to content

Commit c21a6df

Browse files
committed
repozo: integrate a quick option for the incremental recover
The default behavior is now to check the integrity of the whole existing recovered Data.fs before restoring incrementally onto it. The quick option verifies only the last chunk previously recovered into the Data.fs before restoring incrementally. This saves many reads.
1 parent f5eacd0 commit c21a6df

File tree

2 files changed

+120
-26
lines changed

2 files changed

+120
-26
lines changed

src/ZODB/scripts/repozo.py

Lines changed: 38 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,13 @@
7979
ZODB file. A full recover will always be done if a pack has occured
8080
since the last incremental backup.
8181
82+
-Q / --quick
83+
Verify via md5 checksum only the last incremental recovered of the
84+
output file. This reduces the disk i/o at the (theoretical) cost of
85+
inconsistency. This is a probabilistic way of determining whether a
86+
full recover is necessary. This argument is ignored when -F / --full
87+
is used.
88+
8289
-w
8390
--with-verify
8491
Verify on the fly the backup files on recovering. This option runs
@@ -267,6 +274,9 @@ class Options:
267274
if options.killold:
268275
log('--kill-old-on-full option is ignored in recover mode')
269276
options.killold = False
277+
if options.full and options.quick:
278+
log('--quick option is ignored if --full option is used')
279+
options.quick = None
270280
else:
271281
assert options.mode == VERIFY
272282
if options.date is not None:
@@ -752,42 +762,49 @@ def do_incremental_recover(options, repofiles):
752762
with open(options.output, 'r+b') as outfp:
753763
outfp.seek(0, 2)
754764
initial_length = outfp.tell()
755-
with open(datfile) as fp:
756-
previous_chunk = None
765+
766+
error = ''
767+
previous_chunk = None
768+
with open(datfile) as fp, open(options.output, 'r+b') as outfp:
757769
for line in fp:
758-
fn, startpos, endpos, _ = chunk = line.split()
770+
fn, startpos, endpos, check_sum = chunk = line.split()
759771
startpos = int(startpos)
760772
endpos = int(endpos)
761773
if endpos > initial_length:
762774
break
775+
if not options.quick:
776+
if check_sum != checksum(outfp, endpos - startpos):
777+
error = ('Target file is not consistent with backup %s, '
778+
'falling back to a full recover.') % fn
779+
break
763780
previous_chunk = chunk
764-
765-
if previous_chunk is None:
781+
if error:
782+
log(error)
783+
return do_full_recover(options, repofiles)
784+
elif previous_chunk is None:
766785
log('Target file smaller than full backup, '
767786
'falling back to a full recover.')
768787
return do_full_recover(options, repofiles)
769-
if endpos < initial_length:
788+
elif endpos < initial_length:
770789
log('Target file is larger than latest backup, '
771790
'falling back to a full recover.')
772791
return do_full_recover(options, repofiles)
773-
check_startpos = int(previous_chunk[1])
774-
check_endpos = int(previous_chunk[2])
775-
with open(options.output, 'r+b') as outfp:
776-
outfp.seek(check_startpos)
777-
check_sum = checksum(outfp, check_endpos - check_startpos)
778-
if endpos == initial_length and chunk[3] == check_sum:
792+
if options.quick:
793+
check_startpos = int(previous_chunk[1])
794+
check_endpos = int(previous_chunk[2])
795+
with open(options.output, 'r+b') as outfp:
796+
outfp.seek(check_startpos)
797+
if previous_chunk[3] != checksum(
798+
outfp, check_endpos - check_startpos):
799+
error = ('Target file is not consistent with backup %s, '
800+
'falling back to a full recover.' % previous_chunk[0])
801+
if error:
802+
log(error)
803+
return do_full_recover(options, repofiles)
804+
if endpos == initial_length:
779805
log('Target file is same size as latest backup, '
780806
'doing nothing.')
781807
return
782-
elif previous_chunk[3] != check_sum:
783-
if endpos == initial_length:
784-
log('Target file is not consistent with latest backup, '
785-
'falling back to a full recover.')
786-
return do_full_recover(options, repofiles)
787-
else:
788-
log('Last whole common chunk checksum did not match with backup, '
789-
'falling back to a full recover.')
790-
return do_full_recover(options, repofiles)
791808

792809
filename = os.path.join(options.repository,
793810
os.path.basename(fn))

src/ZODB/scripts/tests/test_repozo.py

Lines changed: 82 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -220,13 +220,16 @@ def test_recover_ignored_args(self):
220220
from ZODB.scripts import repozo
221221
options = repozo.parseargs(['-R', '-r', '/tmp/nosuchdir', '-v',
222222
'-f', '/tmp/ignored.fs',
223-
'-k'])
223+
'-k', '--full', '--quick'])
224224
self.assertEqual(options.file, None)
225225
self.assertIn('--file option is ignored in recover mode',
226226
sys.stderr.getvalue())
227227
self.assertEqual(options.killold, False)
228228
self.assertIn('--kill-old-on-full option is ignored in recover mode',
229229
sys.stderr.getvalue())
230+
self.assertEqual(options.quick, None)
231+
self.assertIn('--quick option is ignored if --full option is used',
232+
sys.stderr.getvalue())
230233

231234
def test_verify_ignored_args(self):
232235
from ZODB.scripts import repozo
@@ -1040,6 +1043,7 @@ def tearDown(self):
10401043
def _makeOptions(self, **kw):
10411044
options = super()._makeOptions(**kw)
10421045
options.full = False
1046+
options.quick = kw.get('quick', False)
10431047
return options
10441048

10451049
def _createRecoveredDataFS(self, output, options):
@@ -1092,6 +1096,33 @@ def test_w_incr_recover_from_incr_backup(self):
10921096
self.assertEqual(_read_file(output), b'AAABBBCCCDDD')
10931097
self.assertFalse(os.path.exists(output + '.part'))
10941098

1099+
def test_w_quick_incr_recover_from_incr_backup(self):
1100+
import tempfile
1101+
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
1102+
output = os.path.join(dd, 'Data.fs')
1103+
options = self._makeOptions(date='2010-05-15-13-30-57',
1104+
output=output,
1105+
quick=True,
1106+
withverify=False)
1107+
self._createRecoveredDataFS(output, options)
1108+
# Create 2 more .deltafs, to prove the code knows where to pick up
1109+
self._makeFile(6, 7, 8, '.deltafs', 'CCC')
1110+
self._makeFile(8, 9, 10, '.deltafs', 'DDD')
1111+
self._makeFile(
1112+
2, 3, 4, '.dat',
1113+
'/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long
1114+
'/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long
1115+
'/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166ae\n' # noqa: E501 line too long
1116+
'/backup/2010-05-14-08-09-10.deltafs 9 12 45054f47ac3305a2a33e9bcceadff712\n') # noqa: E501 line too long
1117+
os.unlink(
1118+
os.path.join(self._repository_directory,
1119+
'2010-05-14-04-05-06.deltafs'))
1120+
self._callFUT(options)
1121+
self.assertNotIn('falling back to a full recover.',
1122+
sys.stderr.getvalue())
1123+
self.assertEqual(_read_file(output), b'AAABBBCCCDDD')
1124+
self.assertFalse(os.path.exists(output + '.part'))
1125+
10951126
def test_w_incr_backup_with_verify_sum_inconsistent(self):
10961127
import tempfile
10971128
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
@@ -1192,14 +1223,14 @@ def test_w_incr_backup_switch_auto_to_full_recover_if_chunk_is_wrong(self): # n
11921223
self._makeFile(6, 7, 8, '.deltafs', 'CCC')
11931224
self._makeFile(
11941225
2, 3, 4, '.dat',
1195-
'/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long
1196-
'/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a4\n' # noqa: E501 line too long
1226+
'/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b8\n' # noqa: E501 line too long
1227+
'/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long
11971228
'/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166ae\n') # noqa: E501 line too long
11981229
self._callFUT(options)
11991230
self.assertEqual(_read_file(output), b'AAABBBCCC')
12001231
self.assertFalse(os.path.exists(output + '.part'))
12011232
self.assertIn(
1202-
"Last whole common chunk checksum did not match with backup, falling back to a full recover.", # noqa: E501 line too long
1233+
"Target file is not consistent with backup /backup/2010-05-14-02-03-04.fs, falling back to a full recover.", # noqa: E501 line too long
12031234
sys.stderr.getvalue())
12041235

12051236
def test_w_incr_backup_switch_auto_to_full_recover_after_pack(self):
@@ -1226,9 +1257,55 @@ def test_w_incr_backup_switch_auto_to_full_recover_after_pack(self):
12261257
self.assertEqual(_read_file(output), b'CCDD')
12271258
self.assertFalse(os.path.exists(output + '.part'))
12281259
self.assertIn(
1229-
'Target file is larger than latest backup, falling back to a full recover.', # noqa: E501 line too long
1260+
"Target file is not consistent with backup /backup/2010-05-14-06-07-08.fs, falling back to a full recover.", # noqa: E501 line too long
12301261
sys.stderr.getvalue())
12311262

1263+
def test_w_quick_incr_backup_switch_auto_to_full_recover_if_last_chunk_is_wrong(self): # noqa: E501 line too long
1264+
import tempfile
1265+
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
1266+
output = os.path.join(dd, 'Data.fs')
1267+
options = self._makeOptions(date='2010-05-15-13-30-57',
1268+
output=output,
1269+
quick=True,
1270+
withverify=False)
1271+
self._createRecoveredDataFS(output, options)
1272+
self._makeFile(6, 7, 8, '.deltafs', 'CCC')
1273+
self._makeFile(
1274+
2, 3, 4, '.dat',
1275+
'/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long
1276+
'/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a4\n' # noqa: E501 line too long
1277+
'/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166ae\n') # noqa: E501 line too long
1278+
self._callFUT(options)
1279+
self.assertEqual(_read_file(output), b'AAABBBCCC')
1280+
self.assertFalse(os.path.exists(output + '.part'))
1281+
self.assertIn(
1282+
"Target file is not consistent with backup /backup/2010-05-14-04-05-06.deltafs, falling back to a full recover.", # noqa: E501 line too long
1283+
sys.stderr.getvalue())
1284+
1285+
def test_w_quick_incr_backup_dont_see_old_inconsistencies(self):
1286+
import tempfile
1287+
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
1288+
output = os.path.join(dd, 'Data.fs')
1289+
options = self._makeOptions(date='2010-05-15-13-30-57',
1290+
output=output,
1291+
quick=True,
1292+
withverify=False)
1293+
self._createRecoveredDataFS(output, options)
1294+
self._makeFile(6, 7, 8, '.deltafs', 'CCC')
1295+
self._makeFile(
1296+
2, 3, 4, '.dat',
1297+
'/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long
1298+
'/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long
1299+
'/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166ae\n') # noqa: E501 line too long
1300+
# The ZODB is longer than announced in the .dat file
1301+
with open(output, 'r+b') as f:
1302+
f.write(b'ZZZBBBCCC')
1303+
self._callFUT(options)
1304+
self.assertEqual(_read_file(output), b'ZZZBBBCCC')
1305+
self.assertFalse(os.path.exists(output + '.part'))
1306+
self.assertNotIn(
1307+
"falling back to a full recover", sys.stderr.getvalue())
1308+
12321309

12331310
class Test_do_verify(OptionsTestBase, unittest.TestCase):
12341311

0 commit comments

Comments
 (0)