Skip to content

Commit 8eb4a7c

Browse files
State Archival Consistency Invariant (#4968)
(Overview by @claude) Introduce a new invariant ArchivedStateConsistency that is invoked on every commit.

### Eviction Checks (checkEvictionInvariants)
- Verifies that entries being archived don't already exist in the archive
- Confirms that evicted entries actually exist in the current live state
- Validates that associated TTL (Time-To-Live) entries are properly cleaned up when entries are archived
- Ensures archived entries contain the correct data from the live state before eviction

### Restoration Checks (checkRestoreInvariants)
- Validates entries being restored from archive are correctly retrieved
- Ensures restored entries don't conflict with current live state
- Verifies TTL entries for restored persistent entries are handled correctly

### Startup Validation
- On node startup, scans the complete hot archive and live bucket lists
- Ensures no entries exist in both live and archived state simultaneously (critical consistency check)
2 parents c05eb78 + 829233a commit 8eb4a7c

18 files changed

+1131
-34
lines changed

docs/stellar-core_example.cfg

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -497,11 +497,17 @@ RUN_STANDALONE=false
497497
# of the network, caution is advised when using this.
498498
# - "EventsAreConsistentWithEntryDiffs"
499499
# Setting this will cause additional work on each operation apply - it
500-
# checks that all asset movements and events provide equivalent
500+
# checks that all asset movements and events provide equivalent
501501
# balance changes. This invariant requires both EMIT_CLASSIC_EVENTS and
502502
# BACKFILL_STELLAR_ASSET_EVENTS config flags to be enabled. This is also
503503
# a "strict" invariant, which means your node will shut down if the invariant
504504
# triggers.
505+
# - "ArchivedStateConsistency"
506+
# Setting this will cause additional work during ledger close - it
507+
# checks that archived entries evicted from the database have the correct value.
508+
# This check runs when entries are evicted during ledger close, adding
509+
# overhead to ledger close time. This is also a "strict" invariant, which
510+
# means your node will shut down if the invariant triggers.
505511
INVARIANT_CHECKS = [ "AccountSubEntriesCountIsValid",
506512
"ConservationOfLumens",
507513
"ConstantProductInvariant",

src/bucket/BucketManager.cpp

Lines changed: 78 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "crypto/Hex.h"
1717
#include "history/HistoryManager.h"
1818
#include "historywork/VerifyBucketWork.h"
19+
#include "invariant/InvariantManager.h"
1920
#include "ledger/LedgerManager.h"
2021
#include "ledger/LedgerTxn.h"
2122
#include "ledger/LedgerTypeUtils.h"
@@ -28,6 +29,7 @@
2829
#include "util/Logging.h"
2930
#include "util/ProtocolVersion.h"
3031
#include "util/TmpDir.h"
32+
#include "util/UnorderedMap.h"
3133
#include "util/types.h"
3234
#include "xdr/Stellar-ledger.h"
3335
#include <filesystem>
@@ -1144,6 +1146,9 @@ BucketManager::startBackgroundEvictionScan(uint32_t ledgerSeq,
11441146

11451147
auto searchableBL =
11461148
mSnapshotManager->copySearchableLiveBucketListSnapshot();
1149+
1150+
// The snapshot must be based on the last closed ledger (ledgerSeq - 1)
1151+
releaseAssertOrThrow(searchableBL->getLedgerSeq() == ledgerSeq - 1);
11471152
auto const& sas = cfg.stateArchivalSettings();
11481153

11491154
using task_t =
@@ -1429,17 +1434,65 @@ loadEntriesFromBucket(std::shared_ptr<LiveBucket> b, std::string const& name,
14291434
b->getSize(), name, ms, formatSize(bytesPerSec));
14301435
}
14311436

1437+
// Loads a single bucket's worth of entries into `map`, inserting archived
1438+
// entries and erasing keys marked HOT_ARCHIVE_LIVE. Should be called in a
1439+
// loop over a BucketList, from oldest bucket to newest.
1440+
static void
1441+
loadEntriesFromHotArchiveBucket(std::shared_ptr<HotArchiveBucket> b,
1442+
std::string const& name,
1443+
std::map<LedgerKey, LedgerEntry>& map)
1444+
{
1445+
ZoneScoped;
1446+
1447+
using namespace std::chrono;
1448+
medida::Timer timer;
1449+
HotArchiveBucketInputIterator in(b);
1450+
timer.Time([&]() {
1451+
while (in)
1452+
{
1453+
HotArchiveBucketEntry const& e = *in;
1454+
if (e.type() == HOT_ARCHIVE_ARCHIVED)
1455+
{
1456+
map[LedgerEntryKey(e.archivedEntry())] = e.archivedEntry();
1457+
}
1458+
else
1459+
{
1460+
if (e.type() != HOT_ARCHIVE_LIVE)
1461+
{
1462+
std::string err =
1463+
"Malformed hot archive bucket: unexpected "
1464+
"non-HOT_ARCHIVE_LIVE entry.";
1465+
CLOG_ERROR(Bucket, "{}", err);
1466+
throw std::runtime_error(err);
1467+
}
1468+
map.erase(e.key());
1469+
}
1470+
++in;
1471+
}
1472+
});
1473+
nanoseconds ns =
1474+
timer.duration_unit() * static_cast<nanoseconds::rep>(timer.max());
1475+
milliseconds ms = duration_cast<milliseconds>(ns);
1476+
size_t bytesPerSec = (b->getSize() * 1000 / (1 + ms.count()));
1477+
CLOG_INFO(Bucket, "Read {}-byte bucket file '{}' in {} ({}/s)",
1478+
b->getSize(), name, ms, formatSize(bytesPerSec));
1479+
}
1480+
1481+
template <typename BucketT>
14321482
std::map<LedgerKey, LedgerEntry>
1433-
BucketManager::loadCompleteLedgerState(HistoryArchiveState const& has)
1483+
BucketManager::loadCompleteBucketListStateHelper(
1484+
std::vector<HistoryStateBucket<BucketT>> const& buckets,
1485+
std::function<void(std::shared_ptr<BucketT>, std::string const&,
1486+
std::map<LedgerKey, LedgerEntry>&)>
1487+
loadFunc)
14341488
{
14351489
ZoneScoped;
14361490

14371491
std::map<LedgerKey, LedgerEntry> ledgerMap;
14381492
std::vector<std::pair<Hash, std::string>> hashes;
1439-
for (uint32_t i = LiveBucketList::kNumLevels; i > 0; --i)
1493+
for (uint32_t i = BucketListBase<BucketT>::kNumLevels; i > 0; --i)
14401494
{
1441-
HistoryStateBucket<LiveBucket> const& hsb =
1442-
has.currentBuckets.at(i - 1);
1495+
HistoryStateBucket<BucketT> const& hsb = buckets.at(i - 1);
14431496
hashes.emplace_back(hexToBin256(hsb.snap),
14441497
fmt::format(FMT_STRING("snap {:d}"), i - 1));
14451498
hashes.emplace_back(hexToBin256(hsb.curr),
@@ -1451,17 +1504,36 @@ BucketManager::loadCompleteLedgerState(HistoryArchiveState const& has)
14511504
{
14521505
continue;
14531506
}
1454-
auto b = getBucketByHashInternal(pair.first, mSharedLiveBuckets);
1507+
auto b = getBucketByHash<BucketT>(pair.first);
14551508
if (!b)
14561509
{
14571510
throw std::runtime_error(std::string("missing bucket: ") +
14581511
binToHex(pair.first));
14591512
}
1460-
loadEntriesFromBucket(b, pair.second, ledgerMap);
1513+
1514+
loadFunc(b, pair.second, ledgerMap);
14611515
}
14621516
return ledgerMap;
14631517
}
14641518

1519+
// Loads the complete state of the live BucketList into a map
1520+
std::map<LedgerKey, LedgerEntry>
1521+
BucketManager::loadCompleteLedgerState(HistoryArchiveState const& has)
1522+
{
1523+
CLOG_INFO(Bucket, "Loading complete live ledger state");
1524+
return loadCompleteBucketListStateHelper<LiveBucket>(has.currentBuckets,
1525+
loadEntriesFromBucket);
1526+
}
1527+
1528+
// Loads the complete state of the hot archive BucketList into a map
1529+
std::map<LedgerKey, LedgerEntry>
1530+
BucketManager::loadCompleteHotArchiveState(HistoryArchiveState const& has)
1531+
{
1532+
CLOG_INFO(Bucket, "Loading complete hot archive state");
1533+
return loadCompleteBucketListStateHelper<HotArchiveBucket>(
1534+
has.hotArchiveBuckets, loadEntriesFromHotArchiveBucket);
1535+
}
1536+
14651537
std::shared_ptr<LiveBucket>
14661538
BucketManager::mergeBuckets(asio::io_context& ctx,
14671539
HistoryArchiveState const& has)

src/bucket/BucketManager.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
#pragma once
22

33
#include "bucket/BucketMergeMap.h"
4+
#include "history/HistoryArchive.h"
45
#include "ledger/NetworkConfig.h"
56
#include "main/Config.h"
67
#include "util/TmpDir.h"
8+
#include "util/UnorderedMap.h"
79
#include "util/types.h"
810
#include "work/BasicWork.h"
911
#include "xdr/Stellar-ledger.h"
@@ -171,6 +173,13 @@ class BucketManager : NonMovableOrCopyable
171173

172174
void reportLiveBucketIndexCacheMetrics();
173175

176+
template <class BucketT>
177+
std::map<LedgerKey, LedgerEntry> loadCompleteBucketListStateHelper(
178+
std::vector<HistoryStateBucket<BucketT>> const& buckets,
179+
std::function<void(std::shared_ptr<BucketT>, std::string const&,
180+
std::map<LedgerKey, LedgerEntry>&)>
181+
loadFunc);
182+
174183
#ifdef BUILD_TESTS
175184
bool mUseFakeTestValuesForNextClose{false};
176185
uint32_t mFakeTestProtocolVersion;
@@ -378,6 +387,9 @@ class BucketManager : NonMovableOrCopyable
378387
std::map<LedgerKey, LedgerEntry>
379388
loadCompleteLedgerState(HistoryArchiveState const& has);
380389

390+
std::map<LedgerKey, LedgerEntry>
391+
loadCompleteHotArchiveState(HistoryArchiveState const& has);
392+
381393
// Merge the bucket list of the provided HAS into a single "super bucket"
382394
// consisting of only live entries, and return it.
383395
std::shared_ptr<LiveBucket> mergeBuckets(asio::io_context& ctx,

src/bucket/test/BucketTestUtils.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,8 @@ template size_t countEntries(std::shared_ptr<HotArchiveBucket> bucket);
187187

188188
void
189189
LedgerManagerForBucketTests::finalizeLedgerTxnChanges(
190+
SearchableSnapshotConstPtr lclSnapshot,
191+
SearchableHotArchiveSnapshotConstPtr lclHotArchiveSnapshot,
190192
AbstractLedgerTxn& ltx,
191193
std::unique_ptr<LedgerCloseMetaFrame> const& ledgerCloseMeta,
192194
LedgerHeader lh, uint32_t initialLedgerVers)
@@ -342,8 +344,9 @@ LedgerManagerForBucketTests::finalizeLedgerTxnChanges(
342344
}
343345
else
344346
{
345-
LedgerManagerImpl::finalizeLedgerTxnChanges(ltx, ledgerCloseMeta, lh,
346-
initialLedgerVers);
347+
LedgerManagerImpl::finalizeLedgerTxnChanges(
348+
lclSnapshot, lclHotArchiveSnapshot, ltx, ledgerCloseMeta, lh,
349+
initialLedgerVers);
347350
}
348351
}
349352

src/bucket/test/BucketTestUtils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ class LedgerManagerForBucketTests : public LedgerManagerImpl
7373

7474
protected:
7575
void finalizeLedgerTxnChanges(
76+
SearchableSnapshotConstPtr lclSnapshot,
77+
SearchableHotArchiveSnapshotConstPtr lclHotArchiveSnapshot,
7678
AbstractLedgerTxn& ltx,
7779
std::unique_ptr<LedgerCloseMetaFrame> const& ledgerCloseMeta,
7880
LedgerHeader lh, uint32_t initialLedgerVers) override;

0 commit comments

Comments
 (0)