Skip to content

Commit 16705c7

Browse files
cjen1-msftCopilotachamayou
committed
Update CheckQuorum condition from quorum in any config to quorum in every active config (#7375)
Co-authored-by: Copilot <[email protected]> Co-authored-by: Amaury Chamayou <[email protected]>
1 parent 731955a commit 16705c7

File tree

7 files changed

+193
-32
lines changed

7 files changed

+193
-32
lines changed

CHANGELOG.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
77

88
## [6.0.17]
99

10-
[6.0.17]: https://github.com/microsoft/CCF/releases/tag/6.0.17
11-
1210
### Added
1311

1412
- Support for PreVote optimisation. Nodes understand and are able to respond to PreVote messages, but will not become pre-vote candidates themselves. (#7419)
1513

14+
### Fixed
15+
16+
- CheckQuorum now requires a quorum in every configuration (#7375)
17+
18+
1619
## [6.0.16]
1720

1821
[6.0.16]: https://github.com/microsoft/CCF/releases/tag/6.0.16

src/consensus/aft/raft.h

Lines changed: 27 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -846,38 +846,36 @@ namespace aft
846846
node.second.last_ack_timeout += elapsed;
847847
}
848848

849-
bool has_quorum_of_backups = false;
850-
for (auto const& conf : configurations)
851-
{
852-
size_t backup_ack_timeout_count = 0;
853-
for (auto const& node : conf.nodes)
854-
{
855-
auto search = all_other_nodes.find(node.first);
856-
if (search == all_other_nodes.end())
857-
{
858-
// Ignore ourselves as primary
859-
continue;
860-
}
861-
if (search->second.last_ack_timeout >= election_timeout)
849+
bool every_active_config_has_a_quorum = std::all_of(
850+
configurations.begin(),
851+
configurations.end(),
852+
[this](const Configuration& conf) {
853+
size_t live_nodes_in_config = 0;
854+
for (auto const& node : conf.nodes)
862855
{
863-
RAFT_DEBUG_FMT(
864-
"No ack received from {} in last {}",
865-
node.first,
866-
election_timeout);
867-
backup_ack_timeout_count++;
856+
auto search = all_other_nodes.find(node.first);
857+
if (
858+
// if a (non-self) node is in a configuration, then it is in
859+
// all_other_nodes. So if a node in a configuration is not found
860+
// in all_other_nodes, it must be self, and hence is live
861+
search == all_other_nodes.end() ||
862+
// Otherwise we use the most recent ack as a failure probe
863+
search->second.last_ack_timeout < election_timeout)
864+
{
865+
++live_nodes_in_config;
866+
}
867+
else
868+
{
869+
RAFT_DEBUG_FMT(
870+
"No ack received from {} in last {}",
871+
node.first,
872+
election_timeout);
873+
}
868874
}
869-
}
870-
871-
if (backup_ack_timeout_count < get_quorum(conf.nodes.size() - 1))
872-
{
873-
// If primary has quorum of active backups in _any_ configuration,
874-
// it should remain primary
875-
has_quorum_of_backups = true;
876-
break;
877-
}
878-
}
875+
return live_nodes_in_config >= get_quorum(conf.nodes.size());
876+
});
879877

880-
if (!has_quorum_of_backups)
878+
if (!every_active_config_has_a_quorum)
881879
{
882880
// CheckQuorum: The primary automatically steps down if there are no
883881
// active configuration in which it has heard back from a majority of

src/consensus/aft/test/driver.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,15 @@ int main(int argc, char** argv)
295295
assert(items.size() == 4);
296296
driver->assert_detail(items[1], items[2], items[3], false, lineno);
297297
break;
298+
case shash("assert_config"):
299+
assert(items.size() >= 3);
300+
driver->assert_config(
301+
items[1], items[2], {std::next(items.begin(), 3), items.end()});
302+
break;
303+
case shash("assert_absent_config"):
304+
assert(items.size() == 3);
305+
driver->assert_absent_config(items[1], items[2]);
306+
break;
298307
case shash("replicate_new_configuration"):
299308
assert(items.size() >= 3);
300309
items.erase(items.begin());

src/consensus/aft/test/driver.h

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1336,4 +1336,58 @@ class RaftDriver
13361336
std::to_string((int)lineno)));
13371337
}
13381338
}
1339+
1340+
void assert_config(
1341+
const std::string& node_id,
1342+
const std::string& config_idx_s,
1343+
const std::vector<std::string>& node_ids)
1344+
{
1345+
auto config_idx = std::stol(config_idx_s);
1346+
auto details = _nodes.at(node_id).raft->get_details();
1347+
for (const auto& config : details.configs)
1348+
{
1349+
if (config.idx == config_idx)
1350+
{
1351+
std::set<std::string> expected_nodes(node_ids.begin(), node_ids.end());
1352+
std::set<std::string> actual_nodes;
1353+
for (const auto& [id, _] : config.nodes)
1354+
{
1355+
actual_nodes.insert(id);
1356+
}
1357+
if (expected_nodes != actual_nodes)
1358+
{
1359+
auto actual_str =
1360+
fmt::format("{{{}}}", fmt::join(actual_nodes, ", "));
1361+
auto expected_str =
1362+
fmt::format("{{{}}}", fmt::join(expected_nodes, ", "));
1363+
throw std::runtime_error(fmt::format(
1364+
"Node {} configuration at idx {} ({}) does not match expected ({})",
1365+
node_id,
1366+
config_idx,
1367+
actual_str,
1368+
expected_str));
1369+
}
1370+
return;
1371+
}
1372+
}
1373+
throw std::runtime_error(fmt::format(
1374+
"Node {} does not have a configuration at idx {}", node_id, config_idx));
1375+
}
1376+
1377+
void assert_absent_config(
1378+
const std::string& node_id, const std::string& config_idx_s)
1379+
{
1380+
auto config_idx = std::stol(config_idx_s);
1381+
auto details = _nodes.at(node_id).raft->get_details();
1382+
for (const auto& config : details.configs)
1383+
{
1384+
if (config.idx == config_idx)
1385+
{
1386+
throw std::runtime_error(fmt::format(
1387+
"Node {} has unexpected configuration at idx {}",
1388+
node_id,
1389+
config_idx));
1390+
}
1391+
}
1392+
}
13391393
};
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
start_node,0
2+
emit_signature,2
3+
swap_nodes,2,in,1,2
4+
emit_signature,2
5+
6+
dispatch_all
7+
periodic_all,10
8+
dispatch_all
9+
periodic_all,10
10+
dispatch_all
11+
12+
# All nodes believe that the only configuration is {0,1,2}
13+
assert_state_sync
14+
assert_absent_config,0,1
15+
assert_config,0,3,0,1,2
16+
17+
disconnect_node,0
18+
swap_nodes,2,out,1,2
19+
emit_signature,2
20+
21+
# Only node 0 knows about config {0}, as it is partitioned from nodes 1 and 2
22+
assert_config,0,3,0,1,2
23+
assert_config,0,5,0
24+
25+
assert_config,1,3,0,1,2
26+
assert_absent_config,1,5
27+
assert_config,2,3,0,1,2
28+
assert_absent_config,2,5
29+
30+
# Node 0 should step down via CheckQuorum
31+
periodic_one,0,100
32+
assert_detail,0,leadership_state,Follower
33+
34+
periodic_one,1,100
35+
assert_detail,1,leadership_state,Candidate
36+
37+
dispatch_all
38+
periodic_all,10
39+
dispatch_all
40+
periodic_all,10
41+
dispatch_all
42+
periodic_all,10
43+
dispatch_all
44+
periodic_all,10
45+
dispatch_all
46+
47+
assert_detail,0,leadership_state,Follower
48+
assert_detail,1,leadership_state,Leader
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
start_node,0
2+
emit_signature,2
3+
swap_nodes,2,in,1,2
4+
emit_signature,2
5+
6+
periodic_one,0,10
7+
dispatch_all
8+
dispatch_all
9+
periodic_one,0,10
10+
dispatch_all
11+
12+
13+
# All nodes have both configs: {0} and {0,1,2}
14+
assert_commit_idx,0,4
15+
assert_absent_config,0,1
16+
assert_config,0,3,0,1,2
17+
18+
assert_commit_idx,1,2
19+
assert_config,1,1,0
20+
assert_config,1,3,0,1,2
21+
22+
assert_commit_idx,2,2
23+
assert_config,2,1,0
24+
assert_config,2,3,0,1,2
25+
26+
# Node 0 should step down via CheckQuorum
27+
disconnect_node,0
28+
periodic_one,0,100
29+
assert_detail,0,leadership_state,Follower
30+
31+
# Node 1 should try to start an election but fails without a quorum in {0}
32+
periodic_one,1,100
33+
assert_detail,1,leadership_state,Candidate
34+
35+
dispatch_all
36+
periodic_all,10
37+
dispatch_all
38+
periodic_all,10
39+
dispatch_all
40+
41+
assert_detail,1,leadership_state,Candidate
42+
43+
# Fixup the network to allow a leader to be elected and the test to pass
44+
reconnect_node,0
45+
periodic_all,100
46+
dispatch_all
47+
periodic_all,10
48+
dispatch_all

tla/consensus/Traceccfraft.tla

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,8 +413,9 @@ IsRcvRequestVoteResponse ==
413413
IsBecomeFollower ==
414414
/\ IsEvent("become_follower")
415415
/\ UNCHANGED vars \* UNCHANGED implies that it doesn't matter if we prime the previous variables.
416+
\* We don't assert committable and last idx here, as the spec and implementation are out of sync until
417+
\* IsSendAppendEntriesResponse or IsSendRequestVote (in the candidate path)
416418
/\ leadershipState[logline.msg.state.node_id] # Leader
417-
/\ Range(logline.msg.state.committable_indices) \subseteq CommittableIndices(logline.msg.state.node_id)
418419
/\ commitIndex[logline.msg.state.node_id] = logline.msg.state.commit_idx
419420
/\ leadershipState[logline.msg.state.node_id] = ToLeadershipState[logline.msg.state.leadership_state]
420421
/\ membershipState[logline.msg.state.node_id] \in ToMembershipState[logline.msg.state.membership_state]

0 commit comments

Comments
 (0)