Skip to content

Commit 42ab9ae

Browse files
committed
add rule for state machine ready
1 parent a171ab6 commit 42ab9ae

File tree

5 files changed

+93
-7
lines changed

5 files changed

+93
-7
lines changed

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,11 @@ public final class SCMEvents {
223223
public static final TypedEvent<ContainerID>
224224
RECONCILE_CONTAINER = new TypedEvent<>(ContainerID.class, "Reconcile_Container");
225225

226+
/**
227+
* This event is triggered by the SCM state machine once it is ready.
228+
*/
229+
public static final TypedEvent<Boolean> STATEMACHINE_READY = new TypedEvent<>(Boolean.class, "STATEMACHINE_READY");
230+
226231
/**
227232
* Private Ctor. Never Constructed.
228233
*/

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -361,15 +361,17 @@ public void notifyTermIndexUpdated(long term, long index) {
361361
// 1. Refresh Safemode rules state.
362362
// 2. Start DN Rpc server.
363363
if (!refreshedAfterLeaderReady.get()) {
364-
scm.getScmSafeModeManager().refresh();
365-
scm.getDatanodeProtocolServer().start();
366-
367364
refreshedAfterLeaderReady.set(true);
365+
scm.getScmSafeModeManager().refreshAndValidate();
368366
}
369367
currentLeaderTerm.set(-1L);
370368
}
371369
}
372370

371+
/**
 * Returns the current value of the {@code refreshedAfterLeaderReady} flag.
 *
 * @return the value read from {@code refreshedAfterLeaderReady}
 */
public boolean isRefreshedAfterLeaderReady() {
372+
return refreshedAfterLeaderReady.get();
373+
}
374+
373375
@Override
374376
public void notifyLeaderReady() {
375377
if (!isInitialized) {

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeRuleFactory.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,11 @@
2222
import org.apache.hadoop.hdds.conf.ConfigurationSource;
2323
import org.apache.hadoop.hdds.scm.container.ContainerManager;
2424
import org.apache.hadoop.hdds.scm.ha.SCMContext;
25+
import org.apache.hadoop.hdds.scm.ha.SCMHAManager;
2526
import org.apache.hadoop.hdds.scm.node.NodeManager;
2627
import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
28+
import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager;
29+
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
2730
import org.apache.hadoop.hdds.server.events.EventQueue;
2831

2932
/**
@@ -79,6 +82,16 @@ private void loadRules(SCMSafeModeManager safeModeManager) {
7982

8083
preCheckRules.add(datanodeRule);
8184

85+
OzoneStorageContainerManager ozoneScm = scmContext.getScm();
86+
if (ozoneScm instanceof StorageContainerManager) {
87+
StorageContainerManager scm = (StorageContainerManager) ozoneScm;
88+
SCMHAManager scmHAManager = scm.getScmHAManager();
89+
if (scmHAManager != null && scmHAManager.getRatisServer() != null) {
90+
preCheckRules.add(new StateMachineReadyRule(eventQueue, safeModeManager,
91+
scmHAManager.getRatisServer().getSCMStateMachine()));
92+
}
93+
}
94+
8295
if (pipelineManager != null) {
8396
safeModeRules.add(new HealthyPipelineSafeModeRule(eventQueue, pipelineManager,
8497
safeModeManager, config, scmContext, nodeManager));
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.hadoop.hdds.scm.safemode;
19+
20+
import org.apache.hadoop.hdds.scm.events.SCMEvents;
21+
import org.apache.hadoop.hdds.scm.ha.SCMStateMachine;
22+
import org.apache.hadoop.hdds.server.events.EventQueue;
23+
import org.apache.hadoop.hdds.server.events.TypedEvent;
24+
25+
/**
26+
* Safe mode exit rule that is satisfied once the SCM state machine is ready, i.e. all transactions have been applied.
27+
*/
28+
public class StateMachineReadyRule extends SafeModeExitRule<Boolean> {
29+
private SCMStateMachine scmStateMachine = null; // may stay null; validate() then reports the rule as satisfied
30+
31+
public StateMachineReadyRule(EventQueue eventQueue, SCMSafeModeManager manager, SCMStateMachine scmStateMachine) { // scmStateMachine is the state machine whose readiness flag this rule reads
32+
super(manager, eventQueue);
33+
this.scmStateMachine = scmStateMachine;
34+
}
35+
36+
@Override
37+
protected TypedEvent<Boolean> getEventType() { // this rule is associated with the STATEMACHINE_READY event
38+
return SCMEvents.STATEMACHINE_READY;
39+
}
40+
41+
@Override
42+
protected boolean validate() { // true when the state machine reports it was refreshed after leader ready
43+
if (null != scmStateMachine) {
44+
return scmStateMachine.isRefreshedAfterLeaderReady();
45+
}
46+
// No state machine was supplied (the non-HA case), so the rule is always satisfied.
47+
return true;
48+
}
49+
50+
@Override
51+
protected void process(Boolean ignored) { // intentionally empty: the event payload is not used
52+
}
53+
54+
@Override
55+
protected void cleanup() { // intentionally empty
56+
}
57+
58+
@Override
59+
public String getStatusText() { // reports the same flag validate() reads, or "NA" when no state machine is set
60+
return String.format("Refreshed SCM State Machine after leader ready: %s",
61+
scmStateMachine != null ? scmStateMachine.isRefreshedAfterLeaderReady() : "NA");
62+
}
63+
64+
@Override
65+
public void refresh(boolean forceRefresh) {
66+
// Do nothing: this rule keeps no state of its own; validate() reads the state machine flag directly.
67+
}
68+
}

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1530,10 +1530,8 @@ public void start() throws IOException {
15301530
}
15311531
getBlockProtocolServer().start();
15321532

1533-
// If HA is enabled, start datanode protocol server once leader is ready.
1534-
if (!scmStorageConfig.isSCMHAEnabled()) {
1535-
getDatanodeProtocolServer().start();
1536-
}
1533+
// start datanode protocol server
1534+
getDatanodeProtocolServer().start();
15371535
if (getSecurityProtocolServer() != null) {
15381536
getSecurityProtocolServer().start();
15391537
persistSCMCertificates();

0 commit comments

Comments
 (0)