-
Notifications
You must be signed in to change notification settings - Fork 275
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement Offline-To-Bootstrap state transition in StorageManager #1326
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
/** | ||
* Copyright 2019 LinkedIn Corp. All rights reserved. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
*/ | ||
package com.github.ambry.clustermap; | ||
|
||
/**
 * Identifies the kind of partition state model listener.
 * Different components (i.e. StorageManager, ReplicationManager etc) each provide their own implementation of
 * {@link com.github.ambry.clustermap.PartitionStateChangeListener} and react whenever a state transition takes place.
 */
public enum StateModelListenerType {
  /**
   * Listener owned by the storage manager, which runs store operations on partition state transitions. As an example,
   * once a new replica moves from OFFLINE to BOOTSTRAP, the storage manager instantiates the blob store tied to that
   * replica and adds it to the disk manager and the compaction manager.
   */
  StorageManagerListener,

  /**
   * Listener owned by the replication manager, which carries out replica operations as a reaction to partition state
   * transitions. As an example, once a new replica moves from BOOTSTRAP to STANDBY, the replication manager keeps
   * checking the replication lag of that replica and makes sure it catches up with its peer replicas.
   */
  ReplicationManagerListener,

  /**
   * Listener owned by the stats manager. It acts when a new replica is added (OFFLINE -> BOOTSTRAP) or when an old
   * replica is removed (INACTIVE -> OFFLINE).
   */
  StatsManagerListener,

  /**
   * Listener owned by the cloud-to-store replication manager. It acts when replica leadership is handed off. As an
   * example, a replica that goes from STANDBY to LEADER is supposed to replicate data from VCR nodes. This belongs to
   * the two-way replication between Ambry and cloud.
   */
  CloudToStoreReplicationManagerListener
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
/** | ||
* Copyright 2019 LinkedIn Corp. All rights reserved. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
*/ | ||
package com.github.ambry.clustermap; | ||
|
||
/**
 * Unchecked exception that pairs a detail message with a {@link TransitionErrorCode}.
 * NOTE(review): judging by the names, this is intended to be thrown when a partition state transition cannot be
 * completed - confirm against the callers.
 */
public class StateTransitionException extends RuntimeException {
  private static final long serialVersionUID = 1L;

  /**
   * The failure categories this exception is able to report.
   */
  public enum TransitionErrorCode {
    /**
     * The replica is absent from Helix and was not found on the current node either.
     */
    ReplicaNotFound,

    /**
     * A store operation (i.e. store addition/removal in StoreManager) failed.
     */
    StoreOperationFailure,

    /**
     * The store has not been started and is therefore unavailable for specific operations.
     */
    StoreNotStarted
  }

  // Field name is kept as-is: it is part of the default serialized form of this exception.
  private final TransitionErrorCode error;

  /**
   * @param s the detail message, handed to the {@link RuntimeException} constructor.
   * @param error the {@link TransitionErrorCode} to attach to this exception.
   */
  public StateTransitionException(String s, TransitionErrorCode error) {
    super(s);
    this.error = error;
  }

  /**
   * @return the {@link TransitionErrorCode} that was supplied at construction time.
   */
  public TransitionErrorCode getErrorCode() {
    return this.error;
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,9 +18,9 @@ | |
import com.github.ambry.utils.Utils; | ||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.HashSet; | ||
import java.util.LinkedList; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Set; | ||
|
@@ -43,25 +43,26 @@ | |
/** | ||
* An implementation of {@link ClusterParticipant} that registers as a participant to a Helix cluster. | ||
*/ | ||
class HelixParticipant implements ClusterParticipant, PartitionStateChangeListener { | ||
private final Logger logger = LoggerFactory.getLogger(getClass()); | ||
public class HelixParticipant implements ClusterParticipant, PartitionStateChangeListener { | ||
private final String clusterName; | ||
private final String zkConnectStr; | ||
private final HelixFactory helixFactory; | ||
private final Object helixAdministrationLock = new Object(); | ||
private final ClusterMapConfig clusterMapConfig; | ||
private final Map<StateModelListenerType, PartitionStateChangeListener> partitionStateChangeListeners; | ||
private HelixManager manager; | ||
private String instanceName; | ||
private HelixAdmin helixAdmin; | ||
private List<PartitionStateChangeListener> partitionStateChangeListeners; | ||
|
||
private static final Logger logger = LoggerFactory.getLogger(HelixParticipant.class); | ||
|
||
/** | ||
* Instantiate a HelixParticipant. | ||
* @param clusterMapConfig the {@link ClusterMapConfig} associated with this participant. | ||
* @param helixFactory the {@link HelixFactory} to use to get the {@link HelixManager}. | ||
* @throws IOException if there is an error in parsing the JSON serialized ZK connect string config. | ||
*/ | ||
HelixParticipant(ClusterMapConfig clusterMapConfig, HelixFactory helixFactory) throws IOException { | ||
public HelixParticipant(ClusterMapConfig clusterMapConfig, HelixFactory helixFactory) throws IOException { | ||
this.clusterMapConfig = clusterMapConfig; | ||
clusterName = clusterMapConfig.clusterMapClusterName; | ||
instanceName = | ||
|
@@ -81,7 +82,7 @@ class HelixParticipant implements ClusterParticipant, PartitionStateChangeListen | |
throw new IOException("Received JSON exception while parsing ZKInfo json string", e); | ||
} | ||
manager = helixFactory.getZKHelixManager(clusterName, instanceName, InstanceType.PARTICIPANT, zkConnectStr); | ||
partitionStateChangeListeners = new LinkedList<>(); | ||
partitionStateChangeListeners = new HashMap<>(); | ||
} | ||
|
||
/** | ||
|
@@ -96,7 +97,7 @@ public void participate(List<AmbryHealthReport> ambryHealthReports) throws IOExc | |
clusterMapConfig.clustermapStateModelDefinition); | ||
StateMachineEngine stateMachineEngine = manager.getStateMachineEngine(); | ||
stateMachineEngine.registerStateModelFactory(clusterMapConfig.clustermapStateModelDefinition, | ||
new AmbryStateModelFactory(clusterMapConfig.clustermapStateModelDefinition, this)); | ||
new AmbryStateModelFactory(clusterMapConfig, this)); | ||
registerHealthReportTasks(stateMachineEngine, ambryHealthReports); | ||
try { | ||
synchronized (helixAdministrationLock) { | ||
|
@@ -117,8 +118,9 @@ public void participate(List<AmbryHealthReport> ambryHealthReports) throws IOExc | |
} | ||
|
||
@Override | ||
public void registerPartitionStateChangeListener(PartitionStateChangeListener partitionStateChangeListener) { | ||
partitionStateChangeListeners.add(partitionStateChangeListener); | ||
public void registerPartitionStateChangeListener(StateModelListenerType listenerType, | ||
PartitionStateChangeListener partitionStateChangeListener) { | ||
partitionStateChangeListeners.put(listenerType, partitionStateChangeListener); | ||
} | ||
|
||
@Override | ||
|
@@ -212,6 +214,13 @@ public List<String> getStoppedReplicas() { | |
return ClusterMapUtils.getStoppedReplicas(instanceConfig); | ||
} | ||
|
||
/** | ||
* @return a read-only view of the registered state change listeners, keyed by {@link StateModelListenerType}. The | ||
* backing map is wrapped with {@link Collections#unmodifiableMap} rather than copied, so this is not a | ||
* point-in-time snapshot. | ||
*/ | ||
public Map<StateModelListenerType, PartitionStateChangeListener> getPartitionStateChangeListeners() { | ||
return Collections.unmodifiableMap(partitionStateChangeListeners); | ||
} | ||
|
||
/** | ||
* Register {@link HelixHealthReportAggregatorTask}s for appropriate {@link AmbryHealthReport}s. | ||
* @param engine the {@link StateMachineEngine} to register the task state model. | ||
|
@@ -275,16 +284,38 @@ boolean setStoppedReplicas(List<String> stoppedReplicas) { | |
} | ||
|
||
@Override | ||
public void onPartitionStateChangeToLeaderFromStandby(String partitionName) { | ||
for (PartitionStateChangeListener partitionStateChangeListener : partitionStateChangeListeners) { | ||
partitionStateChangeListener.onPartitionStateChangeToLeaderFromStandby(partitionName); | ||
public void onPartitionBecomeBootstrapFromOffline(String partitionName) { | ||
PartitionStateChangeListener storageManagerListener = | ||
partitionStateChangeListeners.get(StateModelListenerType.StorageManagerListener); | ||
if (storageManagerListener != null) { | ||
storageManagerListener.onPartitionBecomeBootstrapFromOffline(partitionName); | ||
} | ||
} | ||
|
||
@Override | ||
public void onPartitionBecomeStandbyFromBootstrap(String partitionName) { | ||
PartitionStateChangeListener storageManagerListener = | ||
partitionStateChangeListeners.get(StateModelListenerType.StorageManagerListener); | ||
if (storageManagerListener != null) { | ||
storageManagerListener.onPartitionBecomeStandbyFromBootstrap(partitionName); | ||
} | ||
} | ||
|
||
@Override | ||
public void onPartitionBecomeLeaderFromStandby(String partitionName) { | ||
PartitionStateChangeListener cloudToStoreReplicationListener = | ||
partitionStateChangeListeners.get(StateModelListenerType.CloudToStoreReplicationManagerListener); | ||
if (cloudToStoreReplicationListener != null) { | ||
cloudToStoreReplicationListener.onPartitionBecomeLeaderFromStandby(partitionName); | ||
} | ||
} | ||
|
||
@Override | ||
public void onPartitionStateChangeToStandbyFromLeader(String partitionName) { | ||
for (PartitionStateChangeListener partitionStateChangeListener : partitionStateChangeListeners) { | ||
partitionStateChangeListener.onPartitionStateChangeToStandbyFromLeader(partitionName); | ||
public void onPartitionBecomeStandbyFromLeader(String partitionName) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
maybe we can replace this by There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Appreciate your suggestion but I feel like it loses a little bit of readability. What do you think? |
||
PartitionStateChangeListener cloudToStoreReplicationListener = | ||
partitionStateChangeListeners.get(StateModelListenerType.CloudToStoreReplicationManagerListener); | ||
if (cloudToStoreReplicationListener != null) { | ||
cloudToStoreReplicationListener.onPartitionBecomeStandbyFromLeader(partitionName); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
right, removed.