Skip to content

Commit

Permalink
Delay the security index initial bootstrap when the index is red (ope…
Browse files Browse the repository at this point in the history
  • Loading branch information
dhiAmzn committed May 21, 2021
1 parent 3692448 commit ea101e2
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@ public void run() {
threadContext.putHeader(ConfigConstants.OPENDISTRO_SECURITY_CONF_REQUEST_HEADER, "true");

final boolean isSecurityIndexCreated = createSecurityIndexIfAbsent();
waitForSecurityIndexToBeAtLeastYellow();

if (isSecurityIndexCreated) {
ConfigHelper.uploadFile(client, cd+"config.yml", securityIndex, CType.CONFIG, DEFAULT_CONFIG_VERSION);
ConfigHelper.uploadFile(client, cd+"roles.yml", securityIndex, CType.ROLES, DEFAULT_CONFIG_VERSION);
Expand All @@ -157,36 +159,10 @@ public void run() {
LOGGER.error("{} does not exist", confFile.getAbsolutePath());
}
} catch (Exception e) {
LOGGER.debug("Cannot apply default config (this is maybe not an error!) due to {}", e.getMessage());
LOGGER.error("Cannot apply default config (this is maybe not an error!)", e);
}
}

LOGGER.debug("Node started, try to initialize it. Wait for at least yellow cluster state....");
ClusterHealthResponse response = null;
try {
response = client.admin().cluster().health(new ClusterHealthRequest(securityIndex)
.waitForActiveShards(1)
.waitForYellowStatus()).actionGet();
} catch (Exception e1) {
LOGGER.debug("Catched a {} but we just try again ...", e1.toString());
}

while(response == null || response.isTimedOut() || response.getStatus() == ClusterHealthStatus.RED) {
LOGGER.debug("index '{}' not healthy yet, we try again ... (Reason: {})", securityIndex, response==null?"no response":(response.isTimedOut()?"timeout":"other, maybe red cluster"));
try {
Thread.sleep(500);
} catch (InterruptedException e1) {
//ignore
Thread.currentThread().interrupt();
}
try {
response = client.admin().cluster().health(new ClusterHealthRequest(securityIndex).waitForYellowStatus()).actionGet();
} catch (Exception e1) {
LOGGER.debug("Catched again a {} but we just try again ...", e1.toString());
}
continue;
}

while(!dynamicConfigFactory.isInitialized()) {
try {
LOGGER.debug("Try to load config ...");
Expand Down Expand Up @@ -250,6 +226,33 @@ private boolean createSecurityIndexIfAbsent() {
}
}

private void waitForSecurityIndexToBeAtLeastYellow() {
LOGGER.info("Node started, try to initialize it. Wait for at least yellow cluster state....");
ClusterHealthResponse response = null;
try {
response = client.admin().cluster().health(new ClusterHealthRequest(securityIndex)
.waitForActiveShards(1)
.waitForYellowStatus()).actionGet();
} catch (Exception e) {
LOGGER.debug("Caught a {} but we just try again ...", e.toString());
}

while(response == null || response.isTimedOut() || response.getStatus() == ClusterHealthStatus.RED) {
LOGGER.debug("index '{}' not healthy yet, we try again ... (Reason: {})", securityIndex, response==null?"no response":(response.isTimedOut()?"timeout":"other, maybe red cluster"));
try {
Thread.sleep(500);
} catch (InterruptedException e) {
//ignore
Thread.currentThread().interrupt();
}
try {
response = client.admin().cluster().health(new ClusterHealthRequest(securityIndex).waitForYellowStatus()).actionGet();
} catch (Exception e) {
LOGGER.debug("Caught again a {} but we just try again ...", e.toString());
}
}
}

public void initOnNodeStart() {
try {
if (settings.getAsBoolean(ConfigConstants.OPENDISTRO_SECURITY_ALLOW_DEFAULT_INIT_SECURITYINDEX, false)) {
Expand Down
27 changes: 27 additions & 0 deletions src/test/java/org/opensearch/security/SlowIntegrationTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,25 @@

package org.opensearch.security;

import org.apache.http.HttpStatus;
import org.opensearch.action.admin.cluster.health.ClusterHealthRequest;
import org.opensearch.action.admin.cluster.node.info.NodesInfoRequest;
import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest;
import org.opensearch.cluster.health.ClusterHealthStatus;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.unit.TimeValue;
import org.opensearch.node.Node;
import org.opensearch.node.PluginAwareNode;
import org.opensearch.security.ssl.util.SSLConfigConstants;
import org.opensearch.security.support.ConfigConstants;
import org.opensearch.security.test.helper.rest.RestHelper;
import org.opensearch.security.test.SingleClusterTest;
import org.opensearch.security.test.helper.cluster.ClusterConfiguration;
import org.opensearch.security.test.helper.file.FileHelper;
import org.opensearch.transport.Netty4Plugin;
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;

public class SlowIntegrationTests extends SingleClusterTest {

Expand Down Expand Up @@ -157,4 +162,26 @@ public void testNodeClientDisallowedWithNonServerCertificate2() throws Exception
}
}

@Test
public void testDelayInSecurityIndexInitialization() throws Exception {
final Settings settings = Settings.builder()
.put(ConfigConstants.OPENDISTRO_SECURITY_ALLOW_DEFAULT_INIT_SECURITYINDEX, true)
.put("cluster.routing.allocation.exclude._ip", "127.0.0.1")
.build();
try {
setup(Settings.EMPTY, null, settings, false);
Assert.fail("Expected IOException here due to red cluster state");
} catch (IOException e) {
// Index request has a default timeout of 1 minute, adding buffer between nodes initialization and cluster health check
Thread.sleep(1000*80);
// Ideally, we would want to remove this cluster setting, but default settings cannot be removed. So overriding with a reserved IP address
clusterHelper.nodeClient().admin().cluster().updateSettings(
new ClusterUpdateSettingsRequest().transientSettings(Settings.builder().put("cluster.routing.allocation.exclude._ip", "192.0.2.0").build()));
this.clusterInfo = clusterHelper.waitForCluster(ClusterHealthStatus.GREEN, TimeValue.timeValueSeconds(10),3);
}
RestHelper rh = nonSslRestHelper();
Thread.sleep(10000);
Assert.assertEquals(HttpStatus.SC_OK, rh.executeGetRequest("", encodeBasicHeader("admin", "admin")).getStatusCode());
}

}

0 comments on commit ea101e2

Please sign in to comment.