From ea101e287afb85189837a6b6526f10cb9b99e4c6 Mon Sep 17 00:00:00 2001 From: dhiAmzn <81139246+dhiAmzn@users.noreply.github.com> Date: Fri, 21 May 2021 11:28:47 +0530 Subject: [PATCH] Delay the security index initial bootstrap when the index is red (#1153) --- .../ConfigurationRepository.java | 57 ++++++++++--------- .../security/SlowIntegrationTests.java | 27 +++++++++ 2 files changed, 57 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/opensearch/security/configuration/ConfigurationRepository.java b/src/main/java/org/opensearch/security/configuration/ConfigurationRepository.java index 98459597de..a77ad02f0b 100644 --- a/src/main/java/org/opensearch/security/configuration/ConfigurationRepository.java +++ b/src/main/java/org/opensearch/security/configuration/ConfigurationRepository.java @@ -132,6 +132,8 @@ public void run() { threadContext.putHeader(ConfigConstants.OPENDISTRO_SECURITY_CONF_REQUEST_HEADER, "true"); final boolean isSecurityIndexCreated = createSecurityIndexIfAbsent(); + waitForSecurityIndexToBeAtLeastYellow(); + if (isSecurityIndexCreated) { ConfigHelper.uploadFile(client, cd+"config.yml", securityIndex, CType.CONFIG, DEFAULT_CONFIG_VERSION); ConfigHelper.uploadFile(client, cd+"roles.yml", securityIndex, CType.ROLES, DEFAULT_CONFIG_VERSION); @@ -157,36 +159,10 @@ public void run() { LOGGER.error("{} does not exist", confFile.getAbsolutePath()); } } catch (Exception e) { - LOGGER.debug("Cannot apply default config (this is maybe not an error!) due to {}", e.getMessage()); + LOGGER.error("Cannot apply default config (this is maybe not an error!)", e); } } - LOGGER.debug("Node started, try to initialize it. Wait for at least yellow cluster state...."); - ClusterHealthResponse response = null; - try { - response = client.admin().cluster().health(new ClusterHealthRequest(securityIndex) - .waitForActiveShards(1) - .waitForYellowStatus()).actionGet(); - } catch (Exception e1) { - LOGGER.debug("Catched a {} but we just try again ...", e1.toString()); - } - - while(response == null || response.isTimedOut() || response.getStatus() == ClusterHealthStatus.RED) { - LOGGER.debug("index '{}' not healthy yet, we try again ... (Reason: {})", securityIndex, response==null?"no response":(response.isTimedOut()?"timeout":"other, maybe red cluster")); - try { - Thread.sleep(500); - } catch (InterruptedException e1) { - //ignore - Thread.currentThread().interrupt(); - } - try { - response = client.admin().cluster().health(new ClusterHealthRequest(securityIndex).waitForYellowStatus()).actionGet(); - } catch (Exception e1) { - LOGGER.debug("Catched again a {} but we just try again ...", e1.toString()); - } - continue; - } - while(!dynamicConfigFactory.isInitialized()) { try { LOGGER.debug("Try to load config ..."); @@ -250,6 +226,33 @@ private boolean createSecurityIndexIfAbsent() { } } + private void waitForSecurityIndexToBeAtLeastYellow() { + LOGGER.info("Node started, try to initialize it. Wait for at least yellow cluster state...."); + ClusterHealthResponse response = null; + try { + response = client.admin().cluster().health(new ClusterHealthRequest(securityIndex) + .waitForActiveShards(1) + .waitForYellowStatus()).actionGet(); + } catch (Exception e) { + LOGGER.debug("Caught a {} but we just try again ...", e.toString()); + } + + while(response == null || response.isTimedOut() || response.getStatus() == ClusterHealthStatus.RED) { + LOGGER.debug("index '{}' not healthy yet, we try again ... (Reason: {})", securityIndex, response==null?"no response":(response.isTimedOut()?"timeout":"other, maybe red cluster")); + try { + Thread.sleep(500); + } catch (InterruptedException e) { + //ignore + Thread.currentThread().interrupt(); + } + try { + response = client.admin().cluster().health(new ClusterHealthRequest(securityIndex).waitForYellowStatus()).actionGet(); + } catch (Exception e) { + LOGGER.debug("Caught again a {} but we just try again ...", e.toString()); + } + } + } + public void initOnNodeStart() { try { if (settings.getAsBoolean(ConfigConstants.OPENDISTRO_SECURITY_ALLOW_DEFAULT_INIT_SECURITYINDEX, false)) { diff --git a/src/test/java/org/opensearch/security/SlowIntegrationTests.java b/src/test/java/org/opensearch/security/SlowIntegrationTests.java index 7f48c3df7c..5b49f087f0 100644 --- a/src/test/java/org/opensearch/security/SlowIntegrationTests.java +++ b/src/test/java/org/opensearch/security/SlowIntegrationTests.java @@ -30,20 +30,25 @@ package org.opensearch.security; +import org.apache.http.HttpStatus; import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; import org.opensearch.action.admin.cluster.node.info.NodesInfoRequest; +import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; import org.opensearch.cluster.health.ClusterHealthStatus; import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; import org.opensearch.node.Node; import org.opensearch.node.PluginAwareNode; import org.opensearch.security.ssl.util.SSLConfigConstants; import org.opensearch.security.support.ConfigConstants; +import org.opensearch.security.test.helper.rest.RestHelper; import org.opensearch.security.test.SingleClusterTest; import org.opensearch.security.test.helper.cluster.ClusterConfiguration; import org.opensearch.security.test.helper.file.FileHelper; import org.opensearch.transport.Netty4Plugin; import org.junit.Assert; import org.junit.Test; +import java.io.IOException; public class SlowIntegrationTests extends SingleClusterTest { @@ -157,4 +162,26 @@ public void testNodeClientDisallowedWithNonServerCertificate2() throws Exception } } + @Test + public void testDelayInSecurityIndexInitialization() throws Exception { + final Settings settings = Settings.builder() + .put(ConfigConstants.OPENDISTRO_SECURITY_ALLOW_DEFAULT_INIT_SECURITYINDEX, true) + .put("cluster.routing.allocation.exclude._ip", "127.0.0.1") + .build(); + try { + setup(Settings.EMPTY, null, settings, false); + Assert.fail("Expected IOException here due to red cluster state"); + } catch (IOException e) { + // Index request has a default timeout of 1 minute, adding buffer between nodes initialization and cluster health check + Thread.sleep(1000*80); + // Ideally, we would want to remove this cluster setting, but default settings cannot be removed. So overriding with a reserved IP address + clusterHelper.nodeClient().admin().cluster().updateSettings( + new ClusterUpdateSettingsRequest().transientSettings(Settings.builder().put("cluster.routing.allocation.exclude._ip", "192.0.2.0").build())); + this.clusterInfo = clusterHelper.waitForCluster(ClusterHealthStatus.GREEN, TimeValue.timeValueSeconds(10),3); + } + RestHelper rh = nonSslRestHelper(); + Thread.sleep(10000); + Assert.assertEquals(HttpStatus.SC_OK, rh.executeGetRequest("", encodeBasicHeader("admin", "admin")).getStatusCode()); + } + }