Default to one shard (#30539)

This commit changes the default out-of-the-box configuration for the number of shards from five to one. We think this will help address a common problem of oversharding. For users with time-based indices that need a different default, this can be managed with index templates. For users with non-time-based indices that find they need to re-shard with the split API in place they no longer need to resort only to reindexing. Since this has the impact of changing the default number of shards used in REST tests, we want to ensure that we still have coverage for issues that could arise from multiple shards. As such, we randomize (rarely) the default number of shards in REST tests to two. This is managed via a global index template. However, some tests check the templates that are in the cluster state during the test. Since this template is randomly there, we need a way for tests to skip adding the template used to set the number of shards to two. For this we add the default_shards feature skip. To avoid having to write our docs in a complicated way because sometimes they might be behind one shard, and sometimes they might be behind two shards we apply the default_shards feature skip to all docs tests. That is, these tests will always run with the default number of shards (one).
elastic · May 14, 2018 · 4a4e3d7 · 4a4e3d7 · asgs · Oct 11, 2018
1 parent af10fd6
commit 4a4e3d7
Show file tree

Hide file tree

Showing 43 changed files with 232 additions and 157 deletions.
diff --git a/buildSrc/src/main/groovy/org/elasticsearch/gradle/doc/RestTestsFromSnippetsTask.groovy b/buildSrc/src/main/groovy/org/elasticsearch/gradle/doc/RestTestsFromSnippetsTask.groovy
@@ -225,6 +225,7 @@ public class RestTestsFromSnippetsTask extends SnippetsTask {
                  * warning every time. */
                 current.println("  - skip:")
                 current.println("      features: ")
+                current.println("        - default_shards")
                 current.println("        - stash_in_key")
                 current.println("        - stash_in_path")
                 current.println("        - stash_path_replace")

diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/SearchIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/SearchIT.java
@@ -312,14 +312,14 @@ public void testSearchWithMatrixStats() throws IOException {
         MatrixStats matrixStats = searchResponse.getAggregations().get("agg1");
         assertEquals(5, matrixStats.getFieldCount("num"));
         assertEquals(56d, matrixStats.getMean("num"), 0d);
-        assertEquals(1830d, matrixStats.getVariance("num"), 0d);
-        assertEquals(0.09340198804973046, matrixStats.getSkewness("num"), 0d);
+        assertEquals(1830.0000000000002, matrixStats.getVariance("num"), 0d);
+        assertEquals(0.09340198804973039, matrixStats.getSkewness("num"), 0d);
         assertEquals(1.2741646510794589, matrixStats.getKurtosis("num"), 0d);
         assertEquals(5, matrixStats.getFieldCount("num2"));
         assertEquals(29d, matrixStats.getMean("num2"), 0d);
         assertEquals(330d, matrixStats.getVariance("num2"), 0d);
         assertEquals(-0.13568039346585542, matrixStats.getSkewness("num2"), 1.0e-16);
-        assertEquals(1.3517561983471074, matrixStats.getKurtosis("num2"), 0d);
+        assertEquals(1.3517561983471071, matrixStats.getKurtosis("num2"), 0d);
         assertEquals(-767.5, matrixStats.getCovariance("num", "num2"), 0d);
         assertEquals(-0.9876336291667923, matrixStats.getCorrelation("num", "num2"), 0d);
     }

diff --git a/...igh-level/src/test/java/org/elasticsearch/client/documentation/SearchDocumentationIT.java b/...igh-level/src/test/java/org/elasticsearch/client/documentation/SearchDocumentationIT.java
@@ -800,7 +800,7 @@ public void testRankEval() throws Exception {
             double qualityLevel = evalQuality.getQualityLevel();        // <3>
             assertEquals(1.0 / 3.0, qualityLevel, 0.0);
             List<RatedSearchHit> hitsAndRatings = evalQuality.getHitsAndRatings();
-            RatedSearchHit ratedSearchHit = hitsAndRatings.get(0);
+            RatedSearchHit ratedSearchHit = hitsAndRatings.get(2);
             assertEquals("3", ratedSearchHit.getSearchHit().getId());   // <4>
             assertFalse(ratedSearchHit.getRating().isPresent());        // <5>
             MetricDetail metricDetails = evalQuality.getMetricDetails();

diff --git a/...ves/integ-test-zip/src/test/java/org/elasticsearch/test/rest/CreatedLocationHeaderIT.java b/...ves/integ-test-zip/src/test/java/org/elasticsearch/test/rest/CreatedLocationHeaderIT.java
@@ -21,18 +21,22 @@
 
 import org.apache.http.entity.ContentType;
 import org.apache.http.entity.StringEntity;
+import org.elasticsearch.client.Request;
 import org.elasticsearch.client.Response;
+import org.junit.Before;
 
 import java.io.IOException;
 
 import static java.util.Collections.emptyMap;
 import static java.util.Collections.singletonMap;
+import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.startsWith;
 
 /**
  * Tests for the "Location" header returned when returning {@code 201 CREATED}.
  */
 public class CreatedLocationHeaderIT extends ESRestTestCase {
+
     public void testCreate() throws IOException {
         locationTestCase("PUT", "test/test/1");
     }
@@ -54,8 +58,11 @@ public void testUpsert() throws IOException {
     private void locationTestCase(String method, String url) throws IOException {
         locationTestCase(client().performRequest(method, url, emptyMap(),
             new StringEntity("{\"test\": \"test\"}", ContentType.APPLICATION_JSON)));
+        // we have to delete the index otherwise the second indexing request will route to the single shard and not produce a 201
+        final Response response = client().performRequest(new Request("DELETE", "test"));
+        assertThat(response.getStatusLine().getStatusCode(), equalTo(200));
         locationTestCase(client().performRequest(method, url + "?routing=cat", emptyMap(),
-            new StringEntity("{\"test\": \"test\"}", ContentType.APPLICATION_JSON)));
+                new StringEntity("{\"test\": \"test\"}", ContentType.APPLICATION_JSON)));
     }
 
     private void locationTestCase(Response response) throws IOException {
@@ -65,4 +72,5 @@ private void locationTestCase(Response response) throws IOException {
         Response getResponse = client().performRequest("GET", location);
         assertEquals(singletonMap("test", "test"), entityAsMap(getResponse).get("_source"));
     }
+
 }
diff --git a/docs/reference/aggregations/bucket/children-aggregation.asciidoc b/docs/reference/aggregations/bucket/children-aggregation.asciidoc
@@ -137,8 +137,8 @@ Possible response:
   "took": 25,
   "timed_out": false,
   "_shards": {
-    "total": 5,
-    "successful": 5,
+    "total": 1,
+    "successful": 1,
     "skipped" : 0,
     "failed": 0
   },

diff --git a/docs/reference/aggregations/metrics/geocentroid-aggregation.asciidoc b/docs/reference/aggregations/metrics/geocentroid-aggregation.asciidoc
@@ -60,8 +60,8 @@ The response for the above aggregation:
     "aggregations": {
         "centroid": {
             "location": {
-                "lat": 51.00982963806018,
-                "lon": 3.9662131061777472
+                "lat": 51.009829603135586,
+                "lon": 3.9662130642682314
             },
             "count": 6
         }
@@ -113,8 +113,8 @@ The response for the above aggregation:
                    "doc_count": 3,
                    "centroid": {
                       "location": {
-                         "lat": 52.371655656024814,
-                         "lon": 4.909563297405839
+                         "lat": 52.371655642054975,
+                         "lon": 4.9095632415264845
                       },
                       "count": 3
                    }
@@ -125,7 +125,7 @@ The response for the above aggregation:
                    "centroid": {
                       "location": {
                          "lat": 48.86055548675358,
-                         "lon": 2.3316944623366
+                         "lon": 2.331694420427084
                       },
                       "count": 2
                    }

diff --git a/docs/reference/analysis/charfilters/pattern-replace-charfilter.asciidoc b/docs/reference/analysis/charfilters/pattern-replace-charfilter.asciidoc
@@ -235,8 +235,8 @@ The output from the above is:
   "timed_out": false,
   "took": $body.took,
   "_shards": {
-    "total": 5,
-    "successful": 5,
+    "total": 1,
+    "successful": 1,
     "skipped" : 0,
     "failed": 0
   },

diff --git a/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc
@@ -294,8 +294,8 @@ GET my_index/_search
   "took": $body.took,
   "timed_out": false,
   "_shards": {
-    "total": 5,
-    "successful": 5,
+    "total": 1,
+    "successful": 1,
     "skipped" : 0,
     "failed": 0
   },

diff --git a/docs/reference/api-conventions.asciidoc b/docs/reference/api-conventions.asciidoc
@@ -300,11 +300,7 @@ Responds:
     "indices": {
       "twitter": {
         "shards": {
-          "0": [{"state": "STARTED"}, {"state": "UNASSIGNED"}],
-          "1": [{"state": "STARTED"}, {"state": "UNASSIGNED"}],
-          "2": [{"state": "STARTED"}, {"state": "UNASSIGNED"}],
-          "3": [{"state": "STARTED"}, {"state": "UNASSIGNED"}],
-          "4": [{"state": "STARTED"}, {"state": "UNASSIGNED"}]
+          "0": [{"state": "STARTED"}, {"state": "UNASSIGNED"}]
         }
       }
     }

diff --git a/docs/reference/cat/allocation.asciidoc b/docs/reference/cat/allocation.asciidoc
@@ -16,7 +16,7 @@ Might respond with:
 [source,txt]
 --------------------------------------------------
 shards disk.indices disk.used disk.avail disk.total disk.percent host      ip        node
-     5         260b    47.3gb     43.4gb    100.7gb           46 127.0.0.1 127.0.0.1 CSUXak2
+     1         260b    47.3gb     43.4gb    100.7gb           46 127.0.0.1 127.0.0.1 CSUXak2
 --------------------------------------------------
 // TESTRESPONSE[s/\d+(\.\d+)?[tgmk]?b/\\d+(\\.\\d+)?[tgmk]?b/ s/46/\\d+/]
 // TESTRESPONSE[s/CSUXak2/.+/ _cat]

diff --git a/docs/reference/cat/health.asciidoc b/docs/reference/cat/health.asciidoc
@@ -14,7 +14,7 @@ GET /_cat/health?v
 [source,txt]
 --------------------------------------------------
 epoch      timestamp cluster       status node.total node.data shards pri relo init unassign pending_tasks max_task_wait_time active_shards_percent
-1475871424 16:17:04  elasticsearch green           1         1      5   5    0    0        0             0                  -                100.0%
+1475871424 16:17:04  elasticsearch green           1         1      1   1    0    0        0             0                  -                100.0%
 --------------------------------------------------
 // TESTRESPONSE[s/1475871424 16:17:04/\\d+ \\d+:\\d+:\\d+/]
 // TESTRESPONSE[s/elasticsearch/[^ ]+/ s/0                  -/\\d+ (-|\\d+(\\.\\d+)?[ms]+)/ _cat]
@@ -33,7 +33,7 @@ which looks like:
 [source,txt]
 --------------------------------------------------
 cluster       status node.total node.data shards pri relo init unassign pending_tasks max_task_wait_time active_shards_percent
-elasticsearch green           1         1      5   5    0    0        0             0                  -                100.0%
+elasticsearch green           1         1      1   1    0    0        0             0                  -                100.0%
 --------------------------------------------------
 // TESTRESPONSE[s/elasticsearch/[^ ]+/ s/0                  -/\\d+ (-|\\d+(\\.\\d+)?[ms]+)/ _cat]
 

diff --git a/docs/reference/cat/indices.asciidoc b/docs/reference/cat/indices.asciidoc
@@ -18,7 +18,7 @@ Might respond with:
 --------------------------------------------------
 health status index    uuid                   pri rep docs.count docs.deleted store.size pri.store.size
 yellow open   twitter  u8FNjxh8Rfy_awN11oDKYQ   1   1       1200            0     88.1kb         88.1kb
-green  open   twitter2 nYFWZEO7TUiOjLQXBaYJpA   5   0          0            0       260b           260b
+green  open   twitter2 nYFWZEO7TUiOjLQXBaYJpA   1   0          0            0       260b           260b
 --------------------------------------------------
 // TESTRESPONSE[s/\d+(\.\d+)?[tgmk]?b/\\d+(\\.\\d+)?[tgmk]?b/]
 // TESTRESPONSE[s/u8FNjxh8Rfy_awN11oDKYQ|nYFWZEO7TUiOjLQXBaYJpA/.+/ _cat]
@@ -81,7 +81,7 @@ Which looks like:
 --------------------------------------------------
 health status index    uuid                   pri rep docs.count docs.deleted store.size pri.store.size
 yellow open   twitter  u8FNjxh8Rfy_awN11oDKYQ   1   1       1200            0     88.1kb         88.1kb
-green  open   twitter2 nYFWZEO7TUiOjLQXBaYJpA   5   0          0            0       260b           260b
+green  open   twitter2 nYFWZEO7TUiOjLQXBaYJpA   1   0          0            0       260b           260b
 --------------------------------------------------
 // TESTRESPONSE[s/\d+(\.\d+)?[tgmk]?b/\\d+(\\.\\d+)?[tgmk]?b/]
 // TESTRESPONSE[s/u8FNjxh8Rfy_awN11oDKYQ|nYFWZEO7TUiOjLQXBaYJpA/.+/ _cat]

diff --git a/docs/reference/cat/segments.asciidoc b/docs/reference/cat/segments.asciidoc
@@ -17,8 +17,8 @@ might look like:
 ["source","txt",subs="attributes,callouts"]
 --------------------------------------------------
 index shard prirep ip        segment generation docs.count docs.deleted size size.memory committed searchable version compound
-test  4     p      127.0.0.1 _0               0          1            0  3kb        2042 false     true       {lucene_version}   true
-test1 4     p      127.0.0.1 _0               0          1            0  3kb        2042 false     true       {lucene_version}   true
+test  0     p      127.0.0.1 _0               0          1            0  3kb        2042 false     true       {lucene_version}   true
+test1 0     p      127.0.0.1 _0               0          1            0  3kb        2042 false     true       {lucene_version}   true
 --------------------------------------------------
 // TESTRESPONSE[s/3kb/\\d+(\\.\\d+)?[mk]?b/ s/2042/\\d+/ _cat]
 

diff --git a/docs/reference/cluster/health.asciidoc b/docs/reference/cluster/health.asciidoc
@@ -3,7 +3,7 @@
 
 The cluster health API allows to get a very simple status on the health
 of the cluster. For example, on a quiet single node cluster with a single index
-with 5 shards and one replica, this:
+with one shard and one replica, this:
 
 [source,js]
 --------------------------------------------------
@@ -22,11 +22,11 @@ Returns this:
   "timed_out" : false,
   "number_of_nodes" : 1,
   "number_of_data_nodes" : 1,
-  "active_primary_shards" : 5,
-  "active_shards" : 5,
+  "active_primary_shards" : 1,
+  "active_shards" : 1,
   "relocating_shards" : 0,
   "initializing_shards" : 0,
-  "unassigned_shards" : 5,
+  "unassigned_shards" : 1,
   "delayed_unassigned_shards": 0,
   "number_of_pending_tasks" : 0,
   "number_of_in_flight_fetch": 0,

diff --git a/docs/reference/getting-started.asciidoc b/docs/reference/getting-started.asciidoc
@@ -95,7 +95,7 @@ Replication is important for two primary reasons:
 To summarize, each index can be split into multiple shards. An index can also be replicated zero (meaning no replicas) or more times. Once replicated, each index will have primary shards (the original shards that were replicated from) and replica shards (the copies of the primary shards).
 The number of shards and replicas can be defined per index at the time the index is created. After the index is created, you may change the number of replicas dynamically anytime but you cannot change the number of shards after-the-fact.
 
-By default, each index in Elasticsearch is allocated 5 primary shards and 1 replica which means that if you have at least two nodes in your cluster, your index will have 5 primary shards and another 5 replica shards (1 complete replica) for a total of 10 shards per index.
+By default, each index in Elasticsearch is allocated one primary shard and one replica which means that if you have at least two nodes in your cluster, your index will have one primary shard and another replica shard (one complete replica) for a total of two shards per index.
 
 NOTE: Each Elasticsearch shard is a Lucene index.  There is a maximum number of documents you can have in a single Lucene index.  As of https://issues.apache.org/jira/browse/LUCENE-5843[`LUCENE-5843`], the limit is `2,147,483,519` (= Integer.MAX_VALUE - 128) documents.
 You can monitor shard sizes using the {ref}/cat-shards.html[`_cat/shards`] API.
@@ -366,11 +366,11 @@ And the response:
 [source,txt]
 --------------------------------------------------
 health status index    uuid                   pri rep docs.count docs.deleted store.size pri.store.size
-yellow open   customer 95SQ4TSUT7mWBT7VNHH67A   5   1          0            0       260b           260b
+yellow open   customer 95SQ4TSUT7mWBT7VNHH67A   1   1          0            0       260b           260b
 --------------------------------------------------
 // TESTRESPONSE[s/95SQ4TSUT7mWBT7VNHH67A/.+/ s/260b/\\d+\\.?\\d?k?b/ _cat]
 
-The results of the second command tells us that we now have 1 index named customer and it has 5 primary shards and 1 replica (the defaults) and it contains 0 documents in it.
+The results of the second command tells us that we now have one index named customer and it has one primary shard and one replica (the defaults) and it contains zero documents in it.
 
 You might also notice that the customer index has a yellow health tagged to it. Recall from our previous discussion that yellow means that some replicas are not (yet) allocated. The reason this happens for this index is because Elasticsearch by default created one replica for this index. Since we only have one node running at the moment, that one replica cannot yet be allocated (for high availability) until a later point in time when another node joins the cluster. Once that replica gets allocated onto a second node, the health status for this index will turn to green.
 

diff --git a/docs/reference/glossary.asciidoc b/docs/reference/glossary.asciidoc
@@ -105,12 +105,13 @@
   you index a document, it is indexed first on the primary shard, then
   on all <<glossary-replica-shard,replicas>> of the primary shard.
   +
-  By default, an <<glossary-index,index>> has 5 primary shards. You can
-  specify fewer or more primary shards to scale the number of
-  <<glossary-document,documents>> that your index can handle.
+  By default, an <<glossary-index,index>> has one primary shard. You can specify
+  more primary shards to scale the number of <<glossary-document,documents>>
+  that your index can handle.
   +
-  You cannot change the number of primary shards in an index, once the
-  index is created.
+  You cannot change the number of primary shards in an index, once the index is
+  index is created. However, an index can be split into a new index using the
+  <<indices-split-index, split API>>.
   +
   See also <<glossary-routing,routing>>
 

diff --git a/docs/reference/how-to/recipes/stemming.asciidoc b/docs/reference/how-to/recipes/stemming.asciidoc
@@ -78,31 +78,31 @@ GET index/_search
   "took": 2,
   "timed_out": false,
   "_shards": {
-    "total": 5,
-    "successful": 5,
+    "total": 1,
+    "successful": 1,
     "skipped" : 0,
     "failed": 0
   },
   "hits": {
     "total": 2,
-    "max_score": 0.2876821,
+    "max_score": 0.18232156,
     "hits": [
       {
         "_index": "index",
         "_type": "_doc",
-        "_id": "2",
-        "_score": 0.2876821,
+        "_id": "1",
+        "_score": 0.18232156,
         "_source": {
-          "body": "A pair of skis"
+          "body": "Ski resort"
         }
       },
       {
         "_index": "index",
         "_type": "_doc",
-        "_id": "1",
-        "_score": 0.2876821,
+        "_id": "2",
+        "_score": 0.18232156,
         "_source": {
-          "body": "Ski resort"
+          "body": "A pair of skis"
         }
       }
     ]
@@ -136,20 +136,20 @@ GET index/_search
   "took": 1,
   "timed_out": false,
   "_shards": {
-    "total": 5,
-    "successful": 5,
+    "total": 1,
+    "successful": 1,
     "skipped" : 0,
     "failed": 0
   },
   "hits": {
     "total": 1,
-    "max_score": 0.2876821,
+    "max_score": 0.80259144,
     "hits": [
       {
         "_index": "index",
         "_type": "_doc",
         "_id": "1",
-        "_score": 0.2876821,
+        "_score": 0.80259144,
         "_source": {
           "body": "Ski resort"
         }
@@ -193,20 +193,20 @@ GET index/_search
   "took": 2,
   "timed_out": false,
   "_shards": {
-    "total": 5,
-    "successful": 5,
+    "total": 1,
+    "successful": 1,
     "skipped" : 0,
     "failed": 0
   },
   "hits": {
     "total": 1,
-    "max_score": 0.2876821,
+    "max_score": 0.80259144,
     "hits": [
       {
         "_index": "index",
         "_type": "_doc",
         "_id": "1",
-        "_score": 0.2876821,
+        "_score": 0.80259144,
         "_source": {
           "body": "Ski resort"
         }

diff --git a/docs/reference/indices/flush.asciidoc b/docs/reference/indices/flush.asciidoc
@@ -106,11 +106,7 @@ which returns something similar to:
                    "num_docs" : 0
                  }
                }
-            ],
-            "1": ...,
-            "2": ...,
-            "3": ...,
-            "4": ...
+            ]
          }
       }
    }
@@ -120,10 +116,6 @@ which returns something similar to:
 // TESTRESPONSE[s/"translog_uuid" : "hnOG3xFcTDeoI_kvvvOdNA"/"translog_uuid": $body.indices.twitter.shards.0.0.commit.user_data.translog_uuid/]
 // TESTRESPONSE[s/"history_uuid" : "XP7KDJGiS1a2fHYiFL5TXQ"/"history_uuid": $body.indices.twitter.shards.0.0.commit.user_data.history_uuid/]
 // TESTRESPONSE[s/"sync_id" : "AVvFY-071siAOuFGEO9P"/"sync_id": $body.indices.twitter.shards.0.0.commit.user_data.sync_id/]
-// TESTRESPONSE[s/"1": \.\.\./"1": $body.indices.twitter.shards.1/]
-// TESTRESPONSE[s/"2": \.\.\./"2": $body.indices.twitter.shards.2/]
-// TESTRESPONSE[s/"3": \.\.\./"3": $body.indices.twitter.shards.3/]
-// TESTRESPONSE[s/"4": \.\.\./"4": $body.indices.twitter.shards.4/]
 <1> the `sync id` marker
 
 [float]