Merge remote-tracking branch 'upstream/main' into decommission/get-api-fix
opensearch-project · Oct 13, 2022 · 8caa328 · 8caa328
2 parents 5b62331 + 89550c0
commit 8caa328
Show file tree

Hide file tree

Showing 53 changed files with 1,697 additions and 79 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -29,6 +29,8 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 - Recommission API changes for service layer ([#4320](https://github.com/opensearch-project/OpenSearch/pull/4320))
 - Update GeoGrid base class access modifier to support extensibility ([#4572](https://github.com/opensearch-project/OpenSearch/pull/4572))
 - Add a new node role 'search' which is dedicated to provide search capability ([#4689](https://github.com/opensearch-project/OpenSearch/pull/4689))
+- Introduce experimental searchable snapshot API ([#4680](https://github.com/opensearch-project/OpenSearch/pull/4680))
+- Recommissioning of zone. REST layer support. ([#4604](https://github.com/opensearch-project/OpenSearch/pull/4604))
 ### Dependencies
 - Bumps `log4j-core` from 2.18.0 to 2.19.0
 - Bumps `reactor-netty-http` from 1.0.18 to 1.0.23
@@ -74,6 +76,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 - Update to Apache Lucene 9.4.0 ([#4661](https://github.com/opensearch-project/OpenSearch/pull/4661))
 - Controlling discovery for decommissioned nodes ([#4590](https://github.com/opensearch-project/OpenSearch/pull/4590))
 - Backport Apache Lucene version change for 2.4.0 ([#4677](https://github.com/opensearch-project/OpenSearch/pull/4677))
+- Fix weighted routing metadata deserialization error on process restart ([#4691](https://github.com/opensearch-project/OpenSearch/pull/4691))
 - Refactor Base Action class javadocs to OpenSearch.API ([#4732](https://github.com/opensearch-project/OpenSearch/pull/4732))
 - Migrate client transports to Apache HttpClient / Core 5.x ([#4459](https://github.com/opensearch-project/OpenSearch/pull/4459))
 ### Deprecated
@@ -138,12 +141,15 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
   - Addition of Doc values on the GeoShape Field
   - Addition of GeoShape ValueSource level code interfaces for accessing the DocValues.
   - Addition of Missing Value feature in the GeoShape Aggregations.
+- Install and configure Log4j JUL Adapter for Lucene 9.4 ([#4754](https://github.com/opensearch-project/OpenSearch/pull/4754))
 ### Changed
 ### Deprecated
 ### Removed
 ### Fixed
 - PR reference to checkout code for changelog verifier ([#4296](https://github.com/opensearch-project/OpenSearch/pull/4296))
 - Commit workflow for dependabot changelog helper ([#4331](https://github.com/opensearch-project/OpenSearch/pull/4331))
+- Better plural stemmer than minimal_english ([#4738](https://github.com/opensearch-project/OpenSearch/pull/4738))
+
 ### Security
 [Unreleased]: https://github.com/opensearch-project/OpenSearch/compare/2.2.0...HEAD
 [2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.2.0...2.x
diff --git a/client/rest-high-level/src/test/java/org/opensearch/client/RestHighLevelClientTests.java b/client/rest-high-level/src/test/java/org/opensearch/client/RestHighLevelClientTests.java
@@ -890,7 +890,8 @@ public void testApiNamingConventions() throws Exception {
             "cluster.put_weighted_routing",
             "cluster.get_weighted_routing",
             "cluster.put_decommission_awareness",
-            "cluster.get_decommission_awareness", };
+            "cluster.get_decommission_awareness",
+            "cluster.delete_decommission_awareness", };
         List<String> booleanReturnMethods = Arrays.asList("security.enable_user", "security.disable_user", "security.change_password");
         Set<String> deprecatedMethods = new HashSet<>();
         deprecatedMethods.add("indices.force_merge");

diff --git a/client/rest/build.gradle b/client/rest/build.gradle
@@ -56,6 +56,7 @@ dependencies {
   testImplementation "net.bytebuddy:byte-buddy:${versions.bytebuddy}"
   testImplementation "org.apache.logging.log4j:log4j-api:${versions.log4j}"
   testImplementation "org.apache.logging.log4j:log4j-core:${versions.log4j}"
+  testImplementation "org.apache.logging.log4j:log4j-jul:${versions.log4j}"
   testImplementation "org.apache.logging.log4j:log4j-slf4j-impl:${versions.log4j}"
 }
 

diff --git a/...analysis-common/src/main/java/org/opensearch/analysis/common/EnglishPluralStemFilter.java b/...analysis-common/src/main/java/org/opensearch/analysis/common/EnglishPluralStemFilter.java
@@ -0,0 +1,182 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+
+package org.opensearch.analysis.common;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+import java.io.IOException;
+
+/**
+ * Token filter that applies the {@link EnglishPluralStemmer} to every token
+ * that is not flagged as a keyword, shortening the term in place.
+ */
+public final class EnglishPluralStemFilter extends TokenFilter {
+    private final EnglishPluralStemmer stemmer = new EnglishPluralStemmer();
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+
+    /**
+     * Creates a filter that stems the tokens produced by {@code input}.
+     *
+     * @param input the upstream token stream
+     */
+    public EnglishPluralStemFilter(TokenStream input) {
+        super(input);
+    }
+
+    /**
+     * Advances the wrapped stream and, unless the token is marked as a keyword,
+     * truncates the term to the length returned by the stemmer.
+     *
+     * @return {@code true} if a token was produced, {@code false} once the input is exhausted
+     * @throws IOException if the wrapped stream fails
+     */
+    @Override
+    public boolean incrementToken() throws IOException {
+        if (input.incrementToken()) {
+            if (!keywordAttr.isKeyword()) {
+                final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+                termAtt.setLength(newlen);
+            }
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    /**
+     * Plural stemmer for English based on the {@link EnglishMinimalStemFilter}
+     * <p>
+     * This stemmer removes plurals but beyond EnglishMinimalStemFilter adds
+     * four new suffix rules to remove dangling e characters:
+     * <ul>
+     * <li>xes - "boxes" becomes "box"</li>
+     * <li>sses - "dresses" becomes "dress"</li>
+     * <li>shes - "dishes" becomes "dish"</li>
+     * <li>tches - "watches" becomes "watch"</li>
+     * </ul>
+     * See https://github.com/elastic/elasticsearch/issues/42892
+     * <p>
+     * In addition the s stemmer logic is amended so that
+     * <ul>
+     * <li>ees-&gt;ee so that bees matches bee</li>
+     * <li>ies-&gt;y only on longer words so that ties matches tie</li>
+     * <li>oes-&gt;o rule so that tomatoes matches tomato but retains e for some words eg shoes to shoe</li>
+     * </ul>
+     */
+    public static class EnglishPluralStemmer {
+
+        // Words ending in oes that retain the e when stemmed
+        public static final char[][] oesExceptions = { "shoes".toCharArray(), "canoes".toCharArray(), "oboes".toCharArray() };
+        // Words ending in ches that retain the e when stemmed
+        public static final char[][] chesExceptions = {
+            "cliches".toCharArray(),
+            "avalanches".toCharArray(),
+            "mustaches".toCharArray(),
+            "moustaches".toCharArray(),
+            "quiches".toCharArray(),
+            "headaches".toCharArray(),
+            "heartaches".toCharArray(),
+            "porsches".toCharArray(),
+            "tranches".toCharArray(),
+            "caches".toCharArray() };
+
+        /**
+         * Computes the stemmed length of the term held in {@code s}.
+         * <p>
+         * The buffer may be modified in place: the ies-&gt;y rule overwrites the
+         * {@code i} with a {@code y} before the length is shortened.
+         *
+         * @param s   buffer holding the term
+         * @param len number of valid characters in {@code s}
+         * @return the length of the stemmed term, never greater than {@code len}
+         */
+        @SuppressWarnings("fallthrough")
+        public int stem(char[] s, int len) {
+            // Only words of three or more characters that end in "s" are candidates.
+            if (len < 3 || s[len - 1] != 's') {
+                return len;
+            }
+
+            switch (s[len - 2]) {
+                case 'u':
+                case 's':
+                    return len;
+                case 'e':
+                    // Modified ies->y logic from original s-stemmer - only work on strings > 4
+                    // so spies -> spy still but pies->pie.
+                    // The original code also special-cased aies and eies for no good reason as far as I can tell.
+                    // ( no words of consequence - eg http://www.thefreedictionary.com/words-that-end-in-aies )
+                    if (len > 4 && s[len - 3] == 'i') {
+                        s[len - 3] = 'y';
+                        return len - 2;
+                    }
+
+                    // Suffix rules to remove any dangling "e"
+                    if (len > 3) {
+                        final char beforeE = s[len - 3];
+                        // xes (but >1 prefix so we can stem "boxes->box" but keep "axes->axe")
+                        if (len > 4 && beforeE == 'x') {
+                            return len - 2;
+                        }
+                        // oes: exceptions only lose the "s", everything else loses "es"
+                        if (beforeE == 'o') {
+                            return isException(s, len, oesExceptions) ? len - 1 : len - 2;
+                        }
+                        if (len > 4) {
+                            final char twoBeforeE = s[len - 4];
+                            // shes/sses
+                            if (twoBeforeE == 's' && (beforeE == 'h' || beforeE == 's')) {
+                                return len - 2;
+                            }
+                            // ches: exceptions only lose the "s", everything else loses "es"
+                            if (twoBeforeE == 'c' && beforeE == 'h') {
+                                return isException(s, len, chesExceptions) ? len - 1 : len - 2;
+                            }
+                        }
+                    }
+                    // No "e" rule applied: deliberately fall through and strip just the "s".
+
+                default:
+                    return len - 1;
+            }
+        }
+
+        /**
+         * Tests whether the term ends with one of the given exception words.
+         * <p>
+         * The whole exception must be present at the end of the term, so a term
+         * shorter than an entry never matches it. Without that requirement
+         * "ranches" would be treated as the exception "tranches" and stemmed to
+         * "ranche".
+         *
+         * @param s              buffer holding the term
+         * @param len            number of valid characters in {@code s}
+         * @param exceptionsList candidate suffixes to test against
+         * @return {@code true} if the term ends with any entry of {@code exceptionsList}
+         */
+        private boolean isException(char[] s, int len, char[][] exceptionsList) {
+            for (char[] exception : exceptionsList) {
+                if (exception.length > len) {
+                    // The term is too short to end with this entry.
+                    continue;
+                }
+                final int offset = len - exception.length;
+                boolean matched = true;
+                for (int i = 0; i < exception.length; i++) {
+                    if (s[offset + i] != exception[i]) {
+                        matched = false;
+                        break;
+                    }
+                }
+                if (matched) {
+                    return true;
+                }
+            }
+            return false;
+        }
+    }
+
+}
diff --git a/...alysis-common/src/main/java/org/opensearch/analysis/common/StemmerTokenFilterFactory.java b/...alysis-common/src/main/java/org/opensearch/analysis/common/StemmerTokenFilterFactory.java
@@ -154,6 +154,8 @@ public TokenStream create(TokenStream tokenStream) {
                 return new SnowballFilter(tokenStream, new EnglishStemmer());
             } else if ("minimal_english".equalsIgnoreCase(language) || "minimalEnglish".equalsIgnoreCase(language)) {
                 return new EnglishMinimalStemFilter(tokenStream);
+            } else if ("plural_english".equalsIgnoreCase(language) || "pluralEnglish".equalsIgnoreCase(language)) {
+                return new EnglishPluralStemFilter(tokenStream);
             } else if ("possessive_english".equalsIgnoreCase(language) || "possessiveEnglish".equalsIgnoreCase(language)) {
                 return new EnglishPossessiveFilter(tokenStream);
 

diff --git a/...s-common/src/test/java/org/opensearch/analysis/common/StemmerTokenFilterFactoryTests.java b/...s-common/src/test/java/org/opensearch/analysis/common/StemmerTokenFilterFactoryTests.java
@@ -111,6 +111,83 @@ public void testPorter2FilterFactory() throws IOException {
         }
     }
 
+    /**
+     * Builds a "my_plurals" analyzer (whitespace tokenizer followed by a stemmer filter
+     * configured with language "plural_english") for a random created-version on each
+     * iteration, checks that the factory produces an {@link EnglishPluralStemFilter},
+     * and asserts the expected stem for a list of sample words.
+     */
+    public void testEnglishPluralFilter() throws IOException {
+        int iters = scaledRandomIntBetween(20, 100);
+        for (int i = 0; i < iters; i++) {
+
+            Version v = VersionUtils.randomVersion(random());
+            Settings settings = Settings.builder()
+                .put("index.analysis.filter.my_plurals.type", "stemmer")
+                .put("index.analysis.filter.my_plurals.language", "plural_english")
+                .put("index.analysis.analyzer.my_plurals.tokenizer", "whitespace")
+                .put("index.analysis.analyzer.my_plurals.filter", "my_plurals")
+                .put(SETTING_VERSION_CREATED, v)
+                .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+                .build();
+
+            OpenSearchTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN);
+            TokenFilterFactory tokenFilter = analysis.tokenFilter.get("my_plurals");
+            assertThat(tokenFilter, instanceOf(StemmerTokenFilterFactory.class));
+            Tokenizer tokenizer = new WhitespaceTokenizer();
+            tokenizer.setReader(new StringReader("dresses"));
+            TokenStream create = tokenFilter.create(tokenizer);
+            IndexAnalyzers indexAnalyzers = analysis.indexAnalyzers;
+            NamedAnalyzer analyzer = indexAnalyzers.get("my_plurals");
+            assertThat(create, instanceOf(EnglishPluralStemFilter.class));
+
+            // Check old EnglishMinimalStemmer ("S" stemmer) logic
+            assertAnalyzesTo(analyzer, "phones", new String[] { "phone" });
+            assertAnalyzesTo(analyzer, "horses", new String[] { "horse" });
+            assertAnalyzesTo(analyzer, "cameras", new String[] { "camera" });
+
+            // The original s stemmer gives up on stemming oes words because English has no fixed rule for the stem
+            // (see https://howtospell.co.uk/making-O-words-plural )
+            // This stemmer removes the es but retains e for a small number of exceptions
+            assertAnalyzesTo(analyzer, "mosquitoes", new String[] { "mosquito" });
+            assertAnalyzesTo(analyzer, "heroes", new String[] { "hero" });
+            // oes exceptions that retain the e.
+            assertAnalyzesTo(analyzer, "shoes", new String[] { "shoe" });
+            assertAnalyzesTo(analyzer, "horseshoes", new String[] { "horseshoe" });
+            assertAnalyzesTo(analyzer, "canoes", new String[] { "canoe" });
+            assertAnalyzesTo(analyzer, "oboes", new String[] { "oboe" });
+
+            // Check improved EnglishPluralStemFilter logic
+            // sses
+            assertAnalyzesTo(analyzer, "dresses", new String[] { "dress" });
+            assertAnalyzesTo(analyzer, "possess", new String[] { "possess" });
+            assertAnalyzesTo(analyzer, "possesses", new String[] { "possess" });
+            // xes
+            assertAnalyzesTo(analyzer, "boxes", new String[] { "box" });
+            assertAnalyzesTo(analyzer, "axes", new String[] { "axe" });
+            // shes
+            assertAnalyzesTo(analyzer, "dishes", new String[] { "dish" });
+            assertAnalyzesTo(analyzer, "washes", new String[] { "wash" });
+            // ees
+            assertAnalyzesTo(analyzer, "employees", new String[] { "employee" });
+            assertAnalyzesTo(analyzer, "bees", new String[] { "bee" });
+            // tch
+            assertAnalyzesTo(analyzer, "watches", new String[] { "watch" });
+            assertAnalyzesTo(analyzer, "itches", new String[] { "itch" });
+            // ies->y but only for length >4
+            assertAnalyzesTo(analyzer, "spies", new String[] { "spy" });
+            assertAnalyzesTo(analyzer, "ties", new String[] { "tie" });
+            assertAnalyzesTo(analyzer, "lies", new String[] { "lie" });
+            assertAnalyzesTo(analyzer, "pies", new String[] { "pie" });
+            assertAnalyzesTo(analyzer, "dies", new String[] { "die" });
+
+            // ches: plain words drop "es", listed exceptions keep the "e"
+            assertAnalyzesTo(analyzer, "lunches", new String[] { "lunch" });
+            assertAnalyzesTo(analyzer, "avalanches", new String[] { "avalanche" });
+            assertAnalyzesTo(analyzer, "headaches", new String[] { "headache" });
+            assertAnalyzesTo(analyzer, "caches", new String[] { "cache" });
+            assertAnalyzesTo(analyzer, "beaches", new String[] { "beach" });
+            assertAnalyzesTo(analyzer, "britches", new String[] { "britch" });
+            assertAnalyzesTo(analyzer, "cockroaches", new String[] { "cockroach" });
+            assertAnalyzesTo(analyzer, "cliches", new String[] { "cliche" });
+            assertAnalyzesTo(analyzer, "quiches", new String[] { "quiche" });
+
+        }
+    }
+
     public void testMultipleLanguagesThrowsException() throws IOException {
         Version v = VersionUtils.randomVersion(random());
         Settings settings = Settings.builder()

diff --git a/qa/os/build.gradle b/qa/os/build.gradle
@@ -42,6 +42,7 @@ dependencies {
   api "org.apache.httpcomponents:fluent-hc:${versions.httpclient}"
   api "org.apache.logging.log4j:log4j-api:${versions.log4j}"
   api "org.apache.logging.log4j:log4j-core:${versions.log4j}"
+  api "org.apache.logging.log4j:log4j-jul:${versions.log4j}"
   api "org.apache.logging.log4j:log4j-jcl:${versions.log4j}"
   api "commons-codec:commons-codec:${versions.commonscodec}"
   api "commons-logging:commons-logging:${versions.commonslogging}"

diff --git a/...-api-spec/src/main/resources/rest-api-spec/api/cluster.delete_decommission_awareness.json b/...-api-spec/src/main/resources/rest-api-spec/api/cluster.delete_decommission_awareness.json
@@ -0,0 +1,19 @@
+{
+  "cluster.delete_decommission_awareness": {
+    "documentation": {
+      "url": "https://opensearch.org/docs/latest/opensearch/rest-api/decommission/",
+      "description": "Delete any existing decommission."
+    },
+    "stability": "experimental",
+    "url": {
+      "paths": [
+        {
+          "path": "/_cluster/decommission/awareness/",
+          "methods": [
+            "DELETE"
+          ]
+        }
+      ]
+    }
+  }
+}
diff --git a/server/build.gradle b/server/build.gradle
@@ -129,6 +129,7 @@ dependencies {
 
   // logging
   api "org.apache.logging.log4j:log4j-api:${versions.log4j}"
+  api "org.apache.logging.log4j:log4j-jul:${versions.log4j}"
   api "org.apache.logging.log4j:log4j-core:${versions.log4j}", optional
 
   // jna

diff --git a/server/licenses/log4j-jul-2.17.1.jar.sha1 b/server/licenses/log4j-jul-2.17.1.jar.sha1
@@ -0,0 +1 @@
+881333b463d47828eda7443b19811763367b1916