Add conditional token filter to elasticsearch #31958
Changes from 6 commits
@@ -0,0 +1,90 @@
[[analysis-condition-tokenfilter]]
=== Conditional Token Filter

The conditional token filter takes a predicate script and a list of subfilters, and
only applies the subfilters to the current token if it matches the predicate.

[float]
=== Options
[horizontal]
filters:: a list of token filters to apply to the current token if the predicate
matches. These can be any token filters defined elsewhere in the index mappings.

script:: a predicate script that determines whether or not the filters will be applied
to the current token. Note that only inline scripts are supported.

[float]
=== Settings example

You can set it up like:

[source,js]
--------------------------------------------------
PUT /multiplexer_example
{
  "settings" : {
    "analysis" : {
      "analyzer" : {
        "my_analyzer" : {
          "tokenizer" : "standard",
          "filter" : [ "my_condition" ]
        }
      },
      "filter" : {
        "my_condition" : {
          "type" : "condition",
          "filters" : [ "lowercase" ],
          "script" : {
            "source" : "return term.term.length() < 5" <1>
          }
        }
      }
    }
  }
}
--------------------------------------------------
// CONSOLE

Review comment (on the "source" line above): I don't believe you need the ...

<1> This will only apply the lowercase filter to terms that are less than 5
characters in length
And test it like:

[source,js]
--------------------------------------------------
POST /multiplexer_example/_analyze
{
  "analyzer" : "my_analyzer",
  "text" : "What Flapdoodle"
}
--------------------------------------------------
// CONSOLE
// TEST[continued]

And it'd respond:

[source,js]
--------------------------------------------------
{
  "tokens": [
    {
      "token": "what", <1>
      "start_offset": 0,
      "end_offset": 4,
      "type": "<ALPHANUM>",
      "position": 0
    },
    {
      "token": "Flapdoodle", <2>
      "start_offset": 5,
      "end_offset": 15,
      "type": "<ALPHANUM>",
      "position": 1
    }
  ]
}
--------------------------------------------------
// TESTRESPONSE
<1> The term `What` has been lowercased, because it is only 4 characters long
<2> The term `Flapdoodle` has been left in its original case, because it doesn't pass
the predicate
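As background, the `condition` filter is a thin wrapper around Lucene's `ConditionalTokenFilter`. The standalone sketch below reproduces the example above directly against the Lucene API; it is not part of this change, the class name is a placeholder, and it assumes `lucene-core` and `lucene-analyzers-common` 7.4+ on the classpath.

[source,java]
--------------------------------------------------
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class ConditionalLowercaseDemo {
    public static void main(String[] args) throws IOException {
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("What Flapdoodle"));

        // Only route tokens shorter than 5 characters through LowerCaseFilter,
        // mirroring the "term.term.length() < 5" predicate in the docs example.
        TokenStream stream = new ConditionalTokenFilter(tokenizer, LowerCaseFilter::new) {
            private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

            @Override
            protected boolean shouldFilter() {
                return termAtt.length() < 5;
            }
        };

        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            System.out.println(term.toString()); // prints "what", then "Flapdoodle"
        }
        stream.end();
        stream.close();
    }
}
--------------------------------------------------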
@@ -111,9 +111,16 @@
import org.apache.lucene.analysis.tr.ApostropheFilter;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.apache.lucene.util.SetOnce;
import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.env.Environment;
import org.elasticsearch.env.NodeEnvironment;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory;

@@ -127,10 +134,15 @@
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.watcher.ResourceWatcherService;
import org.tartarus.snowball.ext.DutchStemmer;
import org.tartarus.snowball.ext.FrenchStemmer;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

@@ -141,6 +153,14 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {

    private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(CommonAnalysisPlugin.class));

    private final SetOnce<ScriptService> scriptService = new SetOnce<>();

    @Override
    public Collection<Object> createComponents(Client client, ClusterService clusterService, ThreadPool threadPool,
                                               ResourceWatcherService resourceWatcherService, ScriptService scriptService,
                                               NamedXContentRegistry xContentRegistry, Environment environment,
                                               NodeEnvironment nodeEnvironment, NamedWriteableRegistry namedWriteableRegistry) {
        this.scriptService.set(scriptService);
Review comment: We're never very happy with using ...
        return Collections.emptyList();
    }

    @Override
    public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
        Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers = new TreeMap<>();

@@ -202,6 +222,8 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
        filters.put("classic", ClassicFilterFactory::new);
        filters.put("czech_stem", CzechStemTokenFilterFactory::new);
        filters.put("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
        filters.put("condition",
            requriesAnalysisSettings((i, e, n, s) -> new ScriptedConditionTokenFilterFactory(i, n, s, scriptService.get())));
Review discussion:
- requires has a typo?
- this is a pre-existing typo; I'll open a separate PR to fix it
        filters.put("decimal_digit", DecimalDigitFilterFactory::new);
        filters.put("delimited_payload_filter", LegacyDelimitedPayloadTokenFilterFactory::new);
        filters.put("delimited_payload", DelimitedPayloadTokenFilterFactory::new);
@@ -0,0 +1,95 @@
package org.elasticsearch.analysis.common;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.ReferringFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.script.AnalysisPredicateScript;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.script.ScriptType;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

/**
 * A factory for a conditional token filter that only applies child filters if the underlying token
 * matches an {@link AnalysisPredicateScript}
 */
public class ScriptedConditionTokenFilterFactory extends AbstractTokenFilterFactory implements ReferringFilterFactory {

    private final AnalysisPredicateScript.Factory factory;
    private final List<TokenFilterFactory> filters = new ArrayList<>();
    private final List<String> filterNames;

    ScriptedConditionTokenFilterFactory(IndexSettings indexSettings, String name,
                                        Settings settings, ScriptService scriptService) {
        super(indexSettings, name, settings);

        Settings scriptSettings = settings.getAsSettings("script");
        Script script = Script.parse(scriptSettings);
        if (script.getType() != ScriptType.INLINE) {
            throw new IllegalArgumentException("Cannot use stored scripts in tokenfilter [" + name + "]");
        }
        this.factory = scriptService.compile(script, AnalysisPredicateScript.CONTEXT);

        this.filterNames = settings.getAsList("filters");
Review comment: should we fail on an empty list?

    }
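One possible shape for the check asked about in the review comment above, sketched as a suggestion only; the decision to throw and the exception message are assumptions, not part of this change.

[source,java]
--------------------------------------------------
// Hypothetical constructor excerpt: fail fast on an empty "filters" list
// instead of silently building a condition filter that never changes anything.
this.filterNames = settings.getAsList("filters");
if (this.filterNames.isEmpty()) {
    throw new IllegalArgumentException(
        "Token filter [" + name + "] of type [condition] requires a non-empty [filters] list");
}
--------------------------------------------------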

    @Override
    public TokenStream create(TokenStream tokenStream) {
        // Compose the configured filters into a single wrapper; they are applied
        // in list order to tokens that match the predicate.
        Function<TokenStream, TokenStream> filter = in -> {
            for (TokenFilterFactory tff : filters) {
                in = tff.create(in);
            }
            return in;
        };
        AnalysisPredicateScript script = factory.newInstance();
        final AnalysisPredicateScript.Term term = new AnalysisPredicateScript.Term();
        return new ConditionalTokenFilter(tokenStream, filter) {

            CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
            PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
            PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
            OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
            TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
            KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);

            @Override
            protected boolean shouldFilter() {
                // Copy the current token's attributes into the reusable Term and
                // let the predicate script decide whether the filters should run.
                term.term = termAtt;
                term.posInc = posIncAtt.getPositionIncrement();
                term.posLen = posLenAtt.getPositionLength();
                term.startOffset = offsetAtt.startOffset();
                term.endOffset = offsetAtt.endOffset();
                term.type = typeAtt.type();
                term.isKeyword = keywordAtt.isKeyword();
                return script.execute(term);
            }
        };
    }

    @Override
    public void setReferences(Map<String, TokenFilterFactory> factories) {
        // Called once all token filter factories for the index have been built,
        // resolving the configured filter names into their factories.
        for (String filter : filterNames) {
            TokenFilterFactory tff = factories.get(filter);
            if (tff == null) {
                throw new IllegalArgumentException("ScriptedConditionTokenFilter [" + name() +
                    "] refers to undefined token filter [" + filter + "]");
            }
            filters.add(tff);
        }
    }

}
@@ -0,0 +1,70 @@
package org.elasticsearch.analysis.common;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.env.TestEnvironment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.script.AnalysisPredicateScript;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptContext;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.test.ESTokenStreamTestCase;
import org.elasticsearch.test.IndexSettingsModule;

import java.util.Collections;

public class ScriptedConditionTokenFilterTests extends ESTokenStreamTestCase {

    public void testSimpleCondition() throws Exception {
        Settings settings = Settings.builder()
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();
        Settings indexSettings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.analysis.filter.cond.type", "condition")
            .put("index.analysis.filter.cond.script.source", "return \"two\".equals(term.term)")
            .putList("index.analysis.filter.cond.filters", "uppercase")
            .put("index.analysis.analyzer.myAnalyzer.type", "custom")
            .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard")
            .putList("index.analysis.analyzer.myAnalyzer.filter", "cond")
            .build();
        IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);

        AnalysisPredicateScript.Factory factory = () -> new AnalysisPredicateScript() {
            @Override
            public boolean execute(Term term) {
                return "two".contentEquals(term.term);
            }
        };

        ScriptService scriptService = new ScriptService(indexSettings, Collections.emptyMap(), Collections.emptyMap()) {
            @Override
            public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryType> context) {
                assertEquals(context, AnalysisPredicateScript.CONTEXT);
                assertEquals(new Script("return \"two\".equals(term.term)"), script);
                return (FactoryType) factory;
            }
        };

        CommonAnalysisPlugin plugin = new CommonAnalysisPlugin();
        plugin.createComponents(null, null, null, null, scriptService, null, null, null, null);
        AnalysisModule module
            = new AnalysisModule(TestEnvironment.newEnvironment(settings), Collections.singletonList(plugin));

        IndexAnalyzers analyzers = module.getAnalysisRegistry().build(idxSettings);

        try (NamedAnalyzer analyzer = analyzers.get("myAnalyzer")) {
            assertNotNull(analyzer);
            assertAnalyzesTo(analyzer, "one two three", new String[]{
                "one", "TWO", "three"
            });
        }

    }

}
@@ -175,3 +175,13 @@ class org.elasticsearch.index.similarity.ScriptedSimilarity$Doc {
  int getLength()
  float getFreq()
}

class org.elasticsearch.script.AnalysisPredicateScript$Term {
  CharSequence term
  int posInc
  int posLen
  int startOffset
  int endOffset
  String type
  boolean isKeyword
}
Review discussion:
- I'm exposing this information directly at the moment, but it might be better to expose getters instead so that consumers don't try and do things like change offsets or position increments.
- +1 to exposing getters
- also should we expose the absolute position rather than the position increment? This looks more useful to me for filtering. Or both?
- Painless doesn't require the ...
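As a rough illustration of the getter-based shape being discussed, one possible version of the `Term` class might look like the sketch below. This is hypothetical and not part of the change; the absolute `position` field only reflects the reviewer's suggestion.

[source,java]
--------------------------------------------------
// Hypothetical sketch only: fields become package-private and scripts read
// them through getters, so script authors cannot rewrite offsets or
// position increments. The "position" field is the reviewer's idea of
// exposing an absolute position alongside (or instead of) the increment.
public static class Term {
    CharSequence term;
    int position;
    int posInc;
    int posLen;
    int startOffset;
    int endOffset;
    String type;
    boolean isKeyword;

    public CharSequence getTerm() { return term; }
    public int getPosition() { return position; }
    public int getPositionIncrement() { return posInc; }
    public int getPositionLength() { return posLen; }
    public int getStartOffset() { return startOffset; }
    public int getEndOffset() { return endOffset; }
    public String getType() { return type; }
    public boolean isKeyword() { return isKeyword; }
}
--------------------------------------------------

If Painless property shortcuts are whitelisted for these getters, scripts could likely keep writing `term.term` rather than `term.getTerm()`.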
@@ -0,0 +1,32 @@
package org.elasticsearch.painless;

import org.elasticsearch.painless.spi.Whitelist;
import org.elasticsearch.script.AnalysisPredicateScript;
import org.elasticsearch.script.ScriptContext;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class AnalysisScriptTests extends ScriptTestCase {
Review discussion:
- I think this should be in your plugin instead of painless if we can manage it.
- I don't follow - do you mean moving the analysis script context out of core and into the common analysis plugin, or do you mean creating an entirely new plugin just for this token filter (and any other script-based filters we come up with)?
- I mean I feel like it should live in the common analysis plugin and not in the server at all. Since it is a new thing I figure we should isolate it as much as we can so we know when stuff is using it. Not because it is dirty or anything, just because a smaller core is easier to reason about. And keeping this entirely within the analysis-common plugin helps to prove out the work that we've done for plugins extending painless.
- The tricky part here will be unit testing, as ScriptTestCase is a painless-specific class. The painless extension module uses rest tests only, but I'd much rather have compiled unit tests here. Maybe we should build a separate painless-testing module, that can be used for tests elsewhere?
- In general we try to unit test this sort of thing without painless and use a mock script engine instead that just calls code on the test class. It is totally reasonable to have a few integration tests that do use painless though.

    @Override
    protected Map<ScriptContext<?>, List<Whitelist>> scriptContexts() {
        Map<ScriptContext<?>, List<Whitelist>> contexts = new HashMap<>();
        contexts.put(AnalysisPredicateScript.CONTEXT, Whitelist.BASE_WHITELISTS);
        return contexts;
    }

    public void testAnalysisScript() {
        AnalysisPredicateScript.Factory factory = scriptEngine.compile("test", "return \"one\".contentEquals(term.term)",
            AnalysisPredicateScript.CONTEXT, Collections.emptyMap());
Review comment: Having to use ...

        AnalysisPredicateScript script = factory.newInstance();
        AnalysisPredicateScript.Term term = new AnalysisPredicateScript.Term();
        term.term = "one";
        assertTrue(script.execute(term));
        term.term = "two";
        assertFalse(script.execute(term));
    }
}
Review comment: We should be explicit that these filters are chained. Maybe call it `filter` (no plural) for consistency with analyzers?
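On the chaining point: in `ScriptedConditionTokenFilterFactory#create` the entries of `filters` wrap the stream in list order, so the first entry sits closest to the tokenizer. A small illustrative sketch of that fold (placeholder class name, not part of the change):

[source,java]
--------------------------------------------------
import java.util.List;
import java.util.function.Function;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.index.analysis.TokenFilterFactory;

class FilterChainSketch {
    // Folds the configured factories into one wrapper, mirroring create():
    // with ["lowercase", "porter_stem"], matching tokens are lowercased
    // first and then stemmed.
    static Function<TokenStream, TokenStream> chain(List<TokenFilterFactory> filters) {
        return in -> {
            for (TokenFilterFactory tff : filters) {
                in = tff.create(in);
            }
            return in;
        };
    }
}
--------------------------------------------------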