Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

INGEST: Add Pipeline Processor #32473

Merged
merged 13 commits into from
Aug 29, 2018
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ public Map<String, Processor.Factory> getProcessors(Processor.Parameters paramet
processors.put(KeyValueProcessor.TYPE, new KeyValueProcessor.Factory());
processors.put(URLDecodeProcessor.TYPE, new URLDecodeProcessor.Factory());
processors.put(BytesProcessor.TYPE, new BytesProcessor.Factory());
processors.put(PipelineProcessor.TYPE, new PipelineProcessor.Factory(parameters.ingestService));
return Collections.unmodifiableMap(processors);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.ingest.common;

import java.util.Map;
import org.elasticsearch.ingest.AbstractProcessor;
import org.elasticsearch.ingest.ConfigurationUtils;
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.IngestService;
import org.elasticsearch.ingest.Pipeline;
import org.elasticsearch.ingest.Processor;

public class PipelineProcessor extends AbstractProcessor {

public static final String TYPE = "pipeline";

private final String pipelineName;

private final IngestService ingestService;

PipelineProcessor(final String tag, String pipelineName, IngestService ingestService) {
super(tag);
this.pipelineName = pipelineName;
this.ingestService = ingestService;
}

@Override
public void execute(final IngestDocument ingestDocument) throws Exception {
Pipeline pipeline = ingestService.getPipeline(pipelineName);
if (pipeline == null) {
throw new IllegalStateException("Pipeline processor configured for non-existent pipeline [" + pipelineName + ']');
}
if (ingestDocument.executePipeline(pipeline) == false) {
throw new IllegalStateException("Recursive invocation of pipeline [" + pipelineName + "] detected.");
}
}

@Override
public String getType() {
return TYPE;
}

public static final class Factory implements Processor.Factory {

private final IngestService ingestService;

public Factory(IngestService ingestService) {
this.ingestService = ingestService;
}

@Override
public PipelineProcessor create(Map<String, Processor.Factory> registry, String processorTag,
Map<String, Object> config) throws Exception {
String pipeline =
ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "pipeline");
return new PipelineProcessor(processorTag, pipeline, ingestService);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
---
teardown:
- do:
ingest.delete_pipeline:
id: "inner"
ignore: 404

- do:
ingest.delete_pipeline:
id: "outer"
ignore: 404

---
"Test Pipeline Processor with Simple Inner Pipeline":
- do:
ingest.put_pipeline:
id: "inner"
body: >
{
"description" : "inner pipeline",
"processors" : [
{
"set" : {
"field": "foo",
"value": "bar"
}
},
{
"set" : {
"field": "baz",
"value": "blub"
}
}
]
}
- match: { acknowledged: true }

- do:
ingest.put_pipeline:
id: "outer"
body: >
{
"description" : "outer pipeline",
"processors" : [
{
"pipeline" : {
"pipeline": "inner"
}
}
]
}
- match: { acknowledged: true }

- do:
index:
index: test
type: test
id: 1
pipeline: "outer"
body: {}

- do:
get:
index: test
type: test
id: 1
- match: { _source.foo: "bar" }
- match: { _source.baz: "blub" }

---
"Test Pipeline Processor with Circular Pipelines":
- do:
ingest.put_pipeline:
id: "outer"
body: >
{
"description" : "outer pipeline",
"processors" : [
{
"pipeline" : {
"pipeline": "inner"
}
}
]
}
- match: { acknowledged: true }

- do:
ingest.put_pipeline:
id: "inner"
body: >
{
"description" : "inner pipeline",
"processors" : [
{
"pipeline" : {
"pipeline": "outer"
}
}
]
}
- match: { acknowledged: true }

- do:
catch: /illegal_state_exception/
index:
index: test
type: test
id: 1
pipeline: "outer"
body: {}
- match: { error.root_cause.0.type: "exception" }
- match: { error.root_cause.0.reason: "java.lang.IllegalArgumentException: java.lang.IllegalStateException: Recursive invocation of pipeline [inner] detected." }
21 changes: 21 additions & 0 deletions server/src/main/java/org/elasticsearch/ingest/IngestDocument.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@

package org.elasticsearch.ingest;

import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.Set;
import org.elasticsearch.common.Strings;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.mapper.IdFieldMapper;
Expand Down Expand Up @@ -55,6 +58,9 @@ public final class IngestDocument {
private final Map<String, Object> sourceAndMetadata;
private final Map<String, Object> ingestMetadata;

// Contains all pipelines that have been executed for this document
private final Set<Pipeline> executedPipelines = Collections.newSetFromMap(new IdentityHashMap<>());

public IngestDocument(String index, String type, String id, String routing,
Long version, VersionType versionType, Map<String, Object> source) {
this.sourceAndMetadata = new HashMap<>();
Expand Down Expand Up @@ -632,6 +638,21 @@ private static Object deepCopy(Object value) {
}
}

/**
* Executes the given pipeline with for this document unless the pipeline has already been executed
* for this document.
* @param pipeline Pipeline to execute
* @return true if pipeline was executed, false if it could not be executed because it was already executed for this document
* @throws Exception On exception in pipeline execution
*/
public boolean executePipeline(Pipeline pipeline) throws Exception {
if (this.executedPipelines.add(pipeline) == false) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of using a set, I think there a call stack could be passed through, that doesn't need to be a member variable? I don't know ingest document coming more mutable than it already is. This method signature is also odd, as I would expect this to be an exception, but it looks like you are avoiding that because it would collide with exceptions that could be thrown from the pipeline itself? But this should fail the pipeline anyways, so I think it is ok to use an exception?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rjernst I can't really pass the callstack with the document can I? I only have the execute method available to pass things (because any called pipeline itself could contain additional pipeline processors).

The reason I made this return boolean instead of throwing right away was more of a style thing to make it clear that the exception was triggered by the pipeline processor. But in hindsight this may be a little needlessly complex :) Moving it in here.

return false;
}
pipeline.execute(this);
return true;
}

@Override
public boolean equals(Object obj) {
if (obj == this) { return true; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ public IngestService(ClusterService clusterService, ThreadPool threadPool,
threadPool.getThreadContext(), threadPool::relativeTimeInMillis,
(delay, command) -> threadPool.schedule(
TimeValue.timeValueMillis(delay), ThreadPool.Names.GENERIC, command
)
), this
)
);
this.threadPool = threadPool;
Expand Down
40 changes: 40 additions & 0 deletions server/src/main/java/org/elasticsearch/ingest/PipelineHolder.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.ingest;

import java.util.HashMap;
import java.util.Map;

public final class PipelineHolder {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this accidentally added back? It shouldn't be necessary now right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rjernst yea I apparently suck at merging :) Will remove


// Ideally this should be in IngestMetadata class, but we don't have the processor factories around there.
// We know of all the processor factories when a node with all its plugin have been initialized. Also some
// processor factories rely on other node services. Custom metadata is statically registered when classes
// are loaded, so in the cluster state we just save the pipeline config and here we keep the actual pipelines around.
volatile Map<String, Pipeline> pipelines = new HashMap<>();

public Map<String, Pipeline> getPipelines() {
return pipelines;
}

public void setPipelines(Map<String, Pipeline> pipelines) {
this.pipelines = pipelines;
}
}
10 changes: 7 additions & 3 deletions server/src/main/java/org/elasticsearch/ingest/Processor.java
Original file line number Diff line number Diff line change
Expand Up @@ -97,22 +97,26 @@ class Parameters {
* instances that have run prior to in ingest.
*/
public final ThreadContext threadContext;

public final LongSupplier relativeTimeSupplier;


public final IngestService ingestService;

/**
* Provides scheduler support
*/
public final BiFunction<Long, Runnable, ScheduledFuture<?>> scheduler;

public Parameters(Environment env, ScriptService scriptService, AnalysisRegistry analysisRegistry, ThreadContext threadContext,
LongSupplier relativeTimeSupplier, BiFunction<Long, Runnable, ScheduledFuture<?>> scheduler) {
LongSupplier relativeTimeSupplier, BiFunction<Long, Runnable, ScheduledFuture<?>> scheduler,
IngestService ingestService) {
this.env = env;
this.scriptService = scriptService;
this.threadContext = threadContext;
this.analysisRegistry = analysisRegistry;
this.relativeTimeSupplier = relativeTimeSupplier;
this.scheduler = scheduler;
this.ingestService = ingestService;
}

}
Expand Down