-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add remove_by_pattern ingest processor
Signed-off-by: Gao Binlong <gbinlong@amazon.com>
- Loading branch information
1 parent
904c9a9
commit 00054dd
Showing
6 changed files
with
536 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
186 changes: 186 additions & 0 deletions
186
...es/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveByPatternProcessor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.ingest.common; | ||
|
||
import org.opensearch.common.Nullable; | ||
import org.opensearch.common.ValidationException; | ||
import org.opensearch.common.regex.Regex; | ||
import org.opensearch.core.common.Strings; | ||
import org.opensearch.ingest.AbstractProcessor; | ||
import org.opensearch.ingest.ConfigurationUtils; | ||
import org.opensearch.ingest.IngestDocument; | ||
import org.opensearch.ingest.Processor; | ||
|
||
import java.util.ArrayList; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Set; | ||
import java.util.stream.Collectors; | ||
|
||
import static org.opensearch.ingest.ConfigurationUtils.newConfigurationException; | ||
|
||
/** | ||
* Processor that removes existing fields by field patterns or excluding field patterns. | ||
*/ | ||
public final class RemoveByPatternProcessor extends AbstractProcessor { | ||
|
||
public static final String TYPE = "remove_by_pattern"; | ||
private final List<String> fieldPatterns; | ||
private final List<String> excludeFieldPatterns; | ||
|
||
RemoveByPatternProcessor( | ||
String tag, | ||
String description, | ||
@Nullable List<String> fieldPatterns, | ||
@Nullable List<String> excludeFieldPatterns | ||
) { | ||
super(tag, description); | ||
if (fieldPatterns != null && excludeFieldPatterns != null || fieldPatterns == null && excludeFieldPatterns == null) { | ||
throw new IllegalArgumentException("ether fieldPatterns and excludeFieldPatterns must be set"); | ||
} | ||
if (fieldPatterns == null) { | ||
this.fieldPatterns = null; | ||
this.excludeFieldPatterns = new ArrayList<>(excludeFieldPatterns); | ||
} else { | ||
this.fieldPatterns = new ArrayList<>(fieldPatterns); | ||
this.excludeFieldPatterns = null; | ||
} | ||
} | ||
|
||
public List<String> getFieldPatterns() { | ||
return fieldPatterns; | ||
} | ||
|
||
public List<String> getExcludeFieldPatterns() { | ||
return excludeFieldPatterns; | ||
} | ||
|
||
@Override | ||
public IngestDocument execute(IngestDocument document) { | ||
Set<String> existingFields = new HashSet<>(document.getSourceAndMetadata().keySet()); | ||
Set<String> metadataFields = document.getMetadata() | ||
.keySet() | ||
.stream() | ||
.map(IngestDocument.Metadata::getFieldName) | ||
.collect(Collectors.toSet()); | ||
|
||
if (fieldPatterns != null && !fieldPatterns.isEmpty()) { | ||
existingFields.forEach(field -> { | ||
// ignore metadata fields such as _index, _id, etc. | ||
if (!metadataFields.contains(field)) { | ||
final boolean matched = fieldPatterns.stream().anyMatch(pattern -> Regex.simpleMatch(pattern, field)); | ||
if (matched) { | ||
document.removeField(field); | ||
} | ||
} | ||
}); | ||
} | ||
|
||
if (excludeFieldPatterns != null && !excludeFieldPatterns.isEmpty()) { | ||
existingFields.forEach(field -> { | ||
// ignore metadata fields such as _index, _id, etc. | ||
if (!metadataFields.contains(field)) { | ||
final boolean matched = excludeFieldPatterns.stream().anyMatch(pattern -> Regex.simpleMatch(pattern, field)); | ||
if (!matched) { | ||
document.removeField(field); | ||
} | ||
} | ||
}); | ||
} | ||
|
||
return document; | ||
} | ||
|
||
@Override | ||
public String getType() { | ||
return TYPE; | ||
Check warning on line 103 in modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveByPatternProcessor.java Codecov / codecov/patchmodules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveByPatternProcessor.java#L103
|
||
} | ||
|
||
public static final class Factory implements Processor.Factory { | ||
|
||
public Factory() {} | ||
|
||
@Override | ||
public RemoveByPatternProcessor create( | ||
Map<String, Processor.Factory> registry, | ||
String processorTag, | ||
String description, | ||
Map<String, Object> config | ||
) throws Exception { | ||
final List<String> fieldPatterns = new ArrayList<>(); | ||
final List<String> excludeFieldPatterns = new ArrayList<>(); | ||
final Object fieldPattern = ConfigurationUtils.readOptionalObject(config, "field_pattern"); | ||
final Object excludeFieldPattern = ConfigurationUtils.readOptionalObject(config, "exclude_field_pattern"); | ||
|
||
if (fieldPattern == null && excludeFieldPattern == null || fieldPattern != null && excludeFieldPattern != null) { | ||
throw newConfigurationException( | ||
TYPE, | ||
processorTag, | ||
"field_pattern", | ||
"ether field_pattern or exclude_field_pattern must be set" | ||
); | ||
} | ||
|
||
if (fieldPattern != null) { | ||
if (fieldPattern instanceof List) { | ||
@SuppressWarnings("unchecked") | ||
List<String> fieldPatternList = (List<String>) fieldPattern; | ||
fieldPatterns.addAll(fieldPatternList); | ||
} else { | ||
fieldPatterns.add((String) fieldPattern); | ||
} | ||
validateFieldPatterns(processorTag, fieldPatterns, "field_pattern"); | ||
return new RemoveByPatternProcessor(processorTag, description, fieldPatterns, null); | ||
} else { | ||
if (excludeFieldPattern instanceof List) { | ||
@SuppressWarnings("unchecked") | ||
List<String> excludeFieldPatternList = (List<String>) excludeFieldPattern; | ||
excludeFieldPatterns.addAll(excludeFieldPatternList); | ||
} else { | ||
excludeFieldPatterns.add((String) excludeFieldPattern); | ||
} | ||
validateFieldPatterns(processorTag, excludeFieldPatterns, "exclude_field_pattern"); | ||
return new RemoveByPatternProcessor(processorTag, description, null, excludeFieldPatterns); | ||
} | ||
} | ||
|
||
private void validateFieldPatterns(String processorTag, List<String> patterns, String patternKey) { | ||
List<String> validationErrors = new ArrayList<>(); | ||
for (String fieldPattern : patterns) { | ||
if (fieldPattern.contains(" ")) { | ||
validationErrors.add(patternKey + " [" + fieldPattern + "] must not contain a space"); | ||
} | ||
if (fieldPattern.contains(",")) { | ||
validationErrors.add(patternKey + " [" + fieldPattern + "] must not contain a ','"); | ||
} | ||
if (fieldPattern.contains("#")) { | ||
validationErrors.add(patternKey + " [" + fieldPattern + "] must not contain a '#'"); | ||
} | ||
if (fieldPattern.contains(":")) { | ||
validationErrors.add(patternKey + " [" + fieldPattern + "] must not contain a ':'"); | ||
} | ||
if (fieldPattern.startsWith("_")) { | ||
validationErrors.add(patternKey + " [" + fieldPattern + "] must not start with '_'"); | ||
} | ||
if (Strings.validFileNameExcludingAstrix(fieldPattern) == false) { | ||
validationErrors.add( | ||
patternKey + " [" + fieldPattern + "] must not contain the following characters " + Strings.INVALID_FILENAME_CHARS | ||
); | ||
} | ||
} | ||
|
||
if (validationErrors.size() > 0) { | ||
ValidationException validationException = new ValidationException(); | ||
validationException.addValidationErrors(validationErrors); | ||
throw newConfigurationException(TYPE, processorTag, patternKey, validationException.getMessage()); | ||
} | ||
} | ||
} | ||
} |
116 changes: 116 additions & 0 deletions
116
...mmon/src/test/java/org/opensearch/ingest/common/RemoveByPatternProcessorFactoryTests.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.ingest.common; | ||
|
||
import org.opensearch.OpenSearchException; | ||
import org.opensearch.OpenSearchParseException; | ||
import org.opensearch.test.OpenSearchTestCase; | ||
import org.junit.Before; | ||
|
||
import java.util.Arrays; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
import static org.hamcrest.CoreMatchers.equalTo; | ||
|
||
public class RemoveByPatternProcessorFactoryTests extends OpenSearchTestCase { | ||
|
||
private RemoveByPatternProcessor.Factory factory; | ||
|
||
@Before | ||
public void init() { | ||
factory = new RemoveByPatternProcessor.Factory(); | ||
} | ||
|
||
public void testCreateFieldPatterns() throws Exception { | ||
Map<String, Object> config = new HashMap<>(); | ||
config.put("field_pattern", "field1*"); | ||
String processorTag = randomAlphaOfLength(10); | ||
RemoveByPatternProcessor removeByPatternProcessor = factory.create(null, processorTag, null, config); | ||
assertThat(removeByPatternProcessor.getTag(), equalTo(processorTag)); | ||
assertThat(removeByPatternProcessor.getFieldPatterns().get(0), equalTo("field1*")); | ||
|
||
Map<String, Object> config2 = new HashMap<>(); | ||
config2.put("field_pattern", List.of("field1*", "field2*")); | ||
removeByPatternProcessor = factory.create(null, processorTag, null, config2); | ||
assertThat(removeByPatternProcessor.getTag(), equalTo(processorTag)); | ||
assertThat(removeByPatternProcessor.getFieldPatterns().get(0), equalTo("field1*")); | ||
assertThat(removeByPatternProcessor.getFieldPatterns().get(1), equalTo("field2*")); | ||
|
||
Map<String, Object> config3 = new HashMap<>(); | ||
List<String> patterns = Arrays.asList("foo*", "*", " ", ",", "#", ":", "_"); | ||
config3.put("field_pattern", patterns); | ||
Exception exception = expectThrows(OpenSearchParseException.class, () -> factory.create(null, processorTag, null, config3)); | ||
assertThat( | ||
exception.getMessage(), | ||
equalTo( | ||
"[field_pattern] Validation Failed: 1: field_pattern [ ] must not contain a space;" | ||
+ "2: field_pattern [ ] must not contain the following characters [ , \", *, \\, <, |, ,, >, /, ?];" | ||
+ "3: field_pattern [,] must not contain a ',';" | ||
+ "4: field_pattern [,] must not contain the following characters [ , \", *, \\, <, |, ,, >, /, ?];" | ||
+ "5: field_pattern [#] must not contain a '#';" | ||
+ "6: field_pattern [:] must not contain a ':';" | ||
+ "7: field_pattern [_] must not start with '_';" | ||
) | ||
); | ||
} | ||
|
||
public void testCreateExcludeFieldPatterns() throws Exception { | ||
Map<String, Object> config = new HashMap<>(); | ||
config.put("exclude_field_pattern", "field1*"); | ||
String processorTag = randomAlphaOfLength(10); | ||
RemoveByPatternProcessor removeByPatternProcessor = factory.create(null, processorTag, null, config); | ||
assertThat(removeByPatternProcessor.getTag(), equalTo(processorTag)); | ||
assertThat(removeByPatternProcessor.getExcludeFieldPatterns().get(0), equalTo("field1*")); | ||
|
||
Map<String, Object> config2 = new HashMap<>(); | ||
config2.put("exclude_field_pattern", List.of("field1*", "field2*")); | ||
removeByPatternProcessor = factory.create(null, processorTag, null, config2); | ||
assertThat(removeByPatternProcessor.getTag(), equalTo(processorTag)); | ||
assertThat(removeByPatternProcessor.getExcludeFieldPatterns().get(0), equalTo("field1*")); | ||
assertThat(removeByPatternProcessor.getExcludeFieldPatterns().get(1), equalTo("field2*")); | ||
|
||
Map<String, Object> config3 = new HashMap<>(); | ||
List<String> patterns = Arrays.asList("foo*", "*", " ", ",", "#", ":", "_"); | ||
config3.put("exclude_field_pattern", patterns); | ||
Exception exception = expectThrows(OpenSearchParseException.class, () -> factory.create(null, processorTag, null, config3)); | ||
assertThat( | ||
exception.getMessage(), | ||
equalTo( | ||
"[exclude_field_pattern] Validation Failed: 1: exclude_field_pattern [ ] must not contain a space;" | ||
+ "2: exclude_field_pattern [ ] must not contain the following characters [ , \", *, \\, <, |, ,, >, /, ?];" | ||
+ "3: exclude_field_pattern [,] must not contain a ',';" | ||
+ "4: exclude_field_pattern [,] must not contain the following characters [ , \", *, \\, <, |, ,, >, /, ?];" | ||
+ "5: exclude_field_pattern [#] must not contain a '#';" | ||
+ "6: exclude_field_pattern [:] must not contain a ':';" | ||
+ "7: exclude_field_pattern [_] must not start with '_';" | ||
) | ||
); | ||
} | ||
|
||
public void testCreatePatternsFailed() throws Exception { | ||
Map<String, Object> config = new HashMap<>(); | ||
config.put("field_pattern", List.of("foo*")); | ||
config.put("exclude_field_pattern", List.of("bar*")); | ||
String processorTag = randomAlphaOfLength(10); | ||
OpenSearchException exception = expectThrows( | ||
OpenSearchParseException.class, | ||
() -> factory.create(null, processorTag, null, config) | ||
); | ||
assertThat(exception.getMessage(), equalTo("[field_pattern] ether field_pattern or exclude_field_pattern must be set")); | ||
|
||
Map<String, Object> config2 = new HashMap<>(); | ||
config2.put("field_pattern", null); | ||
config2.put("exclude_field_pattern", null); | ||
|
||
exception = expectThrows(OpenSearchParseException.class, () -> factory.create(null, processorTag, null, config2)); | ||
assertThat(exception.getMessage(), equalTo("[field_pattern] ether field_pattern or exclude_field_pattern must be set")); | ||
} | ||
} |
Oops, something went wrong.