forked from opensearch-project/OpenSearch
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add remove_by_pattern ingest processor (opensearch-project#11920)
* Add remove_by_pattern ingest processor * Modify change log * Remove some duplicated checks * Add more yml test case * Fix typo --------- Signed-off-by: Gao Binlong <gbinlong@amazon.com> Signed-off-by: Shivansh Arora <hishiv@amazon.com>
- Loading branch information
1 parent
1905cc2
commit 2697da1
Showing
7 changed files
with
555 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
180 changes: 180 additions & 0 deletions
180
...es/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveByPatternProcessor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.ingest.common; | ||
|
||
import org.opensearch.common.Nullable; | ||
import org.opensearch.common.ValidationException; | ||
import org.opensearch.common.regex.Regex; | ||
import org.opensearch.core.common.Strings; | ||
import org.opensearch.ingest.AbstractProcessor; | ||
import org.opensearch.ingest.ConfigurationUtils; | ||
import org.opensearch.ingest.IngestDocument; | ||
import org.opensearch.ingest.Processor; | ||
|
||
import java.util.ArrayList; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Set; | ||
import java.util.stream.Collectors; | ||
|
||
import static org.opensearch.ingest.ConfigurationUtils.newConfigurationException; | ||
|
||
/** | ||
* Processor that removes existing fields by field patterns or excluding field patterns. | ||
*/ | ||
public final class RemoveByPatternProcessor extends AbstractProcessor { | ||
|
||
public static final String TYPE = "remove_by_pattern"; | ||
private final List<String> fieldPatterns; | ||
private final List<String> excludeFieldPatterns; | ||
|
||
RemoveByPatternProcessor( | ||
String tag, | ||
String description, | ||
@Nullable List<String> fieldPatterns, | ||
@Nullable List<String> excludeFieldPatterns | ||
) { | ||
super(tag, description); | ||
if (fieldPatterns != null && excludeFieldPatterns != null || fieldPatterns == null && excludeFieldPatterns == null) { | ||
throw new IllegalArgumentException("either fieldPatterns and excludeFieldPatterns must be set"); | ||
} | ||
if (fieldPatterns == null) { | ||
this.fieldPatterns = null; | ||
this.excludeFieldPatterns = new ArrayList<>(excludeFieldPatterns); | ||
} else { | ||
this.fieldPatterns = new ArrayList<>(fieldPatterns); | ||
this.excludeFieldPatterns = null; | ||
} | ||
} | ||
|
||
public List<String> getFieldPatterns() { | ||
return fieldPatterns; | ||
} | ||
|
||
public List<String> getExcludeFieldPatterns() { | ||
return excludeFieldPatterns; | ||
} | ||
|
||
@Override | ||
public IngestDocument execute(IngestDocument document) { | ||
Set<String> existingFields = new HashSet<>(document.getSourceAndMetadata().keySet()); | ||
Set<String> metadataFields = document.getMetadata() | ||
.keySet() | ||
.stream() | ||
.map(IngestDocument.Metadata::getFieldName) | ||
.collect(Collectors.toSet()); | ||
|
||
if (fieldPatterns != null && !fieldPatterns.isEmpty()) { | ||
existingFields.forEach(field -> { | ||
// ignore metadata fields such as _index, _id, etc. | ||
if (!metadataFields.contains(field)) { | ||
final boolean matched = fieldPatterns.stream().anyMatch(pattern -> Regex.simpleMatch(pattern, field)); | ||
if (matched) { | ||
document.removeField(field); | ||
} | ||
} | ||
}); | ||
} | ||
|
||
if (excludeFieldPatterns != null && !excludeFieldPatterns.isEmpty()) { | ||
existingFields.forEach(field -> { | ||
// ignore metadata fields such as _index, _id, etc. | ||
if (!metadataFields.contains(field)) { | ||
final boolean matched = excludeFieldPatterns.stream().anyMatch(pattern -> Regex.simpleMatch(pattern, field)); | ||
if (!matched) { | ||
document.removeField(field); | ||
} | ||
} | ||
}); | ||
} | ||
|
||
return document; | ||
} | ||
|
||
@Override | ||
public String getType() { | ||
return TYPE; | ||
} | ||
|
||
public static final class Factory implements Processor.Factory { | ||
|
||
public Factory() {} | ||
|
||
@Override | ||
public RemoveByPatternProcessor create( | ||
Map<String, Processor.Factory> registry, | ||
String processorTag, | ||
String description, | ||
Map<String, Object> config | ||
) throws Exception { | ||
final List<String> fieldPatterns = new ArrayList<>(); | ||
final List<String> excludeFieldPatterns = new ArrayList<>(); | ||
final Object fieldPattern = ConfigurationUtils.readOptionalObject(config, "field_pattern"); | ||
final Object excludeFieldPattern = ConfigurationUtils.readOptionalObject(config, "exclude_field_pattern"); | ||
|
||
if (fieldPattern == null && excludeFieldPattern == null || fieldPattern != null && excludeFieldPattern != null) { | ||
throw newConfigurationException( | ||
TYPE, | ||
processorTag, | ||
"field_pattern", | ||
"either field_pattern or exclude_field_pattern must be set" | ||
); | ||
} | ||
|
||
if (fieldPattern != null) { | ||
if (fieldPattern instanceof List) { | ||
@SuppressWarnings("unchecked") | ||
List<String> fieldPatternList = (List<String>) fieldPattern; | ||
fieldPatterns.addAll(fieldPatternList); | ||
} else { | ||
fieldPatterns.add((String) fieldPattern); | ||
} | ||
validateFieldPatterns(processorTag, fieldPatterns, "field_pattern"); | ||
return new RemoveByPatternProcessor(processorTag, description, fieldPatterns, null); | ||
} else { | ||
if (excludeFieldPattern instanceof List) { | ||
@SuppressWarnings("unchecked") | ||
List<String> excludeFieldPatternList = (List<String>) excludeFieldPattern; | ||
excludeFieldPatterns.addAll(excludeFieldPatternList); | ||
} else { | ||
excludeFieldPatterns.add((String) excludeFieldPattern); | ||
} | ||
validateFieldPatterns(processorTag, excludeFieldPatterns, "exclude_field_pattern"); | ||
return new RemoveByPatternProcessor(processorTag, description, null, excludeFieldPatterns); | ||
} | ||
} | ||
|
||
private void validateFieldPatterns(String processorTag, List<String> patterns, String patternKey) { | ||
List<String> validationErrors = new ArrayList<>(); | ||
for (String fieldPattern : patterns) { | ||
if (fieldPattern.contains("#")) { | ||
validationErrors.add(patternKey + " [" + fieldPattern + "] must not contain a '#'"); | ||
} | ||
if (fieldPattern.contains(":")) { | ||
validationErrors.add(patternKey + " [" + fieldPattern + "] must not contain a ':'"); | ||
} | ||
if (fieldPattern.startsWith("_")) { | ||
validationErrors.add(patternKey + " [" + fieldPattern + "] must not start with '_'"); | ||
} | ||
if (Strings.validFileNameExcludingAstrix(fieldPattern) == false) { | ||
validationErrors.add( | ||
patternKey + " [" + fieldPattern + "] must not contain the following characters " + Strings.INVALID_FILENAME_CHARS | ||
); | ||
} | ||
} | ||
|
||
if (validationErrors.size() > 0) { | ||
ValidationException validationException = new ValidationException(); | ||
validationException.addValidationErrors(validationErrors); | ||
throw newConfigurationException(TYPE, processorTag, patternKey, validationException.getMessage()); | ||
} | ||
} | ||
} | ||
} |
114 changes: 114 additions & 0 deletions
114
...mmon/src/test/java/org/opensearch/ingest/common/RemoveByPatternProcessorFactoryTests.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.ingest.common; | ||
|
||
import org.opensearch.OpenSearchException; | ||
import org.opensearch.OpenSearchParseException; | ||
import org.opensearch.test.OpenSearchTestCase; | ||
import org.junit.Before; | ||
|
||
import java.util.Arrays; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
import static org.hamcrest.CoreMatchers.equalTo; | ||
|
||
public class RemoveByPatternProcessorFactoryTests extends OpenSearchTestCase { | ||
|
||
private RemoveByPatternProcessor.Factory factory; | ||
|
||
@Before | ||
public void init() { | ||
factory = new RemoveByPatternProcessor.Factory(); | ||
} | ||
|
||
public void testCreateFieldPatterns() throws Exception { | ||
Map<String, Object> config = new HashMap<>(); | ||
config.put("field_pattern", "field1*"); | ||
String processorTag = randomAlphaOfLength(10); | ||
RemoveByPatternProcessor removeByPatternProcessor = factory.create(null, processorTag, null, config); | ||
assertThat(removeByPatternProcessor.getTag(), equalTo(processorTag)); | ||
assertThat(removeByPatternProcessor.getFieldPatterns().get(0), equalTo("field1*")); | ||
|
||
Map<String, Object> config2 = new HashMap<>(); | ||
config2.put("field_pattern", List.of("field1*", "field2*")); | ||
removeByPatternProcessor = factory.create(null, processorTag, null, config2); | ||
assertThat(removeByPatternProcessor.getTag(), equalTo(processorTag)); | ||
assertThat(removeByPatternProcessor.getFieldPatterns().get(0), equalTo("field1*")); | ||
assertThat(removeByPatternProcessor.getFieldPatterns().get(1), equalTo("field2*")); | ||
|
||
Map<String, Object> config3 = new HashMap<>(); | ||
List<String> patterns = Arrays.asList("foo*", "*", " ", ",", "#", ":", "_"); | ||
config3.put("field_pattern", patterns); | ||
Exception exception = expectThrows(OpenSearchParseException.class, () -> factory.create(null, processorTag, null, config3)); | ||
assertThat( | ||
exception.getMessage(), | ||
equalTo( | ||
"[field_pattern] Validation Failed: " | ||
+ "1: field_pattern [ ] must not contain the following characters [ , \", *, \\, <, |, ,, >, /, ?];" | ||
+ "2: field_pattern [,] must not contain the following characters [ , \", *, \\, <, |, ,, >, /, ?];" | ||
+ "3: field_pattern [#] must not contain a '#';" | ||
+ "4: field_pattern [:] must not contain a ':';" | ||
+ "5: field_pattern [_] must not start with '_';" | ||
) | ||
); | ||
} | ||
|
||
public void testCreateExcludeFieldPatterns() throws Exception { | ||
Map<String, Object> config = new HashMap<>(); | ||
config.put("exclude_field_pattern", "field1*"); | ||
String processorTag = randomAlphaOfLength(10); | ||
RemoveByPatternProcessor removeByPatternProcessor = factory.create(null, processorTag, null, config); | ||
assertThat(removeByPatternProcessor.getTag(), equalTo(processorTag)); | ||
assertThat(removeByPatternProcessor.getExcludeFieldPatterns().get(0), equalTo("field1*")); | ||
|
||
Map<String, Object> config2 = new HashMap<>(); | ||
config2.put("exclude_field_pattern", List.of("field1*", "field2*")); | ||
removeByPatternProcessor = factory.create(null, processorTag, null, config2); | ||
assertThat(removeByPatternProcessor.getTag(), equalTo(processorTag)); | ||
assertThat(removeByPatternProcessor.getExcludeFieldPatterns().get(0), equalTo("field1*")); | ||
assertThat(removeByPatternProcessor.getExcludeFieldPatterns().get(1), equalTo("field2*")); | ||
|
||
Map<String, Object> config3 = new HashMap<>(); | ||
List<String> patterns = Arrays.asList("foo*", "*", " ", ",", "#", ":", "_"); | ||
config3.put("exclude_field_pattern", patterns); | ||
Exception exception = expectThrows(OpenSearchParseException.class, () -> factory.create(null, processorTag, null, config3)); | ||
assertThat( | ||
exception.getMessage(), | ||
equalTo( | ||
"[exclude_field_pattern] Validation Failed: " | ||
+ "1: exclude_field_pattern [ ] must not contain the following characters [ , \", *, \\, <, |, ,, >, /, ?];" | ||
+ "2: exclude_field_pattern [,] must not contain the following characters [ , \", *, \\, <, |, ,, >, /, ?];" | ||
+ "3: exclude_field_pattern [#] must not contain a '#';" | ||
+ "4: exclude_field_pattern [:] must not contain a ':';" | ||
+ "5: exclude_field_pattern [_] must not start with '_';" | ||
) | ||
); | ||
} | ||
|
||
public void testCreatePatternsFailed() throws Exception { | ||
Map<String, Object> config = new HashMap<>(); | ||
config.put("field_pattern", List.of("foo*")); | ||
config.put("exclude_field_pattern", List.of("bar*")); | ||
String processorTag = randomAlphaOfLength(10); | ||
OpenSearchException exception = expectThrows( | ||
OpenSearchParseException.class, | ||
() -> factory.create(null, processorTag, null, config) | ||
); | ||
assertThat(exception.getMessage(), equalTo("[field_pattern] either field_pattern or exclude_field_pattern must be set")); | ||
|
||
Map<String, Object> config2 = new HashMap<>(); | ||
config2.put("field_pattern", null); | ||
config2.put("exclude_field_pattern", null); | ||
|
||
exception = expectThrows(OpenSearchParseException.class, () -> factory.create(null, processorTag, null, config2)); | ||
assertThat(exception.getMessage(), equalTo("[field_pattern] either field_pattern or exclude_field_pattern must be set")); | ||
} | ||
} |
Oops, something went wrong.