From 4a37513de6a0090cdf471a9b6e3ccc71a0e586f4 Mon Sep 17 00:00:00 2001 From: Gao Binlong Date: Mon, 18 Mar 2024 10:29:46 +0800 Subject: [PATCH 01/14] Add fingerprint ingest processor Signed-off-by: Gao Binlong --- CHANGELOG.md | 1 + .../ingest/common/FingerprintProcessor.java | 261 +++++++++++++++++ .../common/IngestCommonModulePlugin.java | 1 + .../FingerprintProcessorFactoryTests.java | 113 ++++++++ .../common/FingerprintProcessorTests.java | 151 ++++++++++ .../rest-api-spec/test/ingest/10_basic.yml | 16 + .../test/ingest/340_fingerprint_processor.yml | 274 ++++++++++++++++++ 7 files changed, 817 insertions(+) create mode 100644 modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java create mode 100644 modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java create mode 100644 modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java create mode 100644 modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f868dd76039b..258b979b309fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add support for Azure Managed Identity in repository-azure ([#12423](https://github.com/opensearch-project/OpenSearch/issues/12423)) - Add useCompoundFile index setting ([#13478](https://github.com/opensearch-project/OpenSearch/pull/13478)) - Make outbound side of transport protocol dependent ([#13293](https://github.com/opensearch-project/OpenSearch/pull/13293)) +- Add fingerprint ingest processor ([#13724](https://github.com/opensearch-project/OpenSearch/pull/13724)) ### Dependencies - Bump `com.github.spullara.mustache.java:compiler` from 0.9.10 to 0.9.13 ([#13329](https://github.com/opensearch-project/OpenSearch/pull/13329), 
[#13559](https://github.com/opensearch-project/OpenSearch/pull/13559)) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java new file mode 100644 index 0000000000000..725d37773927d --- /dev/null +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java @@ -0,0 +1,261 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ingest.common; + +import org.opensearch.common.Nullable; +import org.opensearch.common.hash.MessageDigests; +import org.opensearch.ingest.AbstractProcessor; +import org.opensearch.ingest.ConfigurationUtils; +import org.opensearch.ingest.IngestDocument; +import org.opensearch.ingest.Processor; + +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.util.Base64; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.opensearch.ingest.ConfigurationUtils.newConfigurationException; + +/** + * Processor that generating hash value for the specified fields or all fields in a document + */ +public class FingerprintProcessor extends AbstractProcessor { + public static final String TYPE = "fingerprint"; + private static final Set HASH_METHODS = Set.of("MD5", "SHA-1", "SHA-256"); + + // fields used to generate hash value + private final List fields; + // whether generate hash value for all fields in the document or not + private final boolean includeAllFields; + // the target field to store the hash value, defaults to fingerprint + private final String targetField; + // hash method 
used to generate the hash value, defaults to SHA-1 + private final String hashMethod; + private final boolean ignoreMissing; + + FingerprintProcessor( + String tag, + String description, + @Nullable List fields, + boolean includeAllFields, + String targetField, + String hashMethod, + boolean ignoreMissing + ) { + super(tag, description); + if (fields != null) { + if (fields.isEmpty()) { + throw new IllegalArgumentException("fields cannot be empty"); + } + if (fields.stream().anyMatch(Objects::isNull)) { + throw new IllegalArgumentException("field path cannot be null nor empty"); + } + if (includeAllFields) { + throw new IllegalArgumentException("either fields or include_all_fields can be set"); + } + } else if (!includeAllFields) { + throw new IllegalArgumentException("either fields or include_all_fields must be set"); + } + + if (!HASH_METHODS.contains(hashMethod.toUpperCase(Locale.ROOT))) { + throw new IllegalArgumentException("hash method must be MD5, SHA-1 or SHA-256"); + } + this.fields = fields; + this.includeAllFields = includeAllFields; + this.targetField = targetField; + this.hashMethod = hashMethod; + this.ignoreMissing = ignoreMissing; + } + + public List getFields() { + return fields; + } + + public boolean getIncludeAllFields() { + return includeAllFields; + } + + public String getTargetField() { + return targetField; + } + + public String getHashMethod() { + return hashMethod; + } + + public boolean isIgnoreMissing() { + return ignoreMissing; + } + + @Override + public IngestDocument execute(IngestDocument document) { + // we should deduplicate and sort the field names to make sure we can get consistent hash value + final List sortedFields; + if (includeAllFields) { + Set existingFields = new HashSet<>(document.getSourceAndMetadata().keySet()); + Set metadataFields = document.getMetadata() + .keySet() + .stream() + .map(IngestDocument.Metadata::getFieldName) + .collect(Collectors.toSet()); + sortedFields = existingFields.stream().filter(field -> 
!metadataFields.contains(field)).sorted().collect(Collectors.toList()); + } else { + sortedFields = fields.stream().distinct().sorted().collect(Collectors.toList()); + } + assert (!sortedFields.isEmpty()); + + final StringBuilder concatenatedFields = new StringBuilder(); + sortedFields.forEach(field -> { + if (!document.hasField(field)) { + if (ignoreMissing) { + return; + } else { + throw new IllegalArgumentException("field [" + field + "] doesn't exist"); + } + } + + final Object value = document.getFieldValue(field, Object.class); + if (value instanceof Map) { + @SuppressWarnings("unchecked") + Map flattenedMap = toFlattenedMap((Map) value); + flattenedMap.entrySet() + .stream() + .sorted(Map.Entry.comparingByKey()) + .forEach( + entry -> concatenatedFields.append("|") + .append(field) + .append(".") + .append(entry.getKey()) + .append("|") + .append(entry.getValue()) + ); + } else { + concatenatedFields.append("|").append(field).append("|").append(value); + } + }); + // if all specified fields don't exist and ignore_missing is true, then do nothing + if (concatenatedFields.length() == 0) { + return document; + } + concatenatedFields.append("|"); + + MessageDigest messageDigest = HashMethod.fromMethodName(hashMethod); + assert (messageDigest != null); + messageDigest.update(concatenatedFields.toString().getBytes(StandardCharsets.UTF_8)); + document.setFieldValue(targetField, Base64.getEncoder().encodeToString(messageDigest.digest())); + + return document; + } + + @Override + public String getType() { + return TYPE; + } + + /** + * Convert a map containing nested fields to a flattened map, + * for example, if the original map is + * { + * "a": { + * "b": 1, + * "c": 2 + * } + * }, then the converted map is + * { + * "a.b": 1, + * "a.c": 2 + * } + * @param map the original map which may contain nested fields + * @return a flattened map which has only one level fields + */ + @SuppressWarnings("unchecked") + private Map toFlattenedMap(Map map) { + Map flattenedMap = 
new HashMap<>(); + for (Map.Entry entry : map.entrySet()) { + if (entry.getValue() instanceof Map) { + toFlattenedMap((Map) entry.getValue()).forEach( + (key, value) -> flattenedMap.put(entry.getKey() + "." + key, value) + ); + } else { + flattenedMap.put(entry.getKey(), entry.getValue()); + } + } + return flattenedMap; + } + + /** + * The supported hash methods used to generate hash value + */ + enum HashMethod { + MD5(MessageDigests.md5()), + SHA1(MessageDigests.sha1()), + SHA256(MessageDigests.sha256()); + + private final MessageDigest messageDigest; + + HashMethod(MessageDigest messageDigest) { + this.messageDigest = messageDigest; + } + + public static MessageDigest fromMethodName(String methodName) { + String name = methodName.toUpperCase(Locale.ROOT); + switch (name) { + case "MD5": + return MD5.messageDigest; + case "SHA-1": + return SHA1.messageDigest; + case "SHA-256": + return SHA256.messageDigest; + default: + return null; + } + } + } + + public static final class Factory implements Processor.Factory { + @Override + public FingerprintProcessor create( + Map registry, + String processorTag, + String description, + Map config + ) throws Exception { + List fields = ConfigurationUtils.readOptionalList(TYPE, processorTag, config, "fields"); + boolean includeAllFields = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "include_all_fields", false); + if (fields != null) { + if (fields.isEmpty()) { + throw newConfigurationException(TYPE, processorTag, "fields", "fields cannot be empty"); + } + if (fields.stream().anyMatch(Objects::isNull)) { + throw newConfigurationException(TYPE, processorTag, "fields", "field path cannot be null nor empty"); + } + if (includeAllFields) { + throw newConfigurationException(TYPE, processorTag, "fields", "either fields or include_all_fields can be set"); + } + } else if (!includeAllFields) { + throw newConfigurationException(TYPE, processorTag, "fields", "either fields or include_all_fields must be set"); + } + 
+ String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", "fingerprint"); + String hashMethod = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "hash_method", "SHA-1"); + if (!HASH_METHODS.contains(hashMethod.toUpperCase(Locale.ROOT))) { + throw newConfigurationException(TYPE, processorTag, "hash_method", "hash method must be MD5, SHA-1 or SHA-256"); + } + boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); + return new FingerprintProcessor(processorTag, description, fields, includeAllFields, targetField, hashMethod, ignoreMissing); + } + } +} diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/IngestCommonModulePlugin.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/IngestCommonModulePlugin.java index 0f8b248fd5af8..162934efa6778 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/IngestCommonModulePlugin.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/IngestCommonModulePlugin.java @@ -109,6 +109,7 @@ public Map getProcessors(Processor.Parameters paramet processors.put(CopyProcessor.TYPE, new CopyProcessor.Factory(parameters.scriptService)); processors.put(RemoveByPatternProcessor.TYPE, new RemoveByPatternProcessor.Factory()); processors.put(CommunityIdProcessor.TYPE, new CommunityIdProcessor.Factory()); + processors.put(FingerprintProcessor.TYPE, new FingerprintProcessor.Factory()); return Collections.unmodifiableMap(processors); } diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java new file mode 100644 index 0000000000000..5d68703b89ac3 --- /dev/null +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java @@ -0,0 
+1,113 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ingest.common; + +import org.opensearch.OpenSearchParseException; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import static org.hamcrest.CoreMatchers.equalTo; + +public class FingerprintProcessorFactoryTests extends OpenSearchTestCase { + + private FingerprintProcessor.Factory factory; + + @Before + public void init() { + factory = new FingerprintProcessor.Factory(); + } + + public void testCreate() throws Exception { + Map config = new HashMap<>(); + + boolean includeAllFields = randomBoolean(); + List fields = null; + if (!includeAllFields) { + fields = List.of(randomAlphaOfLength(10)); + config.put("fields", fields); + } else { + config.put("include_all_fields", true); + } + + String targetField = null; + if (randomBoolean()) { + targetField = randomAlphaOfLength(10); + } + config.put("target_field", targetField); + + boolean ignoreMissing = randomBoolean(); + config.put("ignore_missing", ignoreMissing); + String processorTag = randomAlphaOfLength(10); + FingerprintProcessor fingerprintProcessor = factory.create(null, processorTag, null, config); + assertThat(fingerprintProcessor.getTag(), equalTo(processorTag)); + assertThat(fingerprintProcessor.getFields(), equalTo(fields)); + assertThat(fingerprintProcessor.getIncludeAllFields(), equalTo(includeAllFields)); + assertThat(fingerprintProcessor.getTargetField(), equalTo(Objects.requireNonNullElse(targetField, "fingerprint"))); + assertThat(fingerprintProcessor.isIgnoreMissing(), equalTo(ignoreMissing)); + } + + public void testCreateWithFields() throws Exception { + Map config = 
new HashMap<>(); + try { + factory.create(null, null, null, config); + fail("factory create should have failed"); + } catch (OpenSearchParseException e) { + assertThat(e.getMessage(), equalTo("[fields] either fields or include_all_fields must be set")); + } + + config.put("fields", Collections.emptyList()); + try { + factory.create(null, null, null, config); + fail("factory create should have failed"); + } catch (OpenSearchParseException e) { + assertThat(e.getMessage(), equalTo("[fields] fields cannot be empty")); + } + + config = new HashMap<>(); + config.put("fields", List.of(randomAlphaOfLength(10))); + config.put("include_all_fields", true); + try { + factory.create(null, null, null, config); + fail("factory create should have failed"); + } catch (OpenSearchParseException e) { + assertThat(e.getMessage(), equalTo("[fields] either fields or include_all_fields can be set")); + } + + config = new HashMap<>(); + List fields = new ArrayList<>(); + fields.add(null); + config.put("fields", fields); + try { + factory.create(null, null, null, config); + fail("factory create should have failed"); + } catch (OpenSearchParseException e) { + assertThat(e.getMessage(), equalTo("[fields] field path cannot be null nor empty")); + } + } + + public void testCreateWithHashMethod() throws Exception { + Map config = new HashMap<>(); + List fields = List.of(randomAlphaOfLength(10)); + config.put("fields", fields); + config.put("hash_method", randomAlphaOfLength(10)); + try { + factory.create(null, null, null, config); + fail("factory create should have failed"); + } catch (OpenSearchParseException e) { + assertThat(e.getMessage(), equalTo("[hash_method] hash method must be MD5, SHA-1 or SHA-256")); + } + } +} diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java new file mode 100644 index 0000000000000..4040a87503e5f --- /dev/null 
+++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java @@ -0,0 +1,151 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ingest.common; + +import org.opensearch.ingest.IngestDocument; +import org.opensearch.ingest.Processor; +import org.opensearch.ingest.RandomDocumentPicks; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; + +public class FingerprintProcessorTests extends OpenSearchTestCase { + + public void testGenerateFingerprint() throws Exception { + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + List fields = null; + boolean includeAllFields = false; + if (randomBoolean()) { + includeAllFields = true; + + } else { + fields = new ArrayList<>(); + for (int i = 0; i < randomIntBetween(1, 10); i++) { + fields.add(RandomDocumentPicks.addRandomField(random(), ingestDocument, randomAlphaOfLength(10))); + } + } + + String targetField = "fingerprint"; + if (randomBoolean()) { + targetField = randomAlphaOfLength(10); + } + + String hashMethod = randomFrom(List.of("MD5", "SHA-1", "SHA-256")); + Processor processor = createFingerprintProcessor(fields, includeAllFields, targetField, hashMethod, false); + processor.execute(ingestDocument); + assertThat(ingestDocument.hasField(targetField), equalTo(true)); + } + + public void testCreateFingerprintProcessorFailed() { + assertThrows( + "fields cannot be empty", + IllegalArgumentException.class, + () -> createFingerprintProcessor( + Collections.emptyList(), + false, + "fingerprint", + randomFrom(List.of("MD5", "SHA-1", "SHA-256")), + false + ) + ); + + List fields = new ArrayList<>(); + fields.add(null); + 
fields.add(randomAlphaOfLength(10)); + assertThrows( + "field path cannot be null nor empty", + IllegalArgumentException.class, + () -> createFingerprintProcessor(fields, false, null, randomFrom(List.of("MD5", "SHA-1", "SHA-256")), false) + ); + + assertThrows( + "hash method must be MD5, SHA-1 or SHA-256", + IllegalArgumentException.class, + () -> createFingerprintProcessor( + Collections.emptyList(), + false, + "fingerprint", + randomFrom(List.of("MD5", "SHA-1", "SHA-256")), + false + ) + ); + + assertThrows( + "either fields or include_all_fields can be set", + IllegalArgumentException.class, + () -> createFingerprintProcessor( + Collections.emptyList(), + true, + "fingerprint", + randomFrom(List.of("MD5", "SHA-1", "SHA-256")), + false + ) + ); + + assertThrows( + "either fields or include_all_fields must be set", + IllegalArgumentException.class, + () -> createFingerprintProcessor(null, false, "fingerprint", randomFrom(List.of("MD5", "SHA-1", "SHA-256")), false) + ); + } + + public void testIncludeAllFields() { + List fields = new ArrayList<>(); + fields.add(null); + fields.add(randomAlphaOfLength(10)); + assertThrows( + "field path cannot be null nor empty", + IllegalArgumentException.class, + () -> createFingerprintProcessor(fields, false, null, randomFrom(List.of("MD5", "SHA-1", "SHA-256")), false) + ); + } + + public void testIgnoreMissing() throws Exception { + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + String nonExistingFieldName = RandomDocumentPicks.randomNonExistingFieldName(random(), ingestDocument); + List nonExistingFields = List.of(nonExistingFieldName); + Processor processor = createFingerprintProcessor( + nonExistingFields, + false, + "fingerprint", + randomFrom(List.of("MD5", "SHA-1", "SHA-256")), + false + ); + assertThrows( + "field [" + nonExistingFieldName + "] doesn't exist", + IllegalArgumentException.class, + () -> processor.execute(ingestDocument) + ); + + String targetField = "fingerprint"; + 
Processor processorWithIgnoreMissing = createFingerprintProcessor( + nonExistingFields, + false, + "fingerprint", + randomFrom(List.of("MD5", "SHA-1", "SHA-256")), + true + ); + processorWithIgnoreMissing.execute(ingestDocument); + assertThat(ingestDocument.hasField(targetField), equalTo(false)); + } + + private FingerprintProcessor createFingerprintProcessor( + List fields, + boolean includeAllFields, + String targetField, + String hashMethod, + boolean ignoreMissing + ) { + return new FingerprintProcessor(randomAlphaOfLength(10), null, fields, includeAllFields, targetField, hashMethod, ignoreMissing); + } +} diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml index 2a816f0386667..b5df5dfa153ff 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml @@ -86,3 +86,19 @@ - do: nodes.info: {} - contains: { nodes.$cluster_manager.ingest.processors: { type: community_id } } + +--- +"Fingerprint processor exists": + - skip: + version: " - 2.14.99" + features: contains + reason: "fingerprint processor was introduced in 2.15.0 and contains is a newly added assertion" + - do: + cluster.state: {} + + # Get cluster-manager node id + - set: { cluster_manager_node: cluster_manager } + + - do: + nodes.info: {} + - contains: { nodes.$cluster_manager.ingest.processors: { type: fingerprint } } diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml new file mode 100644 index 0000000000000..ea94ff397d6c3 --- /dev/null +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml @@ -0,0 +1,274 @@ 
+--- +teardown: + - do: + ingest.delete_pipeline: + id: "1" + ignore: 404 + +--- +"Test creat fingerprint processor": + - skip: + version: " - 2.14.99" + reason: "introduced in 2.15" + - do: + catch: /\[fields\] either fields or include_all_fields must be set/ + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + } + } + ] + } + + - do: + catch: /\[fields\] either fields or include_all_fields can be set/ + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields" : ["foo"], + "include_all_fields" : true + } + } + ] + } + + - do: + catch: /field path cannot be null nor empty/ + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields": [null] + } + } + ] + } + + - do: + catch: /fields cannot be empty/ + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields": [] + } + } + ] + } + + - do: + catch: /hash method must be MD5\, SHA\-1 or SHA\-256/ + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields": ["foo"], + "hash_method": "non-existing" + } + } + ] + } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields" : ["foo"], + "target_field" : "fingerprint_field", + "hash_method": "SHA-256" + } + } + ] + } + - match: { acknowledged: true } + +--- +"Test fingerprint processor with ignore_missing": + - skip: + version: " - 2.14.99" + reason: "introduced in 2.15" + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields" : ["foo"] + } + } + ] + } + - match: { acknowledged: true } + + - do: + catch: /field \[foo\] doesn't exist/ + index: + index: test + id: 1 + pipeline: "1" + body: { + bar: "bar" + } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields" : ["foo", "bar"], + "ignore_missing" : true + } + } + ] + } + - 
match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "1" + body: { + foo: "foo" + } + - do: + get: + index: test + id: 1 + - match: { _source.fingerprint: "w1axmYeYkdIEZMKxybhjOEuBFxA=" } + +--- +"Test fingerprint processor with custom target field": + - skip: + version: " - 2.14.99" + reason: "introduced in 2.15" + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields" : ["foo"], + "target_field" : "target" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "1" + body: { + foo: "foo" + } + - do: + get: + index: test + id: 1 + - match: { _source.target: "w1axmYeYkdIEZMKxybhjOEuBFxA=" } + +--- +"Test fingerprint processor with non-primitive fields": + - skip: + version: " - 2.14.99" + reason: "introduced in 2.15" + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields" : ["foo", "bar", "zoo"] + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 1 + - match: { _source.fingerprint: "R/ZOjN9U+AVLTxjAB8b8A5pbSyM=" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "include_all_fields" : true + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 2 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 2 + - match: { _source.fingerprint: "R/ZOjN9U+AVLTxjAB8b8A5pbSyM=" } From b5181338e0c8ba2416f16ddf140188c87a884b5f Mon Sep 17 00:00:00 2001 From: Gao Binlong Date: Tue, 21 May 2024 12:04:51 +0800 Subject: [PATCH 02/14] Ignore metadata fields Signed-off-by: Gao Binlong --- .../ingest/common/FingerprintProcessor.java | 17 ++++++++----- 
.../common/FingerprintProcessorTests.java | 24 +++++++++++++++++++ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java index 725d37773927d..27dd68cccbd33 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java @@ -104,16 +104,21 @@ public boolean isIgnoreMissing() { public IngestDocument execute(IngestDocument document) { // we should deduplicate and sort the field names to make sure we can get consistent hash value final List sortedFields; + Set metadataFields = document.getMetadata() + .keySet() + .stream() + .map(IngestDocument.Metadata::getFieldName) + .collect(Collectors.toSet()); + // metadata fields such as _index, _id and _routing are ignored if (includeAllFields) { Set existingFields = new HashSet<>(document.getSourceAndMetadata().keySet()); - Set metadataFields = document.getMetadata() - .keySet() - .stream() - .map(IngestDocument.Metadata::getFieldName) - .collect(Collectors.toSet()); sortedFields = existingFields.stream().filter(field -> !metadataFields.contains(field)).sorted().collect(Collectors.toList()); } else { - sortedFields = fields.stream().distinct().sorted().collect(Collectors.toList()); + sortedFields = fields.stream() + .distinct() + .filter(field -> !metadataFields.contains(field)) + .sorted() + .collect(Collectors.toList()); } assert (!sortedFields.isEmpty()); diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java index 4040a87503e5f..7412c78e45eb8 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java 
+++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java @@ -16,6 +16,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; import static org.hamcrest.Matchers.equalTo; @@ -139,6 +140,29 @@ public void testIgnoreMissing() throws Exception { assertThat(ingestDocument.hasField(targetField), equalTo(false)); } + public void testIgnoreMetadataFields() throws Exception { + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + List metadataFields = ingestDocument.getMetadata() + .keySet() + .stream() + .map(IngestDocument.Metadata::getFieldName) + .collect(Collectors.toList()); + + String existingFieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, randomAlphaOfLength(10)); + List fields = List.of(existingFieldName, metadataFields.get(randomIntBetween(0, metadataFields.size() - 1))); + + String targetField = "fingerprint"; + String algorithm = randomFrom(List.of("MD5", "SHA-1", "SHA-256")); + Processor processor = createFingerprintProcessor(fields, false, targetField, algorithm, false); + + processor.execute(ingestDocument); + String fingerprint = ingestDocument.getFieldValue(targetField, String.class); + + processor = createFingerprintProcessor(List.of(existingFieldName), false, targetField, algorithm, false); + processor.execute(ingestDocument); + assertThat(ingestDocument.getFieldValue(targetField, String.class), equalTo(fingerprint)); + } + private FingerprintProcessor createFingerprintProcessor( List fields, boolean includeAllFields, From aae81a71c084741b254e39628f866526acb2e5ad Mon Sep 17 00:00:00 2001 From: Gao Binlong Date: Wed, 22 May 2024 16:57:55 +0800 Subject: [PATCH 03/14] Add sha3-256 hash method Signed-off-by: Gao Binlong --- .../ingest/common/FingerprintProcessor.java | 13 +++-- .../FingerprintProcessorFactoryTests.java | 2 +- .../common/FingerprintProcessorTests.java | 47 +++++-------------- 
.../test/ingest/340_fingerprint_processor.yml | 2 +- .../common/hash/MessageDigests.java | 13 +++++ .../common/hash/MessageDigestsTests.java | 25 ++++++++++ 6 files changed, 61 insertions(+), 41 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java index 27dd68cccbd33..a652aaa1890fc 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java @@ -34,7 +34,7 @@ */ public class FingerprintProcessor extends AbstractProcessor { public static final String TYPE = "fingerprint"; - private static final Set HASH_METHODS = Set.of("MD5", "SHA-1", "SHA-256"); + private static final Set HASH_METHODS = Set.of("MD5", "SHA-1", "SHA-256", "SHA3-256"); // fields used to generate hash value private final List fields; @@ -71,7 +71,7 @@ public class FingerprintProcessor extends AbstractProcessor { } if (!HASH_METHODS.contains(hashMethod.toUpperCase(Locale.ROOT))) { - throw new IllegalArgumentException("hash method must be MD5, SHA-1 or SHA-256"); + throw new IllegalArgumentException("hash method must be MD5, SHA-1 or SHA-256 or SHA3-256"); } this.fields = fields; this.includeAllFields = includeAllFields; @@ -157,6 +157,8 @@ public IngestDocument execute(IngestDocument document) { } concatenatedFields.append("|"); + System.out.println(concatenatedFields); + MessageDigest messageDigest = HashMethod.fromMethodName(hashMethod); assert (messageDigest != null); messageDigest.update(concatenatedFields.toString().getBytes(StandardCharsets.UTF_8)); @@ -207,7 +209,8 @@ private Map toFlattenedMap(Map map) { enum HashMethod { MD5(MessageDigests.md5()), SHA1(MessageDigests.sha1()), - SHA256(MessageDigests.sha256()); + SHA256(MessageDigests.sha256()), + SHA3256(MessageDigests.sha3256()); private final 
MessageDigest messageDigest; @@ -224,6 +227,8 @@ public static MessageDigest fromMethodName(String methodName) { return SHA1.messageDigest; case "SHA-256": return SHA256.messageDigest; + case "SHA3-256": + return SHA3256.messageDigest; default: return null; } @@ -257,7 +262,7 @@ public FingerprintProcessor create( String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", "fingerprint"); String hashMethod = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "hash_method", "SHA-1"); if (!HASH_METHODS.contains(hashMethod.toUpperCase(Locale.ROOT))) { - throw newConfigurationException(TYPE, processorTag, "hash_method", "hash method must be MD5, SHA-1 or SHA-256"); + throw newConfigurationException(TYPE, processorTag, "hash_method", "hash method must be MD5, SHA-1, SHA-256 or SHA3-256"); } boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); return new FingerprintProcessor(processorTag, description, fields, includeAllFields, targetField, hashMethod, ignoreMissing); diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java index 5d68703b89ac3..c340035905f0d 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java @@ -107,7 +107,7 @@ public void testCreateWithHashMethod() throws Exception { factory.create(null, null, null, config); fail("factory create should have failed"); } catch (OpenSearchParseException e) { - assertThat(e.getMessage(), equalTo("[hash_method] hash method must be MD5, SHA-1 or SHA-256")); + assertThat(e.getMessage(), equalTo("[hash_method] hash method must be MD5, SHA-1, SHA-256 or SHA3-256")); } } } diff 
--git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java index 7412c78e45eb8..e985e34b707a7 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java @@ -21,6 +21,7 @@ import static org.hamcrest.Matchers.equalTo; public class FingerprintProcessorTests extends OpenSearchTestCase { + private final List hashMethods = List.of("MD5", "SHA-1", "SHA-256", "SHA3-256"); public void testGenerateFingerprint() throws Exception { IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); @@ -41,7 +42,7 @@ public void testGenerateFingerprint() throws Exception { targetField = randomAlphaOfLength(10); } - String hashMethod = randomFrom(List.of("MD5", "SHA-1", "SHA-256")); + String hashMethod = randomFrom(hashMethods); Processor processor = createFingerprintProcessor(fields, includeAllFields, targetField, hashMethod, false); processor.execute(ingestDocument); assertThat(ingestDocument.hasField(targetField), equalTo(true)); @@ -51,13 +52,7 @@ public void testCreateFingerprintProcessorFailed() { assertThrows( "fields cannot be empty", IllegalArgumentException.class, - () -> createFingerprintProcessor( - Collections.emptyList(), - false, - "fingerprint", - randomFrom(List.of("MD5", "SHA-1", "SHA-256")), - false - ) + () -> createFingerprintProcessor(Collections.emptyList(), false, "fingerprint", randomFrom(hashMethods), false) ); List fields = new ArrayList<>(); @@ -66,37 +61,25 @@ public void testCreateFingerprintProcessorFailed() { assertThrows( "field path cannot be null nor empty", IllegalArgumentException.class, - () -> createFingerprintProcessor(fields, false, null, randomFrom(List.of("MD5", "SHA-1", "SHA-256")), false) + () -> 
createFingerprintProcessor(fields, false, null, randomFrom(List.of("MD5", "SHA-1", "SHA-256", "SHA3-256")), false) ); assertThrows( - "hash method must be MD5, SHA-1 or SHA-256", + "hash method must be MD5, SHA-1, SHA-256 or SHA3-256", IllegalArgumentException.class, - () -> createFingerprintProcessor( - Collections.emptyList(), - false, - "fingerprint", - randomFrom(List.of("MD5", "SHA-1", "SHA-256")), - false - ) + () -> createFingerprintProcessor(Collections.emptyList(), false, "fingerprint", randomFrom(hashMethods), false) ); assertThrows( "either fields or include_all_fields can be set", IllegalArgumentException.class, - () -> createFingerprintProcessor( - Collections.emptyList(), - true, - "fingerprint", - randomFrom(List.of("MD5", "SHA-1", "SHA-256")), - false - ) + () -> createFingerprintProcessor(Collections.emptyList(), true, "fingerprint", randomFrom(hashMethods), false) ); assertThrows( "either fields or include_all_fields must be set", IllegalArgumentException.class, - () -> createFingerprintProcessor(null, false, "fingerprint", randomFrom(List.of("MD5", "SHA-1", "SHA-256")), false) + () -> createFingerprintProcessor(null, false, "fingerprint", randomFrom(hashMethods), false) ); } @@ -107,7 +90,7 @@ public void testIncludeAllFields() { assertThrows( "field path cannot be null nor empty", IllegalArgumentException.class, - () -> createFingerprintProcessor(fields, false, null, randomFrom(List.of("MD5", "SHA-1", "SHA-256")), false) + () -> createFingerprintProcessor(fields, false, null, randomFrom(hashMethods), false) ); } @@ -115,13 +98,7 @@ public void testIgnoreMissing() throws Exception { IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); String nonExistingFieldName = RandomDocumentPicks.randomNonExistingFieldName(random(), ingestDocument); List nonExistingFields = List.of(nonExistingFieldName); - Processor processor = createFingerprintProcessor( - nonExistingFields, - false, - "fingerprint", - randomFrom(List.of("MD5", 
"SHA-1", "SHA-256")), - false - ); + Processor processor = createFingerprintProcessor(nonExistingFields, false, "fingerprint", randomFrom(hashMethods), false); assertThrows( "field [" + nonExistingFieldName + "] doesn't exist", IllegalArgumentException.class, @@ -133,7 +110,7 @@ public void testIgnoreMissing() throws Exception { nonExistingFields, false, "fingerprint", - randomFrom(List.of("MD5", "SHA-1", "SHA-256")), + randomFrom(hashMethods), true ); processorWithIgnoreMissing.execute(ingestDocument); @@ -152,7 +129,7 @@ public void testIgnoreMetadataFields() throws Exception { List fields = List.of(existingFieldName, metadataFields.get(randomIntBetween(0, metadataFields.size() - 1))); String targetField = "fingerprint"; - String algorithm = randomFrom(List.of("MD5", "SHA-1", "SHA-256")); + String algorithm = randomFrom(hashMethods); Processor processor = createFingerprintProcessor(fields, false, targetField, algorithm, false); processor.execute(ingestDocument); diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml index ea94ff397d6c3..11be884a961bd 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml @@ -71,7 +71,7 @@ teardown: } - do: - catch: /hash method must be MD5\, SHA\-1 or SHA\-256/ + catch: /hash method must be MD5\, SHA\-1, SHA\-256 or SHA3\-256/ ingest.put_pipeline: id: "1" body: > diff --git a/server/src/main/java/org/opensearch/common/hash/MessageDigests.java b/server/src/main/java/org/opensearch/common/hash/MessageDigests.java index f53f60a3a97a3..123bd3489bedb 100644 --- a/server/src/main/java/org/opensearch/common/hash/MessageDigests.java +++ 
b/server/src/main/java/org/opensearch/common/hash/MessageDigests.java @@ -58,6 +58,7 @@ private static ThreadLocal createThreadLocalMessageDigest(String private static final ThreadLocal MD5_DIGEST = createThreadLocalMessageDigest("MD5"); private static final ThreadLocal SHA_1_DIGEST = createThreadLocalMessageDigest("SHA-1"); private static final ThreadLocal SHA_256_DIGEST = createThreadLocalMessageDigest("SHA-256"); + private static final ThreadLocal SHA3_256_DIGEST = createThreadLocalMessageDigest("SHA3-256"); /** * Returns a {@link MessageDigest} instance for MD5 digests; note @@ -95,6 +96,18 @@ public static MessageDigest sha256() { return get(SHA_256_DIGEST); } + /** + * Returns a {@link MessageDigest} instance for SHA3-256 digests; + * note that the instance returned is thread local and must not be + * shared amongst threads. + * + * @return a thread local {@link MessageDigest} instance that + * provides SHA3-256 message digest functionality. + */ + public static MessageDigest sha3256() { + return get(SHA3_256_DIGEST); + } + private static MessageDigest get(ThreadLocal messageDigest) { MessageDigest instance = messageDigest.get(); instance.reset(); diff --git a/server/src/test/java/org/opensearch/common/hash/MessageDigestsTests.java b/server/src/test/java/org/opensearch/common/hash/MessageDigestsTests.java index 9e793e5487eb8..6b7cfb4c8932c 100644 --- a/server/src/test/java/org/opensearch/common/hash/MessageDigestsTests.java +++ b/server/src/test/java/org/opensearch/common/hash/MessageDigestsTests.java @@ -91,6 +91,31 @@ public void testSha256() throws Exception { ); } + public void testSha3256() throws Exception { + assertHash("a7ffc6f8bf1ed76651c14756a061d662f580ff4de43b49fa82d80a4b80f8434a", "", MessageDigests.sha3256()); + assertHash("3a985da74fe225b2045c172d6bd390bd855f086e3e9d525b46bfe24511431532", "abc", MessageDigests.sha3256()); + assertHash( + "41c0dba2a9d6240849100376a8235e2c82e1b9998a999e21db32dd97496d3376", + 
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", + MessageDigests.sha3256() + ); + assertHash( + "5c8875ae474a3634ba4fd55ec85bffd661f32aca75c6d699d0cdcb6c115891c1", + new String(new char[1000000]).replace("\0", "a"), + MessageDigests.sha3256() + ); + assertHash( + "69070dda01975c8c120c3aada1b282394e7f032fa9cf32f4cb2259a0897dfc04", + "The quick brown fox jumps over the lazy dog", + MessageDigests.sha3256() + ); + assertHash( + "cc80b0b13ba89613d93f02ee7ccbe72ee26c6edfe577f22e63a1380221caedbc", + "The quick brown fox jumps over the lazy cog", + MessageDigests.sha3256() + ); + } + public void testToHexString() throws Exception { BigInteger expected = BigInteger.probablePrime(256, random()); byte[] bytes = expected.toByteArray(); From cd7fcdc54b5daef06afc280803aa8342a1df2736 Mon Sep 17 00:00:00 2001 From: Gao Binlong Date: Wed, 22 May 2024 18:11:23 +0800 Subject: [PATCH 04/14] Remove unused code Signed-off-by: Gao Binlong --- .../java/org/opensearch/ingest/common/FingerprintProcessor.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java index a652aaa1890fc..a471850d8131c 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java @@ -157,8 +157,6 @@ public IngestDocument execute(IngestDocument document) { } concatenatedFields.append("|"); - System.out.println(concatenatedFields); - MessageDigest messageDigest = HashMethod.fromMethodName(hashMethod); assert (messageDigest != null); messageDigest.update(concatenatedFields.toString().getBytes(StandardCharsets.UTF_8)); From 614ba60c7a4f82d2978d31cf7ae62c17b182836d Mon Sep 17 00:00:00 2001 From: Gao Binlong Date: Tue, 4 Jun 2024 21:01:39 +0800 Subject: [PATCH 05/14] Add exclude_fields and 
remove include_all_fields Signed-off-by: Gao Binlong --- .../ingest/common/FingerprintProcessor.java | 58 +++++----- .../FingerprintProcessorFactoryTests.java | 36 +++---- .../common/FingerprintProcessorTests.java | 83 ++++++++------ .../test/ingest/340_fingerprint_processor.yml | 101 +++++++++++++++--- 4 files changed, 180 insertions(+), 98 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java index a471850d8131c..35800c540966f 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java @@ -38,8 +38,8 @@ public class FingerprintProcessor extends AbstractProcessor { // fields used to generate hash value private final List fields; - // whether generate hash value for all fields in the document or not - private final boolean includeAllFields; + // all other fields other than the excluded fields are used to generate hash value + private final List excludeFields; // the target field to store the hash value, defaults to fingerprint private final String targetField; // hash method used to generate the hash value, defaults to SHA-1 @@ -50,31 +50,29 @@ public class FingerprintProcessor extends AbstractProcessor { String tag, String description, @Nullable List fields, - boolean includeAllFields, + @Nullable List excludeFields, String targetField, String hashMethod, boolean ignoreMissing ) { super(tag, description); - if (fields != null) { - if (fields.isEmpty()) { - throw new IllegalArgumentException("fields cannot be empty"); - } + if (fields != null && !fields.isEmpty()) { if (fields.stream().anyMatch(Objects::isNull)) { throw new IllegalArgumentException("field path cannot be null nor empty"); } - if (includeAllFields) { - throw new IllegalArgumentException("either fields or 
include_all_fields can be set"); + if (excludeFields != null && !excludeFields.isEmpty()) { + throw new IllegalArgumentException("either fields or exclude_fields can be set"); } - } else if (!includeAllFields) { - throw new IllegalArgumentException("either fields or include_all_fields must be set"); + } + if (excludeFields != null && !excludeFields.isEmpty() && excludeFields.stream().anyMatch(Objects::isNull)) { + throw new IllegalArgumentException("field path cannot be null nor empty"); } if (!HASH_METHODS.contains(hashMethod.toUpperCase(Locale.ROOT))) { throw new IllegalArgumentException("hash method must be MD5, SHA-1 or SHA-256 or SHA3-256"); } this.fields = fields; - this.includeAllFields = includeAllFields; + this.excludeFields = excludeFields; this.targetField = targetField; this.hashMethod = hashMethod; this.ignoreMissing = ignoreMissing; @@ -84,8 +82,8 @@ public List getFields() { return fields; } - public boolean getIncludeAllFields() { - return includeAllFields; + public List getExcludeFields() { + return excludeFields; } public String getTargetField() { @@ -104,21 +102,26 @@ public boolean isIgnoreMissing() { public IngestDocument execute(IngestDocument document) { // we should deduplicate and sort the field names to make sure we can get consistent hash value final List sortedFields; + Set existingFields = new HashSet<>(document.getSourceAndMetadata().keySet()); Set metadataFields = document.getMetadata() .keySet() .stream() .map(IngestDocument.Metadata::getFieldName) .collect(Collectors.toSet()); // metadata fields such as _index, _id and _routing are ignored - if (includeAllFields) { - Set existingFields = new HashSet<>(document.getSourceAndMetadata().keySet()); - sortedFields = existingFields.stream().filter(field -> !metadataFields.contains(field)).sorted().collect(Collectors.toList()); - } else { + if (fields != null && !fields.isEmpty()) { sortedFields = fields.stream() .distinct() .filter(field -> !metadataFields.contains(field)) .sorted() 
.collect(Collectors.toList()); + } else if (excludeFields != null && !excludeFields.isEmpty()) { + sortedFields = existingFields.stream() + .filter(field -> !metadataFields.contains(field) && !excludeFields.contains(field)) + .sorted() + .collect(Collectors.toList()); + } else { + sortedFields = existingFields.stream().filter(field -> !metadataFields.contains(field)).sorted().collect(Collectors.toList()); } assert (!sortedFields.isEmpty()); @@ -242,19 +245,18 @@ public FingerprintProcessor create( Map config ) throws Exception { List fields = ConfigurationUtils.readOptionalList(TYPE, processorTag, config, "fields"); - boolean includeAllFields = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "include_all_fields", false); - if (fields != null) { - if (fields.isEmpty()) { - throw newConfigurationException(TYPE, processorTag, "fields", "fields cannot be empty"); - } + List excludeFields = ConfigurationUtils.readOptionalList(TYPE, processorTag, config, "exclude_fields"); + if (fields != null && !fields.isEmpty()) { + if (fields.stream().anyMatch(Objects::isNull)) { throw newConfigurationException(TYPE, processorTag, "fields", "field path cannot be null nor empty"); } - if (includeAllFields) { - throw newConfigurationException(TYPE, processorTag, "fields", "either fields or include_all_fields can be set"); + if (excludeFields != null && !excludeFields.isEmpty()) { + throw newConfigurationException(TYPE, processorTag, "fields", "either fields or exclude_fields can be set"); } - } else if (!includeAllFields) { - throw newConfigurationException(TYPE, processorTag, "fields", "either fields or include_all_fields must be set"); + } + if (excludeFields != null && !excludeFields.isEmpty() && excludeFields.stream().anyMatch(Objects::isNull)) { + throw newConfigurationException(TYPE, processorTag, "exclude_fields", "field path cannot be null nor empty"); } String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", 
"fingerprint"); @@ -263,7 +265,7 @@ public FingerprintProcessor create( throw newConfigurationException(TYPE, processorTag, "hash_method", "hash method must be MD5, SHA-1, SHA-256 or SHA3-256"); } boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); - return new FingerprintProcessor(processorTag, description, fields, includeAllFields, targetField, hashMethod, ignoreMissing); + return new FingerprintProcessor(processorTag, description, fields, excludeFields, targetField, hashMethod, ignoreMissing); } } } diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java index c340035905f0d..ce4b798be888e 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java @@ -13,7 +13,6 @@ import org.junit.Before; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -35,11 +34,13 @@ public void testCreate() throws Exception { boolean includeAllFields = randomBoolean(); List fields = null; - if (!includeAllFields) { + List excludeFields = null; + if (randomBoolean()) { fields = List.of(randomAlphaOfLength(10)); config.put("fields", fields); } else { - config.put("include_all_fields", true); + excludeFields = List.of(randomAlphaOfLength(10)); + config.put("exclude_fields", excludeFields); } String targetField = null; @@ -54,47 +55,42 @@ public void testCreate() throws Exception { FingerprintProcessor fingerprintProcessor = factory.create(null, processorTag, null, config); assertThat(fingerprintProcessor.getTag(), equalTo(processorTag)); assertThat(fingerprintProcessor.getFields(), equalTo(fields)); - 
assertThat(fingerprintProcessor.getIncludeAllFields(), equalTo(includeAllFields)); + assertThat(fingerprintProcessor.getExcludeFields(), equalTo(excludeFields)); assertThat(fingerprintProcessor.getTargetField(), equalTo(Objects.requireNonNullElse(targetField, "fingerprint"))); assertThat(fingerprintProcessor.isIgnoreMissing(), equalTo(ignoreMissing)); } public void testCreateWithFields() throws Exception { Map config = new HashMap<>(); + config.put("fields", List.of(randomAlphaOfLength(10))); + config.put("exclude_fields", List.of(randomAlphaOfLength(10))); try { factory.create(null, null, null, config); fail("factory create should have failed"); } catch (OpenSearchParseException e) { - assertThat(e.getMessage(), equalTo("[fields] either fields or include_all_fields must be set")); - } - - config.put("fields", Collections.emptyList()); - try { - factory.create(null, null, null, config); - fail("factory create should have failed"); - } catch (OpenSearchParseException e) { - assertThat(e.getMessage(), equalTo("[fields] fields cannot be empty")); + assertThat(e.getMessage(), equalTo("[fields] either fields or exclude_fields can be set")); } config = new HashMap<>(); - config.put("fields", List.of(randomAlphaOfLength(10))); - config.put("include_all_fields", true); + List fields = new ArrayList<>(); + fields.add(null); + config.put("fields", fields); try { factory.create(null, null, null, config); fail("factory create should have failed"); } catch (OpenSearchParseException e) { - assertThat(e.getMessage(), equalTo("[fields] either fields or include_all_fields can be set")); + assertThat(e.getMessage(), equalTo("[fields] field path cannot be null nor empty")); } config = new HashMap<>(); - List fields = new ArrayList<>(); - fields.add(null); - config.put("fields", fields); + List excludeFields = new ArrayList<>(); + excludeFields.add(null); + config.put("exclude_fields", excludeFields); try { factory.create(null, null, null, config); fail("factory create should have 
failed"); } catch (OpenSearchParseException e) { - assertThat(e.getMessage(), equalTo("[fields] field path cannot be null nor empty")); + assertThat(e.getMessage(), equalTo("[exclude_fields] field path cannot be null nor empty")); } } diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java index e985e34b707a7..be2350cf81d31 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java @@ -26,15 +26,18 @@ public class FingerprintProcessorTests extends OpenSearchTestCase { public void testGenerateFingerprint() throws Exception { IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); List fields = null; - boolean includeAllFields = false; + List excludeFields = null; if (randomBoolean()) { - includeAllFields = true; - - } else { fields = new ArrayList<>(); for (int i = 0; i < randomIntBetween(1, 10); i++) { fields.add(RandomDocumentPicks.addRandomField(random(), ingestDocument, randomAlphaOfLength(10))); } + + } else { + excludeFields = new ArrayList<>(); + for (int i = 0; i < randomIntBetween(1, 10); i++) { + excludeFields.add(RandomDocumentPicks.addRandomField(random(), ingestDocument, randomAlphaOfLength(10))); + } } String targetField = "fingerprint"; @@ -43,62 +46,76 @@ public void testGenerateFingerprint() throws Exception { } String hashMethod = randomFrom(hashMethods); - Processor processor = createFingerprintProcessor(fields, includeAllFields, targetField, hashMethod, false); + Processor processor = createFingerprintProcessor(fields, excludeFields, targetField, hashMethod, false); processor.execute(ingestDocument); assertThat(ingestDocument.hasField(targetField), equalTo(true)); } public void testCreateFingerprintProcessorFailed() { - 
assertThrows( - "fields cannot be empty", - IllegalArgumentException.class, - () -> createFingerprintProcessor(Collections.emptyList(), false, "fingerprint", randomFrom(hashMethods), false) - ); - List fields = new ArrayList<>(); fields.add(null); fields.add(randomAlphaOfLength(10)); + assertThrows( "field path cannot be null nor empty", IllegalArgumentException.class, - () -> createFingerprintProcessor(fields, false, null, randomFrom(List.of("MD5", "SHA-1", "SHA-256", "SHA3-256")), false) + () -> createFingerprintProcessor(fields, null, null, randomFrom(List.of("MD5", "SHA-1", "SHA-256", "SHA3-256")), false) ); + List excludeFields = new ArrayList<>(); + excludeFields.add(null); + excludeFields.add(randomAlphaOfLength(10)); + assertThrows( - "hash method must be MD5, SHA-1, SHA-256 or SHA3-256", + "field path cannot be null nor empty", IllegalArgumentException.class, - () -> createFingerprintProcessor(Collections.emptyList(), false, "fingerprint", randomFrom(hashMethods), false) + () -> createFingerprintProcessor(null, excludeFields, null, randomFrom(List.of("MD5", "SHA-1", "SHA-256", "SHA3-256")), false) ); assertThrows( - "either fields or include_all_fields can be set", + "either fields or exclude_fields can be set", IllegalArgumentException.class, - () -> createFingerprintProcessor(Collections.emptyList(), true, "fingerprint", randomFrom(hashMethods), false) + () -> createFingerprintProcessor( + List.of(randomAlphaOfLength(10)), + List.of(randomAlphaOfLength(10)), + null, + randomFrom(List.of("MD5", "SHA-1", "SHA-256", "SHA3-256")), + false + ) ); assertThrows( - "either fields or include_all_fields must be set", + "hash method must be MD5, SHA-1, SHA-256 or SHA3-256", IllegalArgumentException.class, - () -> createFingerprintProcessor(null, false, "fingerprint", randomFrom(hashMethods), false) + () -> createFingerprintProcessor(Collections.emptyList(), null, "fingerprint", randomAlphaOfLength(10), false) ); } - public void testIncludeAllFields() { - List 
fields = new ArrayList<>(); - fields.add(null); - fields.add(randomAlphaOfLength(10)); - assertThrows( - "field path cannot be null nor empty", - IllegalArgumentException.class, - () -> createFingerprintProcessor(fields, false, null, randomFrom(hashMethods), false) - ); + public void testEmptyFieldAndExcludeFields() throws Exception { + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + List fields = null; + List excludeFields = null; + if (randomBoolean()) { + fields = new ArrayList<>(); + } else { + excludeFields = new ArrayList<>(); + } + String targetField = "fingerprint"; + if (randomBoolean()) { + targetField = randomAlphaOfLength(10); + } + + String hashMethod = randomFrom(hashMethods); + Processor processor = createFingerprintProcessor(fields, excludeFields, targetField, hashMethod, false); + processor.execute(ingestDocument); + assertThat(ingestDocument.hasField(targetField), equalTo(true)); } public void testIgnoreMissing() throws Exception { IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); String nonExistingFieldName = RandomDocumentPicks.randomNonExistingFieldName(random(), ingestDocument); List nonExistingFields = List.of(nonExistingFieldName); - Processor processor = createFingerprintProcessor(nonExistingFields, false, "fingerprint", randomFrom(hashMethods), false); + Processor processor = createFingerprintProcessor(nonExistingFields, null, "fingerprint", randomFrom(hashMethods), false); assertThrows( "field [" + nonExistingFieldName + "] doesn't exist", IllegalArgumentException.class, @@ -108,7 +125,7 @@ public void testIgnoreMissing() throws Exception { String targetField = "fingerprint"; Processor processorWithIgnoreMissing = createFingerprintProcessor( nonExistingFields, - false, + null, "fingerprint", randomFrom(hashMethods), true @@ -130,23 +147,23 @@ public void testIgnoreMetadataFields() throws Exception { String targetField = "fingerprint"; String algorithm = 
randomFrom(hashMethods); - Processor processor = createFingerprintProcessor(fields, false, targetField, algorithm, false); + Processor processor = createFingerprintProcessor(fields, null, targetField, algorithm, false); processor.execute(ingestDocument); String fingerprint = ingestDocument.getFieldValue(targetField, String.class); - processor = createFingerprintProcessor(List.of(existingFieldName), false, targetField, algorithm, false); + processor = createFingerprintProcessor(List.of(existingFieldName), null, targetField, algorithm, false); processor.execute(ingestDocument); assertThat(ingestDocument.getFieldValue(targetField, String.class), equalTo(fingerprint)); } private FingerprintProcessor createFingerprintProcessor( List fields, - boolean includeAllFields, + List excludeFields, String targetField, String hashMethod, boolean ignoreMissing ) { - return new FingerprintProcessor(randomAlphaOfLength(10), null, fields, includeAllFields, targetField, hashMethod, ignoreMissing); + return new FingerprintProcessor(randomAlphaOfLength(10), null, fields, excludeFields, targetField, hashMethod, ignoreMissing); } } diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml index 11be884a961bd..163af9ac85088 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml @@ -11,7 +11,7 @@ teardown: version: " - 2.14.99" reason: "introduced in 2.15" - do: - catch: /\[fields\] either fields or include_all_fields must be set/ + catch: /field path cannot be null nor empty/ ingest.put_pipeline: id: "1" body: > @@ -19,13 +19,13 @@ teardown: "processors": [ { "fingerprint" : { + "fields": [null] } } ] } - - do: - catch: /\[fields\] either fields or 
include_all_fields can be set/ + catch: /field path cannot be null nor empty/ ingest.put_pipeline: id: "1" body: > @@ -33,15 +33,13 @@ teardown: "processors": [ { "fingerprint" : { - "fields" : ["foo"], - "include_all_fields" : true + "exclude_fields": [null] } } ] } - - do: - catch: /field path cannot be null nor empty/ + catch: /either fields or exclude\_fields can be set/ ingest.put_pipeline: id: "1" body: > @@ -49,14 +47,15 @@ teardown: "processors": [ { "fingerprint" : { - "fields": [null] + "fields": ["foo"], + "exclude_fields": ["bar"] } } ] } - do: - catch: /fields cannot be empty/ + catch: /hash method must be MD5\, SHA\-1, SHA\-256 or SHA3\-256/ ingest.put_pipeline: id: "1" body: > @@ -64,14 +63,14 @@ teardown: "processors": [ { "fingerprint" : { - "fields": [] + "fields": ["foo"], + "hash_method": "non-existing" } } ] } - do: - catch: /hash method must be MD5\, SHA\-1, SHA\-256 or SHA3\-256/ ingest.put_pipeline: id: "1" body: > @@ -79,12 +78,14 @@ teardown: "processors": [ { "fingerprint" : { - "fields": ["foo"], - "hash_method": "non-existing" + "fields" : ["foo"], + "target_field" : "fingerprint_field", + "hash_method": "SHA-256" } } ] } + - match: { acknowledged: true } - do: ingest.put_pipeline: @@ -94,9 +95,6 @@ teardown: "processors": [ { "fingerprint" : { - "fields" : ["foo"], - "target_field" : "fingerprint_field", - "hash_method": "SHA-256" } } ] @@ -246,7 +244,6 @@ teardown: "processors": [ { "fingerprint" : { - "include_all_fields" : true } } ] @@ -272,3 +269,73 @@ teardown: index: test id: 2 - match: { _source.fingerprint: "R/ZOjN9U+AVLTxjAB8b8A5pbSyM=" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields":[] + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 3 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 3 + - match: { _source.fingerprint: 
"R/ZOjN9U+AVLTxjAB8b8A5pbSyM=" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "exclude_fields":[] + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 4 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 4 + - match: { _source.fingerprint: "R/ZOjN9U+AVLTxjAB8b8A5pbSyM=" } From 2d1c6bcbb7d4559d6718568f7560b5b1b42a5bba Mon Sep 17 00:00:00 2001 From: Gao Binlong Date: Tue, 4 Jun 2024 21:37:31 +0800 Subject: [PATCH 06/14] Modify processor description Signed-off-by: Gao Binlong --- .../java/org/opensearch/ingest/common/FingerprintProcessor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java index ba7856674cf58..8899cada9e8b3 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java @@ -30,7 +30,7 @@ import static org.opensearch.ingest.ConfigurationUtils.newConfigurationException; /** - * Processor that generating hash value for the specified fields or all fields in a document + * Processor that generating hash value for the specified fields or fields not in the specified excluded list */ public class FingerprintProcessor extends AbstractProcessor { public static final String TYPE = "fingerprint"; From 7cad2e0052cdbf0e2c4d52cc76fe98d07ee8c5ef Mon Sep 17 00:00:00 2001 From: Gao Binlong Date: Wed, 5 Jun 2024 22:34:21 +0800 Subject: [PATCH 07/14] Make FingerprintProcessor being final Signed-off-by: Gao Binlong --- .../java/org/opensearch/ingest/common/FingerprintProcessor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java index 8899cada9e8b3..c4f8209c4d07a 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java @@ -32,7 +32,7 @@ /** * Processor that generating hash value for the specified fields or fields not in the specified excluded list */ -public class FingerprintProcessor extends AbstractProcessor { +public final class FingerprintProcessor extends AbstractProcessor { public static final String TYPE = "fingerprint"; private static final Set HASH_METHODS = Set.of("MD5", "SHA-1", "SHA-256", "SHA3-256"); From cff2bcf08c1a893249fcb82eff1718f32239b24e Mon Sep 17 00:00:00 2001 From: Gao Binlong Date: Thu, 6 Jun 2024 10:54:29 +0800 Subject: [PATCH 08/14] Optimize error message and check if field name is empty string Signed-off-by: Gao Binlong --- .../ingest/common/FingerprintProcessor.java | 19 +++++++++---------- .../FingerprintProcessorFactoryTests.java | 17 ++++++++++++----- .../common/FingerprintProcessorTests.java | 16 ++++++++++++---- .../test/ingest/340_fingerprint_processor.yml | 6 +++--- 4 files changed, 36 insertions(+), 22 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java index c4f8209c4d07a..9850dab35284b 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java @@ -10,6 +10,7 @@ import org.opensearch.common.Nullable; import org.opensearch.common.hash.MessageDigests; +import org.opensearch.core.common.Strings; import 
org.opensearch.ingest.AbstractProcessor; import org.opensearch.ingest.ConfigurationUtils; import org.opensearch.ingest.IngestDocument; @@ -23,7 +24,6 @@ import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -57,15 +57,15 @@ public final class FingerprintProcessor extends AbstractProcessor { ) { super(tag, description); if (fields != null && !fields.isEmpty()) { - if (fields.stream().anyMatch(Objects::isNull)) { - throw new IllegalArgumentException("field path cannot be null nor empty"); + if (fields.stream().anyMatch(Strings::isNullOrEmpty)) { + throw new IllegalArgumentException("field name in [fields] cannot be null nor empty"); } if (excludeFields != null && !excludeFields.isEmpty()) { throw new IllegalArgumentException("either fields or exclude_fields can be set"); } } - if (excludeFields != null && !excludeFields.isEmpty() && excludeFields.stream().anyMatch(Objects::isNull)) { - throw new IllegalArgumentException("field path cannot be null nor empty"); + if (excludeFields != null && !excludeFields.isEmpty() && excludeFields.stream().anyMatch(Strings::isNullOrEmpty)) { + throw new IllegalArgumentException("field name in [exclude_fields] cannot be null nor empty"); } if (!HASH_METHODS.contains(hashMethod.toUpperCase(Locale.ROOT))) { @@ -247,16 +247,15 @@ public FingerprintProcessor create( List fields = ConfigurationUtils.readOptionalList(TYPE, processorTag, config, "fields"); List excludeFields = ConfigurationUtils.readOptionalList(TYPE, processorTag, config, "exclude_fields"); if (fields != null && !fields.isEmpty()) { - - if (fields.stream().anyMatch(Objects::isNull)) { - throw newConfigurationException(TYPE, processorTag, "fields", "field path cannot be null nor empty"); + if (fields.stream().anyMatch(Strings::isNullOrEmpty)) { + throw newConfigurationException(TYPE, processorTag, "fields", "field name cannot be null nor empty"); } if (excludeFields != 
null && !excludeFields.isEmpty()) { throw newConfigurationException(TYPE, processorTag, "fields", "either fields or exclude_fields can be set"); } } - if (excludeFields != null && !excludeFields.isEmpty() && excludeFields.stream().anyMatch(Objects::isNull)) { - throw newConfigurationException(TYPE, processorTag, "exclude_fields", "field path cannot be null nor empty"); + if (excludeFields != null && !excludeFields.isEmpty() && excludeFields.stream().anyMatch(Strings::isNullOrEmpty)) { + throw newConfigurationException(TYPE, processorTag, "exclude_fields", "field name cannot be null nor empty"); } String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", "fingerprint"); diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java index ce4b798be888e..9097429cd72c5 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java @@ -32,7 +32,6 @@ public void init() { public void testCreate() throws Exception { Map config = new HashMap<>(); - boolean includeAllFields = randomBoolean(); List fields = null; List excludeFields = null; if (randomBoolean()) { @@ -73,24 +72,32 @@ public void testCreateWithFields() throws Exception { config = new HashMap<>(); List fields = new ArrayList<>(); - fields.add(null); + if (randomBoolean()) { + fields.add(null); + } else { + fields.add(""); + } config.put("fields", fields); try { factory.create(null, null, null, config); fail("factory create should have failed"); } catch (OpenSearchParseException e) { - assertThat(e.getMessage(), equalTo("[fields] field path cannot be null nor empty")); + assertThat(e.getMessage(), equalTo("[fields] field name cannot be null nor empty")); } 
config = new HashMap<>(); List excludeFields = new ArrayList<>(); - excludeFields.add(null); + if (randomBoolean()) { + excludeFields.add(null); + } else { + excludeFields.add(""); + } config.put("exclude_fields", excludeFields); try { factory.create(null, null, null, config); fail("factory create should have failed"); } catch (OpenSearchParseException e) { - assertThat(e.getMessage(), equalTo("[exclude_fields] field path cannot be null nor empty")); + assertThat(e.getMessage(), equalTo("[exclude_fields] field name cannot be null nor empty")); } } diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java index be2350cf81d31..2c1bbc13327f6 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java @@ -53,21 +53,29 @@ public void testGenerateFingerprint() throws Exception { public void testCreateFingerprintProcessorFailed() { List fields = new ArrayList<>(); - fields.add(null); + if (randomBoolean()) { + fields.add(null); + } else { + fields.add(""); + } fields.add(randomAlphaOfLength(10)); assertThrows( - "field path cannot be null nor empty", + "field name in [fields] cannot be null nor empty", IllegalArgumentException.class, () -> createFingerprintProcessor(fields, null, null, randomFrom(List.of("MD5", "SHA-1", "SHA-256", "SHA3-256")), false) ); List excludeFields = new ArrayList<>(); - excludeFields.add(null); + if (randomBoolean()) { + excludeFields.add(null); + } else { + excludeFields.add(""); + } excludeFields.add(randomAlphaOfLength(10)); assertThrows( - "field path cannot be null nor empty", + "field name in [exclude_fields] cannot be null nor empty", IllegalArgumentException.class, () -> createFingerprintProcessor(null, excludeFields, null, 
randomFrom(List.of("MD5", "SHA-1", "SHA-256", "SHA3-256")), false) ); diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml index 163af9ac85088..abd18902bffab 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml @@ -11,7 +11,7 @@ teardown: version: " - 2.14.99" reason: "introduced in 2.15" - do: - catch: /field path cannot be null nor empty/ + catch: /field name in \[fields\] cannot be null nor empty/ ingest.put_pipeline: id: "1" body: > @@ -25,7 +25,7 @@ teardown: ] } - do: - catch: /field path cannot be null nor empty/ + catch: /field name in \[exclude_fields\] cannot be null nor empty/ ingest.put_pipeline: id: "1" body: > @@ -33,7 +33,7 @@ teardown: "processors": [ { "fingerprint" : { - "exclude_fields": [null] + "exclude_fields": [""] } } ] From 4447c9cf39e8d20cadcb0989c6dcd7541bd39b7a Mon Sep 17 00:00:00 2001 From: Gao Binlong Date: Thu, 6 Jun 2024 11:29:13 +0800 Subject: [PATCH 09/14] Fix yaml test failure Signed-off-by: Gao Binlong --- .../rest-api-spec/test/ingest/340_fingerprint_processor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml index abd18902bffab..a1296656c6dff 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml @@ -11,7 +11,7 @@ teardown: version: " - 2.14.99" reason: "introduced in 2.15" - do: - 
catch: /field name in \[fields\] cannot be null nor empty/ + catch: /field name cannot be null nor empty/ ingest.put_pipeline: id: "1" body: > @@ -25,7 +25,7 @@ teardown: ] } - do: - catch: /field name in \[exclude_fields\] cannot be null nor empty/ + catch: /field name cannot be null nor empty/ ingest.put_pipeline: id: "1" body: > From fb74e64a4b69a20a48cbb8a0f8a2584c0faf8104 Mon Sep 17 00:00:00 2001 From: Gao Binlong Date: Fri, 7 Jun 2024 15:42:05 +0800 Subject: [PATCH 10/14] Prepend string length to the field value Signed-off-by: Gao Binlong --- .../ingest/common/FingerprintProcessor.java | 25 ++++++++++--------- .../test/ingest/340_fingerprint_processor.yml | 12 ++++----- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java index 9850dab35284b..6f2d1e3408f2c 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java @@ -139,19 +139,20 @@ public IngestDocument execute(IngestDocument document) { if (value instanceof Map) { @SuppressWarnings("unchecked") Map flattenedMap = toFlattenedMap((Map) value); - flattenedMap.entrySet() - .stream() - .sorted(Map.Entry.comparingByKey()) - .forEach( - entry -> concatenatedFields.append("|") - .append(field) - .append(".") - .append(entry.getKey()) - .append("|") - .append(entry.getValue()) - ); + flattenedMap.entrySet().stream().sorted(Map.Entry.comparingByKey()).forEach(entry -> { + String fieldValue = String.valueOf(entry.getValue()); + concatenatedFields.append("|") + .append(field) + .append(".") + .append(entry.getKey()) + .append("|") + .append(fieldValue.length()) + .append(":") + .append(fieldValue); + }); } else { - concatenatedFields.append("|").append(field).append("|").append(value); 
+ String fieldValue = String.valueOf(value); + concatenatedFields.append("|").append(field).append("|").append(fieldValue.length()).append(":").append(fieldValue); } }); // if all specified fields don't exist and ignore_missing is true, then do nothing diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml index a1296656c6dff..ddd2c514f2aa4 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml @@ -159,7 +159,7 @@ teardown: get: index: test id: 1 - - match: { _source.fingerprint: "w1axmYeYkdIEZMKxybhjOEuBFxA=" } + - match: { _source.fingerprint: "YqpBTuHXCPV04j/7lGfWeUl8Tyo=" } --- "Test fingerprint processor with custom target field": @@ -194,7 +194,7 @@ teardown: get: index: test id: 1 - - match: { _source.target: "w1axmYeYkdIEZMKxybhjOEuBFxA=" } + - match: { _source.target: "YqpBTuHXCPV04j/7lGfWeUl8Tyo=" } --- "Test fingerprint processor with non-primitive fields": @@ -234,7 +234,7 @@ teardown: get: index: test id: 1 - - match: { _source.fingerprint: "R/ZOjN9U+AVLTxjAB8b8A5pbSyM=" } + - match: { _source.fingerprint: "KYJ4pc4ouFmAbgZGp7CfNoykZeo=" } - do: ingest.put_pipeline: @@ -268,7 +268,7 @@ teardown: get: index: test id: 2 - - match: { _source.fingerprint: "R/ZOjN9U+AVLTxjAB8b8A5pbSyM=" } + - match: { _source.fingerprint: "KYJ4pc4ouFmAbgZGp7CfNoykZeo=" } - do: ingest.put_pipeline: @@ -303,7 +303,7 @@ teardown: get: index: test id: 3 - - match: { _source.fingerprint: "R/ZOjN9U+AVLTxjAB8b8A5pbSyM=" } + - match: { _source.fingerprint: "KYJ4pc4ouFmAbgZGp7CfNoykZeo=" } - do: ingest.put_pipeline: @@ -338,4 +338,4 @@ teardown: get: index: test id: 4 - - match: { _source.fingerprint: "R/ZOjN9U+AVLTxjAB8b8A5pbSyM=" } + - match: { 
_source.fingerprint: "KYJ4pc4ouFmAbgZGp7CfNoykZeo=" } From 965303b6ece345c7ce755a8e869bc16c259992a1 Mon Sep 17 00:00:00 2001 From: Gao Binlong Date: Thu, 13 Jun 2024 13:22:19 +0800 Subject: [PATCH 11/14] Append hash method with version number Signed-off-by: Gao Binlong --- .../ingest/common/FingerprintProcessor.java | 21 ++++++++++++------- .../FingerprintProcessorFactoryTests.java | 5 ++++- .../common/FingerprintProcessorTests.java | 11 +++++----- .../test/ingest/340_fingerprint_processor.yml | 20 +++++++++--------- 4 files changed, 32 insertions(+), 25 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java index 6f2d1e3408f2c..6348a2ddb8279 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java @@ -34,7 +34,7 @@ */ public final class FingerprintProcessor extends AbstractProcessor { public static final String TYPE = "fingerprint"; - private static final Set HASH_METHODS = Set.of("MD5", "SHA-1", "SHA-256", "SHA3-256"); + private static final Set HASH_METHODS = Set.of("MD5@2.16.0", "SHA-1@2.16.0", "SHA-256@2.16.0", "SHA3-256@2.16.0"); // fields used to generate hash value private final List fields; @@ -69,7 +69,7 @@ public final class FingerprintProcessor extends AbstractProcessor { } if (!HASH_METHODS.contains(hashMethod.toUpperCase(Locale.ROOT))) { - throw new IllegalArgumentException("hash method must be MD5, SHA-1 or SHA-256 or SHA3-256"); + throw new IllegalArgumentException("hash method must be MD5@2.16.0, SHA-1@2.16.0 or SHA-256@2.16.0 or SHA3-256@2.16.0"); } this.fields = fields; this.excludeFields = excludeFields; @@ -223,13 +223,13 @@ enum HashMethod { public static MessageDigest fromMethodName(String methodName) { String name = 
methodName.toUpperCase(Locale.ROOT); switch (name) { - case "MD5": + case "MD5@2.16.0": return MD5.messageDigest; - case "SHA-1": + case "SHA-1@2.16.0": return SHA1.messageDigest; - case "SHA-256": + case "SHA-256@2.16.0": return SHA256.messageDigest; - case "SHA3-256": + case "SHA3-256@2.16.0": return SHA3256.messageDigest; default: return null; @@ -260,9 +260,14 @@ public FingerprintProcessor create( } String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", "fingerprint"); - String hashMethod = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "hash_method", "SHA-1"); + String hashMethod = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "hash_method", "SHA-1@2.16.0"); if (!HASH_METHODS.contains(hashMethod.toUpperCase(Locale.ROOT))) { - throw newConfigurationException(TYPE, processorTag, "hash_method", "hash method must be MD5, SHA-1, SHA-256 or SHA3-256"); + throw newConfigurationException( + TYPE, + processorTag, + "hash_method", + "hash method must be MD5@2.16.0, SHA-1@2.16.0, SHA-256@2.16.0 or SHA3-256@2.16.0" + ); } boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); return new FingerprintProcessor(processorTag, description, fields, excludeFields, targetField, hashMethod, ignoreMissing); diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java index 9097429cd72c5..74ad4cade7b37 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java @@ -110,7 +110,10 @@ public void testCreateWithHashMethod() throws Exception { factory.create(null, null, null, config); fail("factory create should have 
failed"); } catch (OpenSearchParseException e) { - assertThat(e.getMessage(), equalTo("[hash_method] hash method must be MD5, SHA-1, SHA-256 or SHA3-256")); + assertThat( + e.getMessage(), + equalTo("[hash_method] hash method must be MD5@2.16.0, SHA-1@2.16.0, SHA-256@2.16.0 or SHA3-256@2.16.0") + ); } } } diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java index 2c1bbc13327f6..67a82f28fb763 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java @@ -21,7 +21,7 @@ import static org.hamcrest.Matchers.equalTo; public class FingerprintProcessorTests extends OpenSearchTestCase { - private final List hashMethods = List.of("MD5", "SHA-1", "SHA-256", "SHA3-256"); + private final List hashMethods = List.of("MD5@2.16.0", "SHA-1@2.16.0", "SHA-256@2.16.0", "SHA3-256@2.16.0"); public void testGenerateFingerprint() throws Exception { IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); @@ -32,7 +32,6 @@ public void testGenerateFingerprint() throws Exception { for (int i = 0; i < randomIntBetween(1, 10); i++) { fields.add(RandomDocumentPicks.addRandomField(random(), ingestDocument, randomAlphaOfLength(10))); } - } else { excludeFields = new ArrayList<>(); for (int i = 0; i < randomIntBetween(1, 10); i++) { @@ -63,7 +62,7 @@ public void testCreateFingerprintProcessorFailed() { assertThrows( "field name in [fields] cannot be null nor empty", IllegalArgumentException.class, - () -> createFingerprintProcessor(fields, null, null, randomFrom(List.of("MD5", "SHA-1", "SHA-256", "SHA3-256")), false) + () -> createFingerprintProcessor(fields, null, null, randomFrom(hashMethods), false) ); List excludeFields = new ArrayList<>(); @@ -77,7 +76,7 @@ public 
void testCreateFingerprintProcessorFailed() { assertThrows( "field name in [exclude_fields] cannot be null nor empty", IllegalArgumentException.class, - () -> createFingerprintProcessor(null, excludeFields, null, randomFrom(List.of("MD5", "SHA-1", "SHA-256", "SHA3-256")), false) + () -> createFingerprintProcessor(null, excludeFields, null, randomFrom(hashMethods), false) ); assertThrows( @@ -87,13 +86,13 @@ public void testCreateFingerprintProcessorFailed() { List.of(randomAlphaOfLength(10)), List.of(randomAlphaOfLength(10)), null, - randomFrom(List.of("MD5", "SHA-1", "SHA-256", "SHA3-256")), + randomFrom(hashMethods), false ) ); assertThrows( - "hash method must be MD5, SHA-1, SHA-256 or SHA3-256", + "hash method must be MD5@2.16.0, SHA-1@2.16.0, SHA-256@2.16.0 or SHA3-256@2.16.0", IllegalArgumentException.class, () -> createFingerprintProcessor(Collections.emptyList(), null, "fingerprint", randomAlphaOfLength(10), false) ); diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml index ddd2c514f2aa4..5a827445b59ee 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml @@ -8,8 +8,8 @@ teardown: --- "Test creat fingerprint processor": - skip: - version: " - 2.14.99" - reason: "introduced in 2.15" + version: " - 2.15.99" + reason: "introduced in 2.16.0" - do: catch: /field name cannot be null nor empty/ ingest.put_pipeline: @@ -55,7 +55,7 @@ teardown: } - do: - catch: /hash method must be MD5\, SHA\-1, SHA\-256 or SHA3\-256/ + catch: /hash method must be MD5@2.16.0\, SHA\-1@2.16.0, SHA\-256@2.16.0 or SHA3\-256@2.16.0/ ingest.put_pipeline: id: "1" body: > @@ -80,7 +80,7 @@ teardown: "fingerprint" : { "fields" : ["foo"], 
"target_field" : "fingerprint_field", - "hash_method": "SHA-256" + "hash_method": "SHA-256@2.16.0" } } ] @@ -104,8 +104,8 @@ teardown: --- "Test fingerprint processor with ignore_missing": - skip: - version: " - 2.14.99" - reason: "introduced in 2.15" + version: " - 2.15.99" + reason: "introduced in 2.16.0" - do: ingest.put_pipeline: id: "1" @@ -164,8 +164,8 @@ teardown: --- "Test fingerprint processor with custom target field": - skip: - version: " - 2.14.99" - reason: "introduced in 2.15" + version: " - 2.15.99" + reason: "introduced in 2.16.0" - do: ingest.put_pipeline: id: "1" @@ -199,8 +199,8 @@ teardown: --- "Test fingerprint processor with non-primitive fields": - skip: - version: " - 2.14.99" - reason: "introduced in 2.15" + version: " - 2.15.99" + reason: "introduced in 2.16.0" - do: ingest.put_pipeline: id: "1" From 7883db5a6446c3a25c6e6c8a74940ee436ad0186 Mon Sep 17 00:00:00 2001 From: Gao Binlong Date: Thu, 13 Jun 2024 14:15:58 +0800 Subject: [PATCH 12/14] Update supported version in yml test file Signed-off-by: Gao Binlong --- .../resources/rest-api-spec/test/ingest/10_basic.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml index b5df5dfa153ff..9bf4faf53a999 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml @@ -90,9 +90,9 @@ --- "Fingerprint processor exists": - skip: - version: " - 2.14.99" + version: " - 2.15.99" features: contains - reason: "fingerprint processor was introduced in 2.15.0 and contains is a newly added assertion" + reason: "fingerprint processor was introduced in 2.16.0 and contains is a newly added assertion" - do: cluster.state: {} From 56cc4b6b7340b8915c58131ab6b88e2559cc226d Mon Sep 17 
00:00:00 2001 From: Gao Binlong Date: Thu, 13 Jun 2024 17:19:22 +0800 Subject: [PATCH 13/14] Add more comment Signed-off-by: Gao Binlong --- .../org/opensearch/ingest/common/FingerprintProcessor.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java index 6348a2ddb8279..dc7eac189c5bc 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java @@ -34,6 +34,9 @@ */ public final class FingerprintProcessor extends AbstractProcessor { public static final String TYPE = "fingerprint"; + // This processor was introduced in 2.16.0. The OpenSearch version is appended to the hash method name so that + // a given hash method name always produces the same hash value. If the processing logic of this processor + // changes in a future version, the version number in the hash method name should be increased accordingly.
private static final Set HASH_METHODS = Set.of("MD5@2.16.0", "SHA-1@2.16.0", "SHA-256@2.16.0", "SHA3-256@2.16.0"); // fields used to generate hash value From 1f2d5d05ee4b8fc95e9a290545b40487c49048a4 Mon Sep 17 00:00:00 2001 From: Gao Binlong Date: Fri, 14 Jun 2024 17:35:37 +0800 Subject: [PATCH 14/14] Prepend hash method to the hash value and add more test cases Signed-off-by: Gao Binlong --- .../ingest/common/FingerprintProcessor.java | 2 +- .../test/ingest/340_fingerprint_processor.yml | 459 +++++++++++++++++- 2 files changed, 453 insertions(+), 8 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java index dc7eac189c5bc..c2f59bf586c81 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java @@ -167,7 +167,7 @@ public IngestDocument execute(IngestDocument document) { MessageDigest messageDigest = HashMethod.fromMethodName(hashMethod); assert (messageDigest != null); messageDigest.update(concatenatedFields.toString().getBytes(StandardCharsets.UTF_8)); - document.setFieldValue(targetField, Base64.getEncoder().encodeToString(messageDigest.digest())); + document.setFieldValue(targetField, hashMethod + ":" + Base64.getEncoder().encodeToString(messageDigest.digest())); return document; } diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml index 5a827445b59ee..04568916239f4 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml @@ 
-159,7 +159,7 @@ teardown: get: index: test id: 1 - - match: { _source.fingerprint: "YqpBTuHXCPV04j/7lGfWeUl8Tyo=" } + - match: { _source.fingerprint: "SHA-1@2.16.0:YqpBTuHXCPV04j/7lGfWeUl8Tyo=" } --- "Test fingerprint processor with custom target field": @@ -194,10 +194,10 @@ teardown: get: index: test id: 1 - - match: { _source.target: "YqpBTuHXCPV04j/7lGfWeUl8Tyo=" } + - match: { _source.target: "SHA-1@2.16.0:YqpBTuHXCPV04j/7lGfWeUl8Tyo=" } --- -"Test fingerprint processor with non-primitive fields": +"Test fingerprint processor with non-primitive fields and SHA-1": - skip: version: " - 2.15.99" reason: "introduced in 2.16.0" @@ -234,7 +234,7 @@ teardown: get: index: test id: 1 - - match: { _source.fingerprint: "KYJ4pc4ouFmAbgZGp7CfNoykZeo=" } + - match: { _source.fingerprint: "SHA-1@2.16.0:KYJ4pc4ouFmAbgZGp7CfNoykZeo=" } - do: ingest.put_pipeline: @@ -268,7 +268,7 @@ teardown: get: index: test id: 2 - - match: { _source.fingerprint: "KYJ4pc4ouFmAbgZGp7CfNoykZeo=" } + - match: { _source.fingerprint: "SHA-1@2.16.0:KYJ4pc4ouFmAbgZGp7CfNoykZeo=" } - do: ingest.put_pipeline: @@ -303,7 +303,7 @@ teardown: get: index: test id: 3 - - match: { _source.fingerprint: "KYJ4pc4ouFmAbgZGp7CfNoykZeo=" } + - match: { _source.fingerprint: "SHA-1@2.16.0:KYJ4pc4ouFmAbgZGp7CfNoykZeo=" } - do: ingest.put_pipeline: @@ -338,4 +338,449 @@ teardown: get: index: test id: 4 - - match: { _source.fingerprint: "KYJ4pc4ouFmAbgZGp7CfNoykZeo=" } + - match: { _source.fingerprint: "SHA-1@2.16.0:KYJ4pc4ouFmAbgZGp7CfNoykZeo=" } + +--- +"Test fingerprint processor with non-primitive fields and MD5": + - skip: + version: " - 2.15.99" + reason: "introduced in 2.16.0" + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields" : ["foo", "bar", "zoo"], + "hash_method" : "MD5@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" 
+ } + }, + zoo: null + } + - do: + get: + index: test + id: 1 + - match: { _source.fingerprint: "MD5@2.16.0:NovpcJ+MYHzEZtCewcDPTQ==" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "hash_method" : "MD5@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 2 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 2 + - match: { _source.fingerprint: "MD5@2.16.0:NovpcJ+MYHzEZtCewcDPTQ==" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields":[], + "hash_method" : "MD5@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 3 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 3 + - match: { _source.fingerprint: "MD5@2.16.0:NovpcJ+MYHzEZtCewcDPTQ==" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "exclude_fields":[], + "hash_method" : "MD5@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 4 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 4 + - match: { _source.fingerprint: "MD5@2.16.0:NovpcJ+MYHzEZtCewcDPTQ==" } + + +--- +"Test fingerprint processor with non-primitive fields and SHA-256": + - skip: + version: " - 2.15.99" + reason: "introduced in 2.16.0" + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields" : ["foo", "bar", "zoo"], + "hash_method" : "SHA-256@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + 
}, + zoo: null + } + - do: + get: + index: test + id: 1 + - match: { _source.fingerprint: "SHA-256@2.16.0:Sdlg0BodM3n1my4BvaTfJCPrvHxfrxno0kCLfMaC+XY=" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "hash_method" : "SHA-256@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 2 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 2 + - match: { _source.fingerprint: "SHA-256@2.16.0:Sdlg0BodM3n1my4BvaTfJCPrvHxfrxno0kCLfMaC+XY=" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields":[], + "hash_method" : "SHA-256@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 3 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 3 + - match: { _source.fingerprint: "SHA-256@2.16.0:Sdlg0BodM3n1my4BvaTfJCPrvHxfrxno0kCLfMaC+XY=" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "exclude_fields":[], + "hash_method" : "SHA-256@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 4 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 4 + - match: { _source.fingerprint: "SHA-256@2.16.0:Sdlg0BodM3n1my4BvaTfJCPrvHxfrxno0kCLfMaC+XY=" } + +--- +"Test fingerprint processor with non-primitive fields and SHA3-256": + - skip: + version: " - 2.15.99" + reason: "introduced in 2.16.0" + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields" : ["foo", "bar", "zoo"], + "hash_method" : "SHA3-256@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: 
test + id: 1 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 1 + - match: { _source.fingerprint: "SHA3-256@2.16.0:+GZCkMLEMkUA/4IrEZEZZYsVMbZdpJ92ppN3wUsFYOI=" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "hash_method" : "SHA3-256@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 2 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 2 + - match: { _source.fingerprint: "SHA3-256@2.16.0:+GZCkMLEMkUA/4IrEZEZZYsVMbZdpJ92ppN3wUsFYOI=" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields":[], + "hash_method" : "SHA3-256@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 3 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 3 + - match: { _source.fingerprint: "SHA3-256@2.16.0:+GZCkMLEMkUA/4IrEZEZZYsVMbZdpJ92ppN3wUsFYOI=" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "exclude_fields":[], + "hash_method" : "SHA3-256@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 4 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 4 + - match: { _source.fingerprint: "SHA3-256@2.16.0:+GZCkMLEMkUA/4IrEZEZZYsVMbZdpJ92ppN3wUsFYOI=" }