diff --git a/CHANGELOG.md b/CHANGELOG.md index fb465153512bf..5744b17625d53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -59,6 +59,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Pass parent filter to inner hit query ([#13903](https://github.com/opensearch-project/OpenSearch/pull/13903)) - Fix NPE on restore searchable snapshot ([#13911](https://github.com/opensearch-project/OpenSearch/pull/13911)) - Fix double invocation of postCollection when MultiBucketCollector is present ([#14015](https://github.com/opensearch-project/OpenSearch/pull/14015)) +- Fix null values indexed as "null" strings in flat_object field ([#14069](https://github.com/opensearch-project/OpenSearch/pull/14069)) ### Security diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/91_flat_object_null_value.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/91_flat_object_null_value.yml new file mode 100644 index 0000000000000..48be7294f5b4d --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/91_flat_object_null_value.yml @@ -0,0 +1,390 @@ +--- +# The test setup includes: +# - Create flat_object mapping for flat_object_null_value index +# - Index 20 example documents related to null value +# - Refresh the index so it is ready for search tests + +setup: + - do: + indices.create: + index: flat_object_null_value + body: + mappings: + properties: + record: + type: "flat_object" + - do: + index: + index: flat_object_null_value + id: 1 + body: { + "record": null + } + + - do: + index: + index: flat_object_null_value + id: 2 + body: { + "record": { + "name": null + } + } + + - do: + index: + index: flat_object_null_value + id: 3 + body: { + "record": { + "name": null, + "age":"5", + "name1": null + } + } + + - do: + index: + index: flat_object_null_value + id: 4 + body: { + "record": { + "name": [ + null, + { + "d": { + "name": "dsds" + } + } + ] + } + } + + - do: + index: + index: flat_object_null_value + id: 5 + body: { + "record": { + "name": null + } + } + + - do: + index: + index: flat_object_null_value + id: 6 + body: { + "record": { + "name": [ + { + "name": "age1" + }, + null, + { + "d": { + "name": "dsds" + } + } + ] + } + } + + - do: + index: + index: flat_object_null_value + id: 7 + body: { + "record": { + "name": null, + "age":"3" + } + } + + - do: + index: + index: flat_object_null_value + id: 8 + body: { + "record": { + "age":"3", + "name": null + } + } + + - do: + index: + index: flat_object_null_value + id: 9 + body: { + "record": { + "name": [ + null, + 3 + ], + "age": 4 + } + } + + - do: + index: + index: flat_object_null_value + id: 10 + body: { + "record": { + "age": 4, + "name": [ + null, + 3 + ] + } + } + + - do: + index: + index: flat_object_null_value + id: 11 + body: { + "record": { + "name": null + } + } + + - do: + index: + index: flat_object_null_value + id: 12 + body: { + "record": { + "r1": { + "labels": [ + null + ] + } + } + } + + - do: + index: + index: flat_object_null_value + id: 13 + body: { + "field": { + "labels": [ + null + ] + } + } + + - do: + index: + index: flat_object_null_value + id: 14 + body: { + "record": { + "r1": { + "name": null, + "labels": [ + null + ] + } + } + } + + - do: + index: + index: flat_object_null_value + id: 15 + body: { + "record": { + "age": "4", + "labels": [ + null + ] + } + } + + - do: + index: + index: flat_object_null_value + id: 16 + body: { + "record": { + "labels": [ + null + ], + "age": "4" + } + } + + - do: + index: + index: flat_object_null_value + id: 17 + body: { + "record": { + "name": { + "name1": [ + null, + "dsdsdsd" + ] + } + } + } + + - do: + index: + index: flat_object_null_value + id: 18 + body: { + "record": { + "name": { + "name1": { + "name2": null + } + } + } + } + + - do: + index: + index: flat_object_null_value + id: 19 + body: { + "record": { + "name": { + "name1": [ + [], + [ + "dsdsdsd", + null + ] + ] + } + } + } + + - do: + indices.refresh: + index: flat_object_null_value +--- +# Delete Index when connection is teardown +teardown: + - do: + indices.delete: + index: flat_object_null_value + + +--- +# Verify that mappings under the catalog field did not expand +# and no dynamic fields were created. +"Mappings": + - skip: + version: " - 2.99.99" + reason: "flat_object is introduced in 3.0.0 in main branch" + + - do: + indices.get_mapping: + index: flat_object_null_value + - is_true: flat_object_null_value.mappings + - match: { flat_object_null_value.mappings.properties.record.type: flat_object } + # https://github.com/opensearch-project/OpenSearch/tree/main/rest-api-spec/src/main/resources/rest-api-spec/test#length + - length: { flat_object_null_value.mappings.properties: 1 } + + +--- +"Supported queries": + - skip: + version: " - 2.99.99" + reason: "flat_object is introduced in 3.0.0 in main branch" + + + # Verify Document Count + - do: + search: + body: { + query: { + match_all: { } + } + } + + - length: { hits.hits: 19 } + + # Exists Query with no dot path. + - do: + search: + body: { + _source: true, + query: { + exists: { "field": "record" } + } + } + + - length: { hits.hits: 12 } + - match: { hits.hits.0._source.record: { "name": null, "age": "5", "name1": null } } + - match: { hits.hits.1._source.record.name: [ null, { "d": { "name": "dsds" } } ] } + - match: { hits.hits.2._source.record.name: [ { "d": { "name": "dsds" } }, null ] } + - match: { hits.hits.3._source.record.name: [ { "name": "age1" }, null, { "d": { "name": "dsds" } } ] } + - match: { hits.hits.4._source.record: { "name": null, "age": "3" } } + - match: { hits.hits.5._source.record: { "age": "3", "name": null } } + - match: { hits.hits.6._source.record: { "name": [ null, 3 ], "age": 4 } } + - match: { hits.hits.7._source.record: { "age": 4, "name": [ null, 3 ] } } + - match: { hits.hits.8._source.record: { "age": "4", "labels": [ null ] } } + - match: { hits.hits.9._source.record: { "labels": [ null ], "age": "4" } } + - match: { hits.hits.10._source.record.name: { "name1": [ null, "dsdsdsd" ] } } + - match: { hits.hits.10._source.record.name: { "name1": [ [], [ "dsdsdsd", null ] ] } } + + # Exists Query with dot path. + - do: + search: + body: { + _source: true, + query: { + exists: { "field": "record.d" } + } + } + + - length: { hits.hits: 3 } + - match: { hits.hits.1._source.record.name: [ null, { "d": { "name": "dsds" } } ] } + - match: { hits.hits.2._source.record.name: [ { "d": { "name": "dsds" } }, null ] } + - match: { hits.hits.3._source.record.name: [ { "name": "age1" }, null, { "d": { "name": "dsds" } } ] } + + # Term Query without exact dot path. + - do: + search: + body: { + _source: true, + query: { + term: { record: "dsdsdsd" } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.10._source.record.name: { "name1": [ null, "dsdsdsd" ] } } + - match: { hits.hits.10._source.record.name: { "name1": [ [], [ "dsdsdsd", null ] ] } } + + # Term Query without exact dot path. + - do: + search: + body: { + _source: true, + query: { + term: { record.name.name1: "dsdsdsd" } + } + } + + - length: { hits.hits: 2 } + - match: { hits.hits.10._source.record.name: { "name1": [ null, "dsdsdsd" ] } } + - match: { hits.hits.10._source.record.name: { "name1": [ [], [ "dsdsdsd", null ] ] } } + + # Test "null" string search. + - do: + search: + body: { + _source: true, + query: { + term: { record: "null" } + } + } + + - length: { hits.hits: 0 } diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java index 998122d9e5c43..88e3d7b6813bd 100644 --- a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -23,6 +23,7 @@ import java.math.BigInteger; import java.nio.CharBuffer; import java.util.ArrayList; +import java.util.HashSet; /** * JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser @@ -44,6 +45,10 @@ public class JsonToStringXContentParser extends AbstractXContentParser { private DeprecationHandler deprecationHandler; + private String nullValue; + + private int ignoreAbove; + private static final String VALUE_AND_PATH_SUFFIX = "._valueAndPath"; private static final String VALUE_SUFFIX = "._value"; private static final String DOT_SYMBOL = "."; @@ -53,37 +58,56 @@ public JsonToStringXContentParser( NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, XContentParser parser, - String fieldTypeName + String fieldTypeName, + String nullValue, + int ignoreAbove ) throws IOException { super(xContentRegistry, deprecationHandler); this.deprecationHandler = deprecationHandler; this.xContentRegistry = xContentRegistry; this.parser = parser; this.fieldTypeName = fieldTypeName; + this.nullValue = nullValue; + this.ignoreAbove = ignoreAbove; } public XContentParser parseObject() throws IOException { builder.startObject(); StringBuilder path = new StringBuilder(fieldTypeName); - parseToken(path, null); - builder.field(this.fieldTypeName, keyList); - builder.field(this.fieldTypeName + VALUE_SUFFIX, valueList); - builder.field(this.fieldTypeName + VALUE_AND_PATH_SUFFIX, valueAndPathList); + boolean isChildrenValueValid = parseToken(path, null, 1); + removeKeyIfNeed(isChildrenValueValid); + // deduplication the fieldName,valueList,valueAndPathList + builder.field(this.fieldTypeName, new HashSet<>(keyList)); + builder.field(this.fieldTypeName + VALUE_SUFFIX, new HashSet<>(valueList)); + builder.field(this.fieldTypeName + VALUE_AND_PATH_SUFFIX, new HashSet<>(valueAndPathList)); builder.endObject(); String jString = XContentHelper.convertToJson(BytesReference.bytes(builder), false, MediaTypeRegistry.JSON); return JsonXContent.jsonXContent.createParser(this.xContentRegistry, this.deprecationHandler, String.valueOf(jString)); } - private void parseToken(StringBuilder path, String currentFieldName) throws IOException { + /** + * @return true if the child object contains no_null value, false otherwise + */ + private boolean parseToken(StringBuilder path, String currentFieldName, int depth) throws IOException { + + if (depth == 1 && processNoNestedValue()) { + return true; + } + boolean isChildrenValueValid = false; + boolean lastBrotherValueValid = true; + boolean visitFieldName = false; while (this.parser.nextToken() != Token.END_OBJECT) { if (this.parser.currentName() != null) { currentFieldName = this.parser.currentName(); } - StringBuilder parsedFields = new StringBuilder(); if (this.parser.currentToken() == Token.FIELD_NAME) { - path.append(DOT_SYMBOL).append(currentFieldName); + visitFieldName = true; + if (lastBrotherValueValid == false) { + removeKeyIfNeed(lastBrotherValueValid); + } + int dotIndex = currentFieldName.indexOf(DOT_SYMBOL); String fieldNameSuffix = currentFieldName; // The field name may be of the form foo.bar.baz @@ -100,41 +124,73 @@ private void parseToken(StringBuilder path, String currentFieldName) throws IOEx this.keyList.add(fieldNameSuffix); } } else if (this.parser.currentToken() == Token.START_ARRAY) { - parseToken(path, currentFieldName); + lastBrotherValueValid = parseToken(path, currentFieldName, depth); + isChildrenValueValid |= lastBrotherValueValid; break; } else if (this.parser.currentToken() == Token.END_ARRAY) { // skip } else if (this.parser.currentToken() == Token.START_OBJECT) { - parseToken(path, currentFieldName); - int dotIndex = path.lastIndexOf(DOT_SYMBOL, path.length()); - - if (dotIndex != -1 && path.length() > currentFieldName.length()) { - path.setLength(path.length() - currentFieldName.length() - 1); - } + path.append(DOT_SYMBOL).append(currentFieldName); + boolean validValue = parseToken(path, currentFieldName, depth + 1); + isChildrenValueValid |= validValue; + removeKeyIfNeed(validValue); + assert path.length() > (currentFieldName.length() - 1); + path.setLength(path.length() - currentFieldName.length() - 1); } else { - if (!path.toString().contains(currentFieldName)) { + String parseValue = parseValue(); + if (parseValue != null) { path.append(DOT_SYMBOL).append(currentFieldName); - } - parseValue(parsedFields); - this.valueList.add(parsedFields.toString()); - this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields); - int dotIndex = path.lastIndexOf(DOT_SYMBOL, path.length()); - if (dotIndex != -1 && path.length() > currentFieldName.length()) { + this.valueList.add(parseValue); + this.valueAndPathList.add(path + EQUAL_SYMBOL + parseValue); + isChildrenValueValid = true; + lastBrotherValueValid = true; + assert path.length() > (currentFieldName.length() - 1); path.setLength(path.length() - currentFieldName.length() - 1); + } else { + lastBrotherValueValid = false; } } + } + if (visitFieldName && isChildrenValueValid == true) { + removeKeyIfNeed(lastBrotherValueValid); + } + return isChildrenValueValid; + } + public void removeKeyIfNeed(boolean hasValidValue) { + if (hasValidValue == false) { + // it means that the value is invalid, and the key has the only one value, + // we should delete the key from keyList. + assert keyList.size() > 0; + this.keyList.remove(keyList.size() - 1); } } - private void parseValue(StringBuilder parsedFields) throws IOException { + private boolean processNoNestedValue() throws IOException { + if (parser.currentToken() == Token.VALUE_NULL) { + if (nullValue != null) { + this.valueList.add(nullValue); + } + return true; + } else if (this.parser.currentToken() == Token.VALUE_STRING + || this.parser.currentToken() == Token.VALUE_NUMBER + || this.parser.currentToken() == Token.VALUE_BOOLEAN) { + String value = this.parser.textOrNull(); + if (value != null && value.length() <= ignoreAbove) { + this.valueList.add(value); + } + return true; + } + return false; + } + + private String parseValue() throws IOException { switch (this.parser.currentToken()) { case VALUE_BOOLEAN: case VALUE_NUMBER: case VALUE_STRING: case VALUE_NULL: - parsedFields.append(this.parser.textOrNull()); - break; + return this.parser.textOrNull(); // Handle other token types as needed case FIELD_NAME: case VALUE_EMBEDDED_OBJECT: @@ -144,6 +200,7 @@ private void parseValue(StringBuilder parsedFields) throws IOException { default: throw new IOException("Unsupported token type [" + parser.currentToken() + "]"); } + return null; } @Override diff --git a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java index 9a3f2595a7c9e..ab5e03dc27adf 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java @@ -573,7 +573,9 @@ protected void parseCreateField(ParseContext context) throws IOException { NamedXContentRegistry.EMPTY, DeprecationHandler.IGNORE_DEPRECATIONS, context.parser(), - fieldType().name() + fieldType().name(), + null, + Integer.MAX_VALUE ); /* JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser diff --git a/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java b/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java index 0feb7bcd1ceec..bdb5cbce87bd2 100644 --- a/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java +++ b/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java @@ -31,7 +31,9 @@ private String flattenJsonString(String fieldName, String in) throws IOException xContentRegistry(), DeprecationHandler.THROW_UNSUPPORTED_OPERATION, parser, - fieldName + fieldName, + null, + Integer.MAX_VALUE ); // Skip the START_OBJECT token: jsonToStringXContentParser.nextToken(); @@ -49,9 +51,9 @@ public void testNestedObjects() throws IOException { assertEquals( "{" - + "\"flat\":[\"first\",\"second\",\"inner\",\"third\"]," + + "\"flat\":[\"third\",\"inner\",\"first\",\"second\"]," + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," - + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner=2.0\",\"flat.third=three\"]" + + "\"flat._valueAndPath\":[\"flat.second.inner=2.0\",\"flat.first=1\",\"flat.third=three\"]" + "}", flattenJsonString("flat", jsonExample) ); @@ -64,9 +66,9 @@ public void testChildHasDots() throws IOException { assertEquals( "{" - + "\"flat\":[\"first\",\"second\",\"inner\",\"third\"]," + + "\"flat\":[\"third\",\"inner\",\"first\",\"second\"]," + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," - + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner=2.0\",\"flat.third=three\"]" + + "\"flat._valueAndPath\":[\"flat.second.inner=2.0\",\"flat.first=1\",\"flat.third=three\"]" + "}", flattenJsonString("flat", jsonExample) ); @@ -83,7 +85,7 @@ public void testNestChildObjectWithDots() throws IOException { assertEquals( "{" - + "\"flat\":[\"first\",\"second\",\"inner\",\"really_inner\",\"third\"]," + + "\"flat\":[\"really_inner\",\"third\",\"inner\",\"first\",\"second\"]," + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.really_inner=2.0\",\"flat.third=three\"]" + "}", @@ -102,7 +104,7 @@ public void testNestChildObjectWithDotsAndFieldWithDots() throws IOException { assertEquals( "{" - + "\"flat\":[\"first\",\"second\",\"inner\",\"totally\",\"absolutely\",\"inner\",\"third\"]," + + "\"flat\":[\"third\",\"absolutely\",\"totally\",\"inner\",\"first\",\"second\"]," + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.totally.absolutely.inner=2.0\",\"flat.third=three\"]" + "}", diff --git a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java index 637072c8886c1..5ad769fe4bba3 100644 --- a/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/FlatObjectFieldMapperTests.java @@ -25,7 +25,6 @@ import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.core.IsEqual.equalTo; -import static org.hamcrest.core.StringContains.containsString; public class FlatObjectFieldMapperTests extends MapperTestCase { private static final String FIELD_TYPE = "flat_object"; @@ -133,9 +132,250 @@ public void testDefaults() throws Exception { public void testNullValue() throws IOException { DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); - MapperParsingException e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.nullField("field")))); - assertThat(e.getMessage(), containsString("object mapping for [_doc] tried to parse field [field] as object")); + // test1: {"field":null} + ParsedDocument doc = mapper.parse(source(b -> b.nullField("field"))); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field")); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_SUFFIX)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX)); + + // test2: {"field":[null]} + doc = mapper.parse(source(b -> b.array("field", (String[]) null))); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field")); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_SUFFIX)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX)); + + // test3: {"field":{"name":null, "age":"5", "name1":null}} + String json = "{\"field\":{\"name\":null, \"age\":\"5\", \"name1\":null}}"; + doc = mapper.parse(source(json)); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + assertEquals(new BytesRef("field.age"), fields[0].binaryValue()); + IndexableField[] fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("5"), fieldValues[0].binaryValue()); + IndexableField[] fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.age=5"), fieldValueAndPaths[0].binaryValue()); + + // test4: {"field":{"name": [null, {"d":{"name":"dsds"}}]}} + json = "{\"field\":{\"name\": [null, {\"d\":{\"name\":\"dsds\"}}]}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(4, fields.length); + assertEquals(new BytesRef("field.d"), fields[0].binaryValue()); + assertEquals(new BytesRef("field.name"), fields[2].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("dsds"), fieldValues[0].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.name.d.name=dsds"), fieldValueAndPaths[0].binaryValue()); + + // test5: {"field":{"name": [{"d":{"name":"dsds"}}, null]}} + json = "{\"field\":{\"name\": [{\"d\":{\"name\":\"dsds\"}}, null]}}"; + doc = mapper.parse(source(json)); + IndexableField[] fields1 = doc.rootDoc().getFields("field"); + assertEquals(fields1.length, fields.length); + for (int i = 0; i < fields1.length; i++) { + assertEquals(fields[i].toString(), fields1[i].toString()); + } + assertEquals(4, fields.length); + assertEquals(new BytesRef("field.d"), fields[0].binaryValue()); + assertEquals(new BytesRef("field.name"), fields[2].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("dsds"), fieldValues[0].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.name.d.name=dsds"), fieldValueAndPaths[0].binaryValue()); + + // test6: {"field":{"name": [{"name":"age1"}, null, {"d":{"name":"dsds"}}]}} + json = "{\"field\":{\"name\": [{\"name\":\"age1\"}, null, {\"d\":{\"name\":\"dsds\"}}]}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(4, fields.length); + assertEquals(new BytesRef("field.d"), fields[0].binaryValue()); + assertEquals(new BytesRef("field.name"), fields[2].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(4, fieldValues.length); + assertEquals(new BytesRef("dsds"), fieldValues[0].binaryValue()); + assertEquals(new BytesRef("age1"), fieldValues[2].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(4, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.name.name=age1"), fieldValueAndPaths[0].binaryValue()); + assertEquals(new BytesRef("field.name.d.name=dsds"), fieldValueAndPaths[2].binaryValue()); + + // test7: {"field":{"name": null,"age":3}} + json = "{\"field\":{\"name\": null,\"age\":3}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + assertEquals(new BytesRef("field.age"), fields[0].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("3"), fieldValues[0].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.age=3"), fieldValueAndPaths[0].binaryValue()); + + // test8: {"field":{"age":3, "name": null}} + json = "{\"field\":{\"age\":3, \"name\": null}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + assertEquals(new BytesRef("field.age"), fields[0].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("3"), fieldValues[0].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.age=3"), fieldValueAndPaths[0].binaryValue()); + + // test9: {"field":{"name": [null,3],"age":4}} + json = "{\"field\":{\"name\": [null,3],\"age\":4}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(4, fields.length); + assertEquals(new BytesRef("field.name"), fields[0].binaryValue()); + assertEquals(new BytesRef("field.age"), fields[2].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(4, fieldValues.length); + assertEquals(new BytesRef("3"), fieldValues[0].binaryValue()); + assertEquals(new BytesRef("4"), fieldValues[2].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(4, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.age=4"), fieldValueAndPaths[0].binaryValue()); + assertEquals(new BytesRef("field.name=3"), fieldValueAndPaths[2].binaryValue()); + + // test10: {"field":{"age": 4,"name": [null,"3"]}} + json = "{\"field\":{\"age\": 4,\"name\": [null,\"3\"]}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(4, fields.length); + assertEquals(new BytesRef("field.name"), fields[0].binaryValue()); + assertEquals(new BytesRef("field.age"), fields[2].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(4, fieldValues.length); + assertEquals(new BytesRef("3"), fieldValues[0].binaryValue()); + assertEquals(new BytesRef("4"), fieldValues[2].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(4, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.age=4"), fieldValueAndPaths[0].binaryValue()); + assertEquals(new BytesRef("field.name=3"), fieldValueAndPaths[2].binaryValue()); + + // test11: {"field":{"labels": [null]}} + json = "{\"field\":{\"labels\": [null]}}"; + doc = mapper.parse(source(json)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field")); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_SUFFIX)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX)); + + // test12: {"field":{"r1": {"labels": [null]}}} + json = "{\"field\":{\"r1\": {\"labels\": [null]}}}"; + doc = mapper.parse(source(json)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field")); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_SUFFIX)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX)); + + // test13: {"field":{"labels": [null]}} + json = "{\"field\":{\"labels\": [null]}}"; + doc = mapper.parse(source(json)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field")); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_SUFFIX)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX)); + + // test14: {"field":{"r1": {"name": null,"labels": [null]}}} + json = "{\"field\":{\"r1\": {\"name\": null,\"labels\": [null]}}}"; + doc = mapper.parse(source(json)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field")); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_SUFFIX)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX)); + + // test15: {"field":{"age":"4","labels": [null]}} + json = "{\"field\":{\"age\":\"4\",\"labels\": [null]}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + assertEquals(new BytesRef("field.age"), fields[0].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("4"), fieldValues[0].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.age=4"), fieldValueAndPaths[0].binaryValue()); + + // test16: {"field":{"labels": [null], "age":"4"}} + json = "{\"field\":{\"labels\": [null], \"age\":\"4\"}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + assertEquals(new BytesRef("field.age"), fields[0].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("4"), fieldValues[0].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.age=4"), fieldValueAndPaths[0].binaryValue()); + + // test17: {"field":{"name": {"name1": [null,"dsdsdsd"]}}} + json = "{\"field\":{\"name\": {\"name1\": [null,\"dsdsdsd\"]}}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(4, fields.length); + assertEquals(new BytesRef("field.name"), fields[0].binaryValue()); + assertEquals(new BytesRef("field.name1"), fields[2].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("dsdsdsd"), fieldValues[0].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.name.name1=dsdsdsd"), fieldValueAndPaths[0].binaryValue()); + + // test18: {"field":{"name": {"name1": {"name2":null}}}} + json = "{\"field\":{\"name\": {\"name1\": {\"name2\":null}}}}"; + doc = mapper.parse(source(json)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field")); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_SUFFIX)); + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX)); + + // test19: {"field":{"name": {"name1": [[],["dsdsdsd", null]]}}} + json = "{\"field\":{\"name\": {\"name1\": [[],[\"dsdsdsd\", null]]}}}"; + doc = mapper.parse(source(json)); + fields = doc.rootDoc().getFields("field"); + assertEquals(4, fields.length); + assertEquals(new BytesRef("field.name"), fields[0].binaryValue()); + assertEquals(new BytesRef("field.name1"), fields[2].binaryValue()); + fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(2, fieldValues.length); + assertEquals(new BytesRef("dsdsdsd"), fieldValues[0].binaryValue()); + fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(2, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.name.name1=dsdsdsd"), fieldValueAndPaths[0].binaryValue()); + + } + + // test deduplicationValue of keyList, valueList, valueAndPathList + public void testDeduplicationValue() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + + // test: {"field":{"age": 3,"labels": [null,"3"], "abc":{"abc":{"labels":"n"}}}} + String json = "{\"field\":{\"age\": 3,\"labels\": [null,\"3\"], \"abc\":{\"abc\":{\"labels\":\"n\"}}}}"; + ParsedDocument doc = mapper.parse(source(json)); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(6, fields.length); + assertEquals(new BytesRef("field.abc"), fields[0].binaryValue()); + assertEquals(new BytesRef("field.age"), fields[2].binaryValue()); + assertEquals(new BytesRef("field.labels"), fields[4].binaryValue()); + IndexableField[] fieldValues = doc.rootDoc().getFields("field" + VALUE_SUFFIX); + assertEquals(4, fieldValues.length); + assertEquals(new BytesRef("3"), fieldValues[0].binaryValue()); + assertEquals(new BytesRef("n"), fieldValues[2].binaryValue()); + IndexableField[] fieldValueAndPaths = doc.rootDoc().getFields("field" + VALUE_AND_PATH_SUFFIX); + assertEquals(6, fieldValueAndPaths.length); + assertEquals(new BytesRef("field.abc.abc.labels=n"), fieldValueAndPaths[0].binaryValue()); + assertEquals(new BytesRef("field.age=3"), fieldValueAndPaths[2].binaryValue()); + assertEquals(new BytesRef("field.labels=3"), fieldValueAndPaths[4].binaryValue()); } @Override