From 1387c18f080a24d25dcda6cf0903ce109d0c3c50 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Wed, 8 Aug 2018 16:21:08 -0400 Subject: [PATCH 1/2] Do not allow termvectors on nested fields Requesting _termvectors on a nested field or any sub-fields of a nested field returns empty results. Closes #21625 --- docs/reference/docs/termvectors.asciidoc | 4 ++ .../test/termvectors/50_nested.yml | 42 +++++++++++++++++++ .../index/termvectors/TermVectorsService.java | 16 +++++-- 3 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/termvectors/50_nested.yml diff --git a/docs/reference/docs/termvectors.asciidoc b/docs/reference/docs/termvectors.asciidoc index 3cd21b21df4d6..0e6078ad7b231 100644 --- a/docs/reference/docs/termvectors.asciidoc +++ b/docs/reference/docs/termvectors.asciidoc @@ -30,6 +30,10 @@ in similar way to the <> [WARNING] Note that the usage of `/_termvector` is deprecated in 2.0, and replaced by `/_termvectors`. +[WARNING] +Term Vectors API doesn't work on nested fields. `/_termvectors` on a nested +field and any sub-fields of a nested field returns empty results. + [float] === Return values diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/termvectors/50_nested.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/termvectors/50_nested.yml new file mode 100644 index 0000000000000..ec96a40e30a3c --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/termvectors/50_nested.yml @@ -0,0 +1,42 @@ +setup: + - do: + indices.create: + index: testidx + body: + mappings: + _doc: + properties: + nested-field: + type : nested + properties: + sub-nested-text: + type: text + - do: + index: + index: testidx + type: _doc + id: 1 + body: + "text" : "The quick brown fox is brown." + - do: + indices.refresh: {} + +--- +"Termvectors on nested fields should return empty results": + + - do: + termvectors: + index: testidx + type: _doc + id: 1 + fields: ["nested-field"] + - match: {term_vectors: {}} + + + - do: + termvectors: + index: testidx + type: _doc + id: 1 + fields: ["nested-field.sub-nested-text"] + - match: {term_vectors: {}} diff --git a/server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java b/server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java index bc77626b94277..8aadcc8f08a82 100644 --- a/server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java +++ b/server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java @@ -45,6 +45,7 @@ import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.ObjectMapper; import org.elasticsearch.index.mapper.ParseContext; import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.SourceFieldMapper; @@ -160,7 +161,7 @@ private static void handleFieldWildcards(IndexShard indexShard, TermVectorsReque request.selectedFields(fieldNames.toArray(Strings.EMPTY_ARRAY)); } - private static boolean isValidField(MappedFieldType fieldType) { + private static boolean isValidField(MappedFieldType fieldType, IndexShard indexShard) { // must be a string if (fieldType instanceof StringFieldType == false) { return false; @@ -169,6 +170,15 @@ private static boolean isValidField(MappedFieldType fieldType) { if (fieldType.indexOptions() == IndexOptions.NONE) { return false; } + // and must not be under nested field + int dotIndex = fieldType.name().indexOf('.'); + if (dotIndex > -1) { + String rootField = fieldType.name().substring(0, dotIndex); + ObjectMapper mapper = indexShard.mapperService().getObjectMapper(rootField); + if (mapper != null && mapper.nested().isNested()) { + return false; + } + } return true; } @@ -177,7 +187,7 @@ private static Fields addGeneratedTermVectors(IndexShard indexShard, Engine.GetR Set validFields = new HashSet<>(); for (String field : selectedFields) { MappedFieldType fieldType = indexShard.mapperService().fullName(field); - if (!isValidField(fieldType)) { + if (isValidField(fieldType, indexShard) == false) { continue; } // already retrieved, only if the analyzer hasn't been overridden at the field @@ -284,7 +294,7 @@ private static Fields generateTermVectorsFromDoc(IndexShard indexShard, TermVect Collection documentFields = new HashSet<>(); for (IndexableField field : doc.getFields()) { MappedFieldType fieldType = indexShard.mapperService().fullName(field.name()); - if (!isValidField(fieldType)) { + if (isValidField(fieldType, indexShard) == false) { continue; } if (request.selectedFields() != null && !request.selectedFields().contains(field.name())) { From bb9bd7c81b3fdeb0816f3f7b0e29e3845c06b8b7 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Fri, 10 Aug 2018 12:56:55 -0400 Subject: [PATCH 2/2] Address feedback --- .../test/termvectors/50_nested.yml | 33 +++++++++++-------- .../index/termvectors/TermVectorsService.java | 7 ++-- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/termvectors/50_nested.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/termvectors/50_nested.yml index ec96a40e30a3c..a10fc7b504bf0 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/termvectors/50_nested.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/termvectors/50_nested.yml @@ -6,18 +6,29 @@ setup: mappings: _doc: properties: - nested-field: + nested1: type : nested properties: - sub-nested-text: + nested1-text: type: text + object1: + properties: + object1-text: + type: text + object1-nested1: + type: nested + properties: + object1-nested1-text: + type: text - do: index: index: testidx type: _doc id: 1 body: - "text" : "The quick brown fox is brown." + "nested1" : [{ "nested1-text": "text1" }] + "object1" : [{ "object1-text": "text2" }, "object1-nested1" : [{"object1-nested1-text" : "text3"}]] + - do: indices.refresh: {} @@ -29,14 +40,10 @@ setup: index: testidx type: _doc id: 1 - fields: ["nested-field"] - - match: {term_vectors: {}} + fields: ["nested1", "nested1.nested1-text", "object1.object1-nested1", "object1.object1-nested1.object1-nested1-text", "object1.object1-text"] - - - do: - termvectors: - index: testidx - type: _doc - id: 1 - fields: ["nested-field.sub-nested-text"] - - match: {term_vectors: {}} + - is_false: term_vectors.nested1 + - is_false: term_vectors.nested1\.nested1-text # escaping as the field name contains dot + - is_false: term_vectors.object1\.object1-nested1 + - is_false: term_vectors.object1\.object1-nested1\.object1-nested1-text + - is_true: term_vectors.object1\.object1-text diff --git a/server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java b/server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java index 8aadcc8f08a82..43f1a278f54c3 100644 --- a/server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java +++ b/server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java @@ -172,12 +172,13 @@ private static boolean isValidField(MappedFieldType fieldType, IndexShard indexS } // and must not be under nested field int dotIndex = fieldType.name().indexOf('.'); - if (dotIndex > -1) { - String rootField = fieldType.name().substring(0, dotIndex); - ObjectMapper mapper = indexShard.mapperService().getObjectMapper(rootField); + while (dotIndex > -1) { + String parentField = fieldType.name().substring(0, dotIndex); + ObjectMapper mapper = indexShard.mapperService().getObjectMapper(parentField); if (mapper != null && mapper.nested().isNested()) { return false; } + dotIndex = fieldType.name().indexOf('.', dotIndex + 1); } return true; }