Skip to content

Commit

Permalink
Fix case-insensitive query on wildcard field (opensearch-project#15882)
Browse files Browse the repository at this point in the history
* fix case-insensitive query on wildcard field

Signed-off-by: gesong.samuel <gesong.samuel@bytedance.com>

* fix YAML test

Signed-off-by: gesong.samuel <gesong.samuel@bytedance.com>

* add changelog entry

Signed-off-by: gesong.samuel <gesong.samuel@bytedance.com>

---------

Signed-off-by: gesong.samuel <gesong.samuel@bytedance.com>
Co-authored-by: gesong.samuel <gesong.samuel@bytedance.com>
  • Loading branch information
2 people authored and sachinpkale committed Sep 19, 2024
1 parent b93417c commit 68cfd16
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 9 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

### Fixed
- Fix wildcard query containing escaped character ([#15737](https://github.com/opensearch-project/OpenSearch/pull/15737))

- Fix case-insensitive query on wildcard field ([#15882](https://github.com/opensearch-project/OpenSearch/pull/15882))
### Security

[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.17...2.x
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ setup:
id: 6
body:
other_field: "test"
- do:
index:
index: test
id: 7
body:
my_field: "ABCD"
- do:
indices.refresh: {}

Expand Down Expand Up @@ -90,8 +96,9 @@ setup:
query:
term:
my_field.lower: "abcd"
- match: { hits.total.value: 1 }
- match: { hits.total.value: 2 }
- match: { hits.hits.0._id: "5" }
- match: { hits.hits.1._id: "7" }

- do:
search:
Expand All @@ -100,8 +107,9 @@ setup:
query:
term:
my_field.lower: "ABCD"
- match: { hits.total.value: 1 }
- match: { hits.total.value: 2 }
- match: { hits.hits.0._id: "5" }
- match: { hits.hits.1._id: "7" }

- do:
search:
Expand Down Expand Up @@ -215,7 +223,7 @@ setup:
wildcard:
my_field:
value: "*"
- match: { hits.total.value: 5 }
- match: { hits.total.value: 6 }
---
"regexp match-all works":
- do:
Expand All @@ -226,7 +234,7 @@ setup:
regexp:
my_field:
value: ".*"
- match: { hits.total.value: 5 }
- match: { hits.total.value: 6 }
---
"terms query on wildcard field matches":
- do:
Expand All @@ -237,3 +245,28 @@ setup:
terms: { my_field: ["AbCd"] }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "5" }
---
# Exercises the wildcard query's case_insensitive option against my_field by
# running the same value ("AbCd") twice: once without the option and once with it.
"case insensitive query on wildcard field":
# First search: case_insensitive is not set, and exactly one hit (document 5) is expected.
- do:
search:
index: test
body:
query:
wildcard:
my_field:
value: "AbCd"
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "5" }

# Second search: same value with case_insensitive: true. Two hits are expected:
# document 5 again, plus document 7, which setup indexes with my_field "ABCD".
- do:
search:
index: test
body:
query:
wildcard:
my_field:
value: "AbCd"
case_insensitive: true
- match: { hits.total.value: 2 }
# NOTE(review): the two assertions below depend on hit order, and the request
# specifies no sort — confirm that document 5 is reliably returned before document 7.
- match: { hits.hits.0._id: "5" }
- match: { hits.hits.1._id: "7" }
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.apache.lucene.util.automaton.RegExp;
import org.opensearch.common.lucene.BytesRefs;
import org.opensearch.common.lucene.Lucene;
import org.opensearch.common.lucene.search.AutomatonQueries;
import org.opensearch.common.unit.Fuzziness;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.index.analysis.IndexAnalyzers;
Expand Down Expand Up @@ -464,7 +465,7 @@ public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, bo
return existsQuery(context);
}
} else {
approximation = matchAllTermsQuery(name(), requiredNGrams);
approximation = matchAllTermsQuery(name(), requiredNGrams, caseInsensitive);
}
return new WildcardMatchingQuery(name(), approximation, matchPredicate, value, context, this);
}
Expand Down Expand Up @@ -678,7 +679,7 @@ public Query termsQuery(List<?> values, QueryShardContext context) {
StringBuilder pattern = new StringBuilder();
for (Object value : values) {
String stringVal = BytesRefs.toString(value);
builder.add(matchAllTermsQuery(name(), getRequiredNGrams(stringVal)), BooleanClause.Occur.SHOULD);
builder.add(matchAllTermsQuery(name(), getRequiredNGrams(stringVal), false), BooleanClause.Occur.SHOULD);
expectedValues.add(stringVal);
if (pattern.length() > 0) {
pattern.append('|');
Expand All @@ -688,10 +689,16 @@ public Query termsQuery(List<?> values, QueryShardContext context) {
return new WildcardMatchingQuery(name(), builder.build(), expectedValues::contains, pattern.toString(), context, this);
}

/**
 * Builds a {@link BooleanQuery} containing one {@code Occur.FILTER} clause per entry in
 * {@code terms}, each clause targeting {@code fieldName}.
 *
 * NOTE: this span is a rendered diff hunk whose +/- markers were stripped, so the removed
 * and the added lines appear together. The inline comments below mark which is which.
 *
 * @param fieldName       field that every generated clause is built against
 * @param terms           terms to turn into clauses; one clause is added per term
 * @param caseInsensitive when {@code true}, each clause is created with
 *                        {@code AutomatonQueries.caseInsensitiveTermQuery}; otherwise a plain
 *                        {@code TermQuery} is used. In this file {@code wildcardQuery} forwards
 *                        its own flag and {@code termsQuery} always passes {@code false}.
 * @return the query produced by the builder after all clauses have been added
 */
// Removed line: previous two-argument signature.
private static BooleanQuery matchAllTermsQuery(String fieldName, Set<String> terms) {
// Added line: new signature carrying the caseInsensitive flag.
private static BooleanQuery matchAllTermsQuery(String fieldName, Set<String> terms, boolean caseInsensitive) {
BooleanQuery.Builder matchAllTermsBuilder = new BooleanQuery.Builder();
// Added line: holds the per-term clause chosen inside the loop.
Query query;
for (String term : terms) {
// Removed line: previously every term was added as an exact TermQuery.
matchAllTermsBuilder.add(new TermQuery(new Term(fieldName, term)), BooleanClause.Occur.FILTER);
// Added lines: pick the clause type according to caseInsensitive.
if (caseInsensitive) {
query = AutomatonQueries.caseInsensitiveTermQuery(new Term(fieldName, term));
} else {
query = new TermQuery(new Term(fieldName, term));
}
// The clause is attached with Occur.FILTER in both the old and the new version.
matchAllTermsBuilder.add(query, BooleanClause.Occur.FILTER);
}
return matchAllTermsBuilder.build();
}
Expand Down

0 comments on commit 68cfd16

Please sign in to comment.