Skip to content

Commit

Permalink
Fix mapping char_filter when mapping a hashtag (opensearch-project#7591)
Browse files Browse the repository at this point in the history
Signed-off-by: Nathan Myles <myles.n.a@gmail.com>

Issue: opensearch-project#7308
  • Loading branch information
nathanmyles authored Jun 16, 2023
1 parent 8eea7b9 commit e049338
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Support OpenSSL Provider with default Netty allocator ([#5460](https://github.com/opensearch-project/OpenSearch/pull/5460))
- Replaces ZipInputStream with ZipFile to fix Zip Slip vulnerability ([#7230](https://github.com/opensearch-project/OpenSearch/pull/7230))
- Add missing validation/parsing of SearchBackpressureMode of SearchBackpressureSettings ([#7541](https://github.com/opensearch-project/OpenSearch/pull/7541))
- Fix mapping char_filter when mapping a hashtag ([#7591](https://github.com/opensearch-project/OpenSearch/pull/7591))

### Security

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public class MappingCharFilterFactory extends AbstractCharFilterFactory implemen
MappingCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name);

List<MappingRule<String, String>> rules = Analysis.parseWordList(env, settings, "mappings", this::parse);
List<MappingRule<String, String>> rules = Analysis.parseWordList(env, settings, "mappings", this::parse, false);
if (rules == null) {
throw new IllegalArgumentException("mapping requires either `mappings` or `mappings_path` to be configured");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,19 @@ public static CharFilterFactory create(String... rules) throws IOException {

public void testRulesOk() throws IOException {
MappingCharFilterFactory mappingCharFilterFactory = (MappingCharFilterFactory) create(
"# This is a comment",
"# => _hashtag_",
":) => _happy_",
":( => _sad_"
);
CharFilter inputReader = (CharFilter) mappingCharFilterFactory.create(new StringReader("I'm so :)"));
CharFilter inputReader = (CharFilter) mappingCharFilterFactory.create(new StringReader("I'm so :), I'm so :( #confused"));
char[] tempBuff = new char[14];
StringBuilder output = new StringBuilder();
while (true) {
int length = inputReader.read(tempBuff);
if (length == -1) break;
output.append(tempBuff, 0, length);
}
assertEquals("I'm so _happy_", output.toString());
assertEquals("I'm so _happy_, I'm so _sad_ _hashtag_confused", output.toString());
}

public void testRuleError() {
Expand All @@ -64,7 +64,7 @@ public void testRuleError() {
}

public void testRulePartError() {
RuntimeException ex = expectThrows(RuntimeException.class, () -> create("# This is a comment", ":) => _happy_", "a:b"));
RuntimeException ex = expectThrows(RuntimeException.class, () -> create("# => _hashtag_", ":) => _happy_", "a:b"));
assertEquals("Line [3]: Invalid mapping rule : [a:b]", ex.getMessage());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,21 @@
- match: { detail.tokenizer.tokens.0.start_offset: 0 }
- match: { detail.tokenizer.tokens.0.end_offset: 15 }
- match: { detail.tokenizer.tokens.0.position: 0 }
---
"mapping_with_hashtag":
- do:
indices.analyze:
body:
text: 'test #test @test'
tokenizer: standard
filter:
- lowercase
char_filter:
- type: mapping
mappings:
- "# => _hashsign_"
- "@ => _atsign_"
- length: { tokens: 3 }
- match: { tokens.0.token: test }
- match: { tokens.1.token: _hashsign_test }
- match: { tokens.2.token: _atsign_test }
23 changes: 22 additions & 1 deletion server/src/main/java/org/opensearch/index/analysis/Analysis.java
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,16 @@ public static <T> List<T> parseWordList(Environment env, Settings settings, Stri
return parseWordList(env, settings, settingPrefix + "_path", settingPrefix, parser);
}

public static <T> List<T> parseWordList(
Environment env,
Settings settings,
String settingPrefix,
CustomMappingRuleParser<T> parser,
boolean removeComments
) {
return parseWordList(env, settings, settingPrefix + "_path", settingPrefix, parser, removeComments);
}

/**
* Parses a list of words from the specified settings or from a file, with the given parser.
*
Expand All @@ -236,6 +246,17 @@ public static <T> List<T> parseWordList(
String settingPath,
String settingList,
CustomMappingRuleParser<T> parser
) {
return parseWordList(env, settings, settingPath, settingList, parser, true);
}

public static <T> List<T> parseWordList(
Environment env,
Settings settings,
String settingPath,
String settingList,
CustomMappingRuleParser<T> parser,
boolean removeComments
) {
List<String> words = getWordList(env, settings, settingPath, settingList);
if (words == null) {
Expand All @@ -245,7 +266,7 @@ public static <T> List<T> parseWordList(
int lineNum = 0;
for (String word : words) {
lineNum++;
if (word.startsWith("#") == false) {
if (removeComments == false || word.startsWith("#") == false) {
try {
rules.add(parser.apply(word));
} catch (RuntimeException ex) {
Expand Down

0 comments on commit e049338

Please sign in to comment.