From ed750904ba75a69c58bd9654e34b41d6389d584c Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Fri, 15 Nov 2024 12:09:08 +0200 Subject: [PATCH 1/2] lowercase search term tokens --- .../planner/bloomfilter/SearchTermBloomFilter.java | 3 ++- .../planner/bloomfilter/SearchTermBloomFilterTest.java | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java index a30dc70f..2a8204b2 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java @@ -52,6 +52,7 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.UncheckedIOException; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Objects; @@ -86,7 +87,7 @@ public byte[] bytes() { } final BloomFilter filter = BloomFilter.create(expected, fpp); for (final String token : stringTokens) { - filter.put(token); + filter.put(token.toLowerCase()); } try (final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream()) { filter.writeTo(filterBAOS); diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java index ba9f6fc3..a46d6ede 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java @@ -100,7 +100,7 @@ public void testRegexExtractedTokens() { byte[] bytes = Assertions.assertDoesNotThrow(filter::bytes); BloomFilter resultFilter = Assertions .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes))); - Assertions.assertTrue(resultFilter.mightContain("Pattern")); + 
Assertions.assertTrue(resultFilter.mightContain("pattern")); } @Test @@ -110,9 +110,10 @@ public void testTokenizerTokens() { byte[] bytes = Assertions.assertDoesNotThrow(filter::bytes); BloomFilter resultFilter = Assertions .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes))); - Assertions.assertFalse(resultFilter.mightContain("Pattern")); - Assertions.assertTrue(resultFilter.mightContain("Without")); - Assertions.assertTrue(resultFilter.mightContain("SearchValuePatternInThisString")); + // test that tokens are present and in lower case + Assertions.assertFalse(resultFilter.mightContain("pattern")); + Assertions.assertTrue(resultFilter.mightContain("without")); + Assertions.assertTrue(resultFilter.mightContain("searchvaluepatterninthisstring")); } @Test From 8305d58202a67d37bd3279fe4665cf91bc4149bf Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Fri, 15 Nov 2024 12:11:29 +0200 Subject: [PATCH 2/2] apply spotless --- .../pth_06/planner/bloomfilter/SearchTermBloomFilter.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java index 2a8204b2..4bd37c0f 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java @@ -52,7 +52,6 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.UncheckedIOException; -import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Objects;