categoryTable = DSL.table(DSL.name(("term_" + bloomTermId + "_" + this.table.getName())));
final Field>[] insertFields = {
DSL.field("term_id", BIGINTUNSIGNED.nullable(false)),
DSL.field("type_id", BIGINTUNSIGNED.nullable(false)),
DSL.field(DSL.name(categoryTable.getName(), "filter"), byte[].class)
};
+ final BloomFilterFromRecord filterFromRecord = new BloomFilterFromRecord(record, table, searchTerm);
final Field>[] valueFields = {
DSL.val(bloomTermId, ULong.class),
DSL.val(record.getValue(BLOOMDB.FILTERTYPE.ID), ULong.class),
- DSL.val(filterBytesFromRecord(record), byte[].class)
+ DSL.val(filterFromRecord.bytes(), byte[].class)
};
ctx.insertInto(categoryTable).columns(insertFields).values(valueFields).execute();
}
- public void insertFiltersIntoCategoryTable() {
- recordsInMetadata.toResult().forEach(this::insertFilterRecordToCategoryTable);
- }
-
- /**
- * Expects DSLContext values to be the same instance
- *
- * @param object object compared
- * @returs true if object is equal
- */
@Override
public boolean equals(final Object object) {
if (this == object)
return true;
- if (object == null)
- return false;
- if (object.getClass() != this.getClass())
+ if (object == null || this.getClass() != object.getClass())
return false;
- final TableFilters cast = (TableFilters) object;
- return this.ctx == cast.ctx && this.value.equals(cast.value) && this.table.equals(cast.table)
- && this.bloomTermId == cast.bloomTermId;
+ final FilterFromRecordToCategoryTableConsumer cast = (FilterFromRecordToCategoryTableConsumer) object;
+ return bloomTermId == cast.bloomTermId && ctx == cast.ctx && table.equals(cast.table)
+ && searchTerm.equals(cast.searchTerm);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(ctx, table, bloomTermId, searchTerm);
}
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/PatternMatchTables.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java
similarity index 99%
rename from src/main/java/com/teragrep/pth_06/planner/PatternMatchTables.java
rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java
index 94a7fe7f..10bc8d83 100644
--- a/src/main/java/com/teragrep/pth_06/planner/PatternMatchTables.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java
@@ -43,7 +43,7 @@
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/
-package com.teragrep.pth_06.planner;
+package com.teragrep.pth_06.planner.bloomfilter;
import com.teragrep.pth_06.planner.walker.conditions.PatternMatchCondition;
import com.teragrep.pth_06.planner.walker.conditions.QueryCondition;
diff --git a/src/main/java/com/teragrep/pth_06/planner/SearchTermFiltersInserted.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermFiltersInserted.java
similarity index 98%
rename from src/main/java/com/teragrep/pth_06/planner/SearchTermFiltersInserted.java
rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermFiltersInserted.java
index a214b22c..a9913cb5 100644
--- a/src/main/java/com/teragrep/pth_06/planner/SearchTermFiltersInserted.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermFiltersInserted.java
@@ -43,7 +43,7 @@
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/
-package com.teragrep.pth_06.planner;
+package com.teragrep.pth_06.planner.bloomfilter;
import com.teragrep.pth_06.planner.walker.conditions.QueryCondition;
diff --git a/src/main/java/com/teragrep/pth_06/planner/TableFilterTypesFromMetadata.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java
similarity index 95%
rename from src/main/java/com/teragrep/pth_06/planner/TableFilterTypesFromMetadata.java
rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java
index d5962952..ffcdb4c7 100644
--- a/src/main/java/com/teragrep/pth_06/planner/TableFilterTypesFromMetadata.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java
@@ -43,12 +43,14 @@
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/
-package com.teragrep.pth_06.planner;
+package com.teragrep.pth_06.planner.bloomfilter;
import org.jooq.*;
import org.jooq.impl.DSL;
import org.jooq.types.ULong;
+import java.util.Objects;
+
import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB;
/**
@@ -105,4 +107,9 @@ public boolean equals(final Object object) {
final TableFilterTypesFromMetadata cast = (TableFilterTypesFromMetadata) object;
return this.bloomTermId == cast.bloomTermId && this.table.equals(cast.table) && this.ctx == cast.ctx;
}
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(ctx, table, bloomTermId);
+ }
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
new file mode 100644
index 00000000..b5dc9207
--- /dev/null
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
@@ -0,0 +1,100 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner.bloomfilter;
+
+import org.jooq.DSLContext;
+import org.jooq.Table;
+
+import java.util.Objects;
+
+/**
+ * Filter types of a table that can be inserted into the table's category table
+ */
+public final class TableFilters {
+
+ private final TableRecords recordsInMetadata;
+ private final FilterFromRecordToCategoryTableConsumer recordConsumer;
+
+ public TableFilters(DSLContext ctx, Table> table, long bloomTermId, String searchTerm) {
+ this(
+ new TableFilterTypesFromMetadata(ctx, table, bloomTermId),
+ new FilterFromRecordToCategoryTableConsumer(ctx, table, bloomTermId, searchTerm)
+ );
+ }
+
+ public TableFilters(
+ TableFilterTypesFromMetadata recordsInMetadata,
+ FilterFromRecordToCategoryTableConsumer recordConsumer
+ ) {
+ this.recordsInMetadata = recordsInMetadata;
+ this.recordConsumer = recordConsumer;
+ }
+
+ public void insertFiltersIntoCategoryTable() {
+ recordsInMetadata.toResult().forEach(recordConsumer);
+ }
+
+ /**
+ * Expects DSLContext values to be the same instance
+ *
+ * @param object object compared
+ * @return true if object is equal
+ */
+ @Override
+ public boolean equals(final Object object) {
+ if (this == object)
+ return true;
+ if (object == null || object.getClass() != this.getClass())
+ return false;
+ final TableFilters cast = (TableFilters) object;
+ return recordsInMetadata.equals(cast.recordsInMetadata) && recordConsumer.equals(cast.recordConsumer);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(recordsInMetadata, recordConsumer);
+ }
+}
diff --git a/src/main/java/com/teragrep/pth_06/planner/TableRecords.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableRecords.java
similarity index 97%
rename from src/main/java/com/teragrep/pth_06/planner/TableRecords.java
rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableRecords.java
index 95b9fb92..932070f8 100644
--- a/src/main/java/com/teragrep/pth_06/planner/TableRecords.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableRecords.java
@@ -43,7 +43,7 @@
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/
-package com.teragrep.pth_06.planner;
+package com.teragrep.pth_06.planner.bloomfilter;
import org.jooq.Record;
import org.jooq.Result;
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
index cf1cfced..8f691b62 100644
--- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
@@ -46,7 +46,7 @@
package com.teragrep.pth_06.planner.walker.conditions;
import com.teragrep.pth_06.config.ConditionConfig;
-import com.teragrep.pth_06.planner.*;
+import com.teragrep.pth_06.planner.bloomfilter.*;
import org.jooq.Condition;
import org.jooq.Table;
import org.jooq.impl.DSL;
@@ -78,7 +78,7 @@ public IndexStatementCondition(String value, ConditionConfig config, Condition c
public Condition condition() {
if (!config.bloomEnabled()) {
- LOGGER.debug("Indexstatement reached with bloom disabled");
+ LOGGER.warn("Indexstatement reached with bloom disabled");
return condition;
}
Condition newCondition = condition;
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchCondition.java
index f4ad5808..ce023b53 100644
--- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchCondition.java
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchCondition.java
@@ -45,48 +45,42 @@
*/
package com.teragrep.pth_06.planner.walker.conditions;
-import com.teragrep.blf_01.Token;
-import com.teragrep.pth_06.planner.TokenizedValue;
import org.jooq.*;
import org.jooq.impl.DSL;
+import java.util.Objects;
+
import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB;
-/**
- * Combined regex match condition
- *
- * true if any of the tokens regex match against bloomdb.filtertype.pattern
- */
+/** True if the input value regex-matches (like_regex) BLOOMDB.FILTERTYPE.PATTERN */
public final class PatternMatchCondition implements QueryCondition {
- private final TokenizedValue value;
+ private final Field valueField;
public PatternMatchCondition(String input) {
- this(new TokenizedValue(input));
+ this(DSL.val(input));
}
- public PatternMatchCondition(TokenizedValue value) {
- this.value = value;
+ public PatternMatchCondition(Field valueField) {
+ this.valueField = valueField;
}
public Condition condition() {
- Condition patternCondition = DSL.noCondition();
- for (Token token : value.tokens()) {
- Field tokenStringField = DSL.val(token.toString());
- patternCondition = patternCondition.or(tokenStringField.likeRegex(BLOOMDB.FILTERTYPE.PATTERN));
- }
- return patternCondition;
+ return valueField.likeRegex(BLOOMDB.FILTERTYPE.PATTERN);
}
@Override
public boolean equals(final Object object) {
if (this == object)
return true;
- if (object == null)
- return false;
- if (object.getClass() != this.getClass())
+ if (object == null || object.getClass() != this.getClass())
return false;
final PatternMatchCondition cast = (PatternMatchCondition) object;
- return this.value.equals(cast.value);
+ return valueField.equals(cast.valueField);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(valueField);
}
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java
new file mode 100644
index 00000000..3e983795
--- /dev/null
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java
@@ -0,0 +1,162 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner.bloomfilter;
+
+import nl.jqno.equalsverifier.EqualsVerifier;
+import org.apache.spark.util.sketch.BloomFilter;
+import org.jooq.DSLContext;
+import org.jooq.Field;
+import org.jooq.Record;
+import org.jooq.Table;
+import org.jooq.impl.DSL;
+import org.jooq.types.ULong;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
+
+import java.io.ByteArrayInputStream;
+import java.sql.Connection;
+import java.sql.DriverManager;
+
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+public class BloomFilterFromRecordTest {
+
+ final String url = "jdbc:h2:mem:test;MODE=MariaDB;DATABASE_TO_LOWER=TRUE;CASE_INSENSITIVE_IDENTIFIERS=TRUE";
+ final String userName = "sa";
+ final String password = "";
+ final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password));
+
+ @Test
+ public void testInstantation() {
+ Record dynamicRecord = generateRecord(true);
+ Table> target = DSL.table(DSL.name("target"));
+ String searchTerm = "Pattern";
+ BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm);
+ Assertions.assertDoesNotThrow(filter::bytes);
+ }
+
+ @Test
+ public void testCorrectFilterSize() {
+ Record dynamicRecord = generateRecord(true);
+ Table> target = DSL.table(DSL.name("target"));
+ String searchTerm = "SearchValuePatternInThisString";
+ BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm);
+ byte[] bytes = Assertions.assertDoesNotThrow(filter::bytes);
+ BloomFilter resultFilter = Assertions
+ .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes)));
+ BloomFilter expectedSize = BloomFilter.create(100, 0.01);
+ Assertions.assertEquals(expectedSize.bitSize(), resultFilter.bitSize());
+ }
+
+ @Test
+ public void testNoRegexExtractedTokensException() {
+ Record dynamicRecord = generateRecord(true);
+ Table> target = DSL.table(DSL.name("target"));
+ String searchTerm = "NoMatch";
+ BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm);
+ RuntimeException e = Assertions.assertThrows(RuntimeException.class, filter::bytes);
+ String expectedMessage = "Trying to insert empty filter, pattern match joined table should always have tokens";
+ Assertions.assertEquals(expectedMessage, e.getMessage());
+ }
+
+ @Test
+ public void testRegexExtractedTokens() {
+ Record dynamicRecord = generateRecord(true);
+ Table> target = DSL.table(DSL.name("target"));
+ String searchTerm = "SearchValuePatternInThisString";
+ BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm);
+ byte[] bytes = Assertions.assertDoesNotThrow(filter::bytes);
+ BloomFilter resultFilter = Assertions
+ .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes)));
+ Assertions.assertTrue(resultFilter.mightContain("Pattern"));
+ }
+
+ @Test
+ public void testTokenizerTokens() {
+ Record dynamicRecord = generateRecord(false);
+ Table> target = DSL.table(DSL.name("target"));
+ String searchTerm = "SearchValuePatternInThisString.Without.Delimiter";
+ BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm);
+ byte[] bytes = Assertions.assertDoesNotThrow(filter::bytes);
+ BloomFilter resultFilter = Assertions
+ .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes)));
+ Assertions.assertFalse(resultFilter.mightContain("Pattern"));
+ Assertions.assertTrue(resultFilter.mightContain("Without"));
+ Assertions.assertTrue(resultFilter.mightContain("SearchValuePatternInThisString"));
+ }
+
+ @Test
+ public void equalsHashCodeContractTest() {
+ EqualsVerifier
+ .forClass(BloomFilterFromRecord.class)
+ .withNonnullFields("expected")
+ .withNonnullFields("fpp")
+ .withNonnullFields("searchTerm")
+ .withIgnoredFields("LOGGER")
+ .verify();
+ }
+
+ private Record generateRecord(final boolean withPattern) {
+ DSLContext ctx = DSL.using(conn);
+ Field idField = DSL.field(DSL.name("id"), ULong.class);
+ Field expectedField = DSL.field(DSL.name("expectedElements"), ULong.class);
+ Field fppField = DSL.field(DSL.name("targetFpp"), Double.class);
+ Field patternField = DSL.field(DSL.name("pattern"), String.class);
+
+ Record dynamicRecord = ctx.newRecord(idField, expectedField, fppField, patternField);
+ if (withPattern) {
+ dynamicRecord.set(patternField, "Pattern");
+ }
+ else {
+ // case where the joined filtertype table has no pattern
+ dynamicRecord.set(patternField, null);
+ }
+ dynamicRecord.set(idField, ULong.valueOf(1));
+ dynamicRecord.set(expectedField, ULong.valueOf(100));
+ dynamicRecord.set(fppField, 0.01);
+ return dynamicRecord;
+ }
+}
diff --git a/src/test/java/com/teragrep/pth_06/planner/CategoryTableImplTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImplTest.java
similarity index 99%
rename from src/test/java/com/teragrep/pth_06/planner/CategoryTableImplTest.java
rename to src/test/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImplTest.java
index 60469640..e0595578 100644
--- a/src/test/java/com/teragrep/pth_06/planner/CategoryTableImplTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImplTest.java
@@ -43,7 +43,7 @@
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/
-package com.teragrep.pth_06.planner;
+package com.teragrep.pth_06.planner.bloomfilter;
import org.apache.spark.util.sketch.BloomFilter;
import org.jooq.Condition;
diff --git a/src/test/java/com/teragrep/pth_06/planner/PatternMatchTablesTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java
similarity index 87%
rename from src/test/java/com/teragrep/pth_06/planner/PatternMatchTablesTest.java
rename to src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java
index 36aaa431..2ff9924f 100644
--- a/src/test/java/com/teragrep/pth_06/planner/PatternMatchTablesTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java
@@ -43,7 +43,7 @@
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/
-package com.teragrep.pth_06.planner;
+package com.teragrep.pth_06.planner.bloomfilter;
import org.apache.spark.util.sketch.BloomFilter;
import org.jooq.DSLContext;
@@ -71,7 +71,8 @@ public class PatternMatchTablesTest {
final String ipRegex = "(\\b25[0-5]|\\b2[0-4][0-9]|\\b[01]?[0-9][0-9]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
// matches IPv4 starting with 255.
final String ipStartingWith255 = "(\\b25[0-5]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
- final List patternList = new ArrayList<>(Arrays.asList(ipRegex, ipStartingWith255));
+ final String parenthesesPattern = "\\((.*?)\\)";
+ final List patternList = new ArrayList<>(Arrays.asList(ipRegex, ipStartingWith255, parenthesesPattern));
final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password));
@BeforeAll
@@ -82,6 +83,7 @@ void setup() {
conn.prepareStatement("DROP TABLE IF EXISTS filtertype").execute();
conn.prepareStatement("DROP TABLE IF EXISTS pattern_test_ip").execute();
conn.prepareStatement("DROP TABLE IF EXISTS pattern_test_ip255").execute();
+ conn.prepareStatement("DROP TABLE IF EXISTS parentheses_test").execute();
String filtertype = "CREATE TABLE`filtertype`" + "("
+ " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+ " `expectedElements` bigint(20) unsigned NOT NULL,"
@@ -98,9 +100,15 @@ void setup() {
+ " `partition_id` bigint(20) unsigned NOT NULL UNIQUE,"
+ " `filter_type_id` bigint(20) unsigned NOT NULL,"
+ " `filter` longblob NOT NULL)";
+ String parentheses = "CREATE TABLE `parentheses_test`("
+ + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+ + " `partition_id` bigint(20) unsigned NOT NULL UNIQUE,"
+ + " `filter_type_id` bigint(20) unsigned NOT NULL,"
+ + " `filter` longblob NOT NULL)";
conn.prepareStatement(filtertype).execute();
conn.prepareStatement(ip).execute();
conn.prepareStatement(ip255).execute();
+ conn.prepareStatement(parentheses).execute();
String typeSQL = "INSERT INTO `filtertype` (`id`,`expectedElements`, `targetFpp`, `pattern`) VALUES (?,?,?,?)";
int id = 1;
for (String pattern : patternList) {
@@ -114,6 +122,7 @@ void setup() {
}
writeFilter("pattern_test_ip", 1);
writeFilter("pattern_test_ip255", 2);
+ writeFilter("parentheses_test", 3);
});
}
@@ -145,6 +154,16 @@ public void testSearchTermTokenizedMatch() {
Assertions.assertEquals("pattern_test_ip", result.get(0).getName());
}
+ @Test
+ public void testRegexMatch() {
+ DSLContext ctx = DSL.using(conn);
+ String input = "biz baz boz data has no content today (very important though) but it would still have if one had a means to extract it from (here is something else important as well) the strange patterns called parentheses that it seems to have been put in.";
+ PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input);
+ List> result = patternMatchTables.toList();
+ Assertions.assertEquals(1, result.size());
+ Assertions.assertEquals("parentheses_test", result.get(0).getName());
+ }
+
@Test
public void testMultipleMatch() {
DSLContext ctx = DSL.using(conn);
diff --git a/src/test/java/com/teragrep/pth_06/planner/TableFilterTypesFromMetadataResultTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java
similarity index 99%
rename from src/test/java/com/teragrep/pth_06/planner/TableFilterTypesFromMetadataResultTest.java
rename to src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java
index 50516717..b2a99963 100644
--- a/src/test/java/com/teragrep/pth_06/planner/TableFilterTypesFromMetadataResultTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java
@@ -43,7 +43,7 @@
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/
-package com.teragrep.pth_06.planner;
+package com.teragrep.pth_06.planner.bloomfilter;
import org.apache.spark.util.sketch.BloomFilter;
import org.jooq.DSLContext;
diff --git a/src/test/java/com/teragrep/pth_06/planner/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
similarity index 78%
rename from src/test/java/com/teragrep/pth_06/planner/TableFiltersTest.java
rename to src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
index 5ab27f18..7e7fbecc 100644
--- a/src/test/java/com/teragrep/pth_06/planner/TableFiltersTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
@@ -43,8 +43,9 @@
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/
-package com.teragrep.pth_06.planner;
+package com.teragrep.pth_06.planner.bloomfilter;
+import nl.jqno.equalsverifier.EqualsVerifier;
import org.apache.spark.util.sketch.BloomFilter;
import org.jooq.DSLContext;
import org.jooq.Table;
@@ -69,8 +70,8 @@ class TableFiltersTest {
// matches IPv4
final String ipRegex = "(\\b25[0-5]|\\b2[0-4][0-9]|\\b[01]?[0-9][0-9]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
// matches IPv4 starting with 255.
- final String ipStartingWith255 = "(\\b25[0-5]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
- final List patternList = new ArrayList<>(Arrays.asList(ipRegex, ipStartingWith255));
+ final String parenthesesPattern = "\\((.*?)\\)";
+ final List patternList = new ArrayList<>(Arrays.asList(ipRegex, parenthesesPattern));
final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password));
@BeforeAll
@@ -125,7 +126,7 @@ void tearDown() {
@Test
public void testCreation() {
- fillTargetTable();
+ fillTargetTable(1);
DSLContext ctx = DSL.using(conn);
Table> table = ctx
.meta()
@@ -138,7 +139,7 @@ public void testCreation() {
@Test
public void testInsertFiltersIntoCategoryTable() {
- fillTargetTable();
+ fillTargetTable(1);
DSLContext ctx = DSL.using(conn);
Table> table = ctx
.meta()
@@ -151,9 +152,25 @@ public void testInsertFiltersIntoCategoryTable() {
Assertions.assertTrue(exception.getMessage().contains("term_0_target\" (term_id, type_id, \"filter\")"));
}
+ @Test
+ public void testInsertFiltersIntoCategoryTableRegexExtract() {
+ fillTargetTable(2);
+ DSLContext ctx = DSL.using(conn);
+ Table> table = ctx
+ .meta()
+ .filterSchemas(s -> s.getName().equals("bloomdb"))
+ .filterTables(t -> !t.getName().equals("filtertype"))
+ .getTables()
+ .get(0);
+ String query = "biz baz boz data has no content today (very important though) but it would still have if one had a means to extract it from (here is something else important as well) the strange patterns called parentheses that it seems to have been put in.";
+ DataAccessException exception = Assertions
+ .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, query).insertFiltersIntoCategoryTable());
+ Assertions.assertTrue(exception.getMessage().contains("term_0_target\" (term_id, type_id, \"filter\")"));
+ }
+
@Test
public void testInsertFiltersWithoutPatternMatch() {
- fillTargetTable();
+ fillTargetTable(1);
DSLContext ctx = DSL.using(conn);
Table> table = ctx
.meta()
@@ -168,7 +185,7 @@ public void testInsertFiltersWithoutPatternMatch() {
@Test
public void testEquals() {
- fillTargetTable();
+ fillTargetTable(1);
DSLContext ctx = DSL.using(conn);
Table> table = ctx
.meta()
@@ -184,7 +201,7 @@ public void testEquals() {
@Test
public void testNotEquals() {
- fillTargetTable();
+ fillTargetTable(1);
DSLContext ctx = DSL.using(conn);
Table> table = ctx
.meta()
@@ -200,7 +217,35 @@ public void testNotEquals() {
Assertions.assertNotEquals(filter1, filter3);
}
- void fillTargetTable() {
+ @Test
+ public void testHashCode() {
+ fillTargetTable(1);
+ DSLContext ctx = DSL.using(conn);
+ Table> table = ctx
+ .meta()
+ .filterSchemas(s -> s.getName().equals("bloomdb"))
+ .filterTables(t -> !t.getName().equals("filtertype"))
+ .getTables()
+ .get(0);
+ TableFilters filter1 = new TableFilters(ctx, table, 0L, "test");
+ TableFilters filter2 = new TableFilters(ctx, table, 0L, "test");
+ TableFilters notEq1 = new TableFilters(ctx, table, 0L, "notTest");
+ TableFilters notEq2 = new TableFilters(ctx, table, 1L, "test");
+ Assertions.assertEquals(filter1.hashCode(), filter2.hashCode());
+ Assertions.assertNotEquals(filter1.hashCode(), notEq1.hashCode());
+ Assertions.assertNotEquals(filter1.hashCode(), notEq2.hashCode());
+ }
+
+ @Test
+ public void equalsHashCodeContractTest() {
+ EqualsVerifier
+ .forClass(TableFilters.class)
+ .withNonnullFields("recordsInMetadata")
+ .withNonnullFields("recordConsumer")
+ .verify();
+ }
+
+ void fillTargetTable(int id) {
Assertions.assertDoesNotThrow(() -> {
conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
conn.prepareStatement("USE BLOOMDB").execute();
@@ -214,7 +259,7 @@ void fillTargetTable() {
filterBAOS.close();
});
stmt.setInt(1, 1);
- stmt.setInt(2, 1);
+ stmt.setInt(2, id); // filter type id
stmt.setBytes(3, filterBAOS.toByteArray());
stmt.executeUpdate();
});
diff --git a/src/test/java/com/teragrep/pth_06/planner/TokenizedValueTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java
similarity index 83%
rename from src/test/java/com/teragrep/pth_06/planner/TokenizedValueTest.java
rename to src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java
index 69c2ee2d..fcd87918 100644
--- a/src/test/java/com/teragrep/pth_06/planner/TokenizedValueTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java
@@ -43,9 +43,11 @@
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/
-package com.teragrep.pth_06.planner;
+package com.teragrep.pth_06.planner.bloomfilter;
import com.teragrep.blf_01.Token;
+import com.teragrep.pth_06.planner.TokenizedValue;
+import nl.jqno.equalsverifier.EqualsVerifier;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -58,7 +60,6 @@ class TokenizedValueTest {
void testTokenization() {
TokenizedValue result = new TokenizedValue("test.nest");
Set tokens = result.tokens().stream().map(Token::toString).collect(Collectors.toSet());
- Assertions.assertEquals("test.nest", result.value);
Assertions.assertTrue(tokens.contains("nest"));
Assertions.assertTrue(tokens.contains("test"));
Assertions.assertTrue(tokens.contains("."));
@@ -86,4 +87,18 @@ void testNotEquals() {
Assertions.assertNotEquals(value2, value1);
Assertions.assertNotEquals(value1, null);
}
+
+ @Test
+ void testHashCode() {
+ TokenizedValue value1 = new TokenizedValue("test");
+ TokenizedValue value2 = new TokenizedValue("test");
+ TokenizedValue notEq = new TokenizedValue("nest");
+ Assertions.assertEquals(value1.hashCode(), value2.hashCode());
+ Assertions.assertNotEquals(value1.hashCode(), notEq.hashCode());
+ }
+
+ @Test
+ public void equalsHashCodeContractTest() {
+ EqualsVerifier.forClass(TokenizedValue.class).withNonnullFields("value").withNonnullFields("tokenSet").verify();
+ }
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java
index fd486989..769d9e67 100644
--- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java
@@ -45,6 +45,7 @@
*/
package com.teragrep.pth_06.planner.walker.conditions;
+import nl.jqno.equalsverifier.EqualsVerifier;
import org.jooq.Condition;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -64,18 +65,6 @@ void testSingleToken() {
Assertions.assertEquals(e, condition.toString());
}
- @Test
- void testMultipleTokens() {
- Condition condition = new PatternMatchCondition("test.nest").condition();
- String e = "(\n" + " ('test.' like_regex \"bloomdb\".\"filtertype\".\"pattern\")\n"
- + " or ('.nest' like_regex \"bloomdb\".\"filtertype\".\"pattern\")\n"
- + " or ('test.nest' like_regex \"bloomdb\".\"filtertype\".\"pattern\")\n"
- + " or ('nest' like_regex \"bloomdb\".\"filtertype\".\"pattern\")\n"
- + " or ('.' like_regex \"bloomdb\".\"filtertype\".\"pattern\")\n"
- + " or ('test' like_regex \"bloomdb\".\"filtertype\".\"pattern\")\n" + ")";
- Assertions.assertEquals(e, condition.toString());
- }
-
@Test
void testEquality() {
PatternMatchCondition cond1 = new PatternMatchCondition("test");
@@ -89,4 +78,18 @@ void testNotEquals() {
PatternMatchCondition cond2 = new PatternMatchCondition("next");
Assertions.assertNotEquals(cond1, cond2);
}
+
+ @Test
+ void testHashCode() {
+ PatternMatchCondition cond1 = new PatternMatchCondition("test");
+ PatternMatchCondition cond2 = new PatternMatchCondition("test");
+ PatternMatchCondition notEq = new PatternMatchCondition("next");
+ Assertions.assertEquals(cond1.hashCode(), cond2.hashCode());
+ Assertions.assertNotEquals(cond1.hashCode(), notEq.hashCode());
+ }
+
+ @Test
+ public void equalsHashCodeContractTest() {
+ EqualsVerifier.forClass(PatternMatchCondition.class).withNonnullFields("valueField").verify();
+ }
}
From 465cf83666f5b62a2d2301c0da8448dfc75c4f6b Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Tue, 22 Oct 2024 08:34:16 +0300
Subject: [PATCH 02/26] add RegexExtractedValueTest
---
.../bloomfilter/BloomFilterFromRecord.java | 2 -
.../RegexExtractedValue.java | 2 +-
.../{ => bloomfilter}/TokenizedValue.java | 2 +-
.../bloomfilter/RegexExtractedValueTest.java | 79 +++++++++++++++++++
.../bloomfilter/TokenizedValueTest.java | 1 -
5 files changed, 81 insertions(+), 5 deletions(-)
rename src/main/java/com/teragrep/pth_06/planner/{ => bloomfilter}/RegexExtractedValue.java (98%)
rename src/main/java/com/teragrep/pth_06/planner/{ => bloomfilter}/TokenizedValue.java (98%)
create mode 100644 src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java
index 27758266..2c74c65b 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java
@@ -45,8 +45,6 @@
*/
package com.teragrep.pth_06.planner.bloomfilter;
-import com.teragrep.pth_06.planner.RegexExtractedValue;
-import com.teragrep.pth_06.planner.TokenizedValue;
import org.apache.spark.util.sketch.BloomFilter;
import org.jooq.Record;
import org.jooq.Table;
diff --git a/src/main/java/com/teragrep/pth_06/planner/RegexExtractedValue.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java
similarity index 98%
rename from src/main/java/com/teragrep/pth_06/planner/RegexExtractedValue.java
rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java
index 56a72978..a33d1d93 100644
--- a/src/main/java/com/teragrep/pth_06/planner/RegexExtractedValue.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java
@@ -43,7 +43,7 @@
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/
-package com.teragrep.pth_06.planner;
+package com.teragrep.pth_06.planner.bloomfilter;
import java.util.HashSet;
import java.util.Objects;
diff --git a/src/main/java/com/teragrep/pth_06/planner/TokenizedValue.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java
similarity index 98%
rename from src/main/java/com/teragrep/pth_06/planner/TokenizedValue.java
rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java
index c9740922..fab3fcf4 100644
--- a/src/main/java/com/teragrep/pth_06/planner/TokenizedValue.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java
@@ -43,7 +43,7 @@
* Teragrep, the applicable Commercial License may apply to this file if you as
* a licensee so wish it.
*/
-package com.teragrep.pth_06.planner;
+package com.teragrep.pth_06.planner.bloomfilter;
import com.teragrep.blf_01.Token;
import com.teragrep.blf_01.Tokenizer;
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java
new file mode 100644
index 00000000..465f1405
--- /dev/null
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java
@@ -0,0 +1,79 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner.bloomfilter;
+
+import nl.jqno.equalsverifier.EqualsVerifier;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.Set;
+
+public class RegexExtractedValueTest {
+
+ @Test
+ public void testRegexExtraction() {
+ String regex = "\\((.*?)\\)";
+ String value = "find all (important) values inside (very important) parentheses.";
+ RegexExtractedValue regexValue = new RegexExtractedValue(value, regex);
+ Set tokens = regexValue.tokens();
+ Assertions.assertEquals(2, tokens.size());
+ Assertions.assertTrue(tokens.contains("(important)") && tokens.contains("(very important)"));
+ }
+
+ @Test
+ public void testPartialRegexMatch() {
+ String regex = "\\w{3}-\\w{3}-\\w{3}";
+ String value = "testValue=abc-abc";
+ RegexExtractedValue regexValue = new RegexExtractedValue(value, regex);
+ Set tokens = regexValue.tokens();
+ System.out.println(tokens);
+ }
+
+ @Test
+ public void testEqualsHashCodeContract() {
+ EqualsVerifier.forClass(RegexExtractedValue.class).withNonnullFields("matcher").verify();
+ }
+}
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java
index fcd87918..c5340db6 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java
@@ -46,7 +46,6 @@
package com.teragrep.pth_06.planner.bloomfilter;
import com.teragrep.blf_01.Token;
-import com.teragrep.pth_06.planner.TokenizedValue;
import nl.jqno.equalsverifier.EqualsVerifier;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
From 7d4559d5716ace828bf09518e2b57bc84ee0b896 Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Tue, 22 Oct 2024 08:35:13 +0300
Subject: [PATCH 03/26] remove unnecessary test
---
.../planner/bloomfilter/RegexExtractedValueTest.java | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java
index 465f1405..bd2b950d 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java
@@ -63,15 +63,6 @@ public void testRegexExtraction() {
Assertions.assertTrue(tokens.contains("(important)") && tokens.contains("(very important)"));
}
- @Test
- public void testPartialRegexMatch() {
- String regex = "\\w{3}-\\w{3}-\\w{3}";
- String value = "testValue=abc-abc";
- RegexExtractedValue regexValue = new RegexExtractedValue(value, regex);
- Set tokens = regexValue.tokens();
- System.out.println(tokens);
- }
-
@Test
public void testEqualsHashCodeContract() {
EqualsVerifier.forClass(RegexExtractedValue.class).withNonnullFields("matcher").verify();
From 945e83977337d450ffdccc501375610eed59f407 Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Tue, 22 Oct 2024 12:26:42 +0300
Subject: [PATCH 04/26] TokenizedValue: call tokenizer only when needed, clean
up tests
---
.../bloomfilter/BloomFilterFromRecord.java | 2 +-
.../planner/bloomfilter/TokenizedValue.java | 25 ++++++++-----------
.../BloomFilterFromRecordTest.java | 9 -------
.../bloomfilter/RegexExtractedValueTest.java | 3 ++-
.../bloomfilter/TokenizedValueTest.java | 5 +---
5 files changed, 14 insertions(+), 30 deletions(-)
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java
index 2c74c65b..eabdc4ef 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java
@@ -82,7 +82,7 @@ private BloomFilter create() {
throw new RuntimeException("Object field was null");
}
final BloomFilter filter = BloomFilter.create(expected, fpp);
- // if no pattern use to tokenized value (currently BLOOMDB.FILTERTYPE.PATTERN is NOT NULL)
+ // if no pattern use tokenized value (currently BLOOMDB.FILTERTYPE.PATTERN is NOT NULL)
if (pattern == null) {
LOGGER.info("Table pattern was null using tokenizer to generate tokens");
new TokenizedValue(searchTerm).stringTokens().forEach(filter::put);
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java
index fab3fcf4..5263342e 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java
@@ -50,7 +50,7 @@
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
-import java.util.HashSet;
+import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
@@ -58,26 +58,21 @@
public final class TokenizedValue {
private final String value;
- private final Set tokenSet;
public TokenizedValue(String value) {
- this(
- value,
- new HashSet<>(new Tokenizer(32).tokenize(new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8))))
- );
- }
-
- public TokenizedValue(String value, Set tokenSet) {
this.value = value;
- this.tokenSet = tokenSet;
}
- public Set tokens() {
- return tokenSet;
+ public List tokens() {
+ return new Tokenizer(32).tokenize(new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8)));
}
public Set stringTokens() {
- return tokenSet.stream().map(Token::toString).collect(Collectors.toSet());
+ return new Tokenizer(32)
+ .tokenize(new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8)))
+ .stream()
+ .map(Token::toString)
+ .collect(Collectors.toSet());
}
@Override
@@ -87,11 +82,11 @@ public boolean equals(final Object object) {
if (object == null || object.getClass() != this.getClass())
return false;
final TokenizedValue cast = (TokenizedValue) object;
- return value.equals(cast.value) && tokenSet.equals(cast.tokenSet);
+ return value.equals(cast.value);
}
@Override
public int hashCode() {
- return Objects.hash(value, tokenSet);
+ return Objects.hash(value);
}
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java
index 3e983795..9fcb7a37 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java
@@ -69,15 +69,6 @@ public class BloomFilterFromRecordTest {
final String password = "";
final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password));
- @Test
- public void testInstantation() {
- Record dynamicRecord = generateRecord(true);
- Table> target = DSL.table(DSL.name("target"));
- String searchTerm = "Pattern";
- BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm);
- Assertions.assertDoesNotThrow(filter::bytes);
- }
-
@Test
public void testCorrectFilterSize() {
Record dynamicRecord = generateRecord(true);
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java
index bd2b950d..c3163f91 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java
@@ -60,7 +60,8 @@ public void testRegexExtraction() {
RegexExtractedValue regexValue = new RegexExtractedValue(value, regex);
Set tokens = regexValue.tokens();
Assertions.assertEquals(2, tokens.size());
- Assertions.assertTrue(tokens.contains("(important)") && tokens.contains("(very important)"));
+ Assertions.assertTrue(tokens.contains("(important)"));
+ Assertions.assertTrue(tokens.contains("(very important)"));
}
@Test
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java
index c5340db6..ba53fc9a 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java
@@ -73,7 +73,6 @@ void testEquality() {
TokenizedValue value1 = new TokenizedValue("test");
TokenizedValue value2 = new TokenizedValue("test");
Assertions.assertEquals(value1, value2);
- Assertions.assertEquals(value2, value1);
value1.tokens();
Assertions.assertEquals(value2, value1);
}
@@ -83,8 +82,6 @@ void testNotEquals() {
TokenizedValue value1 = new TokenizedValue("test");
TokenizedValue value2 = new TokenizedValue("nest");
Assertions.assertNotEquals(value1, value2);
- Assertions.assertNotEquals(value2, value1);
- Assertions.assertNotEquals(value1, null);
}
@Test
@@ -98,6 +95,6 @@ void testHashCode() {
@Test
public void equalsHashCodeContractTest() {
- EqualsVerifier.forClass(TokenizedValue.class).withNonnullFields("value").withNonnullFields("tokenSet").verify();
+ EqualsVerifier.forClass(TokenizedValue.class).withNonnullFields("value").verify();
}
}
From 2a8d97c5c4b76e726fe301ded6ae890df24e320e Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Tue, 22 Oct 2024 12:32:01 +0300
Subject: [PATCH 05/26] clear up exception message in BloomFilterFromRecord
---
.../planner/bloomfilter/BloomFilterFromRecord.java | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java
index eabdc4ef..12d7aba2 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java
@@ -73,13 +73,12 @@ public final class BloomFilterFromRecord {
private final String searchTerm;
private BloomFilter create() {
- if (expected == null || fpp == null) {
- LOGGER
- .error(
- "Null field while creating bloom filter expected <{}>, fpp <{}>, pattern <{}>, search term <{}>",
- expected, fpp, pattern, searchTerm
- );
- throw new RuntimeException("Object field was null");
+ LOGGER.debug("Create filter from Record with values: expected <{}>, fpp <{}>, pattern: <{}>", expected, fpp, pattern);
+ if (expected == null) {
+ throw new RuntimeException("Record did not contain table field value ");
+ }
+ if(fpp == null) {
+ throw new RuntimeException("Record did not contain table field value ");
}
final BloomFilter filter = BloomFilter.create(expected, fpp);
// if no pattern use tokenized value (currently BLOOMDB.FILTERTYPE.PATTERN is NOT NULL)
From b3ac3489ef677b7dba26018b22feb08134eb908c Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Tue, 22 Oct 2024 12:40:01 +0300
Subject: [PATCH 06/26] BloomFilterFromRecord: remove ULong.longValue() from
constructor, clarify exception messages. Add tests for exceptions.
---
.../bloomfilter/BloomFilterFromRecord.java | 20 ++++++++------
.../BloomFilterFromRecordTest.java | 26 +++++++++++++++++++
2 files changed, 38 insertions(+), 8 deletions(-)
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java
index 12d7aba2..1be012ac 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java
@@ -67,20 +67,24 @@
public final class BloomFilterFromRecord {
private final Logger LOGGER = LoggerFactory.getLogger(BloomFilterFromRecord.class);
- private final Long expected;
+ private final ULong expected;
private final Double fpp;
private final String pattern;
private final String searchTerm;
private BloomFilter create() {
- LOGGER.debug("Create filter from Record with values: expected <{}>, fpp <{}>, pattern: <{}>", expected, fpp, pattern);
+ LOGGER
+ .debug(
+ "Create filter from Record with values: expected <{}>, fpp <{}>, pattern: <{}>", expected, fpp,
+ pattern
+ );
if (expected == null) {
- throw new RuntimeException("Record did not contain table field value ");
+ throw new IllegalArgumentException("Record did not contain table field value ");
}
- if(fpp == null) {
- throw new RuntimeException("Record did not contain table field value ");
+ if (fpp == null) {
+ throw new IllegalArgumentException("Record did not contain table field value ");
}
- final BloomFilter filter = BloomFilter.create(expected, fpp);
+ final BloomFilter filter = BloomFilter.create(expected.longValue(), fpp);
// if no pattern use tokenized value (currently BLOOMDB.FILTERTYPE.PATTERN is NOT NULL)
if (pattern == null) {
LOGGER.info("Table pattern was null using tokenizer to generate tokens");
@@ -101,14 +105,14 @@ private BloomFilter create() {
public BloomFilterFromRecord(Record record, Table> table, String searchTerm) {
this(
- record.getValue(DSL.field(DSL.name(table.getName(), "expectedElements"), ULong.class)).longValue(),
+ record.getValue(DSL.field(DSL.name(table.getName(), "expectedElements"), ULong.class)),
record.getValue(DSL.field(DSL.name(table.getName(), "targetFpp"), Double.class)),
record.getValue(BLOOMDB.FILTERTYPE.PATTERN, String.class),
searchTerm
);
}
- public BloomFilterFromRecord(Long expected, Double fpp, String pattern, String searchTerm) {
+ public BloomFilterFromRecord(ULong expected, Double fpp, String pattern, String searchTerm) {
this.expected = expected;
this.fpp = fpp;
this.pattern = pattern;
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java
index 9fcb7a37..897c01a3 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java
@@ -119,6 +119,32 @@ public void testTokenizerTokens() {
Assertions.assertTrue(resultFilter.mightContain("SearchValuePatternInThisString"));
}
+ @Test
+ public void testNullExpectedField() {
+ Record dynamicRecord = generateRecord(false);
+ Field expectedField = DSL.field(DSL.name("expectedElements"), ULong.class);
+ dynamicRecord.set(expectedField, null);
+ Table> target = DSL.table(DSL.name("target"));
+ String searchTerm = "SearchValuePatternInThisString.Without.Delimiter";
+ BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm);
+ IllegalArgumentException e = Assertions.assertThrows(IllegalArgumentException.class, filter::bytes);
+ String expectedMessage = "Record did not contain table field value ";
+ Assertions.assertEquals(expectedMessage, e.getMessage());
+ }
+
+ @Test
+ public void testNullFppField() {
+ Record dynamicRecord = generateRecord(false);
+ Field fppField = DSL.field(DSL.name("targetFpp"), Double.class);
+ dynamicRecord.set(fppField, null);
+ Table> target = DSL.table(DSL.name("target"));
+ String searchTerm = "SearchValuePatternInThisString.Without.Delimiter";
+ BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm);
+ IllegalArgumentException e = Assertions.assertThrows(IllegalArgumentException.class, filter::bytes);
+ String expectedMessage = "Record did not contain table field value ";
+ Assertions.assertEquals(expectedMessage, e.getMessage());
+ }
+
@Test
public void equalsHashCodeContractTest() {
EqualsVerifier
From 0d724065bb7fea1d3a31719d72d7d64e8f2eba76 Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Tue, 22 Oct 2024 12:43:11 +0300
Subject: [PATCH 07/26] set logger level to debug when indexstatement is
reached with bloom disabled
---
.../planner/walker/conditions/IndexStatementCondition.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
index 8f691b62..7a2c47ce 100644
--- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
@@ -78,7 +78,7 @@ public IndexStatementCondition(String value, ConditionConfig config, Condition c
public Condition condition() {
if (!config.bloomEnabled()) {
- LOGGER.warn("Indexstatement reached with bloom disabled");
+ LOGGER.debug("Indexstatement reached with bloom disabled");
return condition;
}
Condition newCondition = condition;
From a68278977d95c93d048400b2cedcd7997b830df1 Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Tue, 22 Oct 2024 15:12:35 +0300
Subject: [PATCH 08/26] remove consumer class and use for loop in TableFilters
---
...lterFromRecordToCategoryTableConsumer.java | 112 ------------------
.../planner/bloomfilter/TableFilters.java | 63 ++++++----
.../planner/bloomfilter/TableFiltersTest.java | 5 +-
.../pth_06/walker/ConditionWalkerTest.java | 22 ++++
4 files changed, 68 insertions(+), 134 deletions(-)
delete mode 100644 src/main/java/com/teragrep/pth_06/planner/bloomfilter/FilterFromRecordToCategoryTableConsumer.java
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/FilterFromRecordToCategoryTableConsumer.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/FilterFromRecordToCategoryTableConsumer.java
deleted file mode 100644
index f84e7aed..00000000
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/FilterFromRecordToCategoryTableConsumer.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Teragrep Archive Datasource (pth_06)
- * Copyright (C) 2021-2024 Suomen Kanuuna Oy
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
- *
- *
- * Additional permission under GNU Affero General Public License version 3
- * section 7
- *
- * If you modify this Program, or any covered work, by linking or combining it
- * with other code, such other code is not for that reason alone subject to any
- * of the requirements of the GNU Affero GPL version 3 as long as this Program
- * is the same Program as licensed from Suomen Kanuuna Oy without any additional
- * modifications.
- *
- * Supplemented terms under GNU Affero General Public License version 3
- * section 7
- *
- * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
- * versions must be marked as "Modified version of" The Program.
- *
- * Names of the licensors and authors may not be used for publicity purposes.
- *
- * No rights are granted for use of trade names, trademarks, or service marks
- * which are in The Program if any.
- *
- * Licensee must indemnify licensors and authors for any liability that these
- * contractual assumptions impose on licensors and authors.
- *
- * To the extent this program is licensed as part of the Commercial versions of
- * Teragrep, the applicable Commercial License may apply to this file if you as
- * a licensee so wish it.
- */
-package com.teragrep.pth_06.planner.bloomfilter;
-
-import org.jooq.DSLContext;
-import org.jooq.Field;
-import org.jooq.Record;
-import org.jooq.Table;
-import org.jooq.impl.DSL;
-import org.jooq.types.ULong;
-
-import java.util.Objects;
-import java.util.function.Consumer;
-
-import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB;
-import static org.jooq.impl.SQLDataType.BIGINTUNSIGNED;
-
-public final class FilterFromRecordToCategoryTableConsumer implements Consumer {
-
- private final DSLContext ctx;
- private final Table> table;
- private final long bloomTermId;
- private final String searchTerm;
-
- public FilterFromRecordToCategoryTableConsumer(
- DSLContext ctx,
- Table> table,
- long bloomTermId,
- String searchTerm
- ) {
- this.ctx = ctx;
- this.table = table;
- this.bloomTermId = bloomTermId;
- this.searchTerm = searchTerm;
- }
-
- @Override
- public void accept(final Record record) {
- final Table categoryTable = DSL.table(DSL.name(("term_" + bloomTermId + "_" + this.table.getName())));
- final Field>[] insertFields = {
- DSL.field("term_id", BIGINTUNSIGNED.nullable(false)),
- DSL.field("type_id", BIGINTUNSIGNED.nullable(false)),
- DSL.field(DSL.name(categoryTable.getName(), "filter"), byte[].class)
- };
- final BloomFilterFromRecord filterFromRecord = new BloomFilterFromRecord(record, table, searchTerm);
- final Field>[] valueFields = {
- DSL.val(bloomTermId, ULong.class),
- DSL.val(record.getValue(BLOOMDB.FILTERTYPE.ID), ULong.class),
- DSL.val(filterFromRecord.bytes(), byte[].class)
- };
- ctx.insertInto(categoryTable).columns(insertFields).values(valueFields).execute();
- }
-
- @Override
- public boolean equals(final Object object) {
- if (this == object)
- return true;
- if (object == null || this.getClass() != object.getClass())
- return false;
- final FilterFromRecordToCategoryTableConsumer cast = (FilterFromRecordToCategoryTableConsumer) object;
- return bloomTermId == cast.bloomTermId && ctx == cast.ctx && table.equals(cast.table)
- && searchTerm.equals(cast.searchTerm);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(ctx, table, bloomTermId, searchTerm);
- }
-}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
index b5dc9207..65767a13 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
@@ -45,56 +45,77 @@
*/
package com.teragrep.pth_06.planner.bloomfilter;
-import org.jooq.DSLContext;
-import org.jooq.Table;
+import org.jooq.*;
+import org.jooq.impl.DSL;
+import org.jooq.types.ULong;
import java.util.Objects;
+import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB;
+import static org.jooq.impl.SQLDataType.BIGINTUNSIGNED;
+
/**
* Filter types of a table that can be inserted into the tables category table
*/
public final class TableFilters {
private final TableRecords recordsInMetadata;
- private final FilterFromRecordToCategoryTableConsumer recordConsumer;
+ private final DSLContext ctx;
+ private final Table> table;
+ private final String searchTerm;
+ private final long bloomTermId;
public TableFilters(DSLContext ctx, Table> table, long bloomTermId, String searchTerm) {
- this(
- new TableFilterTypesFromMetadata(ctx, table, bloomTermId),
- new FilterFromRecordToCategoryTableConsumer(ctx, table, bloomTermId, searchTerm)
- );
+ this(new TableFilterTypesFromMetadata(ctx, table, bloomTermId), ctx, table, bloomTermId, searchTerm);
}
public TableFilters(
TableFilterTypesFromMetadata recordsInMetadata,
- FilterFromRecordToCategoryTableConsumer recordConsumer
+ DSLContext ctx,
+ Table> table,
+ long bloomTermId,
+ String searchTerm
) {
this.recordsInMetadata = recordsInMetadata;
- this.recordConsumer = recordConsumer;
+ this.ctx = ctx;
+ this.table = table;
+ this.bloomTermId = bloomTermId;
+ this.searchTerm = searchTerm;
}
public void insertFiltersIntoCategoryTable() {
- recordsInMetadata.toResult().forEach(recordConsumer);
+ final Result result = recordsInMetadata.toResult();
+ for (final Record record : result) {
+ final Table categoryTable = DSL
+ .table(DSL.name(("term_" + bloomTermId + "_" + this.table.getName())));
+ final Field>[] insertFields = {
+ DSL.field("term_id", BIGINTUNSIGNED.nullable(false)),
+ DSL.field("type_id", BIGINTUNSIGNED.nullable(false)),
+ DSL.field(DSL.name(categoryTable.getName(), "filter"), byte[].class)
+ };
+ final BloomFilterFromRecord filterFromRecord = new BloomFilterFromRecord(record, table, searchTerm);
+ final Field>[] valueFields = {
+ DSL.val(bloomTermId, ULong.class),
+ DSL.val(record.getValue(BLOOMDB.FILTERTYPE.ID), ULong.class),
+ DSL.val(filterFromRecord.bytes(), byte[].class)
+ };
+ ctx.insertInto(categoryTable).columns(insertFields).values(valueFields).execute();
+ }
}
- /**
- * Expects DSLContext values to be the same instance
- *
- * @param object object compared
- * @returs true if object is equal
- */
@Override
- public boolean equals(final Object object) {
+ public boolean equals(Object object) {
if (this == object)
return true;
- if (object == null || object.getClass() != this.getClass())
+ if (object == null || getClass() != object.getClass())
return false;
- final TableFilters cast = (TableFilters) object;
- return recordsInMetadata.equals(cast.recordsInMetadata) && recordConsumer.equals(cast.recordConsumer);
+ TableFilters cast = (TableFilters) object;
+ return bloomTermId == cast.bloomTermId && recordsInMetadata.equals(cast.recordsInMetadata) && ctx == cast.ctx
+ && table.equals(cast.table) && searchTerm.equals(cast.searchTerm);
}
@Override
public int hashCode() {
- return Objects.hash(recordsInMetadata, recordConsumer);
+ return Objects.hash(recordsInMetadata, ctx, table, searchTerm, bloomTermId);
}
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
index 7e7fbecc..15a723fd 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
@@ -241,7 +241,10 @@ public void equalsHashCodeContractTest() {
EqualsVerifier
.forClass(TableFilters.class)
.withNonnullFields("recordsInMetadata")
- .withNonnullFields("recordConsumer")
+ .withNonnullFields("ctx")
+ .withNonnullFields("table")
+ .withNonnullFields("searchTerm")
+ .withNonnullFields("bloomTermId")
.verify();
}
diff --git a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java
index 60346b7e..9862d7f6 100644
--- a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java
+++ b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java
@@ -230,6 +230,28 @@ void twoTablePatternMatchTest() {
.assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255")));
}
+ @Test
+ void testFullXML() {
+ ConditionWalker walker = new ConditionWalker(DSL.using(conn), true);
+ String q = "";
+ String e = "(\n" + " \"getArchivedObjects_filter_table\".\"directory\" like 'haproxy'\n" + " and (\n"
+ + " (\n" + " bloommatch(\n" + " (\n"
+ + " select \"term_0_pattern_test_ip\".\"filter\"\n"
+ + " from \"term_0_pattern_test_ip\"\n" + " where (\n" + " term_id = 0\n"
+ + " and type_id = \"bloomdb\".\"pattern_test_ip\".\"filter_type_id\"\n" + " )\n"
+ + " ),\n" + " \"bloomdb\".\"pattern_test_ip\".\"filter\"\n" + " ) = true\n"
+ + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is not null\n" + " )\n" + " or (\n"
+ + " bloommatch(\n" + " (\n" + " select \"term_0_pattern_test_ip255\".\"filter\"\n"
+ + " from \"term_0_pattern_test_ip255\"\n" + " where (\n" + " term_id = 0\n"
+ + " and type_id = \"bloomdb\".\"pattern_test_ip255\".\"filter_type_id\"\n" + " )\n"
+ + " ),\n" + " \"bloomdb\".\"pattern_test_ip255\".\"filter\"\n" + " ) = true\n"
+ + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is not null\n" + " )\n" + " or (\n"
+ + " \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n"
+ + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is null\n" + " )\n" + " )\n" + ")";
+ Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false));
+ System.out.println(cond.toString());
+ }
+
@Test
void twoTablePatternMatchWithoutFiltersTest() {
ConditionWalker walker = new ConditionWalker(DSL.using(conn), true, true);
From b3546148015985745e96600880df678c81e3086a Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Tue, 22 Oct 2024 15:28:17 +0300
Subject: [PATCH 09/26] use try with resources and add comments on equals
methods about DSLContext equality
---
.../pth_06/planner/bloomfilter/BloomFilterFromRecord.java | 6 ++----
.../planner/bloomfilter/TableFilterTypesFromMetadata.java | 6 ++++++
.../teragrep/pth_06/planner/bloomfilter/TableFilters.java | 6 ++++++
3 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java
index 1be012ac..f3bf84a1 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java
@@ -121,15 +121,13 @@ public BloomFilterFromRecord(ULong expected, Double fpp, String pattern, String
public byte[] bytes() {
final BloomFilter filter = create();
- final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream();
- try {
+ try (final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream()) {
filter.writeTo(filterBAOS);
- filterBAOS.close();
+ return filterBAOS.toByteArray();
}
catch (IOException e) {
throw new UncheckedIOException(new IOException("Error writing filter bytes: " + e.getMessage()));
}
- return filterBAOS.toByteArray();
}
@Override
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java
index ffcdb4c7..2f8752f4 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java
@@ -96,6 +96,12 @@ public Result toResult() {
return records;
}
+ /**
+ * Equal only if all fields have equal values and both objects hold the same DSLContext instance
+ *
+ * @param object object compared against
+ * @return true if the object is of the same class, all fields are equal and the DSLContext is the same instance
+ */
@Override
public boolean equals(final Object object) {
if (this == object)
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
index 65767a13..db5f85dc 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
@@ -103,6 +103,12 @@ public void insertFiltersIntoCategoryTable() {
}
}
+ /**
+ * Equal only if all fields have equal values and both objects hold the same DSLContext instance
+ *
+ * @param object object compared against
+ * @return true if the object is of the same class, all fields are equal and the DSLContext is the same instance
+ */
@Override
public boolean equals(Object object) {
if (this == object)
From 5f907c86fc1341b215e52f322b5f03cb95e7eb05 Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Wed, 23 Oct 2024 12:09:22 +0300
Subject: [PATCH 10/26] add Tokenizable interface and decorators, rename
BloomFilterFromRecord and make it unconfigurable, make matcher immutable
---
.../bloomfilter/RegexExtractedValue.java | 35 ++++---
...Record.java => SearchTermBloomFilter.java} | 80 ++++++----------
.../planner/bloomfilter/TableFilters.java | 18 +++-
.../planner/bloomfilter/Tokenizable.java | 53 +++++++++++
.../planner/bloomfilter/TokenizedValue.java | 12 +--
.../planner/bloomfilter/TokensAsStrings.java | 65 +++++++++++++
.../bloomfilter/RegexExtractedValueTest.java | 10 +-
...st.java => SearchTermBloomFilterTest.java} | 94 ++++---------------
.../planner/bloomfilter/TableFiltersTest.java | 6 +-
.../bloomfilter/TokensAsStringsTest.java | 63 +++++++++++++
.../conditions/PatternMatchConditionTest.java | 2 +-
11 files changed, 270 insertions(+), 168 deletions(-)
rename src/main/java/com/teragrep/pth_06/planner/bloomfilter/{BloomFilterFromRecord.java => SearchTermBloomFilter.java} (52%)
create mode 100644 src/main/java/com/teragrep/pth_06/planner/bloomfilter/Tokenizable.java
create mode 100644 src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStrings.java
rename src/test/java/com/teragrep/pth_06/planner/bloomfilter/{BloomFilterFromRecordTest.java => SearchTermBloomFilterTest.java} (50%)
create mode 100644 src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStringsTest.java
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java
index a33d1d93..33655b29 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java
@@ -45,30 +45,27 @@
*/
package com.teragrep.pth_06.planner.bloomfilter;
-import java.util.HashSet;
-import java.util.Objects;
-import java.util.Set;
+import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-public final class RegexExtractedValue {
+public final class RegexExtractedValue implements Tokenizable {
- private final Matcher matcher;
+ private final String value;
+ private final Pattern pattern;
- public RegexExtractedValue(String value, String regex) {
- this(value, Pattern.compile(regex));
+ public RegexExtractedValue(String value, String pattern) {
+ this(value, Pattern.compile(pattern));
}
public RegexExtractedValue(String value, Pattern pattern) {
- this(pattern.matcher(value));
+ this.value = value;
+ this.pattern = pattern;
}
- public RegexExtractedValue(Matcher matcher) {
- this.matcher = matcher;
- }
-
- public Set tokens() {
- final Set tokens = new HashSet<>();
+ public List tokens() {
+ final Matcher matcher = pattern.matcher(value);
+ final List tokens = new ArrayList<>();
while (matcher.find()) {
final String token = matcher.group();
tokens.add(token);
@@ -77,17 +74,17 @@ public Set tokens() {
}
@Override
- public boolean equals(final Object object) {
+ public boolean equals(Object object) {
if (this == object)
return true;
- if (object == null || object.getClass() != this.getClass())
+ if (object == null || getClass() != object.getClass())
return false;
- final RegexExtractedValue cast = (RegexExtractedValue) object;
- return matcher.equals(cast.matcher);
+ RegexExtractedValue cast = (RegexExtractedValue) object;
+ return value.equals(cast.value) && pattern.equals(cast.pattern);
}
@Override
public int hashCode() {
- return Objects.hash(matcher);
+ return Objects.hash(value, pattern);
}
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
similarity index 52%
rename from src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java
rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
index f3bf84a1..61884af3 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
@@ -46,77 +46,52 @@
package com.teragrep.pth_06.planner.bloomfilter;
import org.apache.spark.util.sketch.BloomFilter;
-import org.jooq.Record;
-import org.jooq.Table;
-import org.jooq.impl.DSL;
-import org.jooq.types.ULong;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UncheckedIOException;
+import java.util.List;
import java.util.Objects;
-import java.util.Set;
-
-import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB;
/**
- * Extracts filter type from record, creates a bloom filter and returns the filters byte array
+ * Inserts given tokens into configurable filter
*/
-public final class BloomFilterFromRecord {
+public final class SearchTermBloomFilter {
- private final Logger LOGGER = LoggerFactory.getLogger(BloomFilterFromRecord.class);
- private final ULong expected;
+ private final Logger LOGGER = LoggerFactory.getLogger(SearchTermBloomFilter.class);
+ private final Long expected;
private final Double fpp;
- private final String pattern;
- private final String searchTerm;
+ private final List stringTokens;
private BloomFilter create() {
- LOGGER
- .debug(
- "Create filter from Record with values: expected <{}>, fpp <{}>, pattern: <{}>", expected, fpp,
- pattern
- );
- if (expected == null) {
- throw new IllegalArgumentException("Record did not contain table field value ");
- }
- if (fpp == null) {
- throw new IllegalArgumentException("Record did not contain table field value ");
- }
- final BloomFilter filter = BloomFilter.create(expected.longValue(), fpp);
- // if no pattern use tokenized value (currently BLOOMDB.FILTERTYPE.PATTERN is NOT NULL)
- if (pattern == null) {
- LOGGER.info("Table pattern was null using tokenizer to generate tokens");
- new TokenizedValue(searchTerm).stringTokens().forEach(filter::put);
+ LOGGER.debug("Create filter from Record with values: expected <{}>, fpp <{}>", expected, fpp);
+
+ if (stringTokens.isEmpty()) {
+ throw new IllegalStateException(
+ "Trying to insert empty filter, pattern match joined table should always have tokens"
+ );
}
- else { // get tokens using regex
- final Set tokens = new RegexExtractedValue(searchTerm, pattern).tokens();
- LOGGER.info("Insert pattern <{}> tokens to temp table filter <{}>", pattern, tokens);
- if (tokens.isEmpty()) {
- throw new IllegalStateException(
- "Trying to insert empty filter, pattern match joined table should always have tokens"
- );
- }
- tokens.forEach(filter::put);
+ final BloomFilter filter = BloomFilter.create(expected, fpp); // sized from the constructor arguments
+ for (String token : stringTokens) {
+ filter.put(token);
}
return filter;
}
- public BloomFilterFromRecord(Record record, Table> table, String searchTerm) {
- this(
- record.getValue(DSL.field(DSL.name(table.getName(), "expectedElements"), ULong.class)),
- record.getValue(DSL.field(DSL.name(table.getName(), "targetFpp"), Double.class)),
- record.getValue(BLOOMDB.FILTERTYPE.PATTERN, String.class),
- searchTerm
- );
+ public SearchTermBloomFilter(Long expected, Double fpp, RegexExtractedValue tokenizable) {
+ this(expected, fpp, tokenizable.tokens());
+ }
+
+ public SearchTermBloomFilter(Long expected, Double fpp, TokenizedValue tokenizable) {
+ this(expected, fpp, new TokensAsStrings(tokenizable).tokens());
}
- public BloomFilterFromRecord(ULong expected, Double fpp, String pattern, String searchTerm) {
+ public SearchTermBloomFilter(Long expected, Double fpp, List stringTokens) {
this.expected = expected;
this.fpp = fpp;
- this.pattern = pattern;
- this.searchTerm = searchTerm;
+ this.stringTokens = stringTokens;
}
public byte[] bytes() {
@@ -131,18 +106,17 @@ public byte[] bytes() {
}
@Override
- public boolean equals(final Object object) {
+ public boolean equals(Object object) {
if (this == object)
return true;
if (object == null || getClass() != object.getClass())
return false;
- final BloomFilterFromRecord cast = (BloomFilterFromRecord) object;
- return expected.equals(cast.expected) && fpp.equals(cast.fpp) && Objects.equals(pattern, cast.pattern)
- && searchTerm.equals(cast.searchTerm);
+ SearchTermBloomFilter cast = (SearchTermBloomFilter) object;
+ return expected.equals(cast.expected) && fpp.equals(cast.fpp) && stringTokens.equals(cast.stringTokens);
}
@Override
public int hashCode() {
- return Objects.hash(expected, fpp, pattern, searchTerm);
+ return Objects.hash(expected, fpp, stringTokens);
}
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
index db5f85dc..26d6b6e4 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
@@ -93,11 +93,25 @@ public void insertFiltersIntoCategoryTable() {
DSL.field("type_id", BIGINTUNSIGNED.nullable(false)),
DSL.field(DSL.name(categoryTable.getName(), "filter"), byte[].class)
};
- final BloomFilterFromRecord filterFromRecord = new BloomFilterFromRecord(record, table, searchTerm);
+ final ULong expectedField = record
+ .getValue(DSL.field(DSL.name(table.getName(), "expectedElements"), ULong.class));
+ final Double fpp = record.getValue(DSL.field(DSL.name(table.getName(), "targetFpp"), Double.class));
+ final String pattern = record.getValue(BLOOMDB.FILTERTYPE.PATTERN, String.class);
+ final SearchTermBloomFilter filter;
+ if (pattern == null) {
+ filter = new SearchTermBloomFilter(expectedField.longValue(), fpp, new TokenizedValue(searchTerm));
+ }
+ else {
+ filter = new SearchTermBloomFilter(
+ expectedField.longValue(),
+ fpp,
+ new RegexExtractedValue(searchTerm, pattern)
+ );
+ }
final Field>[] valueFields = {
DSL.val(bloomTermId, ULong.class),
DSL.val(record.getValue(BLOOMDB.FILTERTYPE.ID), ULong.class),
- DSL.val(filterFromRecord.bytes(), byte[].class)
+ DSL.val(filter.bytes(), byte[].class)
};
ctx.insertInto(categoryTable).columns(insertFields).values(valueFields).execute();
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/Tokenizable.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/Tokenizable.java
new file mode 100644
index 00000000..e209f536
--- /dev/null
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/Tokenizable.java
@@ -0,0 +1,53 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner.bloomfilter;
+
+import java.util.List;
+
+public interface Tokenizable {
+
+ List tokens();
+}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java
index 5263342e..fe32e624 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java
@@ -52,10 +52,8 @@
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Objects;
-import java.util.Set;
-import java.util.stream.Collectors;
-public final class TokenizedValue {
+public final class TokenizedValue implements Tokenizable {
private final String value;
@@ -67,14 +65,6 @@ public List tokens() {
return new Tokenizer(32).tokenize(new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8)));
}
- public Set stringTokens() {
- return new Tokenizer(32)
- .tokenize(new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8)))
- .stream()
- .map(Token::toString)
- .collect(Collectors.toSet());
- }
-
@Override
public boolean equals(final Object object) {
if (this == object)
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStrings.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStrings.java
new file mode 100644
index 00000000..9dea7ef0
--- /dev/null
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStrings.java
@@ -0,0 +1,65 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner.bloomfilter;
+
+import com.teragrep.blf_01.Token;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+public final class TokensAsStrings implements Tokenizable {
+
+ private final Tokenizable origin;
+
+ public TokensAsStrings(Tokenizable origin) {
+ this.origin = origin;
+ }
+
+ @Override
+ public List tokens() {
+ return origin.tokens().stream().map(Token::toString).collect(Collectors.toList());
+ }
+}
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java
index c3163f91..04bd7edd 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java
@@ -49,7 +49,7 @@
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
-import java.util.Set;
+import java.util.List;
public class RegexExtractedValueTest {
@@ -58,7 +58,7 @@ public void testRegexExtraction() {
String regex = "\\((.*?)\\)";
String value = "find all (important) values inside (very important) parentheses.";
RegexExtractedValue regexValue = new RegexExtractedValue(value, regex);
- Set tokens = regexValue.tokens();
+ List tokens = regexValue.tokens();
Assertions.assertEquals(2, tokens.size());
Assertions.assertTrue(tokens.contains("(important)"));
Assertions.assertTrue(tokens.contains("(very important)"));
@@ -66,6 +66,10 @@ public void testRegexExtraction() {
@Test
public void testEqualsHashCodeContract() {
- EqualsVerifier.forClass(RegexExtractedValue.class).withNonnullFields("matcher").verify();
+ EqualsVerifier
+ .forClass(RegexExtractedValue.class)
+ .withNonnullFields("value")
+ .withNonnullFields("pattern")
+ .verify();
}
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java
similarity index 50%
rename from src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java
rename to src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java
index 897c01a3..89838ba9 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java
@@ -47,58 +47,47 @@
import nl.jqno.equalsverifier.EqualsVerifier;
import org.apache.spark.util.sketch.BloomFilter;
-import org.jooq.DSLContext;
-import org.jooq.Field;
-import org.jooq.Record;
-import org.jooq.Table;
-import org.jooq.impl.DSL;
-import org.jooq.types.ULong;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInstance;
import java.io.ByteArrayInputStream;
-import java.sql.Connection;
-import java.sql.DriverManager;
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
-public class BloomFilterFromRecordTest {
-
- final String url = "jdbc:h2:mem:test;MODE=MariaDB;DATABASE_TO_LOWER=TRUE;CASE_INSENSITIVE_IDENTIFIERS=TRUE";
- final String userName = "sa";
- final String password = "";
- final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password));
+public class SearchTermBloomFilterTest {
@Test
public void testCorrectFilterSize() {
- Record dynamicRecord = generateRecord(true);
- Table> target = DSL.table(DSL.name("target"));
String searchTerm = "SearchValuePatternInThisString";
- BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm);
+ SearchTermBloomFilter filter = new SearchTermBloomFilter(1000L, 0.01, new TokenizedValue(searchTerm));
byte[] bytes = Assertions.assertDoesNotThrow(filter::bytes);
BloomFilter resultFilter = Assertions
.assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes)));
- BloomFilter expectedSize = BloomFilter.create(100, 0.01);
+ BloomFilter expectedSize = BloomFilter.create(1000L, 0.01);
Assertions.assertEquals(expectedSize.bitSize(), resultFilter.bitSize());
}
@Test
public void testNoRegexExtractedTokensException() {
- Record dynamicRecord = generateRecord(true);
- Table> target = DSL.table(DSL.name("target"));
String searchTerm = "NoMatch";
- BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm);
- RuntimeException e = Assertions.assertThrows(RuntimeException.class, filter::bytes);
+ SearchTermBloomFilter filter = new SearchTermBloomFilter(
+ 1000L,
+ 0.01,
+ new RegexExtractedValue(searchTerm, "Pattern")
+ );
+ IllegalStateException e = Assertions.assertThrows(IllegalStateException.class, filter::bytes);
String expectedMessage = "Trying to insert empty filter, pattern match joined table should always have tokens";
Assertions.assertEquals(expectedMessage, e.getMessage());
}
@Test
public void testRegexExtractedTokens() {
- Record dynamicRecord = generateRecord(true);
- Table> target = DSL.table(DSL.name("target"));
String searchTerm = "SearchValuePatternInThisString";
- BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm);
+ SearchTermBloomFilter filter = new SearchTermBloomFilter(
+ 1000L,
+ 0.01,
+ new RegexExtractedValue(searchTerm, "Pattern")
+ );
byte[] bytes = Assertions.assertDoesNotThrow(filter::bytes);
BloomFilter resultFilter = Assertions
.assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes)));
@@ -107,10 +96,8 @@ public void testRegexExtractedTokens() {
@Test
public void testTokenizerTokens() {
- Record dynamicRecord = generateRecord(false);
- Table> target = DSL.table(DSL.name("target"));
String searchTerm = "SearchValuePatternInThisString.Without.Delimiter";
- BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm);
+ SearchTermBloomFilter filter = new SearchTermBloomFilter(1000L, 0.01, new TokenizedValue(searchTerm));
byte[] bytes = Assertions.assertDoesNotThrow(filter::bytes);
BloomFilter resultFilter = Assertions
.assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes)));
@@ -119,61 +106,14 @@ public void testTokenizerTokens() {
Assertions.assertTrue(resultFilter.mightContain("SearchValuePatternInThisString"));
}
- @Test
- public void testNullExpectedField() {
- Record dynamicRecord = generateRecord(false);
- Field expectedField = DSL.field(DSL.name("expectedElements"), ULong.class);
- dynamicRecord.set(expectedField, null);
- Table> target = DSL.table(DSL.name("target"));
- String searchTerm = "SearchValuePatternInThisString.Without.Delimiter";
- BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm);
- IllegalArgumentException e = Assertions.assertThrows(IllegalArgumentException.class, filter::bytes);
- String expectedMessage = "Record did not contain table field value ";
- Assertions.assertEquals(expectedMessage, e.getMessage());
- }
-
- @Test
- public void testNullFppField() {
- Record dynamicRecord = generateRecord(false);
- Field fppField = DSL.field(DSL.name("targetFpp"), Double.class);
- dynamicRecord.set(fppField, null);
- Table> target = DSL.table(DSL.name("target"));
- String searchTerm = "SearchValuePatternInThisString.Without.Delimiter";
- BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm);
- IllegalArgumentException e = Assertions.assertThrows(IllegalArgumentException.class, filter::bytes);
- String expectedMessage = "Record did not contain table field value ";
- Assertions.assertEquals(expectedMessage, e.getMessage());
- }
-
@Test
public void equalsHashCodeContractTest() {
EqualsVerifier
- .forClass(BloomFilterFromRecord.class)
+ .forClass(SearchTermBloomFilter.class)
.withNonnullFields("expected")
.withNonnullFields("fpp")
- .withNonnullFields("searchTerm")
+ .withNonnullFields("stringTokens")
.withIgnoredFields("LOGGER")
.verify();
}
-
- private Record generateRecord(final boolean withPattern) {
- DSLContext ctx = DSL.using(conn);
- Field idField = DSL.field(DSL.name("id"), ULong.class);
- Field expectedField = DSL.field(DSL.name("expectedElements"), ULong.class);
- Field fppField = DSL.field(DSL.name("targetFpp"), Double.class);
- Field patternField = DSL.field(DSL.name("pattern"), String.class);
-
- Record dynamicRecord = ctx.newRecord(idField, expectedField, fppField, patternField);
- if (withPattern) {
- dynamicRecord.set(patternField, "Pattern");
- }
- else {
- // case is joined filtertype table has no pattern
- dynamicRecord.set(patternField, null);
- }
- dynamicRecord.set(idField, ULong.valueOf(1));
- dynamicRecord.set(expectedField, ULong.valueOf(100));
- dynamicRecord.set(fppField, 0.01);
- return dynamicRecord;
- }
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
index 15a723fd..19a4e1d2 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
@@ -70,8 +70,10 @@ class TableFiltersTest {
// matches IPv4
final String ipRegex = "(\\b25[0-5]|\\b2[0-4][0-9]|\\b[01]?[0-9][0-9]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
// matches IPv4 starting with 255.
+ final String ipStartingWith255 = "(\\b25[0-5]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
+ // matches with values surrounded by parentheses
final String parenthesesPattern = "\\((.*?)\\)";
- final List patternList = new ArrayList<>(Arrays.asList(ipRegex, parenthesesPattern));
+ final List patternList = new ArrayList<>(Arrays.asList(ipRegex, ipStartingWith255, parenthesesPattern));
final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password));
@BeforeAll
@@ -154,7 +156,7 @@ public void testInsertFiltersIntoCategoryTable() {
@Test
public void testInsertFiltersIntoCategoryTableRegexExtract() {
- fillTargetTable(2);
+ fillTargetTable(3);
DSLContext ctx = DSL.using(conn);
Table> table = ctx
.meta()
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStringsTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStringsTest.java
new file mode 100644
index 00000000..ad1330f1
--- /dev/null
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStringsTest.java
@@ -0,0 +1,63 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://github.com/teragrep/teragrep/blob/main/LICENSE>.
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner.bloomfilter;
+
+import com.teragrep.blf_01.Token;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+public class TokensAsStringsTest {
+
+ @Test
+ public void testTokensToStrings() {
+ String value = "one.two.three";
+ Tokenizable tokenizedValue = new TokenizedValue(value);
+ boolean allTokenClass = tokenizedValue.tokens().stream().allMatch(t -> t.getClass().equals(Token.class));
+ Assertions.assertTrue(allTokenClass);
+ Tokenizable toStrings = new TokensAsStrings(tokenizedValue);
+ Assertions.assertTrue(toStrings.tokens().contains("one"));
+ }
+}
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java
index 769d9e67..2f089a8e 100644
--- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java
@@ -59,7 +59,7 @@
class PatternMatchConditionTest {
@Test
- void testSingleToken() {
+ void testCondition() {
Condition condition = new PatternMatchCondition("test").condition();
String e = "('test' like_regex \"bloomdb\".\"filtertype\".\"pattern\")";
Assertions.assertEquals(e, condition.toString());
From 0bf741049ceec3131b62e80d4767c9766003a97a Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Wed, 23 Oct 2024 13:40:37 +0300
Subject: [PATCH 11/26] add missing assertion to test
---
.../pth_06/walker/ConditionWalkerTest.java | 28 +++++++++++--------
1 file changed, 16 insertions(+), 12 deletions(-)
diff --git a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java
index 9862d7f6..4fb58634 100644
--- a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java
+++ b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java
@@ -231,25 +231,29 @@ void twoTablePatternMatchTest() {
}
@Test
- void testFullXML() {
+ void testFullXMLTwoMatchingTables() {
ConditionWalker walker = new ConditionWalker(DSL.using(conn), true);
String q = "";
- String e = "(\n" + " \"getArchivedObjects_filter_table\".\"directory\" like 'haproxy'\n" + " and (\n"
- + " (\n" + " bloommatch(\n" + " (\n"
+ String e = "(\n" + " \"getArchivedObjects_filter_table\".\"directory\" like 'search_bench'\n"
+ + " and \"journaldb\".\"logfile\".\"logdate\" >= date '2022-01-26'\n"
+ + " and (UNIX_TIMESTAMP(STR_TO_DATE(SUBSTRING(REGEXP_SUBSTR(path,'[0-9]+(\\.log)?\\.gz(\\.[0-9]*)?$'), 1, 10), '%Y%m%d%H')) >= 1643205600)\n"
+ + " and \"journaldb\".\"logfile\".\"logdate\" <= date '2024-10-20'\n"
+ + " and (UNIX_TIMESTAMP(STR_TO_DATE(SUBSTRING(REGEXP_SUBSTR(path,'[0-9]+(\\.log)?\\.gz(\\.[0-9]*)?$'), 1, 10), '%Y%m%d%H')) <= 1729435021)\n"
+ + " and (\n" + " (\n" + " bloommatch(\n" + " (\n"
+ " select \"term_0_pattern_test_ip\".\"filter\"\n"
+ " from \"term_0_pattern_test_ip\"\n" + " where (\n" + " term_id = 0\n"
+ " and type_id = \"bloomdb\".\"pattern_test_ip\".\"filter_type_id\"\n" + " )\n"
+ " ),\n" + " \"bloomdb\".\"pattern_test_ip\".\"filter\"\n" + " ) = true\n"
- + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is not null\n" + " )\n" + " or (\n"
- + " bloommatch(\n" + " (\n" + " select \"term_0_pattern_test_ip255\".\"filter\"\n"
- + " from \"term_0_pattern_test_ip255\"\n" + " where (\n" + " term_id = 0\n"
- + " and type_id = \"bloomdb\".\"pattern_test_ip255\".\"filter_type_id\"\n" + " )\n"
- + " ),\n" + " \"bloomdb\".\"pattern_test_ip255\".\"filter\"\n" + " ) = true\n"
- + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is not null\n" + " )\n" + " or (\n"
- + " \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n"
- + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is null\n" + " )\n" + " )\n" + ")";
+ + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is not null\n" + " )\n"
+ + " or \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + " )\n" + " and (\n" + " (\n"
+ + " bloommatch(\n" + " (\n" + " select \"term_1_pattern_test_ip\".\"filter\"\n"
+ + " from \"term_1_pattern_test_ip\"\n" + " where (\n" + " term_id = 1\n"
+ + " and type_id = \"bloomdb\".\"pattern_test_ip\".\"filter_type_id\"\n" + " )\n"
+ + " ),\n" + " \"bloomdb\".\"pattern_test_ip\".\"filter\"\n" + " ) = true\n"
+ + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is not null\n" + " )\n"
+ + " or \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + " )\n" + ")";
Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false));
- System.out.println(cond.toString());
+ Assertions.assertEquals(e, cond.toString());
}
@Test
From 31e0bf9f7fa66f09b10ff6f5bddc2288b6823f8d Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Wed, 23 Oct 2024 13:52:32 +0300
Subject: [PATCH 12/26] move method after constructors
---
.../bloomfilter/SearchTermBloomFilter.java | 28 +++++++++----------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
index 61884af3..f9dc43f5 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
@@ -65,6 +65,20 @@ public final class SearchTermBloomFilter {
private final Double fpp;
private final List stringTokens;
+ public SearchTermBloomFilter(Long expected, Double fpp, RegexExtractedValue tokenizable) {
+ this(expected, fpp, tokenizable.tokens());
+ }
+
+ public SearchTermBloomFilter(Long expected, Double fpp, TokenizedValue tokenizable) {
+ this(expected, fpp, new TokensAsStrings(tokenizable).tokens());
+ }
+
+ public SearchTermBloomFilter(Long expected, Double fpp, List stringTokens) {
+ this.expected = expected;
+ this.fpp = fpp;
+ this.stringTokens = stringTokens;
+ }
+
private BloomFilter create() {
LOGGER.debug("Create filter from Record with values: expected <{}>, fpp <{}>", expected, fpp);
@@ -80,20 +94,6 @@ private BloomFilter create() {
return filter;
}
- public SearchTermBloomFilter(Long expected, Double fpp, RegexExtractedValue tokenizable) {
- this(expected, fpp, tokenizable.tokens());
- }
-
- public SearchTermBloomFilter(Long expected, Double fpp, TokenizedValue tokenizable) {
- this(expected, fpp, new TokensAsStrings(tokenizable).tokens());
- }
-
- public SearchTermBloomFilter(Long expected, Double fpp, List stringTokens) {
- this.expected = expected;
- this.fpp = fpp;
- this.stringTokens = stringTokens;
- }
-
public byte[] bytes() {
final BloomFilter filter = create();
try (final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream()) {
From f370356c771fa6f91e7a213d453f2b5a3b2325af Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Fri, 25 Oct 2024 13:37:16 +0300
Subject: [PATCH 13/26] fix hard coded filter size and fix testing that
different sizes are applied.
---
.../pth_06/planner/StreamDBClient.java | 4 ++--
.../planner/bloomfilter/DatabaseTables.java | 10 ++++++++
.../bloomfilter/PatternMatchTables.java | 4 ++--
.../bloomfilter/RegexExtractedValue.java | 2 +-
.../bloomfilter/SearchTermBloomFilter.java | 11 +++------
.../conditions/IndexStatementCondition.java | 4 ++--
.../bloomfilter/PatternMatchTablesTest.java | 12 +++++-----
.../SearchTermBloomFilterTest.java | 23 +++++++++++++------
8 files changed, 42 insertions(+), 28 deletions(-)
create mode 100644 src/main/java/com/teragrep/pth_06/planner/bloomfilter/DatabaseTables.java
diff --git a/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java b/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java
index 0cc044cb..57084f3b 100644
--- a/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java
+++ b/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java
@@ -344,9 +344,9 @@ private Table getTableStatement(Condition journaldbCondition, Date day)
.on(JOURNALDB.LOGFILE.HOST_ID.eq(GetArchivedObjectsFilterTable.host_id).and(JOURNALDB.LOGFILE.LOGTAG.eq(GetArchivedObjectsFilterTable.tag)));
if (bloomEnabled) {
- Set> tables = walker.patternMatchTables();
+ final Set> tables = walker.patternMatchTables();
if (!tables.isEmpty()) {
- for (Table> table : tables) {
+ for (final Table> table : tables) {
if (LOGGER.isInfoEnabled()) {
LOGGER.info("Left join pattern match table: <{}>", table.getName());
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/DatabaseTables.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/DatabaseTables.java
new file mode 100644
index 00000000..92d09133
--- /dev/null
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/DatabaseTables.java
@@ -0,0 +1,10 @@
+package com.teragrep.pth_06.planner.bloomfilter;
+
+import org.jooq.Table;
+
+import java.util.List;
+
+public interface DatabaseTables {
+
+ List> tables();
+}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java
index 10bc8d83..b7b638e7 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java
@@ -61,7 +61,7 @@
/**
* Class to get a collection of Tables that match the given PatternMatchCondition
*/
-public final class PatternMatchTables {
+public final class PatternMatchTables implements DatabaseTables {
private static final Logger LOGGER = LoggerFactory.getLogger(PatternMatchTables.class);
@@ -82,7 +82,7 @@ public PatternMatchTables(DSLContext ctx, PatternMatchCondition patternMatchCond
*
* @return List of tables that matched condition and were not empty
*/
- public List> toList() {
+ public List> tables() {
final List> tables = ctx
.meta()
.filterSchemas(s -> s.equals(BLOOMDB)) // select bloomdb
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java
index 33655b29..bf3cfbd9 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java
@@ -74,7 +74,7 @@ public List tokens() {
}
@Override
- public boolean equals(Object object) {
+ public boolean equals(final Object object) {
if (this == object)
return true;
if (object == null || getClass() != object.getClass())
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
index f9dc43f5..88573f73 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
@@ -79,7 +79,7 @@ public SearchTermBloomFilter(Long expected, Double fpp, List stringToken
this.stringTokens = stringTokens;
}
- private BloomFilter create() {
+ public byte[] bytes() {
LOGGER.debug("Create filter from Record with values: expected <{}>, fpp <{}>", expected, fpp);
if (stringTokens.isEmpty()) {
@@ -87,15 +87,10 @@ private BloomFilter create() {
"Trying to insert empty filter, pattern match joined table should always have tokens"
);
}
- final BloomFilter filter = BloomFilter.create(1000, 0.01);
- for (String token : stringTokens) {
+ final BloomFilter filter = BloomFilter.create(expected, fpp);
+ for (final String token : stringTokens) {
filter.put(token);
}
- return filter;
- }
-
- public byte[] bytes() {
- final BloomFilter filter = create();
try (final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream()) {
filter.writeTo(filterBAOS);
return filterBAOS.toByteArray();
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
index 7a2c47ce..7b36f73c 100644
--- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
@@ -83,8 +83,8 @@ public Condition condition() {
}
Condition newCondition = condition;
if (tableSet.isEmpty()) {
- final PatternMatchTables patternMatchTables = new PatternMatchTables(config.context(), value);
- tableSet.addAll(patternMatchTables.toList());
+ final DatabaseTables patternMatchTables = new PatternMatchTables(config.context(), value);
+ tableSet.addAll(patternMatchTables.tables());
}
if (!tableSet.isEmpty()) {
if (LOGGER.isDebugEnabled()) {
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java
index 2ff9924f..5313a4c4 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java
@@ -139,7 +139,7 @@ public void testSingleMatch() {
DSLContext ctx = DSL.using(conn);
String input = "192.168.1.1";
PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input);
- List> result = patternMatchTables.toList();
+ List> result = patternMatchTables.tables();
Assertions.assertEquals(1, result.size());
Assertions.assertEquals("pattern_test_ip", result.get(0).getName());
}
@@ -149,7 +149,7 @@ public void testSearchTermTokenizedMatch() {
DSLContext ctx = DSL.using(conn);
String input = "target_ip=192.168.1.1";
PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input);
- List> result = patternMatchTables.toList();
+ List> result = patternMatchTables.tables();
Assertions.assertEquals(1, result.size());
Assertions.assertEquals("pattern_test_ip", result.get(0).getName());
}
@@ -159,7 +159,7 @@ public void testRegexMatch() {
DSLContext ctx = DSL.using(conn);
String input = "biz baz boz data has no content today (very important though) but it would still have if one had a means to extract it from (here is something else important as well) the strange patterns called parentheses that it seems to have been put in.";
PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input);
- List> result = patternMatchTables.toList();
+ List> result = patternMatchTables.tables();
Assertions.assertEquals(1, result.size());
Assertions.assertEquals("parentheses_test", result.get(0).getName());
}
@@ -169,8 +169,8 @@ public void testMultipleMatch() {
DSLContext ctx = DSL.using(conn);
String input = "255.255.255.255";
PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input);
- List> result = patternMatchTables.toList();
- List> result2 = patternMatchTables.toList();
+ List> result = patternMatchTables.tables();
+ List> result2 = patternMatchTables.tables();
List tableNames = result.stream().map(Named::getName).collect(Collectors.toList());
Assertions.assertEquals(2, result.size());
Assertions.assertEquals(2, result2.size());
@@ -183,7 +183,7 @@ public void testNoMatch() {
DSLContext ctx = DSL.using(conn);
String input = "testinput";
PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input);
- List> result = patternMatchTables.toList();
+ List> result = patternMatchTables.tables();
Assertions.assertTrue(result.isEmpty());
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java
index 89838ba9..27932eb4 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java
@@ -58,13 +58,22 @@ public class SearchTermBloomFilterTest {
@Test
public void testCorrectFilterSize() {
- String searchTerm = "SearchValuePatternInThisString";
- SearchTermBloomFilter filter = new SearchTermBloomFilter(1000L, 0.01, new TokenizedValue(searchTerm));
- byte[] bytes = Assertions.assertDoesNotThrow(filter::bytes);
- BloomFilter resultFilter = Assertions
- .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes)));
- BloomFilter expectedSize = BloomFilter.create(1000L, 0.01);
- Assertions.assertEquals(expectedSize.bitSize(), resultFilter.bitSize());
+ String searchTerm = "test";
+ SearchTermBloomFilter filter1 = new SearchTermBloomFilter(1000L, 0.01, new TokenizedValue(searchTerm));
+ SearchTermBloomFilter filter2 = new SearchTermBloomFilter(1000L, 0.02, new TokenizedValue(searchTerm));
+ SearchTermBloomFilter filter3 = new SearchTermBloomFilter(100L, 0.01, new TokenizedValue(searchTerm));
+ byte[] bytes1 = Assertions.assertDoesNotThrow(filter1::bytes);
+ byte[] bytes2 = Assertions.assertDoesNotThrow(filter2::bytes);
+ byte[] bytes3 = Assertions.assertDoesNotThrow(filter3::bytes);
+ BloomFilter resultFilter1 = Assertions
+ .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes1)));
+ BloomFilter resultFilter2 = Assertions
+ .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes2)));
+ BloomFilter resultFilter3 = Assertions
+ .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes3)));
+ Assertions.assertEquals(BloomFilter.create(1000L, 0.01).bitSize(), resultFilter1.bitSize());
+ Assertions.assertEquals(BloomFilter.create(1000L, 0.02).bitSize(), resultFilter2.bitSize());
+ Assertions.assertEquals(BloomFilter.create(100L, 0.01).bitSize(), resultFilter3.bitSize());
}
@Test
From eda9878231364b4c2dc5e45422d9b53116b47806 Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Mon, 28 Oct 2024 16:15:23 +0200
Subject: [PATCH 14/26] refactor code to simplify, add testing for SQL temp
table values created by bloom operations
---
pom.xml | 6 +
.../planner/bloomfilter/CategoryTable.java | 5 -
.../bloomfilter/CategoryTableImpl.java | 34 +----
...ble.java => CategoryTableWithFilters.java} | 30 ++--
....java => ConditionMatchBloomDBTables.java} | 22 +--
.../planner/bloomfilter/DatabaseTables.java | 45 ++++++
.../bloomfilter/SearchTermBloomFilter.java | 10 +-
.../SearchTermFiltersInserted.java | 76 ----------
.../TableFilterTypesFromMetadata.java | 76 +++++-----
.../planner/bloomfilter/TableFilters.java | 35 +++--
.../conditions/CategoryTableCondition.java | 42 ++++--
.../conditions/IndexStatementCondition.java | 19 ++-
... RegexLikeFiltertypePatternCondition.java} | 8 +-
.../bloomfilter/CategoryTableImplTest.java | 64 +--------
...a => ConditionMatchBloomDBTablesTest.java} | 36 ++---
.../SearchTermBloomFilterTest.java | 13 ++
...ableFilterTypesFromMetadataResultTest.java | 11 +-
.../planner/bloomfilter/TableFiltersTest.java | 1 +
.../CategoryTableConditionTest.java | 7 +-
...exLikeFiltertypePatternConditionTest.java} | 20 +--
.../pth_06/walker/ConditionWalkerTest.java | 135 +++++++++++++++++-
21 files changed, 398 insertions(+), 297 deletions(-)
rename src/main/java/com/teragrep/pth_06/planner/bloomfilter/{CreatedCategoryTable.java => CategoryTableWithFilters.java} (76%)
rename src/main/java/com/teragrep/pth_06/planner/bloomfilter/{PatternMatchTables.java => ConditionMatchBloomDBTables.java} (83%)
delete mode 100644 src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermFiltersInserted.java
rename src/main/java/com/teragrep/pth_06/planner/walker/conditions/{PatternMatchCondition.java => RegexLikeFiltertypePatternCondition.java} (89%)
rename src/test/java/com/teragrep/pth_06/planner/bloomfilter/{PatternMatchTablesTest.java => ConditionMatchBloomDBTablesTest.java} (86%)
rename src/test/java/com/teragrep/pth_06/planner/walker/conditions/{PatternMatchConditionTest.java => RegexLikeFiltertypePatternConditionTest.java} (75%)
diff --git a/pom.xml b/pom.xml
index 47ffaa4a..af810fab 100644
--- a/pom.xml
+++ b/pom.xml
@@ -209,6 +209,12 @@
3.16.1
test
+
+ io.github.hakky54
+ logcaptor
+ 2.9.3
+ test
+
org.apache.kafka
kafka-clients
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTable.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTable.java
index 5499d781..03f7499a 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTable.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTable.java
@@ -45,13 +45,8 @@
*/
package com.teragrep.pth_06.planner.bloomfilter;
-import com.teragrep.pth_06.planner.walker.conditions.QueryCondition;
-
public interface CategoryTable {
void create();
- void insertFilters();
-
- QueryCondition bloommatchCondition();
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java
index d0c7b079..45b2a767 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java
@@ -46,8 +46,6 @@
package com.teragrep.pth_06.planner.bloomfilter;
import com.teragrep.pth_06.config.ConditionConfig;
-import com.teragrep.pth_06.planner.walker.conditions.CategoryTableCondition;
-import com.teragrep.pth_06.planner.walker.conditions.QueryCondition;
import org.jooq.*;
import org.jooq.impl.DSL;
import org.slf4j.Logger;
@@ -88,7 +86,6 @@ public final class CategoryTableImpl implements CategoryTable {
private final DSLContext ctx;
private final Table> originTable;
private final long bloomTermId;
- private final CategoryTableCondition tableCondition;
private final TableFilters tableFilters;
public CategoryTableImpl(ConditionConfig config, Table> originTable, String value) {
@@ -96,32 +93,18 @@ public CategoryTableImpl(ConditionConfig config, Table> originTable, String va
config.context(),
originTable,
config.bloomTermId(),
- new CategoryTableCondition(originTable, config.bloomTermId()),
new TableFilters(config.context(), originTable, config.bloomTermId(), value)
);
}
public CategoryTableImpl(DSLContext ctx, Table> originTable, long bloomTermId, String value) {
- this(
- ctx,
- originTable,
- bloomTermId,
- new CategoryTableCondition(originTable, bloomTermId),
- new TableFilters(ctx, originTable, bloomTermId, value)
- );
+ this(ctx, originTable, bloomTermId, new TableFilters(ctx, originTable, bloomTermId, value));
}
- public CategoryTableImpl(
- DSLContext ctx,
- Table> originTable,
- long bloomTermId,
- CategoryTableCondition tableCondition,
- TableFilters tableFilters
- ) {
+ public CategoryTableImpl(DSLContext ctx, Table> originTable, long bloomTermId, TableFilters tableFilters) {
this.ctx = ctx;
this.originTable = originTable;
this.bloomTermId = bloomTermId;
- this.tableCondition = tableCondition;
this.tableFilters = tableFilters;
}
@@ -144,19 +127,6 @@ public void create() {
indexStep.execute();
}
- public void insertFilters() {
- tableFilters.insertFiltersIntoCategoryTable();
- }
-
- /**
- * Row condition that selects the same sized filter arrays from this category table and the origin table.
- *
- * @return condition
- */
- public QueryCondition bloommatchCondition() {
- return tableCondition;
- }
-
/**
* Equal only if all object parameters are same value and the instances of DSLContext are same
*
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CreatedCategoryTable.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java
similarity index 76%
rename from src/main/java/com/teragrep/pth_06/planner/bloomfilter/CreatedCategoryTable.java
rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java
index 0090ba97..a664b636 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CreatedCategoryTable.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java
@@ -45,32 +45,32 @@
*/
package com.teragrep.pth_06.planner.bloomfilter;
-import com.teragrep.pth_06.planner.walker.conditions.QueryCondition;
+import org.jooq.DSLContext;
+import org.jooq.Table;
/**
- * Decorator that inserts category tables filter types into database
+ * Decorator that creates the origin category table and then inserts the table's filters into it
*/
-public final class CreatedCategoryTable implements CategoryTable {
+public final class CategoryTableWithFilters implements CategoryTable {
private final CategoryTable origin;
+ private final TableFilters filters;
- public CreatedCategoryTable(final CategoryTable origin) {
+ public CategoryTableWithFilters(DSLContext ctx, Table> origin, long bloomTermId, String searchTerm) {
+ this(
+ new CategoryTableImpl(ctx, origin, bloomTermId, searchTerm),
+ new TableFilters(ctx, origin, bloomTermId, searchTerm)
+ );
+ }
+
+ public CategoryTableWithFilters(CategoryTable origin, TableFilters filters) {
this.origin = origin;
+ this.filters = filters;
}
@Override
public void create() {
origin.create();
- }
-
- @Override
- public void insertFilters() {
- origin.insertFilters();
- }
-
- @Override
- public QueryCondition bloommatchCondition() {
- create();
- return origin.bloommatchCondition();
+ filters.insertFiltersIntoCategoryTable();
}
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java
similarity index 83%
rename from src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java
rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java
index b7b638e7..8ccd9cf4 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java
@@ -45,7 +45,7 @@
*/
package com.teragrep.pth_06.planner.bloomfilter;
-import com.teragrep.pth_06.planner.walker.conditions.PatternMatchCondition;
+import com.teragrep.pth_06.planner.walker.conditions.RegexLikeFiltertypePatternCondition;
import com.teragrep.pth_06.planner.walker.conditions.QueryCondition;
import org.jooq.DSLContext;
import org.jooq.Field;
@@ -61,20 +61,20 @@
/**
* Class to get a collection of Tables that match the given PatternMatchCondition
*/
-public final class PatternMatchTables implements DatabaseTables {
+public final class ConditionMatchBloomDBTables implements DatabaseTables {
- private static final Logger LOGGER = LoggerFactory.getLogger(PatternMatchTables.class);
+ private static final Logger LOGGER = LoggerFactory.getLogger(ConditionMatchBloomDBTables.class);
private final DSLContext ctx;
- private final QueryCondition patternMatchCondition;
+ private final QueryCondition condition;
- public PatternMatchTables(DSLContext ctx, String pattern) {
- this(ctx, new PatternMatchCondition(pattern));
+ public ConditionMatchBloomDBTables(DSLContext ctx, String pattern) {
+ this(ctx, new RegexLikeFiltertypePatternCondition(pattern));
}
- public PatternMatchTables(DSLContext ctx, PatternMatchCondition patternMatchCondition) {
+ public ConditionMatchBloomDBTables(DSLContext ctx, QueryCondition condition) {
this.ctx = ctx;
- this.patternMatchCondition = patternMatchCondition;
+ this.condition = condition;
}
/**
@@ -91,7 +91,7 @@ public List> tables() {
.from(t)
.leftJoin(BLOOMDB.FILTERTYPE)// join filtertype to access patterns
.on(BLOOMDB.FILTERTYPE.ID.eq((Field) t.field("filter_type_id")))
- .where(patternMatchCondition.condition())// select tables that match pattern condition
+ .where(condition.condition())// select tables that match pattern condition
.limit(1)// limit 1 since we are checking only if table is not empty
.fetch()
.isNotEmpty() // select table if not empty
@@ -115,7 +115,7 @@ public boolean equals(final Object object) {
return false;
if (object.getClass() != this.getClass())
return false;
- final PatternMatchTables cast = (PatternMatchTables) object;
- return this.patternMatchCondition.equals(cast.patternMatchCondition) && this.ctx == cast.ctx; // only same instance of DSLContext is equal
+ final ConditionMatchBloomDBTables cast = (ConditionMatchBloomDBTables) object;
+ return this.condition.equals(cast.condition) && this.ctx == cast.ctx; // only same instance of DSLContext is equal
}
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/DatabaseTables.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/DatabaseTables.java
index 92d09133..2a4869f6 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/DatabaseTables.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/DatabaseTables.java
@@ -1,3 +1,48 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
package com.teragrep.pth_06.planner.bloomfilter;
import org.jooq.Table;
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
index 88573f73..c3132c8a 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
@@ -81,7 +81,6 @@ public SearchTermBloomFilter(Long expected, Double fpp, List stringToken
public byte[] bytes() {
LOGGER.debug("Create filter from Record with values: expected <{}>, fpp <{}>", expected, fpp);
-
if (stringTokens.isEmpty()) {
throw new IllegalStateException(
"Trying to insert empty filter, pattern match joined table should always have tokens"
@@ -91,6 +90,15 @@ public byte[] bytes() {
for (final String token : stringTokens) {
filter.put(token);
}
+ if (LOGGER.isWarnEnabled()) {
+ if (stringTokens.size() > expected) {
+ LOGGER
+ .warn(
+ "Number of tokens <{}> was larger than the expected value <{}>, resulting FPP <{}>",
+ stringTokens.size(), expected, filter.expectedFpp()
+ );
+ }
+ }
try (final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream()) {
filter.writeTo(filterBAOS);
return filterBAOS.toByteArray();
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermFiltersInserted.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermFiltersInserted.java
deleted file mode 100644
index a9913cb5..00000000
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermFiltersInserted.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Teragrep Archive Datasource (pth_06)
- * Copyright (C) 2021-2024 Suomen Kanuuna Oy
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see .
- *
- *
- * Additional permission under GNU Affero General Public License version 3
- * section 7
- *
- * If you modify this Program, or any covered work, by linking or combining it
- * with other code, such other code is not for that reason alone subject to any
- * of the requirements of the GNU Affero GPL version 3 as long as this Program
- * is the same Program as licensed from Suomen Kanuuna Oy without any additional
- * modifications.
- *
- * Supplemented terms under GNU Affero General Public License version 3
- * section 7
- *
- * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
- * versions must be marked as "Modified version of" The Program.
- *
- * Names of the licensors and authors may not be used for publicity purposes.
- *
- * No rights are granted for use of trade names, trademarks, or service marks
- * which are in The Program if any.
- *
- * Licensee must indemnify licensors and authors for any liability that these
- * contractual assumptions impose on licensors and authors.
- *
- * To the extent this program is licensed as part of the Commercial versions of
- * Teragrep, the applicable Commercial License may apply to this file if you as
- * a licensee so wish it.
- */
-package com.teragrep.pth_06.planner.bloomfilter;
-
-import com.teragrep.pth_06.planner.walker.conditions.QueryCondition;
-
-/**
- * Decorator that inserts category tables filter types into database
- */
-public final class SearchTermFiltersInserted implements CategoryTable {
-
- private final CategoryTable origin;
-
- public SearchTermFiltersInserted(final CategoryTable origin) {
- this.origin = origin;
- }
-
- @Override
- public void create() {
- origin.create();
- }
-
- @Override
- public void insertFilters() {
- origin.insertFilters();
- }
-
- @Override
- public QueryCondition bloommatchCondition() {
- insertFilters();
- return origin.bloommatchCondition();
- }
-}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java
index 2f8752f4..4b171589 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java
@@ -49,6 +49,8 @@
import org.jooq.impl.DSL;
import org.jooq.types.ULong;
+import java.util.Arrays;
+import java.util.List;
import java.util.Objects;
import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB;
@@ -60,62 +62,70 @@ public final class TableFilterTypesFromMetadata implements TableRecords {
private final DSLContext ctx;
private final Table> table;
- private final long bloomTermId;
+ private final Field expectedField;
+ private final Field fppField;
+ private final Field patternField;
+ private final Field filterTypeIdField;
public TableFilterTypesFromMetadata(DSLContext ctx, Table> table, long bloomTermId) {
+ this(
+ ctx,
+ table.join(BLOOMDB.FILTERTYPE).on(BLOOMDB.FILTERTYPE.ID.eq(table.field("filter_type_id").cast(ULong.class))), DSL.table(DSL.name(("term_" + bloomTermId + "_" + table.getName()))).getName()
+ );
+ }
+
+ public TableFilterTypesFromMetadata(DSLContext ctx, Table> table, String name) {
+ this(
+ ctx,
+ table,
+ DSL.field(DSL.name(name, "expectedElements"), ULong.class),
+ DSL.field(DSL.name(name, "targetFpp"), Double.class),
+ DSL.field(DSL.name(name, "pattern"), String.class)
+ );
+
+ }
+
+ public TableFilterTypesFromMetadata(
+ DSLContext ctx,
+ Table> table,
+ Field expectedField,
+ Field fppField,
+ Field patternField
+ ) {
this.ctx = ctx;
this.table = table;
- this.bloomTermId = bloomTermId;
+ this.expectedField = expectedField;
+ this.fppField = fppField;
+ this.patternField = patternField;
+ this.filterTypeIdField = table.field("filter_type_id").cast(ULong.class);
}
public Result toResult() {
- if (table == null) {
- throw new IllegalStateException("Origin table was null");
- }
- final Table> joined = table
- .join(BLOOMDB.FILTERTYPE)
- .on(BLOOMDB.FILTERTYPE.ID.eq((Field) table.field("filter_type_id")));
- final Table namedTable = DSL.table(DSL.name(("term_" + bloomTermId + "_" + table.getName())));
- final Field expectedField = DSL.field(DSL.name(namedTable.getName(), "expectedElements"), ULong.class);
- final Field fppField = DSL.field(DSL.name(namedTable.getName(), "targetFpp"), Double.class);
- final SelectField>[] resultFields = {
- BLOOMDB.FILTERTYPE.ID,
- joined.field("expectedElements").as(expectedField),
- joined.field("targetFpp").as(fppField),
- joined.field("pattern")
- };
+ List> selectFieldsList = Arrays
+ .asList(BLOOMDB.FILTERTYPE.ID, table.field("expectedElements"), table.field("targetFpp"), table.field("pattern"));
// Fetch filtertype values from metadata
- final Result records = ctx
- .select(resultFields)
- .from(joined)
- .groupBy(joined.field("filter_type_id"))
- .fetch();
+ final Result records = ctx.select(selectFieldsList).from(table).groupBy(filterTypeIdField).fetch();
if (records.isEmpty()) {
throw new RuntimeException("Origin table was empty");
}
return records;
}
- /**
- * Equal only if all object parameters are same value and the instances of DSLContext are same
- *
- * @param object object compared against
- * @return true if all object is same class, object fields are equal and DSLContext is same instance
- */
@Override
public boolean equals(final Object object) {
if (this == object)
return true;
- if (object == null)
- return false;
- if (object.getClass() != this.getClass())
+ if (object == null || getClass() != object.getClass())
return false;
final TableFilterTypesFromMetadata cast = (TableFilterTypesFromMetadata) object;
- return this.bloomTermId == cast.bloomTermId && this.table.equals(cast.table) && this.ctx == cast.ctx;
+ return ctx == cast.ctx && Objects.equals(table, cast.table) && Objects
+ .equals(expectedField, cast.expectedField) && Objects.equals(fppField, cast.fppField) && Objects
+ .equals(patternField, cast.patternField)
+ && Objects.equals(filterTypeIdField, cast.filterTypeIdField);
}
@Override
public int hashCode() {
- return Objects.hash(ctx, table, bloomTermId);
+ return Objects.hash(ctx, table, expectedField, fppField, patternField, filterTypeIdField);
}
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
index 26d6b6e4..51d6c885 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
@@ -55,39 +55,50 @@
import static org.jooq.impl.SQLDataType.BIGINTUNSIGNED;
/**
- * Filter types of a table that can be inserted into the tables category table
+ * Filter types of a table that can be inserted into the category table
*/
public final class TableFilters {
private final TableRecords recordsInMetadata;
private final DSLContext ctx;
private final Table> table;
+ private final Table> categoryTable;
private final String searchTerm;
private final long bloomTermId;
public TableFilters(DSLContext ctx, Table> table, long bloomTermId, String searchTerm) {
- this(new TableFilterTypesFromMetadata(ctx, table, bloomTermId), ctx, table, bloomTermId, searchTerm);
+ this(
+ new TableFilterTypesFromMetadata(ctx, table, bloomTermId),
+ ctx,
+ table,
+ DSL.table(DSL.name(("term_" + bloomTermId + "_" + table.getName()))),
+ bloomTermId,
+ searchTerm
+ );
}
public TableFilters(
TableFilterTypesFromMetadata recordsInMetadata,
DSLContext ctx,
Table> table,
+ Table> categoryTable,
long bloomTermId,
String searchTerm
) {
this.recordsInMetadata = recordsInMetadata;
this.ctx = ctx;
this.table = table;
+ this.categoryTable = categoryTable;
this.bloomTermId = bloomTermId;
this.searchTerm = searchTerm;
}
public void insertFiltersIntoCategoryTable() {
+ if (table == null) {
+ throw new IllegalStateException("Origin table was null");
+ }
final Result result = recordsInMetadata.toResult();
for (final Record record : result) {
- final Table categoryTable = DSL
- .table(DSL.name(("term_" + bloomTermId + "_" + this.table.getName())));
final Field>[] insertFields = {
DSL.field("term_id", BIGINTUNSIGNED.nullable(false)),
DSL.field("type_id", BIGINTUNSIGNED.nullable(false)),
@@ -118,24 +129,26 @@ public void insertFiltersIntoCategoryTable() {
}
/**
- * Equal only if all object parameters are same value and the instances of DSLContext are same
+ * Equal if the compared object is the same instance or if the compared object is of the same class, object fields
+ * are equal, and DSLContext is the same instance
*
* @param object object compared against
- * @return true if all object is same class, object fields are equal and DSLContext is same instance
+ * @return true if equal
*/
@Override
- public boolean equals(Object object) {
+ public boolean equals(final Object object) {
if (this == object)
return true;
if (object == null || getClass() != object.getClass())
return false;
- TableFilters cast = (TableFilters) object;
- return bloomTermId == cast.bloomTermId && recordsInMetadata.equals(cast.recordsInMetadata) && ctx == cast.ctx
- && table.equals(cast.table) && searchTerm.equals(cast.searchTerm);
+ final TableFilters cast = (TableFilters) object;
+ return bloomTermId == cast.bloomTermId && recordsInMetadata
+ .equals(cast.recordsInMetadata) && ctx == cast.ctx && table.equals(cast.table)
+ && categoryTable.equals(cast.categoryTable) && searchTerm.equals(cast.searchTerm);
}
@Override
public int hashCode() {
- return Objects.hash(recordsInMetadata, ctx, table, searchTerm, bloomTermId);
+ return Objects.hash(recordsInMetadata, ctx, table, categoryTable, searchTerm, bloomTermId);
}
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java
index 6d328964..95cd4efc 100644
--- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java
@@ -49,6 +49,8 @@
import org.jooq.impl.DSL;
import org.jooq.types.ULong;
+import java.util.Objects;
+
import static org.jooq.impl.SQLDataType.BIGINTUNSIGNED;
/**
@@ -57,24 +59,38 @@
public final class CategoryTableCondition implements QueryCondition {
private final Table> comparedTo;
- private final long bloomTermId;
+ private final Condition bloomTermCondition;
+ private final Condition typeIdCondition;
+ private final Table categoryTable;
public CategoryTableCondition(Table> comparedTo, long bloomTermId) {
+ this(
+ comparedTo,
+ DSL.field("term_id", BIGINTUNSIGNED.nullable(false)).eq(ULong.valueOf(bloomTermId)),
+ DSL.field("type_id", BIGINTUNSIGNED.nullable(false)).eq((Field) comparedTo.field("filter_type_id")), DSL.table(DSL.name(("term_" + bloomTermId + "_" + comparedTo.getName())))
+ );
+ }
+
+ public CategoryTableCondition(
+ Table> comparedTo,
+ Condition bloomTermCondition,
+ Condition typeIdCondition,
+ Table categoryTable
+ ) {
this.comparedTo = comparedTo;
- this.bloomTermId = bloomTermId;
+ this.bloomTermCondition = bloomTermCondition;
+ this.typeIdCondition = typeIdCondition;
+ this.categoryTable = categoryTable;
}
public Condition condition() {
- final Table categoryTable = DSL.table(DSL.name(("term_" + bloomTermId + "_" + comparedTo.getName())));
- final Field termIdField = DSL.field("term_id", BIGINTUNSIGNED.nullable(false));
- final Field typeIdField = DSL.field("type_id", BIGINTUNSIGNED.nullable(false));
final Field filterField = DSL.field(DSL.name(categoryTable.getName(), "filter"), byte[].class);
// select filter with correct bloom term id and filter type id from category table
final SelectConditionStep> selectFilterStep = DSL
.select(filterField)
.from(categoryTable)
- .where(termIdField.eq(ULong.valueOf(bloomTermId)))
- .and(typeIdField.eq((Field) comparedTo.field("filter_type_id")));
+ .where(bloomTermCondition)
+ .and(typeIdCondition);
// compares category table filter byte[] against bloom filter byte[]
final Condition filterFieldCondition = DSL
.function("bloommatch", Boolean.class, selectFilterStep.asField(), comparedTo.field("filter"))
@@ -88,11 +104,15 @@ public Condition condition() {
public boolean equals(final Object object) {
if (this == object)
return true;
- if (object == null)
- return false;
- if (object.getClass() != this.getClass())
+ if (object == null || getClass() != object.getClass())
return false;
final CategoryTableCondition cast = (CategoryTableCondition) object;
- return this.bloomTermId == cast.bloomTermId && this.comparedTo.equals(cast.comparedTo);
+ return comparedTo.equals(cast.comparedTo) && bloomTermCondition.equals(cast.bloomTermCondition)
+ && typeIdCondition.equals(cast.typeIdCondition) && categoryTable.equals(cast.categoryTable);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(comparedTo, bloomTermCondition, typeIdCondition, categoryTable);
}
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
index 7b36f73c..395f029c 100644
--- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
@@ -83,7 +83,12 @@ public Condition condition() {
}
Condition newCondition = condition;
if (tableSet.isEmpty()) {
- final DatabaseTables patternMatchTables = new PatternMatchTables(config.context(), value);
+ // get all tables whose filtertype pattern matches the search value
+ final QueryCondition regexLikeCondition = new RegexLikeFiltertypePatternCondition(value);
+ final DatabaseTables patternMatchTables = new ConditionMatchBloomDBTables(
+ config.context(),
+ regexLikeCondition
+ );
tableSet.addAll(patternMatchTables.tables());
}
if (!tableSet.isEmpty()) {
@@ -94,11 +99,17 @@ public Condition condition() {
Condition combinedNullFilterCondition = DSL.noCondition();
for (final Table> table : tableSet) {
- final CategoryTable categoryTable = new CreatedCategoryTable(
- new SearchTermFiltersInserted(new CategoryTableImpl(config, table, value))
+ // create a category temp table with filters
+ final CategoryTable categoryTable = new CategoryTableWithFilters(
+ config.context(),
+ table,
+ config.bloomTermId(),
+ value
);
+ categoryTable.create();
+ // build the bloommatch condition that compares this table against its category table
final Condition nullFilterCondition = table.field("filter").isNull();
- final QueryCondition tableCondition = categoryTable.bloommatchCondition();
+ final QueryCondition tableCondition = new CategoryTableCondition(table, config.bloomTermId());
combinedTableCondition = combinedTableCondition.or(tableCondition.condition());
combinedNullFilterCondition = combinedNullFilterCondition.and(nullFilterCondition);
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternCondition.java
similarity index 89%
rename from src/main/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchCondition.java
rename to src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternCondition.java
index ce023b53..f5414e82 100644
--- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchCondition.java
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternCondition.java
@@ -53,15 +53,15 @@
import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB;
/** Condition that is true when the input value matches the regex in BLOOMDB.FILTERTYPE.PATTERN */
-public final class PatternMatchCondition implements QueryCondition {
+public final class RegexLikeFiltertypePatternCondition implements QueryCondition {
private final Field valueField;
- public PatternMatchCondition(String input) {
+ public RegexLikeFiltertypePatternCondition(String input) {
this(DSL.val(input));
}
- public PatternMatchCondition(Field valueField) {
+ public RegexLikeFiltertypePatternCondition(Field valueField) {
this.valueField = valueField;
}
@@ -75,7 +75,7 @@ public boolean equals(final Object object) {
return true;
if (object == null || object.getClass() != this.getClass())
return false;
- final PatternMatchCondition cast = (PatternMatchCondition) object;
+ final RegexLikeFiltertypePatternCondition cast = (RegexLikeFiltertypePatternCondition) object;
return valueField.equals(cast.valueField);
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImplTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImplTest.java
index e0595578..8f27a639 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImplTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImplTest.java
@@ -46,7 +46,6 @@
package com.teragrep.pth_06.planner.bloomfilter;
import org.apache.spark.util.sketch.BloomFilter;
-import org.jooq.Condition;
import org.jooq.DSLContext;
import org.jooq.Table;
import org.jooq.impl.DSL;
@@ -131,19 +130,6 @@ void tearDown() {
});
}
- @Test
- public void testNonCreatedEmptyTable() {
- DSLContext ctx = DSL.using(conn);
- Table> table = ctx
- .meta()
- .filterSchemas(s -> s.getName().equals("bloomdb"))
- .filterTables(t -> !t.getName().equals("filtertype"))
- .getTables()
- .get(0);
-
- Assertions.assertDoesNotThrow(new CategoryTableImpl(ctx, table, 0L, "test")::bloommatchCondition);
- }
-
@Test
public void testCreatedWithEmptyTable() {
DSLContext ctx = DSL.using(conn);
@@ -153,10 +139,11 @@ public void testCreatedWithEmptyTable() {
.filterTables(t -> !t.getName().equals("filtertype"))
.getTables()
.get(0);
-
- CategoryTable tempTable = new CategoryTableImpl(ctx, table, 0L, "test");
- tempTable.create();
- RuntimeException ex = Assertions.assertThrows(RuntimeException.class, tempTable::insertFilters);
+ CategoryTable tempTable = new CategoryTableWithFilters(
+ new CategoryTableImpl(ctx, table, 0L, "test"),
+ new TableFilters(ctx, table, 0L, "test")
+ );
+ RuntimeException ex = Assertions.assertThrows(RuntimeException.class, tempTable::create);
Assertions.assertEquals("Origin table was empty", ex.getMessage());
}
@@ -186,9 +173,8 @@ public void testFilterInsertion() {
.getTables()
.get(0);
- CategoryTable categoryTable = new CategoryTableImpl(ctx, table, 0L, "ip=192.168.1.1");
- Assertions.assertDoesNotThrow(categoryTable::create);
- Assertions.assertDoesNotThrow(categoryTable::insertFilters);
+ CategoryTable tempTable = new CategoryTableWithFilters(ctx, table, 0L, "192.168.1.1");
+ Assertions.assertDoesNotThrow(tempTable::create);
BloomFilter filter = Assertions.assertDoesNotThrow(() -> {
ResultSet rs = conn.prepareStatement("SELECT * FROM term_0_target").executeQuery();
rs.absolute(1);
@@ -201,42 +187,6 @@ public void testFilterInsertion() {
Assertions.assertFalse(filter.mightContain("168.1.1"));
}
- @Test
- public void testConditionGeneration() {
- fillTargetTable();
- DSLContext ctx = DSL.using(conn);
- Table> table = ctx
- .meta()
- .filterSchemas(s -> s.getName().equals("bloomdb"))
- .filterTables(t -> !t.getName().equals("filtertype"))
- .getTables()
- .get(0);
-
- CategoryTableImpl tempTable = new CategoryTableImpl(ctx, table, 0L, "test");
- Condition tableCond = tempTable.bloommatchCondition().condition();
- String e = "(\n" + " bloommatch(\n" + " (\n" + " select \"term_0_target\".\"filter\"\n"
- + " from \"term_0_target\"\n" + " where (\n" + " term_id = 0\n"
- + " and type_id = \"bloomdb\".\"target\".\"filter_type_id\"\n" + " )\n" + " ),\n"
- + " \"bloomdb\".\"target\".\"filter\"\n" + " ) = true\n"
- + " and \"bloomdb\".\"target\".\"filter\" is not null\n" + ")";
- Assertions.assertEquals(e, tableCond.toString());
- }
-
- @Test
- public void testBloomTerm() {
- fillTargetTable();
- DSLContext ctx = DSL.using(conn);
- Table> table = ctx
- .meta()
- .filterSchemas(s -> s.getName().equals("bloomdb"))
- .filterTables(t -> !t.getName().equals("filtertype"))
- .getTables()
- .get(0);
- CategoryTableImpl tempTable = new CategoryTableImpl(ctx, table, 1L, "test");
- Condition condition = tempTable.bloommatchCondition().condition();
- Assertions.assertTrue(condition.toString().contains("term_1_"));
- }
-
@Test
public void testEquality() {
fillTargetTable();
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTablesTest.java
similarity index 86%
rename from src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java
rename to src/test/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTablesTest.java
index 5313a4c4..5aae238c 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTablesTest.java
@@ -62,7 +62,7 @@
import java.util.stream.Collectors;
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
-public class PatternMatchTablesTest {
+public class ConditionMatchBloomDBTablesTest {
final String url = "jdbc:h2:mem:test;MODE=MariaDB;DATABASE_TO_LOWER=TRUE;CASE_INSENSITIVE_IDENTIFIERS=TRUE";
final String userName = "sa";
@@ -138,8 +138,8 @@ void tearDown() {
public void testSingleMatch() {
DSLContext ctx = DSL.using(conn);
String input = "192.168.1.1";
- PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input);
- List> result = patternMatchTables.tables();
+ ConditionMatchBloomDBTables conditionMatchBloomDBTables = new ConditionMatchBloomDBTables(ctx, input);
+ List> result = conditionMatchBloomDBTables.tables();
Assertions.assertEquals(1, result.size());
Assertions.assertEquals("pattern_test_ip", result.get(0).getName());
}
@@ -148,8 +148,8 @@ public void testSingleMatch() {
public void testSearchTermTokenizedMatch() {
DSLContext ctx = DSL.using(conn);
String input = "target_ip=192.168.1.1";
- PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input);
- List> result = patternMatchTables.tables();
+ ConditionMatchBloomDBTables conditionMatchBloomDBTables = new ConditionMatchBloomDBTables(ctx, input);
+ List> result = conditionMatchBloomDBTables.tables();
Assertions.assertEquals(1, result.size());
Assertions.assertEquals("pattern_test_ip", result.get(0).getName());
}
@@ -158,8 +158,8 @@ public void testSearchTermTokenizedMatch() {
public void testRegexMatch() {
DSLContext ctx = DSL.using(conn);
String input = "biz baz boz data has no content today (very important though) but it would still have if one had a means to extract it from (here is something else important as well) the strange patterns called parentheses that it seems to have been put in.";
- PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input);
- List> result = patternMatchTables.tables();
+ ConditionMatchBloomDBTables conditionMatchBloomDBTables = new ConditionMatchBloomDBTables(ctx, input);
+ List> result = conditionMatchBloomDBTables.tables();
Assertions.assertEquals(1, result.size());
Assertions.assertEquals("parentheses_test", result.get(0).getName());
}
@@ -168,9 +168,9 @@ public void testRegexMatch() {
public void testMultipleMatch() {
DSLContext ctx = DSL.using(conn);
String input = "255.255.255.255";
- PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input);
- List> result = patternMatchTables.tables();
- List> result2 = patternMatchTables.tables();
+ ConditionMatchBloomDBTables conditionMatchBloomDBTables = new ConditionMatchBloomDBTables(ctx, input);
+ List> result = conditionMatchBloomDBTables.tables();
+ List> result2 = conditionMatchBloomDBTables.tables();
List tableNames = result.stream().map(Named::getName).collect(Collectors.toList());
Assertions.assertEquals(2, result.size());
Assertions.assertEquals(2, result2.size());
@@ -182,8 +182,8 @@ public void testMultipleMatch() {
public void testNoMatch() {
DSLContext ctx = DSL.using(conn);
String input = "testinput";
- PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input);
- List> result = patternMatchTables.tables();
+ ConditionMatchBloomDBTables conditionMatchBloomDBTables = new ConditionMatchBloomDBTables(ctx, input);
+ List> result = conditionMatchBloomDBTables.tables();
Assertions.assertTrue(result.isEmpty());
}
@@ -191,8 +191,8 @@ public void testNoMatch() {
public void equalsTest() {
DSLContext ctx = DSL.using(conn);
String input = "testinput";
- PatternMatchTables eq1 = new PatternMatchTables(ctx, input);
- PatternMatchTables eq2 = new PatternMatchTables(ctx, input);
+ ConditionMatchBloomDBTables eq1 = new ConditionMatchBloomDBTables(ctx, input);
+ ConditionMatchBloomDBTables eq2 = new ConditionMatchBloomDBTables(ctx, input);
Assertions.assertEquals(eq1, eq2);
Assertions.assertEquals(eq2, eq1);
}
@@ -200,8 +200,8 @@ public void equalsTest() {
@Test
public void differentInputNotEqualsTest() {
DSLContext ctx = DSL.using(conn);
- PatternMatchTables eq1 = new PatternMatchTables(ctx, "testinput");
- PatternMatchTables eq2 = new PatternMatchTables(ctx, "anotherinput");
+ ConditionMatchBloomDBTables eq1 = new ConditionMatchBloomDBTables(ctx, "testinput");
+ ConditionMatchBloomDBTables eq2 = new ConditionMatchBloomDBTables(ctx, "anotherinput");
Assertions.assertNotEquals(eq1, eq2);
Assertions.assertNotEquals(eq2, eq1);
}
@@ -210,8 +210,8 @@ public void differentInputNotEqualsTest() {
public void differentDSLContextNotEqualsTest() {
DSLContext ctx1 = DSL.using(conn);
DSLContext ctx2 = DSL.using(conn);
- PatternMatchTables eq1 = new PatternMatchTables(ctx1, "testinput");
- PatternMatchTables eq2 = new PatternMatchTables(ctx2, "testinput");
+ ConditionMatchBloomDBTables eq1 = new ConditionMatchBloomDBTables(ctx1, "testinput");
+ ConditionMatchBloomDBTables eq2 = new ConditionMatchBloomDBTables(ctx2, "testinput");
Assertions.assertNotEquals(eq1, eq2);
Assertions.assertNotEquals(eq2, eq1);
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java
index 27932eb4..9c0ea43d 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java
@@ -45,6 +45,7 @@
*/
package com.teragrep.pth_06.planner.bloomfilter;
+import nl.altindag.log.LogCaptor;
import nl.jqno.equalsverifier.EqualsVerifier;
import org.apache.spark.util.sketch.BloomFilter;
import org.junit.jupiter.api.Assertions;
@@ -115,6 +116,18 @@ public void testTokenizerTokens() {
Assertions.assertTrue(resultFilter.mightContain("SearchValuePatternInThisString"));
}
+ @Test
+ public void testTokensSizeTooLarge() {
+ LogCaptor captor = Assertions.assertDoesNotThrow(() -> LogCaptor.forClass(SearchTermBloomFilter.class));
+ String searchTerm = "";
+ SearchTermBloomFilter filter = new SearchTermBloomFilter(10L, 0.01, new TokenizedValue(searchTerm));
+ Assertions.assertDoesNotThrow(filter::bytes);
+ String e = "Number of tokens <132> was larger than the expected value <10>, resulting FPP <0.6002870054872016>";
+ String warn = captor.getWarnLogs().get(0);
+ Assertions.assertEquals(warn, e);
+
+ }
+
@Test
public void equalsHashCodeContractTest() {
EqualsVerifier
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java
index b2a99963..fb8be48a 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java
@@ -176,6 +176,12 @@ void testMultipleFilterTypes() {
Assertions.assertEquals(second.get(1), ULong.valueOf("2000"));
}
+ @Test
+ public void testNullTableException() {
+ DSLContext ctx = DSL.using(conn);
+ Assertions.assertThrows(NullPointerException.class, () -> new TableFilterTypesFromMetadata(ctx, null, 0L));
+ }
+
@Test
public void testEquality() {
DSLContext ctx = DSL.using(conn);
@@ -188,7 +194,6 @@ public void testEquality() {
TableFilterTypesFromMetadata result1 = new TableFilterTypesFromMetadata(ctx, table, 0L);
TableFilterTypesFromMetadata result2 = new TableFilterTypesFromMetadata(ctx, table, 0L);
Assertions.assertEquals(result1, result2);
- Assertions.assertEquals(result2, result1);
}
@Test
@@ -202,9 +207,9 @@ public void testNotEquals() {
.get(0);
TableFilterTypesFromMetadata result1 = new TableFilterTypesFromMetadata(ctx, table, 0L);
TableFilterTypesFromMetadata result2 = new TableFilterTypesFromMetadata(ctx, table, 1L);
- TableFilterTypesFromMetadata result3 = new TableFilterTypesFromMetadata(ctx, null, 0L);
+ System.out.println(result1);
+ System.out.println(result2);
Assertions.assertNotEquals(result1, result2);
- Assertions.assertNotEquals(result1, result3);
}
void insertSizedFilterIntoTargetTable(int filterTypeId) {
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
index 19a4e1d2..79b11b44 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
@@ -247,6 +247,7 @@ public void equalsHashCodeContractTest() {
.withNonnullFields("table")
.withNonnullFields("searchTerm")
.withNonnullFields("bloomTermId")
+ .withNonnullFields("categoryTable")
.verify();
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java
index c57e2312..0a911c79 100644
--- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java
@@ -166,6 +166,11 @@ void testBloomTermId() {
Assertions.assertEquals(e, cond.condition().toString());
}
+ @Test
+ public void testNullTableException() {
+ Assertions.assertThrows(NullPointerException.class, () -> new CategoryTableCondition(null, 0L));
+ }
+
@Test
public void testEquality() {
fillTargetTable();
@@ -195,9 +200,7 @@ public void testNonEquality() {
.get(0);
CategoryTableCondition cond1 = new CategoryTableCondition(target1, 0L);
CategoryTableCondition cond2 = new CategoryTableCondition(target1, 1L);
- CategoryTableCondition cond3 = new CategoryTableCondition(null, 1L);
Assertions.assertNotEquals(cond1, cond2);
- Assertions.assertNotEquals(cond1, cond3);
}
void fillTargetTable() {
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternConditionTest.java
similarity index 75%
rename from src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java
rename to src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternConditionTest.java
index 2f089a8e..4cb74da9 100644
--- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternConditionTest.java
@@ -56,40 +56,40 @@
*
* @see org.jooq.QueryPart
*/
-class PatternMatchConditionTest {
+class RegexLikeFiltertypePatternConditionTest {
@Test
void testCondition() {
- Condition condition = new PatternMatchCondition("test").condition();
+ Condition condition = new RegexLikeFiltertypePatternCondition("test").condition();
String e = "('test' like_regex \"bloomdb\".\"filtertype\".\"pattern\")";
Assertions.assertEquals(e, condition.toString());
}
@Test
void testEquality() {
- PatternMatchCondition cond1 = new PatternMatchCondition("test");
- PatternMatchCondition cond2 = new PatternMatchCondition("test");
+ RegexLikeFiltertypePatternCondition cond1 = new RegexLikeFiltertypePatternCondition("test");
+ RegexLikeFiltertypePatternCondition cond2 = new RegexLikeFiltertypePatternCondition("test");
Assertions.assertEquals(cond1, cond2);
}
@Test
void testNotEquals() {
- PatternMatchCondition cond1 = new PatternMatchCondition("test");
- PatternMatchCondition cond2 = new PatternMatchCondition("next");
+ RegexLikeFiltertypePatternCondition cond1 = new RegexLikeFiltertypePatternCondition("test");
+ RegexLikeFiltertypePatternCondition cond2 = new RegexLikeFiltertypePatternCondition("next");
Assertions.assertNotEquals(cond1, cond2);
}
@Test
void testHashCode() {
- PatternMatchCondition cond1 = new PatternMatchCondition("test");
- PatternMatchCondition cond2 = new PatternMatchCondition("test");
- PatternMatchCondition notEq = new PatternMatchCondition("next");
+ RegexLikeFiltertypePatternCondition cond1 = new RegexLikeFiltertypePatternCondition("test");
+ RegexLikeFiltertypePatternCondition cond2 = new RegexLikeFiltertypePatternCondition("test");
+ RegexLikeFiltertypePatternCondition notEq = new RegexLikeFiltertypePatternCondition("next");
Assertions.assertEquals(cond1.hashCode(), cond2.hashCode());
Assertions.assertNotEquals(cond1.hashCode(), notEq.hashCode());
}
@Test
public void equalsHashCodeContractTest() {
- EqualsVerifier.forClass(PatternMatchCondition.class).withNonnullFields("valueField").verify();
+ EqualsVerifier.forClass(RegexLikeFiltertypePatternCondition.class).withNonnullFields("valueField").verify();
}
}
diff --git a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java
index 4fb58634..6ae11d55 100644
--- a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java
+++ b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java
@@ -51,10 +51,9 @@
import org.jooq.impl.DSL;
import org.junit.jupiter.api.*;
+import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.PreparedStatement;
+import java.sql.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -112,7 +111,7 @@ void setup() {
for (String pattern : patternList) {
PreparedStatement filterType = conn.prepareStatement(typeSQL);
filterType.setInt(1, id);
- filterType.setInt(2, 1000);
+ filterType.setInt(2, 1000 * id);
filterType.setDouble(3, 0.01);
filterType.setString(4, pattern);
filterType.executeUpdate();
@@ -339,6 +338,134 @@ void multipleSearchTermTwoAndOneMatchWithoutFiltersTest() {
.assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255")));
}
+ @Test
+ void testSinglePatternMatchTempTableValues() {
+ String q = "";
+ ConditionWalker walker = new ConditionWalker(DSL.using(conn), true);
+ Assertions.assertDoesNotThrow(() -> walker.fromString(q, false));
+ ResultSet result = Assertions
+ .assertDoesNotThrow(() -> conn.prepareStatement("SELECT * FROM term_0_pattern_test_ip").executeQuery());
+ Assertions.assertDoesNotThrow(() -> {
+ int colCount = result.getMetaData().getColumnCount();
+ Assertions.assertEquals(4, colCount);
+ int loops = 0;
+ while (result.next()) {
+ Assertions.assertEquals(1, result.getLong("id"));
+ Assertions.assertEquals(0, result.getLong("term_id"));
+ Assertions.assertEquals(1, result.getLong("type_id"));
+ BloomFilter filter = BloomFilter.readFrom(new ByteArrayInputStream(result.getBytes("filter")));
+ Assertions.assertTrue(filter.mightContain("192.168.1.1"));
+ Assertions.assertFalse(filter.mightContain("192"));
+ Assertions.assertFalse(filter.mightContain("192."));
+ Assertions.assertFalse(filter.mightContain("192.168.1"));
+ loops++;
+ }
+ Assertions.assertEquals(1, loops);
+ });
+ }
+
+ @Test
+ void testMultiplePatternMatchTempTableValues() {
+ String q = "";
+ ConditionWalker walker = new ConditionWalker(DSL.using(conn), true);
+ Assertions.assertDoesNotThrow(() -> walker.fromString(q, false));
+
+ // check term_0_pattern_test_ip table
+ ResultSet result1 = Assertions
+ .assertDoesNotThrow(() -> conn.prepareStatement("SELECT * FROM term_0_pattern_test_ip").executeQuery());
+ Assertions.assertDoesNotThrow(() -> {
+ int colCount = result1.getMetaData().getColumnCount();
+ Assertions.assertEquals(4, colCount);
+ int loops = 0;
+ while (result1.next()) {
+ Assertions.assertEquals(1, result1.getLong("id"));
+ Assertions.assertEquals(0, result1.getLong("term_id"));
+ Assertions.assertEquals(1, result1.getLong("type_id"));
+ BloomFilter filter = BloomFilter.readFrom(new ByteArrayInputStream(result1.getBytes("filter")));
+ Assertions.assertEquals(BloomFilter.create(1000, 0.01).bitSize(), filter.bitSize()); // (expected, actual)
+ Assertions.assertTrue(filter.mightContain("255.255.255.255"));
+ Assertions.assertFalse(filter.mightContain("255"));
+ Assertions.assertFalse(filter.mightContain("255."));
+ Assertions.assertFalse(filter.mightContain("255.255."));
+ loops++;
+ }
+ Assertions.assertEquals(1, loops);
+ });
+
+ // check term_0_pattern_test_ip255 table
+ ResultSet result2 = Assertions
+ .assertDoesNotThrow(() -> conn.prepareStatement("SELECT * FROM term_0_pattern_test_ip255").executeQuery());
+ Assertions.assertDoesNotThrow(() -> {
+ int colCount = result2.getMetaData().getColumnCount();
+ Assertions.assertEquals(4, colCount);
+ int loops = 0;
+ while (result2.next()) {
+ Assertions.assertEquals(1, result2.getLong("id"));
+ Assertions.assertEquals(0, result2.getLong("term_id"));
+ Assertions.assertEquals(2, result2.getLong("type_id"));
+ BloomFilter filter = BloomFilter.readFrom(new ByteArrayInputStream(result2.getBytes("filter")));
+ Assertions.assertEquals(BloomFilter.create(2000, 0.01).bitSize(), filter.bitSize()); // (expected, actual)
+ Assertions.assertTrue(filter.mightContain("255.255.255.255"));
+ Assertions.assertFalse(filter.mightContain("255"));
+ Assertions.assertFalse(filter.mightContain("255."));
+ Assertions.assertFalse(filter.mightContain("255.255."));
+ loops++;
+ }
+ Assertions.assertEquals(1, loops);
+ });
+ }
+
+ @Test
+ void testCorrectTokensForTwoSearchTerms() {
+ ConditionWalker walker = new ConditionWalker(DSL.using(conn), true);
+ String q = "";
+ Assertions.assertDoesNotThrow(() -> walker.fromString(q, false));
+
+ // check term 0
+ ResultSet result1 = Assertions
+ .assertDoesNotThrow(() -> conn.prepareStatement("SELECT * FROM term_0_pattern_test_ip").executeQuery());
+ Assertions.assertDoesNotThrow(() -> {
+ int colCount = result1.getMetaData().getColumnCount();
+ Assertions.assertEquals(4, colCount);
+ int loops = 0;
+ while (result1.next()) {
+ Assertions.assertEquals(1, result1.getLong("id"));
+ Assertions.assertEquals(0, result1.getLong("term_id"));
+ Assertions.assertEquals(1, result1.getLong("type_id"));
+ BloomFilter filter = BloomFilter.readFrom(new ByteArrayInputStream(result1.getBytes("filter")));
+ Assertions.assertEquals(BloomFilter.create(1000, 0.01).bitSize(), filter.bitSize()); // (expected, actual)
+ Assertions.assertTrue(filter.mightContain("192.168.1.1"));
+ Assertions.assertFalse(filter.mightContain("192"));
+ Assertions.assertFalse(filter.mightContain("168."));
+ Assertions.assertFalse(filter.mightContain("1.1"));
+ loops++;
+ }
+ Assertions.assertEquals(1, loops);
+ });
+
+ // check term 1
+ ResultSet result2 = Assertions
+ .assertDoesNotThrow(() -> conn.prepareStatement("SELECT * FROM term_1_pattern_test_ip").executeQuery());
+ Assertions.assertDoesNotThrow(() -> {
+ int colCount = result2.getMetaData().getColumnCount();
+ Assertions.assertEquals(4, colCount);
+ int loops = 0;
+ while (result2.next()) {
+ Assertions.assertEquals(1, result2.getLong("id"));
+ Assertions.assertEquals(1, result2.getLong("term_id"));
+ Assertions.assertEquals(1, result2.getLong("type_id"));
+ BloomFilter filter = BloomFilter.readFrom(new ByteArrayInputStream(result2.getBytes("filter")));
+ Assertions.assertEquals(BloomFilter.create(1000, 0.01).bitSize(), filter.bitSize()); // (expected, actual)
+ Assertions.assertTrue(filter.mightContain("192.000.1.1"));
+ Assertions.assertFalse(filter.mightContain("192"));
+ Assertions.assertFalse(filter.mightContain("000."));
+ Assertions.assertFalse(filter.mightContain("192.000"));
+ loops++;
+ }
+ Assertions.assertEquals(1, loops);
+ });
+ }
+
private void writeFilter(String tableName, int filterId) {
Assertions.assertDoesNotThrow(() -> {
conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
From aefb66a1c7c8aa37197e24bf34756bd16639cd78 Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Tue, 29 Oct 2024 14:35:17 +0200
Subject: [PATCH 15/26] use UncheckedIOException constructor
---
.../pth_06/planner/bloomfilter/SearchTermBloomFilter.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
index c3132c8a..25882752 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
@@ -104,7 +104,7 @@ public byte[] bytes() {
return filterBAOS.toByteArray();
}
catch (IOException e) {
- throw new UncheckedIOException(new IOException("Error writing filter bytes: " + e.getMessage()));
+ throw new UncheckedIOException("Error writing filter bytes: ", e);
}
}
From 73efeb98c17d718bb019ca58f34f15a1e363e891 Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Tue, 29 Oct 2024 14:39:31 +0200
Subject: [PATCH 16/26] TableFilters returns a batch that
CategoryTableWithFilters executes
---
.../bloomfilter/CategoryTableWithFilters.java | 2 +-
.../pth_06/planner/bloomfilter/SafeBatch.java | 74 +++++++++++++++++++
.../planner/bloomfilter/TableFilters.java | 10 ++-
.../planner/bloomfilter/TableFiltersTest.java | 6 +-
4 files changed, 86 insertions(+), 6 deletions(-)
create mode 100644 src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java
index a664b636..9a848513 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java
@@ -71,6 +71,6 @@ public CategoryTableWithFilters(CategoryTable origin, TableFilters filters) {
@Override
public void create() {
origin.create();
- filters.insertFiltersIntoCategoryTable();
+ filters.asBatch().execute();
}
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java
new file mode 100644
index 00000000..c2709af8
--- /dev/null
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java
@@ -0,0 +1,74 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner.bloomfilter;
+
+import org.jooq.Batch;
+import org.jooq.exception.DataAccessException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class SafeBatch {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(SafeBatch.class);
+
+ private final Batch batch;
+
+ public SafeBatch(final Batch batch) {
+ this.batch = batch;
+ }
+
+ public void execute() {
+ try {
+ int[] results = batch.execute();
+ if (LOGGER.isTraceEnabled()) {
+ LOGGER.trace("Batch added <{}> row(s)", results.length);
+ }
+ }
+ catch (final DataAccessException e) {
+ throw new DataAccessException("Error executing batch: " + e, e); // keep the original exception as the cause
+ }
+ }
+}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
index 51d6c885..4fa65dbb 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
@@ -49,6 +49,8 @@
import org.jooq.impl.DSL;
import org.jooq.types.ULong;
+import java.util.ArrayList;
+import java.util.List;
import java.util.Objects;
import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB;
@@ -93,10 +95,11 @@ public TableFilters(
this.searchTerm = searchTerm;
}
- public void insertFiltersIntoCategoryTable() {
+ public SafeBatch asBatch() {
if (table == null) {
throw new IllegalStateException("Origin table was null");
}
+ final List> queryList = new ArrayList<>();
final Result result = recordsInMetadata.toResult();
for (final Record record : result) {
final Field>[] insertFields = {
@@ -124,8 +127,11 @@ public void insertFiltersIntoCategoryTable() {
DSL.val(record.getValue(BLOOMDB.FILTERTYPE.ID), ULong.class),
DSL.val(filter.bytes(), byte[].class)
};
- ctx.insertInto(categoryTable).columns(insertFields).values(valueFields).execute();
+ InsertValuesStepN> query = ctx.insertInto(categoryTable).columns(insertFields).values(valueFields);
+ queryList.add(query);
}
+ final Batch batch = ctx.batch(queryList);
+ return new SafeBatch(batch);
}
/**
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
index 79b11b44..15f05237 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
@@ -150,7 +150,7 @@ public void testInsertFiltersIntoCategoryTable() {
.getTables()
.get(0);
DataAccessException exception = Assertions
- .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, "192.168.1.1").insertFiltersIntoCategoryTable());
+ .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, "192.168.1.1").asBatch().execute());
Assertions.assertTrue(exception.getMessage().contains("term_0_target\" (term_id, type_id, \"filter\")"));
}
@@ -166,7 +166,7 @@ public void testInsertFiltersIntoCategoryTableRegexExtract() {
.get(0);
String query = "biz baz boz data has no content today (very important though) but it would still have if one had a means to extract it from (here is something else important as well) the strange patterns called parentheses that it seems to have been put in.";
DataAccessException exception = Assertions
- .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, query).insertFiltersIntoCategoryTable());
+ .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, query).asBatch().execute());
Assertions.assertTrue(exception.getMessage().contains("term_0_target\" (term_id, type_id, \"filter\")"));
}
@@ -181,7 +181,7 @@ public void testInsertFiltersWithoutPatternMatch() {
.getTables()
.get(0);
IllegalStateException exception = Assertions
- .assertThrows(IllegalStateException.class, () -> new TableFilters(ctx, table, 0L, "nomatch").insertFiltersIntoCategoryTable());
+ .assertThrows(IllegalStateException.class, () -> new TableFilters(ctx, table, 0L, "nomatch").asBatch().execute());
Assertions.assertTrue(exception.getMessage().contains("Trying to insert empty filter"));
}
From 441e40b01565cc59f58cdf131db3cd5ab7ff1176 Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Tue, 29 Oct 2024 15:08:44 +0200
Subject: [PATCH 17/26] add test for SafeBatch
---
.../pth_06/planner/bloomfilter/SafeBatch.java | 4 +-
.../planner/bloomfilter/SafeBatchTest.java | 140 ++++++++++++++++++
2 files changed, 142 insertions(+), 2 deletions(-)
create mode 100644 src/test/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatchTest.java
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java
index c2709af8..60bc56ae 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java
@@ -50,7 +50,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class SafeBatch {
+public final class SafeBatch {
private static final Logger LOGGER = LoggerFactory.getLogger(SafeBatch.class);
@@ -62,7 +62,7 @@ public SafeBatch(final Batch batch) {
public void execute() {
try {
- int[] results = batch.execute();
+ final int[] results = batch.execute();
if (LOGGER.isTraceEnabled()) {
LOGGER.trace("Batch added <{}> row(s)", results.length);
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatchTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatchTest.java
new file mode 100644
index 00000000..fdbdd266
--- /dev/null
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatchTest.java
@@ -0,0 +1,140 @@
+/*
+ * Teragrep Archive Datasource (pth_06)
+ * Copyright (C) 2021-2024 Suomen Kanuuna Oy
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ *
+ *
+ * Additional permission under GNU Affero General Public License version 3
+ * section 7
+ *
+ * If you modify this Program, or any covered work, by linking or combining it
+ * with other code, such other code is not for that reason alone subject to any
+ * of the requirements of the GNU Affero GPL version 3 as long as this Program
+ * is the same Program as licensed from Suomen Kanuuna Oy without any additional
+ * modifications.
+ *
+ * Supplemented terms under GNU Affero General Public License version 3
+ * section 7
+ *
+ * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
+ * versions must be marked as "Modified version of" The Program.
+ *
+ * Names of the licensors and authors may not be used for publicity purposes.
+ *
+ * No rights are granted for use of trade names, trademarks, or service marks
+ * which are in The Program if any.
+ *
+ * Licensee must indemnify licensors and authors for any liability that these
+ * contractual assumptions impose on licensors and authors.
+ *
+ * To the extent this program is licensed as part of the Commercial versions of
+ * Teragrep, the applicable Commercial License may apply to this file if you as
+ * a licensee so wish it.
+ */
+package com.teragrep.pth_06.planner.bloomfilter;
+
+import org.jooq.DSLContext;
+import org.jooq.exception.DataAccessException;
+import org.jooq.impl.DSL;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.util.ArrayList;
+import java.util.List;
+
+class SafeBatchTest {
+
+ final String url = "jdbc:h2:mem:test;MODE=MariaDB;DATABASE_TO_LOWER=TRUE;CASE_INSENSITIVE_IDENTIFIERS=TRUE";
+ final String userName = "sa";
+ final String password = "";
+ final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password));
+
+ @BeforeEach
+ void setup() {
+ Assertions.assertDoesNotThrow(() -> {
+ conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
+ conn.prepareStatement("USE BLOOMDB").execute();
+ conn.prepareStatement("DROP TABLE IF EXISTS target").execute();
+ String targetTable = "CREATE TABLE `target`("
+ + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+ + " `partition_id` bigint(20) unsigned NOT NULL UNIQUE" + ")";
+ conn.prepareStatement(targetTable).execute();
+ });
+ }
+
+ @Test
+ public void testOneInsert() {
+ String sql = "INSERT INTO target (`partition_id`) VALUES(12345)";
+ DSLContext ctx = DSL.using(conn);
+ SafeBatch batch = new SafeBatch(ctx.batch(ctx.query(sql)));
+ Assertions.assertDoesNotThrow(batch::execute);
+ ResultSet result = Assertions
+ .assertDoesNotThrow(() -> conn.prepareStatement("SELECT `partition_id` FROM target").executeQuery());
+ Assertions.assertDoesNotThrow(() -> {
+ int loops = 0;
+ while (result.next()) {
+ loops++;
+ Assertions.assertEquals(12345L, result.getLong(1));
+ }
+ Assertions.assertEquals(1, loops);
+ });
+ }
+
+ @Test
+ public void testInsertTwo() {
+ String sql1 = "INSERT INTO target (`partition_id`) VALUES(12345)";
+ String sql2 = "INSERT INTO target (`partition_id`) VALUES(54321)";
+ DSLContext ctx = DSL.using(conn);
+ SafeBatch batch = new SafeBatch(ctx.batch(ctx.query(sql1), ctx.query(sql2)));
+ Assertions.assertDoesNotThrow(batch::execute);
+ ResultSet result = Assertions
+ .assertDoesNotThrow(() -> conn.prepareStatement("SELECT `partition_id` FROM target").executeQuery());
+ Assertions.assertDoesNotThrow(() -> {
+ List values = new ArrayList<>();
+ int loops = 0;
+ while (result.next()) {
+ loops++;
+ values.add(result.getLong(1));
+ }
+ Assertions.assertEquals(2, loops);
+ Assertions.assertEquals(2, values.size());
+ Assertions.assertEquals(12345L, values.get(0));
+ Assertions.assertEquals(54321L, values.get(1));
+ });
+ }
+
+ @Test
+ public void testDataAccessException() {
+ String sql1 = "INSERT INTO target (`partition_id`) VALUES(12345)";
+ String sql2 = "INSERT INTO target (`partition_id`) VALUES(12345)";
+ DSLContext ctx = DSL.using(conn);
+ SafeBatch batch = new SafeBatch(ctx.batch(ctx.query(sql1), ctx.query(sql2)));
+ Assertions.assertThrows(DataAccessException.class, batch::execute);
+ ResultSet result = Assertions
+ .assertDoesNotThrow(() -> conn.prepareStatement("SELECT `partition_id` FROM target").executeQuery());
+ Assertions.assertDoesNotThrow(() -> {
+ int loops = 0;
+ while (result.next()) {
+ Assertions.assertEquals(12345L, result.getLong(1));
+ loops++;
+ }
+ Assertions.assertEquals(1, loops);
+ });
+ }
+}
From 4844ebd693e83134dc4a6f92bca4c3b954c99bcf Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Tue, 29 Oct 2024 15:47:13 +0200
Subject: [PATCH 18/26] update comments and clean up code, add constructors for
RegexLikeCondition
---
.../ConditionMatchBloomDBTables.java | 14 ++++----
.../pth_06/planner/bloomfilter/SafeBatch.java | 1 +
.../TableFilterTypesFromMetadata.java | 6 ++++
.../planner/bloomfilter/TableFilters.java | 8 ++---
.../conditions/IndexStatementCondition.java | 4 ++-
...Condition.java => RegexLikeCondition.java} | 32 +++++++++++++++----
...nTest.java => RegexLikeConditionTest.java} | 20 ++++++------
7 files changed, 56 insertions(+), 29 deletions(-)
rename src/main/java/com/teragrep/pth_06/planner/walker/conditions/{RegexLikeFiltertypePatternCondition.java => RegexLikeCondition.java} (71%)
rename src/test/java/com/teragrep/pth_06/planner/walker/conditions/{RegexLikeFiltertypePatternConditionTest.java => RegexLikeConditionTest.java} (75%)
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java
index 8ccd9cf4..1eae8048 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java
@@ -45,7 +45,7 @@
*/
package com.teragrep.pth_06.planner.bloomfilter;
-import com.teragrep.pth_06.planner.walker.conditions.RegexLikeFiltertypePatternCondition;
+import com.teragrep.pth_06.planner.walker.conditions.RegexLikeCondition;
import com.teragrep.pth_06.planner.walker.conditions.QueryCondition;
import org.jooq.DSLContext;
import org.jooq.Field;
@@ -59,7 +59,7 @@
import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB;
/**
- * Class to get a collection of Tables that match the given PatternMatchCondition
+ * Class to get a collection of Tables that match the given QueryCondition
*/
public final class ConditionMatchBloomDBTables implements DatabaseTables {
@@ -69,7 +69,7 @@ public final class ConditionMatchBloomDBTables implements DatabaseTables {
private final QueryCondition condition;
public ConditionMatchBloomDBTables(DSLContext ctx, String pattern) {
- this(ctx, new RegexLikeFiltertypePatternCondition(pattern));
+ this(ctx, new RegexLikeCondition(pattern, BLOOMDB.FILTERTYPE.PATTERN));
}
public ConditionMatchBloomDBTables(DSLContext ctx, QueryCondition condition) {
@@ -78,9 +78,9 @@ public ConditionMatchBloomDBTables(DSLContext ctx, QueryCondition condition) {
}
/**
- * List of tables from bloomdb that match patternMatchCondition Note: Table records are not fetched fully
+ * List of tables from bloomdb that match QueryCondition Note: Table records are not fetched fully
*
- * @return List of tables that matched condition and were not empty
+ * @return List of tables that matched QueryCondition and were not empty
*/
public List> tables() {
final List> tables = ctx
@@ -91,8 +91,8 @@ public List> tables() {
.from(t)
.leftJoin(BLOOMDB.FILTERTYPE)// join filtertype to access patterns
.on(BLOOMDB.FILTERTYPE.ID.eq((Field) t.field("filter_type_id")))
- .where(condition.condition())// select tables that match pattern condition
- .limit(1)// limit 1 since we are checking only if table is not empty
+ .where(condition.condition())// select tables that match the condition
+ .limit(1)// limit 1 since we are checking only if the table is not empty
.fetch()
.isNotEmpty() // select table if not empty
)
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java
index 60bc56ae..f661dad4 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java
@@ -60,6 +60,7 @@ public SafeBatch(final Batch batch) {
this.batch = batch;
}
+ /** Does not roll back successfully inserted values on exception */
public void execute() {
try {
final int[] results = batch.execute();
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java
index 4b171589..e2cbec46 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java
@@ -111,6 +111,12 @@ public Result toResult() {
return records;
}
+ /**
+ * Equal only if all values are equal and same instance of DSLContext
+ *
+ * @param object object compared against
+ * @return true if all object is same class, object fields are equal and DSLContext is same instance
+ */
@Override
public boolean equals(final Object object) {
if (this == object)
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
index 4fa65dbb..58716b0c 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
@@ -99,7 +99,7 @@ public SafeBatch asBatch() {
if (table == null) {
throw new IllegalStateException("Origin table was null");
}
- final List> queryList = new ArrayList<>();
+ final List> insertValuesStepNList = new ArrayList<>();
final Result result = recordsInMetadata.toResult();
for (final Record record : result) {
final Field>[] insertFields = {
@@ -127,10 +127,10 @@ public SafeBatch asBatch() {
DSL.val(record.getValue(BLOOMDB.FILTERTYPE.ID), ULong.class),
DSL.val(filter.bytes(), byte[].class)
};
- InsertValuesStepN> query = ctx.insertInto(categoryTable).columns(insertFields).values(valueFields);
- queryList.add(query);
+ final InsertValuesStepN> insertStep = ctx.insertInto(categoryTable).columns(insertFields).values(valueFields);
+ insertValuesStepNList.add(insertStep);
}
- final Batch batch = ctx.batch(queryList);
+ final Batch batch = ctx.batch(insertValuesStepNList);
return new SafeBatch(batch);
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
index 395f029c..eeb92f61 100644
--- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
@@ -56,6 +56,8 @@
import java.util.HashSet;
import java.util.Set;
+import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB;
+
public final class IndexStatementCondition implements QueryCondition, BloomQueryCondition {
private final Logger LOGGER = LoggerFactory.getLogger(IndexStatementCondition.class);
@@ -84,7 +86,7 @@ public Condition condition() {
Condition newCondition = condition;
if (tableSet.isEmpty()) {
// get all tables that pattern match with search value
- final QueryCondition regexLikeCondition = new RegexLikeFiltertypePatternCondition(value);
+ final QueryCondition regexLikeCondition = new RegexLikeCondition(value, BLOOMDB.FILTERTYPE.PATTERN);
final DatabaseTables patternMatchTables = new ConditionMatchBloomDBTables(
config.context(),
regexLikeCondition
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeCondition.java
similarity index 71%
rename from src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternCondition.java
rename to src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeCondition.java
index f5414e82..d171acdb 100644
--- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternCondition.java
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeCondition.java
@@ -52,21 +52,39 @@
import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB;
-/** true if BLOOMDB.FILTERTYPE.PATTERN regex like with input value */
-public final class RegexLikeFiltertypePatternCondition implements QueryCondition {
+/** True if the input value matches the comparedTo value with SQL like_regex; compared against BLOOMDB.FILTERTYPE.PATTERN by default */
+public final class RegexLikeCondition implements QueryCondition {
private final Field valueField;
+ private final Field comparedToField;
- public RegexLikeFiltertypePatternCondition(String input) {
- this(DSL.val(input));
+ public RegexLikeCondition(String input) {
+ this(DSL.val(input), BLOOMDB.FILTERTYPE.PATTERN);
}
- public RegexLikeFiltertypePatternCondition(Field valueField) {
+ public RegexLikeCondition(Field input) {
+ this(input, BLOOMDB.FILTERTYPE.PATTERN);
+ }
+
+ public RegexLikeCondition(String input, String comparedTo) {
+ this(DSL.val(input), DSL.val(comparedTo));
+ }
+
+ public RegexLikeCondition(String input, Field comparedTo) {
+ this(DSL.val(input), comparedTo);
+ }
+
+ public RegexLikeCondition(Field input, String comparedTo) {
+ this(input, DSL.val(comparedTo));
+ }
+
+ public RegexLikeCondition(Field valueField, Field comparedToField) {
this.valueField = valueField;
+ this.comparedToField = comparedToField;
}
public Condition condition() {
- return valueField.likeRegex(BLOOMDB.FILTERTYPE.PATTERN);
+ return valueField.likeRegex(comparedToField);
}
@Override
@@ -75,7 +93,7 @@ public boolean equals(final Object object) {
return true;
if (object == null || object.getClass() != this.getClass())
return false;
- final RegexLikeFiltertypePatternCondition cast = (RegexLikeFiltertypePatternCondition) object;
+ final RegexLikeCondition cast = (RegexLikeCondition) object;
return valueField.equals(cast.valueField);
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java
similarity index 75%
rename from src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternConditionTest.java
rename to src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java
index 4cb74da9..1c7e5343 100644
--- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternConditionTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java
@@ -56,40 +56,40 @@
*
* @see org.jooq.QueryPart
*/
-class RegexLikeFiltertypePatternConditionTest {
+class RegexLikeConditionTest {
@Test
void testCondition() {
- Condition condition = new RegexLikeFiltertypePatternCondition("test").condition();
+ Condition condition = new RegexLikeCondition("test").condition();
String e = "('test' like_regex \"bloomdb\".\"filtertype\".\"pattern\")";
Assertions.assertEquals(e, condition.toString());
}
@Test
void testEquality() {
- RegexLikeFiltertypePatternCondition cond1 = new RegexLikeFiltertypePatternCondition("test");
- RegexLikeFiltertypePatternCondition cond2 = new RegexLikeFiltertypePatternCondition("test");
+ RegexLikeCondition cond1 = new RegexLikeCondition("test");
+ RegexLikeCondition cond2 = new RegexLikeCondition("test");
Assertions.assertEquals(cond1, cond2);
}
@Test
void testNotEquals() {
- RegexLikeFiltertypePatternCondition cond1 = new RegexLikeFiltertypePatternCondition("test");
- RegexLikeFiltertypePatternCondition cond2 = new RegexLikeFiltertypePatternCondition("next");
+ RegexLikeCondition cond1 = new RegexLikeCondition("test");
+ RegexLikeCondition cond2 = new RegexLikeCondition("next");
Assertions.assertNotEquals(cond1, cond2);
}
@Test
void testHashCode() {
- RegexLikeFiltertypePatternCondition cond1 = new RegexLikeFiltertypePatternCondition("test");
- RegexLikeFiltertypePatternCondition cond2 = new RegexLikeFiltertypePatternCondition("test");
- RegexLikeFiltertypePatternCondition notEq = new RegexLikeFiltertypePatternCondition("next");
+ RegexLikeCondition cond1 = new RegexLikeCondition("test");
+ RegexLikeCondition cond2 = new RegexLikeCondition("test");
+ RegexLikeCondition notEq = new RegexLikeCondition("next");
Assertions.assertEquals(cond1.hashCode(), cond2.hashCode());
Assertions.assertNotEquals(cond1.hashCode(), notEq.hashCode());
}
@Test
public void equalsHashCodeContractTest() {
- EqualsVerifier.forClass(RegexLikeFiltertypePatternCondition.class).withNonnullFields("valueField").verify();
+ EqualsVerifier.forClass(RegexLikeCondition.class).withNonnullFields("valueField").verify();
}
}
From 145fcb682c68a3499c0905093ff33868ad7b221a Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Tue, 29 Oct 2024 16:06:11 +0200
Subject: [PATCH 19/26] use qualified names update tests
---
.../bloomfilter/SearchTermBloomFilter.java | 4 ++--
.../planner/bloomfilter/TableFilters.java | 18 +++++++++---------
.../walker/conditions/RegexLikeCondition.java | 4 ++--
.../planner/bloomfilter/TableFiltersTest.java | 6 +++---
.../conditions/RegexLikeConditionTest.java | 5 ++++-
5 files changed, 20 insertions(+), 17 deletions(-)
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
index 25882752..17c24b9c 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
@@ -109,12 +109,12 @@ public byte[] bytes() {
}
@Override
- public boolean equals(Object object) {
+ public boolean equals(final Object object) {
if (this == object)
return true;
if (object == null || getClass() != object.getClass())
return false;
- SearchTermBloomFilter cast = (SearchTermBloomFilter) object;
+ final SearchTermBloomFilter cast = (SearchTermBloomFilter) object;
return expected.equals(cast.expected) && fpp.equals(cast.fpp) && stringTokens.equals(cast.stringTokens);
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
index 58716b0c..660be480 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
@@ -64,7 +64,7 @@ public final class TableFilters {
private final TableRecords recordsInMetadata;
private final DSLContext ctx;
private final Table> table;
- private final Table> categoryTable;
+ private final Table> thisTable;
private final String searchTerm;
private final long bloomTermId;
@@ -83,14 +83,14 @@ public TableFilters(
TableFilterTypesFromMetadata recordsInMetadata,
DSLContext ctx,
Table> table,
- Table> categoryTable,
+ Table> thisTable,
long bloomTermId,
String searchTerm
) {
this.recordsInMetadata = recordsInMetadata;
this.ctx = ctx;
this.table = table;
- this.categoryTable = categoryTable;
+ this.thisTable = thisTable;
this.bloomTermId = bloomTermId;
this.searchTerm = searchTerm;
}
@@ -103,9 +103,9 @@ public SafeBatch asBatch() {
final Result result = recordsInMetadata.toResult();
for (final Record record : result) {
final Field>[] insertFields = {
- DSL.field("term_id", BIGINTUNSIGNED.nullable(false)),
- DSL.field("type_id", BIGINTUNSIGNED.nullable(false)),
- DSL.field(DSL.name(categoryTable.getName(), "filter"), byte[].class)
+ DSL.field(DSL.name(thisTable.getName(), "term_id"), BIGINTUNSIGNED.nullable(false)),
+ DSL.field(DSL.name(thisTable.getName(), "type_id"), BIGINTUNSIGNED.nullable(false)),
+ DSL.field(DSL.name(thisTable.getName(), "filter"), byte[].class)
};
final ULong expectedField = record
.getValue(DSL.field(DSL.name(table.getName(), "expectedElements"), ULong.class));
@@ -127,7 +127,7 @@ public SafeBatch asBatch() {
DSL.val(record.getValue(BLOOMDB.FILTERTYPE.ID), ULong.class),
DSL.val(filter.bytes(), byte[].class)
};
- final InsertValuesStepN> insertStep = ctx.insertInto(categoryTable).columns(insertFields).values(valueFields);
+ final InsertValuesStepN> insertStep = ctx.insertInto(thisTable).columns(insertFields).values(valueFields);
insertValuesStepNList.add(insertStep);
}
final Batch batch = ctx.batch(insertValuesStepNList);
@@ -150,11 +150,11 @@ public boolean equals(final Object object) {
final TableFilters cast = (TableFilters) object;
return bloomTermId == cast.bloomTermId && recordsInMetadata
.equals(cast.recordsInMetadata) && ctx == cast.ctx && table.equals(cast.table)
- && categoryTable.equals(cast.categoryTable) && searchTerm.equals(cast.searchTerm);
+ && thisTable.equals(cast.thisTable) && searchTerm.equals(cast.searchTerm);
}
@Override
public int hashCode() {
- return Objects.hash(recordsInMetadata, ctx, table, categoryTable, searchTerm, bloomTermId);
+ return Objects.hash(recordsInMetadata, ctx, table, thisTable, searchTerm, bloomTermId);
}
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeCondition.java
index d171acdb..9026227b 100644
--- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeCondition.java
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeCondition.java
@@ -94,11 +94,11 @@ public boolean equals(final Object object) {
if (object == null || object.getClass() != this.getClass())
return false;
final RegexLikeCondition cast = (RegexLikeCondition) object;
- return valueField.equals(cast.valueField);
+ return valueField.equals(cast.valueField) && comparedToField.equals(cast.comparedToField);
}
@Override
public int hashCode() {
- return Objects.hash(valueField);
+ return Objects.hash(valueField, comparedToField);
}
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
index 15f05237..48b6f7bf 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
@@ -151,7 +151,7 @@ public void testInsertFiltersIntoCategoryTable() {
.get(0);
DataAccessException exception = Assertions
.assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, "192.168.1.1").asBatch().execute());
- Assertions.assertTrue(exception.getMessage().contains("term_0_target\" (term_id, type_id, \"filter\")"));
+ Assertions.assertTrue(exception.getMessage().contains("insert into \"term_0_target\" (\"term_id\", \"type_id\", \"filter\") values"));
}
@Test
@@ -167,7 +167,7 @@ public void testInsertFiltersIntoCategoryTableRegexExtract() {
String query = "biz baz boz data has no content today (very important though) but it would still have if one had a means to extract it from (here is something else important as well) the strange patterns called parentheses that it seems to have been put in.";
DataAccessException exception = Assertions
.assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, query).asBatch().execute());
- Assertions.assertTrue(exception.getMessage().contains("term_0_target\" (term_id, type_id, \"filter\")"));
+ Assertions.assertTrue(exception.getMessage().contains("\"term_0_target\" (\"term_id\", \"type_id\", \"filter\")"));
}
@Test
@@ -247,7 +247,7 @@ public void equalsHashCodeContractTest() {
.withNonnullFields("table")
.withNonnullFields("searchTerm")
.withNonnullFields("bloomTermId")
- .withNonnullFields("categoryTable")
+ .withNonnullFields("thisTable")
.verify();
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java
index 1c7e5343..e8652458 100644
--- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java
@@ -90,6 +90,9 @@ void testHashCode() {
@Test
public void equalsHashCodeContractTest() {
- EqualsVerifier.forClass(RegexLikeCondition.class).withNonnullFields("valueField").verify();
+ EqualsVerifier.forClass(RegexLikeCondition.class)
+ .withNonnullFields("valueField")
+ .withNonnullFields("comparedToField")
+ .verify();
}
}
From 40c21654bbb485586d2d56d4528cc55ead9b16a9 Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Wed, 30 Oct 2024 12:47:54 +0200
Subject: [PATCH 20/26] more descriptive naming of methods and variables,
update comments, javadoc and exception messages
---
.../pth_06/planner/StreamDBClient.java | 3 +-
.../bloomfilter/CategoryTableImpl.java | 5 ++-
.../ConditionMatchBloomDBTables.java | 7 +--
.../bloomfilter/RegexExtractedValue.java | 2 +-
.../bloomfilter/SearchTermBloomFilter.java | 6 +--
.../TableFilterTypesFromMetadata.java | 5 ++-
.../planner/bloomfilter/TableFilters.java | 3 --
.../planner/bloomfilter/TokensAsStrings.java | 16 +++++++
.../planner/walker/ConditionWalker.java | 12 ++---
.../conditions/BloomQueryCondition.java | 2 +-
.../conditions/CategoryTableCondition.java | 4 +-
.../walker/conditions/ElementCondition.java | 7 +--
.../conditions/IndexStatementCondition.java | 2 +-
.../SearchTermBloomFilterTest.java | 8 ++--
.../planner/bloomfilter/TableFiltersTest.java | 5 ++-
.../CategoryTableConditionTest.java | 2 +-
.../conditions/EarliestConditionTest.java | 2 +-
.../conditions/ElementConditionTest.java | 2 +-
.../IndexStatementConditionTest.java | 12 ++---
.../pth_06/walker/ConditionWalkerTest.java | 44 +++++++++----------
20 files changed, 82 insertions(+), 67 deletions(-)
diff --git a/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java b/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java
index 57084f3b..ccae7ea6 100644
--- a/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java
+++ b/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java
@@ -344,7 +344,8 @@ private Table getTableStatement(Condition journaldbCondition, Date day)
.on(JOURNALDB.LOGFILE.HOST_ID.eq(GetArchivedObjectsFilterTable.host_id).and(JOURNALDB.LOGFILE.LOGTAG.eq(GetArchivedObjectsFilterTable.tag)));
if (bloomEnabled) {
- final Set> tables = walker.patternMatchTables();
+ // join all tables needed for the condition generated by walker
+ final Set> tables = walker.conditionRequiredTables();
if (!tables.isEmpty()) {
for (final Table> table : tables) {
if (LOGGER.isInfoEnabled()) {
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java
index 45b2a767..02c4b72f 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java
@@ -128,10 +128,11 @@ public void create() {
}
/**
- * Equal only if all object parameters are same value and the instances of DSLContext are same
+ * Equal if the compared object is the same instance or if the compared object is of the same class, object fields
+ * are equal, and DSLContext is the same instance
*
* @param object object compared against
- * @return true if all object is same class, object fields are equal and DSLContext is same instance
+ * @return true if equal
*/
@Override
public boolean equals(final Object object) {
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java
index 1eae8048..654537b3 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java
@@ -102,10 +102,11 @@ public List> tables() {
}
/**
- * Equal only if all values are equal and same instance of DSLContext
+ * Equal if the compared object is the same instance or if the compared object is of the same class, object fields
+ * are equal, and DSLContext is the same instance
*
* @param object object compared against
- * @return true if all object is same class, object fields are equal and DSLContext is same instance
+ * @return true if equal
*/
@Override
public boolean equals(final Object object) {
@@ -116,6 +117,6 @@ public boolean equals(final Object object) {
if (object.getClass() != this.getClass())
return false;
final ConditionMatchBloomDBTables cast = (ConditionMatchBloomDBTables) object;
- return this.condition.equals(cast.condition) && this.ctx == cast.ctx; // only same instance of DSLContext is equal
+ return this.condition.equals(cast.condition) && this.ctx == cast.ctx;
}
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java
index bf3cfbd9..a723a0dc 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java
@@ -79,7 +79,7 @@ public boolean equals(final Object object) {
return true;
if (object == null || getClass() != object.getClass())
return false;
- RegexExtractedValue cast = (RegexExtractedValue) object;
+ final RegexExtractedValue cast = (RegexExtractedValue) object;
return value.equals(cast.value) && pattern.equals(cast.pattern);
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
index 17c24b9c..280d3e89 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
@@ -83,7 +83,7 @@ public byte[] bytes() {
LOGGER.debug("Create filter from Record with values: expected <{}>, fpp <{}>", expected, fpp);
if (stringTokens.isEmpty()) {
throw new IllegalStateException(
- "Trying to insert empty filter, pattern match joined table should always have tokens"
+ "Tried to create a filter without any items"
);
}
final BloomFilter filter = BloomFilter.create(expected, fpp);
@@ -94,7 +94,7 @@ public byte[] bytes() {
if (stringTokens.size() > expected) {
LOGGER
.warn(
- "Number of tokens <{}> was larger than the expected value <{}>, resulting FPP <{}>",
+ "Number of items <{}> was larger than the expected number of items <{}>, resulting FPP <{}>",
stringTokens.size(), expected, filter.expectedFpp()
);
}
@@ -103,7 +103,7 @@ public byte[] bytes() {
filter.writeTo(filterBAOS);
return filterBAOS.toByteArray();
}
- catch (IOException e) {
+ catch (final IOException e) {
throw new UncheckedIOException("Error writing filter bytes: ", e);
}
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java
index e2cbec46..a5efa749 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java
@@ -112,10 +112,11 @@ public Result toResult() {
}
/**
- * Equal only if all values are equal and same instance of DSLContext
+ * Equal if the compared object is the same instance or if the compared object is of the same class, object fields
+ * are equal, and DSLContext is the same instance
*
* @param object object compared against
- * @return true if all object is same class, object fields are equal and DSLContext is same instance
+ * @return true if equal
*/
@Override
public boolean equals(final Object object) {
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
index 660be480..1d3f03d3 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
@@ -96,9 +96,6 @@ public TableFilters(
}
public SafeBatch asBatch() {
- if (table == null) {
- throw new IllegalStateException("Origin table was null");
- }
final List> insertValuesStepNList = new ArrayList<>();
final Result result = recordsInMetadata.toResult();
for (final Record record : result) {
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStrings.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStrings.java
index 9dea7ef0..b0c9dce8 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStrings.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStrings.java
@@ -48,6 +48,7 @@
import com.teragrep.blf_01.Token;
import java.util.List;
+import java.util.Objects;
import java.util.stream.Collectors;
public final class TokensAsStrings implements Tokenizable {
@@ -62,4 +63,19 @@ public TokensAsStrings(Tokenizable origin) {
public List tokens() {
return origin.tokens().stream().map(Token::toString).collect(Collectors.toList());
}
+
+ @Override
+ public boolean equals(final Object object) {
+ if (this == object)
+ return true;
+ if (object == null || object.getClass() != this.getClass())
+ return false;
+ final TokensAsStrings cast = (TokensAsStrings) object;
+ return origin.equals(cast.origin);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(origin);
+ }
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/ConditionWalker.java b/src/main/java/com/teragrep/pth_06/planner/walker/ConditionWalker.java
index 24ceffd1..ad85fa15 100644
--- a/src/main/java/com/teragrep/pth_06/planner/walker/ConditionWalker.java
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/ConditionWalker.java
@@ -101,12 +101,8 @@ public Condition fromString(String inXml, boolean streamQuery) throws Exception
return fromString(inXml);
}
- /**
- * Set of all the tables that pattern matched with tokenized value search elements the walkers has traversed
- *
- * @return Set of Tables that had a pattern match
- */
- public Set> patternMatchTables() {
+ /** Set of all tables needed to be joined to the query using the condition generated by this walker */
+ public Set> conditionRequiredTables() {
return combinedMatchSet;
}
@@ -165,9 +161,9 @@ Condition emitElem(final Element current) {
new ConditionConfig(ctx, streamQuery, bloomEnabled, withoutFilters, bloomTermId)
);
if (elementCondition.isBloomSearchCondition()) {
- final Set> elementPatternMatchTables = elementCondition.patternMatchTables();
+ final Set> conditionRequiredTables = elementCondition.requiredTables();
// add tables condition found to walker pattern match tables
- patternMatchTables().addAll(elementPatternMatchTables);
+ conditionRequiredTables().addAll(conditionRequiredTables);
bloomTermId++;
}
return elementCondition.condition();
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/BloomQueryCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/BloomQueryCondition.java
index 198260c3..a3a0a360 100644
--- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/BloomQueryCondition.java
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/BloomQueryCondition.java
@@ -53,5 +53,5 @@ public interface BloomQueryCondition {
boolean isBloomSearchCondition();
- Set> patternMatchTables();
+ Set> requiredTables();
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java
index 95cd4efc..f3e10e6d 100644
--- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java
@@ -85,13 +85,13 @@ public CategoryTableCondition(
public Condition condition() {
final Field filterField = DSL.field(DSL.name(categoryTable.getName(), "filter"), byte[].class);
- // select filter with correct bloom term id and filter type id from category table
+ // select filter with the correct bloom term id and filter type id from the category table
final SelectConditionStep> selectFilterStep = DSL
.select(filterField)
.from(categoryTable)
.where(bloomTermCondition)
.and(typeIdCondition);
- // compares category table filter byte[] against bloom filter byte[]
+ // function 'bloommatch' compares category table filter byte[] against bloom filter byte[]
final Condition filterFieldCondition = DSL
.function("bloommatch", Boolean.class, selectFilterStep.asField(), comparedTo.field("filter"))
.eq(true);
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/ElementCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/ElementCondition.java
index 51adb310..4faad3f6 100644
--- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/ElementCondition.java
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/ElementCondition.java
@@ -109,7 +109,7 @@ public Condition condition() {
condition = indexStatement.condition();
}
}
- // bloom search can return condition unmodified
+ // bloom search can return the condition unmodified
if (condition.equals(DSL.noCondition()) && !isBloomSearchCondition()) {
throw new IllegalStateException("Unsupported Element tag " + tag);
}
@@ -124,9 +124,10 @@ public boolean isBloomSearchCondition() {
&& config.bloomEnabled();
}
- public Set> patternMatchTables() {
+ /** A set of tables needed to be joined to the query to use this condition */
+ public Set> requiredTables() {
final String value = element.value();
- return new IndexStatementCondition(value, config).patternMatchTables();
+ return new IndexStatementCondition(value, config).requiredTables();
}
@Override
diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
index eeb92f61..c8fb9505 100644
--- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
+++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java
@@ -131,7 +131,7 @@ public boolean isBloomSearchCondition() {
}
@Override
- public Set> patternMatchTables() {
+ public Set> requiredTables() {
if (tableSet.isEmpty()) {
condition();
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java
index 9c0ea43d..5ede2361 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java
@@ -78,7 +78,7 @@ public void testCorrectFilterSize() {
}
@Test
- public void testNoRegexExtractedTokensException() {
+ public void testEmptyFilterException() {
String searchTerm = "NoMatch";
SearchTermBloomFilter filter = new SearchTermBloomFilter(
1000L,
@@ -86,7 +86,7 @@ public void testNoRegexExtractedTokensException() {
new RegexExtractedValue(searchTerm, "Pattern")
);
IllegalStateException e = Assertions.assertThrows(IllegalStateException.class, filter::bytes);
- String expectedMessage = "Trying to insert empty filter, pattern match joined table should always have tokens";
+ String expectedMessage = "Tried to create a filter without any items";
Assertions.assertEquals(expectedMessage, e.getMessage());
}
@@ -122,9 +122,9 @@ public void testTokensSizeTooLarge() {
String searchTerm = "";
SearchTermBloomFilter filter = new SearchTermBloomFilter(10L, 0.01, new TokenizedValue(searchTerm));
Assertions.assertDoesNotThrow(filter::bytes);
- String e = "Number of tokens <132> was larger than the expected value <10>, resulting FPP <0.6002870054872016>";
+ String e = "Number of items <132> was larger than the expected number of items <10>, resulting FPP <0.6002870054872016>";
String warn = captor.getWarnLogs().get(0);
- Assertions.assertEquals(warn, e);
+ Assertions.assertEquals(e, warn);
}
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
index 48b6f7bf..cf019ced 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
@@ -171,7 +171,7 @@ public void testInsertFiltersIntoCategoryTableRegexExtract() {
}
@Test
- public void testInsertFiltersWithoutPatternMatch() {
+ public void testCreateFilterWithoutItemsException() {
fillTargetTable(1);
DSLContext ctx = DSL.using(conn);
Table> table = ctx
@@ -182,7 +182,8 @@ public void testInsertFiltersWithoutPatternMatch() {
.get(0);
IllegalStateException exception = Assertions
.assertThrows(IllegalStateException.class, () -> new TableFilters(ctx, table, 0L, "nomatch").asBatch().execute());
- Assertions.assertTrue(exception.getMessage().contains("Trying to insert empty filter"));
+ String expected = "Tried to create a filter without any items";
+ Assertions.assertEquals(expected, exception.getMessage());
}
@Test
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java
index 0a911c79..fef658b9 100644
--- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java
@@ -61,7 +61,7 @@
/**
* Comparing Condition equality using toString() since jooq Condition uses just toString() to check for equality.
- * inherited from QueryPart
+ * This equality behavior is inherited from QueryPart.
*
* @see org.jooq.QueryPart
*/
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/EarliestConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/EarliestConditionTest.java
index 352305f3..070fc902 100644
--- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/EarliestConditionTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/EarliestConditionTest.java
@@ -51,7 +51,7 @@
/**
* Comparing Condition equality using toString() since jooq Condition uses just toString() to check for equality.
- * inherited from QueryPart
+ * This equality behavior is inherited from QueryPart.
*
* @see org.jooq.QueryPart
*/
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/ElementConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/ElementConditionTest.java
index 30f614ed..06f3bce9 100644
--- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/ElementConditionTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/ElementConditionTest.java
@@ -61,7 +61,7 @@
/**
* Comparing Condition equality using toString() since jooq Condition uses just toString() to check for equality.
- * inherited from QueryPart
+ * This equality behavior is inherited from QueryPart.
*
* @see org.jooq.QueryPart
*/
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementConditionTest.java
index b180b2dc..818946d1 100644
--- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementConditionTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementConditionTest.java
@@ -160,8 +160,8 @@ void noMatchesTest() {
IndexStatementCondition cond2 = new IndexStatementCondition("test", withoutFiltersConfig, e2);
Assertions.assertEquals(e1, cond1.condition());
Assertions.assertEquals(e2, cond2.condition());
- Assertions.assertTrue(cond1.patternMatchTables().isEmpty());
- Assertions.assertTrue(cond2.patternMatchTables().isEmpty());
+ Assertions.assertTrue(cond1.requiredTables().isEmpty());
+ Assertions.assertTrue(cond2.requiredTables().isEmpty());
}
@Test
@@ -177,7 +177,7 @@ void oneMatchingTableTest() {
+ " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is not null\n" + " )\n"
+ " or \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + ")";
Assertions.assertEquals(e, cond.condition().toString());
- Assertions.assertEquals(1, cond.patternMatchTables().size());
+ Assertions.assertEquals(1, cond.requiredTables().size());
}
@Test
@@ -187,7 +187,7 @@ void testOneMatchWithoutFilters() {
IndexStatementCondition cond = new IndexStatementCondition("192.168.1.1", config);
String e = "\"bloomdb\".\"pattern_test_ip\".\"filter\" is null";
Assertions.assertEquals(e, cond.condition().toString());
- Assertions.assertEquals(1, cond.patternMatchTables().size());
+ Assertions.assertEquals(1, cond.requiredTables().size());
}
@Test
@@ -198,7 +198,7 @@ void testTwoMatchWithoutFilters() {
String e = "(\n" + " \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n"
+ " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is null\n" + ")";
Assertions.assertEquals(e, cond.condition().toString());
- Assertions.assertEquals(2, cond.patternMatchTables().size());
+ Assertions.assertEquals(2, cond.requiredTables().size());
}
@Test
@@ -220,7 +220,7 @@ void twoMatchingTableTest() {
+ " \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n"
+ " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is null\n" + " )\n" + ")";
Assertions.assertEquals(e, cond.condition().toString());
- Assertions.assertEquals(2, cond.patternMatchTables().size());
+ Assertions.assertEquals(2, cond.requiredTables().size());
}
@Test
diff --git a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java
index 6ae11d55..53b96d4a 100644
--- a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java
+++ b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java
@@ -137,7 +137,7 @@ void bloomNoMatchTest() {
String e = "\"getArchivedObjects_filter_table\".\"directory\" like 'haproxy'";
Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false));
Assertions.assertEquals(e, cond.toString());
- Assertions.assertEquals(0, walker.patternMatchTables().size());
+ Assertions.assertEquals(0, walker.conditionRequiredTables().size());
}
@Test
@@ -147,7 +147,7 @@ void bloomNoMatchStreamQueryTest() {
String e = "\"streamdb\".\"stream\".\"directory\" like 'haproxy'";
Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, true));
Assertions.assertEquals(e, cond.toString());
- Assertions.assertEquals(0, walker.patternMatchTables().size());
+ Assertions.assertEquals(0, walker.conditionRequiredTables().size());
}
@Test
@@ -157,7 +157,7 @@ void bloomNoMatchStreamQueryWithoutFiltersTest() {
String e = "\"streamdb\".\"stream\".\"directory\" like 'haproxy'";
Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, true));
Assertions.assertEquals(e, cond.toString());
- Assertions.assertEquals(0, walker.patternMatchTables().size());
+ Assertions.assertEquals(0, walker.conditionRequiredTables().size());
}
@Test
@@ -167,7 +167,7 @@ void singleTablePatternMatchStreamQueryTest() {
String e = "\"streamdb\".\"stream\".\"directory\" like 'haproxy'";
Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, true));
Assertions.assertEquals(e, cond.toString());
- Assertions.assertEquals(0, walker.patternMatchTables().size());
+ Assertions.assertEquals(0, walker.conditionRequiredTables().size());
}
@Test
@@ -184,9 +184,9 @@ void singleTablePatternMatchTest() {
+ " or \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + " )\n" + ")";
Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false));
Assertions.assertEquals(e, cond.toString());
- Assertions.assertEquals(1, walker.patternMatchTables().size());
+ Assertions.assertEquals(1, walker.conditionRequiredTables().size());
Assertions
- .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip")));
+ .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip")));
}
@Test
@@ -197,9 +197,9 @@ void singleTablePatternMatchWithoutFiltersTest() {
+ " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + ")";
Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false));
Assertions.assertEquals(e, cond.toString());
- Assertions.assertEquals(1, walker.patternMatchTables().size());
+ Assertions.assertEquals(1, walker.conditionRequiredTables().size());
Assertions
- .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip")));
+ .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip")));
}
@Test
@@ -222,11 +222,11 @@ void twoTablePatternMatchTest() {
+ " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is null\n" + " )\n" + " )\n" + ")";
Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false));
Assertions.assertEquals(e, cond.toString());
- Assertions.assertEquals(2, walker.patternMatchTables().size());
+ Assertions.assertEquals(2, walker.conditionRequiredTables().size());
Assertions
- .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip")));
+ .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip")));
Assertions
- .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255")));
+ .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255")));
}
@Test
@@ -264,11 +264,11 @@ void twoTablePatternMatchWithoutFiltersTest() {
+ " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is null\n" + ")";
Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false));
Assertions.assertEquals(e, cond.toString());
- Assertions.assertEquals(2, walker.patternMatchTables().size());
+ Assertions.assertEquals(2, walker.conditionRequiredTables().size());
Assertions
- .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip")));
+ .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip")));
Assertions
- .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255")));
+ .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255")));
}
@Test
@@ -284,9 +284,9 @@ void multipleSearchTermTestOneMatchTest() {
+ " or \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + ")";
Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false));
Assertions.assertEquals(e, cond.toString());
- Assertions.assertEquals(1, walker.patternMatchTables().size());
+ Assertions.assertEquals(1, walker.conditionRequiredTables().size());
Assertions
- .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip")));
+ .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip")));
}
@Test
@@ -315,11 +315,11 @@ void multipleSearchTermTwoAndOneMatchTest() {
+ " or \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + " )\n" + ")";
Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false));
Assertions.assertEquals(e, cond.toString());
- Assertions.assertEquals(2, walker.patternMatchTables().size());
+ Assertions.assertEquals(2, walker.conditionRequiredTables().size());
Assertions
- .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip")));
+ .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip")));
Assertions
- .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255")));
+ .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255")));
}
@Test
@@ -331,11 +331,11 @@ void multipleSearchTermTwoAndOneMatchWithoutFiltersTest() {
+ " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + ")";
Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false));
Assertions.assertEquals(e, cond.toString());
- Assertions.assertEquals(2, walker.patternMatchTables().size());
+ Assertions.assertEquals(2, walker.conditionRequiredTables().size());
Assertions
- .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip")));
+ .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip")));
Assertions
- .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255")));
+ .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255")));
}
@Test
From 6513861af794a9ad6143beae2cafc143e208f974 Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Wed, 30 Oct 2024 12:59:30 +0200
Subject: [PATCH 21/26] apply spotless
---
.../bloomfilter/SearchTermBloomFilter.java | 4 +---
.../pth_06/planner/bloomfilter/TableFilters.java | 5 ++---
.../planner/bloomfilter/TableFiltersTest.java | 10 ++++++++--
.../conditions/RegexLikeConditionTest.java | 3 ++-
.../pth_06/walker/ConditionWalkerTest.java | 16 ++++++++++++----
5 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
index 280d3e89..5246446d 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
@@ -82,9 +82,7 @@ public SearchTermBloomFilter(Long expected, Double fpp, List stringToken
public byte[] bytes() {
LOGGER.debug("Create filter from Record with values: expected <{}>, fpp <{}>", expected, fpp);
if (stringTokens.isEmpty()) {
- throw new IllegalStateException(
- "Tried to create a filter without any items"
- );
+ throw new IllegalStateException("Tried to create a filter without any items");
}
final BloomFilter filter = BloomFilter.create(expected, fpp);
for (final String token : stringTokens) {
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
index 1d3f03d3..cd11f431 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
@@ -145,9 +145,8 @@ public boolean equals(final Object object) {
if (object == null || getClass() != object.getClass())
return false;
final TableFilters cast = (TableFilters) object;
- return bloomTermId == cast.bloomTermId && recordsInMetadata
- .equals(cast.recordsInMetadata) && ctx == cast.ctx && table.equals(cast.table)
- && thisTable.equals(cast.thisTable) && searchTerm.equals(cast.searchTerm);
+ return bloomTermId == cast.bloomTermId && recordsInMetadata.equals(cast.recordsInMetadata) && ctx == cast.ctx
+ && table.equals(cast.table) && thisTable.equals(cast.thisTable) && searchTerm.equals(cast.searchTerm);
}
@Override
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
index cf019ced..b46964c9 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
@@ -151,7 +151,12 @@ public void testInsertFiltersIntoCategoryTable() {
.get(0);
DataAccessException exception = Assertions
.assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, "192.168.1.1").asBatch().execute());
- Assertions.assertTrue(exception.getMessage().contains("insert into \"term_0_target\" (\"term_id\", \"type_id\", \"filter\") values"));
+ Assertions
+ .assertTrue(
+ exception
+ .getMessage()
+ .contains("insert into \"term_0_target\" (\"term_id\", \"type_id\", \"filter\") values")
+ );
}
@Test
@@ -167,7 +172,8 @@ public void testInsertFiltersIntoCategoryTableRegexExtract() {
String query = "biz baz boz data has no content today (very important though) but it would still have if one had a means to extract it from (here is something else important as well) the strange patterns called parentheses that it seems to have been put in.";
DataAccessException exception = Assertions
.assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, query).asBatch().execute());
- Assertions.assertTrue(exception.getMessage().contains("\"term_0_target\" (\"term_id\", \"type_id\", \"filter\")"));
+ Assertions
+ .assertTrue(exception.getMessage().contains("\"term_0_target\" (\"term_id\", \"type_id\", \"filter\")"));
}
@Test
diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java
index e8652458..ce2af62c 100644
--- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java
@@ -90,7 +90,8 @@ void testHashCode() {
@Test
public void equalsHashCodeContractTest() {
- EqualsVerifier.forClass(RegexLikeCondition.class)
+ EqualsVerifier
+ .forClass(RegexLikeCondition.class)
.withNonnullFields("valueField")
.withNonnullFields("comparedToField")
.verify();
diff --git a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java
index 53b96d4a..24e756fc 100644
--- a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java
+++ b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java
@@ -226,7 +226,9 @@ void twoTablePatternMatchTest() {
Assertions
.assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip")));
Assertions
- .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255")));
+ .assertTrue(
+ walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255"))
+ );
}
@Test
@@ -268,7 +270,9 @@ void twoTablePatternMatchWithoutFiltersTest() {
Assertions
.assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip")));
Assertions
- .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255")));
+ .assertTrue(
+ walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255"))
+ );
}
@Test
@@ -319,7 +323,9 @@ void multipleSearchTermTwoAndOneMatchTest() {
Assertions
.assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip")));
Assertions
- .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255")));
+ .assertTrue(
+ walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255"))
+ );
}
@Test
@@ -335,7 +341,9 @@ void multipleSearchTermTwoAndOneMatchWithoutFiltersTest() {
Assertions
.assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip")));
Assertions
- .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255")));
+ .assertTrue(
+ walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255"))
+ );
}
@Test
From adc7faf0158887b81fb3f0a58003f44b12f26427 Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Thu, 31 Oct 2024 11:04:38 +0200
Subject: [PATCH 22/26] add missing hashCode() methods
---
.../pth_06/planner/bloomfilter/CategoryTableImpl.java | 7 +++++++
.../planner/bloomfilter/ConditionMatchBloomDBTables.java | 6 ++++++
2 files changed, 13 insertions(+)
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java
index 02c4b72f..d13c2678 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java
@@ -51,6 +51,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.util.Objects;
+
import static org.jooq.impl.SQLDataType.BIGINTUNSIGNED;
/**
@@ -146,4 +148,9 @@ public boolean equals(final Object object) {
return this.originTable.equals(cast.originTable) && this.ctx == cast.ctx && // equal only if same instance of DSLContext
this.bloomTermId == cast.bloomTermId && this.tableFilters.equals(cast.tableFilters);
}
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(ctx, originTable, bloomTermId, tableFilters);
+ }
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java
index 654537b3..3f32b31c 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java
@@ -55,6 +55,7 @@
import org.slf4j.LoggerFactory;
import java.util.List;
+import java.util.Objects;
import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB;
@@ -119,4 +120,9 @@ public boolean equals(final Object object) {
final ConditionMatchBloomDBTables cast = (ConditionMatchBloomDBTables) object;
return this.condition.equals(cast.condition) && this.ctx == cast.ctx;
}
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(ctx, condition);
+ }
}
From 2c01634d768a05b5cb33cf28d7cf8ce8d3378362 Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Thu, 31 Oct 2024 12:12:37 +0200
Subject: [PATCH 23/26] don't wrap jooq.Batch object and execute in
CategoryTableWithFilters, remove sout
---
.../bloomfilter/CategoryTableWithFilters.java | 16 +-
.../pth_06/planner/bloomfilter/SafeBatch.java | 75 ----------
.../planner/bloomfilter/TableFilters.java | 5 +-
.../planner/bloomfilter/SafeBatchTest.java | 140 ------------------
...ableFilterTypesFromMetadataResultTest.java | 2 -
5 files changed, 17 insertions(+), 221 deletions(-)
delete mode 100644 src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java
delete mode 100644 src/test/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatchTest.java
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java
index 9a848513..bfb11559 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java
@@ -45,14 +45,19 @@
*/
package com.teragrep.pth_06.planner.bloomfilter;
+import org.jooq.Batch;
import org.jooq.DSLContext;
import org.jooq.Table;
+import org.jooq.exception.DataAccessException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Decorator that inserts category tables filter types into the table
*/
public final class CategoryTableWithFilters implements CategoryTable {
+ private static final Logger LOGGER = LoggerFactory.getLogger(CategoryTableWithFilters.class);
private final CategoryTable origin;
private final TableFilters filters;
@@ -71,6 +76,15 @@ public CategoryTableWithFilters(CategoryTable origin, TableFilters filters) {
@Override
public void create() {
origin.create();
- filters.asBatch().execute();
+ final Batch batch = filters.asBatch();
+ try {
+ final int[] results = batch.execute();
+ if (LOGGER.isTraceEnabled()) {
+ LOGGER.trace("Batch added <{}> row(s)", results.length);
+ }
+ }
+ catch (final DataAccessException e) {
+ throw new DataAccessException("Error executing batch: " + e);
+ }
}
}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java
deleted file mode 100644
index f661dad4..00000000
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Teragrep Archive Datasource (pth_06)
- * Copyright (C) 2021-2024 Suomen Kanuuna Oy
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see .
- *
- *
- * Additional permission under GNU Affero General Public License version 3
- * section 7
- *
- * If you modify this Program, or any covered work, by linking or combining it
- * with other code, such other code is not for that reason alone subject to any
- * of the requirements of the GNU Affero GPL version 3 as long as this Program
- * is the same Program as licensed from Suomen Kanuuna Oy without any additional
- * modifications.
- *
- * Supplemented terms under GNU Affero General Public License version 3
- * section 7
- *
- * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
- * versions must be marked as "Modified version of" The Program.
- *
- * Names of the licensors and authors may not be used for publicity purposes.
- *
- * No rights are granted for use of trade names, trademarks, or service marks
- * which are in The Program if any.
- *
- * Licensee must indemnify licensors and authors for any liability that these
- * contractual assumptions impose on licensors and authors.
- *
- * To the extent this program is licensed as part of the Commercial versions of
- * Teragrep, the applicable Commercial License may apply to this file if you as
- * a licensee so wish it.
- */
-package com.teragrep.pth_06.planner.bloomfilter;
-
-import org.jooq.Batch;
-import org.jooq.exception.DataAccessException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public final class SafeBatch {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(SafeBatch.class);
-
- private final Batch batch;
-
- public SafeBatch(final Batch batch) {
- this.batch = batch;
- }
-
- /** Does not roll back successfully inserted values on exception */
- public void execute() {
- try {
- final int[] results = batch.execute();
- if (LOGGER.isTraceEnabled()) {
- LOGGER.trace("Batch added <{}> row(s)", results.length);
- }
- }
- catch (final DataAccessException e) {
- throw new DataAccessException("Error executing batch: " + e);
- }
- }
-}
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
index cd11f431..54184424 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java
@@ -95,7 +95,7 @@ public TableFilters(
this.searchTerm = searchTerm;
}
- public SafeBatch asBatch() {
+ public Batch asBatch() {
final List> insertValuesStepNList = new ArrayList<>();
final Result result = recordsInMetadata.toResult();
for (final Record record : result) {
@@ -127,8 +127,7 @@ public SafeBatch asBatch() {
final InsertValuesStepN> insertStep = ctx.insertInto(thisTable).columns(insertFields).values(valueFields);
insertValuesStepNList.add(insertStep);
}
- final Batch batch = ctx.batch(insertValuesStepNList);
- return new SafeBatch(batch);
+ return ctx.batch(insertValuesStepNList);
}
/**
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatchTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatchTest.java
deleted file mode 100644
index fdbdd266..00000000
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatchTest.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Teragrep Archive Datasource (pth_06)
- * Copyright (C) 2021-2024 Suomen Kanuuna Oy
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see .
- *
- *
- * Additional permission under GNU Affero General Public License version 3
- * section 7
- *
- * If you modify this Program, or any covered work, by linking or combining it
- * with other code, such other code is not for that reason alone subject to any
- * of the requirements of the GNU Affero GPL version 3 as long as this Program
- * is the same Program as licensed from Suomen Kanuuna Oy without any additional
- * modifications.
- *
- * Supplemented terms under GNU Affero General Public License version 3
- * section 7
- *
- * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified
- * versions must be marked as "Modified version of" The Program.
- *
- * Names of the licensors and authors may not be used for publicity purposes.
- *
- * No rights are granted for use of trade names, trademarks, or service marks
- * which are in The Program if any.
- *
- * Licensee must indemnify licensors and authors for any liability that these
- * contractual assumptions impose on licensors and authors.
- *
- * To the extent this program is licensed as part of the Commercial versions of
- * Teragrep, the applicable Commercial License may apply to this file if you as
- * a licensee so wish it.
- */
-package com.teragrep.pth_06.planner.bloomfilter;
-
-import org.jooq.DSLContext;
-import org.jooq.exception.DataAccessException;
-import org.jooq.impl.DSL;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
-
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.ResultSet;
-import java.util.ArrayList;
-import java.util.List;
-
-class SafeBatchTest {
-
- final String url = "jdbc:h2:mem:test;MODE=MariaDB;DATABASE_TO_LOWER=TRUE;CASE_INSENSITIVE_IDENTIFIERS=TRUE";
- final String userName = "sa";
- final String password = "";
- final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password));
-
- @BeforeEach
- void setup() {
- Assertions.assertDoesNotThrow(() -> {
- conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
- conn.prepareStatement("USE BLOOMDB").execute();
- conn.prepareStatement("DROP TABLE IF EXISTS target").execute();
- String targetTable = "CREATE TABLE `target`("
- + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
- + " `partition_id` bigint(20) unsigned NOT NULL UNIQUE" + ")";
- conn.prepareStatement(targetTable).execute();
- });
- }
-
- @Test
- public void testOneInsert() {
- String sql = "INSERT INTO target (`partition_id`) VALUES(12345)";
- DSLContext ctx = DSL.using(conn);
- SafeBatch batch = new SafeBatch(ctx.batch(ctx.query(sql)));
- Assertions.assertDoesNotThrow(batch::execute);
- ResultSet result = Assertions
- .assertDoesNotThrow(() -> conn.prepareStatement("SELECT `partition_id` FROM target").executeQuery());
- Assertions.assertDoesNotThrow(() -> {
- int loops = 0;
- while (result.next()) {
- loops++;
- Assertions.assertEquals(12345L, result.getLong(1));
- }
- Assertions.assertEquals(1, loops);
- });
- }
-
- @Test
- public void testInsertTwo() {
- String sql1 = "INSERT INTO target (`partition_id`) VALUES(12345)";
- String sql2 = "INSERT INTO target (`partition_id`) VALUES(54321)";
- DSLContext ctx = DSL.using(conn);
- SafeBatch batch = new SafeBatch(ctx.batch(ctx.query(sql1), ctx.query(sql2)));
- Assertions.assertDoesNotThrow(batch::execute);
- ResultSet result = Assertions
- .assertDoesNotThrow(() -> conn.prepareStatement("SELECT `partition_id` FROM target").executeQuery());
- Assertions.assertDoesNotThrow(() -> {
- List values = new ArrayList<>();
- int loops = 0;
- while (result.next()) {
- loops++;
- values.add(result.getLong(1));
- }
- Assertions.assertEquals(2, loops);
- Assertions.assertEquals(2, values.size());
- Assertions.assertEquals(12345L, values.get(0));
- Assertions.assertEquals(54321L, values.get(1));
- });
- }
-
- @Test
- public void testDataAccessException() {
- String sql1 = "INSERT INTO target (`partition_id`) VALUES(12345)";
- String sql2 = "INSERT INTO target (`partition_id`) VALUES(12345)";
- DSLContext ctx = DSL.using(conn);
- SafeBatch batch = new SafeBatch(ctx.batch(ctx.query(sql1), ctx.query(sql2)));
- Assertions.assertThrows(DataAccessException.class, batch::execute);
- ResultSet result = Assertions
- .assertDoesNotThrow(() -> conn.prepareStatement("SELECT `partition_id` FROM target").executeQuery());
- Assertions.assertDoesNotThrow(() -> {
- int loops = 0;
- while (result.next()) {
- Assertions.assertEquals(12345L, result.getLong(1));
- loops++;
- }
- Assertions.assertEquals(1, loops);
- });
- }
-}
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java
index fb8be48a..f8bd9052 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java
@@ -207,8 +207,6 @@ public void testNotEquals() {
.get(0);
TableFilterTypesFromMetadata result1 = new TableFilterTypesFromMetadata(ctx, table, 0L);
TableFilterTypesFromMetadata result2 = new TableFilterTypesFromMetadata(ctx, table, 1L);
- System.out.println(result1);
- System.out.println(result2);
Assertions.assertNotEquals(result1, result2);
}
From 34efb98e9f61e23d51a73dbdbbffb434602061cd Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Fri, 1 Nov 2024 10:59:01 +0200
Subject: [PATCH 24/26] throw exception if search term filter tokens size
larger than expected, remove logcaptor dependency
---
pom.xml | 6 ------
.../planner/bloomfilter/SearchTermBloomFilter.java | 7 ++++---
.../planner/bloomfilter/SearchTermBloomFilterTest.java | 10 +++-------
3 files changed, 7 insertions(+), 16 deletions(-)
diff --git a/pom.xml b/pom.xml
index af810fab..47ffaa4a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -209,12 +209,6 @@
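<version>3.16.1</version>
<scope>test</scope>
</dependency>
- <dependency>
- <groupId>io.github.hakky54</groupId>
- <artifactId>logcaptor</artifactId>
- <version>2.9.3</version>
- <scope>test</scope>
- </dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>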
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
index 5246446d..03636e53 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
@@ -88,14 +88,15 @@ public byte[] bytes() {
for (final String token : stringTokens) {
filter.put(token);
}
- if (LOGGER.isWarnEnabled()) {
- if (stringTokens.size() > expected) {
+ if (stringTokens.size() > expected) {
+ if (LOGGER.isErrorEnabled()) {
LOGGER
- .warn(
+ .error(
"Number of items <{}> was larger than the expected number of items <{}>, resulting FPP <{}>",
stringTokens.size(), expected, filter.expectedFpp()
);
}
+ throw new IllegalStateException("Number of items was larger than the expected number of items");
}
try (final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream()) {
filter.writeTo(filterBAOS);
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java
index 5ede2361..5a6d252a 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java
@@ -45,7 +45,6 @@
*/
package com.teragrep.pth_06.planner.bloomfilter;
-import nl.altindag.log.LogCaptor;
import nl.jqno.equalsverifier.EqualsVerifier;
import org.apache.spark.util.sketch.BloomFilter;
import org.junit.jupiter.api.Assertions;
@@ -118,14 +117,11 @@ public void testTokenizerTokens() {
@Test
public void testTokensSizeTooLarge() {
- LogCaptor captor = Assertions.assertDoesNotThrow(() -> LogCaptor.forClass(SearchTermBloomFilter.class));
String searchTerm = "";
SearchTermBloomFilter filter = new SearchTermBloomFilter(10L, 0.01, new TokenizedValue(searchTerm));
- Assertions.assertDoesNotThrow(filter::bytes);
- String e = "Number of items <132> was larger than the expected number of items <10>, resulting FPP <0.6002870054872016>";
- String warn = captor.getWarnLogs().get(0);
- Assertions.assertEquals(e, warn);
-
+ IllegalStateException e = Assertions.assertThrows(IllegalStateException.class, filter::bytes);
+ String expected = "Number of items was larger than the expected number of items";
+ Assertions.assertEquals(expected, e.getMessage());
}
@Test
From 7412ec2cf6af6bbcaa3c2f0e87ff920bc9f6eb2c Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Fri, 1 Nov 2024 15:23:19 +0200
Subject: [PATCH 25/26] improve TableFiltersTest and TokensAsStringsTest
---
.../planner/bloomfilter/TableFiltersTest.java | 83 +++++++++++++++----
.../bloomfilter/TokensAsStringsTest.java | 5 ++
2 files changed, 70 insertions(+), 18 deletions(-)
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
index b46964c9..617ddc46 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java
@@ -53,10 +53,12 @@
import org.jooq.impl.DSL;
import org.junit.jupiter.api.*;
+import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
+import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -69,11 +71,9 @@ class TableFiltersTest {
final String password = "";
// matches IPv4
final String ipRegex = "(\\b25[0-5]|\\b2[0-4][0-9]|\\b[01]?[0-9][0-9]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
- // matches IPv4 starting with 255.
- final String ipStartingWith255 = "(\\b25[0-5]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}";
// matches with values surrounded by parentheses
final String parenthesesPattern = "\\((.*?)\\)";
- final List<String> patternList = new ArrayList<>(Arrays.asList(ipRegex, ipStartingWith255, parenthesesPattern));
+ final List<String> patternList = new ArrayList<>(Arrays.asList(ipRegex, parenthesesPattern));
final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password));
@BeforeAll
@@ -109,6 +109,9 @@ void createTargetTable() {
conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute();
conn.prepareStatement("USE BLOOMDB").execute();
conn.prepareStatement("DROP TABLE IF EXISTS target").execute();
+ // drop temp tables created by tests
+ conn.prepareStatement("DROP TABLE IF EXISTS term_0_target").execute();
+ conn.prepareStatement("DROP TABLE IF EXISTS term_1_target").execute();
String targetTable = "CREATE TABLE `target`("
+ " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,"
+ " `partition_id` bigint(20) unsigned NOT NULL UNIQUE,"
@@ -140,7 +143,7 @@ public void testCreation() {
}
@Test
- public void testInsertFiltersIntoCategoryTable() {
+ public void testFilterInsertion() {
fillTargetTable(1);
DSLContext ctx = DSL.using(conn);
Table<?> table = ctx
@@ -149,19 +152,30 @@ public void testInsertFiltersIntoCategoryTable() {
.filterTables(t -> !t.getName().equals("filtertype"))
.getTables()
.get(0);
- DataAccessException exception = Assertions
- .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, "192.168.1.1").asBatch().execute());
- Assertions
- .assertTrue(
- exception
- .getMessage()
- .contains("insert into \"term_0_target\" (\"term_id\", \"type_id\", \"filter\") values")
- );
+ CategoryTable tableImpl = new CategoryTableImpl(ctx, table, 0L, "192.168.1.1");
+ Assertions.assertDoesNotThrow(tableImpl::create);
+ Assertions.assertDoesNotThrow(() -> new TableFilters(ctx, table, 0L, "192.168.1.1").asBatch().execute());
+ ResultSet result = Assertions.assertDoesNotThrow(() -> conn.prepareStatement("SELECT * FROM `term_0_target`").executeQuery());
+ Assertions.assertDoesNotThrow(() -> {
+ int loops = 0;
+ while (result.next()) {
+ long termId = result.getLong("term_id");
+ long typeId = result.getLong("type_id");
+ byte[] filterBytes = result.getBytes("filter");
+ Assertions.assertEquals(0, termId);
+ Assertions.assertEquals(1, typeId);
+ BloomFilter filter = BloomFilter.readFrom(new ByteArrayInputStream(filterBytes));
+ Assertions.assertTrue(filter.mightContain("192.168.1.1"));
+ Assertions.assertFalse(filter.mightContain("192"));
+ loops++;
+ }
+ Assertions.assertEquals(1, loops);
+ });
}
@Test
- public void testInsertFiltersIntoCategoryTableRegexExtract() {
- fillTargetTable(3);
+ public void testFilterInsertionWithRegexExtractedValue() {
+ fillTargetTable(2);
DSLContext ctx = DSL.using(conn);
Table<?> table = ctx
.meta()
@@ -169,11 +183,44 @@ public void testInsertFiltersIntoCategoryTableRegexExtract() {
.filterTables(t -> !t.getName().equals("filtertype"))
.getTables()
.get(0);
- String query = "biz baz boz data has no content today (very important though) but it would still have if one had a means to extract it from (here is something else important as well) the strange patterns called parentheses that it seems to have been put in.";
- DataAccessException exception = Assertions
- .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, query).asBatch().execute());
+ String value = "biz baz boz data has no content today (very important though) but it would still have if one had a means to extract it from (here is something else important as well) the strange patterns called parentheses that it seems to have been put in.";
+ CategoryTable tableImpl = new CategoryTableImpl(ctx, table, 1L, value);
+ Assertions.assertDoesNotThrow(tableImpl::create);
Assertions
- .assertTrue(exception.getMessage().contains("\"term_0_target\" (\"term_id\", \"type_id\", \"filter\")"));
+ .assertDoesNotThrow(() -> new TableFilters(ctx, table, 1L, value).asBatch().execute());
+ ResultSet result = Assertions.assertDoesNotThrow(() -> conn.prepareStatement("SELECT * FROM `term_1_target`").executeQuery());
+ Assertions.assertDoesNotThrow(() -> {
+ int loops = 0;
+ while (result.next()) {
+ long termId = result.getLong("term_id");
+ long typeId = result.getLong("type_id");
+ byte[] filterBytes = result.getBytes("filter");
+ Assertions.assertEquals(1, termId);
+ Assertions.assertEquals(2, typeId);
+ BloomFilter filter = BloomFilter.readFrom(new ByteArrayInputStream(filterBytes));
+ Assertions.assertTrue(filter.mightContain("(here is something else important as well)"));
+ Assertions.assertTrue(filter.mightContain("(very important though)"));
+ Assertions.assertFalse(filter.mightContain("content"));
+ Assertions.assertFalse(filter.mightContain("(very"));
+ Assertions.assertFalse(filter.mightContain("though)"));
+ loops++;
+ }
+ Assertions.assertEquals(1, loops);
+ });
+ }
+
+ @Test
+ public void testMissingTempTableDataAccessException() {
+ fillTargetTable(1);
+ DSLContext ctx = DSL.using(conn);
+ Table<?> table = ctx
+ .meta()
+ .filterSchemas(s -> s.getName().equals("bloomdb"))
+ .filterTables(t -> !t.getName().equals("filtertype"))
+ .getTables()
+ .get(0);
+ DataAccessException ex = Assertions.assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, "192.168.1.1").asBatch().execute());
+ Assertions.assertTrue(ex.getMessage().contains("Table \"term_0_target\" not found"));
}
@Test
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStringsTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStringsTest.java
index ad1330f1..083ae2ae 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStringsTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStringsTest.java
@@ -59,5 +59,10 @@ public void testTokensToStrings() {
Assertions.assertTrue(allTokenClass);
Tokenizable toStrings = new TokensAsStrings(tokenizedValue);
Assertions.assertTrue(toStrings.tokens().contains("one"));
+ Assertions.assertTrue(toStrings.tokens().contains("one."));
+ Assertions.assertTrue(toStrings.tokens().contains("one.two"));
+ Assertions.assertTrue(toStrings.tokens().contains("two"));
+ Assertions.assertTrue(toStrings.tokens().contains("three"));
+ Assertions.assertEquals(16, toStrings.tokens().size());
}
}
From 7dea6f740db44e9d6922b37a1d3eb1c1baf32a7b Mon Sep 17 00:00:00 2001
From: elliVM <47@teragrep.com>
Date: Mon, 11 Nov 2024 10:57:23 +0200
Subject: [PATCH 26/26] allow search term filter tokens to be larger than
expected tokens
---
.../planner/bloomfilter/SearchTermBloomFilter.java | 10 ----------
.../planner/bloomfilter/SearchTermBloomFilterTest.java | 8 +++-----
2 files changed, 3 insertions(+), 15 deletions(-)
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
index 03636e53..a30dc70f 100644
--- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
+++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java
@@ -88,16 +88,6 @@ public byte[] bytes() {
for (final String token : stringTokens) {
filter.put(token);
}
- if (stringTokens.size() > expected) {
- if (LOGGER.isErrorEnabled()) {
- LOGGER
- .error(
- "Number of items <{}> was larger than the expected number of items <{}>, resulting FPP <{}>",
- stringTokens.size(), expected, filter.expectedFpp()
- );
- }
- throw new IllegalStateException("Number of items was larger than the expected number of items");
- }
try (final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream()) {
filter.writeTo(filterBAOS);
return filterBAOS.toByteArray();
diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java
index 5a6d252a..ba9f6fc3 100644
--- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java
+++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java
@@ -116,12 +116,10 @@ public void testTokenizerTokens() {
}
@Test
- public void testTokensSizeTooLarge() {
+ public void testSaturatedSearchTermFilterIsAllowed() {
String searchTerm = "";
- SearchTermBloomFilter filter = new SearchTermBloomFilter(10L, 0.01, new TokenizedValue(searchTerm));
- IllegalStateException e = Assertions.assertThrows(IllegalStateException.class, filter::bytes);
- String expected = "Number of items was larger than the expected number of items";
- Assertions.assertEquals(expected, e.getMessage());
+ SearchTermBloomFilter filter = new SearchTermBloomFilter(1L, 0.01, new TokenizedValue(searchTerm));
+ Assertions.assertDoesNotThrow(filter::bytes);
}
@Test