From b9d273121aa256e3040d7147d88b718063598457 Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Fri, 18 Oct 2024 13:00:34 +0300 Subject: [PATCH 01/26] expect regex extracted tokens in database bloom filters --- pom.xml | 6 + .../pth_06/planner/RegexExtractedValue.java | 93 ++++++++++ .../pth_06/planner/TokenizedValue.java | 32 +++- .../bloomfilter/BloomFilterFromRecord.java | 149 ++++++++++++++++ .../{ => bloomfilter}/CategoryTable.java | 2 +- .../{ => bloomfilter}/CategoryTableImpl.java | 2 +- .../CreatedCategoryTable.java | 2 +- ...terFromRecordToCategoryTableConsumer.java} | 106 +++--------- .../{ => bloomfilter}/PatternMatchTables.java | 2 +- .../SearchTermFiltersInserted.java | 2 +- .../TableFilterTypesFromMetadata.java | 9 +- .../planner/bloomfilter/TableFilters.java | 100 +++++++++++ .../{ => bloomfilter}/TableRecords.java | 2 +- .../conditions/IndexStatementCondition.java | 4 +- .../conditions/PatternMatchCondition.java | 36 ++-- .../BloomFilterFromRecordTest.java | 162 ++++++++++++++++++ .../CategoryTableImplTest.java | 2 +- .../PatternMatchTablesTest.java | 23 ++- ...ableFilterTypesFromMetadataResultTest.java | 2 +- .../{ => bloomfilter}/TableFiltersTest.java | 65 +++++-- .../{ => bloomfilter}/TokenizedValueTest.java | 19 +- .../conditions/PatternMatchConditionTest.java | 27 +-- 22 files changed, 700 insertions(+), 147 deletions(-) create mode 100644 src/main/java/com/teragrep/pth_06/planner/RegexExtractedValue.java create mode 100644 src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java rename src/main/java/com/teragrep/pth_06/planner/{ => bloomfilter}/CategoryTable.java (97%) rename src/main/java/com/teragrep/pth_06/planner/{ => bloomfilter}/CategoryTableImpl.java (99%) rename src/main/java/com/teragrep/pth_06/planner/{ => bloomfilter}/CreatedCategoryTable.java (98%) rename src/main/java/com/teragrep/pth_06/planner/{TableFilters.java => bloomfilter/FilterFromRecordToCategoryTableConsumer.java} (50%) rename 
src/main/java/com/teragrep/pth_06/planner/{ => bloomfilter}/PatternMatchTables.java (99%) rename src/main/java/com/teragrep/pth_06/planner/{ => bloomfilter}/SearchTermFiltersInserted.java (98%) rename src/main/java/com/teragrep/pth_06/planner/{ => bloomfilter}/TableFilterTypesFromMetadata.java (95%) create mode 100644 src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java rename src/main/java/com/teragrep/pth_06/planner/{ => bloomfilter}/TableRecords.java (97%) create mode 100644 src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java rename src/test/java/com/teragrep/pth_06/planner/{ => bloomfilter}/CategoryTableImplTest.java (99%) rename src/test/java/com/teragrep/pth_06/planner/{ => bloomfilter}/PatternMatchTablesTest.java (87%) rename src/test/java/com/teragrep/pth_06/planner/{ => bloomfilter}/TableFilterTypesFromMetadataResultTest.java (99%) rename src/test/java/com/teragrep/pth_06/planner/{ => bloomfilter}/TableFiltersTest.java (78%) rename src/test/java/com/teragrep/pth_06/planner/{ => bloomfilter}/TokenizedValueTest.java (83%) diff --git a/pom.xml b/pom.xml index 9cc7356d..47ffaa4a 100644 --- a/pom.xml +++ b/pom.xml @@ -203,6 +203,12 @@ 2.2.224 test + + nl.jqno.equalsverifier + equalsverifier + 3.16.1 + test + org.apache.kafka kafka-clients diff --git a/src/main/java/com/teragrep/pth_06/planner/RegexExtractedValue.java b/src/main/java/com/teragrep/pth_06/planner/RegexExtractedValue.java new file mode 100644 index 00000000..56a72978 --- /dev/null +++ b/src/main/java/com/teragrep/pth_06/planner/RegexExtractedValue.java @@ -0,0 +1,93 @@ +/* + * Teragrep Archive Datasource (pth_06) + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.pth_06.planner; + +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public final class RegexExtractedValue { + + private final Matcher matcher; + + public RegexExtractedValue(String value, String regex) { + this(value, Pattern.compile(regex)); + } + + public RegexExtractedValue(String value, Pattern pattern) { + this(pattern.matcher(value)); + } + + public RegexExtractedValue(Matcher matcher) { + this.matcher = matcher; + } + + public Set tokens() { + final Set tokens = new HashSet<>(); + while (matcher.find()) { + final String token = matcher.group(); + tokens.add(token); + } + return tokens; + } + + @Override + public boolean equals(final Object object) { + if (this == object) + return true; + if (object == null || object.getClass() != this.getClass()) + return false; + final RegexExtractedValue cast = (RegexExtractedValue) object; + return matcher.equals(cast.matcher); + } + + @Override + public int hashCode() { + return Objects.hash(matcher); + } +} diff --git a/src/main/java/com/teragrep/pth_06/planner/TokenizedValue.java b/src/main/java/com/teragrep/pth_06/planner/TokenizedValue.java index 3d5bc6c1..c9740922 100644 --- a/src/main/java/com/teragrep/pth_06/planner/TokenizedValue.java +++ b/src/main/java/com/teragrep/pth_06/planner/TokenizedValue.java @@ -51,31 +51,47 @@ import java.io.ByteArrayInputStream; import java.nio.charset.StandardCharsets; import java.util.HashSet; +import java.util.Objects; import java.util.Set; +import java.util.stream.Collectors; public final class TokenizedValue { - public final String value; + private final String value; + private final Set tokenSet; public TokenizedValue(String value) { + this( + value, + new HashSet<>(new Tokenizer(32).tokenize(new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8)))) + ); + } + + public TokenizedValue(String value, Set tokenSet) { this.value = value; + this.tokenSet 
= tokenSet; } public Set tokens() { - return new HashSet<>( - new Tokenizer(32).tokenize(new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8))) - ); + return tokenSet; + } + + public Set stringTokens() { + return tokenSet.stream().map(Token::toString).collect(Collectors.toSet()); } @Override public boolean equals(final Object object) { if (this == object) return true; - if (object == null) - return false; - if (object.getClass() != this.getClass()) + if (object == null || object.getClass() != this.getClass()) return false; final TokenizedValue cast = (TokenizedValue) object; - return this.value.equals(cast.value); + return value.equals(cast.value) && tokenSet.equals(cast.tokenSet); + } + + @Override + public int hashCode() { + return Objects.hash(value, tokenSet); } } diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java new file mode 100644 index 00000000..27758266 --- /dev/null +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java @@ -0,0 +1,149 @@ +/* + * Teragrep Archive Datasource (pth_06) + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.pth_06.planner.bloomfilter; + +import com.teragrep.pth_06.planner.RegexExtractedValue; +import com.teragrep.pth_06.planner.TokenizedValue; +import org.apache.spark.util.sketch.BloomFilter; +import org.jooq.Record; +import org.jooq.Table; +import org.jooq.impl.DSL; +import org.jooq.types.ULong; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Objects; +import java.util.Set; + +import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB; + +/** + * Extracts filter type from record, creates a bloom filter and returns the filter's byte array + */ +public final class BloomFilterFromRecord { + + private final Logger LOGGER = LoggerFactory.getLogger(BloomFilterFromRecord.class); + private final Long expected; + private final Double fpp; + private final String pattern; + private final String searchTerm; + + private BloomFilter create() { + if (expected == null || fpp == null) { + LOGGER + .error( + "Null field while creating bloom filter expected <{}>, fpp <{}>, pattern <{}>, search term <{}>", + expected, fpp, pattern, searchTerm + ); + throw new RuntimeException("Object field was null"); + } + final BloomFilter filter = BloomFilter.create(expected, fpp); + // if there is no pattern, fall back to the tokenized value (currently BLOOMDB.FILTERTYPE.PATTERN is NOT NULL) + if (pattern == null) { + LOGGER.info("Table pattern was null using tokenizer to generate tokens"); + new TokenizedValue(searchTerm).stringTokens().forEach(filter::put); + } + else { // get tokens using regex + final Set tokens = new RegexExtractedValue(searchTerm, pattern).tokens(); + LOGGER.info("Insert pattern <{}> tokens to temp table filter <{}>", pattern, tokens); + if (tokens.isEmpty()) { + throw new IllegalStateException( + "Trying to insert empty filter, pattern match joined table should always have tokens" + ); + } + tokens.forEach(filter::put); +
} + return filter; + } + + public BloomFilterFromRecord(Record record, Table table, String searchTerm) { + this( + record.getValue(DSL.field(DSL.name(table.getName(), "expectedElements"), ULong.class)).longValue(), + record.getValue(DSL.field(DSL.name(table.getName(), "targetFpp"), Double.class)), + record.getValue(BLOOMDB.FILTERTYPE.PATTERN, String.class), + searchTerm + ); + } + + public BloomFilterFromRecord(Long expected, Double fpp, String pattern, String searchTerm) { + this.expected = expected; + this.fpp = fpp; + this.pattern = pattern; + this.searchTerm = searchTerm; + } + + public byte[] bytes() { + final BloomFilter filter = create(); + final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream(); + try { + filter.writeTo(filterBAOS); + filterBAOS.close(); + } + catch (IOException e) { + throw new UncheckedIOException(new IOException("Error writing filter bytes: " + e.getMessage(), e)); + } + return filterBAOS.toByteArray(); + } + + @Override + public boolean equals(final Object object) { + if (this == object) + return true; + if (object == null || getClass() != object.getClass()) + return false; + final BloomFilterFromRecord cast = (BloomFilterFromRecord) object; + return Objects.equals(expected, cast.expected) && Objects.equals(fpp, cast.fpp) && Objects.equals(pattern, cast.pattern) + && Objects.equals(searchTerm, cast.searchTerm); + } + + @Override + public int hashCode() { + return Objects.hash(expected, fpp, pattern, searchTerm); + } +} diff --git a/src/main/java/com/teragrep/pth_06/planner/CategoryTable.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTable.java similarity index 97% rename from src/main/java/com/teragrep/pth_06/planner/CategoryTable.java rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTable.java index f7b9618d..5499d781 100644 --- a/src/main/java/com/teragrep/pth_06/planner/CategoryTable.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTable.java @@ -43,7 +43,7 @@ * Teragrep, the
applicable Commercial License may apply to this file if you as * a licensee so wish it. */ -package com.teragrep.pth_06.planner; +package com.teragrep.pth_06.planner.bloomfilter; import com.teragrep.pth_06.planner.walker.conditions.QueryCondition; diff --git a/src/main/java/com/teragrep/pth_06/planner/CategoryTableImpl.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java similarity index 99% rename from src/main/java/com/teragrep/pth_06/planner/CategoryTableImpl.java rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java index 3c0a1c7a..d0c7b079 100644 --- a/src/main/java/com/teragrep/pth_06/planner/CategoryTableImpl.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java @@ -43,7 +43,7 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. */ -package com.teragrep.pth_06.planner; +package com.teragrep.pth_06.planner.bloomfilter; import com.teragrep.pth_06.config.ConditionConfig; import com.teragrep.pth_06.planner.walker.conditions.CategoryTableCondition; diff --git a/src/main/java/com/teragrep/pth_06/planner/CreatedCategoryTable.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CreatedCategoryTable.java similarity index 98% rename from src/main/java/com/teragrep/pth_06/planner/CreatedCategoryTable.java rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/CreatedCategoryTable.java index 86a67441..0090ba97 100644 --- a/src/main/java/com/teragrep/pth_06/planner/CreatedCategoryTable.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CreatedCategoryTable.java @@ -43,7 +43,7 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. 
*/ -package com.teragrep.pth_06.planner; +package com.teragrep.pth_06.planner.bloomfilter; import com.teragrep.pth_06.planner.walker.conditions.QueryCondition; diff --git a/src/main/java/com/teragrep/pth_06/planner/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/FilterFromRecordToCategoryTableConsumer.java similarity index 50% rename from src/main/java/com/teragrep/pth_06/planner/TableFilters.java rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/FilterFromRecordToCategoryTableConsumer.java index d5ff1086..f84e7aed 100644 --- a/src/main/java/com/teragrep/pth_06/planner/TableFilters.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/FilterFromRecordToCategoryTableConsumer.java @@ -43,126 +43,70 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. */ -package com.teragrep.pth_06.planner; +package com.teragrep.pth_06.planner.bloomfilter; -import com.teragrep.blf_01.Token; -import org.apache.spark.util.sketch.BloomFilter; -import org.jooq.*; +import org.jooq.DSLContext; +import org.jooq.Field; +import org.jooq.Record; +import org.jooq.Table; import org.jooq.impl.DSL; import org.jooq.types.ULong; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.UncheckedIOException; -import java.util.regex.Pattern; +import java.util.Objects; +import java.util.function.Consumer; import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB; import static org.jooq.impl.SQLDataType.BIGINTUNSIGNED; -/** - * Filter types of a table that can be inserted into the tables category table - */ -public final class TableFilters { +public final class FilterFromRecordToCategoryTableConsumer implements Consumer { private final DSLContext ctx; private final Table table; private final long bloomTermId; - private final TokenizedValue value; - private final TableRecords recordsInMetadata; - - public TableFilters(DSLContext ctx, Table table, long 
bloomTermId, String input) { - this( - ctx, - table, - bloomTermId, - new TokenizedValue(input), - new TableFilterTypesFromMetadata(ctx, table, bloomTermId) - ); - } + private final String searchTerm; - public TableFilters( + public FilterFromRecordToCategoryTableConsumer( DSLContext ctx, Table table, long bloomTermId, - TokenizedValue value, - TableFilterTypesFromMetadata recordsInMetadata + String searchTerm ) { this.ctx = ctx; this.table = table; this.bloomTermId = bloomTermId; - this.value = value; - this.recordsInMetadata = recordsInMetadata; + this.searchTerm = searchTerm; } - /** - * Extracts filter type from record, creates a bloom filter and returns the filters byte array - * - * @param record record with filter info - * @return byte[] of the created filter - */ - private byte[] filterBytesFromRecord(final Record record) { - final ULong expected = record.getValue(DSL.field(DSL.name(table.getName(), "expectedElements"), ULong.class)); - final Double fpp = record.getValue(DSL.field(DSL.name(table.getName(), "targetFpp"), Double.class)); - final String pattern = record.getValue(BLOOMDB.FILTERTYPE.PATTERN, String.class); - final BloomFilter filter = BloomFilter.create(expected.longValue(), fpp); - final Pattern compiled = Pattern.compile(pattern); - boolean isEmpty = true; - for (final Token token : value.tokens()) { - final String tokenString = token.toString(); - if (compiled.matcher(tokenString).matches()) { - isEmpty = false; - filter.put(tokenString); - } - } - if (isEmpty) { - throw new IllegalStateException("Trying to insert empty filter"); - } - final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream(); - try { - filter.writeTo(filterBAOS); - filterBAOS.close(); - } - catch (IOException e) { - throw new UncheckedIOException(new IOException("Error writing filter bytes: " + e.getMessage())); - } - return filterBAOS.toByteArray(); - } - - private void insertFilterRecordToCategoryTable(final Record record) { + @Override + public void 
accept(final Record record) { final Table categoryTable = DSL.table(DSL.name(("term_" + bloomTermId + "_" + this.table.getName()))); final Field[] insertFields = { DSL.field("term_id", BIGINTUNSIGNED.nullable(false)), DSL.field("type_id", BIGINTUNSIGNED.nullable(false)), DSL.field(DSL.name(categoryTable.getName(), "filter"), byte[].class) }; + final BloomFilterFromRecord filterFromRecord = new BloomFilterFromRecord(record, table, searchTerm); final Field[] valueFields = { DSL.val(bloomTermId, ULong.class), DSL.val(record.getValue(BLOOMDB.FILTERTYPE.ID), ULong.class), - DSL.val(filterBytesFromRecord(record), byte[].class) + DSL.val(filterFromRecord.bytes(), byte[].class) }; ctx.insertInto(categoryTable).columns(insertFields).values(valueFields).execute(); } - public void insertFiltersIntoCategoryTable() { - recordsInMetadata.toResult().forEach(this::insertFilterRecordToCategoryTable); - } - - /** - * Expects DSLContext values to be the same instance - * - * @param object object compared - * @returs true if object is equal - */ @Override public boolean equals(final Object object) { if (this == object) return true; - if (object == null) - return false; - if (object.getClass() != this.getClass()) + if (object == null || this.getClass() != object.getClass()) return false; - final TableFilters cast = (TableFilters) object; - return this.ctx == cast.ctx && this.value.equals(cast.value) && this.table.equals(cast.table) - && this.bloomTermId == cast.bloomTermId; + final FilterFromRecordToCategoryTableConsumer cast = (FilterFromRecordToCategoryTableConsumer) object; + return bloomTermId == cast.bloomTermId && ctx == cast.ctx && table.equals(cast.table) + && searchTerm.equals(cast.searchTerm); + } + + @Override + public int hashCode() { + return Objects.hash(ctx, table, bloomTermId, searchTerm); } } diff --git a/src/main/java/com/teragrep/pth_06/planner/PatternMatchTables.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java similarity index 99% 
rename from src/main/java/com/teragrep/pth_06/planner/PatternMatchTables.java rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java index 94a7fe7f..10bc8d83 100644 --- a/src/main/java/com/teragrep/pth_06/planner/PatternMatchTables.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java @@ -43,7 +43,7 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. */ -package com.teragrep.pth_06.planner; +package com.teragrep.pth_06.planner.bloomfilter; import com.teragrep.pth_06.planner.walker.conditions.PatternMatchCondition; import com.teragrep.pth_06.planner.walker.conditions.QueryCondition; diff --git a/src/main/java/com/teragrep/pth_06/planner/SearchTermFiltersInserted.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermFiltersInserted.java similarity index 98% rename from src/main/java/com/teragrep/pth_06/planner/SearchTermFiltersInserted.java rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermFiltersInserted.java index a214b22c..a9913cb5 100644 --- a/src/main/java/com/teragrep/pth_06/planner/SearchTermFiltersInserted.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermFiltersInserted.java @@ -43,7 +43,7 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. 
*/ -package com.teragrep.pth_06.planner; +package com.teragrep.pth_06.planner.bloomfilter; import com.teragrep.pth_06.planner.walker.conditions.QueryCondition; diff --git a/src/main/java/com/teragrep/pth_06/planner/TableFilterTypesFromMetadata.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java similarity index 95% rename from src/main/java/com/teragrep/pth_06/planner/TableFilterTypesFromMetadata.java rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java index d5962952..ffcdb4c7 100644 --- a/src/main/java/com/teragrep/pth_06/planner/TableFilterTypesFromMetadata.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java @@ -43,12 +43,14 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. */ -package com.teragrep.pth_06.planner; +package com.teragrep.pth_06.planner.bloomfilter; import org.jooq.*; import org.jooq.impl.DSL; import org.jooq.types.ULong; +import java.util.Objects; + import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB; /** @@ -105,4 +107,9 @@ public boolean equals(final Object object) { final TableFilterTypesFromMetadata cast = (TableFilterTypesFromMetadata) object; return this.bloomTermId == cast.bloomTermId && this.table.equals(cast.table) && this.ctx == cast.ctx; } + + @Override + public int hashCode() { + return Objects.hash(ctx, table, bloomTermId); + } } diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java new file mode 100644 index 00000000..b5dc9207 --- /dev/null +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java @@ -0,0 +1,100 @@ +/* + * Teragrep Archive Datasource (pth_06) + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms 
of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.pth_06.planner.bloomfilter; + +import org.jooq.DSLContext; +import org.jooq.Table; + +import java.util.Objects; + +/** + * Filter types of a table that can be inserted into the table's category table + */ +public final class TableFilters { + + private final TableRecords recordsInMetadata; + private final FilterFromRecordToCategoryTableConsumer recordConsumer; + + public TableFilters(DSLContext ctx, Table table, long bloomTermId, String searchTerm) { + this( + new TableFilterTypesFromMetadata(ctx, table, bloomTermId), + new FilterFromRecordToCategoryTableConsumer(ctx, table, bloomTermId, searchTerm) + ); + } + + public TableFilters( + TableFilterTypesFromMetadata recordsInMetadata, + FilterFromRecordToCategoryTableConsumer recordConsumer + ) { + this.recordsInMetadata = recordsInMetadata; + this.recordConsumer = recordConsumer; + } + + public void insertFiltersIntoCategoryTable() { + recordsInMetadata.toResult().forEach(recordConsumer); + } + + /** + * Expects DSLContext values to be the same instance + * + * @param object object compared + * @return true if object is equal + */ + @Override + public boolean equals(final Object object) { + if (this == object) + return true; + if (object == null || object.getClass() != this.getClass()) + return false; + final TableFilters cast = (TableFilters) object; + return recordsInMetadata.equals(cast.recordsInMetadata) && recordConsumer.equals(cast.recordConsumer); + } + + @Override + public int hashCode() { + return Objects.hash(recordsInMetadata, recordConsumer); + } +} diff --git a/src/main/java/com/teragrep/pth_06/planner/TableRecords.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableRecords.java similarity index 97% rename from src/main/java/com/teragrep/pth_06/planner/TableRecords.java rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableRecords.java index 95b9fb92..932070f8 100644 --- a/src/main/java/com/teragrep/pth_06/planner/TableRecords.java +++
b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableRecords.java @@ -43,7 +43,7 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. */ -package com.teragrep.pth_06.planner; +package com.teragrep.pth_06.planner.bloomfilter; import org.jooq.Record; import org.jooq.Result; diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java index cf1cfced..8f691b62 100644 --- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java +++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java @@ -46,7 +46,7 @@ package com.teragrep.pth_06.planner.walker.conditions; import com.teragrep.pth_06.config.ConditionConfig; -import com.teragrep.pth_06.planner.*; +import com.teragrep.pth_06.planner.bloomfilter.*; import org.jooq.Condition; import org.jooq.Table; import org.jooq.impl.DSL; @@ -78,7 +78,7 @@ public IndexStatementCondition(String value, ConditionConfig config, Condition c public Condition condition() { if (!config.bloomEnabled()) { - LOGGER.debug("Indexstatement reached with bloom disabled"); + LOGGER.warn("Indexstatement reached with bloom disabled"); return condition; } Condition newCondition = condition; diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchCondition.java index f4ad5808..ce023b53 100644 --- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchCondition.java +++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchCondition.java @@ -45,48 +45,42 @@ */ package com.teragrep.pth_06.planner.walker.conditions; -import com.teragrep.blf_01.Token; -import com.teragrep.pth_06.planner.TokenizedValue; import org.jooq.*; import org.jooq.impl.DSL; 
+import java.util.Objects; + import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB; -/** - * Combined regex match condition - *

- * true if any of the tokens regex match against bloomdb.filtertype.pattern - */ +/** true if BLOOMDB.FILTERTYPE.PATTERN regex like with input value */ public final class PatternMatchCondition implements QueryCondition { - private final TokenizedValue value; + private final Field valueField; public PatternMatchCondition(String input) { - this(new TokenizedValue(input)); + this(DSL.val(input)); } - public PatternMatchCondition(TokenizedValue value) { - this.value = value; + public PatternMatchCondition(Field valueField) { + this.valueField = valueField; } public Condition condition() { - Condition patternCondition = DSL.noCondition(); - for (Token token : value.tokens()) { - Field tokenStringField = DSL.val(token.toString()); - patternCondition = patternCondition.or(tokenStringField.likeRegex(BLOOMDB.FILTERTYPE.PATTERN)); - } - return patternCondition; + return valueField.likeRegex(BLOOMDB.FILTERTYPE.PATTERN); } @Override public boolean equals(final Object object) { if (this == object) return true; - if (object == null) - return false; - if (object.getClass() != this.getClass()) + if (object == null || object.getClass() != this.getClass()) return false; final PatternMatchCondition cast = (PatternMatchCondition) object; - return this.value.equals(cast.value); + return valueField.equals(cast.valueField); + } + + @Override + public int hashCode() { + return Objects.hash(valueField); } } diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java new file mode 100644 index 00000000..3e983795 --- /dev/null +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java @@ -0,0 +1,162 @@ +/* + * Teragrep Archive Datasource (pth_06) + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as 
published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.pth_06.planner.bloomfilter; + +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.spark.util.sketch.BloomFilter; +import org.jooq.DSLContext; +import org.jooq.Field; +import org.jooq.Record; +import org.jooq.Table; +import org.jooq.impl.DSL; +import org.jooq.types.ULong; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +import java.io.ByteArrayInputStream; +import java.sql.Connection; +import java.sql.DriverManager; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +public class BloomFilterFromRecordTest { + + final String url = "jdbc:h2:mem:test;MODE=MariaDB;DATABASE_TO_LOWER=TRUE;CASE_INSENSITIVE_IDENTIFIERS=TRUE"; + final String userName = "sa"; + final String password = ""; + final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password)); + + @Test + public void testInstantation() { + Record dynamicRecord = generateRecord(true); + Table target = DSL.table(DSL.name("target")); + String searchTerm = "Pattern"; + BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm); + Assertions.assertDoesNotThrow(filter::bytes); + } + + @Test + public void testCorrectFilterSize() { + Record dynamicRecord = generateRecord(true); + Table target = DSL.table(DSL.name("target")); + String searchTerm = "SearchValuePatternInThisString"; + BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm); + byte[] bytes = Assertions.assertDoesNotThrow(filter::bytes); + BloomFilter resultFilter = Assertions + .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes))); + BloomFilter expectedSize = BloomFilter.create(100, 0.01); + Assertions.assertEquals(expectedSize.bitSize(), resultFilter.bitSize()); + } + + @Test + public void testNoRegexExtractedTokensException() { + Record dynamicRecord = generateRecord(true); + Table target = 
DSL.table(DSL.name("target")); + String searchTerm = "NoMatch"; + BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm); + RuntimeException e = Assertions.assertThrows(RuntimeException.class, filter::bytes); + String expectedMessage = "Trying to insert empty filter, pattern match joined table should always have tokens"; + Assertions.assertEquals(expectedMessage, e.getMessage()); + } + + @Test + public void testRegexExtractedTokens() { + Record dynamicRecord = generateRecord(true); + Table target = DSL.table(DSL.name("target")); + String searchTerm = "SearchValuePatternInThisString"; + BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm); + byte[] bytes = Assertions.assertDoesNotThrow(filter::bytes); + BloomFilter resultFilter = Assertions + .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes))); + Assertions.assertTrue(resultFilter.mightContain("Pattern")); + } + + @Test + public void testTokenizerTokens() { + Record dynamicRecord = generateRecord(false); + Table target = DSL.table(DSL.name("target")); + String searchTerm = "SearchValuePatternInThisString.Without.Delimiter"; + BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm); + byte[] bytes = Assertions.assertDoesNotThrow(filter::bytes); + BloomFilter resultFilter = Assertions + .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes))); + Assertions.assertFalse(resultFilter.mightContain("Pattern")); + Assertions.assertTrue(resultFilter.mightContain("Without")); + Assertions.assertTrue(resultFilter.mightContain("SearchValuePatternInThisString")); + } + + @Test + public void equalsHashCodeContractTest() { + EqualsVerifier + .forClass(BloomFilterFromRecord.class) + .withNonnullFields("expected") + .withNonnullFields("fpp") + .withNonnullFields("searchTerm") + .withIgnoredFields("LOGGER") + .verify(); + } + + private Record generateRecord(final boolean 
withPattern) { + DSLContext ctx = DSL.using(conn); + Field idField = DSL.field(DSL.name("id"), ULong.class); + Field expectedField = DSL.field(DSL.name("expectedElements"), ULong.class); + Field fppField = DSL.field(DSL.name("targetFpp"), Double.class); + Field patternField = DSL.field(DSL.name("pattern"), String.class); + + Record dynamicRecord = ctx.newRecord(idField, expectedField, fppField, patternField); + if (withPattern) { + dynamicRecord.set(patternField, "Pattern"); + } + else { + // case is joined filtertype table has no pattern + dynamicRecord.set(patternField, null); + } + dynamicRecord.set(idField, ULong.valueOf(1)); + dynamicRecord.set(expectedField, ULong.valueOf(100)); + dynamicRecord.set(fppField, 0.01); + return dynamicRecord; + } +} diff --git a/src/test/java/com/teragrep/pth_06/planner/CategoryTableImplTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImplTest.java similarity index 99% rename from src/test/java/com/teragrep/pth_06/planner/CategoryTableImplTest.java rename to src/test/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImplTest.java index 60469640..e0595578 100644 --- a/src/test/java/com/teragrep/pth_06/planner/CategoryTableImplTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImplTest.java @@ -43,7 +43,7 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. 
*/ -package com.teragrep.pth_06.planner; +package com.teragrep.pth_06.planner.bloomfilter; import org.apache.spark.util.sketch.BloomFilter; import org.jooq.Condition; diff --git a/src/test/java/com/teragrep/pth_06/planner/PatternMatchTablesTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java similarity index 87% rename from src/test/java/com/teragrep/pth_06/planner/PatternMatchTablesTest.java rename to src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java index 36aaa431..2ff9924f 100644 --- a/src/test/java/com/teragrep/pth_06/planner/PatternMatchTablesTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java @@ -43,7 +43,7 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. */ -package com.teragrep.pth_06.planner; +package com.teragrep.pth_06.planner.bloomfilter; import org.apache.spark.util.sketch.BloomFilter; import org.jooq.DSLContext; @@ -71,7 +71,8 @@ public class PatternMatchTablesTest { final String ipRegex = "(\\b25[0-5]|\\b2[0-4][0-9]|\\b[01]?[0-9][0-9]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}"; // matches IPv4 starting with 255. 
final String ipStartingWith255 = "(\\b25[0-5]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}"; - final List patternList = new ArrayList<>(Arrays.asList(ipRegex, ipStartingWith255)); + final String parenthesesPattern = "\\((.*?)\\)"; + final List patternList = new ArrayList<>(Arrays.asList(ipRegex, ipStartingWith255, parenthesesPattern)); final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password)); @BeforeAll @@ -82,6 +83,7 @@ void setup() { conn.prepareStatement("DROP TABLE IF EXISTS filtertype").execute(); conn.prepareStatement("DROP TABLE IF EXISTS pattern_test_ip").execute(); conn.prepareStatement("DROP TABLE IF EXISTS pattern_test_ip255").execute(); + conn.prepareStatement("DROP TABLE IF EXISTS parentheses_test").execute(); String filtertype = "CREATE TABLE`filtertype`" + "(" + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY," + " `expectedElements` bigint(20) unsigned NOT NULL," @@ -98,9 +100,15 @@ void setup() { + " `partition_id` bigint(20) unsigned NOT NULL UNIQUE," + " `filter_type_id` bigint(20) unsigned NOT NULL," + " `filter` longblob NOT NULL)"; + String parentheses = "CREATE TABLE `parentheses_test`(" + + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY," + + " `partition_id` bigint(20) unsigned NOT NULL UNIQUE," + + " `filter_type_id` bigint(20) unsigned NOT NULL," + + " `filter` longblob NOT NULL)"; conn.prepareStatement(filtertype).execute(); conn.prepareStatement(ip).execute(); conn.prepareStatement(ip255).execute(); + conn.prepareStatement(parentheses).execute(); String typeSQL = "INSERT INTO `filtertype` (`id`,`expectedElements`, `targetFpp`, `pattern`) VALUES (?,?,?,?)"; int id = 1; for (String pattern : patternList) { @@ -114,6 +122,7 @@ void setup() { } writeFilter("pattern_test_ip", 1); writeFilter("pattern_test_ip255", 2); + writeFilter("parentheses_test", 3); }); } @@ -145,6 +154,16 @@ public void testSearchTermTokenizedMatch() { 
Assertions.assertEquals("pattern_test_ip", result.get(0).getName()); } + @Test + public void testRegexMatch() { + DSLContext ctx = DSL.using(conn); + String input = "biz baz boz data has no content today (very important though) but it would still have if one had a means to extract it from (here is something else important as well) the strange patterns called parentheses that it seems to have been put in."; + PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input); + List> result = patternMatchTables.toList(); + Assertions.assertEquals(1, result.size()); + Assertions.assertEquals("parentheses_test", result.get(0).getName()); + } + @Test public void testMultipleMatch() { DSLContext ctx = DSL.using(conn); diff --git a/src/test/java/com/teragrep/pth_06/planner/TableFilterTypesFromMetadataResultTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java similarity index 99% rename from src/test/java/com/teragrep/pth_06/planner/TableFilterTypesFromMetadataResultTest.java rename to src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java index 50516717..b2a99963 100644 --- a/src/test/java/com/teragrep/pth_06/planner/TableFilterTypesFromMetadataResultTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java @@ -43,7 +43,7 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. 
*/ -package com.teragrep.pth_06.planner; +package com.teragrep.pth_06.planner.bloomfilter; import org.apache.spark.util.sketch.BloomFilter; import org.jooq.DSLContext; diff --git a/src/test/java/com/teragrep/pth_06/planner/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java similarity index 78% rename from src/test/java/com/teragrep/pth_06/planner/TableFiltersTest.java rename to src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java index 5ab27f18..7e7fbecc 100644 --- a/src/test/java/com/teragrep/pth_06/planner/TableFiltersTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java @@ -43,8 +43,9 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. */ -package com.teragrep.pth_06.planner; +package com.teragrep.pth_06.planner.bloomfilter; +import nl.jqno.equalsverifier.EqualsVerifier; import org.apache.spark.util.sketch.BloomFilter; import org.jooq.DSLContext; import org.jooq.Table; @@ -69,8 +70,8 @@ class TableFiltersTest { // matches IPv4 final String ipRegex = "(\\b25[0-5]|\\b2[0-4][0-9]|\\b[01]?[0-9][0-9]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}"; // matches IPv4 starting with 255. 
- final String ipStartingWith255 = "(\\b25[0-5]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}"; - final List patternList = new ArrayList<>(Arrays.asList(ipRegex, ipStartingWith255)); + final String parenthesesPattern = "\\((.*?)\\)"; + final List patternList = new ArrayList<>(Arrays.asList(ipRegex, parenthesesPattern)); final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password)); @BeforeAll @@ -125,7 +126,7 @@ void tearDown() { @Test public void testCreation() { - fillTargetTable(); + fillTargetTable(1); DSLContext ctx = DSL.using(conn); Table table = ctx .meta() @@ -138,7 +139,7 @@ public void testCreation() { @Test public void testInsertFiltersIntoCategoryTable() { - fillTargetTable(); + fillTargetTable(1); DSLContext ctx = DSL.using(conn); Table table = ctx .meta() @@ -151,9 +152,25 @@ public void testInsertFiltersIntoCategoryTable() { Assertions.assertTrue(exception.getMessage().contains("term_0_target\" (term_id, type_id, \"filter\")")); } + @Test + public void testInsertFiltersIntoCategoryTableRegexExtract() { + fillTargetTable(2); + DSLContext ctx = DSL.using(conn); + Table table = ctx + .meta() + .filterSchemas(s -> s.getName().equals("bloomdb")) + .filterTables(t -> !t.getName().equals("filtertype")) + .getTables() + .get(0); + String query = "biz baz boz data has no content today (very important though) but it would still have if one had a means to extract it from (here is something else important as well) the strange patterns called parentheses that it seems to have been put in."; + DataAccessException exception = Assertions + .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, query).insertFiltersIntoCategoryTable()); + Assertions.assertTrue(exception.getMessage().contains("term_0_target\" (term_id, type_id, \"filter\")")); + } + @Test public void testInsertFiltersWithoutPatternMatch() { - fillTargetTable(); + fillTargetTable(1); DSLContext ctx = DSL.using(conn); Table 
table = ctx .meta() @@ -168,7 +185,7 @@ public void testInsertFiltersWithoutPatternMatch() { @Test public void testEquals() { - fillTargetTable(); + fillTargetTable(1); DSLContext ctx = DSL.using(conn); Table table = ctx .meta() @@ -184,7 +201,7 @@ public void testEquals() { @Test public void testNotEquals() { - fillTargetTable(); + fillTargetTable(1); DSLContext ctx = DSL.using(conn); Table table = ctx .meta() @@ -200,7 +217,35 @@ public void testNotEquals() { Assertions.assertNotEquals(filter1, filter3); } - void fillTargetTable() { + @Test + public void testHashCode() { + fillTargetTable(1); + DSLContext ctx = DSL.using(conn); + Table table = ctx + .meta() + .filterSchemas(s -> s.getName().equals("bloomdb")) + .filterTables(t -> !t.getName().equals("filtertype")) + .getTables() + .get(0); + TableFilters filter1 = new TableFilters(ctx, table, 0L, "test"); + TableFilters filter2 = new TableFilters(ctx, table, 0L, "test"); + TableFilters notEq1 = new TableFilters(ctx, table, 0L, "notTest"); + TableFilters notEq2 = new TableFilters(ctx, table, 1L, "test"); + Assertions.assertEquals(filter1.hashCode(), filter2.hashCode()); + Assertions.assertNotEquals(filter1.hashCode(), notEq1.hashCode()); + Assertions.assertNotEquals(filter1.hashCode(), notEq2.hashCode()); + } + + @Test + public void equalsHashCodeContractTest() { + EqualsVerifier + .forClass(TableFilters.class) + .withNonnullFields("recordsInMetadata") + .withNonnullFields("recordConsumer") + .verify(); + } + + void fillTargetTable(int id) { Assertions.assertDoesNotThrow(() -> { conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute(); conn.prepareStatement("USE BLOOMDB").execute(); @@ -214,7 +259,7 @@ void fillTargetTable() { filterBAOS.close(); }); stmt.setInt(1, 1); - stmt.setInt(2, 1); + stmt.setInt(2, id); // filter type id stmt.setBytes(3, filterBAOS.toByteArray()); stmt.executeUpdate(); }); diff --git a/src/test/java/com/teragrep/pth_06/planner/TokenizedValueTest.java 
b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java similarity index 83% rename from src/test/java/com/teragrep/pth_06/planner/TokenizedValueTest.java rename to src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java index 69c2ee2d..fcd87918 100644 --- a/src/test/java/com/teragrep/pth_06/planner/TokenizedValueTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java @@ -43,9 +43,11 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. */ -package com.teragrep.pth_06.planner; +package com.teragrep.pth_06.planner.bloomfilter; import com.teragrep.blf_01.Token; +import com.teragrep.pth_06.planner.TokenizedValue; +import nl.jqno.equalsverifier.EqualsVerifier; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -58,7 +60,6 @@ class TokenizedValueTest { void testTokenization() { TokenizedValue result = new TokenizedValue("test.nest"); Set tokens = result.tokens().stream().map(Token::toString).collect(Collectors.toSet()); - Assertions.assertEquals("test.nest", result.value); Assertions.assertTrue(tokens.contains("nest")); Assertions.assertTrue(tokens.contains("test")); Assertions.assertTrue(tokens.contains(".")); @@ -86,4 +87,18 @@ void testNotEquals() { Assertions.assertNotEquals(value2, value1); Assertions.assertNotEquals(value1, null); } + + @Test + void testHashCode() { + TokenizedValue value1 = new TokenizedValue("test"); + TokenizedValue value2 = new TokenizedValue("test"); + TokenizedValue notEq = new TokenizedValue("nest"); + Assertions.assertEquals(value1.hashCode(), value2.hashCode()); + Assertions.assertNotEquals(value1.hashCode(), notEq.hashCode()); + } + + @Test + public void equalsHashCodeContractTest() { + EqualsVerifier.forClass(TokenizedValue.class).withNonnullFields("value").withNonnullFields("tokenSet").verify(); + } } diff --git 
a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java index fd486989..769d9e67 100644 --- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java @@ -45,6 +45,7 @@ */ package com.teragrep.pth_06.planner.walker.conditions; +import nl.jqno.equalsverifier.EqualsVerifier; import org.jooq.Condition; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -64,18 +65,6 @@ void testSingleToken() { Assertions.assertEquals(e, condition.toString()); } - @Test - void testMultipleTokens() { - Condition condition = new PatternMatchCondition("test.nest").condition(); - String e = "(\n" + " ('test.' like_regex \"bloomdb\".\"filtertype\".\"pattern\")\n" - + " or ('.nest' like_regex \"bloomdb\".\"filtertype\".\"pattern\")\n" - + " or ('test.nest' like_regex \"bloomdb\".\"filtertype\".\"pattern\")\n" - + " or ('nest' like_regex \"bloomdb\".\"filtertype\".\"pattern\")\n" - + " or ('.' 
like_regex \"bloomdb\".\"filtertype\".\"pattern\")\n" - + " or ('test' like_regex \"bloomdb\".\"filtertype\".\"pattern\")\n" + ")"; - Assertions.assertEquals(e, condition.toString()); - } - @Test void testEquality() { PatternMatchCondition cond1 = new PatternMatchCondition("test"); @@ -89,4 +78,18 @@ void testNotEquals() { PatternMatchCondition cond2 = new PatternMatchCondition("next"); Assertions.assertNotEquals(cond1, cond2); } + + @Test + void testHashCode() { + PatternMatchCondition cond1 = new PatternMatchCondition("test"); + PatternMatchCondition cond2 = new PatternMatchCondition("test"); + PatternMatchCondition notEq = new PatternMatchCondition("next"); + Assertions.assertEquals(cond1.hashCode(), cond2.hashCode()); + Assertions.assertNotEquals(cond1.hashCode(), notEq.hashCode()); + } + + @Test + public void equalsHashCodeContractTest() { + EqualsVerifier.forClass(PatternMatchCondition.class).withNonnullFields("valueField").verify(); + } } From 465cf83666f5b62a2d2301c0da8448dfc75c4f6b Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Tue, 22 Oct 2024 08:34:16 +0300 Subject: [PATCH 02/26] add RegexExtractedValueTest --- .../bloomfilter/BloomFilterFromRecord.java | 2 - .../RegexExtractedValue.java | 2 +- .../{ => bloomfilter}/TokenizedValue.java | 2 +- .../bloomfilter/RegexExtractedValueTest.java | 79 +++++++++++++++++++ .../bloomfilter/TokenizedValueTest.java | 1 - 5 files changed, 81 insertions(+), 5 deletions(-) rename src/main/java/com/teragrep/pth_06/planner/{ => bloomfilter}/RegexExtractedValue.java (98%) rename src/main/java/com/teragrep/pth_06/planner/{ => bloomfilter}/TokenizedValue.java (98%) create mode 100644 src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java index 27758266..2c74c65b 100644 --- 
a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java @@ -45,8 +45,6 @@ */ package com.teragrep.pth_06.planner.bloomfilter; -import com.teragrep.pth_06.planner.RegexExtractedValue; -import com.teragrep.pth_06.planner.TokenizedValue; import org.apache.spark.util.sketch.BloomFilter; import org.jooq.Record; import org.jooq.Table; diff --git a/src/main/java/com/teragrep/pth_06/planner/RegexExtractedValue.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java similarity index 98% rename from src/main/java/com/teragrep/pth_06/planner/RegexExtractedValue.java rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java index 56a72978..a33d1d93 100644 --- a/src/main/java/com/teragrep/pth_06/planner/RegexExtractedValue.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java @@ -43,7 +43,7 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. */ -package com.teragrep.pth_06.planner; +package com.teragrep.pth_06.planner.bloomfilter; import java.util.HashSet; import java.util.Objects; diff --git a/src/main/java/com/teragrep/pth_06/planner/TokenizedValue.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java similarity index 98% rename from src/main/java/com/teragrep/pth_06/planner/TokenizedValue.java rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java index c9740922..fab3fcf4 100644 --- a/src/main/java/com/teragrep/pth_06/planner/TokenizedValue.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java @@ -43,7 +43,7 @@ * Teragrep, the applicable Commercial License may apply to this file if you as * a licensee so wish it. 
*/ -package com.teragrep.pth_06.planner; +package com.teragrep.pth_06.planner.bloomfilter; import com.teragrep.blf_01.Token; import com.teragrep.blf_01.Tokenizer; diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java new file mode 100644 index 00000000..465f1405 --- /dev/null +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java @@ -0,0 +1,79 @@ +/* + * Teragrep Archive Datasource (pth_06) + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. 
+ * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ +package com.teragrep.pth_06.planner.bloomfilter; + +import nl.jqno.equalsverifier.EqualsVerifier; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.Set; + +public class RegexExtractedValueTest { + + @Test + public void testRegexExtraction() { + String regex = "\\((.*?)\\)"; + String value = "find all (important) values inside (very important) parentheses."; + RegexExtractedValue regexValue = new RegexExtractedValue(value, regex); + Set tokens = regexValue.tokens(); + Assertions.assertEquals(2, tokens.size()); + Assertions.assertTrue(tokens.contains("(important)") && tokens.contains("(very important)")); + } + + @Test + public void testPartialRegexMatch() { + String regex = "\\w{3}-\\w{3}-\\w{3}"; + String value = "testValue=abc-abc"; + RegexExtractedValue regexValue = new RegexExtractedValue(value, regex); + Set tokens = regexValue.tokens(); + System.out.println(tokens); + } + + @Test + public void testEqualsHashCodeContract() { + EqualsVerifier.forClass(RegexExtractedValue.class).withNonnullFields("matcher").verify(); + } +} diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java index fcd87918..c5340db6 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java +++ 
b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java @@ -46,7 +46,6 @@ package com.teragrep.pth_06.planner.bloomfilter; import com.teragrep.blf_01.Token; -import com.teragrep.pth_06.planner.TokenizedValue; import nl.jqno.equalsverifier.EqualsVerifier; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; From 7d4559d5716ace828bf09518e2b57bc84ee0b896 Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Tue, 22 Oct 2024 08:35:13 +0300 Subject: [PATCH 03/26] remove unnecessary test --- .../planner/bloomfilter/RegexExtractedValueTest.java | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java index 465f1405..bd2b950d 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java @@ -63,15 +63,6 @@ public void testRegexExtraction() { Assertions.assertTrue(tokens.contains("(important)") && tokens.contains("(very important)")); } - @Test - public void testPartialRegexMatch() { - String regex = "\\w{3}-\\w{3}-\\w{3}"; - String value = "testValue=abc-abc"; - RegexExtractedValue regexValue = new RegexExtractedValue(value, regex); - Set tokens = regexValue.tokens(); - System.out.println(tokens); - } - @Test public void testEqualsHashCodeContract() { EqualsVerifier.forClass(RegexExtractedValue.class).withNonnullFields("matcher").verify(); From 945e83977337d450ffdccc501375610eed59f407 Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Tue, 22 Oct 2024 12:26:42 +0300 Subject: [PATCH 04/26] TokenizedValue: call tokenizer only when needed, clean up tests --- .../bloomfilter/BloomFilterFromRecord.java | 2 +- .../planner/bloomfilter/TokenizedValue.java | 25 ++++++++----------- .../BloomFilterFromRecordTest.java | 9 ------- 
.../bloomfilter/RegexExtractedValueTest.java | 3 ++- .../bloomfilter/TokenizedValueTest.java | 5 +--- 5 files changed, 14 insertions(+), 30 deletions(-) diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java index 2c74c65b..eabdc4ef 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java @@ -82,7 +82,7 @@ private BloomFilter create() { throw new RuntimeException("Object field was null"); } final BloomFilter filter = BloomFilter.create(expected, fpp); - // if no pattern use to tokenized value (currently BLOOMDB.FILTERTYPE.PATTERN is NOT NULL) + // if no pattern use tokenized value (currently BLOOMDB.FILTERTYPE.PATTERN is NOT NULL) if (pattern == null) { LOGGER.info("Table pattern was null using tokenizer to generate tokens"); new TokenizedValue(searchTerm).stringTokens().forEach(filter::put); diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java index fab3fcf4..5263342e 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java @@ -50,7 +50,7 @@ import java.io.ByteArrayInputStream; import java.nio.charset.StandardCharsets; -import java.util.HashSet; +import java.util.List; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -58,26 +58,21 @@ public final class TokenizedValue { private final String value; - private final Set tokenSet; public TokenizedValue(String value) { - this( - value, - new HashSet<>(new Tokenizer(32).tokenize(new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8)))) - ); - } - - public TokenizedValue(String value, Set tokenSet) { this.value = 
value; - this.tokenSet = tokenSet; } - public Set tokens() { - return tokenSet; + public List tokens() { + return new Tokenizer(32).tokenize(new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8))); } public Set stringTokens() { - return tokenSet.stream().map(Token::toString).collect(Collectors.toSet()); + return new Tokenizer(32) + .tokenize(new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8))) + .stream() + .map(Token::toString) + .collect(Collectors.toSet()); } @Override @@ -87,11 +82,11 @@ public boolean equals(final Object object) { if (object == null || object.getClass() != this.getClass()) return false; final TokenizedValue cast = (TokenizedValue) object; - return value.equals(cast.value) && tokenSet.equals(cast.tokenSet); + return value.equals(cast.value); } @Override public int hashCode() { - return Objects.hash(value, tokenSet); + return Objects.hash(value); } } diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java index 3e983795..9fcb7a37 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java @@ -69,15 +69,6 @@ public class BloomFilterFromRecordTest { final String password = ""; final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password)); - @Test - public void testInstantation() { - Record dynamicRecord = generateRecord(true); - Table target = DSL.table(DSL.name("target")); - String searchTerm = "Pattern"; - BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm); - Assertions.assertDoesNotThrow(filter::bytes); - } - @Test public void testCorrectFilterSize() { Record dynamicRecord = generateRecord(true); diff --git 
a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java index bd2b950d..c3163f91 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java @@ -60,7 +60,8 @@ public void testRegexExtraction() { RegexExtractedValue regexValue = new RegexExtractedValue(value, regex); Set tokens = regexValue.tokens(); Assertions.assertEquals(2, tokens.size()); - Assertions.assertTrue(tokens.contains("(important)") && tokens.contains("(very important)")); + Assertions.assertTrue(tokens.contains("(important)")); + Assertions.assertTrue(tokens.contains("(very important)")); } @Test diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java index c5340db6..ba53fc9a 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValueTest.java @@ -73,7 +73,6 @@ void testEquality() { TokenizedValue value1 = new TokenizedValue("test"); TokenizedValue value2 = new TokenizedValue("test"); Assertions.assertEquals(value1, value2); - Assertions.assertEquals(value2, value1); value1.tokens(); Assertions.assertEquals(value2, value1); } @@ -83,8 +82,6 @@ void testNotEquals() { TokenizedValue value1 = new TokenizedValue("test"); TokenizedValue value2 = new TokenizedValue("nest"); Assertions.assertNotEquals(value1, value2); - Assertions.assertNotEquals(value2, value1); - Assertions.assertNotEquals(value1, null); } @Test @@ -98,6 +95,6 @@ void testHashCode() { @Test public void equalsHashCodeContractTest() { - EqualsVerifier.forClass(TokenizedValue.class).withNonnullFields("value").withNonnullFields("tokenSet").verify(); + 
EqualsVerifier.forClass(TokenizedValue.class).withNonnullFields("value").verify(); } } From 2a8d97c5c4b76e726fe301ded6ae890df24e320e Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Tue, 22 Oct 2024 12:32:01 +0300 Subject: [PATCH 05/26] clear up exception message in BloomFilterFromRecord --- .../planner/bloomfilter/BloomFilterFromRecord.java | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java index eabdc4ef..12d7aba2 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java @@ -73,13 +73,12 @@ public final class BloomFilterFromRecord { private final String searchTerm; private BloomFilter create() { - if (expected == null || fpp == null) { - LOGGER - .error( - "Null field while creating bloom filter expected <{}>, fpp <{}>, pattern <{}>, search term <{}>", - expected, fpp, pattern, searchTerm - ); - throw new RuntimeException("Object field was null"); + LOGGER.debug("Create filter from Record with values: expected <{}>, fpp <{}>, pattern: <{}>", expected, fpp, pattern); + if (expected == null) { + throw new RuntimeException("Record did not contain table field value "); + } + if(fpp == null) { + throw new RuntimeException("Record did not contain table field value "); } final BloomFilter filter = BloomFilter.create(expected, fpp); // if no pattern use tokenized value (currently BLOOMDB.FILTERTYPE.PATTERN is NOT NULL) From b3ac3489ef677b7dba26018b22feb08134eb908c Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Tue, 22 Oct 2024 12:40:01 +0300 Subject: [PATCH 06/26] BloomFilterFromRecord: remove ULong.longValue() from constructor, clarify exception messages. Add tests for exceptions. 
--- .../bloomfilter/BloomFilterFromRecord.java | 20 ++++++++------ .../BloomFilterFromRecordTest.java | 26 +++++++++++++++++++ 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java index 12d7aba2..1be012ac 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java @@ -67,20 +67,24 @@ public final class BloomFilterFromRecord { private final Logger LOGGER = LoggerFactory.getLogger(BloomFilterFromRecord.class); - private final Long expected; + private final ULong expected; private final Double fpp; private final String pattern; private final String searchTerm; private BloomFilter create() { - LOGGER.debug("Create filter from Record with values: expected <{}>, fpp <{}>, pattern: <{}>", expected, fpp, pattern); + LOGGER + .debug( + "Create filter from Record with values: expected <{}>, fpp <{}>, pattern: <{}>", expected, fpp, + pattern + ); if (expected == null) { - throw new RuntimeException("Record did not contain table field value "); + throw new IllegalArgumentException("Record did not contain table field value "); } - if(fpp == null) { - throw new RuntimeException("Record did not contain table field value "); + if (fpp == null) { + throw new IllegalArgumentException("Record did not contain table field value "); } - final BloomFilter filter = BloomFilter.create(expected, fpp); + final BloomFilter filter = BloomFilter.create(expected.longValue(), fpp); // if no pattern use tokenized value (currently BLOOMDB.FILTERTYPE.PATTERN is NOT NULL) if (pattern == null) { LOGGER.info("Table pattern was null using tokenizer to generate tokens"); @@ -101,14 +105,14 @@ private BloomFilter create() { public BloomFilterFromRecord(Record record, Table table, String searchTerm) { this( - 
record.getValue(DSL.field(DSL.name(table.getName(), "expectedElements"), ULong.class)).longValue(), + record.getValue(DSL.field(DSL.name(table.getName(), "expectedElements"), ULong.class)), record.getValue(DSL.field(DSL.name(table.getName(), "targetFpp"), Double.class)), record.getValue(BLOOMDB.FILTERTYPE.PATTERN, String.class), searchTerm ); } - public BloomFilterFromRecord(Long expected, Double fpp, String pattern, String searchTerm) { + public BloomFilterFromRecord(ULong expected, Double fpp, String pattern, String searchTerm) { this.expected = expected; this.fpp = fpp; this.pattern = pattern; diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java index 9fcb7a37..897c01a3 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java @@ -119,6 +119,32 @@ public void testTokenizerTokens() { Assertions.assertTrue(resultFilter.mightContain("SearchValuePatternInThisString")); } + @Test + public void testNullExpectedField() { + Record dynamicRecord = generateRecord(false); + Field expectedField = DSL.field(DSL.name("expectedElements"), ULong.class); + dynamicRecord.set(expectedField, null); + Table target = DSL.table(DSL.name("target")); + String searchTerm = "SearchValuePatternInThisString.Without.Delimiter"; + BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm); + IllegalArgumentException e = Assertions.assertThrows(IllegalArgumentException.class, filter::bytes); + String expectedMessage = "Record did not contain table field value "; + Assertions.assertEquals(expectedMessage, e.getMessage()); + } + + @Test + public void testNullFppField() { + Record dynamicRecord = generateRecord(false); + Field fppField = DSL.field(DSL.name("targetFpp"), Double.class); + 
dynamicRecord.set(fppField, null); + Table target = DSL.table(DSL.name("target")); + String searchTerm = "SearchValuePatternInThisString.Without.Delimiter"; + BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm); + IllegalArgumentException e = Assertions.assertThrows(IllegalArgumentException.class, filter::bytes); + String expectedMessage = "Record did not contain table field value "; + Assertions.assertEquals(expectedMessage, e.getMessage()); + } + @Test public void equalsHashCodeContractTest() { EqualsVerifier From 0d724065bb7fea1d3a31719d72d7d64e8f2eba76 Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Tue, 22 Oct 2024 12:43:11 +0300 Subject: [PATCH 07/26] set logger level to debug when indexstatement is reached with bloom disabled --- .../planner/walker/conditions/IndexStatementCondition.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java index 8f691b62..7a2c47ce 100644 --- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java +++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java @@ -78,7 +78,7 @@ public IndexStatementCondition(String value, ConditionConfig config, Condition c public Condition condition() { if (!config.bloomEnabled()) { - LOGGER.warn("Indexstatement reached with bloom disabled"); + LOGGER.debug("Indexstatement reached with bloom disabled"); return condition; } Condition newCondition = condition; From a68278977d95c93d048400b2cedcd7997b830df1 Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Tue, 22 Oct 2024 15:12:35 +0300 Subject: [PATCH 08/26] remove consumer class and use for loop in TableFilters --- ...lterFromRecordToCategoryTableConsumer.java | 112 ------------------ 
.../planner/bloomfilter/TableFilters.java | 63 ++++++---- .../planner/bloomfilter/TableFiltersTest.java | 5 +- .../pth_06/walker/ConditionWalkerTest.java | 22 ++++ 4 files changed, 68 insertions(+), 134 deletions(-) delete mode 100644 src/main/java/com/teragrep/pth_06/planner/bloomfilter/FilterFromRecordToCategoryTableConsumer.java diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/FilterFromRecordToCategoryTableConsumer.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/FilterFromRecordToCategoryTableConsumer.java deleted file mode 100644 index f84e7aed..00000000 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/FilterFromRecordToCategoryTableConsumer.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Teragrep Archive Datasource (pth_06) - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. 
- * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. - */ -package com.teragrep.pth_06.planner.bloomfilter; - -import org.jooq.DSLContext; -import org.jooq.Field; -import org.jooq.Record; -import org.jooq.Table; -import org.jooq.impl.DSL; -import org.jooq.types.ULong; - -import java.util.Objects; -import java.util.function.Consumer; - -import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB; -import static org.jooq.impl.SQLDataType.BIGINTUNSIGNED; - -public final class FilterFromRecordToCategoryTableConsumer implements Consumer { - - private final DSLContext ctx; - private final Table table; - private final long bloomTermId; - private final String searchTerm; - - public FilterFromRecordToCategoryTableConsumer( - DSLContext ctx, - Table table, - long bloomTermId, - String searchTerm - ) { - this.ctx = ctx; - this.table = table; - this.bloomTermId = bloomTermId; - this.searchTerm = searchTerm; - } - - @Override - public void accept(final Record record) { - final Table categoryTable = DSL.table(DSL.name(("term_" + bloomTermId + "_" + this.table.getName()))); - final Field[] insertFields = { - DSL.field("term_id", BIGINTUNSIGNED.nullable(false)), - DSL.field("type_id", BIGINTUNSIGNED.nullable(false)), - 
DSL.field(DSL.name(categoryTable.getName(), "filter"), byte[].class) - }; - final BloomFilterFromRecord filterFromRecord = new BloomFilterFromRecord(record, table, searchTerm); - final Field[] valueFields = { - DSL.val(bloomTermId, ULong.class), - DSL.val(record.getValue(BLOOMDB.FILTERTYPE.ID), ULong.class), - DSL.val(filterFromRecord.bytes(), byte[].class) - }; - ctx.insertInto(categoryTable).columns(insertFields).values(valueFields).execute(); - } - - @Override - public boolean equals(final Object object) { - if (this == object) - return true; - if (object == null || this.getClass() != object.getClass()) - return false; - final FilterFromRecordToCategoryTableConsumer cast = (FilterFromRecordToCategoryTableConsumer) object; - return bloomTermId == cast.bloomTermId && ctx == cast.ctx && table.equals(cast.table) - && searchTerm.equals(cast.searchTerm); - } - - @Override - public int hashCode() { - return Objects.hash(ctx, table, bloomTermId, searchTerm); - } -} diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java index b5dc9207..65767a13 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java @@ -45,56 +45,77 @@ */ package com.teragrep.pth_06.planner.bloomfilter; -import org.jooq.DSLContext; -import org.jooq.Table; +import org.jooq.*; +import org.jooq.impl.DSL; +import org.jooq.types.ULong; import java.util.Objects; +import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB; +import static org.jooq.impl.SQLDataType.BIGINTUNSIGNED; + /** * Filter types of a table that can be inserted into the tables category table */ public final class TableFilters { private final TableRecords recordsInMetadata; - private final FilterFromRecordToCategoryTableConsumer recordConsumer; + private final DSLContext ctx; + private final Table table; + private final 
String searchTerm; + private final long bloomTermId; public TableFilters(DSLContext ctx, Table table, long bloomTermId, String searchTerm) { - this( - new TableFilterTypesFromMetadata(ctx, table, bloomTermId), - new FilterFromRecordToCategoryTableConsumer(ctx, table, bloomTermId, searchTerm) - ); + this(new TableFilterTypesFromMetadata(ctx, table, bloomTermId), ctx, table, bloomTermId, searchTerm); } public TableFilters( TableFilterTypesFromMetadata recordsInMetadata, - FilterFromRecordToCategoryTableConsumer recordConsumer + DSLContext ctx, + Table table, + long bloomTermId, + String searchTerm ) { this.recordsInMetadata = recordsInMetadata; - this.recordConsumer = recordConsumer; + this.ctx = ctx; + this.table = table; + this.bloomTermId = bloomTermId; + this.searchTerm = searchTerm; } public void insertFiltersIntoCategoryTable() { - recordsInMetadata.toResult().forEach(recordConsumer); + final Result result = recordsInMetadata.toResult(); + for (final Record record : result) { + final Table categoryTable = DSL + .table(DSL.name(("term_" + bloomTermId + "_" + this.table.getName()))); + final Field[] insertFields = { + DSL.field("term_id", BIGINTUNSIGNED.nullable(false)), + DSL.field("type_id", BIGINTUNSIGNED.nullable(false)), + DSL.field(DSL.name(categoryTable.getName(), "filter"), byte[].class) + }; + final BloomFilterFromRecord filterFromRecord = new BloomFilterFromRecord(record, table, searchTerm); + final Field[] valueFields = { + DSL.val(bloomTermId, ULong.class), + DSL.val(record.getValue(BLOOMDB.FILTERTYPE.ID), ULong.class), + DSL.val(filterFromRecord.bytes(), byte[].class) + }; + ctx.insertInto(categoryTable).columns(insertFields).values(valueFields).execute(); + } } - /** - * Expects DSLContext values to be the same instance - * - * @param object object compared - * @returs true if object is equal - */ @Override - public boolean equals(final Object object) { + public boolean equals(Object object) { if (this == object) return true; - if (object == null || 
object.getClass() != this.getClass()) + if (object == null || getClass() != object.getClass()) return false; - final TableFilters cast = (TableFilters) object; - return recordsInMetadata.equals(cast.recordsInMetadata) && recordConsumer.equals(cast.recordConsumer); + TableFilters cast = (TableFilters) object; + return bloomTermId == cast.bloomTermId && recordsInMetadata.equals(cast.recordsInMetadata) && ctx == cast.ctx + && table.equals(cast.table) && searchTerm.equals(cast.searchTerm); } @Override public int hashCode() { - return Objects.hash(recordsInMetadata, recordConsumer); + return Objects.hash(recordsInMetadata, ctx, table, searchTerm, bloomTermId); } } diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java index 7e7fbecc..15a723fd 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java @@ -241,7 +241,10 @@ public void equalsHashCodeContractTest() { EqualsVerifier .forClass(TableFilters.class) .withNonnullFields("recordsInMetadata") - .withNonnullFields("recordConsumer") + .withNonnullFields("ctx") + .withNonnullFields("table") + .withNonnullFields("searchTerm") + .withNonnullFields("bloomTermId") .verify(); } diff --git a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java index 60346b7e..9862d7f6 100644 --- a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java +++ b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java @@ -230,6 +230,28 @@ void twoTablePatternMatchTest() { .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255"))); } + @Test + void testFullXML() { + ConditionWalker walker = new ConditionWalker(DSL.using(conn), true); + String q = ""; + String e = "(\n" + " 
\"getArchivedObjects_filter_table\".\"directory\" like 'haproxy'\n" + " and (\n" + + " (\n" + " bloommatch(\n" + " (\n" + + " select \"term_0_pattern_test_ip\".\"filter\"\n" + + " from \"term_0_pattern_test_ip\"\n" + " where (\n" + " term_id = 0\n" + + " and type_id = \"bloomdb\".\"pattern_test_ip\".\"filter_type_id\"\n" + " )\n" + + " ),\n" + " \"bloomdb\".\"pattern_test_ip\".\"filter\"\n" + " ) = true\n" + + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is not null\n" + " )\n" + " or (\n" + + " bloommatch(\n" + " (\n" + " select \"term_0_pattern_test_ip255\".\"filter\"\n" + + " from \"term_0_pattern_test_ip255\"\n" + " where (\n" + " term_id = 0\n" + + " and type_id = \"bloomdb\".\"pattern_test_ip255\".\"filter_type_id\"\n" + " )\n" + + " ),\n" + " \"bloomdb\".\"pattern_test_ip255\".\"filter\"\n" + " ) = true\n" + + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is not null\n" + " )\n" + " or (\n" + + " \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is null\n" + " )\n" + " )\n" + ")"; + Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false)); + System.out.println(cond.toString()); + } + @Test void twoTablePatternMatchWithoutFiltersTest() { ConditionWalker walker = new ConditionWalker(DSL.using(conn), true, true); From b3546148015985745e96600880df678c81e3086a Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Tue, 22 Oct 2024 15:28:17 +0300 Subject: [PATCH 09/26] use try with resources and add comments on equals methods about DSLContext equality --- .../pth_06/planner/bloomfilter/BloomFilterFromRecord.java | 6 ++---- .../planner/bloomfilter/TableFilterTypesFromMetadata.java | 6 ++++++ .../teragrep/pth_06/planner/bloomfilter/TableFilters.java | 6 ++++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java 
b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java index 1be012ac..f3bf84a1 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java @@ -121,15 +121,13 @@ public BloomFilterFromRecord(ULong expected, Double fpp, String pattern, String public byte[] bytes() { final BloomFilter filter = create(); - final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream(); - try { + try (final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream()) { filter.writeTo(filterBAOS); - filterBAOS.close(); + return filterBAOS.toByteArray(); } catch (IOException e) { throw new UncheckedIOException(new IOException("Error writing filter bytes: " + e.getMessage())); } - return filterBAOS.toByteArray(); } @Override diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java index ffcdb4c7..2f8752f4 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java @@ -96,6 +96,12 @@ public Result toResult() { return records; } + /** + * Equal only if all object parameters are same value and the instances of DSLContext are same + * + * @param object object compared against + * @return true if all object is same class, object fields are equal and DSLContext is same instance + */ @Override public boolean equals(final Object object) { if (this == object) diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java index 65767a13..db5f85dc 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java +++ 
b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java @@ -103,6 +103,12 @@ public void insertFiltersIntoCategoryTable() { } } + /** + * Equal only if all object parameters are same value and the instances of DSLContext are same + * + * @param object object compared against + * @return true if all object is same class, object fields are equal and DSLContext is same instance + */ @Override public boolean equals(Object object) { if (this == object) From 5f907c86fc1341b215e52f322b5f03cb95e7eb05 Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Wed, 23 Oct 2024 12:09:22 +0300 Subject: [PATCH 10/26] add Tokenizable interface and decorators, rename BloomFilterFromRecord and make it unconfigurable, make matcher immutable --- .../bloomfilter/RegexExtractedValue.java | 35 ++++--- ...Record.java => SearchTermBloomFilter.java} | 80 ++++++---------- .../planner/bloomfilter/TableFilters.java | 18 +++- .../planner/bloomfilter/Tokenizable.java | 53 +++++++++++ .../planner/bloomfilter/TokenizedValue.java | 12 +-- .../planner/bloomfilter/TokensAsStrings.java | 65 +++++++++++++ .../bloomfilter/RegexExtractedValueTest.java | 10 +- ...st.java => SearchTermBloomFilterTest.java} | 94 ++++--------------- .../planner/bloomfilter/TableFiltersTest.java | 6 +- .../bloomfilter/TokensAsStringsTest.java | 63 +++++++++++++ .../conditions/PatternMatchConditionTest.java | 2 +- 11 files changed, 270 insertions(+), 168 deletions(-) rename src/main/java/com/teragrep/pth_06/planner/bloomfilter/{BloomFilterFromRecord.java => SearchTermBloomFilter.java} (52%) create mode 100644 src/main/java/com/teragrep/pth_06/planner/bloomfilter/Tokenizable.java create mode 100644 src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStrings.java rename src/test/java/com/teragrep/pth_06/planner/bloomfilter/{BloomFilterFromRecordTest.java => SearchTermBloomFilterTest.java} (50%) create mode 100644 src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStringsTest.java diff 
--git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java index a33d1d93..33655b29 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java @@ -45,30 +45,27 @@ */ package com.teragrep.pth_06.planner.bloomfilter; -import java.util.HashSet; -import java.util.Objects; -import java.util.Set; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; -public final class RegexExtractedValue { +public final class RegexExtractedValue implements Tokenizable { - private final Matcher matcher; + private final String value; + private final Pattern pattern; - public RegexExtractedValue(String value, String regex) { - this(value, Pattern.compile(regex)); + public RegexExtractedValue(String value, String pattern) { + this(value, Pattern.compile(pattern)); } public RegexExtractedValue(String value, Pattern pattern) { - this(pattern.matcher(value)); + this.value = value; + this.pattern = pattern; } - public RegexExtractedValue(Matcher matcher) { - this.matcher = matcher; - } - - public Set tokens() { - final Set tokens = new HashSet<>(); + public List tokens() { + final Matcher matcher = pattern.matcher(value); + final List tokens = new ArrayList<>(); while (matcher.find()) { final String token = matcher.group(); tokens.add(token); @@ -77,17 +74,17 @@ public Set tokens() { } @Override - public boolean equals(final Object object) { + public boolean equals(Object object) { if (this == object) return true; - if (object == null || object.getClass() != this.getClass()) + if (object == null || getClass() != object.getClass()) return false; - final RegexExtractedValue cast = (RegexExtractedValue) object; - return matcher.equals(cast.matcher); + RegexExtractedValue cast = (RegexExtractedValue) object; + return value.equals(cast.value) && 
pattern.equals(cast.pattern); } @Override public int hashCode() { - return Objects.hash(matcher); + return Objects.hash(value, pattern); } } diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java similarity index 52% rename from src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java index f3bf84a1..61884af3 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecord.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java @@ -46,77 +46,52 @@ package com.teragrep.pth_06.planner.bloomfilter; import org.apache.spark.util.sketch.BloomFilter; -import org.jooq.Record; -import org.jooq.Table; -import org.jooq.impl.DSL; -import org.jooq.types.ULong; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.UncheckedIOException; +import java.util.List; import java.util.Objects; -import java.util.Set; - -import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB; /** - * Extracts filter type from record, creates a bloom filter and returns the filters byte array + * Inserts given tokens into configurable filter */ -public final class BloomFilterFromRecord { +public final class SearchTermBloomFilter { - private final Logger LOGGER = LoggerFactory.getLogger(BloomFilterFromRecord.class); - private final ULong expected; + private final Logger LOGGER = LoggerFactory.getLogger(SearchTermBloomFilter.class); + private final Long expected; private final Double fpp; - private final String pattern; - private final String searchTerm; + private final List stringTokens; private BloomFilter create() { - LOGGER - .debug( - "Create filter from Record with values: expected <{}>, fpp <{}>, pattern: <{}>", 
expected, fpp, - pattern - ); - if (expected == null) { - throw new IllegalArgumentException("Record did not contain table field value "); - } - if (fpp == null) { - throw new IllegalArgumentException("Record did not contain table field value "); - } - final BloomFilter filter = BloomFilter.create(expected.longValue(), fpp); - // if no pattern use tokenized value (currently BLOOMDB.FILTERTYPE.PATTERN is NOT NULL) - if (pattern == null) { - LOGGER.info("Table pattern was null using tokenizer to generate tokens"); - new TokenizedValue(searchTerm).stringTokens().forEach(filter::put); + LOGGER.debug("Create filter from Record with values: expected <{}>, fpp <{}>", expected, fpp); + + if (stringTokens.isEmpty()) { + throw new IllegalStateException( + "Trying to insert empty filter, pattern match joined table should always have tokens" + ); } - else { // get tokens using regex - final Set tokens = new RegexExtractedValue(searchTerm, pattern).tokens(); - LOGGER.info("Insert pattern <{}> tokens to temp table filter <{}>", pattern, tokens); - if (tokens.isEmpty()) { - throw new IllegalStateException( - "Trying to insert empty filter, pattern match joined table should always have tokens" - ); - } - tokens.forEach(filter::put); + final BloomFilter filter = BloomFilter.create(1000, 0.01); + for (String token : stringTokens) { + filter.put(token); } return filter; } - public BloomFilterFromRecord(Record record, Table table, String searchTerm) { - this( - record.getValue(DSL.field(DSL.name(table.getName(), "expectedElements"), ULong.class)), - record.getValue(DSL.field(DSL.name(table.getName(), "targetFpp"), Double.class)), - record.getValue(BLOOMDB.FILTERTYPE.PATTERN, String.class), - searchTerm - ); + public SearchTermBloomFilter(Long expected, Double fpp, RegexExtractedValue tokenizable) { + this(expected, fpp, tokenizable.tokens()); + } + + public SearchTermBloomFilter(Long expected, Double fpp, TokenizedValue tokenizable) { + this(expected, fpp, new 
TokensAsStrings(tokenizable).tokens()); } - public BloomFilterFromRecord(ULong expected, Double fpp, String pattern, String searchTerm) { + public SearchTermBloomFilter(Long expected, Double fpp, List stringTokens) { this.expected = expected; this.fpp = fpp; - this.pattern = pattern; - this.searchTerm = searchTerm; + this.stringTokens = stringTokens; } public byte[] bytes() { @@ -131,18 +106,17 @@ public byte[] bytes() { } @Override - public boolean equals(final Object object) { + public boolean equals(Object object) { if (this == object) return true; if (object == null || getClass() != object.getClass()) return false; - final BloomFilterFromRecord cast = (BloomFilterFromRecord) object; - return expected.equals(cast.expected) && fpp.equals(cast.fpp) && Objects.equals(pattern, cast.pattern) - && searchTerm.equals(cast.searchTerm); + SearchTermBloomFilter cast = (SearchTermBloomFilter) object; + return expected.equals(cast.expected) && fpp.equals(cast.fpp) && stringTokens.equals(cast.stringTokens); } @Override public int hashCode() { - return Objects.hash(expected, fpp, pattern, searchTerm); + return Objects.hash(expected, fpp, stringTokens); } } diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java index db5f85dc..26d6b6e4 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java @@ -93,11 +93,25 @@ public void insertFiltersIntoCategoryTable() { DSL.field("type_id", BIGINTUNSIGNED.nullable(false)), DSL.field(DSL.name(categoryTable.getName(), "filter"), byte[].class) }; - final BloomFilterFromRecord filterFromRecord = new BloomFilterFromRecord(record, table, searchTerm); + final ULong expectedField = record + .getValue(DSL.field(DSL.name(table.getName(), "expectedElements"), ULong.class)); + final Double fpp = record.getValue(DSL.field(DSL.name(table.getName(), 
"targetFpp"), Double.class)); + final String pattern = record.getValue(BLOOMDB.FILTERTYPE.PATTERN, String.class); + final SearchTermBloomFilter filter; + if (pattern == null) { + filter = new SearchTermBloomFilter(expectedField.longValue(), fpp, new TokenizedValue(searchTerm)); + } + else { + filter = new SearchTermBloomFilter( + expectedField.longValue(), + fpp, + new RegexExtractedValue(searchTerm, pattern) + ); + } final Field[] valueFields = { DSL.val(bloomTermId, ULong.class), DSL.val(record.getValue(BLOOMDB.FILTERTYPE.ID), ULong.class), - DSL.val(filterFromRecord.bytes(), byte[].class) + DSL.val(filter.bytes(), byte[].class) }; ctx.insertInto(categoryTable).columns(insertFields).values(valueFields).execute(); } diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/Tokenizable.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/Tokenizable.java new file mode 100644 index 00000000..e209f536 --- /dev/null +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/Tokenizable.java @@ -0,0 +1,53 @@ +/* + * Teragrep Archive Datasource (pth_06) + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.pth_06.planner.bloomfilter; + +import java.util.List; + +public interface Tokenizable { + + List tokens(); +} diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java index 5263342e..fe32e624 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokenizedValue.java @@ -52,10 +52,8 @@ import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Objects; -import java.util.Set; -import java.util.stream.Collectors; -public final class TokenizedValue { +public final class TokenizedValue implements Tokenizable { private final String value; @@ -67,14 +65,6 @@ public List tokens() { return new Tokenizer(32).tokenize(new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8))); } - public Set stringTokens() { - return new Tokenizer(32) - .tokenize(new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8))) - .stream() - .map(Token::toString) - .collect(Collectors.toSet()); - } - @Override public boolean equals(final Object object) { if (this == object) diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStrings.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStrings.java new file mode 100644 index 00000000..9dea7ef0 --- /dev/null +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStrings.java @@ -0,0 +1,65 @@ +/* + * Teragrep Archive Datasource (pth_06) + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.pth_06.planner.bloomfilter; + +import com.teragrep.blf_01.Token; + +import java.util.List; +import java.util.stream.Collectors; + +public final class TokensAsStrings implements Tokenizable { + + private final Tokenizable origin; + + public TokensAsStrings(Tokenizable origin) { + this.origin = origin; + } + + @Override + public List tokens() { + return origin.tokens().stream().map(Token::toString).collect(Collectors.toList()); + } +} diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java index c3163f91..04bd7edd 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValueTest.java @@ -49,7 +49,7 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.util.Set; +import java.util.List; public class RegexExtractedValueTest { @@ -58,7 +58,7 @@ public void testRegexExtraction() { String regex = "\\((.*?)\\)"; String value = "find all (important) values inside (very important) parentheses."; RegexExtractedValue regexValue = new RegexExtractedValue(value, regex); - Set tokens = regexValue.tokens(); + List tokens = regexValue.tokens(); Assertions.assertEquals(2, tokens.size()); Assertions.assertTrue(tokens.contains("(important)")); Assertions.assertTrue(tokens.contains("(very important)")); @@ -66,6 +66,10 @@ public void testRegexExtraction() { @Test public void testEqualsHashCodeContract() { - EqualsVerifier.forClass(RegexExtractedValue.class).withNonnullFields("matcher").verify(); + EqualsVerifier + .forClass(RegexExtractedValue.class) + .withNonnullFields("value") + .withNonnullFields("pattern") + .verify(); } } diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java 
b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java similarity index 50% rename from src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java rename to src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java index 897c01a3..89838ba9 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/BloomFilterFromRecordTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java @@ -47,58 +47,47 @@ import nl.jqno.equalsverifier.EqualsVerifier; import org.apache.spark.util.sketch.BloomFilter; -import org.jooq.DSLContext; -import org.jooq.Field; -import org.jooq.Record; -import org.jooq.Table; -import org.jooq.impl.DSL; -import org.jooq.types.ULong; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import java.io.ByteArrayInputStream; -import java.sql.Connection; -import java.sql.DriverManager; @TestInstance(TestInstance.Lifecycle.PER_CLASS) -public class BloomFilterFromRecordTest { - - final String url = "jdbc:h2:mem:test;MODE=MariaDB;DATABASE_TO_LOWER=TRUE;CASE_INSENSITIVE_IDENTIFIERS=TRUE"; - final String userName = "sa"; - final String password = ""; - final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password)); +public class SearchTermBloomFilterTest { @Test public void testCorrectFilterSize() { - Record dynamicRecord = generateRecord(true); - Table target = DSL.table(DSL.name("target")); String searchTerm = "SearchValuePatternInThisString"; - BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm); + SearchTermBloomFilter filter = new SearchTermBloomFilter(1000L, 0.01, new TokenizedValue(searchTerm)); byte[] bytes = Assertions.assertDoesNotThrow(filter::bytes); BloomFilter resultFilter = Assertions .assertDoesNotThrow(() -> BloomFilter.readFrom(new 
ByteArrayInputStream(bytes))); - BloomFilter expectedSize = BloomFilter.create(100, 0.01); + BloomFilter expectedSize = BloomFilter.create(1000L, 0.01); Assertions.assertEquals(expectedSize.bitSize(), resultFilter.bitSize()); } @Test public void testNoRegexExtractedTokensException() { - Record dynamicRecord = generateRecord(true); - Table target = DSL.table(DSL.name("target")); String searchTerm = "NoMatch"; - BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm); - RuntimeException e = Assertions.assertThrows(RuntimeException.class, filter::bytes); + SearchTermBloomFilter filter = new SearchTermBloomFilter( + 1000L, + 0.01, + new RegexExtractedValue(searchTerm, "Pattern") + ); + IllegalStateException e = Assertions.assertThrows(IllegalStateException.class, filter::bytes); String expectedMessage = "Trying to insert empty filter, pattern match joined table should always have tokens"; Assertions.assertEquals(expectedMessage, e.getMessage()); } @Test public void testRegexExtractedTokens() { - Record dynamicRecord = generateRecord(true); - Table target = DSL.table(DSL.name("target")); String searchTerm = "SearchValuePatternInThisString"; - BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm); + SearchTermBloomFilter filter = new SearchTermBloomFilter( + 1000L, + 0.01, + new RegexExtractedValue(searchTerm, "Pattern") + ); byte[] bytes = Assertions.assertDoesNotThrow(filter::bytes); BloomFilter resultFilter = Assertions .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes))); @@ -107,10 +96,8 @@ public void testRegexExtractedTokens() { @Test public void testTokenizerTokens() { - Record dynamicRecord = generateRecord(false); - Table target = DSL.table(DSL.name("target")); String searchTerm = "SearchValuePatternInThisString.Without.Delimiter"; - BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm); + SearchTermBloomFilter filter = new 
SearchTermBloomFilter(1000L, 0.01, new TokenizedValue(searchTerm)); byte[] bytes = Assertions.assertDoesNotThrow(filter::bytes); BloomFilter resultFilter = Assertions .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes))); @@ -119,61 +106,14 @@ public void testTokenizerTokens() { Assertions.assertTrue(resultFilter.mightContain("SearchValuePatternInThisString")); } - @Test - public void testNullExpectedField() { - Record dynamicRecord = generateRecord(false); - Field expectedField = DSL.field(DSL.name("expectedElements"), ULong.class); - dynamicRecord.set(expectedField, null); - Table target = DSL.table(DSL.name("target")); - String searchTerm = "SearchValuePatternInThisString.Without.Delimiter"; - BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm); - IllegalArgumentException e = Assertions.assertThrows(IllegalArgumentException.class, filter::bytes); - String expectedMessage = "Record did not contain table field value "; - Assertions.assertEquals(expectedMessage, e.getMessage()); - } - - @Test - public void testNullFppField() { - Record dynamicRecord = generateRecord(false); - Field fppField = DSL.field(DSL.name("targetFpp"), Double.class); - dynamicRecord.set(fppField, null); - Table target = DSL.table(DSL.name("target")); - String searchTerm = "SearchValuePatternInThisString.Without.Delimiter"; - BloomFilterFromRecord filter = new BloomFilterFromRecord(dynamicRecord, target, searchTerm); - IllegalArgumentException e = Assertions.assertThrows(IllegalArgumentException.class, filter::bytes); - String expectedMessage = "Record did not contain table field value "; - Assertions.assertEquals(expectedMessage, e.getMessage()); - } - @Test public void equalsHashCodeContractTest() { EqualsVerifier - .forClass(BloomFilterFromRecord.class) + .forClass(SearchTermBloomFilter.class) .withNonnullFields("expected") .withNonnullFields("fpp") - .withNonnullFields("searchTerm") + .withNonnullFields("stringTokens") 
.withIgnoredFields("LOGGER") .verify(); } - - private Record generateRecord(final boolean withPattern) { - DSLContext ctx = DSL.using(conn); - Field idField = DSL.field(DSL.name("id"), ULong.class); - Field expectedField = DSL.field(DSL.name("expectedElements"), ULong.class); - Field fppField = DSL.field(DSL.name("targetFpp"), Double.class); - Field patternField = DSL.field(DSL.name("pattern"), String.class); - - Record dynamicRecord = ctx.newRecord(idField, expectedField, fppField, patternField); - if (withPattern) { - dynamicRecord.set(patternField, "Pattern"); - } - else { - // case is joined filtertype table has no pattern - dynamicRecord.set(patternField, null); - } - dynamicRecord.set(idField, ULong.valueOf(1)); - dynamicRecord.set(expectedField, ULong.valueOf(100)); - dynamicRecord.set(fppField, 0.01); - return dynamicRecord; - } } diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java index 15a723fd..19a4e1d2 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java @@ -70,8 +70,10 @@ class TableFiltersTest { // matches IPv4 final String ipRegex = "(\\b25[0-5]|\\b2[0-4][0-9]|\\b[01]?[0-9][0-9]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}"; // matches IPv4 starting with 255. 
+ final String ipStartingWith255 = "(\\b25[0-5]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}"; + // matches with values surrounded by parentheses final String parenthesesPattern = "\\((.*?)\\)"; - final List patternList = new ArrayList<>(Arrays.asList(ipRegex, parenthesesPattern)); + final List patternList = new ArrayList<>(Arrays.asList(ipRegex, ipStartingWith255, parenthesesPattern)); final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password)); @BeforeAll @@ -154,7 +156,7 @@ public void testInsertFiltersIntoCategoryTable() { @Test public void testInsertFiltersIntoCategoryTableRegexExtract() { - fillTargetTable(2); + fillTargetTable(3); DSLContext ctx = DSL.using(conn); Table table = ctx .meta() diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStringsTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStringsTest.java new file mode 100644 index 00000000..ad1330f1 --- /dev/null +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStringsTest.java @@ -0,0 +1,63 @@ +/* + * Teragrep Archive Datasource (pth_06) + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.pth_06.planner.bloomfilter; + +import com.teragrep.blf_01.Token; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TokensAsStringsTest { + + @Test + public void testTokensToStrings() { + String value = "one.two.three"; + Tokenizable tokenizedValue = new TokenizedValue(value); + boolean allTokenClass = tokenizedValue.tokens().stream().allMatch(t -> t.getClass().equals(Token.class)); + Assertions.assertTrue(allTokenClass); + Tokenizable toStrings = new TokensAsStrings(tokenizedValue); + Assertions.assertTrue(toStrings.tokens().contains("one")); + } +} diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java index 769d9e67..2f089a8e 100644 --- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java @@ -59,7 +59,7 @@ class PatternMatchConditionTest { @Test - void testSingleToken() { + void testCondition() { Condition condition = new PatternMatchCondition("test").condition(); String e = "('test' like_regex \"bloomdb\".\"filtertype\".\"pattern\")"; Assertions.assertEquals(e, condition.toString()); From 0bf741049ceec3131b62e80d4767c9766003a97a Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Wed, 23 Oct 2024 13:40:37 +0300 Subject: [PATCH 11/26] add missing assertion to test --- .../pth_06/walker/ConditionWalkerTest.java | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java index 9862d7f6..4fb58634 100644 --- a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java +++ b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java @@ 
-231,25 +231,29 @@ void twoTablePatternMatchTest() { } @Test - void testFullXML() { + void testFullXMLTwoMatchingTables() { ConditionWalker walker = new ConditionWalker(DSL.using(conn), true); String q = ""; - String e = "(\n" + " \"getArchivedObjects_filter_table\".\"directory\" like 'haproxy'\n" + " and (\n" - + " (\n" + " bloommatch(\n" + " (\n" + String e = "(\n" + " \"getArchivedObjects_filter_table\".\"directory\" like 'search_bench'\n" + + " and \"journaldb\".\"logfile\".\"logdate\" >= date '2022-01-26'\n" + + " and (UNIX_TIMESTAMP(STR_TO_DATE(SUBSTRING(REGEXP_SUBSTR(path,'[0-9]+(\\.log)?\\.gz(\\.[0-9]*)?$'), 1, 10), '%Y%m%d%H')) >= 1643205600)\n" + + " and \"journaldb\".\"logfile\".\"logdate\" <= date '2024-10-20'\n" + + " and (UNIX_TIMESTAMP(STR_TO_DATE(SUBSTRING(REGEXP_SUBSTR(path,'[0-9]+(\\.log)?\\.gz(\\.[0-9]*)?$'), 1, 10), '%Y%m%d%H')) <= 1729435021)\n" + + " and (\n" + " (\n" + " bloommatch(\n" + " (\n" + " select \"term_0_pattern_test_ip\".\"filter\"\n" + " from \"term_0_pattern_test_ip\"\n" + " where (\n" + " term_id = 0\n" + " and type_id = \"bloomdb\".\"pattern_test_ip\".\"filter_type_id\"\n" + " )\n" + " ),\n" + " \"bloomdb\".\"pattern_test_ip\".\"filter\"\n" + " ) = true\n" - + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is not null\n" + " )\n" + " or (\n" - + " bloommatch(\n" + " (\n" + " select \"term_0_pattern_test_ip255\".\"filter\"\n" - + " from \"term_0_pattern_test_ip255\"\n" + " where (\n" + " term_id = 0\n" - + " and type_id = \"bloomdb\".\"pattern_test_ip255\".\"filter_type_id\"\n" + " )\n" - + " ),\n" + " \"bloomdb\".\"pattern_test_ip255\".\"filter\"\n" + " ) = true\n" - + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is not null\n" + " )\n" + " or (\n" - + " \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" - + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is null\n" + " )\n" + " )\n" + ")"; + + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is not null\n" + " )\n" + + " or 
\"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + " )\n" + " and (\n" + " (\n" + + " bloommatch(\n" + " (\n" + " select \"term_1_pattern_test_ip\".\"filter\"\n" + + " from \"term_1_pattern_test_ip\"\n" + " where (\n" + " term_id = 1\n" + + " and type_id = \"bloomdb\".\"pattern_test_ip\".\"filter_type_id\"\n" + " )\n" + + " ),\n" + " \"bloomdb\".\"pattern_test_ip\".\"filter\"\n" + " ) = true\n" + + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is not null\n" + " )\n" + + " or \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + " )\n" + ")"; Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false)); - System.out.println(cond.toString()); + Assertions.assertEquals(e, cond.toString()); } @Test From 31e0bf9f7fa66f09b10ff6f5bddc2288b6823f8d Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Wed, 23 Oct 2024 13:52:32 +0300 Subject: [PATCH 12/26] move method after constructors --- .../bloomfilter/SearchTermBloomFilter.java | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java index 61884af3..f9dc43f5 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java @@ -65,6 +65,20 @@ public final class SearchTermBloomFilter { private final Double fpp; private final List stringTokens; + public SearchTermBloomFilter(Long expected, Double fpp, RegexExtractedValue tokenizable) { + this(expected, fpp, tokenizable.tokens()); + } + + public SearchTermBloomFilter(Long expected, Double fpp, TokenizedValue tokenizable) { + this(expected, fpp, new TokensAsStrings(tokenizable).tokens()); + } + + public SearchTermBloomFilter(Long expected, Double fpp, List stringTokens) { + this.expected = expected; + this.fpp = fpp; + 
this.stringTokens = stringTokens; + } + private BloomFilter create() { LOGGER.debug("Create filter from Record with values: expected <{}>, fpp <{}>", expected, fpp); @@ -80,20 +94,6 @@ private BloomFilter create() { return filter; } - public SearchTermBloomFilter(Long expected, Double fpp, RegexExtractedValue tokenizable) { - this(expected, fpp, tokenizable.tokens()); - } - - public SearchTermBloomFilter(Long expected, Double fpp, TokenizedValue tokenizable) { - this(expected, fpp, new TokensAsStrings(tokenizable).tokens()); - } - - public SearchTermBloomFilter(Long expected, Double fpp, List stringTokens) { - this.expected = expected; - this.fpp = fpp; - this.stringTokens = stringTokens; - } - public byte[] bytes() { final BloomFilter filter = create(); try (final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream()) { From f370356c771fa6f91e7a213d453f2b5a3b2325af Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Fri, 25 Oct 2024 13:37:16 +0300 Subject: [PATCH 13/26] fix hard coded filter size and fix testing that different sizes are applied. 
--- .../pth_06/planner/StreamDBClient.java | 4 ++-- .../planner/bloomfilter/DatabaseTables.java | 10 ++++++++ .../bloomfilter/PatternMatchTables.java | 4 ++-- .../bloomfilter/RegexExtractedValue.java | 2 +- .../bloomfilter/SearchTermBloomFilter.java | 11 +++------ .../conditions/IndexStatementCondition.java | 4 ++-- .../bloomfilter/PatternMatchTablesTest.java | 12 +++++----- .../SearchTermBloomFilterTest.java | 23 +++++++++++++------ 8 files changed, 42 insertions(+), 28 deletions(-) create mode 100644 src/main/java/com/teragrep/pth_06/planner/bloomfilter/DatabaseTables.java diff --git a/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java b/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java index 0cc044cb..57084f3b 100644 --- a/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java +++ b/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java @@ -344,9 +344,9 @@ private Table getTableStatement(Condition journaldbCondition, Date day) .on(JOURNALDB.LOGFILE.HOST_ID.eq(GetArchivedObjectsFilterTable.host_id).and(JOURNALDB.LOGFILE.LOGTAG.eq(GetArchivedObjectsFilterTable.tag))); if (bloomEnabled) { - Set> tables = walker.patternMatchTables(); + final Set> tables = walker.patternMatchTables(); if (!tables.isEmpty()) { - for (Table table : tables) { + for (final Table table : tables) { if (LOGGER.isInfoEnabled()) { LOGGER.info("Left join pattern match table: <{}>", table.getName()); } diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/DatabaseTables.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/DatabaseTables.java new file mode 100644 index 00000000..92d09133 --- /dev/null +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/DatabaseTables.java @@ -0,0 +1,10 @@ +package com.teragrep.pth_06.planner.bloomfilter; + +import org.jooq.Table; + +import java.util.List; + +public interface DatabaseTables { + + List> tables(); +} diff --git 
a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java index 10bc8d83..b7b638e7 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java @@ -61,7 +61,7 @@ /** * Class to get a collection of Tables that match the given PatternMatchCondition */ -public final class PatternMatchTables { +public final class PatternMatchTables implements DatabaseTables { private static final Logger LOGGER = LoggerFactory.getLogger(PatternMatchTables.class); @@ -82,7 +82,7 @@ public PatternMatchTables(DSLContext ctx, PatternMatchCondition patternMatchCond * * @return List of tables that matched condition and were not empty */ - public List> toList() { + public List> tables() { final List> tables = ctx .meta() .filterSchemas(s -> s.equals(BLOOMDB)) // select bloomdb diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java index 33655b29..bf3cfbd9 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java @@ -74,7 +74,7 @@ public List tokens() { } @Override - public boolean equals(Object object) { + public boolean equals(final Object object) { if (this == object) return true; if (object == null || getClass() != object.getClass()) diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java index f9dc43f5..88573f73 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java @@ -79,7 +79,7 @@ public 
SearchTermBloomFilter(Long expected, Double fpp, List stringToken this.stringTokens = stringTokens; } - private BloomFilter create() { + public byte[] bytes() { LOGGER.debug("Create filter from Record with values: expected <{}>, fpp <{}>", expected, fpp); if (stringTokens.isEmpty()) { @@ -87,15 +87,10 @@ private BloomFilter create() { "Trying to insert empty filter, pattern match joined table should always have tokens" ); } - final BloomFilter filter = BloomFilter.create(1000, 0.01); - for (String token : stringTokens) { + final BloomFilter filter = BloomFilter.create(expected, fpp); + for (final String token : stringTokens) { filter.put(token); } - return filter; - } - - public byte[] bytes() { - final BloomFilter filter = create(); try (final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream()) { filter.writeTo(filterBAOS); return filterBAOS.toByteArray(); diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java index 7a2c47ce..7b36f73c 100644 --- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java +++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java @@ -83,8 +83,8 @@ public Condition condition() { } Condition newCondition = condition; if (tableSet.isEmpty()) { - final PatternMatchTables patternMatchTables = new PatternMatchTables(config.context(), value); - tableSet.addAll(patternMatchTables.toList()); + final DatabaseTables patternMatchTables = new PatternMatchTables(config.context(), value); + tableSet.addAll(patternMatchTables.tables()); } if (!tableSet.isEmpty()) { if (LOGGER.isDebugEnabled()) { diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java index 2ff9924f..5313a4c4 100644 --- 
a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java @@ -139,7 +139,7 @@ public void testSingleMatch() { DSLContext ctx = DSL.using(conn); String input = "192.168.1.1"; PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input); - List> result = patternMatchTables.toList(); + List> result = patternMatchTables.tables(); Assertions.assertEquals(1, result.size()); Assertions.assertEquals("pattern_test_ip", result.get(0).getName()); } @@ -149,7 +149,7 @@ public void testSearchTermTokenizedMatch() { DSLContext ctx = DSL.using(conn); String input = "target_ip=192.168.1.1"; PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input); - List> result = patternMatchTables.toList(); + List> result = patternMatchTables.tables(); Assertions.assertEquals(1, result.size()); Assertions.assertEquals("pattern_test_ip", result.get(0).getName()); } @@ -159,7 +159,7 @@ public void testRegexMatch() { DSLContext ctx = DSL.using(conn); String input = "biz baz boz data has no content today (very important though) but it would still have if one had a means to extract it from (here is something else important as well) the strange patterns called parentheses that it seems to have been put in."; PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input); - List> result = patternMatchTables.toList(); + List> result = patternMatchTables.tables(); Assertions.assertEquals(1, result.size()); Assertions.assertEquals("parentheses_test", result.get(0).getName()); } @@ -169,8 +169,8 @@ public void testMultipleMatch() { DSLContext ctx = DSL.using(conn); String input = "255.255.255.255"; PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input); - List> result = patternMatchTables.toList(); - List> result2 = patternMatchTables.toList(); + List> result = patternMatchTables.tables(); + List> result2 = 
patternMatchTables.tables(); List tableNames = result.stream().map(Named::getName).collect(Collectors.toList()); Assertions.assertEquals(2, result.size()); Assertions.assertEquals(2, result2.size()); @@ -183,7 +183,7 @@ public void testNoMatch() { DSLContext ctx = DSL.using(conn); String input = "testinput"; PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input); - List> result = patternMatchTables.toList(); + List> result = patternMatchTables.tables(); Assertions.assertTrue(result.isEmpty()); } diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java index 89838ba9..27932eb4 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java @@ -58,13 +58,22 @@ public class SearchTermBloomFilterTest { @Test public void testCorrectFilterSize() { - String searchTerm = "SearchValuePatternInThisString"; - SearchTermBloomFilter filter = new SearchTermBloomFilter(1000L, 0.01, new TokenizedValue(searchTerm)); - byte[] bytes = Assertions.assertDoesNotThrow(filter::bytes); - BloomFilter resultFilter = Assertions - .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes))); - BloomFilter expectedSize = BloomFilter.create(1000L, 0.01); - Assertions.assertEquals(expectedSize.bitSize(), resultFilter.bitSize()); + String searchTerm = "test"; + SearchTermBloomFilter filter1 = new SearchTermBloomFilter(1000L, 0.01, new TokenizedValue(searchTerm)); + SearchTermBloomFilter filter2 = new SearchTermBloomFilter(1000L, 0.02, new TokenizedValue(searchTerm)); + SearchTermBloomFilter filter3 = new SearchTermBloomFilter(100L, 0.01, new TokenizedValue(searchTerm)); + byte[] bytes1 = Assertions.assertDoesNotThrow(filter1::bytes); + byte[] bytes2 = Assertions.assertDoesNotThrow(filter2::bytes); + byte[] 
bytes3 = Assertions.assertDoesNotThrow(filter3::bytes); + BloomFilter resultFilter1 = Assertions + .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes1))); + BloomFilter resultFilter2 = Assertions + .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes2))); + BloomFilter resultFilter3 = Assertions + .assertDoesNotThrow(() -> BloomFilter.readFrom(new ByteArrayInputStream(bytes3))); + Assertions.assertEquals(BloomFilter.create(1000L, 0.01).bitSize(), resultFilter1.bitSize()); + Assertions.assertEquals(BloomFilter.create(1000L, 0.02).bitSize(), resultFilter2.bitSize()); + Assertions.assertEquals(BloomFilter.create(100L, 0.01).bitSize(), resultFilter3.bitSize()); } @Test From eda9878231364b4c2dc5e45422d9b53116b47806 Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Mon, 28 Oct 2024 16:15:23 +0200 Subject: [PATCH 14/26] refactor code to simplify, add testing for SQL temp table values created by bloom operations --- pom.xml | 6 + .../planner/bloomfilter/CategoryTable.java | 5 - .../bloomfilter/CategoryTableImpl.java | 34 +---- ...ble.java => CategoryTableWithFilters.java} | 30 ++-- ....java => ConditionMatchBloomDBTables.java} | 22 +-- .../planner/bloomfilter/DatabaseTables.java | 45 ++++++ .../bloomfilter/SearchTermBloomFilter.java | 10 +- .../SearchTermFiltersInserted.java | 76 ---------- .../TableFilterTypesFromMetadata.java | 76 +++++----- .../planner/bloomfilter/TableFilters.java | 35 +++-- .../conditions/CategoryTableCondition.java | 42 ++++-- .../conditions/IndexStatementCondition.java | 19 ++- ... 
RegexLikeFiltertypePatternCondition.java} | 8 +- .../bloomfilter/CategoryTableImplTest.java | 64 +-------- ...a => ConditionMatchBloomDBTablesTest.java} | 36 ++--- .../SearchTermBloomFilterTest.java | 13 ++ ...ableFilterTypesFromMetadataResultTest.java | 11 +- .../planner/bloomfilter/TableFiltersTest.java | 1 + .../CategoryTableConditionTest.java | 7 +- ...exLikeFiltertypePatternConditionTest.java} | 20 +-- .../pth_06/walker/ConditionWalkerTest.java | 135 +++++++++++++++++- 21 files changed, 398 insertions(+), 297 deletions(-) rename src/main/java/com/teragrep/pth_06/planner/bloomfilter/{CreatedCategoryTable.java => CategoryTableWithFilters.java} (76%) rename src/main/java/com/teragrep/pth_06/planner/bloomfilter/{PatternMatchTables.java => ConditionMatchBloomDBTables.java} (83%) delete mode 100644 src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermFiltersInserted.java rename src/main/java/com/teragrep/pth_06/planner/walker/conditions/{PatternMatchCondition.java => RegexLikeFiltertypePatternCondition.java} (89%) rename src/test/java/com/teragrep/pth_06/planner/bloomfilter/{PatternMatchTablesTest.java => ConditionMatchBloomDBTablesTest.java} (86%) rename src/test/java/com/teragrep/pth_06/planner/walker/conditions/{PatternMatchConditionTest.java => RegexLikeFiltertypePatternConditionTest.java} (75%) diff --git a/pom.xml b/pom.xml index 47ffaa4a..af810fab 100644 --- a/pom.xml +++ b/pom.xml @@ -209,6 +209,12 @@ 3.16.1 test + + io.github.hakky54 + logcaptor + 2.9.3 + test + org.apache.kafka kafka-clients diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTable.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTable.java index 5499d781..03f7499a 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTable.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTable.java @@ -45,13 +45,8 @@ */ package com.teragrep.pth_06.planner.bloomfilter; -import 
com.teragrep.pth_06.planner.walker.conditions.QueryCondition; - public interface CategoryTable { void create(); - void insertFilters(); - - QueryCondition bloommatchCondition(); } diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java index d0c7b079..45b2a767 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java @@ -46,8 +46,6 @@ package com.teragrep.pth_06.planner.bloomfilter; import com.teragrep.pth_06.config.ConditionConfig; -import com.teragrep.pth_06.planner.walker.conditions.CategoryTableCondition; -import com.teragrep.pth_06.planner.walker.conditions.QueryCondition; import org.jooq.*; import org.jooq.impl.DSL; import org.slf4j.Logger; @@ -88,7 +86,6 @@ public final class CategoryTableImpl implements CategoryTable { private final DSLContext ctx; private final Table originTable; private final long bloomTermId; - private final CategoryTableCondition tableCondition; private final TableFilters tableFilters; public CategoryTableImpl(ConditionConfig config, Table originTable, String value) { @@ -96,32 +93,18 @@ public CategoryTableImpl(ConditionConfig config, Table originTable, String va config.context(), originTable, config.bloomTermId(), - new CategoryTableCondition(originTable, config.bloomTermId()), new TableFilters(config.context(), originTable, config.bloomTermId(), value) ); } public CategoryTableImpl(DSLContext ctx, Table originTable, long bloomTermId, String value) { - this( - ctx, - originTable, - bloomTermId, - new CategoryTableCondition(originTable, bloomTermId), - new TableFilters(ctx, originTable, bloomTermId, value) - ); + this(ctx, originTable, bloomTermId, new TableFilters(ctx, originTable, bloomTermId, value)); } - public CategoryTableImpl( - DSLContext ctx, - Table originTable, - long bloomTermId, - 
CategoryTableCondition tableCondition, - TableFilters tableFilters - ) { + public CategoryTableImpl(DSLContext ctx, Table originTable, long bloomTermId, TableFilters tableFilters) { this.ctx = ctx; this.originTable = originTable; this.bloomTermId = bloomTermId; - this.tableCondition = tableCondition; this.tableFilters = tableFilters; } @@ -144,19 +127,6 @@ public void create() { indexStep.execute(); } - public void insertFilters() { - tableFilters.insertFiltersIntoCategoryTable(); - } - - /** - * Row condition that selects the same sized filter arrays from this category table and the origin table. - * - * @return condition - */ - public QueryCondition bloommatchCondition() { - return tableCondition; - } - /** * Equal only if all object parameters are same value and the instances of DSLContext are same * diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CreatedCategoryTable.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java similarity index 76% rename from src/main/java/com/teragrep/pth_06/planner/bloomfilter/CreatedCategoryTable.java rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java index 0090ba97..a664b636 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CreatedCategoryTable.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java @@ -45,32 +45,32 @@ */ package com.teragrep.pth_06.planner.bloomfilter; -import com.teragrep.pth_06.planner.walker.conditions.QueryCondition; +import org.jooq.DSLContext; +import org.jooq.Table; /** - * Decorator that inserts category tables filter types into database + * Decorator that inserts category tables filter types into the table */ -public final class CreatedCategoryTable implements CategoryTable { +public final class CategoryTableWithFilters implements CategoryTable { private final CategoryTable origin; + private final TableFilters filters; - public 
CreatedCategoryTable(final CategoryTable origin) { + public CategoryTableWithFilters(DSLContext ctx, Table origin, long bloomTermId, String searchTerm) { + this( + new CategoryTableImpl(ctx, origin, bloomTermId, searchTerm), + new TableFilters(ctx, origin, bloomTermId, searchTerm) + ); + } + + public CategoryTableWithFilters(CategoryTable origin, TableFilters filters) { this.origin = origin; + this.filters = filters; } @Override public void create() { origin.create(); - } - - @Override - public void insertFilters() { - origin.insertFilters(); - } - - @Override - public QueryCondition bloommatchCondition() { - create(); - return origin.bloommatchCondition(); + filters.insertFiltersIntoCategoryTable(); } } diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java similarity index 83% rename from src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java rename to src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java index b7b638e7..8ccd9cf4 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTables.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java @@ -45,7 +45,7 @@ */ package com.teragrep.pth_06.planner.bloomfilter; -import com.teragrep.pth_06.planner.walker.conditions.PatternMatchCondition; +import com.teragrep.pth_06.planner.walker.conditions.RegexLikeFiltertypePatternCondition; import com.teragrep.pth_06.planner.walker.conditions.QueryCondition; import org.jooq.DSLContext; import org.jooq.Field; @@ -61,20 +61,20 @@ /** * Class to get a collection of Tables that match the given PatternMatchCondition */ -public final class PatternMatchTables implements DatabaseTables { +public final class ConditionMatchBloomDBTables implements DatabaseTables { - private static final Logger LOGGER = 
LoggerFactory.getLogger(PatternMatchTables.class); + private static final Logger LOGGER = LoggerFactory.getLogger(ConditionMatchBloomDBTables.class); private final DSLContext ctx; - private final QueryCondition patternMatchCondition; + private final QueryCondition condition; - public PatternMatchTables(DSLContext ctx, String pattern) { - this(ctx, new PatternMatchCondition(pattern)); + public ConditionMatchBloomDBTables(DSLContext ctx, String pattern) { + this(ctx, new RegexLikeFiltertypePatternCondition(pattern)); } - public PatternMatchTables(DSLContext ctx, PatternMatchCondition patternMatchCondition) { + public ConditionMatchBloomDBTables(DSLContext ctx, QueryCondition condition) { this.ctx = ctx; - this.patternMatchCondition = patternMatchCondition; + this.condition = condition; } /** @@ -91,7 +91,7 @@ public List> tables() { .from(t) .leftJoin(BLOOMDB.FILTERTYPE)// join filtertype to access patterns .on(BLOOMDB.FILTERTYPE.ID.eq((Field) t.field("filter_type_id"))) - .where(patternMatchCondition.condition())// select tables that match pattern condition + .where(condition.condition())// select tables that match pattern condition .limit(1)// limit 1 since we are checking only if table is not empty .fetch() .isNotEmpty() // select table if not empty @@ -115,7 +115,7 @@ public boolean equals(final Object object) { return false; if (object.getClass() != this.getClass()) return false; - final PatternMatchTables cast = (PatternMatchTables) object; - return this.patternMatchCondition.equals(cast.patternMatchCondition) && this.ctx == cast.ctx; // only same instance of DSLContext is equal + final ConditionMatchBloomDBTables cast = (ConditionMatchBloomDBTables) object; + return this.condition.equals(cast.condition) && this.ctx == cast.ctx; // only same instance of DSLContext is equal } } diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/DatabaseTables.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/DatabaseTables.java index 92d09133..2a4869f6 
100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/DatabaseTables.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/DatabaseTables.java @@ -1,3 +1,48 @@ +/* + * Teragrep Archive Datasource (pth_06) + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. 
+ * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. + */ package com.teragrep.pth_06.planner.bloomfilter; import org.jooq.Table; diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java index 88573f73..c3132c8a 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java @@ -81,7 +81,6 @@ public SearchTermBloomFilter(Long expected, Double fpp, List stringToken public byte[] bytes() { LOGGER.debug("Create filter from Record with values: expected <{}>, fpp <{}>", expected, fpp); - if (stringTokens.isEmpty()) { throw new IllegalStateException( "Trying to insert empty filter, pattern match joined table should always have tokens" @@ -91,6 +90,15 @@ public byte[] bytes() { for (final String token : stringTokens) { filter.put(token); } + if (LOGGER.isWarnEnabled()) { + if (stringTokens.size() > expected) { + LOGGER + .warn( + "Number of tokens <{}> was larger than the expected value <{}>, resulting FPP <{}>", + stringTokens.size(), expected, filter.expectedFpp() + ); + } + } try (final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream()) { filter.writeTo(filterBAOS); return filterBAOS.toByteArray(); diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermFiltersInserted.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermFiltersInserted.java deleted file mode 100644 index a9913cb5..00000000 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermFiltersInserted.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Teragrep Archive Datasource (pth_06) - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can 
redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. 
- */ -package com.teragrep.pth_06.planner.bloomfilter; - -import com.teragrep.pth_06.planner.walker.conditions.QueryCondition; - -/** - * Decorator that inserts category tables filter types into database - */ -public final class SearchTermFiltersInserted implements CategoryTable { - - private final CategoryTable origin; - - public SearchTermFiltersInserted(final CategoryTable origin) { - this.origin = origin; - } - - @Override - public void create() { - origin.create(); - } - - @Override - public void insertFilters() { - origin.insertFilters(); - } - - @Override - public QueryCondition bloommatchCondition() { - insertFilters(); - return origin.bloommatchCondition(); - } -} diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java index 2f8752f4..4b171589 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java @@ -49,6 +49,8 @@ import org.jooq.impl.DSL; import org.jooq.types.ULong; +import java.util.Arrays; +import java.util.List; import java.util.Objects; import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB; @@ -60,62 +62,70 @@ public final class TableFilterTypesFromMetadata implements TableRecords { private final DSLContext ctx; private final Table table; - private final long bloomTermId; + private final Field expectedField; + private final Field fppField; + private final Field patternField; + private final Field filterTypeIdField; public TableFilterTypesFromMetadata(DSLContext ctx, Table table, long bloomTermId) { + this( + ctx, + table.join(BLOOMDB.FILTERTYPE).on(BLOOMDB.FILTERTYPE.ID.eq(table.field("filter_type_id").cast(ULong.class))), DSL.table(DSL.name(("term_" + bloomTermId + "_" + table.getName()))).getName() + ); + } + + public TableFilterTypesFromMetadata(DSLContext 
ctx, Table table, String name) { + this( + ctx, + table, + DSL.field(DSL.name(name, "expectedElements"), ULong.class), + DSL.field(DSL.name(name, "targetFpp"), Double.class), + DSL.field(DSL.name(name, "pattern"), String.class) + ); + + } + + public TableFilterTypesFromMetadata( + DSLContext ctx, + Table table, + Field expectedField, + Field fppField, + Field patternField + ) { this.ctx = ctx; this.table = table; - this.bloomTermId = bloomTermId; + this.expectedField = expectedField; + this.fppField = fppField; + this.patternField = patternField; + this.filterTypeIdField = table.field("filter_type_id").cast(ULong.class); } public Result toResult() { - if (table == null) { - throw new IllegalStateException("Origin table was null"); - } - final Table joined = table - .join(BLOOMDB.FILTERTYPE) - .on(BLOOMDB.FILTERTYPE.ID.eq((Field) table.field("filter_type_id"))); - final Table namedTable = DSL.table(DSL.name(("term_" + bloomTermId + "_" + table.getName()))); - final Field expectedField = DSL.field(DSL.name(namedTable.getName(), "expectedElements"), ULong.class); - final Field fppField = DSL.field(DSL.name(namedTable.getName(), "targetFpp"), Double.class); - final SelectField[] resultFields = { - BLOOMDB.FILTERTYPE.ID, - joined.field("expectedElements").as(expectedField), - joined.field("targetFpp").as(fppField), - joined.field("pattern") - }; + List> selectFieldsList = Arrays + .asList(BLOOMDB.FILTERTYPE.ID, table.field("expectedElements"), table.field("targetFpp"), table.field("pattern")); // Fetch filtertype values from metadata - final Result records = ctx - .select(resultFields) - .from(joined) - .groupBy(joined.field("filter_type_id")) - .fetch(); + final Result records = ctx.select(selectFieldsList).from(table).groupBy(filterTypeIdField).fetch(); if (records.isEmpty()) { throw new RuntimeException("Origin table was empty"); } return records; } - /** - * Equal only if all object parameters are same value and the instances of DSLContext are same - * - * @param 
object object compared against - * @return true if all object is same class, object fields are equal and DSLContext is same instance - */ @Override public boolean equals(final Object object) { if (this == object) return true; - if (object == null) - return false; - if (object.getClass() != this.getClass()) + if (object == null || getClass() != object.getClass()) return false; final TableFilterTypesFromMetadata cast = (TableFilterTypesFromMetadata) object; - return this.bloomTermId == cast.bloomTermId && this.table.equals(cast.table) && this.ctx == cast.ctx; + return ctx == cast.ctx && Objects.equals(table, cast.table) && Objects + .equals(expectedField, cast.expectedField) && Objects.equals(fppField, cast.fppField) && Objects + .equals(patternField, cast.patternField) + && Objects.equals(filterTypeIdField, cast.filterTypeIdField); } @Override public int hashCode() { - return Objects.hash(ctx, table, bloomTermId); + return Objects.hash(ctx, table, expectedField, fppField, patternField, filterTypeIdField); } } diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java index 26d6b6e4..51d6c885 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java @@ -55,39 +55,50 @@ import static org.jooq.impl.SQLDataType.BIGINTUNSIGNED; /** - * Filter types of a table that can be inserted into the tables category table + * Filter types of a table that can be inserted into the category table */ public final class TableFilters { private final TableRecords recordsInMetadata; private final DSLContext ctx; private final Table table; + private final Table categoryTable; private final String searchTerm; private final long bloomTermId; public TableFilters(DSLContext ctx, Table table, long bloomTermId, String searchTerm) { - this(new TableFilterTypesFromMetadata(ctx, table, bloomTermId), 
ctx, table, bloomTermId, searchTerm); + this( + new TableFilterTypesFromMetadata(ctx, table, bloomTermId), + ctx, + table, + DSL.table(DSL.name(("term_" + bloomTermId + "_" + table.getName()))), + bloomTermId, + searchTerm + ); } public TableFilters( TableFilterTypesFromMetadata recordsInMetadata, DSLContext ctx, Table table, + Table categoryTable, long bloomTermId, String searchTerm ) { this.recordsInMetadata = recordsInMetadata; this.ctx = ctx; this.table = table; + this.categoryTable = categoryTable; this.bloomTermId = bloomTermId; this.searchTerm = searchTerm; } public void insertFiltersIntoCategoryTable() { + if (table == null) { + throw new IllegalStateException("Origin table was null"); + } final Result result = recordsInMetadata.toResult(); for (final Record record : result) { - final Table categoryTable = DSL - .table(DSL.name(("term_" + bloomTermId + "_" + this.table.getName()))); final Field[] insertFields = { DSL.field("term_id", BIGINTUNSIGNED.nullable(false)), DSL.field("type_id", BIGINTUNSIGNED.nullable(false)), @@ -118,24 +129,26 @@ public void insertFiltersIntoCategoryTable() { } /** - * Equal only if all object parameters are same value and the instances of DSLContext are same + * Equal if the compared object is the same instance or if the compared object is of the same class, object fields + * are equal, and DSLContext is the same instance * * @param object object compared against - * @return true if all object is same class, object fields are equal and DSLContext is same instance + * @return true if equal */ @Override - public boolean equals(Object object) { + public boolean equals(final Object object) { if (this == object) return true; if (object == null || getClass() != object.getClass()) return false; - TableFilters cast = (TableFilters) object; - return bloomTermId == cast.bloomTermId && recordsInMetadata.equals(cast.recordsInMetadata) && ctx == cast.ctx - && table.equals(cast.table) && searchTerm.equals(cast.searchTerm); + final 
TableFilters cast = (TableFilters) object; + return bloomTermId == cast.bloomTermId && recordsInMetadata + .equals(cast.recordsInMetadata) && ctx == cast.ctx && table.equals(cast.table) + && categoryTable.equals(cast.categoryTable) && searchTerm.equals(cast.searchTerm); } @Override public int hashCode() { - return Objects.hash(recordsInMetadata, ctx, table, searchTerm, bloomTermId); + return Objects.hash(recordsInMetadata, ctx, table, categoryTable, searchTerm, bloomTermId); } } diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java index 6d328964..95cd4efc 100644 --- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java +++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java @@ -49,6 +49,8 @@ import org.jooq.impl.DSL; import org.jooq.types.ULong; +import java.util.Objects; + import static org.jooq.impl.SQLDataType.BIGINTUNSIGNED; /** @@ -57,24 +59,38 @@ public final class CategoryTableCondition implements QueryCondition { private final Table comparedTo; - private final long bloomTermId; + private final Condition bloomTermCondition; + private final Condition typeIdCondition; + private final Table categoryTable; public CategoryTableCondition(Table comparedTo, long bloomTermId) { + this( + comparedTo, + DSL.field("term_id", BIGINTUNSIGNED.nullable(false)).eq(ULong.valueOf(bloomTermId)), + DSL.field("type_id", BIGINTUNSIGNED.nullable(false)).eq((Field) comparedTo.field("filter_type_id")), DSL.table(DSL.name(("term_" + bloomTermId + "_" + comparedTo.getName()))) + ); + } + + public CategoryTableCondition( + Table comparedTo, + Condition bloomTermCondition, + Condition typeIdCondition, + Table categoryTable + ) { this.comparedTo = comparedTo; - this.bloomTermId = bloomTermId; + this.bloomTermCondition = bloomTermCondition; + this.typeIdCondition = typeIdCondition; 
+ this.categoryTable = categoryTable; } public Condition condition() { - final Table categoryTable = DSL.table(DSL.name(("term_" + bloomTermId + "_" + comparedTo.getName()))); - final Field termIdField = DSL.field("term_id", BIGINTUNSIGNED.nullable(false)); - final Field typeIdField = DSL.field("type_id", BIGINTUNSIGNED.nullable(false)); final Field filterField = DSL.field(DSL.name(categoryTable.getName(), "filter"), byte[].class); // select filter with correct bloom term id and filter type id from category table final SelectConditionStep> selectFilterStep = DSL .select(filterField) .from(categoryTable) - .where(termIdField.eq(ULong.valueOf(bloomTermId))) - .and(typeIdField.eq((Field) comparedTo.field("filter_type_id"))); + .where(bloomTermCondition) + .and(typeIdCondition); // compares category table filter byte[] against bloom filter byte[] final Condition filterFieldCondition = DSL .function("bloommatch", Boolean.class, selectFilterStep.asField(), comparedTo.field("filter")) @@ -88,11 +104,15 @@ public Condition condition() { public boolean equals(final Object object) { if (this == object) return true; - if (object == null) - return false; - if (object.getClass() != this.getClass()) + if (object == null || getClass() != object.getClass()) return false; final CategoryTableCondition cast = (CategoryTableCondition) object; - return this.bloomTermId == cast.bloomTermId && this.comparedTo.equals(cast.comparedTo); + return comparedTo.equals(cast.comparedTo) && bloomTermCondition.equals(cast.bloomTermCondition) + && typeIdCondition.equals(cast.typeIdCondition) && categoryTable.equals(cast.categoryTable); + } + + @Override + public int hashCode() { + return Objects.hash(comparedTo, bloomTermCondition, typeIdCondition, categoryTable); } } diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java index 7b36f73c..395f029c 100644 --- 
a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java +++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java @@ -83,7 +83,12 @@ public Condition condition() { } Condition newCondition = condition; if (tableSet.isEmpty()) { - final DatabaseTables patternMatchTables = new PatternMatchTables(config.context(), value); + // get all tables that pattern match with search value + final QueryCondition regexLikeCondition = new RegexLikeFiltertypePatternCondition(value); + final DatabaseTables patternMatchTables = new ConditionMatchBloomDBTables( + config.context(), + regexLikeCondition + ); tableSet.addAll(patternMatchTables.tables()); } if (!tableSet.isEmpty()) { @@ -94,11 +99,17 @@ public Condition condition() { Condition combinedNullFilterCondition = DSL.noCondition(); for (final Table table : tableSet) { - final CategoryTable categoryTable = new CreatedCategoryTable( - new SearchTermFiltersInserted(new CategoryTableImpl(config, table, value)) + // create a category temp table with filters + final CategoryTable categoryTable = new CategoryTableWithFilters( + config.context(), + table, + config.bloomTermId(), + value ); + categoryTable.create(); + // create table condition for table final Condition nullFilterCondition = table.field("filter").isNull(); - final QueryCondition tableCondition = categoryTable.bloommatchCondition(); + final QueryCondition tableCondition = new CategoryTableCondition(table, config.bloomTermId()); combinedTableCondition = combinedTableCondition.or(tableCondition.condition()); combinedNullFilterCondition = combinedNullFilterCondition.and(nullFilterCondition); } diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternCondition.java similarity index 89% rename from 
src/main/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchCondition.java rename to src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternCondition.java index ce023b53..f5414e82 100644 --- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchCondition.java +++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternCondition.java @@ -53,15 +53,15 @@ import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB; /** true if BLOOMDB.FILTERTYPE.PATTERN regex like with input value */ -public final class PatternMatchCondition implements QueryCondition { +public final class RegexLikeFiltertypePatternCondition implements QueryCondition { private final Field valueField; - public PatternMatchCondition(String input) { + public RegexLikeFiltertypePatternCondition(String input) { this(DSL.val(input)); } - public PatternMatchCondition(Field valueField) { + public RegexLikeFiltertypePatternCondition(Field valueField) { this.valueField = valueField; } @@ -75,7 +75,7 @@ public boolean equals(final Object object) { return true; if (object == null || object.getClass() != this.getClass()) return false; - final PatternMatchCondition cast = (PatternMatchCondition) object; + final RegexLikeFiltertypePatternCondition cast = (RegexLikeFiltertypePatternCondition) object; return valueField.equals(cast.valueField); } diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImplTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImplTest.java index e0595578..8f27a639 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImplTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImplTest.java @@ -46,7 +46,6 @@ package com.teragrep.pth_06.planner.bloomfilter; import org.apache.spark.util.sketch.BloomFilter; -import org.jooq.Condition; import org.jooq.DSLContext; import org.jooq.Table; 
import org.jooq.impl.DSL; @@ -131,19 +130,6 @@ void tearDown() { }); } - @Test - public void testNonCreatedEmptyTable() { - DSLContext ctx = DSL.using(conn); - Table table = ctx - .meta() - .filterSchemas(s -> s.getName().equals("bloomdb")) - .filterTables(t -> !t.getName().equals("filtertype")) - .getTables() - .get(0); - - Assertions.assertDoesNotThrow(new CategoryTableImpl(ctx, table, 0L, "test")::bloommatchCondition); - } - @Test public void testCreatedWithEmptyTable() { DSLContext ctx = DSL.using(conn); @@ -153,10 +139,11 @@ public void testCreatedWithEmptyTable() { .filterTables(t -> !t.getName().equals("filtertype")) .getTables() .get(0); - - CategoryTable tempTable = new CategoryTableImpl(ctx, table, 0L, "test"); - tempTable.create(); - RuntimeException ex = Assertions.assertThrows(RuntimeException.class, tempTable::insertFilters); + CategoryTable tempTable = new CategoryTableWithFilters( + new CategoryTableImpl(ctx, table, 0L, "test"), + new TableFilters(ctx, table, 0L, "test") + ); + RuntimeException ex = Assertions.assertThrows(RuntimeException.class, tempTable::create); Assertions.assertEquals("Origin table was empty", ex.getMessage()); } @@ -186,9 +173,8 @@ public void testFilterInsertion() { .getTables() .get(0); - CategoryTable categoryTable = new CategoryTableImpl(ctx, table, 0L, "ip=192.168.1.1"); - Assertions.assertDoesNotThrow(categoryTable::create); - Assertions.assertDoesNotThrow(categoryTable::insertFilters); + CategoryTable tempTable = new CategoryTableWithFilters(ctx, table, 0L, "192.168.1.1"); + Assertions.assertDoesNotThrow(tempTable::create); BloomFilter filter = Assertions.assertDoesNotThrow(() -> { ResultSet rs = conn.prepareStatement("SELECT * FROM term_0_target").executeQuery(); rs.absolute(1); @@ -201,42 +187,6 @@ public void testFilterInsertion() { Assertions.assertFalse(filter.mightContain("168.1.1")); } - @Test - public void testConditionGeneration() { - fillTargetTable(); - DSLContext ctx = DSL.using(conn); - Table table = ctx - 
.meta() - .filterSchemas(s -> s.getName().equals("bloomdb")) - .filterTables(t -> !t.getName().equals("filtertype")) - .getTables() - .get(0); - - CategoryTableImpl tempTable = new CategoryTableImpl(ctx, table, 0L, "test"); - Condition tableCond = tempTable.bloommatchCondition().condition(); - String e = "(\n" + " bloommatch(\n" + " (\n" + " select \"term_0_target\".\"filter\"\n" - + " from \"term_0_target\"\n" + " where (\n" + " term_id = 0\n" - + " and type_id = \"bloomdb\".\"target\".\"filter_type_id\"\n" + " )\n" + " ),\n" - + " \"bloomdb\".\"target\".\"filter\"\n" + " ) = true\n" - + " and \"bloomdb\".\"target\".\"filter\" is not null\n" + ")"; - Assertions.assertEquals(e, tableCond.toString()); - } - - @Test - public void testBloomTerm() { - fillTargetTable(); - DSLContext ctx = DSL.using(conn); - Table table = ctx - .meta() - .filterSchemas(s -> s.getName().equals("bloomdb")) - .filterTables(t -> !t.getName().equals("filtertype")) - .getTables() - .get(0); - CategoryTableImpl tempTable = new CategoryTableImpl(ctx, table, 1L, "test"); - Condition condition = tempTable.bloommatchCondition().condition(); - Assertions.assertTrue(condition.toString().contains("term_1_")); - } - @Test public void testEquality() { fillTargetTable(); diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTablesTest.java similarity index 86% rename from src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java rename to src/test/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTablesTest.java index 5313a4c4..5aae238c 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/PatternMatchTablesTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTablesTest.java @@ -62,7 +62,7 @@ import java.util.stream.Collectors; @TestInstance(TestInstance.Lifecycle.PER_CLASS) -public class 
PatternMatchTablesTest { +public class ConditionMatchBloomDBTablesTest { final String url = "jdbc:h2:mem:test;MODE=MariaDB;DATABASE_TO_LOWER=TRUE;CASE_INSENSITIVE_IDENTIFIERS=TRUE"; final String userName = "sa"; @@ -138,8 +138,8 @@ void tearDown() { public void testSingleMatch() { DSLContext ctx = DSL.using(conn); String input = "192.168.1.1"; - PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input); - List> result = patternMatchTables.tables(); + ConditionMatchBloomDBTables conditionMatchBloomDBTables = new ConditionMatchBloomDBTables(ctx, input); + List> result = conditionMatchBloomDBTables.tables(); Assertions.assertEquals(1, result.size()); Assertions.assertEquals("pattern_test_ip", result.get(0).getName()); } @@ -148,8 +148,8 @@ public void testSingleMatch() { public void testSearchTermTokenizedMatch() { DSLContext ctx = DSL.using(conn); String input = "target_ip=192.168.1.1"; - PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input); - List> result = patternMatchTables.tables(); + ConditionMatchBloomDBTables conditionMatchBloomDBTables = new ConditionMatchBloomDBTables(ctx, input); + List> result = conditionMatchBloomDBTables.tables(); Assertions.assertEquals(1, result.size()); Assertions.assertEquals("pattern_test_ip", result.get(0).getName()); } @@ -158,8 +158,8 @@ public void testSearchTermTokenizedMatch() { public void testRegexMatch() { DSLContext ctx = DSL.using(conn); String input = "biz baz boz data has no content today (very important though) but it would still have if one had a means to extract it from (here is something else important as well) the strange patterns called parentheses that it seems to have been put in."; - PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input); - List> result = patternMatchTables.tables(); + ConditionMatchBloomDBTables conditionMatchBloomDBTables = new ConditionMatchBloomDBTables(ctx, input); + List> result = conditionMatchBloomDBTables.tables(); 
Assertions.assertEquals(1, result.size()); Assertions.assertEquals("parentheses_test", result.get(0).getName()); } @@ -168,9 +168,9 @@ public void testRegexMatch() { public void testMultipleMatch() { DSLContext ctx = DSL.using(conn); String input = "255.255.255.255"; - PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input); - List> result = patternMatchTables.tables(); - List> result2 = patternMatchTables.tables(); + ConditionMatchBloomDBTables conditionMatchBloomDBTables = new ConditionMatchBloomDBTables(ctx, input); + List> result = conditionMatchBloomDBTables.tables(); + List> result2 = conditionMatchBloomDBTables.tables(); List tableNames = result.stream().map(Named::getName).collect(Collectors.toList()); Assertions.assertEquals(2, result.size()); Assertions.assertEquals(2, result2.size()); @@ -182,8 +182,8 @@ public void testMultipleMatch() { public void testNoMatch() { DSLContext ctx = DSL.using(conn); String input = "testinput"; - PatternMatchTables patternMatchTables = new PatternMatchTables(ctx, input); - List> result = patternMatchTables.tables(); + ConditionMatchBloomDBTables conditionMatchBloomDBTables = new ConditionMatchBloomDBTables(ctx, input); + List> result = conditionMatchBloomDBTables.tables(); Assertions.assertTrue(result.isEmpty()); } @@ -191,8 +191,8 @@ public void testNoMatch() { public void equalsTest() { DSLContext ctx = DSL.using(conn); String input = "testinput"; - PatternMatchTables eq1 = new PatternMatchTables(ctx, input); - PatternMatchTables eq2 = new PatternMatchTables(ctx, input); + ConditionMatchBloomDBTables eq1 = new ConditionMatchBloomDBTables(ctx, input); + ConditionMatchBloomDBTables eq2 = new ConditionMatchBloomDBTables(ctx, input); Assertions.assertEquals(eq1, eq2); Assertions.assertEquals(eq2, eq1); } @@ -200,8 +200,8 @@ public void equalsTest() { @Test public void differentInputNotEqualsTest() { DSLContext ctx = DSL.using(conn); - PatternMatchTables eq1 = new PatternMatchTables(ctx, "testinput"); - 
PatternMatchTables eq2 = new PatternMatchTables(ctx, "anotherinput"); + ConditionMatchBloomDBTables eq1 = new ConditionMatchBloomDBTables(ctx, "testinput"); + ConditionMatchBloomDBTables eq2 = new ConditionMatchBloomDBTables(ctx, "anotherinput"); Assertions.assertNotEquals(eq1, eq2); Assertions.assertNotEquals(eq2, eq1); } @@ -210,8 +210,8 @@ public void differentInputNotEqualsTest() { public void differentDSLContextNotEqualsTest() { DSLContext ctx1 = DSL.using(conn); DSLContext ctx2 = DSL.using(conn); - PatternMatchTables eq1 = new PatternMatchTables(ctx1, "testinput"); - PatternMatchTables eq2 = new PatternMatchTables(ctx2, "testinput"); + ConditionMatchBloomDBTables eq1 = new ConditionMatchBloomDBTables(ctx1, "testinput"); + ConditionMatchBloomDBTables eq2 = new ConditionMatchBloomDBTables(ctx2, "testinput"); Assertions.assertNotEquals(eq1, eq2); Assertions.assertNotEquals(eq2, eq1); } diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java index 27932eb4..9c0ea43d 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java @@ -45,6 +45,7 @@ */ package com.teragrep.pth_06.planner.bloomfilter; +import nl.altindag.log.LogCaptor; import nl.jqno.equalsverifier.EqualsVerifier; import org.apache.spark.util.sketch.BloomFilter; import org.junit.jupiter.api.Assertions; @@ -115,6 +116,18 @@ public void testTokenizerTokens() { Assertions.assertTrue(resultFilter.mightContain("SearchValuePatternInThisString")); } + @Test + public void testTokensSizeTooLarge() { + LogCaptor captor = Assertions.assertDoesNotThrow(() -> LogCaptor.forClass(SearchTermBloomFilter.class)); + String searchTerm = ""; + SearchTermBloomFilter filter = new SearchTermBloomFilter(10L, 0.01, new TokenizedValue(searchTerm)); + 
Assertions.assertDoesNotThrow(filter::bytes); + String e = "Number of tokens <132> was larger than the expected value <10>, resulting FPP <0.6002870054872016>"; + String warn = captor.getWarnLogs().get(0); + Assertions.assertEquals(warn, e); + + } + @Test public void equalsHashCodeContractTest() { EqualsVerifier diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java index b2a99963..fb8be48a 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java @@ -176,6 +176,12 @@ void testMultipleFilterTypes() { Assertions.assertEquals(second.get(1), ULong.valueOf("2000")); } + @Test + public void testNullTableException() { + DSLContext ctx = DSL.using(conn); + Assertions.assertThrows(NullPointerException.class, () -> new TableFilterTypesFromMetadata(ctx, null, 0L)); + } + @Test public void testEquality() { DSLContext ctx = DSL.using(conn); @@ -188,7 +194,6 @@ public void testEquality() { TableFilterTypesFromMetadata result1 = new TableFilterTypesFromMetadata(ctx, table, 0L); TableFilterTypesFromMetadata result2 = new TableFilterTypesFromMetadata(ctx, table, 0L); Assertions.assertEquals(result1, result2); - Assertions.assertEquals(result2, result1); } @Test @@ -202,9 +207,9 @@ public void testNotEquals() { .get(0); TableFilterTypesFromMetadata result1 = new TableFilterTypesFromMetadata(ctx, table, 0L); TableFilterTypesFromMetadata result2 = new TableFilterTypesFromMetadata(ctx, table, 1L); - TableFilterTypesFromMetadata result3 = new TableFilterTypesFromMetadata(ctx, null, 0L); + System.out.println(result1); + System.out.println(result2); Assertions.assertNotEquals(result1, result2); - Assertions.assertNotEquals(result1, result3); } void 
insertSizedFilterIntoTargetTable(int filterTypeId) { diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java index 19a4e1d2..79b11b44 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java @@ -247,6 +247,7 @@ public void equalsHashCodeContractTest() { .withNonnullFields("table") .withNonnullFields("searchTerm") .withNonnullFields("bloomTermId") + .withNonnullFields("categoryTable") .verify(); } diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java index c57e2312..0a911c79 100644 --- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java @@ -166,6 +166,11 @@ void testBloomTermId() { Assertions.assertEquals(e, cond.condition().toString()); } + @Test + public void testNullTableException() { + Assertions.assertThrows(NullPointerException.class, () -> new CategoryTableCondition(null, 0L)); + } + @Test public void testEquality() { fillTargetTable(); @@ -195,9 +200,7 @@ public void testNonEquality() { .get(0); CategoryTableCondition cond1 = new CategoryTableCondition(target1, 0L); CategoryTableCondition cond2 = new CategoryTableCondition(target1, 1L); - CategoryTableCondition cond3 = new CategoryTableCondition(null, 1L); Assertions.assertNotEquals(cond1, cond2); - Assertions.assertNotEquals(cond1, cond3); } void fillTargetTable() { diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternConditionTest.java similarity index 75% rename from 
src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java rename to src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternConditionTest.java index 2f089a8e..4cb74da9 100644 --- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/PatternMatchConditionTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternConditionTest.java @@ -56,40 +56,40 @@ * * @see org.jooq.QueryPart */ -class PatternMatchConditionTest { +class RegexLikeFiltertypePatternConditionTest { @Test void testCondition() { - Condition condition = new PatternMatchCondition("test").condition(); + Condition condition = new RegexLikeFiltertypePatternCondition("test").condition(); String e = "('test' like_regex \"bloomdb\".\"filtertype\".\"pattern\")"; Assertions.assertEquals(e, condition.toString()); } @Test void testEquality() { - PatternMatchCondition cond1 = new PatternMatchCondition("test"); - PatternMatchCondition cond2 = new PatternMatchCondition("test"); + RegexLikeFiltertypePatternCondition cond1 = new RegexLikeFiltertypePatternCondition("test"); + RegexLikeFiltertypePatternCondition cond2 = new RegexLikeFiltertypePatternCondition("test"); Assertions.assertEquals(cond1, cond2); } @Test void testNotEquals() { - PatternMatchCondition cond1 = new PatternMatchCondition("test"); - PatternMatchCondition cond2 = new PatternMatchCondition("next"); + RegexLikeFiltertypePatternCondition cond1 = new RegexLikeFiltertypePatternCondition("test"); + RegexLikeFiltertypePatternCondition cond2 = new RegexLikeFiltertypePatternCondition("next"); Assertions.assertNotEquals(cond1, cond2); } @Test void testHashCode() { - PatternMatchCondition cond1 = new PatternMatchCondition("test"); - PatternMatchCondition cond2 = new PatternMatchCondition("test"); - PatternMatchCondition notEq = new PatternMatchCondition("next"); + RegexLikeFiltertypePatternCondition cond1 = new 
RegexLikeFiltertypePatternCondition("test"); + RegexLikeFiltertypePatternCondition cond2 = new RegexLikeFiltertypePatternCondition("test"); + RegexLikeFiltertypePatternCondition notEq = new RegexLikeFiltertypePatternCondition("next"); Assertions.assertEquals(cond1.hashCode(), cond2.hashCode()); Assertions.assertNotEquals(cond1.hashCode(), notEq.hashCode()); } @Test public void equalsHashCodeContractTest() { - EqualsVerifier.forClass(PatternMatchCondition.class).withNonnullFields("valueField").verify(); + EqualsVerifier.forClass(RegexLikeFiltertypePatternCondition.class).withNonnullFields("valueField").verify(); } } diff --git a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java index 4fb58634..6ae11d55 100644 --- a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java +++ b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java @@ -51,10 +51,9 @@ import org.jooq.impl.DSL; import org.junit.jupiter.api.*; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; +import java.sql.*; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -112,7 +111,7 @@ void setup() { for (String pattern : patternList) { PreparedStatement filterType = conn.prepareStatement(typeSQL); filterType.setInt(1, id); - filterType.setInt(2, 1000); + filterType.setInt(2, 1000 * id); filterType.setDouble(3, 0.01); filterType.setString(4, pattern); filterType.executeUpdate(); @@ -339,6 +338,134 @@ void multipleSearchTermTwoAndOneMatchWithoutFiltersTest() { .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255"))); } + @Test + void testSinglePatternMatchTempTableValues() { + String q = ""; + ConditionWalker walker = new ConditionWalker(DSL.using(conn), true); + Assertions.assertDoesNotThrow(() -> 
walker.fromString(q, false)); + ResultSet result = Assertions + .assertDoesNotThrow(() -> conn.prepareStatement("SELECT * FROM term_0_pattern_test_ip").executeQuery()); + Assertions.assertDoesNotThrow(() -> { + int colCount = result.getMetaData().getColumnCount(); + Assertions.assertEquals(4, colCount); + int loops = 0; + while (result.next()) { + Assertions.assertEquals(1, result.getLong("id")); + Assertions.assertEquals(0, result.getLong("term_id")); + Assertions.assertEquals(1, result.getLong("type_id")); + BloomFilter filter = BloomFilter.readFrom(new ByteArrayInputStream(result.getBytes("filter"))); + Assertions.assertTrue(filter.mightContain("192.168.1.1")); + Assertions.assertFalse(filter.mightContain("192")); + Assertions.assertFalse(filter.mightContain("192.")); + Assertions.assertFalse(filter.mightContain("192.168.1")); + loops++; + } + Assertions.assertEquals(1, loops); + }); + } + + @Test + void testMultiplePatternMatchTempTableValues() { + String q = ""; + ConditionWalker walker = new ConditionWalker(DSL.using(conn), true); + Assertions.assertDoesNotThrow(() -> walker.fromString(q, false)); + + // check pattern_test_ip + ResultSet result1 = Assertions + .assertDoesNotThrow(() -> conn.prepareStatement("SELECT * FROM term_0_pattern_test_ip").executeQuery()); + Assertions.assertDoesNotThrow(() -> { + int colCount = result1.getMetaData().getColumnCount(); + Assertions.assertEquals(4, colCount); + int loops = 0; + while (result1.next()) { + Assertions.assertEquals(1, result1.getLong("id")); + Assertions.assertEquals(0, result1.getLong("term_id")); + Assertions.assertEquals(1, result1.getLong("type_id")); + BloomFilter filter = BloomFilter.readFrom(new ByteArrayInputStream(result1.getBytes("filter"))); + Assertions.assertEquals(filter.bitSize(), BloomFilter.create(1000, 0.01).bitSize()); + Assertions.assertTrue(filter.mightContain("255.255.255.255")); + Assertions.assertFalse(filter.mightContain("255")); + Assertions.assertFalse(filter.mightContain("255.")); 
+ Assertions.assertFalse(filter.mightContain("255.255.")); + loops++; + } + Assertions.assertEquals(1, loops); + }); + + // check pattern_test_ip244 table + ResultSet result2 = Assertions + .assertDoesNotThrow(() -> conn.prepareStatement("SELECT * FROM term_0_pattern_test_ip255").executeQuery()); + Assertions.assertDoesNotThrow(() -> { + int colCount = result2.getMetaData().getColumnCount(); + Assertions.assertEquals(4, colCount); + int loops = 0; + while (result2.next()) { + Assertions.assertEquals(1, result2.getLong("id")); + Assertions.assertEquals(0, result2.getLong("term_id")); + Assertions.assertEquals(2, result2.getLong("type_id")); + BloomFilter filter = BloomFilter.readFrom(new ByteArrayInputStream(result2.getBytes("filter"))); + Assertions.assertEquals(filter.bitSize(), BloomFilter.create(2000, 0.01).bitSize()); + Assertions.assertTrue(filter.mightContain("255.255.255.255")); + Assertions.assertFalse(filter.mightContain("255")); + Assertions.assertFalse(filter.mightContain("255.")); + Assertions.assertFalse(filter.mightContain("255.255.")); + loops++; + } + Assertions.assertEquals(1, loops); + }); + } + + @Test + void testCorrectTokensForTwoSearchTerms() { + ConditionWalker walker = new ConditionWalker(DSL.using(conn), true); + String q = ""; + Assertions.assertDoesNotThrow(() -> walker.fromString(q, false)); + + // check term 0 + ResultSet result1 = Assertions + .assertDoesNotThrow(() -> conn.prepareStatement("SELECT * FROM term_0_pattern_test_ip").executeQuery()); + Assertions.assertDoesNotThrow(() -> { + int colCount = result1.getMetaData().getColumnCount(); + Assertions.assertEquals(4, colCount); + int loops = 0; + while (result1.next()) { + Assertions.assertEquals(1, result1.getLong("id")); + Assertions.assertEquals(0, result1.getLong("term_id")); + Assertions.assertEquals(1, result1.getLong("type_id")); + BloomFilter filter = BloomFilter.readFrom(new ByteArrayInputStream(result1.getBytes("filter"))); + Assertions.assertEquals(filter.bitSize(), 
BloomFilter.create(1000, 0.01).bitSize()); + Assertions.assertTrue(filter.mightContain("192.168.1.1")); + Assertions.assertFalse(filter.mightContain("192")); + Assertions.assertFalse(filter.mightContain("168.")); + Assertions.assertFalse(filter.mightContain("1.1")); + loops++; + } + Assertions.assertEquals(1, loops); + }); + + // check term 1 + ResultSet result2 = Assertions + .assertDoesNotThrow(() -> conn.prepareStatement("SELECT * FROM term_1_pattern_test_ip").executeQuery()); + Assertions.assertDoesNotThrow(() -> { + int colCount = result2.getMetaData().getColumnCount(); + Assertions.assertEquals(4, colCount); + int loops = 0; + while (result2.next()) { + Assertions.assertEquals(1, result2.getLong("id")); + Assertions.assertEquals(1, result2.getLong("term_id")); + Assertions.assertEquals(1, result2.getLong("type_id")); + BloomFilter filter = BloomFilter.readFrom(new ByteArrayInputStream(result2.getBytes("filter"))); + Assertions.assertEquals(filter.bitSize(), BloomFilter.create(1000, 0.01).bitSize()); + Assertions.assertTrue(filter.mightContain("192.000.1.1")); + Assertions.assertFalse(filter.mightContain("192")); + Assertions.assertFalse(filter.mightContain("000.")); + Assertions.assertFalse(filter.mightContain("192.000")); + loops++; + } + Assertions.assertEquals(1, loops); + }); + } + private void writeFilter(String tableName, int filterId) { Assertions.assertDoesNotThrow(() -> { conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute(); From aefb66a1c7c8aa37197e24bf34756bd16639cd78 Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Tue, 29 Oct 2024 14:35:17 +0200 Subject: [PATCH 15/26] use UncheckedIOException constructor --- .../pth_06/planner/bloomfilter/SearchTermBloomFilter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java index c3132c8a..25882752 
100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java @@ -104,7 +104,7 @@ public byte[] bytes() { return filterBAOS.toByteArray(); } catch (IOException e) { - throw new UncheckedIOException(new IOException("Error writing filter bytes: " + e.getMessage())); + throw new UncheckedIOException("Error writing filter bytes: ", e); } } From 73efeb98c17d718bb019ca58f34f15a1e363e891 Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Tue, 29 Oct 2024 14:39:31 +0200 Subject: [PATCH 16/26] TableFilters returns a batch that CategoryTableWithFilters executes --- .../bloomfilter/CategoryTableWithFilters.java | 2 +- .../pth_06/planner/bloomfilter/SafeBatch.java | 74 +++++++++++++++++++ .../planner/bloomfilter/TableFilters.java | 10 ++- .../planner/bloomfilter/TableFiltersTest.java | 6 +- 4 files changed, 86 insertions(+), 6 deletions(-) create mode 100644 src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java index a664b636..9a848513 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java @@ -71,6 +71,6 @@ public CategoryTableWithFilters(CategoryTable origin, TableFilters filters) { @Override public void create() { origin.create(); - filters.insertFiltersIntoCategoryTable(); + filters.asBatch().execute(); } } diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java new file mode 100644 index 00000000..c2709af8 --- /dev/null +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java @@ -0,0 +1,74 @@ +/* + * Teragrep 
Archive Datasource (pth_06) + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.pth_06.planner.bloomfilter; + +import org.jooq.Batch; +import org.jooq.exception.DataAccessException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SafeBatch { + + private static final Logger LOGGER = LoggerFactory.getLogger(SafeBatch.class); + + private final Batch batch; + + public SafeBatch(final Batch batch) { + this.batch = batch; + } + + public void execute() { + try { + int[] results = batch.execute(); + if (LOGGER.isTraceEnabled()) { + LOGGER.trace("Batch added <{}> row(s)", results.length); + } + } + catch (final DataAccessException e) { + throw new DataAccessException("Error executing batch: " + e); + } + } +} diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java index 51d6c885..4fa65dbb 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java @@ -49,6 +49,8 @@ import org.jooq.impl.DSL; import org.jooq.types.ULong; +import java.util.ArrayList; +import java.util.List; import java.util.Objects; import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB; @@ -93,10 +95,11 @@ public TableFilters( this.searchTerm = searchTerm; } - public void insertFiltersIntoCategoryTable() { + public SafeBatch asBatch() { if (table == null) { throw new IllegalStateException("Origin table was null"); } + final List> queryList = new ArrayList<>(); final Result result = recordsInMetadata.toResult(); for (final Record record : result) { final Field[] insertFields = { @@ -124,8 +127,11 @@ public void insertFiltersIntoCategoryTable() { DSL.val(record.getValue(BLOOMDB.FILTERTYPE.ID), ULong.class), DSL.val(filter.bytes(), byte[].class) }; - ctx.insertInto(categoryTable).columns(insertFields).values(valueFields).execute(); + InsertValuesStepN query = 
ctx.insertInto(categoryTable).columns(insertFields).values(valueFields); + queryList.add(query); } + final Batch batch = ctx.batch(queryList); + return new SafeBatch(batch); } /** diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java index 79b11b44..15f05237 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java @@ -150,7 +150,7 @@ public void testInsertFiltersIntoCategoryTable() { .getTables() .get(0); DataAccessException exception = Assertions - .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, "192.168.1.1").insertFiltersIntoCategoryTable()); + .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, "192.168.1.1").asBatch().execute()); Assertions.assertTrue(exception.getMessage().contains("term_0_target\" (term_id, type_id, \"filter\")")); } @@ -166,7 +166,7 @@ public void testInsertFiltersIntoCategoryTableRegexExtract() { .get(0); String query = "biz baz boz data has no content today (very important though) but it would still have if one had a means to extract it from (here is something else important as well) the strange patterns called parentheses that it seems to have been put in."; DataAccessException exception = Assertions - .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, query).insertFiltersIntoCategoryTable()); + .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, query).asBatch().execute()); Assertions.assertTrue(exception.getMessage().contains("term_0_target\" (term_id, type_id, \"filter\")")); } @@ -181,7 +181,7 @@ public void testInsertFiltersWithoutPatternMatch() { .getTables() .get(0); IllegalStateException exception = Assertions - .assertThrows(IllegalStateException.class, () -> new TableFilters(ctx, table, 0L, 
"nomatch").insertFiltersIntoCategoryTable()); + .assertThrows(IllegalStateException.class, () -> new TableFilters(ctx, table, 0L, "nomatch").asBatch().execute()); Assertions.assertTrue(exception.getMessage().contains("Trying to insert empty filter")); } From 441e40b01565cc59f58cdf131db3cd5ab7ff1176 Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Tue, 29 Oct 2024 15:08:44 +0200 Subject: [PATCH 17/26] add test for SafeBatch --- .../pth_06/planner/bloomfilter/SafeBatch.java | 4 +- .../planner/bloomfilter/SafeBatchTest.java | 140 ++++++++++++++++++ 2 files changed, 142 insertions(+), 2 deletions(-) create mode 100644 src/test/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatchTest.java diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java index c2709af8..60bc56ae 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java @@ -50,7 +50,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class SafeBatch { +public final class SafeBatch { private static final Logger LOGGER = LoggerFactory.getLogger(SafeBatch.class); @@ -62,7 +62,7 @@ public SafeBatch(final Batch batch) { public void execute() { try { - int[] results = batch.execute(); + final int[] results = batch.execute(); if (LOGGER.isTraceEnabled()) { LOGGER.trace("Batch added <{}> row(s)", results.length); } diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatchTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatchTest.java new file mode 100644 index 00000000..fdbdd266 --- /dev/null +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatchTest.java @@ -0,0 +1,140 @@ +/* + * Teragrep Archive Datasource (pth_06) + * Copyright (C) 2021-2024 Suomen Kanuuna Oy + * + * This program is free software: you can redistribute it and/or modify + * it 
under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * + * Additional permission under GNU Affero General Public License version 3 + * section 7 + * + * If you modify this Program, or any covered work, by linking or combining it + * with other code, such other code is not for that reason alone subject to any + * of the requirements of the GNU Affero GPL version 3 as long as this Program + * is the same Program as licensed from Suomen Kanuuna Oy without any additional + * modifications. + * + * Supplemented terms under GNU Affero General Public License version 3 + * section 7 + * + * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified + * versions must be marked as "Modified version of" The Program. + * + * Names of the licensors and authors may not be used for publicity purposes. + * + * No rights are granted for use of trade names, trademarks, or service marks + * which are in The Program if any. + * + * Licensee must indemnify licensors and authors for any liability that these + * contractual assumptions impose on licensors and authors. + * + * To the extent this program is licensed as part of the Commercial versions of + * Teragrep, the applicable Commercial License may apply to this file if you as + * a licensee so wish it. 
+ */ +package com.teragrep.pth_06.planner.bloomfilter; + +import org.jooq.DSLContext; +import org.jooq.exception.DataAccessException; +import org.jooq.impl.DSL; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.util.ArrayList; +import java.util.List; + +class SafeBatchTest { + + final String url = "jdbc:h2:mem:test;MODE=MariaDB;DATABASE_TO_LOWER=TRUE;CASE_INSENSITIVE_IDENTIFIERS=TRUE"; + final String userName = "sa"; + final String password = ""; + final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password)); + + @BeforeEach + void setup() { + Assertions.assertDoesNotThrow(() -> { + conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute(); + conn.prepareStatement("USE BLOOMDB").execute(); + conn.prepareStatement("DROP TABLE IF EXISTS target").execute(); + String targetTable = "CREATE TABLE `target`(" + + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY," + + " `partition_id` bigint(20) unsigned NOT NULL UNIQUE" + ")"; + conn.prepareStatement(targetTable).execute(); + }); + } + + @Test + public void testOneInsert() { + String sql = "INSERT INTO target (`partition_id`) VALUES(12345)"; + DSLContext ctx = DSL.using(conn); + SafeBatch batch = new SafeBatch(ctx.batch(ctx.query(sql))); + Assertions.assertDoesNotThrow(batch::execute); + ResultSet result = Assertions + .assertDoesNotThrow(() -> conn.prepareStatement("SELECT `partition_id` FROM target").executeQuery()); + Assertions.assertDoesNotThrow(() -> { + int loops = 0; + while (result.next()) { + loops++; + Assertions.assertEquals(12345L, result.getLong(1)); + } + Assertions.assertEquals(1, loops); + }); + } + + @Test + public void testInsertTwo() { + String sql1 = "INSERT INTO target (`partition_id`) VALUES(12345)"; + String sql2 = "INSERT INTO target 
(`partition_id`) VALUES(54321)"; + DSLContext ctx = DSL.using(conn); + SafeBatch batch = new SafeBatch(ctx.batch(ctx.query(sql1), ctx.query(sql2))); + Assertions.assertDoesNotThrow(batch::execute); + ResultSet result = Assertions + .assertDoesNotThrow(() -> conn.prepareStatement("SELECT `partition_id` FROM target").executeQuery()); + Assertions.assertDoesNotThrow(() -> { + List values = new ArrayList<>(); + int loops = 0; + while (result.next()) { + loops++; + values.add(result.getLong(1)); + } + Assertions.assertEquals(2, loops); + Assertions.assertEquals(2, values.size()); + Assertions.assertEquals(12345L, values.get(0)); + Assertions.assertEquals(54321L, values.get(1)); + }); + } + + @Test + public void testDataAccessException() { + String sql1 = "INSERT INTO target (`partition_id`) VALUES(12345)"; + String sql2 = "INSERT INTO target (`partition_id`) VALUES(12345)"; + DSLContext ctx = DSL.using(conn); + SafeBatch batch = new SafeBatch(ctx.batch(ctx.query(sql1), ctx.query(sql2))); + Assertions.assertThrows(DataAccessException.class, batch::execute); + ResultSet result = Assertions + .assertDoesNotThrow(() -> conn.prepareStatement("SELECT `partition_id` FROM target").executeQuery()); + Assertions.assertDoesNotThrow(() -> { + int loops = 0; + while (result.next()) { + Assertions.assertEquals(12345L, result.getLong(1)); + loops++; + } + Assertions.assertEquals(1, loops); + }); + } +} From 4844ebd693e83134dc4a6f92bca4c3b954c99bcf Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Tue, 29 Oct 2024 15:47:13 +0200 Subject: [PATCH 18/26] update comments and clean up code, add constructors for RegexLikeCondition --- .../ConditionMatchBloomDBTables.java | 14 ++++---- .../pth_06/planner/bloomfilter/SafeBatch.java | 1 + .../TableFilterTypesFromMetadata.java | 6 ++++ .../planner/bloomfilter/TableFilters.java | 8 ++--- .../conditions/IndexStatementCondition.java | 4 ++- ...Condition.java => RegexLikeCondition.java} | 32 +++++++++++++++---- ...nTest.java => 
RegexLikeConditionTest.java} | 20 ++++++------ 7 files changed, 56 insertions(+), 29 deletions(-) rename src/main/java/com/teragrep/pth_06/planner/walker/conditions/{RegexLikeFiltertypePatternCondition.java => RegexLikeCondition.java} (71%) rename src/test/java/com/teragrep/pth_06/planner/walker/conditions/{RegexLikeFiltertypePatternConditionTest.java => RegexLikeConditionTest.java} (75%) diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java index 8ccd9cf4..1eae8048 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java @@ -45,7 +45,7 @@ */ package com.teragrep.pth_06.planner.bloomfilter; -import com.teragrep.pth_06.planner.walker.conditions.RegexLikeFiltertypePatternCondition; +import com.teragrep.pth_06.planner.walker.conditions.RegexLikeCondition; import com.teragrep.pth_06.planner.walker.conditions.QueryCondition; import org.jooq.DSLContext; import org.jooq.Field; @@ -59,7 +59,7 @@ import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB; /** - * Class to get a collection of Tables that match the given PatternMatchCondition + * Class to get a collection of Tables that match the given QueryCondition */ public final class ConditionMatchBloomDBTables implements DatabaseTables { @@ -69,7 +69,7 @@ public final class ConditionMatchBloomDBTables implements DatabaseTables { private final QueryCondition condition; public ConditionMatchBloomDBTables(DSLContext ctx, String pattern) { - this(ctx, new RegexLikeFiltertypePatternCondition(pattern)); + this(ctx, new RegexLikeCondition(pattern, BLOOMDB.FILTERTYPE.PATTERN)); } public ConditionMatchBloomDBTables(DSLContext ctx, QueryCondition condition) { @@ -78,9 +78,9 @@ public ConditionMatchBloomDBTables(DSLContext ctx, QueryCondition 
condition) { } /** - * List of tables from bloomdb that match patternMatchCondition Note: Table records are not fetched fully + * List of tables from bloomdb that match QueryCondition Note: Table records are not fetched fully * - * @return List of tables that matched condition and were not empty + * @return List of tables that matched QueryCondition and were not empty */ public List> tables() { final List> tables = ctx @@ -91,8 +91,8 @@ public List> tables() { .from(t) .leftJoin(BLOOMDB.FILTERTYPE)// join filtertype to access patterns .on(BLOOMDB.FILTERTYPE.ID.eq((Field) t.field("filter_type_id"))) - .where(condition.condition())// select tables that match pattern condition - .limit(1)// limit 1 since we are checking only if table is not empty + .where(condition.condition())// select tables that match the condition + .limit(1)// limit 1 since we are checking only if the table is not empty .fetch() .isNotEmpty() // select table if not empty ) diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java index 60bc56ae..f661dad4 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java @@ -60,6 +60,7 @@ public SafeBatch(final Batch batch) { this.batch = batch; } + /** Does not roll back successfully inserted values on exception */ public void execute() { try { final int[] results = batch.execute(); diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java index 4b171589..e2cbec46 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java @@ -111,6 +111,12 @@ public Result toResult() { return records; } + /** + * 
Equal only if all values are equal and same instance of DSLContext + * + * @param object object compared against + * @return true if all object is same class, object fields are equal and DSLContext is same instance + */ @Override public boolean equals(final Object object) { if (this == object) diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java index 4fa65dbb..58716b0c 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java @@ -99,7 +99,7 @@ public SafeBatch asBatch() { if (table == null) { throw new IllegalStateException("Origin table was null"); } - final List> queryList = new ArrayList<>(); + final List> insertValuesStepNList = new ArrayList<>(); final Result result = recordsInMetadata.toResult(); for (final Record record : result) { final Field[] insertFields = { @@ -127,10 +127,10 @@ public SafeBatch asBatch() { DSL.val(record.getValue(BLOOMDB.FILTERTYPE.ID), ULong.class), DSL.val(filter.bytes(), byte[].class) }; - InsertValuesStepN query = ctx.insertInto(categoryTable).columns(insertFields).values(valueFields); - queryList.add(query); + final InsertValuesStepN insertStep = ctx.insertInto(categoryTable).columns(insertFields).values(valueFields); + insertValuesStepNList.add(insertStep); } - final Batch batch = ctx.batch(queryList); + final Batch batch = ctx.batch(insertValuesStepNList); return new SafeBatch(batch); } diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java index 395f029c..eeb92f61 100644 --- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java +++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java @@ -56,6 +56,8 @@ import 
java.util.HashSet; import java.util.Set; +import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB; + public final class IndexStatementCondition implements QueryCondition, BloomQueryCondition { private final Logger LOGGER = LoggerFactory.getLogger(IndexStatementCondition.class); @@ -84,7 +86,7 @@ public Condition condition() { Condition newCondition = condition; if (tableSet.isEmpty()) { // get all tables that pattern match with search value - final QueryCondition regexLikeCondition = new RegexLikeFiltertypePatternCondition(value); + final QueryCondition regexLikeCondition = new RegexLikeCondition(value, BLOOMDB.FILTERTYPE.PATTERN); final DatabaseTables patternMatchTables = new ConditionMatchBloomDBTables( config.context(), regexLikeCondition diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeCondition.java similarity index 71% rename from src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternCondition.java rename to src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeCondition.java index f5414e82..d171acdb 100644 --- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternCondition.java +++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeCondition.java @@ -52,21 +52,39 @@ import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB; -/** true if BLOOMDB.FILTERTYPE.PATTERN regex like with input value */ -public final class RegexLikeFiltertypePatternCondition implements QueryCondition { +/** true if input value regex like comparedTo value, compared against BLOOMDB.FILTERTYPE.PATTERN by default */ +public final class RegexLikeCondition implements QueryCondition { private final Field valueField; + private final Field comparedToField; - public RegexLikeFiltertypePatternCondition(String input) { - 
this(DSL.val(input)); + public RegexLikeCondition(String input) { + this(DSL.val(input), BLOOMDB.FILTERTYPE.PATTERN); } - public RegexLikeFiltertypePatternCondition(Field valueField) { + public RegexLikeCondition(Field input) { + this(input, BLOOMDB.FILTERTYPE.PATTERN); + } + + public RegexLikeCondition(String input, String comparedTo) { + this(DSL.val(input), DSL.val(comparedTo)); + } + + public RegexLikeCondition(String input, Field comparedTo) { + this(DSL.val(input), comparedTo); + } + + public RegexLikeCondition(Field input, String comparedTo) { + this(input, DSL.val(comparedTo)); + } + + public RegexLikeCondition(Field valueField, Field comparedToField) { this.valueField = valueField; + this.comparedToField = comparedToField; } public Condition condition() { - return valueField.likeRegex(BLOOMDB.FILTERTYPE.PATTERN); + return valueField.likeRegex(comparedToField); } @Override @@ -75,7 +93,7 @@ public boolean equals(final Object object) { return true; if (object == null || object.getClass() != this.getClass()) return false; - final RegexLikeFiltertypePatternCondition cast = (RegexLikeFiltertypePatternCondition) object; + final RegexLikeCondition cast = (RegexLikeCondition) object; return valueField.equals(cast.valueField); } diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java similarity index 75% rename from src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternConditionTest.java rename to src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java index 4cb74da9..1c7e5343 100644 --- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeFiltertypePatternConditionTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java @@ -56,40 +56,40 @@ * * @see org.jooq.QueryPart */ -class 
RegexLikeFiltertypePatternConditionTest { +class RegexLikeConditionTest { @Test void testCondition() { - Condition condition = new RegexLikeFiltertypePatternCondition("test").condition(); + Condition condition = new RegexLikeCondition("test").condition(); String e = "('test' like_regex \"bloomdb\".\"filtertype\".\"pattern\")"; Assertions.assertEquals(e, condition.toString()); } @Test void testEquality() { - RegexLikeFiltertypePatternCondition cond1 = new RegexLikeFiltertypePatternCondition("test"); - RegexLikeFiltertypePatternCondition cond2 = new RegexLikeFiltertypePatternCondition("test"); + RegexLikeCondition cond1 = new RegexLikeCondition("test"); + RegexLikeCondition cond2 = new RegexLikeCondition("test"); Assertions.assertEquals(cond1, cond2); } @Test void testNotEquals() { - RegexLikeFiltertypePatternCondition cond1 = new RegexLikeFiltertypePatternCondition("test"); - RegexLikeFiltertypePatternCondition cond2 = new RegexLikeFiltertypePatternCondition("next"); + RegexLikeCondition cond1 = new RegexLikeCondition("test"); + RegexLikeCondition cond2 = new RegexLikeCondition("next"); Assertions.assertNotEquals(cond1, cond2); } @Test void testHashCode() { - RegexLikeFiltertypePatternCondition cond1 = new RegexLikeFiltertypePatternCondition("test"); - RegexLikeFiltertypePatternCondition cond2 = new RegexLikeFiltertypePatternCondition("test"); - RegexLikeFiltertypePatternCondition notEq = new RegexLikeFiltertypePatternCondition("next"); + RegexLikeCondition cond1 = new RegexLikeCondition("test"); + RegexLikeCondition cond2 = new RegexLikeCondition("test"); + RegexLikeCondition notEq = new RegexLikeCondition("next"); Assertions.assertEquals(cond1.hashCode(), cond2.hashCode()); Assertions.assertNotEquals(cond1.hashCode(), notEq.hashCode()); } @Test public void equalsHashCodeContractTest() { - EqualsVerifier.forClass(RegexLikeFiltertypePatternCondition.class).withNonnullFields("valueField").verify(); + 
EqualsVerifier.forClass(RegexLikeCondition.class).withNonnullFields("valueField").verify(); } } From 145fcb682c68a3499c0905093ff33868ad7b221a Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Tue, 29 Oct 2024 16:06:11 +0200 Subject: [PATCH 19/26] use qualified names update tests --- .../bloomfilter/SearchTermBloomFilter.java | 4 ++-- .../planner/bloomfilter/TableFilters.java | 18 +++++++++--------- .../walker/conditions/RegexLikeCondition.java | 4 ++-- .../planner/bloomfilter/TableFiltersTest.java | 6 +++--- .../conditions/RegexLikeConditionTest.java | 5 ++++- 5 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java index 25882752..17c24b9c 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java @@ -109,12 +109,12 @@ public byte[] bytes() { } @Override - public boolean equals(Object object) { + public boolean equals(final Object object) { if (this == object) return true; if (object == null || getClass() != object.getClass()) return false; - SearchTermBloomFilter cast = (SearchTermBloomFilter) object; + final SearchTermBloomFilter cast = (SearchTermBloomFilter) object; return expected.equals(cast.expected) && fpp.equals(cast.fpp) && stringTokens.equals(cast.stringTokens); } diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java index 58716b0c..660be480 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java @@ -64,7 +64,7 @@ public final class TableFilters { private final TableRecords recordsInMetadata; private final DSLContext ctx; private final Table 
table; - private final Table categoryTable; + private final Table thisTable; private final String searchTerm; private final long bloomTermId; @@ -83,14 +83,14 @@ public TableFilters( TableFilterTypesFromMetadata recordsInMetadata, DSLContext ctx, Table table, - Table categoryTable, + Table thisTable, long bloomTermId, String searchTerm ) { this.recordsInMetadata = recordsInMetadata; this.ctx = ctx; this.table = table; - this.categoryTable = categoryTable; + this.thisTable = thisTable; this.bloomTermId = bloomTermId; this.searchTerm = searchTerm; } @@ -103,9 +103,9 @@ public SafeBatch asBatch() { final Result result = recordsInMetadata.toResult(); for (final Record record : result) { final Field[] insertFields = { - DSL.field("term_id", BIGINTUNSIGNED.nullable(false)), - DSL.field("type_id", BIGINTUNSIGNED.nullable(false)), - DSL.field(DSL.name(categoryTable.getName(), "filter"), byte[].class) + DSL.field(DSL.name(thisTable.getName(), "term_id"), BIGINTUNSIGNED.nullable(false)), + DSL.field(DSL.name(thisTable.getName(), "type_id"), BIGINTUNSIGNED.nullable(false)), + DSL.field(DSL.name(thisTable.getName(), "filter"), byte[].class) }; final ULong expectedField = record .getValue(DSL.field(DSL.name(table.getName(), "expectedElements"), ULong.class)); @@ -127,7 +127,7 @@ public SafeBatch asBatch() { DSL.val(record.getValue(BLOOMDB.FILTERTYPE.ID), ULong.class), DSL.val(filter.bytes(), byte[].class) }; - final InsertValuesStepN insertStep = ctx.insertInto(categoryTable).columns(insertFields).values(valueFields); + final InsertValuesStepN insertStep = ctx.insertInto(thisTable).columns(insertFields).values(valueFields); insertValuesStepNList.add(insertStep); } final Batch batch = ctx.batch(insertValuesStepNList); @@ -150,11 +150,11 @@ public boolean equals(final Object object) { final TableFilters cast = (TableFilters) object; return bloomTermId == cast.bloomTermId && recordsInMetadata .equals(cast.recordsInMetadata) && ctx == cast.ctx && table.equals(cast.table) - && 
categoryTable.equals(cast.categoryTable) && searchTerm.equals(cast.searchTerm); + && thisTable.equals(cast.thisTable) && searchTerm.equals(cast.searchTerm); } @Override public int hashCode() { - return Objects.hash(recordsInMetadata, ctx, table, categoryTable, searchTerm, bloomTermId); + return Objects.hash(recordsInMetadata, ctx, table, thisTable, searchTerm, bloomTermId); } } diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeCondition.java index d171acdb..9026227b 100644 --- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeCondition.java +++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeCondition.java @@ -94,11 +94,11 @@ public boolean equals(final Object object) { if (object == null || object.getClass() != this.getClass()) return false; final RegexLikeCondition cast = (RegexLikeCondition) object; - return valueField.equals(cast.valueField); + return valueField.equals(cast.valueField) && comparedToField.equals(cast.comparedToField); } @Override public int hashCode() { - return Objects.hash(valueField); + return Objects.hash(valueField, comparedToField); } } diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java index 15f05237..48b6f7bf 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java @@ -151,7 +151,7 @@ public void testInsertFiltersIntoCategoryTable() { .get(0); DataAccessException exception = Assertions .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, "192.168.1.1").asBatch().execute()); - Assertions.assertTrue(exception.getMessage().contains("term_0_target\" (term_id, type_id, \"filter\")")); + 
Assertions.assertTrue(exception.getMessage().contains("insert into \"term_0_target\" (\"term_id\", \"type_id\", \"filter\") values")); } @Test @@ -167,7 +167,7 @@ public void testInsertFiltersIntoCategoryTableRegexExtract() { String query = "biz baz boz data has no content today (very important though) but it would still have if one had a means to extract it from (here is something else important as well) the strange patterns called parentheses that it seems to have been put in."; DataAccessException exception = Assertions .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, query).asBatch().execute()); - Assertions.assertTrue(exception.getMessage().contains("term_0_target\" (term_id, type_id, \"filter\")")); + Assertions.assertTrue(exception.getMessage().contains("\"term_0_target\" (\"term_id\", \"type_id\", \"filter\")")); } @Test @@ -247,7 +247,7 @@ public void equalsHashCodeContractTest() { .withNonnullFields("table") .withNonnullFields("searchTerm") .withNonnullFields("bloomTermId") - .withNonnullFields("categoryTable") + .withNonnullFields("thisTable") .verify(); } diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java index 1c7e5343..e8652458 100644 --- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java @@ -90,6 +90,9 @@ void testHashCode() { @Test public void equalsHashCodeContractTest() { - EqualsVerifier.forClass(RegexLikeCondition.class).withNonnullFields("valueField").verify(); + EqualsVerifier.forClass(RegexLikeCondition.class) + .withNonnullFields("valueField") + .withNonnullFields("comparedToField") + .verify(); } } From 40c21654bbb485586d2d56d4528cc55ead9b16a9 Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Wed, 30 Oct 2024 12:47:54 +0200 
Subject: [PATCH 20/26] more descriptive naming of methods and variables, update comments, javadoc and exception messages --- .../pth_06/planner/StreamDBClient.java | 3 +- .../bloomfilter/CategoryTableImpl.java | 5 ++- .../ConditionMatchBloomDBTables.java | 7 +-- .../bloomfilter/RegexExtractedValue.java | 2 +- .../bloomfilter/SearchTermBloomFilter.java | 6 +-- .../TableFilterTypesFromMetadata.java | 5 ++- .../planner/bloomfilter/TableFilters.java | 3 -- .../planner/bloomfilter/TokensAsStrings.java | 16 +++++++ .../planner/walker/ConditionWalker.java | 12 ++--- .../conditions/BloomQueryCondition.java | 2 +- .../conditions/CategoryTableCondition.java | 4 +- .../walker/conditions/ElementCondition.java | 7 +-- .../conditions/IndexStatementCondition.java | 2 +- .../SearchTermBloomFilterTest.java | 8 ++-- .../planner/bloomfilter/TableFiltersTest.java | 5 ++- .../CategoryTableConditionTest.java | 2 +- .../conditions/EarliestConditionTest.java | 2 +- .../conditions/ElementConditionTest.java | 2 +- .../IndexStatementConditionTest.java | 12 ++--- .../pth_06/walker/ConditionWalkerTest.java | 44 +++++++++---------- 20 files changed, 82 insertions(+), 67 deletions(-) diff --git a/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java b/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java index 57084f3b..ccae7ea6 100644 --- a/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java +++ b/src/main/java/com/teragrep/pth_06/planner/StreamDBClient.java @@ -344,7 +344,8 @@ private Table getTableStatement(Condition journaldbCondition, Date day) .on(JOURNALDB.LOGFILE.HOST_ID.eq(GetArchivedObjectsFilterTable.host_id).and(JOURNALDB.LOGFILE.LOGTAG.eq(GetArchivedObjectsFilterTable.tag))); if (bloomEnabled) { - final Set> tables = walker.patternMatchTables(); + // join all tables needed for the condition generated by walker + final Set> tables = walker.conditionRequiredTables(); if (!tables.isEmpty()) { for (final Table table : tables) { if (LOGGER.isInfoEnabled()) { 
diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java index 45b2a767..02c4b72f 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java @@ -128,10 +128,11 @@ public void create() { } /** - * Equal only if all object parameters are same value and the instances of DSLContext are same + * Equal if the compared object is the same instance or if the compared object is of the same class, object fields + * are equal, and DSLContext is the same instance * * @param object object compared against - * @return true if all object is same class, object fields are equal and DSLContext is same instance + * @return true if equal */ @Override public boolean equals(final Object object) { diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java index 1eae8048..654537b3 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java @@ -102,10 +102,11 @@ public List> tables() { } /** - * Equal only if all values are equal and same instance of DSLContext + * Equal if the compared object is the same instance or if the compared object is of the same class, object fields + * are equal, and DSLContext is the same instance * * @param object object compared against - * @return true if all object is same class, object fields are equal and DSLContext is same instance + * @return true if equal */ @Override public boolean equals(final Object object) { @@ -116,6 +117,6 @@ public boolean equals(final Object object) { if (object.getClass() != this.getClass()) return false; final ConditionMatchBloomDBTables cast = 
(ConditionMatchBloomDBTables) object; - return this.condition.equals(cast.condition) && this.ctx == cast.ctx; // only same instance of DSLContext is equal + return this.condition.equals(cast.condition) && this.ctx == cast.ctx; } } diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java index bf3cfbd9..a723a0dc 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/RegexExtractedValue.java @@ -79,7 +79,7 @@ public boolean equals(final Object object) { return true; if (object == null || getClass() != object.getClass()) return false; - RegexExtractedValue cast = (RegexExtractedValue) object; + final RegexExtractedValue cast = (RegexExtractedValue) object; return value.equals(cast.value) && pattern.equals(cast.pattern); } diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java index 17c24b9c..280d3e89 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java @@ -83,7 +83,7 @@ public byte[] bytes() { LOGGER.debug("Create filter from Record with values: expected <{}>, fpp <{}>", expected, fpp); if (stringTokens.isEmpty()) { throw new IllegalStateException( - "Trying to insert empty filter, pattern match joined table should always have tokens" + "Tried to create a filter without any items" ); } final BloomFilter filter = BloomFilter.create(expected, fpp); @@ -94,7 +94,7 @@ public byte[] bytes() { if (stringTokens.size() > expected) { LOGGER .warn( - "Number of tokens <{}> was larger than the expected value <{}>, resulting FPP <{}>", + "Number of items <{}> was larger than the expected number of items <{}>, resulting FPP 
<{}>", stringTokens.size(), expected, filter.expectedFpp() ); } @@ -103,7 +103,7 @@ public byte[] bytes() { filter.writeTo(filterBAOS); return filterBAOS.toByteArray(); } - catch (IOException e) { + catch (final IOException e) { throw new UncheckedIOException("Error writing filter bytes: ", e); } } diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java index e2cbec46..a5efa749 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadata.java @@ -112,10 +112,11 @@ public Result toResult() { } /** - * Equal only if all values are equal and same instance of DSLContext + * Equal if the compared object is the same instance or if the compared object is of the same class, object fields + * are equal, and DSLContext is the same instance * * @param object object compared against - * @return true if all object is same class, object fields are equal and DSLContext is same instance + * @return true if equal */ @Override public boolean equals(final Object object) { diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java index 660be480..1d3f03d3 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java @@ -96,9 +96,6 @@ public TableFilters( } public SafeBatch asBatch() { - if (table == null) { - throw new IllegalStateException("Origin table was null"); - } final List> insertValuesStepNList = new ArrayList<>(); final Result result = recordsInMetadata.toResult(); for (final Record record : result) { diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStrings.java 
b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStrings.java index 9dea7ef0..b0c9dce8 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStrings.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStrings.java @@ -48,6 +48,7 @@ import com.teragrep.blf_01.Token; import java.util.List; +import java.util.Objects; import java.util.stream.Collectors; public final class TokensAsStrings implements Tokenizable { @@ -62,4 +63,19 @@ public TokensAsStrings(Tokenizable origin) { public List tokens() { return origin.tokens().stream().map(Token::toString).collect(Collectors.toList()); } + + @Override + public boolean equals(final Object object) { + if (this == object) + return true; + if (object == null || object.getClass() != this.getClass()) + return false; + final TokensAsStrings cast = (TokensAsStrings) object; + return origin.equals(cast.origin); + } + + @Override + public int hashCode() { + return Objects.hash(origin); + } } diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/ConditionWalker.java b/src/main/java/com/teragrep/pth_06/planner/walker/ConditionWalker.java index 24ceffd1..ad85fa15 100644 --- a/src/main/java/com/teragrep/pth_06/planner/walker/ConditionWalker.java +++ b/src/main/java/com/teragrep/pth_06/planner/walker/ConditionWalker.java @@ -101,12 +101,8 @@ public Condition fromString(String inXml, boolean streamQuery) throws Exception return fromString(inXml); } - /** - * Set of all the tables that pattern matched with tokenized value search elements the walkers has traversed - * - * @return Set of Tables that had a pattern match - */ - public Set> patternMatchTables() { + /** Set of all tables needed to be joined to the query using the condition generated by this walker */ + public Set> conditionRequiredTables() { return combinedMatchSet; } @@ -165,9 +161,9 @@ Condition emitElem(final Element current) { new ConditionConfig(ctx, streamQuery, bloomEnabled, withoutFilters, bloomTermId) ); if 
(elementCondition.isBloomSearchCondition()) { - final Set> elementPatternMatchTables = elementCondition.patternMatchTables(); + final Set> conditionRequiredTables = elementCondition.requiredTables(); // add tables condition found to walker pattern match tables - patternMatchTables().addAll(elementPatternMatchTables); + conditionRequiredTables().addAll(conditionRequiredTables); bloomTermId++; } return elementCondition.condition(); diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/BloomQueryCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/BloomQueryCondition.java index 198260c3..a3a0a360 100644 --- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/BloomQueryCondition.java +++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/BloomQueryCondition.java @@ -53,5 +53,5 @@ public interface BloomQueryCondition { boolean isBloomSearchCondition(); - Set> patternMatchTables(); + Set> requiredTables(); } diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java index 95cd4efc..f3e10e6d 100644 --- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java +++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableCondition.java @@ -85,13 +85,13 @@ public CategoryTableCondition( public Condition condition() { final Field filterField = DSL.field(DSL.name(categoryTable.getName(), "filter"), byte[].class); - // select filter with correct bloom term id and filter type id from category table + // select filter with the correct bloom term id and filter type id from the category table final SelectConditionStep> selectFilterStep = DSL .select(filterField) .from(categoryTable) .where(bloomTermCondition) .and(typeIdCondition); - // compares category table filter byte[] against bloom filter byte[] + // function 'bloommatch' compares 
category table filter byte[] against bloom filter byte[] final Condition filterFieldCondition = DSL .function("bloommatch", Boolean.class, selectFilterStep.asField(), comparedTo.field("filter")) .eq(true); diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/ElementCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/ElementCondition.java index 51adb310..4faad3f6 100644 --- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/ElementCondition.java +++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/ElementCondition.java @@ -109,7 +109,7 @@ public Condition condition() { condition = indexStatement.condition(); } } - // bloom search can return condition unmodified + // bloom search can return the condition unmodified if (condition.equals(DSL.noCondition()) && !isBloomSearchCondition()) { throw new IllegalStateException("Unsupported Element tag " + tag); } @@ -124,9 +124,10 @@ public boolean isBloomSearchCondition() { && config.bloomEnabled(); } - public Set> patternMatchTables() { + /** A set of tables needed to be joined to the query to use this condition */ + public Set> requiredTables() { final String value = element.value(); - return new IndexStatementCondition(value, config).patternMatchTables(); + return new IndexStatementCondition(value, config).requiredTables(); } @Override diff --git a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java index eeb92f61..c8fb9505 100644 --- a/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java +++ b/src/main/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementCondition.java @@ -131,7 +131,7 @@ public boolean isBloomSearchCondition() { } @Override - public Set> patternMatchTables() { + public Set> requiredTables() { if (tableSet.isEmpty()) { condition(); } diff --git 
a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java index 9c0ea43d..5ede2361 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java @@ -78,7 +78,7 @@ public void testCorrectFilterSize() { } @Test - public void testNoRegexExtractedTokensException() { + public void testEmptyFilterException() { String searchTerm = "NoMatch"; SearchTermBloomFilter filter = new SearchTermBloomFilter( 1000L, @@ -86,7 +86,7 @@ public void testNoRegexExtractedTokensException() { new RegexExtractedValue(searchTerm, "Pattern") ); IllegalStateException e = Assertions.assertThrows(IllegalStateException.class, filter::bytes); - String expectedMessage = "Trying to insert empty filter, pattern match joined table should always have tokens"; + String expectedMessage = "Tried to create a filter without any items"; Assertions.assertEquals(expectedMessage, e.getMessage()); } @@ -122,9 +122,9 @@ public void testTokensSizeTooLarge() { String searchTerm = ""; SearchTermBloomFilter filter = new SearchTermBloomFilter(10L, 0.01, new TokenizedValue(searchTerm)); Assertions.assertDoesNotThrow(filter::bytes); - String e = "Number of tokens <132> was larger than the expected value <10>, resulting FPP <0.6002870054872016>"; + String e = "Number of items <132> was larger than the expected number of items <10>, resulting FPP <0.6002870054872016>"; String warn = captor.getWarnLogs().get(0); - Assertions.assertEquals(warn, e); + Assertions.assertEquals(e, warn); } diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java index 48b6f7bf..cf019ced 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java +++ 
b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java @@ -171,7 +171,7 @@ public void testInsertFiltersIntoCategoryTableRegexExtract() { } @Test - public void testInsertFiltersWithoutPatternMatch() { + public void testCreateFilterWithoutItemsException() { fillTargetTable(1); DSLContext ctx = DSL.using(conn); Table table = ctx @@ -182,7 +182,8 @@ public void testInsertFiltersWithoutPatternMatch() { .get(0); IllegalStateException exception = Assertions .assertThrows(IllegalStateException.class, () -> new TableFilters(ctx, table, 0L, "nomatch").asBatch().execute()); - Assertions.assertTrue(exception.getMessage().contains("Trying to insert empty filter")); + String expected = "Tried to create a filter without any items"; + Assertions.assertEquals(expected, exception.getMessage()); } @Test diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java index 0a911c79..fef658b9 100644 --- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/CategoryTableConditionTest.java @@ -61,7 +61,7 @@ /** * Comparing Condition equality using toString() since jooq Condition uses just toString() to check for equality. 
- * inherited from QueryPart + * Inherited from the QueryPart * * @see org.jooq.QueryPart */ diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/EarliestConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/EarliestConditionTest.java index 352305f3..070fc902 100644 --- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/EarliestConditionTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/EarliestConditionTest.java @@ -51,7 +51,7 @@ /** * Comparing Condition equality using toString() since jooq Condition uses just toString() to check for equality. - * inherited from QueryPart + * Inherited from the QueryPart * * @see org.jooq.QueryPart */ diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/ElementConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/ElementConditionTest.java index 30f614ed..06f3bce9 100644 --- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/ElementConditionTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/ElementConditionTest.java @@ -61,7 +61,7 @@ /** * Comparing Condition equality using toString() since jooq Condition uses just toString() to check for equality. 
- * inherited from QueryPart + * Inherited from the QueryPart * * @see org.jooq.QueryPart */ diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementConditionTest.java index b180b2dc..818946d1 100644 --- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementConditionTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/IndexStatementConditionTest.java @@ -160,8 +160,8 @@ void noMatchesTest() { IndexStatementCondition cond2 = new IndexStatementCondition("test", withoutFiltersConfig, e2); Assertions.assertEquals(e1, cond1.condition()); Assertions.assertEquals(e2, cond2.condition()); - Assertions.assertTrue(cond1.patternMatchTables().isEmpty()); - Assertions.assertTrue(cond2.patternMatchTables().isEmpty()); + Assertions.assertTrue(cond1.requiredTables().isEmpty()); + Assertions.assertTrue(cond2.requiredTables().isEmpty()); } @Test @@ -177,7 +177,7 @@ void oneMatchingTableTest() { + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is not null\n" + " )\n" + " or \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + ")"; Assertions.assertEquals(e, cond.condition().toString()); - Assertions.assertEquals(1, cond.patternMatchTables().size()); + Assertions.assertEquals(1, cond.requiredTables().size()); } @Test @@ -187,7 +187,7 @@ void testOneMatchWithoutFilters() { IndexStatementCondition cond = new IndexStatementCondition("192.168.1.1", config); String e = "\"bloomdb\".\"pattern_test_ip\".\"filter\" is null"; Assertions.assertEquals(e, cond.condition().toString()); - Assertions.assertEquals(1, cond.patternMatchTables().size()); + Assertions.assertEquals(1, cond.requiredTables().size()); } @Test @@ -198,7 +198,7 @@ void testTwoMatchWithoutFilters() { String e = "(\n" + " \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is null\n" + ")"; 
Assertions.assertEquals(e, cond.condition().toString()); - Assertions.assertEquals(2, cond.patternMatchTables().size()); + Assertions.assertEquals(2, cond.requiredTables().size()); } @Test @@ -220,7 +220,7 @@ void twoMatchingTableTest() { + " \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is null\n" + " )\n" + ")"; Assertions.assertEquals(e, cond.condition().toString()); - Assertions.assertEquals(2, cond.patternMatchTables().size()); + Assertions.assertEquals(2, cond.requiredTables().size()); } @Test diff --git a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java index 6ae11d55..53b96d4a 100644 --- a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java +++ b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java @@ -137,7 +137,7 @@ void bloomNoMatchTest() { String e = "\"getArchivedObjects_filter_table\".\"directory\" like 'haproxy'"; Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false)); Assertions.assertEquals(e, cond.toString()); - Assertions.assertEquals(0, walker.patternMatchTables().size()); + Assertions.assertEquals(0, walker.conditionRequiredTables().size()); } @Test @@ -147,7 +147,7 @@ void bloomNoMatchStreamQueryTest() { String e = "\"streamdb\".\"stream\".\"directory\" like 'haproxy'"; Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, true)); Assertions.assertEquals(e, cond.toString()); - Assertions.assertEquals(0, walker.patternMatchTables().size()); + Assertions.assertEquals(0, walker.conditionRequiredTables().size()); } @Test @@ -157,7 +157,7 @@ void bloomNoMatchStreamQueryWithoutFiltersTest() { String e = "\"streamdb\".\"stream\".\"directory\" like 'haproxy'"; Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, true)); Assertions.assertEquals(e, cond.toString()); - Assertions.assertEquals(0, 
walker.patternMatchTables().size()); + Assertions.assertEquals(0, walker.conditionRequiredTables().size()); } @Test @@ -167,7 +167,7 @@ void singleTablePatternMatchStreamQueryTest() { String e = "\"streamdb\".\"stream\".\"directory\" like 'haproxy'"; Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, true)); Assertions.assertEquals(e, cond.toString()); - Assertions.assertEquals(0, walker.patternMatchTables().size()); + Assertions.assertEquals(0, walker.conditionRequiredTables().size()); } @Test @@ -184,9 +184,9 @@ void singleTablePatternMatchTest() { + " or \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + " )\n" + ")"; Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false)); Assertions.assertEquals(e, cond.toString()); - Assertions.assertEquals(1, walker.patternMatchTables().size()); + Assertions.assertEquals(1, walker.conditionRequiredTables().size()); Assertions - .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip"))); + .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip"))); } @Test @@ -197,9 +197,9 @@ void singleTablePatternMatchWithoutFiltersTest() { + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + ")"; Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false)); Assertions.assertEquals(e, cond.toString()); - Assertions.assertEquals(1, walker.patternMatchTables().size()); + Assertions.assertEquals(1, walker.conditionRequiredTables().size()); Assertions - .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip"))); + .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip"))); } @Test @@ -222,11 +222,11 @@ void twoTablePatternMatchTest() { + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is null\n" + " )\n" + " )\n" + ")"; Condition cond = Assertions.assertDoesNotThrow(() -> 
walker.fromString(q, false)); Assertions.assertEquals(e, cond.toString()); - Assertions.assertEquals(2, walker.patternMatchTables().size()); + Assertions.assertEquals(2, walker.conditionRequiredTables().size()); Assertions - .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip"))); + .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip"))); Assertions - .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255"))); + .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255"))); } @Test @@ -264,11 +264,11 @@ void twoTablePatternMatchWithoutFiltersTest() { + " and \"bloomdb\".\"pattern_test_ip255\".\"filter\" is null\n" + ")"; Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false)); Assertions.assertEquals(e, cond.toString()); - Assertions.assertEquals(2, walker.patternMatchTables().size()); + Assertions.assertEquals(2, walker.conditionRequiredTables().size()); Assertions - .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip"))); + .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip"))); Assertions - .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255"))); + .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255"))); } @Test @@ -284,9 +284,9 @@ void multipleSearchTermTestOneMatchTest() { + " or \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + ")"; Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false)); Assertions.assertEquals(e, cond.toString()); - Assertions.assertEquals(1, walker.patternMatchTables().size()); + Assertions.assertEquals(1, walker.conditionRequiredTables().size()); Assertions - 
.assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip"))); + .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip"))); } @Test @@ -315,11 +315,11 @@ void multipleSearchTermTwoAndOneMatchTest() { + " or \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + " )\n" + ")"; Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false)); Assertions.assertEquals(e, cond.toString()); - Assertions.assertEquals(2, walker.patternMatchTables().size()); + Assertions.assertEquals(2, walker.conditionRequiredTables().size()); Assertions - .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip"))); + .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip"))); Assertions - .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255"))); + .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255"))); } @Test @@ -331,11 +331,11 @@ void multipleSearchTermTwoAndOneMatchWithoutFiltersTest() { + " and \"bloomdb\".\"pattern_test_ip\".\"filter\" is null\n" + ")"; Condition cond = Assertions.assertDoesNotThrow(() -> walker.fromString(q, false)); Assertions.assertEquals(e, cond.toString()); - Assertions.assertEquals(2, walker.patternMatchTables().size()); + Assertions.assertEquals(2, walker.conditionRequiredTables().size()); Assertions - .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip"))); + .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip"))); Assertions - .assertTrue(walker.patternMatchTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255"))); + .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255"))); } @Test From 
6513861af794a9ad6143beae2cafc143e208f974 Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Wed, 30 Oct 2024 12:59:30 +0200 Subject: [PATCH 21/26] apply spotless --- .../bloomfilter/SearchTermBloomFilter.java | 4 +--- .../pth_06/planner/bloomfilter/TableFilters.java | 5 ++--- .../planner/bloomfilter/TableFiltersTest.java | 10 ++++++++-- .../conditions/RegexLikeConditionTest.java | 3 ++- .../pth_06/walker/ConditionWalkerTest.java | 16 ++++++++++++---- 5 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java index 280d3e89..5246446d 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java @@ -82,9 +82,7 @@ public SearchTermBloomFilter(Long expected, Double fpp, List stringToken public byte[] bytes() { LOGGER.debug("Create filter from Record with values: expected <{}>, fpp <{}>", expected, fpp); if (stringTokens.isEmpty()) { - throw new IllegalStateException( - "Tried to create a filter without any items" - ); + throw new IllegalStateException("Tried to create a filter without any items"); } final BloomFilter filter = BloomFilter.create(expected, fpp); for (final String token : stringTokens) { diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java index 1d3f03d3..cd11f431 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java @@ -145,9 +145,8 @@ public boolean equals(final Object object) { if (object == null || getClass() != object.getClass()) return false; final TableFilters cast = (TableFilters) object; - return bloomTermId == cast.bloomTermId && 
recordsInMetadata - .equals(cast.recordsInMetadata) && ctx == cast.ctx && table.equals(cast.table) - && thisTable.equals(cast.thisTable) && searchTerm.equals(cast.searchTerm); + return bloomTermId == cast.bloomTermId && recordsInMetadata.equals(cast.recordsInMetadata) && ctx == cast.ctx + && table.equals(cast.table) && thisTable.equals(cast.thisTable) && searchTerm.equals(cast.searchTerm); } @Override diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java index cf019ced..b46964c9 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java @@ -151,7 +151,12 @@ public void testInsertFiltersIntoCategoryTable() { .get(0); DataAccessException exception = Assertions .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, "192.168.1.1").asBatch().execute()); - Assertions.assertTrue(exception.getMessage().contains("insert into \"term_0_target\" (\"term_id\", \"type_id\", \"filter\") values")); + Assertions + .assertTrue( + exception + .getMessage() + .contains("insert into \"term_0_target\" (\"term_id\", \"type_id\", \"filter\") values") + ); } @Test @@ -167,7 +172,8 @@ public void testInsertFiltersIntoCategoryTableRegexExtract() { String query = "biz baz boz data has no content today (very important though) but it would still have if one had a means to extract it from (here is something else important as well) the strange patterns called parentheses that it seems to have been put in."; DataAccessException exception = Assertions .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, query).asBatch().execute()); - Assertions.assertTrue(exception.getMessage().contains("\"term_0_target\" (\"term_id\", \"type_id\", \"filter\")")); + Assertions + .assertTrue(exception.getMessage().contains("\"term_0_target\" 
(\"term_id\", \"type_id\", \"filter\")")); } @Test diff --git a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java index e8652458..ce2af62c 100644 --- a/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/walker/conditions/RegexLikeConditionTest.java @@ -90,7 +90,8 @@ void testHashCode() { @Test public void equalsHashCodeContractTest() { - EqualsVerifier.forClass(RegexLikeCondition.class) + EqualsVerifier + .forClass(RegexLikeCondition.class) .withNonnullFields("valueField") .withNonnullFields("comparedToField") .verify(); diff --git a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java index 53b96d4a..24e756fc 100644 --- a/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java +++ b/src/test/java/com/teragrep/pth_06/walker/ConditionWalkerTest.java @@ -226,7 +226,9 @@ void twoTablePatternMatchTest() { Assertions .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip"))); Assertions - .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255"))); + .assertTrue( + walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255")) + ); } @Test @@ -268,7 +270,9 @@ void twoTablePatternMatchWithoutFiltersTest() { Assertions .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip"))); Assertions - .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255"))); + .assertTrue( + walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255")) + ); } @Test @@ -319,7 +323,9 @@ void multipleSearchTermTwoAndOneMatchTest() { 
Assertions .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip"))); Assertions - .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255"))); + .assertTrue( + walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255")) + ); } @Test @@ -335,7 +341,9 @@ void multipleSearchTermTwoAndOneMatchWithoutFiltersTest() { Assertions .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip"))); Assertions - .assertTrue(walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255"))); + .assertTrue( + walker.conditionRequiredTables().stream().anyMatch(t -> t.getName().equals("pattern_test_ip255")) + ); } @Test From adc7faf0158887b81fb3f0a58003f44b12f26427 Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Thu, 31 Oct 2024 11:04:38 +0200 Subject: [PATCH 22/26] add missing hashCode() methods --- .../pth_06/planner/bloomfilter/CategoryTableImpl.java | 7 +++++++ .../planner/bloomfilter/ConditionMatchBloomDBTables.java | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java index 02c4b72f..d13c2678 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableImpl.java @@ -51,6 +51,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.Objects; + import static org.jooq.impl.SQLDataType.BIGINTUNSIGNED; /** @@ -146,4 +148,9 @@ public boolean equals(final Object object) { return this.originTable.equals(cast.originTable) && this.ctx == cast.ctx && // equal only if same instance of DSLContext this.bloomTermId == cast.bloomTermId && this.tableFilters.equals(cast.tableFilters); } + + 
@Override + public int hashCode() { + return Objects.hash(ctx, originTable, bloomTermId, tableFilters); + } } diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java index 654537b3..3f32b31c 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/ConditionMatchBloomDBTables.java @@ -55,6 +55,7 @@ import org.slf4j.LoggerFactory; import java.util.List; +import java.util.Objects; import static com.teragrep.pth_06.jooq.generated.bloomdb.Bloomdb.BLOOMDB; @@ -119,4 +120,9 @@ public boolean equals(final Object object) { final ConditionMatchBloomDBTables cast = (ConditionMatchBloomDBTables) object; return this.condition.equals(cast.condition) && this.ctx == cast.ctx; } + + @Override + public int hashCode() { + return Objects.hash(ctx, condition); + } } From 2c01634d768a05b5cb33cf28d7cf8ce8d3378362 Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Thu, 31 Oct 2024 12:12:37 +0200 Subject: [PATCH 23/26] don't wrap jooq.Batch object and execute in CategoryTableWithFilters, remove sout --- .../bloomfilter/CategoryTableWithFilters.java | 16 +- .../pth_06/planner/bloomfilter/SafeBatch.java | 75 ---------- .../planner/bloomfilter/TableFilters.java | 5 +- .../planner/bloomfilter/SafeBatchTest.java | 140 ------------------ ...ableFilterTypesFromMetadataResultTest.java | 2 - 5 files changed, 17 insertions(+), 221 deletions(-) delete mode 100644 src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java delete mode 100644 src/test/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatchTest.java diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java index 9a848513..bfb11559 100644 --- 
a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/CategoryTableWithFilters.java @@ -45,14 +45,19 @@ */ package com.teragrep.pth_06.planner.bloomfilter; +import org.jooq.Batch; import org.jooq.DSLContext; import org.jooq.Table; +import org.jooq.exception.DataAccessException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Decorator that inserts category tables filter types into the table */ public final class CategoryTableWithFilters implements CategoryTable { + private static final Logger LOGGER = LoggerFactory.getLogger(CategoryTableWithFilters.class); private final CategoryTable origin; private final TableFilters filters; @@ -71,6 +76,15 @@ public CategoryTableWithFilters(CategoryTable origin, TableFilters filters) { @Override public void create() { origin.create(); - filters.asBatch().execute(); + final Batch batch = filters.asBatch(); + try { + final int[] results = batch.execute(); + if (LOGGER.isTraceEnabled()) { + LOGGER.trace("Batch added <{}> row(s)", results.length); + } + } + catch (final DataAccessException e) { + throw new DataAccessException("Error executing batch: " + e); + } } } diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java deleted file mode 100644 index f661dad4..00000000 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatch.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Teragrep Archive Datasource (pth_06) - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. - * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. 
- */ -package com.teragrep.pth_06.planner.bloomfilter; - -import org.jooq.Batch; -import org.jooq.exception.DataAccessException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public final class SafeBatch { - - private static final Logger LOGGER = LoggerFactory.getLogger(SafeBatch.class); - - private final Batch batch; - - public SafeBatch(final Batch batch) { - this.batch = batch; - } - - /** Does not roll back successfully inserted values on exception */ - public void execute() { - try { - final int[] results = batch.execute(); - if (LOGGER.isTraceEnabled()) { - LOGGER.trace("Batch added <{}> row(s)", results.length); - } - } - catch (final DataAccessException e) { - throw new DataAccessException("Error executing batch: " + e); - } - } -} diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java index cd11f431..54184424 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/TableFilters.java @@ -95,7 +95,7 @@ public TableFilters( this.searchTerm = searchTerm; } - public SafeBatch asBatch() { + public Batch asBatch() { final List> insertValuesStepNList = new ArrayList<>(); final Result result = recordsInMetadata.toResult(); for (final Record record : result) { @@ -127,8 +127,7 @@ public SafeBatch asBatch() { final InsertValuesStepN insertStep = ctx.insertInto(thisTable).columns(insertFields).values(valueFields); insertValuesStepNList.add(insertStep); } - final Batch batch = ctx.batch(insertValuesStepNList); - return new SafeBatch(batch); + return ctx.batch(insertValuesStepNList); } /** diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatchTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatchTest.java deleted file mode 100644 index fdbdd266..00000000 --- 
a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SafeBatchTest.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Teragrep Archive Datasource (pth_06) - * Copyright (C) 2021-2024 Suomen Kanuuna Oy - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - * - * - * Additional permission under GNU Affero General Public License version 3 - * section 7 - * - * If you modify this Program, or any covered work, by linking or combining it - * with other code, such other code is not for that reason alone subject to any - * of the requirements of the GNU Affero GPL version 3 as long as this Program - * is the same Program as licensed from Suomen Kanuuna Oy without any additional - * modifications. - * - * Supplemented terms under GNU Affero General Public License version 3 - * section 7 - * - * Origin of the software must be attributed to Suomen Kanuuna Oy. Any modified - * versions must be marked as "Modified version of" The Program. - * - * Names of the licensors and authors may not be used for publicity purposes. - * - * No rights are granted for use of trade names, trademarks, or service marks - * which are in The Program if any. - * - * Licensee must indemnify licensors and authors for any liability that these - * contractual assumptions impose on licensors and authors. 
- * - * To the extent this program is licensed as part of the Commercial versions of - * Teragrep, the applicable Commercial License may apply to this file if you as - * a licensee so wish it. - */ -package com.teragrep.pth_06.planner.bloomfilter; - -import org.jooq.DSLContext; -import org.jooq.exception.DataAccessException; -import org.jooq.impl.DSL; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.ResultSet; -import java.util.ArrayList; -import java.util.List; - -class SafeBatchTest { - - final String url = "jdbc:h2:mem:test;MODE=MariaDB;DATABASE_TO_LOWER=TRUE;CASE_INSENSITIVE_IDENTIFIERS=TRUE"; - final String userName = "sa"; - final String password = ""; - final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password)); - - @BeforeEach - void setup() { - Assertions.assertDoesNotThrow(() -> { - conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute(); - conn.prepareStatement("USE BLOOMDB").execute(); - conn.prepareStatement("DROP TABLE IF EXISTS target").execute(); - String targetTable = "CREATE TABLE `target`(" - + " `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY," - + " `partition_id` bigint(20) unsigned NOT NULL UNIQUE" + ")"; - conn.prepareStatement(targetTable).execute(); - }); - } - - @Test - public void testOneInsert() { - String sql = "INSERT INTO target (`partition_id`) VALUES(12345)"; - DSLContext ctx = DSL.using(conn); - SafeBatch batch = new SafeBatch(ctx.batch(ctx.query(sql))); - Assertions.assertDoesNotThrow(batch::execute); - ResultSet result = Assertions - .assertDoesNotThrow(() -> conn.prepareStatement("SELECT `partition_id` FROM target").executeQuery()); - Assertions.assertDoesNotThrow(() -> { - int loops = 0; - while (result.next()) { - loops++; - Assertions.assertEquals(12345L, result.getLong(1)); - } - 
Assertions.assertEquals(1, loops); - }); - } - - @Test - public void testInsertTwo() { - String sql1 = "INSERT INTO target (`partition_id`) VALUES(12345)"; - String sql2 = "INSERT INTO target (`partition_id`) VALUES(54321)"; - DSLContext ctx = DSL.using(conn); - SafeBatch batch = new SafeBatch(ctx.batch(ctx.query(sql1), ctx.query(sql2))); - Assertions.assertDoesNotThrow(batch::execute); - ResultSet result = Assertions - .assertDoesNotThrow(() -> conn.prepareStatement("SELECT `partition_id` FROM target").executeQuery()); - Assertions.assertDoesNotThrow(() -> { - List values = new ArrayList<>(); - int loops = 0; - while (result.next()) { - loops++; - values.add(result.getLong(1)); - } - Assertions.assertEquals(2, loops); - Assertions.assertEquals(2, values.size()); - Assertions.assertEquals(12345L, values.get(0)); - Assertions.assertEquals(54321L, values.get(1)); - }); - } - - @Test - public void testDataAccessException() { - String sql1 = "INSERT INTO target (`partition_id`) VALUES(12345)"; - String sql2 = "INSERT INTO target (`partition_id`) VALUES(12345)"; - DSLContext ctx = DSL.using(conn); - SafeBatch batch = new SafeBatch(ctx.batch(ctx.query(sql1), ctx.query(sql2))); - Assertions.assertThrows(DataAccessException.class, batch::execute); - ResultSet result = Assertions - .assertDoesNotThrow(() -> conn.prepareStatement("SELECT `partition_id` FROM target").executeQuery()); - Assertions.assertDoesNotThrow(() -> { - int loops = 0; - while (result.next()) { - Assertions.assertEquals(12345L, result.getLong(1)); - loops++; - } - Assertions.assertEquals(1, loops); - }); - } -} diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java index fb8be48a..f8bd9052 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java +++ 
b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFilterTypesFromMetadataResultTest.java @@ -207,8 +207,6 @@ public void testNotEquals() { .get(0); TableFilterTypesFromMetadata result1 = new TableFilterTypesFromMetadata(ctx, table, 0L); TableFilterTypesFromMetadata result2 = new TableFilterTypesFromMetadata(ctx, table, 1L); - System.out.println(result1); - System.out.println(result2); Assertions.assertNotEquals(result1, result2); } From 34efb98e9f61e23d51a73dbdbbffb434602061cd Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Fri, 1 Nov 2024 10:59:01 +0200 Subject: [PATCH 24/26] throw exception if search term filter tokens size larger than expected, remove logcaptor dependency --- pom.xml | 6 ------ .../planner/bloomfilter/SearchTermBloomFilter.java | 7 ++++--- .../planner/bloomfilter/SearchTermBloomFilterTest.java | 10 +++------- 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/pom.xml b/pom.xml index af810fab..47ffaa4a 100644 --- a/pom.xml +++ b/pom.xml @@ -209,12 +209,6 @@ 3.16.1 test - - io.github.hakky54 - logcaptor - 2.9.3 - test - org.apache.kafka kafka-clients diff --git a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java index 5246446d..03636e53 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java @@ -88,14 +88,15 @@ public byte[] bytes() { for (final String token : stringTokens) { filter.put(token); } - if (LOGGER.isWarnEnabled()) { - if (stringTokens.size() > expected) { + if (stringTokens.size() > expected) { + if (LOGGER.isErrorEnabled()) { LOGGER - .warn( + .error( "Number of items <{}> was larger than the expected number of items <{}>, resulting FPP <{}>", stringTokens.size(), expected, filter.expectedFpp() ); } + throw new IllegalStateException("Number of items was larger 
than the expected number of items"); } try (final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream()) { filter.writeTo(filterBAOS); diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java index 5ede2361..5a6d252a 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java @@ -45,7 +45,6 @@ */ package com.teragrep.pth_06.planner.bloomfilter; -import nl.altindag.log.LogCaptor; import nl.jqno.equalsverifier.EqualsVerifier; import org.apache.spark.util.sketch.BloomFilter; import org.junit.jupiter.api.Assertions; @@ -118,14 +117,11 @@ public void testTokenizerTokens() { @Test public void testTokensSizeTooLarge() { - LogCaptor captor = Assertions.assertDoesNotThrow(() -> LogCaptor.forClass(SearchTermBloomFilter.class)); String searchTerm = ""; SearchTermBloomFilter filter = new SearchTermBloomFilter(10L, 0.01, new TokenizedValue(searchTerm)); - Assertions.assertDoesNotThrow(filter::bytes); - String e = "Number of items <132> was larger than the expected number of items <10>, resulting FPP <0.6002870054872016>"; - String warn = captor.getWarnLogs().get(0); - Assertions.assertEquals(e, warn); - + IllegalStateException e = Assertions.assertThrows(IllegalStateException.class, filter::bytes); + String expected = "Number of items was larger than the expected number of items"; + Assertions.assertEquals(expected, e.getMessage()); } @Test From 7412ec2cf6af6bbcaa3c2f0e87ff920bc9f6eb2c Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Fri, 1 Nov 2024 15:23:19 +0200 Subject: [PATCH 25/26] improve TableFiltersTest and TokensAsStringsTest --- .../planner/bloomfilter/TableFiltersTest.java | 83 +++++++++++++++---- .../bloomfilter/TokensAsStringsTest.java | 5 ++ 2 files changed, 70 insertions(+), 18 
deletions(-) diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java index b46964c9..617ddc46 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TableFiltersTest.java @@ -53,10 +53,12 @@ import org.jooq.impl.DSL; import org.junit.jupiter.api.*; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; +import java.sql.ResultSet; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -69,11 +71,9 @@ class TableFiltersTest { final String password = ""; // matches IPv4 final String ipRegex = "(\\b25[0-5]|\\b2[0-4][0-9]|\\b[01]?[0-9][0-9]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}"; - // matches IPv4 starting with 255. - final String ipStartingWith255 = "(\\b25[0-5]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}"; // matches with values surrounded by parentheses final String parenthesesPattern = "\\((.*?)\\)"; - final List patternList = new ArrayList<>(Arrays.asList(ipRegex, ipStartingWith255, parenthesesPattern)); + final List patternList = new ArrayList<>(Arrays.asList(ipRegex, parenthesesPattern)); final Connection conn = Assertions.assertDoesNotThrow(() -> DriverManager.getConnection(url, userName, password)); @BeforeAll @@ -109,6 +109,9 @@ void createTargetTable() { conn.prepareStatement("CREATE SCHEMA IF NOT EXISTS BLOOMDB").execute(); conn.prepareStatement("USE BLOOMDB").execute(); conn.prepareStatement("DROP TABLE IF EXISTS target").execute(); + // drop temp tables created by tests + conn.prepareStatement("DROP TABLE IF EXISTS term_0_target").execute(); + conn.prepareStatement("DROP TABLE IF EXISTS term_1_target").execute(); String targetTable = "CREATE TABLE `target`(" + " `id` bigint(20) unsigned NOT NULL 
AUTO_INCREMENT PRIMARY KEY," + " `partition_id` bigint(20) unsigned NOT NULL UNIQUE," @@ -140,7 +143,7 @@ public void testCreation() { } @Test - public void testInsertFiltersIntoCategoryTable() { + public void testFilterInsertion() { fillTargetTable(1); DSLContext ctx = DSL.using(conn); Table table = ctx @@ -149,19 +152,30 @@ public void testInsertFiltersIntoCategoryTable() { .filterTables(t -> !t.getName().equals("filtertype")) .getTables() .get(0); - DataAccessException exception = Assertions - .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, "192.168.1.1").asBatch().execute()); - Assertions - .assertTrue( - exception - .getMessage() - .contains("insert into \"term_0_target\" (\"term_id\", \"type_id\", \"filter\") values") - ); + CategoryTable tableImpl = new CategoryTableImpl(ctx, table, 0L, "192.168.1.1"); + Assertions.assertDoesNotThrow(tableImpl::create); + Assertions.assertDoesNotThrow(() -> new TableFilters(ctx, table, 0L, "192.168.1.1").asBatch().execute()); + ResultSet result = Assertions.assertDoesNotThrow(() -> conn.prepareStatement("SELECT * FROM `term_0_target`").executeQuery()); + Assertions.assertDoesNotThrow(() -> { + int loops = 0; + while (result.next()) { + long termId = result.getLong("term_id"); + long typeId = result.getLong("type_id"); + byte[] filterBytes = result.getBytes("filter"); + Assertions.assertEquals(0, termId); + Assertions.assertEquals(1, typeId); + BloomFilter filter = BloomFilter.readFrom(new ByteArrayInputStream(filterBytes)); + Assertions.assertTrue(filter.mightContain("192.168.1.1")); + Assertions.assertFalse(filter.mightContain("192")); + loops++; + } + Assertions.assertEquals(1, loops); + }); } @Test - public void testInsertFiltersIntoCategoryTableRegexExtract() { - fillTargetTable(3); + public void testFilterInsertionWithRegexExtractedValue() { + fillTargetTable(2); DSLContext ctx = DSL.using(conn); Table table = ctx .meta() @@ -169,11 +183,44 @@ public void 
testInsertFiltersIntoCategoryTableRegexExtract() { .filterTables(t -> !t.getName().equals("filtertype")) .getTables() .get(0); - String query = "biz baz boz data has no content today (very important though) but it would still have if one had a means to extract it from (here is something else important as well) the strange patterns called parentheses that it seems to have been put in."; - DataAccessException exception = Assertions - .assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, query).asBatch().execute()); + String value = "biz baz boz data has no content today (very important though) but it would still have if one had a means to extract it from (here is something else important as well) the strange patterns called parentheses that it seems to have been put in."; + CategoryTable tableImpl = new CategoryTableImpl(ctx, table, 1L, value); + Assertions.assertDoesNotThrow(tableImpl::create); Assertions - .assertTrue(exception.getMessage().contains("\"term_0_target\" (\"term_id\", \"type_id\", \"filter\")")); + .assertDoesNotThrow(() -> new TableFilters(ctx, table, 1L, value).asBatch().execute()); + ResultSet result = Assertions.assertDoesNotThrow(() -> conn.prepareStatement("SELECT * FROM `term_1_target`").executeQuery()); + Assertions.assertDoesNotThrow(() -> { + int loops = 0; + while (result.next()) { + long termId = result.getLong("term_id"); + long typeId = result.getLong("type_id"); + byte[] filterBytes = result.getBytes("filter"); + Assertions.assertEquals(1, termId); + Assertions.assertEquals(2, typeId); + BloomFilter filter = BloomFilter.readFrom(new ByteArrayInputStream(filterBytes)); + Assertions.assertTrue(filter.mightContain("(here is something else important as well)")); + Assertions.assertTrue(filter.mightContain("(very important though)")); + Assertions.assertFalse(filter.mightContain("content")); + Assertions.assertFalse(filter.mightContain("(very")); + Assertions.assertFalse(filter.mightContain("though)")); + loops++; + 
} + Assertions.assertEquals(1, loops); + }); + } + + @Test + public void testMissingTempTableDataAccessException() { + fillTargetTable(1); + DSLContext ctx = DSL.using(conn); + Table table = ctx + .meta() + .filterSchemas(s -> s.getName().equals("bloomdb")) + .filterTables(t -> !t.getName().equals("filtertype")) + .getTables() + .get(0); + DataAccessException ex = Assertions.assertThrows(DataAccessException.class, () -> new TableFilters(ctx, table, 0L, "192.168.1.1").asBatch().execute()); + Assertions.assertTrue(ex.getMessage().contains("Table \"term_0_target\" not found")); } @Test diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStringsTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStringsTest.java index ad1330f1..083ae2ae 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStringsTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/TokensAsStringsTest.java @@ -59,5 +59,10 @@ public void testTokensToStrings() { Assertions.assertTrue(allTokenClass); Tokenizable toStrings = new TokensAsStrings(tokenizedValue); Assertions.assertTrue(toStrings.tokens().contains("one")); + Assertions.assertTrue(toStrings.tokens().contains("one.")); + Assertions.assertTrue(toStrings.tokens().contains("one.two")); + Assertions.assertTrue(toStrings.tokens().contains("two")); + Assertions.assertTrue(toStrings.tokens().contains("three")); + Assertions.assertEquals(16, toStrings.tokens().size()); } } From 7dea6f740db44e9d6922b37a1d3eb1c1baf32a7b Mon Sep 17 00:00:00 2001 From: elliVM <47@teragrep.com> Date: Mon, 11 Nov 2024 10:57:23 +0200 Subject: [PATCH 26/26] allow search term filter tokens to be larger than expected tokens --- .../planner/bloomfilter/SearchTermBloomFilter.java | 10 ---------- .../planner/bloomfilter/SearchTermBloomFilterTest.java | 8 +++----- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git 
a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java index 03636e53..a30dc70f 100644 --- a/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java +++ b/src/main/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilter.java @@ -88,16 +88,6 @@ public byte[] bytes() { for (final String token : stringTokens) { filter.put(token); } - if (stringTokens.size() > expected) { - if (LOGGER.isErrorEnabled()) { - LOGGER - .error( - "Number of items <{}> was larger than the expected number of items <{}>, resulting FPP <{}>", - stringTokens.size(), expected, filter.expectedFpp() - ); - } - throw new IllegalStateException("Number of items was larger than the expected number of items"); - } try (final ByteArrayOutputStream filterBAOS = new ByteArrayOutputStream()) { filter.writeTo(filterBAOS); return filterBAOS.toByteArray(); diff --git a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java index 5a6d252a..ba9f6fc3 100644 --- a/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java +++ b/src/test/java/com/teragrep/pth_06/planner/bloomfilter/SearchTermBloomFilterTest.java @@ -116,12 +116,10 @@ public void testTokenizerTokens() { } @Test - public void testTokensSizeTooLarge() { + public void testSaturatedSearchTermFilterIsAllowed() { String searchTerm = ""; - SearchTermBloomFilter filter = new SearchTermBloomFilter(10L, 0.01, new TokenizedValue(searchTerm)); - IllegalStateException e = Assertions.assertThrows(IllegalStateException.class, filter::bytes); - String expected = "Number of items was larger than the expected number of items"; - Assertions.assertEquals(expected, e.getMessage()); + SearchTermBloomFilter filter = new SearchTermBloomFilter(1L, 0.01, new TokenizedValue(searchTerm)); + 
Assertions.assertDoesNotThrow(filter::bytes); } @Test