From 23d975e7581969136915674b47dfaf019ca131c9 Mon Sep 17 00:00:00 2001 From: Moriarty <22225248+apmoriarty@users.noreply.github.com> Date: Thu, 27 Jun 2024 10:08:45 +0000 Subject: [PATCH 1/5] Add QueryFunctionsDescriptorTest, fields method will now return correct values for functions with more than one argument --- .../query/attributes/UniqueFields.java | 4 +- .../functions/QueryFunctionsDescriptor.java | 74 ++++++- .../QueryFunctionsDescriptorTest.java | 194 ++++++++++++++++++ .../QueryOptionsFromQueryVisitorTest.java | 84 ++++++++ 4 files changed, 343 insertions(+), 13 deletions(-) create mode 100644 warehouse/query-core/src/test/java/datawave/query/jexl/functions/QueryFunctionsDescriptorTest.java diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/UniqueFields.java b/warehouse/query-core/src/main/java/datawave/query/attributes/UniqueFields.java index 0c861f54e6b..98c074287b4 100644 --- a/warehouse/query-core/src/main/java/datawave/query/attributes/UniqueFields.java +++ b/warehouse/query-core/src/main/java/datawave/query/attributes/UniqueFields.java @@ -7,14 +7,12 @@ import java.util.NavigableSet; import java.util.Objects; import java.util.Set; -import java.util.SortedSet; import org.apache.commons.lang.StringUtils; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonValue; import com.google.common.collect.Multimap; -import com.google.common.collect.Multimaps; import com.google.common.collect.Sets; import com.google.common.collect.SortedSetMultimap; import com.google.common.collect.TreeMultimap; @@ -32,7 +30,7 @@ public class UniqueFields implements Serializable, Cloneable { private final TreeMultimap fieldMap = TreeMultimap.create(); private boolean mostRecent = false; - private static String MOST_RECENT_UNIQUE = "_MOST_RECENT_"; + private static final String MOST_RECENT_UNIQUE = "_MOST_RECENT_"; /** * Returns a new {@link UniqueFields} parsed from this string. The provided string is expected to have the format returned by diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java index 0dbcc3de4f1..bfedf1ef401 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java @@ -4,6 +4,7 @@ import java.util.Arrays; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -14,11 +15,13 @@ import org.apache.commons.jexl3.parser.ASTGENode; import org.apache.commons.jexl3.parser.ASTIdentifier; import org.apache.commons.jexl3.parser.ASTLENode; +import org.apache.commons.jexl3.parser.ASTStringLiteral; import org.apache.commons.jexl3.parser.JexlNode; import org.apache.commons.jexl3.parser.JexlNodes; import org.apache.commons.jexl3.parser.ParserTreeConstants; import datawave.query.attributes.AttributeFactory; +import datawave.query.attributes.UniqueFields; import datawave.query.config.ShardQueryConfiguration; import datawave.query.jexl.ArithmeticJexlEngines; import datawave.query.jexl.JexlASTHelper; @@ -41,7 +44,8 @@ public class QueryFunctionsDescriptor implements JexlFunctionArgumentDescriptorF */ public static class QueryJexlArgumentDescriptor implements JexlArgumentDescriptor { private final ASTFunctionNode node; - private final String namespace, name; + private final String namespace; + private final String name; private final List args; public QueryJexlArgumentDescriptor(ASTFunctionNode node, String namespace, String name, List args) { @@ -126,24 +130,74 @@ public void addFilters(AttributeFactory attributeFactory, Map fieldsForNormalization(MetadataHelper helper, Set datatypeFilter, int arg) { - // Do not normalize fields for the includeText function. - if (!name.equalsIgnoreCase(INCLUDE_TEXT)) { - // All other functions use the fields in the first argument for normalization. - if (arg > 0) { - return fields(helper, datatypeFilter); - } + if (name.equalsIgnoreCase(QueryFunctions.INCLUDE_TEXT)) { + // do not normalize fields for the includeText function + return Collections.emptySet(); } - return Collections.emptySet(); + + // otherwise delegate to the fields method + return fields(helper, datatypeFilter); } @Override public Set fields(MetadataHelper helper, Set datatypeFilter) { - return JexlASTHelper.getIdentifierNames(args.get(0)); + Set fields = new HashSet<>(); + switch (name) { + case QueryFunctions.COUNT: + case QueryFunctions.SUM: + case QueryFunctions.MIN: + case QueryFunctions.MAX: + case QueryFunctions.AVERAGE: + case QueryFunctions.GROUPBY_FUNCTION: + case QueryFunctions.NO_EXPANSION: + case QueryFunctions.LENIENT_FIELDS_FUNCTION: + case QueryFunctions.STRICT_FIELDS_FUNCTION: + // In practice each of these functions should be parsed from the query + // almost immediately. This implementation is added for consistency + for (JexlNode arg : args) { + fields.addAll(JexlASTHelper.getIdentifierNames(arg)); + } + break; + case QueryFunctions.INCLUDE_TEXT: + if (args.size() == 2) { + fields.addAll(JexlASTHelper.getIdentifierNames(args.get(0))); + } else { + for (int i = 1; i < args.size(); i += 2) { + fields.addAll(JexlASTHelper.getIdentifierNames(args.get(i))); + } + } + break; + case QueryFunctions.UNIQUE_FUNCTION: + for (JexlNode arg : args) { + if (arg instanceof ASTStringLiteral) { + // FIELD[GRANULARITY] is represented by an ASTStringLiteral + String literal = ((ASTStringLiteral) arg).getLiteral(); + fields.addAll(UniqueFields.from(literal).getFields()); + } else { + // otherwise it's just an ASTIdentifier + for (String identifier : JexlASTHelper.getIdentifierNames(arg)) { + fields.addAll(UniqueFields.from(identifier).getFields()); + } + } + } + break; + case QueryFunctions.MATCH_REGEX: + case BETWEEN: + case LENGTH: + default: + fields.addAll(JexlASTHelper.getIdentifierNames(args.get(0))); + } + return fields; } @Override public Set> fieldSets(MetadataHelper helper, Set datatypeFilter) { - return JexlArgumentDescriptor.Fields.product(args.get(0)); + Set> fieldSet = new HashSet<>(); + Set fields = fields(helper, datatypeFilter); + for (String field : fields) { + fieldSet.add(Set.of(field)); + } + return fieldSet; } @Override diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/functions/QueryFunctionsDescriptorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/functions/QueryFunctionsDescriptorTest.java new file mode 100644 index 00000000000..85d00ef7328 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/functions/QueryFunctionsDescriptorTest.java @@ -0,0 +1,194 @@ +package datawave.query.jexl.functions; + +import static datawave.query.jexl.functions.QueryFunctionsDescriptor.QueryJexlArgumentDescriptor; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.fail; + +import java.util.Set; + +import org.apache.commons.jexl3.parser.ASTFunctionNode; +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.commons.jexl3.parser.JexlNode; +import org.apache.commons.jexl3.parser.ParseException; +import org.junit.jupiter.api.Test; + +import datawave.query.jexl.JexlASTHelper; +import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor; + +/** + * Although most query functions are removed from the query by the {@link QueryOptionsFromQueryVisitor}, several functions will persist. These functions may + * contribute contextual information to the query planner, namely what fields are present in the query. When a field only exists in one of these non-removable + * functions it is important to verify that all fields are actually parsed by the {@link QueryFunctionsDescriptor}. + */ +class QueryFunctionsDescriptorTest { + + private final String singleFieldCount = "f:count(FIELD)"; + private final String multiFieldedCount = "f:count(FIELD_A, FIELD_B)"; + + private final String betweenDecimal = "f:between(FIELD, 50.0, 60.0)"; + private final String betweenValue = "f:between(FIELD, 'm', 'm~')"; + + private final String length = "f:length(FIELD, '2', '3')"; + + private final String include = "f:includeText(FIELD, 'baz')"; + private final String includeAnd = "f:includeText(AND, FIELD_A, 'bar', FIELD_B, 'baz')"; + private final String includeOr = "f:includeText(OR, FIELD_A, 'bar', FIELD_B, 'baz')"; + + private final String regex = "f:matchRegex(FIELD, 'ba.*')"; + + private final String singleFieldSum = "f:sum(FIELD)"; + private final String multiFieldSum = "f:sum(FIELD_A, FIELD_B)"; + + private final String singleFieldMin = "f:min(FIELD)"; + private final String multiFieldMin = "f:min(FIELD_A, FIELD_B)"; + + private final String singleFieldMax = "f:max(FIELD)"; + private final String multiFieldMax = "f:max(FIELD_A, FIELD_B)"; + + private final String singleFieldAvg = "f:average(FIELD)"; + private final String multiFieldAvg = "f:average(FIELD_A, FIELD_B)"; + + private final String singleFieldGroupBy = "f:groupby(FIELD)"; + private final String multiFieldGroupBy = "f:groupby(FIELD_A, FIELD_B)"; + + private final String singleFieldUnique = "f:unique(FIELD)"; + private final String multiFieldUnique = "f:unique(FIELD_A, FIELD_B)"; + + private final String singleFieldUniqueDay = "f:unique('FIELD[DAY]')"; + private final String multiFieldUniqueDay = "f:unique('FIELD_A[DAY]', 'FIELD_B[DAY]')"; + + private final String singleFieldNoExpansion = "f:noExpansion(FIELD)"; + private final String multiFieldNoExpansion = "f:noExpansion(FIELD_A, FIELD_B)"; + + private final String singleFieldLenient = "f:lenient(FIELD)"; + private final String multiFieldLenient = "f:lenient(FIELD_A, FIELD_B)"; + + private final String singleFieldStrict = "f:strict(FIELD)"; + private final String multiFieldStrict = "f:strict(FIELD_A, FIELD_B)"; + + private final QueryFunctionsDescriptor descriptor = new QueryFunctionsDescriptor(); + + @Test + void testFields() { + assertFields(singleFieldCount, Set.of("FIELD")); + assertFields(multiFieldedCount, Set.of("FIELD_A", "FIELD_B")); + + assertFields(betweenDecimal, Set.of("FIELD")); + assertFields(betweenValue, Set.of("FIELD")); + + assertFields(length, Set.of("FIELD")); + + assertFields(include, Set.of("FIELD")); + assertFields(includeAnd, Set.of("FIELD_A", "FIELD_B")); + assertFields(includeOr, Set.of("FIELD_A", "FIELD_B")); + + assertFields(regex, Set.of("FIELD")); + + assertFields(singleFieldSum, Set.of("FIELD")); + assertFields(multiFieldSum, Set.of("FIELD_A", "FIELD_B")); + + assertFields(singleFieldMin, Set.of("FIELD")); + assertFields(multiFieldMin, Set.of("FIELD_A", "FIELD_B")); + + assertFields(singleFieldMax, Set.of("FIELD")); + assertFields(multiFieldMax, Set.of("FIELD_A", "FIELD_B")); + + assertFields(singleFieldAvg, Set.of("FIELD")); + assertFields(multiFieldAvg, Set.of("FIELD_A", "FIELD_B")); + + assertFields(singleFieldGroupBy, Set.of("FIELD")); + assertFields(multiFieldGroupBy, Set.of("FIELD_A", "FIELD_B")); + + assertFields(singleFieldUnique, Set.of("FIELD")); + assertFields(multiFieldUnique, Set.of("FIELD_A", "FIELD_B")); + + assertFields(singleFieldUniqueDay, Set.of("FIELD")); + assertFields(multiFieldUniqueDay, Set.of("FIELD_A", "FIELD_B")); + + assertFields(singleFieldNoExpansion, Set.of("FIELD")); + assertFields(multiFieldNoExpansion, Set.of("FIELD_A", "FIELD_B")); + + assertFields(singleFieldLenient, Set.of("FIELD")); + assertFields(multiFieldLenient, Set.of("FIELD_A", "FIELD_B")); + + assertFields(singleFieldStrict, Set.of("FIELD")); + assertFields(multiFieldStrict, Set.of("FIELD_A", "FIELD_B")); + } + + private void assertFields(String query, Set expected) { + QueryJexlArgumentDescriptor jexlDescriptor = getDescriptor(query); + Set fields = jexlDescriptor.fields(null, Set.of()); + assertEquals(expected, fields); + } + + @Test + void testFieldSets() { + assertFieldSets(singleFieldCount, Set.of(Set.of("FIELD"))); + assertFieldSets(multiFieldedCount, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B"))); + + assertFieldSets(betweenDecimal, Set.of(Set.of("FIELD"))); + assertFieldSets(betweenValue, Set.of(Set.of("FIELD"))); + + assertFieldSets(length, Set.of(Set.of("FIELD"))); + + assertFieldSets(include, Set.of(Set.of("FIELD"))); + assertFieldSets(includeAnd, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B"))); + assertFieldSets(includeOr, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B"))); + + assertFieldSets(regex, Set.of(Set.of("FIELD"))); + + assertFieldSets(singleFieldSum, Set.of(Set.of("FIELD"))); + assertFieldSets(multiFieldSum, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B"))); + + assertFieldSets(singleFieldMin, Set.of(Set.of("FIELD"))); + assertFieldSets(multiFieldMin, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B"))); + + assertFieldSets(singleFieldMax, Set.of(Set.of("FIELD"))); + assertFieldSets(multiFieldMax, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B"))); + + assertFieldSets(singleFieldAvg, Set.of(Set.of("FIELD"))); + assertFieldSets(multiFieldAvg, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B"))); + + assertFieldSets(singleFieldGroupBy, Set.of(Set.of("FIELD"))); + assertFieldSets(multiFieldGroupBy, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B"))); + + assertFieldSets(singleFieldUnique, Set.of(Set.of("FIELD"))); + assertFieldSets(multiFieldUnique, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B"))); + + assertFields(singleFieldUniqueDay, Set.of("FIELD")); + assertFields(multiFieldUniqueDay, Set.of("FIELD_A", "FIELD_B")); + + assertFieldSets(singleFieldNoExpansion, Set.of(Set.of("FIELD"))); + assertFieldSets(multiFieldNoExpansion, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B"))); + + assertFieldSets(singleFieldLenient, Set.of(Set.of("FIELD"))); + assertFieldSets(multiFieldLenient, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B"))); + + assertFieldSets(singleFieldStrict, Set.of(Set.of("FIELD"))); + assertFieldSets(multiFieldStrict, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B"))); + } + + private void assertFieldSets(String query, Set> expected) { + QueryJexlArgumentDescriptor jexlDescriptor = getDescriptor(query); + Set> fields = jexlDescriptor.fieldSets(null, Set.of()); + assertEquals(expected, fields); + } + + private QueryJexlArgumentDescriptor getDescriptor(String query) { + ASTJexlScript script = getQuery(query); + JexlNode child = script.jjtGetChild(0); + if (child instanceof ASTFunctionNode) { + return (QueryJexlArgumentDescriptor) descriptor.getArgumentDescriptor((ASTFunctionNode) child); + } + throw new IllegalArgumentException("Could not get descriptor for query: " + query); + } + + private ASTJexlScript getQuery(String query) { + try { + return JexlASTHelper.parseAndFlattenJexlQuery(query); + } catch (ParseException e) { + fail("Could not parse query: " + query); + throw new RuntimeException(e); + } + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitorTest.java index afee2b09606..6a0e4bf1950 100644 --- a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitorTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitorTest.java @@ -54,6 +54,90 @@ public void testGroupByFunction() throws ParseException { assertOption(QueryParameters.GROUP_FIELDS, "field1,field2,field3"); } + @Test + public void testSumFunction() throws ParseException { + assertResult("f:sum()", ""); + assertOption(QueryParameters.SUM_FIELDS, ""); + + assertResult("f:sum(FIELD)", ""); + assertOption(QueryParameters.SUM_FIELDS, "FIELD"); + + assertResult("f:sum(FIELD_A, FIELD_B)", ""); + assertOption(QueryParameters.SUM_FIELDS, "FIELD_A,FIELD_B"); + } + + @Test + public void testCountFunction() throws ParseException { + assertResult("f:count()", ""); + assertOption(QueryParameters.COUNT_FIELDS, ""); + + assertResult("f:count(FIELD)", ""); + assertOption(QueryParameters.COUNT_FIELDS, "FIELD"); + + assertResult("f:count(FIELD_A, FIELD_B)", ""); + assertOption(QueryParameters.COUNT_FIELDS, "FIELD_A,FIELD_B"); + } + + @Test + public void testMinFunction() throws ParseException { + assertResult("f:min()", ""); + assertOption(QueryParameters.MIN_FIELDS, ""); + + assertResult("f:min(FIELD)", ""); + assertOption(QueryParameters.MIN_FIELDS, "FIELD"); + + assertResult("f:min(FIELD_A, FIELD_B)", ""); + assertOption(QueryParameters.MIN_FIELDS, "FIELD_A,FIELD_B"); + } + + @Test + public void testMaxFunction() throws ParseException { + assertResult("f:max()", ""); + assertOption(QueryParameters.MAX_FIELDS, ""); + + assertResult("f:max(FIELD)", ""); + assertOption(QueryParameters.MAX_FIELDS, "FIELD"); + + assertResult("f:max(FIELD_A, FIELD_B)", ""); + assertOption(QueryParameters.MAX_FIELDS, "FIELD_A,FIELD_B"); + } + + @Test + public void testAverageFunction() throws ParseException { + assertResult("f:average()", ""); + assertOption(QueryParameters.AVERAGE_FIELDS, ""); + + assertResult("f:average(FIELD)", ""); + assertOption(QueryParameters.AVERAGE_FIELDS, "FIELD"); + + assertResult("f:average(FIELD_A, FIELD_B)", ""); + assertOption(QueryParameters.AVERAGE_FIELDS, "FIELD_A,FIELD_B"); + } + + @Test + public void testStrictFunction() throws ParseException { + assertResult("f:strict()", ""); + assertOption(QueryParameters.STRICT_FIELDS, ""); + + assertResult("f:strict(FIELD)", ""); + assertOption(QueryParameters.STRICT_FIELDS, "FIELD"); + + assertResult("f:strict(FIELD_A, FIELD_B)", ""); + assertOption(QueryParameters.STRICT_FIELDS, "FIELD_A,FIELD_B"); + } + + @Test + public void testLenientFunction() throws ParseException { + assertResult("f:lenient()", ""); + assertOption(QueryParameters.LENIENT_FIELDS, ""); + + assertResult("f:lenient(FIELD)", ""); + assertOption(QueryParameters.LENIENT_FIELDS, "FIELD"); + + assertResult("f:lenient(FIELD_A, FIELD_B)", ""); + assertOption(QueryParameters.LENIENT_FIELDS, "FIELD_A,FIELD_B"); + } + @Test public void testUniqueFunction() throws ParseException { // Verify an empty function results in an empty parameter value. From df8d29c301f27b470a668d48c636ea5de41bf0ee Mon Sep 17 00:00:00 2001 From: Moriarty <22225248+apmoriarty@users.noreply.github.com> Date: Thu, 27 Jun 2024 14:00:24 +0000 Subject: [PATCH 2/5] Remove unnecessary local duplicate of QueryFunctions.INCLUDE_TEXT --- .../query/jexl/functions/QueryFunctionsDescriptor.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java index bfedf1ef401..75300dc028f 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java @@ -37,7 +37,6 @@ public class QueryFunctionsDescriptor implements JexlFunctionArgumentDescriptorF public static final String BETWEEN = "between"; public static final String LENGTH = "length"; - public static final String INCLUDE_TEXT = "includeText"; /** * This is the argument descriptor which can be used to normalize and optimize function node queries @@ -72,7 +71,7 @@ public JexlNode getIndexQuery(ShardQueryConfiguration config, MetadataHelper hel case QueryFunctions.MATCH_REGEX: // Return an index query. return getIndexQuery(); - case INCLUDE_TEXT: + case QueryFunctions.INCLUDE_TEXT: // Return the appropriate index query. return getTextIndexQuery(); default: From 88f9ede4d0c66e791173010fa957dd9dbacd02d6 Mon Sep 17 00:00:00 2001 From: Moriarty <22225248+apmoriarty@users.noreply.github.com> Date: Mon, 1 Jul 2024 15:31:27 +0000 Subject: [PATCH 3/5] Revert changes to QueryFunctionsDescriptor fieldForNormalization method --- .../functions/QueryFunctionsDescriptor.java | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java index 75300dc028f..cdbe1b574a9 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java @@ -1,5 +1,6 @@ package datawave.query.jexl.functions; +import static datawave.query.jexl.functions.QueryFunctions.INCLUDE_TEXT; import static datawave.query.jexl.nodes.QueryPropertyMarker.MarkerType.BOUNDED_RANGE; import java.util.Arrays; @@ -71,7 +72,7 @@ public JexlNode getIndexQuery(ShardQueryConfiguration config, MetadataHelper hel case QueryFunctions.MATCH_REGEX: // Return an index query. return getIndexQuery(); - case QueryFunctions.INCLUDE_TEXT: + case INCLUDE_TEXT: // Return the appropriate index query. return getTextIndexQuery(); default: @@ -129,13 +130,14 @@ public void addFilters(AttributeFactory attributeFactory, Map fieldsForNormalization(MetadataHelper helper, Set datatypeFilter, int arg) { - if (name.equalsIgnoreCase(QueryFunctions.INCLUDE_TEXT)) { - // do not normalize fields for the includeText function - return Collections.emptySet(); + // Do not normalize fields for the includeText function. + if (!name.equalsIgnoreCase(INCLUDE_TEXT)) { + // All other functions use the fields in the first argument for normalization. + if (arg > 0) { + return fields(helper, datatypeFilter); + } } - - // otherwise delegate to the fields method - return fields(helper, datatypeFilter); + return Collections.emptySet(); } @Override @@ -157,7 +159,7 @@ public Set fields(MetadataHelper helper, Set datatypeFilter) { fields.addAll(JexlASTHelper.getIdentifierNames(arg)); } break; - case QueryFunctions.INCLUDE_TEXT: + case INCLUDE_TEXT: if (args.size() == 2) { fields.addAll(JexlASTHelper.getIdentifierNames(args.get(0))); } else { @@ -270,7 +272,7 @@ private static void verify(String name, int numArgs) { case QueryFunctions.GROUPBY_FUNCTION: case QueryFunctions.EXCERPT_FIELDS_FUNCTION: case QueryFunctions.MATCH_REGEX: - case QueryFunctions.INCLUDE_TEXT: + case INCLUDE_TEXT: case QueryFunctions.NO_EXPANSION: case QueryFunctions.LENIENT_FIELDS_FUNCTION: case QueryFunctions.STRICT_FIELDS_FUNCTION: From da468374232de01488504e448b7353de5f8d442a Mon Sep 17 00:00:00 2001 From: Moriarty <22225248+apmoriarty@users.noreply.github.com> Date: Tue, 16 Jul 2024 15:49:27 +0000 Subject: [PATCH 4/5] Add test for QueryFunctions --- .../query/QueryFunctionQueryTest.java | 297 ++++++++++++++++++ 1 file changed, 297 insertions(+) create mode 100644 warehouse/query-core/src/test/java/datawave/query/QueryFunctionQueryTest.java diff --git a/warehouse/query-core/src/test/java/datawave/query/QueryFunctionQueryTest.java b/warehouse/query-core/src/test/java/datawave/query/QueryFunctionQueryTest.java new file mode 100644 index 00000000000..d0f0ccd2af7 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/QueryFunctionQueryTest.java @@ -0,0 +1,297 @@ +package datawave.query; + +import java.io.File; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.TimeZone; +import java.util.UUID; + +import javax.inject.Inject; + +import org.apache.accumulo.core.client.AccumuloClient; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.security.Authorizations; +import org.apache.log4j.Logger; +import org.jboss.arquillian.container.test.api.Deployment; +import org.jboss.arquillian.junit.Arquillian; +import org.jboss.shrinkwrap.api.ShrinkWrap; +import org.jboss.shrinkwrap.api.asset.StringAsset; +import org.jboss.shrinkwrap.api.spec.JavaArchive; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; + +import datawave.configuration.spring.SpringBean; +import datawave.core.query.configuration.GenericQueryConfiguration; +import datawave.helpers.PrintUtility; +import datawave.ingest.data.TypeRegistry; +import datawave.microservice.query.QueryImpl; +import datawave.query.attributes.Attribute; +import datawave.query.attributes.Document; +import datawave.query.attributes.PreNormalizedAttribute; +import datawave.query.attributes.TypeAttribute; +import datawave.query.function.deserializer.KryoDocumentDeserializer; +import datawave.query.jexl.functions.QueryFunctions; +import datawave.query.tables.ShardQueryLogic; +import datawave.query.tables.edge.DefaultEdgeEventQueryLogic; +import datawave.query.util.WiseGuysIngest; +import datawave.util.TableName; +import datawave.webservice.edgedictionary.RemoteEdgeDictionary; + +/** + * Integration test for {@link QueryFunctions}. + *

+ * The following functions are tested + *

    + *
  • {@link QueryFunctions#INCLUDE_TEXT}
  • + *
  • {@link QueryFunctions#MATCH_REGEX}
  • + *
+ */ +public abstract class QueryFunctionQueryTest { + + @ClassRule + public static TemporaryFolder temporaryFolder = new TemporaryFolder(); + + @RunWith(Arquillian.class) + public static class ShardRange extends QueryFunctionQueryTest { + protected static AccumuloClient client = null; + + @BeforeClass + public static void setUp() throws Exception { + // this will get property substituted into the TypeMetadataBridgeContext.xml file + // for the injection test (when this unit test is first created) + File tempDir = temporaryFolder.newFolder("TempDirForCompositeFunctionsTestShardRange"); + System.setProperty("type.metadata.dir", tempDir.getCanonicalPath()); + + QueryTestTableHelper qtth = new QueryTestTableHelper(CompositeFunctionsTest.ShardRange.class.toString(), log); + client = qtth.client; + + WiseGuysIngest.writeItAll(client, WiseGuysIngest.WhatKindaRange.SHARD); + Authorizations auths = new Authorizations("ALL"); + PrintUtility.printTable(client, auths, TableName.SHARD); + PrintUtility.printTable(client, auths, TableName.SHARD_INDEX); + PrintUtility.printTable(client, auths, QueryTestTableHelper.METADATA_TABLE_NAME); + PrintUtility.printTable(client, auths, QueryTestTableHelper.MODEL_TABLE_NAME); + } + + @AfterClass + public static void teardown() { + TypeRegistry.reset(); + } + + @Override + protected void runTestQuery(List expected, String querystr, Date startDate, Date endDate, Map extraParms) throws Exception { + super.runTestQuery(expected, querystr, startDate, endDate, extraParms, client, eventQueryLogic); + } + + @Override + protected void runTestQuery(List expected, String querystr, Date startDate, Date endDate, Map extraParms, ShardQueryLogic logic) + throws Exception { + super.runTestQuery(expected, querystr, startDate, endDate, extraParms, client, logic); + } + } + + @RunWith(Arquillian.class) + public static class DocumentRange extends QueryFunctionQueryTest { + protected static AccumuloClient client = null; + + @BeforeClass + public static void setUp() throws Exception { + // this will get property substituted into the TypeMetadataBridgeContext.xml file + // for the injection test (when this unit test is first created) + File tempDir = temporaryFolder.newFolder("TempDirForCompositeFunctionsTestDocumentRange"); + System.setProperty("type.metadata.dir", tempDir.getCanonicalPath()); + + QueryTestTableHelper qtth = new QueryTestTableHelper(CompositeFunctionsTest.DocumentRange.class.toString(), log); + client = qtth.client; + + WiseGuysIngest.writeItAll(client, WiseGuysIngest.WhatKindaRange.DOCUMENT); + Authorizations auths = new Authorizations("ALL"); + PrintUtility.printTable(client, auths, TableName.SHARD); + PrintUtility.printTable(client, auths, TableName.SHARD_INDEX); + PrintUtility.printTable(client, auths, QueryTestTableHelper.METADATA_TABLE_NAME); + PrintUtility.printTable(client, auths, QueryTestTableHelper.MODEL_TABLE_NAME); + } + + @AfterClass + public static void teardown() { + TypeRegistry.reset(); + } + + @Override + protected void runTestQuery(List expected, String querystr, Date startDate, Date endDate, Map extraParms) throws Exception { + super.runTestQuery(expected, querystr, startDate, endDate, extraParms, client, eventQueryLogic); + } + + @Override + protected void runTestQuery(List expected, String querystr, Date startDate, Date endDate, Map extraParms, ShardQueryLogic logic) + throws Exception { + super.runTestQuery(expected, querystr, startDate, endDate, extraParms, client, logic); + } + } + + private static final Logger log = Logger.getLogger(CompositeFunctionsTest.class); + + protected Authorizations auths = new Authorizations("ALL"); + + private final Set authSet = Collections.singleton(auths); + + @Inject + @SpringBean(name = "EventQuery") + protected ShardQueryLogic eventQueryLogic; + + private KryoDocumentDeserializer deserializer; + + private final DateFormat format = new SimpleDateFormat("yyyyMMdd"); + + @Deployment + public static JavaArchive createDeployment() { + + return ShrinkWrap.create(JavaArchive.class) + .addPackages(true, "org.apache.deltaspike", "io.astefanutti.metrics.cdi", "datawave.query", "org.jboss.logging", + "datawave.webservice.query.result.event", "datawave.core.query.result.event") + .deleteClass(DefaultEdgeEventQueryLogic.class).deleteClass(RemoteEdgeDictionary.class) + .deleteClass(datawave.query.metrics.QueryMetricQueryLogic.class) + .addAsManifestResource(new StringAsset( + "" + "datawave.query.tables.edge.MockAlternative" + ""), + "beans.xml"); + } + + @Before + public void setup() { + TimeZone.setDefault(TimeZone.getTimeZone("GMT")); + deserializer = new KryoDocumentDeserializer(); + } + + protected abstract void runTestQuery(List expected, String querystr, Date startDate, Date endDate, Map extraParms) throws Exception; + + protected abstract void runTestQuery(List expected, String querystr, Date startDate, Date endDate, Map extraParms, + ShardQueryLogic logic) throws Exception; + + protected void runTestQuery(List expected, String querystr, Date startDate, Date endDate, Map extraParms, AccumuloClient client, + ShardQueryLogic logic) throws Exception { + log.debug("runTestQuery"); + log.trace("Creating QueryImpl"); + QueryImpl settings = new QueryImpl(); + settings.setBeginDate(startDate); + settings.setEndDate(endDate); + settings.setPagesize(Integer.MAX_VALUE); + settings.setQueryAuthorizations(auths.serialize()); + settings.setQuery(querystr); + settings.setParameters(extraParms); + settings.setId(UUID.randomUUID()); + + log.debug("query: " + settings.getQuery()); + log.debug("logic: " + settings.getQueryLogicName()); + logic.setMaxEvaluationPipelines(1); + + GenericQueryConfiguration config = logic.initialize(client, settings, authSet); + logic.setupQuery(config); + + HashSet expectedSet = new HashSet<>(expected); + HashSet resultSet; + resultSet = new HashSet<>(); + Set docs = new HashSet<>(); + for (Entry entry : logic) { + Document d = deserializer.apply(entry).getValue(); + + log.debug(entry.getKey() + " => " + d); + + Attribute attr = d.get("UUID"); + if (attr == null) { + attr = d.get("UUID.0"); + } + + Assert.assertNotNull("Result Document did not contain a 'UUID'", attr); + Assert.assertTrue("Expected result to be an instance of DatwawaveTypeAttribute, was: " + attr.getClass().getName(), + attr instanceof TypeAttribute || attr instanceof PreNormalizedAttribute); + + TypeAttribute UUIDAttr = (TypeAttribute) attr; + + String UUID = UUIDAttr.getType().getDelegate().toString(); + Assert.assertTrue("Received unexpected UUID: " + UUID, expected.contains(UUID)); + + resultSet.add(UUID); + docs.add(d); + } + + if (expected.size() > resultSet.size()) { + expectedSet.addAll(expected); + expectedSet.removeAll(resultSet); + + for (String s : expectedSet) { + log.warn("Missing: " + s); + } + } + + if (!expected.containsAll(resultSet)) { + log.error("Expected results " + expected + " differ form actual results " + resultSet); + } + Assert.assertTrue("Expected results " + expected + " differ form actual results " + resultSet, expected.containsAll(resultSet)); + Assert.assertEquals("Unexpected number of records", expected.size(), resultSet.size()); + } + + @Test + public void testIncludeText() throws Exception { + Map extraParameters = new HashMap<>(); + extraParameters.put("hit.list", "true"); + + // @formatter:off + String[] queryStrings = { + "UUID == 'corleone' && f:includeText(GENERE, 'FEMALE')", + "UUID == 'corleone' && f:includeText(GENERE, 'male')", + "UUID == 'corleone' && f:includeText(NUMBER, '25')", + }; + @SuppressWarnings("unchecked") + List[] expectedLists = new List[] { + List.of("CORLEONE"), + List.of(), // misses because includeText is case-sensitive + List.of("CORLEONE"), + }; + // @formatter:on + + for (int i = 0; i < queryStrings.length; i++) { + runTestQuery(expectedLists[i], queryStrings[i], format.parse("20091231"), format.parse("20150101"), extraParameters); + } + } + + @Test + public void testMatchRegex() throws Exception { + Map extraParameters = new HashMap<>(); + extraParameters.put("hit.list", "true"); + + // @formatter:off + String[] queryStrings = { + "UUID == 'corleone' && f:matchRegex(GENERE, '.*MALE')", + "UUID == 'corleone' && f:matchRegex(GENERE, '.*male')", + "UUID == 'corleone' && f:matchRegex(NUMBER, '2.*')", + "UUID == 'corleone' && f:matchRegex(GENERE, '[A-Z]+')", + }; + @SuppressWarnings("unchecked") + List[] expectedLists = new List[] { + List.of("CORLEONE"), + List.of("CORLEONE"), + List.of("CORLEONE"), + List.of("CORLEONE"), + }; + // @formatter:on + + for (int i = 0; i < queryStrings.length; i++) { + runTestQuery(expectedLists[i], queryStrings[i], format.parse("20091231"), format.parse("20150101"), extraParameters); + } + } +} From 97d1d8342b7ab81ff38cdacf1797da0e4b5c9b77 Mon Sep 17 00:00:00 2001 From: Moriarty <22225248+apmoriarty@users.noreply.github.com> Date: Wed, 17 Jul 2024 16:39:46 +0000 Subject: [PATCH 5/5] Make static references consistent --- .../query/jexl/functions/QueryFunctionsDescriptor.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java index 2b2febf3085..718cfa7ff50 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java @@ -1,6 +1,5 @@ package datawave.query.jexl.functions; -import static datawave.query.jexl.functions.QueryFunctions.INCLUDE_TEXT; import static datawave.query.jexl.nodes.QueryPropertyMarker.MarkerType.BOUNDED_RANGE; import java.util.Arrays; @@ -72,7 +71,7 @@ public JexlNode getIndexQuery(ShardQueryConfiguration config, MetadataHelper hel case QueryFunctions.MATCH_REGEX: // Return an index query. return getIndexQuery(); - case INCLUDE_TEXT: + case QueryFunctions.INCLUDE_TEXT: // Return the appropriate index query. return getTextIndexQuery(); default: @@ -131,7 +130,7 @@ public void addFilters(AttributeFactory attributeFactory, Map fieldsForNormalization(MetadataHelper helper, Set datatypeFilter, int arg) { // Do not normalize fields for the includeText function. - if (!name.equalsIgnoreCase(INCLUDE_TEXT)) { + if (!name.equalsIgnoreCase(QueryFunctions.INCLUDE_TEXT)) { // All other functions use the fields in the first argument for normalization. if (arg > 0) { return fields(helper, datatypeFilter); @@ -159,7 +158,7 @@ public Set fields(MetadataHelper helper, Set datatypeFilter) { fields.addAll(JexlASTHelper.getIdentifierNames(arg)); } break; - case INCLUDE_TEXT: + case QueryFunctions.INCLUDE_TEXT: if (args.size() == 2) { fields.addAll(JexlASTHelper.getIdentifierNames(args.get(0))); } else { @@ -263,7 +262,7 @@ private static void verify(String name, int numArgs) { case QueryFunctions.GROUPBY_FUNCTION: case QueryFunctions.EXCERPT_FIELDS_FUNCTION: case QueryFunctions.MATCH_REGEX: - case INCLUDE_TEXT: + case QueryFunctions.INCLUDE_TEXT: case QueryFunctions.NO_EXPANSION: case QueryFunctions.LENIENT_FIELDS_FUNCTION: case QueryFunctions.STRICT_FIELDS_FUNCTION: