Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

QueryFunctionsDescriptor returns all fields for multi-fielded functions #2446

Merged
merged 11 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,12 @@
import java.util.NavigableSet;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;

import org.apache.commons.lang.StringUtils;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.google.common.collect.Sets;
import com.google.common.collect.SortedSetMultimap;
import com.google.common.collect.TreeMultimap;
Expand All @@ -32,7 +30,7 @@ public class UniqueFields implements Serializable, Cloneable {

private final TreeMultimap<String,UniqueGranularity> fieldMap = TreeMultimap.create();
private boolean mostRecent = false;
private static String MOST_RECENT_UNIQUE = "_MOST_RECENT_";
private static final String MOST_RECENT_UNIQUE = "_MOST_RECENT_";

/**
* Returns a new {@link UniqueFields} parsed from this string. The provided string is expected to have the format returned by
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package datawave.query.jexl.functions;

import static datawave.query.jexl.functions.QueryFunctions.INCLUDE_TEXT;
import static datawave.query.jexl.nodes.QueryPropertyMarker.MarkerType.BOUNDED_RANGE;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand All @@ -14,11 +16,13 @@
import org.apache.commons.jexl3.parser.ASTGENode;
import org.apache.commons.jexl3.parser.ASTIdentifier;
import org.apache.commons.jexl3.parser.ASTLENode;
import org.apache.commons.jexl3.parser.ASTStringLiteral;
import org.apache.commons.jexl3.parser.JexlNode;
import org.apache.commons.jexl3.parser.JexlNodes;
import org.apache.commons.jexl3.parser.ParserTreeConstants;

import datawave.query.attributes.AttributeFactory;
import datawave.query.attributes.UniqueFields;
import datawave.query.config.ShardQueryConfiguration;
import datawave.query.jexl.ArithmeticJexlEngines;
import datawave.query.jexl.JexlASTHelper;
Expand All @@ -34,14 +38,14 @@ public class QueryFunctionsDescriptor implements JexlFunctionArgumentDescriptorF

public static final String BETWEEN = "between";
public static final String LENGTH = "length";
public static final String INCLUDE_TEXT = "includeText";

/**
* This is the argument descriptor which can be used to normalize and optimize function node queries
*/
public static class QueryJexlArgumentDescriptor implements JexlArgumentDescriptor {
private final ASTFunctionNode node;
private final String namespace, name;
private final String namespace;
private final String name;
private final List<JexlNode> args;

public QueryJexlArgumentDescriptor(ASTFunctionNode node, String namespace, String name, List<JexlNode> args) {
Expand Down Expand Up @@ -138,12 +142,63 @@ public Set<String> fieldsForNormalization(MetadataHelper helper, Set<String> dat

@Override
public Set<String> fields(MetadataHelper helper, Set<String> datatypeFilter) {
return JexlASTHelper.getIdentifierNames(args.get(0));
Set<String> fields = new HashSet<>();
switch (name) {
case QueryFunctions.COUNT:
case QueryFunctions.SUM:
case QueryFunctions.MIN:
case QueryFunctions.MAX:
case QueryFunctions.AVERAGE:
case QueryFunctions.GROUPBY_FUNCTION:
case QueryFunctions.NO_EXPANSION:
case QueryFunctions.LENIENT_FIELDS_FUNCTION:
case QueryFunctions.STRICT_FIELDS_FUNCTION:
// In practice each of these functions should be parsed from the query
// almost immediately. This implementation is added for consistency
for (JexlNode arg : args) {
fields.addAll(JexlASTHelper.getIdentifierNames(arg));
mineralntl marked this conversation as resolved.
Show resolved Hide resolved
}
break;
case INCLUDE_TEXT:
if (args.size() == 2) {
fields.addAll(JexlASTHelper.getIdentifierNames(args.get(0)));
} else {
for (int i = 1; i < args.size(); i += 2) {
fields.addAll(JexlASTHelper.getIdentifierNames(args.get(i)));
}
}
break;
case QueryFunctions.UNIQUE_FUNCTION:
for (JexlNode arg : args) {
if (arg instanceof ASTStringLiteral) {
// FIELD[GRANULARITY] is represented by an ASTStringLiteral
String literal = ((ASTStringLiteral) arg).getLiteral();
fields.addAll(UniqueFields.from(literal).getFields());
} else {
// otherwise it's just an ASTIdentifier
for (String identifier : JexlASTHelper.getIdentifierNames(arg)) {
fields.addAll(UniqueFields.from(identifier).getFields());
}
}
}
break;
case QueryFunctions.MATCH_REGEX:
case BETWEEN:
case LENGTH:
default:
fields.addAll(JexlASTHelper.getIdentifierNames(args.get(0)));
}
return fields;
}

/**
 * Returns the fields referenced by this function as a set of singleton sets, one per field.
 * <p>
 * The field names come from {@link #fields(MetadataHelper, Set)}; each name is wrapped in its own immutable single-element set.
 *
 * @param helper
 *            a metadata helper, passed through to {@code fields}
 * @param datatypeFilter
 *            the datatype filter, passed through to {@code fields}
 * @return a new, mutable set containing one singleton set per referenced field
 */
@Override
public Set<Set<String>> fieldSets(MetadataHelper helper, Set<String> datatypeFilter) {
    Set<Set<String>> fieldSet = new HashSet<>();
    for (String field : fields(helper, datatypeFilter)) {
        fieldSet.add(Set.of(field));
    }
    return fieldSet;
}

@Override
Expand Down Expand Up @@ -217,7 +272,7 @@ private static void verify(String name, int numArgs) {
case QueryFunctions.GROUPBY_FUNCTION:
case QueryFunctions.EXCERPT_FIELDS_FUNCTION:
case QueryFunctions.MATCH_REGEX:
case QueryFunctions.INCLUDE_TEXT:
case INCLUDE_TEXT:
apmoriarty marked this conversation as resolved.
Show resolved Hide resolved
case QueryFunctions.NO_EXPANSION:
case QueryFunctions.LENIENT_FIELDS_FUNCTION:
case QueryFunctions.STRICT_FIELDS_FUNCTION:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
package datawave.query.jexl.functions;

import static datawave.query.jexl.functions.QueryFunctionsDescriptor.QueryJexlArgumentDescriptor;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;

import java.util.Set;

import org.apache.commons.jexl3.parser.ASTFunctionNode;
import org.apache.commons.jexl3.parser.ASTJexlScript;
import org.apache.commons.jexl3.parser.JexlNode;
import org.apache.commons.jexl3.parser.ParseException;
import org.junit.jupiter.api.Test;

import datawave.query.jexl.JexlASTHelper;
import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor;

/**
 * Although most query functions are removed from the query by the {@link QueryOptionsFromQueryVisitor}, several functions will persist. These functions may
 * contribute contextual information to the query planner, namely what fields are present in the query. When a field only exists in one of these non-removable
 * functions it is important to verify that all fields are actually parsed by the {@link QueryFunctionsDescriptor}.
 * <p>
 * Each query string below is parsed into a function node, and the resulting argument descriptor is checked through both {@code fields} and
 * {@code fieldSets}.
 */
class QueryFunctionsDescriptorTest {

    private final String singleFieldCount = "f:count(FIELD)";
    private final String multiFieldedCount = "f:count(FIELD_A, FIELD_B)";

    private final String betweenDecimal = "f:between(FIELD, 50.0, 60.0)";
    private final String betweenValue = "f:between(FIELD, 'm', 'm~')";

    private final String length = "f:length(FIELD, '2', '3')";

    private final String include = "f:includeText(FIELD, 'baz')";
    private final String includeAnd = "f:includeText(AND, FIELD_A, 'bar', FIELD_B, 'baz')";
    private final String includeOr = "f:includeText(OR, FIELD_A, 'bar', FIELD_B, 'baz')";

    private final String regex = "f:matchRegex(FIELD, 'ba.*')";

    private final String singleFieldSum = "f:sum(FIELD)";
    private final String multiFieldSum = "f:sum(FIELD_A, FIELD_B)";

    private final String singleFieldMin = "f:min(FIELD)";
    private final String multiFieldMin = "f:min(FIELD_A, FIELD_B)";

    private final String singleFieldMax = "f:max(FIELD)";
    private final String multiFieldMax = "f:max(FIELD_A, FIELD_B)";

    private final String singleFieldAvg = "f:average(FIELD)";
    private final String multiFieldAvg = "f:average(FIELD_A, FIELD_B)";

    private final String singleFieldGroupBy = "f:groupby(FIELD)";
    private final String multiFieldGroupBy = "f:groupby(FIELD_A, FIELD_B)";

    private final String singleFieldUnique = "f:unique(FIELD)";
    private final String multiFieldUnique = "f:unique(FIELD_A, FIELD_B)";

    private final String singleFieldUniqueDay = "f:unique('FIELD[DAY]')";
    private final String multiFieldUniqueDay = "f:unique('FIELD_A[DAY]', 'FIELD_B[DAY]')";

    private final String singleFieldNoExpansion = "f:noExpansion(FIELD)";
    private final String multiFieldNoExpansion = "f:noExpansion(FIELD_A, FIELD_B)";

    private final String singleFieldLenient = "f:lenient(FIELD)";
    private final String multiFieldLenient = "f:lenient(FIELD_A, FIELD_B)";

    private final String singleFieldStrict = "f:strict(FIELD)";
    private final String multiFieldStrict = "f:strict(FIELD_A, FIELD_B)";

    private final QueryFunctionsDescriptor descriptor = new QueryFunctionsDescriptor();

    /**
     * Verifies the flat set of field names reported for every supported function form.
     */
    @Test
    void testFields() {
        assertFields(singleFieldCount, Set.of("FIELD"));
        assertFields(multiFieldedCount, Set.of("FIELD_A", "FIELD_B"));

        assertFields(betweenDecimal, Set.of("FIELD"));
        assertFields(betweenValue, Set.of("FIELD"));

        assertFields(length, Set.of("FIELD"));

        assertFields(include, Set.of("FIELD"));
        assertFields(includeAnd, Set.of("FIELD_A", "FIELD_B"));
        assertFields(includeOr, Set.of("FIELD_A", "FIELD_B"));

        assertFields(regex, Set.of("FIELD"));

        assertFields(singleFieldSum, Set.of("FIELD"));
        assertFields(multiFieldSum, Set.of("FIELD_A", "FIELD_B"));

        assertFields(singleFieldMin, Set.of("FIELD"));
        assertFields(multiFieldMin, Set.of("FIELD_A", "FIELD_B"));

        assertFields(singleFieldMax, Set.of("FIELD"));
        assertFields(multiFieldMax, Set.of("FIELD_A", "FIELD_B"));

        assertFields(singleFieldAvg, Set.of("FIELD"));
        assertFields(multiFieldAvg, Set.of("FIELD_A", "FIELD_B"));

        assertFields(singleFieldGroupBy, Set.of("FIELD"));
        assertFields(multiFieldGroupBy, Set.of("FIELD_A", "FIELD_B"));

        assertFields(singleFieldUnique, Set.of("FIELD"));
        assertFields(multiFieldUnique, Set.of("FIELD_A", "FIELD_B"));

        assertFields(singleFieldUniqueDay, Set.of("FIELD"));
        assertFields(multiFieldUniqueDay, Set.of("FIELD_A", "FIELD_B"));

        assertFields(singleFieldNoExpansion, Set.of("FIELD"));
        assertFields(multiFieldNoExpansion, Set.of("FIELD_A", "FIELD_B"));

        assertFields(singleFieldLenient, Set.of("FIELD"));
        assertFields(multiFieldLenient, Set.of("FIELD_A", "FIELD_B"));

        assertFields(singleFieldStrict, Set.of("FIELD"));
        assertFields(multiFieldStrict, Set.of("FIELD_A", "FIELD_B"));
    }

    /**
     * Asserts that the descriptor built from {@code query} reports exactly the {@code expected} field names.
     *
     * @param query
     *            a query consisting of a single function call
     * @param expected
     *            the expected field names
     */
    private void assertFields(String query, Set<String> expected) {
        QueryJexlArgumentDescriptor jexlDescriptor = getDescriptor(query);
        Set<String> fields = jexlDescriptor.fields(null, Set.of());
        assertEquals(expected, fields);
    }

    /**
     * Verifies the field sets reported for every supported function form; each field is expected in its own singleton set.
     */
    @Test
    void testFieldSets() {
        assertFieldSets(singleFieldCount, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldedCount, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(betweenDecimal, Set.of(Set.of("FIELD")));
        assertFieldSets(betweenValue, Set.of(Set.of("FIELD")));

        assertFieldSets(length, Set.of(Set.of("FIELD")));

        assertFieldSets(include, Set.of(Set.of("FIELD")));
        assertFieldSets(includeAnd, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));
        assertFieldSets(includeOr, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(regex, Set.of(Set.of("FIELD")));

        assertFieldSets(singleFieldSum, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldSum, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(singleFieldMin, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldMin, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(singleFieldMax, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldMax, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(singleFieldAvg, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldAvg, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(singleFieldGroupBy, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldGroupBy, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(singleFieldUnique, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldUnique, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        // the granular unique forms must go through fieldSets here, not fields
        assertFieldSets(singleFieldUniqueDay, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldUniqueDay, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(singleFieldNoExpansion, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldNoExpansion, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(singleFieldLenient, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldLenient, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));

        assertFieldSets(singleFieldStrict, Set.of(Set.of("FIELD")));
        assertFieldSets(multiFieldStrict, Set.of(Set.of("FIELD_A"), Set.of("FIELD_B")));
    }

    /**
     * Asserts that the descriptor built from {@code query} reports exactly the {@code expected} field sets.
     *
     * @param query
     *            a query consisting of a single function call
     * @param expected
     *            the expected field sets
     */
    private void assertFieldSets(String query, Set<Set<String>> expected) {
        QueryJexlArgumentDescriptor jexlDescriptor = getDescriptor(query);
        Set<Set<String>> fields = jexlDescriptor.fieldSets(null, Set.of());
        assertEquals(expected, fields);
    }

    /**
     * Parses the query and builds the argument descriptor for its first child, which must be a function node.
     *
     * @param query
     *            a query consisting of a single function call
     * @return the argument descriptor for the function
     * @throws IllegalArgumentException
     *             if the first child of the parsed script is not a function node
     */
    private QueryJexlArgumentDescriptor getDescriptor(String query) {
        ASTJexlScript script = getQuery(query);
        JexlNode child = script.jjtGetChild(0);
        if (child instanceof ASTFunctionNode) {
            return (QueryJexlArgumentDescriptor) descriptor.getArgumentDescriptor((ASTFunctionNode) child);
        }
        throw new IllegalArgumentException("Could not get descriptor for query: " + query);
    }

    /**
     * Parses and flattens the query, failing the current test if it cannot be parsed.
     *
     * @param query
     *            the query string
     * @return the parsed script
     */
    private ASTJexlScript getQuery(String query) {
        try {
            return JexlASTHelper.parseAndFlattenJexlQuery(query);
        } catch (ParseException e) {
            // fail(...) always throws; its generic return type lets it be used as the return expression,
            // and passing the exception keeps the parser's diagnostics in the failure report
            return fail("Could not parse query: " + query, e);
        }
    }
}
Loading
Loading