Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Log the results of index expansion in a standardized way #2658

Merged
merged 2 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,15 @@
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.commons.jexl3.parser.JexlNode;
import org.apache.commons.jexl3.parser.JexlNodes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import datawave.query.config.ShardQueryConfiguration;
import datawave.query.exceptions.DatawaveFatalQueryException;
import datawave.query.jexl.JexlASTHelper;
import datawave.query.jexl.lookups.AsyncIndexLookup;
import datawave.query.jexl.lookups.IndexLookup;
import datawave.query.jexl.lookups.IndexLookupMap;
import datawave.query.planner.pushdown.CostEstimator;
import datawave.query.tables.ScannerFactory;
import datawave.query.util.MetadataHelper;
Expand All @@ -31,6 +35,8 @@
* Abstract class which provides a framework for visitors which perform index lookups based on the contents of the Jexl tree
*/
public abstract class BaseIndexExpansionVisitor extends RebuildingVisitor {

private static final Logger log = LoggerFactory.getLogger(BaseIndexExpansionVisitor.class);
private static final int MIN_THREADS = 1;

protected ShardQueryConfiguration config;
Expand All @@ -49,6 +55,8 @@ public abstract class BaseIndexExpansionVisitor extends RebuildingVisitor {
protected Map<String,IndexLookup> lookupMap;
protected List<FutureJexlNode> futureJexlNodes;

protected String stage = "default";

protected BaseIndexExpansionVisitor(ShardQueryConfiguration config, ScannerFactory scannerFactory, MetadataHelper helper, String threadName)
throws TableNotFoundException {
this(config, scannerFactory, helper, null, threadName);
Expand Down Expand Up @@ -235,6 +243,52 @@ public void setRebuiltNode(JexlNode rebuiltNode) {
}
}

protected String getStage() {
return this.stage;
}

/**
* Log the result of the index expansion for the provided term
*
* @param node
* the term
* @param lookupMap
* the result of the lookup
*/
protected void logResult(JexlNode node, IndexLookupMap lookupMap) {
logResult(getStage(), node, lookupMap);
}

/**
* Log the result of index expansion for the provided term.
*
* @param stage
* which visitor is running
* @param node
* the term
* @param lookupMap
* the result of the lookup
*/
protected void logResult(String stage, JexlNode node, IndexLookupMap lookupMap) {
String field = JexlASTHelper.getIdentifier(node);

if (lookupMap == null) {
log.debug("Lookup map was null for term [{}]", JexlStringBuildingVisitor.buildQuery(node));
return;
}

if (lookupMap.containsKey(field)) {
String term = JexlStringBuildingVisitor.buildQuery(node);
if (lookupMap.get(field).isEmpty()) {
log.debug("{} expansion for term [{}] failed (no data)", stage, term);
} else if (lookupMap.get(field).isThresholdExceeded()) {
log.debug("{} expansion for term [{}] failed (threshold)", stage, term);
} else {
log.debug("{} expansion for term [{}] success ({} values)", stage, term, lookupMap.get(field).size());
}
}
}

/**
* Serves as a means to associate index lookup threads with a particular Index Expansion Visitor
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.commons.jexl3.parser.ASTAndNode;
import org.apache.commons.jexl3.parser.JexlNode;
import org.apache.log4j.Logger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import datawave.core.common.logging.ThreadConfigurableLogger;
import datawave.query.config.ShardQueryConfiguration;
import datawave.query.exceptions.IllegalRangeArgumentException;
import datawave.query.jexl.JexlASTHelper;
Expand All @@ -30,7 +30,7 @@
* Visits a Jexl tree, looks for bounded ranges, and replaces them with concrete values from the index
*/
public class BoundedRangeIndexExpansionVisitor extends BaseIndexExpansionVisitor {
private static final Logger log = ThreadConfigurableLogger.getLogger(BoundedRangeIndexExpansionVisitor.class);
private static final Logger log = LoggerFactory.getLogger(BoundedRangeIndexExpansionVisitor.class);

private final JexlASTHelper.RangeFinder rangeFinder;

Expand All @@ -40,6 +40,7 @@ protected BoundedRangeIndexExpansionVisitor(ShardQueryConfiguration config, Scan
super(config, scannerFactory, helper, "BoundedRangeIndexExpansion");

rangeFinder = JexlASTHelper.findRange().indexedOnly(this.config.getDatatypeFilter(), this.helper).notDelayed();
this.stage = "range";
}

/**
Expand Down Expand Up @@ -104,6 +105,10 @@ protected void rebuildFutureJexlNode(FutureJexlNode futureJexlNode) {
JexlNode currentNode = futureJexlNode.getOrigNode();
IndexLookupMap fieldsToTerms = futureJexlNode.getLookup().lookup();

if (log.isDebugEnabled()) {
logResult(currentNode, fieldsToTerms);
}

futureJexlNode.setRebuiltNode(JexlNodeFactory.createNodeTreeFromFieldsToValues(JexlNodeFactory.ContainerType.OR_NODE, false, currentNode, fieldsToTerms,
expandFields, expandValues, futureJexlNode.isKeepOriginalNode()));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
import org.apache.commons.jexl3.parser.JexlNode;
import org.apache.commons.jexl3.parser.JexlNodes;
import org.apache.commons.jexl3.parser.ParserTreeConstants;
import org.apache.log4j.Logger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
Expand All @@ -55,7 +56,7 @@
* Visits a Jexl tree, looks for regex terms, and replaces them with concrete values from the index
*/
public class RegexIndexExpansionVisitor extends BaseIndexExpansionVisitor {
private static final Logger log = ThreadConfigurableLogger.getLogger(RegexIndexExpansionVisitor.class);
private static final Logger log = LoggerFactory.getLogger(RegexIndexExpansionVisitor.class);

protected boolean expandUnfieldedNegations;

Expand Down Expand Up @@ -94,6 +95,7 @@ protected RegexIndexExpansionVisitor(ShardQueryConfiguration config, ScannerFact
} else {
this.onlyUseThese = null;
}
this.stage = "regex";
}

/**
Expand Down Expand Up @@ -210,7 +212,7 @@ public Object visit(ASTERNode node, Object data) {
// already did this expansion
return node;
} else if (!nonEvent && evalOnly) {
// no need to expand its going to come out of the event
// no need to expand it is going to come out of the event
return node;
}
}
Expand Down Expand Up @@ -238,12 +240,12 @@ public Object visit(ASTERNode node, Object data) {
// expand this regex because it would be more efficient to do so
if (!shouldProcessRegexFromCost(node)) {

if (log.isDebugEnabled()) {
log.debug("Determined we don't need to process regex node:");
if (log.isTraceEnabled()) {
log.trace("Determined we don't need to process regex node:");
for (String line : PrintingVisitor.formattedQueryStringList(node)) {
log.debug(line);
log.trace(line);
}
log.debug("");
log.trace("");
}
if (markedParents != null) {
for (JexlNode markedParent : markedParents) {
Expand All @@ -255,17 +257,14 @@ public Object visit(ASTERNode node, Object data) {
return QueryPropertyMarker.create(node, DELAYED); // wrap in a delayed predicate to avoid using in RangeStream
}
} else {
if (config.getMaxIndexScanTimeMillis() != Long.MAX_VALUE)
log.debug("Skipping cost estimation since we have a timeout ");
if (config.getMaxIndexScanTimeMillis() != Long.MAX_VALUE) {
log.trace("Skipping cost estimation because there is a timeout configured for index expansion");
}
}

try {
if (!helper.isIndexed(fieldName, config.getDatatypeFilter())) {
log.debug("Not expanding regular expression node as the field is not indexed");
for (String logLine : PrintingVisitor.formattedQueryStringList(node)) {
log.info(logLine);
}

log.debug("Not expanding regex [{}] because the field is not indexed", JexlStringBuildingVisitor.buildQuery(node));
return node;
}
} catch (TableNotFoundException e) {
Expand Down Expand Up @@ -634,13 +633,10 @@ public boolean shouldProcessRegexByCostWithChildren(List<JexlNode> children, Cos
Cost childCost = costAnalysis.computeCostForSubtree(child);

if (log.isDebugEnabled()) {
log.debug("Computed cost of " + childCost + " for:");
for (String logLine : PrintingVisitor.formattedQueryStringList(child)) {
log.debug(logLine);
}
log.debug("cost for term [{}] is {}", JexlStringBuildingVisitor.buildQuery(child), childCost);
}

// Use this child's cost if we have no current cost or it's less than the current cost
// Use this child's cost if we have no current cost, or it's less than the current cost
if (0 != childCost.getOtherCost()) {
if (0 != c.getOtherCost()) {
if (childCost.getOtherCost() < c.getOtherCost()) {
Expand Down Expand Up @@ -687,22 +683,20 @@ protected void rebuildFutureJexlNode(FutureJexlNode futureJexlNode) {
JexlNode currentNode = futureJexlNode.getOrigNode();
IndexLookupMap fieldsToTerms = futureJexlNode.getLookup().lookup();

if (log.isDebugEnabled()) {
logResult(currentNode, fieldsToTerms);
}

if (futureJexlNode.isIgnoreComposites()) {
// composites should be removed prior to building the index expansion iterators
// and should never be returned to the webservice
removeCompositeFields(fieldsToTerms);
}

JexlNode newNode;

// If we have no children, it's impossible to find any records, so this query returns no results
if (fieldsToTerms.isEmpty()) {
if (log.isDebugEnabled()) {
try {
log.debug("Failed to expand _ANYFIELD_ node because of no mappings for {\"term\": \"" + JexlASTHelper.getLiteral(currentNode) + "\"}");
} catch (Exception ex) {
// it's just a debug statement
}
}

// simply replace the _ANYFIELD_ with _NOFIELD_ denoting that there was no expansion. This will naturally evaluate correctly when applying
// the query against the document
for (ASTIdentifier id : JexlASTHelper.getIdentifiers(currentNode)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,17 @@
import org.apache.commons.jexl3.parser.ASTOrNode;
import org.apache.commons.jexl3.parser.ASTReferenceExpression;
import org.apache.commons.jexl3.parser.JexlNode;
import org.apache.log4j.Logger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import datawave.core.common.logging.ThreadConfigurableLogger;
import datawave.data.type.Type;
import datawave.query.config.ShardQueryConfiguration;
import datawave.query.exceptions.DatawaveFatalQueryException;
import datawave.query.exceptions.EmptyUnfieldedTermExpansionException;
import datawave.query.jexl.JexlASTHelper;
import datawave.query.jexl.JexlNodeFactory;
import datawave.query.jexl.lookups.IndexLookup;
import datawave.query.jexl.lookups.IndexLookupMap;
import datawave.query.jexl.lookups.ShardIndexQueryTableStaticMethods;
import datawave.query.jexl.nodes.QueryPropertyMarker;
import datawave.query.tables.ScannerFactory;
Expand All @@ -38,7 +40,7 @@
* Visits a Jexl tree, looks for unfielded terms, and replaces them with fielded terms from the index
*/
public class UnfieldedIndexExpansionVisitor extends RegexIndexExpansionVisitor {
private static final Logger log = ThreadConfigurableLogger.getLogger(UnfieldedIndexExpansionVisitor.class);
private static final Logger log = LoggerFactory.getLogger(UnfieldedIndexExpansionVisitor.class);

protected Set<String> expansionFields;
protected Set<Type<?>> allTypes;
Expand All @@ -54,6 +56,7 @@ protected UnfieldedIndexExpansionVisitor(ShardQueryConfiguration config, Scanner
}

this.allTypes = helper.getAllDatatypes();
this.stage = "field";
}

/**
Expand Down Expand Up @@ -91,7 +94,7 @@ public static <T extends JexlNode> T expandUnfielded(ShardQueryConfiguration con
private static <T extends JexlNode> T ensureTreeNotEmpty(T script) throws EmptyUnfieldedTermExpansionException {
if (script.jjtGetNumChildren() == 0) {
NotFoundQueryException qe = new NotFoundQueryException(DatawaveErrorCode.NO_UNFIELDED_TERM_EXPANSION_MATCH);
log.warn(qe);
log.warn("Empty script", qe);
throw new EmptyUnfieldedTermExpansionException(qe);
}
return script;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
package datawave.query.jexl.lookups;

import static org.easymock.EasyMock.createMock;
import static org.easymock.EasyMock.eq;
import static org.easymock.EasyMock.expect;
import static org.easymock.EasyMock.isA;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.powermock.api.easymock.PowerMock.replayAll;
import static org.powermock.api.easymock.PowerMock.verifyAll;

import java.text.SimpleDateFormat;
import java.util.AbstractMap;
Expand Down
Loading