Skip to content

Commit

Permalink
working summaries created
Browse files Browse the repository at this point in the history
  • Loading branch information
austin007008 committed Nov 21, 2024
1 parent faf7544 commit 239efc1
Show file tree
Hide file tree
Showing 19 changed files with 1,094 additions and 460 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,11 @@ public class QueryParameters {
*/
public static final String EXCERPT_FIELDS = "excerpt.fields";

/**
* Name of the query parameter used to specify summaries that should be returned. The parameter key is {@code summary.size}.
*/
public static final String SUMMARY = "summary.size";

/**
* Used to specify model or DB fields that should be treated as lenient (can be skipped if normalization fails)
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
package datawave.query.attributes;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonValue;

import datawave.query.Constants;
import datawave.query.postprocessing.tf.PhraseIndexes;

/**
 * Represents options for a summary that have been specified within a #SUMMARY_SIZE function. An instance of {@link SummarySize} can easily be captured as a
 * parameter string using {@link SummarySize#toString()}, and transformed back into a {@link SummarySize} instance via {@link SummarySize#from(String)}.
 * <p>
 * The options consist of a summary size (defaulting to 150), an ordered list of content names, and an "only" flag indicating that only the listed content
 * names should be used. The string form is {@code size[/ONLY][/contentName1,contentName2,...]}.
 * <p>
 * Because the string form uses the literal {@code ONLY} as a marker, a single content name that is itself {@code ONLY} (case-insensitive) cannot be
 * distinguished from the flag when it is the sole entry after the size.
 */
public class SummarySize implements Serializable {

    private static final long serialVersionUID = 6769159729743311079L;

    private static final int DEFAULT_SIZE = 150;
    /** Marker written by {@link #toString()} and recognized (case-insensitively) by {@link #from(String)}. */
    private static final String ONLY = "ONLY";
    private static final Logger log = LoggerFactory.getLogger(SummarySize.class);

    private int summarySize;
    private ArrayList<String> contentNamesList;
    private boolean only;

    /**
     * Creates a {@link SummarySize} with the default size, no content names, and the "only" flag unset.
     */
    public SummarySize() {
        summarySize = DEFAULT_SIZE;
        contentNamesList = new ArrayList<>();
        only = false;
    }

    /**
     * Returns a new {@link SummarySize} parsed from the string. The provided string is expected to have the format returned by {@link SummarySize#toString()}.
     * <ul>
     * <li>Given null, null will be returned.</li>
     * <li>Given an empty or blank string, an empty {@link SummarySize} will be returned.</li>
     * <li>Given {@code 50}, a {@link SummarySize} will be returned with a size of 50 and no content names.</li>
     * <li>Given {@code 50/ONLY}, a {@link SummarySize} will be returned with a size of 50, the "only" flag set, and no content names.</li>
     * <li>Given {@code 50/CONTENT1,CONTENT2}, a {@link SummarySize} will be returned with a size of 50 and a list of content names of (CONTENT1,
     * CONTENT2).</li>
     * <li>Given {@code 50/ONLY/CONTENT1,CONTENT2}, a {@link SummarySize} will be returned with a size of 50, only using the specified content names, and a
     * list of content names of (CONTENT1, CONTENT2).</li>
     * </ul>
     * All whitespace is removed from the string before it is parsed. Any parts after the third are ignored.
     *
     * @param string
     *            the string to parse
     * @return the parsed {@link SummarySize}
     * @throws NumberFormatException
     *             if the first part of the string is not a parsable integer
     * @throws IllegalArgumentException
     *             if the string contains nothing but separators
     */
    @JsonCreator
    public static SummarySize from(String string) {
        if (string == null) {
            return null;
        }
        // Strip whitespaces.
        String stripped = PhraseIndexes.whitespacePattern.matcher(string).replaceAll("");

        SummarySize summarySize = new SummarySize();
        if (stripped.isEmpty()) {
            return summarySize;
        }

        String[] parameterParts = stripped.split(Constants.FORWARD_SLASH);
        // split() drops trailing empty strings, so a separator-only input yields an empty array
        if (parameterParts.length == 0) {
            throw new IllegalArgumentException("No summary size found in '" + string + "'");
        }

        // add the size
        summarySize.summarySize = Integer.parseInt(parameterParts[0]);

        if (parameterParts.length == 2) {
            // toString() emits "size/ONLY" when the flag is set and there are no content names, so the
            // second part is either the ONLY marker or a list of content names
            if (ONLY.equalsIgnoreCase(parameterParts[1])) {
                summarySize.only = true;
            } else {
                Collections.addAll(summarySize.contentNamesList, parameterParts[1].split(Constants.COMMA));
            }
        } else if (parameterParts.length >= 3) {
            // with 3 parts, part 2 is expected to be "only" and part 3 is a list of content names
            summarySize.only = ONLY.equalsIgnoreCase(parameterParts[1]);
            Collections.addAll(summarySize.contentNamesList, parameterParts[2].split(Constants.COMMA));
        }

        return summarySize;
    }

    /**
     * Returns a copy of the given {@link SummarySize}. The copy has its own content names list.
     *
     * @param other
     *            the instance to copy
     * @return the copy, or null if {@code other} is null
     */
    public static SummarySize copyOf(SummarySize other) {
        if (other == null) {
            return null;
        }
        SummarySize summarySize = new SummarySize();
        summarySize.summarySize = other.summarySize;
        summarySize.contentNamesList = new ArrayList<>(other.contentNamesList);
        summarySize.only = other.only;
        return summarySize;
    }

    /**
     * Returns the content names. This is the backing list rather than a copy, so changes made to it are reflected in this instance.
     *
     * @return the content names list
     */
    public List<String> getContentNames() {
        return contentNamesList;
    }

    /**
     * Returns the summary size.
     *
     * @return the size
     */
    public int getSummarySize() {
        return summarySize;
    }

    /**
     * Returns whether the "only" flag is set.
     *
     * @return true if only the listed content names should be used, or false otherwise
     */
    public boolean onlyListedContents() {
        return only;
    }

    /**
     * Appends a content name to the end of the list.
     *
     * @param contentName
     *            the content name to add
     */
    public void addContentName(String contentName) {
        contentNamesList.add(contentName);
    }

    /**
     * Inserts a content name at the given index. If the index is negative, or is not smaller than the current list size, the content name is inserted at the
     * beginning of the list instead.
     *
     * @param contentName
     *            the content name to add
     * @param index
     *            the position to insert at
     */
    public void addContentName(String contentName, int index) {
        if (index < contentNamesList.size() && index >= 0) {
            contentNamesList.add(index, contentName);
        } else {
            log.info("index out of bounds, adding to beginning of list");
            contentNamesList.add(0, contentName);
        }
    }

    /**
     * Inserts a content name at the beginning of the list.
     *
     * @param contentName
     *            the content name to add
     */
    public void addContentNameToBeginning(String contentName) {
        contentNamesList.add(0, contentName);
    }

    /**
     * Replace the first occurrence of a content name with another content name. Does nothing if the content name is not present.
     *
     * @param contentName
     *            the one to replace
     * @param replacement
     *            the one to replace the other
     */
    public void replace(String contentName, String replacement) {
        int index = contentNamesList.indexOf(contentName);
        if (index != -1) {
            contentNamesList.set(index, replacement);
        }
    }

    /**
     * Return whether this {@link SummarySize} content names list is empty.
     *
     * @return true if empty, or false otherwise
     */
    public boolean isEmpty() {
        return contentNamesList.isEmpty();
    }

    /**
     * Returns the content names joined with commas, in list order.
     *
     * @return the comma-delimited content names, or an empty string if there are none
     */
    public String contentNamesListToString() {
        return joinContentNames();
    }

    /**
     * Splits a comma-delimited string into content names.
     *
     * @param string
     *            the comma-delimited string, must not be null
     * @return the individual content names
     */
    public static String[] contentNamesListFromString(String string) {
        return string.split(Constants.COMMA);
    }

    /**
     * Returns this {@link SummarySize} as a formatted string that can later be parsed back into a {@link SummarySize} using {@link SummarySize#from(String)}.
     * This is also what will be used when serializing a {@link SummarySize} to JSON/XML. The string will have the format
     * {@code size[/ONLY][/contentName1,contentName2,....]}.
     *
     * @return a formatted string
     */
    @JsonValue
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append(summarySize);
        if (only) {
            sb.append("/").append(ONLY);
        }
        if (!contentNamesList.isEmpty()) {
            sb.append("/").append(joinContentNames());
        }
        return sb.toString();
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        SummarySize that = (SummarySize) o;
        return summarySize == that.summarySize && only == that.only && Objects.equals(contentNamesList, that.contentNamesList);
    }

    @Override
    public int hashCode() {
        return Objects.hash(summarySize, contentNamesList, only);
    }

    /**
     * Joins the content names with commas. Shared by {@link #toString()} and {@link #contentNamesListToString()} so both produce the same list format.
     */
    private String joinContentNames() {
        return String.join(Constants.COMMA, contentNamesList);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,13 @@
import datawave.query.DocumentSerialization.ReturnType;
import datawave.query.QueryParameters;
import datawave.query.attributes.ExcerptFields;
import datawave.query.attributes.SummarySize;
import datawave.query.attributes.UniqueFields;
import datawave.query.common.grouping.GroupFields;
import datawave.query.function.DocumentPermutation;
import datawave.query.iterator.QueryIterator;
import datawave.query.iterator.ivarator.IvaratorCacheDirConfig;
import datawave.query.iterator.logic.DColumnSummaryIterator;
import datawave.query.iterator.logic.TermFrequencyExcerptIterator;
import datawave.query.jexl.JexlASTHelper;
import datawave.query.jexl.visitors.JexlStringBuildingVisitor;
Expand Down Expand Up @@ -438,6 +440,11 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement
// The class for the excerpt iterator
private Class<? extends SortedKeyValueIterator<Key,Value>> excerptIterator = TermFrequencyExcerptIterator.class;

// The summary options (size, ONLY flag and content names); starts out as a default SummarySize
private SummarySize summarySize = new SummarySize();

// The class for the summary iterator; defaults to DColumnSummaryIterator
private Class<? extends SortedKeyValueIterator<Key,Value>> summaryIterator = DColumnSummaryIterator.class;

/**
* A bloom filter to avoid duplicate results if needed
*/
Expand Down Expand Up @@ -734,6 +741,8 @@ public void copyFrom(ShardQueryConfiguration other) {
this.setStrictFields(other.getStrictFields());
this.setExcerptFields(ExcerptFields.copyOf(other.getExcerptFields()));
this.setExcerptIterator(other.getExcerptIterator());
this.setSummarySize(SummarySize.copyOf(other.getSummarySize()));
this.setSummaryIterator(other.getSummaryIterator());
this.setFiFieldSeek(other.getFiFieldSeek());
this.setFiNextSeek(other.getFiNextSeek());
this.setEventFieldSeek(other.getEventFieldSeek());
Expand Down Expand Up @@ -2611,6 +2620,24 @@ public void setExcerptIterator(Class<? extends SortedKeyValueIterator<Key,Value>
this.excerptIterator = excerptIterator;
}

/**
 * Returns the summary options held by this configuration.
 *
 * @return the current {@link SummarySize}
 */
public SummarySize getSummarySize() {
    return this.summarySize;
}

/**
 * Sets the summary options for this configuration. A null argument is ignored and the current value is kept.
 *
 * @param summarySize
 *            the new {@link SummarySize}; no change is made when null
 */
public void setSummarySize(SummarySize summarySize) {
    if (summarySize == null) {
        // keep whatever is currently configured rather than clearing it
        return;
    }
    this.summarySize = summarySize;
}

/**
 * Returns the iterator class configured for building summaries.
 *
 * @return the summary iterator class
 */
public Class<? extends SortedKeyValueIterator<Key,Value>> getSummaryIterator() {
    return this.summaryIterator;
}

/**
* Sets the iterator class to use for building summaries. The value is stored as given, including null.
*
* @param summaryIterator
*            the summary iterator class
*/
public void setSummaryIterator(Class<? extends SortedKeyValueIterator<Key,Value>> summaryIterator) {
this.summaryIterator = summaryIterator;
}

/**
 * Returns the configured {@code fiFieldSeek} value.
 *
 * @return the fiFieldSeek value
 */
public int getFiFieldSeek() {
    return this.fiFieldSeek;
}
Expand Down Expand Up @@ -2990,6 +3017,7 @@ public boolean equals(Object o) {
Objects.equals(getLenientFields(), that.getLenientFields()) &&
Objects.equals(getStrictFields(), that.getStrictFields()) &&
Objects.equals(getExcerptFields(), that.getExcerptFields()) &&
Objects.equals(getSummarySize(), that.getSummarySize()) &&
getFiFieldSeek() == that.getFiFieldSeek() &&
getFiNextSeek() == that.getFiNextSeek() &&
getEventFieldSeek() == that.getEventFieldSeek() &&
Expand Down Expand Up @@ -3195,6 +3223,7 @@ public int hashCode() {
getLenientFields(),
getStrictFields(),
getExcerptFields(),
getSummarySize(),
getFiFieldSeek(),
getFiNextSeek(),
getEventFieldSeek(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@
import datawave.query.tracking.ActiveQuery;
import datawave.query.tracking.ActiveQueryLog;
import datawave.query.transformer.ExcerptTransform;
import datawave.query.transformer.SummaryTransform;
import datawave.query.transformer.UniqueTransform;
import datawave.query.util.EmptyContext;
import datawave.query.util.EntryToTuple;
Expand Down Expand Up @@ -203,6 +204,8 @@ public class QueryIterator extends QueryOptions implements YieldingKeyValueItera

protected ExcerptTransform excerptTransform = null;

// Created on first use by getSummaryTransform(); null until then
protected SummaryTransform summaryTransform = null;

protected RangeProvider rangeProvider;

/**
 * Creates a {@link QueryIterator}. This constructor has an empty body and performs no initialization of its own.
 */
public QueryIterator() {
    // intentionally empty
}
Expand Down Expand Up @@ -830,6 +833,11 @@ public Entry<DocumentData,Document> apply(@Nullable Entry<Key,Document> input) {
documents = excerptTransform.getIterator(documents);
}

SummaryTransform summaryTransform = getSummaryTransform();
if (summaryTransform != null) {
documents = summaryTransform.getIterator(documents);
}

// a hook to allow mapping the document such as with the TLD or Parent
// query logics
// or if the document was not aggregated in the first place because the
Expand Down Expand Up @@ -1625,6 +1633,22 @@ protected ExcerptTransform getExcerptTransform() {
return excerptTransform;
}

/**
* Returns the {@link SummaryTransform} for this iterator, creating it on first use.
* <p>
* A transform is only created when {@code getSummarySize()} returns a non-null value; otherwise the current (possibly null) field value is returned as is.
* Creation happens inside a block synchronized on the object returned by {@code getSummarySize()}, with a second null check once the lock is held.
*
* @return the existing or newly created transform, or null if none exists and no summary size is available
* @throws RuntimeException
*             if the summary iterator class cannot be instantiated through its no-argument constructor
*/
protected SummaryTransform getSummaryTransform() {
// NOTE(review): the summaryTransform field is not declared volatile where it is declared in this class - confirm whether this method can be reached
// from more than one thread
if (summaryTransform == null && getSummarySize() != null) {
// NOTE(review): the lock object comes from getSummarySize() while the constructor below reads the summarySize field directly - presumably the same
// object; confirm
synchronized (getSummarySize()) {
// re-check under the lock so the transform is only built once by callers synchronizing on the same object
if (summaryTransform == null) {
try {
// the transform is given a deep copy of the source and a fresh instance of the configured summary iterator class
summaryTransform = new SummaryTransform(summarySize, myEnvironment, sourceForDeepCopies.deepCopy(myEnvironment),
summaryIterator.getDeclaredConstructor().newInstance());
} catch (NoSuchMethodException | InstantiationException | IllegalAccessException | InvocationTargetException e) {
// reflective instantiation failures are rethrown unchecked with the original cause preserved
throw new RuntimeException("Could not create summary transform", e);
}
}
}
}
return summaryTransform;
}

/**
* Get a default implementation of a {@link RangeProvider}
*
Expand Down
Loading

0 comments on commit 239efc1

Please sign in to comment.