Skip to content

Commit

Permalink
recommendation pt2
Browse files Browse the repository at this point in the history
  • Loading branch information
austin007008 committed Dec 9, 2024
1 parent 7533c5e commit d095793
Show file tree
Hide file tree
Showing 6 changed files with 202 additions and 93 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,11 @@ public static SummaryOptions from(String string) {
String[] parts = parameterPart.split(Constants.COLON);
// if we have the "size" option...
if (parts[0].equalsIgnoreCase(SIZE_PARAMETER)) {
summaryOptions.summarySize = Integer.parseInt(parts[1]);
int size = Integer.parseInt(parts[1]);
if (size == 0) {
return new SummaryOptions();
}
summaryOptions.summarySize = size;
}
// if we have the "only" option...
else if (parts[0].equalsIgnoreCase(ONLY_PARAMETER)) {
Expand All @@ -102,6 +106,10 @@ else if (parts[0].equalsIgnoreCase(VIEWS_PARAMETER)) {
}
}
}
// if size was not specified, make it DEFAULT_SIZE
if (summaryOptions.summarySize == 0) {
summaryOptions.summarySize = DEFAULT_SIZE;
}
} catch (Exception e) {
log.warn("Unable to parse summary size string, returning empty SummaryOptions: {}", string, e);
return new SummaryOptions();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@

import datawave.query.Constants;
import datawave.query.attributes.SummaryOptions;
import datawave.query.table.parser.ContentKeyValueFactory;

/**
* This iterator is intended to scan the d column for a specified document. The result will be a summary for each document scanned.
Expand Down Expand Up @@ -261,7 +260,7 @@ public void next() throws IOException {
}

// create the summary
String summary = createSummary(viewSummaryOrder, foundContent, summarySize);
String summary = new SummaryCreator(viewSummaryOrder, foundContent, summarySize).createSummary();
if (summary != null) {
tk = new Key(top.getRow(), new Text(dtUid), new Text(summary), top.getColumnVisibility());
tv = new Value();
Expand All @@ -273,59 +272,6 @@ public void next() throws IOException {
tv = null;
}

/**
 * Builds a summary from the first entry of {@code viewSummaryOrder} that has matching content.
 * <p>
 * An entry ending in {@code *} matches every found view whose name starts with the text before the wildcard; any other entry must match a found view name
 * exactly. Each matching view is rendered as {@code "<view name>: <content>"}, with the decoded content cut to at most {@code summarySize} characters.
 * </p>
 *
 * @param viewSummaryOrder
 *            the view names to try, in priority order
 * @param foundContent
 *            the encoded content found for the document, keyed by view name
 * @param summarySize
 *            the maximum number of characters of content to keep per view
 * @return the summary text, or {@code null} when no entry matched any found content
 */
private static String createSummary(List<String> viewSummaryOrder, Map<String,byte[]> foundContent, int summarySize) {
    for (String candidate : viewSummaryOrder) {
        if (!candidate.endsWith("*")) {
            // exact view name: summarize it if the document has content stored under that name
            if (foundContent.containsKey(candidate)) {
                String text = new String(ContentKeyValueFactory.decodeAndDecompressContent(foundContent.get(candidate)));
                return candidate + ": " + (text.length() > summarySize ? text.substring(0, summarySize) : text);
            }
            continue;
        }

        // wildcard view name: drop the trailing '*' and gather every view that starts with the remainder
        String prefix = candidate.substring(0, candidate.length() - 1);
        Map<String,String> matched = new HashMap<>();
        for (Map.Entry<String,byte[]> view : foundContent.entrySet()) {
            String viewName = view.getKey();
            if (!viewName.startsWith(prefix)) {
                continue;
            }
            String text = new String(ContentKeyValueFactory.decodeAndDecompressContent(view.getValue()));
            matched.put(viewName, text.length() > summarySize ? text.substring(0, summarySize) : text);
        }
        if (matched.isEmpty()) {
            // nothing matched this wildcard, move on to the next candidate
            continue;
        }

        // one "name: content" line per matched view; the final trim removes the trailing newline
        StringBuilder joined = new StringBuilder();
        for (Map.Entry<String,String> line : matched.entrySet()) {
            joined.append(line.getKey()).append(": ").append(line.getValue()).append("\n");
        }
        return joined.toString().trim();
    }
    return null;
}

/**
* Seek to the dt/uid following the one passed in
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package datawave.query.iterator.logic;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import datawave.query.table.parser.ContentKeyValueFactory;

/**
* This class contains the functionality to generate summaries.
* <p>
* </p>
* Just need to call "createSummary()" after creation.
*/
public class SummaryCreator {
private final List<String> viewSummaryOrder;
Map<String,byte[]> foundContent;
int summarySize;

public SummaryCreator(List<String> viewSummaryOrder, Map<String,byte[]> foundContent, int summarySize) {
this.viewSummaryOrder = viewSummaryOrder;
this.foundContent = foundContent;
this.summarySize = summarySize;
}

/**
* this method attempts to create a summary out of the found views
*
* @return the created summary
*/
public String createSummary() {
// check each potential view name we could make summaries for
for (String name : viewSummaryOrder) {
if (name.endsWith("*")) {
// strip wildcard from view name
name = name.substring(0, name.length() - 1);

String endingWildcardSummary = getEndingWildcardSummary(name, foundContent, summarySize);
if (endingWildcardSummary != null) {
return endingWildcardSummary;
}
} else {
String simpleSummary = getSimpleSummary(name, foundContent, summarySize);
if (simpleSummary != null) {
return simpleSummary;
}
}
}
return null;
}

/** for matching and creating summaries when view names have trailing wildcards */
private static String getEndingWildcardSummary(String currentViewName, Map<String,byte[]> foundContent, int summarySize) {
// if we have a view name that matches the list...
Map<String,String> summaries = new HashMap<>();
for (Map.Entry<String,byte[]> entry : foundContent.entrySet()) {
if (entry.getKey().startsWith(currentViewName)) {
// decode and decompress the content
String summary = new String(ContentKeyValueFactory.decodeAndDecompressContent(entry.getValue()));
// if the content is longer than the specified length, truncate it
if (summary.length() > summarySize) {
summary = summary.substring(0, summarySize);
}
summaries.put(entry.getKey(), summary);
}
}
if (!summaries.isEmpty()) {
// return the view name and summary separated by a new line character
StringBuilder sb = new StringBuilder();
for (Map.Entry<String,String> entry : summaries.entrySet()) {
sb.append(entry.getKey()).append(": ").append(entry.getValue()).append("\n");
}
return sb.toString().trim();
}
return null;
}

/** a straight-up match between view names */
private static String getSimpleSummary(String currentViewName, Map<String,byte[]> foundContent, int summarySize) {
if (foundContent.containsKey(currentViewName)) {
// decode and decompress the content
String summary = new String(ContentKeyValueFactory.decodeAndDecompressContent(foundContent.get(currentViewName)));
// if the content is longer than the specified length, truncate it
if (summary.length() > summarySize) {
summary = summary.substring(0, summarySize);
}
return currentViewName + ": " + summary;
}
return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1562,7 +1562,7 @@ public void setSummaryIterator(String iteratorClass) {
try {
getConfig().setSummaryIterator((Class<? extends SortedKeyValueIterator<Key,Value>>) Class.forName(iteratorClass));
} catch (Exception e) {
throw new DatawaveFatalQueryException("Illegal d column summary iterator class", e);
throw new DatawaveFatalQueryException("Illegal content summary iterator class", e);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,21 @@
import datawave.query.attributes.DocumentKey;
import datawave.query.attributes.SummaryOptions;
import datawave.query.iterator.logic.ContentSummaryIterator;
import datawave.query.iterator.logic.TermFrequencyExcerptIterator;

/**
 * This class is used to add summaries to returned documents when specified.
 * <p>
 * An iterator of type "ContentSummaryIterator" is used to do the summary generation, using options from a "SummaryOptions".
 * </p>
 */
public class SummaryTransform extends DocumentTransform.DefaultDocumentTransform {

private static final Logger log = LoggerFactory.getLogger(SummaryTransform.class);

public static final String SUMMARY_ERROR_MESSAGE = "UNABLE TO GENERATE SUMMARY";
private static final String SUMMARY_ERROR_MESSAGE = "UNABLE TO GENERATE SUMMARY";
private static final String SUMMARY_EMPTY_MESSAGE = "NO CONTENT FOUND TO SUMMARIZE";
private static final Summary ERROR_SUMMARY = new Summary(null, SUMMARY_ERROR_MESSAGE);
private static final Summary EMPTY_SUMMARY = new Summary(null, "NO CONTENT FOUND TO SUMMARIZE");
private static final Summary EMPTY_SUMMARY = new Summary(null, SUMMARY_EMPTY_MESSAGE);

private static final String CONTENT_SUMMARY = "CONTENT_SUMMARY";

Expand All @@ -53,17 +59,14 @@ public class SummaryTransform extends DocumentTransform.DefaultDocumentTransform
private final IteratorEnvironment env;
private final SortedKeyValueIterator<Key,Value> source;

/**
 * Convenience constructor that delegates to the four-argument constructor, supplying a new TermFrequencyExcerptIterator as the summary iterator.
 * <p>
 * NOTE(review): the four-argument constructor casts its summaryIterator argument to ContentSummaryIterator - confirm that a TermFrequencyExcerptIterator
 * is compatible with that cast.
 * </p>
 */
public SummaryTransform(SummaryOptions summaryOptions, IteratorEnvironment env, SortedKeyValueIterator<Key,Value> source) {
this(summaryOptions, env, source, new TermFrequencyExcerptIterator());
}

/**
 * Creates a transform that uses the given iterator to generate summaries.
 *
 * @param summaryOptions
 *            the summary options; passed through {@code ArgumentChecker.notNull} before anything is stored
 * @param env
 *            the iterator environment
 * @param source
 *            the source iterator
 * @param summaryIterator
 *            the iterator used to generate summaries; it is cast to {@code ContentSummaryIterator}
 */
public SummaryTransform(SummaryOptions summaryOptions, IteratorEnvironment env, SortedKeyValueIterator<Key,Value> source,
                SortedKeyValueIterator<Key,Value> summaryIterator) {
    ArgumentChecker.notNull(summaryOptions);
    this.summaryIterator = (ContentSummaryIterator) summaryIterator;
    this.source = source;
    this.env = env;
    this.summaryOptions = summaryOptions;
}

@Nullable
Expand Down Expand Up @@ -204,7 +207,7 @@ private Summary getSummary(Range range, SummaryOptions summaryOptions) {
if (summary.isBlank()) {
if (log.isErrorEnabled()) {
log.error("{} returned top key with blank column qualifier in key: {} when scanning for summary within range {}",
TermFrequencyExcerptIterator.class.getSimpleName(), summaryIterator.getTopKey(), range);
ContentSummaryIterator.class.getSimpleName(), summaryIterator.getTopKey(), range);
}
return ERROR_SUMMARY;
}
Expand Down
Loading

0 comments on commit d095793

Please sign in to comment.