-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Search Pipelines] Add request-scoped state shared between processors (…
…#9405) To handle cases where multiple search pipeline processors need to share information, we will allocate a context holder for the lifetime of the request and pass it to each processor to get/set values. To explain this behavior and benefit from it, this change also introduces three new processors: 1. The "oversample" request processor that increases "size", storing the original size in the context. 2. The "truncate" response processor that discards results after some number, by default using the original size before oversampling. 3. The "collapse" response processor offers similar behavior to a collapse query, discarding results that have a field value in common with a higher-scoring result. Signed-off-by: Michael Froh <froh@amazon.com>
- Loading branch information
Showing
30 changed files
with
1,444 additions
and
464 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
126 changes: 126 additions & 0 deletions
126
.../search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/BasicMap.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.search.pipeline.common; | ||
|
||
import java.util.Collection; | ||
import java.util.Map; | ||
import java.util.Set; | ||
import java.util.function.BiConsumer; | ||
import java.util.function.BiFunction; | ||
import java.util.function.Function; | ||
|
||
/** | ||
* Helper for map abstractions passed to scripting processors. Throws {@link UnsupportedOperationException} for almost | ||
* all methods. Subclasses just need to implement get and put. | ||
*/ | ||
abstract class BasicMap implements Map<String, Object> { | ||
|
||
/** | ||
* No-args constructor. | ||
*/ | ||
protected BasicMap() {} | ||
|
||
private static final String UNSUPPORTED_OP_ERR = " Method not supported in Search pipeline script"; | ||
|
||
@Override | ||
public boolean isEmpty() { | ||
throw new UnsupportedOperationException("isEmpty" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
public int size() { | ||
throw new UnsupportedOperationException("size" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
public boolean containsKey(Object key) { | ||
return get(key) != null; | ||
} | ||
|
||
public boolean containsValue(Object value) { | ||
throw new UnsupportedOperationException("containsValue" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
public Object remove(Object key) { | ||
throw new UnsupportedOperationException("remove" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
public void putAll(Map<? extends String, ?> m) { | ||
throw new UnsupportedOperationException("putAll" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
public void clear() { | ||
throw new UnsupportedOperationException("clear" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
public Set<String> keySet() { | ||
throw new UnsupportedOperationException("keySet" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
public Collection<Object> values() { | ||
throw new UnsupportedOperationException("values" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
public Set<Map.Entry<String, Object>> entrySet() { | ||
throw new UnsupportedOperationException("entrySet" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
@Override | ||
public Object getOrDefault(Object key, Object defaultValue) { | ||
throw new UnsupportedOperationException("getOrDefault" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
@Override | ||
public void forEach(BiConsumer<? super String, ? super Object> action) { | ||
throw new UnsupportedOperationException("forEach" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
@Override | ||
public void replaceAll(BiFunction<? super String, ? super Object, ?> function) { | ||
throw new UnsupportedOperationException("replaceAll" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
@Override | ||
public Object putIfAbsent(String key, Object value) { | ||
throw new UnsupportedOperationException("putIfAbsent" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
@Override | ||
public boolean remove(Object key, Object value) { | ||
throw new UnsupportedOperationException("remove" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
@Override | ||
public boolean replace(String key, Object oldValue, Object newValue) { | ||
throw new UnsupportedOperationException("replace" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
@Override | ||
public Object replace(String key, Object value) { | ||
throw new UnsupportedOperationException("replace" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
@Override | ||
public Object computeIfAbsent(String key, Function<? super String, ?> mappingFunction) { | ||
throw new UnsupportedOperationException("computeIfAbsent" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
@Override | ||
public Object computeIfPresent(String key, BiFunction<? super String, ? super Object, ?> remappingFunction) { | ||
throw new UnsupportedOperationException("computeIfPresent" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
@Override | ||
public Object compute(String key, BiFunction<? super String, ? super Object, ?> remappingFunction) { | ||
throw new UnsupportedOperationException("compute" + UNSUPPORTED_OP_ERR); | ||
} | ||
|
||
@Override | ||
public Object merge(String key, Object value, BiFunction<? super Object, ? super Object, ?> remappingFunction) { | ||
throw new UnsupportedOperationException("merge" + UNSUPPORTED_OP_ERR); | ||
} | ||
} |
122 changes: 122 additions & 0 deletions
122
...common/src/main/java/org/opensearch/search/pipeline/common/CollapseResponseProcessor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.search.pipeline.common; | ||
|
||
import org.opensearch.action.search.SearchRequest; | ||
import org.opensearch.action.search.SearchResponse; | ||
import org.opensearch.common.document.DocumentField; | ||
import org.opensearch.ingest.ConfigurationUtils; | ||
import org.opensearch.search.SearchHit; | ||
import org.opensearch.search.SearchHits; | ||
import org.opensearch.search.pipeline.AbstractProcessor; | ||
import org.opensearch.search.pipeline.Processor; | ||
import org.opensearch.search.pipeline.SearchResponseProcessor; | ||
import org.opensearch.search.pipeline.common.helpers.SearchResponseUtil; | ||
|
||
import java.util.ArrayList; | ||
import java.util.LinkedHashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Objects; | ||
|
||
/** | ||
* A simple implementation of field collapsing on search responses. Note that this is not going to work as well as | ||
* field collapsing at the shard level, as implemented with the "collapse" parameter in a search request. Mostly | ||
* just using this to demo the oversample / truncate_hits processors. | ||
*/ | ||
public class CollapseResponseProcessor extends AbstractProcessor implements SearchResponseProcessor { | ||
/** | ||
* Key to reference this processor type from a search pipeline. | ||
*/ | ||
public static final String TYPE = "collapse"; | ||
static final String COLLAPSE_FIELD = "field"; | ||
private final String collapseField; | ||
|
||
private CollapseResponseProcessor(String tag, String description, boolean ignoreFailure, String collapseField) { | ||
super(tag, description, ignoreFailure); | ||
this.collapseField = Objects.requireNonNull(collapseField); | ||
} | ||
|
||
@Override | ||
public String getType() { | ||
return TYPE; | ||
} | ||
|
||
@Override | ||
public SearchResponse processResponse(SearchRequest request, SearchResponse response) { | ||
|
||
if (response.getHits() != null) { | ||
if (response.getHits().getCollapseField() != null) { | ||
throw new IllegalStateException( | ||
"Cannot collapse on " + collapseField + ". Results already collapsed on " + response.getHits().getCollapseField() | ||
); | ||
} | ||
Map<String, SearchHit> collapsedHits = new LinkedHashMap<>(); | ||
List<Object> collapseValues = new ArrayList<>(); | ||
for (SearchHit hit : response.getHits()) { | ||
Object fieldValue = null; | ||
DocumentField docField = hit.getFields().get(collapseField); | ||
if (docField != null) { | ||
if (docField.getValues().size() > 1) { | ||
throw new IllegalStateException( | ||
"Failed to collapse " + hit.getId() + ": doc has multiple values for field " + collapseField | ||
); | ||
} | ||
fieldValue = docField.getValues().get(0); | ||
} else if (hit.getSourceAsMap() != null) { | ||
fieldValue = hit.getSourceAsMap().get(collapseField); | ||
} | ||
String fieldValueString; | ||
if (fieldValue == null) { | ||
fieldValueString = "__missing__"; | ||
} else { | ||
fieldValueString = fieldValue.toString(); | ||
} | ||
|
||
// Results are already sorted by sort criterion. Only keep the first hit for each field. | ||
if (collapsedHits.containsKey(fieldValueString) == false) { | ||
collapsedHits.put(fieldValueString, hit); | ||
collapseValues.add(fieldValue); | ||
} | ||
} | ||
SearchHit[] newHits = new SearchHit[collapsedHits.size()]; | ||
int i = 0; | ||
for (SearchHit collapsedHit : collapsedHits.values()) { | ||
newHits[i++] = collapsedHit; | ||
} | ||
SearchHits searchHits = new SearchHits( | ||
newHits, | ||
response.getHits().getTotalHits(), | ||
response.getHits().getMaxScore(), | ||
response.getHits().getSortFields(), | ||
collapseField, | ||
collapseValues.toArray() | ||
); | ||
return SearchResponseUtil.replaceHits(searchHits, response); | ||
} | ||
return response; | ||
} | ||
|
||
static class Factory implements Processor.Factory<SearchResponseProcessor> { | ||
|
||
@Override | ||
public CollapseResponseProcessor create( | ||
Map<String, Processor.Factory<SearchResponseProcessor>> processorFactories, | ||
String tag, | ||
String description, | ||
boolean ignoreFailure, | ||
Map<String, Object> config, | ||
PipelineContext pipelineContext | ||
) { | ||
String collapseField = ConfigurationUtils.readStringProperty(TYPE, tag, config, COLLAPSE_FIELD); | ||
return new CollapseResponseProcessor(tag, description, ignoreFailure, collapseField); | ||
} | ||
} | ||
|
||
} |
83 changes: 83 additions & 0 deletions
83
...ommon/src/main/java/org/opensearch/search/pipeline/common/OversampleRequestProcessor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.search.pipeline.common; | ||
|
||
import org.opensearch.action.search.SearchRequest; | ||
import org.opensearch.ingest.ConfigurationUtils; | ||
import org.opensearch.search.SearchService; | ||
import org.opensearch.search.pipeline.AbstractProcessor; | ||
import org.opensearch.search.pipeline.PipelineProcessingContext; | ||
import org.opensearch.search.pipeline.Processor; | ||
import org.opensearch.search.pipeline.SearchRequestProcessor; | ||
import org.opensearch.search.pipeline.StatefulSearchRequestProcessor; | ||
import org.opensearch.search.pipeline.common.helpers.ContextUtils; | ||
|
||
import java.util.Map; | ||
|
||
import static org.opensearch.search.pipeline.common.helpers.ContextUtils.applyContextPrefix; | ||
|
||
/** | ||
* Multiplies the "size" parameter on the {@link SearchRequest} by the given scaling factor, storing the original value | ||
* in the request context as "original_size". | ||
*/ | ||
public class OversampleRequestProcessor extends AbstractProcessor implements StatefulSearchRequestProcessor { | ||
|
||
/** | ||
* Key to reference this processor type from a search pipeline. | ||
*/ | ||
public static final String TYPE = "oversample"; | ||
static final String SAMPLE_FACTOR = "sample_factor"; | ||
static final String ORIGINAL_SIZE = "original_size"; | ||
private final double sampleFactor; | ||
private final String contextPrefix; | ||
|
||
private OversampleRequestProcessor(String tag, String description, boolean ignoreFailure, double sampleFactor, String contextPrefix) { | ||
super(tag, description, ignoreFailure); | ||
this.sampleFactor = sampleFactor; | ||
this.contextPrefix = contextPrefix; | ||
} | ||
|
||
@Override | ||
public SearchRequest processRequest(SearchRequest request, PipelineProcessingContext requestContext) { | ||
if (request.source() != null) { | ||
int originalSize = request.source().size(); | ||
if (originalSize == -1) { | ||
originalSize = SearchService.DEFAULT_SIZE; | ||
} | ||
requestContext.setAttribute(applyContextPrefix(contextPrefix, ORIGINAL_SIZE), originalSize); | ||
int newSize = (int) Math.ceil(originalSize * sampleFactor); | ||
request.source().size(newSize); | ||
} | ||
return request; | ||
} | ||
|
||
@Override | ||
public String getType() { | ||
return TYPE; | ||
} | ||
|
||
static class Factory implements Processor.Factory<SearchRequestProcessor> { | ||
@Override | ||
public OversampleRequestProcessor create( | ||
Map<String, Processor.Factory<SearchRequestProcessor>> processorFactories, | ||
String tag, | ||
String description, | ||
boolean ignoreFailure, | ||
Map<String, Object> config, | ||
PipelineContext pipelineContext | ||
) { | ||
double sampleFactor = ConfigurationUtils.readDoubleProperty(TYPE, tag, config, SAMPLE_FACTOR); | ||
if (sampleFactor < 1.0) { | ||
throw ConfigurationUtils.newConfigurationException(TYPE, tag, SAMPLE_FACTOR, "Value must be >= 1.0"); | ||
} | ||
String contextPrefix = ConfigurationUtils.readOptionalStringProperty(TYPE, tag, config, ContextUtils.CONTEXT_PREFIX_PARAMETER); | ||
return new OversampleRequestProcessor(tag, description, ignoreFailure, sampleFactor, contextPrefix); | ||
} | ||
} | ||
} |
Oops, something went wrong.