-
Notifications
You must be signed in to change notification settings - Fork 16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add no-index support for Shredder (part 1) #786
Changes from all commits
4f5805f
2568238
466f238
d65e9b7
4d105cd
29e3f88
be57387
57f3a05
da61890
ce8c165
1ff103e
7cfffa3
028ff0b
a03a31b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,13 +10,16 @@ | |
import com.fasterxml.jackson.core.JsonProcessingException; | ||
import com.fasterxml.jackson.databind.JsonNode; | ||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
import com.google.common.base.Suppliers; | ||
import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants; | ||
import io.stargate.sgv2.jsonapi.exception.ErrorCode; | ||
import io.stargate.sgv2.jsonapi.exception.JsonApiException; | ||
import io.stargate.sgv2.jsonapi.service.projection.DocumentProjector; | ||
import java.util.HashSet; | ||
import java.util.Map; | ||
import java.util.Optional; | ||
import java.util.Set; | ||
import java.util.function.Supplier; | ||
|
||
/** | ||
* Refactored as a separate class that represents a collection property. | ||
|
@@ -37,7 +40,35 @@ public record CollectionSettings( | |
String modelName, | ||
IndexingConfig indexingConfig) { | ||
|
||
public record IndexingConfig(Set<String> allowed, Set<String> denied) { | ||
private static final CollectionSettings EMPTY = | ||
new CollectionSettings("", false, 0, null, null, null, null); | ||
|
||
public static CollectionSettings empty() { | ||
return EMPTY; | ||
} | ||
|
||
public DocumentProjector indexingProjector() { | ||
// IndexingConfig null if no indexing definitions: default, index all: | ||
if (indexingConfig == null) { | ||
return DocumentProjector.identityProjector(); | ||
} | ||
// otherwise get lazily initialized indexing projector from config | ||
return indexingConfig.indexingProjector(); | ||
} | ||
|
||
public record IndexingConfig( | ||
Set<String> allowed, Set<String> denied, Supplier<DocumentProjector> indexedProject) { | ||
public IndexingConfig(Set<String> allowed, Set<String> denied) { | ||
this( | ||
allowed, | ||
denied, | ||
Suppliers.memoize(() -> DocumentProjector.createForIndexing(allowed, denied))); | ||
} | ||
|
||
public DocumentProjector indexingProjector() { | ||
return indexedProject.get(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This does build the Projector for every call. Is the idea to get this once use it in code passing everywhere? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, it uses Guava's memoize() to avoid just that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll try to come up with a test to verify that we only get one instance; maybe There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, easier to verify with stand-alone unit test: see |
||
} | ||
|
||
public static IndexingConfig fromJson(JsonNode jsonNode) { | ||
Set<String> allowed = new HashSet<>(); | ||
Set<String> denied = new HashSet<>(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,8 +7,10 @@ | |
import io.stargate.sgv2.jsonapi.exception.JsonApiException; | ||
import java.math.BigDecimal; | ||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Objects; | ||
import java.util.Set; | ||
|
||
/** | ||
* Helper class that implements functionality needed to support projections on documents fetched via | ||
|
@@ -48,7 +50,7 @@ public static DocumentProjector createFromDefinition( | |
JsonNode projectionDefinition, boolean includeSimilarity) { | ||
if (projectionDefinition == null) { | ||
if (includeSimilarity) { | ||
return IDENTITY_PROJECTOR_WITH_SIMILARITY; | ||
return identityProjectorWithSimilarity(); | ||
} else { | ||
return identityProjector(); | ||
} | ||
|
@@ -63,12 +65,45 @@ public static DocumentProjector createFromDefinition( | |
return PathCollector.collectPaths(projectionDefinition, includeSimilarity).buildProjector(); | ||
} | ||
|
||
public static DocumentProjector createForIndexing(Set<String> allowed, Set<String> denied) { | ||
// Sets are expected to be validated to have one of 3 main cases: | ||
// 1. Non-empty "allowed" (but empty/null "denied") -> build inclusion projection | ||
// 2. Non-empty "denied" (but empty/null "allowed") -> build exclusion projection | ||
// 3. Empty/null "allowed" and "denied" -> return identity projection | ||
// as well as 2 special cases: | ||
// 4. Empty "allowed" and single "*" entry for "denied" -> return exclude-all projection | ||
// 5. Empty "denied" and single "*" entry for "allowed" -> return include-all ("identity") | ||
// projection | ||
// We need not (and should not) do further validation here. | ||
// Note that (5) is effectively same as (3) and included for sake of uniformity | ||
if (allowed != null && !allowed.isEmpty()) { | ||
// (special) Case 5: | ||
if (allowed.size() == 1 && allowed.contains("*")) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. allowed can't have "*". There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That is something I wanted to discuss (should have brought up) -- for consistency it would make sense to allow this. But for minimal approach not. Will ask on Stargate channel. My main concern is if users would pass this, assuming it works, and it meaning something totally different. |
||
return identityProjector(); | ||
} | ||
// Case 1: inclusion-based projection | ||
return new DocumentProjector(ProjectionLayer.buildLayersOverlapOk(allowed), true, false); | ||
} | ||
if (denied != null && !denied.isEmpty()) { | ||
// (special) Case 4: | ||
if (denied.size() == 1 && denied.contains("*")) { | ||
// Basically inclusion projector with nothing to include | ||
return new DocumentProjector( | ||
ProjectionLayer.buildLayersOverlapOk(Collections.emptySet()), true, false); | ||
} | ||
// Case 2: exclusion-based projection | ||
return new DocumentProjector(ProjectionLayer.buildLayersOverlapOk(denied), false, false); | ||
} | ||
// Case 3: include-all (identity) projection | ||
return identityProjector(); | ||
} | ||
|
||
public static DocumentProjector identityProjector() { | ||
return IDENTITY_PROJECTOR; | ||
} | ||
|
||
public static DocumentProjector getIdentityProjectorWithSimilarity() { | ||
return IDENTITY_PROJECTOR; | ||
public static DocumentProjector identityProjectorWithSimilarity() { | ||
return IDENTITY_PROJECTOR_WITH_SIMILARITY; | ||
} | ||
|
||
public boolean isInclusion() { | ||
|
@@ -102,6 +137,32 @@ public void applyProjection(JsonNode document, Float similarityScore) { | |
} | ||
} | ||
|
||
/** | ||
* Method to call to check if given path (dotted path, that is, dot-separated segments) would be | ||
* included by this Projection. That is, either | ||
* | ||
* <ul> | ||
* <li>This is inclusion projection, and path is covered by an inclusion path | ||
* <li>This is exclusion projection, and path is NOT covered by any exclusion path | ||
* </ul> | ||
* | ||
* @param path Dotted path (possibly nested) to check | ||
* @return {@code true} if path is included; {@code false} if not. | ||
*/ | ||
public boolean isPathIncluded(String path) { | ||
// First: if we have no layers, we are identity projector and include everything | ||
if (rootLayer == null) { | ||
return true; | ||
} | ||
// Otherwise need to split path, evaluate; but note reversal wrt include/exclude | ||
// projections | ||
if (inclusion) { | ||
return rootLayer.isPathIncluded(path); | ||
} else { | ||
return !rootLayer.isPathIncluded(path); | ||
} | ||
} | ||
|
||
// Mostly for deserialization tests | ||
@Override | ||
public boolean equals(Object o) { | ||
|
@@ -151,13 +212,13 @@ public DocumentProjector buildProjector() { | |
if (inclusions > 0) { // inclusion-based projection | ||
// doc-id included unless explicitly excluded | ||
return new DocumentProjector( | ||
ProjectionLayer.buildLayers(paths, slices, !Boolean.FALSE.equals(idInclusion)), | ||
ProjectionLayer.buildLayersNoOverlap(paths, slices, !Boolean.FALSE.equals(idInclusion)), | ||
true, | ||
includeSimilarityScore); | ||
} else { // exclusion-based | ||
// doc-id excluded only if explicitly excluded | ||
return new DocumentProjector( | ||
ProjectionLayer.buildLayers(paths, slices, Boolean.FALSE.equals(idInclusion)), | ||
ProjectionLayer.buildLayersNoOverlap(paths, slices, Boolean.FALSE.equals(idInclusion)), | ||
false, | ||
includeSimilarityScore); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is done so that
DocumentProjector
is only created the first time it is needed (if at all) and reused if used in future (as CollectionSettings
are effectively cached)