Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vectorization on demand #1258

Merged
merged 37 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
4581baa
init
Yuqi-Du Jun 14, 2024
77415ad
fix
Yuqi-Du Jun 14, 2024
5fecd64
Merge remote-tracking branch 'origin/main' into fix-update-vectorize
Yuqi-Du Jun 17, 2024
8a99d28
fix
Yuqi-Du Jun 17, 2024
3e296e8
Merge remote-tracking branch 'origin/main' into fix-update-vectorize
Yuqi-Du Jun 26, 2024
94ded0e
Merge remote-tracking branch 'origin/main' into fix-update-vectorize
Yuqi-Du Jul 8, 2024
7d27b2a
added logic to check if there is $vectorize text diff
Yuqi-Du Jul 8, 2024
e961200
delete two vectorize update unit tests
Yuqi-Du Jul 8, 2024
3e681ba
Merge branch 'main' into fix-update-vectorize
Yuqi-Du Jul 8, 2024
9a5aafc
Merge branch 'main' into fix-update-vectorize
Yuqi-Du Jul 8, 2024
3f332bc
Merge branch 'main' into fix-update-vectorize
Yuqi-Du Jul 9, 2024
a35e328
Merge remote-tracking branch 'origin/main' into fix-update-vectorize
Yuqi-Du Jul 9, 2024
5073fc2
refactor
Yuqi-Du Jul 10, 2024
f50d64a
Merge remote-tracking branch 'origin/fix-update-vectorize' into fix-u…
Yuqi-Du Jul 10, 2024
1cb614d
Merge remote-tracking branch 'origin/main' into fix-update-vectorize
Yuqi-Du Jul 10, 2024
b1b5ad7
fix
Yuqi-Du Jul 10, 2024
95ed7d9
Merge remote-tracking branch 'origin/main' into fix-update-vectorize
Yuqi-Du Jul 10, 2024
62ce663
fix
Yuqi-Du Jul 10, 2024
4f1d47c
add unset test
Yuqi-Du Jul 10, 2024
6887ac3
Merge remote-tracking branch 'origin/main' into fix-update-vectorize
Yuqi-Du Jul 11, 2024
930dc2e
merge from main
Yuqi-Du Jul 11, 2024
a65880c
fix IT
Yuqi-Du Jul 11, 2024
fa810c1
Merge remote-tracking branch 'origin/main' into fix-update-vectorize
Yuqi-Du Jul 11, 2024
6710a6b
Merge remote-tracking branch 'origin/main' into fix-update-vectorize
Yuqi-Du Jul 16, 2024
d51ff20
Merge remote-tracking branch 'origin/main' into fix-update-vectorize
Yuqi-Du Jul 16, 2024
2e3e7f6
refactor
Yuqi-Du Jul 16, 2024
f59ed44
Merge branch 'main' into fix-update-vectorize
Yuqi-Du Jul 16, 2024
fc5b0fc
fix
Yuqi-Du Jul 16, 2024
55f1c25
Merge remote-tracking branch 'origin/main' into fix-update-vectorize
Yuqi-Du Jul 22, 2024
7d3e0df
fix comments
Yuqi-Du Jul 22, 2024
2eec55b
fix comments
Yuqi-Du Jul 22, 2024
f2f6a21
fix IT
Yuqi-Du Jul 22, 2024
2b744b4
Merge remote-tracking branch 'origin/main' into fix-update-vectorize
Yuqi-Du Jul 23, 2024
577703f
merged from tables, fix conflicts
Yuqi-Du Jul 23, 2024
3e4fe40
Merge branch 'main' into fix-update-vectorize
Yuqi-Du Jul 24, 2024
247742c
Merge remote-tracking branch 'origin/main' into fix-update-vectorize
Yuqi-Du Jul 26, 2024
ca9d04f
fix comments
Yuqi-Du Jul 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ private static boolean hasModifier(ObjectNode node) {
}

@Override
public boolean updateDocument(ObjectNode doc) {
public UpdateOperationResult updateDocument(ObjectNode doc) {
boolean modified = false;
for (Action action : actions) {
PathMatch target = action.locator().findOrCreate(doc);
Expand Down Expand Up @@ -136,7 +136,7 @@ public boolean updateDocument(ObjectNode doc) {
}
}

return modified;
return new UpdateOperationResult(modified, null);
}

private boolean addToSet(ArrayNode set, JsonNode elementToAdd) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ private static void verifyIsTrueOrDate(JsonNode value) {
}

@Override
public boolean updateDocument(ObjectNode doc) {
public UpdateOperationResult updateDocument(ObjectNode doc) {
boolean modified = false;
final long now = System.currentTimeMillis();
ObjectNode newValue = JsonUtil.createEJSonDate(doc, now);
Expand All @@ -66,7 +66,7 @@ public boolean updateDocument(ObjectNode doc) {
modified = true;
}
}
return modified;
return new UpdateOperationResult(modified, null);
}

record Action(PathMatchLocator locator) implements ActionWithLocator {}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package io.stargate.sgv2.jsonapi.api.model.command.clause.update;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants;

/**
 * Update step that writes an already-computed embedding vector into a document.
 *
 * @param vectorizeContent text whose embedding is to be stored in the document (presumably the new
 *     {@code $vectorize} value; confirm against the code that constructs this record)
 */
public record EmbeddingUpdateOperation(String vectorizeContent) {

  /**
   * Stores {@code vector} in the document's vector embedding field.
   *
   * <p>The field is overwritten unconditionally; no comparison with the current value is made.
   * NOTE(review): the original author's note says callers only get here once they know the
   * vectorize text has changed and already hold the computed vector, so the value is always
   * expected to be replaced.
   *
   * @param doc Document to update; must actually be an {@link ObjectNode}, otherwise the cast below
   *     fails with {@link ClassCastException}
   * @param vector Embedding values to store, written as a JSON array of numbers in the same order
   */
  public void updateDocument(JsonNode doc, float[] vector) {
    // The shared default factory is what a freshly constructed ObjectMapper would hand out, so
    // there is no need to instantiate a mapper on every call just to build nodes.
    final JsonNodeFactory nodeFactory = JsonNodeFactory.instance;
    final ArrayNode arrayNode = nodeFactory.arrayNode(vector.length);
    for (float value : vector) {
      arrayNode.add(nodeFactory.numberNode(value));
    }
    // set() is the non-deprecated replacement for put(String, JsonNode)
    ((ObjectNode) doc).set(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, arrayNode);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public static IncOperation construct(ObjectNode args) {
}

@Override
public boolean updateDocument(ObjectNode doc) {
public UpdateOperationResult updateDocument(ObjectNode doc) {
// Almost always changes, except if adding zero; need to track
boolean modified = false;
for (Action action : actions) {
Expand Down Expand Up @@ -77,7 +77,7 @@ public boolean updateDocument(ObjectNode doc) {
}
}

return modified;
return new UpdateOperationResult(modified, null);
}

private JsonNode addNumbers(ObjectNode doc, NumericNode nr1, NumericNode nr2) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ private static MinMaxOperation construct(ObjectNode args, UpdateOperator oper, b
}

@Override
public boolean updateDocument(ObjectNode doc) {
public UpdateOperationResult updateDocument(ObjectNode doc) {
// Almost always changes, except if adding zero; need to track
boolean modified = false;
for (Action action : actions) {
Expand All @@ -66,7 +66,7 @@ public boolean updateDocument(ObjectNode doc) {
}
}

return modified;
return new UpdateOperationResult(modified, null);
}

private boolean shouldReplace(JsonNode oldValue, JsonNode newValue) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public static MulOperation construct(ObjectNode args) {
}

@Override
public boolean updateDocument(ObjectNode doc) {
public UpdateOperationResult updateDocument(ObjectNode doc) {
boolean modified = false;
for (Action action : actions) {
final NumericNode multiplier = action.value;
Expand Down Expand Up @@ -76,7 +76,7 @@ public boolean updateDocument(ObjectNode doc) {
}
}

return modified;
return new UpdateOperationResult(modified, null);
}

private JsonNode multiply(ObjectNode doc, JsonNode oldValue, JsonNode multiplierValue) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public static PopOperation construct(ObjectNode args) {
}

@Override
public boolean updateDocument(ObjectNode doc) {
public UpdateOperationResult updateDocument(ObjectNode doc) {
boolean changes = false;
for (Action action : actions) {
PathMatch target = action.locator().findIfExists(doc);
Expand Down Expand Up @@ -89,7 +89,7 @@ public boolean updateDocument(ObjectNode doc) {
+ value.getNodeType());
}
}
return changes;
return new UpdateOperationResult(changes, null);
}

/** Value class for per-field Pop operation definitions. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ private static boolean hasModifier(ObjectNode node) {
}

@Override
public boolean updateDocument(ObjectNode doc) {
public UpdateOperationResult updateDocument(ObjectNode doc) {
for (Action action : actions) {
final JsonNode toAdd = action.value;

Expand Down Expand Up @@ -174,7 +174,7 @@ public boolean updateDocument(ObjectNode doc) {
}

// Every valid update operation modifies document so need just one:
return !actions.isEmpty();
return new UpdateOperationResult(!actions.isEmpty(), null);
}

// Just needed for tests
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public static RenameOperation construct(ObjectNode args) {
}

@Override
public boolean updateDocument(ObjectNode doc) {
public UpdateOperationResult updateDocument(ObjectNode doc) {
boolean modified = false;
for (Action action : actions) {
PathMatch src = action.sourceLocator().findIfExists(doc);
Expand Down Expand Up @@ -78,7 +78,7 @@ public boolean updateDocument(ObjectNode doc) {
dst.replaceValue(value);
}
}
return modified;
return new UpdateOperationResult(modified, null);
}

// Unlike most operations, we have 2 locators (src, dest), use explicit names
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants;
import io.stargate.sgv2.jsonapi.exception.ErrorCode;
import io.stargate.sgv2.jsonapi.util.JsonUtil;
import io.stargate.sgv2.jsonapi.util.PathMatch;
import io.stargate.sgv2.jsonapi.util.PathMatchLocator;
Expand Down Expand Up @@ -73,27 +74,47 @@ public boolean shouldApplyIf(boolean isInsert) {
}

@Override
public boolean updateDocument(ObjectNode doc) {
public UpdateOperationResult updateDocument(ObjectNode doc) {
Yuqi-Du marked this conversation as resolved.
Show resolved Hide resolved
boolean modified = false;
Set<String> setPaths = new HashSet<>();
actions.stream().forEach(action -> setPaths.add(action.locator().path()));
EmbeddingUpdateOperation embeddingUpdateOperation = null;

for (Action action : actions) {
PathMatch target = action.locator().findOrCreate(doc);
JsonNode newValue = action.value();
JsonNode oldValue = target.valueNode();
// Modify if no old value OR new value differs, as per Mongo-equality rules
if ((oldValue == null) || !JsonUtil.equalsOrdered(oldValue, newValue)) {
// replace old value with matched path new value
target.replaceValue(newValue);
// $vector is updated and $vectorize is not updated, remove the $vectorize field in the
// document

// $vector is updated and $vectorize is not updated, remove $vectorize in the document
if (DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD.equals(action.locator().path())
&& !setPaths.contains(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD)) {
doc.remove(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD);
}

// $vectorize
if (DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD.equals(action.locator().path())) {
if (newValue.isNull()) {
// if $vectorize is null value, update $vector as null
doc.put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, (String) null);
Yuqi-Du marked this conversation as resolved.
Show resolved Hide resolved
} else if (!newValue.isTextual()) {
// if $vectorize is not textual value
throw ErrorCode.INVALID_VECTORIZE_VALUE_TYPE.toApiException();
} else if (newValue.asText().isBlank()) {
// $vectorize is blank text value, set $vector as null value, no need to vectorize
doc.put(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD, (String) null);
Yuqi-Du marked this conversation as resolved.
Show resolved Hide resolved
} else {
// if $vectorize is textual and not blank, create embeddingUpdateOperation
embeddingUpdateOperation = new EmbeddingUpdateOperation(newValue.asText());
}
}
modified = true;
}
}
return modified;
return new UpdateOperationResult(modified, embeddingUpdateOperation);
}

// Needed because some unit tests check for equality
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public static UnsetOperation construct(ObjectNode args) {
}

@Override
public boolean updateDocument(ObjectNode doc) {
public UpdateOperationResult updateDocument(ObjectNode doc) {
boolean modified = false;
Set<String> unsetPaths = new HashSet<>();
actions.stream().forEach(action -> unsetPaths.add(action.locator().path()));
Expand All @@ -40,7 +40,11 @@ public boolean updateDocument(ObjectNode doc) {
if (modified && unsetPaths.contains(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD)) {
doc.remove(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD);
}
return modified;
// $vectorize field is unset, remove $vector field value
if (modified && unsetPaths.contains(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD)) {
doc.remove(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD);
}
return new UpdateOperationResult(modified, null);
}

record Action(PathMatchLocator locator) implements ActionWithLocator {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public List<A> actions() {
* @param doc Document to apply operation to
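* @return Result whose {@code modified} flag is true if the document was modified by the
*     operation; it may also carry an {@link EmbeddingUpdateOperation} (null if there is none).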
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update the java doc here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Solved

*/
public abstract boolean updateDocument(ObjectNode doc);
public abstract UpdateOperationResult updateDocument(ObjectNode doc);

/**
* Method called to see if update operator should be applied for specific kind of update:
Expand Down Expand Up @@ -110,4 +110,7 @@ public int compare(ActionWithLocator o1, ActionWithLocator o2) {
return o1.path().compareTo(o2.path());
}
}

public record UpdateOperationResult(
Yuqi-Du marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Java doc?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Solved

boolean modified, EmbeddingUpdateOperation embeddingUpdateOperation) {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,11 @@
import com.fasterxml.jackson.databind.node.ObjectNode;
import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateClause;
import io.stargate.sgv2.jsonapi.api.model.command.clause.update.UpdateOperator;
import io.stargate.sgv2.jsonapi.config.constants.DocumentConstants;
import io.stargate.sgv2.jsonapi.exception.ErrorCode;
import io.stargate.sgv2.jsonapi.exception.JsonApiException;
import java.io.IOException;
import java.util.EnumMap;
import java.util.Iterator;
import java.util.Map;
import java.util.*;

/** {@link StdDeserializer} for the {@link UpdateClause}. */
public class UpdateClauseDeserializer extends StdDeserializer<UpdateClause> {
Expand Down Expand Up @@ -55,6 +54,22 @@ public UpdateClause deserialize(
}
updateDefs.put(oper, (ObjectNode) operationArg);
}
validateUpdateDefs(updateDefs);
return new UpdateClause(updateDefs);
}

/**
 * Validates the collected update operator definitions.
 *
 * <p>Currently one rule is enforced: a single {@code $unset} or {@code $set} definition must not
 * reference both the vectorize text field and the vector field at the same time.
 *
 * @param updateDefs Operator definitions keyed by update operator; operators that are absent are
 *     simply skipped
 * @throws JsonApiException with {@link ErrorCode#INVALID_USAGE_OF_VECTORIZE} if either checked
 *     operator names both fields
 */
public void validateUpdateDefs(EnumMap<UpdateOperator, ObjectNode> updateDefs) {
  // Checked in this order: $unset first, then $set
  final UpdateOperator[] operatorsToCheck = {UpdateOperator.UNSET, UpdateOperator.SET};
  for (UpdateOperator operator : operatorsToCheck) {
    final ObjectNode definition = updateDefs.get(operator);
    if (definition == null) {
      continue;
    }
    final boolean hasVectorizeText =
        definition.has(DocumentConstants.Fields.VECTOR_EMBEDDING_TEXT_FIELD);
    if (hasVectorizeText && definition.has(DocumentConstants.Fields.VECTOR_EMBEDDING_FIELD)) {
      throw new JsonApiException(ErrorCode.INVALID_USAGE_OF_VECTORIZE);
    }
  }
}
}
Loading
Loading