Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

atlas: add couldMatch to QueryIndex #1139

Merged
merged 1 commit into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Consumer;
import java.util.function.Predicate;

/**
* Simple tree for finding all values associated with a prefix that matches the search
Expand Down Expand Up @@ -278,6 +279,58 @@ private void forEachImpl(Node node, String key, int offset, Consumer<Query.KeyQu
}
}

/**
* Invokes the predicate function for each value associated with a prefix of the search key
* until a match is found.
*
* @param key
* Key to compare against the prefixes.
* @param predicate
* Function to call to see if there is a match.
*/
boolean exists(String key, Predicate<Query.KeyQuery> predicate) {
// In queries cannot have an empty value, so cannot be in the root set
if (exists(otherQueries, predicate)) {
return true;
}
Node node = root;
return node != null && existsImpl(node, key, 0, predicate);
}

private boolean existsImpl(Node node, String key, int offset, Predicate<Query.KeyQuery> predicate) {
final int prefixLength = node.prefix.length();
final int keyLength = key.length() - offset;
final int commonLength = commonPrefixLength(node.prefix, key, offset);

if (commonLength == prefixLength) {
// Prefix matches, consume other queries
if (exists(node.otherQueries, predicate)) {
return true;
}

if (commonLength < keyLength) {
// There is more to the key, check if there are also matches for child nodes
int childOffset = offset + commonLength;
int pos = find(node.children, key, childOffset);
return pos >= 0 && existsImpl(node.children[pos], key, childOffset, predicate);
} else {
// It is an exact match, consume in queries
return true;
}
}

return false;
}

private boolean exists(Set<Query.KeyQuery> qs, Predicate<Query.KeyQuery> predicate) {
for (Query.KeyQuery kq : qs) {
if (predicate.test(kq)) {
return true;
}
}
return false;
}

/**
* Returns true if the tree is empty.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,68 @@ public void forEachMatch(Function<String, String> tags, Consumer<T> consumer) {
}
}

/**
* Check the set of tags, which could be a partial set, and return true if it is possible
* that it would match some set of expressions. This method can be used as a cheap pre-filter
* check. In some cases this can be useful to avoid expensive transforms to get the final
* set of tags for matching.
*
* @param tags
* Partial set of tags to check against the index. Function is used to look up the
* value for a given tag key. The function should return {@code null} if there is no
* value for the key.
* @return
* True if it is possible there would be a match based on the partial set of tags.
*/
@SuppressWarnings("PMD.NPathComplexity")
public boolean couldMatch(Function<String, String> tags) {
// Matches for this level
if (!matches.isEmpty()) {
return true;
}

boolean keyPresent = false;
if (key != null) {
String v = tags.apply(key);
if (v != null) {
keyPresent = true;

// Check exact matches
QueryIndex<T> eqIdx = equalChecks.get(v);
if (eqIdx != null && eqIdx.couldMatch(tags)) {
return true;
}

// Scan for matches with other conditions
if (!otherChecks.isEmpty()) {
boolean otherMatches = otherChecksTree.exists(v, kq -> {
if (kq instanceof Query.In || couldMatch(kq, v)) {
QueryIndex<T> idx = otherChecks.get(kq);
return idx != null && idx.couldMatch(tags);
}
return false;
});
if (otherMatches) {
return true;
}
}

// Check matches for has key
if (hasKeyIdx != null && hasKeyIdx.couldMatch(tags)) {
return true;
}
}
}

// Check matches with other keys
if (otherKeysIdx != null && otherKeysIdx.couldMatch(tags)) {
return true;
}

// Check matches with missing keys
return !keyPresent;
}

private boolean matches(Query.KeyQuery kq, String value) {
if (kq instanceof Query.Regex) {
Query.Regex re = (Query.Regex) kq;
Expand All @@ -535,6 +597,16 @@ private boolean matches(Query.KeyQuery kq, String value) {
}
}

private boolean couldMatch(Query.KeyQuery kq, String value) {
if (kq instanceof Query.Regex) {
// For this possible matches prefix check is sufficient, avoid full regex to
// keep the pre-filter checks cheap.
return true;
} else {
return kq.matches(value);
}
}

/**
* Find hot spots in the index where there is a large set of linear matches, e.g. a bunch
* of regex queries for a given key.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,33 +44,43 @@ private void assertSize(PrefixTree tree, int expected) {
Assertions.assertEquals(expected == 0, tree.isEmpty());
}

private void assertEquals(List<Query.KeyQuery> expected, PrefixTree tree, String key) {
List<Query.KeyQuery> actual = sort(tree.get(key));
if (!actual.isEmpty()) {
Assertions.assertTrue(tree.exists(key, k -> true));
} else {
Assertions.assertFalse(tree.exists(key, k -> true));
}
Assertions.assertEquals(expected, actual);
}

@Test
public void nullPrefix() {
PrefixTree tree = new PrefixTree();
tree.put(null, re("1"));
assertSize(tree, 1);

Assertions.assertEquals(list("1"), tree.get("foo"));
Assertions.assertEquals(list("1"), tree.get("bar"));
Assertions.assertEquals(list("1"), tree.get(""));
assertEquals(list("1"), tree, "foo");
assertEquals(list("1"), tree, "bar");
assertEquals(list("1"), tree, "");

Assertions.assertFalse(tree.remove(null, re("2")));
Assertions.assertTrue(tree.remove(null, re("1")));
assertSize(tree, 0);
Assertions.assertEquals(Collections.emptyList(), tree.get("foo"));
assertEquals(Collections.emptyList(), tree, "foo");
}

@Test
public void emptyPrefix() {
PrefixTree tree = new PrefixTree();
tree.put("", re("1"));
Assertions.assertEquals(list("1"), tree.get("foo"));
Assertions.assertEquals(list("1"), tree.get("bar"));
Assertions.assertEquals(list("1"), tree.get(""));
assertEquals(list("1"), tree, "foo");
assertEquals(list("1"), tree, "bar");
assertEquals(list("1"), tree, "");

Assertions.assertFalse(tree.remove("", re("2")));
Assertions.assertTrue(tree.remove("", re("1")));
Assertions.assertEquals(Collections.emptyList(), tree.get("foo"));
assertEquals(Collections.emptyList(), tree, "foo");
}

@Test
Expand All @@ -79,16 +89,16 @@ public void simplePrefix() {
tree.put("abc", re("1"));
assertSize(tree, 1);

Assertions.assertEquals(list("1"), tree.get("abcdef"));
Assertions.assertEquals(list("1"), tree.get("abcghi"));
Assertions.assertEquals(list("1"), tree.get("abc"));
Assertions.assertEquals(Collections.emptyList(), tree.get("abd"));
Assertions.assertEquals(Collections.emptyList(), tree.get("ab"));
assertEquals(list("1"), tree, "abcdef");
assertEquals(list("1"), tree, "abcghi");
assertEquals(list("1"), tree, "abc");
assertEquals(Collections.emptyList(), tree, "abd");
assertEquals(Collections.emptyList(), tree, "ab");

Assertions.assertTrue(tree.remove("abc", re("1")));
Assertions.assertFalse(tree.remove("abc", re("1")));
assertSize(tree, 0);
Assertions.assertEquals(Collections.emptyList(), tree.get("abcdef"));
assertEquals(Collections.emptyList(), tree, "abcdef");
}

@Test
Expand All @@ -100,15 +110,15 @@ public void multipleMatches() {
tree.put("abc", re("4"));
assertSize(tree, 4);

Assertions.assertEquals(list("1", "2", "3", "4"), sort(tree.get("abcdef")));
Assertions.assertEquals(list("2", "3"), sort(tree.get("abdef")));
Assertions.assertEquals(list("3"), tree.get("adef"));
Assertions.assertEquals(Collections.emptyList(), tree.get("bcdef"));
assertEquals(list("1", "2", "3", "4"), tree, "abcdef");
assertEquals(list("2", "3"), tree, "abdef");
assertEquals(list("3"), tree, "adef");
assertEquals(Collections.emptyList(), tree, "bcdef");

Assertions.assertFalse(tree.remove("ab", re("1")));
Assertions.assertTrue(tree.remove("abc", re("1")));
assertSize(tree, 3);
Assertions.assertEquals(list("2", "3", "4"), sort(tree.get("abcdef")));
assertEquals(list("2", "3", "4"), tree, "abcdef");
}

@Test
Expand All @@ -117,20 +127,20 @@ public void unicodeCharInPrefix() {
tree.put("aβc", re("1"));
assertSize(tree, 1);

Assertions.assertEquals(list("1"), tree.get("aβcdef"));
Assertions.assertEquals(list("1"), tree.get("aβcghi"));
Assertions.assertEquals(list("1"), tree.get("aβc"));
Assertions.assertEquals(Collections.emptyList(), tree.get("abcdef"));
Assertions.assertEquals(Collections.emptyList(), tree.get("abcghi"));
Assertions.assertEquals(Collections.emptyList(), tree.get("abc"));
Assertions.assertEquals(Collections.emptyList(), tree.get("abd"));
Assertions.assertEquals(Collections.emptyList(), tree.get("ab"));
Assertions.assertEquals(Collections.emptyList(), tree.get("aβ"));
Assertions.assertEquals(Collections.emptyList(), tree.get("b"));
assertEquals(list("1"), tree, "aβcdef");
assertEquals(list("1"), tree, "aβcghi");
assertEquals(list("1"), tree, "aβc");
assertEquals(Collections.emptyList(), tree, "abcdef");
assertEquals(Collections.emptyList(), tree, "abcghi");
assertEquals(Collections.emptyList(), tree, "abc");
assertEquals(Collections.emptyList(), tree, "abd");
assertEquals(Collections.emptyList(), tree, "ab");
assertEquals(Collections.emptyList(), tree, "aβ");
assertEquals(Collections.emptyList(), tree, "b");

Assertions.assertTrue(tree.remove("aβc", re("1")));
assertSize(tree, 0);
Assertions.assertEquals(Collections.emptyList(), tree.get("abcdef"));
assertEquals(Collections.emptyList(), tree, "abcdef");
}

@Test
Expand All @@ -139,18 +149,18 @@ public void emptyRootPrefix() {
tree.put("ab", re("ab"));
tree.put("cd", re("cd"));

Assertions.assertEquals(list("ab"), tree.get("ab"));
Assertions.assertEquals(list("cd"), tree.get("cd"));
Assertions.assertEquals(Collections.emptyList(), tree.get("ef"));
assertEquals(list("ab"), tree, "ab");
assertEquals(list("cd"), tree, "cd");
assertEquals(Collections.emptyList(), tree, "ef");

Assertions.assertTrue(tree.remove("ab", re("ab")));
PrefixTree tree2 = new PrefixTree();
tree2.put("cd", re("cd"));
Assertions.assertEquals(tree2, tree);

Assertions.assertEquals(Collections.emptyList(), tree.get("ab"));
Assertions.assertEquals(list("cd"), tree.get("cd"));
Assertions.assertEquals(Collections.emptyList(), tree.get("ef"));
assertEquals(Collections.emptyList(), tree, "ab");
assertEquals(list("cd"), tree, "cd");
assertEquals(Collections.emptyList(), tree, "ef");
}

@Test
Expand All @@ -159,7 +169,7 @@ public void updateExistingNode() {
tree.put("abcdef", re("1"));
tree.put("abcdef", re("2"));

Assertions.assertEquals(list("1", "2"), tree.get("abcdef"));
assertEquals(list("1", "2"), tree, "abcdef");
}

@Test
Expand All @@ -169,8 +179,8 @@ public void addChildNode() {
tree.put("abcdefghi", re("2"));
tree.put("abcdef", re("3"));

Assertions.assertEquals(list("1", "3"), tree.get("abcdef"));
Assertions.assertEquals(list("1", "3", "2"), tree.get("abcdefghi"));
assertEquals(list("1", "3"), tree, "abcdef");
assertEquals(list("1", "2", "3"), tree, "abcdefghi");
}

@Test
Expand All @@ -179,8 +189,8 @@ public void splitInteriorNode() {
tree.put("abcdef", re("abcdef"));
tree.put("abcghi", re("abcghi"));

Assertions.assertEquals(list("abcdef"), tree.get("abcdef"));
Assertions.assertEquals(list("abcghi"), tree.get("abcghi"));
assertEquals(list("abcdef"), tree, "abcdef");
assertEquals(list("abcghi"), tree, "abcghi");
}

@Test
Expand All @@ -189,7 +199,7 @@ public void manyDifferentRootPrefixes() {
tree.put("abc", re("abc"));
tree.put("def", re("def"));
tree.put("ghi", re("ghi"));
Assertions.assertEquals(list("abc"), tree.get("abcdef"));
assertEquals(list("abc"), tree, "abcdef");
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,11 @@ private void assertEquals(List<Query> expected, QueryIndex<Query> idx, Id id) {
}
for (int i = 0; i < 4; ++i) {
Assertions.assertEquals(expected, sort(idx.findMatches(Query.toMap(id)::get)));
// Only check could match case here since couldMatch should be a superset of actual
// matches
if (!expected.isEmpty()) {
Assertions.assertTrue(idx.couldMatch(Query.toMap(id)::get));
}
}
}

Expand Down Expand Up @@ -551,4 +556,24 @@ public void findHotSpots() {
Assertions.assertEquals(5, queries.size());
});
}

@Test
public void couldMatchPartial() {
Registry registry = new NoopRegistry();
QueryIndex<Integer> idx = QueryIndex.newInstance(registry);
Query q = Parser.parseQuery("name,foo,:eq,id,bar,:eq,:and,app,baz,:re,:and");
idx.add(q, 42);

Assertions.assertTrue(idx.couldMatch(Query.toMap(id("foo"))::get));
Assertions.assertTrue(idx.couldMatch(Query.toMap(id("foo", "id", "bar"))::get));
Assertions.assertTrue(idx.couldMatch(Query.toMap(id("foo", "app", "baz-main"))::get));

// Since `id` is after `app` and `app` is missing in tags, it will short-circuit and
// assume it could match
Assertions.assertTrue(idx.couldMatch(Query.toMap(id("foo", "id", "baz"))::get));

// Filtered out
Assertions.assertFalse(idx.couldMatch(Query.toMap(id("foo2", "id", "bar"))::get));
Assertions.assertFalse(idx.couldMatch(Query.toMap(id("foo", "app", "bar-main"))::get));
}
}
Loading