From 2922874b1b95ddd14155e3ab9c6b0acf1e54e68b Mon Sep 17 00:00:00 2001 From: U9G Date: Wed, 2 Aug 2023 11:20:44 -0400 Subject: [PATCH 01/17] add min_fold_count_limit optimization --- trustfall_core/src/interpreter/execution.rs | 81 ++- .../fold_with_no_outputs.trace.ron | 66 +-- ...ld_with_no_outputs_elided_braces.trace.ron | 66 +-- ...ransform_and_filter_greater_than.trace.ron | 66 +-- .../nested_fold_with_no_outputs.trace.ron | 555 ++---------------- 5 files changed, 185 insertions(+), 649 deletions(-) diff --git a/trustfall_core/src/interpreter/execution.rs b/trustfall_core/src/interpreter/execution.rs index 4ce6671c..9d2fa8b5 100644 --- a/trustfall_core/src/interpreter/execution.rs +++ b/trustfall_core/src/interpreter/execution.rs @@ -250,6 +250,8 @@ fn get_max_fold_count_limit(carrier: &mut QueryCarrier, fold: &IRFold) -> Option let query_arguments = &carrier.query.as_ref().expect("query was not returned").arguments; for post_fold_filter in fold.post_filters.iter() { let next_limit = match post_fold_filter { + // Equals and OneOf must be visited here as they are not visited + // in `get_min_fold_count_limit` Operation::Equals(FoldSpecificFieldKind::Count, Argument::Variable(var_ref)) | Operation::LessThanOrEqual( FoldSpecificFieldKind::Count, @@ -286,10 +288,52 @@ fn get_max_fold_count_limit(carrier: &mut QueryCarrier, fold: &IRFold) -> Option result } +/// If this IRFold has a filter on the folded element count, and that filter imposes +/// a min size that can be statically determined, return that min size so it can +/// be used for further optimizations. Otherwise, return None. +fn get_min_fold_count_limit(carrier: &mut QueryCarrier, fold: &IRFold) -> Option { + let mut result: Option = None; + + let query_arguments = &carrier.query.as_ref().expect("query was not returned").arguments; + for post_fold_filter in fold.post_filters.iter() { + let next_limit = match post_fold_filter { + // We do not need to visit Equals and OneOf here, + // since those will be handled by `get_max_fold_count_limit` + Operation::GreaterThanOrEqual( + FoldSpecificFieldKind::Count, + Argument::Variable(var_ref), + ) => { + let variable_value = query_arguments[&var_ref.variable_name].as_usize().unwrap(); + Some(variable_value) + } + Operation::GreaterThan(FoldSpecificFieldKind::Count, Argument::Variable(var_ref)) => { + let variable_value = query_arguments[&var_ref.variable_name].as_usize().unwrap(); + Some(variable_value + 1) + } + _ => None, + }; + + match (result, next_limit) { + (None, _) => result = next_limit, + (Some(l), Some(r)) if l < r => result = next_limit, + _ => {} + } + } + + result +} + fn collect_fold_elements<'query, Vertex: Clone + Debug + 'query>( mut iterator: ContextIterator<'query, Vertex>, max_fold_count_limit: &Option, + min_fold_count_limit: &Option, + no_outputs_in_fold: bool, + has_output_on_fold_count: bool, ) -> Option>> { + // We do not apply the min_fold_count_limit optimization if we have an upper bound, + // in the form of max_fold_count_limit, because if we have a required upperbound, + // then we can't stop at the lower bound, if we are required to observe whether we hit the + // upperbound, it's no longer safe to stop at the lower bound. if let Some(max_fold_count_limit) = max_fold_count_limit { // If this fold has more than `max_fold_count_limit` elements, // it will get filtered out by a post-fold filter. @@ -318,9 +362,28 @@ fn collect_fold_elements<'query, Vertex: Clone + Debug + 'query>( Some(fold_elements) } else { - // We weren't able to find any early-termination condition for materializing the fold, - // so materialize the whole thing and return it. - Some(iterator.collect()) + let collected = match min_fold_count_limit { + // If we have a min_fold_count_limit then it is safe to .take(min_fold_count_limit) + // because .take will take at max min_fold_count_limit elements, and if the iterator has + // less it will just take all of them. + + // This optimization requires that the user can never observe that we didn't fully + // iterate the entire iterator. This is only possible if the fold has no outputs and + // the user does not have output the count of elements in the fold. + + // Additionally, we only apply this optimization if we don't have an upper bound, + // in the form of max_fold_count_limit, because if we dont have a required upperbound + // can we stop at the lower bound, if we are required to observe whether we hit the + // upperbound, it's no longer safe to stop at the lower bound. + Some(min_fold_count_limit) if no_outputs_in_fold && !has_output_on_fold_count => { + iterator.take(*min_fold_count_limit).collect() + } + // We weren't able to find any early-termination condition for materializing the fold, + // so materialize the whole thing and return it. + _ => iterator.collect(), + }; + + Some(collected) } } @@ -411,6 +474,10 @@ fn compute_fold<'query, AdapterT: Adapter<'query> + 'query>( let fold_component = fold.component.clone(); let fold_eid = fold.eid; let max_fold_size = get_max_fold_count_limit(carrier, fold.as_ref()); + let min_fold_size = get_min_fold_count_limit(carrier, fold.as_ref()); + let no_outputs_in_fold = fold.component.outputs.is_empty(); + let has_output_on_fold_count = + fold.fold_specific_outputs.values().any(|x| *x == FoldSpecificFieldKind::Count); let moved_fold = fold.clone(); let folded_iterator = edge_iterator.filter_map(move |(mut context, neighbors)| { let imported_tags = context.imported_tags.clone(); @@ -434,7 +501,13 @@ fn compute_fold<'query, AdapterT: Adapter<'query> + 'query>( let fold_elements = if fold_exists { // N.B.: Note the `?` at the end here! // This lets us early-discard folds that failed a post-processing filter. - Some(collect_fold_elements(computed_iterator, &max_fold_size)?) + Some(collect_fold_elements( + computed_iterator, + &max_fold_size, + &min_fold_size, + no_outputs_in_fold, + has_output_on_fold_count, + )?) } else { None }; diff --git a/trustfall_core/test_data/tests/valid_queries/fold_with_no_outputs.trace.ron b/trustfall_core/test_data/tests/valid_queries/fold_with_no_outputs.trace.ron index da959b02..2c56bda8 100644 --- a/trustfall_core/test_data/tests/valid_queries/fold_with_no_outputs.trace.ron +++ b/trustfall_core/test_data/tests/valid_queries/fold_with_no_outputs.trace.ron @@ -118,16 +118,6 @@ TestInterpreterOutputTrace( ), Opid(15): TraceOp( opid: Opid(15), - parent_opid: Some(Opid(12)), - content: YieldFrom(ResolveNeighborsInner(2, Prime(PrimeNumber(5)))), - ), - Opid(16): TraceOp( - opid: Opid(16), - parent_opid: Some(Opid(12)), - content: OutputIteratorExhausted, - ), - Opid(17): TraceOp( - opid: Opid(17), parent_opid: Some(Opid(4)), content: YieldInto(SerializableContext( active_vertex: Some(Composite(CompositeNumber(30, [ @@ -156,18 +146,12 @@ TestInterpreterOutputTrace( Vid(2): Some(Prime(PrimeNumber(3))), }, ), - SerializableContext( - active_vertex: Some(Prime(PrimeNumber(5))), - vertices: { - Vid(2): Some(Prime(PrimeNumber(5))), - }, - ), ]), }, )), ), - Opid(18): TraceOp( - opid: Opid(18), + Opid(16): TraceOp( + opid: Opid(16), parent_opid: Some(Opid(4)), content: YieldFrom(ResolveProperty(SerializableContext( active_vertex: Some(Composite(CompositeNumber(30, [ @@ -196,70 +180,64 @@ TestInterpreterOutputTrace( Vid(2): Some(Prime(PrimeNumber(3))), }, ), - SerializableContext( - active_vertex: Some(Prime(PrimeNumber(5))), - vertices: { - Vid(2): Some(Prime(PrimeNumber(5))), - }, - ), ]), }, ), Int64(30))), ), - Opid(19): TraceOp( - opid: Opid(19), + Opid(17): TraceOp( + opid: Opid(17), parent_opid: None, content: ProduceQueryResult({ "value": Int64(30), }), ), + Opid(18): TraceOp( + opid: Opid(18), + parent_opid: Some(Opid(4)), + content: AdvanceInputIterator, + ), + Opid(19): TraceOp( + opid: Opid(19), + parent_opid: Some(Opid(3)), + content: AdvanceInputIterator, + ), Opid(20): TraceOp( opid: Opid(20), - parent_opid: Some(Opid(4)), + parent_opid: Some(Opid(2)), content: AdvanceInputIterator, ), Opid(21): TraceOp( opid: Opid(21), - parent_opid: Some(Opid(3)), - content: AdvanceInputIterator, + parent_opid: Some(Opid(1)), + content: OutputIteratorExhausted, ), Opid(22): TraceOp( opid: Opid(22), parent_opid: Some(Opid(2)), - content: AdvanceInputIterator, + content: InputIteratorExhausted, ), Opid(23): TraceOp( opid: Opid(23), - parent_opid: Some(Opid(1)), + parent_opid: Some(Opid(2)), content: OutputIteratorExhausted, ), Opid(24): TraceOp( opid: Opid(24), - parent_opid: Some(Opid(2)), + parent_opid: Some(Opid(3)), content: InputIteratorExhausted, ), Opid(25): TraceOp( opid: Opid(25), - parent_opid: Some(Opid(2)), + parent_opid: Some(Opid(3)), content: OutputIteratorExhausted, ), Opid(26): TraceOp( opid: Opid(26), - parent_opid: Some(Opid(3)), + parent_opid: Some(Opid(4)), content: InputIteratorExhausted, ), Opid(27): TraceOp( opid: Opid(27), - parent_opid: Some(Opid(3)), - content: OutputIteratorExhausted, - ), - Opid(28): TraceOp( - opid: Opid(28), - parent_opid: Some(Opid(4)), - content: InputIteratorExhausted, - ), - Opid(29): TraceOp( - opid: Opid(29), parent_opid: Some(Opid(4)), content: OutputIteratorExhausted, ), diff --git a/trustfall_core/test_data/tests/valid_queries/fold_with_no_outputs_elided_braces.trace.ron b/trustfall_core/test_data/tests/valid_queries/fold_with_no_outputs_elided_braces.trace.ron index da959b02..2c56bda8 100644 --- a/trustfall_core/test_data/tests/valid_queries/fold_with_no_outputs_elided_braces.trace.ron +++ b/trustfall_core/test_data/tests/valid_queries/fold_with_no_outputs_elided_braces.trace.ron @@ -118,16 +118,6 @@ TestInterpreterOutputTrace( ), Opid(15): TraceOp( opid: Opid(15), - parent_opid: Some(Opid(12)), - content: YieldFrom(ResolveNeighborsInner(2, Prime(PrimeNumber(5)))), - ), - Opid(16): TraceOp( - opid: Opid(16), - parent_opid: Some(Opid(12)), - content: OutputIteratorExhausted, - ), - Opid(17): TraceOp( - opid: Opid(17), parent_opid: Some(Opid(4)), content: YieldInto(SerializableContext( active_vertex: Some(Composite(CompositeNumber(30, [ @@ -156,18 +146,12 @@ TestInterpreterOutputTrace( Vid(2): Some(Prime(PrimeNumber(3))), }, ), - SerializableContext( - active_vertex: Some(Prime(PrimeNumber(5))), - vertices: { - Vid(2): Some(Prime(PrimeNumber(5))), - }, - ), ]), }, )), ), - Opid(18): TraceOp( - opid: Opid(18), + Opid(16): TraceOp( + opid: Opid(16), parent_opid: Some(Opid(4)), content: YieldFrom(ResolveProperty(SerializableContext( active_vertex: Some(Composite(CompositeNumber(30, [ @@ -196,70 +180,64 @@ TestInterpreterOutputTrace( Vid(2): Some(Prime(PrimeNumber(3))), }, ), - SerializableContext( - active_vertex: Some(Prime(PrimeNumber(5))), - vertices: { - Vid(2): Some(Prime(PrimeNumber(5))), - }, - ), ]), }, ), Int64(30))), ), - Opid(19): TraceOp( - opid: Opid(19), + Opid(17): TraceOp( + opid: Opid(17), parent_opid: None, content: ProduceQueryResult({ "value": Int64(30), }), ), + Opid(18): TraceOp( + opid: Opid(18), + parent_opid: Some(Opid(4)), + content: AdvanceInputIterator, + ), + Opid(19): TraceOp( + opid: Opid(19), + parent_opid: Some(Opid(3)), + content: AdvanceInputIterator, + ), Opid(20): TraceOp( opid: Opid(20), - parent_opid: Some(Opid(4)), + parent_opid: Some(Opid(2)), content: AdvanceInputIterator, ), Opid(21): TraceOp( opid: Opid(21), - parent_opid: Some(Opid(3)), - content: AdvanceInputIterator, + parent_opid: Some(Opid(1)), + content: OutputIteratorExhausted, ), Opid(22): TraceOp( opid: Opid(22), parent_opid: Some(Opid(2)), - content: AdvanceInputIterator, + content: InputIteratorExhausted, ), Opid(23): TraceOp( opid: Opid(23), - parent_opid: Some(Opid(1)), + parent_opid: Some(Opid(2)), content: OutputIteratorExhausted, ), Opid(24): TraceOp( opid: Opid(24), - parent_opid: Some(Opid(2)), + parent_opid: Some(Opid(3)), content: InputIteratorExhausted, ), Opid(25): TraceOp( opid: Opid(25), - parent_opid: Some(Opid(2)), + parent_opid: Some(Opid(3)), content: OutputIteratorExhausted, ), Opid(26): TraceOp( opid: Opid(26), - parent_opid: Some(Opid(3)), + parent_opid: Some(Opid(4)), content: InputIteratorExhausted, ), Opid(27): TraceOp( opid: Opid(27), - parent_opid: Some(Opid(3)), - content: OutputIteratorExhausted, - ), - Opid(28): TraceOp( - opid: Opid(28), - parent_opid: Some(Opid(4)), - content: InputIteratorExhausted, - ), - Opid(29): TraceOp( - opid: Opid(29), parent_opid: Some(Opid(4)), content: OutputIteratorExhausted, ), diff --git a/trustfall_core/test_data/tests/valid_queries/fold_with_no_outputs_transform_and_filter_greater_than.trace.ron b/trustfall_core/test_data/tests/valid_queries/fold_with_no_outputs_transform_and_filter_greater_than.trace.ron index fdd02c22..22223938 100644 --- a/trustfall_core/test_data/tests/valid_queries/fold_with_no_outputs_transform_and_filter_greater_than.trace.ron +++ b/trustfall_core/test_data/tests/valid_queries/fold_with_no_outputs_transform_and_filter_greater_than.trace.ron @@ -118,16 +118,6 @@ TestInterpreterOutputTrace( ), Opid(15): TraceOp( opid: Opid(15), - parent_opid: Some(Opid(12)), - content: YieldFrom(ResolveNeighborsInner(2, Prime(PrimeNumber(5)))), - ), - Opid(16): TraceOp( - opid: Opid(16), - parent_opid: Some(Opid(12)), - content: OutputIteratorExhausted, - ), - Opid(17): TraceOp( - opid: Opid(17), parent_opid: Some(Opid(4)), content: YieldInto(SerializableContext( active_vertex: Some(Composite(CompositeNumber(30, [ @@ -156,18 +146,12 @@ TestInterpreterOutputTrace( Vid(2): Some(Prime(PrimeNumber(3))), }, ), - SerializableContext( - active_vertex: Some(Prime(PrimeNumber(5))), - vertices: { - Vid(2): Some(Prime(PrimeNumber(5))), - }, - ), ]), }, )), ), - Opid(18): TraceOp( - opid: Opid(18), + Opid(16): TraceOp( + opid: Opid(16), parent_opid: Some(Opid(4)), content: YieldFrom(ResolveProperty(SerializableContext( active_vertex: Some(Composite(CompositeNumber(30, [ @@ -196,70 +180,64 @@ TestInterpreterOutputTrace( Vid(2): Some(Prime(PrimeNumber(3))), }, ), - SerializableContext( - active_vertex: Some(Prime(PrimeNumber(5))), - vertices: { - Vid(2): Some(Prime(PrimeNumber(5))), - }, - ), ]), }, ), Int64(30))), ), - Opid(19): TraceOp( - opid: Opid(19), + Opid(17): TraceOp( + opid: Opid(17), parent_opid: None, content: ProduceQueryResult({ "value": Int64(30), }), ), + Opid(18): TraceOp( + opid: Opid(18), + parent_opid: Some(Opid(4)), + content: AdvanceInputIterator, + ), + Opid(19): TraceOp( + opid: Opid(19), + parent_opid: Some(Opid(3)), + content: AdvanceInputIterator, + ), Opid(20): TraceOp( opid: Opid(20), - parent_opid: Some(Opid(4)), + parent_opid: Some(Opid(2)), content: AdvanceInputIterator, ), Opid(21): TraceOp( opid: Opid(21), - parent_opid: Some(Opid(3)), - content: AdvanceInputIterator, + parent_opid: Some(Opid(1)), + content: OutputIteratorExhausted, ), Opid(22): TraceOp( opid: Opid(22), parent_opid: Some(Opid(2)), - content: AdvanceInputIterator, + content: InputIteratorExhausted, ), Opid(23): TraceOp( opid: Opid(23), - parent_opid: Some(Opid(1)), + parent_opid: Some(Opid(2)), content: OutputIteratorExhausted, ), Opid(24): TraceOp( opid: Opid(24), - parent_opid: Some(Opid(2)), + parent_opid: Some(Opid(3)), content: InputIteratorExhausted, ), Opid(25): TraceOp( opid: Opid(25), - parent_opid: Some(Opid(2)), + parent_opid: Some(Opid(3)), content: OutputIteratorExhausted, ), Opid(26): TraceOp( opid: Opid(26), - parent_opid: Some(Opid(3)), + parent_opid: Some(Opid(4)), content: InputIteratorExhausted, ), Opid(27): TraceOp( opid: Opid(27), - parent_opid: Some(Opid(3)), - content: OutputIteratorExhausted, - ), - Opid(28): TraceOp( - opid: Opid(28), - parent_opid: Some(Opid(4)), - content: InputIteratorExhausted, - ), - Opid(29): TraceOp( - opid: Opid(29), parent_opid: Some(Opid(4)), content: OutputIteratorExhausted, ), diff --git a/trustfall_core/test_data/tests/valid_queries/nested_fold_with_no_outputs.trace.ron b/trustfall_core/test_data/tests/valid_queries/nested_fold_with_no_outputs.trace.ron index 291201b1..2c9246d1 100644 --- a/trustfall_core/test_data/tests/valid_queries/nested_fold_with_no_outputs.trace.ron +++ b/trustfall_core/test_data/tests/valid_queries/nested_fold_with_no_outputs.trace.ron @@ -231,28 +231,23 @@ TestInterpreterOutputTrace( ), Opid(32): TraceOp( opid: Opid(32), - parent_opid: Some(Opid(29)), - content: OutputIteratorExhausted, + parent_opid: Some(Opid(14)), + content: AdvanceInputIterator, ), Opid(33): TraceOp( opid: Opid(33), - parent_opid: Some(Opid(14)), + parent_opid: Some(Opid(13)), content: AdvanceInputIterator, ), Opid(34): TraceOp( opid: Opid(34), - parent_opid: Some(Opid(13)), - content: AdvanceInputIterator, - ), - Opid(35): TraceOp( - opid: Opid(35), parent_opid: Some(Opid(12)), content: YieldFrom(ResolveNeighborsInner(3, Composite(CompositeNumber(8, [ 2, ])))), ), - Opid(36): TraceOp( - opid: Opid(36), + Opid(35): TraceOp( + opid: Opid(35), parent_opid: Some(Opid(13)), content: YieldInto(SerializableContext( active_vertex: Some(Composite(CompositeNumber(8, [ @@ -261,8 +256,8 @@ TestInterpreterOutputTrace( vertices: {}, )), ), - Opid(37): TraceOp( - opid: Opid(37), + Opid(36): TraceOp( + opid: Opid(36), parent_opid: Some(Opid(13)), content: YieldFrom(ResolveCoercion(SerializableContext( active_vertex: Some(Composite(CompositeNumber(8, [ @@ -271,8 +266,8 @@ TestInterpreterOutputTrace( vertices: {}, ), true)), ), - Opid(38): TraceOp( - opid: Opid(38), + Opid(37): TraceOp( + opid: Opid(37), parent_opid: Some(Opid(14)), content: YieldInto(SerializableContext( active_vertex: Some(Composite(CompositeNumber(8, [ @@ -285,8 +280,8 @@ TestInterpreterOutputTrace( }, )), ), - Opid(39): TraceOp( - opid: Opid(39), + Opid(38): TraceOp( + opid: Opid(38), parent_opid: Some(Opid(14)), content: YieldFrom(ResolveNeighborsOuter(SerializableContext( active_vertex: Some(Composite(CompositeNumber(8, [ @@ -299,260 +294,18 @@ TestInterpreterOutputTrace( }, ))), ), + Opid(39): TraceOp( + opid: Opid(39), + parent_opid: Some(Opid(38)), + content: YieldFrom(ResolveNeighborsInner(0, Neither(NeitherNumber(1)))), + ), Opid(40): TraceOp( opid: Opid(40), - parent_opid: Some(Opid(39)), - content: YieldFrom(ResolveNeighborsInner(0, Neither(NeitherNumber(1)))), + parent_opid: Some(Opid(38)), + content: YieldFrom(ResolveNeighborsInner(1, Prime(PrimeNumber(2)))), ), Opid(41): TraceOp( opid: Opid(41), - parent_opid: Some(Opid(39)), - content: YieldFrom(ResolveNeighborsInner(1, Prime(PrimeNumber(2)))), - ), - Opid(42): TraceOp( - opid: Opid(42), - parent_opid: Some(Opid(39)), - content: YieldFrom(ResolveNeighborsInner(2, Composite(CompositeNumber(4, [ - 2, - ])))), - ), - Opid(43): TraceOp( - opid: Opid(43), - parent_opid: Some(Opid(39)), - content: OutputIteratorExhausted, - ), - Opid(44): TraceOp( - opid: Opid(44), - parent_opid: Some(Opid(14)), - content: AdvanceInputIterator, - ), - Opid(45): TraceOp( - opid: Opid(45), - parent_opid: Some(Opid(13)), - content: AdvanceInputIterator, - ), - Opid(46): TraceOp( - opid: Opid(46), - parent_opid: Some(Opid(12)), - content: YieldFrom(ResolveNeighborsInner(4, Composite(CompositeNumber(16, [ - 2, - ])))), - ), - Opid(47): TraceOp( - opid: Opid(47), - parent_opid: Some(Opid(13)), - content: YieldInto(SerializableContext( - active_vertex: Some(Composite(CompositeNumber(16, [ - 2, - ]))), - vertices: {}, - )), - ), - Opid(48): TraceOp( - opid: Opid(48), - parent_opid: Some(Opid(13)), - content: YieldFrom(ResolveCoercion(SerializableContext( - active_vertex: Some(Composite(CompositeNumber(16, [ - 2, - ]))), - vertices: {}, - ), true)), - ), - Opid(49): TraceOp( - opid: Opid(49), - parent_opid: Some(Opid(14)), - content: YieldInto(SerializableContext( - active_vertex: Some(Composite(CompositeNumber(16, [ - 2, - ]))), - vertices: { - Vid(2): Some(Composite(CompositeNumber(16, [ - 2, - ]))), - }, - )), - ), - Opid(50): TraceOp( - opid: Opid(50), - parent_opid: Some(Opid(14)), - content: YieldFrom(ResolveNeighborsOuter(SerializableContext( - active_vertex: Some(Composite(CompositeNumber(16, [ - 2, - ]))), - vertices: { - Vid(2): Some(Composite(CompositeNumber(16, [ - 2, - ]))), - }, - ))), - ), - Opid(51): TraceOp( - opid: Opid(51), - parent_opid: Some(Opid(50)), - content: YieldFrom(ResolveNeighborsInner(0, Neither(NeitherNumber(1)))), - ), - Opid(52): TraceOp( - opid: Opid(52), - parent_opid: Some(Opid(50)), - content: YieldFrom(ResolveNeighborsInner(1, Prime(PrimeNumber(2)))), - ), - Opid(53): TraceOp( - opid: Opid(53), - parent_opid: Some(Opid(50)), - content: YieldFrom(ResolveNeighborsInner(2, Composite(CompositeNumber(4, [ - 2, - ])))), - ), - Opid(54): TraceOp( - opid: Opid(54), - parent_opid: Some(Opid(50)), - content: YieldFrom(ResolveNeighborsInner(3, Composite(CompositeNumber(8, [ - 2, - ])))), - ), - Opid(55): TraceOp( - opid: Opid(55), - parent_opid: Some(Opid(50)), - content: OutputIteratorExhausted, - ), - Opid(56): TraceOp( - opid: Opid(56), - parent_opid: Some(Opid(14)), - content: AdvanceInputIterator, - ), - Opid(57): TraceOp( - opid: Opid(57), - parent_opid: Some(Opid(13)), - content: AdvanceInputIterator, - ), - Opid(58): TraceOp( - opid: Opid(58), - parent_opid: Some(Opid(12)), - content: YieldFrom(ResolveNeighborsInner(5, Composite(CompositeNumber(32, [ - 2, - ])))), - ), - Opid(59): TraceOp( - opid: Opid(59), - parent_opid: Some(Opid(13)), - content: YieldInto(SerializableContext( - active_vertex: Some(Composite(CompositeNumber(32, [ - 2, - ]))), - vertices: {}, - )), - ), - Opid(60): TraceOp( - opid: Opid(60), - parent_opid: Some(Opid(13)), - content: YieldFrom(ResolveCoercion(SerializableContext( - active_vertex: Some(Composite(CompositeNumber(32, [ - 2, - ]))), - vertices: {}, - ), true)), - ), - Opid(61): TraceOp( - opid: Opid(61), - parent_opid: Some(Opid(14)), - content: YieldInto(SerializableContext( - active_vertex: Some(Composite(CompositeNumber(32, [ - 2, - ]))), - vertices: { - Vid(2): Some(Composite(CompositeNumber(32, [ - 2, - ]))), - }, - )), - ), - Opid(62): TraceOp( - opid: Opid(62), - parent_opid: Some(Opid(14)), - content: YieldFrom(ResolveNeighborsOuter(SerializableContext( - active_vertex: Some(Composite(CompositeNumber(32, [ - 2, - ]))), - vertices: { - Vid(2): Some(Composite(CompositeNumber(32, [ - 2, - ]))), - }, - ))), - ), - Opid(63): TraceOp( - opid: Opid(63), - parent_opid: Some(Opid(62)), - content: YieldFrom(ResolveNeighborsInner(0, Neither(NeitherNumber(1)))), - ), - Opid(64): TraceOp( - opid: Opid(64), - parent_opid: Some(Opid(62)), - content: YieldFrom(ResolveNeighborsInner(1, Prime(PrimeNumber(2)))), - ), - Opid(65): TraceOp( - opid: Opid(65), - parent_opid: Some(Opid(62)), - content: YieldFrom(ResolveNeighborsInner(2, Composite(CompositeNumber(4, [ - 2, - ])))), - ), - Opid(66): TraceOp( - opid: Opid(66), - parent_opid: Some(Opid(62)), - content: YieldFrom(ResolveNeighborsInner(3, Composite(CompositeNumber(8, [ - 2, - ])))), - ), - Opid(67): TraceOp( - opid: Opid(67), - parent_opid: Some(Opid(62)), - content: YieldFrom(ResolveNeighborsInner(4, Composite(CompositeNumber(16, [ - 2, - ])))), - ), - Opid(68): TraceOp( - opid: Opid(68), - parent_opid: Some(Opid(62)), - content: OutputIteratorExhausted, - ), - Opid(69): TraceOp( - opid: Opid(69), - parent_opid: Some(Opid(14)), - content: AdvanceInputIterator, - ), - Opid(70): TraceOp( - opid: Opid(70), - parent_opid: Some(Opid(13)), - content: AdvanceInputIterator, - ), - Opid(71): TraceOp( - opid: Opid(71), - parent_opid: Some(Opid(12)), - content: OutputIteratorExhausted, - ), - Opid(72): TraceOp( - opid: Opid(72), - parent_opid: Some(Opid(13)), - content: InputIteratorExhausted, - ), - Opid(73): TraceOp( - opid: Opid(73), - parent_opid: Some(Opid(13)), - content: OutputIteratorExhausted, - ), - Opid(74): TraceOp( - opid: Opid(74), - parent_opid: Some(Opid(14)), - content: InputIteratorExhausted, - ), - Opid(75): TraceOp( - opid: Opid(75), - parent_opid: Some(Opid(14)), - content: OutputIteratorExhausted, - ), - Opid(76): TraceOp( - opid: Opid(76), parent_opid: Some(Opid(4)), content: YieldInto(SerializableContext( active_vertex: Some(Composite(CompositeNumber(64, [ @@ -614,118 +367,6 @@ TestInterpreterOutputTrace( Vid(3): Some(Prime(PrimeNumber(2))), }, ), - SerializableContext( - active_vertex: Some(Composite(CompositeNumber(4, [ - 2, - ]))), - vertices: { - Vid(3): Some(Composite(CompositeNumber(4, [ - 2, - ]))), - }, - ), - ]), - }, - ), - SerializableContext( - active_vertex: Some(Composite(CompositeNumber(16, [ - 2, - ]))), - vertices: { - Vid(2): Some(Composite(CompositeNumber(16, [ - 2, - ]))), - }, - folded_contexts: { - Eid(2): Some([ - SerializableContext( - active_vertex: Some(Neither(NeitherNumber(1))), - vertices: { - Vid(3): Some(Neither(NeitherNumber(1))), - }, - ), - SerializableContext( - active_vertex: Some(Prime(PrimeNumber(2))), - vertices: { - Vid(3): Some(Prime(PrimeNumber(2))), - }, - ), - SerializableContext( - active_vertex: Some(Composite(CompositeNumber(4, [ - 2, - ]))), - vertices: { - Vid(3): Some(Composite(CompositeNumber(4, [ - 2, - ]))), - }, - ), - SerializableContext( - active_vertex: Some(Composite(CompositeNumber(8, [ - 2, - ]))), - vertices: { - Vid(3): Some(Composite(CompositeNumber(8, [ - 2, - ]))), - }, - ), - ]), - }, - ), - SerializableContext( - active_vertex: Some(Composite(CompositeNumber(32, [ - 2, - ]))), - vertices: { - Vid(2): Some(Composite(CompositeNumber(32, [ - 2, - ]))), - }, - folded_contexts: { - Eid(2): Some([ - SerializableContext( - active_vertex: Some(Neither(NeitherNumber(1))), - vertices: { - Vid(3): Some(Neither(NeitherNumber(1))), - }, - ), - SerializableContext( - active_vertex: Some(Prime(PrimeNumber(2))), - vertices: { - Vid(3): Some(Prime(PrimeNumber(2))), - }, - ), - SerializableContext( - active_vertex: Some(Composite(CompositeNumber(4, [ - 2, - ]))), - vertices: { - Vid(3): Some(Composite(CompositeNumber(4, [ - 2, - ]))), - }, - ), - SerializableContext( - active_vertex: Some(Composite(CompositeNumber(8, [ - 2, - ]))), - vertices: { - Vid(3): Some(Composite(CompositeNumber(8, [ - 2, - ]))), - }, - ), - SerializableContext( - active_vertex: Some(Composite(CompositeNumber(16, [ - 2, - ]))), - vertices: { - Vid(3): Some(Composite(CompositeNumber(16, [ - 2, - ]))), - }, - ), ]), }, ), @@ -733,8 +374,8 @@ TestInterpreterOutputTrace( }, )), ), - Opid(77): TraceOp( - opid: Opid(77), + Opid(42): TraceOp( + opid: Opid(42), parent_opid: Some(Opid(4)), content: YieldFrom(ResolveProperty(SerializableContext( active_vertex: Some(Composite(CompositeNumber(64, [ @@ -796,118 +437,6 @@ TestInterpreterOutputTrace( Vid(3): Some(Prime(PrimeNumber(2))), }, ), - SerializableContext( - active_vertex: Some(Composite(CompositeNumber(4, [ - 2, - ]))), - vertices: { - Vid(3): Some(Composite(CompositeNumber(4, [ - 2, - ]))), - }, - ), - ]), - }, - ), - SerializableContext( - active_vertex: Some(Composite(CompositeNumber(16, [ - 2, - ]))), - vertices: { - Vid(2): Some(Composite(CompositeNumber(16, [ - 2, - ]))), - }, - folded_contexts: { - Eid(2): Some([ - SerializableContext( - active_vertex: Some(Neither(NeitherNumber(1))), - vertices: { - Vid(3): Some(Neither(NeitherNumber(1))), - }, - ), - SerializableContext( - active_vertex: Some(Prime(PrimeNumber(2))), - vertices: { - Vid(3): Some(Prime(PrimeNumber(2))), - }, - ), - SerializableContext( - active_vertex: Some(Composite(CompositeNumber(4, [ - 2, - ]))), - vertices: { - Vid(3): Some(Composite(CompositeNumber(4, [ - 2, - ]))), - }, - ), - SerializableContext( - active_vertex: Some(Composite(CompositeNumber(8, [ - 2, - ]))), - vertices: { - Vid(3): Some(Composite(CompositeNumber(8, [ - 2, - ]))), - }, - ), - ]), - }, - ), - SerializableContext( - active_vertex: Some(Composite(CompositeNumber(32, [ - 2, - ]))), - vertices: { - Vid(2): Some(Composite(CompositeNumber(32, [ - 2, - ]))), - }, - folded_contexts: { - Eid(2): Some([ - SerializableContext( - active_vertex: Some(Neither(NeitherNumber(1))), - vertices: { - Vid(3): Some(Neither(NeitherNumber(1))), - }, - ), - SerializableContext( - active_vertex: Some(Prime(PrimeNumber(2))), - vertices: { - Vid(3): Some(Prime(PrimeNumber(2))), - }, - ), - SerializableContext( - active_vertex: Some(Composite(CompositeNumber(4, [ - 2, - ]))), - vertices: { - Vid(3): Some(Composite(CompositeNumber(4, [ - 2, - ]))), - }, - ), - SerializableContext( - active_vertex: Some(Composite(CompositeNumber(8, [ - 2, - ]))), - vertices: { - Vid(3): Some(Composite(CompositeNumber(8, [ - 2, - ]))), - }, - ), - SerializableContext( - active_vertex: Some(Composite(CompositeNumber(16, [ - 2, - ]))), - vertices: { - Vid(3): Some(Composite(CompositeNumber(16, [ - 2, - ]))), - }, - ), ]), }, ), @@ -915,60 +444,60 @@ TestInterpreterOutputTrace( }, ), Int64(64))), ), - Opid(78): TraceOp( - opid: Opid(78), + Opid(43): TraceOp( + opid: Opid(43), parent_opid: None, content: ProduceQueryResult({ "value": Int64(64), }), ), - Opid(79): TraceOp( - opid: Opid(79), + Opid(44): TraceOp( + opid: Opid(44), parent_opid: Some(Opid(4)), content: AdvanceInputIterator, ), - Opid(80): TraceOp( - opid: Opid(80), + Opid(45): TraceOp( + opid: Opid(45), parent_opid: Some(Opid(3)), content: AdvanceInputIterator, ), - Opid(81): TraceOp( - opid: Opid(81), + Opid(46): TraceOp( + opid: Opid(46), parent_opid: Some(Opid(2)), content: AdvanceInputIterator, ), - Opid(82): TraceOp( - opid: Opid(82), + Opid(47): TraceOp( + opid: Opid(47), parent_opid: Some(Opid(1)), content: OutputIteratorExhausted, ), - Opid(83): TraceOp( - opid: Opid(83), + Opid(48): TraceOp( + opid: Opid(48), parent_opid: Some(Opid(2)), content: InputIteratorExhausted, ), - Opid(84): TraceOp( - opid: Opid(84), + Opid(49): TraceOp( + opid: Opid(49), parent_opid: Some(Opid(2)), content: OutputIteratorExhausted, ), - Opid(85): TraceOp( - opid: Opid(85), + Opid(50): TraceOp( + opid: Opid(50), parent_opid: Some(Opid(3)), content: InputIteratorExhausted, ), - Opid(86): TraceOp( - opid: Opid(86), + Opid(51): TraceOp( + opid: Opid(51), parent_opid: Some(Opid(3)), content: OutputIteratorExhausted, ), - Opid(87): TraceOp( - opid: Opid(87), + Opid(52): TraceOp( + opid: Opid(52), parent_opid: Some(Opid(4)), content: InputIteratorExhausted, ), - Opid(88): TraceOp( - opid: Opid(88), + Opid(53): TraceOp( + opid: Opid(53), parent_opid: Some(Opid(4)), content: OutputIteratorExhausted, ), From 4d5927a539db5975276b59d08fa67b0b4367c8a0 Mon Sep 17 00:00:00 2001 From: U9G Date: Thu, 3 Aug 2023 22:31:39 -0400 Subject: [PATCH 02/17] use usize_from_field_value in get_min_fold_count_limit --- trustfall_core/src/interpreter/execution.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/trustfall_core/src/interpreter/execution.rs b/trustfall_core/src/interpreter/execution.rs index f6113bd6..44affea5 100644 --- a/trustfall_core/src/interpreter/execution.rs +++ b/trustfall_core/src/interpreter/execution.rs @@ -334,11 +334,15 @@ fn get_min_fold_count_limit(carrier: &mut QueryCarrier, fold: &IRFold) -> Option FoldSpecificFieldKind::Count, Argument::Variable(var_ref), ) => { - let variable_value = query_arguments[&var_ref.variable_name].as_usize().unwrap(); + let variable_value = + usize_from_field_value(&query_arguments[&var_ref.variable_name]) + .expect("for field value to be coercible to usize"); Some(variable_value) } Operation::GreaterThan(FoldSpecificFieldKind::Count, Argument::Variable(var_ref)) => { - let variable_value = query_arguments[&var_ref.variable_name].as_usize().unwrap(); + let variable_value = + usize_from_field_value(&query_arguments[&var_ref.variable_name]) + .expect("for field value to be coercible to usize"); Some(variable_value + 1) } _ => None, From 8d8b871afe21c54e2d50ba0849eab9835723f41f Mon Sep 17 00:00:00 2001 From: u9g Date: Thu, 3 Aug 2023 22:55:45 -0400 Subject: [PATCH 03/17] Update trustfall_core/src/interpreter/execution.rs Co-authored-by: Predrag Gruevski <2348618+obi1kenobi@users.noreply.github.com> --- trustfall_core/src/interpreter/execution.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/trustfall_core/src/interpreter/execution.rs b/trustfall_core/src/interpreter/execution.rs index 44affea5..5304b767 100644 --- a/trustfall_core/src/interpreter/execution.rs +++ b/trustfall_core/src/interpreter/execution.rs @@ -400,8 +400,8 @@ fn collect_fold_elements<'query, Vertex: Clone + Debug + 'query>( } else { let collected = match min_fold_count_limit { // If we have a min_fold_count_limit then it is safe to .take(min_fold_count_limit) - // because .take will take at max min_fold_count_limit elements, and if the iterator has - // less it will just take all of them. + // because it will take at most min_fold_count_limit elements, and if the iterator has + // fewer items then it will just take all of them. // This optimization requires that the user can never observe that we didn't fully // iterate the entire iterator. This is only possible if the fold has no outputs and From a5574af8b5a47ae109d236c927a0cd30e0415403 Mon Sep 17 00:00:00 2001 From: u9g Date: Thu, 3 Aug 2023 22:56:08 -0400 Subject: [PATCH 04/17] Update trustfall_core/src/interpreter/execution.rs Co-authored-by: Predrag Gruevski <2348618+obi1kenobi@users.noreply.github.com> --- trustfall_core/src/interpreter/execution.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/trustfall_core/src/interpreter/execution.rs b/trustfall_core/src/interpreter/execution.rs index 5304b767..1c81810d 100644 --- a/trustfall_core/src/interpreter/execution.rs +++ b/trustfall_core/src/interpreter/execution.rs @@ -402,11 +402,11 @@ fn collect_fold_elements<'query, Vertex: Clone + Debug + 'query>( // If we have a min_fold_count_limit then it is safe to .take(min_fold_count_limit) // because it will take at most min_fold_count_limit elements, and if the iterator has // fewer items then it will just take all of them. - + // // This optimization requires that the user can never observe that we didn't fully - // iterate the entire iterator. This is only possible if the fold has no outputs and + // compute the entire `@fold`. This is only possible if the fold has no outputs and // the user does not have output the count of elements in the fold. - + // // Additionally, we only apply this optimization if we don't have an upper bound, // in the form of max_fold_count_limit, because if we dont have a required upperbound // can we stop at the lower bound, if we are required to observe whether we hit the From f960c052b406945ac37159507d72d72da507a124 Mon Sep 17 00:00:00 2001 From: U9G Date: Thu, 3 Aug 2023 23:12:34 -0400 Subject: [PATCH 05/17] Improve comment about the optimization --- trustfall_core/src/interpreter/execution.rs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/trustfall_core/src/interpreter/execution.rs b/trustfall_core/src/interpreter/execution.rs index 1c81810d..2d8d9f74 100644 --- a/trustfall_core/src/interpreter/execution.rs +++ b/trustfall_core/src/interpreter/execution.rs @@ -399,18 +399,19 @@ fn collect_fold_elements<'query, Vertex: Clone + Debug + 'query>( Some(fold_elements) } else { let collected = match min_fold_count_limit { - // If we have a min_fold_count_limit then it is safe to .take(min_fold_count_limit) - // because it will take at most min_fold_count_limit elements, and if the iterator has - // fewer items then it will just take all of them. + // In this optimization we do not need to collect the entire fold if we can decide + // that the user never expects to observe the length in the entire fold, therefore + // performing less work. // // This optimization requires that the user can never observe that we didn't fully - // compute the entire `@fold`. This is only possible if the fold has no outputs and - // the user does not have output the count of elements in the fold. + // compute the entire `@fold`. This is only possible if the fold has no outputs, + // the user does not output the count of elements in the fold, and the user does + // not use any of the filters that require knowing the exact count of elements + // in the fold. // - // Additionally, we only apply this optimization if we don't have an upper bound, - // in the form of max_fold_count_limit, because if we dont have a required upperbound - // can we stop at the lower bound, if we are required to observe whether we hit the - // upperbound, it's no longer safe to stop at the lower bound. + // When we do `iterator.take(*min_fold_count_limit)`, this statement takes at most + // `min_fold_count_limit` elements, however if we have less the iterator can stop + // early. Some(min_fold_count_limit) if no_outputs_in_fold && !has_output_on_fold_count => { iterator.take(*min_fold_count_limit).collect() } From acd55be3c01050b7dd39e89e93e8026e30c85786 Mon Sep 17 00:00:00 2001 From: U9G Date: Fri, 4 Aug 2023 11:21:59 -0400 Subject: [PATCH 06/17] disable optimization when we have a tag on the count of folded elements --- trustfall_core/src/interpreter/execution.rs | 23 ++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/trustfall_core/src/interpreter/execution.rs b/trustfall_core/src/interpreter/execution.rs index 2d8d9f74..afc6f566 100644 --- a/trustfall_core/src/interpreter/execution.rs +++ b/trustfall_core/src/interpreter/execution.rs @@ -364,6 +364,7 @@ fn collect_fold_elements<'query, Vertex: Clone + Debug + 'query>( min_fold_count_limit: &Option, no_outputs_in_fold: bool, has_output_on_fold_count: bool, + has_tag_on_fold_count: bool, ) -> Option>> { // We do not apply the min_fold_count_limit optimization if we have an upper bound, // in the form of max_fold_count_limit, because if we have a required upperbound, @@ -412,7 +413,9 @@ fn collect_fold_elements<'query, Vertex: Clone + Debug + 'query>( // When we do `iterator.take(*min_fold_count_limit)`, this statement takes at most // `min_fold_count_limit` elements, however if we have less the iterator can stop // early. - Some(min_fold_count_limit) if no_outputs_in_fold && !has_output_on_fold_count => { + Some(min_fold_count_limit) + if no_outputs_in_fold && !has_output_on_fold_count && !has_tag_on_fold_count => + { iterator.take(*min_fold_count_limit).collect() } // We weren't able to find any early-termination condition for materializing the fold, @@ -515,6 +518,23 @@ fn compute_fold<'query, AdapterT: Adapter<'query> + 'query>( let no_outputs_in_fold = fold.component.outputs.is_empty(); let has_output_on_fold_count = fold.fold_specific_outputs.values().any(|x| *x == FoldSpecificFieldKind::Count); + let has_tag_on_fold_count = parent_component + .vertices + .iter() + .flat_map(|x| { + x.1.filters.iter().filter(|x| { + let Some(Argument::Tag(FieldRef::FoldSpecificField(tagged_fold_count))) = x.right() + else { + return false; + }; + + tagged_fold_count.fold_root_vid == fold.to_vid + && tagged_fold_count.fold_eid == fold.eid + && tagged_fold_count.kind == FoldSpecificFieldKind::Count + }) + }) + .next() + .is_some(); let moved_fold = fold.clone(); let folded_iterator = edge_iterator.filter_map(move |(mut context, neighbors)| { let imported_tags = context.imported_tags.clone(); @@ -544,6 +564,7 @@ fn compute_fold<'query, AdapterT: Adapter<'query> + 'query>( &min_fold_size, no_outputs_in_fold, has_output_on_fold_count, + has_tag_on_fold_count, )?) } else { None From 22af47db802bbbcf4bea88fe477ace66e2eb5d27 Mon Sep 17 00:00:00 2001 From: U9G Date: Fri, 4 Aug 2023 11:23:37 -0400 Subject: [PATCH 07/17] rewrite comment again --- trustfall_core/src/interpreter/execution.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/trustfall_core/src/interpreter/execution.rs b/trustfall_core/src/interpreter/execution.rs index afc6f566..7e600209 100644 --- a/trustfall_core/src/interpreter/execution.rs +++ b/trustfall_core/src/interpreter/execution.rs @@ -371,14 +371,12 @@ fn collect_fold_elements<'query, Vertex: Clone + Debug + 'query>( // then we can't stop at the lower bound, if we are required to observe whether we hit the // upperbound, it's no longer safe to stop at the lower bound. if let Some(max_fold_count_limit) = max_fold_count_limit { - // If this fold has more than `max_fold_count_limit` elements, - // it will get filtered out by a post-fold filter. - // Pulling elements from `iterator` causes computations and data fetches to happen, - // and as an optimization we'd like to stop pulling elements as soon as possible. - // If we are able to pull more than `max_fold_count_limit + 1` elements, - // we know that this fold is going to get filtered out, so we might as well - // stop materializing its elements early. Limit the max allocation size since - // it might not always be fully used. + // Queries that do not observe the fold count nor any fold contents may be able to + // be optimized by only partially expanding the fold, just enough to check any filters + // that may be applied to the fold count. + // + // For example, if `@filter(op: ">", value: ["$ten"])` is our only filter on the count + // of the fold, we can stop computing the rest of the fold after seeing we have 11 elements. let mut fold_elements = Vec::with_capacity((*max_fold_count_limit).min(16)); let mut stopped_early = false; From 735684a375e1b1eb82a5658c47816f5641f9993a Mon Sep 17 00:00:00 2001 From: U9G Date: Fri, 4 Aug 2023 11:35:17 -0400 Subject: [PATCH 08/17] improve comments --- trustfall_core/src/interpreter/execution.rs | 38 +++++++++------------ 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/trustfall_core/src/interpreter/execution.rs b/trustfall_core/src/interpreter/execution.rs index 7e600209..cce53be5 100644 --- a/trustfall_core/src/interpreter/execution.rs +++ b/trustfall_core/src/interpreter/execution.rs @@ -366,17 +366,18 @@ fn collect_fold_elements<'query, Vertex: Clone + Debug + 'query>( has_output_on_fold_count: bool, has_tag_on_fold_count: bool, ) -> Option>> { - // We do not apply the min_fold_count_limit optimization if we have an upper bound, - // in the form of max_fold_count_limit, because if we have a required upperbound, - // then we can't stop at the lower bound, if we are required to observe whether we hit the - // upperbound, it's no longer safe to stop at the lower bound. + // If we must collect the fold up to our upperbound of `max_fold_count_limit`, + // then we won't use our lowerbound of `min_fold_count_limit`, as by definition + // the upperbound will be larger than the lowerbound. if let Some(max_fold_count_limit) = max_fold_count_limit { - // Queries that do not observe the fold count nor any fold contents may be able to - // be optimized by only partially expanding the fold, just enough to check any filters - // that may be applied to the fold count. - // - // For example, if `@filter(op: ">", value: ["$ten"])` is our only filter on the count - // of the fold, we can stop computing the rest of the fold after seeing we have 11 elements. + // If this fold has more than `max_fold_count_limit` elements, + // it will get filtered out by a post-fold filter. + // Pulling elements from `iterator` causes computations and data fetches to happen, + // and as an optimization we'd like to stop pulling elements as soon as possible. + // If we are able to pull more than `max_fold_count_limit + 1` elements, + // we know that this fold is going to get filtered out, so we might as well + // stop materializing its elements early. Limit the max allocation size since + // it might not always be fully used. let mut fold_elements = Vec::with_capacity((*max_fold_count_limit).min(16)); let mut stopped_early = false; @@ -398,19 +399,12 @@ fn collect_fold_elements<'query, Vertex: Clone + Debug + 'query>( Some(fold_elements) } else { let collected = match min_fold_count_limit { - // In this optimization we do not need to collect the entire fold if we can decide - // that the user never expects to observe the length in the entire fold, therefore - // performing less work. - // - // This optimization requires that the user can never observe that we didn't fully - // compute the entire `@fold`. This is only possible if the fold has no outputs, - // the user does not output the count of elements in the fold, and the user does - // not use any of the filters that require knowing the exact count of elements - // in the fold. + // Queries that do not observe the fold count nor any fold contents may be able to + // be optimized by only partially expanding the fold, just enough to check any filters + // that may be applied to the fold count. // - // When we do `iterator.take(*min_fold_count_limit)`, this statement takes at most - // `min_fold_count_limit` elements, however if we have less the iterator can stop - // early. + // For example, if `@filter(op: ">", value: ["$ten"])` is our only filter on the count + // of the fold, we can stop computing the rest of the fold after seeing we have 11 elements. Some(min_fold_count_limit) if no_outputs_in_fold && !has_output_on_fold_count && !has_tag_on_fold_count => { From 9b7fe6aeb8960840d8cb5f7ef33ad0e46d76750d Mon Sep 17 00:00:00 2001 From: U9G Date: Fri, 4 Aug 2023 23:33:38 -0400 Subject: [PATCH 09/17] use values() over iter() --- trustfall_core/src/interpreter/execution.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/trustfall_core/src/interpreter/execution.rs b/trustfall_core/src/interpreter/execution.rs index cce53be5..301c9fbc 100644 --- a/trustfall_core/src/interpreter/execution.rs +++ b/trustfall_core/src/interpreter/execution.rs @@ -512,9 +512,9 @@ fn compute_fold<'query, AdapterT: Adapter<'query> + 'query>( fold.fold_specific_outputs.values().any(|x| *x == FoldSpecificFieldKind::Count); let has_tag_on_fold_count = parent_component .vertices - .iter() + .values() .flat_map(|x| { - x.1.filters.iter().filter(|x| { + x.filters.iter().filter(|x| { let Some(Argument::Tag(FieldRef::FoldSpecificField(tagged_fold_count))) = x.right() else { return false; From a68060fa82e9319e62725975ea6dc76e49dfab54 Mon Sep 17 00:00:00 2001 From: U9G Date: Fri, 4 Aug 2023 23:36:25 -0400 Subject: [PATCH 10/17] improve variable names --- trustfall_core/src/interpreter/execution.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/trustfall_core/src/interpreter/execution.rs b/trustfall_core/src/interpreter/execution.rs index 301c9fbc..c1a9edec 100644 --- a/trustfall_core/src/interpreter/execution.rs +++ b/trustfall_core/src/interpreter/execution.rs @@ -513,9 +513,10 @@ fn compute_fold<'query, AdapterT: Adapter<'query> + 'query>( let has_tag_on_fold_count = parent_component .vertices .values() - .flat_map(|x| { - x.filters.iter().filter(|x| { - let Some(Argument::Tag(FieldRef::FoldSpecificField(tagged_fold_count))) = x.right() + .flat_map(|vertex| { + vertex.filters.iter().filter(|filter| { + let Some(Argument::Tag(FieldRef::FoldSpecificField(tagged_fold_count))) = + filter.right() else { return false; }; From 78a49bb42504376d8222f16ccd57c703cce54448 Mon Sep 17 00:00:00 2001 From: u9g Date: Fri, 4 Aug 2023 23:38:42 -0400 Subject: [PATCH 11/17] Update trustfall_core/src/interpreter/execution.rs Co-authored-by: Predrag Gruevski <2348618+obi1kenobi@users.noreply.github.com> --- trustfall_core/src/interpreter/execution.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trustfall_core/src/interpreter/execution.rs b/trustfall_core/src/interpreter/execution.rs index cce53be5..77b41b0b 100644 --- a/trustfall_core/src/interpreter/execution.rs +++ b/trustfall_core/src/interpreter/execution.rs @@ -343,7 +343,7 @@ fn get_min_fold_count_limit(carrier: &mut QueryCarrier, fold: &IRFold) -> Option let variable_value = usize_from_field_value(&query_arguments[&var_ref.variable_name]) .expect("for field value to be coercible to usize"); - Some(variable_value + 1) + Some(variable_value.saturating_add(1)) } _ => None, }; From 9e6545b8a99c0ea6267d31c80c2fa7a5e26b1b8e Mon Sep 17 00:00:00 2001 From: U9G Date: Fri, 4 Aug 2023 23:46:27 -0400 Subject: [PATCH 12/17] use .any --- trustfall_core/src/interpreter/execution.rs | 27 +++++++++------------ 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/trustfall_core/src/interpreter/execution.rs b/trustfall_core/src/interpreter/execution.rs index fa013c59..78ce7287 100644 --- a/trustfall_core/src/interpreter/execution.rs +++ b/trustfall_core/src/interpreter/execution.rs @@ -510,24 +510,19 @@ fn compute_fold<'query, AdapterT: Adapter<'query> + 'query>( let no_outputs_in_fold = fold.component.outputs.is_empty(); let has_output_on_fold_count = fold.fold_specific_outputs.values().any(|x| *x == FoldSpecificFieldKind::Count); - let has_tag_on_fold_count = parent_component - .vertices - .values() - .flat_map(|vertex| { - vertex.filters.iter().filter(|filter| { - let Some(Argument::Tag(FieldRef::FoldSpecificField(tagged_fold_count))) = - filter.right() - else { - return false; - }; + let has_tag_on_fold_count = parent_component.vertices.values().any(|vertex| { + vertex.filters.iter().any(|filter| { + let Some(Argument::Tag(FieldRef::FoldSpecificField(tagged_fold_count))) = + filter.right() + else { + return false; + }; - tagged_fold_count.fold_root_vid == fold.to_vid - && tagged_fold_count.fold_eid == fold.eid - && tagged_fold_count.kind == FoldSpecificFieldKind::Count - }) + tagged_fold_count.fold_root_vid == fold.to_vid + && tagged_fold_count.fold_eid == fold.eid + && tagged_fold_count.kind == FoldSpecificFieldKind::Count }) - .next() - .is_some(); + }); let moved_fold = fold.clone(); let folded_iterator = edge_iterator.filter_map(move |(mut context, neighbors)| { let imported_tags = context.imported_tags.clone(); From 625392dae774eaba20e9560e564f7aae5771b54a Mon Sep 17 00:00:00 2001 From: U9G Date: Fri, 4 Aug 2023 23:52:54 -0400 Subject: [PATCH 13/17] combine booleans to make safe_to_skip_part_of_fold arg --- trustfall_core/src/interpreter/execution.rs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/trustfall_core/src/interpreter/execution.rs b/trustfall_core/src/interpreter/execution.rs index 78ce7287..d8e5e939 100644 --- a/trustfall_core/src/interpreter/execution.rs +++ b/trustfall_core/src/interpreter/execution.rs @@ -362,9 +362,7 @@ fn collect_fold_elements<'query, Vertex: Clone + Debug + 'query>( mut iterator: ContextIterator<'query, Vertex>, max_fold_count_limit: &Option, min_fold_count_limit: &Option, - no_outputs_in_fold: bool, - has_output_on_fold_count: bool, - has_tag_on_fold_count: bool, + safe_to_skip_part_of_fold: bool, ) -> Option>> { // If we must collect the fold up to our upperbound of `max_fold_count_limit`, // then we won't use our lowerbound of `min_fold_count_limit`, as by definition @@ -405,9 +403,7 @@ fn collect_fold_elements<'query, Vertex: Clone + Debug + 'query>( // // For example, if `@filter(op: ">", value: ["$ten"])` is our only filter on the count // of the fold, we can stop computing the rest of the fold after seeing we have 11 elements. - Some(min_fold_count_limit) - if no_outputs_in_fold && !has_output_on_fold_count && !has_tag_on_fold_count => - { + Some(min_fold_count_limit) if safe_to_skip_part_of_fold => { iterator.take(*min_fold_count_limit).collect() } // We weren't able to find any early-termination condition for materializing the fold, @@ -523,6 +519,8 @@ fn compute_fold<'query, AdapterT: Adapter<'query> + 'query>( && tagged_fold_count.kind == FoldSpecificFieldKind::Count }) }); + let safe_to_skip_part_of_fold = + no_outputs_in_fold && !has_output_on_fold_count && !has_tag_on_fold_count; let moved_fold = fold.clone(); let folded_iterator = edge_iterator.filter_map(move |(mut context, neighbors)| { let imported_tags = context.imported_tags.clone(); @@ -550,9 +548,7 @@ fn compute_fold<'query, AdapterT: Adapter<'query> + 'query>( computed_iterator, &max_fold_size, &min_fold_size, - no_outputs_in_fold, - has_output_on_fold_count, - has_tag_on_fold_count, + safe_to_skip_part_of_fold, )?) } else { None From 3e2caa119320af886ce44f79c568600a5e5874e7 Mon Sep 17 00:00:00 2001 From: U9G Date: Sat, 5 Aug 2023 00:29:38 -0400 Subject: [PATCH 14/17] remove comments that point out something obvious and rely on implem --- trustfall_core/src/interpreter/execution.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/trustfall_core/src/interpreter/execution.rs b/trustfall_core/src/interpreter/execution.rs index d8e5e939..3a167254 100644 --- a/trustfall_core/src/interpreter/execution.rs +++ b/trustfall_core/src/interpreter/execution.rs @@ -271,8 +271,6 @@ fn get_max_fold_count_limit(carrier: &mut QueryCarrier, fold: &IRFold) -> Option let query_arguments = &carrier.query.as_ref().expect("query was not returned").arguments; for post_fold_filter in fold.post_filters.iter() { let next_limit = match post_fold_filter { - // Equals and OneOf must be visited here as they are not visited - // in `get_min_fold_count_limit` Operation::Equals(FoldSpecificFieldKind::Count, Argument::Variable(var_ref)) | Operation::LessThanOrEqual( FoldSpecificFieldKind::Count, @@ -328,8 +326,6 @@ fn get_min_fold_count_limit(carrier: &mut QueryCarrier, fold: &IRFold) -> Option let query_arguments = &carrier.query.as_ref().expect("query was not returned").arguments; for post_fold_filter in fold.post_filters.iter() { let next_limit = match post_fold_filter { - // We do not need to visit Equals and OneOf here, - // since those will be handled by `get_max_fold_count_limit` Operation::GreaterThanOrEqual( FoldSpecificFieldKind::Count, Argument::Variable(var_ref), From 17ff414e92d061790030a98c149bef4dce80888c Mon Sep 17 00:00:00 2001 From: U9G Date: Sat, 5 Aug 2023 00:34:04 -0400 Subject: [PATCH 15/17] add comment and return None if we try getting the lowerbound of a filter but can't --- trustfall_core/src/interpreter/execution.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/trustfall_core/src/interpreter/execution.rs b/trustfall_core/src/interpreter/execution.rs index 3a167254..b20998b3 100644 --- a/trustfall_core/src/interpreter/execution.rs +++ b/trustfall_core/src/interpreter/execution.rs @@ -341,7 +341,11 @@ fn get_min_fold_count_limit(carrier: &mut QueryCarrier, fold: &IRFold) -> Option .expect("for field value to be coercible to usize"); Some(variable_value.saturating_add(1)) } - _ => None, + // This optimization is only valid if we know that every + // filter applied to the folded element count is a comparison + // that we can determine the number of elements needed to satisfy + // the filter. + _ => return None, }; match (result, next_limit) { From 8814461630408c273a6e1533f16f36ea33bfa1bf Mon Sep 17 00:00:00 2001 From: U9G Date: Sat, 5 Aug 2023 11:37:17 -0400 Subject: [PATCH 16/17] get rid of safe_to_skip_part_of_fold and move around comments --- trustfall_core/src/interpreter/execution.rs | 72 +++++++++++---------- 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/trustfall_core/src/interpreter/execution.rs b/trustfall_core/src/interpreter/execution.rs index b20998b3..43ab8c15 100644 --- a/trustfall_core/src/interpreter/execution.rs +++ b/trustfall_core/src/interpreter/execution.rs @@ -362,7 +362,6 @@ fn collect_fold_elements<'query, Vertex: Clone + Debug + 'query>( mut iterator: ContextIterator<'query, Vertex>, max_fold_count_limit: &Option, min_fold_count_limit: &Option, - safe_to_skip_part_of_fold: bool, ) -> Option>> { // If we must collect the fold up to our upperbound of `max_fold_count_limit`, // then we won't use our lowerbound of `min_fold_count_limit`, as by definition @@ -397,15 +396,9 @@ fn collect_fold_elements<'query, Vertex: Clone + Debug + 'query>( Some(fold_elements) } else { let collected = match min_fold_count_limit { - // Queries that do not observe the fold count nor any fold contents may be able to - // be optimized by only partially expanding the fold, just enough to check any filters - // that may be applied to the fold count. - // - // For example, if `@filter(op: ">", value: ["$ten"])` is our only filter on the count - // of the fold, we can stop computing the rest of the fold after seeing we have 11 elements. - Some(min_fold_count_limit) if safe_to_skip_part_of_fold => { - iterator.take(*min_fold_count_limit).collect() - } + // Some queries may be able to be optimized by only partially expanding the fold, + // just enough to check any filters that may be applied to the fold count. + Some(min_fold_count_limit) => iterator.take(*min_fold_count_limit).collect(), // We weren't able to find any early-termination condition for materializing the fold, // so materialize the whole thing and return it. _ => iterator.collect(), @@ -502,25 +495,41 @@ fn compute_fold<'query, AdapterT: Adapter<'query> + 'query>( let fold_component = fold.component.clone(); let fold_eid = fold.eid; let max_fold_size = get_max_fold_count_limit(carrier, fold.as_ref()); - let min_fold_size = get_min_fold_count_limit(carrier, fold.as_ref()); - let no_outputs_in_fold = fold.component.outputs.is_empty(); - let has_output_on_fold_count = - fold.fold_specific_outputs.values().any(|x| *x == FoldSpecificFieldKind::Count); - let has_tag_on_fold_count = parent_component.vertices.values().any(|vertex| { - vertex.filters.iter().any(|filter| { - let Some(Argument::Tag(FieldRef::FoldSpecificField(tagged_fold_count))) = - filter.right() - else { - return false; - }; - tagged_fold_count.fold_root_vid == fold.to_vid - && tagged_fold_count.fold_eid == fold.eid - && tagged_fold_count.kind == FoldSpecificFieldKind::Count - }) - }); - let safe_to_skip_part_of_fold = - no_outputs_in_fold && !has_output_on_fold_count && !has_tag_on_fold_count; + // Queries that do not observe the fold count nor any fold contents may be able to + // be optimized by only partially expanding the fold, just enough to check any filters + // that may be applied to the fold count. + // + // For example, if `@filter(op: ">", value: ["$ten"])` is our only filter on the count + // of the fold, we can stop computing the rest of the fold after seeing we have 11 elements. + let min_fold_size = + if let Some(min_fold_size) = get_min_fold_count_limit(carrier, fold.as_ref()) { + let no_outputs_in_fold = fold.component.outputs.is_empty(); + let has_output_on_fold_count = + fold.fold_specific_outputs.values().any(|x| *x == FoldSpecificFieldKind::Count); + let has_tag_on_fold_count = parent_component.vertices.values().any(|vertex| { + vertex.filters.iter().any(|filter| { + let Some(Argument::Tag(FieldRef::FoldSpecificField(tagged_fold_count))) = + filter.right() + else { + return false; + }; + + tagged_fold_count.fold_root_vid == fold.to_vid + && tagged_fold_count.fold_eid == fold.eid + && tagged_fold_count.kind == FoldSpecificFieldKind::Count + }) + }); + + if no_outputs_in_fold && !has_output_on_fold_count && !has_tag_on_fold_count { + Some(min_fold_size) + } else { + None + } + } else { + None + }; + let moved_fold = fold.clone(); let folded_iterator = edge_iterator.filter_map(move |(mut context, neighbors)| { let imported_tags = context.imported_tags.clone(); @@ -544,12 +553,7 @@ fn compute_fold<'query, AdapterT: Adapter<'query> + 'query>( let fold_elements = if fold_exists { // N.B.: Note the `?` at the end here! // This lets us early-discard folds that failed a post-processing filter. - Some(collect_fold_elements( - computed_iterator, - &max_fold_size, - &min_fold_size, - safe_to_skip_part_of_fold, - )?) + Some(collect_fold_elements(computed_iterator, &max_fold_size, &min_fold_size)?) } else { None }; From 7e6354ace0d1a4a932857521bfd2626d728034f9 Mon Sep 17 00:00:00 2001 From: U9G Date: Sat, 5 Aug 2023 11:54:56 -0400 Subject: [PATCH 17/17] improve comment for why we return None --- trustfall_core/src/interpreter/execution.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/trustfall_core/src/interpreter/execution.rs b/trustfall_core/src/interpreter/execution.rs index 43ab8c15..f3492732 100644 --- a/trustfall_core/src/interpreter/execution.rs +++ b/trustfall_core/src/interpreter/execution.rs @@ -341,10 +341,8 @@ fn get_min_fold_count_limit(carrier: &mut QueryCarrier, fold: &IRFold) -> Option .expect("for field value to be coercible to usize"); Some(variable_value.saturating_add(1)) } - // This optimization is only valid if we know that every - // filter applied to the folded element count is a comparison - // that we can determine the number of elements needed to satisfy - // the filter. + // If we don't know how many elements of the fold are required + // to satisfy this filter, fail early. _ => return None, };