Skip to content

Commit

Permalink
Cleanup: Remove extraneous_linkable_specs (#1530)
Browse files Browse the repository at this point in the history
The difference between `queried_linkable_specs`,
`required_linkable_specs`, and `extraneous_linkable_specs` has often
been overlooked and therefore been the source of many bugs. In [this
PR](#1506), we removed the
unneeded `FilterElementsNodes` that were the main reason we needed those
different spec variables. So now we can remove the
`extraneous_linkable_specs` variables altogether, and hopefully reduce
confusion here.
There is also some related cleanup in here. It's not a huge PR but I
still recommend reviewing by commit.
  • Loading branch information
courtneyholcomb authored Nov 14, 2024
1 parent 0be9c0a commit df48dd8
Show file tree
Hide file tree
Showing 16 changed files with 63 additions and 57 deletions.
48 changes: 27 additions & 21 deletions metricflow/dataflow/builder/dataflow_plan_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ def _build_aggregated_conversion_node(
)

# Build measure recipes
base_required_linkable_specs, extraneous_linkable_specs = self.__get_required_and_extraneous_linkable_specs(
base_required_linkable_specs = self.__get_required_linkable_specs(
queried_linkable_specs=queried_linkable_specs,
filter_specs=base_measure_spec.filter_spec_set.all_filter_specs,
)
Expand Down Expand Up @@ -578,7 +578,7 @@ def _build_derived_metric_output_node(
)
)

required_linkable_specs, extraneous_linkable_specs = self.__get_required_and_extraneous_linkable_specs(
required_linkable_specs = self.__get_required_linkable_specs(
queried_linkable_specs=queried_linkable_specs, filter_specs=metric_spec.filter_spec_set.all_filter_specs
)

Expand Down Expand Up @@ -665,13 +665,18 @@ def _build_derived_metric_output_node(
output_node = WhereConstraintNode.create(
parent_node=output_node, where_specs=metric_spec.filter_spec_set.all_filter_specs
)
if not extraneous_linkable_specs.is_subset_of(queried_linkable_specs):
output_node = FilterElementsNode.create(
parent_node=output_node,
include_specs=InstanceSpecSet(metric_specs=(metric_spec,)).merge(
InstanceSpecSet.create_from_specs(queried_linkable_specs.as_tuple)
),
specs_in_filters = set(
linkable_spec
for filter_spec in metric_spec.filter_spec_set.all_filter_specs
for linkable_spec in filter_spec.linkable_specs
)
if not specs_in_filters.issubset(queried_linkable_specs.as_tuple):
output_node = FilterElementsNode.create(
parent_node=output_node,
include_specs=InstanceSpecSet(metric_specs=(metric_spec,)).merge(
InstanceSpecSet.create_from_specs(queried_linkable_specs.as_tuple)
),
)
return output_node

def _build_any_metric_output_node(self, parameter_set: BuildAnyMetricOutputNodeParameterSet) -> DataflowPlanNode:
Expand Down Expand Up @@ -806,7 +811,7 @@ def _build_plan_for_distinct_values(
filter_intersection=query_spec.filter_intersection,
)

required_linkable_specs, _ = self.__get_required_and_extraneous_linkable_specs(
required_linkable_specs = self.__get_required_linkable_specs(
queried_linkable_specs=query_spec.linkable_specs, filter_specs=query_level_filter_specs
)
predicate_pushdown_state = PredicatePushdownState(
Expand Down Expand Up @@ -1446,16 +1451,16 @@ def build_aggregated_measure(
measure_recipe=measure_recipe,
)

def __get_required_and_extraneous_linkable_specs(
def __get_required_linkable_specs(
self,
queried_linkable_specs: LinkableSpecSet,
filter_specs: Sequence[WhereFilterSpec],
measure_spec_properties: Optional[MeasureSpecProperties] = None,
) -> Tuple[LinkableSpecSet, LinkableSpecSet]:
"""Get the required and extraneous linkable specs for this query.
) -> LinkableSpecSet:
"""Get all required linkable specs for this query, including extraneous linkable specs.
Extraneous linkable specs are specs that are used in this phase that should not show up in the final result
unless it was already a requested spec in the query (e.g., linkable spec used in where constraint)
unless it was already a requested spec in the query, e.g., a linkable spec used in where constraint is extraneous.
"""
linkable_spec_sets_to_merge: List[LinkableSpecSet] = []
for filter_spec in filter_specs:
Expand All @@ -1477,12 +1482,14 @@ def __get_required_and_extraneous_linkable_specs(
required_linkable_specs = queried_linkable_specs.merge(extraneous_linkable_specs).dedupe()

# Custom grains require joining to their base grain, so add base grain to extraneous specs.
base_grain_set = LinkableSpecSet.create_from_specs(
[spec.with_base_grain() for spec in required_linkable_specs.time_dimension_specs_with_custom_grain]
)
extraneous_linkable_specs = extraneous_linkable_specs.merge(base_grain_set).dedupe()
if required_linkable_specs.time_dimension_specs_with_custom_grain:
base_grain_set = LinkableSpecSet.create_from_specs(
[spec.with_base_grain() for spec in required_linkable_specs.time_dimension_specs_with_custom_grain]
)
extraneous_linkable_specs = extraneous_linkable_specs.merge(base_grain_set).dedupe()
required_linkable_specs = required_linkable_specs.merge(extraneous_linkable_specs).dedupe()

return required_linkable_specs, extraneous_linkable_specs
return required_linkable_specs

def _build_aggregated_measure_from_measure_source_node(
self,
Expand Down Expand Up @@ -1529,7 +1536,7 @@ def _build_aggregated_measure_from_measure_source_node(
LazyFormat(lambda: f"Adjusted time range constraint to: {cumulative_metric_adjusted_time_constraint}")
)

required_linkable_specs, extraneous_linkable_specs = self.__get_required_and_extraneous_linkable_specs(
required_linkable_specs = self.__get_required_linkable_specs(
queried_linkable_specs=queried_linkable_specs,
filter_specs=metric_input_measure_spec.filter_spec_set.all_filter_specs,
measure_spec_properties=measure_properties,
Expand Down Expand Up @@ -1756,15 +1763,14 @@ def _build_pre_aggregation_plan(
if measure_properties and measure_properties.non_additive_dimension_spec:
if queried_linkable_specs is None:
raise ValueError(
"`queried_linkable_specs` must be provided in _build_pre_aggregation_plan() if "
"`queried_linkable_specs` must be provided in when building pre-aggregation plan if "
"`non_additive_dimension_spec` is present."
)
output_node = self._build_semi_additive_join_node(
measure_properties=measure_properties,
queried_linkable_specs=queried_linkable_specs,
parent_node=output_node,
)

output_node = FilterElementsNode.create(
parent_node=output_node, include_specs=filter_to_specs, distinct=distinct
)
Expand Down
2 changes: 1 addition & 1 deletion metricflow/dataflow/builder/node_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def join_description(self) -> JoinDescription:
)

filtered_node_to_join = FilterElementsNode.create(
parent_node=self.node_to_join, include_specs=group_specs_by_type(include_specs)
parent_node=self.node_to_join, include_specs=group_specs_by_type(include_specs).dedupe()
)

return JoinDescription(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ FROM (
) subq_0
) subq_1
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_3.listing
, subq_3.is_lux_latest
Expand Down Expand Up @@ -841,7 +841,7 @@ FULL OUTER JOIN (
) subq_10
) subq_11
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_13.listing
, subq_13.is_lux_latest
Expand Down Expand Up @@ -1419,7 +1419,7 @@ FULL OUTER JOIN (
) subq_20
) subq_21
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_23.listing
, subq_23.is_lux_latest
Expand Down Expand Up @@ -1831,7 +1831,7 @@ FULL OUTER JOIN (
) subq_30
) subq_31
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_33.listing
, subq_33.is_lux_latest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ FROM (
) subq_0
) subq_1
LEFT OUTER JOIN (
-- Pass Only Elements: ['country_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['country_latest', 'is_lux_latest', 'listing']
SELECT
subq_3.listing
, subq_3.country_latest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ FROM (
) subq_0
) subq_1
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_3.listing
, subq_3.is_lux_latest
Expand Down Expand Up @@ -841,7 +841,7 @@ FULL OUTER JOIN (
) subq_10
) subq_11
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_13.listing
, subq_13.is_lux_latest
Expand Down Expand Up @@ -1419,7 +1419,7 @@ FULL OUTER JOIN (
) subq_20
) subq_21
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_23.listing
, subq_23.is_lux_latest
Expand Down Expand Up @@ -1831,7 +1831,7 @@ FULL OUTER JOIN (
) subq_30
) subq_31
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_33.listing
, subq_33.is_lux_latest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ FROM (
) subq_0
) subq_1
LEFT OUTER JOIN (
-- Pass Only Elements: ['country_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['country_latest', 'is_lux_latest', 'listing']
SELECT
subq_3.listing
, subq_3.country_latest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ FROM (
) subq_0
) subq_1
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_3.listing
, subq_3.is_lux_latest
Expand Down Expand Up @@ -841,7 +841,7 @@ FULL OUTER JOIN (
) subq_10
) subq_11
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_13.listing
, subq_13.is_lux_latest
Expand Down Expand Up @@ -1419,7 +1419,7 @@ FULL OUTER JOIN (
) subq_20
) subq_21
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_23.listing
, subq_23.is_lux_latest
Expand Down Expand Up @@ -1831,7 +1831,7 @@ FULL OUTER JOIN (
) subq_30
) subq_31
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_33.listing
, subq_33.is_lux_latest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ FROM (
) subq_0
) subq_1
LEFT OUTER JOIN (
-- Pass Only Elements: ['country_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['country_latest', 'is_lux_latest', 'listing']
SELECT
subq_3.listing
, subq_3.country_latest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ FROM (
) subq_0
) subq_1
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_3.listing
, subq_3.is_lux_latest
Expand Down Expand Up @@ -841,7 +841,7 @@ FULL OUTER JOIN (
) subq_10
) subq_11
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_13.listing
, subq_13.is_lux_latest
Expand Down Expand Up @@ -1419,7 +1419,7 @@ FULL OUTER JOIN (
) subq_20
) subq_21
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_23.listing
, subq_23.is_lux_latest
Expand Down Expand Up @@ -1831,7 +1831,7 @@ FULL OUTER JOIN (
) subq_30
) subq_31
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_33.listing
, subq_33.is_lux_latest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ FROM (
) subq_0
) subq_1
LEFT OUTER JOIN (
-- Pass Only Elements: ['country_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['country_latest', 'is_lux_latest', 'listing']
SELECT
subq_3.listing
, subq_3.country_latest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ FROM (
) subq_0
) subq_1
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_3.listing
, subq_3.is_lux_latest
Expand Down Expand Up @@ -841,7 +841,7 @@ FULL OUTER JOIN (
) subq_10
) subq_11
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_13.listing
, subq_13.is_lux_latest
Expand Down Expand Up @@ -1419,7 +1419,7 @@ FULL OUTER JOIN (
) subq_20
) subq_21
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_23.listing
, subq_23.is_lux_latest
Expand Down Expand Up @@ -1831,7 +1831,7 @@ FULL OUTER JOIN (
) subq_30
) subq_31
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_33.listing
, subq_33.is_lux_latest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ FROM (
) subq_0
) subq_1
LEFT OUTER JOIN (
-- Pass Only Elements: ['country_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['country_latest', 'is_lux_latest', 'listing']
SELECT
subq_3.listing
, subq_3.country_latest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ FROM (
) subq_0
) subq_1
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_3.listing
, subq_3.is_lux_latest
Expand Down Expand Up @@ -841,7 +841,7 @@ FULL OUTER JOIN (
) subq_10
) subq_11
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_13.listing
, subq_13.is_lux_latest
Expand Down Expand Up @@ -1419,7 +1419,7 @@ FULL OUTER JOIN (
) subq_20
) subq_21
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_23.listing
, subq_23.is_lux_latest
Expand Down Expand Up @@ -1831,7 +1831,7 @@ FULL OUTER JOIN (
) subq_30
) subq_31
LEFT OUTER JOIN (
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['capacity_latest', 'is_lux_latest', 'listing']
SELECT
subq_33.listing
, subq_33.is_lux_latest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ FROM (
) subq_0
) subq_1
LEFT OUTER JOIN (
-- Pass Only Elements: ['country_latest', 'is_lux_latest', 'listing', 'listing']
-- Pass Only Elements: ['country_latest', 'is_lux_latest', 'listing']
SELECT
subq_3.listing
, subq_3.country_latest
Expand Down
Loading

0 comments on commit df48dd8

Please sign in to comment.