Skip to content

Commit

Permalink
[opt](nereids) refine left semi/anti shortcut cost
Browse files (browse the repository at this point in the history)
  • Loading branch information
zhongjian.xzj committed Jul 18, 2024
1 parent 70b8d19 commit ff8e61f
Show file tree
Hide file tree
Showing 17 changed files with 213 additions and 216 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -414,15 +414,14 @@ public Cost visitPhysicalHashJoin(
}
}
return CostV1.of(context.getSessionVariable(),
- leftRowCount * probeShortcutFactor
- + rightRowCount * buildSideFactor
+ leftRowCount + rightRowCount * buildSideFactor * probeShortcutFactor
+ outputRowCount * probeSideFactor,
rightRowCount,
0
);
}
return CostV1.of(context.getSessionVariable(),
- leftRowCount * probeShortcutFactor + rightRowCount + outputRowCount,
+ leftRowCount + rightRowCount * probeShortcutFactor + outputRowCount,
rightRowCount, 0
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1299,7 +1299,7 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) {
private double broadcastRightTableScaleFactor = 0.0;

@VariableMgr.VarAttr(name = LEFT_SEMI_OR_ANTI_PROBE_FACTOR)
- private double leftSemiOrAntiProbeFactor = 0.1;
+ private double leftSemiOrAntiProbeFactor = 0.2;

@VariableMgr.VarAttr(name = BROADCAST_ROW_COUNT_LIMIT, needForward = true)
private double broadcastRowCountLimit = 30000000;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,27 @@ PhysicalResultSink
----------hashAgg[GLOBAL]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF3 cs_order_number->[cs_order_number]
----------------hashJoin[RIGHT_SEMI_JOIN shuffleBucket] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4 cs_order_number->[cs_order_number]
------------------PhysicalProject
--------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk]
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_ship_date_sk]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk]
------------------------------hashJoin[LEFT_ANTI_JOIN shuffle] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=()
--------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4
------------------hashJoin[RIGHT_ANTI_JOIN shuffle] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() build RFs:RF3 cs_order_number->[cr_order_number]
--------------------PhysicalProject
----------------------PhysicalOlapScan[catalog_returns] apply RFs: RF3
--------------------PhysicalProject
----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk]
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_ship_date_sk]
----------------------------PhysicalProject
------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk]
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[catalog_returns]
------------------------------PhysicalProject
--------------------------------filter((customer_address.ca_state = 'PA'))
----------------------------------PhysicalOlapScan[customer_address]
--------------------------PhysicalProject
----------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01'))
------------------------------PhysicalOlapScan[date_dim]
----------------------PhysicalProject
------------------------filter((call_center.cc_county = 'Williamson County'))
--------------------------PhysicalOlapScan[call_center]
----------------------------------filter((customer_address.ca_state = 'PA'))
------------------------------------PhysicalOlapScan[customer_address]
----------------------------PhysicalProject
------------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01'))
--------------------------------PhysicalOlapScan[date_dim]
------------------------PhysicalProject
--------------------------filter((call_center.cc_county = 'Williamson County'))
----------------------------PhysicalOlapScan[call_center]

Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------------hashAgg[LOCAL]
--------------PhysicalUnion
----------------PhysicalProject
------------------hashJoin[RIGHT_SEMI_JOIN shuffle] hashCondition=((catalog_sales.cs_item_sk = frequent_ss_items.item_sk)) otherCondition=() build RFs:RF4 cs_item_sk->[item_sk]
--------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4
------------------hashJoin[LEFT_SEMI_JOIN shuffle] hashCondition=((catalog_sales.cs_item_sk = frequent_ss_items.item_sk)) otherCondition=()
--------------------PhysicalProject
----------------------hashJoin[LEFT_SEMI_JOIN shuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = best_ss_customer.c_customer_sk)) otherCondition=()
------------------------PhysicalProject
Expand All @@ -64,17 +63,18 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------------------------------filter((date_dim.d_moy = 7) and (date_dim.d_year = 2000))
--------------------------------PhysicalOlapScan[date_dim]
------------------------PhysicalCteConsumer ( cteId=CTEId#2 )
--------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------PhysicalProject
------------------hashJoin[RIGHT_SEMI_JOIN shuffle] hashCondition=((web_sales.ws_item_sk = frequent_ss_items.item_sk)) otherCondition=() build RFs:RF6 ws_item_sk->[item_sk]
--------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF6
------------------hashJoin[LEFT_SEMI_JOIN shuffle] hashCondition=((web_sales.ws_item_sk = frequent_ss_items.item_sk)) otherCondition=()
--------------------PhysicalProject
----------------------hashJoin[LEFT_SEMI_JOIN shuffle] hashCondition=((web_sales.ws_bill_customer_sk = best_ss_customer.c_customer_sk)) otherCondition=()
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ws_sold_date_sk]
--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_sold_date_sk]
----------------------------PhysicalProject
------------------------------PhysicalOlapScan[web_sales] apply RFs: RF5
------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4
----------------------------PhysicalProject
------------------------------filter((date_dim.d_moy = 7) and (date_dim.d_year = 2000))
--------------------------------PhysicalOlapScan[date_dim]
------------------------PhysicalCteConsumer ( cteId=CTEId#2 )
--------------------PhysicalCteConsumer ( cteId=CTEId#0 )

Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@ PhysicalResultSink
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------PhysicalProject
----------hashJoin[INNER_JOIN colocated] hashCondition=((ss_items.item_id = ws_items.item_id)) otherCondition=((cast(cs_item_rev as DOUBLE) <= cast((1.1 * ws_item_rev) as DOUBLE)) and (cast(cs_item_rev as DOUBLE) >= cast((0.9 * ws_item_rev) as DOUBLE)) and (cast(ss_item_rev as DOUBLE) <= cast((1.1 * ws_item_rev) as DOUBLE)) and (cast(ss_item_rev as DOUBLE) >= cast((0.9 * ws_item_rev) as DOUBLE)) and (cast(ws_item_rev as DOUBLE) <= cast((1.1 * cs_item_rev) as DOUBLE)) and (cast(ws_item_rev as DOUBLE) <= cast((1.1 * ss_item_rev) as DOUBLE)) and (cast(ws_item_rev as DOUBLE) >= cast((0.9 * cs_item_rev) as DOUBLE)) and (cast(ws_item_rev as DOUBLE) >= cast((0.9 * ss_item_rev) as DOUBLE))) build RFs:RF13 item_id->[i_item_id]
----------hashJoin[INNER_JOIN colocated] hashCondition=((ss_items.item_id = cs_items.item_id)) otherCondition=((cast(cs_item_rev as DOUBLE) <= cast((1.1 * ss_item_rev) as DOUBLE)) and (cast(cs_item_rev as DOUBLE) <= cast((1.1 * ws_item_rev) as DOUBLE)) and (cast(cs_item_rev as DOUBLE) >= cast((0.9 * ss_item_rev) as DOUBLE)) and (cast(cs_item_rev as DOUBLE) >= cast((0.9 * ws_item_rev) as DOUBLE)) and (cast(ss_item_rev as DOUBLE) <= cast((1.1 * cs_item_rev) as DOUBLE)) and (cast(ss_item_rev as DOUBLE) >= cast((0.9 * cs_item_rev) as DOUBLE)) and (cast(ws_item_rev as DOUBLE) <= cast((1.1 * cs_item_rev) as DOUBLE)) and (cast(ws_item_rev as DOUBLE) >= cast((0.9 * cs_item_rev) as DOUBLE))) build RFs:RF13 item_id->[i_item_id]
------------PhysicalProject
--------------hashAgg[GLOBAL]
----------------PhysicalDistribute[DistributionSpecHash]
------------------hashAgg[LOCAL]
--------------------PhysicalProject
----------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF12 i_item_sk->[ws_item_sk]
----------------------hashJoin[INNER_JOIN shuffle] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF12 i_item_sk->[cs_item_sk]
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF11 d_date_sk->[ws_sold_date_sk]
--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF11 d_date_sk->[cs_sold_date_sk]
----------------------------PhysicalProject
------------------------------PhysicalOlapScan[web_sales] apply RFs: RF11 RF12
------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF11 RF12
----------------------------PhysicalProject
------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF10 d_date->[d_date]
--------------------------------PhysicalProject
Expand All @@ -32,7 +32,7 @@ PhysicalResultSink
------------------------PhysicalProject
--------------------------PhysicalOlapScan[item] apply RFs: RF13
------------PhysicalProject
--------------hashJoin[INNER_JOIN colocated] hashCondition=((ss_items.item_id = cs_items.item_id)) otherCondition=((cast(cs_item_rev as DOUBLE) <= cast((1.1 * ss_item_rev) as DOUBLE)) and (cast(cs_item_rev as DOUBLE) >= cast((0.9 * ss_item_rev) as DOUBLE)) and (cast(ss_item_rev as DOUBLE) <= cast((1.1 * cs_item_rev) as DOUBLE)) and (cast(ss_item_rev as DOUBLE) >= cast((0.9 * cs_item_rev) as DOUBLE))) build RFs:RF8 item_id->[i_item_id]
--------------hashJoin[INNER_JOIN colocated] hashCondition=((ss_items.item_id = ws_items.item_id)) otherCondition=((cast(ss_item_rev as DOUBLE) <= cast((1.1 * ws_item_rev) as DOUBLE)) and (cast(ss_item_rev as DOUBLE) >= cast((0.9 * ws_item_rev) as DOUBLE)) and (cast(ws_item_rev as DOUBLE) <= cast((1.1 * ss_item_rev) as DOUBLE)) and (cast(ws_item_rev as DOUBLE) >= cast((0.9 * ss_item_rev) as DOUBLE))) build RFs:RF8 item_id->[i_item_id]
----------------PhysicalProject
------------------hashAgg[GLOBAL]
--------------------PhysicalDistribute[DistributionSpecHash]
Expand Down Expand Up @@ -63,11 +63,11 @@ PhysicalResultSink
--------------------PhysicalDistribute[DistributionSpecHash]
----------------------hashAgg[LOCAL]
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[cs_item_sk]
--------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ws_item_sk]
----------------------------PhysicalProject
------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk]
------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk]
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3
----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3
--------------------------------PhysicalProject
----------------------------------hashJoin[LEFT_SEMI_JOIN broadcast] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF1 d_date->[d_date]
------------------------------------PhysicalProject
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,44 +14,44 @@ PhysicalResultSink
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------hashAgg[LOCAL]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
----------------------------hashJoin[LEFT_ANTI_JOIN colocated] hashCondition=((store_returns.sr_ticket_number = store_sales.ss_ticket_number) and (store_sales.ss_item_sk = store_returns.sr_item_sk)) otherCondition=()
------------------------------PhysicalProject
--------------------------------hashJoin[LEFT_ANTI_JOIN colocated] hashCondition=((store_returns.sr_ticket_number = store_sales.ss_ticket_number) and (store_sales.ss_item_sk = store_returns.sr_item_sk)) otherCondition=()
--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[store_returns]
------------------------------------filter((date_dim.d_year = 1998))
--------------------------------------PhysicalOlapScan[date_dim]
------------------------------PhysicalProject
--------------------------------filter((date_dim.d_year = 1998))
----------------------------------PhysicalOlapScan[date_dim]
--------------------------------PhysicalOlapScan[store_returns]
------------------PhysicalProject
--------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------hashAgg[LOCAL]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
----------------------------hashJoin[LEFT_ANTI_JOIN colocated] hashCondition=((web_returns.wr_order_number = web_sales.ws_order_number) and (web_sales.ws_item_sk = web_returns.wr_item_sk)) otherCondition=()
------------------------------PhysicalProject
--------------------------------hashJoin[LEFT_ANTI_JOIN colocated] hashCondition=((web_returns.wr_order_number = web_sales.ws_order_number) and (web_sales.ws_item_sk = web_returns.wr_item_sk)) otherCondition=()
--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[web_returns]
------------------------------------filter((date_dim.d_year = 1998))
--------------------------------------PhysicalOlapScan[date_dim]
------------------------------PhysicalProject
--------------------------------filter((date_dim.d_year = 1998))
----------------------------------PhysicalOlapScan[date_dim]
--------------------------------PhysicalOlapScan[web_returns]
--------------PhysicalProject
----------------hashAgg[GLOBAL]
------------------PhysicalDistribute[DistributionSpecHash]
--------------------hashAgg[LOCAL]
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk]
------------------------hashJoin[LEFT_ANTI_JOIN colocated] hashCondition=((catalog_returns.cr_order_number = catalog_sales.cs_order_number) and (catalog_sales.cs_item_sk = catalog_returns.cr_item_sk)) otherCondition=()
--------------------------PhysicalProject
----------------------------hashJoin[LEFT_ANTI_JOIN colocated] hashCondition=((catalog_returns.cr_order_number = catalog_sales.cs_order_number) and (catalog_sales.cs_item_sk = catalog_returns.cr_item_sk)) otherCondition=()
----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk]
------------------------------PhysicalProject
--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
------------------------------PhysicalProject
--------------------------------PhysicalOlapScan[catalog_returns]
--------------------------------filter((date_dim.d_year = 1998))
----------------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalProject
----------------------------filter((date_dim.d_year = 1998))
------------------------------PhysicalOlapScan[date_dim]
----------------------------PhysicalOlapScan[catalog_returns]

Loading

0 comments on commit ff8e61f

Please sign in to comment.