Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[dnm] [optimize] MRE::typ(): can we avoid recalculating types? #30894

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions src/adapter/src/coord/peek.rs
Original file line number Diff line number Diff line change
Expand Up @@ -315,13 +315,12 @@ pub fn create_fast_path_plan<T: Timestamp>(
if dataflow_plan.objects_to_build.len() >= 1 && dataflow_plan.objects_to_build[0].id == view_id
{
let mut mir = &*dataflow_plan.objects_to_build[0].plan.as_inner_mut();
if let Some((rows, ..)) = mir.as_const() {
if let Some((rows, found_typ)) = mir.as_const() {
// In the case of a constant, we can return the result now.
return Ok(Some(FastPathPlan::Constant(
rows.clone()
.map(|rows| rows.into_iter().map(|(row, diff)| (row, diff)).collect()),
// For best accuracy, we need to recalculate typ.
mir.typ(),
found_typ.clone(),
)));
} else {
// If there is a TopK that would be completely covered by the finishing, then jump
Expand Down
2 changes: 1 addition & 1 deletion src/environmentd/tests/testdata/http/ws
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ ws-text
ws-text
{"query": "SELECT 1"}
----
{"type":"Notice","payload":{"message":"{\n \"plans\": {\n \"raw\": {\n \"text\": \"Finish output=[#0]\\n Map (1)\\n Constant\\n - ()\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"Map\": {\n \"input\": {\n \"Constant\": {\n \"rows\": [\n {\n \"data\": []\n }\n ],\n \"typ\": {\n \"column_types\": [],\n \"keys\": []\n }\n }\n },\n \"scalars\": [\n {\n \"Literal\": [\n {\n \"data\": [\n 45,\n 1\n ]\n },\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ]\n }\n ]\n }\n }\n },\n \"optimized\": {\n \"global\": {\n \"text\": \"t66:\\n Finish output=[#0]\\n ArrangeBy keys=[[#0]]\\n ReadGlobalFromSameDataflow t65\\n\\nt65:\\n Constant\\n - (1)\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"plans\": [\n {\n \"id\": \"t66\",\n \"plan\": {\n \"ArrangeBy\": {\n \"input\": {\n \"Get\": {\n \"id\": {\n \"Global\": {\n \"Transient\": 65\n }\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ],\n \"keys\": [\n []\n ]\n },\n \"access_strategy\": \"SameDataflow\"\n }\n },\n \"keys\": [\n [\n {\n \"Column\": 0\n }\n ]\n ]\n }\n }\n },\n {\n \"id\": \"t65\",\n \"plan\": {\n \"Constant\": {\n \"rows\": {\n \"Ok\": [\n [\n {\n \"data\": [\n 45,\n 1\n ]\n },\n 1\n ]\n ]\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ],\n \"keys\": []\n }\n }\n }\n }\n ],\n \"sources\": []\n }\n },\n \"fast_path\": {\n \"text\": \"Explained Query (fast path):\\n Finish output=[#0]\\n Constant\\n - (1)\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"plans\": [\n {\n \"id\": \"Explained Query (fast path)\",\n \"plan\": {\n \"Constant\": [\n {\n \"Ok\": [\n [\n {\n \"data\": [\n 45,\n 1\n ]\n },\n 1\n ]\n ]\n },\n {\n \"column_types\": [\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ],\n \"keys\": [\n []\n ]\n }\n ]\n }\n }\n ],\n \"sources\": []\n }\n }\n }\n },\n \"insights\": {\n \"imports\": {},\n \"fast_path_clusters\": {},\n \"fast_path_limit\": null,\n \"persist_count\": []\n },\n \"cluster\": {\n \"name\": \"mz_catalog_server\",\n \"id\": {\n \"System\": 2\n }\n },\n \"redacted_sql\": \"SELECT '<REDACTED>'\"\n}","code":"MZ001","severity":"notice"}}
{"type":"Notice","payload":{"message":"{\n \"plans\": {\n \"raw\": {\n \"text\": \"Finish output=[#0]\\n Map (1)\\n Constant\\n - ()\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"Map\": {\n \"input\": {\n \"Constant\": {\n \"rows\": [\n {\n \"data\": []\n }\n ],\n \"typ\": {\n \"column_types\": [],\n \"keys\": []\n }\n }\n },\n \"scalars\": [\n {\n \"Literal\": [\n {\n \"data\": [\n 45,\n 1\n ]\n },\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ]\n }\n ]\n }\n }\n },\n \"optimized\": {\n \"global\": {\n \"text\": \"t66:\\n Finish output=[#0]\\n ArrangeBy keys=[[#0]]\\n ReadGlobalFromSameDataflow t65\\n\\nt65:\\n Constant\\n - (1)\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"plans\": [\n {\n \"id\": \"t66\",\n \"plan\": {\n \"ArrangeBy\": {\n \"input\": {\n \"Get\": {\n \"id\": {\n \"Global\": {\n \"Transient\": 65\n }\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ],\n \"keys\": [\n []\n ]\n },\n \"access_strategy\": \"SameDataflow\"\n }\n },\n \"keys\": [\n [\n {\n \"Column\": 0\n }\n ]\n ]\n }\n }\n },\n {\n \"id\": \"t65\",\n \"plan\": {\n \"Constant\": {\n \"rows\": {\n \"Ok\": [\n [\n {\n \"data\": [\n 45,\n 1\n ]\n },\n 1\n ]\n ]\n },\n \"typ\": {\n \"column_types\": [\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ],\n \"keys\": []\n }\n }\n }\n }\n ],\n \"sources\": []\n }\n },\n \"fast_path\": {\n \"text\": \"Explained Query (fast path):\\n Finish output=[#0]\\n Constant\\n - (1)\\n\\nTarget cluster: mz_catalog_server\\n\",\n \"json\": {\n \"plans\": [\n {\n \"id\": \"Explained Query (fast path)\",\n \"plan\": {\n \"Constant\": [\n {\n \"Ok\": [\n [\n {\n \"data\": [\n 45,\n 1\n ]\n },\n 1\n ]\n ]\n },\n {\n \"column_types\": [\n {\n \"scalar_type\": \"Int32\",\n \"nullable\": false\n }\n ],\n \"keys\": []\n }\n ]\n }\n }\n ],\n \"sources\": []\n }\n }\n }\n },\n \"insights\": {\n \"imports\": {},\n \"fast_path_clusters\": {},\n \"fast_path_limit\": null,\n \"persist_count\": []\n },\n \"cluster\": {\n \"name\": \"mz_catalog_server\",\n \"id\": {\n \"System\": 2\n }\n },\n \"redacted_sql\": \"SELECT '<REDACTED>'\"\n}","code":"MZ001","severity":"notice"}}
{"type":"CommandStarting","payload":{"has_rows":true,"is_streaming":false}}
{"type":"Rows","payload":{"columns":[{"name":"?column?","type_oid":23,"type_len":4,"type_mod":-1}]}}
{"type":"Row","payload":["1"]}
Expand Down
40 changes: 38 additions & 2 deletions src/expr/src/relation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use mz_ore::collections::CollectionExt;
use mz_ore::id_gen::IdGen;
use mz_ore::metrics::Histogram;
use mz_ore::num::NonNeg;
use mz_ore::soft_assert_no_log;
use mz_ore::stack::RecursionLimitError;
use mz_ore::str::Indent;
use mz_proto::{IntoRustIfSome, ProtoType, RustType, TryFromProtoError};
Expand Down Expand Up @@ -1688,8 +1689,40 @@ impl MirRelationExpr {
}

/// Take ownership of `self`, leaving an empty `MirRelationExpr::Constant` with the correct type.
///
/// This calls `.typ()` to determine scalar types. If you already know the scalar types type,
/// then use either
/// [MirRelationExpr::take_safely_with_col_types] or [MirRelationExpr::take_safely_with_rel_type].
pub fn take_safely(&mut self) -> MirRelationExpr {
let typ = self.typ();
self.take_safely_with_rel_type(self.typ())
}

/// Take ownership of `self`, leaving an empty `MirRelationExpr::Constant` with the given scalar
/// types. Keys and nullability are ignored in the given `RelationType`, and instead we set the
/// best possible key and nullability, since we are making an empty collection.
///
/// Compared to `take_safely()`, this just saves the cost of the `.typ()` call.
pub fn take_safely_with_col_types(&mut self, typ: Vec<ColumnType>) -> MirRelationExpr {
self.take_safely_with_rel_type(RelationType::new(typ))
}

/// Take ownership of `self`, leaving an empty `MirRelationExpr::Constant` with the given scalar
/// types. The given scalar types should be `base_eq` with the types that `typ()` would find.
/// Keys and nullability are ignored in the given `RelationType`, and instead we set the best
/// possible key and nullability, since we are making an empty collection.
///
/// Compared to `take_safely()`, this just saves the cost of the `.typ()` call.
pub fn take_safely_with_rel_type(&mut self, mut typ: RelationType) -> MirRelationExpr {
soft_assert_no_log!(self
.typ()
.column_types
.iter()
.zip_eq(typ.column_types.iter())
.all(|(t1, t2)| t1.scalar_type.base_eq(&t2.scalar_type)));
typ.keys = vec![vec![]];
for ct in typ.column_types.iter_mut() {
ct.nullable = false;
}
std::mem::replace(
self,
MirRelationExpr::Constant {
Expand All @@ -1698,6 +1731,7 @@ impl MirRelationExpr {
},
)
}

/// Take ownership of `self`, leaving an empty `MirRelationExpr::Constant` with an **incorrect** type.
///
/// This should only be used if `self` is about to be dropped or otherwise overwritten.
Expand Down Expand Up @@ -2233,11 +2267,13 @@ impl MirRelationExpr {
value.visit_pre_mut(|e| {
if let MirRelationExpr::Get {
id: crate::Id::Local(id),
typ,
..
} = e
{
let typ = typ.clone();
if deadlist.contains(id) {
e.take_safely();
e.take_safely_with_rel_type(typ);
}
}
});
Expand Down
5 changes: 3 additions & 2 deletions src/transform/src/equivalence_propagation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ impl EquivalencePropagation {
let expr_type = derived
.value::<RelationType>()
.expect("RelationType required");
assert!(expr_type.is_some());
let expr_equivalences = derived
.value::<Equivalences>()
.expect("Equivalences required");
Expand All @@ -132,7 +133,7 @@ impl EquivalencePropagation {
let expr_equivalences = if let Some(e) = expr_equivalences {
e
} else {
expr.take_safely();
expr.take_safely_with_col_types(expr_type.clone().unwrap());
return;
};

Expand All @@ -147,7 +148,7 @@ impl EquivalencePropagation {

outer_equivalences.minimize(expr_type.as_ref().map(|x| &x[..]));
if outer_equivalences.unsatisfiable() {
expr.take_safely();
expr.take_safely_with_col_types(expr_type.clone().unwrap());
return;
}

Expand Down
4 changes: 2 additions & 2 deletions src/transform/src/fold_constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ impl FoldConstants {
.iter()
.any(|p| p.is_literal_false() || p.is_literal_null())
{
relation.take_safely();
relation.take_safely_with_rel_type(relation_type.clone());
} else if let Some((rows, ..)) = (**input).as_const() {
// Evaluate errors last, to reduce risk of spurious errors.
predicates.sort_by_key(|p| p.is_literal_err());
Expand Down Expand Up @@ -291,7 +291,7 @@ impl FoldConstants {
..
} => {
if inputs.iter().any(|e| e.is_empty()) {
relation.take_safely();
relation.take_safely_with_rel_type(relation_type.clone());
} else if let Some(e) = inputs.iter().find_map(|i| i.as_const_err()) {
*relation = MirRelationExpr::Constant {
rows: Err(e.clone()),
Expand Down
4 changes: 3 additions & 1 deletion src/transform/src/predicate_pushdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,9 @@ impl PredicatePushdown {
.count()
> 1
{
relation.take_safely();
relation.take_safely_with_rel_type(
relation.typ_with_input_types(&input_types),
);
return Ok(());
}

Expand Down
4 changes: 1 addition & 3 deletions test/sqllogictest/explain/plan_insights.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1248,9 +1248,7 @@ EXPLAIN PLAN INSIGHTS AS JSON FOR SELECT 'abc'
"nullable": false
}
],
"keys": [
[]
]
"keys": []
}
]
}
Expand Down
Loading