From 6e637488188c6620ecd113bf47987bd98f6d7871 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Thu, 4 Jul 2024 14:47:14 +0800 Subject: [PATCH] Enable clone_on_ref_ptr clippy lint on expr crate (#11238) --- datafusion/expr/src/expr_rewriter/mod.rs | 2 +- datafusion/expr/src/expr_schema.rs | 2 +- datafusion/expr/src/lib.rs | 2 ++ datafusion/expr/src/logical_plan/builder.rs | 23 +++++++++----- datafusion/expr/src/logical_plan/plan.rs | 30 +++++++++---------- datafusion/expr/src/type_coercion/binary.rs | 8 ++--- .../expr/src/type_coercion/functions.rs | 12 ++++---- datafusion/expr/src/udaf.rs | 9 ++++-- datafusion/expr/src/udf.rs | 8 ++--- datafusion/expr/src/udwf.rs | 6 ++-- datafusion/expr/src/utils.rs | 2 +- 11 files changed, 58 insertions(+), 46 deletions(-) diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index 024e4a0ceae5..91bec501f4a0 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -215,7 +215,7 @@ pub fn coerce_plan_expr_for_schema( LogicalPlan::Projection(Projection { expr, input, .. }) => { let new_exprs = coerce_exprs_for_schema(expr.clone(), input.schema(), schema)?; - let projection = Projection::try_new(new_exprs, input.clone())?; + let projection = Projection::try_new(new_exprs, Arc::clone(input))?; Ok(LogicalPlan::Projection(projection)) } _ => { diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 8bb655eda575..a84931398f5b 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -520,7 +520,7 @@ pub fn cast_subquery(subquery: Subquery, cast_to_type: &DataType) -> Result { diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs index 5f1d3c9d5c6b..e1943c890e7c 100644 --- a/datafusion/expr/src/lib.rs +++ b/datafusion/expr/src/lib.rs @@ -14,6 +14,8 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. +// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143 +#![deny(clippy::clone_on_ref_ptr)] //! [DataFusion](https://github.com/apache/datafusion) //! is an extensible query execution framework that uses diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index cc4348d58c33..4ad3bd5018a4 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -1223,17 +1223,17 @@ pub fn build_join_schema( JoinType::Inner => { // left then right let left_fields = left_fields - .map(|(q, f)| (q.cloned(), f.clone())) + .map(|(q, f)| (q.cloned(), Arc::clone(f))) .collect::>(); let right_fields = right_fields - .map(|(q, f)| (q.cloned(), f.clone())) + .map(|(q, f)| (q.cloned(), Arc::clone(f))) .collect::>(); left_fields.into_iter().chain(right_fields).collect() } JoinType::Left => { // left then right, right set to nullable in case of not matched scenario let left_fields = left_fields - .map(|(q, f)| (q.cloned(), f.clone())) + .map(|(q, f)| (q.cloned(), Arc::clone(f))) .collect::>(); left_fields .into_iter() @@ -1243,7 +1243,7 @@ pub fn build_join_schema( JoinType::Right => { // left then right, left set to nullable in case of not matched scenario let right_fields = right_fields - .map(|(q, f)| (q.cloned(), f.clone())) + .map(|(q, f)| (q.cloned(), Arc::clone(f))) .collect::>(); nullify_fields(left_fields) .into_iter() @@ -1259,11 +1259,15 @@ pub fn build_join_schema( } JoinType::LeftSemi | JoinType::LeftAnti => { // Only use the left side for the schema - left_fields.map(|(q, f)| (q.cloned(), f.clone())).collect() + left_fields + .map(|(q, f)| (q.cloned(), Arc::clone(f))) + .collect() } JoinType::RightSemi | JoinType::RightAnti => { // Only use the right side for the schema - right_fields.map(|(q, f)| (q.cloned(), f.clone())).collect() + right_fields + .map(|(q, f)| (q.cloned(), Arc::clone(f))) + .collect() } }; let func_dependencies = left.functional_dependencies().join( @@ -1577,7 +1581,7 @@ impl TableSource for LogicalTableSource { } fn schema(&self) -> SchemaRef { - self.table_schema.clone() + Arc::clone(&self.table_schema) } fn supports_filters_pushdown( @@ -1691,7 +1695,10 @@ pub fn unnest_with_options( } None => { dependency_indices.push(index); - Ok(vec![(original_qualifier.cloned(), original_field.clone())]) + Ok(vec![( + original_qualifier.cloned(), + Arc::clone(original_field), + )]) } } }) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 2921541934f8..8fd5982a0f2e 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -762,9 +762,9 @@ impl LogicalPlan { // If inputs are not pruned do not change schema // TODO this seems wrong (shouldn't we always use the schema of the input?) let schema = if schema.fields().len() == input_schema.fields().len() { - schema.clone() + Arc::clone(&schema) } else { - input_schema.clone() + Arc::clone(input_schema) }; Ok(LogicalPlan::Union(Union { inputs, schema })) } @@ -850,7 +850,7 @@ impl LogicalPlan { .. }) => Ok(LogicalPlan::Dml(DmlStatement::new( table_name.clone(), - table_schema.clone(), + Arc::clone(table_schema), op.clone(), Arc::new(inputs.swap_remove(0)), ))), @@ -863,13 +863,13 @@ impl LogicalPlan { }) => Ok(LogicalPlan::Copy(CopyTo { input: Arc::new(inputs.swap_remove(0)), output_url: output_url.clone(), - file_type: file_type.clone(), + file_type: Arc::clone(file_type), options: options.clone(), partition_by: partition_by.clone(), })), LogicalPlan::Values(Values { schema, .. }) => { Ok(LogicalPlan::Values(Values { - schema: schema.clone(), + schema: Arc::clone(schema), values: expr .chunks_exact(schema.fields().len()) .map(|s| s.to_vec()) @@ -1027,9 +1027,9 @@ impl LogicalPlan { let input_schema = inputs[0].schema(); // If inputs are not pruned do not change schema. let schema = if schema.fields().len() == input_schema.fields().len() { - schema.clone() + Arc::clone(schema) } else { - input_schema.clone() + Arc::clone(input_schema) }; Ok(LogicalPlan::Union(Union { inputs: inputs.into_iter().map(Arc::new).collect(), @@ -1073,7 +1073,7 @@ impl LogicalPlan { assert_eq!(inputs.len(), 1); Ok(LogicalPlan::Analyze(Analyze { verbose: a.verbose, - schema: a.schema.clone(), + schema: Arc::clone(&a.schema), input: Arc::new(inputs.swap_remove(0)), })) } @@ -1087,7 +1087,7 @@ impl LogicalPlan { verbose: e.verbose, plan: Arc::new(inputs.swap_remove(0)), stringified_plans: e.stringified_plans.clone(), - schema: e.schema.clone(), + schema: Arc::clone(&e.schema), logical_optimization_succeeded: e.logical_optimization_succeeded, })) } @@ -1369,7 +1369,7 @@ impl LogicalPlan { param_values: &ParamValues, ) -> Result { self.transform_up_with_subqueries(|plan| { - let schema = plan.schema().clone(); + let schema = Arc::clone(plan.schema()); plan.map_expressions(|e| { e.infer_placeholder_types(&schema)?.transform_up(|e| { if let Expr::Placeholder(Placeholder { id, .. }) = e { @@ -2227,7 +2227,7 @@ impl Window { let fields: Vec<(Option, Arc)> = input .schema() .iter() - .map(|(q, f)| (q.cloned(), f.clone())) + .map(|(q, f)| (q.cloned(), Arc::clone(f))) .collect(); let input_len = fields.len(); let mut window_fields = fields; @@ -3352,7 +3352,7 @@ digraph { vec![col("a")], Arc::new(LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema: empty_schema.clone(), + schema: Arc::clone(&empty_schema), })), empty_schema, ); @@ -3467,9 +3467,9 @@ digraph { ); let scan = Arc::new(LogicalPlan::TableScan(TableScan { table_name: TableReference::bare("tab"), - source: source.clone(), + source: Arc::clone(&source) as Arc, projection: None, - projected_schema: schema.clone(), + projected_schema: Arc::clone(&schema), filters: vec![], fetch: None, })); @@ -3499,7 +3499,7 @@ digraph { table_name: TableReference::bare("tab"), source, projection: None, - projected_schema: unique_schema.clone(), + projected_schema: Arc::clone(&unique_schema), filters: vec![], fetch: None, })); diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index 5645a2a4dede..442a33bebc99 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -1048,16 +1048,16 @@ fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Some(lhs_tz.clone()), - (lhs, rhs) if lhs == rhs => Some(lhs_tz.clone()), + ("UTC", "+00:00") | ("+00:00", "UTC") => Some(Arc::clone(lhs_tz)), + (lhs, rhs) if lhs == rhs => Some(Arc::clone(lhs_tz)), // can't cast across timezones _ => { return None; } } } - (Some(lhs_tz), None) => Some(lhs_tz.clone()), - (None, Some(rhs_tz)) => Some(rhs_tz.clone()), + (Some(lhs_tz), None) => Some(Arc::clone(lhs_tz)), + (None, Some(rhs_tz)) => Some(Arc::clone(rhs_tz)), (None, None) => None, }; diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 5f060a4a4f16..f9f467098ee4 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -598,7 +598,7 @@ fn coerced_from<'a>( Arc::new(f_into.as_ref().clone().with_data_type(data_type)); Some(FixedSizeList(new_field, *size_from)) } - Some(_) => Some(FixedSizeList(f_into.clone(), *size_from)), + Some(_) => Some(FixedSizeList(Arc::clone(f_into), *size_from)), _ => None, } } @@ -607,7 +607,7 @@ fn coerced_from<'a>( (Timestamp(unit, Some(tz)), _) if tz.as_ref() == TIMEZONE_WILDCARD => { match type_from { Timestamp(_, Some(from_tz)) => { - Some(Timestamp(unit.clone(), Some(from_tz.clone()))) + Some(Timestamp(unit.clone(), Some(Arc::clone(from_tz)))) } Null | Date32 | Utf8 | LargeUtf8 | Timestamp(_, None) => { // In the absence of any other information assume the time zone is "+00" (UTC). @@ -715,12 +715,12 @@ mod tests { fn test_fixed_list_wildcard_coerce() -> Result<()> { let inner = Arc::new(Field::new("item", DataType::Int32, false)); let current_types = vec![ - DataType::FixedSizeList(inner.clone(), 2), // able to coerce for any size + DataType::FixedSizeList(Arc::clone(&inner), 2), // able to coerce for any size ]; let signature = Signature::exact( vec![DataType::FixedSizeList( - inner.clone(), + Arc::clone(&inner), FIXED_SIZE_LIST_WILDCARD, )], Volatility::Stable, @@ -731,7 +731,7 @@ mod tests { // make sure it can't coerce to a different size let signature = Signature::exact( - vec![DataType::FixedSizeList(inner.clone(), 3)], + vec![DataType::FixedSizeList(Arc::clone(&inner), 3)], Volatility::Stable, ); let coerced_data_types = data_types(¤t_types, &signature); @@ -739,7 +739,7 @@ mod tests { // make sure it works with the same type. let signature = Signature::exact( - vec![DataType::FixedSizeList(inner.clone(), 2)], + vec![DataType::FixedSizeList(Arc::clone(&inner), 2)], Volatility::Stable, ); let coerced_data_types = data_types(¤t_types, &signature).unwrap(); diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index c8362691452b..7a054abea75b 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -106,8 +106,8 @@ impl AggregateUDF { Self::new_from_impl(AggregateUDFLegacyWrapper { name: name.to_owned(), signature: signature.clone(), - return_type: return_type.clone(), - accumulator: accumulator.clone(), + return_type: Arc::clone(return_type), + accumulator: Arc::clone(accumulator), }) } @@ -133,7 +133,10 @@ impl AggregateUDF { /// /// If you implement [`AggregateUDFImpl`] directly you should return aliases directly. pub fn with_aliases(self, aliases: impl IntoIterator) -> Self { - Self::new_from_impl(AliasedAggregateUDFImpl::new(self.inner.clone(), aliases)) + Self::new_from_impl(AliasedAggregateUDFImpl::new( + Arc::clone(&self.inner), + aliases, + )) } /// creates an [`Expr`] that calls the aggregate function. diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 03650b1d4748..68d3af6ace3c 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -87,8 +87,8 @@ impl ScalarUDF { Self::new_from_impl(ScalarUdfLegacyWrapper { name: name.to_owned(), signature: signature.clone(), - return_type: return_type.clone(), - fun: fun.clone(), + return_type: Arc::clone(return_type), + fun: Arc::clone(fun), }) } @@ -114,7 +114,7 @@ impl ScalarUDF { /// /// If you implement [`ScalarUDFImpl`] directly you should return aliases directly. pub fn with_aliases(self, aliases: impl IntoIterator) -> Self { - Self::new_from_impl(AliasedScalarUDFImpl::new(self.inner.clone(), aliases)) + Self::new_from_impl(AliasedScalarUDFImpl::new(Arc::clone(&self.inner), aliases)) } /// Returns a [`Expr`] logical expression to call this UDF with specified @@ -199,7 +199,7 @@ impl ScalarUDF { /// Returns a `ScalarFunctionImplementation` that can invoke the function /// during execution pub fn fun(&self) -> ScalarFunctionImplementation { - let captured = self.inner.clone(); + let captured = Arc::clone(&self.inner); Arc::new(move |args| captured.invoke(args)) } diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index a17bb0ade8e3..70b44e5e307a 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -90,8 +90,8 @@ impl WindowUDF { Self::new_from_impl(WindowUDFLegacyWrapper { name: name.to_owned(), signature: signature.clone(), - return_type: return_type.clone(), - partition_evaluator_factory: partition_evaluator_factory.clone(), + return_type: Arc::clone(return_type), + partition_evaluator_factory: Arc::clone(partition_evaluator_factory), }) } @@ -117,7 +117,7 @@ impl WindowUDF { /// /// If you implement [`WindowUDFImpl`] directly you should return aliases directly. pub fn with_aliases(self, aliases: impl IntoIterator) -> Self { - Self::new_from_impl(AliasedWindowUDFImpl::new(self.inner.clone(), aliases)) + Self::new_from_impl(AliasedWindowUDFImpl::new(Arc::clone(&self.inner), aliases)) } /// creates a [`Expr`] that calls the window function given diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 286f05309ea7..e3b8db676c98 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -1199,7 +1199,7 @@ pub fn only_or_err(slice: &[T]) -> Result<&T> { /// merge inputs schema into a single schema. pub fn merge_schema(inputs: Vec<&LogicalPlan>) -> DFSchema { if inputs.len() == 1 { - inputs[0].schema().clone().as_ref().clone() + inputs[0].schema().as_ref().clone() } else { inputs.iter().map(|input| input.schema()).fold( DFSchema::empty(),