From 9745d44daacd6228ab56f8751940fc3df1b039fb Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 30 May 2024 05:30:37 -0700 Subject: [PATCH] fix[codegen]: fix `make_setter` overlap in `dynarray_append` (#4059) `make_setter` can potentially run into overlap when called from `dynarray_append`; this commit copies to a temporary intermediate buffer (as is done in `AnnAssign`) before copying to the destination if the condition is detected. this is a variant of the bugs fixed in ad9c10b0b98e2d and 1c8349e867b2b. this commit also adds a util function to detect overlap, and adds an assertion directly in `make_setter` so that future variants panic instead of generating bad code. --- .../codegen/types/test_dynamic_array.py | 22 +++++++++++ vyper/codegen/context.py | 1 - vyper/codegen/core.py | 23 +++++++++++ vyper/codegen/expr.py | 38 ++++++++++++++----- vyper/codegen/stmt.py | 6 +-- 5 files changed, 76 insertions(+), 14 deletions(-) diff --git a/tests/functional/codegen/types/test_dynamic_array.py b/tests/functional/codegen/types/test_dynamic_array.py index e475b79be1..5f26e05839 100644 --- a/tests/functional/codegen/types/test_dynamic_array.py +++ b/tests/functional/codegen/types/test_dynamic_array.py @@ -1865,3 +1865,25 @@ def test_dynarray_length_no_clobber(get_contract, tx_failed, code): c = get_contract(code) with tx_failed(): c.should_revert() + + +def test_dynarray_make_setter_overlap(get_contract): + # GH 4056, variant of GH 3503 + code = """ +a: DynArray[DynArray[uint256, 10], 10] + +@external +def foo() -> DynArray[uint256, 10]: + self.a.append([1, 2, self.boo(), 4]) + return self.a[0] # returns [11, 12, 3, 4] + +@internal +def boo() -> uint256: + self.a.append([11, 12, 13, 14, 15, 16]) + self.a.pop() + # it should now be impossible to read any of [11, 12, 13, 14, 15, 16] + return 3 + """ + + c = get_contract(code) + assert c.foo() == [1, 2, 3, 4] diff --git a/vyper/codegen/context.py b/vyper/codegen/context.py index 42488f06da..f49914ac78 100644 --- a/vyper/codegen/context.py +++ b/vyper/codegen/context.py @@ -67,7 +67,6 @@ def as_ir_node(self): mutable=self.mutable, location=self.location, ) - ret._referenced_variables = {self} if self.alloca is not None: ret.passthrough_metadata["alloca"] = self.alloca return ret diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index 29831909c1..3c81778660 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -901,10 +901,33 @@ def _abi_payload_size(ir_node): raise CompilerPanic("unreachable") # pragma: nocover +def potential_overlap(left, right): + """ + Return true if make_setter(left, right) could potentially trample + src or dst during evaluation. + """ + if left.typ._is_prim_word and right.typ._is_prim_word: + return False + + if len(left.referenced_variables & right.referenced_variables) > 0: + return True + + if len(left.referenced_variables) > 0 and right.contains_risky_call: + return True + + if left.contains_risky_call and len(right.referenced_variables) > 0: + return True + + return False + + # Create an x=y statement, where the types may be compound def make_setter(left, right, hi=None): check_assign(left, right) + if potential_overlap(left, right): + raise CompilerPanic("overlap between src and dst!") + # we need bounds checks when decoding from memory, otherwise we can # get oob reads. # diff --git a/vyper/codegen/expr.py b/vyper/codegen/expr.py index 5ed0107c79..65df5a0930 100644 --- a/vyper/codegen/expr.py +++ b/vyper/codegen/expr.py @@ -18,7 +18,9 @@ is_flag_type, is_numeric_type, is_tuple_like, + make_setter, pop_dyn_array, + potential_overlap, sar, shl, shr, @@ -165,17 +167,24 @@ def parse_NameConstant(self): # Variable names def parse_Name(self): - if self.expr.id == "self": + varname = self.expr.id + + if varname == "self": return IRnode.from_list(["address"], typ=AddressT()) - elif self.expr.id in self.context.vars: - return self.context.lookup_var(self.expr.id).as_ir_node() - elif (varinfo := self.expr._expr_info.var_info) is not None: - if varinfo.is_constant: - return Expr.parse_value_expr(varinfo.decl_node.value, self.context) + varinfo = self.expr._expr_info.var_info + assert varinfo is not None - assert varinfo.is_immutable, "not an immutable!" + # local variable + if varname in self.context.vars: + ret = self.context.lookup_var(varname).as_ir_node() + ret._referenced_variables = {varinfo} + return ret + if varinfo.is_constant: + return Expr.parse_value_expr(varinfo.decl_node.value, self.context) + + if varinfo.is_immutable: mutable = self.context.is_ctor_context location = data_location_to_address_space( @@ -186,12 +195,14 @@ def parse_Name(self): varinfo.position.position, typ=varinfo.typ, location=location, - annotation=self.expr.id, + annotation=varname, mutable=mutable, ) ret._referenced_variables = {varinfo} return ret + raise CompilerPanic("unreachable") # pragma: nocover + # x.y or x[5] def parse_Attribute(self): typ = self.expr._metadata["type"] @@ -691,7 +702,16 @@ def parse_Call(self): check_assign( dummy_node_for_type(darray.typ.value_type), dummy_node_for_type(arg.typ) ) - return append_dyn_array(darray, arg) + + ret = ["seq"] + if potential_overlap(darray, arg): + tmp = self.context.new_internal_variable(arg.typ) + tmp = IRnode.from_list(tmp, typ=arg.typ, location=MEMORY) + ret.append(make_setter(tmp, arg)) + arg = tmp + + ret.append(append_dyn_array(darray, arg)) + return IRnode.from_list(ret) assert isinstance(func_t, ContractFunctionT) assert func_t.is_internal or func_t.is_constructor diff --git a/vyper/codegen/stmt.py b/vyper/codegen/stmt.py index 947de2dcde..830f2f923d 100644 --- a/vyper/codegen/stmt.py +++ b/vyper/codegen/stmt.py @@ -13,6 +13,7 @@ get_element_ptr, get_type_for_exact_size, make_setter, + potential_overlap, wrap_value_for_external_return, writeable, ) @@ -70,10 +71,7 @@ def parse_Assign(self): dst = self._get_target(self.stmt.target) ret = ["seq"] - overlap = len(dst.referenced_variables & src.referenced_variables) > 0 - overlap |= len(dst.referenced_variables) > 0 and src.contains_risky_call - overlap |= dst.contains_risky_call and len(src.referenced_variables) > 0 - if overlap and not dst.typ._is_prim_word: + if potential_overlap(dst, src): # there is overlap between the lhs and rhs, and the type is # complex - i.e., it spans multiple words. for safety, we # copy to a temporary buffer before copying to the destination.