From 6b51a1dcb8724b88b7c3dc666a8b4aac8d7a6475 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 29 Mar 2022 11:59:20 -0700 Subject: [PATCH 01/10] use a loop for make_setter above a certain size --- vyper/codegen/core.py | 55 +++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index 46cc090328..10690dd851 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -98,6 +98,7 @@ def _dynarray_make_setter(dst, src): if src.value == "~empty": return IRnode.from_list(STORE(dst, 0)) + if src.value == "multi": ret = ["seq"] # handle literals @@ -119,14 +120,18 @@ def _dynarray_make_setter(dst, src): return ret + with src.cache_when_complex("darray_src") as (b1, src): # for ABI-encoded dynamic data, we must loop to unpack, since # the layout does not match our memory layout - should_loop = ( + can_batch_copy = not ( src.encoding in (Encoding.ABI, Encoding.JSON_ABI) and src.typ.subtype.abi_type.is_dynamic() ) + # if the source data needs clamping, we cannot do straight + # bytes copy; we must call into make_setter for the clamping logic. + can_batch_copy &= not needs_clamp(src.typ.subtype, src.encoding) # if the subtype is dynamic, there might be a lot of # unused space inside of each element. for instance @@ -136,15 +141,24 @@ def _dynarray_make_setter(dst, src): # TODO we can make this heuristic more precise, e.g. # loop when subtype.is_dynamic AND location == storage # OR array_size <= /bound where loop is cheaper than memcpy/ - should_loop |= src.typ.subtype.abi_type.is_dynamic() - should_loop |= needs_clamp(src.typ.subtype, src.encoding) + can_batch_copy &= not src.typ.subtype.abi_type.is_dynamic() with get_dyn_array_count(src).cache_when_complex("darray_count") as (b2, count): ret = ["seq"] ret.append(STORE(dst, count)) - if should_loop: + if can_batch_copy: + element_size = src.typ.subtype.memory_bytes_required + # number of elements * size of element in bytes + n_bytes = _mul(count, element_size) + max_bytes = src.typ.count * element_size + + src_ = dynarray_data_ptr(src) + dst_ = dynarray_data_ptr(dst) + ret.append(copy_bytes(dst_, src_, n_bytes, max_bytes)) + + else: i = IRnode.from_list(_freshname("copy_darray_ix"), typ="uint256") loop_body = make_setter( @@ -155,16 +169,6 @@ def _dynarray_make_setter(dst, src): ret.append(["repeat", i, 0, count, src.typ.count, loop_body]) - else: - element_size = src.typ.subtype.memory_bytes_required - # number of elements * size of element in bytes - n_bytes = _mul(count, element_size) - max_bytes = src.typ.count * element_size - - src_ = dynarray_data_ptr(src) - dst_ = dynarray_data_ptr(dst) - ret.append(copy_bytes(dst_, src_, n_bytes, max_bytes)) - return b1.resolve(b2.resolve(ret)) @@ -759,12 +763,6 @@ def make_setter(left, right): return IRnode.from_list(ret) elif isinstance(left.typ, DArrayType): - # TODO should we enable this? - # implicit conversion from sarray to darray - # if isinstance(right.typ, SArrayType): - # return _complex_make_setter(left, right) - - # TODO rethink/streamline the clamp_basetype logic if needs_clamp(right.typ, right.encoding): with right.cache_when_complex("arr_ptr") as (b, right): copier = _dynarray_make_setter(left, right) @@ -784,6 +782,18 @@ def _complex_make_setter(left, right): # optimized memzero return mzero(left, left.typ.memory_bytes_required) + can_batch_copy = not needs_clamp(right.typ, right.encoding) and not right.typ.abi_type.is_dynamic() + can_batch_copy &= (left.is_pointer and right.is_pointer) + _len = left.typ.memory_bytes_required + + if can_batch_copy: + assert _len == left.typ.storage_size_in_words * 32 # only the paranoid survive + # TODO: push unrolling capability down to copy_bytes + if _len > 256: # only loop if > 8 words + return copy_bytes(left, right, _len, _len) + + + # general case, including literals. ret = ["seq"] if isinstance(left.typ, SArrayType): @@ -793,11 +803,6 @@ def _complex_make_setter(left, right): if isinstance(left.typ, TupleLike): keys = left.typ.tuple_keys() - # if len(keyz) == 0: - # return IRnode.from_list(["pass"]) - - # general case - # TODO use copy_bytes when the generated code is above a certain size with left.cache_when_complex("_L") as (b1, left), right.cache_when_complex("_R") as (b2, right): for k in keys: From a41f00723c85407c4239b26dff694611e363c140 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 31 Mar 2022 03:34:48 -0700 Subject: [PATCH 02/10] push loop unrolling down into copy_bytes --- vyper/codegen/core.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index 10690dd851..3ef39d964c 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -172,6 +172,10 @@ def _dynarray_make_setter(dst, src): return b1.resolve(b2.resolve(ret)) +# below how many words should we unroll vs loop or staticcall? +UNROLL_WORD_COPY_TUNING = 8 * 32 + + # Copy bytes # Accepts 4 arguments: # (i) an IR node for the start position of the source @@ -192,6 +196,18 @@ def copy_bytes(dst, src, length, length_bound): "copy_bytes_count" ) as (b2, length), dst.cache_when_complex("dst") as (b3, dst): + # unroll if we know the loop would be more expensive than unrolling + # (also prefer it to identity precompile). roughly, this is around 7 words. + batch_copy_op_exists = dst.location == MEMORY and src.location in (CALLDATA, DATA) + if length.is_literal and length.value < UNROLL_WORD_COPY_TUNING and not batch_copy_op_exists: + ret = ["seq"] + for i in range(ceil32(length.value) // 32): + dst_i = add_ofst(dst, dst.location.word_scale * i) + src_i = add_ofst(src, src.location.word_scale * i) + ret.append(STORE(dst_i, LOAD(src_i))) + ret = IRnode.from_list(ret, annotation=annotation) + return b1.resolve(b2.resolve(b3.resolve(ret))) + # fast code for common case where num bytes is small # TODO expand this for more cases where num words is less than ~8 if length_bound <= 32: @@ -788,9 +804,7 @@ def _complex_make_setter(left, right): if can_batch_copy: assert _len == left.typ.storage_size_in_words * 32 # only the paranoid survive - # TODO: push unrolling capability down to copy_bytes - if _len > 256: # only loop if > 8 words - return copy_bytes(left, right, _len, _len) + return copy_bytes(left, right, _len, _len) # general case, including literals. From 658192c291a6658d66271da104431542c194f391 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 31 Mar 2022 04:03:48 -0700 Subject: [PATCH 03/10] fix lint --- vyper/codegen/core.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index 3ef39d964c..d68cc3b9ad 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -98,7 +98,6 @@ def _dynarray_make_setter(dst, src): if src.value == "~empty": return IRnode.from_list(STORE(dst, 0)) - if src.value == "multi": ret = ["seq"] # handle literals @@ -120,7 +119,6 @@ def _dynarray_make_setter(dst, src): return ret - with src.cache_when_complex("darray_src") as (b1, src): # for ABI-encoded dynamic data, we must loop to unpack, since @@ -199,7 +197,11 @@ def copy_bytes(dst, src, length, length_bound): # unroll if we know the loop would be more expensive than unrolling # (also prefer it to identity precompile). roughly, this is around 7 words. batch_copy_op_exists = dst.location == MEMORY and src.location in (CALLDATA, DATA) - if length.is_literal and length.value < UNROLL_WORD_COPY_TUNING and not batch_copy_op_exists: + if ( + length.is_literal + and length.value < UNROLL_WORD_COPY_TUNING + and not batch_copy_op_exists + ): ret = ["seq"] for i in range(ceil32(length.value) // 32): dst_i = add_ofst(dst, dst.location.word_scale * i) @@ -798,15 +800,16 @@ def _complex_make_setter(left, right): # optimized memzero return mzero(left, left.typ.memory_bytes_required) - can_batch_copy = not needs_clamp(right.typ, right.encoding) and not right.typ.abi_type.is_dynamic() - can_batch_copy &= (left.is_pointer and right.is_pointer) + can_batch_copy = ( + not needs_clamp(right.typ, right.encoding) and not right.typ.abi_type.is_dynamic() + ) + can_batch_copy &= left.is_pointer and right.is_pointer _len = left.typ.memory_bytes_required if can_batch_copy: assert _len == left.typ.storage_size_in_words * 32 # only the paranoid survive return copy_bytes(left, right, _len, _len) - # general case, including literals. ret = ["seq"] From f6714a4b2c18f4c07046ea0c2268b0b31b8f1127 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 31 Mar 2022 04:11:50 -0700 Subject: [PATCH 04/10] roll make_setter for static arrays --- vyper/codegen/core.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index d68cc3b9ad..74f5ad48f5 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -795,6 +795,9 @@ def make_setter(left, right): return _complex_make_setter(left, right) +ROLL_ARRAY_TUNING = 5 + + def _complex_make_setter(left, right): if right.value == "~empty" and left.location == MEMORY: # optimized memzero @@ -810,6 +813,20 @@ def _complex_make_setter(left, right): assert _len == left.typ.storage_size_in_words * 32 # only the paranoid survive return copy_bytes(left, right, _len, _len) + if isinstance(left.typ, SArrayType) and left.typ.count > ROLL_ARRAY_TUNING: + n = left.typ.count + + i = IRnode.from_list(_freshname("copy_sarray_ix"), typ="uint256") + + loop_body = make_setter( + get_element_ptr(left, i, array_bounds_check=False), + get_element_ptr(right, i, array_bounds_check=False), + ) + loop_body.annotation = f"{left}[i] = {right}[i]" + + return IRnode.from_list(["repeat", i, 0, n, n, loop_body]) + + # general case, including literals. ret = ["seq"] From f27bb8611905107bbba0487b7405a9294d2b7d41 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 31 Mar 2022 04:20:43 -0700 Subject: [PATCH 05/10] refactor make_setter a bit merge shared code blocks in dynarray and static array/tuple --- vyper/codegen/core.py | 321 ++++++++++++++++++++++-------------------- vyper/codegen/stmt.py | 3 +- 2 files changed, 171 insertions(+), 153 deletions(-) diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index 74f5ad48f5..7d36ef0450 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -45,7 +45,7 @@ def _codecopy_gas_bound(num_bytes): # Copy byte array word-for-word (including layout) -def make_byte_array_copier(dst, src): +def _bytestring_make_setter(dst, src): assert isinstance(src.typ, ByteArrayLike) assert isinstance(dst.typ, ByteArrayLike) @@ -91,87 +91,8 @@ def dynarray_data_ptr(ptr): return add_ofst(ptr, ptr.location.word_scale) -def _dynarray_make_setter(dst, src): - assert isinstance(src.typ, DArrayType) - assert isinstance(dst.typ, DArrayType) - - if src.value == "~empty": - return IRnode.from_list(STORE(dst, 0)) - - if src.value == "multi": - ret = ["seq"] - # handle literals - - # write the length word - store_length = STORE(dst, len(src.args)) - ann = None - if src.annotation is not None: - ann = f"len({src.annotation})" - store_length = IRnode.from_list(store_length, annotation=ann) - ret.append(store_length) - - n_items = len(src.args) - for i in range(n_items): - k = IRnode.from_list(i, typ="uint256") - dst_i = get_element_ptr(dst, k, array_bounds_check=False) - src_i = get_element_ptr(src, k, array_bounds_check=False) - ret.append(make_setter(dst_i, src_i)) - - return ret - - with src.cache_when_complex("darray_src") as (b1, src): - - # for ABI-encoded dynamic data, we must loop to unpack, since - # the layout does not match our memory layout - can_batch_copy = not ( - src.encoding in (Encoding.ABI, Encoding.JSON_ABI) - and src.typ.subtype.abi_type.is_dynamic() - ) - # if the source data needs clamping, we cannot do straight - # bytes copy; we must call into make_setter for the clamping logic. - can_batch_copy &= not needs_clamp(src.typ.subtype, src.encoding) - - # if the subtype is dynamic, there might be a lot of - # unused space inside of each element. for instance - # DynArray[DynArray[uint256, 100], 5] where all the child - # arrays are empty - for this case, we recursively call - # into make_setter instead of straight bytes copy - # TODO we can make this heuristic more precise, e.g. - # loop when subtype.is_dynamic AND location == storage - # OR array_size <= /bound where loop is cheaper than memcpy/ - can_batch_copy &= not src.typ.subtype.abi_type.is_dynamic() - - with get_dyn_array_count(src).cache_when_complex("darray_count") as (b2, count): - ret = ["seq"] - - ret.append(STORE(dst, count)) - - if can_batch_copy: - element_size = src.typ.subtype.memory_bytes_required - # number of elements * size of element in bytes - n_bytes = _mul(count, element_size) - max_bytes = src.typ.count * element_size - - src_ = dynarray_data_ptr(src) - dst_ = dynarray_data_ptr(dst) - ret.append(copy_bytes(dst_, src_, n_bytes, max_bytes)) - - else: - i = IRnode.from_list(_freshname("copy_darray_ix"), typ="uint256") - - loop_body = make_setter( - get_element_ptr(dst, i, array_bounds_check=False), - get_element_ptr(src, i, array_bounds_check=False), - ) - loop_body.annotation = f"{dst}[i] = {src}[i]" - - ret.append(["repeat", i, 0, count, src.typ.count, loop_body]) - - return b1.resolve(b2.resolve(ret)) - - -# below how many words should we unroll vs loop or staticcall? -UNROLL_WORD_COPY_TUNING = 8 * 32 +# below how many bytes should we unroll vs loop or staticcall? +UNROLL_WORD_BYTES_TUNING = 8 * 32 # Copy bytes @@ -199,7 +120,7 @@ def copy_bytes(dst, src, length, length_bound): batch_copy_op_exists = dst.location == MEMORY and src.location in (CALLDATA, DATA) if ( length.is_literal - and length.value < UNROLL_WORD_COPY_TUNING + and length.value < 32 * UNROLL_WORD_BYTES_TUNING and not batch_copy_op_exists ): ret = ["seq"] @@ -270,16 +191,18 @@ def get_bytearray_length(arg): def get_dyn_array_count(arg): assert isinstance(arg.typ, DArrayType) + ann = None if arg.annotation is None else f"len({arg.annotation})" + typ = BaseType("uint256") if arg.value == "multi": - return IRnode.from_list(len(arg.args), typ=typ) + return IRnode.from_list(len(arg.args), typ=typ, annotation=ann) if arg.value == "~empty": # empty(DynArray[]) - return IRnode.from_list(0, typ=typ) + return IRnode.from_list(0, typ=typ, annotation=ann) - return IRnode.from_list(LOAD(arg), typ=typ) + return IRnode.from_list(LOAD(arg), typ=typ, annotation=ann) def append_dyn_array(darray_node, elem_node): @@ -526,6 +449,8 @@ def _get_element_ptr_mapping(parent, key): # Take a value representing a memory or storage location, and descend down to # an element or member variable # This is analogous (but not necessarily equivalent to) getelementptr in LLVM. +# Note that this correctly resolves the double indirection for dynamic types +# if parent.encoding is set to Encoding.ABI def get_element_ptr(parent, key, array_bounds_check=True): with parent.cache_when_complex("val") as (b, parent): typ = parent.typ @@ -754,97 +679,191 @@ def needs_clamp(t, encoding): return False -# Create an x=y statement, where the types may be compound -def make_setter(left, right): +def _is_list_literal(x: IRnode) -> bool: + return x.value == "multi" + + +def make_setter(left: IRnode, right: IRnode) -> IRnode: + """ + Generalized routine to copy an object from right to left. + + Arguments: + left: An IRnode pointer or list of pointers, the destination to copy into + right: An IRnode value, could be a literal + + Returns: + An IRnode with the copy instructions (typ=None) + """ check_assign(left, right) - # Basic types - if isinstance(left.typ, BaseType): - enc = right.encoding # unwrap_location butchers encoding - right = unwrap_location(right) - # TODO rethink/streamline the clamp_basetype logic - if needs_clamp(right.typ, enc): - right = clamp_basetype(right) - - return STORE(left, right) - - # Byte arrays - elif isinstance(left.typ, ByteArrayLike): - # TODO rethink/streamline the clamp_basetype logic - if needs_clamp(right.typ, right.encoding): - with right.cache_when_complex("bs_ptr") as (b, right): - copier = make_byte_array_copier(left, right) - ret = b.resolve(["seq", clamp_bytestring(right), copier]) - else: - ret = make_byte_array_copier(left, right) + ann_l = left.typ if left.annotation is None else left.annotation + ann_r = right.typ if right.annotation is None else right.annotation + ann = f"_make_setter({ann_l}, {ann_r})" - return IRnode.from_list(ret) + with left.cache_when_complex("_L") as (b1, left), right.cache_when_complex("_R") as (b2, right): - elif isinstance(left.typ, DArrayType): - if needs_clamp(right.typ, right.encoding): - with right.cache_when_complex("arr_ptr") as (b, right): - copier = _dynarray_make_setter(left, right) - ret = b.resolve(["seq", clamp_dyn_array(right), copier]) - else: - ret = _dynarray_make_setter(left, right) + def _finalize(ret): + ret = IRnode.from_list(ret, annotation=ann) + return b1.resolve(b2.resolve(ret)) - return IRnode.from_list(ret) + # Basic types + if isinstance(left.typ, BaseType): + enc = right.encoding # unwrap_location butchers encoding + right = unwrap_location(right) + # TODO rethink/streamline the needs_clamp logic + if needs_clamp(right.typ, enc): + right = clamp_basetype(right) + + return _finalize(STORE(left, right)) + + # Byte arrays + if isinstance(left.typ, ByteArrayLike): + ret = ["seq"] + # TODO rethink/streamline the needs_clamp logic + if needs_clamp(right.typ, right.encoding): + ret.append(clamp_bytestring(right)) + ret.append(_bytestring_make_setter(left, right)) + + return _finalize(ret) + + if isinstance(left.typ, DArrayType): + ret = ["seq"] + # TODO rethink/streamline the needs_clamp logic + if needs_clamp(right.typ, right.encoding): + ret.append(clamp_dyn_array(right)) + ret.append(_dynarray_make_setter(left, right)) - # Arrays - elif isinstance(left.typ, (SArrayType, TupleLike)): - return _complex_make_setter(left, right) + return _finalize(ret) + + # Arrays + if isinstance(left.typ, (SArrayType, TupleLike)): + return _finalize(_complex_make_setter(left, right)) + + raise CompilerPanic("unreachable type") # pragma: notest ROLL_ARRAY_TUNING = 5 -def _complex_make_setter(left, right): - if right.value == "~empty" and left.location == MEMORY: - # optimized memzero - return mzero(left, left.typ.memory_bytes_required) +def _ir_loop_make_setter(dst, src, n, n_bound): + # TODO: cache when complex + i = IRnode.from_list(_freshname("copy_array_ix"), typ="uint256") - can_batch_copy = ( - not needs_clamp(right.typ, right.encoding) and not right.typ.abi_type.is_dynamic() + loop_body = make_setter( + get_element_ptr(dst, i, array_bounds_check=False), + get_element_ptr(src, i, array_bounds_check=False), ) - can_batch_copy &= left.is_pointer and right.is_pointer - _len = left.typ.memory_bytes_required + loop_body.annotation = f"{dst}[i] = {src}[i]" - if can_batch_copy: - assert _len == left.typ.storage_size_in_words * 32 # only the paranoid survive - return copy_bytes(left, right, _len, _len) + ret = ["repeat", i, 0, n, n_bound, loop_body] + return IRnode.from_list(ret, annotation="__loop_make_setter") + + +# works for static arrays, and also tuples. works for literals. +def _unroll_loop_make_setter(dst, src, keys): + ret = ["seq"] + + for k in keys: + dst_i = get_element_ptr(dst, k, array_bounds_check=False) + src_i = get_element_ptr(src, k, array_bounds_check=False) + ret.append(make_setter(dst_i, src_i)) + + return IRnode.from_list(ret, annotation="__seq_make_setter") + + +def _dynarray_make_setter(dst, src): + assert isinstance(src.typ, DArrayType) + assert isinstance(dst.typ, DArrayType) + + if src.value == "~empty": + return IRnode.from_list(STORE(dst, 0)) - if isinstance(left.typ, SArrayType) and left.typ.count > ROLL_ARRAY_TUNING: - n = left.typ.count + with get_dyn_array_count(src).cache_when_complex("count") as (b1, count): + ret = ["seq"] + # write the length word + store_length = STORE(dst, count) + ret.append(store_length) - i = IRnode.from_list(_freshname("copy_sarray_ix"), typ="uint256") + if _is_list_literal(src): + # is literal list, generate instructions + # to set every element of the lhs + assert isinstance(count.value, int), src + ret.append(_unroll_loop_make_setter(dst, src, range(count.value))) + return IRnode.from_list(ret) - loop_body = make_setter( - get_element_ptr(left, i, array_bounds_check=False), - get_element_ptr(right, i, array_bounds_check=False), + # for ABI-encoded dynamic data, we must loop to unpack, since + # the layout does not match our memory layout + can_batch_copy = not ( + src.encoding in (Encoding.ABI, Encoding.JSON_ABI) + and src.typ.subtype.abi_type.is_dynamic() ) - loop_body.annotation = f"{left}[i] = {right}[i]" + # if the subtype needs clamping, we cannot do straight + # bytes copy; we must call into make_setter for the clamping logic. + can_batch_copy &= not needs_clamp(src.typ.subtype, src.encoding) - return IRnode.from_list(["repeat", i, 0, n, n, loop_body]) + # if the subtype is dynamic, there might be a lot of + # unused space inside of each element. for instance + # DynArray[DynArray[uint256, 100], 5] where all the child + # arrays are empty - for this case, we call make_setter in + # a loop instead of straight bytes copy + # TODO we can make this heuristic more precise, e.g. + # loop when subtype.is_dynamic AND location == storage + # OR array_size <= /bound where loop is cheaper than memcpy/ + should_batch_copy = not src.typ.subtype.abi_type.is_dynamic() + if can_batch_copy and should_batch_copy: + element_size = src.typ.subtype.memory_bytes_required + # number of elements * size of element in bytes + n_bytes = _mul(count, element_size) + max_bytes = src.typ.count * element_size - # general case, including literals. - ret = ["seq"] + src_ = dynarray_data_ptr(src) + dst_ = dynarray_data_ptr(dst) + ret.append(copy_bytes(dst_, src_, n_bytes, max_bytes)) - if isinstance(left.typ, SArrayType): - n_items = right.typ.count - keys = [IRnode.from_list(i, typ="uint256") for i in range(n_items)] + else: + ret = _ir_loop_make_setter(dst, src, count, src.typ.count) - if isinstance(left.typ, TupleLike): - keys = left.typ.tuple_keys() + return b1.resolve(ret) - with left.cache_when_complex("_L") as (b1, left), right.cache_when_complex("_R") as (b2, right): - for k in keys: - l_i = get_element_ptr(left, k, array_bounds_check=False) - r_i = get_element_ptr(right, k, array_bounds_check=False) - ret.append(make_setter(l_i, r_i)) +def _complex_make_setter(dst, src): + # make_setter for complex, statically sized types. tuple and sarray + if src.value == "~empty" and dst.location == MEMORY: + # optimized memzero + return mzero(dst, dst.typ.memory_bytes_required) + + can_batch_copy = ( + not needs_clamp(src.typ, src.encoding) + and not src.typ.abi_type.is_dynamic() + and dst.is_pointer + and src.is_pointer + ) + + if can_batch_copy: + _len = dst.typ.memory_bytes_required + assert _len == dst.typ.storage_size_in_words * 32 # only the paranoid survive + return copy_bytes(dst, src, _len, _len) + + if ( + isinstance(dst.typ, SArrayType) + and dst.typ.count > ROLL_ARRAY_TUNING + and not _is_list_literal(src) + ): + assert dst.is_pointer and src.is_pointer + assert dst.typ.count == src.typ.count + n = dst.typ.count + ret = _ir_loop_make_setter(dst, src, n, n) + return IRnode.from_list(ret) + + # general case, including literals. + if isinstance(dst.typ, SArrayType): + n_items = src.typ.count + keys = [IRnode.from_list(i, typ="uint256") for i in range(n_items)] + if isinstance(dst.typ, TupleLike): + keys = dst.typ.tuple_keys() - return b1.resolve(b2.resolve(IRnode.from_list(ret))) + return _unroll_loop_make_setter(dst, src, keys) def ensure_in_memory(ir_var, context): diff --git a/vyper/codegen/stmt.py b/vyper/codegen/stmt.py index bd25152dd3..8e8ff0041e 100644 --- a/vyper/codegen/stmt.py +++ b/vyper/codegen/stmt.py @@ -16,7 +16,6 @@ get_element_ptr, getpos, is_return_from_function, - make_byte_array_copier, make_setter, pop_dyn_array, zero_pad, @@ -183,7 +182,7 @@ def _get_last(ir): # TODO maybe use ensure_in_memory if msg_ir.location != MEMORY: buf = self.context.new_internal_variable(msg_ir.typ) - instantiate_msg = make_byte_array_copier(buf, msg_ir) + instantiate_msg = make_setter(buf, msg_ir) else: buf = _get_last(msg_ir) if not isinstance(buf, int): From 420aa9d596765e95b7e02ebab614c9a8e31baa4a Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 2 Apr 2022 10:39:56 -0700 Subject: [PATCH 06/10] remove caching for base types they are only referenced once, in a single load/store, so it is not necessary to cache them --- vyper/codegen/core.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index 7d36ef0450..4d2155eb30 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -700,22 +700,23 @@ def make_setter(left: IRnode, right: IRnode) -> IRnode: ann_r = right.typ if right.annotation is None else right.annotation ann = f"_make_setter({ann_l}, {ann_r})" + # Basic types + if isinstance(left.typ, BaseType): + enc = right.encoding # unwrap_location butchers encoding + right = unwrap_location(right) + # TODO rethink/streamline the needs_clamp logic + if needs_clamp(right.typ, enc): + right = clamp_basetype(right) + + return IRnode.from_list(STORE(left, right), annotation=ann) + + # pointer/complex types with left.cache_when_complex("_L") as (b1, left), right.cache_when_complex("_R") as (b2, right): def _finalize(ret): ret = IRnode.from_list(ret, annotation=ann) return b1.resolve(b2.resolve(ret)) - # Basic types - if isinstance(left.typ, BaseType): - enc = right.encoding # unwrap_location butchers encoding - right = unwrap_location(right) - # TODO rethink/streamline the needs_clamp logic - if needs_clamp(right.typ, enc): - right = clamp_basetype(right) - - return _finalize(STORE(left, right)) - # Byte arrays if isinstance(left.typ, ByteArrayLike): ret = ["seq"] From cf635adf6c592eabab287076a83c5ecf452bb3d7 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 2 Apr 2022 10:49:18 -0700 Subject: [PATCH 07/10] fix annotations --- vyper/codegen/core.py | 4 ++-- vyper/codegen/ir_node.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index 4d2155eb30..7851ea9ea4 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -757,7 +757,7 @@ def _ir_loop_make_setter(dst, src, n, n_bound): loop_body.annotation = f"{dst}[i] = {src}[i]" ret = ["repeat", i, 0, n, n_bound, loop_body] - return IRnode.from_list(ret, annotation="__loop_make_setter") + return IRnode.from_list(ret) # works for static arrays, and also tuples. works for literals. @@ -769,7 +769,7 @@ def _unroll_loop_make_setter(dst, src, keys): src_i = get_element_ptr(src, k, array_bounds_check=False) ret.append(make_setter(dst_i, src_i)) - return IRnode.from_list(ret, annotation="__seq_make_setter") + return IRnode.from_list(ret) def _dynarray_make_setter(dst, src): diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index 7b8c684f95..3629e97e71 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -320,7 +320,7 @@ def __init__(self, ir_node, name): # a named IR variable which represents the # output of `ir_node` self.ir_var = IRnode.from_list( - name, typ=ir_node.typ, location=ir_node.location, encoding=ir_node.encoding + name, typ=ir_node.typ, location=ir_node.location, encoding=ir_node.encoding, annotation=ir_node.annotation ) def __enter__(self): From 6760176cfd1cbeb25f6f43810ac5a2058894aedb Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 2 Apr 2022 11:09:11 -0700 Subject: [PATCH 08/10] fix a small type error --- vyper/codegen/core.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index 7851ea9ea4..4cb4d11c53 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -788,8 +788,9 @@ def _dynarray_make_setter(dst, src): if _is_list_literal(src): # is literal list, generate instructions # to set every element of the lhs - assert isinstance(count.value, int), src - ret.append(_unroll_loop_make_setter(dst, src, range(count.value))) + assert isinstance(count.value, int) + keys = [IRnode.from_list(i, typ="uint256") for i in range(count.value)] + ret.append(_unroll_loop_make_setter(dst, src, keys)) return IRnode.from_list(ret) # for ABI-encoded dynamic data, we must loop to unpack, since From 508a28b52d5d60ba36c0c1b5f8e2c24084626a44 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 2 Apr 2022 11:15:07 -0700 Subject: [PATCH 09/10] fix lint --- vyper/codegen/ir_node.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index 3629e97e71..2f5aa903e6 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -320,7 +320,11 @@ def __init__(self, ir_node, name): # a named IR variable which represents the # output of `ir_node` self.ir_var = IRnode.from_list( - name, typ=ir_node.typ, location=ir_node.location, encoding=ir_node.encoding, annotation=ir_node.annotation + name, + typ=ir_node.typ, + location=ir_node.location, + encoding=ir_node.encoding, + annotation=ir_node.annotation, ) def __enter__(self): From 3c21a7ae9d0e7bede72bdbee50af2de76179f5d3 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Sat, 2 Apr 2022 11:24:48 -0700 Subject: [PATCH 10/10] move constants to top of file --- vyper/codegen/core.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index 4cb4d11c53..ca8871aaf5 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -21,6 +21,13 @@ from vyper.exceptions import CompilerPanic, StructureException, TypeCheckFailure, TypeMismatch from vyper.utils import GAS_CALLDATACOPY_WORD, GAS_CODECOPY_WORD, GAS_IDENTITY, GAS_IDENTITYWORD +# below how many bytes should we unroll vs loop or staticcall? +UNROLL_WORD_BYTES_TUNING = 8 * 32 + + +# above how many words should we roll an array copy operation? +ROLL_ARRAY_TUNING = 5 + # propagate revert message when calls to external contracts fail def check_external_call(call_ir): @@ -91,10 +98,6 @@ def dynarray_data_ptr(ptr): return add_ofst(ptr, ptr.location.word_scale) -# below how many bytes should we unroll vs loop or staticcall? -UNROLL_WORD_BYTES_TUNING = 8 * 32 - - # Copy bytes # Accepts 4 arguments: # (i) an IR node for the start position of the source @@ -743,9 +746,6 @@ def _finalize(ret): raise CompilerPanic("unreachable type") # pragma: notest -ROLL_ARRAY_TUNING = 5 - - def _ir_loop_make_setter(dst, src, n, n_bound): # TODO: cache when complex i = IRnode.from_list(_freshname("copy_array_ix"), typ="uint256")