Use SetLenOnDrop in Vec::truncate() #52908

merged 1 commit into from
Aug 1, 2018

Use SetLenOnDrop in Vec::truncate() #52908

merged 1 commit into from
Aug 1, 2018


@lnicola lnicola commented Jul 31, 2018

This avoids a redundant length check in some cases when calling
Vec::truncate or Vec::clear.

Fixes #51802

Note that the generated code still seems suboptimal. I tested with the following functions:

pub extern fn foo(x: &mut Vec<u8>) {

pub extern fn bar(x: &mut Vec<u8>) {

pub extern fn baz(x: &mut Vec<u8>, n: usize) {

pub extern fn foo_string(x: &mut Vec<String>) {

pub extern fn bar_string(x: &mut Vec<String>) {

pub extern fn baz_string(x: &mut Vec<String>, n: usize) {
Old output
00000000000460a0 <foo>:
   460a0:       48 83 7f 10 00          cmpq   $0x0,0x10(%rdi)
   460a5:       74 08                   je     460af <foo+0xf>
   460a7:       48 c7 47 10 00 00 00    movq   $0x0,0x10(%rdi)
   460ae:       00
   460af:       c3                      retq

00000000000460b0 <bar>:
   460b0:       48 83 7f 10 06          cmpq   $0x6,0x10(%rdi)
   460b5:       72 08                   jb     460bf <bar+0xf>
   460b7:       48 c7 47 10 05 00 00    movq   $0x5,0x10(%rdi)
   460be:       00
   460bf:       c3                      retq

00000000000460c0 <baz>:
   460c0:       48 39 77 10             cmp    %rsi,0x10(%rdi)
   460c4:       76 04                   jbe    460ca <baz+0xa>
   460c6:       48 89 77 10             mov    %rsi,0x10(%rdi)
   460ca:       c3                      retq
   460cb:       0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)

00000000000460d0 <foo_string>:
   460d0:       41 57                   push   %r15
   460d2:       41 56                   push   %r14
   460d4:       53                      push   %rbx
   460d5:       48 8b 47 10             mov    0x10(%rdi),%rax
   460d9:       48 85 c0                test   %rax,%rax
   460dc:       74 4a                   je     46128 <foo_string+0x58>
   460de:       49 89 fe                mov    %rdi,%r14
   460e1:       48 8b 0f                mov    (%rdi),%rcx
   460e4:       48 8d 14 40             lea    (%rax,%rax,2),%rdx
   460e8:       48 8d 58 ff             lea    -0x1(%rax),%rbx
   460ec:       4c 8d 3c d1             lea    (%rcx,%rdx,8),%r15
   460f0:       49 83 c7 f0             add    $0xfffffffffffffff0,%r15
   460f4:       66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
   460fb:       00 00 00
   460fe:       66 90                   xchg   %ax,%ax
   46100:       49 89 5e 10             mov    %rbx,0x10(%r14)
   46104:       49 8b 37                mov    (%r15),%rsi
   46107:       48 85 f6                test   %rsi,%rsi
   4610a:       74 0e                   je     4611a <foo_string+0x4a>
   4610c:       49 8b 7f f8             mov    -0x8(%r15),%rdi
   46110:       ba 01 00 00 00          mov    $0x1,%edx
   46115:       e8 a6 e9 ff ff          callq  44ac0 <__rust_dealloc@plt>
   4611a:       48 83 c3 ff             add    $0xffffffffffffffff,%rbx
   4611e:       49 83 c7 e8             add    $0xffffffffffffffe8,%r15
   46122:       48 83 fb ff             cmp    $0xffffffffffffffff,%rbx
   46126:       75 d8                   jne    46100 <foo_string+0x30>
   46128:       5b                      pop    %rbx
   46129:       41 5e                   pop    %r14
   4612b:       41 5f                   pop    %r15
   4612d:       c3                      retq
   4612e:       66 90                   xchg   %ax,%ax

0000000000046130 <bar_string>:
   46130:       41 57                   push   %r15
   46132:       41 56                   push   %r14
   46134:       53                      push   %rbx
   46135:       4c 8b 7f 10             mov    0x10(%rdi),%r15
   46139:       49 83 ff 06             cmp    $0x6,%r15
   4613d:       72 49                   jb     46188 <bar_string+0x58>
   4613f:       49 89 fe                mov    %rdi,%r14
   46142:       48 8b 07                mov    (%rdi),%rax
   46145:       4b 8d 0c 7f             lea    (%r15,%r15,2),%rcx
   46149:       48 8d 1c c8             lea    (%rax,%rcx,8),%rbx
   4614d:       48 83 c3 f0             add    $0xfffffffffffffff0,%rbx
   46151:       66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
   46158:       00 00 00
   4615b:       0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
   46160:       49 83 c7 ff             add    $0xffffffffffffffff,%r15
   46164:       4d 89 7e 10             mov    %r15,0x10(%r14)
   46168:       48 8b 33                mov    (%rbx),%rsi
   4616b:       48 85 f6                test   %rsi,%rsi
   4616e:       74 0e                   je     4617e <bar_string+0x4e>
   46170:       48 8b 7b f8             mov    -0x8(%rbx),%rdi
   46174:       ba 01 00 00 00          mov    $0x1,%edx
   46179:       e8 42 e9 ff ff          callq  44ac0 <__rust_dealloc@plt>
   4617e:       48 83 c3 e8             add    $0xffffffffffffffe8,%rbx
   46182:       49 83 ff 05             cmp    $0x5,%r15
   46186:       77 d8                   ja     46160 <bar_string+0x30>
   46188:       5b                      pop    %rbx
   46189:       41 5e                   pop    %r14
   4618b:       41 5f                   pop    %r15
   4618d:       c3                      retq
   4618e:       66 90                   xchg   %ax,%ax

0000000000046190 <baz_string>:
   46190:       41 57                   push   %r15
   46192:       41 56                   push   %r14
   46194:       41 54                   push   %r12
   46196:       53                      push   %rbx
   46197:       50                      push   %rax
   46198:       4c 8b 67 10             mov    0x10(%rdi),%r12
   4619c:       49 39 f4                cmp    %rsi,%r12
   4619f:       76 46                   jbe    461e7 <baz_string+0x57>
   461a1:       49 89 f6                mov    %rsi,%r14
   461a4:       49 89 ff                mov    %rdi,%r15
   461a7:       48 8b 07                mov    (%rdi),%rax
   461aa:       4b 8d 0c 64             lea    (%r12,%r12,2),%rcx
   461ae:       48 8d 1c c8             lea    (%rax,%rcx,8),%rbx
   461b2:       48 83 c3 f0             add    $0xfffffffffffffff0,%rbx
   461b6:       66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
   461bd:       00 00 00
   461c0:       49 83 c4 ff             add    $0xffffffffffffffff,%r12
   461c4:       4d 89 67 10             mov    %r12,0x10(%r15)
   461c8:       48 8b 33                mov    (%rbx),%rsi
   461cb:       48 85 f6                test   %rsi,%rsi
   461ce:       74 0e                   je     461de <baz_string+0x4e>
   461d0:       48 8b 7b f8             mov    -0x8(%rbx),%rdi
   461d4:       ba 01 00 00 00          mov    $0x1,%edx
   461d9:       e8 e2 e8 ff ff          callq  44ac0 <__rust_dealloc@plt>
   461de:       48 83 c3 e8             add    $0xffffffffffffffe8,%rbx
   461e2:       4d 39 f4                cmp    %r14,%r12
   461e5:       77 d9                   ja     461c0 <baz_string+0x30>
   461e7:       48 83 c4 08             add    $0x8,%rsp
   461eb:       5b                      pop    %rbx
   461ec:       41 5c                   pop    %r12
   461ee:       41 5e                   pop    %r14
   461f0:       41 5f                   pop    %r15
   461f2:       c3                      retq
   461f3:       90                      nop
   461f4:       66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
   461fb:       00 00 00
   461fe:       66 90                   xchg   %ax,%ax
New output
0000000000084d10 <foo>:
   84d10:       48 c7 47 10 00 00 00    movq   $0x0,0x10(%rdi)
   84d17:       00
   84d18:       c3                      retq
   84d19:       0f 1f 80 00 00 00 00    nopl   0x0(%rax)

0000000000084d20 <bar>:
   84d20:       48 8b 47 10             mov    0x10(%rdi),%rax
   84d24:       48 83 f8 05             cmp    $0x5,%rax
   84d28:       b9 05 00 00 00          mov    $0x5,%ecx
   84d2d:       48 0f 42 c8             cmovb  %rax,%rcx
   84d31:       48 89 4f 10             mov    %rcx,0x10(%rdi)
   84d35:       c3                      retq
   84d36:       66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
   84d3d:       00 00 00

0000000000084d40 <baz>:
   84d40:       48 8b 47 10             mov    0x10(%rdi),%rax
   84d44:       48 39 f0                cmp    %rsi,%rax
   84d47:       48 0f 47 c6             cmova  %rsi,%rax
   84d4b:       48 89 47 10             mov    %rax,0x10(%rdi)
   84d4f:       c3                      retq

0000000000084d50 <foo_string>:
   84d50:       41 57                   push   %r15
   84d52:       41 56                   push   %r14
   84d54:       53                      push   %rbx
   84d55:       49 89 fe                mov    %rdi,%r14
   84d58:       4c 8b 7f 10             mov    0x10(%rdi),%r15
   84d5c:       4d 85 ff                test   %r15,%r15
   84d5f:       74 2f                   je     84d90 <foo_string+0x40>
   84d61:       49 8b 06                mov    (%r14),%rax
   84d64:       4b 8d 0c 7f             lea    (%r15,%r15,2),%rcx
   84d68:       48 8d 1c c8             lea    (%rax,%rcx,8),%rbx
   84d6c:       48 83 c3 f0             add    $0xfffffffffffffff0,%rbx
   84d70:       48 8b 33                mov    (%rbx),%rsi
   84d73:       48 85 f6                test   %rsi,%rsi
   84d76:       74 0e                   je     84d86 <foo_string+0x36>
   84d78:       48 8b 7b f8             mov    -0x8(%rbx),%rdi
   84d7c:       ba 01 00 00 00          mov    $0x1,%edx
   84d81:       e8 1a b1 ff ff          callq  7fea0 <__rust_dealloc@plt>
   84d86:       48 83 c3 e8             add    $0xffffffffffffffe8,%rbx
   84d8a:       49 83 c7 ff             add    $0xffffffffffffffff,%r15
   84d8e:       75 e0                   jne    84d70 <foo_string+0x20>
   84d90:       49 c7 46 10 00 00 00    movq   $0x0,0x10(%r14)
   84d97:       00
   84d98:       5b                      pop    %rbx
   84d99:       41 5e                   pop    %r14
   84d9b:       41 5f                   pop    %r15
   84d9d:       c3                      retq
   84d9e:       66 90                   xchg   %ax,%ax

0000000000084da0 <bar_string>:
   84da0:       41 57                   push   %r15
   84da2:       41 56                   push   %r14
   84da4:       53                      push   %rbx
   84da5:       49 89 fe                mov    %rdi,%r14
   84da8:       4c 8b 7f 10             mov    0x10(%rdi),%r15
   84dac:       49 83 ff 06             cmp    $0x6,%r15
   84db0:       72 44                   jb     84df6 <bar_string+0x56>
   84db2:       49 8b 06                mov    (%r14),%rax
   84db5:       4b 8d 0c 7f             lea    (%r15,%r15,2),%rcx
   84db9:       48 8d 1c c8             lea    (%rax,%rcx,8),%rbx
   84dbd:       48 83 c3 f0             add    $0xfffffffffffffff0,%rbx
   84dc1:       49 83 c7 fb             add    $0xfffffffffffffffb,%r15
   84dc5:       66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
   84dcc:       00 00 00
   84dcf:       90                      nop
   84dd0:       48 8b 33                mov    (%rbx),%rsi
   84dd3:       48 85 f6                test   %rsi,%rsi
   84dd6:       74 0e                   je     84de6 <bar_string+0x46>
   84dd8:       48 8b 7b f8             mov    -0x8(%rbx),%rdi
   84ddc:       ba 01 00 00 00          mov    $0x1,%edx
   84de1:       e8 ba b0 ff ff          callq  7fea0 <__rust_dealloc@plt>
   84de6:       48 83 c3 e8             add    $0xffffffffffffffe8,%rbx
   84dea:       49 83 c7 ff             add    $0xffffffffffffffff,%r15
   84dee:       75 e0                   jne    84dd0 <bar_string+0x30>
   84df0:       41 bf 05 00 00 00       mov    $0x5,%r15d
   84df6:       4d 89 7e 10             mov    %r15,0x10(%r14)
   84dfa:       5b                      pop    %rbx
   84dfb:       41 5e                   pop    %r14
   84dfd:       41 5f                   pop    %r15
   84dff:       c3                      retq

0000000000084e00 <baz_string>:
   84e00:       41 57                   push   %r15
   84e02:       41 56                   push   %r14
   84e04:       41 54                   push   %r12
   84e06:       53                      push   %rbx
   84e07:       50                      push   %rax
   84e08:       49 89 ff                mov    %rdi,%r15
   84e0b:       48 8b 47 10             mov    0x10(%rdi),%rax
   84e0f:       49 89 c4                mov    %rax,%r12
   84e12:       49 29 f4                sub    %rsi,%r12
   84e15:       76 3c                   jbe    84e53 <baz_string+0x53>
   84e17:       49 89 f6                mov    %rsi,%r14
   84e1a:       49 8b 0f                mov    (%r15),%rcx
   84e1d:       48 8d 04 40             lea    (%rax,%rax,2),%rax
   84e21:       48 8d 1c c1             lea    (%rcx,%rax,8),%rbx
   84e25:       48 83 c3 f0             add    $0xfffffffffffffff0,%rbx
   84e29:       0f 1f 80 00 00 00 00    nopl   0x0(%rax)
   84e30:       48 8b 33                mov    (%rbx),%rsi
   84e33:       48 85 f6                test   %rsi,%rsi
   84e36:       74 0e                   je     84e46 <baz_string+0x46>
   84e38:       48 8b 7b f8             mov    -0x8(%rbx),%rdi
   84e3c:       ba 01 00 00 00          mov    $0x1,%edx
   84e41:       e8 5a b0 ff ff          callq  7fea0 <__rust_dealloc@plt>
   84e46:       48 83 c3 e8             add    $0xffffffffffffffe8,%rbx
   84e4a:       49 83 c4 ff             add    $0xffffffffffffffff,%r12
   84e4e:       75 e0                   jne    84e30 <baz_string+0x30>
   84e50:       4c 89 f0                mov    %r14,%rax
   84e53:       49 89 47 10             mov    %rax,0x10(%r15)
   84e57:       48 83 c4 08             add    $0x8,%rsp
   84e5b:       5b                      pop    %rbx
   84e5c:       41 5c                   pop    %r12
   84e5e:       41 5e                   pop    %r14
   84e60:       41 5f                   pop    %r15
   84e62:       c3                      retq
   84e63:       90                      nop
   84e64:       66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
   84e6b:       00 00 00
   84e6e:       66 90                   xchg   %ax,%ax

When calling truncate with non-zero lengths on non-Drop types, it seems that a redundant load and comparison get replaced with an awkward sequence that uses a conditional move. In the unknown length case, the new code is no longer awkward.

Maybe someone moderately proficient at assembly could tell if this looks like a win or not.

This came up when discussing replacing unsafe { vec.set_len(0) } with vec.clear() in a project where the author was worried about potential performance degradation. It might be worth replacing some unsafe code, even if it's trivial to see that it's actually safe.

S-waiting-on-review Status: Awaiting review from the assignee but also interested parties. label Jul 31, 2018
r? @alexcrichton

Awesome, thanks! The changes here look good to me. Could a codegen test perhaps be added to ensure it doesn't regress?

Member Author

lnicola commented Jul 31, 2018

I've never written one of those, and test doesn't like me. Can I just drop this in src/test/codegen?

// compile-flags: -O

#![crate_type = "lib"]

// CHECK-LABEL: @vec_clear
pub fn vec_clear(x: &mut Vec<u32>) {
    // CHECK-NOT: load
    // CHECK-NOT: icmp

This avoids a redundant length check in some cases when calling
`Vec::truncate` or `Vec::clear`.

Fixes #51802
That should work yeah! You could also use things like CHECK-NEXT to assert the precise next instruction too (but either way works!)

Member Author

lnicola commented Jul 31, 2018

CI seems happy about this 😄.

Copy link

Copy link

bors commented Jul 31, 2018

📌 Commit 38e311e has been approved by alexcrichton

S-waiting-on-bors Status: Waiting on bors to run and complete tests. Bors will change the label on completion. and removed S-waiting-on-review Status: Awaiting review from the assignee but also interested parties. labels Jul 31, 2018
@bors bors merged commit 38e311e into rust-lang:master Aug 1, 2018
nnethercote added a commit to nnethercote/rust that referenced this pull request Apr 13, 2022
Currently it just calls `truncate(0)`. `truncate()` is (a) not marked as
`#[inline]`, and (b) more general than needed for `clear()`.

This commit changes `clear()` to do the work itself. This modest change
was first proposed in rust-lang#74172, where the reviewer rejected it because
there was insufficient evidence that `Vec::clear()`'s performance
mattered enough to justify the change. Recent changes within rustc have
made `Vec::clear()` hot within `macro_parser.rs`, so the change is now
clearly worthwhile.

Although it doesn't show wins on CI perf runs, this seems to be because they
use PGO. But not all platforms currently use PGO. Also, local builds don't use
PGO, and `truncate` sometimes shows up in an over-represented fashion in local
profiles. So local profiling will be made easier by this change.

Note that this will also benefit `String::clear()`, because it just
calls `Vec::clear()`.

Finally, the commit removes the `vec-clear.rs` codegen test. It was
added in rust-lang#52908. From before then until now, `Vec::clear()` just called
`Vec::truncate()` with a zero length. The body of Vec::truncate() has
changed a lot since then. Now that `Vec::clear()` is doing actual work
itself, and not just calling `Vec::truncate()`, it's not surprising that
its generated code includes a load and an icmp. I think it's reasonable
to remove this test.
bors added a commit to rust-lang-ci/rust that referenced this pull request Apr 17, 2022

Speed up Vec::clear().

Currently it just calls `truncate(0)`. `truncate()` is (a) not marked as
`#[inline]`, and (b) more general than needed for `clear()`.

This commit changes `clear()` to do the work itself. This modest change
was first proposed in rust-lang#74172, where the reviewer rejected it because
there was insufficient evidence that `Vec::clear()`'s performance
mattered enough to justify the change. Recent changes within rustc have
made `Vec::clear()` hot within `macro_parser.rs`, so the change is now
clearly worthwhile.

Although it doesn't show wins on CI perf runs, this seems to be because they
use PGO. But not all platforms currently use PGO. Also, local builds don't use
PGO, and `truncate` sometimes shows up in an over-represented fashion in local
profiles. So local profiling will be made easier by this change.

Note that this will also benefit `String::clear()`, because it just
calls `Vec::clear()`.

Finally, the commit removes the `vec-clear.rs` codegen test. It was
added in rust-lang#52908. From before then until now, `Vec::clear()` just called
`Vec::truncate()` with a zero length. The body of Vec::truncate() has
changed a lot since then. Now that `Vec::clear()` is doing actual work
itself, and not just calling `Vec::truncate()`, it's not surprising that
its generated code includes a load and an icmp. I think it's reasonable
to remove this test.

r? `@m-ou-se`
