Skip to content

Commit

Permalink
[libc++] Optimize vector push_back to avoid continuous load and store…
Browse files Browse the repository at this point in the history
… of end pointer

Credits: this change is based on analysis and a proof of concept by
gerbens@google.com.

Before, the compiler loses track of end as 'this' and other references
possibly escape beyond the compiler's scope. This can be see in the
generated assembly:

     16.28 │200c80:   mov     %r15d,(%rax)
     60.87 │200c83:   add     $0x4,%rax
           │200c87:   mov     %rax,-0x38(%rbp)
      0.03 │200c8b: → jmpq    200d4e
      ...
      ...
      1.69 │200d4e:   cmp     %r15d,%r12d
           │200d51: → je      200c40
     16.34 │200d57:   inc     %r15d
      0.05 │200d5a:   mov     -0x38(%rbp),%rax
      3.27 │200d5e:   mov     -0x30(%rbp),%r13
      1.47 │200d62:   cmp     %r13,%rax
           │200d65: → jne     200c80

We fix this by always explicitly storing the loaded local and pointer
back at the end of push back. This generates some slight source 'noise',
but creates nice and compact fast path code, i.e.:

     32.64 │200760:   mov    %r14d,(%r12)
      9.97 │200764:   add    $0x4,%r12
      6.97 │200768:   mov    %r12,-0x38(%rbp)
     32.17 │20076c:   add    $0x1,%r14d
      2.36 │200770:   cmp    %r14d,%ebx
           │200773: → je     200730
      8.98 │200775:   mov    -0x30(%rbp),%r13
      6.75 │200779:   cmp    %r13,%r12
           │20077c: → jne    200760

Now there is a single store for the push_back value (as before), and a
single store for the end without a reload (dependency).

For fully local vectors, (i.e., not referenced elsewhere), the capacity
load and store inside the loop could also be removed, but this requires
more substantial refactoring inside vector.

Differential Revision: https://reviews.llvm.org/D80588
  • Loading branch information
martijnvels authored and ldionne committed Oct 2, 2023
1 parent b52a5c6 commit 6fe4e03
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 17 deletions.
13 changes: 13 additions & 0 deletions libcxx/benchmarks/ContainerBenchmarks.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,19 @@ void BM_ConstructFromRange(benchmark::State& st, Container, GenInputs gen) {
}
}

template <class Container>
void BM_Pushback(benchmark::State& state, Container c) {
int count = state.range(0);
c.reserve(count);
while (state.KeepRunningBatch(count)) {
c.clear();
for (int i = 0; i != count; ++i) {
c.push_back(i);
}
benchmark::DoNotOptimize(c.data());
}
}

template <class Container, class GenInputs>
void BM_InsertValue(benchmark::State& st, Container c, GenInputs gen) {
auto in = gen(st.range(0));
Expand Down
2 changes: 2 additions & 0 deletions libcxx/benchmarks/vector_operations.bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,6 @@ BENCHMARK_CAPTURE(BM_ConstructFromRange, vector_size_t, std::vector<size_t>{}, g
BENCHMARK_CAPTURE(BM_ConstructFromRange, vector_string, std::vector<std::string>{}, getRandomStringInputs)
->Arg(TestNumInputs);

BENCHMARK_CAPTURE(BM_Pushback, vector_int, std::vector<int>{})->Arg(TestNumInputs);

BENCHMARK_MAIN();
42 changes: 25 additions & 17 deletions libcxx/include/vector
Original file line number Diff line number Diff line change
Expand Up @@ -833,11 +833,11 @@ private:

template <class _Up>
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI
inline void __push_back_slow_path(_Up&& __x);
inline pointer __push_back_slow_path(_Up&& __x);

template <class... _Args>
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI
inline void __emplace_back_slow_path(_Args&&... __args);
inline pointer __emplace_back_slow_path(_Args&&... __args);

// The following functions are no-ops outside of AddressSanitizer mode.
// We call annotations for every allocator, unless explicitly disabled.
Expand Down Expand Up @@ -1609,7 +1609,7 @@ vector<_Tp, _Allocator>::shrink_to_fit() _NOEXCEPT
template <class _Tp, class _Allocator>
template <class _Up>
_LIBCPP_CONSTEXPR_SINCE_CXX20
void
typename vector<_Tp, _Allocator>::pointer
vector<_Tp, _Allocator>::__push_back_slow_path(_Up&& __x)
{
allocator_type& __a = this->__alloc();
Expand All @@ -1618,6 +1618,7 @@ vector<_Tp, _Allocator>::__push_back_slow_path(_Up&& __x)
__alloc_traits::construct(__a, std::__to_address(__v.__end_), std::forward<_Up>(__x));
__v.__end_++;
__swap_out_circular_buffer(__v);
return this->__end_;
}

template <class _Tp, class _Allocator>
Expand All @@ -1626,12 +1627,14 @@ inline _LIBCPP_HIDE_FROM_ABI
void
vector<_Tp, _Allocator>::push_back(const_reference __x)
{
if (this->__end_ != this->__end_cap())
{
pointer __end = this->__end_;
if (__end < this->__end_cap()) {
__construct_one_at_end(__x);
++__end;
} else {
__end = __push_back_slow_path(__x);
}
else
__push_back_slow_path(__x);
this->__end_ = __end;
}

template <class _Tp, class _Allocator>
Expand All @@ -1640,18 +1643,20 @@ inline _LIBCPP_HIDE_FROM_ABI
void
vector<_Tp, _Allocator>::push_back(value_type&& __x)
{
if (this->__end_ < this->__end_cap())
{
pointer __end = this->__end_;
if (__end < this->__end_cap()) {
__construct_one_at_end(std::move(__x));
++__end;
} else {
__end = __push_back_slow_path(std::move(__x));
}
else
__push_back_slow_path(std::move(__x));
this->__end_ = __end;
}

template <class _Tp, class _Allocator>
template <class... _Args>
_LIBCPP_CONSTEXPR_SINCE_CXX20
void
typename vector<_Tp, _Allocator>::pointer
vector<_Tp, _Allocator>::__emplace_back_slow_path(_Args&&... __args)
{
allocator_type& __a = this->__alloc();
Expand All @@ -1660,6 +1665,7 @@ vector<_Tp, _Allocator>::__emplace_back_slow_path(_Args&&... __args)
__alloc_traits::construct(__a, std::__to_address(__v.__end_), std::forward<_Args>(__args)...);
__v.__end_++;
__swap_out_circular_buffer(__v);
return this->__end_;
}

template <class _Tp, class _Allocator>
Expand All @@ -1673,14 +1679,16 @@ void
#endif
vector<_Tp, _Allocator>::emplace_back(_Args&&... __args)
{
if (this->__end_ < this->__end_cap())
{
pointer __end = this->__end_;
if (__end < this->__end_cap()) {
__construct_one_at_end(std::forward<_Args>(__args)...);
++__end;
} else {
__end = __emplace_back_slow_path(std::forward<_Args>(__args)...);
}
else
__emplace_back_slow_path(std::forward<_Args>(__args)...);
this->__end_ = __end;
#if _LIBCPP_STD_VER >= 17
return this->back();
return *(__end - 1);
#endif
}

Expand Down

0 comments on commit 6fe4e03

Please sign in to comment.