-
Notifications
You must be signed in to change notification settings - Fork 12.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libcxx] Align __recommend() + 1
by __endian_factor
#90292
Conversation
@llvm/pr-subscribers-libcxx Author: Vitaly Buka (vitalybuka) Changes
Full diff: https://github.com/llvm/llvm-project/pull/90292.diff 1 Files Affected:
diff --git a/libcxx/include/string b/libcxx/include/string
index 883bc1d7e5dc9f..ff9c7b81375c9c 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -868,7 +868,7 @@ private:
__r_.first() = __rep();
__set_short_size(__size);
} else {
- auto __capacity = __recommend(__size) + 1;
+ auto __capacity = __align_it<__endian_factor>(__recommend(__size) + 1);
auto __allocation = __alloc_traits::allocate(__alloc(), __capacity);
__begin_lifetime(__allocation, __capacity);
__set_long_cap(__capacity);
@@ -1876,6 +1876,7 @@ private:
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __set_long_cap(size_type __s) _NOEXCEPT {
__r_.first().__l.__cap_ = __s / __endian_factor;
__r_.first().__l.__is_long_ = true;
+ _LIBCPP_ASSERT_INTERNAL(__s == __get_long_cap(), "Size must be __endian_factor aligned.");
}
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type __get_long_cap() const _NOEXCEPT {
@@ -2201,7 +2202,7 @@ basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_ty
__set_short_size(__sz);
__p = __get_short_pointer();
} else {
- auto __allocation = std::__allocate_at_least(__alloc(), __recommend(__reserve) + 1);
+ auto __allocation = std::__allocate_at_least(__alloc(), __align_it<__endian_factor>(__recommend(__reserve) + 1));
__p = __allocation.ptr;
__begin_lifetime(__p, __allocation.count);
__set_long_pointer(__p);
@@ -2225,7 +2226,7 @@ basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_ty
__set_short_size(__sz);
__p = __get_short_pointer();
} else {
- auto __allocation = std::__allocate_at_least(__alloc(), __recommend(__sz) + 1);
+ auto __allocation = std::__allocate_at_least(__alloc(), __align_it<__endian_factor>(__recommend(__sz) + 1));
__p = __allocation.ptr;
__begin_lifetime(__p, __allocation.count);
__set_long_pointer(__p);
@@ -2250,7 +2251,7 @@ basic_string<_CharT, _Traits, _Allocator>::__init_copy_ctor_external(const value
} else {
if (__sz > max_size())
__throw_length_error();
- auto __allocation = std::__allocate_at_least(__alloc(), __recommend(__sz) + 1);
+ auto __allocation = std::__allocate_at_least(__alloc(), __align_it<__endian_factor>(__recommend(__sz) + 1));
__p = __allocation.ptr;
__begin_lifetime(__p, __allocation.count);
__set_long_pointer(__p);
@@ -2273,7 +2274,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void basic_string<_CharT, _Traits, _Allocator>::__
__set_short_size(__n);
__p = __get_short_pointer();
} else {
- auto __allocation = std::__allocate_at_least(__alloc(), __recommend(__n) + 1);
+ auto __allocation = std::__allocate_at_least(__alloc(), __align_it<__endian_factor>(__recommend(__n) + 1));
__p = __allocation.ptr;
__begin_lifetime(__p, __allocation.count);
__set_long_pointer(__p);
@@ -2338,7 +2339,7 @@ basic_string<_CharT, _Traits, _Allocator>::__init_with_size(_InputIterator __fir
__p = __get_short_pointer();
} else {
- auto __allocation = std::__allocate_at_least(__alloc(), __recommend(__sz) + 1);
+ auto __allocation = std::__allocate_at_least(__alloc(), __align_it<__endian_factor>(__recommend(__sz) + 1));
__p = __allocation.ptr;
__begin_lifetime(__p, __allocation.count);
__set_long_pointer(__p);
@@ -2378,7 +2379,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void basic_string<_CharT, _Traits, _Allocator>::__
size_type __cap =
__old_cap < __ms / 2 - __alignment ? __recommend(std::max(__old_cap + __delta_cap, 2 * __old_cap)) : __ms - 1;
__annotate_delete();
- auto __allocation = std::__allocate_at_least(__alloc(), __cap + 1);
+ auto __allocation = std::__allocate_at_least(__alloc(), __align_it<__endian_factor>(__cap + 1));
pointer __p = __allocation.ptr;
__begin_lifetime(__p, __allocation.count);
if (__n_copy != 0)
@@ -2421,7 +2422,7 @@ void _LIBCPP_CONSTEXPR_SINCE_CXX20
size_type __cap =
__old_cap < __ms / 2 - __alignment ? __recommend(std::max(__old_cap + __delta_cap, 2 * __old_cap)) : __ms - 1;
__annotate_delete();
- auto __allocation = std::__allocate_at_least(__alloc(), __cap + 1);
+ auto __allocation = std::__allocate_at_least(__alloc(), __align_it<__endian_factor>(__cap + 1));
pointer __p = __allocation.ptr;
__begin_lifetime(__p, __allocation.count);
if (__n_copy != 0)
@@ -3254,14 +3255,14 @@ basic_string<_CharT, _Traits, _Allocator>::__shrink_or_extend(size_type __target
__p = __get_long_pointer();
} else {
if (__target_capacity > __cap) {
- auto __allocation = std::__allocate_at_least(__alloc(), __target_capacity + 1);
+ auto __allocation = std::__allocate_at_least(__alloc(), __align_it<__endian_factor>(__target_capacity + 1));
__new_data = __allocation.ptr;
__target_capacity = __allocation.count - 1;
} else {
#ifndef _LIBCPP_HAS_NO_EXCEPTIONS
try {
#endif // _LIBCPP_HAS_NO_EXCEPTIONS
- auto __allocation = std::__allocate_at_least(__alloc(), __target_capacity + 1);
+ auto __allocation = std::__allocate_at_least(__alloc(), __align_it<__endian_factor>(__target_capacity + 1));
__new_data = __allocation.ptr;
__target_capacity = __allocation.count - 1;
#ifndef _LIBCPP_HAS_NO_EXCEPTIONS
|
__set_long_cap
and __get_long_cap
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you add some more info here? I don't see what bug this fixes, and I don't understand why we can't drop a byte when setting the long cap.
We pass the capacity directly to deallocate, e.g.:
and for sized-delete, we are required to pass the actual size that was passed to allocate, not a rounded amount. |
AFAICT we always return |
The assert inserted there confirms that it's not true in some cases, on check-cxx, even without sanitizers. |
Ok, then what am I missing? |
BTW it's expected that there might be a byte dropped if the allocator returns more than what we asked for. That's completely legal for us to do. (But I don't think we test that for |
Sorry, maybe we are discussing different things.
|
What I'm saying is that |
Hm, I would expect allocator should still accept the same size for new/delete, even if it returns more. Otherwise using those operators will be very annoying. |
Yes, it does. With |
So, I am not sure then what are you asking? |
I'm asking where we get the mismatch. My current mental model is that we always allocate an even number of elements (due to |
Got it, will reproduce with details soon.
Sorry, it's just how I understood the code, will be happy to improve. |
libcxx/include/string
Outdated
@@ -1876,6 +1876,7 @@ private: | |||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __set_long_cap(size_type __s) _NOEXCEPT { | |||
__r_.first().__l.__cap_ = __s / __endian_factor; | |||
__r_.first().__l.__is_long_ = true; | |||
_LIBCPP_ASSERT_INTERNAL(__s == __get_long_cap(), "Size must be __endian_factor aligned."); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@philnik777
Note, this assert is enough to reproduce the issue on LIT_FILTER=string ninja check-cxx
, in case it would be easy for you to debug yourself.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Or maybe not...
I'll be back with details soon.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see:
https://github.com/llvm/llvm-project/actions/runs/8899718744/job/24441331959?pr=90645
Also my local run if we apply only #90645
gdb runtimes/runtimes-bins/test/std/strings/basic.string/string.modifiers/string_replace/Output/size_size_pointer_size.pass.cpp.dir/t.tmp.exe
#0 __pthread_kill_implementation (threadid=<optimized out>, signo=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
#1 0x00007ffff7b691cf in __pthread_kill_internal (signo=<optimized out>, threadid=<optimized out>) at ./nptl/pthread_kill.c:89
#2 __GI___pthread_kill (threadid=<optimized out>, signo=<optimized out>) at ./nptl/pthread_kill.c:89
#3 0x00007ffff7cb3e90 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
#4 0x00007ffff7b054b2 in __GI_abort () at ./stdlib/abort.c:79
#5 0x00007ffff7f44bce in std::__1::__libcpp_verbose_abort (format=0x7ffff7e9c1f8 "%s")
at /usr/local/google/home/vitalybuka/src/llvm.git/llvm-project/libcxx/src/verbose_abort.cpp:74
#6 0x00007ffff7f2da14 in std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__set_long_cap[abi:de190000](unsigned long) (this=0x7fffffffc7b0, __s=25) at include/c++/v1/string:1879
#7 0x00007ffff7f3ab96 in std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__init (this=0x7fffffffc7b0,
__s=0x5555555e638d "1234567890123456789bcde", __sz=23) at include/c++/v1/string:2233
#8 0x0000555555584334 in std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::basic_string[abi:de190000]<0>
()
#9 0x0000555555557441 in bool test0<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >() ()
#10 0x00005555555552d9 in void test<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >() ()
#11 0x00005555555552bb in main ()
- __init(const value_type* __s, size_type __sz = 23)
- __recommend(23) returns 24, because 23 is
__min_cap
- then as in the description we allocate 25, but store
25 / 2
- and next time we delete 24
I've looked into this a bit more and it turns out that we have two unrelated bugs in I think diff --git a/libcxx/include/string b/libcxx/include/string
index 883bc1d7e5dc..13af92fbce3c 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -1959,10 +1959,10 @@ private:
if (__s < __min_cap) {
return static_cast<size_type>(__min_cap) - 1;
}
- const size_type __boundary = sizeof(value_type) < __alignment ? __alignment / sizeof(value_type) : 1;
+ const size_type __boundary = sizeof(value_type) < __alignment ? __alignment / sizeof(value_type) : __endian_factor;
size_type __guess = __align_it<__boundary>(__s + 1) - 1;
if (__guess == __min_cap)
- ++__guess;
+ __guess += 2;
return __guess;
} should fix the issues you're seeing. |
My last reply written before reading this, but looks like they are consistent. |
libcxx/include/string
Outdated
size_type __guess = __align_it<__boundary>(__s + 1) - 1; | ||
if (__guess == __min_cap) | ||
++__guess; | ||
__guess += 2; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
works with ASSERT, and likely will work with asan
Should __guess += 2
be replaced with __guess += __endian_factor
if we want to keep __endian_factor alignment?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
works with ASSERT, and likely will work with asan Should
__guess += 2
be replaced with__guess += __endian_factor
if we want to keep __endian_factor alignment?
Assuming __endian_factor unlikely will be >2
in future, 2 will work, but __endian_factor is better for reader?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I think __endian_factor
would be the right call. Maybe we should rename it into something like __min_capacity_alignment
, since that's what it basically is? Just a though, not a request for this patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To unblock landing #83774 I'm happy with this patch if you remove the _LIBCPP_ASSERT_INTERNAL
(since it may not actually be correct) and change __guess += 2
to __guess += __endian_factor
.
Is there a reason why gcc has this on by default, but clang doesn't? |
I'm unable to say whether or not this is a problem for Google's codebase, since @alk speculated upthread that it might be string ABI dependent. I'll see if I can loop some folks in who would know. Either way, sized deallocation is an important performance optimization in modern memory allocators to avoid a costly pointer-to-size lookup (in TCMalloc's case, a 2/3-level radix tree walk). Passing the wrong size is UB and diagnosed by ASan as such, and in practice means we might place the deallocated object on the wrong freelist. |
Basically, we had trouble with rolling it out. It's a wide-reaching change to deploy in the wild because it's effectively an ABI change (call sites that used to call |
OK, if we backport this fix, is there someone willing to do some extra testing on the release branch? I would like to get some kind of confirmation that the backport fixes the issues and that it doesn't introduce another regression? |
@vitalybuka found this with sanitizers (and |
Sure, send me PR, or a branch with a fix. |
Google has indeed been using -fsized-deallocation with tcmalloc (which uses the deallocation size information) internally for a long time. However, we also use the libc++ unstable abi, which enables the "alternate" string layout. In that version, the "endian_factor" constant is 1 on little endian machines, rather than 2, so this bug doesn't appear. |
/cherry-pick d129ea8 |
Failed to cherry-pick: d129ea8 https://github.com/llvm/llvm-project/actions/runs/9485332934 Please manually backport the fix and push it to your github fork. Once this is done, please create a pull request |
This is detected by asan after llvm#83774 Allocation size will be divided by `__endian_factor` before storing. If it's not aligned, we will not be able to recover allocation size to pass into `__alloc_traits::deallocate`. we have code like this ``` auto __allocation = std::__allocate_at_least(__alloc(), __recommend(__sz) + 1); __p = __allocation.ptr; __set_long_cap(__allocation.count); void __set_long_cap(size_type __s) _NOEXCEPT { __r_.first().__l.__cap_ = __s / __endian_factor; __r_.first().__l.__is_long_ = true; } size_type __get_long_cap() const _NOEXCEPT { return __r_.first().__l.__cap_ * __endian_factor; } inline ~basic_string() { __annotate_delete(); if (__is_long()) __alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap()); } ``` 1. __recommend() -> even size 2. `std::__allocate_at_least(__alloc(), __recommend(__sz) + 1)` - > not even size 3. ` __set_long_cap() `- > lose one bit of size for __endian_factor == 2 (see `/ __endian_factor`) 4. `__alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap())` -> uses even size (see `__get_long_cap`) (cherry picked from commit d129ea8)
/pull-request #95264 |
This is detected by asan after #83774 Allocation size will be divided by `__endian_factor` before storing. If it's not aligned, we will not be able to recover allocation size to pass into `__alloc_traits::deallocate`. we have code like this ``` auto __allocation = std::__allocate_at_least(__alloc(), __recommend(__sz) + 1); __p = __allocation.ptr; __set_long_cap(__allocation.count); void __set_long_cap(size_type __s) _NOEXCEPT { __r_.first().__l.__cap_ = __s / __endian_factor; __r_.first().__l.__is_long_ = true; } size_type __get_long_cap() const _NOEXCEPT { return __r_.first().__l.__cap_ * __endian_factor; } inline ~basic_string() { __annotate_delete(); if (__is_long()) __alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap()); } ``` 1. __recommend() -> even size 2. `std::__allocate_at_least(__alloc(), __recommend(__sz) + 1)` - > not even size 3. ` __set_long_cap() `- > lose one bit of size for __endian_factor == 2 (see `/ __endian_factor`) 4. `__alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap())` -> uses even size (see `__get_long_cap`) (cherry picked from commit d129ea8)
Clang 19 finally enabled it by default (as of 130e93c). IMO this makes it even more important to be able to provide a smooth upgrade path (i.e. people being able to test |
This is detected by asan after llvm#83774 Allocation size will be divided by `__endian_factor` before storing. If it's not aligned, we will not be able to recover allocation size to pass into `__alloc_traits::deallocate`. we have code like this ``` auto __allocation = std::__allocate_at_least(__alloc(), __recommend(__sz) + 1); __p = __allocation.ptr; __set_long_cap(__allocation.count); void __set_long_cap(size_type __s) _NOEXCEPT { __r_.first().__l.__cap_ = __s / __endian_factor; __r_.first().__l.__is_long_ = true; } size_type __get_long_cap() const _NOEXCEPT { return __r_.first().__l.__cap_ * __endian_factor; } inline ~basic_string() { __annotate_delete(); if (__is_long()) __alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap()); } ``` 1. __recommend() -> even size 2. `std::__allocate_at_least(__alloc(), __recommend(__sz) + 1)` - > not even size 3. ` __set_long_cap() `- > lose one bit of size for __endian_factor == 2 (see `/ __endian_factor`) 4. `__alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap())` -> uses even size (see `__get_long_cap`) (cherry picked from commit d129ea8)
This is detected by asan after llvm#83774 Allocation size will be divided by `__endian_factor` before storing. If it's not aligned, we will not be able to recover allocation size to pass into `__alloc_traits::deallocate`. we have code like this ``` auto __allocation = std::__allocate_at_least(__alloc(), __recommend(__sz) + 1); __p = __allocation.ptr; __set_long_cap(__allocation.count); void __set_long_cap(size_type __s) _NOEXCEPT { __r_.first().__l.__cap_ = __s / __endian_factor; __r_.first().__l.__is_long_ = true; } size_type __get_long_cap() const _NOEXCEPT { return __r_.first().__l.__cap_ * __endian_factor; } inline ~basic_string() { __annotate_delete(); if (__is_long()) __alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap()); } ``` 1. __recommend() -> even size 2. `std::__allocate_at_least(__alloc(), __recommend(__sz) + 1)` - > not even size 3. ` __set_long_cap() `- > lose one bit of size for __endian_factor == 2 (see `/ __endian_factor`) 4. `__alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap())` -> uses even size (see `__get_long_cap`) (cherry picked from commit d129ea8)
This is regression test for #90292. Allocator used in test is very similar to test_allocator. However, reproducer requires size_type of the string to be 64bit, but test_allocator uses 32bit. 32bit size_type makes `sizeof(string::__long)` to be 16, but the alignment issue fixed with #90292 is only triggered with default `sizeof(string::__long)` which is 24. Fixes #92128. --------- Co-authored-by: Louis Dionne <ldionne.2@gmail.com>
…10210) This is regression test for llvm#90292. Allocator used in test is very similar to test_allocator. However, reproducer requires size_type of the string to be 64bit, but test_allocator uses 32bit. 32bit size_type makes `sizeof(string::__long)` to be 16, but the alignment issue fixed with llvm#90292 is only triggered with default `sizeof(string::__long)` which is 24. Fixes llvm#92128. --------- Co-authored-by: Louis Dionne <ldionne.2@gmail.com>
…10210) This is regression test for llvm#90292. Allocator used in test is very similar to test_allocator. However, reproducer requires size_type of the string to be 64bit, but test_allocator uses 32bit. 32bit size_type makes `sizeof(string::__long)` to be 16, but the alignment issue fixed with llvm#90292 is only triggered with default `sizeof(string::__long)` which is 24. Fixes llvm#92128. --------- Co-authored-by: Louis Dionne <ldionne.2@gmail.com>
…10210) This is regression test for llvm#90292. Allocator used in test is very similar to test_allocator. However, reproducer requires size_type of the string to be 64bit, but test_allocator uses 32bit. 32bit size_type makes `sizeof(string::__long)` to be 16, but the alignment issue fixed with llvm#90292 is only triggered with default `sizeof(string::__long)` which is 24. Fixes llvm#92128. --------- Co-authored-by: Louis Dionne <ldionne.2@gmail.com>
…10210) This is regression test for llvm#90292. Allocator used in test is very similar to test_allocator. However, reproducer requires size_type of the string to be 64bit, but test_allocator uses 32bit. 32bit size_type makes `sizeof(string::__long)` to be 16, but the alignment issue fixed with llvm#90292 is only triggered with default `sizeof(string::__long)` which is 24. Fixes llvm#92128. --------- Co-authored-by: Louis Dionne <ldionne.2@gmail.com>
This is detected by asan after #83774
Allocation size will be divided by
__endian_factor
before storing. If it's not aligned,we will not be able to recover allocation size to pass into
__alloc_traits::deallocate
.we have code like this
std::__allocate_at_least(__alloc(), __recommend(__sz) + 1)
- > not even size__set_long_cap()
- > lose one bit of size for __endian_factor == 2 (see/ __endian_factor
)__alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap())
-> uses even size (see__get_long_cap
)