Skip to content

Commit

Permalink
[ASan][libc++] Turn on ASan annotations for short strings
Browse files Browse the repository at this point in the history
This commit turns on ASan annotations in `std::basic_string` for short strings (SSO case).

Originally suggested here: https://reviews.llvm.org/D147680

String annotations added here: llvm#72677

Annotating `std::basic_string` with default allocator is implemented in llvm#72677 but annotations for short strings (SSO - Short String Optimization) are turned off there. This commit turns them on. This also removes `_LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED`, because we do not plan to support turning on and off short string annotations.

Support in ASan API exists since llvm@dd1b7b7.
You can turn off annotations for a specific allocator based on changes from llvm@2fa1bec.

This PR is a part of a series of patches extending AddressSanitizer C++ container overflow detection capabilities by adding annotations, similar to those existing in `std::vector` and `std::deque` collections. These enhancements empower ASan to effectively detect instances where the instrumented program attempts to access memory within a collection's internal allocation that remains unused. This includes cases where access occurs before or after the stored elements in `std::deque`, or between the `std::basic_string`'s size (including the null terminator) and capacity bounds.

The introduction of these annotations was spurred by a real-world software bug discovered by Trail of Bits, involving an out-of-bounds memory access during the comparison of two strings using the `std::equal` function. This function was taking iterators (`iter1_begin`, `iter1_end`, `iter2_begin`) to perform the comparison, using a custom comparison function. When the `iter1` object exceeded the length of `iter2`, an out-of-bounds read could occur on the `iter2` object. Container sanitization, upon enabling these annotations, would effectively identify and flag this potential vulnerability.

If you have any questions, please email:

    advenam.tacet@trailofbits.com
    disconnect3d@trailofbits.com
  • Loading branch information
Advenam Tacet committed Jan 17, 2024
1 parent de423cf commit e7e4d3c
Show file tree
Hide file tree
Showing 5 changed files with 431 additions and 21 deletions.
14 changes: 4 additions & 10 deletions libcxx/include/string
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,6 @@ _LIBCPP_PUSH_MACROS
#else
# define _LIBCPP_STRING_INTERNAL_MEMORY_ACCESS
#endif
#define _LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED false

_LIBCPP_BEGIN_NAMESPACE_STD

Expand Down Expand Up @@ -1896,38 +1895,33 @@ private:
#endif
}

// ASan: short string is poisoned if and only if this function returns true.
// The result is the compile-time switch _LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED
// AND-ed with "not currently constant-evaluated", so it is always false during
// constant evaluation and always false when the macro expands to false.
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __asan_short_string_is_annotated() const _NOEXCEPT {
return _LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED && !__libcpp_is_constant_evaluated();
}

// ASan: sets up the container annotation for a string that holds __current_size
// characters. Forwards the two boundaries data() + capacity() + 1 and
// data() + __current_size + 1 to __annotate_contiguous_container; the "+ 1"
// presumably accounts for the null terminator.
// The body is compiled only when ASan instrumentation is available, and the
// annotation call is skipped during constant evaluation.
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __annotate_new(size_type __current_size) const _NOEXCEPT {
// Keeps the parameter "used" when the ASan-only section below is compiled out.
(void) __current_size;
#if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
// NOTE(review): two consecutive `if` lines follow. This looks like a diff rendering
// that kept both the old condition (which additionally requires
// __asan_short_string_is_annotated() or __is_long()) and the new one
// (constant-evaluation check only) - confirm which one the real header contains.
if (!__libcpp_is_constant_evaluated() && (__asan_short_string_is_annotated() || __is_long()))
if (!__libcpp_is_constant_evaluated())
__annotate_contiguous_container(data() + capacity() + 1, data() + __current_size + 1);
#endif
}

// ASan: updates the container annotation using the boundaries
// data() + size() + 1 and data() + capacity() + 1, i.e. from just past the
// current contents (the "+ 1" presumably covers the null terminator) to the end
// of the storage. Presumably used before the storage is released or reused -
// TODO confirm against the callers.
// The body is compiled only when ASan instrumentation is available, and the
// annotation call is skipped during constant evaluation.
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __annotate_delete() const _NOEXCEPT {
#if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
// NOTE(review): two consecutive `if` lines follow. This looks like a diff rendering
// that kept both the old condition (which additionally requires
// __asan_short_string_is_annotated() or __is_long()) and the new one
// (constant-evaluation check only) - confirm which one the real header contains.
if (!__libcpp_is_constant_evaluated() && (__asan_short_string_is_annotated() || __is_long()))
if (!__libcpp_is_constant_evaluated())
__annotate_contiguous_container(data() + size() + 1, data() + capacity() + 1);
#endif
}

// ASan: updates the container annotation for a string that is about to grow by
// __n characters. Forwards the boundaries data() + size() + 1 and
// data() + size() + 1 + __n to __annotate_contiguous_container; the "+ 1"
// presumably accounts for the null terminator.
// The body is compiled only when ASan instrumentation is available, and the
// annotation call is skipped during constant evaluation.
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __annotate_increase(size_type __n) const _NOEXCEPT {
// Keeps the parameter "used" when the ASan-only section below is compiled out.
(void) __n;
#if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
// NOTE(review): two consecutive `if` lines follow. This looks like a diff rendering
// that kept both the old condition (which additionally requires
// __asan_short_string_is_annotated() or __is_long()) and the new one
// (constant-evaluation check only) - confirm which one the real header contains.
if (!__libcpp_is_constant_evaluated() && (__asan_short_string_is_annotated() || __is_long()))
if (!__libcpp_is_constant_evaluated())
__annotate_contiguous_container(data() + size() + 1, data() + size() + 1 + __n);
#endif
}

// ASan: updates the container annotation after the string shrank from
// __old_size characters to size() characters. Forwards the boundaries
// data() + __old_size + 1 and data() + size() + 1 to
// __annotate_contiguous_container; the "+ 1" presumably accounts for the null
// terminator.
// The body is compiled only when ASan instrumentation is available, and the
// annotation call is skipped during constant evaluation.
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __annotate_shrink(size_type __old_size) const _NOEXCEPT {
// Keeps the parameter "used" when the ASan-only section below is compiled out.
(void) __old_size;
#if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
// NOTE(review): two consecutive `if` lines follow. This looks like a diff rendering
// that kept both the old condition (which additionally requires
// __asan_short_string_is_annotated() or __is_long()) and the new one
// (constant-evaluation check only) - confirm which one the real header contains.
if (!__libcpp_is_constant_evaluated() && (__asan_short_string_is_annotated() || __is_long()))
if (!__libcpp_is_constant_evaluated())
__annotate_contiguous_container(data() + __old_size + 1, data() + size() + 1);
#endif
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// REQUIRES: asan
// UNSUPPORTED: c++03

#include <cassert>
#include <array>
#include "test_macros.h"
#include "asan_testing.h" // includes deque and string - don't do it before
#include "min_allocator.h"

// This test exists to check that strings work well with deque. As the strings
// may be partially annotated, we cannot simply call
// is_double_ended_contiguous_container_asan_correct, as it assumes that
// object memory inside is not annotated, so we check everything in a more careful way.

// Returns true when every element of `d` passes is_string_asan_correct.
// Elements are visited in index order and the scan stops at the first failure.
template <typename D>
bool verify_inside(D const& d) {
  size_t idx = 0;
  while (idx < d.size() && is_string_asan_correct(d[idx]))
    ++idx;

  // The scan reached the end only if no element failed the check.
  return !(idx < d.size());
}

// Builds and returns an S made of N copies of `c`, appended one character at a
// time with push_back.
template <typename S, size_t N>
S get_s(char c) {
  S result;
  for (size_t remaining = N; remaining != 0; --remaining)
    result.push_back(c);

  return result;
}

// Exercises a deque-like container C whose elements are strings of type S and
// checks, after each mutation, that the elements' ASan string annotations are
// correct (verify_inside). Where the scenario allows it, the container's own
// annotations are also checked with is_double_ended_contiguous_container_asan_correct.
// Each scenario lives in its own scope (d1..d7) so the containers are destroyed
// before the next scenario starts.
template <class C, class S>
void test_string() {
// NOTE(review): N presumably mirrors the number of elements per deque block for
// this element size - TODO confirm against the deque implementation.
size_t const N = sizeof(S) < 256 ? (4096 / sizeof(S)) : 16;

{
// d1: containers of default-constructed strings with sizes 1, N, N + 1 and 32 * N.
C d1a(1), d1b(N), d1c(N + 1), d1d(32 * N);
assert(verify_inside(d1a));
assert(verify_inside(d1b));
assert(verify_inside(d1c));
assert(verify_inside(d1d));
}
{
// d2: grow at the back with a 1-character and a 222-character string, then
// drop one element from the front, verifying after every step.
C d2;
for (size_t i = 0; i < 16 * N; ++i) {
d2.push_back(get_s<S, 1>(i % 10 + 'a'));
assert(verify_inside(d2));
d2.push_back(get_s<S, 222>(i % 10 + 'b'));
assert(verify_inside(d2));

d2.pop_front();
assert(verify_inside(d2));
}
}
{
// d3: mirror image of d2 - grow at the front, drop from the back.
C d3;
for (size_t i = 0; i < 16 * N; ++i) {
d3.push_front(get_s<S, 1>(i % 10 + 'a'));
assert(verify_inside(d3));
d3.push_front(get_s<S, 222>(i % 10 + 'b'));
assert(verify_inside(d3));

d3.pop_back();
assert(verify_inside(d3));
}
}
{
C d4;
for (size_t i = 0; i < 16 * N; ++i) {
// When there is no SSO, none of the elements inside should be poisoned,
// so we can also verify the deque's own poisoning.
d4.push_front(get_s<S, 333>(i % 10 + 'a'));
assert(verify_inside(d4));
assert(is_double_ended_contiguous_container_asan_correct(d4));
d4.push_back(get_s<S, 222>(i % 10 + 'b'));
assert(verify_inside(d4));
assert(is_double_ended_contiguous_container_asan_correct(d4));
}
}
{
C d5;
for (size_t i = 0; i < 5 * N; ++i) {
// In d4 we never had poisoned memory inside the deque.
// Here we start with SSO, so part of the inside of the container
// will be poisoned.
d5.push_front(S());
assert(verify_inside(d5));
}
for (size_t i = 0; i < d5.size(); ++i) {
// We change the size so that the string becomes long.
// Memory owned by the deque should not be poisoned by the string.
d5[i].resize(1000);
assert(verify_inside(d5));
}

// Every element has been resized to 1000 characters, so the deque-level check is used here.
assert(is_double_ended_contiguous_container_asan_correct(d5));

d5.erase(d5.begin() + 2);
assert(verify_inside(d5));

d5.erase(d5.end() - 2);
assert(verify_inside(d5));

assert(is_double_ended_contiguous_container_asan_correct(d5));
}
{
// d6: deque-level annotation checks for an empty container and while
// containers of 1000+-character strings are emptied from the back.
C d6a;
assert(is_double_ended_contiguous_container_asan_correct(d6a));

C d6b(N + 2, get_s<S, 1000>('a'));
d6b.push_front(get_s<S, 1001>('b'));
while (!d6b.empty()) {
d6b.pop_back();
assert(is_double_ended_contiguous_container_asan_correct(d6b));
}

C d6c(N + 2, get_s<S, 1002>('c'));
while (!d6c.empty()) {
d6c.pop_back();
assert(is_double_ended_contiguous_container_asan_correct(d6c));
}
}
{
// d7: single-element and multi-element inserts near both ends and in the
// middle, followed by erases, on a container of default-constructed strings.
C d7(9 * N + 2);

d7.insert(d7.begin() + 1, S());
assert(verify_inside(d7));

d7.insert(d7.end() - 3, S());
assert(verify_inside(d7));

d7.insert(d7.begin() + 2 * N, get_s<S, 1>('a'));
assert(verify_inside(d7));

d7.insert(d7.end() - 2 * N, get_s<S, 1>('b'));
assert(verify_inside(d7));

d7.insert(d7.begin() + 2 * N, 3 * N, get_s<S, 1>('c'));
assert(verify_inside(d7));

// The string may not be short for big element types, but it will still be checked correctly:
d7.insert(d7.end() - 2 * N, 3 * N, get_s<S, 2>('d'));
assert(verify_inside(d7));

d7.erase(d7.begin() + 2);
assert(verify_inside(d7));

d7.erase(d7.end() - 2);
assert(verify_inside(d7));
}
}

template <class S>
void test_container() {
test_string<std::deque<S, std::allocator<S>>, S>();
test_string<std::deque<S, min_allocator<S>>, S>();
test_string<std::deque<S, safe_allocator<S>>, S>();
}

// Entry point: runs the deque/string integration checks for each character type.
int main(int, char**) {
// These tests support only types based on std::basic_string.
test_container<std::string>();
// NOTE(review): std::wstring is used unconditionally; if the test suite has a
// "no wide characters" configuration, this line may need a guard - confirm.
test_container<std::wstring>();
// This file is marked UNSUPPORTED for c++03, so this condition is expected to hold.
#if TEST_STD_VER >= 11
test_container<std::u16string>();
test_container<std::u32string>();
#endif
// NOTE(review): std::u8string is gated on the language version only; confirm
// whether a separate char8_t availability check is required.
#if TEST_STD_VER >= 20
test_container<std::u8string>();
#endif

return 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// REQUIRES: asan
// UNSUPPORTED: c++03

// <string>

// Basic test if ASan annotations work for short strings.

#include <string>
#include <cassert>
#include <cstdlib>

#include "asan_testing.h"
#include "min_allocator.h"
#include "test_iterators.h"
#include "test_macros.h"

extern "C" void __sanitizer_set_death_callback(void (*callback)(void));

// Death callback registered with the sanitizer in main(): terminates the
// process with exit status 0.
void do_exit() {
  std::exit(0);
}

// Entry point. First checks a valid operation that must not trigger ASan, then
// installs do_exit as the sanitizer death callback and performs an invalid read
// that is expected to trigger ASan.
int main(int, char**) {
{
// Negative case: inserting four characters through input iterators into an
// empty string must leave the string correctly annotated.
typedef cpp17_input_iterator<char*> MyInputIter;
// Should not trigger ASan.
std::basic_string<char, std::char_traits<char>, safe_allocator<char>> v;
char i[] = {'a', 'b', 'c', 'd'};

v.insert(v.begin(), MyInputIter(i), MyInputIter(i + 4));
assert(v[0] == 'a');
assert(is_string_asan_correct(v));
}

// From here on, a sanitizer-reported error calls do_exit, which ends the
// process with status 0 - so an ASan report is the passing outcome.
__sanitizer_set_death_callback(do_exit);
{
using T = char;
using C = std::basic_string<T, std::char_traits<T>, safe_allocator<T>>;
// Seven characters; presumably short enough to stay in the short-string buffer - TODO confirm.
const T t[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g'};
C c(std::begin(t), std::end(t));
assert(is_string_asan_correct(c));
// Cross-check directly with the sanitizer: the annotated boundary must sit
// at data() + size() + 1 within [data(), data() + capacity() + 1).
assert(__sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) !=
0);
// Index size() + 1 is one position past the null terminator.
volatile T foo = c[c.size() + 1]; // should trigger ASAN. Use volatile to prevent being optimized away.
assert(false); // if we got here, ASAN didn't trigger
((void)foo);
}

return 0;
}
Loading

0 comments on commit e7e4d3c

Please sign in to comment.