From c5cbdfeb65c31b78d4a90c084097b5772dceb237 Mon Sep 17 00:00:00 2001 From: vglavnyy Date: Sat, 10 Nov 2018 23:00:45 +0700 Subject: [PATCH] Make the Parser independent from the global C-locale --- .travis.yml | 19 +- CMakeLists.txt | 23 ++ docs/source/CppUsage.md | 47 +++- include/flatbuffers/base.h | 34 ++- include/flatbuffers/util.h | 251 +++++++++++----------- src/idl_parser.cpp | 2 +- src/util.cpp | 22 ++ tests/fuzzer/CMakeLists.txt | 12 +- tests/fuzzer/flatbuffers_parser_fuzzer.cc | 4 +- tests/fuzzer/flatbuffers_scalar_fuzzer.cc | 43 +++- tests/fuzzer/readme.md | 10 +- tests/test.cpp | 17 +- 12 files changed, 323 insertions(+), 161 deletions(-) diff --git a/.travis.yml b/.travis.yml index 40d38302fbb..3c0c3a8ca96 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,12 @@ env: global: # Set at the root level as this is ignored when set under matrix.env. - GCC_VERSION="4.9" + # Fail on first error if UBSAN or ASAN enabled for a target + - UBSAN_OPTIONS=halt_on_error=1 + - ASAN_OPTIONS=halt_on_error=1 + # Travis machines have 2 cores + - JOBS=2 + - MAKEFLAGS="-j 2" conan-linux: &conan-linux os: linux @@ -53,7 +59,7 @@ matrix: # branch: master - language: cpp os: - - linux + - linux compiler: - gcc @@ -79,8 +85,8 @@ matrix: -DGRPC_INSTALL_PATH=$TRAVIS_BUILD_DIR/google/grpc/install -DPROTOBUF_DOWNLOAD_PATH=$TRAVIS_BUILD_DIR/google/grpc/third_party/protobuf -DFLATBUFFERS_CODE_SANITIZE=ON - - make - - LD_LIBRARY_PATH=$TRAVIS_BUILD_DIR/google/grpc/install/lib make test ARGS=-V + - cmake --build . -- -j${JOBS} + - LD_LIBRARY_PATH=$TRAVIS_BUILD_DIR/google/grpc/install/lib ctest --extra-verbose --output-on-failure - bash .travis/check-generate-code.sh - if [ "$CONAN" == "true" ] && [ "$TRAVIS_OS_NAME" == "linux" ]; then sudo pip install conan && conan create . google/testing -s build_type=$BUILD_TYPE -tf conan/test_package; fi @@ -91,6 +97,7 @@ matrix: matrix: - BUILD_TYPE=Debug - BUILD_TYPE=Release + script: - bash grpc/build_grpc.sh - cmake . 
@@ -99,10 +106,9 @@ matrix: -DGRPC_INSTALL_PATH=$TRAVIS_BUILD_DIR/google/grpc/install -DPROTOBUF_DOWNLOAD_PATH=$TRAVIS_BUILD_DIR/google/grpc/third_party/protobuf -DFLATBUFFERS_CODE_SANITIZE=ON - - make - - ./flattests + - cmake --build . -- -j${JOBS} + - DYLD_LIBRARY_PATH=$TRAVIS_BUILD_DIR/google/grpc/install/lib ctest --extra-verbose --output-on-failure - bash .travis/check-generate-code.sh - - DYLD_LIBRARY_PATH=$TRAVIS_BUILD_DIR/google/grpc/install/lib ./grpctest - <<: *conan-linux env: CONAN_GCC_VERSIONS=4.9 CONAN_DOCKER_IMAGE=lasote/conangcc49 @@ -146,6 +152,7 @@ matrix: - extra-android-m2repository compiler: - gcc + before_install: - git clone https://github.com/urho3d/android-ndk.git $HOME/android-ndk-root - export ANDROID_NDK_HOME=$HOME/android-ndk-root diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a548fed269..45884b7909a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,7 @@ cmake_minimum_required(VERSION 2.8) # generate compile_commands.json set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +include(CheckCXXSymbolExists) project(FlatBuffers) @@ -35,6 +36,16 @@ if(DEFINED FLATBUFFERS_MAX_PARSING_DEPTH) message(STATUS "FLATBUFFERS_MAX_PARSING_DEPTH: ${FLATBUFFERS_MAX_PARSING_DEPTH}") endif() +# Auto-detect locale-narrow 'strtod_l' function. 
+if(NOT DEFINED FLATBUFFERS_LOCALE_INDEPENDENT) + if(MSVC) + check_cxx_symbol_exists(_strtof_l stdlib.h FLATBUFFERS_LOCALE_INDEPENDENT) + else() + check_cxx_symbol_exists(strtof_l stdlib.h FLATBUFFERS_LOCALE_INDEPENDENT) + endif() +endif() +add_definitions(-DFLATBUFFERS_LOCALE_INDEPENDENT=$<BOOL:${FLATBUFFERS_LOCALE_INDEPENDENT}>) + set(FlatBuffers_Library_SRCS include/flatbuffers/code_generators.h include/flatbuffers/base.h @@ -213,6 +224,7 @@ function(add_fsanitize_to_target _target _sanitizer) target_link_libraries(${_target} PRIVATE "-fsanitize${_sanitizer_flags}") set_property(TARGET ${_target} PROPERTY POSITION_INDEPENDENT_CODE ON) + message(STATUS "Sanitizer ${_sanitizer_flags} added to ${_target}") endif() endfunction() @@ -304,6 +316,17 @@ if(FLATBUFFERS_BUILD_TESTS) include_directories(${CMAKE_CURRENT_BINARY_DIR}/samples) add_executable(flatsamplebinary ${FlatBuffers_Sample_Binary_SRCS}) add_executable(flatsampletext ${FlatBuffers_Sample_Text_SRCS}) + + if(DEFINED FLATBUFFERS_TEST_LOCALE) + # Enable test of locale independent code. + # -DFLATBUFFERS_TEST_LOCALE="" - test with default C-locale + # -DFLATBUFFERS_TEST_LOCALE="ru_RU.CP1251" - test with ru_RU.CP1251 + # Locale was installed before (Ubuntu):>sudo locale-gen ru_RU.CP1251 + target_compile_definitions(flattests PRIVATE + FLATBUFFERS_TEST_LOCALE=\"${FLATBUFFERS_TEST_LOCALE}\") + message(STATUS "FLATBUFFERS_TEST_LOCALE: \"${FLATBUFFERS_TEST_LOCALE}\"") + endif() + endif() if(FLATBUFFERS_BUILD_GRPCTEST) diff --git a/docs/source/CppUsage.md b/docs/source/CppUsage.md index e13e1bad18d..facec92069c 100644 --- a/docs/source/CppUsage.md +++ b/docs/source/CppUsage.md @@ -499,11 +499,46 @@ To use scalars, simply wrap them in a struct. ## Depth limit of nested objects and stack-overflow control The parser of Flatbuffers schema or json-files is kind of recursive parser. -To avoid stack-overflow problem the parser has a built-in limiter of recursion depth. -Number of nested declarations in a schema or number of nested json-objects is limited. 
-By default, this depth limit set to `64`. -It is possible to override this limit with `FLATBUFFERS_MAX_PARSING_DEPTH` definition. -This definition can be helpful for testing purposes or embedded applications. -For details see [build](@ref flatbuffers_guide_building) of CMake-based projects. +To avoid a stack overflow, the parser has a built-in limiter of +recursion depth. Number of nested declarations in a schema or number of +nested json-objects is limited. By default, this depth limit is set to `64`. +It is possible to override this limit with the `FLATBUFFERS_MAX_PARSING_DEPTH` +definition. This definition can be helpful for testing purposes or embedded +applications. For details see [build](@ref flatbuffers_guide_building) of +CMake-based projects. + +## Dependence on the C-locale {#flatbuffers_locale_cpp} +The Flatbuffers [grammar](@ref flatbuffers_grammar) uses the ASCII +character set for identifiers, alphanumeric literals, reserved words. + +The internal implementation of Flatbuffers depends on functions which +depend on the C-locale: `strtod()` or `strtof()`, for example. +The library expects the dot `.` symbol as the separator between the integer +part and the fractional part of a float number. +Other separator symbols (`,` for example) will break compatibility +and may lead to an error while parsing a Flatbuffers schema or a json file. + +The Standard C locale is a global resource: there is only one locale for +the entire application. Some modern compilers and platforms have +locale-independent or locale-narrow functions `strtof_l`, `strtod_l`, +`strtoll_l`, `strtoull_l` to resolve this dependency. +These functions use a specified locale rather than the global or per-thread +locale. They are part of POSIX-2008 but not part of the C/C++ +standard library, and therefore may be missing on some platforms. 
+ +The Flatbuffers library tries to detect these functions at configuration and +compile time: +- `_MSC_VER >= 1800`: check MSVC2013 or higher for MSVC build +- `_XOPEN_SOURCE>=700`: check POSIX-2008 for GCC/Clang build +- `check_cxx_symbol_exists(strtof_l stdlib.h)`: CMake check of `strtof_l` + +After detection, the definition `FLATBUFFERS_LOCALE_INDEPENDENT` will be +set to `0` or `1`. + +It is possible to test the compatibility of the Flatbuffers library with +a specific locale. Set the exact name of the locale for the `flattests` target +using the `FLATBUFFERS_TEST_LOCALE` definition, for example: +- `-D FLATBUFFERS_TEST_LOCALE=""` - the default system locale +- `-D FLATBUFFERS_TEST_LOCALE="ru_RU.CP1251"`
diff --git a/include/flatbuffers/base.h b/include/flatbuffers/base.h index f6eccc6c2b5..3166602bdf1 100644 --- a/include/flatbuffers/base.h +++ b/include/flatbuffers/base.h @@ -195,15 +195,35 @@ #endif #endif // !FLATBUFFERS_HAS_NEW_STRTOD -// Suppress sanitizer directives. +#ifndef FLATBUFFERS_LOCALE_INDEPENDENT + // Enable locale independent functions {strtof_l, strtod_l,strtoll_l, strtoull_l}. + // They are part of the POSIX-2008 but not part of the C/C++ standard. + // GCC/Clang have definition (_XOPEN_SOURCE>=700) if POSIX-2008. + #if ((defined(_MSC_VER) && _MSC_VER >= 1800) || \ + (defined(_XOPEN_SOURCE) && (_XOPEN_SOURCE>=700))) + #define FLATBUFFERS_LOCALE_INDEPENDENT 1 + #else + #define FLATBUFFERS_LOCALE_INDEPENDENT 0 + #endif +#endif // !FLATBUFFERS_LOCALE_INDEPENDENT + +// Suppress Undefined Behavior Sanitizer (recoverable only). Usage: +// - __supress_ubsan__("undefined") +// - __supress_ubsan__("signed-integer-overflow") #if defined(__clang__) - #define __no_sanitize_undefined__(reason) __attribute__((no_sanitize("undefined"))) + #define __supress_ubsan__(type) __attribute__((no_sanitize(type))) #elif defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 408) - #define __no_sanitize_undefined__(reason) __attribute__((no_sanitize_undefined)) + #define __supress_ubsan__(type) __attribute__((no_sanitize_undefined)) #else - #define __no_sanitize_undefined__(reason) + #define __supress_ubsan__(type) #endif +// This is constexpr function used for checking compile-time constants. +// Avoid `#pragma warning(disable: 4127) // C4127: expression is constant`. +template FLATBUFFERS_CONSTEXPR inline bool IsConstTrue(T t) { + return !!t; +} + /// @endcond /// @file @@ -287,13 +307,15 @@ template T EndianScalar(T t) { } template -__no_sanitize_undefined__("C++ aliasing type rules, see std::bit_cast<>") +// UBSAN: C++ aliasing type rules, see std::bit_cast<> for details. 
+__supress_ubsan__("alignment") T ReadScalar(const void *p) { return EndianScalar(*reinterpret_cast(p)); } template -__no_sanitize_undefined__("C++ aliasing type rules, see std::bit_cast<>") +// UBSAN: C++ aliasing type rules, see std::bit_cast<> for details. +__supress_ubsan__("alignment") void WriteScalar(void *p, T t) { *reinterpret_cast(p) = EndianScalar(t); } diff --git a/include/flatbuffers/util.h b/include/flatbuffers/util.h index 01e1b23f843..d1da7adb8ca 100644 --- a/include/flatbuffers/util.h +++ b/include/flatbuffers/util.h @@ -50,11 +50,6 @@ namespace flatbuffers { -// Avoid `#pragma warning(disable: 4127) // C4127: expression is constant`. -template FLATBUFFERS_CONSTEXPR inline bool IsConstTrue(const T &t) { - return !!t; -} - // @locale-independent functions for ASCII characters set. // Check that integer scalar is in closed range: (a <= x <= b) @@ -217,36 +212,70 @@ inline std::string IntToStringHex(int i, int xdigits) { // clang-format on } -static inline double strtod_impl(const char *str, char **str_end) { - // Result of strtod (printf, etc) depends from current C-locale. - return strtod(str, str_end); -} - -static inline float strtof_impl(const char *str, char **str_end) { - // Use "strtof" for float and strtod for double to avoid double=>float - // rounding problems (see - // https://en.cppreference.com/w/cpp/numeric/fenv/feround) or problems with - // std::numeric_limits::is_iec559==false. 
Example: - // for (int mode : { FE_DOWNWARD, FE_TONEAREST, FE_TOWARDZERO, FE_UPWARD }){ - // const char *s = "-4e38"; - // std::fesetround(mode); - // std::cout << strtof(s, nullptr) << "; " << strtod(s, nullptr) << "; " - // << static_cast(strtod(s, nullptr)) << "\n"; - // } - // Gives: - // -inf; -4e+38; -inf - // -inf; -4e+38; -inf - // -inf; -4e+38; -3.40282e+38 - // -inf; -4e+38; -3.40282e+38 - - // clang-format off - #ifdef FLATBUFFERS_HAS_NEW_STRTOD - return strtof(str, str_end); +// clang-format off +// Use locale independent functions {strtod_l, strtof_l, strtoll_l, strtoull_l}. +#if defined(FLATBUFFERS_LOCALE_INDEPENDENT) && (FLATBUFFERS_LOCALE_INDEPENDENT > 0) + class ClassicLocale { + #ifdef _MSC_VER + typedef _locale_t locale_type; + #else + typedef locale_t locale_type; // POSIX.1-2008 locale_t type + #endif + ClassicLocale(); + ~ClassicLocale(); + locale_type locale_; + static ClassicLocale instance_; + public: + static locale_type Get() { return instance_.locale_; } + }; + + #ifdef _MSC_VER + #define __strtoull_impl(s, pe, b) _strtoui64_l(s, pe, b, ClassicLocale::Get()) + #define __strtoll_impl(s, pe, b) _strtoi64_l(s, pe, b, ClassicLocale::Get()) + #define __strtod_impl(s, pe) _strtod_l(s, pe, ClassicLocale::Get()) + #define __strtof_impl(s, pe) _strtof_l(s, pe, ClassicLocale::Get()) #else - return static_cast(strtod_impl(str, str_end)); - #endif // !FLATBUFFERS_HAS_NEW_STRTOD - // clang-format on + #define __strtoull_impl(s, pe, b) strtoull_l(s, pe, b, ClassicLocale::Get()) + #define __strtoll_impl(s, pe, b) strtoll_l(s, pe, b, ClassicLocale::Get()) + #define __strtod_impl(s, pe) strtod_l(s, pe, ClassicLocale::Get()) + #define __strtof_impl(s, pe) strtof_l(s, pe, ClassicLocale::Get()) + #endif +#else + #define __strtod_impl(s, pe) strtod(s, pe) + #define __strtof_impl(s, pe) static_cast(strtod(s, pe)) + #ifdef _MSC_VER + #define __strtoull_impl(s, pe, b) _strtoui64(s, pe, b) + #define __strtoll_impl(s, pe, b) _strtoi64(s, pe, b) + #else + #define 
__strtoull_impl(s, pe, b) strtoull_l(s, pe, b) + #define __strtoll_impl(s, pe, b) strtoll_l(s, pe, b) + #endif +#endif + +static inline void strtoval_impl(int64_t *val, const char *str, char **endptr, + int base) { + *val = __strtoll_impl(str, endptr, base); +} + +static inline void strtoval_impl(uint64_t *val, const char *str, char **endptr, + int base) { + *val = __strtoull_impl(str, endptr, base); +} + +static inline void strtoval_impl(double *val, const char *str, char **endptr) { + *val = __strtod_impl(str, endptr); +} + +// UBSAN: double to float is safe if numeric_limits::is_iec559 is true. +__supress_ubsan__("float-cast-overflow") +static inline void strtoval_impl(float *val, const char *str, char **endptr) { + *val = __strtof_impl(str, endptr); } +#undef __strtoull_impl +#undef __strtoll_impl +#undef __strtod_impl +#undef __strtof_impl +// clang-format on // Adaptor for strtoull()/strtoll(). // Flatbuffers accepts numbers with any count of leading zeros (-009 is -9), @@ -261,66 +290,43 @@ static inline float strtof_impl(const char *str, char **str_end) { // - If the converted value falls out of range of corresponding return type, a // range error occurs. In this case value MAX(T)/MIN(T) is returned. 
template -inline T StringToInteger64Impl(const char *const str, const char **endptr, - const int base, const bool check_errno = true) { - static_assert(flatbuffers::is_same::value || - flatbuffers::is_same::value, - "Type T must be either int64_t or uint64_t"); - FLATBUFFERS_ASSERT(str && endptr); // endptr must be not null +inline bool StringToIntegerImpl(T *val, const char *const str, + const int base = 0, + const bool check_errno = true) { + // T is int64_t or uint64_T + FLATBUFFERS_ASSERT(str); if (base <= 0) { auto s = str; while (*s && !is_digit(*s)) s++; if (s[0] == '0' && is_alpha_char(s[1], 'X')) - return StringToInteger64Impl(str, endptr, 16, check_errno); + return StringToIntegerImpl(val, str, 16, check_errno); // if a prefix not match, try base=10 - return StringToInteger64Impl(str, endptr, 10, check_errno); + return StringToIntegerImpl(val, str, 10, check_errno); } else { if (check_errno) errno = 0; // clear thread-local errno - // calculate result - T result; - if (IsConstTrue(flatbuffers::is_same::value)) { - // clang-format off - #ifdef _MSC_VER - result = _strtoi64(str, const_cast(endptr), base); - #else - result = strtoll(str, const_cast(endptr), base); - #endif - // clang-format on - } else { // T is uint64_t - // clang-format off - #ifdef _MSC_VER - result = _strtoui64(str, const_cast(endptr), base); - #else - result = strtoull(str, const_cast(endptr), base); - #endif - // clang-format on - - // The strtoull accepts negative numbers: - // If the minus sign was part of the input sequence, the numeric value - // calculated from the sequence of digits is negated as if by unary minus - // in the result type, which applies unsigned integer wraparound rules. - // Fix this behaviour (except -0). - if ((**endptr == '\0') && (0 != result)) { - auto s = str; - while (*s && !is_digit(*s)) s++; - s = (s > str) ? 
(s - 1) : s; // step back to one symbol - if (*s == '-') { - // For unsigned types return max to distinguish from - // "no conversion can be performed". - result = flatbuffers::numeric_limits::max(); - // point to the start of string, like errno - *endptr = str; - } - } + auto endptr = str; + strtoval_impl(val, str, const_cast(&endptr), base); + if ((*endptr != '\0') || (endptr == str)) { + *val = 0; // erase partial result + return false; // invalid string } - // check for overflow - if (check_errno && errno) *endptr = str; // point it to start of input - // erase partial result, but save an overflow - if ((*endptr != str) && (**endptr != '\0')) result = 0; - return result; + // errno is out-of-range, return MAX/MIN + if (check_errno && errno) return false; + return true; } } +template +inline bool StringToFloatImpl(T *val, const char *const str) { + // Type T must be either float or double. + FLATBUFFERS_ASSERT(str && val); + auto end = str; + strtoval_impl(val, str, const_cast(&end)); + auto done = (end != str) && (*end == '\0'); + if (!done) *val = 0; // erase partial result + return done; +} + // Convert a string to an instance of T. // Return value (matched with StringToInteger64Impl and strtod): // - If successful, a numeric value corresponding to the str is returned. @@ -329,66 +335,69 @@ inline T StringToInteger64Impl(const char *const str, const char **endptr, // range error occurs. In this case value MAX(T)/MIN(T) is returned. template inline bool StringToNumber(const char *s, T *val) { FLATBUFFERS_ASSERT(s && val); - const char *end = nullptr; - // The errno check isn't needed. strtoll will return MAX/MIN on overlow. - const int64_t i = StringToInteger64Impl(s, &end, -1, false); - *val = static_cast(i); - const auto done = (s != end) && (*end == '\0'); - if (done) { + int64_t i64; + // The errno check isn't needed, will return MAX/MIN on overflow. 
+ if (StringToIntegerImpl(&i64, s, 0, false)) { const int64_t max = flatbuffers::numeric_limits::max(); const int64_t min = flatbuffers::numeric_limits::lowest(); - if (i > max) { + if (i64 > max) { *val = static_cast(max); return false; } - if (i < min) { + if (i64 < min) { // For unsigned types return max to distinguish from // "no conversion can be performed" when 0 is returned. *val = static_cast(flatbuffers::is_unsigned::value ? max : min); return false; } + *val = static_cast(i64); + return true; } - return done; + *val = 0; + return false; } -template<> inline bool StringToNumber(const char *s, int64_t *val) { - const char *end = s; // request errno checking - *val = StringToInteger64Impl(s, &end, -1); - return (s != end) && (*end == '\0'); + +template<> inline bool StringToNumber(const char *str, int64_t *val) { + return StringToIntegerImpl(val, str); } -template<> inline bool StringToNumber(const char *s, uint64_t *val) { - const char *end = s; // request errno checking - *val = StringToInteger64Impl(s, &end, -1); - return (s != end) && (*end == '\0'); + +template<> inline bool StringToNumber(const char *str, uint64_t *val) { + if (!StringToIntegerImpl(val, str)) return false; + // The strtoull accepts negative numbers: + // If the minus sign was part of the input sequence, the numeric value + // calculated from the sequence of digits is negated as if by unary minus + // in the result type, which applies unsigned integer wraparound rules. + // Fix this behaviour (except -0). + if (*val) { + auto s = str; + while (*s && !is_digit(*s)) s++; + s = (s > str) ? (s - 1) : s; // step back to one symbol + if (*s == '-') { + // For unsigned types return the max to distinguish from + // "no conversion can be performed". 
+ *val = flatbuffers::numeric_limits::max(); + return false; + } + } + return true; } -template<> inline bool StringToNumber(const char *s, double *val) { - FLATBUFFERS_ASSERT(s && val); - char *end = nullptr; - *val = strtod_impl(s, &end); - auto done = (s != end) && (*end == '\0'); - if (!done) *val = 0; // erase partial result - return done; +template<> inline bool StringToNumber(const char *s, float *val) { + return StringToFloatImpl(val, s); } -template<> inline bool StringToNumber(const char *s, float *val) { - FLATBUFFERS_ASSERT(s && val); - char *end = nullptr; - *val = strtof_impl(s, &end); - auto done = (s != end) && (*end == '\0'); - if (!done) *val = 0; // erase partial result - return done; +template<> inline bool StringToNumber(const char *s, double *val) { + return StringToFloatImpl(val, s); } -inline int64_t StringToInt(const char *str, const char **endptr = nullptr, - int base = 10) { - const char *ep = nullptr; - return StringToInteger64Impl(str, endptr ? endptr : &ep, base); +static inline int64_t StringToInt(const char *s, int base = 10) { + int64_t val; + return StringToIntegerImpl(&val, s, base) ? val : 0; } -inline uint64_t StringToUInt(const char *str, const char **endptr = nullptr, - int base = 10) { - const char *ep = nullptr; - return StringToInteger64Impl(str, endptr ? endptr : &ep, base); +static inline uint64_t StringToUInt(const char *s, int base = 10) { + uint64_t val; + return StringToIntegerImpl(&val, s, base) ? 
val : 0; } typedef bool (*LoadFileFunction)(const char *filename, bool binary, diff --git a/src/idl_parser.cpp b/src/idl_parser.cpp index 50ec7608309..c32f59af11c 100644 --- a/src/idl_parser.cpp +++ b/src/idl_parser.cpp @@ -219,7 +219,7 @@ CheckedError Parser::ParseHexNum(int nibbles, uint64_t *val) { return Error("escape code must be followed by " + NumToString(nibbles) + " hex digits"); std::string target(cursor_, cursor_ + nibbles); - *val = StringToUInt(target.c_str(), nullptr, 16); + *val = StringToUInt(target.c_str(), 16); cursor_ += nibbles; return NoError(); } diff --git a/src/util.cpp b/src/util.cpp index 15a2f53e266..67464acfdad 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -58,6 +58,7 @@ bool FileExists(const char *name) { bool DirExists(const char *name) { // clang-format off + #ifdef _WIN32 #define flatbuffers_stat _stat #define FLATBUFFERS_S_IFDIR _S_IFDIR @@ -85,4 +86,25 @@ FileExistsFunction SetFileExistsFunction( return previous_function; } +// Locale-independent code. +#if defined(FLATBUFFERS_LOCALE_INDEPENDENT) && \ + (FLATBUFFERS_LOCALE_INDEPENDENT > 0) + +// clang-format off +// Allocate locale instance at startup of application. +ClassicLocale ClassicLocale::instance_; + +#ifdef _MSC_VER + ClassicLocale::ClassicLocale() + : locale_(_create_locale(LC_ALL, "C")) {} + ClassicLocale::~ClassicLocale() { _free_locale(locale_); } +#else + ClassicLocale::ClassicLocale() + : locale_(newlocale(LC_ALL, "C", nullptr)) {} + ClassicLocale::~ClassicLocale() { freelocale(locale_); } +#endif +// clang-format off + +#endif // !FLATBUFFERS_LOCALE_INDEPENDENT + } // namespace flatbuffers diff --git a/tests/fuzzer/CMakeLists.txt b/tests/fuzzer/CMakeLists.txt index f0d1cdc79ea..ad82f08ee2b 100644 --- a/tests/fuzzer/CMakeLists.txt +++ b/tests/fuzzer/CMakeLists.txt @@ -83,14 +83,14 @@ target_compile_definitions(flatbuffers PRIVATE FLATBUFFERS_MAX_PARSING_DEPTH=8) # Change default ASCII locale (affects to isalpha, isalnum, decimal # delimiters, other). 
https://en.cppreference.com/w/cpp/locale/setlocale -if(DEFINED FUZZ_TEST_LOCALE) - # Enable locale independent code and define locale for tests. - # -DFUZZ_TEST_LOCALE="" - enable, but test with default locale - # -DFUZZ_TEST_LOCALE="ru_RU.CP1251" - enable and test with ru_RU.CP1251 +if(DEFINED FLATBUFFERS_TEST_LOCALE) + # Enable test of locale independent code. + # -DFLATBUFFERS_TEST_LOCALE="" - test with default C-locale + # -DFLATBUFFERS_TEST_LOCALE="ru_RU.CP1251" - test with ru_RU.CP1251 # Locale was installed before (Ubuntu):>sudo locale-gen ru_RU.CP1251 - add_definitions(-DFUZZ_TEST_LOCALE=\"${FUZZ_TEST_LOCALE}\") + add_definitions(-DFLATBUFFERS_TEST_LOCALE=\"${FLATBUFFERS_TEST_LOCALE}\") endif() -message(STATUS "FUZZ_TEST_LOCALE: ${FUZZ_TEST_LOCALE}") +message(STATUS "FLATBUFFERS_TEST_LOCALE: ${FLATBUFFERS_TEST_LOCALE}") add_executable(scalar_fuzzer flatbuffers_scalar_fuzzer.cc) target_link_libraries(scalar_fuzzer PRIVATE flatbuffers) diff --git a/tests/fuzzer/flatbuffers_parser_fuzzer.cc b/tests/fuzzer/flatbuffers_parser_fuzzer.cc index e3e4d2d7804..09e10cd55bb 100644 --- a/tests/fuzzer/flatbuffers_parser_fuzzer.cc +++ b/tests/fuzzer/flatbuffers_parser_fuzzer.cc @@ -18,8 +18,8 @@ static constexpr uint8_t flags_allow_non_utf8 = 0x04; // static constexpr uint8_t flags_flag_7 = 0x80; // See readme.md and CMakeLists.txt for details. 
-#ifdef FUZZ_TEST_LOCALE -static constexpr const char *test_locale = (FUZZ_TEST_LOCALE); +#ifdef FLATBUFFERS_TEST_LOCALE +static constexpr const char *test_locale = (FLATBUFFERS_TEST_LOCALE); #else static constexpr const char *test_locale = nullptr; #endif diff --git a/tests/fuzzer/flatbuffers_scalar_fuzzer.cc b/tests/fuzzer/flatbuffers_scalar_fuzzer.cc index cb2a6e32295..b792428f969 100644 --- a/tests/fuzzer/flatbuffers_scalar_fuzzer.cc +++ b/tests/fuzzer/flatbuffers_scalar_fuzzer.cc @@ -21,8 +21,8 @@ static constexpr uint8_t flags_quotes_kind = 0x10; // quote " or ' // static constexpr uint8_t flags_json_bracer = 0x20; // See readme.md and CMakeLists.txt for details. -#ifdef FUZZ_TEST_LOCALE -static constexpr const char *test_locale = (FUZZ_TEST_LOCALE); +#ifdef FLATBUFFERS_TEST_LOCALE +static constexpr const char *test_locale = (FLATBUFFERS_TEST_LOCALE); #else static constexpr const char *test_locale = nullptr; #endif @@ -309,13 +309,16 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { auto orig_scalar = "{ \"Y\" : " + input + " }"; std::string orig_back; auto orig_done = Parse(parser, orig_scalar, &orig_back); + if (recheck.res != orig_done) { // look for "does not fit" or "doesn't fit" or "out of range" - auto parser_not_fit = - (orig_back.find("does not fit") == std::string::npos) || - (orig_back.find("out of range") == std::string::npos); + auto not_fit = + (true == recheck.res) + ? 
((orig_back.find("does not fit") != std::string::npos) || + (orig_back.find("out of range") != std::string::npos)) + : false; - if ((false == recheck.res) || (false == parser_not_fit)) { + if (false == not_fit) { TEST_OUTPUT_LINE("Stage 1 failed: Parser(%d) != Regex(%d)", orig_done, recheck.res); TEST_EQ_STR(orig_back.c_str(), @@ -344,6 +347,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { auto fix_scalar = "{ \"Y\" : " + qouted_input + " }"; std::string fix_back; auto fix_done = Parse(parser, fix_scalar, &fix_back); + if (orig_done != fix_done) { TEST_OUTPUT_LINE("Stage 2 failed: Parser(%d) != Regex(%d)", fix_done, orig_done); @@ -353,9 +357,34 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { TEST_EQ_FUNC(fix_done, orig_done); } + // Create new parser and test default value + if (true == orig_done) { + flatbuffers::Parser def_parser(opts); // re-use options + auto def_schema = "table X { Y: " + std::string(ref_res.type) + " = " + + input + "; } root_type X;" + + "{}"; // <- with empty json {}! + + auto def_done = def_parser.Parse(def_schema.c_str()); + if (false == def_done) { + TEST_OUTPUT_LINE("Stage 3.1 failed with _error = %s", + def_parser.error_.c_str()); + FLATBUFFERS_ASSERT(false); + } + // Compare with print. + std::string ref_string, def_string; + FLATBUFFERS_ASSERT(GenerateText( + parser, parser.builder_.GetBufferPointer(), &ref_string)); + FLATBUFFERS_ASSERT(GenerateText( + def_parser, def_parser.builder_.GetBufferPointer(), &def_string)); + if (ref_string != def_string) { + TEST_OUTPUT_LINE("Stage 3.2 failed: '%s' != '%s'", def_string.c_str(), + ref_string.c_str()); + FLATBUFFERS_ASSERT(false); + } + } + // Restore locale. 
if (use_locale) { FLATBUFFERS_ASSERT(!!std::setlocale(LC_ALL, "C")); } } - return 0; } diff --git a/tests/fuzzer/readme.md b/tests/fuzzer/readme.md index e1171be4b33..91ea8b7a568 100644 --- a/tests/fuzzer/readme.md +++ b/tests/fuzzer/readme.md @@ -19,16 +19,16 @@ The fuzzer section include three tests: Flatbuffers library use only printable-ASCII characters as characters of grammar alphabet for type and data declaration. This alphabet is fully compatible with JSON specification and make schema declaration fully portable. Flatbuffers library is independent from global or thread locales used by end-user application. -To run fuzzer tests with selected C-locale under test pass `-DFUZZ_TEST_LOCALE=""` to CMake when configuring. +To run fuzzer tests with selected C-locale under test pass `-DFLATBUFFERS_TEST_LOCALE=""` to CMake when configuring. Selected locale must be installed in system before use. Command line: ```sh -cmake .. -DFUZZ_TEST_LOCALE="ru_RU.CP1251" +cmake .. -DFLATBUFFERS_TEST_LOCALE="ru_RU.CP1251" ``` If use VSCode, use `cmake.configureSettings` section of workspace settings: ```json "cmake.configureSettings": { - "FUZZ_TEST_LOCALE" : "ru_RU.CP1251" + "FLATBUFFERS_TEST_LOCALE" : "ru_RU.CP1251" } ``` @@ -53,8 +53,8 @@ The **libFuzzer** allow to filter (minimize) corpus with help of `-merge` flag: If set to 1, any corpus inputs from the 2nd, 3rd etc. corpus directories that trigger new code coverage will be merged into the first corpus directory. Defaults to 0. This flag can be used to minimize a corpus. -Merge several seeds to one: -`./scalar_fuzzer -merge=1 ../.corpus/ ../.seed_1/ ../.seed_2/` +Merge several seeds to one (a new collected corpus to the seed collection, for example): +`./scalar_fuzzer -merge=1 ../.seed_parser/ ../.corpus_parser/` ## Know limitations - LLVM 7.0 std::regex library has problem with stack overflow, maximum length of input for `scalar_fuzzer` run should be limited to 3000. 
diff --git a/tests/test.cpp b/tests/test.cpp index 2730026bfc3..00233c1656c 100644 --- a/tests/test.cpp +++ b/tests/test.cpp @@ -13,6 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#if defined(FLATBUFFERS_TEST_LOCALE) +#include <clocale> +#endif + #include #include "flatbuffers/flatbuffers.h" #include "flatbuffers/idl.h" @@ -1530,7 +1534,7 @@ void ValidFloatTest() { // Old MSVC versions may have problem with this check. // https://www.exploringbinary.com/visual-c-plus-plus-strtod-still-broken/ TEST_EQ(TestValue("{ Y:6.9294956446009195e15 }", "double"), - 6929495644600920); + 6929495644600920.0); // check nan's TEST_EQ(std::isnan(TestValue("{ Y:nan }", "double")), true); TEST_EQ(std::isnan(TestValue("{ Y:nan }", "float")), true); @@ -1658,6 +1662,7 @@ void NumericUtilsTestInteger(const char *lower, const char *upper) { template void NumericUtilsTestFloat(const char *lower, const char *upper) { T f; + TEST_EQ(flatbuffers::StringToNumber("", &f), false); TEST_EQ(flatbuffers::StringToNumber("1q", &f), false); TEST_EQ(f, 0); TEST_EQ(flatbuffers::StringToNumber(upper, &f), true); @@ -2453,6 +2458,16 @@ int FlatBufferTests() { int main(int /*argc*/, const char * /*argv*/ []) { InitTestEngine(); + // clang-format off + // If testing with specific C-locale is requested. + #ifdef FLATBUFFERS_TEST_LOCALE + // Assume that FLATBUFFERS_TEST_LOCALE is a string with locale name. + const auto loc_name = std::setlocale(LC_ALL, FLATBUFFERS_TEST_LOCALE); + TEST_NOTNULL(loc_name); + TEST_OUTPUT_LINE("The global locale is: %s", loc_name); + #endif + // clang-format on + FlatBufferTests(); FlatBufferBuilderTest();