Skip to content

Commit

Permalink
Prevent ndarray size vector from recursive use, fix #3503 (#3505)
Browse files: browse the repository at this point in the history.
* Prevent ndarray size vector from recursive use, fix #3503

* fix CI error

* complete coverage

* add missing coverage

* fix style issue in added test
  • Loading branch information
fangq authored May 23, 2022
1 parent 41226d0 commit ede6667
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 26 deletions.
36 changes: 23 additions & 13 deletions include/nlohmann/detail/input/binary_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1938,9 +1938,9 @@ class binary_reader
{
std::pair<std::size_t, char_int_type> size_and_type;
size_t dimlen = 0;
bool is_ndarray = true;
bool inside_ndarray = true;

if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type, inside_ndarray)))
{
return false;
}
Expand All @@ -1953,7 +1953,7 @@ class binary_reader
{
for (std::size_t i = 0; i < size_and_type.first; ++i)
{
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray, size_and_type.second)))
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, inside_ndarray, size_and_type.second)))
{
return false;
}
Expand All @@ -1965,7 +1965,7 @@ class binary_reader
{
for (std::size_t i = 0; i < size_and_type.first; ++i)
{
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray)))
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, inside_ndarray)))
{
return false;
}
Expand All @@ -1977,7 +1977,7 @@ class binary_reader
{
while (current != ']')
{
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray, current)))
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, inside_ndarray, current)))
{
return false;
}
Expand All @@ -1990,9 +1990,12 @@ class binary_reader

/*!
@param[out] result determined size
@param[in,out] inside_ndarray whether the parser is parsing an ND array dimensional vector
@param[in] prefix type marker if already read, otherwise set to 0
@return whether size determination completed
*/
bool get_ubjson_size_value(std::size_t& result, bool& is_ndarray, char_int_type prefix = 0)
bool get_ubjson_size_value(std::size_t& result, bool& inside_ndarray, char_int_type prefix = 0)
{
if (prefix == 0)
{
Expand Down Expand Up @@ -2127,7 +2130,7 @@ class binary_reader
{
break;
}
if (is_ndarray) // ndarray dimensional vector can only contain integers, and can not embed another array
if (inside_ndarray) // ndarray dimensional vector can only contain integers, and can not embed another array
{
return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, exception_message(input_format, "ndarray dimention vector can only contain integers", "size"), nullptr));
}
Expand Down Expand Up @@ -2166,7 +2169,7 @@ class binary_reader
return false;
}
}
is_ndarray = true;
inside_ndarray = true;
return sax->end_array();
}
result = 0;
Expand Down Expand Up @@ -2197,14 +2200,15 @@ class binary_reader
for a more compact representation.
@param[out] result pair of the size and the type
@param[in] inside_ndarray whether the parser is parsing an ND array dimensional vector
@return whether pair creation completed
*/
bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result)
bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result, bool inside_ndarray = false)
{
bool is_ndarray = false;
result.first = string_t::npos; // size
result.second = 0; // type
bool is_ndarray = false;

get_ignore_noop();

Expand Down Expand Up @@ -2240,6 +2244,11 @@ class binary_reader
bool is_error = get_ubjson_size_value(result.first, is_ndarray);
if (input_format == input_format_t::bjdata && is_ndarray)
{
if (inside_ndarray)
{
return sax->parse_error(chars_read, get_token_string(), parse_error::create(112, chars_read,
exception_message(input_format, "ndarray can not be recursive", "size"), nullptr));
}
result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters
}
return is_error;
Expand All @@ -2250,7 +2259,8 @@ class binary_reader
bool is_error = get_ubjson_size_value(result.first, is_ndarray);
if (input_format == input_format_t::bjdata && is_ndarray)
{
result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters
return sax->parse_error(chars_read, get_token_string(), parse_error::create(112, chars_read,
exception_message(input_format, "ndarray requires both type and size", "size"), nullptr));
}
return is_error;
}
Expand Down Expand Up @@ -2640,8 +2650,8 @@ class binary_reader
{
// get size of following number string
std::size_t size{};
bool is_ndarray = false;
auto res = get_ubjson_size_value(size, is_ndarray);
bool inside_ndarray = false;
auto res = get_ubjson_size_value(size, inside_ndarray);
if (JSON_HEDLEY_UNLIKELY(!res))
{
return res;
Expand Down
36 changes: 23 additions & 13 deletions single_include/nlohmann/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10421,9 +10421,9 @@ class binary_reader
{
std::pair<std::size_t, char_int_type> size_and_type;
size_t dimlen = 0;
bool is_ndarray = true;
bool inside_ndarray = true;

if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type)))
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_type(size_and_type, inside_ndarray)))
{
return false;
}
Expand All @@ -10436,7 +10436,7 @@ class binary_reader
{
for (std::size_t i = 0; i < size_and_type.first; ++i)
{
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray, size_and_type.second)))
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, inside_ndarray, size_and_type.second)))
{
return false;
}
Expand All @@ -10448,7 +10448,7 @@ class binary_reader
{
for (std::size_t i = 0; i < size_and_type.first; ++i)
{
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray)))
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, inside_ndarray)))
{
return false;
}
Expand All @@ -10460,7 +10460,7 @@ class binary_reader
{
while (current != ']')
{
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, is_ndarray, current)))
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_size_value(dimlen, inside_ndarray, current)))
{
return false;
}
Expand All @@ -10473,9 +10473,12 @@ class binary_reader

/*!
@param[out] result determined size
@param[in,out] inside_ndarray whether the parser is parsing an ND array dimensional vector
@param[in] prefix type marker if already read, otherwise set to 0

@return whether size determination completed
*/
bool get_ubjson_size_value(std::size_t& result, bool& is_ndarray, char_int_type prefix = 0)
bool get_ubjson_size_value(std::size_t& result, bool& inside_ndarray, char_int_type prefix = 0)
{
if (prefix == 0)
{
Expand Down Expand Up @@ -10610,7 +10613,7 @@ class binary_reader
{
break;
}
if (is_ndarray) // ndarray dimensional vector can only contain integers, and can not embed another array
if (inside_ndarray) // ndarray dimensional vector can only contain integers, and can not embed another array
{
return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, exception_message(input_format, "ndarray dimention vector can only contain integers", "size"), nullptr));
}
Expand Down Expand Up @@ -10649,7 +10652,7 @@ class binary_reader
return false;
}
}
is_ndarray = true;
inside_ndarray = true;
return sax->end_array();
}
result = 0;
Expand Down Expand Up @@ -10680,14 +10683,15 @@ class binary_reader
for a more compact representation.

@param[out] result pair of the size and the type
@param[in] inside_ndarray whether the parser is parsing an ND array dimensional vector

@return whether pair creation completed
*/
bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result)
bool get_ubjson_size_type(std::pair<std::size_t, char_int_type>& result, bool inside_ndarray = false)
{
bool is_ndarray = false;
result.first = string_t::npos; // size
result.second = 0; // type
bool is_ndarray = false;

get_ignore_noop();

Expand Down Expand Up @@ -10723,6 +10727,11 @@ class binary_reader
bool is_error = get_ubjson_size_value(result.first, is_ndarray);
if (input_format == input_format_t::bjdata && is_ndarray)
{
if (inside_ndarray)
{
return sax->parse_error(chars_read, get_token_string(), parse_error::create(112, chars_read,
exception_message(input_format, "ndarray can not be recursive", "size"), nullptr));
}
result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters
}
return is_error;
Expand All @@ -10733,7 +10742,8 @@ class binary_reader
bool is_error = get_ubjson_size_value(result.first, is_ndarray);
if (input_format == input_format_t::bjdata && is_ndarray)
{
result.second |= (1 << 8); // use bit 8 to indicate ndarray, all UBJSON and BJData markers should be ASCII letters
return sax->parse_error(chars_read, get_token_string(), parse_error::create(112, chars_read,
exception_message(input_format, "ndarray requires both type and size", "size"), nullptr));
}
return is_error;
}
Expand Down Expand Up @@ -11123,8 +11133,8 @@ class binary_reader
{
// get size of following number string
std::size_t size{};
bool is_ndarray = false;
auto res = get_ubjson_size_value(size, is_ndarray);
bool inside_ndarray = false;
auto res = get_ubjson_size_value(size, inside_ndarray);
if (JSON_HEDLEY_UNLIKELY(!res))
{
return res;
Expand Down
31 changes: 31 additions & 0 deletions tests/src/unit-bjdata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2372,6 +2372,7 @@ TEST_CASE("BJData")
std::vector<uint8_t> v_D = {'[', '$', 'D', '#', '[', 'i', 1, 'i', 2, ']', 0x4a, 0xd8, 0x12, 0x4d, 0xfb, 0x21, 0x09, 0x40, 0x4a, 0xd8, 0x12, 0x4d, 0xfb, 0x21, 0x09, 0x40};
std::vector<uint8_t> v_S = {'[', '#', '[', 'i', 1, 'i', 2, ']', 'S', 'i', 1, 'a', 'S', 'i', 1, 'a'};
std::vector<uint8_t> v_C = {'[', '$', 'C', '#', '[', 'i', 1, 'i', 2, ']', 'a', 'a'};
std::vector<uint8_t> v_R = {'[', '#', '[', 'i', 2, ']', 'i', 6, 'U', 7};

// check if vector is parsed correctly
CHECK(json::from_bjdata(v_0) == json::array());
Expand All @@ -2387,6 +2388,7 @@ TEST_CASE("BJData")
CHECK(json::from_bjdata(v_D) == json({3.1415926, 3.1415926}));
CHECK(json::from_bjdata(v_S) == json({"a", "a"}));
CHECK(json::from_bjdata(v_C) == json({"a", "a"}));
CHECK(json::from_bjdata(v_R) == json({6, 7}));
}

SECTION("optimized ndarray (type and vector-size as size-optimized array)")
Expand Down Expand Up @@ -2750,6 +2752,30 @@ TEST_CASE("BJData")
std::vector<uint8_t> vRo = {'[', '$', 'i', '#', '[', 'i', 0, '{', '}', ']', 1};
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(vRo), "[json.exception.parse_error.113] parse error at byte 8: syntax error while parsing BJData size: expected length type specification (U, i, u, I, m, l, M, L) after '#'; last byte: 0x7B", json::parse_error&);
CHECK(json::from_bjdata(vRo, true, false).is_discarded());

std::vector<uint8_t> vR1 = {'[', '$', 'i', '#', '[', '[', 'i', 1, ']', ']', 1};
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(vR1), "[json.exception.parse_error.113] parse error at byte 6: syntax error while parsing BJData size: ndarray dimention vector can only contain integers", json::parse_error&);
CHECK(json::from_bjdata(vR1, true, false).is_discarded());

std::vector<uint8_t> vR2 = {'[', '$', 'i', '#', '[', '#', '[', 'i', 1, ']', ']', 1};
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(vR2), "[json.exception.parse_error.113] parse error at byte 11: syntax error while parsing BJData size: expected length type specification (U, i, u, I, m, l, M, L) after '#'; last byte: 0x5D", json::parse_error&);
CHECK(json::from_bjdata(vR2, true, false).is_discarded());

std::vector<uint8_t> vR3 = {'[', '#', '[', 'i', '2', 'i', 2, ']'};
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(vR3), "[json.exception.parse_error.112] parse error at byte 8: syntax error while parsing BJData size: ndarray requires both type and size", json::parse_error&);
CHECK(json::from_bjdata(vR3, true, false).is_discarded());

std::vector<uint8_t> vR4 = {'[', '$', 'i', '#', '[', '$', 'i', '#', '[', 'i', 1, ']', 1};
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(vR4), "[json.exception.parse_error.110] parse error at byte 14: syntax error while parsing BJData number: unexpected end of input", json::parse_error&);
CHECK(json::from_bjdata(vR4, true, false).is_discarded());

std::vector<uint8_t> vR5 = {'[', '$', 'i', '#', '[', '[', '[', ']', ']', ']'};
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(vR5), "[json.exception.parse_error.113] parse error at byte 6: syntax error while parsing BJData size: ndarray dimention vector can only contain integers", json::parse_error&);
CHECK(json::from_bjdata(vR5, true, false).is_discarded());

std::vector<uint8_t> vR6 = {'[', '$', 'i', '#', '[', '$', 'i', '#', '[', 'i', '2', 'i', 2, ']'};
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(vR6), "[json.exception.parse_error.112] parse error at byte 14: syntax error while parsing BJData size: ndarray can not be recursive", json::parse_error&);
CHECK(json::from_bjdata(vR6, true, false).is_discarded());
}

SECTION("objects")
Expand Down Expand Up @@ -2788,7 +2814,12 @@ TEST_CASE("BJData")
CHECK(json::from_bjdata(vST2, true, false).is_discarded());

std::vector<uint8_t> vO = {'{', '#', '[', 'i', 2, 'i', 1, ']', 'i', 1, 'a', 'i', 1, 'i', 1, 'b', 'i', 2};
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(vO), "[json.exception.parse_error.112] parse error at byte 8: syntax error while parsing BJData size: ndarray requires both type and size", json::parse_error&);
CHECK(json::from_bjdata(vO, true, false).is_discarded());

std::vector<uint8_t> vO2 = {'{', '$', 'i', '#', '[', 'i', 2, 'i', 1, ']', 'i', 1, 'a', 1, 'i', 1, 'b', 2};
CHECK_THROWS_WITH_AS(_ = json::from_bjdata(vO2), "[json.exception.parse_error.112] parse error at byte 10: syntax error while parsing BJData object: BJData object does not support ND-array size in optimized format", json::parse_error&);
CHECK(json::from_bjdata(vO2, true, false).is_discarded());
}
}

Expand Down

0 comments on commit ede6667

Please sign in to comment.