Skip to content

Commit

Permalink
better handle special characters
Browse files: browse the repository at this point in the history
  • Loading branch information
altalk23 committed Sep 15, 2024
1 parent cda9807 commit 07900be
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 11 deletions.
22 changes: 22 additions & 0 deletions src/parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,28 @@ std::string parse_string(std::string_view& source, std::string& error) noexcept
value |= take_hex() << 8;
value |= take_hex() << 4;
value |= take_hex();
if (0xd800 <= value && value <= 0xdbff) {
// surrogate pair
if (take_one(source, error) != '\\') {
error = "expected backslash";
return {};
}
if (take_one(source, error) != 'u') {
error = "expected u";
return {};
}
int32_t value2 = 0;
value2 |= take_hex() << 12;
value2 |= take_hex() << 8;
value2 |= take_hex() << 4;
value2 |= take_hex();
if (0xdc00 <= value2 && value2 <= 0xdfff) {
value = 0x10000 + ((value & 0x3ff) << 10) + (value2 & 0x3ff);
} else {
error = "invalid surrogate pair";
return {};
}
}
if (!error.empty()) return {};
encode_utf8(str, value);
} break;
Expand Down
11 changes: 6 additions & 5 deletions src/value.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,11 +221,12 @@ void dump_impl_string(const std::string& str, std::string& result) {
case '"': result += "\\\""sv; break;
case '\\': result += "\\\\"sv; break;
default: {
// TODO: exceptionless dump to make alk happy
// in the meantime, this is better than creating
// an invalid json :+1:
if (c >= 0 && c < 0x20)
throw std::runtime_error("invalid string");
if (c >= 0 && c < 0x20) {
std::array<char, 7> buffer;
snprintf(buffer.data(), buffer.size(), "\\u%04x", c);
result += buffer.data();
break;
}
result.push_back(c); break;
}
}
Expand Down
30 changes: 24 additions & 6 deletions test/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,12 +260,6 @@ TEST_CASE("Invalid json") {
TEST_CASE("Invalid dump") {
matjson::Value obj;
using namespace std::string_literals;
// if this somehow happens (cough cough)
obj["Hello"] = "Wor\x00ld"s;
// then dump() should throw because it would create an invalid json
REQUIRE_THROWS(obj.dump());

obj.as_object().clear();
// no throw
obj.dump();

Expand Down Expand Up @@ -311,4 +305,28 @@ TEST_CASE("Rvalue as_array() return") {
// `auto& arr = get_json().as_array();` should fail to compile, however i can't test that
auto const& arr = get_json().as_array();
REQUIRE(arr.size() == 4);
}

// Parses a document whose strings contain \uXXXX escapes, a literal emoji,
// and a \uXXXX\uXXXX surrogate pair escape, then compares each parsed
// string against the expected text written directly in this source file.
TEST_CASE("Parsing unicode characters") {
    auto document = matjson::parse(R"(
{
"hello": "\u00D3l\u00E1!",
"cool": "😎",
"pair": "\uD83D\uDE00"
}
)");

    // Two single-unit escapes mixed with plain ASCII characters.
    const auto& escapedText = document["hello"].as_string();
    REQUIRE(escapedText == "Ólá!");

    // An emoji written literally in the JSON text (no escape involved).
    const auto& literalEmoji = document["cool"].as_string();
    REQUIRE(literalEmoji == "😎");

    // A high/low surrogate escape pair that must combine into one code point.
    const auto& surrogatePair = document["pair"].as_string();
    REQUIRE(surrogatePair == "😀");
}

// Parses a string made of the short escapes \b \f \n \r \t followed by a
// \u0012 escape and a space, and checks that each one is decoded to the
// corresponding raw character.
TEST_CASE("Special characters") {
    auto document = matjson::parse(R"(
{
"control": "\b\f\n\r\t\u0012 "
}
)");

    // Here the escapes are interpreted by the C++ compiler, so this is the
    // already-decoded text the parser is expected to produce.
    const auto& decoded = document["control"].as_string();
    REQUIRE(decoded == "\b\f\n\r\t\x12 ");
}

0 comments on commit 07900be

Please sign in to comment.