diff --git a/tools/inspector_protocol/encoding/encoding.cc b/tools/inspector_protocol/encoding/encoding.cc index 636281dd8ad894..f7e933e41afac5 100644 --- a/tools/inspector_protocol/encoding/encoding.cc +++ b/tools/inspector_protocol/encoding/encoding.cc @@ -847,7 +847,7 @@ void CBORTokenizer::ReadNextToken(bool enter_envelope) { // value 0). // The represented allowed values range is -1 to -2^31. // They are mapped into the encoded range of 0 to 2^31-1. - // We check the the payload in token_start_internal_value_ against + // We check the payload in token_start_internal_value_ against // that range (2^31-1 is also known as // std::numeric_limits::max()). if (!bytes_read || @@ -1386,7 +1386,7 @@ class JSONEncoder : public StreamingParserHandler { // Disallow overlong encodings for ascii characters, as these // would include " and other characters significant to JSON // string termination / control. - if (codepoint < 0x7f) + if (codepoint <= 0x7f) continue; // Invalid in UTF8, and can't be represented in UTF16 anyway. if (codepoint > 0x10ffff) diff --git a/tools/inspector_protocol/encoding/encoding.h b/tools/inspector_protocol/encoding/encoding.h index 14432484d55b9d..a1bcfc4be3db4a 100644 --- a/tools/inspector_protocol/encoding/encoding.h +++ b/tools/inspector_protocol/encoding/encoding.h @@ -5,6 +5,7 @@ #ifndef V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_ #define V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_ +#include #include #include #include @@ -14,6 +15,19 @@ #include namespace v8_inspector_protocol_encoding { +// This library is designed to be portable. The only allowed dependencies +// are the C/C++ standard libraries, up to C++11. We support both 32 bit +// and 64 bit architectures. +// +// Types used below: +// uint8_t: a byte, e.g. for raw bytes or UTF8 characters +// uint16_t: two bytes, e.g. 
for UTF16 characters +// For input parameters: +// span<uint8_t>: pointer to bytes and length +// span<uint16_t>: pointer to UTF16 chars and length +// For output parameters: +// std::vector<uint8_t> - Owned segment of bytes / utf8 characters and length. +// std::string - Same, for compatibility, even though char is signed. // ============================================================================= // span - sequence of bytes @@ -72,6 +86,22 @@ inline span SpanFrom(const std::string& v) { return span(reinterpret_cast(v.data()), v.size()); } +// Less than / equality comparison functions for sorting / searching for byte +// spans. These are similar to absl::string_view's < and == operators. +inline bool SpanLessThan(span x, span y) noexcept { + auto min_size = std::min(x.size(), y.size()); + const int r = min_size == 0 ? 0 : memcmp(x.data(), y.data(), min_size); + return (r < 0) || (r == 0 && x.size() < y.size()); +} + +inline bool SpanEquals(span x, span y) noexcept { + auto len = x.size(); + if (len != y.size()) + return false; + return x.data() == y.data() || len == 0 || + std::memcmp(x.data(), y.data(), len) == 0; +} + // ============================================================================= // Status and Error codes // ============================================================================= diff --git a/tools/inspector_protocol/encoding/encoding_test.cc b/tools/inspector_protocol/encoding/encoding_test.cc index 6893fe2581683c..f6b40dfcefe2df 100644 --- a/tools/inspector_protocol/encoding/encoding_test.cc +++ b/tools/inspector_protocol/encoding/encoding_test.cc @@ -121,6 +121,28 @@ TEST(SpanFromTest, FromConstCharAndLiteral) { EXPECT_EQ(3u, SpanFrom("foo").size()); } +TEST(SpanComparisons, ByteWiseLexicographicalOrder) { + // Compare the empty span. + EXPECT_FALSE(SpanLessThan(span(), span())); + EXPECT_TRUE(SpanEquals(span(), span())); + + // Compare message with itself. 
+ std::string msg = "Hello, world"; + EXPECT_FALSE(SpanLessThan(SpanFrom(msg), SpanFrom(msg))); + EXPECT_TRUE(SpanEquals(SpanFrom(msg), SpanFrom(msg))); + + // Compare message and copy. + EXPECT_FALSE(SpanLessThan(SpanFrom(msg), SpanFrom(std::string(msg)))); + EXPECT_TRUE(SpanEquals(SpanFrom(msg), SpanFrom(std::string(msg)))); + + // Compare two messages. |lesser_msg| < |msg| because of the first + // byte ('A' < 'H'). + std::string lesser_msg = "A lesser message."; + EXPECT_TRUE(SpanLessThan(SpanFrom(lesser_msg), SpanFrom(msg))); + EXPECT_FALSE(SpanLessThan(SpanFrom(msg), SpanFrom(lesser_msg))); + EXPECT_FALSE(SpanEquals(SpanFrom(msg), SpanFrom(lesser_msg))); +} + // ============================================================================= // Status and Error codes // ============================================================================= @@ -1325,6 +1347,25 @@ void WriteUTF8AsUTF16(StreamingParserHandler* writer, const std::string& utf8) { writer->HandleString16(SpanFrom(UTF8ToUTF16(SpanFrom(utf8)))); } +TEST(JsonEncoder, OverlongEncodings) { + std::string out; + Status status; + std::unique_ptr writer = + NewJSONEncoder(&GetTestPlatform(), &out, &status); + + // We encode 0x7f, which is the DEL ascii character, as a 4 byte UTF8 + // sequence. This is called an overlong encoding, because only 1 byte + // is needed to represent 0x7f as UTF8. + std::vector chars = { + 0xf0, // Starts 4 byte utf8 sequence + 0x80, // continuation byte + 0x81, // continuation byte w/ codepoint bit 6 set to 1. + 0xbf, // continuation byte w/ codepoint bits 0-5 set to 111111. + }; + writer->HandleString8(SpanFrom(chars)); + EXPECT_EQ("\"\"", out); // Empty string means that 0x7f was rejected (good). 
+} + TEST(JsonStdStringWriterTest, HelloWorld) { std::string out; Status status; @@ -1561,6 +1602,13 @@ TEST_F(JsonParserTest, UsAsciiDelCornerCase) { "string16: a\x7f\n" "map end\n", log_.str()); + + // We've seen an implementation of UTF16ToUTF8 which would replace the DEL + // character with ' ', so this simple roundtrip tests the routines in + // encoding_test_helper.h, to make test failures of the above easier to + // diagnose. + std::vector utf16 = UTF8ToUTF16(SpanFrom(json)); + EXPECT_EQ(json, UTF16ToUTF8(SpanFrom(utf16))); } TEST_F(JsonParserTest, Whitespace) { diff --git a/tools/inspector_protocol/lib/Allocator_h.template b/tools/inspector_protocol/lib/Allocator_h.template index 15eaaaff0236d2..d94c4ca5b0ae99 100644 --- a/tools/inspector_protocol/lib/Allocator_h.template +++ b/tools/inspector_protocol/lib/Allocator_h.template @@ -11,8 +11,6 @@ namespace {{namespace}} { {% endfor %} -enum NotNullTagEnum { NotNullLiteral }; - #define PROTOCOL_DISALLOW_COPY(ClassName) \ private: \ ClassName(const ClassName&) = delete; \ diff --git a/tools/inspector_protocol/lib/DispatcherBase_h.template b/tools/inspector_protocol/lib/DispatcherBase_h.template index 7d859c4f2753bb..4aa0688adb33fc 100644 --- a/tools/inspector_protocol/lib/DispatcherBase_h.template +++ b/tools/inspector_protocol/lib/DispatcherBase_h.template @@ -25,6 +25,9 @@ public: kFallThrough = 2, }; + // For historical reasons, these error codes correspond to commonly used + // XMLRPC codes (e.g. see METHOD_NOT_FOUND in + // https://github.com/python/cpython/blob/master/Lib/xmlrpc/client.py). 
enum ErrorCode { kParseError = -32700, kInvalidRequest = -32600, diff --git a/tools/inspector_protocol/lib/Forward_h.template b/tools/inspector_protocol/lib/Forward_h.template index ff5e685863395b..746ba20bba15f4 100644 --- a/tools/inspector_protocol/lib/Forward_h.template +++ b/tools/inspector_protocol/lib/Forward_h.template @@ -28,7 +28,6 @@ class DispatchResponse; class ErrorSupport; class FundamentalValue; class ListValue; -template class Maybe; class Object; using Response = DispatchResponse; class SerializedValue; diff --git a/tools/inspector_protocol/lib/Maybe_h.template b/tools/inspector_protocol/lib/Maybe_h.template index 22cfac6b240bef..8dfee7e9d5cb72 100644 --- a/tools/inspector_protocol/lib/Maybe_h.template +++ b/tools/inspector_protocol/lib/Maybe_h.template @@ -7,53 +7,19 @@ #ifndef {{"_".join(config.protocol.namespace)}}_Maybe_h #define {{"_".join(config.protocol.namespace)}}_Maybe_h -// This macro allows to test for the version of the GNU C++ compiler. -// Note that this also applies to compilers that masquerade as GCC, -// for example clang and the Intel C++ compiler for Linux. -// Use like: -// #if IP_GNUC_PREREQ(4, 3, 1) -// ... -// #endif -#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) -#define IP_GNUC_PREREQ(major, minor, patchlevel) \ - ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= \ - ((major)*10000 + (minor)*100 + (patchlevel))) -#elif defined(__GNUC__) && defined(__GNUC_MINOR__) -#define IP_GNUC_PREREQ(major, minor, patchlevel) \ - ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= \ - ((major)*10000 + (minor)*100 + (patchlevel))) -#else -#define IP_GNUC_PREREQ(major, minor, patchlevel) 0 -#endif - -#if defined(__mips64) -#define IP_TARGET_ARCH_MIPS64 1 -#elif defined(__MIPSEB__) || defined(__MIPSEL__) -#define IP_TARGET_ARCH_MIPS 1 -#endif - -// Allowing the use of noexcept by removing the keyword on older compilers that -// do not support adding noexcept to default members. 
-#if ((IP_GNUC_PREREQ(4, 9, 0) && !defined(IP_TARGET_ARCH_MIPS) && \ - !defined(IP_TARGET_ARCH_MIPS64)) || \ - (defined(__clang__) && __cplusplus > 201300L)) -#define IP_NOEXCEPT noexcept -#else -#define IP_NOEXCEPT -#endif - //#include "Forward.h" {% for namespace in config.protocol.namespace %} namespace {{namespace}} { {% endfor %} +namespace detail { template -class Maybe { +class PtrMaybe { public: - Maybe() : m_value() { } - Maybe(std::unique_ptr value) : m_value(std::move(value)) { } - Maybe(Maybe&& other) IP_NOEXCEPT : m_value(std::move(other.m_value)) {} + PtrMaybe() = default; + PtrMaybe(std::unique_ptr value) : m_value(std::move(value)) { } + PtrMaybe(PtrMaybe&& other) noexcept : m_value(std::move(other.m_value)) {} void operator=(std::unique_ptr value) { m_value = std::move(value); } T* fromJust() const { DCHECK(m_value); return m_value.get(); } T* fromMaybe(T* defaultValue) const { return m_value ? m_value.get() : defaultValue; } @@ -64,76 +30,48 @@ private: }; template -class MaybeBase { +class ValueMaybe { public: - MaybeBase() : m_isJust(false) { } - MaybeBase(T value) : m_isJust(true), m_value(value) { } - MaybeBase(MaybeBase&& other) IP_NOEXCEPT + ValueMaybe() : m_isJust(false), m_value() { } + ValueMaybe(T value) : m_isJust(true), m_value(std::move(value)) { } + ValueMaybe(ValueMaybe&& other) noexcept : m_isJust(other.m_isJust), m_value(std::move(other.m_value)) {} void operator=(T value) { m_value = value; m_isJust = true; } - T fromJust() const { DCHECK(m_isJust); return m_value; } - T fromMaybe(const T& defaultValue) const { return m_isJust ? m_value : defaultValue; } + const T& fromJust() const { DCHECK(m_isJust); return m_value; } + const T& fromMaybe(const T& defaultValue) const { return m_isJust ? 
m_value : defaultValue; } bool isJust() const { return m_isJust; } - T takeJust() { DCHECK(m_isJust); return m_value; } - -protected: + T takeJust() { DCHECK(m_isJust); return std::move(m_value); } +private: bool m_isJust; T m_value; }; -template<> -class Maybe : public MaybeBase { -public: - Maybe() { m_value = false; } - Maybe(bool value) : MaybeBase(value) { } - Maybe(Maybe&& other) IP_NOEXCEPT : MaybeBase(std::move(other)) {} - using MaybeBase::operator=; -}; +template +struct MaybeTypedef { typedef PtrMaybe type; }; -template<> -class Maybe : public MaybeBase { -public: - Maybe() { m_value = 0; } - Maybe(int value) : MaybeBase(value) { } - Maybe(Maybe&& other) IP_NOEXCEPT : MaybeBase(std::move(other)) {} - using MaybeBase::operator=; -}; +template <> +struct MaybeTypedef { typedef ValueMaybe type; }; -template<> -class Maybe : public MaybeBase { -public: - Maybe() { m_value = 0; } - Maybe(double value) : MaybeBase(value) { } - Maybe(Maybe&& other) IP_NOEXCEPT : MaybeBase(std::move(other)) {} - using MaybeBase::operator=; -}; +template <> +struct MaybeTypedef { typedef ValueMaybe type; }; -template<> -class Maybe : public MaybeBase { -public: - Maybe() { } - Maybe(const String& value) : MaybeBase(value) { } - Maybe(Maybe&& other) IP_NOEXCEPT : MaybeBase(std::move(other)) {} - using MaybeBase::operator=; -}; +template <> +struct MaybeTypedef { typedef ValueMaybe type; }; -template<> -class Maybe : public MaybeBase { -public: - Maybe() { } - Maybe(Binary value) : MaybeBase(value) { } - Maybe(Maybe&& other) IP_NOEXCEPT : MaybeBase(std::move(other)) {} - using MaybeBase::operator=; -}; +template <> +struct MaybeTypedef { typedef ValueMaybe type; }; + +template <> +struct MaybeTypedef { typedef ValueMaybe type; }; + +} // namespace detail + +template +using Maybe = typename detail::MaybeTypedef::type; {% for namespace in config.protocol.namespace %} } // namespace {{namespace}} {% endfor %} -#undef IP_GNUC_PREREQ -#undef IP_TARGET_ARCH_MIPS64 -#undef 
IP_TARGET_ARCH_MIPS -#undef IP_NOEXCEPT - #endif // !defined({{"_".join(config.protocol.namespace)}}_Maybe_h) diff --git a/tools/inspector_protocol/lib/encoding_cpp.template b/tools/inspector_protocol/lib/encoding_cpp.template index d3646491140663..a0377d12f7dbb1 100644 --- a/tools/inspector_protocol/lib/encoding_cpp.template +++ b/tools/inspector_protocol/lib/encoding_cpp.template @@ -855,7 +855,7 @@ void CBORTokenizer::ReadNextToken(bool enter_envelope) { // value 0). // The represented allowed values range is -1 to -2^31. // They are mapped into the encoded range of 0 to 2^31-1. - // We check the the payload in token_start_internal_value_ against + // We check the payload in token_start_internal_value_ against // that range (2^31-1 is also known as // std::numeric_limits::max()). if (!bytes_read || @@ -1394,7 +1394,7 @@ class JSONEncoder : public StreamingParserHandler { // Disallow overlong encodings for ascii characters, as these // would include " and other characters significant to JSON // string termination / control. - if (codepoint < 0x7f) + if (codepoint <= 0x7f) continue; // Invalid in UTF8, and can't be represented in UTF16 anyway. if (codepoint > 0x10ffff) diff --git a/tools/inspector_protocol/lib/encoding_h.template b/tools/inspector_protocol/lib/encoding_h.template index 4d9874bfbd5cb4..2601192e160543 100644 --- a/tools/inspector_protocol/lib/encoding_h.template +++ b/tools/inspector_protocol/lib/encoding_h.template @@ -9,6 +9,7 @@ #ifndef {{"_".join(config.protocol.namespace)}}_encoding_h #define {{"_".join(config.protocol.namespace)}}_encoding_h +#include #include #include #include @@ -23,6 +24,19 @@ namespace {{namespace}} { // ===== encoding/encoding.h ===== +// This library is designed to be portable. The only allowed dependencies +// are the C/C++ standard libraries, up to C++11. We support both 32 bit +// and 64 bit architectures. +// +// Types used below: +// uint8_t: a byte, e.g. for raw bytes or UTF8 characters +// uint16_t: two bytes, e.g. 
for UTF16 characters +// For input parameters: +// span<uint8_t>: pointer to bytes and length +// span<uint16_t>: pointer to UTF16 chars and length +// For output parameters: +// std::vector<uint8_t> - Owned segment of bytes / utf8 characters and length. +// std::string - Same, for compatibility, even though char is signed. // ============================================================================= // span - sequence of bytes @@ -81,6 +95,22 @@ inline span SpanFrom(const std::string& v) { return span(reinterpret_cast(v.data()), v.size()); } +// Less than / equality comparison functions for sorting / searching for byte +// spans. These are similar to absl::string_view's < and == operators. +inline bool SpanLessThan(span x, span y) noexcept { + auto min_size = std::min(x.size(), y.size()); + const int r = min_size == 0 ? 0 : memcmp(x.data(), y.data(), min_size); + return (r < 0) || (r == 0 && x.size() < y.size()); +} + +inline bool SpanEquals(span x, span y) noexcept { + auto len = x.size(); + if (len != y.size()) + return false; + return x.data() == y.data() || len == 0 || + std::memcmp(x.data(), y.data(), len) == 0; +} + // ============================================================================= // Status and Error codes // =============================================================================