diff --git a/velox/functions/prestosql/SIMDJsonFunctions.h b/velox/functions/prestosql/SIMDJsonFunctions.h index cb4b9f4ec4df..94605a5c0c35 100644 --- a/velox/functions/prestosql/SIMDJsonFunctions.h +++ b/velox/functions/prestosql/SIMDJsonFunctions.h @@ -218,7 +218,8 @@ struct SIMDJsonExtractScalarFunction { return simdjson::SUCCESS; }; - SIMDJSON_TRY(simdJsonExtract(json, jsonPath, consumer)); + auto& extractor = SIMDJsonExtractor::getInstance(jsonPath); + SIMDJSON_TRY(simdJsonExtract(json, extractor, consumer)); if (resultStr.has_value()) { result.copy_from(*resultStr); @@ -284,20 +285,26 @@ struct SIMDJsonExtractFunction { return simdjson::SUCCESS; }; - SIMDJSON_TRY(simdJsonExtract(json, jsonPath, consumer)); + auto& extractor = SIMDJsonExtractor::getInstance(jsonPath); + SIMDJSON_TRY(simdJsonExtract(json, extractor, consumer)); if (resultSize == 0) { - // If the path didn't map to anything in the JSON object, return null. - return simdjson::NO_SUCH_FIELD; - } + if (extractor.isDefinitePath()) { + // If the path didn't map to anything in the JSON object, return null. + return simdjson::NO_SUCH_FIELD; + } - if (resultSize == 1) { + result.copy_from("[]"); + } else if (resultSize == 1 && extractor.isDefinitePath()) { // If there was only one value mapped to by the path, don't wrap it in an // array. result.copy_from(results); } else { // Add the square brackets to make it a valid JSON array. - result.copy_from("[" + results + "]"); + result.reserve(2 + results.size()); + result.append("["); + result.append(results); + result.append("]"); } return simdjson::SUCCESS; } @@ -349,7 +356,8 @@ struct SIMDJsonSizeFunction { return simdjson::SUCCESS; }; - SIMDJSON_TRY(simdJsonExtract(json, jsonPath, consumer)); + auto& extractor = SIMDJsonExtractor::getInstance(jsonPath); + SIMDJSON_TRY(simdJsonExtract(json, extractor, consumer)); if (resultCount == 0) { // If the path didn't map to anything in the JSON object, return null. diff --git a/velox/functions/prestosql/json/SIMDJsonExtractor.cpp b/velox/functions/prestosql/json/SIMDJsonExtractor.cpp index 422618204d02..b65a7cccf905 100644 --- a/velox/functions/prestosql/json/SIMDJsonExtractor.cpp +++ b/velox/functions/prestosql/json/SIMDJsonExtractor.cpp @@ -16,7 +16,11 @@ #include "velox/functions/prestosql/json/SIMDJsonExtractor.h" -namespace facebook::velox::functions::detail { +namespace facebook::velox::functions { +namespace { +using JsonVector = std::vector; +} + /* static */ SIMDJsonExtractor& SIMDJsonExtractor::getInstance( folly::StringPiece path) { // Cache tokenize operations in JsonExtractor across invocations in the same @@ -94,4 +98,4 @@ simdjson::error_code extractArray( } return simdjson::SUCCESS; } -} // namespace facebook::velox::functions::detail +} // namespace facebook::velox::functions diff --git a/velox/functions/prestosql/json/SIMDJsonExtractor.h b/velox/functions/prestosql/json/SIMDJsonExtractor.h index 957c928f0941..a816ec08874d 100644 --- a/velox/functions/prestosql/json/SIMDJsonExtractor.h +++ b/velox/functions/prestosql/json/SIMDJsonExtractor.h @@ -27,16 +27,6 @@ namespace facebook::velox::functions { -template -simdjson::error_code simdJsonExtract( - const velox::StringView& json, - const velox::StringView& path, - TConsumer&& consumer); - -namespace detail { - -using JsonVector = std::vector; - class SIMDJsonExtractor { public: template @@ -45,14 +35,26 @@ class SIMDJsonExtractor { TConsumer& consumer, size_t tokenStartIndex = 0); - // Returns true if this extractor was initialized with the trivial path "$". + /// Returns true if this extractor was initialized with the trivial path "$". bool isRootOnlyPath() { return tokens_.empty(); } - // Use this method to get an instance of SIMDJsonExtractor given a JSON path. - // Given the nature of the cache, it's important this is only used by - // simdJsonExtract. + /// Returns true if this extractor was initialized with a path that's + /// guaranteed to match at most one entry. + bool isDefinitePath() { + for (const auto& token : tokens_) { + if (token == "*") { + return false; + } + } + + return true; + } + + /// Use this method to get an instance of SIMDJsonExtractor given a JSON path. + /// Given the nature of the cache, it's important this is only used by + /// the callers of simdJsonExtract. static SIMDJsonExtractor& getInstance(folly::StringPiece path); private: @@ -123,9 +125,7 @@ simdjson::error_code SIMDJsonExtractor::extract( } return consumer(input); -} - -} // namespace detail +}; /** * Extract element(s) from a JSON object using the given path. @@ -145,15 +145,11 @@ simdjson::error_code SIMDJsonExtractor::extract( * @return Return simdjson::SUCCESS on success. * If any errors are encountered parsing the JSON, returns the error. */ - template simdjson::error_code simdJsonExtract( const velox::StringView& json, - const velox::StringView& path, + SIMDJsonExtractor& extractor, TConsumer&& consumer) { - // If extractor fails to parse the path, this will throw a VeloxUserError, and - // we want to let this exception bubble up to the client. - auto& extractor = detail::SIMDJsonExtractor::getInstance(path); simdjson::padded_string paddedJson(json.data(), json.size()); SIMDJSON_ASSIGN_OR_RAISE(auto jsonDoc, simdjsonParse(paddedJson)); @@ -167,14 +163,4 @@ simdjson::error_code simdJsonExtract( return extractor.extract(value, std::forward(consumer)); } -template -simdjson::error_code simdJsonExtract( - const std::string& json, - const std::string& path, - TConsumer&& consumer) { - return simdJsonExtract( - velox::StringView(json), - velox::StringView(path), - std::forward(consumer)); -} } // namespace facebook::velox::functions diff --git a/velox/functions/prestosql/json/tests/SIMDJsonExtractorTest.cpp b/velox/functions/prestosql/json/tests/SIMDJsonExtractorTest.cpp index 2b83d81d1eae..9c683146cfae 100644 --- a/velox/functions/prestosql/json/tests/SIMDJsonExtractorTest.cpp +++ b/velox/functions/prestosql/json/tests/SIMDJsonExtractorTest.cpp @@ -22,9 +22,18 @@ #include "gtest/gtest.h" #include "velox/common/base/VeloxException.h" +namespace facebook::velox::functions { namespace { -using facebook::velox::VeloxUserError; -using facebook::velox::functions::simdJsonExtract; + +template +simdjson::error_code simdJsonExtract( + const std::string& json, + const std::string& path, + TConsumer&& consumer) { + auto& extractor = SIMDJsonExtractor::getInstance(path); + return simdJsonExtract( + velox::StringView(json), extractor, std::forward(consumer)); +} class SIMDJsonExtractorTest : public testing::Test { public: @@ -52,8 +61,10 @@ class SIMDJsonExtractorTest : public testing::Test { return simdjson::SUCCESS; }; - EXPECT_EQ(simdJsonExtract(json, path, consumer), simdjson::SUCCESS) - << "with json " << json << " and path " << path; + SCOPED_TRACE(json); + SCOPED_TRACE(path); + + EXPECT_EQ(simdJsonExtract(json, path, consumer), simdjson::SUCCESS); if (!expected) { EXPECT_EQ(0, res.size()); @@ -536,4 +547,6 @@ TEST_F(SIMDJsonExtractorTest, invalidJson) { json = "{\"foo\": [\"bar\", \"baz]}"; EXPECT_NE(simdJsonExtract(json, "$.foo[0]", consumer), simdjson::SUCCESS); } + } // namespace +} // namespace facebook::velox::functions diff --git a/velox/functions/prestosql/tests/JsonFunctionsTest.cpp b/velox/functions/prestosql/tests/JsonFunctionsTest.cpp index 6a4275389f03..702b461324ef 100644 --- a/velox/functions/prestosql/tests/JsonFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/JsonFunctionsTest.cpp @@ -618,6 +618,13 @@ TEST_F(JsonFunctionsTest, jsonExtract) { EXPECT_EQ("3", jsonExtract(json, "$[0][0][0].b.[2]")); EXPECT_EQ("3", jsonExtract(json, "$.[0].[0][0].b.[2]")); + // Definite vs. non-definite paths. + EXPECT_EQ("[123]", jsonExtract(R"({"a": [{"b": 123}]})", "$.a[*].b")); + EXPECT_EQ("123", jsonExtract(R"({"a": [{"b": 123}]})", "$.a[0].b")); + + EXPECT_EQ("[]", jsonExtract(R"({"a": [{"b": 123}]})", "$.a[*].c")); + EXPECT_EQ(std::nullopt, jsonExtract(R"({"a": [{"b": 123}]})", "$.a[0].c")); + // TODO The following paths are supported by Presto via Jayway, but do not // work in Velox yet. Figure out how to add support for these. VELOX_ASSERT_THROW(jsonExtract(kJson, "$..price"), "Invalid JSON path");