Skip to content

Commit

Permalink
Add transform_keys and transform_values Presto functions (#2245)
Browse files Browse the repository at this point in the history
Summary: Pull Request resolved: #2245

Differential Revision: D38565343

Pulled By: mbasmanova

fbshipit-source-id: 5eecb069a1a23d772d15893feb12f2ec88b4b781
  • Loading branch information
mbasmanova authored and facebook-github-bot committed Aug 10, 2022
1 parent 483aa3f commit 0e7be68
Show file tree
Hide file tree
Showing 10 changed files with 716 additions and 2 deletions.
23 changes: 23 additions & 0 deletions velox/docs/functions/map.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,26 @@ Map Functions
Corresponds to SQL subscript operator [].

SELECT name_to_age_map['Bob'] AS bob_age;

.. function:: transform_keys(map(K1,V), function(K1,V,K2)) -> map(K2,V)

Returns a map whose keys are the results of applying ``function`` to each entry of ``map`` (values are kept unchanged; duplicate resulting keys are an error)::

SELECT transform_keys(MAP(ARRAY[], ARRAY[]), (k, v) -> k + 1); -- {}
SELECT transform_keys(MAP(ARRAY [1, 2, 3], ARRAY ['a', 'b', 'c']), (k, v) -> k + 1); -- {2 -> a, 3 -> b, 4 -> c}
SELECT transform_keys(MAP(ARRAY ['a', 'b', 'c'], ARRAY [1, 2, 3]), (k, v) -> v * v); -- {1 -> 1, 4 -> 2, 9 -> 3}
SELECT transform_keys(MAP(ARRAY ['a', 'b'], ARRAY [1, 2]), (k, v) -> k || CAST(v as VARCHAR)); -- {a1 -> 1, b2 -> 2}
SELECT transform_keys(MAP(ARRAY [1, 2], ARRAY [1.0, 1.4]), -- {one -> 1.0, two -> 1.4}
(k, v) -> MAP(ARRAY[1, 2], ARRAY['one', 'two'])[k]);

.. function:: transform_values(map(K,V1), function(K,V1,V2)) -> map(K,V2)

Returns a map whose values are the results of applying ``function`` to each entry of ``map`` (keys are kept unchanged)::

SELECT transform_values(MAP(ARRAY[], ARRAY[]), (k, v) -> v + 1); -- {}
SELECT transform_values(MAP(ARRAY [1, 2, 3], ARRAY [10, 20, 30]), (k, v) -> v + k); -- {1 -> 11, 2 -> 22, 3 -> 33}
SELECT transform_values(MAP(ARRAY [1, 2, 3], ARRAY ['a', 'b', 'c']), (k, v) -> k * k); -- {1 -> 1, 2 -> 4, 3 -> 9}
SELECT transform_values(MAP(ARRAY ['a', 'b'], ARRAY [1, 2]), (k, v) -> k || CAST(v as VARCHAR)); -- {a -> a1, b -> b2}
SELECT transform_values(MAP(ARRAY [1, 2], ARRAY [1.0, 1.4]), -- {1 -> one_1.0, 2 -> two_1.4}
(k, v) -> MAP(ARRAY[1, 2], ARRAY['one', 'two'])[k] || '_' || CAST(v AS VARCHAR));

2 changes: 2 additions & 0 deletions velox/functions/prestosql/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ add_library(
Subscript.cpp
ToUtf8.cpp
Transform.cpp
TransformKeys.cpp
TransformValues.cpp
URLFunctions.cpp
VectorArithmetic.cpp
WidthBucketArray.cpp
Expand Down
2 changes: 1 addition & 1 deletion velox/functions/prestosql/Transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class TransformFunction : public exec::VectorFunction {
public:
bool isDefaultNullBehavior() const override {
// transform is null preserving for the array. But since an
// expr tree witht a lambda depends on all named fields, including
// expr tree with a lambda depends on all named fields, including
// captures, a null in a capture does not automatically make a
// null result.
return false;
Expand Down
124 changes: 124 additions & 0 deletions velox/functions/prestosql/TransformKeys.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "velox/expression/Expr.h"
#include "velox/expression/VectorFunction.h"
#include "velox/functions/lib/LambdaFunctionUtil.h"
#include "velox/vector/FunctionVector.h"

namespace facebook::velox::functions {
namespace {

// See documentation at https://prestodb.io/docs/current/functions/map.html
class TransformKeysFunction : public exec::VectorFunction {
public:
bool isDefaultNullBehavior() const override {
// transform_keys is null preserving for the map. But
// since an expr tree with a lambda depends on all named fields, including
// captures, a null in a capture does not automatically make a
// null result.
return false;
}

void apply(
const SelectivityVector& rows,
std::vector<VectorPtr>& args,
const TypePtr& outputType,
exec::EvalCtx* context,
VectorPtr* result) const override {
VELOX_CHECK_EQ(args.size(), 2);

// Flatten input map.
exec::LocalDecodedVector mapDecoder(context, *args[0], rows);
auto& decodedMap = *mapDecoder.get();

auto flatMap = flattenMap(rows, args[0], decodedMap);

std::vector<VectorPtr> lambdaArgs = {
flatMap->mapKeys(), flatMap->mapValues()};
auto numKeys = flatMap->mapKeys()->size();

VectorPtr transformedKeys;

// Loop over lambda functions and apply these to keys of the map.
// In most cases there will be only one function and the loop will run once.
auto it = args[1]->asUnchecked<FunctionVector>()->iterator(&rows);
while (auto entry = it.next()) {
auto keyRows =
toElementRows<MapVector>(numKeys, *entry.rows, flatMap.get());
auto wrapCapture = toWrapCapture<MapVector>(
numKeys, entry.callable, *entry.rows, flatMap);

entry.callable->apply(
keyRows, wrapCapture, context, lambdaArgs, &transformedKeys);
}

auto localResult = std::make_shared<MapVector>(
flatMap->pool(),
outputType,
flatMap->nulls(),
flatMap->size(),
flatMap->offsets(),
flatMap->sizes(),
transformedKeys,
flatMap->mapValues());

checkDuplicateKeys(localResult, rows);

context->moveOrCopyResult(localResult, rows, result);
}

static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
// map(K1, V), function(K1, V) -> K2 -> map(K2, V)
return {exec::FunctionSignatureBuilder()
.typeVariable("K1")
.typeVariable("K2")
.typeVariable("V")
.returnType("map(K2,V)")
.argumentType("map(K1,V)")
.argumentType("function(K1,V,K2)")
.build()};
}

private:
void checkDuplicateKeys(
const MapVectorPtr& mapVector,
const SelectivityVector& rows) const {
static const char* kDuplicateKey = "Duplicate map keys are not allowed";

MapVector::canonicalize(mapVector);

auto offsets = mapVector->rawOffsets();
auto sizes = mapVector->rawSizes();
auto mapKeys = mapVector->mapKeys();
rows.applyToSelected([&](auto row) {
auto offset = offsets[row];
auto size = sizes[row];
for (auto i = 1; i < size; i++) {
if (mapKeys->equalValueAt(mapKeys.get(), offset + i, offset + i - 1)) {
VELOX_USER_FAIL("{}", kDuplicateKey);
}
}
});
}
};
} // namespace

// Declares the vector function under the tag udf_transform_keys, supplying
// its signatures and a TransformKeysFunction instance. The tag is what
// VELOX_REGISTER_VECTOR_FUNCTION(udf_transform_keys, "transform_keys") in
// registerMapFunctions() refers to.
VELOX_DECLARE_VECTOR_FUNCTION(
udf_transform_keys,
TransformKeysFunction::signatures(),
std::make_unique<TransformKeysFunction>());

} // namespace facebook::velox::functions
99 changes: 99 additions & 0 deletions velox/functions/prestosql/TransformValues.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "velox/expression/Expr.h"
#include "velox/expression/VectorFunction.h"
#include "velox/functions/lib/LambdaFunctionUtil.h"
#include "velox/vector/FunctionVector.h"

namespace facebook::velox::functions {
namespace {

// See documentation at https://prestodb.io/docs/current/functions/map.html
class TransformValuesFunction : public exec::VectorFunction {
public:
bool isDefaultNullBehavior() const override {
// transform_values is null preserving for the map. But
// since an expr tree with a lambda depends on all named fields, including
// captures, a null in a capture does not automatically make a
// null result.
return false;
}

void apply(
const SelectivityVector& rows,
std::vector<VectorPtr>& args,
const TypePtr& outputType,
exec::EvalCtx* context,
VectorPtr* result) const override {
VELOX_CHECK_EQ(args.size(), 2);

// Flatten input map.
exec::LocalDecodedVector mapDecoder(context, *args[0], rows);
auto& decodedMap = *mapDecoder.get();

auto flatMap = flattenMap(rows, args[0], decodedMap);

std::vector<VectorPtr> lambdaArgs = {
flatMap->mapKeys(), flatMap->mapValues()};
auto numValues = flatMap->mapValues()->size();

VectorPtr transformedValues;

// Loop over lambda functions and apply these to values of the map.
// In most cases there will be only one function and the loop will run once.
auto it = args[1]->asUnchecked<FunctionVector>()->iterator(&rows);
while (auto entry = it.next()) {
auto valueRows =
toElementRows<MapVector>(numValues, *entry.rows, flatMap.get());
auto wrapCapture = toWrapCapture<MapVector>(
numValues, entry.callable, *entry.rows, flatMap);

entry.callable->apply(
valueRows, wrapCapture, context, lambdaArgs, &transformedValues);
}

auto localResult = std::make_shared<MapVector>(
flatMap->pool(),
outputType,
flatMap->nulls(),
flatMap->size(),
flatMap->offsets(),
flatMap->sizes(),
flatMap->mapKeys(),
transformedValues);
context->moveOrCopyResult(localResult, rows, result);
}

static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
// map(K, V1), function(K, V1) -> V2 -> map(K, V2)
return {exec::FunctionSignatureBuilder()
.typeVariable("K")
.typeVariable("V1")
.typeVariable("V2")
.returnType("map(K,V2)")
.argumentType("map(K,V1)")
.argumentType("function(K,V1,V2)")
.build()};
}
};
} // namespace

// Declares the vector function under the tag udf_transform_values, supplying
// its signatures and a TransformValuesFunction instance. The tag is what
// VELOX_REGISTER_VECTOR_FUNCTION(udf_transform_values, "transform_values") in
// registerMapFunctions() refers to.
VELOX_DECLARE_VECTOR_FUNCTION(
udf_transform_values,
TransformValuesFunction::signatures(),
std::make_unique<TransformValuesFunction>());

} // namespace facebook::velox::functions
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
namespace facebook::velox::functions {
void registerMapFunctions() {
VELOX_REGISTER_VECTOR_FUNCTION(udf_map_filter, "map_filter");
VELOX_REGISTER_VECTOR_FUNCTION(udf_transform_keys, "transform_keys");
VELOX_REGISTER_VECTOR_FUNCTION(udf_transform_values, "transform_values");
VELOX_REGISTER_VECTOR_FUNCTION(udf_map, "map");
VELOX_REGISTER_VECTOR_FUNCTION(udf_map_concat, "map_concat");
VELOX_REGISTER_VECTOR_FUNCTION(udf_map_entries, "map_entries");
Expand Down
2 changes: 2 additions & 0 deletions velox/functions/prestosql/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ add_executable(
SplitTest.cpp
StringFunctionsTest.cpp
TransformTest.cpp
TransformKeysTest.cpp
TransformValuesTest.cpp
URLFunctionsTest.cpp
WidthBucketArrayTest.cpp
GreatestLeastTest.cpp
Expand Down
Loading

0 comments on commit 0e7be68

Please sign in to comment.