Skip to content

Commit

Permalink
Check determinism using function name only (facebookincubator#10241)
Browse files Browse the repository at this point in the history
Summary:
In ExpressionFuzzer, the determinism could be decided without argument
types. This PR changes to check determinism using function name only by
fetching all registry entries for the given function name and checking if all
of them are deterministic. The function is deterministic if "deterministic" is
true for every entry. With this change, argument types are not required when
the signature variables are not empty.

Pull Request resolved: facebookincubator#10241

Reviewed By: pedroerp

Differential Revision: D59122132

Pulled By: bikramSingh91

fbshipit-source-id: ab3c2907f102d3ebb81269fe58499bef0bb878ab
  • Loading branch information
rui-mo authored and facebook-github-bot committed Jul 3, 2024
1 parent 4a0ce3e commit 63cceca
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 51 deletions.
7 changes: 7 additions & 0 deletions velox/expression/VectorFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ VectorFunctionMap& vectorFunctionFactories() {
return factories;
}

std::optional<VectorFunctionMetadata> getVectorFunctionMetadata(
const std::string& name) {
return applyToVectorFunctionEntry<VectorFunctionMetadata>(
name,
[&](const auto& /*name*/, const auto& entry) { return entry.metadata; });
}

std::optional<std::vector<FunctionSignaturePtr>> getVectorFunctionSignatures(
const std::string& name) {
return applyToVectorFunctionEntry<std::vector<FunctionSignaturePtr>>(
Expand Down
5 changes: 5 additions & 0 deletions velox/expression/VectorFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,11 @@ class SimpleFunctionAdapterFactory {
virtual ~SimpleFunctionAdapterFactory() = default;
};

/// Returns the function metadata with the specified name. Returns std::nullopt
/// if there is no function with the specified name.
std::optional<VectorFunctionMetadata> getVectorFunctionMetadata(
const std::string& name);

/// Returns a list of signatures supported by VectorFunction with the specified
/// name. Returns std::nullopt if there is no function with the specified name.
std::optional<std::vector<FunctionSignaturePtr>> getVectorFunctionSignatures(
Expand Down
89 changes: 38 additions & 51 deletions velox/expression/fuzzer/ExpressionFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,12 +341,9 @@ static void appendSpecialForms(
}
}

/// Returns if `functionName` with the given `argTypes` is deterministic.
/// Returns true if the function was not found or determinism cannot be
/// established.
bool isDeterministic(
const std::string& functionName,
const std::vector<TypePtr>& argTypes) {
// Returns if `functionName` is deterministic. Returns true if the function was
// not found or determinism cannot be established.
bool isDeterministic(const std::string& functionName) {
// We know that the 'cast', 'and', and 'or' special forms are deterministic.
// Hard-code them here because they are not real functions and hence cannot
// be resolved by the code below.
Expand All @@ -356,15 +353,14 @@ bool isDeterministic(
return true;
}

if (auto typeAndMetadata =
resolveFunctionWithMetadata(functionName, argTypes)) {
return typeAndMetadata->second.deterministic;
const auto determinism = velox::isDeterministic(functionName);
if (!determinism.has_value()) {
// functionName must be a special form.
LOG(WARNING) << "Unable to determine if '" << functionName
<< "' is deterministic or not. Assuming it is.";
return true;
}

// functionName must be a special form.
LOG(WARNING) << "Unable to determine if '" << functionName
<< "' is deterministic or not. Assuming it is.";
return true;
return determinism.value();
}

std::optional<CallableSignature> processConcreteSignature(
Expand Down Expand Up @@ -567,13 +563,26 @@ ExpressionFuzzer::ExpressionFuzzer(
continue;
}

// Determine a list of concrete argument types that can bind to the
// signature. For non-parameterized signatures, these argument types will
// be used to create a callable signature. For parameterized signatures,
// these argument types are only used to fetch the function instance to
// get their determinism.
std::vector<TypePtr> argTypes;
if (signature->variables().empty()) {
if (!isDeterministic(function.first)) {
LOG(WARNING) << "Skipping non-deterministic function: "
<< function.first << signature->toString();
continue;
}

if (!signature->variables().empty()) {
std::unordered_set<std::string> typeVariables;
for (const auto& [name, _] : signature->variables()) {
typeVariables.insert(name);
}
atLeastOneSupported = true;
++supportedFunctionSignatures;
signatureTemplates_.emplace_back(SignatureTemplate{
function.first, signature, std::move(typeVariables)});
} else {
// Determine a list of concrete argument types that can bind to the
// signature. For non-parameterized signatures, these argument types
// will be used to create a callable signature.
std::vector<TypePtr> argTypes;
bool supportedSignature = true;
for (const auto& arg : signature->argumentTypes()) {
auto resolvedType = SignatureBinder::tryResolveType(arg, {}, {});
Expand All @@ -589,37 +598,15 @@ ExpressionFuzzer::ExpressionFuzzer(
<< function.first << signature->toString();
continue;
}
} else {
ArgumentTypeFuzzer typeFuzzer{*signature, localRng};
typeFuzzer.fuzzReturnType();
VELOX_CHECK_EQ(
typeFuzzer.fuzzArgumentTypes(options_.maxNumVarArgs), true);
argTypes = typeFuzzer.argumentTypes();
}
if (!isDeterministic(function.first, argTypes)) {
LOG(WARNING) << "Skipping non-deterministic function: "
<< function.first << signature->toString();
continue;
}

if (!signature->variables().empty()) {
std::unordered_set<std::string> typeVariables;
for (const auto& [name, _] : signature->variables()) {
typeVariables.insert(name);
if (auto callableFunction = processConcreteSignature(
function.first,
argTypes,
*signature,
options_.enableComplexTypes)) {
atLeastOneSupported = true;
++supportedFunctionSignatures;
signatures_.emplace_back(*callableFunction);
}
atLeastOneSupported = true;
++supportedFunctionSignatures;
signatureTemplates_.emplace_back(SignatureTemplate{
function.first, signature, std::move(typeVariables)});
} else if (
auto callableFunction = processConcreteSignature(
function.first,
argTypes,
*signature,
options_.enableComplexTypes)) {
atLeastOneSupported = true;
++supportedFunctionSignatures;
signatures_.emplace_back(*callableFunction);
}
}

Expand Down
19 changes: 19 additions & 0 deletions velox/functions/FunctionRegistry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,25 @@ void clearFunctionRegistry() {
[](auto& functionMap) { functionMap.clear(); });
}

std::optional<bool> isDeterministic(const std::string& functionName) {
const auto simpleFunctions =
exec::simpleFunctions().getFunctionSignaturesAndMetadata(functionName);
const auto metadata = exec::getVectorFunctionMetadata(functionName);
if (simpleFunctions.empty() && !metadata.has_value()) {
return std::nullopt;
}

for (const auto& [metadata, _] : simpleFunctions) {
if (!metadata.deterministic) {
return false;
}
}
if (metadata.has_value() && !metadata.value().deterministic) {
return false;
}
return true;
}

TypePtr resolveFunction(
const std::string& functionName,
const std::vector<TypePtr>& argTypes) {
Expand Down
6 changes: 6 additions & 0 deletions velox/functions/FunctionRegistry.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ FunctionSignatureMap getFunctionSignatures();
/// The mapping is function name -> list of function signatures
FunctionSignatureMap getVectorFunctionSignatures();

/// Returns if a function is deterministic by fetching all registry entries for
/// the given function name and checking if all of them are deterministic.
/// Returns std::nullopt if the function is not found. Returns false if any of
/// the entries are not deterministic.
std::optional<bool> isDeterministic(const std::string& functionName);

/// Given a function name and argument types, returns
/// the return type if function exists otherwise returns nullptr
TypePtr resolveFunction(
Expand Down
15 changes: 15 additions & 0 deletions velox/functions/tests/FunctionRegistryTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "velox/functions/FunctionRegistry.h"
#include "velox/functions/Macros.h"
#include "velox/functions/Registerer.h"
#include "velox/functions/prestosql/registration/RegistrationFunctions.h"
#include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h"
#include "velox/type/Type.h"

Expand Down Expand Up @@ -495,6 +496,20 @@ TEST_F(FunctionRegistryTest, functionNameInMixedCase) {
ASSERT_EQ(*result, *VARCHAR());
}

TEST_F(FunctionRegistryTest, isDeterministic) {
functions::prestosql::registerAllScalarFunctions();
ASSERT_TRUE(isDeterministic("plus").value());
ASSERT_TRUE(isDeterministic("in").value());

ASSERT_FALSE(isDeterministic("rand").value());
ASSERT_FALSE(isDeterministic("uuid").value());
ASSERT_FALSE(isDeterministic("shuffle").value());

// Not found functions.
ASSERT_FALSE(isDeterministic("cast").has_value());
ASSERT_FALSE(isDeterministic("not_found_function").has_value());
}

template <typename T>
struct TestFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);
Expand Down

0 comments on commit 63cceca

Please sign in to comment.