You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I am using the given code as a util file which is being used to run a config-driven job.
On failure of the check job, the task is supposed to finish and send a notification, but the task is not finishing even after entering the exception block.
from pydeequ.checks import Check,CheckLevel
from pydeequ.verification import VerificationSuite,VerificationResult
class ValidatorObject:
    """Collection of config-driven PyDeequ data-quality checks.

    Each public method runs a single Deequ verification on a DataFrame and
    returns the constraint results serialized as a JSON object. All checks
    run at ``CheckLevel.Warning``, so a failed constraint is reported in the
    returned result rather than raised as an error.

    NOTE(review): PyDeequ's ``VerificationSuite`` starts a Py4J callback
    server that can keep the Spark driver alive after the job logic is done —
    a common cause of tasks that "never finish" even after the exception
    block runs. If that happens, call
    ``spark.sparkContext._gateway.shutdown_callback_server()`` (and
    ``spark.stop()``) after the last verification — see the PyDeequ README.
    """

    @staticmethod
    def _require(value, name):
        """Raise ValueError if *value* is falsy.

        Replaces the previous ``assert`` guards, which are silently stripped
        when Python runs with ``-O``. Error messages are unchanged.
        """
        if not value:
            raise ValueError(f"Error while passing {name}")

    @staticmethod
    def _run_check(spark, df, constraint):
        """Run a single Deequ *constraint* on *df* and return JSON results.

        :param spark: spark session
        :param df: DataFrame under validation
        :param constraint: a fully-configured Check (e.g. ``check.isUnique(col)``)
        :returns: json object with one entry per constraint
        """
        check_result = (
            VerificationSuite(spark)
            .onData(df)
            .addCheck(constraint)
            .run()
        )
        return VerificationResult.checkResultsAsJson(spark, check_result)

    def checkIsNonNegative(self, spark, df, column):
        """Validate that *column* contains only non-negative values.

        :param spark: spark session
        :param df: Dataframe
        :param column: one column name
        :returns: json object
        :raises ValueError: if any argument is missing/falsy
        """
        self._require(spark, "spark")
        self._require(df, "df")
        self._require(column, "column")
        check = Check(spark, CheckLevel.Warning, "check isNonNegative")
        return self._run_check(spark, df, check.isNonNegative(column))

    def checkIsContainedIn(self, spark, df, column, allowed_values):
        """Validate that every value in *column* is one of *allowed_values*.

        :param spark: spark session
        :param df: Dataframe
        :param column: one column name
        :param allowed_values: list of permitted values
        :returns: json object
        :raises ValueError: if any argument is missing/falsy
        """
        self._require(spark, "spark")
        self._require(df, "df")
        self._require(column, "column")
        self._require(allowed_values, "allowed_values")
        check = Check(spark, CheckLevel.Warning, "check isContainedIn")
        return self._run_check(spark, df, check.isContainedIn(column, allowed_values))

    def checkHasSize(self, spark, df, assertion, hint=None):
        """Validate the DataFrame's row count against *assertion*.

        :param spark: spark session
        :param df: Dataframe
        :param assertion: predicate applied to the row count (e.g. ``lambda n: n > 0``)
        :param hint: optional hint string attached to the constraint result
        :returns: json object
        :raises ValueError: if a required argument is missing/falsy
        """
        self._require(spark, "spark")
        self._require(df, "df")
        self._require(assertion, "assertion")
        check = Check(spark, CheckLevel.Warning, "test hasSize")
        return self._run_check(spark, df, check.hasSize(assertion, hint))

    def checkIsComplete(self, spark, df, column):
        """Validate that *column* has no null values (is complete).

        :param spark: spark session
        :param df: Dataframe
        :param column: one column name
        :returns: json object
        :raises ValueError: if any argument is missing/falsy
        """
        self._require(spark, "spark")
        self._require(df, "df")
        self._require(column, "column")
        check = Check(spark, CheckLevel.Warning, "test isComplete")
        return self._run_check(spark, df, check.isComplete(column))

    def checkIsUnique(self, spark, df, column):
        """Validate that *column* contains only unique values.

        :param spark: spark session
        :param df: Dataframe
        :param column: one column name
        :returns: json object
        :raises ValueError: if any argument is missing/falsy
        """
        self._require(spark, "spark")
        self._require(df, "df")
        self._require(column, "column")
        check = Check(spark, CheckLevel.Warning, "test isUnique")
        return self._run_check(spark, df, check.isUnique(column))
this is happening only with the cases where i am using from pydeequ.checks import Check,CheckLevel from pydeequ.verification import VerificationSuite,VerificationResult
in one other case where i am using from pydeequ.analyzers import (AnalysisRunner, AnalyzerContext, Completeness, Uniqueness, UniqueValueRatio, Size, Mean, Correlation)
On the failure of the check the task is finishing so i deduced the issue is only with either of the pydeequ.checks or pydeequ.verification.
please help me with this
The text was updated successfully, but these errors were encountered:
I am using the given code as a util file which is being used to run a config-driven job.
On failure of the check job, the task is supposed to finish and send a notification, but the task is not finishing even after entering the exception block.
this is happening only with the cases where i am using
from pydeequ.checks import Check,CheckLevel from pydeequ.verification import VerificationSuite,VerificationResult
in one other case where i am using
from pydeequ.analyzers import (AnalysisRunner, AnalyzerContext, Completeness, Uniqueness, UniqueValueRatio, Size, Mean, Correlation)
On the failure of the check the task is finishing so i deduced the issue is only with either of the pydeequ.checks or pydeequ.verification.
please help me with this
The text was updated successfully, but these errors were encountered: