diff --git a/dlp/conftest.py b/dlp/conftest.py new file mode 100644 index 000000000000..362e5a2c2711 --- /dev/null +++ b/dlp/conftest.py @@ -0,0 +1,20 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + + +# Used in risk_test.py to limit the maximum wait time before the flaky retries. +def pytest_configure(config): + pytest.MAX_FLAKY_WAIT = 3600 # maximum of an hour diff --git a/dlp/risk_test.py b/dlp/risk_test.py index ef2b0357dc66..0164cf3b8c0e 100644 --- a/dlp/risk_test.py +++ b/dlp/risk_test.py @@ -13,6 +13,7 @@ # limitations under the License. import os +import time import uuid import google.cloud.bigquery @@ -36,9 +37,14 @@ BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING BIGQUERY_HARMFUL_TABLE_ID = "harmful" + UNIQUE_STRING +TIMEOUT = 30 + # Create new custom topic/subscription -@pytest.fixture(scope="module") +# We observe sometimes all the tests in this file fail. In a +# hypothesis where DLP service somehow loses the connection to the +# topic, now we use function scope for Pub/Sub fixtures. +@pytest.fixture(scope="function") def topic_id(): # Creates a pubsub topic, and tears it down. publisher = google.cloud.pubsub.PublisherClient() @@ -53,7 +59,7 @@ def topic_id(): publisher.delete_topic(topic_path) -@pytest.fixture(scope="module") +@pytest.fixture(scope="function") def subscription_id(topic_id): # Subscribes to a topic. subscriber = google.cloud.pubsub.SubscriberClient() @@ -160,7 +166,22 @@ def bigquery_project(): bigquery_client.delete_dataset(dataset_ref, delete_contents=True) -@pytest.mark.flaky +def delay(err, *args): + # 20 mins of delay. This sounds like too long a delay, but we + # occasionally observe consequtive time block where operations are + # slow which leads to the test failures. These situations tend to + # get self healed in 20 minutes or so, so I'm trying this strategy. + # + # There are 10 tests, so we don't want the retry delay happening + # for all the tests. When we exhaust the MAX_FLAKY_WAIT, we retry + # the test immediately. + wait_time = min(pytest.MAX_FLAKY_WAIT, 60*20) + pytest.MAX_FLAKY_WAIT -= wait_time + time.sleep(wait_time) + return True + + +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_numerical_risk_analysis( topic_id, subscription_id, bigquery_project, capsys ): @@ -172,13 +193,14 @@ def test_numerical_risk_analysis( NUMERIC_FIELD, topic_id, subscription_id, + timeout=TIMEOUT, ) out, _ = capsys.readouterr() assert "Value Range:" in out -@pytest.mark.flaky +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_categorical_risk_analysis_on_string_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -190,14 +212,14 @@ def test_categorical_risk_analysis_on_string_field( UNIQUE_FIELD, topic_id, subscription_id, - timeout=180, + timeout=TIMEOUT, ) out, _ = capsys.readouterr() assert "Most common value occurs" in out -@pytest.mark.flaky +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_categorical_risk_analysis_on_number_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -209,13 +231,14 @@ def test_categorical_risk_analysis_on_number_field( NUMERIC_FIELD, topic_id, subscription_id, + timeout=TIMEOUT, ) out, _ = capsys.readouterr() assert "Most common value occurs" in out -@pytest.mark.flaky +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_k_anonymity_analysis_single_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -227,6 +250,7 @@ def test_k_anonymity_analysis_single_field( topic_id, subscription_id, [NUMERIC_FIELD], + timeout=TIMEOUT, ) out, _ = capsys.readouterr() @@ -234,7 +258,7 @@ def test_k_anonymity_analysis_single_field( assert "Class size:" in out -@pytest.mark.flaky(max_runs=3, min_passes=1) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_k_anonymity_analysis_multiple_fields( topic_id, subscription_id, bigquery_project, capsys ): @@ -246,6 +270,7 @@ def test_k_anonymity_analysis_multiple_fields( topic_id, subscription_id, [NUMERIC_FIELD, REPEATED_FIELD], + timeout=TIMEOUT, ) out, _ = capsys.readouterr() @@ -253,7 +278,7 @@ def test_k_anonymity_analysis_multiple_fields( assert "Class size:" in out -@pytest.mark.flaky +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_l_diversity_analysis_single_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -266,6 +291,7 @@ def test_l_diversity_analysis_single_field( subscription_id, UNIQUE_FIELD, [NUMERIC_FIELD], + timeout=TIMEOUT, ) out, _ = capsys.readouterr() @@ -274,7 +300,7 @@ def test_l_diversity_analysis_single_field( assert "Sensitive value" in out -@pytest.mark.flaky(max_runs=3, min_passes=1) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_l_diversity_analysis_multiple_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -287,6 +313,7 @@ def test_l_diversity_analysis_multiple_field( subscription_id, UNIQUE_FIELD, [NUMERIC_FIELD, REPEATED_FIELD], + timeout=TIMEOUT, ) out, _ = capsys.readouterr() @@ -295,7 +322,7 @@ def test_l_diversity_analysis_multiple_field( assert "Sensitive value" in out -@pytest.mark.flaky +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_k_map_estimate_analysis_single_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -308,6 +335,7 @@ def test_k_map_estimate_analysis_single_field( subscription_id, [NUMERIC_FIELD], ["AGE"], + timeout=TIMEOUT, ) out, _ = capsys.readouterr() @@ -316,7 +344,7 @@ def test_k_map_estimate_analysis_single_field( assert "Values" in out -@pytest.mark.flaky(max_runs=3, min_passes=1) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_k_map_estimate_analysis_multiple_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -329,6 +357,7 @@ def test_k_map_estimate_analysis_multiple_field( subscription_id, [NUMERIC_FIELD, STRING_BOOLEAN_FIELD], ["AGE", "GENDER"], + timeout=TIMEOUT, ) out, _ = capsys.readouterr() @@ -337,7 +366,7 @@ def test_k_map_estimate_analysis_multiple_field( assert "Values" in out -@pytest.mark.flaky +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_k_map_estimate_analysis_quasi_ids_info_types_equal( topic_id, subscription_id, bigquery_project ): @@ -351,4 +380,5 @@ def test_k_map_estimate_analysis_quasi_ids_info_types_equal( subscription_id, [NUMERIC_FIELD, STRING_BOOLEAN_FIELD], ["AGE"], + timeout=TIMEOUT, )