Skip to content

Commit

Permalink
Move the soft delete code and test to gcsio.
Browse files Browse the repository at this point in the history
  • Loading branch information
shunping committed Jun 10, 2024
1 parent c68709c commit 6260d57
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 34 deletions.
13 changes: 13 additions & 0 deletions sdks/python/apache_beam/io/gcp/gcsio.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from typing import Optional
from typing import Union

from google.api_core.exceptions import GoogleAPICallError
from google.cloud import storage
from google.cloud.exceptions import NotFound
from google.cloud.storage.fileio import BlobReader
Expand Down Expand Up @@ -529,6 +530,18 @@ def _updated_to_seconds(updated):
time.mktime(updated.timetuple()) - time.timezone +
updated.microsecond / 1000000.0)

def is_soft_delete_enabled(self, gcs_path):
try:
bucket_name, _ = parse_gcs_path(gcs_path)
bucket = self.get_bucket(bucket_name)
if (bucket.soft_delete_policy is not None and
bucket.soft_delete_policy.retention_duration_seconds > 0):
return True
except Exception:
_LOGGER.warning(
"Unexpected error occurred when checking soft delete policy for %s"
% gcs_path)
return False

class BeamBlobReader(BlobReader):
def __init__(self, blob, chunk_size=DEFAULT_READ_BUFFER_SIZE):
Expand Down
15 changes: 15 additions & 0 deletions sdks/python/apache_beam/io/gcp/gcsio_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,21 @@ def test_create_default_bucket(
self.assertEqual(
request_data_json['softDeletePolicy']['retentionDurationSeconds'], 0)

@mock.patch("apache_beam.io.gcp.gcsio.GcsIO.get_bucket")
def test_is_soft_delete_enabled(self, mock_get_bucket):
bucket = mock.MagicMock()
mock_get_bucket.return_value = bucket

# soft delete policy enabled
bucket.soft_delete_policy.retention_duration_seconds = 1024
self.assertTrue(self.gcs.is_soft_delete_enabled(
"gs://beam_with_soft_delete/tmp"))

# soft delete policy disabled
bucket.soft_delete_policy.retention_duration_seconds = 0
self.assertFalse(self.gcs.is_soft_delete_enabled(
"gs://beam_without_soft_delete/tmp"))


if __name__ == '__main__':
logging.getLogger().setLevel(logging.INFO)
Expand Down
21 changes: 5 additions & 16 deletions sdks/python/apache_beam/options/pipeline_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -920,30 +920,19 @@ def _create_default_gcs_bucket(self):
def _warn_if_soft_delete_policy_enabled(self, arg_name):
gcs_path = getattr(self, arg_name, None)
try:
from google.api_core.exceptions import GoogleAPICallError
from apache_beam.io.gcp import gcsio
try:
bucket_name, _ = gcsio.parse_gcs_path(gcs_path)
bucket = gcsio.GcsIO().get_bucket(bucket_name)
if (bucket.soft_delete_policy is not None and
bucket.soft_delete_policy.retention_duration_seconds > 0):
if gcsio.GcsIO.is_soft_delete_enabled(gcs_path):
_LOGGER.warning(
"Bucket %s specified in %s has soft-delete policy enabled."
"Bucket specified in %s has soft-delete policy enabled."
" To avoid being billed for unnecessary storage costs, turn"
" off the soft delete feature on buckets that your Dataflow"
" jobs use for temporary and staging storage. For more"
" information, see"
" https://cloud.google.com/storage/docs/use-soft-delete"
"#remove-soft-delete-policy." % (bucket_name, arg_name))
return True
except GoogleAPICallError:
_LOGGER.warning(
'Unable to check soft delete policy in the bucket of %s.' %
gcs_path)
except Exception:
"#remove-soft-delete-policy." % arg_name)
except ImportError:
_LOGGER.warning(
'Unexpected error occurred when checking soft delete policy.')
return False
'Unable to check soft delete policy due to import error.')

# If either temp or staging location has an issue, we use the valid one for
# both locations. If both are bad we return an error.
Expand Down
18 changes: 0 additions & 18 deletions sdks/python/apache_beam/options/pipeline_options_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -790,24 +790,6 @@ def test_validation_bad_stg_bad_temp_no_default(self):
errors,
errors)

def test_soft_delete_on_temp_location(self):
with mock.patch(
"apache_beam.io.gcp.gcsio.GcsIO.get_bucket") as mock_get_bucket:
options = MockGoogleCloudOptionsWithBucket(
['--project=myproject', '--temp_location=gs://beam/tmp'])
bucket = mock.MagicMock()
mock_get_bucket.return_value = bucket

# soft delete policy enabled
bucket.soft_delete_policy.retention_duration_seconds = 1024
self.assertTrue(
options._warn_if_soft_delete_policy_enabled("temp_location"))

# soft delete policy disabled
bucket.soft_delete_policy.retention_duration_seconds = 0
self.assertFalse(
options._warn_if_soft_delete_policy_enabled("temp_location"))


if __name__ == '__main__':
logging.getLogger().setLevel(logging.INFO)
Expand Down

0 comments on commit 6260d57

Please sign in to comment.