Skip to content

Commit

Permalink
fix: Blob.from_string parse storage uri with regex (#1170)
Browse files Browse the repository at this point in the history
  • Loading branch information
cojenco committed Oct 31, 2023
1 parent d38adb6 commit 0a243fa
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 9 deletions.
13 changes: 7 additions & 6 deletions google/cloud/storage/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,9 @@
     "Blob.download_as_string() is deprecated and will be removed in future. "
     "Use Blob.download_as_bytes() instead."
 )
-
+_GS_URL_REGEX_PATTERN = re.compile(
+    r"(?P<scheme>gs)://(?P<bucket_name>[a-z0-9_.-]+)/(?P<object_name>.+)"
+)

 _DEFAULT_CHUNKSIZE = 104857600 # 1024 * 1024 B * 100 = 100 MB
 _MAX_MULTIPART_SIZE = 8388608 # 8 MB
Expand Down Expand Up @@ -403,12 +405,11 @@ def from_string(cls, uri, client=None):
         """
         from google.cloud.storage.bucket import Bucket

-        scheme, netloc, path, query, frag = urlsplit(uri)
-        if scheme != "gs":
+        match = _GS_URL_REGEX_PATTERN.match(uri)
+        if not match:
             raise ValueError("URI scheme must be gs")
-
-        bucket = Bucket(client, name=netloc)
-        return cls(path[1:], bucket)
+        bucket = Bucket(client, name=match.group("bucket_name"))
+        return cls(match.group("object_name"), bucket)

     def generate_signed_url(
         self,
Expand Down
14 changes: 11 additions & 3 deletions tests/unit/test_blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -5819,13 +5819,21 @@ def test_from_string_w_valid_uri(self):
         from google.cloud.storage.blob import Blob

         client = self._make_client()
-        uri = "gs://BUCKET_NAME/b"
-        blob = Blob.from_string(uri, client)
+        basic_uri = "gs://bucket_name/b"
+        blob = Blob.from_string(basic_uri, client)

         self.assertIsInstance(blob, Blob)
         self.assertIs(blob.client, client)
         self.assertEqual(blob.name, "b")
-        self.assertEqual(blob.bucket.name, "BUCKET_NAME")
+        self.assertEqual(blob.bucket.name, "bucket_name")
+
+        nested_uri = "gs://bucket_name/path1/path2/b#name"
+        blob = Blob.from_string(nested_uri, client)
+
+        self.assertIsInstance(blob, Blob)
+        self.assertIs(blob.client, client)
+        self.assertEqual(blob.name, "path1/path2/b#name")
+        self.assertEqual(blob.bucket.name, "bucket_name")

     def test_from_string_w_invalid_uri(self):
         from google.cloud.storage.blob import Blob
Expand Down

0 comments on commit 0a243fa

Please sign in to comment.