# Extracted from the patch "feat(storage): fetch file and bucket details from
# url (#27322)", which adds File.from_gs_url to
# google-cloud-storage/lib/google/cloud/storage/file.rb and its spec to
# google-cloud-storage/test/google/cloud/storage/file_test.rb.

require "uri"

module Google
  module Cloud
    module Storage
      class File
        ##
        # Splits a `gs://` URL into its bucket name, file path and (when
        # present) query parameters. No request is made; the URL is only
        # parsed.
        #
        # @param gs_url [String, #to_s] A URL of the form
        #   `gs://bucket_name/file_path[?key=value&...]`.
        #
        # @return [Hash] A hash with the String keys `"bucket_name"` and
        #   `"file_path"`. `"file_path"` is `nil` when the URL names only a
        #   bucket. When the URL contains a `?`, the hash also has an
        #   `"options"` key holding a `Hash(String => String)` of the decoded
        #   query parameters.
        #
        # @raise [ArgumentError] if the URL does not start with `gs://` or
        #   does not contain a bucket name.
        #
        # @example Fetch bucket_name and file_path from gs url:
        #   require "google/cloud/storage"
        #   gs_url = "gs://my-todo-app/avatars/heidi.jpeg"
        #   file = Google::Cloud::Storage::File
        #   file.from_gs_url(gs_url)
        #   # => {"bucket_name"=>"my-todo-app", "file_path"=>"avatars/heidi.jpeg"}
        #
        # @example Fetch bucket_name, file_path and other query params from gs url:
        #   require "google/cloud/storage"
        #   gs_url = "gs://my-todo-app/test_sub_folder/heidi.jpeg?params1=test1&params2=test2"
        #   file = Google::Cloud::Storage::File
        #   file.from_gs_url(gs_url)
        #   # => {
        #   #   "bucket_name"=>"my-todo-app",
        #   #   "file_path"=>"test_sub_folder/heidi.jpeg",
        #   #   "options" => {
        #   #     "params1"=>"test1",
        #   #     "params2"=>"test2"
        #   #   }
        #   # }
        #
        def self.from_gs_url gs_url
          prefix = "gs://".freeze
          # to_s lets nil and other non-String input fail the prefix check
          # with ArgumentError instead of crashing with NoMethodError.
          url = gs_url.to_s
          raise ArgumentError, "Invalid GCS URL" unless url.start_with? prefix

          # Separate the query string from the bucket/object part.
          path, query = url[prefix.length..-1].split "?", 2
          # `path` is nil for a bare "gs://" because "".split returns [].
          bucket_name, file_path = path.to_s.split "/", 2
          raise ArgumentError, "Invalid GCS URL" if bucket_name.nil? || bucket_name.empty?

          url_items = {
            "bucket_name" => bucket_name,
            "file_path" => file_path
          }
          # Query parameters are reported only when the URL contains a "?".
          url_items["options"] = URI.decode_www_form(query).to_h if query
          url_items
        end
      end
    end
  end
end

# The spec DSL exists only once minitest/spec has been loaded; the guard keeps
# this file loadable as plain library code as well.
if defined?(Minitest::Spec)
  describe "fetch details from gs_url" do
    let(:bucket_name) { "my-random-bucket" }
    let(:file_path) { "file.jpeg" }
    let(:file) { Google::Cloud::Storage::File }
    let(:param) { "param1" }
    let(:param_val) { "test" }
    let(:gs_url) { "gs://#{bucket_name}/#{file_path}" }

    it "it returns file_name and bucket_name from given gs url" do
      url_items = file.from_gs_url gs_url
      assert_equal bucket_name, url_items["bucket_name"]
      assert_equal file_path, url_items["file_path"]
    end

    it "it returns file_name, bucket_name and url params in options hash from given gs url with parameters" do
      url_with_params = "gs://#{bucket_name}/#{file_path}?#{param}=#{param_val}"
      url_items = file.from_gs_url url_with_params
      assert_equal bucket_name, url_items["bucket_name"]
      assert_equal file_path, url_items["file_path"]
      expected_params_hash_in_output = { "param1" => "test" }
      assert_equal expected_params_hash_in_output, url_items["options"]
    end

    it "it returns file_path with subfolder name and file name and bucket_name from given gs url" do
      # Use a distinct local name: `file_path = "avatars/#{file_path}"` would
      # read the new (nil) local on the right-hand side, not the `let`.
      nested_path = "avatars/#{file_path}"
      nested_url = "gs://#{bucket_name}/#{nested_path}"
      url_items = file.from_gs_url nested_url
      assert_equal bucket_name, url_items["bucket_name"]
      assert_equal "avatars/file.jpeg", url_items["file_path"]
    end

    it "raises error if url provided is not a valid gs url" do
      invalid_gs_url = "http://my_bucket/my_file.txt"
      assert_raises ArgumentError do
        file.from_gs_url invalid_gs_url
      end
    end

    it "raises error if url provided has no bucket name" do
      ["gs://", "gs:///file.jpeg", "gs://?param1=test"].each do |url_without_bucket|
        assert_raises ArgumentError do
          file.from_gs_url url_without_bucket
        end
      end
    end

    it "raises error if url provided is nil" do
      assert_raises ArgumentError do
        file.from_gs_url nil
      end
    end
  end
end