Skip to content

Commit

Permalink
Use managed identity for backups IO in Azure Blob Storage
Browse files Browse the repository at this point in the history
Also adds an option to prevent ClickHouse from trying to create a non-existent container, since creating
a container requires a role assignment at the storage account level.
  • Loading branch information
danipozo committed Mar 22, 2024
1 parent 9082a01 commit 6a61251
Show file tree
Hide file tree
Showing 11 changed files with 51 additions and 28 deletions.
1 change: 1 addition & 0 deletions docs/en/operations/backup.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
- `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables
- `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family.
- `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD`
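- `azure_attempt_to_create_container`: when using Azure Blob Storage, whether ClickHouse will try to create the specified container if it doesn't exist. Creating a container requires permissions at the storage account level, so disable this setting when access is granted only at the container level. Default: true.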
### Usage examples
Expand Down
1 change: 1 addition & 0 deletions src/Backups/BackupFactory.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class BackupFactory : boost::noncopyable
bool deduplicate_files = true;
bool allow_s3_native_copy = true;
bool use_same_s3_credentials_for_base_backup = false;
bool azure_attempt_to_create_container = true;
ReadSettings read_settings;
WriteSettings write_settings;
};
Expand Down
5 changes: 3 additions & 2 deletions src/Backups/BackupIO_AzureBlobStorage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,12 +140,13 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
StorageAzureBlob::Configuration configuration_,
const ReadSettings & read_settings_,
const WriteSettings & write_settings_,
const ContextPtr & context_)
const ContextPtr & context_,
bool attempt_to_create_container)
: BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage"))
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false}
, configuration(configuration_)
{
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false);
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false, attempt_to_create_container);
object_storage = std::make_unique<AzureObjectStorage>("BackupWriterAzureBlobStorage",
std::move(client_ptr),
StorageAzureBlob::createSettings(context_),
Expand Down
2 changes: 1 addition & 1 deletion src/Backups/BackupIO_AzureBlobStorage.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class BackupReaderAzureBlobStorage : public BackupReaderDefault
class BackupWriterAzureBlobStorage : public BackupWriterDefault
{
public:
BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_);
BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container);
~BackupWriterAzureBlobStorage() override;

bool fileExists(const String & file_name) override;
Expand Down
1 change: 1 addition & 0 deletions src/Backups/BackupSettings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ namespace ErrorCodes
M(Bool, deduplicate_files) \
M(Bool, allow_s3_native_copy) \
M(Bool, use_same_s3_credentials_for_base_backup) \
M(Bool, azure_attempt_to_create_container) \
M(Bool, read_from_filesystem_cache) \
M(UInt64, shard_num) \
M(UInt64, replica_num) \
Expand Down
3 changes: 3 additions & 0 deletions src/Backups/BackupSettings.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ struct BackupSettings
/// Whether base backup to S3 should inherit credentials from the BACKUP query.
bool use_same_s3_credentials_for_base_backup = false;

/// Whether to try to create the Azure container if it does not exist (creating a container requires permissions at the storage account level).
bool azure_attempt_to_create_container = true;

/// Allow to use the filesystem cache in passive mode - benefit from the existing cache entries,
/// but don't put more entries into the cache.
bool read_from_filesystem_cache = true;
Expand Down
1 change: 1 addition & 0 deletions src/Backups/BackupsWorker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,7 @@ void BackupsWorker::doBackup(
backup_create_params.deduplicate_files = backup_settings.deduplicate_files;
backup_create_params.allow_s3_native_copy = backup_settings.allow_s3_native_copy;
backup_create_params.use_same_s3_credentials_for_base_backup = backup_settings.use_same_s3_credentials_for_base_backup;
backup_create_params.azure_attempt_to_create_container = backup_settings.azure_attempt_to_create_container;
backup_create_params.read_settings = getReadSettingsForBackup(context, backup_settings);
backup_create_params.write_settings = getWriteSettingsForBackup(context);
backup = BackupFactory::instance().createBackup(backup_create_params);
Expand Down
5 changes: 3 additions & 2 deletions src/Backups/registerBackupEngineAzureBlobStorage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
if (args.size() == 3)
{
configuration.connection_url = args[0].safeGet<String>();
configuration.is_connection_string = true;
configuration.is_connection_string = !configuration.connection_url.starts_with("http");

configuration.container = args[1].safeGet<String>();
configuration.blob_path = args[2].safeGet<String>();
Expand Down Expand Up @@ -147,7 +147,8 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
auto writer = std::make_shared<BackupWriterAzureBlobStorage>(configuration,
params.read_settings,
params.write_settings,
params.context);
params.context,
params.azure_attempt_to_create_container);

return std::make_unique<BackupImpl>(
params.backup_info,
Expand Down
56 changes: 35 additions & 21 deletions src/Storages/StorageAzureBlob.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <DataTypes/DataTypesNumber.h>

#include <azure/storage/common/storage_credential.hpp>
#include <azure/identity/managed_identity_credential.hpp>
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <Processors/Transforms/ExtractColumnsTransform.h>
#include <Processors/Formats/IOutputFormat.h>
Expand Down Expand Up @@ -336,33 +337,37 @@ static bool containerExists(std::unique_ptr<BlobServiceClient> &blob_service_cli
return false;
}

AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only)
AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container)
{
AzureClientPtr result;

if (configuration.is_connection_string)
{
std::shared_ptr<Azure::Identity::ManagedIdentityCredential> managed_identity_credential = std::make_shared<Azure::Identity::ManagedIdentityCredential>();
std::unique_ptr<BlobServiceClient> blob_service_client = std::make_unique<BlobServiceClient>(BlobServiceClient::CreateFromConnectionString(configuration.connection_url));
result = std::make_unique<BlobContainerClient>(BlobContainerClient::CreateFromConnectionString(configuration.connection_url, configuration.container));
bool container_exists = containerExists(blob_service_client,configuration.container);

if (!container_exists)
if (attempt_to_create_container)
{
if (is_read_only)
throw Exception(
ErrorCodes::DATABASE_ACCESS_DENIED,
"AzureBlobStorage container does not exist '{}'",
configuration.container);

try
{
result->CreateIfNotExists();
} catch (const Azure::Storage::StorageException & e)
bool container_exists = containerExists(blob_service_client,configuration.container);
if (!container_exists)
{
if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict
&& e.ReasonPhrase == "The specified container already exists."))
if (is_read_only)
throw Exception(
ErrorCodes::DATABASE_ACCESS_DENIED,
"AzureBlobStorage container does not exist '{}'",
configuration.container);

try
{
throw;
result->CreateIfNotExists();
} catch (const Azure::Storage::StorageException & e)
{
if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict
&& e.ReasonPhrase == "The specified container already exists."))
{
throw;
}
}
}
}
Expand All @@ -377,17 +382,17 @@ AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration co
}

std::unique_ptr<BlobServiceClient> blob_service_client;
std::shared_ptr<Azure::Identity::ManagedIdentityCredential> managed_identity_credential;
if (storage_shared_key_credential)
{
blob_service_client = std::make_unique<BlobServiceClient>(configuration.connection_url, storage_shared_key_credential);
}
else
{
blob_service_client = std::make_unique<BlobServiceClient>(configuration.connection_url);
managed_identity_credential = std::make_shared<Azure::Identity::ManagedIdentityCredential>();
blob_service_client = std::make_unique<BlobServiceClient>(configuration.connection_url, managed_identity_credential);
}

bool container_exists = containerExists(blob_service_client,configuration.container);

std::string final_url;
size_t pos = configuration.connection_url.find('?');
if (pos != std::string::npos)
Expand All @@ -400,12 +405,21 @@ AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration co
final_url
= configuration.connection_url + (configuration.connection_url.back() == '/' ? "" : "/") + configuration.container;

if (!attempt_to_create_container)
{
if (storage_shared_key_credential)
return std::make_unique<BlobContainerClient>(final_url, storage_shared_key_credential);
else
return std::make_unique<BlobContainerClient>(final_url, managed_identity_credential);
}

bool container_exists = containerExists(blob_service_client,configuration.container);
if (container_exists)
{
if (storage_shared_key_credential)
result = std::make_unique<BlobContainerClient>(final_url, storage_shared_key_credential);
else
result = std::make_unique<BlobContainerClient>(final_url);
result = std::make_unique<BlobContainerClient>(final_url, managed_identity_credential);
}
else
{
Expand All @@ -425,7 +439,7 @@ AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration co
if (storage_shared_key_credential)
result = std::make_unique<BlobContainerClient>(final_url, storage_shared_key_credential);
else
result = std::make_unique<BlobContainerClient>(final_url);
result = std::make_unique<BlobContainerClient>(final_url, managed_identity_credential);
}
else
{
Expand Down
2 changes: 1 addition & 1 deletion src/Storages/StorageAzureBlob.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class StorageAzureBlob : public IStorage
ASTPtr partition_by_);

static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context);
static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only);
static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container = true);

static AzureObjectStorage::SettingsPtr createSettings(const ContextPtr & local_context);

Expand Down

0 comments on commit 6a61251

Please sign in to comment.