Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

br: add encryption config to streaming backup #1255

Merged
merged 6 commits into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
957 changes: 639 additions & 318 deletions pkg/brpb/brpb.pb.go

Large diffs are not rendered by default.

2,053 changes: 1,883 additions & 170 deletions pkg/encryptionpb/encryptionpb.pb.go

Large diffs are not rendered by default.

425 changes: 278 additions & 147 deletions pkg/import_sstpb/import_sstpb.pb.go

Large diffs are not rendered by default.

26 changes: 17 additions & 9 deletions proto/brpb.proto
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,11 @@ enum CompressionType {
ZSTD = 3;
}

// Security settings of a stream backup task. Carried by
// StreamBackupTaskInfo.security_config ("security config for backup files").
message StreamBackupTaskSecurityConfig {
// Plaintext data key for the task.
// Not recommended in production: configure a KMS-based or locally managed
// master key in the TiKV StreamBackupConfig instead.
CipherInfo plaintext_data_key = 1;
}

// BackupMode represents the mode of this whole backup request to the cluster.
// and we need to store it in `backupmeta`.
enum BackupMode {
Expand Down Expand Up @@ -356,7 +361,7 @@ message StreamBackupTaskInfo {
uint64 start_ts = 2;
uint64 end_ts = 3;

// Misc meta datas.
// Misc meta data.
// The name of the task, also the ID of the task.
string name = 4;
// The table filter of the task.
Expand All @@ -366,7 +371,10 @@ message StreamBackupTaskInfo {
// compression type
CompressionType compression_type = 6;

// The last timestamp of the task has been updated.
// security config for backup files
StreamBackupTaskSecurityConfig security_config = 7;

// The last timestamp of the task has been updated.
// This is a simple solution for infrequent config changes:
// When we observe a config change (via polling or etcd watching),
// we perform an incremental scan between [last_update_ts, now),
Expand Down Expand Up @@ -665,7 +673,7 @@ message DataFileGroup {
}

message DataFileInfo {
// SHA256 of the file.
// Checksum of the plaintext file, i.e., pre-compression, pre-encryption.
bytes sha256 = 1;
// Path of the file.
string path = 2;
Expand Down Expand Up @@ -721,12 +729,12 @@ message DataFileInfo {
bytes region_end_key = 21;
// The region epoch that the log file belongs to.
// In older versions, this might be empty.
// If a region get split or merged during observing,
// If a region get split or merged during observing,
// the file may contain multiple epochs.
repeated metapb.RegionEpoch region_epoch = 22;

// It may support encrypting at future.
reserved "iv";
// Encryption information of this data file, not set if plaintext.
encryptionpb.FileEncryptionInfo file_encryption_info = 23;
}

message StreamBackupError {
Expand Down Expand Up @@ -812,12 +820,12 @@ message LogFileCompaction {
}

message MetaEdit {
// Path to the meta file.
// Path to the meta file.
string path = 1;
// Delete the physical files (MetaFileGroup) in the meta file.
repeated string delete_physical_files = 2;
// Delete the logical files (MetaFileInfo) in the meta file.
// Note: Even the operation have been performed in the meta,
// Note: Even if the operation has been performed in the meta,
// this modification should be kept as long as the corresponding physical
// file is not deleted. Otherwise we cannot know when to delete the physical
// file, and the file will be leaked until truncated.
Expand All @@ -826,7 +834,7 @@ message MetaEdit {
bool destruct_self = 4;
}

// An extended version of `SpansOfFile`, added more metadata for the
// An extended version of `SpansOfFile`, added more metadata for the
// execution of delayed deletion.
message DeleteSpansOfFile {
string path = 1;
Expand Down
46 changes: 46 additions & 0 deletions proto/encryptionpb.proto
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,30 @@ message MasterKeyKms {
string region = 3;
// KMS endpoint. Normally not needed.
string endpoint = 4;
// optional, used to set up azure master key backend
AzureKms azure_kms = 5;
// optional, used to set up gcp master key backend
GcpKms gcp_kms = 6;

}

// Azure KMS settings. MasterKeyKms.azure_kms carries this message to set up
// the azure master key backend.
message AzureKms {
// NOTE(review): tenant_id / client_id / client_secret are presumably the
// identity and secret used to authenticate against Azure — confirm against
// the consumer of this message.
string tenant_id = 1;
string client_id = 2;
string client_secret = 3;
// Key vault to encrypt/decrypt data key.
string key_vault_url = 4;
// optional hsm used to generate data key
string hsm_name = 5;
string hsm_url = 6;
// NOTE(review): certificate-based credentials; presumably an alternative to
// client_secret, given either inline (client_certificate) or as a file
// location (client_certificate_path) — confirm which takes precedence.
string client_certificate = 7;
string client_certificate_path = 8;
string client_certificate_password = 9;

}

// GCP KMS settings. MasterKeyKms.gcp_kms carries this message to set up the
// gcp master key backend.
message GcpKms {
// NOTE(review): the expected format (inline credential content vs. a path
// to a credential file) is not visible here — confirm against the consumer.
string credential = 1;
}

message EncryptedContent {
Expand All @@ -113,3 +137,25 @@ message EncryptedContent {
// Valid only when KMS is used.
bytes ciphertext_key = 5;
}

// Encryption information of a single file. Referenced by DataFileInfo
// (brpb.proto) and KVMeta (import_sstpb.proto), where the field is left unset
// when the file is plaintext / encryption is not enabled.
message FileEncryptionInfo {
// Where the data key for this file comes from; at most one variant is set.
oneof mode {
// The data key was supplied in plaintext by the user. PlainTextDataKey is
// an empty message, so the key itself is not carried here.
PlainTextDataKey plain_text_data_key = 1;
// The data key is carried in encrypted form (see MasterKeyBased).
MasterKeyBased master_key_based = 2;
}
// file encryption method
encryptionpb.EncryptionMethod encryption_method = 3;
// iv to encrypt the file by data key
bytes file_iv = 4;
// file checksum after encryption, optional if using GCM
bytes checksum = 5;
}

// Marker for files encrypted with a user-supplied plaintext data key.
// Not recommended in production.
//
// Intentionally empty: this message is serialized and stored as part of the
// metadata in external storage, so it must not contain the key. The user
// needs to pass back the same data key for restore; keeping the key out of
// the metadata means it is not exposed if the external storage is breached.
message PlainTextDataKey {}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Empty message, how to pass data keys?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right, it's a placeholder as this proto is going to be serialized and stored as part of the metadata in external storage. The actual plaintext key is going to be passed back by user during restore so not get exposed if external storage is breached.


// Variant of FileEncryptionInfo.mode used when the file's data key is stored
// in encrypted form rather than supplied in plaintext by the user.
message MasterKeyBased {
// encrypted data key with metadata
// NOTE(review): repeated — presumably one entry per master key the data key
// was encrypted with; confirm against the writer.
repeated encryptionpb.EncryptedContent data_key_encrypted_content = 1;
}
9 changes: 8 additions & 1 deletion proto/import_sstpb.proto
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import "kvrpcpb.proto";
import "gogoproto/gogo.proto";
import "rustproto.proto";
import "brpb.proto";
import "encryptionpb.proto";

option (gogoproto.sizer_all) = true;
option (gogoproto.marshaler_all) = true;
Expand Down Expand Up @@ -394,6 +395,9 @@ message KVMeta {

// the compression type for the file.
backup.CompressionType compression_type = 13;

// encryption information of the kv file, not set if encryption is not enabled.
encryptionpb.FileEncryptionInfo file_encryption_info = 19;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The largest tag number in this message is 13. Why 19?

Copy link
Contributor Author

@Tristan1900 Tristan1900 Sep 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch! fixed

}


Expand Down Expand Up @@ -425,8 +429,11 @@ message ApplyRequest {
// context represents region info and it used to build raft commands.
kvrpcpb.Context context = 4;

// cipher_info is used to decrypt kv file when download file.
// plaintext data key to decrypt kv file if configured during log backup.
backup.CipherInfo cipher_info = 11;

// master keys config used to decrypt data keys in restore if configured during log backup.
repeated encryptionpb.MasterKey master_keys = 14;
}

message ApplyResponse {
Expand Down
11 changes: 8 additions & 3 deletions scripts/check.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
#!/usr/bin/env bash

check_protoc_version() {
version=$(protoc --version)
major=$(echo ${version} | sed -n -e 's/.*\([0-9]\{1,\}\)\.[0-9]\{1,\}\.[0-9]\{1,\}.*/\1/p')
minor=$(echo ${version} | sed -n -e 's/.*[0-9]\{1,\}\.\([0-9]\{1,\}\)\.[0-9]\{1,\}.*/\1/p')
version=$(protoc --version | awk '{print $NF}')
major=$(echo ${version} | cut -d '.' -f 1)
minor=$(echo ${version} | cut -d '.' -f 2)
if [ "$major" -eq 3 ] && [ "$minor" -ge 8 ]; then
return 0
fi
# protobuf bumps the major version to 21 after 3.
# https://github.com/protocolbuffers/protobuf/releases/tag/v21.7
if [ "$major" -ge 21 ]; then
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you! Now we no more need to download an old protoc for passing the check...

return 0
fi
echo "protoc version not match, version 3.8.x+ is needed, current version: ${version}"
return 1
}
Expand Down
Loading
Loading