Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add default_missing_value_interpretation field; indicate KMS_SERVICE_ERROR is retryable #347

Merged
merged 8 commits into from
Aug 15, 2023
68 changes: 53 additions & 15 deletions protos/google/cloud/bigquery/storage/v1/storage.proto
Original file line number Diff line number Diff line change
Expand Up @@ -397,19 +397,25 @@ message CreateWriteStreamRequest {

// Request message for `AppendRows`.
//
// Due to the nature of AppendRows being a bidirectional streaming RPC, certain
// parts of the AppendRowsRequest need only be specified for the first request
// sent each time the gRPC network connection is opened/reopened.
// Because AppendRows is a bidirectional streaming RPC, certain parts of the
// AppendRowsRequest need only be specified for the first request before
// switching table destinations. You can also switch table destinations within
// the same connection for the default stream.
//
// A single AppendRowsRequest must be less than 10 MB in size.
// Requests larger than this return an error, typically `INVALID_ARGUMENT`.
message AppendRowsRequest {
// ProtoData contains the data rows and schema when constructing append
// requests.
message ProtoData {
// Proto schema used to serialize the data. This value only needs to be
// provided as part of the first request on a gRPC network connection,
// and will be ignored for subsequent requests on the connection.
// The protocol buffer schema used to serialize the data. Provide this value
// whenever:
//
// * You send the first request of an RPC connection.
//
// * You change the input schema.
//
// * You specify a new destination table.
ProtoSchema writer_schema = 1;

// Serialized row data in protobuf message format.
Expand All @@ -419,10 +425,9 @@ message AppendRowsRequest {
ProtoRows rows = 2;
}

// An enum to indicate how to interpret missing values. Missing values are
// fields present in user schema but missing in rows. A missing value can
// represent a NULL or a column default value defined in BigQuery table
// schema.
// An enum to indicate how to interpret missing values of fields that are
// present in user schema but missing in rows. A missing value can represent a
// NULL or a column default value defined in BigQuery table schema.
enum MissingValueInterpretation {
// Invalid missing value interpretation. Requests with this value will be
// rejected.
Expand All @@ -436,10 +441,14 @@ message AppendRowsRequest {
DEFAULT_VALUE = 2;
}

// Required. The write_stream identifies the target of the append operation,
// and only needs to be specified as part of the first request on the gRPC
// connection. If provided for subsequent requests, it must match the value of
// the first request.
// Required. The write_stream identifies the append operation. It must be
// provided in the following scenarios:
//
// * In the first request to an AppendRows connection.
//
// * In all subsequent requests to an AppendRows connection, if you use the
// same connection to write to multiple tables or change the input schema for
// default streams.
//
// For explicitly created write streams, the format is:
//
Expand All @@ -448,6 +457,22 @@ message AppendRowsRequest {
// For the special default stream, the format is:
//
// * `projects/{project}/datasets/{dataset}/tables/{table}/streams/_default`.
//
// An example of a possible sequence of requests with write_stream fields
// within a single connection:
//
// * r1: {write_stream: stream_name_1}
//
// * r2: {write_stream: /*omit*/}
//
// * r3: {write_stream: /*omit*/}
//
// * r4: {write_stream: stream_name_2}
//
// * r5: {write_stream: stream_name_2}
//
// The destination changed in r4, so the write_stream field must be
// populated in all subsequent requests in this stream.
string write_stream = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
Expand Down Expand Up @@ -493,6 +518,18 @@ message AppendRowsRequest {
// Currently, field name can only be top-level column name, can't be a struct
// field path like 'foo.bar'.
map<string, MissingValueInterpretation> missing_value_interpretations = 7;

// Optional. Default missing value interpretation for all columns in the
// table. When a value is specified on an `AppendRowsRequest`, it is applied
// to all requests on the connection from that point forward, until a
// subsequent `AppendRowsRequest` sets it to a different value.
// `missing_value_interpretations` can override
// `default_missing_value_interpretation`. For example, if you want to write
// `NULL` instead of using default values for some columns, you can set
// `default_missing_value_interpretation` to `DEFAULT_VALUE` and at the same
// time, set `missing_value_interpretations` to `NULL_VALUE` on those columns.
MissingValueInterpretation default_missing_value_interpretation = 8
[(google.api.field_behavior) = OPTIONAL];
}

// Response message for `AppendRows`.
Expand Down Expand Up @@ -680,7 +717,8 @@ message StorageError {
// There is an encryption error while using customer-managed encryption key.
CMEK_ENCRYPTION_ERROR = 12;

// Key Management Service (KMS) service returned an error.
// Key Management Service (KMS) returned an error, which can be
// retried.
KMS_SERVICE_ERROR = 13;

// Permission denied while using customer-managed encryption key.
Expand Down
18 changes: 13 additions & 5 deletions protos/google/cloud/bigquery/storage/v1/stream.proto
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,11 @@ message ReadSession {
}

// Optional. Specifies a table sampling percentage. Specifically, the query
// planner will use TABLESAMPLE SYSTEM (sample_percentage PERCENT). This
// samples at the file-level. It will randomly choose for each file whether
// to include that file in the sample returned. Note, that if the table only
// has one file, then TABLESAMPLE SYSTEM will select that file and return
// all returnable rows contained within.
// planner will use TABLESAMPLE SYSTEM (sample_percentage PERCENT). The
// sampling percentage is applied at the data block granularity. It will
// randomly choose for each data block whether to read the rows in that data
// block. For more details, see
// https://cloud.google.com/bigquery/docs/table-sampling.
optional double sample_percentage = 5
[(google.api.field_behavior) = OPTIONAL];
}
Expand Down Expand Up @@ -194,6 +194,14 @@ message ReadSession {
int64 estimated_total_bytes_scanned = 12
[(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. A pre-projected estimate of the total physical size of files
// (in bytes) that this session will scan when all streams are consumed. This
// estimate is independent of the selected columns and can be based on
// incomplete or stale metadata from the table. This field is only set for
// BigLake tables.
int64 estimated_total_physical_file_size = 15
[(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. An estimate on the number of rows present in this session's
// streams. This estimate is based on metadata from the table which might be
// incomplete or stale.
Expand Down
12 changes: 12 additions & 0 deletions protos/protos.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading