Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add default_missing_value_interpretation field; indicate KMS_SERVICE_ERROR is retryable #347

Merged
merged 8 commits into from
Aug 15, 2023
68 changes: 53 additions & 15 deletions protos/google/cloud/bigquery/storage/v1/storage.proto
Original file line number Diff line number Diff line change
Expand Up @@ -397,19 +397,25 @@ message CreateWriteStreamRequest {

// Request message for `AppendRows`.
//
// Due to the nature of AppendRows being a bidirectional streaming RPC, certain
// parts of the AppendRowsRequest need only be specified for the first request
// sent each time the gRPC network connection is opened/reopened.
// Because AppendRows is a bidirectional streaming RPC, certain parts of the
// AppendRowsRequest need only be specified for the first request before
// switching table destinations. You can also switch table destinations within
// the same connection for the default stream.
//
// A single AppendRowsRequest must be less than 10 MB in size.
// Requests larger than this return an error, typically `INVALID_ARGUMENT`.
message AppendRowsRequest {
// ProtoData contains the data rows and schema when constructing append
// requests.
message ProtoData {
// Proto schema used to serialize the data. This value only needs to be
// provided as part of the first request on a gRPC network connection,
// and will be ignored for subsequent requests on the connection.
// The protocol buffer schema used to serialize the data. Provide this value
// whenever:
//
// * You send the first request of an RPC connection.
//
// * You change the input schema.
//
// * You specify a new destination table.
ProtoSchema writer_schema = 1;

// Serialized row data in protobuf message format.
Expand All @@ -419,10 +425,9 @@ message AppendRowsRequest {
ProtoRows rows = 2;
}

// An enum to indicate how to interpret missing values. Missing values are
// fields present in user schema but missing in rows. A missing value can
// represent a NULL or a column default value defined in BigQuery table
// schema.
// An enum to indicate how to interpret missing values of fields that are
// present in user schema but missing in rows. A missing value can represent a
// NULL or a column default value defined in BigQuery table schema.
enum MissingValueInterpretation {
// Invalid missing value interpretation. Requests with this value will be
// rejected.
Expand All @@ -436,10 +441,14 @@ message AppendRowsRequest {
DEFAULT_VALUE = 2;
}

// Required. The write_stream identifies the target of the append operation,
// and only needs to be specified as part of the first request on the gRPC
// connection. If provided for subsequent requests, it must match the value of
// the first request.
// Required. The write_stream identifies the append operation. It must be
// provided in the following scenarios:
//
// * In the first request to an AppendRows connection.
//
// * In all subsequent requests to an AppendRows connection, if you use the
// same connection to write to multiple tables or change the input schema for
// default streams.
//
// For explicitly created write streams, the format is:
//
Expand All @@ -448,6 +457,22 @@ message AppendRowsRequest {
// For the special default stream, the format is:
//
// * `projects/{project}/datasets/{dataset}/tables/{table}/streams/_default`.
//
// An example of a possible sequence of requests with write_stream fields
// within a single connection:
//
// * r1: {write_stream: stream_name_1}
//
// * r2: {write_stream: /*omit*/}
//
// * r3: {write_stream: /*omit*/}
//
// * r4: {write_stream: stream_name_2}
//
// * r5: {write_stream: stream_name_2}
//
// The destination changed in r4, so the write_stream field must be
// populated in all subsequent requests in this stream.
string write_stream = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
Expand Down Expand Up @@ -493,6 +518,18 @@ message AppendRowsRequest {
// Currently, field name can only be top-level column name, can't be a struct
// field path like 'foo.bar'.
map<string, MissingValueInterpretation> missing_value_interpretations = 7;

// Optional. Default missing value interpretation for all columns in the
// table. When a value is specified on an `AppendRowsRequest`, it is applied
// to all requests on the connection from that point forward, until a
// subsequent `AppendRowsRequest` sets it to a different value.
// `missing_value_interpretations` can override
// `default_missing_value_interpretation`. For example, if you want to write
// `NULL` instead of using default values for some columns, you can set
// `default_missing_value_interpretation` to `DEFAULT_VALUE` and at the same
// time, set `missing_value_interpretations` to `NULL_VALUE` on those columns.
MissingValueInterpretation default_missing_value_interpretation = 8
[(google.api.field_behavior) = OPTIONAL];
}

// Response message for `AppendRows`.
Expand Down Expand Up @@ -680,7 +717,8 @@ message StorageError {
// There is an encryption error while using customer-managed encryption key.
CMEK_ENCRYPTION_ERROR = 12;

// Key Management Service (KMS) service returned an error.
// Key Management Service (KMS) returned an error, which can be
// retried.
KMS_SERVICE_ERROR = 13;

// Permission denied while using customer-managed encryption key.
Expand Down
18 changes: 13 additions & 5 deletions protos/google/cloud/bigquery/storage/v1/stream.proto
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,11 @@ message ReadSession {
}

// Optional. Specifies a table sampling percentage. Specifically, the query
// planner will use TABLESAMPLE SYSTEM (sample_percentage PERCENT). This
// samples at the file-level. It will randomly choose for each file whether
// to include that file in the sample returned. Note, that if the table only
// has one file, then TABLESAMPLE SYSTEM will select that file and return
// all returnable rows contained within.
// planner will use TABLESAMPLE SYSTEM (sample_percentage PERCENT). The
// sampling percentage is applied at the data block granularity. It will
// randomly choose for each data block whether to read the rows in that data
// block. For more details, see
// https://cloud.google.com/bigquery/docs/table-sampling.
optional double sample_percentage = 5
[(google.api.field_behavior) = OPTIONAL];
}
Expand Down Expand Up @@ -194,6 +194,14 @@ message ReadSession {
int64 estimated_total_bytes_scanned = 12
[(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. A pre-projected estimate of the total physical size of files
// (in bytes) that this session will scan when all streams are consumed. This
// estimate is independent of the selected columns and can be based on
// incomplete or stale metadata from the table. This field is only set for
// BigLake tables.
int64 estimated_total_physical_file_size = 15
[(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. An estimate on the number of rows present in this session's
// streams. This estimate is based on metadata from the table which might be
// incomplete or stale.
Expand Down
12 changes: 12 additions & 0 deletions protos/protos.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading