Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sinks to write to GCP Bigquery #1

Merged
merged 10 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/actions/spelling/allow.txt
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ armhf
backpressure
backticks
bigendian
bigquery
bindir
binfmt
bitcast
Expand Down
1 change: 1 addition & 0 deletions .github/actions/spelling/excludes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,5 @@
^\Qwebsite/layouts/shortcodes/config/unit-tests.html\E$
^lib/codecs/tests/data/native_encoding/
^\Qwebsite/config.toml\E$
^proto/google/
ignore$
1 change: 1 addition & 0 deletions .github/actions/spelling/expect.txt
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,7 @@ ingesters
ingestor
initdb
initech
Insertdata
installdeb
Instrumentable
interpolatedstring
Expand Down
1 change: 1 addition & 0 deletions buf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ modules:
- path: lib/vector-core/proto
- path: proto/third-party
- path: proto/vector
- path: proto/google/cloud/bigquery/storage/v1
lint:
use:
- DEFAULT
Expand Down
4 changes: 4 additions & 0 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@
println!("cargo:rerun-if-changed=proto/vector/dd_trace.proto");
println!("cargo:rerun-if-changed=proto/vector/ddsketch_full.proto");
println!("cargo:rerun-if-changed=proto/vector/vector.proto");
println!("cargo:rerun-if-changed=proto/google/cloud/bigquery/storage/v1/storage.proto");

// Create and store the "file descriptor set" from the compiled Protocol Buffers packages.
//
Expand All @@ -144,6 +145,7 @@
prost_build,
&[
"lib/vector-core/proto/event.proto",
"proto/google/cloud/bigquery/storage/v1/storage.proto",
"proto/third-party/dnstap.proto",
"proto/vector/ddsketch_full.proto",
"proto/vector/dd_metric.proto",
Expand All @@ -155,6 +157,7 @@
&[
"proto/third-party",
"proto/vector",
"proto/google/cloud/bigquery/storage/v1",
Fixed Show fixed Hide fixed
"lib/vector-core/proto/",
],
)
Expand Down Expand Up @@ -268,3 +271,4 @@
// Emit the aforementioned stanzas.
tracker.emit_rerun_stanzas();
}

11 changes: 11 additions & 0 deletions lib/codecs/tests/data/protobuf/integration.desc

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions lib/codecs/tests/data/protobuf/integration.proto

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion lib/vector-core/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ fn main() {
.bytes(["raw_bytes"])
.compile_protos(
&["proto/event.proto"],
&["proto", "../../proto/third-party", "../../proto/vector"],
&["proto", "../../proto/third-party", "../../proto/vector", "../../proto/google/cloud/bigquery/storage/v1"],
)
.unwrap();
}
64 changes: 64 additions & 0 deletions proto/google/cloud/bigquery/storage/v1/arrow.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.bigquery.storage.v1;

option csharp_namespace = "Google.Cloud.BigQuery.Storage.V1";
option go_package = "cloud.google.com/go/bigquery/storage/apiv1/storagepb;storagepb";
option java_multiple_files = true;
option java_outer_classname = "ArrowProto";
option java_package = "com.google.cloud.bigquery.storage.v1";
option php_namespace = "Google\\Cloud\\BigQuery\\Storage\\V1";

// Schema of an Arrow dataset, expressed with the Arrow data types documented at
// https://arrow.apache.org/docs/python/api/datatypes.html
// and encoded to bytes with the Arrow IPC format:
// https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc
//
// Refer to the code samples for how to deserialize this message.
message ArrowSchema {
  // The Arrow schema in its IPC-serialized form.
  bytes serialized_schema = 1;
}

// A single Arrow RecordBatch.
message ArrowRecordBatch {
  // The RecordBatch in its IPC-serialized form.
  bytes serialized_record_batch = 1;

  // [Deprecated] Number of rows held in `serialized_record_batch`.
  // Use the format-independent ReadRowsResponse.row_count instead.
  int64 row_count = 2 [deprecated = true];
}

// Options that apply only to Arrow serialization.
message ArrowSerializationOptions {
  // Compression codecs supported by Arrow.
  enum CompressionCodec {
    // No codec specified; no compression will be used.
    COMPRESSION_UNSPECIFIED = 0;

    // LZ4 Frame (https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md)
    LZ4_FRAME = 1;

    // Zstandard compression.
    ZSTD = 2;
  }

  // Codec used to compress the Arrow buffers in serialized record batches.
  CompressionCodec buffer_compression = 2;
}
56 changes: 56 additions & 0 deletions proto/google/cloud/bigquery/storage/v1/avro.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.bigquery.storage.v1;

option csharp_namespace = "Google.Cloud.BigQuery.Storage.V1";
option go_package = "cloud.google.com/go/bigquery/storage/apiv1/storagepb;storagepb";
option java_multiple_files = true;
option java_outer_classname = "AvroProto";
option java_package = "com.google.cloud.bigquery.storage.v1";
option php_namespace = "Google\\Cloud\\BigQuery\\Storage\\V1";

// Schema of Avro data.
message AvroSchema {
  // The schema serialized as JSON, as described in the Avro specification:
  // https://avro.apache.org/docs/1.8.1/spec.html.
  string schema = 1;
}

// A block of Avro rows.
message AvroRows {
  // The rows of the block in binary-serialized form.
  bytes serialized_binary_rows = 1;

  // [Deprecated] Number of rows in the returned block.
  // Use the format-independent ReadRowsResponse.row_count instead.
  int64 row_count = 2 [deprecated = true];
}

// Options that apply only to Avro serialization.
message AvroSerializationOptions {
  // Turns on the displayName attribute in the Avro schema.
  //
  // Field names must be alphanumeric according to the Avro specification. By
  // default, when a column name violates that requirement (e.g. it contains
  // non-ascii unicode codepoints) and Avro is the requested output format,
  // the CreateReadSession call fails.
  //
  // When this field is true, each avro field name is filled with a
  // placeholder value and every avro field carries a "displayName" attribute
  // holding the original column name.
  bool enable_display_name_attribute = 1;
}
48 changes: 48 additions & 0 deletions proto/google/cloud/bigquery/storage/v1/protobuf.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.bigquery.storage.v1;

import "google/protobuf/descriptor.proto";

option csharp_namespace = "Google.Cloud.BigQuery.Storage.V1";
option go_package = "cloud.google.com/go/bigquery/storage/apiv1/storagepb;storagepb";
option java_multiple_files = true;
option java_outer_classname = "ProtoBufProto";
option java_package = "com.google.cloud.bigquery.storage.v1";
option php_namespace = "Google\\Cloud\\BigQuery\\Storage\\V1";

// ProtoSchema carries the schema of data rows that are serialized as protocol
// buffers.
message ProtoSchema {
  // Descriptor of the input message. It must be self contained: the data rows
  // sent have to be fully decodable from this single descriptor alone. When
  // data rows are composed of several independent messages, the descriptor
  // may therefore need to be rewritten so that it only uses nested types:
  // https://developers.google.com/protocol-buffers/docs/proto#nested
  //
  // For details on how proto types and values map onto BigQuery, see:
  // https://cloud.google.com/bigquery/docs/write-api#data_type_conversions
  google.protobuf.DescriptorProto proto_descriptor = 1;
}

// ProtoRows holds data rows serialized as protocol buffers.
message ProtoRows {
  // The rows, in sequence, in serialized Protocol Buffer form.
  //
  // For more information on deserializing this field, see
  // https://developers.google.com/protocol-buffers/docs/overview.
  repeated bytes serialized_rows = 1;
}
Loading
Loading