diff --git a/.github/workflows/go_tests.yml b/.github/workflows/go_tests.yml index 6a5ab4450b1ea..4521c274e588c 100644 --- a/.github/workflows/go_tests.yml +++ b/.github/workflows/go_tests.yml @@ -59,19 +59,9 @@ jobs: - name: Run vet run: | cd sdks/go/pkg/beam - VOUT=$(go vet --copylocks=false --unsafeptr=false ./...) - if [ -n "$VOUT" ]; then - echo -e "Run go vet and fix warnings before checking in changes\n" - echo -e "Vet Warnings:\n" - echo -e "$VOUT" && exit 1 - fi + go vet --copylocks=false --unsafeptr=false ./... - name: Run Staticcheck run: | go install "honnef.co/go/tools/cmd/staticcheck@2022.1" cd sdks/go/pkg/beam - RESULTS=$($(go env GOPATH)/bin/staticcheck ./...) - if [ -n "$RESULTS" ]; then - echo -e "Please address Staticcheck warnings before checking in changes\n" - echo -e "Staticcheck Warnings:\n" - echo -e "$RESULTS" && exit 1 - fi + $(go env GOPATH)/bin/staticcheck ./... diff --git a/.github/workflows/playground_examples_ci_reusable.yml b/.github/workflows/playground_examples_ci_reusable.yml index 1b1e4efd83844..7c5f039c34888 100644 --- a/.github/workflows/playground_examples_ci_reusable.yml +++ b/.github/workflows/playground_examples_ci_reusable.yml @@ -91,6 +91,7 @@ jobs: working-directory: playground/infrastructure env: BEAM_ROOT_DIR: "../.." + BEAM_EXAMPLE_CATEGORIES: "../categories.yaml" ci_cd: name: ${{ inputs.step }} ${{ inputs.sdk }} ${{ inputs.origin }} diff --git a/.github/workflows/typescript_tests.yml b/.github/workflows/typescript_tests.yml index 7a20016ff3f6d..01efc06c14dfc 100644 --- a/.github/workflows/typescript_tests.yml +++ b/.github/workflows/typescript_tests.yml @@ -69,6 +69,7 @@ jobs: typescript_xlang_tests: name: 'TypeScript xlang Tests' runs-on: [self-hosted, ubuntu-20.04] + timeout-minutes: 10 strategy: fail-fast: false steps: diff --git a/.gitignore b/.gitignore index 73c9e05b4eec1..5c1068399458e 100644 --- a/.gitignore +++ b/.gitignore @@ -127,6 +127,8 @@ website/www/yarn-error.log **/.packages **/generated_plugin_registrant.dart playground/frontend/playground_components/pubspec.lock +playground/frontend/playground_components/test/tools/extract_symbols_java/dependencies +playground/frontend/playground_components_dev/pubspec.lock # Ignore Beam Playground Terraform **/.terraform @@ -136,4 +138,4 @@ playground/frontend/playground_components/pubspec.lock **/*.tfvars # Ignore Katas auto-generated files -**/*-remote-info.yaml \ No newline at end of file +**/*-remote-info.yaml diff --git a/.test-infra/jenkins/README.md b/.test-infra/jenkins/README.md index e53dae86c4589..53635d40d290e 100644 --- a/.test-infra/jenkins/README.md +++ b/.test-infra/jenkins/README.md @@ -27,12 +27,9 @@ Beam Jenkins overview page: [link](https://ci-beam.apache.org/) | Name | Link | PR Trigger Phrase | Cron Status | |------|------|-------------------|-------------| -| beam_PreCommit_BeamSQL_ZetaSQL | [commit](https://ci-beam.apache.org/job/beam_PreCommit_JavaBeamZetaSQL_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_JavaBeamZetaSQL_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_JavaBeamZetaSQL_Phrase/) | `Run BeamSQL_ZetaSQL PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_JavaBeamZetaSQL_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_JavaBeamZetaSQL_Cron) | | beam_PreCommit_CommunityMetrics | [commit](https://ci-beam.apache.org/job/beam_PreCommit_CommunityMetrics_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_CommunityMetrics_Cron/), 
[phrase](https://ci-beam.apache.org/job/beam_PreCommit_CommunityMetrics_Phrase/) | `Run CommunityMetrics PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_CommunityMetrics_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_CommunityMetrics_Cron) | | beam_PreCommit_Go | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Go_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Go_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Go_Phrase/) | `Run Go PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Go_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Go_Cron) | | beam_PreCommit_Java | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Phrase/) | `Run Java PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Cron) | -| beam_PreCommit_JavaPortabilityApi | [commit](https://ci-beam.apache.org/job/beam_PreCommit_JavaPortabilityApi_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_JavaPortabilityApi_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_JavaPortabilityApi_Phrase/) | `Run JavaPortabilityApi PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_JavaPortabilityApi_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_JavaPortabilityApi_Cron) | -| beam_PreCommit_JavaPortabilityApiJava11 | [commit](https://ci-beam.apache.org/job/beam_PreCommit_JavaPortabilityApiJava11_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_JavaPortabilityApiJava11_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_JavaPortabilityApiJava11_Phrase/) | `Run JavaPortabilityApiJava11 PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_JavaPortabilityApiJava11_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_JavaPortabilityApiJava11_Cron/) | | beam_PreCommit_Java_Debezium_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Debezium_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Debezium_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Debezium_IO_Direct_Phrase/) | `Run Java_Debezium_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Debezium_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Debezium_IO_Direct_Cron/) | | beam_PreCommit_Java_Examples_Dataflow | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Phrase/) | `Run Java_Examples_Dataflow PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Cron) | | beam_PreCommit_Java_Examples_Dataflow_Java11 | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Java11_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Java11_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Java11_Phrase/) | `Run Java_Examples_Dataflow_Java11 PreCommit` | 
[![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Java11_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Examples_Dataflow_Java11_Cron/) | @@ -41,6 +38,7 @@ Beam Jenkins overview page: [link](https://ci-beam.apache.org/) | beam_PreCommit_Java_Kafka_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kafka_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kafka_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kafka_IO_Direct_Phrase/) | `Run Java_Kafka_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kafka_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kafka_IO_Direct_Cron/) | | beam_PreCommit_Java_Kinesis_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kinesis_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kinesis_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kinesis_IO_Direct_Phrase/) | `Run Java_Kinesis_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kinesis_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Kinesis_IO_Direct_Cron/) | | beam_PreCommit_Java_Neo4j_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_Neo4j_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_Neo4j_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_Neo4j_IO_Direct_Phrase/) | `Run Java_Neo4j_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_Neo4j_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_Neo4j_IO_Direct_Cron/) | +| beam_PreCommit_Java_SingleStore_IO_Direct | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Java_SingleStore_IO_Direct_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Java_SingleStore_IO_Direct_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Java_SingleStore_IO_Direct_Phrase/) | `Run Java_SingleStore_IO_Direct PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Java_SingleStore_IO_Direct_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Java_SingleStore_IO_Direct_Cron/) | | beam_PreCommit_Portable_Python | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Portable_Python_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Portable_Python_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Portable_Python_Phrase/) | `Run Portable_Python PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Portable_Python_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Portable_Python_Cron) | | beam_PreCommit_PythonLint | [commit](https://ci-beam.apache.org/job/beam_PreCommit_PythonLint_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_PythonLint_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_PythonLint_Phrase/) | `Run PythonLint PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_PythonLint_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_PythonLint_Cron) | | beam_PreCommit_Python | [commit](https://ci-beam.apache.org/job/beam_PreCommit_Python_Commit/), [cron](https://ci-beam.apache.org/job/beam_PreCommit_Python_Cron/), [phrase](https://ci-beam.apache.org/job/beam_PreCommit_Python_Phrase/) | `Run Python 
PreCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PreCommit_Python_Cron/badge/icon)](https://ci-beam.apache.org/job/beam_PreCommit_Python_Cron) | @@ -76,6 +74,7 @@ Beam Jenkins overview page: [link](https://ci-beam.apache.org/) | beam_PostCommit_Java | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Java/), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_Java_PR/) | `Run Java PostCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java) | | beam_PostCommit_Java_DataflowV1 | [cron](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_DataflowV1/), [phrase](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_DataflowV1_PR/) | `Run PostCommit_Java_Dataflow` | [![Build Status](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_DataflowV1/badge/icon)](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_DataflowV1/) | | beam_PostCommit_Java_DataflowV2 | [cron](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_DataflowV2/), [phrase](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_DataflowV2_PR/) | `Run PostCommit_Java_DataflowV2` | [![Build Status](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_DataflowV2/badge/icon)](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_DataflowV2/) | +| beam_PostCommit_Java_InfluxDbIO_IT | [cron](https://ci-beam.apache.org/view/PerformanceTests/job/beam_PostCommit_Java_InfluxDbIO_IT/), [phrase](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_InfluxDbIO_IT_PR/) | `Run Java InfluxDbIO_IT` | [![Build Status](https://ci-beam.apache.org/view/PerformanceTests/job/beam_PostCommit_Java_InfluxDbIO_IT/badge/icon)](https://ci-beam.apache.org/view/PerformanceTests/job/beam_PostCommit_Java_InfluxDbIO_IT/) | | beam_PostCommit_Java_Nexmark_Dataflow | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Java_Nexmark_Dataflow/), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_Java_Nexmark_Dataflow_PR/) | `Dataflow Runner Nexmark Tests` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_Nexmark_Dataflow/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_Nexmark_Dataflow) | | beam_PostCommit_Java_Nexmark_Dataflow_V2 | [cron](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_Nexmark_Dataflow_V2/), [phrase](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_Nexmark_DataflowV2_PR/) | `Run Dataflow Runner V2 Nexmark Tests` | [![Build Status](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_Nexmark_Dataflow_V2/badge/icon)](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_Nexmark_Dataflow_V2/) | | beam_PostCommit_Java_Nexmark_Dataflow_V2_Java11 | [cron](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java11/), [phrase](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java11/badge/icon) | `Dataflow Runner V2 Java 11 Nexmark Tests` | [![Build Status](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java11/badge/icon)](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java11/) | @@ -88,6 +87,7 @@ Beam Jenkins overview page: [link](https://ci-beam.apache.org/) | beam_PostCommit_Java_PVR_Spark_Batch | 
[cron](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch_PR/) | `Run Java Spark PortableValidatesRunner Batch` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_PVR_Spark_Batch) | | beam_PostCommit_Java_PVR_Spark2_Streaming | [cron](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_PVR_Spark2_Streaming/), [phrase](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_PVR_Spark2_Streaming_PR/) | `Run Java Spark v2 PortableValidatesRunner Streaming` | [![Build Status](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_PVR_Spark2_Streaming/badge/icon)](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_PVR_Spark2_Streaming/) | | beam_PostCommit_Java_PVR_Spark3_Streaming | [cron](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_PVR_Spark3_Streaming/), [phrase](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_PVR_Spark3_Streaming_PR/) | `Run Java Spark v3 PortableValidatesRunner Streaming` | [![Build Status](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_PVR_Spark3_Streaming/badge/icon)](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_PVR_Spark3_Streaming/) | +| beam_PostCommit_Java_SingleStoreIO_IT | [cron](https://ci-beam.apache.org/view/PerformanceTests/job/beam_PostCommit_Java_SingleStoreIO_IT/), [phrase](https://ci-beam.apache.org/view/PostCommit/job/beam_PostCommit_Java_SingleStoreIO_IT_PR/) | `Run Java SingleStoreIO_IT` | [![Build Status](https://ci-beam.apache.org/view/PerformanceTests/job/beam_PostCommit_Java_SingleStoreIO_IT/badge/icon)](https://ci-beam.apache.org/view/PerformanceTests/job/beam_PostCommit_Java_SingleStoreIO_IT/) | | beam_PostCommit_Java_Dataflow_Examples_Java11 | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Java_Examples_Dataflow_Java11/), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_Java_Examples_Dataflow_Java11_PR/) | `Run Java examples on Dataflow Java 11` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_Examples_Dataflow_Java11/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_Examples_Dataflow_Java11) | | beam_PostCommit_Java_Examples_Dataflow_V2 | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Java_Examples_Dataflow_V2/), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_Java_Examples_Dataflow_V2/) | `Run Java Examples on Dataflow Runner V2` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_Examples_Dataflow_V2/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_Examples_Dataflow_V2) | | beam_PostCommit_Java_Examples_Dataflow_V2_Java11 | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Java_Examples__Dataflow_V2_Java11/), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_Java_Examples__Dataflow_V2_Java11/) | `Run Java 11 Examples on Dataflow Runner V2` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Java_Examples_Dataflow_V2_Java11/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Java_Examples_Dataflow_V2_Java11) | @@ -126,9 +126,12 @@ Beam Jenkins overview page: [link](https://ci-beam.apache.org/) | beam_PostCommit_Python_VR_Spark | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Python_VR_Spark/), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_Python_VR_Spark/) | 
`Run Python Spark ValidatesRunner` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python_VR_Spark/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python_VR_Spark) | | beam_PostCommit_Python37 | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Python37), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_Python37_PR/) | `Run Python 3.7 PostCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python37/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python37) | | beam_PostCommit_Python38 | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Python38), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_Python38_PR/) | `Run Python 3.8 PostCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python38/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python38) | -| beam_PostCommit_Sickbay_Python36 | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Sickbay_Python36), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_SickBay_Python36_PR/) | `Run Python 3.6 PostCommit Sickbay tests` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Sickbay_Python36/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Sickbay_Python36) | +| beam_PostCommit_Python39 | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Python39), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_Python39_PR/) | `Run Python 3.9 PostCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python39/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python39) | +| beam_PostCommit_Python310 | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Python310), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_Python310_PR/) | `Run Python 3.10 PostCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Python310/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Python310) | | beam_PostCommit_Sickbay_Python37 | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Sickbay_Python37), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_SickBay_Python37_PR/) | `Run Python 3.7 PostCommit Sickbay tests` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Sickbay_Python37/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Sickbay_Python37) | | beam_PostCommit_Sickbay_Python38 | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Sickbay_Python38), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_SickBay_Python38_PR/) | `Run Python 3.8 PostCommit Sickbay tests` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Sickbay_Python38/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Sickbay_Python38) | +| beam_PostCommit_Sickbay_Python39 | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Sickbay_Python39), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_SickBay_Python39_PR/) | `Run Python 3.9 PostCommit Sickbay tests` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Sickbay_Python39/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Sickbay_Python39) | +| beam_PostCommit_Sickbay_Python310 | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Sickbay_Python310), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_SickBay_Python310_PR/) | `Run Python 3.10 PostCommit Sickbay tests` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Sickbay_Python310/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Sickbay_Python310) | | 
beam_PostCommit_SQL | [cron](https://ci-beam.apache.org/job/beam_PostCommit_SQL/), [phrase](https://ci-beam.apache.org/job/beam_PostCommit_SQL_PR/) | `Run SQL PostCommit` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_SQL/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_SQL) | | beam_PostCommit_Website_Publish | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Website_Publish/) | N/A | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Website_Publish/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Website_Publish) | | beam_PostCommit_Website_Test | [cron](https://ci-beam.apache.org/job/beam_PostCommit_Website_Test/) | `Run Full Website Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PostCommit_Website_Test/badge/icon)](https://ci-beam.apache.org/job/beam_PostCommit_Website_Test) | @@ -146,21 +149,21 @@ Beam Jenkins overview page: [link](https://ci-beam.apache.org/) | beam_PerformanceTests_Cdap | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_Cdap/) | `Run Java CdapIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_Cdap/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_Cdap) | | beam_PerformanceTests_Compressed_TextIOIT | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_Compressed_TextIOIT/), [hdfs_cron](https://ci-beam.apache.org/job/beam_PerformanceTests_Compressed_TextIOIT_HDFS/) | `Run Java CompressedTextIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_Compressed_TextIOIT/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_Compressed_TextIOIT) [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_Compressed_TextIOIT_HDFS/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_Compressed_TextIOIT_HDFS) | | beam_PerformanceTests_HadoopFormat | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_HadoopFormat/) | `Run Java HadoopFormatIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_HadoopFormat/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_HadoopFormat) | -| beam_PerformanceTests_InfluxDbIO_IT | [cron](https://ci-beam.apache.org/view/PerformanceTests/job/beam_PerformanceTests_InfluxDbIO_IT/) | `Run Java InfluxDbIO Performance Test` | [![Build Status](https://ci-beam.apache.org/view/PerformanceTests/job/beam_PerformanceTests_InfluxDbIO_IT/badge/icon)](https://ci-beam.apache.org/view/PerformanceTests/job/beam_PerformanceTests_InfluxDbIO_IT/) | | beam_PerformanceTests_JDBC | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_JDBC/) | `Run Java JdbcIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_JDBC/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_JDBC) | | beam_PerformanceTests_KafkaIOIT | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_Kafka_IO/) | `Run Java KafkaIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_Kafka_IO/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_Kafka_IO) [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_Kafka_IO/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_Kafka_IO) | | beam_PerformanceTests_ManyFiles_TextIOIT | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_ManyFiles_TextIOIT/), 
[hdfs_cron](https://ci-beam.apache.org/job/beam_PerformanceTests_ManyFiles_TextIOIT_HDFS/) | `Run Java ManyFilesTextIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_ManyFiles_TextIOIT/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_ManyFiles_TextIOIT) [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_ManyFiles_TextIOIT_HDFS/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_ManyFiles_TextIOIT_HDFS) | | beam_PerformanceTests_MongoDBIOIT | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_MongoDBIO_IT/) | `Run Java MongoDBIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_MongoDBIO_IT/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_MongoDBIO_IT) | | beam_PerformanceTests_ParquetIOIT | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_ParquetIOIT/), [hdfs_cron](https://ci-beam.apache.org/job/beam_PerformanceTests_ParquetIOIT_HDFS/) | `Run Java ParquetIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_ParquetIOIT/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_ParquetIOIT) [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_ParquetIOIT_HDFS/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_ParquetIOIT_HDFS) | | beam_PerformanceTests_PubsubIOIT_Python_Streaming | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_PubsubIOIT_Python_Streaming/), [phrase](https://ci-beam.apache.org/view/PerformanceTests/job/beam_PerformanceTests_PubsubIOIT_Python_Streaming_PR/) | `Run PubsubIO Performance Test Python` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_PubsubIOIT_Python_Streaming/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_PubsubIOIT_Python_Streaming) | +| beam_PerformanceTests_SingleStoreIO | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_SingleStoreIO/) | `Run Java SingleStoreIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_SingleStoreIO/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_SingleStoreIO) | | beam_PerformanceTests_SpannerIO_Read_2GB_Python | [cron](https://ci-beam.apache.org/view/PerformanceTests/job/beam_PerformanceTests_SpannerIO_Read_2GB_Python/), [phrase](https://ci-beam.apache.org/view/PerformanceTests/job/beam_PerformanceTests_SpannerIO_Read_2GB_Python_PR/) | `Run SpannerIO Read 2GB Performance Test Python Batch` | [![Build Status](https://ci-beam.apache.org/view/PerformanceTests/job/beam_PerformanceTests_SpannerIO_Read_2GB_Python/badge/icon)](https://ci-beam.apache.org/view/PerformanceTests/job/beam_PerformanceTests_SpannerIO_Read_2GB_Python/) | | beam_PerformanceTests_SpannerIO_Write_2GB_Python_Batch | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_SpannerIO_Write_2GB_Python_Batch/), [phrase](https://ci-beam.apache.org/view/PerformanceTests/job/beam_PerformanceTests_SpannerIO_Write_2GB_Python_Batch_PR/) | `Run SpannerIO Write 2GB Performance Test Python Batch` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_SpannerIO_Write_2GB_Python_Batch/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_SpannerIO_Write_2GB_Python_Batch) | -| beam_PerformanceTests_SparkReceiverIOIT | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_SparkReceiverIOIT/) | `Run Java SparkReceiverIO Performance Test` | [![Build 
Status](https://ci-beam.apache.org/job/beam_PerformanceTests_SparkReceiverIO/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_SparkReceiverIO) | +| beam_PerformanceTests_SparkReceiver_IO | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_SparkReceiver_IO/) | `Run Java SparkReceiverIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_SparkReceiver_IO/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_SparkReceiver_IO) | | beam_PerformanceTests_TFRecordIOIT | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_TFRecordIOIT/) | `Run Java TFRecordIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_TFRecordIOIT/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_TFRecordIOIT) | | beam_PerformanceTests_TextIOIT | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_TextIOIT/), [hdfs_cron](https://ci-beam.apache.org/job/beam_PerformanceTests_TextIOIT_HDFS/) | `Run Java TextIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_TextIOIT/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_TextIOIT) [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_TextIOIT_HDFS/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_TextIOIT_HDFS) | | beam_PerformanceTests_WordCountIT_Py37 | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_WordCountIT_Py37/) | `Run Python37 WordCountIT Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_WordCountIT_Py37/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_WordCountIT_Py37) | | beam_PerformanceTests_XmlIOIT | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_XmlIOIT/), [hdfs_cron](https://ci-beam.apache.org/job/beam_PerformanceTests_XmlIOIT_HDFS/) | `Run Java XmlIO Performance Test` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_XmlIOIT/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_XmlIOIT) [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_XmlIOIT_HDFS/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_XmlIOIT_HDFS) | -| beam_SQLBigQueryIO_Batch_Performance_Test_Java | [cron](https://ci-beam.apache.org/job/beam_SQLBigQueryIO_Batch_Performance_Test_Java/) | `Run SQLBigQueryIO Batch Performance Test Java` | [![Build Status](https://ci-beam.apache.org/job/beam_SQLBigQueryIO_Batch_Performance_Test_Java/badge/icon)](https://ci-beam.apache.org/job/beam_SQLBigQueryIO_Batch_Performance_Test_Java/) | +| beam_PerformanceTests_SQLBigQueryIO_Batch_Java | [cron](https://ci-beam.apache.org/job/beam_PerformanceTests_SQLBigQueryIO_Batch_Java/) | `Run SQLBigQueryIO Batch Performance Test Java` | [![Build Status](https://ci-beam.apache.org/job/beam_PerformanceTests_SQLBigQueryIO_Batch_Java/badge/icon)](https://ci-beam.apache.org/job/beam_PerformanceTests_SQLBigQueryIO_Batch_Java/) | | beam_Java_JMH | [cron](https://ci-beam.apache.org/job/beam_Java_JMH/) | | [![Build Status](https://ci-beam.apache.org/job/beam_Java_JMH/badge/icon)](https://ci-beam.apache.org/job/beam_Java_JMH/) | ### Load test Jobs @@ -250,6 +253,7 @@ Beam Jenkins overview page: [link](https://ci-beam.apache.org/) | beam_SeedJob | [cron](https://ci-beam.apache.org/job/beam_SeedJob/), [standalone](https://ci-beam.apache.org/job/beam_SeedJob_Standalone/) | `Run Seed Job` | [![Build 
Status](https://ci-beam.apache.org/job/beam_SeedJob/badge/icon)](https://ci-beam.apache.org/job/beam_SeedJob) | | beam_sonarqube_report | [cron](https://ci-beam.apache.org/job/beam_sonarqube_report/)| N/A | [![Build Status](https://ci-beam.apache.org/job/beam_sonarqube_report/badge/icon)](https://ci-beam.apache.org/job/beam_sonarqube_report/) | | beam_CancelStaleDataflowJobs | [cron](https://ci-beam.apache.org/job/beam_CancelStaleDataflowJobs/)| `Run Cancel Stale Dataflow Jobs` | [![Build Status](https://ci-beam.apache.org/job/beam_CancelStaleDataflowJobs/badge/icon)](https://ci-beam.apache.org/job/beam_CancelStaleDataflowJobs/) | +| beam_CleanUpGCPResources | [cron](https://ci-beam.apache.org/job/beam_CleanUpGCPResources/)| `Run Clean GCP Resources` | [![Build Status](https://ci-beam.apache.org/job/beam_CleanUpGCPResources/badge/icon)](https://ci-beam.apache.org/job/beam_CleanUpGCPResources/) | | beam_Clean_tmp_directory | [cron](https://ci-beam.apache.org/job/beam_Clean_tmp_directory/)| N/A | [![Build Status](https://ci-beam.apache.org/job/beam_Clean_tmp_directory/badge/icon)](https://ci-beam.apache.org/job/beam_Clean_tmp_directory/) | | beam_Publish_Beam_SDK_Snapshots | [cron](https://ci-beam.apache.org/job/beam_Publish_Beam_SDK_Snapshots/)| N/A | [![Build Status](https://ci-beam.apache.org/job/beam_Publish_Beam_SDK_Snapshots/badge/icon)](https://ci-beam.apache.org/job/beam_Publish_Beam_SDK_Snapshots/) | | beam_Publish_Docker_Snapshots | [cron](https://ci-beam.apache.org/job/beam_Publish_Docker_Snapshots/)| N/A | [![Build Status](https://ci-beam.apache.org/job/beam_Publish_Docker_Snapshots/badge/icon)](https://ci-beam.apache.org/job/beam_Publish_Docker_Snapshots/) | diff --git a/.test-infra/jenkins/job_CloudMLBenchmarkTests_Python.groovy b/.test-infra/jenkins/job_CloudMLBenchmarkTests_Python.groovy new file mode 100644 index 0000000000000..770d8d7367d0a --- /dev/null +++ b/.test-infra/jenkins/job_CloudMLBenchmarkTests_Python.groovy @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import CommonJobProperties as commonJobProperties +import PhraseTriggeringPostCommitBuilder +import CronJobBuilder + +def cloudMLJob = { scope -> + scope.description('Runs the TFT Criteo Examples on the Dataflow runner.') + + // Set common parameters. + commonJobProperties.setTopLevelMainJobProperties(scope, 'master', 360) + + // Gradle goals for this job. 
+ scope.steps { + gradle { + rootBuildScriptDir(commonJobProperties.checkoutDir) + commonJobProperties.setGradleSwitches(delegate) + tasks(':sdks:python:test-suites:dataflow:tftTests') + } + } +} + +PhraseTriggeringPostCommitBuilder.postCommitJob( + 'beam_CloudML_Benchmarks_Dataflow', + 'Run TFT Criteo Benchmarks', + 'TFT Criteo benchmarks on Dataflow(\"Run TFT Criteo Benchmarks"\"")', + this + ) { + cloudMLJob(delegate) + } + +CronJobBuilder.cronJob( + 'beam_CloudML_Benchmarks_Dataflow', + 'H 14 * * *', + this + ) { + cloudMLJob(delegate) + } diff --git a/.test-infra/jenkins/job_LoadTests_Combine_Python.groovy b/.test-infra/jenkins/job_LoadTests_Combine_Python.groovy index 7c4f1ead0cf75..99fd10e3bdde1 100644 --- a/.test-infra/jenkins/job_LoadTests_Combine_Python.groovy +++ b/.test-infra/jenkins/job_LoadTests_Combine_Python.groovy @@ -100,8 +100,7 @@ def loadTestConfigurations = { datasetName, mode -> def addStreamingOptions(test){ test.pipelineOptions << [streaming: null, - // TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved. - experiments: "use_runner_v2,shuffle_mode=appliance" + experiments: "use_runner_v2" ] } diff --git a/.test-infra/jenkins/job_LoadTests_GBK_Python.groovy b/.test-infra/jenkins/job_LoadTests_GBK_Python.groovy index 9a38af439bde3..1a772704ed7b9 100644 --- a/.test-infra/jenkins/job_LoadTests_GBK_Python.groovy +++ b/.test-infra/jenkins/job_LoadTests_GBK_Python.groovy @@ -156,8 +156,7 @@ def addStreamingOptions(test) { // Use the new Dataflow runner, which offers improved efficiency of Dataflow jobs. // See https://cloud.google.com/dataflow/docs/guides/deploying-a-pipeline#dataflow-runner-v2 // for more details. - // TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved. - experiments: 'use_runner_v2,shuffle_mode=appliance', + experiments: 'use_runner_v2', ] } diff --git a/.test-infra/jenkins/job_LoadTests_GBK_Python_reiterate.groovy b/.test-infra/jenkins/job_LoadTests_GBK_Python_reiterate.groovy index 3fa262ab5912e..d1960abce1704 100644 --- a/.test-infra/jenkins/job_LoadTests_GBK_Python_reiterate.groovy +++ b/.test-infra/jenkins/job_LoadTests_GBK_Python_reiterate.groovy @@ -86,8 +86,7 @@ def addStreamingOptions(test) { // Use the new Dataflow runner, which offers improved efficiency of Dataflow jobs. // See https://cloud.google.com/dataflow/docs/guides/deploying-a-pipeline#dataflow-runner-v2 // for more details. - // TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved. - experiments: 'use_runner_v2,shuffle_mode=appliance', + experiments: 'use_runner_v2', ] } diff --git a/.test-infra/jenkins/job_LoadTests_ParDo_Python.groovy b/.test-infra/jenkins/job_LoadTests_ParDo_Python.groovy index 44e9497dac915..090361a21a5e8 100644 --- a/.test-infra/jenkins/job_LoadTests_ParDo_Python.groovy +++ b/.test-infra/jenkins/job_LoadTests_ParDo_Python.groovy @@ -131,8 +131,7 @@ def addStreamingOptions(test) { // Use the new Dataflow runner, which offers improved efficiency of Dataflow jobs. // See https://cloud.google.com/dataflow/docs/guides/deploying-a-pipeline#dataflow-runner-v2 // for more details. - // TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved. 
- experiments: 'use_runner_v2,shuffle_mode=appliance', + experiments: 'use_runner_v2', ] } diff --git a/.test-infra/jenkins/job_LoadTests_SideInput_Python.groovy b/.test-infra/jenkins/job_LoadTests_SideInput_Python.groovy index 404d74c41ad78..5ed7cc6381dfa 100644 --- a/.test-infra/jenkins/job_LoadTests_SideInput_Python.groovy +++ b/.test-infra/jenkins/job_LoadTests_SideInput_Python.groovy @@ -39,8 +39,7 @@ def fromTemplate = { mode, name, id, datasetName, testSpecificOptions -> influx_measurement : "python_${mode}_sideinput_${id}", num_workers : 10, autoscaling_algorithm: 'NONE', - // TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved. - experiments : 'use_runner_v2,shuffle_mode=appliance', + experiments : 'use_runner_v2', ] << testSpecificOptions ] } diff --git a/.test-infra/jenkins/job_PerformanceTests_KafkaIO_IT.groovy b/.test-infra/jenkins/job_PerformanceTests_KafkaIO_IT.groovy index c9dda806a4b54..d513dd96a7e20 100644 --- a/.test-infra/jenkins/job_PerformanceTests_KafkaIO_IT.groovy +++ b/.test-infra/jenkins/job_PerformanceTests_KafkaIO_IT.groovy @@ -35,7 +35,7 @@ String HIGH_RANGE_PORT = "32767" */ job(jobName) { common.setTopLevelMainJobProperties(delegate, 'master', 120) - common.setAutoJob(delegate, 'H H/6 * * *') + common.setAutoJob(delegate, 'H H/12 * * *') common.enablePhraseTriggeringFromPullRequest( delegate, 'Java KafkaIO Performance Test', @@ -97,17 +97,16 @@ job(jobName) { Map dataflowRunnerV2SdfPipelineOptions = pipelineOptions + [ sourceOptions : """ { - "numRecords": "100000", - "keySizeBytes": "1", + "numRecords": "100000000", + "keySizeBytes": "10", "valueSizeBytes": "90" } """.trim().replaceAll("\\s", ""), kafkaTopic : 'beam-sdf', - readTimeout : '900', + readTimeout : '1500', bigQueryTable : 'kafkaioit_results_runner_v2', influxMeasurement : 'kafkaioit_results_runner_v2', - // TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved. 
- experiments : 'use_runner_v2,shuffle_mode=appliance,use_unified_worker', + experiments : 'use_runner_v2,use_unified_worker', ] steps { diff --git a/.test-infra/jenkins/job_PerformanceTests_SQLIO_Java.groovy b/.test-infra/jenkins/job_PerformanceTests_SQLIO_Java.groovy index 3fffe394802e2..ceded537bb4ce 100644 --- a/.test-infra/jenkins/job_PerformanceTests_SQLIO_Java.groovy +++ b/.test-infra/jenkins/job_PerformanceTests_SQLIO_Java.groovy @@ -21,7 +21,8 @@ def jobConfigs = [ [ title : 'SQL BigQueryIO with push-down Batch Performance Test Java', triggerPhrase: 'Run SQLBigQueryIO Batch Performance Test Java', - name : 'beam_SQLBigQueryIO_Batch_Performance_Test_Java', + name : 'beam_PerformanceTests_SQLBigQueryIO_Batch_Java', + previousName : 'beam_SQLBigQueryIO_Batch_Performance_Test_Java/', itClass : 'org.apache.beam.sdk.extensions.sql.meta.provider.bigquery.BigQueryIOPushDownIT', properties: [ project : 'apache-beam-testing', @@ -44,7 +45,10 @@ private void createPostCommitJob(jobConfig) { description(jobConfig.description) common.setTopLevelMainJobProperties(delegate) common.enablePhraseTriggeringFromPullRequest(delegate, jobConfig.title, jobConfig.triggerPhrase) - common.setAutoJob(delegate, 'H H/6 * * *') + common.setAutoJob(delegate, 'H H/12 * * *') + if (jobConfig.containsKey('previousName')) { + previousNames(jobConfig.previousName) + } publishers { archiveJunit('**/build/test-results/**/*.xml') } diff --git a/.test-infra/jenkins/job_PerformanceTests_xlang_KafkaIO_Python.groovy b/.test-infra/jenkins/job_PerformanceTests_xlang_KafkaIO_Python.groovy new file mode 100644 index 0000000000000..142921c277a4e --- /dev/null +++ b/.test-infra/jenkins/job_PerformanceTests_xlang_KafkaIO_Python.groovy @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import CommonJobProperties as common +import Kubernetes +import LoadTestsBuilder as loadTestsBuilder +import InfluxDBCredentialsHelper + +def jobs = [ + [ + name : 'beam_PerformanceTests_xlang_KafkaIO_Python', + description : 'Runs performance tests for xlang Python KafkaIO', + test : 'apache_beam.io.external.xlang_kafkaio_perf_test', + githubTitle : 'Python xlang KafkaIO Performance Test', + githubTriggerPhrase: 'Run Python xlang KafkaIO Performance Test', + pipelineOptions : [ + publish_to_big_query : true, + metrics_dataset : 'beam_performance', + metrics_table : 'python_kafkaio_results', + influx_measurement : 'python_kafkaio_results', + test_class : 'KafkaIOPerfTest', + input_options : """'{ + "num_records": 100000000, + "key_size": 10, + "value_size": 90 + }'""".trim().replaceAll("\\s", ""), + kafka_topic : 'beam', + read_timeout : '1500', + num_workers : '5', + autoscaling_algorithm: 'NONE' + ] + ] +] + +jobs.findAll { + it.name in [ + // all tests that enabled + 'beam_PerformanceTests_xlang_KafkaIO_Python', + ] +}.forEach { testJob -> createKafkaIOTestJob(testJob) } + +private void createKafkaIOTestJob(testJob) { + job(testJob.name) { + description(testJob.description) + common.setTopLevelMainJobProperties(delegate) + common.enablePhraseTriggeringFromPullRequest(delegate, testJob.githubTitle, testJob.githubTriggerPhrase) + common.setAutoJob(delegate, 'H H * * *') + InfluxDBCredentialsHelper.useCredentials(delegate) + + // Setup kafka k8s pods + String namespace = common.getKubernetesNamespace(testJob.name) + String kubeconfig = common.getKubeconfigLocationForNamespace(namespace) + Kubernetes k8s = Kubernetes.create(delegate, kubeconfig, namespace) + String kafkaDir = common.makePathAbsolute("src/.test-infra/kubernetes/kafka-cluster") + String kafkaTopicJob = "job.batch/kafka-config-eff079ec" + + /** + * Specifies steps to avoid port collisions when the Kafka outside services (1,2,3) are created. + Function k8s.availablePort finds unused ports in the Kubernetes cluster in a range from 32400 + to 32767 by querying used ports, those ports are stored in env vars like KAFKA_SERVICE_PORT_${service}, + which are used to replace default ports for outside-${service}.yml files, before the apply command. + */ + steps { + String[] configuredPorts = ["32400", "32401", "32402"] + String HIGH_RANGE_PORT = "32767" + (0..2).each { service -> + k8s.availablePort(service == 0 ? 
configuredPorts[service] : "\$KAFKA_SERVICE_PORT_${service-1}", + HIGH_RANGE_PORT, "KAFKA_SERVICE_PORT_$service") + shell("sed -i -e s/${configuredPorts[service]}/\$KAFKA_SERVICE_PORT_$service/ \ + ${kafkaDir}/04-outside-services/outside-${service}.yml") + } + gradle { + rootBuildScriptDir(common.checkoutDir) + tasks(':sdks:java:io:expansion-service:shadowJar') + } + } + k8s.apply(kafkaDir) + (0..2).each { k8s.loadBalancerIP("outside-$it", "KAFKA_BROKER_$it") } + k8s.waitForJob(kafkaTopicJob,"40m") + + additionalPipelineArgs = [ + influx_db_name: InfluxDBCredentialsHelper.InfluxDBDatabaseName, + influx_hostname: InfluxDBCredentialsHelper.InfluxDBHostUrl, + bootstrap_servers: "\$KAFKA_BROKER_0:\$KAFKA_SERVICE_PORT_0,\$KAFKA_BROKER_1:\$KAFKA_SERVICE_PORT_1," + + "\$KAFKA_BROKER_2:\$KAFKA_SERVICE_PORT_2", //KAFKA_BROKER_ represents IP and KAFKA_SERVICE_ port of outside services + ] + testJob.pipelineOptions.putAll(additionalPipelineArgs) + + def dataflowSpecificOptions = [ + runner : 'DataflowRunner', + project : 'apache-beam-testing', + region : 'us-central1', + temp_location : 'gs://temp-storage-for-perf-tests/', + filename_prefix : "gs://temp-storage-for-perf-tests/${testJob.name}/\${BUILD_ID}/", + sdk_harness_container_image_overrides: '.*java.*,gcr.io/apache-beam-testing/beam-sdk/beam_java8_sdk:latest' + ] + + Map allPipelineOptions = dataflowSpecificOptions << testJob.pipelineOptions + + loadTestsBuilder.loadTest( + delegate, + testJob.name, + CommonTestProperties.Runner.DATAFLOW, + CommonTestProperties.SDK.PYTHON, + allPipelineOptions, + testJob.test) + } +} diff --git a/.test-infra/jenkins/job_PerformanceTests_InfluxDBIO_IT.groovy b/.test-infra/jenkins/job_PostCommit_Java_InfluxDBIO_IT.groovy similarity index 83% rename from .test-infra/jenkins/job_PerformanceTests_InfluxDBIO_IT.groovy rename to .test-infra/jenkins/job_PostCommit_Java_InfluxDBIO_IT.groovy index 18a0f1e685fac..cb74cbf3228ff 100644 --- a/.test-infra/jenkins/job_PerformanceTests_InfluxDBIO_IT.groovy +++ b/.test-infra/jenkins/job_PostCommit_Java_InfluxDBIO_IT.groovy @@ -16,19 +16,18 @@ * limitations under the License. */ import CommonJobProperties as common +import PostcommitJobBuilder import Kubernetes -String jobName = "beam_PerformanceTests_InfluxDbIO_IT" +String jobName = "beam_PostCommit_Java_InfluxDbIO_IT" -job(jobName) { +PostcommitJobBuilder.postCommitJob(jobName, 'Run Java InfluxDbIO_IT', 'Java InfluxDbIO Integration Test', this) { + description('Runs the Java InfluxDbIO Integration Test.') + previousNames(/beam_PerformanceTests_InfluxDbIO_IT/) // Set common parameters. 
- common.setTopLevelMainJobProperties(delegate, 'master', 240, true, 'beam-perf') - common.setAutoJob(delegate,'H H/6 * * *') - common.enablePhraseTriggeringFromPullRequest( - delegate, - 'Java InfluxDbIO Performance Test', - 'Run Java InfluxDbIO Performance Test') + common.setTopLevelMainJobProperties(delegate) + // Deploy InfluxDb cluster String namespace = common.getKubernetesNamespace(jobName) String kubeconfigPath = common.getKubeconfigLocationForNamespace(namespace) Kubernetes k8s = Kubernetes.create(delegate, kubeconfigPath, namespace) diff --git a/.test-infra/jenkins/job_sonarqube_report.groovy b/.test-infra/jenkins/job_sonarqube_report.groovy index df55632621c96..515b7e43061e6 100644 --- a/.test-infra/jenkins/job_sonarqube_report.groovy +++ b/.test-infra/jenkins/job_sonarqube_report.groovy @@ -31,7 +31,9 @@ job('beam_sonarqube_report') { } } - commonJobProperties.setAutoJob delegate + + // TODO(https://github.com/apache/beam/issues/24768) remove or fix this job. + // commonJobProperties.setAutoJob delegate publishers { archiveJunit('**/build/test-results/**/*.xml') diff --git a/.test-infra/metrics/grafana/dashboards/perftests_metrics/Java_IO_IT_Tests_Dataflow.json b/.test-infra/metrics/grafana/dashboards/perftests_metrics/Java_IO_IT_Tests_Dataflow.json index eb8f5135798c1..962e0d8b1be4f 100644 --- a/.test-infra/metrics/grafana/dashboards/perftests_metrics/Java_IO_IT_Tests_Dataflow.json +++ b/.test-infra/metrics/grafana/dashboards/perftests_metrics/Java_IO_IT_Tests_Dataflow.json @@ -2148,7 +2148,73 @@ "steppedLine": false, "targets": [ { - "alias": "$tag_metric", + "alias": "LEGACY_read_time", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "", + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"kafkaioit_results\" WHERE \"metric\" = 'read_time' AND $timeFilter GROUP BY time($__interval), \"metric\"", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "write_time", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "", + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"kafkaioit_results\" WHERE \"metric\" = 'write_time' AND $timeFilter GROUP BY time($__interval), \"metric\"", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "alias": "SDF_read_time", "groupBy": [ { "params": [ @@ -2160,7 +2226,7 @@ "measurement": "", "orderByTime": "ASC", "policy": "default", - "query": "SELECT mean(\"value\") FROM \"kafkaioit_results\" WHERE (\"metric\" = 'write_time' OR \"metric\" = 'read_time') AND $timeFilter GROUP BY time($__interval), \"metric\"", + "query": "SELECT mean(\"value\") FROM \"kafkaioit_results_runner_v2\" WHERE \"metric\" = 'read_time' AND $timeFilter GROUP BY time($__interval), \"metric\"", "rawQuery": true, "refId": "A", "resultFormat": "time_series", diff --git a/.test-infra/metrics/grafana/dashboards/perftests_metrics/Python_IO_IT_Tests_Dataflow.json b/.test-infra/metrics/grafana/dashboards/perftests_metrics/Python_IO_IT_Tests_Dataflow.json index 2ece04cf4d488..e8d4e36b37af7 100644 --- 
a/.test-infra/metrics/grafana/dashboards/perftests_metrics/Python_IO_IT_Tests_Dataflow.json +++ b/.test-infra/metrics/grafana/dashboards/perftests_metrics/Python_IO_IT_Tests_Dataflow.json @@ -604,6 +604,128 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "dashLength": 10, + "dashes": false, + "datasource": "BeamInfluxDB", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 18 + }, + "hiddenSeries": false, + "id": 6, + "interval": "24h", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pluginVersion": "6.7.2", + "pointradius": 2, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "$tag_metric", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + } + ], + "measurement": "", + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT mean(\"value\") FROM \"python_kafkaio_results\" WHERE \"metric\" = 'read_runtime' OR \"metric\" = 'write_runtime' AND $timeFilter GROUP BY time($__interval), \"metric\"", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Xlang KafkaIO | 100M records, 10 GB", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "transparent": true, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:403", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:404", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "schemaVersion": 22, diff --git a/.test-infra/metrics/sync/github/sync_workflows.py b/.test-infra/metrics/sync/github/sync_workflows.py new file mode 100644 index 0000000000000..646bd78cf61bb --- /dev/null +++ b/.test-infra/metrics/sync/github/sync_workflows.py @@ -0,0 +1,187 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +''' +This module queries GitHub to collect Beam-related workflows metrics and put them in +PostgreSQL. +This Script is running every 3 hours in a cloud function in apache-beam-testing project. +This cloud function is triggered by a pubsub topic. 
+You can find the cloud function at the following link +https://console.cloud.google.com/functions/details/us-central1/github_actions_workflows_dashboard_sync?env=gen1&project=apache-beam-testing +Pub/Sub topic: https://console.cloud.google.com/cloudpubsub/topic/detail/github_actions_workflows_sync?project=apache-beam-testing +Cron job: https://console.cloud.google.com/cloudscheduler/jobs/edit/us-central1/github_actions_workflows_dashboard_sync?project=apache-beam-testing +Writes the latest runs of every postcommit workflow on the master branch to the beammetrics database +''' + +import os +import sys +import time +import re +import requests +import psycopg2 + +from datetime import datetime +from github import GithubIntegration + +DB_HOST = os.environ['DB_HOST'] +DB_PORT = os.environ['DB_PORT'] +DB_NAME = os.environ['DB_NAME'] +DB_USER_NAME = os.environ['DB_USER'] +DB_PASSWORD = os.environ['DB_PASS'] +GH_WORKFLOWS_TABLE_NAME = "github_workflows" +# Number of workflows to fetch per GitHub API request +GH_NUMBER_OF_WORKFLOWS = 100 +GH_WORKFLOWS_NUMBER_EXECUTIONS = 100 +WORKFLOWS_OBJECT_LIST = [] + + +class Workflow: + def __init__(self,id,name,filename): + self.id = id + self.name = name + self.filename = filename + self.listOfRuns = [] + self.runUrl = [] + +# The table stores the latest runs of every workflow +GH_WORKFLOWS_CREATE_TABLE_QUERY = f""" +CREATE TABLE IF NOT EXISTS {GH_WORKFLOWS_TABLE_NAME} ( + job_name text PRIMARY KEY, + job_yml_filename text""" +for i in range(0,GH_WORKFLOWS_NUMBER_EXECUTIONS): + i = i + 1 + GH_WORKFLOWS_CREATE_TABLE_QUERY += """,\n run{} text, + run{}Id text""".format(str(i),str(i)) +GH_WORKFLOWS_CREATE_TABLE_QUERY += ")\n" + +def githubWorkflowsGrafanaSync(data,context): + print('Started') + print('Updating table with recent workflow runs') + databaseOperations(initDbConnection(),fetchWorkflowData()) + print('Done') + return "Completed" + +def initDbConnection(): + '''Initialize the connection to the database''' + connection = None + maxRetries = 3 + i = 0 + while connection == None and i < maxRetries: + try: + connection = psycopg2.connect( + f"dbname='{DB_NAME}' user='{DB_USER_NAME}' host='{DB_HOST}'" + f" port='{DB_PORT}' password='{DB_PASSWORD}'") + except Exception as e: + print('Failed to connect to DB; retrying in 1 minute') + print(e) + time.sleep(60) + i = i + 1 + if i >= maxRetries: + print("Number of retries exceeded") + sys.exit(1) + return connection + +def getToken(): + git_integration = GithubIntegration( + os.environ["GH_APP_ID"], + os.environ["GH_PEM_KEY"]) + token=git_integration.get_access_token( + os.environ["GH_APP_INSTALLATION_ID"] + ).token + return token + +def retriesRequest(request): + requestSucceeded = False + retryFactor = 1 + while not requestSucceeded: + retryTime = 60 * retryFactor + if request.status_code != 200: + print('Failed to get the request with code {}'.format(request.status_code)) + time.sleep(retryTime) + retryFactor = retryFactor + retryFactor + if retryFactor * 60 >= 3600: + print("Error: The request took more than an hour") + sys.exit(1) + else: + requestSucceeded = True +def fetchWorkflowData(): + '''Return a JSON object with all the workflows and their latest + executions''' + completed = False + page = 1 + workflows = [] + try: + while not completed: + url = "https://api.github.com/repos/apache/beam/actions/workflows" + queryOptions = { 'branch' : 'master', 'page': page, 'per_page' : GH_NUMBER_OF_WORKFLOWS } + response = requests.get(url = url, params = queryOptions) + retriesRequest(response) + jsonResponse = response.json() + if 
jsonResponse['total_count'] >= GH_NUMBER_OF_WORKFLOWS: + page = page + 1 + workflowsPage = jsonResponse['workflows'] + workflows.append(workflowsPage) + else: + completed = True + workflowsPage = jsonResponse['workflows'] + workflows.append(workflowsPage) + for pageItem in workflows: + for item in pageItem: + path =item['path'] + isPostCommit = re.search('(.*)postcommit(.*)',path) + if isPostCommit: + result = re.search('/(.*).yml', path) + path =(result.group(1)) + ".yml" + workflowObject = Workflow(item['id'],item['name'],path) + WORKFLOWS_OBJECT_LIST.append(workflowObject) + url = "https://api.github.com/repos/apache/beam/actions/workflows/" + queryOptions = { 'branch' : 'master', 'per_page' : GH_WORKFLOWS_NUMBER_EXECUTIONS, + 'page' :'1', 'exclude_pull_request':True } + for workflow in WORKFLOWS_OBJECT_LIST: + response = requests.get(url = "{}{}/runs".format(url,workflow.id), + params=queryOptions) + retriesRequest(response) + responseJson = response.json() + workflowsRuns = responseJson['workflow_runs'] + for item in workflowsRuns: + if item['status'] == 'completed': + workflow.runUrl.append(item['html_url']) + workflow.listOfRuns.append(item['conclusion']) + else: + workflow.listOfRuns.append(item['status']) + workflow.runUrl.append(item['html_url']) + for i in range(0,GH_WORKFLOWS_NUMBER_EXECUTIONS): + if i >= len(workflow.listOfRuns): + workflow.listOfRuns.append('None') + workflow.runUrl.append('None') + except Exception as e: + print('Failed to get GHA workflows') + print(e) + +def databaseOperations(connection,fetchWorkflows): + '''Create the table if not exist and update the table with the latest runs + of the workflows ''' + queryInsert = "INSERT INTO {} VALUES ".format(GH_WORKFLOWS_TABLE_NAME) + cursor = connection.cursor() + cursor.execute(GH_WORKFLOWS_CREATE_TABLE_QUERY) + cursor.execute("DELETE FROM {};".format(GH_WORKFLOWS_TABLE_NAME)) + query = "" + for workflow in WORKFLOWS_OBJECT_LIST: + rowInsert = "(\'{}\',\'{}\'".format(workflow.name,workflow.filename) + for run, runUrl in zip(workflow.listOfRuns,workflow.runUrl): + rowInsert += ",\'{}\',\'{}\'".format(run,runUrl) + query = query + rowInsert + query += ")," + query = query[:-1] + ";" + query = queryInsert + query + cursor.execute(query) + cursor.close() + connection.commit() + connection.close() \ No newline at end of file diff --git a/.test-infra/tools/stale_bq_datasets_cleaner.sh b/.test-infra/tools/stale_bq_datasets_cleaner.sh index 6250b4697a3a2..fc68666e4aac9 100755 --- a/.test-infra/tools/stale_bq_datasets_cleaner.sh +++ b/.test-infra/tools/stale_bq_datasets_cleaner.sh @@ -21,24 +21,38 @@ set -euo pipefail PROJECT=apache-beam-testing -BQ_DATASETS=`bq --format=json --project_id=$PROJECT ls --max_results=1500 | jq -r .[].id` +MAX_RESULT=1500 +BQ_DATASETS=`bq --project_id=$PROJECT ls --max_results=$MAX_RESULT | tail -n $MAX_RESULT | sed s/^[[:space:]]*/${PROJECT}:/` CLEANUP_DATASET_TEMPLATES=(beam_bigquery_samples_ beam_temp_dataset_ FHIR_store_ bq_query_schema_update_options_16 bq_query_to_table_16 bq_read_all_[a-z0-9]*) # A grace period of 5 days GRACE_PERIOD=$((`date +%s` - 24 * 3600 * 5)) +# count number of failed api calls +declare -i failed_calls=0 for dataset in ${BQ_DATASETS[@]}; do for template in ${CLEANUP_DATASET_TEMPLATES[@]}; do if [[ $dataset =~ $template ]]; then # The BQ API reports LAST MODIFIED TIME in miliseconds, while unix works in seconds since epoch # thus why we need to convert to seconds. 
- LAST_MODIFIED_MS=`bq --format=json --project_id=$PROJECT show $dataset | jq -r .lastModifiedTime` + [[ `bq --format=json --project_id=$PROJECT show $dataset` =~ \"lastModifiedTime\":\"([0-9]+)\" ]] + LAST_MODIFIED_MS=${BASH_REMATCH[1]} LAST_MODIFIED=$(($LAST_MODIFIED_MS / 1000)) if [[ $GRACE_PERIOD -gt $LAST_MODIFIED ]]; then - echo "Deleting $dataset (modified `date -d @$LAST_MODIFIED`) Command bq --project_id=$PROJECT rm -r -f $dataset" - bq --project_id=$PROJECT rm -r -f $dataset + if bq --project_id=$PROJECT rm -r -f $dataset; then + echo "Deleted $dataset (modified `date -d @$LAST_MODIFIED`)" + else + failed_calls+=1 + fi fi + break fi done done + +# fail the script if failed_calls is nonzero +if [[ failed_calls -ne 0 ]]; then + echo "Failed delete $failed_calls datasets" + exit 1 +fi diff --git a/.test-infra/tools/stale_dataflow_prebuilt_image_cleaner.sh b/.test-infra/tools/stale_dataflow_prebuilt_image_cleaner.sh index 50d93497470ad..a9f7be945ffc6 100755 --- a/.test-infra/tools/stale_dataflow_prebuilt_image_cleaner.sh +++ b/.test-infra/tools/stale_dataflow_prebuilt_image_cleaner.sh @@ -24,7 +24,8 @@ set -euo pipefail PUBLIC_REPOSITORIES=(beam-sdk beam_portability) PRIVATE_REPOSITORIES=(java-postcommit-it python-postcommit-it jenkins) -DELETE_BEFORE_DAY=$(date --iso-8601=s -d '30 days ago') +# set as the same as 6-week release period +DELETE_BEFORE_DAY=$(date --iso-8601=s -d '6 weeks ago') REPOSITORIES=("${PUBLIC_REPOSITORIES[@]/#/gcr.io/apache-beam-testing/}" "${PRIVATE_REPOSITORIES[@]/#/us.gcr.io/apache-beam-testing/}") diff --git a/CHANGES.md b/CHANGES.md index 310078a580674..18b7ed989fb6c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -59,18 +59,23 @@ ## I/Os * Support for X source added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). +* MongoDB IO connector added (Go) ([#24575](https://github.com/apache/beam/issues/24575)). ## New Features / Improvements * RunInference Wrapper with Sklearn Model Handler support added in Go SDK ([#24497](https://github.com/apache/beam/issues/23382)). * X feature added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). +* Adding override of allowed TLS algorithms (Java), now maintaining the disabled/legacy algorithms + present in 2.43.0 (up to 1.8.0_342, 11.0.16, 17.0.2 for respective Java versions). This is accompanied + by an explicit re-enabling of TLSv1 and TLSv1.1 for Java 8 and Java 11. ## Breaking Changes -* Python streaming pipelines and portable Python batch pipelines on Dataflow are required to - use Runner V2. The `disable_runner_v2`, `disable_runner_v2_until_2023`, `disable_prime_runner_v2` - experiments will raise an error during pipeline construction. Note that non-portable Python - batch jobs are not impacted. ([#24515](https://github.com/apache/beam/issues/24515)) +* Portable Java pipelines, Go pipelines, Python streaming pipelines, and portable Python batch + pipelines on Dataflow are required to use Runner V2. The `disable_runner_v2`, + `disable_runner_v2_until_2023`, `disable_prime_runner_v2` experiments will raise an error during + pipeline construction. You can no longer specify the Dataflow worker jar override. Note that + non-portable Java jobs and non-portable Python batch jobs are not impacted. ([#24515](https://github.com/apache/beam/issues/24515)). ## Deprecations @@ -78,7 +83,8 @@ ## Bugfixes -* Fixed X (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). 
+* Avoids Cassandra syntax error when user-defined query has no where clause in it (Java) ([#24829](https://github.com/apache/beam/issues/24829)). +* Fixed JDBC connection failures (Java) during handshake due to deprecated TLSv1(.1) protocol for the JDK. ([#24623](https://github.com/apache/beam/issues/24623)) ## Known Issues @@ -145,7 +151,7 @@ * Decreased TextSource CPU utilization by 2.3x (Java) ([#23193](https://github.com/apache/beam/issues/23193)). * Fixed bug when using SpannerIO with RuntimeValueProvider options (Java) ([#22146](https://github.com/apache/beam/issues/22146)). -* Fixed issue for unicode rendering on WriteToBigQuery ([#10785](https://github.com/apache/beam/issues/10785)) +* Fixed issue for unicode rendering on WriteToBigQuery ([#22312](https://github.com/apache/beam/issues/22312)) * Remove obsolete variants of BigQuery Read and Write, always using Beam-native variant ([#23564](https://github.com/apache/beam/issues/23564) and [#23559](https://github.com/apache/beam/issues/23559)). * Bumped google-cloud-spanner dependency version to 3.x for Python SDK ([#21198](https://github.com/apache/beam/issues/21198)). diff --git a/build.gradle.kts b/build.gradle.kts index 9983465f25a36..9b4f6e57b4e79 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -117,7 +117,7 @@ tasks.rat { // Tour Of Beam backend autogenerated Datastore indexes "learning/tour-of-beam/backend/internal/storage/index.yaml", - + // Tour Of Beam backend autogenerated Playground GRPC API stubs and mocks "learning/tour-of-beam/backend/playground_api/api.pb.go", "learning/tour-of-beam/backend/playground_api/api_grpc.pb.go", @@ -127,6 +127,11 @@ tasks.rat { "playground/backend/internal/api/v1/api.pb.go", "playground/backend/internal/api/v1/api_grpc.pb.go", + // Playground infrastructure autogenerated GRPC API stubs and mocks + "playground/infrastructure/api/v1/api_pb2.py", + "playground/infrastructure/api/v1/api_pb2.pyi", + "playground/infrastructure/api/v1/api_pb2_grpc.py", + // test p8 file for SnowflakeIO "sdks/java/io/snowflake/src/test/resources/invalid_test_rsa_key.p8", "sdks/java/io/snowflake/src/test/resources/valid_encrypted_test_rsa_key.p8", @@ -545,6 +550,7 @@ tasks.register("pushAllDockerImages") { dependsOn(":sdks:java:container:pushAll") dependsOn(":sdks:python:container:pushAll") dependsOn(":sdks:go:container:pushAll") + dependsOn(":sdks:typescript:container:pushAll") for (version in project.ext.get("allFlinkVersions") as Array<*>) { dependsOn(":runners:flink:${version}:job-server-container:dockerPush") } diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index bbf4c55f26f0b..2274906deacd6 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -486,6 +486,7 @@ class BeamModulePlugin implements Plugin { def jsr305_version = "3.0.2" def everit_json_version = "1.14.1" def kafka_version = "2.4.1" + def log4j2_version = "2.17.2" def nemo_version = "0.1" def netty_version = "4.1.77.Final" def postgres_version = "42.2.16" @@ -573,6 +574,7 @@ class BeamModulePlugin implements Plugin { commons_csv : "org.apache.commons:commons-csv:1.8", commons_io : "commons-io:commons-io:2.7", commons_lang3 : "org.apache.commons:commons-lang3:3.9", + commons_logging : "commons-logging:commons-logging:1.2", commons_math3 : "org.apache.commons:commons-math3:3.6.1", dbcp2 : 
"org.apache.commons:commons-dbcp2:$dbcp2_version", error_prone_annotations : "com.google.errorprone:error_prone_annotations:$errorprone_version", @@ -675,6 +677,7 @@ class BeamModulePlugin implements Plugin { jamm : 'io.github.stephankoelle:jamm:0.4.1', jaxb_api : "jakarta.xml.bind:jakarta.xml.bind-api:$jaxb_api_version", jaxb_impl : "com.sun.xml.bind:jaxb-impl:$jaxb_api_version", + jcl_over_slf4j : "org.slf4j:jcl-over-slf4j:$slf4j_version", jmh_core : "org.openjdk.jmh:jmh-core:$jmh_version", joda_time : "joda-time:joda-time:2.10.10", jsonassert : "org.skyscreamer:jsonassert:1.5.0", @@ -684,6 +687,12 @@ class BeamModulePlugin implements Plugin { junit : "junit:junit:4.13.1", kafka : "org.apache.kafka:kafka_2.11:$kafka_version", kafka_clients : "org.apache.kafka:kafka-clients:$kafka_version", + log4j : "log4j:log4j:1.2.17", + log4j_over_slf4j : "org.slf4j:log4j-over-slf4j:$slf4j_version", + log4j2_api : "org.apache.logging.log4j:log4j-api:$log4j2_version", + log4j2_core : "org.apache.logging.log4j:log4j-core:$log4j2_version", + log4j2_to_slf4j : "org.apache.logging.log4j:log4j-to-slf4j:$log4j2_version", + log4j2_slf4j_impl : "org.apache.logging.log4j:log4j-slf4j-impl:$log4j2_version", mockito_core : "org.mockito:mockito-core:3.7.7", mockito_inline : "org.mockito:mockito-inline:4.5.1", mongo_java_driver : "org.mongodb:mongo-java-driver:3.12.11", @@ -711,9 +720,14 @@ class BeamModulePlugin implements Plugin { sbe_tool : "uk.co.real-logic:sbe-tool:$sbe_tool_version", singlestore_jdbc : "com.singlestore:singlestore-jdbc-client:$singlestore_jdbc_version", slf4j_api : "org.slf4j:slf4j-api:$slf4j_version", - slf4j_simple : "org.slf4j:slf4j-simple:$slf4j_version", + slf4j_android : "org.slf4j:slf4j-android:$slf4j_version", + slf4j_ext : "org.slf4j:slf4j-ext:$slf4j_version", slf4j_jdk14 : "org.slf4j:slf4j-jdk14:$slf4j_version", + slf4j_nop : "org.slf4j:slf4j-nop:$slf4j_version", + slf4j_simple : "org.slf4j:slf4j-simple:$slf4j_version", + slf4j_jul_to_slf4j : "org.slf4j:jul-to-slf4j:$slf4j_version", slf4j_log4j12 : "org.slf4j:slf4j-log4j12:$slf4j_version", + slf4j_jcl : "org.slf4j:slf4j-jcl:$slf4j_version", snappy_java : "org.xerial.snappy:snappy-java:1.1.8.4", spark_core : "org.apache.spark:spark-core_2.11:$spark2_version", spark_network_common : "org.apache.spark:spark-network-common_2.11:$spark2_version", @@ -2365,7 +2379,8 @@ class BeamModulePlugin implements Plugin { } else if (JavaVersion.current() == JavaVersion.VERSION_17) { javaContainerSuffix = 'java17' } else { - throw new GradleException("unsupported java version.") + String exceptionMessage = "Your Java version is unsupported. You need Java version of 8 or 11 or 17 to get started, but your Java version is: " + JavaVersion.current(); + throw new GradleException(exceptionMessage) } def setupTask = project.tasks.register(config.name+"Setup", Exec) { dependsOn ':sdks:java:container:'+javaContainerSuffix+':docker' diff --git a/examples/java/cdap/README.md b/examples/java/cdap/README.md index 28d493916e336..fb28dc000b7d9 100644 --- a/examples/java/cdap/README.md +++ b/examples/java/cdap/README.md @@ -22,6 +22,6 @@ from a [CDAP plugin](https://github.com/data-integrations) and write data into . Supported CDAP plugins: - [ServiceNow](https://github.com/data-integrations/servicenow-plugins). More info in the ServiceNow example [README](servicenow/src/main/java/org/apache/beam/examples/complete/cdap/servicenow/README.md). 
-- [Salesforce](https://github.com/data-integrations/salesforce) +- [Salesforce](https://github.com/data-integrations/salesforce). More info in the Salesforce example [README](salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/README.md). - [Hubspot](https://github.com/data-integrations/hubspot). More info in the Hubspot example [README](hubspot/src/main/java/org/apache/beam/examples/complete/cdap/hubspot/README.md). - [Zendesk](https://github.com/data-integrations/zendesk). More info in the Zendesk example [README](zendesk/src/main/java/org/apache/beam/examples/complete/cdap/zendesk/README.md). diff --git a/examples/java/cdap/hubspot/build.gradle b/examples/java/cdap/hubspot/build.gradle index 9a45d604678c5..f163d17d555ad 100644 --- a/examples/java/cdap/hubspot/build.gradle +++ b/examples/java/cdap/hubspot/build.gradle @@ -104,7 +104,7 @@ task preCommit() { } } -task executeCdap (type:JavaExec) { +task executeCdapHubspot (type:JavaExec) { mainClass = System.getProperty("mainClass") classpath = sourceSets.main.runtimeClasspath systemProperties System.getProperties() diff --git a/examples/java/cdap/hubspot/src/main/java/org/apache/beam/examples/complete/cdap/hubspot/README.md b/examples/java/cdap/hubspot/src/main/java/org/apache/beam/examples/complete/cdap/hubspot/README.md index 2ff5f5353a4bb..13b4056ab8a21 100644 --- a/examples/java/cdap/hubspot/src/main/java/org/apache/beam/examples/complete/cdap/hubspot/README.md +++ b/examples/java/cdap/hubspot/src/main/java/org/apache/beam/examples/complete/cdap/hubspot/README.md @@ -20,7 +20,7 @@ To run this example your `build.gradle` file should contain the following task to execute the pipeline: ``` -task executeCdap (type:JavaExec) { +task executeCdapHubspot (type:JavaExec) { mainClass = System.getProperty("mainClass") classpath = sourceSets.main.runtimeClasspath systemProperties System.getProperties() @@ -30,10 +30,10 @@ task executeCdap (type:JavaExec) { ## Running the CdapHubspotToTxt pipeline example -Gradle 'executeCdap' task allows to run the pipeline via the following command: +Gradle 'executeCdapHubspot' task allows to run the pipeline via the following command: ```bash -gradle clean executeCdap -DmainClass=org.apache.beam.examples.complete.cdap.hubspot.CdapHubspotToTxt \ +gradle clean executeCdapHubspot -DmainClass=org.apache.beam.examples.complete.cdap.hubspot.CdapHubspotToTxt \ -Dexec.args="--= --=" ``` @@ -55,10 +55,10 @@ To execute this pipeline, specify the parameters in the following format: ## Running the CdapHubspotStreamingToTxt pipeline example -Gradle 'executeCdap' task allows to run the pipeline via the following command: +Gradle 'executeCdapHubspot' task allows to run the pipeline via the following command: ```bash -gradle clean executeCdap -DmainClass=org.apache.beam.examples.complete.cdap.hubspot.CdapHubspotStreamingToTxt \ +gradle clean executeCdapHubspot -DmainClass=org.apache.beam.examples.complete.cdap.hubspot.CdapHubspotStreamingToTxt \ -Dexec.args="--= --=" ``` @@ -84,10 +84,10 @@ To execute this pipeline, specify the parameters in the following format: ## Running the TxtToCdapHubspot pipeline example -Gradle 'executeCdap' task allows to run the pipeline via the following command: +Gradle 'executeCdapHubspot' task allows to run the pipeline via the following command: ```bash -gradle clean executeCdap -DmainClass=org.apache.beam.examples.complete.cdap.hubspot.TxtToCdapHubspot \ +gradle clean executeCdapHubspot -DmainClass=org.apache.beam.examples.complete.cdap.hubspot.TxtToCdapHubspot \ 
-Dexec.args="--= --=" ``` diff --git a/examples/java/cdap/salesforce/build.gradle b/examples/java/cdap/salesforce/build.gradle new file mode 100644 index 0000000000000..3bc6aaf263298 --- /dev/null +++ b/examples/java/cdap/salesforce/build.gradle @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import groovy.json.JsonOutput + +plugins { + id 'java' + id 'org.apache.beam.module' + id 'com.github.johnrengelman.shadow' +} + +applyJavaNature( + exportJavadoc: false, + automaticModuleName: 'org.apache.beam.examples.complete.cdap.salesforce', +) + +description = "Apache Beam :: Examples :: Java :: CDAP :: Salesforce" +ext.summary = """Apache Beam SDK provides a simple, Java-based +interface for processing virtually any size data. This +artifact includes CDAP Salesforce Apache Beam Java SDK examples.""" + +/** Define the list of runners which execute a precommit test. + * Some runners are run from separate projects, see the preCommit task below + * for details. + */ +def preCommitRunners = ["directRunner", "flinkRunner"] +for (String runner : preCommitRunners) { + configurations.create(runner + "PreCommit") +} + +dependencies { + implementation enforcedPlatform(library.java.google_cloud_platform_libraries_bom) + implementation project(path: ":sdks:java:core", configuration: "shadow") + implementation project(":examples:java:cdap") + implementation project(":sdks:java:io:cdap") + implementation project(":sdks:java:io:hadoop-common") + implementation library.java.cdap_api + implementation library.java.cdap_api_commons + implementation library.java.cdap_etl_api + permitUnusedDeclared library.java.cdap_etl_api + implementation library.java.cdap_hydrator_common + //TODO: modify to 'implementation library.java.cdap_plugin_salesforce', + // when new release with HasOffset interface will be published + implementation "com.akvelon:cdap-salesforce-plugins:1.5.0" + implementation library.java.google_code_gson + implementation library.java.hadoop_common + implementation library.java.slf4j_api + implementation library.java.vendored_guava_26_0_jre + runtimeOnly project(path: ":runners:direct-java", configuration: "shadow") + + // Add dependencies for the PreCommit configurations + // For each runner a project level dependency on the examples project. 
+ for (String runner : preCommitRunners) { + delegate.add(runner + "PreCommit", project(":examples:java:cdap:salesforce")) + delegate.add(runner + "PreCommit", project(path: ":examples:java:cdap:salesforce", configuration: "testRuntimeMigration")) + } + directRunnerPreCommit project(path: ":runners:direct-java", configuration: "shadow") + flinkRunnerPreCommit project(":runners:flink:${project.ext.latestFlinkVersion}") +} + +/* + * Create a ${runner}PreCommit task for each runner which runs a set + * of integration tests for WordCount and WindowedWordCount. + */ +def preCommitRunnerClass = [ + directRunner: "org.apache.beam.runners.direct.DirectRunner", + flinkRunner: "org.apache.beam.runners.flink.TestFlinkRunner" +] + +for (String runner : preCommitRunners) { + tasks.create(name: runner + "PreCommit", type: Test) { + def preCommitBeamTestPipelineOptions = [ + "--runner=" + preCommitRunnerClass[runner], + ] + classpath = configurations."${runner}PreCommit" + forkEvery 1 + maxParallelForks 4 + systemProperty "beamTestPipelineOptions", JsonOutput.toJson(preCommitBeamTestPipelineOptions) + } +} + +/* Define a common precommit task which depends on all the individual precommits. */ +task preCommit() { + for (String runner : preCommitRunners) { + dependsOn runner + "PreCommit" + } +} + +task executeCdapSalesforce (type:JavaExec) { + mainClass = System.getProperty("mainClass") + classpath = sourceSets.main.runtimeClasspath + systemProperties System.getProperties() + args System.getProperty("exec.args", "").split() +} diff --git a/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/CdapSalesforceStreamingToTxt.java b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/CdapSalesforceStreamingToTxt.java new file mode 100644 index 0000000000000..d35bb40c8388e --- /dev/null +++ b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/CdapSalesforceStreamingToTxt.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.examples.complete.cdap.salesforce; + +import java.util.Map; +import org.apache.beam.examples.complete.cdap.salesforce.options.CdapSalesforceStreamingSourceOptions; +import org.apache.beam.examples.complete.cdap.salesforce.transforms.FormatInputTransform; +import org.apache.beam.examples.complete.cdap.salesforce.utils.PluginConfigOptionsConverter; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.PipelineResult; +import org.apache.beam.sdk.coders.KvCoder; +import org.apache.beam.sdk.coders.NullableCoder; +import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.io.TextIO; +import org.apache.beam.sdk.io.hadoop.WritableCoder; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.transforms.Values; +import org.apache.beam.sdk.transforms.windowing.AfterProcessingTime; +import org.apache.beam.sdk.transforms.windowing.GlobalWindows; +import org.apache.beam.sdk.transforms.windowing.Repeatedly; +import org.apache.beam.sdk.transforms.windowing.Window; +import org.apache.beam.sdk.values.KV; +import org.apache.hadoop.io.NullWritable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The {@link CdapSalesforceStreamingToTxt} pipeline is a streaming pipeline which ingests data in + * JSON format from CDAP Salesforce, and outputs the resulting records to .txt file. Salesforce + * parameters and output .txt file path are specified by the user as template parameters.
+ *
+ * <p><b>Example Usage</b>
+ *
+ * <pre>
+ * # Gradle preparation
+ *
+ * To run this example, your {@code build.gradle} file should contain the following task
+ * to execute the pipeline:
+ * {@code
+ * task executeCdapSalesforce (type:JavaExec) {
+ *     mainClass = System.getProperty("mainClass")
+ *     classpath = sourceSets.main.runtimeClasspath
+ *     systemProperties System.getProperties()
+ *     args System.getProperty("exec.args", "").split()
+ * }
+ * }
+ *
+ * This task allows you to run the pipeline via the following command:
+ * {@code
+ * gradle clean executeCdapSalesforce -DmainClass=org.apache.beam.examples.complete.cdap.salesforce.CdapSalesforceStreamingToTxt \
+ *      -Dexec.args="--<argument>=<value> --<argument>=<value>"
+ * }
+ *
+ * # Running the pipeline
+ * To execute this pipeline, specify the parameters in the following format:
+ * {@code
+ * --username=your-user-name \
+ * --password=your-password \
+ * --securityToken=your-token \
+ * --consumerKey=your-key \
+ * --consumerSecret=your-secret \
+ * --loginUrl=your-login-url \
+ * --sObjectName=object-name \
+ * --pushTopicName=your-push-topic-name \
+ * --referenceName=your-reference-name \
+ * --outputTxtFilePathPrefix=your-path-to-output-folder-with-filename-prefix \
+ * --pullFrequencySec=1 \
+ * --startOffset=0
+ * }
+ *
+ * By default this will run the pipeline locally with the DirectRunner. To change the runner, specify:
+ * {@code
+ * --runner=YOUR_SELECTED_RUNNER
+ * }
+ * </pre>
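The streaming class below can only hand its unbounded output to TextIO after windowing it: TextIO.write() needs windowed writes (plus an explicit shard count or auto-sharding) when the input PCollection is unbounded. A minimal sketch of just that step, using the same transforms as the class that follows; the wrapper class, method name, and the String element type are illustrative placeholders, not part of this change:

```java
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.transforms.windowing.AfterProcessingTime;
import org.apache.beam.sdk.transforms.windowing.GlobalWindows;
import org.apache.beam.sdk.transforms.windowing.Repeatedly;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.PCollection;

/** Placeholder wrapper; only the two transforms below are taken from the example. */
class WindowedTextWriteSketch {

  static void writeUnbounded(PCollection<String> records, String outputPrefix) {
    records
        // Keep the single global window, but re-trigger it on processing time and
        // discard panes that were already emitted, so output is produced continuously.
        .apply(
            "globalwindow",
            Window.<String>into(new GlobalWindows())
                .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()))
                .discardingFiredPanes())
        // Unbounded input requires windowed writes and, here, a fixed shard count.
        .apply(
            "writeToTxt",
            TextIO.write().withWindowedWrites().withNumShards(1).to(outputPrefix));
  }
}
```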
+ */ +public class CdapSalesforceStreamingToTxt { + + /* Logger for class.*/ + private static final Logger LOG = LoggerFactory.getLogger(CdapSalesforceStreamingToTxt.class); + + /** + * Main entry point for pipeline execution. + * + * @param args Command line arguments to the pipeline. + */ + public static void main(String[] args) { + CdapSalesforceStreamingSourceOptions options = + PipelineOptionsFactory.fromArgs(args) + .withValidation() + .as(CdapSalesforceStreamingSourceOptions.class); + + // Create the pipeline + Pipeline pipeline = Pipeline.create(options); + run(pipeline, options); + } + + /** + * Runs a pipeline which reads records from CDAP Salesforce and writes them to .txt file. + * + * @param options arguments to the pipeline + */ + public static PipelineResult run( + Pipeline pipeline, CdapSalesforceStreamingSourceOptions options) { + Map paramsMap = + PluginConfigOptionsConverter.salesforceStreamingSourceOptionsToParamsMap(options); + LOG.info("Starting Cdap-Salesforce-streaming-to-txt pipeline with parameters: {}", paramsMap); + + /* + * Steps: + * 1) Read messages in from Cdap Salesforce + * 2) Extract values only + * 3) Write successful records to .txt file + */ + + pipeline + .apply( + "readFromCdapSalesforceStreaming", + FormatInputTransform.readFromCdapSalesforceStreaming( + paramsMap, options.getPullFrequencySec(), options.getStartOffset())) + .setCoder( + KvCoder.of( + NullableCoder.of(WritableCoder.of(NullWritable.class)), StringUtf8Coder.of())) + .apply( + "globalwindow", + Window.>into(new GlobalWindows()) + .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane())) + .discardingFiredPanes()) + .apply(Values.create()) + .apply( + "writeToTxt", + TextIO.write() + .withWindowedWrites() + .withNumShards(1) + .to(options.getOutputTxtFilePathPrefix())); + + return pipeline.run(); + } +} diff --git a/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/CdapSalesforceToTxt.java b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/CdapSalesforceToTxt.java new file mode 100644 index 0000000000000..5584635a00625 --- /dev/null +++ b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/CdapSalesforceToTxt.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.examples.complete.cdap.salesforce; + +import io.cdap.cdap.api.data.schema.Schema; +import java.util.LinkedHashMap; +import java.util.Map; +import org.apache.beam.examples.complete.cdap.salesforce.options.CdapSalesforceSourceOptions; +import org.apache.beam.examples.complete.cdap.salesforce.transforms.FormatInputTransform; +import org.apache.beam.examples.complete.cdap.salesforce.utils.PluginConfigOptionsConverter; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.PipelineResult; +import org.apache.beam.sdk.coders.KvCoder; +import org.apache.beam.sdk.coders.SerializableCoder; +import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.io.TextIO; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.transforms.MapValues; +import org.apache.beam.sdk.transforms.Values; +import org.apache.beam.sdk.values.TypeDescriptors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The {@link CdapSalesforceToTxt} pipeline is a batch pipeline which ingests data in JSON format + * from CDAP Salesforce, and outputs the resulting records to .txt file. Salesforce parameters and + * output txt file path are specified by the user as template parameters.
+ *
+ * <p><b>Example Usage</b>
+ *
+ * <pre>
+ * # Gradle preparation
+ *
+ * To run this example, your {@code build.gradle} file should contain the following task
+ * to execute the pipeline:
+ * {@code
+ * task executeCdapSalesforce (type:JavaExec) {
+ *     mainClass = System.getProperty("mainClass")
+ *     classpath = sourceSets.main.runtimeClasspath
+ *     systemProperties System.getProperties()
+ *     args System.getProperty("exec.args", "").split()
+ * }
+ * }
+ *
+ * This task allows you to run the pipeline via the following command:
+ * {@code
+ * gradle clean executeCdapSalesforce -DmainClass=org.apache.beam.examples.complete.cdap.salesforce.CdapSalesforceToTxt \
+ *      -Dexec.args="--<argument>=<value> --<argument>=<value>"
+ * }
+ *
+ * # Running the pipeline
+ * To execute this pipeline, specify the parameters in the following format:
+ * {@code
+ * --username=your-user-name \
+ * --password=your-password \
+ * --securityToken=your-token \
+ * --consumerKey=your-key \
+ * --consumerSecret=your-secret \
+ * --loginUrl=your-login-url \
+ * --sObjectName=object-name \
+ * --referenceName=your-reference-name \
+ * --outputTxtFilePathPrefix=your-path-to-output-folder-with-filename-prefix
+ * }
+ *
+ * By default this will run the pipeline locally with the DirectRunner. To change the runner, specify:
+ * {@code
+ * --runner=YOUR_SELECTED_RUNNER
+ * }
+ * </pre>
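The flags listed above are surfaced through the PipelineOptions interfaces added later in this change (CdapSalesforceOptions and CdapSalesforceSourceOptions). As a rough sketch of how such a flag maps onto a validated getter/setter pair, using a hypothetical trimmed-down interface rather than the real ones:

```java
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.Validation;

/** Hypothetical, trimmed-down options interface; the real interfaces in this change look alike. */
public interface MinimalSalesforceOptions extends PipelineOptions {

  // Bound from "--username=..." on the command line; validation fails if it is missing.
  @Validation.Required
  @Description("Salesforce username")
  String getUsername();

  void setUsername(String username);

  // Bound from "--outputTxtFilePathPrefix=..." on the command line.
  @Validation.Required
  @Description("Path prefix for the output .txt files")
  String getOutputTxtFilePathPrefix();

  void setOutputTxtFilePathPrefix(String outputTxtFilePathPrefix);
}
```

The examples in this change then materialize such options with PipelineOptionsFactory.fromArgs(args).withValidation().as(...), which is what binds the --flag=value arguments to the getters and enforces @Validation.Required.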
+ */ +public class CdapSalesforceToTxt { + + /* Logger for class.*/ + private static final Logger LOG = LoggerFactory.getLogger(CdapSalesforceToTxt.class); + + /** + * Main entry point for pipeline execution. + * + * @param args Command line arguments to the pipeline. + */ + public static void main(String[] args) { + CdapSalesforceSourceOptions options = + PipelineOptionsFactory.fromArgs(args) + .withValidation() + .as(CdapSalesforceSourceOptions.class); + + // Create the pipeline + Pipeline pipeline = Pipeline.create(options); + run(pipeline, options); + } + + /** + * Runs a pipeline which reads records from CDAP Salesforce plugin. + * + * @param options arguments to the pipeline + */ + @SuppressWarnings("rawtypes") + public static PipelineResult run(Pipeline pipeline, CdapSalesforceSourceOptions options) { + Map paramsMap = + PluginConfigOptionsConverter.salesforceBatchSourceOptionsToParamsMap(options); + LOG.info("Starting Cdap-Salesforce pipeline with parameters: {}", paramsMap); + + /* + * Steps: + * 1) Read messages from Cdap Salesforce + * 2) Extract values only + * 3) Write successful records to .txt file + */ + + pipeline + .apply("readFromCdapSalesforce", FormatInputTransform.readFromCdapSalesforce(paramsMap)) + .setCoder( + KvCoder.of( + SerializableCoder.of(Schema.class), SerializableCoder.of(LinkedHashMap.class))) + .apply(MapValues.into(TypeDescriptors.strings()).via(LinkedHashMap::toString)) + .setCoder(KvCoder.of(SerializableCoder.of(Schema.class), StringUtf8Coder.of())) + .apply(Values.create()) + .apply("writeToTxt", TextIO.write().to(options.getOutputTxtFilePathPrefix())); + + return pipeline.run(); + } +} diff --git a/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/README.md b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/README.md new file mode 100644 index 0000000000000..0088467f8adfd --- /dev/null +++ b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/README.md @@ -0,0 +1,123 @@ + + +## Gradle preparation + +To run this example your `build.gradle` file should contain the following task to execute the pipeline: + +``` +task executeCdapSalesforce (type:JavaExec) { + mainClass = System.getProperty("mainClass") + classpath = sourceSets.main.runtimeClasspath + systemProperties System.getProperties() + args System.getProperty("exec.args", "").split() +} +``` + +## Running the CdapSalesforceToTxt pipeline example + +Gradle 'executeCdapSalesforce' task allows to run the pipeline via the following command: + +```bash +gradle clean executeCdapSalesforce -DmainClass=org.apache.beam.examples.complete.cdap.salesforce.CdapSalesforceToTxt \ + -Dexec.args="--= --=" +``` + +To execute this pipeline, specify the parameters in the following format: + +```bash + --username=your-user-name\ + --password=your-password \ + --securityToken=your-token \ + --consumerKey=your-key \ + --consumerSecret=your-secret \ + --loginUrl=your-login-url \ + --sObjectName=object-name \ + --referenceName=your-reference-name \ + --outputTxtFilePathPrefix=your-path-to-output-folder-with-filename-prefix +``` + +Please see CDAP [Salesforce Batch Source](https://github.com/data-integrations/servicenow-plugins/blob/develop/docs/ServiceNow-batchsource.md) for more information. 
+ +## Running the TxtToCdapSalesforce pipeline example + +Gradle 'executeCdapSalesforce' task allows to run the pipeline via the following command: + +```bash +gradle clean executeCdapSalesforce -DmainClass=org.apache.beam.examples.complete.cdap.salesforce.TxtToCdapSalesforce \ + -Dexec.args="--= --=" +``` + +To execute this pipeline, specify the parameters in the following format: + +```bash + --username=your-user-name\ + --password=your-password \ + --securityToken=your-token \ + --consumerKey=your-key \ + --consumerSecret=your-secret \ + --loginUrl=your-login-url \ + --sObject=CustomObject__c \ + --referenceName=your-reference-name \ + --inputTxtFilePath=your-path-to-txt-file \ + --operation=Insert \ + --errorHandling=Stop on error \ + --maxRecordsPerBatch=10 \ + --maxBytesPerBatch=9999999 \ + --locksDirPath=your-path +``` +Please see CDAP [Salesforce Batch Sink](https://github.com/data-integrations/salesforce/blob/develop/docs/Salesforce-batchsink.md) for more information. + +## Running the CdapSalesforceStreamingToTxt pipeline example + +Gradle 'executeCdapSalesforce' task allows to run the pipeline via the following command: + +```bash +gradle clean executeCdapSalesforce -DmainClass=org.apache.beam.examples.complete.cdap.salesforce.CdapSalesforceStreamingToTxt \ + -Dexec.args="--= --=" +``` + +`CdapSalesforceStreamingToTxt` pipeline parameters: +- `username` - Salesforce username. +- `password` - Salesforce user password. +- `securityToken` - Salesforce security token. +- `consumerKey` - Salesforce connected app's consumer key. +- `consumerSecret` - Salesforce connected app's consumer secret. +- `loginUrl` - Salesforce endpoint to authenticate to. Example: *'https://MyDomainName.my.salesforce.com/services/oauth2/token'*. +- `sObjectName` - Salesforce object to pull supported by CDAP Salesforce Streaming Source. +- `pushTopicName` - name of the push topic that was created from query for some sObject. This push topic should have enabled *pushTopicNotifyCreate* property. + If push topic with such name doesn't exist, then new push topic for provided **'sObjectName'** will be created automatically. +- `pullFrequencySec` - delay in seconds between polling for new records updates. (Optional) +- `startOffset` - inclusive start offset from which the reading should be started. (Optional) + +Please see [CDAP Salesforce](https://github.com/data-integrations/salesforce) for more information. +Also, please see documentation regarding Salesforce streaming API authorization [here](https://developer.salesforce.com/docs/atlas.en-us.api_streaming.meta/api_streaming/code_sample_auth_oauth.htm). 
+ +To execute this pipeline, specify the parameters in the following format: + +```bash + --username=your-user-name\ + --password=your-password \ + --securityToken=your-token \ + --consumerKey=your-key \ + --consumerSecret=your-secret \ + --loginUrl=your-login-url \ + --sObjectName=object-name \ + --pushTopicName=your-topic-name \ + --pullFrequencySec=100 \ + --startOffset=1000 +``` diff --git a/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/TxtToCdapSalesforce.java b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/TxtToCdapSalesforce.java new file mode 100644 index 0000000000000..17828f2d23c8b --- /dev/null +++ b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/TxtToCdapSalesforce.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.examples.complete.cdap.salesforce; + +import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; + +import com.google.gson.Gson; +import io.cdap.plugin.salesforce.plugin.sink.batch.CSVRecord; +import java.util.Map; +import org.apache.beam.examples.complete.cdap.salesforce.options.CdapSalesforceSinkOptions; +import org.apache.beam.examples.complete.cdap.salesforce.transforms.FormatOutputTransform; +import org.apache.beam.examples.complete.cdap.salesforce.utils.CsvRecordCoder; +import org.apache.beam.examples.complete.cdap.salesforce.utils.PluginConfigOptionsConverter; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.PipelineResult; +import org.apache.beam.sdk.coders.KvCoder; +import org.apache.beam.sdk.coders.NullableCoder; +import org.apache.beam.sdk.io.TextIO; +import org.apache.beam.sdk.io.hadoop.WritableCoder; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.transforms.MapElements; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.TypeDescriptor; +import org.apache.hadoop.io.NullWritable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The {@link TxtToCdapSalesforce} pipeline is a batch pipeline which ingests data from .txt file, + * and outputs the resulting records to Salesforce. Salesforce parameters and input .txt file path + * are specified by the user as template parameters.
+ *
+ * <p><b>Example Usage</b>
+ *
+ * <pre>
+ * # Gradle preparation
+ *
+ * To run this example, your {@code build.gradle} file should contain the following task
+ * to execute the pipeline:
+ * {@code
+ * task executeCdapSalesforce (type:JavaExec) {
+ *     mainClass = System.getProperty("mainClass")
+ *     classpath = sourceSets.main.runtimeClasspath
+ *     systemProperties System.getProperties()
+ *     args System.getProperty("exec.args", "").split()
+ * }
+ * }
+ *
+ * This task allows you to run the pipeline via the following command:
+ * {@code
+ * gradle clean executeCdapSalesforce -DmainClass=org.apache.beam.examples.complete.cdap.salesforce.TxtToCdapSalesforce \
+ *      -Dexec.args="--<argument>=<value> --<argument>=<value>"
+ * }
+ *
+ * # Running the pipeline
+ * To execute this pipeline, specify the parameters in the following format:
+ * {@code
+ * --username=your-user-name \
+ * --password=your-password \
+ * --securityToken=your-token \
+ * --consumerKey=your-key \
+ * --consumerSecret=your-secret \
+ * --loginUrl=your-login-url \
+ * --sObject=CustomObject__c \
+ * --referenceName=your-reference-name \
+ * --inputTxtFilePath=your-path-to-input-file \
+ * --maxRecordsPerBatch=10 \
+ * --maxBytesPerBatch=9999999 \
+ * --operation=Insert \
+ * --errorHandling=Stop on error
+ * }
+ *
+ * By default this will run the pipeline locally with the DirectRunner. To change the runner, specify:
+ * {@code
+ * --runner=YOUR_SELECTED_RUNNER
+ * }
+ * </pre>
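In the class below, every input line is expected to be a JSON-serialized CSVRecord: it is parsed with Gson, keyed with NullWritable, and given an explicit coder (the CsvRecordCoder added in this change) before it reaches the CDAP Salesforce sink. A small sketch of that preparation step, mirroring the transforms of the class that follows; the wrapper class and method name are placeholders:

```java
import com.google.gson.Gson;
import io.cdap.plugin.salesforce.plugin.sink.batch.CSVRecord;
import org.apache.beam.examples.complete.cdap.salesforce.utils.CsvRecordCoder;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.NullableCoder;
import org.apache.beam.sdk.io.hadoop.WritableCoder;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TypeDescriptor;
import org.apache.hadoop.io.NullWritable;

/** Placeholder wrapper; the mapping and coders mirror TxtToCdapSalesforce below. */
class TxtToSinkRecordsSketch {

  private static final Gson GSON = new Gson();

  static PCollection<KV<NullWritable, CSVRecord>> toSinkRecords(PCollection<String> jsonLines) {
    return jsonLines
        // Each line is one JSON document describing a CSVRecord to upload.
        .apply(
            "jsonToCsvRecord",
            MapElements.into(new TypeDescriptor<KV<NullWritable, CSVRecord>>() {})
                .via((String json) -> KV.of(NullWritable.get(), GSON.fromJson(json, CSVRecord.class))))
        // The example sets the coder explicitly (CsvRecordCoder is added in this change).
        .setCoder(
            KvCoder.of(
                NullableCoder.of(WritableCoder.of(NullWritable.class)), CsvRecordCoder.of()));
  }
}
```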
+ */ +public class TxtToCdapSalesforce { + + private static final Gson GSON = new Gson(); + + /* Logger for class.*/ + private static final Logger LOG = LoggerFactory.getLogger(TxtToCdapSalesforce.class); + + /** + * Main entry point for pipeline execution. + * + * @param args Command line arguments to the pipeline. + */ + public static void main(String[] args) { + CdapSalesforceSinkOptions options = + PipelineOptionsFactory.fromArgs(args).withValidation().as(CdapSalesforceSinkOptions.class); + + checkStateNotNull(options.getLocksDirPath(), "locksDirPath can not be null!"); + + // Create the pipeline + Pipeline pipeline = Pipeline.create(options); + run(pipeline, options); + } + + /** + * Runs a pipeline which reads records from .txt file and writes it to CDAP Salesforce. + * + * @param options arguments to the pipeline + */ + public static PipelineResult run(Pipeline pipeline, CdapSalesforceSinkOptions options) { + Map paramsMap = + PluginConfigOptionsConverter.salesforceBatchSinkOptionsToParamsMap(options); + LOG.info("Starting Txt-to-Cdap-Salesforce pipeline with parameters: {}", paramsMap); + + /* + * Steps: + * 1) Read messages in from .txt file + * 2) Map to KV + * 3) Write successful records to Cdap Salesforce + */ + + pipeline + .apply("readFromTxt", TextIO.read().from(options.getInputTxtFilePath())) + .apply( + MapElements.into(new TypeDescriptor>() {}) + .via( + json -> { + CSVRecord csvRecord = GSON.fromJson(json, CSVRecord.class); + return KV.of(NullWritable.get(), csvRecord); + })) + .setCoder( + KvCoder.of(NullableCoder.of(WritableCoder.of(NullWritable.class)), CsvRecordCoder.of())) + .apply( + "writeToCdapSalesforce", + FormatOutputTransform.writeToCdapSalesforce(paramsMap, options.getLocksDirPath())); + + return pipeline.run(); + } +} diff --git a/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/options/CdapSalesforceOptions.java b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/options/CdapSalesforceOptions.java new file mode 100644 index 0000000000000..f10c7674255f2 --- /dev/null +++ b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/options/CdapSalesforceOptions.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.examples.complete.cdap.salesforce.options; + +import io.cdap.plugin.salesforce.SalesforceConstants; +import org.apache.beam.examples.complete.cdap.options.BaseCdapOptions; +import org.apache.beam.sdk.options.Description; +import org.apache.beam.sdk.options.Validation; + +/** + * The {@link CdapSalesforceOptions} interface provides the custom execution options passed by the + * executor at the command-line for example with Cdap Salesfroce plugins. + */ +public interface CdapSalesforceOptions extends BaseCdapOptions { + + @Validation.Required + @Description(SalesforceConstants.PROPERTY_USERNAME) + String getUsername(); + + void setUsername(String username); + + @Validation.Required + @Description(SalesforceConstants.PROPERTY_PASSWORD) + String getPassword(); + + void setPassword(String password); + + @Validation.Required + @Description(SalesforceConstants.PROPERTY_SECURITY_TOKEN) + String getSecurityToken(); + + void setSecurityToken(String securityToken); + + @Validation.Required + @Description(SalesforceConstants.PROPERTY_CONSUMER_KEY) + String getConsumerKey(); + + void setConsumerKey(String consumerKey); + + @Validation.Required + @Description(SalesforceConstants.PROPERTY_CONSUMER_SECRET) + String getConsumerSecret(); + + void setConsumerSecret(String consumerSecret); + + @Validation.Required + @Description(SalesforceConstants.PROPERTY_LOGIN_URL) + String getLoginUrl(); + + void setLoginUrl(String loginUrl); +} diff --git a/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/options/CdapSalesforceSinkOptions.java b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/options/CdapSalesforceSinkOptions.java new file mode 100644 index 0000000000000..0856fe86fd58b --- /dev/null +++ b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/options/CdapSalesforceSinkOptions.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.examples.complete.cdap.salesforce.options; + +import org.apache.beam.examples.complete.cdap.salesforce.TxtToCdapSalesforce; +import org.apache.beam.sdk.options.Description; +import org.apache.beam.sdk.options.Validation; + +/** + * The {@link CdapSalesforceSinkOptions} interface provides the custom execution options passed by + * the executor at the command-line for {@link TxtToCdapSalesforce} example. 
+ */ +public interface CdapSalesforceSinkOptions extends CdapSalesforceOptions { + + @Validation.Required + @Description( + "Strategy used to handle erroneous records.\n" + + "SKIP - Ignores erroneous records.\n" + + "STOP - Fails pipeline due to erroneous record.") + String getErrorHandling(); + + void setErrorHandling(String errorHandling); + + @Validation.Required + @Description( + "Maximum size in bytes of a batch of records when writing to Salesforce. " + + "This value cannot be greater than 10,000,000.") + String getMaxBytesPerBatch(); + + void setMaxBytesPerBatch(String maxBytesPerBatch); + + @Validation.Required + @Description( + "Maximum number of records to include in a batch when writing to Salesforce." + + "This value cannot be greater than 10,000.") + String getMaxRecordsPerBatch(); + + void setMaxRecordsPerBatch(String maxRecordsPerBatch); + + @Validation.Required + @Description( + "Operation used for sinking data into Salesforce.\n" + + "Insert - adds records.\n" + + "Upsert - upserts the records. Salesforce will decide if sObjects " + + "are the same using external id field.\n" + + "Update - updates existing records based on Id field.") + String getOperation(); + + void setOperation(String operation); + + @Validation.Required + @Description("Salesforce object name to insert records into.") + String getsObject(); + + void setsObject(String sObject); + + @Validation.Required + @Description( + "Locks directory path where locks will be stored." + + "This parameter is needed for Hadoop External Synchronization" + + "(mechanism for acquiring locks related to the write job).") + String getLocksDirPath(); + + void setLocksDirPath(String locksDirPath); + + @Validation.Required + @Description("Input .txt file path with Salesforce records.") + String getInputTxtFilePath(); + + void setInputTxtFilePath(String inputTxtFilePath); +} diff --git a/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/options/CdapSalesforceSourceOptions.java b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/options/CdapSalesforceSourceOptions.java new file mode 100644 index 0000000000000..5f10c5c0223a5 --- /dev/null +++ b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/options/CdapSalesforceSourceOptions.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.examples.complete.cdap.salesforce.options; + +import io.cdap.plugin.salesforce.plugin.source.batch.util.SalesforceSourceConstants; +import org.apache.beam.examples.complete.cdap.salesforce.CdapSalesforceToTxt; +import org.apache.beam.sdk.options.Description; +import org.apache.beam.sdk.options.Validation; + +/** + * The {@link CdapSalesforceSourceOptions} interface provides the custom execution options passed by + * the executor at the command-line for {@link CdapSalesforceToTxt} example. + */ +public interface CdapSalesforceSourceOptions extends CdapSalesforceOptions { + + @Validation.Required + @Description(SalesforceSourceConstants.PROPERTY_SOBJECT_NAME) + String getSObjectName(); + + void setSObjectName(String sObjectName); + + @Validation.Required + @Description( + "Path to output folder with filename prefix." + + "It will write a set of .txt files with names like {prefix}-###.") + String getOutputTxtFilePathPrefix(); + + void setOutputTxtFilePathPrefix(String outputTxtFilePathPrefix); +} diff --git a/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/options/CdapSalesforceStreamingSourceOptions.java b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/options/CdapSalesforceStreamingSourceOptions.java new file mode 100644 index 0000000000000..46b5d5dc94467 --- /dev/null +++ b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/options/CdapSalesforceStreamingSourceOptions.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.examples.complete.cdap.salesforce.options; + +import org.apache.beam.examples.complete.cdap.salesforce.CdapSalesforceStreamingToTxt; +import org.apache.beam.sdk.options.Description; +import org.apache.beam.sdk.options.Validation; + +/** + * The {@link CdapSalesforceStreamingSourceOptions} interface provides the custom execution options + * passed by the executor at the command-line for {@link CdapSalesforceStreamingToTxt} example. + */ +public interface CdapSalesforceStreamingSourceOptions extends CdapSalesforceSourceOptions { + + @Validation.Required + @Description("Salesforce push topic name. 
Plugin will track updates from this topic.") + String getPushTopicName(); + + void setPushTopicName(String pushTopicName); + + @Description("Delay in seconds between polling for new records updates.") + Long getPullFrequencySec(); + + void setPullFrequencySec(Long pullFrequencySec); + + @Description("Inclusive start offset from which the reading should be started.") + Long getStartOffset(); + + void setStartOffset(Long startOffset); +} diff --git a/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/options/package-info.java b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/options/package-info.java new file mode 100644 index 0000000000000..ae1a86cd4df58 --- /dev/null +++ b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/options/package-info.java @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Cdap Plugins template. */ +package org.apache.beam.examples.complete.cdap.salesforce.options; diff --git a/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/package-info.java b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/package-info.java new file mode 100644 index 0000000000000..6e124ed9c2349 --- /dev/null +++ b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/package-info.java @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Cdap Plugins template. 
*/ +package org.apache.beam.examples.complete.cdap.salesforce; diff --git a/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/transforms/FormatInputTransform.java b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/transforms/FormatInputTransform.java new file mode 100644 index 0000000000000..049d9c4826430 --- /dev/null +++ b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/transforms/FormatInputTransform.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.examples.complete.cdap.salesforce.transforms; + +import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; + +import io.cdap.cdap.api.data.schema.Schema; +import io.cdap.plugin.salesforce.plugin.source.batch.SalesforceBatchSource; +import io.cdap.plugin.salesforce.plugin.source.batch.SalesforceSourceConfig; +import io.cdap.plugin.salesforce.plugin.source.streaming.SalesforceReceiver; +import io.cdap.plugin.salesforce.plugin.source.streaming.SalesforceStreamingSource; +import io.cdap.plugin.salesforce.plugin.source.streaming.SalesforceStreamingSourceConfig; +import java.util.LinkedHashMap; +import java.util.Map; +import org.apache.beam.examples.complete.cdap.salesforce.utils.GetOffsetUtils; +import org.apache.beam.sdk.io.cdap.CdapIO; +import org.apache.beam.sdk.io.cdap.ConfigWrapper; +import org.apache.beam.sdk.io.cdap.Plugin; +import org.apache.hadoop.io.NullWritable; + +/** Different input transformations over the processed data in the pipeline. */ +public class FormatInputTransform { + + /** + * Configures Cdap Salesforce Read transform. + * + * @param pluginConfigParams Cdap Salesforce plugin config parameters + * @return configured Read transform + */ + @SuppressWarnings("rawtypes") + public static CdapIO.Read readFromCdapSalesforce( + Map pluginConfigParams) { + + final SalesforceSourceConfig pluginConfig = + new ConfigWrapper<>(SalesforceSourceConfig.class).withParams(pluginConfigParams).build(); + + checkStateNotNull(pluginConfig, "Plugin config can't be null."); + + return CdapIO.read() + .withCdapPluginClass(SalesforceBatchSource.class) + .withPluginConfig(pluginConfig) + .withKeyClass(Schema.class) + .withValueClass(LinkedHashMap.class); + } + + /** + * Configures Cdap Salesforce Streaming Read transform. 
+ * + * @param pluginConfigParams Cdap Salesforce plugin config parameters + * @param pullFrequencySec Delay in seconds between polling for new records updates + * @param startOffset Inclusive start offset from which the reading should be started + * @return configured Read transform + */ + public static CdapIO.Read readFromCdapSalesforceStreaming( + Map pluginConfigParams, Long pullFrequencySec, Long startOffset) { + + final SalesforceStreamingSourceConfig pluginConfig = + new ConfigWrapper<>(SalesforceStreamingSourceConfig.class) + .withParams(pluginConfigParams) + .build(); + checkStateNotNull(pluginConfig, "Plugin config can't be null."); + + pluginConfig.ensurePushTopicExistAndWithCorrectFields(); + + CdapIO.Read read = + CdapIO.read() + .withCdapPlugin( + Plugin.createStreaming( + SalesforceStreamingSource.class, + GetOffsetUtils.getOffsetFnForCdapPlugin(SalesforceStreamingSource.class), + SalesforceReceiver.class, + config -> { + SalesforceStreamingSourceConfig salesforceConfig = + (SalesforceStreamingSourceConfig) config; + return new Object[] { + salesforceConfig.getAuthenticatorCredentials(), + salesforceConfig.getPushTopicName() + }; + })) + .withPluginConfig(pluginConfig) + .withKeyClass(NullWritable.class) + .withValueClass(String.class); + if (pullFrequencySec != null) { + read = read.withPullFrequencySec(pullFrequencySec); + } + if (startOffset != null) { + read = read.withStartOffset(startOffset); + } + return read; + } +} diff --git a/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/transforms/FormatOutputTransform.java b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/transforms/FormatOutputTransform.java new file mode 100644 index 0000000000000..e71db6a8c911f --- /dev/null +++ b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/transforms/FormatOutputTransform.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.examples.complete.cdap.salesforce.transforms; + +import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; + +import io.cdap.cdap.api.plugin.PluginConfig; +import io.cdap.plugin.salesforce.plugin.sink.batch.CSVRecord; +import io.cdap.plugin.salesforce.plugin.sink.batch.SalesforceBatchSink; +import io.cdap.plugin.salesforce.plugin.sink.batch.SalesforceSinkConfig; +import java.util.Map; +import org.apache.beam.sdk.io.cdap.CdapIO; +import org.apache.beam.sdk.io.cdap.ConfigWrapper; +import org.apache.hadoop.io.NullWritable; + +/** Different output transformations over the processed data in the pipeline. */ +public class FormatOutputTransform { + + /** + * Configures Cdap Salesforce Write transform. 
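For context, here is a hedged sketch of how the streaming read configured above is typically wired into a pipeline. The names `p`, `options`, and `paramsMap` (a parameter map such as the one produced by `PluginConfigOptionsConverter` later in this change), as well as the fixed 30-second windowing before the text write, are assumptions for illustration, not code from this change set.

```java
// Sketch only: consuming the streaming read defined above.
PCollection<String> jsonRecords =
    p.apply(
            "ReadFromCdapSalesforceStreaming",
            FormatInputTransform.readFromCdapSalesforceStreaming(
                paramsMap, options.getPullFrequencySec(), options.getStartOffset()))
        // The read is configured above with NullWritable keys and String values,
        // so dropping the keys leaves the raw Salesforce JSON records.
        .apply("DropKeys", Values.create());

jsonRecords
    .apply("Window", Window.into(FixedWindows.of(Duration.standardSeconds(30))))
    .apply(
        "WriteToTxt",
        TextIO.write()
            .to(options.getOutputTxtFilePathPrefix())
            .withWindowedWrites()
            .withNumShards(1));
```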
+ * + * @param pluginConfigParams Cdap Salesforce plugin config parameters + * @return configured Write transform to Cdap Salesforce + */ + public static CdapIO.Write writeToCdapSalesforce( + Map pluginConfigParams, String locksDirPath) { + final PluginConfig pluginConfig = + new ConfigWrapper<>(SalesforceSinkConfig.class).withParams(pluginConfigParams).build(); + + checkStateNotNull(pluginConfig, "Plugin config can't be null."); + + return CdapIO.write() + .withCdapPluginClass(SalesforceBatchSink.class) + .withPluginConfig(pluginConfig) + .withKeyClass(NullWritable.class) + .withValueClass(CSVRecord.class) + .withLocksDirPath(locksDirPath); + } +} diff --git a/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/transforms/package-info.java b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/transforms/package-info.java new file mode 100644 index 0000000000000..ab0f33bff4c17 --- /dev/null +++ b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/transforms/package-info.java @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Cdap Plugins template. */ +package org.apache.beam.examples.complete.cdap.salesforce.transforms; diff --git a/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/utils/CsvRecordCoder.java b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/utils/CsvRecordCoder.java new file mode 100644 index 0000000000000..c14e9cd7c7aa9 --- /dev/null +++ b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/utils/CsvRecordCoder.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.examples.complete.cdap.salesforce.utils; + +import com.google.gson.Gson; +import io.cdap.plugin.salesforce.plugin.sink.batch.CSVRecord; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import org.apache.beam.sdk.coders.CustomCoder; +import org.apache.beam.sdk.coders.StringUtf8Coder; + +/** Custom coder for {@link CSVRecord}. */ +public class CsvRecordCoder extends CustomCoder { + + private static final Gson GSON = new Gson(); + private static final CsvRecordCoder CODER = new CsvRecordCoder(); + private static final StringUtf8Coder STRING_CODER = StringUtf8Coder.of(); + + public static CsvRecordCoder of() { + return CODER; + } + + @Override + public void encode(CSVRecord value, OutputStream outStream) throws IOException { + STRING_CODER.encode(GSON.toJson(value), outStream); + } + + @Override + public CSVRecord decode(InputStream inStream) throws IOException { + return GSON.fromJson(STRING_CODER.decode(inStream), CSVRecord.class); + } +} diff --git a/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/utils/GetOffsetUtils.java b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/utils/GetOffsetUtils.java new file mode 100644 index 0000000000000..1f6b7a280d3ae --- /dev/null +++ b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/utils/GetOffsetUtils.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.examples.complete.cdap.salesforce.utils; + +import static org.apache.beam.sdk.util.Preconditions.checkArgumentNotNull; + +import com.google.gson.Gson; +import com.google.gson.internal.LinkedTreeMap; +import io.cdap.plugin.salesforce.plugin.source.streaming.SalesforceStreamingSource; +import java.util.HashMap; +import org.apache.beam.sdk.io.cdap.Plugin; +import org.apache.beam.sdk.transforms.SerializableFunction; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.reflect.TypeToken; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class for getting a {@link SerializableFunction} that defines how to get record offset for + * different CDAP {@link Plugin} classes. + */ +public class GetOffsetUtils { + + private static final Logger LOG = LoggerFactory.getLogger(GetOffsetUtils.class); + private static final Gson GSON = new Gson(); + + private static final String SALESFORCE_EVENT = "event"; + private static final String SALESFORCE_REPLAY_ID = "replayId"; + + /** + * Function for getting offset for given streaming Cdap {@link + * io.cdap.cdap.api.annotation.Plugin}. 
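The `CsvRecordCoder` above is a thin GSON round trip for the CDAP `CSVRecord` sink type. As a hedged illustration of where such a coder is usually attached, the fragment below builds `CSVRecord` elements from JSON lines (the same shape as the test resource added later in this change) and registers the coder explicitly; the input path and transform names are placeholders, not part of this change.

```java
// Sketch only: producing CSVRecord elements and attaching the custom coder.
PCollection<CSVRecord> csvRecords =
    pipeline
        .apply("ReadJsonLines", TextIO.read().from("/path/to/input-*.txt"))
        .apply(
            "JsonToCsvRecord",
            MapElements.into(TypeDescriptor.of(CSVRecord.class))
                .via((String json) -> new Gson().fromJson(json, CSVRecord.class)));
// Register the coder so CSVRecord elements can be serialized between pipeline stages.
csvRecords.setCoder(CsvRecordCoder.of());
```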
+ */ + public static SerializableFunction getOffsetFnForCdapPlugin(Class pluginClass) { + if (SalesforceStreamingSource.class.equals(pluginClass)) { + return getOffsetFnForSalesforce(); + } + throw new UnsupportedOperationException( + String.format("Given plugin class '%s' is not supported!", pluginClass.getName())); + } + + /** + * Function for getting offset for Salesforce record that has {@link #SALESFORCE_REPLAY_ID} field. + */ + @SuppressWarnings({"rawtypes"}) + private static SerializableFunction getOffsetFnForSalesforce() { + return input -> { + if (input != null) { + try { + HashMap json = + GSON.fromJson(input, new TypeToken>() {}.getType()); + checkArgumentNotNull(json, "Can not get JSON from Salesforce input string"); + LinkedTreeMap fieldMap = (LinkedTreeMap) json.get(SALESFORCE_EVENT); + if (fieldMap != null) { + Object id = fieldMap.get(SALESFORCE_REPLAY_ID); + checkArgumentNotNull(id, "Can not get Replay ID from Salesforce input string"); + return ((Double) id).longValue(); + } + } catch (Exception e) { + LOG.error("Can not get offset from json", e); + } + } + return 0L; + }; + } +} diff --git a/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/utils/PluginConfigOptionsConverter.java b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/utils/PluginConfigOptionsConverter.java new file mode 100644 index 0000000000000..2063a803c5309 --- /dev/null +++ b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/utils/PluginConfigOptionsConverter.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.examples.complete.cdap.salesforce.utils; + +import io.cdap.plugin.common.Constants; +import io.cdap.plugin.salesforce.SalesforceConstants; +import io.cdap.plugin.salesforce.plugin.sink.batch.ErrorHandling; +import io.cdap.plugin.salesforce.plugin.sink.batch.SalesforceSinkConfig; +import io.cdap.plugin.salesforce.plugin.source.batch.util.SalesforceSourceConstants; +import java.util.Map; +import org.apache.beam.examples.complete.cdap.salesforce.options.CdapSalesforceSinkOptions; +import org.apache.beam.examples.complete.cdap.salesforce.options.CdapSalesforceSourceOptions; +import org.apache.beam.examples.complete.cdap.salesforce.options.CdapSalesforceStreamingSourceOptions; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap; + +/** + * Class for converting CDAP {@link org.apache.beam.sdk.options.PipelineOptions} to map for {@link + * org.apache.beam.sdk.io.cdap.ConfigWrapper}. 
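To make the offset extraction above concrete, a short illustrative fragment follows; the sample JSON mirrors the shape the function parses and is not taken from this change.

```java
SerializableFunction<String, Long> offsetFn =
    GetOffsetUtils.getOffsetFnForCdapPlugin(SalesforceStreamingSource.class);

// "event.replayId" is read as a Double by GSON and narrowed to a long.
Long offset = offsetFn.apply("{\"event\":{\"replayId\":42},\"sobject\":{\"Name\":\"Name 1\"}}");
// offset == 42L; null, malformed, or field-less input falls back to 0L as implemented above.
```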
+ */ +public class PluginConfigOptionsConverter { + + private static final String SALESFORCE_STREAMING_PUSH_TOPIC_NAME = "pushTopicName"; + private static final String SALESFORCE_PUSH_TOPIC_NOTIFY_CREATE = "pushTopicNotifyCreate"; + private static final String SALESFORCE_PUSH_TOPIC_NOTIFY_UPDATE = "pushTopicNotifyUpdate"; + private static final String SALESFORCE_PUSH_TOPIC_NOTIFY_DELETE = "pushTopicNotifyDelete"; + private static final String SALESFORCE_PUSH_TOPIC_NOTIFY_FOR_FIELDS = "pushTopicNotifyForFields"; + private static final String SALESFORCE_REFERENCED_NOTIFY_FOR_FIELDS = "Referenced"; + private static final String SALESFORCE_ENABLED_NOTIFY = "Enabled"; + + /** Returns map of parameters for Cdap Salesforce streaming source plugin. */ + public static Map salesforceStreamingSourceOptionsToParamsMap( + CdapSalesforceStreamingSourceOptions options) { + return ImmutableMap.builder() + .put(Constants.Reference.REFERENCE_NAME, options.getReferenceName()) + .put(SALESFORCE_STREAMING_PUSH_TOPIC_NAME, options.getPushTopicName()) + .put(SalesforceConstants.PROPERTY_USERNAME, options.getUsername()) + .put(SalesforceConstants.PROPERTY_PASSWORD, options.getPassword()) + .put(SalesforceConstants.PROPERTY_SECURITY_TOKEN, options.getSecurityToken()) + .put(SalesforceConstants.PROPERTY_CONSUMER_KEY, options.getConsumerKey()) + .put(SalesforceConstants.PROPERTY_CONSUMER_SECRET, options.getConsumerSecret()) + .put(SalesforceConstants.PROPERTY_LOGIN_URL, options.getLoginUrl()) + .put(SalesforceSourceConstants.PROPERTY_SOBJECT_NAME, options.getSObjectName()) + .put(SALESFORCE_PUSH_TOPIC_NOTIFY_CREATE, SALESFORCE_ENABLED_NOTIFY) + .put(SALESFORCE_PUSH_TOPIC_NOTIFY_UPDATE, SALESFORCE_ENABLED_NOTIFY) + .put(SALESFORCE_PUSH_TOPIC_NOTIFY_DELETE, SALESFORCE_ENABLED_NOTIFY) + .put(SALESFORCE_PUSH_TOPIC_NOTIFY_FOR_FIELDS, SALESFORCE_REFERENCED_NOTIFY_FOR_FIELDS) + .build(); + } + + /** Returns map of parameters for Cdap Salesforce batch source plugin. */ + public static Map salesforceBatchSourceOptionsToParamsMap( + CdapSalesforceSourceOptions options) { + return ImmutableMap.builder() + .put(Constants.Reference.REFERENCE_NAME, options.getReferenceName()) + .put(SalesforceConstants.PROPERTY_USERNAME, options.getUsername()) + .put(SalesforceConstants.PROPERTY_PASSWORD, options.getPassword()) + .put(SalesforceConstants.PROPERTY_SECURITY_TOKEN, options.getSecurityToken()) + .put(SalesforceConstants.PROPERTY_CONSUMER_KEY, options.getConsumerKey()) + .put(SalesforceConstants.PROPERTY_CONSUMER_SECRET, options.getConsumerSecret()) + .put(SalesforceConstants.PROPERTY_LOGIN_URL, options.getLoginUrl()) + .put(SalesforceSourceConstants.PROPERTY_SOBJECT_NAME, options.getSObjectName()) + .build(); + } + + /** Returns map of parameters for Cdap Salesforce batch sink plugin. 
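For completeness, a hedged sketch of how these parameter maps travel onward: the converter output feeds `ConfigWrapper`, which builds the CDAP plugin config used by the transforms earlier in this change. The `Map<String, Object>` generic types are an assumption (the diff does not show them), and `options` stands for previously parsed pipeline options.

```java
// Sketch: options -> parameter map -> CDAP plugin config (generic types assumed).
Map<String, Object> paramsMap =
    PluginConfigOptionsConverter.salesforceStreamingSourceOptionsToParamsMap(options);

SalesforceStreamingSourceConfig pluginConfig =
    new ConfigWrapper<>(SalesforceStreamingSourceConfig.class).withParams(paramsMap).build();
```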
*/ + public static Map salesforceBatchSinkOptionsToParamsMap( + CdapSalesforceSinkOptions options) { + return ImmutableMap.builder() + .put(Constants.Reference.REFERENCE_NAME, options.getReferenceName()) + .put(SalesforceConstants.PROPERTY_USERNAME, options.getUsername()) + .put(SalesforceConstants.PROPERTY_PASSWORD, options.getPassword()) + .put(SalesforceConstants.PROPERTY_SECURITY_TOKEN, options.getSecurityToken()) + .put(SalesforceConstants.PROPERTY_CONSUMER_KEY, options.getConsumerKey()) + .put(SalesforceConstants.PROPERTY_CONSUMER_SECRET, options.getConsumerSecret()) + .put(SalesforceConstants.PROPERTY_LOGIN_URL, options.getLoginUrl()) + .put(SalesforceSinkConfig.PROPERTY_SOBJECT, options.getsObject()) + .put(SalesforceSinkConfig.PROPERTY_OPERATION, options.getOperation()) + .put( + SalesforceSinkConfig.PROPERTY_ERROR_HANDLING, + ErrorHandling.valueOf(options.getErrorHandling()).getValue()) + .put(SalesforceSinkConfig.PROPERTY_MAX_BYTES_PER_BATCH, options.getMaxBytesPerBatch()) + .put(SalesforceSinkConfig.PROPERTY_MAX_RECORDS_PER_BATCH, options.getMaxRecordsPerBatch()) + .build(); + } +} diff --git a/sdks/typescript/src/apache_beam/version.ts b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/utils/package-info.java similarity index 71% rename from sdks/typescript/src/apache_beam/version.ts rename to examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/utils/package-info.java index 3fe1d1a447468..7dbf1da2ad4b8 100644 --- a/sdks/typescript/src/apache_beam/version.ts +++ b/examples/java/cdap/salesforce/src/main/java/org/apache/beam/examples/complete/cdap/salesforce/utils/package-info.java @@ -16,11 +16,5 @@ * limitations under the License. */ -const fs = require("fs"); -const path = require("path"); - -// TODO: (Typescript) Is there a more standard way to do this? -// The require below doesn't work when installed outside the source tree -// (e.g. in the docker container). -//export const { version } = require("../../../package.json"); -export const version = "0.38.0"; +/** Cdap Plugins template. 
*/ +package org.apache.beam.examples.complete.cdap.salesforce.utils; diff --git a/examples/java/cdap/salesforce/src/test/resources/example-txt-to-cdap-salesforce-custom.txt b/examples/java/cdap/salesforce/src/test/resources/example-txt-to-cdap-salesforce-custom.txt new file mode 100644 index 0000000000000..30951f86d4dbd --- /dev/null +++ b/examples/java/cdap/salesforce/src/test/resources/example-txt-to-cdap-salesforce-custom.txt @@ -0,0 +1,20 @@ +{"columnNames":["Name"],"values":["Name 1"]} +{"columnNames":["Name"],"values":["Name 2"]} +{"columnNames":["Name"],"values":["Name 3"]} +{"columnNames":["Name"],"values":["Name 4"]} +{"columnNames":["Name"],"values":["Name 5"]} +{"columnNames":["Name"],"values":["Name 6"]} +{"columnNames":["Name"],"values":["Name 7"]} +{"columnNames":["Name"],"values":["Name 8"]} +{"columnNames":["Name"],"values":["Name 9"]} +{"columnNames":["Name"],"values":["Name 10"]} +{"columnNames":["Name"],"values":["Name 11"]} +{"columnNames":["Name"],"values":["Name 12"]} +{"columnNames":["Name"],"values":["Name 13"]} +{"columnNames":["Name"],"values":["Name 14"]} +{"columnNames":["Name"],"values":["Name 15"]} +{"columnNames":["Name"],"values":["Name 16"]} +{"columnNames":["Name"],"values":["Name 17"]} +{"columnNames":["Name"],"values":["Name 18"]} +{"columnNames":["Name"],"values":["Name 19"]} +{"columnNames":["Name"],"values":["Name 20"]} \ No newline at end of file diff --git a/examples/java/cdap/servicenow/build.gradle b/examples/java/cdap/servicenow/build.gradle index 9147722e1e2f6..e3219e8afaea5 100644 --- a/examples/java/cdap/servicenow/build.gradle +++ b/examples/java/cdap/servicenow/build.gradle @@ -98,7 +98,7 @@ task preCommit() { } } -task executeCdap (type:JavaExec) { +task executeCdapServiceNow (type:JavaExec) { mainClass = System.getProperty("mainClass") classpath = sourceSets.main.runtimeClasspath systemProperties System.getProperties() diff --git a/examples/java/cdap/servicenow/src/main/java/org/apache/beam/examples/complete/cdap/servicenow/README.md b/examples/java/cdap/servicenow/src/main/java/org/apache/beam/examples/complete/cdap/servicenow/README.md index 1bca97c1bc311..6af97400bf2c9 100644 --- a/examples/java/cdap/servicenow/src/main/java/org/apache/beam/examples/complete/cdap/servicenow/README.md +++ b/examples/java/cdap/servicenow/src/main/java/org/apache/beam/examples/complete/cdap/servicenow/README.md @@ -20,7 +20,7 @@ To run this example your `build.gradle` file should contain the following task to execute the pipeline: ``` -task executeCdap (type:JavaExec) { +task executeCdapServiceNow (type:JavaExec) { mainClass = System.getProperty("mainClass") classpath = sourceSets.main.runtimeClasspath systemProperties System.getProperties() @@ -30,10 +30,10 @@ task executeCdap (type:JavaExec) { ## Running the CdapServiceNowToTxt pipeline example -Gradle 'executeCdap' task allows to run the pipeline via the following command: +Gradle 'executeCdapServiceNow' task allows to run the pipeline via the following command: ```bash -gradle clean executeCdap -DmainClass=org.apache.beam.examples.complete.cdap.servicenow.CdapServiceNowToTxt \ +gradle clean executeCdapServiceNow -DmainClass=org.apache.beam.examples.complete.cdap.servicenow.CdapServiceNowToTxt \ -Dexec.args="--= --=" ``` diff --git a/examples/java/cdap/zendesk/build.gradle b/examples/java/cdap/zendesk/build.gradle index 277d0761fc3ca..cb7309d8b6f99 100644 --- a/examples/java/cdap/zendesk/build.gradle +++ b/examples/java/cdap/zendesk/build.gradle @@ -98,7 +98,7 @@ task preCommit() { } } -task executeCdap 
(type:JavaExec) { +task executeCdapZendesk (type:JavaExec) { mainClass = System.getProperty("mainClass") classpath = sourceSets.main.runtimeClasspath systemProperties System.getProperties() diff --git a/examples/java/cdap/zendesk/src/main/java/org/apache/beam/examples/complete/cdap/zendesk/README.md b/examples/java/cdap/zendesk/src/main/java/org/apache/beam/examples/complete/cdap/zendesk/README.md index e9b90f206bf51..4a5218fdded46 100644 --- a/examples/java/cdap/zendesk/src/main/java/org/apache/beam/examples/complete/cdap/zendesk/README.md +++ b/examples/java/cdap/zendesk/src/main/java/org/apache/beam/examples/complete/cdap/zendesk/README.md @@ -20,7 +20,7 @@ To run this example your `build.gradle` file should contain the following task to execute the pipeline: ``` -task executeCdap (type:JavaExec) { +task executeCdapZendesk (type:JavaExec) { mainClass = System.getProperty("mainClass") classpath = sourceSets.main.runtimeClasspath systemProperties System.getProperties() @@ -30,10 +30,10 @@ task executeCdap (type:JavaExec) { ## Running the CdapZendeskToTxt pipeline example -Gradle 'executeCdap' task allows to run the pipeline via the following command: +Gradle 'executeCdapZendesk' task allows to run the pipeline via the following command: ```bash -gradle clean executeCdap -DmainClass=org.apache.beam.examples.complete.cdap.zendesk.CdapZendeskToTxt \ +gradle clean executeCdapZendesk -DmainClass=org.apache.beam.examples.complete.cdap.zendesk.CdapZendeskToTxt \ -Dexec.args="--= --=" ``` diff --git a/examples/notebooks/beam-ml/custom_remote_inference.ipynb b/examples/notebooks/beam-ml/custom_remote_inference.ipynb index ad25849e89edc..20051f528b585 100644 --- a/examples/notebooks/beam-ml/custom_remote_inference.ipynb +++ b/examples/notebooks/beam-ml/custom_remote_inference.ipynb @@ -37,6 +37,22 @@ "source": [ "# Remote inference in Apache Beam\n", "\n", + "\n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GNbarEZsalS2" + }, + "source": [ "This example demonstrates how to implement a custom inference call in Apache Beam using the Google Cloud Vision API.\n", "\n", "The prefered way to run inference in Apache Beam is by using the [RunInference API](https://beam.apache.org/documentation/sdks/python-machine-learning/). \n", diff --git a/examples/notebooks/beam-ml/dataframe_api_preprocessing.ipynb b/examples/notebooks/beam-ml/dataframe_api_preprocessing.ipynb index ec4e9a05ed559..a488caf7d3acc 100644 --- a/examples/notebooks/beam-ml/dataframe_api_preprocessing.ipynb +++ b/examples/notebooks/beam-ml/dataframe_api_preprocessing.ipynb @@ -36,6 +36,22 @@ "\n", "[Pandas DataFrames](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html) is one of the most common tools used for data exploration and preprocessing. Pandas is popular because of its ease of use. It has intuitive methods to perform common analytical tasks and data preprocessing. \n", "\n", + "\n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + "
\n" + ], + "metadata": { + "id": "A8xNRyZMW1yK" + } + }, + { + "cell_type": "markdown", + "source": [ "For rapid execution, Pandas loads all of the data into memory on a single machine (one node). This configuration works well when dealing with small-scale datasets. However, many projects involve datasets that are too big to fit in memory. These use cases generally require parallel data processing frameworks, such as Apache Beam.\n", "\n", "Beam DataFrames provide a Pandas-like\n", diff --git a/examples/notebooks/beam-ml/run_custom_inference.ipynb b/examples/notebooks/beam-ml/run_custom_inference.ipynb index c45405204d222..e3f0354dd8cc5 100644 --- a/examples/notebooks/beam-ml/run_custom_inference.ipynb +++ b/examples/notebooks/beam-ml/run_custom_inference.ipynb @@ -39,6 +39,22 @@ "source": [ "# Bring your own machine learning (ML) model to Beam RunInference\n", "\n", + "\n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A8xNRyZMW1yK" + }, + "source": [ "This notebook demonstrates how to run inference on your custom framework using the\n", "[ModelHandler](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.inference.base.html#apache_beam.ml.inference.base.ModelHandler) class.\n", "\n", diff --git a/examples/notebooks/beam-ml/run_inference_multi_model.ipynb b/examples/notebooks/beam-ml/run_inference_multi_model.ipynb index 3566f10a19238..7eddf95920d7b 100644 --- a/examples/notebooks/beam-ml/run_inference_multi_model.ipynb +++ b/examples/notebooks/beam-ml/run_inference_multi_model.ipynb @@ -47,7 +47,16 @@ { "cell_type": "markdown", "source": [ - "# Ensemble model using an image captioning and ranking example" + "# Ensemble model using an image captioning and ranking example", + "\n", + "\n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + "
" ], "metadata": { "id": "gPCMXWgOMt_0" diff --git a/examples/notebooks/beam-ml/run_inference_pytorch.ipynb b/examples/notebooks/beam-ml/run_inference_pytorch.ipynb index d0a350982f4e3..e52dfed67c228 100644 --- a/examples/notebooks/beam-ml/run_inference_pytorch.ipynb +++ b/examples/notebooks/beam-ml/run_inference_pytorch.ipynb @@ -51,6 +51,23 @@ }, "source": [ "# Apache Beam RunInference for PyTorch\n", + "\n", + "\n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A8xNRyZMW1yK" + }, + "source": [ "This notebook demonstrates the use of the RunInference transform for PyTorch. Apache Beam includes implementations of the [ModelHandler](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.inference.base.html#apache_beam.ml.inference.base.ModelHandler) class for [users of PyTorch](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.inference.pytorch_inference.html). For more information about the RunInference API, see [Machine Learning](https://beam.apache.org/documentation/sdks/python-machine-learning) in the Apache Beam documentation.\n", "\n", "\n", diff --git a/examples/notebooks/beam-ml/run_inference_pytorch_tensorflow_sklearn.ipynb b/examples/notebooks/beam-ml/run_inference_pytorch_tensorflow_sklearn.ipynb index 60f79d63a5bb3..2ec05801e9bf9 100644 --- a/examples/notebooks/beam-ml/run_inference_pytorch_tensorflow_sklearn.ipynb +++ b/examples/notebooks/beam-ml/run_inference_pytorch_tensorflow_sklearn.ipynb @@ -51,6 +51,23 @@ }, "source": [ "# Use RunInference in Apache Beam\n", + "\n", + "\n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A8xNRyZMW1yK" + }, + "source": [ "You can use Apache Beam versions 2.40.0 and later with the [RunInference API](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.inference.base.html#apache_beam.ml.inference.base.RunInference) for local and remote inference with batch and streaming pipelines.\n", "The RunInference API leverages Apache Beam concepts, such as the `BatchElements` transform and the `Shared` class, to support models in your pipelines that create transforms optimized for machine learning inference.\n", "\n", diff --git a/examples/notebooks/beam-ml/run_inference_sklearn.ipynb b/examples/notebooks/beam-ml/run_inference_sklearn.ipynb index c9e151750a348..e7564cdd561d4 100644 --- a/examples/notebooks/beam-ml/run_inference_sklearn.ipynb +++ b/examples/notebooks/beam-ml/run_inference_sklearn.ipynb @@ -51,6 +51,23 @@ }, "source": [ "# Apache Beam RunInference for scikit-learn\n", + "\n", + "\n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A8xNRyZMW1yK" + }, + "source": [ "This notebook demonstrates the use of the RunInference transform for [scikit-learn](https://scikit-learn.org/), also called sklearn.\n", "Apache Beam [RunInference](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.inference.base.html#apache_beam.ml.inference.base.RunInference) has implementations of the [ModelHandler](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.inference.base.html#apache_beam.ml.inference.base.ModelHandler) class prebuilt for scikit-learn. For more information about the RunInference API, see [Machine Learning](https://beam.apache.org/documentation/sdks/python-machine-learning) in the Apache Beam documentation.\n", "\n", diff --git a/examples/notebooks/beam-ml/run_inference_tensorflow.ipynb b/examples/notebooks/beam-ml/run_inference_tensorflow.ipynb index 81e3bd38cac60..8a00ce47b74de 100644 --- a/examples/notebooks/beam-ml/run_inference_tensorflow.ipynb +++ b/examples/notebooks/beam-ml/run_inference_tensorflow.ipynb @@ -47,8 +47,25 @@ }, { "cell_type": "markdown", + "metadata": { + "id": "A8xNRyZMW1yK" + }, "source": [ "# Apache Beam RunInference with TensorFlow\n", + "\n", + "\n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "source": [ "This notebook demonstrates the use of the RunInference transform for [TensorFlow](https://www.tensorflow.org/).\n", "Beam [RunInference](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.inference.base.html#apache_beam.ml.inference.base.RunInference) accepts a ModelHandler generated from [`tfx-bsl`](https://github.com/tensorflow/tfx-bsl) using `CreateModelHandler`.\n", "\n", @@ -595,4 +612,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/examples/notebooks/beam-ml/run_inference_tensorflow_hub.ipynb b/examples/notebooks/beam-ml/run_inference_tensorflow_hub.ipynb index 7d1f04f47c0ac..855de3af32aa9 100644 --- a/examples/notebooks/beam-ml/run_inference_tensorflow_hub.ipynb +++ b/examples/notebooks/beam-ml/run_inference_tensorflow_hub.ipynb @@ -38,6 +38,23 @@ "source": [ "# RunInference with Sentence-T5 (ST5) model\n", "\n", + "\n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + "
\n" + ], + "id": "3ac8fc4a-a0ef-47b9-bd80-10801eebe13e" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3402ecc9-28d6-4226-99b1-147a2d23b7a0" + }, + "source": [ "This example demonstrates the use of the RunInference transform with the pre-trained [ST5 text encoder model](https://tfhub.dev/google/sentence-t5/st5-base/1) from TensorFlow Hub. The transform runs locally using the [Interactive Runner](https://beam.apache.org/releases/pydoc/2.11.0/apache_beam.runners.interactive.interactive_runner.html)." ], "id": "3ac8fc4a-a0ef-47b9-bd80-10801eebe13e" diff --git a/learning/katas/python/IO/TextIO/ReadFromText/task.py b/learning/katas/python/IO/TextIO/ReadFromText/task.py index 720d9214abc83..fc9d51565790c 100644 --- a/learning/katas/python/IO/TextIO/ReadFromText/task.py +++ b/learning/katas/python/IO/TextIO/ReadFromText/task.py @@ -18,7 +18,9 @@ # name: ReadFromText # description: Task from katas to read from text files. # multifile: true -# context_line: 29 +# files: +# - name: countries.txt +# context_line: 33 # categories: # - IO # complexity: BASIC diff --git a/learning/katas/python/Streaming/Triggers/Early Triggers/task.py b/learning/katas/python/Streaming/Triggers/Early Triggers/task.py index 830e612a4307c..7dd22836be038 100644 --- a/learning/katas/python/Streaming/Triggers/Early Triggers/task.py +++ b/learning/katas/python/Streaming/Triggers/Early Triggers/task.py @@ -21,7 +21,9 @@ # name: EarlyTriggers # description: Task from katas to count events using early triggers # multifile: true -# context_line: 46 +# files: +# - name: generate_event.py +# context_line: 36 # categories: # - Streaming # complexity: MEDIUM diff --git a/learning/katas/python/Streaming/Triggers/Event Time Triggers/task.py b/learning/katas/python/Streaming/Triggers/Event Time Triggers/task.py index 283648499e1af..b32a453560f73 100644 --- a/learning/katas/python/Streaming/Triggers/Event Time Triggers/task.py +++ b/learning/katas/python/Streaming/Triggers/Event Time Triggers/task.py @@ -21,7 +21,9 @@ # name: EventTimeTriggers # description: Task from katas to count events with event time triggers # multifile: true -# context_line: 46 +# files: +# - name: generate_event.py +# context_line: 36 # categories: # - Streaming # complexity: MEDIUM @@ -41,15 +43,15 @@ class CountEvents(beam.PTransform): - def expand(self, events): - return (events - | beam.WindowInto(FixedWindows(5), - trigger=AfterWatermark(), - accumulation_mode=AccumulationMode.DISCARDING, - allowed_lateness=Duration(seconds=0)) - | beam.CombineGlobally(beam.combiners.CountCombineFn()).without_defaults()) + def expand(self, events): + return (events + | beam.WindowInto(FixedWindows(5), + trigger=AfterWatermark(), + accumulation_mode=AccumulationMode.DISCARDING, + allowed_lateness=Duration(seconds=0)) + | beam.CombineGlobally(beam.combiners.CountCombineFn()).without_defaults()) with beam.Pipeline() as p: - (p | GenerateEvent.sample_data() + (p | GenerateEvent.sample_data() | CountEvents() | LogElements(with_window=True)) diff --git a/learning/katas/python/Streaming/Triggers/Window Accumulation Modes/task.py b/learning/katas/python/Streaming/Triggers/Window Accumulation Modes/task.py index 51f592722a92b..3cbe9927a605c 100644 --- a/learning/katas/python/Streaming/Triggers/Window Accumulation Modes/task.py +++ b/learning/katas/python/Streaming/Triggers/Window Accumulation Modes/task.py @@ -20,7 +20,9 @@ # name: WindowAccumulationMode # description: Task from katas to count events using ACCUMULATING as accumulation mode # multifile: true -# 
context_line: 51 +# files: +# - name: generate_event.py +# context_line: 36 # categories: # - Streaming # complexity: ADVANCED diff --git a/learning/tour-of-beam/frontend/pubspec.yaml b/learning/tour-of-beam/frontend/pubspec.yaml index 5291908f13b87..3813ca17fbd8a 100644 --- a/learning/tour-of-beam/frontend/pubspec.yaml +++ b/learning/tour-of-beam/frontend/pubspec.yaml @@ -27,7 +27,7 @@ environment: flutter: '>=3.3.2' dependencies: - app_state: ^0.8.1 + app_state: ^0.8.4 collection: ^1.16.0 easy_localization: ^3.0.1 easy_localization_ext: ^0.1.0 diff --git a/playground/README.md b/playground/README.md index 41b1472bc0c80..4b3d9a6e574c8 100644 --- a/playground/README.md +++ b/playground/README.md @@ -120,3 +120,50 @@ cd beam See [terraform](./terraform/README.md) for details on how to build and deploy the application and its dependent infrastructure. + +# Manual Example deployment + +The following requirements are needed for deploying examples manually: + +1. GCP project with deployed Playground backend +2. Python (3.9.x) +3. Login into GCP (gcloud default login or using service account key) + +## Run example deployment script +Example deployment scripts uses following environment variables: + +GOOGLE_CLOUD_PROJECT - GCP project id where Playground backend is deployed +BEAM_ROOT_DIR - root folder to search for playground examples +SDK_CONFIG - location of sdk and default example configuration file +BEAM_EXAMPLE_CATEGORIES - location of example category configuration file +BEAM_USE_WEBGRPC - use grpc-Web instead of grpc (default) +GRPC_TIMEOUT - timeout for grpc calls (defaults to 10 sec) +BEAM_CONCURRENCY - number of eaxmples to run in parallel (defaults to 10) +SERVER_ADDRESS - address of the backend runnner service for a particular SDK + +usage: ci_cd.py [-h] +--step {CI,CD} +--sdk {SDK_JAVA,SDK_GO,SDK_PYTHON,SDK_SCIO} +--origin {PG_EXAMPLES,TB_EXAMPLES} +--subdirs SUBDIRS [SUBDIRS ...] + +Helper script to deploy examples for all supported sdk's: + +``` +cd playground/infrastructure + +export BEAM_ROOT_DIR="../../" +export SDK_CONFIG="../../playground/sdks.yaml" +export BEAM_EXAMPLE_CATEGORIES="../categories.yaml" +export BEAM_USE_WEBGRPC=yes +export BEAM_CONCURRENCY=4 +export PLAYGROUND_DNS_NAME="your registered dns name for Playground" + +for sdk in go java python scio; do + +export SDK=$sdk && +export SERVER_ADDRESS=https://${SDK}.$PLAYGROUND_DNS_NAME && + +python3 ci_cd.py --step CD --sdk SDK_${SDK^^} --origin PG_EXAMPLES --subdirs ./learning/katas ./examples ./sdks +done +``` diff --git a/playground/api/v1/api.proto b/playground/api/v1/api.proto index d7ed73fda6b64..3a62329fdb09d 100644 --- a/playground/api/v1/api.proto +++ b/playground/api/v1/api.proto @@ -77,6 +77,7 @@ message RunCodeRequest { // The pipeline options as they would be passed to the program (e.g. "--option1 value1 --option2 value2") string pipeline_options = 3; repeated Dataset datasets = 4; + repeated SnippetFile files = 5; } // RunCodeResponse contains information of the pipeline uuid. @@ -190,7 +191,7 @@ message PrecompiledObject{ repeated string tags = 12; repeated Dataset datasets = 13; -// Link to the example in the Beam repository + // Link to the example in the Beam repository string url_vcs = 14; string url_notebook = 15; } @@ -254,6 +255,7 @@ message GetPrecompiledObjectResponse{ // GetPrecompiledObjectResponse represents the source code of the PrecompiledObject. 
message GetPrecompiledObjectCodeResponse{ string code = 1; + repeated SnippetFile files = 2; } // GetPrecompiledObjectOutputResponse represents the result of the executed code. diff --git a/playground/backend/cmd/server/controller.go b/playground/backend/cmd/server/controller.go index f83932881ddd4..d35f45da34971 100644 --- a/playground/backend/cmd/server/controller.go +++ b/playground/backend/cmd/server/controller.go @@ -26,6 +26,7 @@ import ( "beam.apache.org/playground/backend/internal/code_processing" "beam.apache.org/playground/backend/internal/components" "beam.apache.org/playground/backend/internal/db" + "beam.apache.org/playground/backend/internal/db/entity" "beam.apache.org/playground/backend/internal/db/mapper" "beam.apache.org/playground/backend/internal/emulators" "beam.apache.org/playground/backend/internal/environment" @@ -47,9 +48,10 @@ const ( errorTitleGetDefaultExample = "Error during getting default example" errorTitleRunCode = "Error during run code" - userBadCloudPathErrMsg = "Invalid cloud path parameter" - userCloudConnectionErrMsg = "Cloud connection error" - resourceNotFoundErrMsg = "Resource is not found" + userBadCloudPathErrMsg = "Invalid cloud path parameter" + userCloudConnectionErrMsg = "Cloud connection error" + resourceNotFoundErrMsg = "Resource is not found" + resourceInconsistentErrMsg = "Resource is not consistent" ) // playgroundController processes `gRPC' requests from clients. @@ -97,8 +99,30 @@ func (controller *playgroundController) RunCode(ctx context.Context, info *pb.Ru kafkaMockCluster = kafkaMockClusters[0] prepareParams = prepareParamsVal } + sources := make([]entity.FileEntity, 0) + if len(info.Files) > 0 { + for _, file := range info.Files { + sources = append(sources, entity.FileEntity{ + Name: file.Name, + Content: file.Content, + IsMain: file.IsMain, + CntxLine: 1, + }) + } + } else { + fileName, err := utils.GetFileName("", info.Code, info.Sdk) + if err != nil { + return nil, cerrors.InternalError(errorTitleRunCode, "Failed to get default filename") + } + sources = append(sources, entity.FileEntity{ + Name: fileName, + Content: info.Code, + IsMain: true, + CntxLine: 1, + }) + } - lc, err := life_cycle.Setup(info.Sdk, info.Code, pipelineId, controller.env.ApplicationEnvs.WorkingDir(), controller.env.ApplicationEnvs.PipelinesFolder(), controller.env.BeamSdkEnvs.PreparedModDir(), kafkaMockCluster) + lc, err := life_cycle.Setup(info.Sdk, sources, pipelineId, controller.env.ApplicationEnvs.WorkingDir(), controller.env.ApplicationEnvs.PipelinesFolder(), controller.env.BeamSdkEnvs.PreparedModDir(), kafkaMockCluster) if err != nil { logger.Errorf("RunCode(): error during setup file system: %s\n", err.Error()) return nil, cerrors.InternalError("Error during preparing", "Error during setup file system for the code processing: %s", err.Error()) @@ -337,7 +361,7 @@ func (controller *playgroundController) GetPrecompiledObjectCode(ctx context.Con if err != nil { return nil, cerrors.InvalidArgumentError(errorTitleGetExampleCode, userBadCloudPathErrMsg) } - codeString, err := controller.db.GetExampleCode(ctx, exampleId) + files, err := controller.db.GetExampleCode(ctx, exampleId) if err != nil { switch err { case datastore.ErrNoSuchEntity: @@ -346,7 +370,23 @@ func (controller *playgroundController) GetPrecompiledObjectCode(ctx context.Con return nil, cerrors.InternalError(errorTitleGetExampleCode, userCloudConnectionErrMsg) } } - response := pb.GetPrecompiledObjectCodeResponse{Code: codeString} + if len(files) == 0 { + return nil, 
cerrors.NotFoundError(errorTitleGetExampleCode, resourceNotFoundErrMsg) + } + response := pb.GetPrecompiledObjectCodeResponse{} + for _, file := range files { + response.Files = append(response.Files, &pb.SnippetFile{ + Name: file.Name, + Content: file.Content, + IsMain: file.IsMain, + }) + if file.IsMain { + response.Code = file.Content + } + } + if len(response.Files) == 0 || response.Code == "" { + return nil, cerrors.InternalError(errorTitleGetExampleCode, resourceInconsistentErrMsg) + } return &response, nil } diff --git a/playground/backend/cmd/server/controller_test.go b/playground/backend/cmd/server/controller_test.go index ac5699b7e547e..2d61be12b9957 100644 --- a/playground/backend/cmd/server/controller_test.go +++ b/playground/backend/cmd/server/controller_test.go @@ -55,7 +55,7 @@ import ( const ( bufSize = 1024 * 1024 - javaConfig = "{\n \"compile_cmd\": \"javac\",\n \"run_cmd\": \"java\",\n \"test_cmd\": \"java\",\n \"compile_args\": [\n \"-d\",\n \"bin\",\n \"-classpath\"\n ],\n \"run_args\": [\n \"-cp\",\n \"bin:\"\n ],\n \"test_args\": [\n \"-cp\",\n \"bin:\",\n \"JUnit\"\n ]\n}" + javaConfig = "{\n \"compile_cmd\": \"javac\",\n \"run_cmd\": \"java\",\n \"test_cmd\": \"java\",\n \"compile_args\": [\n \"-d\",\n \"bin\",\n \"-parameters\",\n \"-classpath\"\n ],\n \"run_args\": [\n \"-cp\",\n \"bin:\"\n ],\n \"test_args\": [\n \"-cp\",\n \"bin:\",\n \"JUnit\"\n ]\n}" javaLogConfigFilename = "logging.properties" baseFileFolder = "executable_files" configFolder = "configs" @@ -245,6 +245,21 @@ func TestPlaygroundController_RunCode(t *testing.T) { }, wantErr: false, }, + { + name: "RunCode multifile", + args: args{ + ctx: context.Background(), + request: &pb.RunCodeRequest{ + Code: "MOCK_CODE", + Sdk: pb.Sdk_SDK_JAVA, + Files: []*pb.SnippetFile{ + {Name: "main.java", Content: "MOCK_CODE", IsMain: true}, + {Name: "import.java", Content: "import content", IsMain: false}, + }, + }, + }, + wantErr: false, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -1189,16 +1204,36 @@ func TestPlaygroundController_GetPrecompiledObjectCode(t *testing.T) { name string args args wantErr bool - wantResponse string + wantResponse *pb.GetPrecompiledObjectCodeResponse }{ { - name: "Getting the code of the specific example in the usual case", + name: "Getting the code of single-file example", args: args{ ctx: ctx, info: &pb.GetPrecompiledObjectCodeRequest{CloudPath: "SDK_JAVA/PRECOMPILED_OBJECT_TYPE_EXAMPLE/MOCK_DEFAULT_EXAMPLE"}, }, - wantErr: false, - wantResponse: "MOCK_CONTENT", + wantErr: false, + wantResponse: &pb.GetPrecompiledObjectCodeResponse{ + Code: "MOCK_CONTENT_0", + Files: []*pb.SnippetFile{ + {Name: "MOCK_NAME_0", Content: "MOCK_CONTENT_0", IsMain: true}, + }, + }, + }, + { + name: "Getting the code of multifile example", + args: args{ + ctx: ctx, + info: &pb.GetPrecompiledObjectCodeRequest{CloudPath: "SDK_JAVA/PRECOMPILED_OBJECT_TYPE_EXAMPLE/MOCK_MULTIFILE"}, + }, + wantErr: false, + wantResponse: &pb.GetPrecompiledObjectCodeResponse{ + Code: "MOCK_CONTENT_0", + Files: []*pb.SnippetFile{ + {Name: "MOCK_NAME_0", Content: "MOCK_CONTENT_0", IsMain: true}, + {Name: "MOCK_NAME_1", Content: "MOCK_CONTENT_1", IsMain: false}, + }, + }, }, } @@ -1209,9 +1244,8 @@ func TestPlaygroundController_GetPrecompiledObjectCode(t *testing.T) { t.Errorf("PlaygroundController_GetPrecompiledObjectCode() error = %v, wantErr %v", err, tt.wantErr) return } - if got.Code != tt.wantResponse { - t.Errorf("PlaygroundController_GetPrecompiledObjectCode() unexpected result") - } + 
assert.Equal(t, tt.wantResponse.Code, got.Code) + assert.Equal(t, tt.wantResponse.Files, got.Files) }) } } diff --git a/playground/backend/configs/SDK_JAVA.json b/playground/backend/configs/SDK_JAVA.json index 47b0339a7add9..6e7c87fed01b0 100644 --- a/playground/backend/configs/SDK_JAVA.json +++ b/playground/backend/configs/SDK_JAVA.json @@ -5,6 +5,7 @@ "compile_args": [ "-d", "bin", + "-parameters", "-classpath" ], "run_args": [ diff --git a/playground/backend/internal/api/v1/api.pb.go b/playground/backend/internal/api/v1/api.pb.go index 78fcb08e982ba..a3d7e3cd26786 100644 --- a/playground/backend/internal/api/v1/api.pb.go +++ b/playground/backend/internal/api/v1/api.pb.go @@ -393,8 +393,9 @@ type RunCodeRequest struct { Code string `protobuf:"bytes,1,opt,name=code,proto3" json:"code,omitempty"` Sdk Sdk `protobuf:"varint,2,opt,name=sdk,proto3,enum=api.v1.Sdk" json:"sdk,omitempty"` // The pipeline options as they would be passed to the program (e.g. "--option1 value1 --option2 value2") - PipelineOptions string `protobuf:"bytes,3,opt,name=pipeline_options,json=pipelineOptions,proto3" json:"pipeline_options,omitempty"` - Datasets []*Dataset `protobuf:"bytes,4,rep,name=datasets,proto3" json:"datasets,omitempty"` + PipelineOptions string `protobuf:"bytes,3,opt,name=pipeline_options,json=pipelineOptions,proto3" json:"pipeline_options,omitempty"` + Datasets []*Dataset `protobuf:"bytes,4,rep,name=datasets,proto3" json:"datasets,omitempty"` + Files []*SnippetFile `protobuf:"bytes,5,rep,name=files,proto3" json:"files,omitempty"` } func (x *RunCodeRequest) Reset() { @@ -457,6 +458,13 @@ func (x *RunCodeRequest) GetDatasets() []*Dataset { return nil } +func (x *RunCodeRequest) GetFiles() []*SnippetFile { + if x != nil { + return x.Files + } + return nil +} + // RunCodeResponse contains information of the pipeline uuid. type RunCodeResponse struct { state protoimpl.MessageState @@ -2024,7 +2032,8 @@ type GetPrecompiledObjectCodeResponse struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Code string `protobuf:"bytes,1,opt,name=code,proto3" json:"code,omitempty"` + Code string `protobuf:"bytes,1,opt,name=code,proto3" json:"code,omitempty"` + Files []*SnippetFile `protobuf:"bytes,2,rep,name=files,proto3" json:"files,omitempty"` } func (x *GetPrecompiledObjectCodeResponse) Reset() { @@ -2066,6 +2075,13 @@ func (x *GetPrecompiledObjectCodeResponse) GetCode() string { return "" } +func (x *GetPrecompiledObjectCodeResponse) GetFiles() []*SnippetFile { + if x != nil { + return x.Files + } + return nil +} + // GetPrecompiledObjectOutputResponse represents the result of the executed code. 
type GetPrecompiledObjectOutputResponse struct { state protoimpl.MessageState @@ -2644,7 +2660,7 @@ var file_api_v1_api_proto_rawDesc = []byte{ 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, - 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x9b, 0x01, 0x0a, 0x0e, 0x52, 0x75, 0x6e, 0x43, + 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xc6, 0x01, 0x0a, 0x0e, 0x52, 0x75, 0x6e, 0x43, 0x6f, 0x64, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x63, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x63, 0x6f, 0x64, 0x65, 0x12, 0x1d, 0x0a, 0x03, 0x73, 0x64, 0x6b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0b, 0x2e, 0x61, 0x70, @@ -2654,377 +2670,382 @@ var file_api_v1_api_proto_rawDesc = []byte{ 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x2b, 0x0a, 0x08, 0x64, 0x61, 0x74, 0x61, 0x73, 0x65, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x0f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x73, 0x65, 0x74, 0x52, 0x08, 0x64, 0x61, 0x74, - 0x61, 0x73, 0x65, 0x74, 0x73, 0x22, 0x36, 0x0a, 0x0f, 0x52, 0x75, 0x6e, 0x43, 0x6f, 0x64, 0x65, - 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x70, 0x69, 0x70, 0x65, - 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0c, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x55, 0x75, 0x69, 0x64, 0x22, 0x39, 0x0a, - 0x12, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, + 0x61, 0x73, 0x65, 0x74, 0x73, 0x12, 0x29, 0x0a, 0x05, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x18, 0x05, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x6e, + 0x69, 0x70, 0x70, 0x65, 0x74, 0x46, 0x69, 0x6c, 0x65, 0x52, 0x05, 0x66, 0x69, 0x6c, 0x65, 0x73, + 0x22, 0x36, 0x0a, 0x0f, 0x52, 0x75, 0x6e, 0x43, 0x6f, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x70, 0x69, 0x70, 0x65, - 0x6c, 0x69, 0x6e, 0x65, 0x55, 0x75, 0x69, 0x64, 0x22, 0x3d, 0x0a, 0x13, 0x43, 0x68, 0x65, 0x63, - 0x6b, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, - 0x26, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x0e, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, - 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0x41, 0x0a, 0x1a, 0x47, 0x65, 0x74, 0x56, 0x61, - 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, - 0x65, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x70, 0x69, - 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x55, 0x75, 0x69, 0x64, 0x22, 0x35, 0x0a, 0x1b, 0x47, 0x65, - 0x74, 0x56, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, - 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x75, 0x74, - 0x70, 0x75, 0x74, 0x18, 
0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, - 0x74, 0x22, 0x42, 0x0a, 0x1b, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x70, 0x61, 0x72, 0x61, 0x74, + 0x6c, 0x69, 0x6e, 0x65, 0x55, 0x75, 0x69, 0x64, 0x22, 0x39, 0x0a, 0x12, 0x43, 0x68, 0x65, 0x63, + 0x6b, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x23, + 0x0a, 0x0d, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x55, + 0x75, 0x69, 0x64, 0x22, 0x3d, 0x0a, 0x13, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x53, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x26, 0x0a, 0x06, 0x73, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0e, 0x2e, 0x61, 0x70, 0x69, + 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x22, 0x41, 0x0a, 0x1a, 0x47, 0x65, 0x74, 0x56, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, - 0x65, 0x55, 0x75, 0x69, 0x64, 0x22, 0x36, 0x0a, 0x1c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x70, - 0x61, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x22, 0x3e, 0x0a, - 0x17, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x4f, 0x75, 0x74, 0x70, 0x75, - 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x70, 0x69, 0x70, 0x65, - 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0c, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x55, 0x75, 0x69, 0x64, 0x22, 0x32, 0x0a, - 0x18, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x4f, 0x75, 0x74, 0x70, 0x75, - 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x75, 0x74, - 0x70, 0x75, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, - 0x74, 0x22, 0x3a, 0x0a, 0x13, 0x47, 0x65, 0x74, 0x52, 0x75, 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, - 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x70, 0x69, 0x70, 0x65, - 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0c, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x55, 0x75, 0x69, 0x64, 0x22, 0x2e, 0x0a, - 0x14, 0x47, 0x65, 0x74, 0x52, 0x75, 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x22, 0x39, 0x0a, - 0x12, 0x47, 0x65, 0x74, 0x52, 0x75, 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x55, 0x75, 0x69, 0x64, 0x22, 0x35, 0x0a, 0x1b, 0x47, 0x65, 0x74, 0x56, 0x61, 0x6c, 0x69, + 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6f, 0x75, 0x74, 
0x70, 0x75, 0x74, 0x22, 0x42, 0x0a, 0x1b, + 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x70, 0x61, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x75, + 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x70, + 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x0c, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x55, 0x75, 0x69, 0x64, + 0x22, 0x36, 0x0a, 0x1c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x70, 0x61, 0x72, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x22, 0x3e, 0x0a, 0x17, 0x47, 0x65, 0x74, 0x43, + 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x70, 0x69, 0x70, 0x65, - 0x6c, 0x69, 0x6e, 0x65, 0x55, 0x75, 0x69, 0x64, 0x22, 0x2d, 0x0a, 0x13, 0x47, 0x65, 0x74, 0x52, - 0x75, 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, - 0x16, 0x0a, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x22, 0x35, 0x0a, 0x0e, 0x47, 0x65, 0x74, 0x4c, 0x6f, - 0x67, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x70, 0x69, 0x70, - 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0c, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x55, 0x75, 0x69, 0x64, 0x22, 0x29, - 0x0a, 0x0f, 0x47, 0x65, 0x74, 0x4c, 0x6f, 0x67, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x22, 0x36, 0x0a, 0x0f, 0x47, 0x65, 0x74, - 0x47, 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x23, 0x0a, 0x0d, - 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x0c, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x55, 0x75, 0x69, - 0x64, 0x22, 0x28, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x47, 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x67, 0x72, 0x61, 0x70, 0x68, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x67, 0x72, 0x61, 0x70, 0x68, 0x22, 0x34, 0x0a, 0x0d, 0x43, - 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x23, 0x0a, 0x0d, - 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x0c, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x55, 0x75, 0x69, - 0x64, 0x22, 0x10, 0x0a, 0x0e, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x52, 0x65, 0x73, 0x70, 0x6f, - 0x6e, 0x73, 0x65, 0x22, 0x94, 0x04, 0x0a, 0x11, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, - 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x6f, - 0x75, 0x64, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, - 0x6c, 0x6f, 0x75, 0x64, 0x50, 0x61, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x20, 
0x0a, 0x0b, - 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x31, - 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1d, 0x2e, 0x61, - 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, - 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, - 0x65, 0x12, 0x29, 0x0a, 0x10, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x6f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x70, 0x69, 0x70, - 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x12, 0x0a, 0x04, - 0x6c, 0x69, 0x6e, 0x6b, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6c, 0x69, 0x6e, 0x6b, - 0x12, 0x1c, 0x0a, 0x09, 0x6d, 0x75, 0x6c, 0x74, 0x69, 0x66, 0x69, 0x6c, 0x65, 0x18, 0x07, 0x20, - 0x01, 0x28, 0x08, 0x52, 0x09, 0x6d, 0x75, 0x6c, 0x74, 0x69, 0x66, 0x69, 0x6c, 0x65, 0x12, 0x21, - 0x0a, 0x0c, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x6c, 0x69, 0x6e, 0x65, 0x18, 0x08, - 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x4c, 0x69, 0x6e, - 0x65, 0x12, 0x27, 0x0a, 0x0f, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x5f, 0x65, 0x78, 0x61, - 0x6d, 0x70, 0x6c, 0x65, 0x18, 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, 0x64, 0x65, 0x66, 0x61, - 0x75, 0x6c, 0x74, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x12, 0x1d, 0x0a, 0x03, 0x73, 0x64, - 0x6b, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0b, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, - 0x2e, 0x53, 0x64, 0x6b, 0x52, 0x03, 0x73, 0x64, 0x6b, 0x12, 0x32, 0x0a, 0x0a, 0x63, 0x6f, 0x6d, - 0x70, 0x6c, 0x65, 0x78, 0x69, 0x74, 0x79, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x12, 0x2e, - 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x69, 0x74, - 0x79, 0x52, 0x0a, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x69, 0x74, 0x79, 0x12, 0x12, 0x0a, - 0x04, 0x74, 0x61, 0x67, 0x73, 0x18, 0x0c, 0x20, 0x03, 0x28, 0x09, 0x52, 0x04, 0x74, 0x61, 0x67, - 0x73, 0x12, 0x2b, 0x0a, 0x08, 0x64, 0x61, 0x74, 0x61, 0x73, 0x65, 0x74, 0x73, 0x18, 0x0d, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x0f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x44, 0x61, 0x74, - 0x61, 0x73, 0x65, 0x74, 0x52, 0x08, 0x64, 0x61, 0x74, 0x61, 0x73, 0x65, 0x74, 0x73, 0x12, 0x17, - 0x0a, 0x07, 0x75, 0x72, 0x6c, 0x5f, 0x76, 0x63, 0x73, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x06, 0x75, 0x72, 0x6c, 0x56, 0x63, 0x73, 0x12, 0x21, 0x0a, 0x0c, 0x75, 0x72, 0x6c, 0x5f, 0x6e, - 0x6f, 0x74, 0x65, 0x62, 0x6f, 0x6f, 0x6b, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x75, - 0x72, 0x6c, 0x4e, 0x6f, 0x74, 0x65, 0x62, 0x6f, 0x6f, 0x6b, 0x22, 0xe5, 0x01, 0x0a, 0x0a, 0x43, - 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x69, 0x65, 0x73, 0x12, 0x1d, 0x0a, 0x03, 0x73, 0x64, 0x6b, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0b, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, - 0x53, 0x64, 0x6b, 0x52, 0x03, 0x73, 0x64, 0x6b, 0x12, 0x3b, 0x0a, 0x0a, 0x63, 0x61, 0x74, 0x65, - 0x67, 0x6f, 0x72, 0x69, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x61, - 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x43, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x69, 0x65, 0x73, - 0x2e, 0x43, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x79, 0x52, 0x0a, 0x63, 0x61, 0x74, 0x65, 0x67, - 0x6f, 0x72, 0x69, 0x65, 0x73, 0x1a, 0x7b, 0x0a, 0x08, 0x43, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, - 0x79, 0x12, 
0x23, 0x0a, 0x0d, 0x63, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x79, 0x5f, 0x6e, 0x61, - 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x63, 0x61, 0x74, 0x65, 0x67, 0x6f, - 0x72, 0x79, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x4a, 0x0a, 0x13, 0x70, 0x72, 0x65, 0x63, 0x6f, 0x6d, - 0x70, 0x69, 0x6c, 0x65, 0x64, 0x5f, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x18, 0x02, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x50, 0x72, 0x65, - 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x52, 0x12, - 0x70, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, - 0x74, 0x73, 0x22, 0x59, 0x0a, 0x1c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, - 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, - 0x73, 0x74, 0x12, 0x1d, 0x0a, 0x03, 0x73, 0x64, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x0b, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x64, 0x6b, 0x52, 0x03, 0x73, 0x64, - 0x6b, 0x12, 0x1a, 0x0a, 0x08, 0x63, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x79, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x08, 0x63, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x79, 0x22, 0x3c, 0x0a, - 0x1b, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, - 0x62, 0x6a, 0x65, 0x63, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, 0x0a, 0x0a, - 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x09, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x50, 0x61, 0x74, 0x68, 0x22, 0x40, 0x0a, 0x1f, 0x47, - 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, - 0x65, 0x63, 0x74, 0x43, 0x6f, 0x64, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, - 0x0a, 0x0a, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x09, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x50, 0x61, 0x74, 0x68, 0x22, 0x42, 0x0a, - 0x21, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, - 0x62, 0x6a, 0x65, 0x63, 0x74, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, - 0x73, 0x74, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x5f, 0x70, 0x61, 0x74, 0x68, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x50, 0x61, 0x74, - 0x68, 0x22, 0x40, 0x0a, 0x1f, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, - 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x4c, 0x6f, 0x67, 0x73, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x5f, 0x70, 0x61, + 0x6c, 0x69, 0x6e, 0x65, 0x55, 0x75, 0x69, 0x64, 0x22, 0x32, 0x0a, 0x18, 0x47, 0x65, 0x74, 0x43, + 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x22, 0x3a, 0x0a, 0x13, + 0x47, 0x65, 0x74, 0x52, 0x75, 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, + 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x70, 0x69, 0x70, 0x65, + 0x6c, 0x69, 0x6e, 0x65, 0x55, 0x75, 0x69, 0x64, 0x22, 0x2e, 0x0a, 0x14, 0x47, 0x65, 0x74, 0x52, + 0x75, 0x6e, 0x4f, 0x75, 0x74, 0x70, 
0x75, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x22, 0x39, 0x0a, 0x12, 0x47, 0x65, 0x74, 0x52, + 0x75, 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x23, + 0x0a, 0x0d, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x55, + 0x75, 0x69, 0x64, 0x22, 0x2d, 0x0a, 0x13, 0x47, 0x65, 0x74, 0x52, 0x75, 0x6e, 0x45, 0x72, 0x72, + 0x6f, 0x72, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x75, + 0x74, 0x70, 0x75, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6f, 0x75, 0x74, 0x70, + 0x75, 0x74, 0x22, 0x35, 0x0a, 0x0e, 0x47, 0x65, 0x74, 0x4c, 0x6f, 0x67, 0x73, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, + 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x70, 0x69, 0x70, + 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x55, 0x75, 0x69, 0x64, 0x22, 0x29, 0x0a, 0x0f, 0x47, 0x65, 0x74, + 0x4c, 0x6f, 0x67, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, + 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6f, 0x75, + 0x74, 0x70, 0x75, 0x74, 0x22, 0x36, 0x0a, 0x0f, 0x47, 0x65, 0x74, 0x47, 0x72, 0x61, 0x70, 0x68, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x70, 0x69, 0x70, 0x65, 0x6c, + 0x69, 0x6e, 0x65, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, + 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x55, 0x75, 0x69, 0x64, 0x22, 0x28, 0x0a, 0x10, + 0x47, 0x65, 0x74, 0x47, 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x14, 0x0a, 0x05, 0x67, 0x72, 0x61, 0x70, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x05, 0x67, 0x72, 0x61, 0x70, 0x68, 0x22, 0x34, 0x0a, 0x0d, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x70, 0x69, 0x70, 0x65, 0x6c, + 0x69, 0x6e, 0x65, 0x5f, 0x75, 0x75, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, + 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x55, 0x75, 0x69, 0x64, 0x22, 0x10, 0x0a, 0x0e, + 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x94, + 0x04, 0x0a, 0x11, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, + 0x6a, 0x65, 0x63, 0x74, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x50, - 0x61, 0x74, 0x68, 0x22, 0x41, 0x0a, 0x20, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, - 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x47, 0x72, 0x61, 0x70, 0x68, + 0x61, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x20, 0x0a, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, + 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x64, 0x65, + 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x31, 0x0a, 0x04, 0x74, 0x79, 0x70, + 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1d, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, + 0x2e, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 
0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, + 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x29, 0x0a, 0x10, + 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x6c, 0x69, 0x6e, 0x6b, 0x18, + 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6c, 0x69, 0x6e, 0x6b, 0x12, 0x1c, 0x0a, 0x09, 0x6d, + 0x75, 0x6c, 0x74, 0x69, 0x66, 0x69, 0x6c, 0x65, 0x18, 0x07, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, + 0x6d, 0x75, 0x6c, 0x74, 0x69, 0x66, 0x69, 0x6c, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x63, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x5f, 0x6c, 0x69, 0x6e, 0x65, 0x18, 0x08, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x12, 0x27, 0x0a, 0x0f, + 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x5f, 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x18, + 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x45, 0x78, + 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x12, 0x1d, 0x0a, 0x03, 0x73, 0x64, 0x6b, 0x18, 0x0a, 0x20, 0x01, + 0x28, 0x0e, 0x32, 0x0b, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x64, 0x6b, 0x52, + 0x03, 0x73, 0x64, 0x6b, 0x12, 0x32, 0x0a, 0x0a, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x69, + 0x74, 0x79, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x12, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, + 0x31, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x69, 0x74, 0x79, 0x52, 0x0a, 0x63, 0x6f, + 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x69, 0x74, 0x79, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x61, 0x67, 0x73, + 0x18, 0x0c, 0x20, 0x03, 0x28, 0x09, 0x52, 0x04, 0x74, 0x61, 0x67, 0x73, 0x12, 0x2b, 0x0a, 0x08, + 0x64, 0x61, 0x74, 0x61, 0x73, 0x65, 0x74, 0x73, 0x18, 0x0d, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x0f, + 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x73, 0x65, 0x74, 0x52, + 0x08, 0x64, 0x61, 0x74, 0x61, 0x73, 0x65, 0x74, 0x73, 0x12, 0x17, 0x0a, 0x07, 0x75, 0x72, 0x6c, + 0x5f, 0x76, 0x63, 0x73, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x75, 0x72, 0x6c, 0x56, + 0x63, 0x73, 0x12, 0x21, 0x0a, 0x0c, 0x75, 0x72, 0x6c, 0x5f, 0x6e, 0x6f, 0x74, 0x65, 0x62, 0x6f, + 0x6f, 0x6b, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x75, 0x72, 0x6c, 0x4e, 0x6f, 0x74, + 0x65, 0x62, 0x6f, 0x6f, 0x6b, 0x22, 0xe5, 0x01, 0x0a, 0x0a, 0x43, 0x61, 0x74, 0x65, 0x67, 0x6f, + 0x72, 0x69, 0x65, 0x73, 0x12, 0x1d, 0x0a, 0x03, 0x73, 0x64, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0e, 0x32, 0x0b, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x64, 0x6b, 0x52, 0x03, + 0x73, 0x64, 0x6b, 0x12, 0x3b, 0x0a, 0x0a, 0x63, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x69, 0x65, + 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, + 0x2e, 0x43, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x69, 0x65, 0x73, 0x2e, 0x43, 0x61, 0x74, 0x65, + 0x67, 0x6f, 0x72, 0x79, 0x52, 0x0a, 0x63, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x69, 0x65, 0x73, + 0x1a, 0x7b, 0x0a, 0x08, 0x43, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x79, 0x12, 0x23, 0x0a, 0x0d, + 0x63, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x79, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x0c, 0x63, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x79, 0x4e, 0x61, 0x6d, + 0x65, 0x12, 0x4a, 0x0a, 0x13, 0x70, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, + 0x5f, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 
0x32, 0x19, + 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, + 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x52, 0x12, 0x70, 0x72, 0x65, 0x63, 0x6f, + 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x22, 0x59, 0x0a, + 0x1c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, + 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, 0x0a, + 0x03, 0x73, 0x64, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0b, 0x2e, 0x61, 0x70, 0x69, + 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x64, 0x6b, 0x52, 0x03, 0x73, 0x64, 0x6b, 0x12, 0x1a, 0x0a, 0x08, + 0x63, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, + 0x63, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x79, 0x22, 0x3c, 0x0a, 0x1b, 0x47, 0x65, 0x74, 0x50, + 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6c, 0x6f, - 0x75, 0x64, 0x50, 0x61, 0x74, 0x68, 0x22, 0x43, 0x0a, 0x22, 0x47, 0x65, 0x74, 0x44, 0x65, 0x66, - 0x61, 0x75, 0x6c, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, - 0x62, 0x6a, 0x65, 0x63, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, 0x0a, 0x03, - 0x73, 0x64, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0b, 0x2e, 0x61, 0x70, 0x69, 0x2e, - 0x76, 0x31, 0x2e, 0x53, 0x64, 0x6b, 0x52, 0x03, 0x73, 0x64, 0x6b, 0x22, 0x5a, 0x0a, 0x1d, 0x47, - 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, - 0x65, 0x63, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x39, 0x0a, 0x0e, - 0x73, 0x64, 0x6b, 0x5f, 0x63, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x69, 0x65, 0x73, 0x18, 0x01, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x43, 0x61, - 0x74, 0x65, 0x67, 0x6f, 0x72, 0x69, 0x65, 0x73, 0x52, 0x0d, 0x73, 0x64, 0x6b, 0x43, 0x61, 0x74, - 0x65, 0x67, 0x6f, 0x72, 0x69, 0x65, 0x73, 0x22, 0x68, 0x0a, 0x1c, 0x47, 0x65, 0x74, 0x50, 0x72, - 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x48, 0x0a, 0x12, 0x70, 0x72, 0x65, 0x63, 0x6f, - 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x5f, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x50, 0x72, 0x65, - 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x52, 0x11, - 0x70, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, - 0x74, 0x22, 0x36, 0x0a, 0x20, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, - 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x43, 0x6f, 0x64, 0x65, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x63, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x04, 0x63, 0x6f, 0x64, 0x65, 0x22, 0x3c, 0x0a, 0x22, 0x47, 0x65, 0x74, - 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, - 0x74, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, - 0x16, 0x0a, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x06, 0x6f, 
0x75, 0x74, 0x70, 0x75, 0x74, 0x22, 0x3a, 0x0a, 0x20, 0x47, 0x65, 0x74, 0x50, 0x72, - 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x4c, - 0x6f, 0x67, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6f, - 0x75, 0x74, 0x70, 0x75, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6f, 0x75, 0x74, - 0x70, 0x75, 0x74, 0x22, 0x39, 0x0a, 0x21, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, - 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x47, 0x72, 0x61, 0x70, 0x68, - 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x67, 0x72, 0x61, 0x70, - 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x67, 0x72, 0x61, 0x70, 0x68, 0x22, 0x6f, - 0x0a, 0x23, 0x47, 0x65, 0x74, 0x44, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x50, 0x72, 0x65, 0x63, - 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x48, 0x0a, 0x12, 0x70, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, - 0x69, 0x6c, 0x65, 0x64, 0x5f, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x0b, 0x32, 0x19, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x50, 0x72, 0x65, 0x63, 0x6f, - 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x52, 0x11, 0x70, 0x72, - 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x22, - 0x54, 0x0a, 0x0b, 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, 0x74, 0x46, 0x69, 0x6c, 0x65, 0x12, 0x12, - 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, - 0x6d, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x12, 0x17, 0x0a, 0x07, - 0x69, 0x73, 0x5f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x06, 0x69, - 0x73, 0x4d, 0x61, 0x69, 0x6e, 0x22, 0xe6, 0x01, 0x0a, 0x12, 0x53, 0x61, 0x76, 0x65, 0x53, 0x6e, - 0x69, 0x70, 0x70, 0x65, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x29, 0x0a, 0x05, - 0x66, 0x69, 0x6c, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x61, 0x70, - 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, 0x74, 0x46, 0x69, 0x6c, 0x65, - 0x52, 0x05, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x12, 0x1d, 0x0a, 0x03, 0x73, 0x64, 0x6b, 0x18, 0x02, + 0x75, 0x64, 0x50, 0x61, 0x74, 0x68, 0x22, 0x40, 0x0a, 0x1f, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, + 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x43, 0x6f, + 0x64, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x6f, + 0x75, 0x64, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, + 0x6c, 0x6f, 0x75, 0x64, 0x50, 0x61, 0x74, 0x68, 0x22, 0x42, 0x0a, 0x21, 0x47, 0x65, 0x74, 0x50, + 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, + 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, 0x0a, + 0x0a, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x09, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x50, 0x61, 0x74, 0x68, 0x22, 0x40, 0x0a, 0x1f, + 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, + 0x6a, 0x65, 0x63, 0x74, 0x4c, 0x6f, 0x67, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, + 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x6f, 
0x75, 0x64, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x50, 0x61, 0x74, 0x68, 0x22, 0x41, + 0x0a, 0x20, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, + 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x47, 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x5f, 0x70, 0x61, 0x74, 0x68, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x50, 0x61, 0x74, + 0x68, 0x22, 0x43, 0x0a, 0x22, 0x47, 0x65, 0x74, 0x44, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x50, + 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, 0x0a, 0x03, 0x73, 0x64, 0x6b, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0b, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x64, - 0x6b, 0x52, 0x03, 0x73, 0x64, 0x6b, 0x12, 0x29, 0x0a, 0x10, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, - 0x6e, 0x65, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0f, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x12, 0x32, 0x0a, 0x0a, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x69, 0x74, 0x79, 0x18, - 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x12, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x43, - 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x69, 0x74, 0x79, 0x52, 0x0a, 0x63, 0x6f, 0x6d, 0x70, 0x6c, - 0x65, 0x78, 0x69, 0x74, 0x79, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x65, 0x72, 0x73, 0x69, 0x73, 0x74, - 0x65, 0x6e, 0x63, 0x65, 0x5f, 0x6b, 0x65, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, - 0x70, 0x65, 0x72, 0x73, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x4b, 0x65, 0x79, 0x22, 0x25, - 0x0a, 0x13, 0x53, 0x61, 0x76, 0x65, 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, 0x74, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x02, 0x69, 0x64, 0x22, 0x23, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x53, 0x6e, 0x69, 0x70, - 0x70, 0x65, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x22, 0xbd, 0x01, 0x0a, 0x12, 0x47, - 0x65, 0x74, 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x12, 0x29, 0x0a, 0x05, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, - 0x32, 0x13, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, - 0x74, 0x46, 0x69, 0x6c, 0x65, 0x52, 0x05, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x12, 0x1d, 0x0a, 0x03, - 0x73, 0x64, 0x6b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0b, 0x2e, 0x61, 0x70, 0x69, 0x2e, - 0x76, 0x31, 0x2e, 0x53, 0x64, 0x6b, 0x52, 0x03, 0x73, 0x64, 0x6b, 0x12, 0x29, 0x0a, 0x10, 0x70, - 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, - 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x4f, - 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x32, 0x0a, 0x0a, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, - 0x78, 0x69, 0x74, 0x79, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x12, 0x2e, 0x61, 0x70, 0x69, - 0x2e, 0x76, 0x31, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x69, 0x74, 0x79, 0x52, 0x0a, - 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x69, 0x74, 0x79, 0x2a, 0x52, 0x0a, 0x03, 0x53, 0x64, - 0x6b, 0x12, 0x13, 0x0a, 0x0f, 0x53, 0x44, 0x4b, 0x5f, 0x55, 
0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, - 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x53, 0x44, 0x4b, 0x5f, 0x4a, 0x41, - 0x56, 0x41, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x53, 0x44, 0x4b, 0x5f, 0x47, 0x4f, 0x10, 0x02, - 0x12, 0x0e, 0x0a, 0x0a, 0x53, 0x44, 0x4b, 0x5f, 0x50, 0x59, 0x54, 0x48, 0x4f, 0x4e, 0x10, 0x03, - 0x12, 0x0c, 0x0a, 0x08, 0x53, 0x44, 0x4b, 0x5f, 0x53, 0x43, 0x49, 0x4f, 0x10, 0x04, 0x2a, 0xb8, - 0x02, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x0a, 0x12, 0x53, 0x54, 0x41, - 0x54, 0x55, 0x53, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, - 0x00, 0x12, 0x15, 0x0a, 0x11, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x56, 0x41, 0x4c, 0x49, - 0x44, 0x41, 0x54, 0x49, 0x4e, 0x47, 0x10, 0x01, 0x12, 0x1b, 0x0a, 0x17, 0x53, 0x54, 0x41, 0x54, - 0x55, 0x53, 0x5f, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x45, 0x52, - 0x52, 0x4f, 0x52, 0x10, 0x02, 0x12, 0x14, 0x0a, 0x10, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, - 0x50, 0x52, 0x45, 0x50, 0x41, 0x52, 0x49, 0x4e, 0x47, 0x10, 0x03, 0x12, 0x1c, 0x0a, 0x18, 0x53, - 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x50, 0x52, 0x45, 0x50, 0x41, 0x52, 0x41, 0x54, 0x49, 0x4f, - 0x4e, 0x5f, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0x04, 0x12, 0x14, 0x0a, 0x10, 0x53, 0x54, 0x41, - 0x54, 0x55, 0x53, 0x5f, 0x43, 0x4f, 0x4d, 0x50, 0x49, 0x4c, 0x49, 0x4e, 0x47, 0x10, 0x05, 0x12, - 0x18, 0x0a, 0x14, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x43, 0x4f, 0x4d, 0x50, 0x49, 0x4c, - 0x45, 0x5f, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0x06, 0x12, 0x14, 0x0a, 0x10, 0x53, 0x54, 0x41, - 0x54, 0x55, 0x53, 0x5f, 0x45, 0x58, 0x45, 0x43, 0x55, 0x54, 0x49, 0x4e, 0x47, 0x10, 0x07, 0x12, - 0x13, 0x0a, 0x0f, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x46, 0x49, 0x4e, 0x49, 0x53, 0x48, - 0x45, 0x44, 0x10, 0x08, 0x12, 0x14, 0x0a, 0x10, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x52, - 0x55, 0x4e, 0x5f, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0x09, 0x12, 0x10, 0x0a, 0x0c, 0x53, 0x54, - 0x41, 0x54, 0x55, 0x53, 0x5f, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0x0a, 0x12, 0x16, 0x0a, 0x12, - 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x52, 0x55, 0x4e, 0x5f, 0x54, 0x49, 0x4d, 0x45, 0x4f, - 0x55, 0x54, 0x10, 0x0b, 0x12, 0x13, 0x0a, 0x0f, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x43, - 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x45, 0x44, 0x10, 0x0c, 0x2a, 0xae, 0x01, 0x0a, 0x15, 0x50, 0x72, - 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x54, - 0x79, 0x70, 0x65, 0x12, 0x27, 0x0a, 0x23, 0x50, 0x52, 0x45, 0x43, 0x4f, 0x4d, 0x50, 0x49, 0x4c, - 0x45, 0x44, 0x5f, 0x4f, 0x42, 0x4a, 0x45, 0x43, 0x54, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, - 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x23, 0x0a, 0x1f, - 0x50, 0x52, 0x45, 0x43, 0x4f, 0x4d, 0x50, 0x49, 0x4c, 0x45, 0x44, 0x5f, 0x4f, 0x42, 0x4a, 0x45, - 0x43, 0x54, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x45, 0x58, 0x41, 0x4d, 0x50, 0x4c, 0x45, 0x10, - 0x01, 0x12, 0x20, 0x0a, 0x1c, 0x50, 0x52, 0x45, 0x43, 0x4f, 0x4d, 0x50, 0x49, 0x4c, 0x45, 0x44, - 0x5f, 0x4f, 0x42, 0x4a, 0x45, 0x43, 0x54, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x4b, 0x41, 0x54, - 0x41, 0x10, 0x02, 0x12, 0x25, 0x0a, 0x21, 0x50, 0x52, 0x45, 0x43, 0x4f, 0x4d, 0x50, 0x49, 0x4c, - 0x45, 0x44, 0x5f, 0x4f, 0x42, 0x4a, 0x45, 0x43, 0x54, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, - 0x4e, 0x49, 0x54, 0x5f, 0x54, 0x45, 0x53, 0x54, 0x10, 0x03, 0x2a, 0x6e, 0x0a, 0x0a, 0x43, 0x6f, - 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x69, 0x74, 0x79, 0x12, 0x1a, 0x0a, 0x16, 0x43, 0x4f, 
0x4d, 0x50, - 0x4c, 0x45, 0x58, 0x49, 0x54, 0x59, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, - 0x45, 0x44, 0x10, 0x00, 0x12, 0x14, 0x0a, 0x10, 0x43, 0x4f, 0x4d, 0x50, 0x4c, 0x45, 0x58, 0x49, - 0x54, 0x59, 0x5f, 0x42, 0x41, 0x53, 0x49, 0x43, 0x10, 0x01, 0x12, 0x15, 0x0a, 0x11, 0x43, 0x4f, - 0x4d, 0x50, 0x4c, 0x45, 0x58, 0x49, 0x54, 0x59, 0x5f, 0x4d, 0x45, 0x44, 0x49, 0x55, 0x4d, 0x10, - 0x02, 0x12, 0x17, 0x0a, 0x13, 0x43, 0x4f, 0x4d, 0x50, 0x4c, 0x45, 0x58, 0x49, 0x54, 0x59, 0x5f, - 0x41, 0x44, 0x56, 0x41, 0x4e, 0x43, 0x45, 0x44, 0x10, 0x03, 0x2a, 0x46, 0x0a, 0x0c, 0x45, 0x6d, - 0x75, 0x6c, 0x61, 0x74, 0x6f, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1d, 0x0a, 0x19, 0x45, 0x4d, - 0x55, 0x4c, 0x41, 0x54, 0x4f, 0x52, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x4e, 0x53, 0x50, - 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x17, 0x0a, 0x13, 0x45, 0x4d, 0x55, - 0x4c, 0x41, 0x54, 0x4f, 0x52, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x4b, 0x41, 0x46, 0x4b, 0x41, - 0x10, 0x01, 0x32, 0x8b, 0x0d, 0x0a, 0x11, 0x50, 0x6c, 0x61, 0x79, 0x67, 0x72, 0x6f, 0x75, 0x6e, - 0x64, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x3a, 0x0a, 0x07, 0x52, 0x75, 0x6e, 0x43, - 0x6f, 0x64, 0x65, 0x12, 0x16, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x52, 0x75, 0x6e, - 0x43, 0x6f, 0x64, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x17, 0x2e, 0x61, 0x70, - 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x52, 0x75, 0x6e, 0x43, 0x6f, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, - 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x46, 0x0a, 0x0b, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x53, 0x74, 0x61, - 0x74, 0x75, 0x73, 0x12, 0x1a, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x43, 0x68, 0x65, - 0x63, 0x6b, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, - 0x1b, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x53, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x49, 0x0a, 0x0c, - 0x47, 0x65, 0x74, 0x52, 0x75, 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x12, 0x1b, 0x2e, 0x61, - 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x52, 0x75, 0x6e, 0x4f, 0x75, 0x74, 0x70, - 0x75, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1c, 0x2e, 0x61, 0x70, 0x69, 0x2e, - 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x52, 0x75, 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x3a, 0x0a, 0x07, 0x47, 0x65, 0x74, 0x4c, 0x6f, - 0x67, 0x73, 0x12, 0x16, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x4c, - 0x6f, 0x67, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x17, 0x2e, 0x61, 0x70, 0x69, - 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x4c, 0x6f, 0x67, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, - 0x6e, 0x73, 0x65, 0x12, 0x3d, 0x0a, 0x08, 0x47, 0x65, 0x74, 0x47, 0x72, 0x61, 0x70, 0x68, 0x12, - 0x17, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x47, 0x72, 0x61, 0x70, - 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x18, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, - 0x31, 0x2e, 0x47, 0x65, 0x74, 0x47, 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, - 0x73, 0x65, 0x12, 0x46, 0x0a, 0x0b, 0x47, 0x65, 0x74, 0x52, 0x75, 0x6e, 0x45, 0x72, 0x72, 0x6f, - 0x72, 0x12, 0x1a, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x52, 0x75, - 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1b, 0x2e, - 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x52, 0x75, 0x6e, 0x45, 0x72, 0x72, - 0x6f, 0x72, 
0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x5e, 0x0a, 0x13, 0x47, 0x65, - 0x74, 0x56, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, - 0x74, 0x12, 0x22, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x56, 0x61, - 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x23, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, - 0x65, 0x74, 0x56, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x75, 0x74, 0x70, - 0x75, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x61, 0x0a, 0x14, 0x47, 0x65, - 0x74, 0x50, 0x72, 0x65, 0x70, 0x61, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x75, 0x74, 0x70, - 0x75, 0x74, 0x12, 0x23, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, - 0x72, 0x65, 0x70, 0x61, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, - 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x24, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, - 0x2e, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x70, 0x61, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, - 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x55, 0x0a, - 0x10, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x4f, 0x75, 0x74, 0x70, 0x75, - 0x74, 0x12, 0x1f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, - 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, - 0x73, 0x74, 0x1a, 0x20, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x43, - 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x73, 0x70, - 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x37, 0x0a, 0x06, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x12, 0x15, - 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x52, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x16, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x43, - 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x64, 0x0a, - 0x15, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, - 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x12, 0x24, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, + 0x6b, 0x52, 0x03, 0x73, 0x64, 0x6b, 0x22, 0x5a, 0x0a, 0x1d, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, + 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x52, + 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x39, 0x0a, 0x0e, 0x73, 0x64, 0x6b, 0x5f, 0x63, + 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x69, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x12, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x43, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, + 0x69, 0x65, 0x73, 0x52, 0x0d, 0x73, 0x64, 0x6b, 0x43, 0x61, 0x74, 0x65, 0x67, 0x6f, 0x72, 0x69, + 0x65, 0x73, 0x22, 0x68, 0x0a, 0x1c, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, + 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x12, 0x48, 0x0a, 0x12, 0x70, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, + 0x64, 0x5f, 0x6f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, + 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, + 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x52, 0x11, 0x70, 0x72, 0x65, 0x63, 0x6f, + 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 
0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x22, 0x61, 0x0a, 0x20, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, - 0x6a, 0x65, 0x63, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x25, 0x2e, 0x61, + 0x6a, 0x65, 0x63, 0x74, 0x43, 0x6f, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x12, 0x0a, 0x04, 0x63, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, + 0x63, 0x6f, 0x64, 0x65, 0x12, 0x29, 0x0a, 0x05, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x18, 0x02, 0x20, + 0x03, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x6e, 0x69, + 0x70, 0x70, 0x65, 0x74, 0x46, 0x69, 0x6c, 0x65, 0x52, 0x05, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x22, + 0x3c, 0x0a, 0x22, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, + 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x73, + 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x22, 0x3a, 0x0a, + 0x20, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, + 0x62, 0x6a, 0x65, 0x63, 0x74, 0x4c, 0x6f, 0x67, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x06, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x22, 0x39, 0x0a, 0x21, 0x47, 0x65, 0x74, + 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, + 0x74, 0x47, 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x14, + 0x0a, 0x05, 0x67, 0x72, 0x61, 0x70, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x67, + 0x72, 0x61, 0x70, 0x68, 0x22, 0x6f, 0x0a, 0x23, 0x47, 0x65, 0x74, 0x44, 0x65, 0x66, 0x61, 0x75, + 0x6c, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, + 0x65, 0x63, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x48, 0x0a, 0x12, 0x70, + 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x5f, 0x6f, 0x62, 0x6a, 0x65, 0x63, + 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, + 0x2e, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, + 0x63, 0x74, 0x52, 0x11, 0x70, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, + 0x62, 0x6a, 0x65, 0x63, 0x74, 0x22, 0x54, 0x0a, 0x0b, 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, 0x74, + 0x46, 0x69, 0x6c, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x63, 0x6f, 0x6e, 0x74, + 0x65, 0x6e, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, + 0x6e, 0x74, 0x12, 0x17, 0x0a, 0x07, 0x69, 0x73, 0x5f, 0x6d, 0x61, 0x69, 0x6e, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x06, 0x69, 0x73, 0x4d, 0x61, 0x69, 0x6e, 0x22, 0xe6, 0x01, 0x0a, 0x12, + 0x53, 0x61, 0x76, 0x65, 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x12, 0x29, 0x0a, 0x05, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x13, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x6e, 0x69, 0x70, 0x70, + 0x65, 0x74, 0x46, 0x69, 0x6c, 0x65, 0x52, 0x05, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x12, 0x1d, 0x0a, + 0x03, 0x73, 0x64, 0x6b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 
0x32, 0x0b, 0x2e, 0x61, 0x70, 0x69, + 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x64, 0x6b, 0x52, 0x03, 0x73, 0x64, 0x6b, 0x12, 0x29, 0x0a, 0x10, + 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x32, 0x0a, 0x0a, 0x63, 0x6f, 0x6d, 0x70, 0x6c, + 0x65, 0x78, 0x69, 0x74, 0x79, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x12, 0x2e, 0x61, 0x70, + 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x69, 0x74, 0x79, 0x52, + 0x0a, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x69, 0x74, 0x79, 0x12, 0x27, 0x0a, 0x0f, 0x70, + 0x65, 0x72, 0x73, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x63, 0x65, 0x5f, 0x6b, 0x65, 0x79, 0x18, 0x05, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x70, 0x65, 0x72, 0x73, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x63, + 0x65, 0x4b, 0x65, 0x79, 0x22, 0x25, 0x0a, 0x13, 0x53, 0x61, 0x76, 0x65, 0x53, 0x6e, 0x69, 0x70, + 0x70, 0x65, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x0e, 0x0a, 0x02, 0x69, + 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x22, 0x23, 0x0a, 0x11, 0x47, + 0x65, 0x74, 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, + 0x22, 0xbd, 0x01, 0x0a, 0x12, 0x47, 0x65, 0x74, 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, 0x74, 0x52, + 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x29, 0x0a, 0x05, 0x66, 0x69, 0x6c, 0x65, 0x73, + 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, + 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, 0x74, 0x46, 0x69, 0x6c, 0x65, 0x52, 0x05, 0x66, 0x69, 0x6c, + 0x65, 0x73, 0x12, 0x1d, 0x0a, 0x03, 0x73, 0x64, 0x6b, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, + 0x0b, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x64, 0x6b, 0x52, 0x03, 0x73, 0x64, + 0x6b, 0x12, 0x29, 0x0a, 0x10, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x5f, 0x6f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x70, 0x69, 0x70, + 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x32, 0x0a, 0x0a, + 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x69, 0x74, 0x79, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, + 0x32, 0x12, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, + 0x78, 0x69, 0x74, 0x79, 0x52, 0x0a, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x69, 0x74, 0x79, + 0x2a, 0x52, 0x0a, 0x03, 0x53, 0x64, 0x6b, 0x12, 0x13, 0x0a, 0x0f, 0x53, 0x44, 0x4b, 0x5f, 0x55, + 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, + 0x53, 0x44, 0x4b, 0x5f, 0x4a, 0x41, 0x56, 0x41, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x53, 0x44, + 0x4b, 0x5f, 0x47, 0x4f, 0x10, 0x02, 0x12, 0x0e, 0x0a, 0x0a, 0x53, 0x44, 0x4b, 0x5f, 0x50, 0x59, + 0x54, 0x48, 0x4f, 0x4e, 0x10, 0x03, 0x12, 0x0c, 0x0a, 0x08, 0x53, 0x44, 0x4b, 0x5f, 0x53, 0x43, + 0x49, 0x4f, 0x10, 0x04, 0x2a, 0xb8, 0x02, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, + 0x16, 0x0a, 0x12, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, + 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x15, 0x0a, 0x11, 0x53, 0x54, 0x41, 0x54, 0x55, + 0x53, 0x5f, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x41, 0x54, 0x49, 0x4e, 0x47, 0x10, 0x01, 0x12, 0x1b, + 0x0a, 0x17, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x56, 0x41, 0x4c, 0x49, 0x44, 
0x41, 0x54, + 0x49, 0x4f, 0x4e, 0x5f, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0x02, 0x12, 0x14, 0x0a, 0x10, 0x53, + 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x50, 0x52, 0x45, 0x50, 0x41, 0x52, 0x49, 0x4e, 0x47, 0x10, + 0x03, 0x12, 0x1c, 0x0a, 0x18, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x50, 0x52, 0x45, 0x50, + 0x41, 0x52, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0x04, 0x12, + 0x14, 0x0a, 0x10, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x43, 0x4f, 0x4d, 0x50, 0x49, 0x4c, + 0x49, 0x4e, 0x47, 0x10, 0x05, 0x12, 0x18, 0x0a, 0x14, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, + 0x43, 0x4f, 0x4d, 0x50, 0x49, 0x4c, 0x45, 0x5f, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0x06, 0x12, + 0x14, 0x0a, 0x10, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x45, 0x58, 0x45, 0x43, 0x55, 0x54, + 0x49, 0x4e, 0x47, 0x10, 0x07, 0x12, 0x13, 0x0a, 0x0f, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, + 0x46, 0x49, 0x4e, 0x49, 0x53, 0x48, 0x45, 0x44, 0x10, 0x08, 0x12, 0x14, 0x0a, 0x10, 0x53, 0x54, + 0x41, 0x54, 0x55, 0x53, 0x5f, 0x52, 0x55, 0x4e, 0x5f, 0x45, 0x52, 0x52, 0x4f, 0x52, 0x10, 0x09, + 0x12, 0x10, 0x0a, 0x0c, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x45, 0x52, 0x52, 0x4f, 0x52, + 0x10, 0x0a, 0x12, 0x16, 0x0a, 0x12, 0x53, 0x54, 0x41, 0x54, 0x55, 0x53, 0x5f, 0x52, 0x55, 0x4e, + 0x5f, 0x54, 0x49, 0x4d, 0x45, 0x4f, 0x55, 0x54, 0x10, 0x0b, 0x12, 0x13, 0x0a, 0x0f, 0x53, 0x54, + 0x41, 0x54, 0x55, 0x53, 0x5f, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x45, 0x44, 0x10, 0x0c, 0x2a, + 0xae, 0x01, 0x0a, 0x15, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, + 0x62, 0x6a, 0x65, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x27, 0x0a, 0x23, 0x50, 0x52, 0x45, + 0x43, 0x4f, 0x4d, 0x50, 0x49, 0x4c, 0x45, 0x44, 0x5f, 0x4f, 0x42, 0x4a, 0x45, 0x43, 0x54, 0x5f, + 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, + 0x10, 0x00, 0x12, 0x23, 0x0a, 0x1f, 0x50, 0x52, 0x45, 0x43, 0x4f, 0x4d, 0x50, 0x49, 0x4c, 0x45, + 0x44, 0x5f, 0x4f, 0x42, 0x4a, 0x45, 0x43, 0x54, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x45, 0x58, + 0x41, 0x4d, 0x50, 0x4c, 0x45, 0x10, 0x01, 0x12, 0x20, 0x0a, 0x1c, 0x50, 0x52, 0x45, 0x43, 0x4f, + 0x4d, 0x50, 0x49, 0x4c, 0x45, 0x44, 0x5f, 0x4f, 0x42, 0x4a, 0x45, 0x43, 0x54, 0x5f, 0x54, 0x59, + 0x50, 0x45, 0x5f, 0x4b, 0x41, 0x54, 0x41, 0x10, 0x02, 0x12, 0x25, 0x0a, 0x21, 0x50, 0x52, 0x45, + 0x43, 0x4f, 0x4d, 0x50, 0x49, 0x4c, 0x45, 0x44, 0x5f, 0x4f, 0x42, 0x4a, 0x45, 0x43, 0x54, 0x5f, + 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x4e, 0x49, 0x54, 0x5f, 0x54, 0x45, 0x53, 0x54, 0x10, 0x03, + 0x2a, 0x6e, 0x0a, 0x0a, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x69, 0x74, 0x79, 0x12, 0x1a, + 0x0a, 0x16, 0x43, 0x4f, 0x4d, 0x50, 0x4c, 0x45, 0x58, 0x49, 0x54, 0x59, 0x5f, 0x55, 0x4e, 0x53, + 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x14, 0x0a, 0x10, 0x43, 0x4f, + 0x4d, 0x50, 0x4c, 0x45, 0x58, 0x49, 0x54, 0x59, 0x5f, 0x42, 0x41, 0x53, 0x49, 0x43, 0x10, 0x01, + 0x12, 0x15, 0x0a, 0x11, 0x43, 0x4f, 0x4d, 0x50, 0x4c, 0x45, 0x58, 0x49, 0x54, 0x59, 0x5f, 0x4d, + 0x45, 0x44, 0x49, 0x55, 0x4d, 0x10, 0x02, 0x12, 0x17, 0x0a, 0x13, 0x43, 0x4f, 0x4d, 0x50, 0x4c, + 0x45, 0x58, 0x49, 0x54, 0x59, 0x5f, 0x41, 0x44, 0x56, 0x41, 0x4e, 0x43, 0x45, 0x44, 0x10, 0x03, + 0x2a, 0x46, 0x0a, 0x0c, 0x45, 0x6d, 0x75, 0x6c, 0x61, 0x74, 0x6f, 0x72, 0x54, 0x79, 0x70, 0x65, + 0x12, 0x1d, 0x0a, 0x19, 0x45, 0x4d, 0x55, 0x4c, 0x41, 0x54, 0x4f, 0x52, 0x5f, 0x54, 0x59, 0x50, + 0x45, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, + 0x17, 0x0a, 
0x13, 0x45, 0x4d, 0x55, 0x4c, 0x41, 0x54, 0x4f, 0x52, 0x5f, 0x54, 0x59, 0x50, 0x45, + 0x5f, 0x4b, 0x41, 0x46, 0x4b, 0x41, 0x10, 0x01, 0x32, 0x8b, 0x0d, 0x0a, 0x11, 0x50, 0x6c, 0x61, + 0x79, 0x67, 0x72, 0x6f, 0x75, 0x6e, 0x64, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x3a, + 0x0a, 0x07, 0x52, 0x75, 0x6e, 0x43, 0x6f, 0x64, 0x65, 0x12, 0x16, 0x2e, 0x61, 0x70, 0x69, 0x2e, + 0x76, 0x31, 0x2e, 0x52, 0x75, 0x6e, 0x43, 0x6f, 0x64, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, + 0x74, 0x1a, 0x17, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x52, 0x75, 0x6e, 0x43, 0x6f, + 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x46, 0x0a, 0x0b, 0x43, 0x68, + 0x65, 0x63, 0x6b, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x1a, 0x2e, 0x61, 0x70, 0x69, 0x2e, + 0x76, 0x31, 0x2e, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, + 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1b, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x43, + 0x68, 0x65, 0x63, 0x6b, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x12, 0x49, 0x0a, 0x0c, 0x47, 0x65, 0x74, 0x52, 0x75, 0x6e, 0x4f, 0x75, 0x74, 0x70, + 0x75, 0x74, 0x12, 0x1b, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x52, + 0x75, 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, + 0x1c, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x52, 0x75, 0x6e, 0x4f, + 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x3a, 0x0a, + 0x07, 0x47, 0x65, 0x74, 0x4c, 0x6f, 0x67, 0x73, 0x12, 0x16, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, + 0x31, 0x2e, 0x47, 0x65, 0x74, 0x4c, 0x6f, 0x67, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x1a, 0x17, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x4c, 0x6f, 0x67, + 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x3d, 0x0a, 0x08, 0x47, 0x65, 0x74, + 0x47, 0x72, 0x61, 0x70, 0x68, 0x12, 0x17, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, + 0x65, 0x74, 0x47, 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x18, + 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x47, 0x72, 0x61, 0x70, 0x68, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x46, 0x0a, 0x0b, 0x47, 0x65, 0x74, 0x52, + 0x75, 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x1a, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, + 0x2e, 0x47, 0x65, 0x74, 0x52, 0x75, 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x1a, 0x1b, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, + 0x52, 0x75, 0x6e, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x5e, 0x0a, 0x13, 0x47, 0x65, 0x74, 0x56, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, 0x6f, + 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x12, 0x22, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, + 0x2e, 0x47, 0x65, 0x74, 0x56, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x75, + 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x23, 0x2e, 0x61, 0x70, + 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x56, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x61, 0x0a, 0x14, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x70, 0x61, 0x72, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x12, 0x23, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, + 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, 
0x72, 0x65, 0x70, 0x61, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x24, 0x2e, + 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x70, 0x61, 0x72, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x12, 0x55, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x69, 0x6c, + 0x65, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x12, 0x1f, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, + 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x4f, 0x75, 0x74, 0x70, 0x75, + 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x20, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, + 0x31, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x4f, 0x75, 0x74, 0x70, + 0x75, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x37, 0x0a, 0x06, 0x43, 0x61, + 0x6e, 0x63, 0x65, 0x6c, 0x12, 0x15, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x43, 0x61, + 0x6e, 0x63, 0x65, 0x6c, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x16, 0x2e, 0x61, 0x70, + 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x12, 0x64, 0x0a, 0x15, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, + 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x12, 0x24, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, - 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, - 0x6e, 0x73, 0x65, 0x12, 0x61, 0x0a, 0x14, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, - 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x12, 0x23, 0x2e, 0x61, 0x70, - 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, - 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x1a, 0x24, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, - 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x52, 0x65, - 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x6d, 0x0a, 0x18, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, - 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x43, 0x6f, - 0x64, 0x65, 0x12, 0x27, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, + 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x1a, 0x25, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, - 0x43, 0x6f, 0x64, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x28, 0x2e, 0x61, 0x70, - 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, - 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x43, 0x6f, 0x64, 0x65, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x73, 0x0a, 0x1a, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, - 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x4f, 0x75, 0x74, - 0x70, 0x75, 0x74, 0x12, 0x29, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, + 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x61, 0x0a, 0x14, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 
0x4f, 0x62, 0x6a, 0x65, 0x63, - 0x74, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2a, - 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, - 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x4f, 0x75, 0x74, 0x70, - 0x75, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x6d, 0x0a, 0x18, 0x47, 0x65, + 0x74, 0x12, 0x23, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x72, + 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x52, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x24, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, + 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, + 0x6a, 0x65, 0x63, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x6d, 0x0a, 0x18, + 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, + 0x6a, 0x65, 0x63, 0x74, 0x43, 0x6f, 0x64, 0x65, 0x12, 0x27, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, + 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, + 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x43, 0x6f, 0x64, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, + 0x74, 0x1a, 0x28, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x72, + 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x43, + 0x6f, 0x64, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x73, 0x0a, 0x1a, 0x47, + 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, + 0x65, 0x63, 0x74, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x12, 0x29, 0x2e, 0x61, 0x70, 0x69, 0x2e, + 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, + 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2a, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, - 0x63, 0x74, 0x4c, 0x6f, 0x67, 0x73, 0x12, 0x27, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, + 0x63, 0x74, 0x4f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x6d, 0x0a, 0x18, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, + 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x4c, 0x6f, 0x67, 0x73, 0x12, 0x27, 0x2e, 0x61, + 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, + 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x4c, 0x6f, 0x67, 0x73, 0x52, 0x65, + 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x28, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, + 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, + 0x65, 0x63, 0x74, 0x4c, 0x6f, 0x67, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, + 0x70, 0x0a, 0x19, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, + 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x47, 0x72, 0x61, 0x70, 0x68, 0x12, 0x28, 0x2e, 0x61, + 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, + 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x47, 0x72, 0x61, 0x70, 0x68, 0x52, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x29, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 
0x2e, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, - 0x6a, 0x65, 0x63, 0x74, 0x4c, 0x6f, 0x67, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, - 0x28, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, - 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x4c, 0x6f, 0x67, - 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x70, 0x0a, 0x19, 0x47, 0x65, 0x74, + 0x6a, 0x65, 0x63, 0x74, 0x47, 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x12, 0x76, 0x0a, 0x1b, 0x47, 0x65, 0x74, 0x44, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x50, + 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, + 0x12, 0x2a, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x44, 0x65, 0x66, + 0x61, 0x75, 0x6c, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, + 0x62, 0x6a, 0x65, 0x63, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2b, 0x2e, 0x61, + 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x44, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, - 0x74, 0x47, 0x72, 0x61, 0x70, 0x68, 0x12, 0x28, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, - 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, - 0x6a, 0x65, 0x63, 0x74, 0x47, 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x1a, 0x29, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x72, 0x65, - 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x47, 0x72, - 0x61, 0x70, 0x68, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x76, 0x0a, 0x1b, 0x47, - 0x65, 0x74, 0x44, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, 0x70, - 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x12, 0x2a, 0x2e, 0x61, 0x70, 0x69, - 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x44, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x50, 0x72, - 0x65, 0x63, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2b, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, - 0x47, 0x65, 0x74, 0x44, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x50, 0x72, 0x65, 0x63, 0x6f, 0x6d, - 0x70, 0x69, 0x6c, 0x65, 0x64, 0x4f, 0x62, 0x6a, 0x65, 0x63, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, - 0x6e, 0x73, 0x65, 0x12, 0x46, 0x0a, 0x0b, 0x53, 0x61, 0x76, 0x65, 0x53, 0x6e, 0x69, 0x70, 0x70, - 0x65, 0x74, 0x12, 0x1a, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x61, 0x76, 0x65, - 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1b, - 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x61, 0x76, 0x65, 0x53, 0x6e, 0x69, 0x70, - 0x70, 0x65, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x43, 0x0a, 0x0a, 0x47, - 0x65, 0x74, 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, 0x74, 0x12, 0x19, 0x2e, 0x61, 0x70, 0x69, 0x2e, - 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, 0x74, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1a, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, - 0x74, 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x42, 0x38, 0x5a, 0x36, 0x62, 0x65, 0x61, 0x6d, 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, - 0x6f, 0x72, 0x67, 
0x2f, 0x70, 0x6c, 0x61, 0x79, 0x67, 0x72, 0x6f, 0x75, 0x6e, 0x64, 0x2f, 0x62, - 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x3b, - 0x70, 0x6c, 0x61, 0x79, 0x67, 0x72, 0x6f, 0x75, 0x6e, 0x64, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x33, + 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x46, 0x0a, 0x0b, 0x53, 0x61, 0x76, + 0x65, 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, 0x74, 0x12, 0x1a, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, + 0x31, 0x2e, 0x53, 0x61, 0x76, 0x65, 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, 0x74, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1b, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x61, + 0x76, 0x65, 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x12, 0x43, 0x0a, 0x0a, 0x47, 0x65, 0x74, 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, 0x74, 0x12, + 0x19, 0x2e, 0x61, 0x70, 0x69, 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x53, 0x6e, 0x69, 0x70, + 0x70, 0x65, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1a, 0x2e, 0x61, 0x70, 0x69, + 0x2e, 0x76, 0x31, 0x2e, 0x47, 0x65, 0x74, 0x53, 0x6e, 0x69, 0x70, 0x70, 0x65, 0x74, 0x52, 0x65, + 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x42, 0x38, 0x5a, 0x36, 0x62, 0x65, 0x61, 0x6d, 0x2e, 0x61, + 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x6f, 0x72, 0x67, 0x2f, 0x70, 0x6c, 0x61, 0x79, 0x67, 0x72, + 0x6f, 0x75, 0x6e, 0x64, 0x2f, 0x62, 0x61, 0x63, 0x6b, 0x65, 0x6e, 0x64, 0x2f, 0x69, 0x6e, 0x74, + 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x3b, 0x70, 0x6c, 0x61, 0x79, 0x67, 0x72, 0x6f, 0x75, 0x6e, 0x64, + 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -3097,68 +3118,70 @@ var file_api_v1_api_proto_depIdxs = []int32{ 47, // 1: api.v1.Dataset.options:type_name -> api.v1.Dataset.OptionsEntry 0, // 2: api.v1.RunCodeRequest.sdk:type_name -> api.v1.Sdk 5, // 3: api.v1.RunCodeRequest.datasets:type_name -> api.v1.Dataset - 1, // 4: api.v1.CheckStatusResponse.status:type_name -> api.v1.Status - 2, // 5: api.v1.PrecompiledObject.type:type_name -> api.v1.PrecompiledObjectType - 0, // 6: api.v1.PrecompiledObject.sdk:type_name -> api.v1.Sdk - 3, // 7: api.v1.PrecompiledObject.complexity:type_name -> api.v1.Complexity - 5, // 8: api.v1.PrecompiledObject.datasets:type_name -> api.v1.Dataset - 0, // 9: api.v1.Categories.sdk:type_name -> api.v1.Sdk - 48, // 10: api.v1.Categories.categories:type_name -> api.v1.Categories.Category - 0, // 11: api.v1.GetPrecompiledObjectsRequest.sdk:type_name -> api.v1.Sdk - 0, // 12: api.v1.GetDefaultPrecompiledObjectRequest.sdk:type_name -> api.v1.Sdk - 27, // 13: api.v1.GetPrecompiledObjectsResponse.sdk_categories:type_name -> api.v1.Categories - 26, // 14: api.v1.GetPrecompiledObjectResponse.precompiled_object:type_name -> api.v1.PrecompiledObject - 26, // 15: api.v1.GetDefaultPrecompiledObjectResponse.precompiled_object:type_name -> api.v1.PrecompiledObject - 42, // 16: api.v1.SaveSnippetRequest.files:type_name -> api.v1.SnippetFile - 0, // 17: api.v1.SaveSnippetRequest.sdk:type_name -> api.v1.Sdk - 3, // 18: api.v1.SaveSnippetRequest.complexity:type_name -> api.v1.Complexity - 42, // 19: api.v1.GetSnippetResponse.files:type_name -> api.v1.SnippetFile - 0, // 20: api.v1.GetSnippetResponse.sdk:type_name -> api.v1.Sdk - 3, // 21: api.v1.GetSnippetResponse.complexity:type_name -> api.v1.Complexity - 26, // 22: api.v1.Categories.Category.precompiled_objects:type_name -> api.v1.PrecompiledObject - 6, // 23: api.v1.PlaygroundService.RunCode:input_type -> api.v1.RunCodeRequest - 8, // 24: 
api.v1.PlaygroundService.CheckStatus:input_type -> api.v1.CheckStatusRequest - 16, // 25: api.v1.PlaygroundService.GetRunOutput:input_type -> api.v1.GetRunOutputRequest - 20, // 26: api.v1.PlaygroundService.GetLogs:input_type -> api.v1.GetLogsRequest - 22, // 27: api.v1.PlaygroundService.GetGraph:input_type -> api.v1.GetGraphRequest - 18, // 28: api.v1.PlaygroundService.GetRunError:input_type -> api.v1.GetRunErrorRequest - 10, // 29: api.v1.PlaygroundService.GetValidationOutput:input_type -> api.v1.GetValidationOutputRequest - 12, // 30: api.v1.PlaygroundService.GetPreparationOutput:input_type -> api.v1.GetPreparationOutputRequest - 14, // 31: api.v1.PlaygroundService.GetCompileOutput:input_type -> api.v1.GetCompileOutputRequest - 24, // 32: api.v1.PlaygroundService.Cancel:input_type -> api.v1.CancelRequest - 28, // 33: api.v1.PlaygroundService.GetPrecompiledObjects:input_type -> api.v1.GetPrecompiledObjectsRequest - 29, // 34: api.v1.PlaygroundService.GetPrecompiledObject:input_type -> api.v1.GetPrecompiledObjectRequest - 30, // 35: api.v1.PlaygroundService.GetPrecompiledObjectCode:input_type -> api.v1.GetPrecompiledObjectCodeRequest - 31, // 36: api.v1.PlaygroundService.GetPrecompiledObjectOutput:input_type -> api.v1.GetPrecompiledObjectOutputRequest - 32, // 37: api.v1.PlaygroundService.GetPrecompiledObjectLogs:input_type -> api.v1.GetPrecompiledObjectLogsRequest - 33, // 38: api.v1.PlaygroundService.GetPrecompiledObjectGraph:input_type -> api.v1.GetPrecompiledObjectGraphRequest - 34, // 39: api.v1.PlaygroundService.GetDefaultPrecompiledObject:input_type -> api.v1.GetDefaultPrecompiledObjectRequest - 43, // 40: api.v1.PlaygroundService.SaveSnippet:input_type -> api.v1.SaveSnippetRequest - 45, // 41: api.v1.PlaygroundService.GetSnippet:input_type -> api.v1.GetSnippetRequest - 7, // 42: api.v1.PlaygroundService.RunCode:output_type -> api.v1.RunCodeResponse - 9, // 43: api.v1.PlaygroundService.CheckStatus:output_type -> api.v1.CheckStatusResponse - 17, // 44: api.v1.PlaygroundService.GetRunOutput:output_type -> api.v1.GetRunOutputResponse - 21, // 45: api.v1.PlaygroundService.GetLogs:output_type -> api.v1.GetLogsResponse - 23, // 46: api.v1.PlaygroundService.GetGraph:output_type -> api.v1.GetGraphResponse - 19, // 47: api.v1.PlaygroundService.GetRunError:output_type -> api.v1.GetRunErrorResponse - 11, // 48: api.v1.PlaygroundService.GetValidationOutput:output_type -> api.v1.GetValidationOutputResponse - 13, // 49: api.v1.PlaygroundService.GetPreparationOutput:output_type -> api.v1.GetPreparationOutputResponse - 15, // 50: api.v1.PlaygroundService.GetCompileOutput:output_type -> api.v1.GetCompileOutputResponse - 25, // 51: api.v1.PlaygroundService.Cancel:output_type -> api.v1.CancelResponse - 35, // 52: api.v1.PlaygroundService.GetPrecompiledObjects:output_type -> api.v1.GetPrecompiledObjectsResponse - 36, // 53: api.v1.PlaygroundService.GetPrecompiledObject:output_type -> api.v1.GetPrecompiledObjectResponse - 37, // 54: api.v1.PlaygroundService.GetPrecompiledObjectCode:output_type -> api.v1.GetPrecompiledObjectCodeResponse - 38, // 55: api.v1.PlaygroundService.GetPrecompiledObjectOutput:output_type -> api.v1.GetPrecompiledObjectOutputResponse - 39, // 56: api.v1.PlaygroundService.GetPrecompiledObjectLogs:output_type -> api.v1.GetPrecompiledObjectLogsResponse - 40, // 57: api.v1.PlaygroundService.GetPrecompiledObjectGraph:output_type -> api.v1.GetPrecompiledObjectGraphResponse - 41, // 58: api.v1.PlaygroundService.GetDefaultPrecompiledObject:output_type -> 
api.v1.GetDefaultPrecompiledObjectResponse - 44, // 59: api.v1.PlaygroundService.SaveSnippet:output_type -> api.v1.SaveSnippetResponse - 46, // 60: api.v1.PlaygroundService.GetSnippet:output_type -> api.v1.GetSnippetResponse - 42, // [42:61] is the sub-list for method output_type - 23, // [23:42] is the sub-list for method input_type - 23, // [23:23] is the sub-list for extension type_name - 23, // [23:23] is the sub-list for extension extendee - 0, // [0:23] is the sub-list for field type_name + 42, // 4: api.v1.RunCodeRequest.files:type_name -> api.v1.SnippetFile + 1, // 5: api.v1.CheckStatusResponse.status:type_name -> api.v1.Status + 2, // 6: api.v1.PrecompiledObject.type:type_name -> api.v1.PrecompiledObjectType + 0, // 7: api.v1.PrecompiledObject.sdk:type_name -> api.v1.Sdk + 3, // 8: api.v1.PrecompiledObject.complexity:type_name -> api.v1.Complexity + 5, // 9: api.v1.PrecompiledObject.datasets:type_name -> api.v1.Dataset + 0, // 10: api.v1.Categories.sdk:type_name -> api.v1.Sdk + 48, // 11: api.v1.Categories.categories:type_name -> api.v1.Categories.Category + 0, // 12: api.v1.GetPrecompiledObjectsRequest.sdk:type_name -> api.v1.Sdk + 0, // 13: api.v1.GetDefaultPrecompiledObjectRequest.sdk:type_name -> api.v1.Sdk + 27, // 14: api.v1.GetPrecompiledObjectsResponse.sdk_categories:type_name -> api.v1.Categories + 26, // 15: api.v1.GetPrecompiledObjectResponse.precompiled_object:type_name -> api.v1.PrecompiledObject + 42, // 16: api.v1.GetPrecompiledObjectCodeResponse.files:type_name -> api.v1.SnippetFile + 26, // 17: api.v1.GetDefaultPrecompiledObjectResponse.precompiled_object:type_name -> api.v1.PrecompiledObject + 42, // 18: api.v1.SaveSnippetRequest.files:type_name -> api.v1.SnippetFile + 0, // 19: api.v1.SaveSnippetRequest.sdk:type_name -> api.v1.Sdk + 3, // 20: api.v1.SaveSnippetRequest.complexity:type_name -> api.v1.Complexity + 42, // 21: api.v1.GetSnippetResponse.files:type_name -> api.v1.SnippetFile + 0, // 22: api.v1.GetSnippetResponse.sdk:type_name -> api.v1.Sdk + 3, // 23: api.v1.GetSnippetResponse.complexity:type_name -> api.v1.Complexity + 26, // 24: api.v1.Categories.Category.precompiled_objects:type_name -> api.v1.PrecompiledObject + 6, // 25: api.v1.PlaygroundService.RunCode:input_type -> api.v1.RunCodeRequest + 8, // 26: api.v1.PlaygroundService.CheckStatus:input_type -> api.v1.CheckStatusRequest + 16, // 27: api.v1.PlaygroundService.GetRunOutput:input_type -> api.v1.GetRunOutputRequest + 20, // 28: api.v1.PlaygroundService.GetLogs:input_type -> api.v1.GetLogsRequest + 22, // 29: api.v1.PlaygroundService.GetGraph:input_type -> api.v1.GetGraphRequest + 18, // 30: api.v1.PlaygroundService.GetRunError:input_type -> api.v1.GetRunErrorRequest + 10, // 31: api.v1.PlaygroundService.GetValidationOutput:input_type -> api.v1.GetValidationOutputRequest + 12, // 32: api.v1.PlaygroundService.GetPreparationOutput:input_type -> api.v1.GetPreparationOutputRequest + 14, // 33: api.v1.PlaygroundService.GetCompileOutput:input_type -> api.v1.GetCompileOutputRequest + 24, // 34: api.v1.PlaygroundService.Cancel:input_type -> api.v1.CancelRequest + 28, // 35: api.v1.PlaygroundService.GetPrecompiledObjects:input_type -> api.v1.GetPrecompiledObjectsRequest + 29, // 36: api.v1.PlaygroundService.GetPrecompiledObject:input_type -> api.v1.GetPrecompiledObjectRequest + 30, // 37: api.v1.PlaygroundService.GetPrecompiledObjectCode:input_type -> api.v1.GetPrecompiledObjectCodeRequest + 31, // 38: api.v1.PlaygroundService.GetPrecompiledObjectOutput:input_type -> 
api.v1.GetPrecompiledObjectOutputRequest + 32, // 39: api.v1.PlaygroundService.GetPrecompiledObjectLogs:input_type -> api.v1.GetPrecompiledObjectLogsRequest + 33, // 40: api.v1.PlaygroundService.GetPrecompiledObjectGraph:input_type -> api.v1.GetPrecompiledObjectGraphRequest + 34, // 41: api.v1.PlaygroundService.GetDefaultPrecompiledObject:input_type -> api.v1.GetDefaultPrecompiledObjectRequest + 43, // 42: api.v1.PlaygroundService.SaveSnippet:input_type -> api.v1.SaveSnippetRequest + 45, // 43: api.v1.PlaygroundService.GetSnippet:input_type -> api.v1.GetSnippetRequest + 7, // 44: api.v1.PlaygroundService.RunCode:output_type -> api.v1.RunCodeResponse + 9, // 45: api.v1.PlaygroundService.CheckStatus:output_type -> api.v1.CheckStatusResponse + 17, // 46: api.v1.PlaygroundService.GetRunOutput:output_type -> api.v1.GetRunOutputResponse + 21, // 47: api.v1.PlaygroundService.GetLogs:output_type -> api.v1.GetLogsResponse + 23, // 48: api.v1.PlaygroundService.GetGraph:output_type -> api.v1.GetGraphResponse + 19, // 49: api.v1.PlaygroundService.GetRunError:output_type -> api.v1.GetRunErrorResponse + 11, // 50: api.v1.PlaygroundService.GetValidationOutput:output_type -> api.v1.GetValidationOutputResponse + 13, // 51: api.v1.PlaygroundService.GetPreparationOutput:output_type -> api.v1.GetPreparationOutputResponse + 15, // 52: api.v1.PlaygroundService.GetCompileOutput:output_type -> api.v1.GetCompileOutputResponse + 25, // 53: api.v1.PlaygroundService.Cancel:output_type -> api.v1.CancelResponse + 35, // 54: api.v1.PlaygroundService.GetPrecompiledObjects:output_type -> api.v1.GetPrecompiledObjectsResponse + 36, // 55: api.v1.PlaygroundService.GetPrecompiledObject:output_type -> api.v1.GetPrecompiledObjectResponse + 37, // 56: api.v1.PlaygroundService.GetPrecompiledObjectCode:output_type -> api.v1.GetPrecompiledObjectCodeResponse + 38, // 57: api.v1.PlaygroundService.GetPrecompiledObjectOutput:output_type -> api.v1.GetPrecompiledObjectOutputResponse + 39, // 58: api.v1.PlaygroundService.GetPrecompiledObjectLogs:output_type -> api.v1.GetPrecompiledObjectLogsResponse + 40, // 59: api.v1.PlaygroundService.GetPrecompiledObjectGraph:output_type -> api.v1.GetPrecompiledObjectGraphResponse + 41, // 60: api.v1.PlaygroundService.GetDefaultPrecompiledObject:output_type -> api.v1.GetDefaultPrecompiledObjectResponse + 44, // 61: api.v1.PlaygroundService.SaveSnippet:output_type -> api.v1.SaveSnippetResponse + 46, // 62: api.v1.PlaygroundService.GetSnippet:output_type -> api.v1.GetSnippetResponse + 44, // [44:63] is the sub-list for method output_type + 25, // [25:44] is the sub-list for method input_type + 25, // [25:25] is the sub-list for extension type_name + 25, // [25:25] is the sub-list for extension extendee + 0, // [0:25] is the sub-list for field type_name } func init() { file_api_v1_api_proto_init() } diff --git a/playground/backend/internal/code_processing/code_processing_test.go b/playground/backend/internal/code_processing/code_processing_test.go index b8060959a6c72..fb3a2a1313e54 100644 --- a/playground/backend/internal/code_processing/code_processing_test.go +++ b/playground/backend/internal/code_processing/code_processing_test.go @@ -37,6 +37,7 @@ import ( "beam.apache.org/playground/backend/internal/cache" "beam.apache.org/playground/backend/internal/cache/local" "beam.apache.org/playground/backend/internal/cache/redis" + "beam.apache.org/playground/backend/internal/db/entity" "beam.apache.org/playground/backend/internal/environment" "beam.apache.org/playground/backend/internal/executors" 
"beam.apache.org/playground/backend/internal/fs_tool" @@ -45,7 +46,7 @@ import ( ) const ( - javaConfig = "{\n \"compile_cmd\": \"javac\",\n \"run_cmd\": \"java\",\n \"test_cmd\": \"java\",\n \"compile_args\": [\n \"-d\",\n \"bin\",\n \"-classpath\"\n ],\n \"run_args\": [\n \"-cp\",\n \"bin:\"\n ],\n \"test_args\": [\n \"-cp\",\n \"bin:\",\n \"JUnit\"\n ]\n}" + javaConfig = "{\n \"compile_cmd\": \"javac\",\n \"run_cmd\": \"java\",\n \"test_cmd\": \"java\",\n \"compile_args\": [\n \"-d\",\n \"bin\",\n \"-parameters\",\n \"-classpath\"\n ],\n \"run_args\": [\n \"-cp\",\n \"bin:\"\n ],\n \"test_args\": [\n \"-cp\",\n \"bin:\",\n \"JUnit\"\n ]\n}" pythonConfig = "{\n \"compile_cmd\": \"\",\n \"run_cmd\": \"python3\",\n \"compile_args\": [],\n \"run_args\": []\n}" goConfig = "{\n \"compile_cmd\": \"go\",\n \"run_cmd\": \"\",\n \"compile_args\": [\n \"build\",\n \"-o\",\n \"bin\"\n ],\n \"run_args\": [\n ]\n}" pipelinesFolder = "executable_files" @@ -289,8 +290,10 @@ func Test_Process(t *testing.T) { if err != nil { t.Fatalf("error during prepare folders: %s", err.Error()) } + + sources := []entity.FileEntity{{Name: "main.java", Content: tt.code, IsMain: true}} if tt.createExecFile { - _ = lc.CreateSourceCodeFile(tt.code) + _ = lc.CreateSourceCodeFiles(sources) } if err = utils.SetToCache(tt.args.ctx, cacheService, tt.args.pipelineId, cache.Canceled, false); err != nil { t.Fatal("error during set cancel flag to cache") @@ -690,7 +693,8 @@ func prepareFiles(b *testing.B, pipelineId uuid.UUID, code string, sdk pb.Sdk) * if err != nil { b.Fatalf("error during prepare folders: %s", err.Error()) } - err = lc.CreateSourceCodeFile(code) + sources := []entity.FileEntity{{Name: "main.java", Content: code, IsMain: true}} + err = lc.CreateSourceCodeFiles(sources) if err != nil { b.Fatalf("error during prepare source code file: %s", err.Error()) } @@ -895,7 +899,8 @@ func Test_validateStep(t *testing.T) { if err != nil { t.Fatalf("error during prepare folders: %s", err.Error()) } - _ = lc.CreateSourceCodeFile(tt.code) + sources := []entity.FileEntity{{Name: "main.java", Content: tt.code, IsMain: true}} + _ = lc.CreateSourceCodeFiles(sources) executor := validateStep(tt.args.ctx, tt.args.cacheService, &lc.Paths, tt.args.pipelineId, tt.args.sdkEnv, tt.args.pipelineLifeCycleCtx, tt.args.validationResults, tt.args.cancelChannel) got := syncMapLen(tt.args.validationResults) if executor != nil && !reflect.DeepEqual(got, tt.want) { @@ -983,7 +988,8 @@ func Test_prepareStep(t *testing.T) { if err != nil { t.Fatalf("error during prepare folders: %s", err.Error()) } - _ = lc.CreateSourceCodeFile(tt.code) + sources := []entity.FileEntity{{Name: "main.java", Content: tt.code, IsMain: true}} + _ = lc.CreateSourceCodeFiles(sources) prepareStep(tt.args.ctx, tt.args.cacheService, &lc.Paths, tt.args.pipelineId, tt.args.sdkEnv, tt.args.pipelineLifeCycleCtx, tt.args.validationResults, tt.args.cancelChannel, nil) status, _ := cacheService.GetValue(tt.args.ctx, tt.args.pipelineId, cache.Status) if status != tt.expectedStatus { @@ -1068,7 +1074,8 @@ func Test_compileStep(t *testing.T) { if err != nil { t.Fatalf("error during prepare folders: %s", err.Error()) } - _ = lc.CreateSourceCodeFile(tt.code) + sources := []entity.FileEntity{{Name: "main.java", Content: tt.code, IsMain: true}} + _ = lc.CreateSourceCodeFiles(sources) compileStep(tt.args.ctx, tt.args.cacheService, &lc.Paths, tt.args.pipelineId, tt.args.sdkEnv, tt.args.isUnitTest, tt.args.pipelineLifeCycleCtx, tt.args.cancelChannel) status, _ := 
cacheService.GetValue(tt.args.ctx, tt.args.pipelineId, cache.Status) if status != tt.expectedStatus { @@ -1173,7 +1180,8 @@ func Test_runStep(t *testing.T) { if err != nil { t.Fatalf("error during prepare folders: %s", err.Error()) } - _ = lc.CreateSourceCodeFile(tt.code) + sources := []entity.FileEntity{{Name: "main.java", Content: tt.code, IsMain: true}} + _ = lc.CreateSourceCodeFiles(sources) } runStep(tt.args.ctx, tt.args.cacheService, &lc.Paths, tt.args.pipelineId, tt.args.isUnitTest, tt.args.sdkEnv, tt.args.pipelineOptions, tt.args.pipelineLifeCycleCtx, tt.args.cancelChannel) status, _ := cacheService.GetValue(tt.args.ctx, tt.args.pipelineId, cache.Status) diff --git a/playground/backend/internal/db/datastore/datastore_db.go b/playground/backend/internal/db/datastore/datastore_db.go index 8d8b0b74e4473..554a7a7f7ad99 100644 --- a/playground/backend/internal/db/datastore/datastore_db.go +++ b/playground/backend/internal/db/datastore/datastore_db.go @@ -17,7 +17,6 @@ package datastore import ( "context" - "errors" "fmt" "time" @@ -397,25 +396,28 @@ func (d *Datastore) GetExample(ctx context.Context, id string, sdks []*entity.SD }), err } -func (d *Datastore) GetExampleCode(ctx context.Context, id string) (string, error) { +func (d *Datastore) GetExampleCode(ctx context.Context, id string) ([]*entity.FileEntity, error) { + files := make([]*entity.FileEntity, 0) tx, err := d.Client.NewTransaction(ctx, datastore.ReadOnly) if err != nil { logger.Errorf(errorMsgTemplateCreatingTx, err.Error()) - return "", err + return files, err } defer rollback(tx) - fileKey := utils.GetFileKey(ctx, id, 0) - var file = new(entity.FileEntity) - if err = tx.Get(fileKey, file); err != nil { - if err == datastore.ErrNoSuchEntity { - logger.Warnf("error during getting example code by identifier, err: %s", err.Error()) - return "", err - } - logger.Errorf("error during getting example code by identifier, err: %s", err.Error()) - return "", err + // Get number of files + snpKey := utils.GetSnippetKey(ctx, id) + var snippet = new(entity.SnippetEntity) + if err = tx.Get(snpKey, snippet); err != nil { + logger.Errorf("error during getting snippet by identifier, err: %s", err.Error()) + return nil, err + } + + fileKeys := make([]*datastore.Key, 0, snippet.NumberOfFiles) + for idx := 0; idx < snippet.NumberOfFiles; idx++ { + fileKeys = append(fileKeys, utils.GetFileKey(ctx, id, idx)) } - return file.Content, nil + return getEntities[entity.FileEntity](tx, fileKeys) } func (d *Datastore) GetExampleOutput(ctx context.Context, id string) (string, error) { @@ -519,27 +521,26 @@ func rollback(tx *datastore.Transaction) { } } -func getEntities[V entity.DatastoreEntity](tx *datastore.Transaction, keys []*datastore.Key) ([]*V, error) { - var entitiesWithNils = make([]*V, len(keys)) - entities := make([]*V, 0) +// generic wrapper around GetMulti & filtering nil elements +func getEntities[V any](tx *datastore.Transaction, keys []*datastore.Key) ([]*V, error) { + entitiesWithNils := make([]*V, len(keys)) + entitiesNotNil := make([]*V, 0) if err := tx.GetMulti(keys, entitiesWithNils); err != nil { if errorsVal, ok := err.(datastore.MultiError); ok { - for _, errVal := range errorsVal { - if errors.Is(datastore.ErrNoSuchEntity, errVal) { - for _, entityVal := range entitiesWithNils { - if entityVal != nil { - entities = append(entities, entityVal) - } - } - break + for idx, errVal := range errorsVal { + if errVal == nil { + entitiesNotNil = append(entitiesNotNil, entitiesWithNils[idx]) + continue } + + logger.Warnf("Key %v not 
found: %s\n", keys[idx], errVal) } } else { logger.Errorf("error during the getting entities, err: %s\n", err.Error()) return nil, err } } else { - entities = entitiesWithNils + entitiesNotNil = entitiesWithNils } - return entities, nil + return entitiesNotNil, nil } diff --git a/playground/backend/internal/db/datastore/datastore_db_test.go b/playground/backend/internal/db/datastore/datastore_db_test.go index 1a497d452c333..57f0d4498ea4b 100644 --- a/playground/backend/internal/db/datastore/datastore_db_test.go +++ b/playground/backend/internal/db/datastore/datastore_db_test.go @@ -22,6 +22,8 @@ import ( "testing" "time" + "github.com/stretchr/testify/assert" + pb "beam.apache.org/playground/backend/internal/api/v1" "beam.apache.org/playground/backend/internal/constants" "beam.apache.org/playground/backend/internal/db/entity" @@ -321,7 +323,7 @@ func TestDatastore_GetFiles(t *testing.T) { }, { name: "GetFiles() in the usual case", - prepare: func() { saveSnippet("MOCK_ID", pb.Sdk_SDK_GO.String()) }, + prepare: func() { saveSnippet("MOCK_ID", pb.Sdk_SDK_GO.String(), false) }, args: args{ctx: ctx, snipId: "MOCK_ID", numberOfFiles: 1}, wantErr: false, cleanData: func() { @@ -402,7 +404,7 @@ func TestDatastore_GetCatalog(t *testing.T) { prepare: func() { exampleId := utils.GetIDWithDelimiter(pb.Sdk_SDK_JAVA.String(), "MOCK_EXAMPLE") saveExample("MOCK_EXAMPLE", pb.Sdk_SDK_JAVA.String()) - saveSnippet(exampleId, pb.Sdk_SDK_JAVA.String()) + saveSnippet(exampleId, pb.Sdk_SDK_JAVA.String(), false) savePCObjs(exampleId) }, args: args{ @@ -481,7 +483,7 @@ func TestDatastore_GetDefaultExamples(t *testing.T) { for sdk := range pb.Sdk_value { exampleId := utils.GetIDWithDelimiter(sdk, "MOCK_DEFAULT_EXAMPLE") saveExample("MOCK_DEFAULT_EXAMPLE", sdk) - saveSnippet(exampleId, sdk) + saveSnippet(exampleId, sdk, false) savePCObjs(exampleId) } }, @@ -550,7 +552,7 @@ func TestDatastore_GetExample(t *testing.T) { prepare: func() { exampleId := utils.GetIDWithDelimiter(pb.Sdk_SDK_JAVA.String(), "MOCK_EXAMPLE") saveExample("MOCK_EXAMPLE", pb.Sdk_SDK_JAVA.String()) - saveSnippet(exampleId, pb.Sdk_SDK_JAVA.String()) + saveSnippet(exampleId, pb.Sdk_SDK_JAVA.String(), false) savePCObjs(exampleId) }, args: args{ @@ -608,11 +610,11 @@ func TestDatastore_GetExampleCode(t *testing.T) { clean func() }{ { - name: "Getting an example code in the usual case", + name: "Getting multifile example code", prepare: func() { exampleId := utils.GetIDWithDelimiter(pb.Sdk_SDK_JAVA.String(), "MOCK_EXAMPLE") saveExample("MOCK_EXAMPLE", pb.Sdk_SDK_JAVA.String()) - saveSnippet(exampleId, pb.Sdk_SDK_JAVA.String()) + saveSnippet(exampleId, pb.Sdk_SDK_JAVA.String(), true /* multifile */) savePCObjs(exampleId) }, args: args{ @@ -623,7 +625,7 @@ func TestDatastore_GetExampleCode(t *testing.T) { clean: func() { exampleId := utils.GetIDWithDelimiter(pb.Sdk_SDK_JAVA.String(), "MOCK_EXAMPLE") test_cleaner.CleanPCObjs(ctx, t, exampleId) - test_cleaner.CleanFiles(ctx, t, exampleId, 1) + test_cleaner.CleanFiles(ctx, t, exampleId, 2) test_cleaner.CleanSnippet(ctx, t, exampleId) test_cleaner.CleanExample(ctx, t, exampleId) }, @@ -633,14 +635,16 @@ func TestDatastore_GetExampleCode(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { tt.prepare() - code, err := datastoreDb.GetExampleCode(tt.args.ctx, tt.args.exampleId) + files, err := datastoreDb.GetExampleCode(tt.args.ctx, tt.args.exampleId) if (err != nil) != tt.wantErr { t.Errorf("GetExampleCode() error = %v, wantErr %v", err, tt.wantErr) } if err == nil { - if code != 
"MOCK_CONTENT" { - t.Errorf("GetExampleCode() unexpected result: wrong precompiled obj") + expected := []*entity.FileEntity{ + {Name: "MOCK_NAME", Content: "MOCK_CONTENT", IsMain: true, CntxLine: 32}, + {Name: "MOCK_IMPORT_NAME", Content: "MOCK_IMPORT_CONTENT", IsMain: false, CntxLine: 33}, } + assert.Equal(t, expected, files) tt.clean() } }) @@ -664,7 +668,7 @@ func TestDatastore_GetExampleOutput(t *testing.T) { prepare: func() { exampleId := utils.GetIDWithDelimiter(pb.Sdk_SDK_JAVA.String(), "MOCK_EXAMPLE") saveExample("MOCK_EXAMPLE", pb.Sdk_SDK_JAVA.String()) - saveSnippet(exampleId, pb.Sdk_SDK_JAVA.String()) + saveSnippet(exampleId, pb.Sdk_SDK_JAVA.String(), false) savePCObjs(exampleId) }, args: args{ @@ -716,7 +720,7 @@ func TestDatastore_GetExampleLogs(t *testing.T) { prepare: func() { exampleId := utils.GetIDWithDelimiter(pb.Sdk_SDK_JAVA.String(), "MOCK_EXAMPLE") saveExample("MOCK_EXAMPLE", pb.Sdk_SDK_JAVA.String()) - saveSnippet(exampleId, pb.Sdk_SDK_JAVA.String()) + saveSnippet(exampleId, pb.Sdk_SDK_JAVA.String(), false) savePCObjs(exampleId) }, args: args{ @@ -768,7 +772,7 @@ func TestDatastore_GetExampleGraph(t *testing.T) { prepare: func() { exampleId := utils.GetIDWithDelimiter(pb.Sdk_SDK_JAVA.String(), "MOCK_EXAMPLE") saveExample("MOCK_EXAMPLE", pb.Sdk_SDK_JAVA.String()) - saveSnippet(exampleId, pb.Sdk_SDK_JAVA.String()) + saveSnippet(exampleId, pb.Sdk_SDK_JAVA.String(), false) savePCObjs(exampleId) }, args: args{ @@ -949,8 +953,8 @@ func saveExample(name, sdk string) { }) } -func saveSnippet(snipId, sdk string) { - _ = datastoreDb.PutSnippet(ctx, snipId, &entity.Snippet{ +func saveSnippet(snipId, sdk string, isMultifile bool) { + snippet := &entity.Snippet{ IDMeta: &entity.IDMeta{ Salt: "MOCK_SALT", IdLength: 11, @@ -959,7 +963,7 @@ func saveSnippet(snipId, sdk string) { Sdk: utils.GetSdkKey(ctx, sdk), PipeOpts: "MOCK_OPTIONS", Origin: constants.ExampleOrigin, - NumberOfFiles: 1, + NumberOfFiles: map[bool]int{false: 1, true: 2}[isMultifile], Complexity: pb.Complexity_COMPLEXITY_MEDIUM.String(), }, Files: []*entity.FileEntity{{ @@ -968,7 +972,17 @@ func saveSnippet(snipId, sdk string) { CntxLine: 32, IsMain: true, }}, - }) + } + if isMultifile { + snippet.Files = append(snippet.Files, &entity.FileEntity{ + Name: "MOCK_IMPORT_NAME", + Content: "MOCK_IMPORT_CONTENT", + CntxLine: 33, + IsMain: false, + }) + } + + _ = datastoreDb.PutSnippet(ctx, snipId, snippet) } func savePCObjs(exampleId string) { diff --git a/playground/backend/internal/db/db.go b/playground/backend/internal/db/db.go index 43e7b3235c400..8f73a57a59aaf 100644 --- a/playground/backend/internal/db/db.go +++ b/playground/backend/internal/db/db.go @@ -53,7 +53,7 @@ type ExampleDatabase interface { GetExample(ctx context.Context, id string, sdks []*entity.SDKEntity) (*pb.PrecompiledObject, error) - GetExampleCode(ctx context.Context, id string) (string, error) + GetExampleCode(ctx context.Context, id string) ([]*entity.FileEntity, error) GetExampleOutput(ctx context.Context, id string) (string, error) diff --git a/playground/backend/internal/db/entity/snippet.go b/playground/backend/internal/db/entity/snippet.go index 2552b51ce18b3..e4aa1fbf4a5a0 100644 --- a/playground/backend/internal/db/entity/snippet.go +++ b/playground/backend/internal/db/entity/snippet.go @@ -29,6 +29,11 @@ import ( "beam.apache.org/playground/backend/internal/logger" ) +type IDMeta struct { + Salt string + IdLength int8 +} + type FileEntity struct { Name string `datastore:"name"` Content string `datastore:"content,noindex"` diff --git 
a/playground/backend/internal/environment/environment_service_test.go b/playground/backend/internal/environment/environment_service_test.go index 0df904f14e9cc..531e765c19cd2 100644 --- a/playground/backend/internal/environment/environment_service_test.go +++ b/playground/backend/internal/environment/environment_service_test.go @@ -28,7 +28,7 @@ import ( ) const ( - javaConfig = "{\n \"compile_cmd\": \"javac\",\n \"run_cmd\": \"java\",\n \"test_cmd\": \"java\",\n \"compile_args\": [\n \"-d\",\n \"bin\",\n \"-classpath\"\n ],\n \"run_args\": [\n \"-cp\",\n \"bin:\"\n ],\n \"test_args\": [\n \"-cp\",\n \"bin:\",\n \"org.junit.runner.JUnitCore\"\n ]\n}" + javaConfig = "{\n \"compile_cmd\": \"javac\",\n \"run_cmd\": \"java\",\n \"test_cmd\": \"java\",\n \"compile_args\": [\n \"-d\",\n \"bin\",\n \"-parameters\",\n \"-classpath\"\n ],\n \"run_args\": [\n \"-cp\",\n \"bin:\"\n ],\n \"test_args\": [\n \"-cp\",\n \"bin:\",\n \"org.junit.runner.JUnitCore\"\n ]\n}" goConfig = "{\n \"compile_cmd\": \"go\",\n \"run_cmd\": \"\",\n \"test_cmd\": \"go\",\n \"compile_args\": [\n \"build\",\n \"-o\",\n \"bin\"\n ],\n \"run_args\": [\n ],\n \"test_args\": [\n \"test\",\n \"-v\"\n ]\n}\n" pythonConfig = "{\n \"compile_cmd\": \"\",\n \"run_cmd\": \"python3\",\n \"test_cmd\": \"pytest\",\n \"compile_args\": [],\n \"run_args\": [],\n \"test_args\": []\n}\n" scioConfig = "{\n \"compile_cmd\": \"\",\n \"run_cmd\": \"sbt\",\n \"test_cmd\": \"sbt\",\n \"compile_args\": [],\n \"run_args\": [\n \"runMain\"\n ],\n \"test_args\": []\n}\n" @@ -71,7 +71,7 @@ func setup() error { } executorConfig = NewExecutorConfig( "javac", "java", "java", - []string{"-d", "bin", "-classpath", jars}, + []string{"-d", "bin", "-parameters", "-classpath", jars}, []string{"-cp", "bin:" + jars}, []string{"-cp", "bin:" + jars, "org.junit.runner.JUnitCore"}, ) @@ -323,7 +323,7 @@ func Test_getConfigFromJson(t *testing.T) { { name: "Get object from json", args: args{filepath.Join(configFolderName, defaultSdk.String()+jsonExt)}, - want: NewExecutorConfig("javac", "java", "java", []string{"-d", "bin", "-classpath"}, []string{"-cp", "bin:"}, []string{"-cp", "bin:", "org.junit.runner.JUnitCore"}), + want: NewExecutorConfig("javac", "java", "java", []string{"-d", "bin", "-parameters", "-classpath"}, []string{"-cp", "bin:"}, []string{"-cp", "bin:", "org.junit.runner.JUnitCore"}), wantErr: false, }, { diff --git a/playground/backend/internal/executors/executor_test.go b/playground/backend/internal/executors/executor_test.go index 350ec65fa2170..603b748211fa6 100644 --- a/playground/backend/internal/executors/executor_test.go +++ b/playground/backend/internal/executors/executor_test.go @@ -47,13 +47,13 @@ func TestExecutor_Compile(t *testing.T) { fileName: "filePath", workingDir: "./", commandName: "testCommand", - commandArgs: []string{"-d", "bin", "-classpath", "/opt/apache/beam/jars/beam-sdks-java-harness.jar"}, + commandArgs: []string{"-d", "bin", "-parameters", "-classpath", "/opt/apache/beam/jars/beam-sdks-java-harness.jar"}, pipelineOptions: []string{""}, }, }, want: &exec.Cmd{ Path: "testCommand", - Args: []string{"javac", "-d", "bin", "-classpath", "/opt/apache/beam/jars/beam-sdks-java-harness.jar", "filePath"}, + Args: []string{"javac", "-d", "bin", "-parameters", "-classpath", "/opt/apache/beam/jars/beam-sdks-java-harness.jar", "filePath"}, Env: nil, Dir: "", Stdin: nil, diff --git a/playground/backend/internal/fs_tool/fs.go b/playground/backend/internal/fs_tool/fs.go index 43f39ae8ec89d..016014d1d40d8 100644 --- 
a/playground/backend/internal/fs_tool/fs.go +++ b/playground/backend/internal/fs_tool/fs.go @@ -25,6 +25,7 @@ import ( "github.com/google/uuid" pb "beam.apache.org/playground/backend/internal/api/v1" + "beam.apache.org/playground/backend/internal/db/entity" "beam.apache.org/playground/backend/internal/emulators" ) @@ -93,16 +94,21 @@ func (lc *LifeCycle) DeleteFolders() error { return nil } -// CreateSourceCodeFile creates an executable file (i.e. file.{sourceFileExtension}). -func (lc *LifeCycle) CreateSourceCodeFile(code string) error { +// CreateSourceCodeFiles creates an executable file (i.e. file.{sourceFileExtension}). +func (lc *LifeCycle) CreateSourceCodeFiles(sources []entity.FileEntity) error { if _, err := os.Stat(lc.Paths.AbsoluteSourceFileFolderPath); os.IsNotExist(err) { return err } - filePath := lc.Paths.AbsoluteSourceFilePath - err := os.WriteFile(filePath, []byte(code), fileMode) - if err != nil { - return err + for _, src := range sources { + filePath := lc.Paths.AbsoluteSourceFilePath + if !src.IsMain { + filePath = lc.Paths.AbsoluteSourceFileFolderPath + "/" + src.Name + } + err := os.WriteFile(filePath, []byte(src.Content), fileMode) + if err != nil { + return err + } } return nil } diff --git a/playground/backend/internal/fs_tool/fs_test.go b/playground/backend/internal/fs_tool/fs_test.go index 7985f1f8ea53c..7e4cc36221f7e 100644 --- a/playground/backend/internal/fs_tool/fs_test.go +++ b/playground/backend/internal/fs_tool/fs_test.go @@ -25,6 +25,7 @@ import ( "github.com/google/uuid" pb "beam.apache.org/playground/backend/internal/api/v1" + "beam.apache.org/playground/backend/internal/db/entity" "beam.apache.org/playground/backend/internal/utils" ) @@ -221,7 +222,8 @@ func TestLifeCycle_CreateSourceCodeFile(t *testing.T) { l := &LifeCycle{ Paths: tt.fields.Paths, } - if err := l.CreateSourceCodeFile(tt.args.code); (err != nil) != tt.wantErr { + sources := []entity.FileEntity{{Name: "main.java", Content: tt.args.code, IsMain: true}} + if err := l.CreateSourceCodeFiles(sources); (err != nil) != tt.wantErr { t.Errorf("CreateSourceCodeFile() error = %v, wantErr %v", err, tt.wantErr) } if !tt.wantErr { diff --git a/playground/backend/internal/preparers/java_preparers_test.go b/playground/backend/internal/preparers/java_preparers_test.go index 1e600205caac5..35b1d2d6e1e47 100644 --- a/playground/backend/internal/preparers/java_preparers_test.go +++ b/playground/backend/internal/preparers/java_preparers_test.go @@ -24,6 +24,7 @@ import ( "github.com/google/uuid" pb "beam.apache.org/playground/backend/internal/api/v1" + "beam.apache.org/playground/backend/internal/db/entity" "beam.apache.org/playground/backend/internal/fs_tool" ) @@ -39,7 +40,8 @@ func Test_replace(t *testing.T) { lc, _ := fs_tool.NewLifeCycle(pb.Sdk_SDK_JAVA, uuid.New(), filepath.Join(path, "temp")) _ = lc.CreateFolders() defer os.RemoveAll(filepath.Join(path, "temp")) - _ = lc.CreateSourceCodeFile(codeWithPublicClass) + sources := []entity.FileEntity{{Name: "main.java", Content: codeWithPublicClass, IsMain: true}} + _ = lc.CreateSourceCodeFiles(sources) type args struct { args []interface{} @@ -170,6 +172,7 @@ func createTempFileWithCode(code string) fs_tool.LifeCyclePaths { lc, _ := fs_tool.NewLifeCycle(pb.Sdk_SDK_JAVA, uuid.New(), filepath.Join(path, "temp")) _ = lc.CreateFolders() - _ = lc.CreateSourceCodeFile(code) + sources := []entity.FileEntity{{Name: "main.java", Content: code, IsMain: true}} + _ = lc.CreateSourceCodeFiles(sources) return lc.Paths } diff --git 
a/playground/backend/internal/setup_tools/life_cycle/life_cycle_setuper.go b/playground/backend/internal/setup_tools/life_cycle/life_cycle_setuper.go index cf92619162ae8..b7926835f61e7 100644 --- a/playground/backend/internal/setup_tools/life_cycle/life_cycle_setuper.go +++ b/playground/backend/internal/setup_tools/life_cycle/life_cycle_setuper.go @@ -28,6 +28,7 @@ import ( "github.com/google/uuid" pb "beam.apache.org/playground/backend/internal/api/v1" + "beam.apache.org/playground/backend/internal/db/entity" "beam.apache.org/playground/backend/internal/emulators" "beam.apache.org/playground/backend/internal/fs_tool" "beam.apache.org/playground/backend/internal/logger" @@ -53,7 +54,7 @@ const ( // Setup returns fs_tool.LifeCycle. // Also, prepares files and folders needed to code processing according to sdk -func Setup(sdk pb.Sdk, code string, pipelineId uuid.UUID, workingDir, pipelinesFolder, preparedModDir string, mockCluster emulators.EmulatorMockCluster) (*fs_tool.LifeCycle, error) { +func Setup(sdk pb.Sdk, sources []entity.FileEntity, pipelineId uuid.UUID, workingDir, pipelinesFolder, preparedModDir string, mockCluster emulators.EmulatorMockCluster) (*fs_tool.LifeCycle, error) { // create file system service lc, err := fs_tool.NewLifeCycle(sdk, pipelineId, filepath.Join(workingDir, pipelinesFolder)) if err != nil { @@ -97,7 +98,7 @@ func Setup(sdk pb.Sdk, code string, pipelineId uuid.UUID, workingDir, pipelinesF } // create file with code - err = lc.CreateSourceCodeFile(code) + err = lc.CreateSourceCodeFiles(sources) if err != nil { logger.Errorf("%s: RunCode(): CreateSourceCodeFile(): %s\n", pipelineId, err.Error()) lc.DeleteFolders() diff --git a/playground/backend/internal/setup_tools/life_cycle/life_cycle_setuper_test.go b/playground/backend/internal/setup_tools/life_cycle/life_cycle_setuper_test.go index 96ac3d7e8bc6f..556b4259c4431 100644 --- a/playground/backend/internal/setup_tools/life_cycle/life_cycle_setuper_test.go +++ b/playground/backend/internal/setup_tools/life_cycle/life_cycle_setuper_test.go @@ -25,6 +25,7 @@ import ( "github.com/google/uuid" playground "beam.apache.org/playground/backend/internal/api/v1" + "beam.apache.org/playground/backend/internal/db/entity" "beam.apache.org/playground/backend/internal/fs_tool" "beam.apache.org/playground/backend/internal/utils" ) @@ -328,7 +329,9 @@ func TestSetup(t *testing.T) { if err != nil { t.Errorf("Setup() error during test preparetion: %v", err) } - got, err := Setup(tt.args.sdk, tt.args.code, tt.args.pipelineId, tt.args.workingDir, tt.args.pipelinesFolder, tt.args.preparedModDir, nil) + + sources := []entity.FileEntity{{Name: "main.java", Content: tt.args.code, IsMain: true}} + got, err := Setup(tt.args.sdk, sources, tt.args.pipelineId, tt.args.workingDir, tt.args.pipelinesFolder, tt.args.preparedModDir, nil) if (err != nil) != tt.wantErr { t.Errorf("Setup() error = %v, wantErr %v", err, tt.wantErr) return diff --git a/playground/backend/internal/tests/test_data/test_data.go b/playground/backend/internal/tests/test_data/test_data.go index f302f710266c2..9e043016ed25c 100644 --- a/playground/backend/internal/tests/test_data/test_data.go +++ b/playground/backend/internal/tests/test_data/test_data.go @@ -101,7 +101,8 @@ func createSDKEntities(ctx context.Context) ([]*datastore.Key, []*entity.SDKEnti } func createExampleEntities(ctx context.Context) ([]*datastore.Key, []*entity.ExampleEntity) { - names := []string{"MOCK_DEFAULT_EXAMPLE", "MOCK_NAME_1", "MOCK_NAME_2", "MOCK_NAME_3", "MOCK_NAME_DATASET"} + names := 
[]string{"MOCK_DEFAULT_EXAMPLE", "MOCK_NAME_1", "MOCK_NAME_2", "MOCK_NAME_3", + "MOCK_NAME_DATASET", "MOCK_MULTIFILE"} keys := make([]*datastore.Key, 0) examples := make([]*entity.ExampleEntity, 0) for _, sdk := range pb.Sdk_name { @@ -140,13 +141,17 @@ func createSnippetEntities(ctx context.Context, examples []*entity.ExampleEntity now := time.Now() for _, example := range examples { key := utils.GetSnippetKey(ctx, example.Sdk.Name, example.Name) + numberOfFiles := 1 + if example.Name == "MOCK_MULTIFILE" { + numberOfFiles = 2 + } snippet := &entity.SnippetEntity{ Sdk: example.Sdk, PipeOpts: "MOCK_P_OPTS", Created: now, Origin: constants.ExampleOrigin, SchVer: utils.GetSchemaVerKey(ctx, "MOCK_VERSION"), - NumberOfFiles: 1, + NumberOfFiles: numberOfFiles, Complexity: pb.Complexity_COMPLEXITY_MEDIUM.String(), } if example.Name == "MOCK_NAME_DATASET" { @@ -166,15 +171,21 @@ func createFileEntities(ctx context.Context, examples []*entity.ExampleEntity) ( keys := make([]*datastore.Key, 0) files := make([]*entity.FileEntity, 0) for _, example := range examples { - key := utils.GetFileKey(ctx, example.Sdk.Name, example.Name, 0) - file := &entity.FileEntity{ - Name: "MOCK_NAME", - Content: "MOCK_CONTENT", - CntxLine: 10, - IsMain: true, + numberOfFiles := 1 + if example.Name == "MOCK_MULTIFILE" { + numberOfFiles = 2 + } + for idx := 0; idx < numberOfFiles; idx++ { + key := utils.GetFileKey(ctx, example.Sdk.Name, example.Name, idx) + file := &entity.FileEntity{ + Name: fmt.Sprintf("MOCK_NAME_%d", idx), + Content: fmt.Sprintf("MOCK_CONTENT_%d", idx), + CntxLine: 10 + int32(idx), + IsMain: idx < 1, + } + keys = append(keys, key) + files = append(files, file) } - keys = append(keys, key) - files = append(files, file) } return keys, files } diff --git a/playground/buf.gen.yaml b/playground/buf.gen.yaml index d04b54a6c5dd3..f12d511c35709 100644 --- a/playground/buf.gen.yaml +++ b/playground/buf.gen.yaml @@ -30,3 +30,9 @@ plugins: - name: dart out: frontend/playground_components/lib/src opt: grpc +- plugin: buf.build/protocolbuffers/python + out: infrastructure + opt: + - pyi_out=infrastructure +- plugin: buf.build/grpc/python + out: infrastructure diff --git a/playground/frontend/README.md b/playground/frontend/README.md index 862fcdacc050d..120d9da7e2ba0 100644 --- a/playground/frontend/README.md +++ b/playground/frontend/README.md @@ -141,6 +141,37 @@ Code can be automatically reformatted using: flutter format ./lib ``` +### Unit Tests + +To delete all generated files and re-generate them again and then run tests: + +```bash +./gradlew :playground:frontend:playground_components_test +./gradlew :playground:frontend:test +``` + +To run tests without re-generating files: + +```bash +cd playground/frontend/playground_components +flutter test +cd .. +flutter test +``` + +### Integration Tests + +Integration tests currently can be run only on a local development machine. +Server testing has not been verified yet. + +1. Install and run Chrome Driver: https://chromedriver.chromium.org/downloads +2. Run it on port 4444: `chromedriver --port=4444` +3. 
Run: + +```bash +./gradlew :playground:frontend:integrationTest +``` + ## Localization The project is in the process of migrating from diff --git a/playground/frontend/assets/drag_horizontal.svg b/playground/frontend/assets/drag_horizontal.svg deleted file mode 100644 index f5e8dcda558a7..0000000000000 --- a/playground/frontend/assets/drag_horizontal.svg +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - diff --git a/playground/frontend/assets/drag_vertical.svg b/playground/frontend/assets/drag_vertical.svg deleted file mode 100644 index fea5377776ef4..0000000000000 --- a/playground/frontend/assets/drag_vertical.svg +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - diff --git a/playground/frontend/assets/streaming.svg b/playground/frontend/assets/streaming.svg new file mode 100644 index 0000000000000..d4e4f94cc7341 --- /dev/null +++ b/playground/frontend/assets/streaming.svg @@ -0,0 +1,20 @@ + + + diff --git a/playground/frontend/assets/translations/en.yaml b/playground/frontend/assets/translations/en.yaml index c7d74f96d44f7..023e0324c8e75 100644 --- a/playground/frontend/assets/translations/en.yaml +++ b/playground/frontend/assets/translations/en.yaml @@ -19,3 +19,4 @@ intents: playground: clearOutput: 'Clear Output' newExample: 'New Example' + usesEmulatedData: 'This examples uses emulated data' diff --git a/playground/frontend/build.gradle b/playground/frontend/build.gradle index 27ca2c6ce0645..4de0f867b3632 100644 --- a/playground/frontend/build.gradle +++ b/playground/frontend/build.gradle @@ -17,18 +17,11 @@ */ -apply plugin: 'org.apache.beam.module' -apply plugin: 'base' +apply(plugin: "org.apache.beam.module") +apply(plugin: "base") applyDockerNature() -def playgroundBackendUrl = project.playgroundBackendUrl -def analyticsUA = project.analyticsUA -def playgroundBackendJavaRouteUrl = project.playgroundBackendJavaRouteUrl -def playgroundBackendGoRouteUrl = project.playgroundBackendGoRouteUrl -def playgroundBackendPythonRouteUrl = project.playgroundBackendPythonRouteUrl -def playgroundBackendScioRouteUrl = project.playgroundBackendScioRouteUrl - -def playgroundJobServerProject = "${project.path.replace('-container', '')}" +def playgroundJobServerProject = "${project.path.replace("-container", "")}" description = project(playgroundJobServerProject).description + " :: Container" @@ -37,10 +30,10 @@ configurations { } dependencies { - dockerDependency project(path: playgroundJobServerProject, configuration: "shadow") + dockerDependency(project(path: playgroundJobServerProject, configuration: "shadow")) } -task generate { +tasks.register("generate") { dependsOn("playground_components:generate") dependsOn("generateCode") @@ -49,7 +42,7 @@ task generate { description = "Generates all generated files." 
} -task printPath { +tasks.register("printPath") { doLast { exec { executable("printenv") @@ -58,7 +51,7 @@ task printPath { } } -task analyze { +tasks.register("analyze") { dependsOn("playground_components:generateCode") dependsOn("generateCode") @@ -74,7 +67,7 @@ task analyze { } } -task pubGet { +tasks.register("pubGet") { group = "build" description = "Get packages for the playground frontend project" doLast { @@ -85,7 +78,7 @@ task pubGet { } } -task format { +tasks.register("format") { group = "build" description = "Idiomatically format Dart source code" doLast { @@ -97,9 +90,10 @@ task format { } } -task run { +tasks.register("run") { group = "application" description = "Run application on Google Chrome" + doLast { exec { executable("flutter") @@ -108,7 +102,7 @@ task run { } } -task test { +tasks.register("test") { dependsOn("playground_components:generateCode") dependsOn("generateCode") @@ -123,14 +117,14 @@ task test { } } -task precommit { +tasks.register("precommit") { dependsOn("playground_components:precommit") dependsOn("analyze") dependsOn("test") } -task generateCode { +tasks.register("generateCode") { dependsOn("playground_components:generateCode") dependsOn("cleanFlutter") @@ -147,7 +141,7 @@ task generateCode { } } -task cleanFlutter { +tasks.register("cleanFlutter") { group = "build" description = "Remove build artifacts" @@ -159,7 +153,7 @@ task cleanFlutter { } } -task cleanGenerated { +tasks.register("cleanGenerated") { dependsOn("playground_components:cleanGenerated") group = "build" @@ -188,75 +182,59 @@ ext.deleteFilesByRegExp = { re -> } } +tasks.register("integrationTest") { + dependsOn("integrationTest_standalone_change_example_sdk_run") + dependsOn("integrationTest_standalone_miscellaneous_ui") +} + +tasks.register("integrationTest_standalone_change_example_sdk_run") { + runIntegrationTest("standalone_change_example_sdk_run", "/") +} + +tasks.register("integrationTest_standalone_miscellaneous_ui") { + runIntegrationTest("standalone_miscellaneous_ui", "/") +} + +void runIntegrationTest(String path, String url) { + exec { + executable("flutter") + args( + "drive", + "--driver=test_driver/integration_test.dart", + "--target=integration_test/${path}_test.dart", + "--web-launch-url='$url'", + "--device-id=chrome", + ) + } +} + task copyDockerfileDependencies(type: Copy) { group = "build" description = "Copy files that required to build docker container" copy { - from '.' - into 'build/' - exclude 'build' - exclude 'Dockerfile' + from(".") + into("build/") + exclude("build") + exclude("Dockerfile") } copy { - from '../playground' - into 'build/playground' + from("../playground") + into("build/playground") } } docker { group = "build" description = "Build container for playground frontend application" - name containerImageName( - name: project.docker_image_default_repo_prefix + "playground-frontend", - root: project.rootProject.hasProperty(["docker-repository-root"]) ? - project.rootProject["docker-repository-root"] : - project.docker_image_default_repo_root) - files "./build/" - tags containerImageTags() - buildArgs(['FLUTTER_VERSION': project.rootProject.hasProperty(["flutter-version"]) ? - project.rootProject["flutter-version"] : - "3.3.2" ]) + name = containerImageName( + name: project.docker_image_default_repo_prefix + "playground-frontend", + root: project.rootProject.hasProperty(["docker-repository-root"]) + ? 
project.rootProject["docker-repository-root"] + : project.docker_image_default_repo_root + ) + files("./build/") + tags(containerImageTags()) } // Ensure that we build the required resources and copy and file dependencies from related projects -dockerPrepare.dependsOn copyDockerfileDependencies - -task("createConfig") { - group = "build" - description = "Generate config for the playground frontend project" - doLast { - def configFileName = "config.g.dart" - def modulePath = project(":playground:frontend").projectDir.absolutePath - def file = new File(modulePath + "/lib", configFileName) - file.write("""/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -const String kApiClientURL = - '${playgroundBackendUrl}'; -const String kAnalyticsUA = '${analyticsUA}'; -const String kApiJavaClientURL = - '${playgroundBackendJavaRouteUrl}'; -const String kApiGoClientURL = - '${playgroundBackendGoRouteUrl}'; -const String kApiPythonClientURL = - '${playgroundBackendPythonRouteUrl}'; -const String kApiScioClientURL = - '${playgroundBackendScioRouteUrl}'; -""") - } -} +dockerPrepare.dependsOn(copyDockerfileDependencies) diff --git a/playground/frontend/integration_test/common/common.dart b/playground/frontend/integration_test/common/common.dart new file mode 100644 index 0000000000000..83575a99cb6c7 --- /dev/null +++ b/playground/frontend/integration_test/common/common.dart @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import 'package:flutter_test/flutter_test.dart'; +import 'package:playground/main.dart' as app; + +Future init(WidgetTester wt) async { + app.main(); + await wt.pumpAndSettle(); +} + +void expectHasDescendant(Finder ancestor, Finder descendant) { + expect( + find.descendant(of: ancestor, matching: descendant), + findsOneWidget, + ); +} + +void expectSimilar(double a, double b) { + Matcher closeToFraction(num value, double fraction) => + closeTo(value, value * fraction); + Matcher onePerCentTolerance(num value) => closeToFraction(value, 0.01); + expect(a, onePerCentTolerance(b)); +} diff --git a/playground/frontend/integration_test/common/common_finders.dart b/playground/frontend/integration_test/common/common_finders.dart new file mode 100644 index 0000000000000..7d906ee055ad0 --- /dev/null +++ b/playground/frontend/integration_test/common/common_finders.dart @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import 'package:flutter/widgets.dart'; +import 'package:flutter_test/flutter_test.dart'; +import 'package:playground/modules/examples/components/description_popover/description_popover.dart'; +import 'package:playground/modules/examples/components/description_popover/description_popover_button.dart'; +import 'package:playground/modules/examples/example_selector.dart'; +import 'package:playground/modules/sdk/components/sdk_selector.dart'; +import 'package:playground/modules/sdk/components/sdk_selector_row.dart'; +import 'package:playground/modules/shortcuts/components/shortcuts_modal.dart'; +import 'package:playground/pages/standalone_playground/widgets/editor_textarea_wrapper.dart'; +import 'package:playground/pages/standalone_playground/widgets/feedback/feedback_dropdown_content.dart'; +import 'package:playground/pages/standalone_playground/widgets/feedback/playground_feedback.dart'; +import 'package:playground/pages/standalone_playground/widgets/more_actions.dart'; +import 'package:playground_components/playground_components.dart'; +import 'package:playground_components/src/widgets/drag_handle.dart'; +import 'package:playground_components_dev/playground_components_dev.dart'; + +extension CommonFindersExtension on CommonFinders { + Finder codeTextAreaWrapper() { + return byType(CodeTextAreaWrapper); + } + + Finder descriptionPopoverButton() { + return byType(DescriptionPopoverButton); + } + + Finder descriptionPopover() { + return byType(DescriptionPopover); + } + + Finder dragHandle() { + return byType(DragHandle); + } + + Finder exampleItemInDropdown(String name) { + return widgetWithText(GestureDetector, name); + } + + Finder exampleSelector() { + return byType(ExampleSelector); + } + + Finder feedbackDropdownCancelButton() { + return find.byKey(FeedbackDropdownContent.cancelButtonKey); + } + + Finder 
feedbackDropdownContent() { + return byType(FeedbackDropdownContent); + } + + Finder feedbackDropdownSendButton() { + return find.byKey(FeedbackDropdownContent.sendButtonKey); + } + + Finder feedbackDropdownTextField() { + return find.byKey(FeedbackDropdownContent.textFieldKey); + } + + Finder feedbackThumbDown() { + return find.byKey(PlaygroundFeedback.thumbDownKey); + } + + Finder feedbackThumbUp() { + return find.byKey(PlaygroundFeedback.thumbUpKey); + } + + Finder moreActions() { + return byType(MoreActions); + } + + Finder sdkItemInDropdown(Sdk sdk) { + return find.byType(SdkSelectorRow).and(find.byKey(ValueKey(sdk))); + } + + Finder sdkSelector() { + return byType(SDKSelector); + } + + Finder shortcutsModal() { + return byType(ShortcutsModal); + } +} diff --git a/playground/frontend/integration_test/miscellaneous_ui/description_test.dart b/playground/frontend/integration_test/miscellaneous_ui/description_test.dart new file mode 100644 index 0000000000000..17c9e29df337c --- /dev/null +++ b/playground/frontend/integration_test/miscellaneous_ui/description_test.dart @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import 'package:flutter/services.dart'; +import 'package:flutter_test/flutter_test.dart'; +import 'package:playground_components_dev/playground_components_dev.dart'; + +import '../common/common.dart'; +import '../common/common_finders.dart'; + +Future<void> checkDescription(WidgetTester wt) async { + await wt.tap(find.descriptionPopoverButton()); + await wt.pumpAndSettle(); + + expect(find.descriptionPopover(), findsOneWidget); + + final example = wt.findPlaygroundController().selectedExample!; + + expectHasDescendant(find.descriptionPopover(), find.text(example.name)); + expectHasDescendant( + find.descriptionPopover(), + find.text(example.description), + ); + + // TODO: Check that the description contains the GitHub and Colab links + // once https://github.com/apache/beam/pull/24820 is merged. + + await wt.sendKeyEvent(LogicalKeyboardKey.escape); + await wt.pumpAndSettle(); + + expect(find.descriptionPopover(), findsNothing); +} diff --git a/playground/frontend/integration_test/miscellaneous_ui/enjoy_playground_test.dart b/playground/frontend/integration_test/miscellaneous_ui/enjoy_playground_test.dart new file mode 100644 index 0000000000000..a69d9eac115fc --- /dev/null +++ b/playground/frontend/integration_test/miscellaneous_ui/enjoy_playground_test.dart @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import 'package:flutter_test/flutter_test.dart'; +import 'package:playground/modules/analytics/analytics_event.dart'; +import 'package:playground/modules/analytics/analytics_events.dart'; +import 'package:playground/modules/analytics/analytics_service.dart'; + +import '../common/common_finders.dart'; + +Future checkEnjoyPlayground(WidgetTester wt) async { + await _checkEnjoyingAndSendFeedback(wt); + await _checkNotEnjoyingAndSendFeedback(wt); + await _checkNotEnjoyingAndClose(wt); +} + +Future _checkNotEnjoyingAndClose(WidgetTester wt) async { + await wt.tap(find.feedbackThumbDown()); + await wt.pumpAndSettle(); + + expect(find.feedbackDropdownContent(), findsOneWidget); + + await wt.tap(find.feedbackDropdownCancelButton()); + await wt.pumpAndSettle(); + + expect(find.feedbackDropdownContent(), findsNothing); +} + +Future _checkEnjoyingAndSendFeedback(WidgetTester wt) async { + expect(find.feedbackDropdownContent(), findsNothing); + + await wt.tap(find.feedbackThumbUp()); + await wt.pumpAndSettle(); + + expect(find.feedbackDropdownContent(), findsOneWidget); + + const text = 'This is enjoying text'; + await wt.enterText(find.feedbackDropdownTextField(), text); + await wt.pumpAndSettle(); + + expect(find.text(text), findsOneWidget); + + await wt.tap(find.feedbackDropdownSendButton()); + await wt.pumpAndSettle(); + + final context = wt.element(find.feedbackThumbUp()); + final lastSentEvent = AnalyticsService.get(context).lastSentEvent; + expect( + lastSentEvent, + AnalyticsEvent( + category: kFeedbackCategory, + action: kClickSendFeedbackEvent, + label: text, + ), + ); + + expect(find.feedbackDropdownContent(), findsNothing); +} + +Future _checkNotEnjoyingAndSendFeedback(WidgetTester wt) async { + await wt.tap(find.feedbackThumbDown()); + await wt.pumpAndSettle(); + + expect(find.feedbackDropdownContent(), findsOneWidget); + + const text = 'This is not enjoying text'; + await wt.enterText(find.feedbackDropdownTextField(), text); + await wt.pumpAndSettle(); + + expect(find.text(text), findsOneWidget); + + await wt.tap(find.feedbackDropdownSendButton()); + await wt.pumpAndSettle(); + + final context = wt.element(find.feedbackThumbDown()); + final lastSentEvent = AnalyticsService.get(context).lastSentEvent; + expect( + lastSentEvent, + AnalyticsEvent( + category: kFeedbackCategory, + action: kClickSendFeedbackEvent, + label: text, + ), + ); + + expect(find.feedbackDropdownContent(), findsNothing); +} diff --git a/playground/frontend/integration_test/miscellaneous_ui/output_placement_test.dart b/playground/frontend/integration_test/miscellaneous_ui/output_placement_test.dart new file mode 100644 index 0000000000000..d0c639d65be55 --- /dev/null +++ b/playground/frontend/integration_test/miscellaneous_ui/output_placement_test.dart @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import 'package:flutter/widgets.dart'; +import 'package:flutter_test/flutter_test.dart'; +import 'package:playground/modules/output/models/output_placement.dart'; +import 'package:playground_components_dev/playground_components_dev.dart'; + +import '../common/common.dart'; +import '../common/common_finders.dart'; + +Future checkOutputPlacement(WidgetTester wt) async { + Offset getCodeAreaCenter() => wt.getCenter(find.codeTextAreaWrapper()); + Offset getOutputCenter() => wt.getCenter(find.outputWidget()); + + await wt.tap(find.byKey(const ValueKey(OutputPlacement.left))); + await wt.pumpAndSettle(); + expect(getCodeAreaCenter().dx > getOutputCenter().dx, true); + expectSimilar(getCodeAreaCenter().dy, getOutputCenter().dy); + + await wt.tap(find.byKey(const ValueKey(OutputPlacement.right))); + await wt.pumpAndSettle(); + expect(getCodeAreaCenter().dx < getOutputCenter().dx, true); + expectSimilar(getCodeAreaCenter().dy, getOutputCenter().dy); + + await wt.tap(find.byKey(const ValueKey(OutputPlacement.bottom))); + await wt.pumpAndSettle(); + expect(getCodeAreaCenter().dy < getOutputCenter().dy, true); + expectSimilar(getCodeAreaCenter().dx, getOutputCenter().dx); +} diff --git a/playground/frontend/integration_test/miscellaneous_ui/resize_output_test.dart b/playground/frontend/integration_test/miscellaneous_ui/resize_output_test.dart new file mode 100644 index 0000000000000..ae2daaa542248 --- /dev/null +++ b/playground/frontend/integration_test/miscellaneous_ui/resize_output_test.dart @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import 'package:flutter/widgets.dart'; +import 'package:flutter_test/flutter_test.dart'; +import 'package:playground/modules/output/models/output_placement.dart'; +import 'package:playground_components/playground_components.dart'; +import 'package:playground_components_dev/playground_components_dev.dart'; + +import '../common/common.dart'; +import '../common/common_finders.dart'; + +Future checkResizeOutput(WidgetTester wt) async { + final dragHandleStartPosition = wt.getCenter(find.dragHandle()); + Future resetSplitViewRatio() async { + final currentPosition = wt.getCenter(find.dragHandle()); + final offset = dragHandleStartPosition - currentPosition; + await wt.drag(find.dragHandle(), offset); + await wt.pumpAndSettle(); + } + + await _checkDragVertically(wt); + await resetSplitViewRatio(); + + await _checkExcessivelyDragVertically(wt); + await resetSplitViewRatio(); + + await wt.tap(find.byKey(const ValueKey(OutputPlacement.left))); + await wt.pumpAndSettle(); + + await _checkDragHorizontally(wt); + await resetSplitViewRatio(); + + await _checkExcessivelyDragHorizontally(wt); + await resetSplitViewRatio(); +} + +Future _checkDragVertically(WidgetTester wt) async { + final height = wt.getSize(find.splitView()).height; + var dragHandlePosition = wt.getCenter(find.dragHandle()); + + await wt.drag(find.dragHandle(), Offset(0, height * 0.1)); + await wt.pumpAndSettle(); + + var newPosition = wt.getCenter(find.dragHandle()); + expectSimilar(newPosition.dy, dragHandlePosition.dy + height * 0.1); +} + +Future _checkExcessivelyDragVertically(WidgetTester wt) async { + final height = wt.getSize(find.splitView()).height; + final dragHandlePosition = wt.getCenter(find.dragHandle()); + + await wt.drag(find.dragHandle(), Offset(0, height * 0.9)); + await wt.pumpAndSettle(); + + final newPosition = wt.getCenter(find.dragHandle()); + final maxDy = height * (maxRatio - defaultRatio); + expectSimilar( + newPosition.dy, + dragHandlePosition.dy + maxDy, + ); +} + +Future _checkDragHorizontally(WidgetTester wt) async { + final width = wt.getSize(find.splitView()).width; + final dragHandlePosition = wt.getCenter(find.dragHandle()); + + await wt.drag(find.dragHandle(), Offset(width * 0.1, 0)); + await wt.pumpAndSettle(); + + final newPosition = wt.getCenter(find.dragHandle()); + expectSimilar(newPosition.dx, dragHandlePosition.dx + width * 0.1); +} + +Future _checkExcessivelyDragHorizontally(WidgetTester wt) async { + final width = wt.getSize(find.splitView()).width; + final dragHandlePosition = wt.getCenter(find.dragHandle()); + + await wt.drag(find.dragHandle(), Offset(width * 0.9, 0)); + await wt.pumpAndSettle(); + + final newPosition = wt.getCenter(find.dragHandle()); + final maxDx = width * (maxRatio - defaultRatio); + expectSimilar( + newPosition.dx, + dragHandlePosition.dx + maxDx, + ); +} diff --git a/playground/frontend/integration_test/miscellaneous_ui/shortcuts_modal_test.dart b/playground/frontend/integration_test/miscellaneous_ui/shortcuts_modal_test.dart new file mode 100644 index 0000000000000..e12752abef3cc --- /dev/null +++ b/playground/frontend/integration_test/miscellaneous_ui/shortcuts_modal_test.dart @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import 'package:flutter/services.dart'; +import 'package:flutter_gen/gen_l10n/app_localizations.dart'; +import 'package:flutter_test/flutter_test.dart'; + +import '../common/common_finders.dart'; + +Future checkShortcutsModal(WidgetTester wt) async { + expect(find.shortcutsModal(), findsNothing); + + AppLocalizations appLocale = + AppLocalizations.of(wt.element(find.moreActions()))!; + + await wt.tap(find.moreActions()); + await wt.pumpAndSettle(); + + expect(find.text(appLocale.shortcuts), findsOneWidget); + + await wt.tap(find.text(appLocale.shortcuts)); + await wt.pumpAndSettle(); + + expect(find.shortcutsModal(), findsOneWidget); + + await wt.tap(find.text(appLocale.close)); + await wt.pumpAndSettle(); + + expect(find.shortcutsModal(), findsNothing); + + await wt.sendKeyEvent(LogicalKeyboardKey.escape); + await wt.pumpAndSettle(); +} diff --git a/playground/frontend/integration_test/miscellaneous_ui/toggle_brightness_mode_test.dart b/playground/frontend/integration_test/miscellaneous_ui/toggle_brightness_mode_test.dart new file mode 100644 index 0000000000000..16bb9a3cc33d0 --- /dev/null +++ b/playground/frontend/integration_test/miscellaneous_ui/toggle_brightness_mode_test.dart @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import 'package:flutter/material.dart'; +import 'package:flutter_test/flutter_test.dart'; +import 'package:playground_components_dev/playground_components_dev.dart'; + +Future checkToggleBrightnessMode(WidgetTester wt) async { + Brightness getBrightness() { + return Theme.of(wt.element(find.toggleThemeButton())).brightness; + } + + Future toggleTheme() async { + await wt.tap(find.toggleThemeButton()); + await wt.pumpAndSettle(); + } + + final startBrightness = getBrightness(); + final invertedBrightness = + startBrightness == Brightness.light ? 
Brightness.dark : Brightness.light; + + await toggleTheme(); + expect(getBrightness(), invertedBrightness); + await toggleTheme(); + expect(getBrightness(), startBrightness); +} diff --git a/playground/frontend/integration_test/standalone_change_example_sdk_run_test.dart b/playground/frontend/integration_test/standalone_change_example_sdk_run_test.dart new file mode 100644 index 0000000000000..f7b601c877eae --- /dev/null +++ b/playground/frontend/integration_test/standalone_change_example_sdk_run_test.dart @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import 'package:flutter_test/flutter_test.dart'; +import 'package:highlight/languages/java.dart'; +import 'package:highlight/languages/python.dart'; +import 'package:integration_test/integration_test.dart'; +import 'package:playground_components/playground_components.dart'; +import 'package:playground_components_dev/playground_components_dev.dart'; + +import 'common/common.dart'; +import 'common/common_finders.dart'; + +const _outputPrefix = 'The processing has started\n'; + +void main() { + IntegrationTestWidgetsFlutterBinding.ensureInitialized(); + + /// Runs the selected example and expects the run to be fast enough to have come from the cache.
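+ /// A cached run should finish within the 2-second limit asserted below; a cold run would typically take longer.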
+ Future runExpectCached(WidgetTester wt) async { + final dateTimeStart = DateTime.now(); + + await wt.tap(find.runOrCancelButton()); + await wt.pumpAndSettle(); + + expect( + DateTime.now().difference(dateTimeStart), + lessThan(const Duration(milliseconds: 2000)), + ); + } + + Future expectJavaMinimalWordCount(WidgetTester wt) async { + expect( + wt.findOneCodeController().lastTextSpan!.toPlainText().isAsIfCutFrom( + await Examples.getVisibleTextByPath( + ExamplePaths.javaMinimalWordCount, + java, + ), + ), + true, + ); + + expect(find.graphTab(), findsOneWidget); + expect(find.resultTab(), findsOneWidget); + expect(wt.findOutputTabController().index, 0); + } + + Future changeToJavaAggregationMax(WidgetTester wt) async { + await wt.tap(find.exampleSelector()); + await wt.pumpAndSettle(); + + await wt.tap(find.exampleItemInDropdown(ExampleNames.aggregationMax)); + await wt.pumpAndSettle(); + + expect( + wt.findOneCodeController().lastTextSpan!.toPlainText().isAsIfCutFrom( + await Examples.getVisibleTextByPath( + ExamplePaths.javaAggregationMax, + java, + ), + ), + true, + ); + } + + Future runExpectJavaAggregationMax(WidgetTester wt) async { + await runExpectCached(wt); + expectOutputEndsWith(ExampleOutputs.javaAggregationMaxTail, wt); + } + + Future runCustomJava(WidgetTester wt) async { + const text = 'OK'; + const code = ''' +public class MyClass { + public static void main(String[] args) { + System.out.print("$text"); + } +} +'''; + + await wt.enterText(find.codeField(), code); + await wt.pumpAndSettle(); + + await wt.tap(find.runOrCancelButton()); + await wt.pumpAndSettle(); + + expectOutput('$_outputPrefix$text', wt); + } + + Future switchToPython(WidgetTester wt) async { + await wt.tap(find.sdkSelector()); + await wt.pumpAndSettle(); + + await wt.tap(find.sdkItemInDropdown(Sdk.python)); + await wt.pumpAndSettle(); + + expect( + wt.findOneCodeController().lastTextSpan!.toPlainText().isAsIfCutFrom( + await Examples.getVisibleTextByPath( + ExamplePaths.pythonMinimalWordCountWithMetrics, + python, + ), + ), + true, + ); + } + + Future changeToPythonAggregationMean(WidgetTester wt) async { + await wt.tap(find.exampleSelector()); + await wt.pumpAndSettle(); + + await wt.tap(find.exampleItemInDropdown(ExampleNames.aggregationMean)); + await wt.pumpAndSettle(); + + // Cannot test this because the DB examples differ from GitHub now. + // TODO(alexeyinkin): Uncomment when DB is up-to-date. 
+ // expect( + // wt.findOneCodeController().lastTextSpan!.toPlainText().isAsIfCutFrom( + // await Examples.getVisibleTextByPath( + // ExamplePaths.pythonAggregationMean, + // python, + // ), + // ), + // true, + // ); + } + + Future runExpectPythonAggregationMean(WidgetTester wt) async { + await runExpectCached(wt); + expectOutputContains(ExampleOutputs.pythonAggregationMeanContains, wt); + } + + Future runCustomPython(WidgetTester wt) async { + const text = 'OK'; + const code = 'print("$text", end="")'; + + await wt.enterText(find.codeField(), code); + await wt.pumpAndSettle(); + + await wt.tap(find.runOrCancelButton()); + await wt.pumpAndSettle(); + + expectOutput('$_outputPrefix$text', wt); + } + + testWidgets('Change example, change SDK, run', (WidgetTester wt) async { + await init(wt); + + await expectJavaMinimalWordCount(wt); + await changeToJavaAggregationMax(wt); + await runExpectJavaAggregationMax(wt); + await runCustomJava(wt); + + await switchToPython(wt); + await changeToPythonAggregationMean(wt); + await runExpectPythonAggregationMean(wt); + await runCustomPython(wt); + }); +} diff --git a/playground/frontend/integration_test/standalone_miscellaneous_ui_test.dart b/playground/frontend/integration_test/standalone_miscellaneous_ui_test.dart new file mode 100644 index 0000000000000..82bfd7175df6f --- /dev/null +++ b/playground/frontend/integration_test/standalone_miscellaneous_ui_test.dart @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import 'package:flutter_test/flutter_test.dart'; +import 'package:integration_test/integration_test.dart'; + +import 'common/common.dart'; +import 'miscellaneous_ui/description_test.dart'; +import 'miscellaneous_ui/enjoy_playground_test.dart'; +import 'miscellaneous_ui/output_placement_test.dart'; +import 'miscellaneous_ui/resize_output_test.dart'; +import 'miscellaneous_ui/shortcuts_modal_test.dart'; +import 'miscellaneous_ui/toggle_brightness_mode_test.dart'; + +void main() { + IntegrationTestWidgetsFlutterBinding.ensureInitialized(); + testWidgets( + 'Check UI, not connected with running examples', + (WidgetTester wt) async { + await init(wt); + + await checkEnjoyPlayground(wt); + await checkDescription(wt); + await checkOutputPlacement(wt); + await checkResizeOutput(wt); + await checkShortcutsModal(wt); + await checkToggleBrightnessMode(wt); + }, + ); +} diff --git a/playground/frontend/lib/components/banner/banner_button.dart b/playground/frontend/lib/components/banner/banner_button.dart index 78e7002cf8f74..f0a9ddf835ccd 100644 --- a/playground/frontend/lib/components/banner/banner_button.dart +++ b/playground/frontend/lib/components/banner/banner_button.dart @@ -19,8 +19,9 @@ import 'package:aligned_dialog/aligned_dialog.dart'; import 'package:flutter/material.dart'; import 'package:flutter_svg/flutter_svg.dart'; -import 'package:playground/components/banner/banner_description.dart'; -import 'package:playground/constants/assets.dart'; + +import '../../src/assets/assets.gen.dart'; +import 'banner_description.dart'; class BannerButton extends StatelessWidget { const BannerButton({Key? key}) : super(key: key); @@ -38,7 +39,7 @@ class BannerButton extends StatelessWidget { barrierColor: Colors.transparent, ); }, - child: SvgPicture.asset(kBeamIconAsset), + child: SvgPicture.asset(Assets.beam.path), ); } } diff --git a/playground/frontend/lib/components/dropdown_button/dropdown_button.dart b/playground/frontend/lib/components/dropdown_button/dropdown_button.dart index c2aff6f2f9503..17a0d692f92d7 100644 --- a/playground/frontend/lib/components/dropdown_button/dropdown_button.dart +++ b/playground/frontend/lib/components/dropdown_button/dropdown_button.dart @@ -37,7 +37,7 @@ enum DropdownAlignment { class AppDropdownButton extends StatefulWidget { final Widget buttonText; final Widget Function(void Function()) createDropdown; - final double height; + final double? height; final double width; final Widget? 
leading; final bool showArrow; @@ -47,8 +47,8 @@ class AppDropdownButton extends StatefulWidget { super.key, required this.buttonText, required this.createDropdown, - required this.height, required this.width, + this.height, this.leading, this.showArrow = true, this.dropdownAlign = DropdownAlignment.left, diff --git a/playground/frontend/lib/components/logo/logo_component.dart b/playground/frontend/lib/components/logo/logo_component.dart index b14f096141063..d65321aa34b1d 100644 --- a/playground/frontend/lib/components/logo/logo_component.dart +++ b/playground/frontend/lib/components/logo/logo_component.dart @@ -17,10 +17,11 @@ */ import 'package:flutter/material.dart'; -import 'package:playground/constants/assets.dart'; -import 'package:playground/constants/font_weight.dart'; -import 'package:playground/constants/fonts.dart'; -import 'package:playground/constants/sizes.dart'; + +import '../../constants/font_weight.dart'; +import '../../constants/fonts.dart'; +import '../../constants/sizes.dart'; +import '../../src/assets/assets.gen.dart'; const double kTitleFontSize = 18; @@ -33,8 +34,8 @@ class Logo extends StatelessWidget { return Row( mainAxisSize: MainAxisSize.min, children: [ - const Image( - image: AssetImage(kBeamLgIconAsset), + Image( + image: AssetImage(Assets.beamLg.path), width: kIconSizeLg, height: kIconSizeLg, ), diff --git a/playground/frontend/lib/constants/assets.dart b/playground/frontend/lib/constants/assets.dart deleted file mode 100644 index 796a97daa87fb..0000000000000 --- a/playground/frontend/lib/constants/assets.dart +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -const kThemeIconAsset = 'theme.svg'; -const kResetIconAsset = 'reset.svg'; -const kOutputBottomIconAsset = 'output_bottom.svg'; -const kOutputRightIconAsset = 'output_right.svg'; -const kOutputLeftIconAsset = 'output_left.svg'; -const kShortcutsIconAsset = 'shortcuts.svg'; -const kGithubIconAsset = 'github.svg'; -const kBeamIconAsset = 'beam.png'; -const kBeamLgIconAsset = 'beam_lg.png'; -const kThumbUpIconAsset = 'thumb_up.svg'; -const kThumbUpIconAssetFilled = 'thumb_up_filled.svg'; -const kThumbDownIconAsset = 'thumb_down.svg'; -const kThumbDownIconAssetFilled = 'thumb_down_filled.svg'; -const kCopyIconAsset = 'copy.svg'; -const kLinkIconAsset = 'link.svg'; -const kDragHorizontalIconAsset = 'drag_horizontal.svg'; -const kDragVerticalIconAsset = 'drag_vertical.svg'; -const kMultifileIconAsset = 'multifile.svg'; - -// notifications icons -const kErrorNotificationIconAsset = 'error_notification.svg'; -const kWarningNotificationIconAsset = 'warning_notification.svg'; -const kSuccessNotificationIconAsset = 'success_notification.svg'; -const kInfoNotificationIconAsset = 'info_notification.svg'; diff --git a/playground/frontend/lib/main.dart b/playground/frontend/lib/main.dart index ead9321f5a12c..f65bf8aa916f7 100644 --- a/playground/frontend/lib/main.dart +++ b/playground/frontend/lib/main.dart @@ -42,7 +42,7 @@ void main() async { // Router API specific initialization. final pageStack = GetIt.instance.get(); - final routerDelegate = PageStackRouterDelegate(pageStack); + final routerDelegate = BeamRouterDelegate(pageStack); final routeInformationParser = PlaygroundRouteInformationParser(); final backButtonDispatcher = PageStackBackButtonDispatcher(pageStack); diff --git a/playground/frontend/lib/modules/analytics/analytics_event.dart b/playground/frontend/lib/modules/analytics/analytics_event.dart new file mode 100644 index 0000000000000..d82169e0b7bde --- /dev/null +++ b/playground/frontend/lib/modules/analytics/analytics_event.dart @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import 'package:equatable/equatable.dart'; + +class AnalyticsEvent with EquatableMixin { + final String action; + final String category; + final String? label; + final Map? parameters; + final int? 
value; + + AnalyticsEvent({ + required this.action, + required this.category, + this.label, + this.parameters, + this.value, + }); + + @override + List<Object?> get props => [ + action, + category, + label, + parameters, + value, + ]; +} diff --git a/playground/frontend/lib/modules/analytics/analytics_service.dart b/playground/frontend/lib/modules/analytics/analytics_service.dart index 0416e7dcc520c..90853e9c3783e 100644 --- a/playground/frontend/lib/modules/analytics/analytics_service.dart +++ b/playground/frontend/lib/modules/analytics/analytics_service.dart @@ -17,10 +17,13 @@ */ import 'package:flutter/widgets.dart'; +import 'package:playground/modules/analytics/analytics_event.dart'; import 'package:playground_components/playground_components.dart'; import 'package:provider/provider.dart'; abstract class AnalyticsService { + AnalyticsEvent? get lastSentEvent; + static AnalyticsService get(BuildContext context) { return Provider.of<AnalyticsService>(context, listen: false); } diff --git a/playground/frontend/lib/modules/analytics/google_analytics_service.dart b/playground/frontend/lib/modules/analytics/google_analytics_service.dart index 7b083b3bc25ea..ab34707081669 100644 --- a/playground/frontend/lib/modules/analytics/google_analytics_service.dart +++ b/playground/frontend/lib/modules/analytics/google_analytics_service.dart @@ -22,9 +22,14 @@ import 'package:playground/modules/analytics/analytics_service.dart'; import 'package:playground_components/playground_components.dart'; import 'package:usage/usage_html.dart'; +import 'analytics_event.dart'; + class GoogleAnalyticsService implements AnalyticsService { final _analytics = AnalyticsHtml(kAnalyticsUA, 'beam', '1.0'); + @override + AnalyticsEvent? lastSentEvent; + @override void trackSelectSdk(Sdk? oldSdk, Sdk newSdk) { safeSendEvent( @@ -137,6 +142,13 @@ class GoogleAnalyticsService implements AnalyticsService { value: value, parameters: parameters, ); + lastSentEvent = AnalyticsEvent( + category: category, + action: action, + label: label, + value: value, + parameters: parameters, + ); } catch (e) { // ignore analytics errors since they don't affect the app print(e); diff --git a/playground/frontend/lib/modules/examples/components/description_popover/description_popover.dart b/playground/frontend/lib/modules/examples/components/description_popover/description_popover.dart index e368973cb64b5..929e2006b954b 100644 --- a/playground/frontend/lib/modules/examples/components/description_popover/description_popover.dart +++ b/playground/frontend/lib/modules/examples/components/description_popover/description_popover.dart @@ -17,14 +17,15 @@ */ import 'package:flutter/material.dart'; -import 'package:flutter_svg/flutter_svg.dart'; import 'package:flutter_gen/gen_l10n/app_localizations.dart'; -import 'package:playground/constants/assets.dart'; -import 'package:playground/constants/font_weight.dart'; -import 'package:playground/constants/sizes.dart'; +import 'package:flutter_svg/flutter_svg.dart'; import 'package:playground_components/playground_components.dart'; import 'package:url_launcher/url_launcher.dart'; +import '../../../../constants/font_weight.dart'; +import '../../../../constants/sizes.dart'; +import '../../../../src/assets/assets.gen.dart'; + const kDescriptionWidth = 300.0; class DescriptionPopover extends StatelessWidget { @@ -66,7 +67,7 @@ class DescriptionPopover extends StatelessWidget { Widget getViewOnGithub(BuildContext context) { AppLocalizations appLocale = AppLocalizations.of(context)!; return TextButton.icon( - icon:
SvgPicture.asset(kGithubIconAsset), + icon: SvgPicture.asset(Assets.github), onPressed: () { launchUrl(Uri.parse(example.link ?? '')); }, diff --git a/playground/frontend/lib/modules/examples/components/example_list/example_item_actions.dart b/playground/frontend/lib/modules/examples/components/example_list/example_item_actions.dart index 30b67fa0edd36..df0c8a8375014 100644 --- a/playground/frontend/lib/modules/examples/components/example_list/example_item_actions.dart +++ b/playground/frontend/lib/modules/examples/components/example_list/example_item_actions.dart @@ -16,13 +16,17 @@ * limitations under the License. */ +import 'package:easy_localization/easy_localization.dart'; import 'package:flutter/material.dart'; -import 'package:playground/modules/examples/components/description_popover/description_popover_button.dart'; -import 'package:playground/modules/examples/components/multifile_popover/multifile_popover_button.dart'; -import 'package:playground/modules/examples/models/popover_state.dart'; +import 'package:flutter_svg/flutter_svg.dart'; import 'package:playground_components/playground_components.dart'; import 'package:provider/provider.dart'; +import '../../../../src/assets/assets.gen.dart'; +import '../../models/popover_state.dart'; +import '../description_popover/description_popover_button.dart'; +import '../multifile_popover/multifile_popover_button.dart'; + class ExampleItemActions extends StatelessWidget { final ExampleBase example; final BuildContext parentContext; @@ -36,6 +40,7 @@ class ExampleItemActions extends StatelessWidget { return Row( children: [ if (example.isMultiFile) multifilePopover, + if (example.usesEmulatedData) const _EmulatedDataIcon(), if (example.complexity != null) ComplexityWidget(complexity: example.complexity!), descriptionPopover, @@ -65,3 +70,21 @@ class ExampleItemActions extends StatelessWidget { Provider.of(context, listen: false).setOpen(isOpen); } } + +class _EmulatedDataIcon extends StatelessWidget { + const _EmulatedDataIcon(); + + @override + Widget build(BuildContext context) { + return Padding( + padding: const EdgeInsets.only(right: 8.0), + child: Tooltip( + message: 'intents.playground.usesEmulatedData'.tr(), + child: SvgPicture.asset( + Assets.streaming, + color: Theme.of(context).extension()?.iconColor, + ), + ), + ); + } +} diff --git a/playground/frontend/lib/modules/examples/components/example_list/expansion_panel_item.dart b/playground/frontend/lib/modules/examples/components/example_list/expansion_panel_item.dart index 053968ff09a49..87e792169be18 100644 --- a/playground/frontend/lib/modules/examples/components/example_list/expansion_panel_item.dart +++ b/playground/frontend/lib/modules/examples/components/example_list/expansion_panel_item.dart @@ -47,21 +47,7 @@ class ExpansionPanelItem extends StatelessWidget { if (controller.selectedExample != example) { _closeDropdown(controller.exampleCache); AnalyticsService.get(context).trackSelectExample(example); - final exampleWithInfo = - await controller.exampleCache.loadExampleInfo(example); - // TODO: setCurrentSdk = false when we do - // per-SDK output and run status. - // Now using true to reset the output and run status. 
- // https://github.com/apache/beam/issues/23248 - final descriptor = StandardExampleLoadingDescriptor( - sdk: exampleWithInfo.sdk, - path: exampleWithInfo.path, - ); - controller.setExample( - exampleWithInfo, - descriptor: descriptor, - setCurrentSdk: true, - ); + controller.setExampleBase(example); } }, child: Container( diff --git a/playground/frontend/lib/modules/examples/components/multifile_popover/multifile_popover.dart b/playground/frontend/lib/modules/examples/components/multifile_popover/multifile_popover.dart index c4fb93fa47065..fb1c76fc0c738 100644 --- a/playground/frontend/lib/modules/examples/components/multifile_popover/multifile_popover.dart +++ b/playground/frontend/lib/modules/examples/components/multifile_popover/multifile_popover.dart @@ -17,14 +17,15 @@ */ import 'package:flutter/material.dart'; -import 'package:flutter_svg/flutter_svg.dart'; import 'package:flutter_gen/gen_l10n/app_localizations.dart'; -import 'package:playground/constants/assets.dart'; -import 'package:playground/constants/font_weight.dart'; -import 'package:playground/constants/sizes.dart'; +import 'package:flutter_svg/flutter_svg.dart'; import 'package:playground_components/playground_components.dart'; import 'package:url_launcher/url_launcher.dart'; +import '../../../../constants/font_weight.dart'; +import '../../../../constants/sizes.dart'; +import '../../../../src/assets/assets.gen.dart'; + const kMultifileWidth = 300.0; class MultifilePopover extends StatelessWidget { @@ -52,7 +53,7 @@ class MultifilePopover extends StatelessWidget { ), Text(appLocale.multifileWarning), TextButton.icon( - icon: SvgPicture.asset(kGithubIconAsset), + icon: SvgPicture.asset(Assets.github), onPressed: () { launchUrl(Uri.parse(example.link ?? '')); }, diff --git a/playground/frontend/lib/modules/examples/components/multifile_popover/multifile_popover_button.dart b/playground/frontend/lib/modules/examples/components/multifile_popover/multifile_popover_button.dart index d530aa645ebe8..78fbafdf51a94 100644 --- a/playground/frontend/lib/modules/examples/components/multifile_popover/multifile_popover_button.dart +++ b/playground/frontend/lib/modules/examples/components/multifile_popover/multifile_popover_button.dart @@ -18,13 +18,14 @@ import 'package:aligned_dialog/aligned_dialog.dart'; import 'package:flutter/material.dart'; -import 'package:flutter_svg/flutter_svg.dart'; import 'package:flutter_gen/gen_l10n/app_localizations.dart'; -import 'package:playground/constants/assets.dart'; -import 'package:playground/constants/sizes.dart'; -import 'package:playground/modules/examples/components/multifile_popover/multifile_popover.dart'; +import 'package:flutter_svg/flutter_svg.dart'; import 'package:playground_components/playground_components.dart'; +import '../../../../constants/sizes.dart'; +import '../../../../src/assets/assets.gen.dart'; +import 'multifile_popover.dart'; + class MultifilePopoverButton extends StatelessWidget { final BuildContext? 
parentContext; final ExampleBase example; @@ -51,7 +52,7 @@ class MultifilePopoverButton extends StatelessWidget { child: IconButton( iconSize: kIconSizeMd, splashRadius: kIconButtonSplashRadius, - icon: SvgPicture.asset(kMultifileIconAsset), + icon: SvgPicture.asset(Assets.multifile), tooltip: appLocale.exampleMultifile, onPressed: () { _showMultifilePopover( diff --git a/playground/frontend/lib/modules/output/components/output_header/output_placements.dart b/playground/frontend/lib/modules/output/components/output_header/output_placements.dart index f8eaf57102473..d82f0b12b8844 100644 --- a/playground/frontend/lib/modules/output/components/output_header/output_placements.dart +++ b/playground/frontend/lib/modules/output/components/output_header/output_placements.dart @@ -42,6 +42,7 @@ class OutputPlacements extends StatelessWidget { '${AppLocalizations.of(context)!.outputPlacementSemantic}' ' ${placement.name(context)}', child: IconButton( + key: ValueKey(placement), splashRadius: kIconButtonSplashRadius, icon: SvgPicture.asset( placement.icon, diff --git a/playground/frontend/lib/modules/output/models/output_placement.dart b/playground/frontend/lib/modules/output/models/output_placement.dart index 642553751ccda..f7db5ea1900e4 100644 --- a/playground/frontend/lib/modules/output/models/output_placement.dart +++ b/playground/frontend/lib/modules/output/models/output_placement.dart @@ -18,7 +18,8 @@ import 'package:flutter/cupertino.dart'; import 'package:flutter_gen/gen_l10n/app_localizations.dart'; -import 'package:playground/constants/assets.dart'; + +import '../../../src/assets/assets.gen.dart'; enum OutputPlacement { right, @@ -27,9 +28,7 @@ enum OutputPlacement { ; Axis get graphDirection { - return this == OutputPlacement.bottom - ? Axis.horizontal - : Axis.vertical; + return this == OutputPlacement.bottom ? Axis.horizontal : Axis.vertical; } } @@ -37,11 +36,11 @@ extension OutputPlacementToIcon on OutputPlacement { String get icon { switch (this) { case OutputPlacement.bottom: - return kOutputBottomIconAsset; + return Assets.outputBottom; case OutputPlacement.right: - return kOutputRightIconAsset; + return Assets.outputRight; case OutputPlacement.left: - return kOutputLeftIconAsset; + return Assets.outputLeft; } } } diff --git a/playground/frontend/lib/modules/sdk/components/sdk_selector.dart b/playground/frontend/lib/modules/sdk/components/sdk_selector.dart index c4c1656a17483..8bba9ed7e6aa0 100644 --- a/playground/frontend/lib/modules/sdk/components/sdk_selector.dart +++ b/playground/frontend/lib/modules/sdk/components/sdk_selector.dart @@ -18,26 +18,23 @@ import 'package:flutter/material.dart'; import 'package:flutter_gen/gen_l10n/app_localizations.dart'; -import 'package:playground/components/dropdown_button/dropdown_button.dart'; -import 'package:playground/constants/sizes.dart'; -import 'package:playground/modules/sdk/components/sdk_selector_row.dart'; import 'package:playground_components/playground_components.dart'; import 'package:provider/provider.dart'; -const kEmptyExampleName = 'Catalog'; +import '../../../components/dropdown_button/dropdown_button.dart'; +import '../../../constants/sizes.dart'; +import 'sdk_selector_row.dart'; -const double kWidth = 150; -const double kHeight = 172; +const double _width = 150; class SDKSelector extends StatelessWidget { - final Sdk? value; final ValueChanged onChanged; + final Sdk? value; const SDKSelector({ - Key? 
key, - required this.value, required this.onChanged, - }) : super(key: key); + required this.value, + }); @override Widget build(BuildContext context) { @@ -68,10 +65,10 @@ class SDKSelector extends StatelessWidget { ), ); }), + const SizedBox(height: kMdSpacing), ], ), - width: kWidth, - height: kHeight, + width: _width, ), ), ); diff --git a/playground/frontend/lib/modules/sdk/components/sdk_selector_row.dart b/playground/frontend/lib/modules/sdk/components/sdk_selector_row.dart index 7993723bf25dc..1039078b15e61 100644 --- a/playground/frontend/lib/modules/sdk/components/sdk_selector_row.dart +++ b/playground/frontend/lib/modules/sdk/components/sdk_selector_row.dart @@ -25,11 +25,10 @@ class SdkSelectorRow extends StatelessWidget { final Sdk sdk; final VoidCallback onSelect; - const SdkSelectorRow({ - Key? key, + SdkSelectorRow({ required this.sdk, required this.onSelect, - }) : super(key: key); + }) : super(key: ValueKey(sdk)); @override Widget build(BuildContext context) { diff --git a/playground/frontend/lib/modules/shortcuts/components/shortcut_row.dart b/playground/frontend/lib/modules/shortcuts/components/shortcut_row.dart index 27cd343570380..c68a1d95cbb8f 100644 --- a/playground/frontend/lib/modules/shortcuts/components/shortcut_row.dart +++ b/playground/frontend/lib/modules/shortcuts/components/shortcut_row.dart @@ -31,15 +31,17 @@ class ShortcutRow extends StatelessWidget { // wrap with row to shrink container to child size return Row( children: [ - Container( - decoration: BoxDecoration( - border: Border.all(color: primaryColor), - borderRadius: BorderRadius.circular(kSmBorderRadius), - ), - padding: const EdgeInsets.all(kMdSpacing), - child: Text( - shortcut.title, - style: TextStyle(color: primaryColor), + Flexible( + child: Container( + decoration: BoxDecoration( + border: Border.all(color: primaryColor), + borderRadius: BorderRadius.circular(kSmBorderRadius), + ), + padding: const EdgeInsets.all(kMdSpacing), + child: Text( + shortcut.title, + style: TextStyle(color: primaryColor), + ), ), ), ], diff --git a/playground/frontend/lib/modules/shortcuts/components/shortcuts_modal.dart b/playground/frontend/lib/modules/shortcuts/components/shortcuts_modal.dart index 1f334327f6b05..f4332e55ebd41 100644 --- a/playground/frontend/lib/modules/shortcuts/components/shortcuts_modal.dart +++ b/playground/frontend/lib/modules/shortcuts/components/shortcuts_modal.dart @@ -64,6 +64,7 @@ class ShortcutsModal extends StatelessWidget { crossAxisAlignment: CrossAxisAlignment.center, children: [ Expanded(child: ShortcutRow(shortcut: shortcut)), + const SizedBox(width: kMdSpacing), Expanded( flex: 3, child: Text( diff --git a/playground/frontend/lib/pages/embedded_playground/screen.dart b/playground/frontend/lib/pages/embedded_playground/screen.dart index 0e85c7678aa51..05d39ce3e33f2 100644 --- a/playground/frontend/lib/pages/embedded_playground/screen.dart +++ b/playground/frontend/lib/pages/embedded_playground/screen.dart @@ -41,21 +41,19 @@ class EmbeddedPlaygroundScreen extends StatelessWidget { playgroundController: notifier.playgroundController, child: PlaygroundShortcutsManager( playgroundController: notifier.playgroundController, - child: ToastListenerWidget( - child: Scaffold( - appBar: AppBar( - automaticallyImplyLeading: false, - title: const EmbeddedAppBarTitle(), - actions: const [EmbeddedActions()], - ), - body: EmbeddedSplitView( - first: EmbeddedEditor(isEditable: notifier.isEditable), - second: Container( - color: Theme.of(context).backgroundColor, - child: OutputWidget( - 
playgroundController: notifier.playgroundController, - graphDirection: Axis.horizontal, - ), + child: Scaffold( + appBar: AppBar( + automaticallyImplyLeading: false, + title: const EmbeddedAppBarTitle(), + actions: const [EmbeddedActions()], + ), + body: EmbeddedSplitView( + first: EmbeddedEditor(isEditable: notifier.isEditable), + second: Container( + color: Theme.of(context).backgroundColor, + child: OutputWidget( + playgroundController: notifier.playgroundController, + graphDirection: Axis.horizontal, ), ), ), diff --git a/playground/frontend/lib/pages/embedded_playground/widgets/embedded_actions.dart b/playground/frontend/lib/pages/embedded_playground/widgets/embedded_actions.dart index c66dff3ba92bd..79c7dc587840a 100644 --- a/playground/frontend/lib/pages/embedded_playground/widgets/embedded_actions.dart +++ b/playground/frontend/lib/pages/embedded_playground/widgets/embedded_actions.dart @@ -26,9 +26,9 @@ import 'package:flutter_svg/flutter_svg.dart'; import 'package:playground_components/playground_components.dart'; import 'package:provider/provider.dart'; -import '../../../constants/assets.dart'; import '../../../constants/sizes.dart'; import '../../../modules/messages/models/set_content_message.dart'; +import '../../../src/assets/assets.gen.dart'; import '../../../utils/javascript_post_message.dart'; import '../../standalone_playground/path.dart'; @@ -47,7 +47,7 @@ class EmbeddedActions extends StatelessWidget { height: kTryPlaygroundButtonHeight, child: Consumer( builder: (context, controller, child) => ElevatedButton.icon( - icon: SvgPicture.asset(kLinkIconAsset), + icon: SvgPicture.asset(Assets.link), label: Text(AppLocalizations.of(context)!.tryInPlayground), onPressed: () => _openStandalonePlayground(controller), ), diff --git a/playground/frontend/lib/pages/embedded_playground/widgets/embedded_appbar_title.dart b/playground/frontend/lib/pages/embedded_playground/widgets/embedded_appbar_title.dart index 112bea826ddb4..75443f03b5a8b 100644 --- a/playground/frontend/lib/pages/embedded_playground/widgets/embedded_appbar_title.dart +++ b/playground/frontend/lib/pages/embedded_playground/widgets/embedded_appbar_title.dart @@ -19,12 +19,13 @@ import 'package:flutter/material.dart'; import 'package:flutter/services.dart'; import 'package:flutter_svg/flutter_svg.dart'; -import 'package:playground/components/playground_run_or_cancel_button.dart'; -import 'package:playground/constants/assets.dart'; -import 'package:playground/constants/sizes.dart'; import 'package:playground_components/playground_components.dart'; import 'package:provider/provider.dart'; +import '../../../components/playground_run_or_cancel_button.dart'; +import '../../../constants/sizes.dart'; +import '../../../src/assets/assets.gen.dart'; + class EmbeddedAppBarTitle extends StatelessWidget { const EmbeddedAppBarTitle({Key? 
key}) : super(key: key); @@ -40,7 +41,7 @@ class EmbeddedAppBarTitle extends StatelessWidget { IconButton( iconSize: kIconSizeLg, splashRadius: kIconButtonSplashRadius, - icon: SvgPicture.asset(kCopyIconAsset), + icon: SvgPicture.asset(Assets.copy), onPressed: () { final source = controller.source; Clipboard.setData(ClipboardData(text: source)); diff --git a/playground/frontend/lib/pages/embedded_playground/widgets/embedded_editor.dart b/playground/frontend/lib/pages/embedded_playground/widgets/embedded_editor.dart index ac319426c79ae..b117adac24baf 100644 --- a/playground/frontend/lib/pages/embedded_playground/widgets/embedded_editor.dart +++ b/playground/frontend/lib/pages/embedded_playground/widgets/embedded_editor.dart @@ -30,7 +30,7 @@ class EmbeddedEditor extends StatelessWidget { final controller = Provider.of(context); final snippetController = controller.snippetEditingController; - if (snippetController == null) { + if (snippetController == null || snippetController.isLoading) { return const LoadingIndicator(); } diff --git a/playground/frontend/lib/pages/standalone_playground/screen.dart b/playground/frontend/lib/pages/standalone_playground/screen.dart index d19960e32c186..1aace1bfb9405 100644 --- a/playground/frontend/lib/pages/standalone_playground/screen.dart +++ b/playground/frontend/lib/pages/standalone_playground/screen.dart @@ -95,16 +95,14 @@ class StandalonePlaygroundScreen extends StatelessWidget { ), ], ), - body: ToastListenerWidget( - child: Column( - children: [ - const Expanded(child: PlaygroundPageBody()), - Semantics( - container: true, - child: const PlaygroundPageFooter(), - ), - ], - ), + body: Column( + children: [ + const Expanded(child: PlaygroundPageBody()), + Semantics( + container: true, + child: const PlaygroundPageFooter(), + ), + ], ), ); }, diff --git a/playground/frontend/lib/pages/standalone_playground/widgets/editor_textarea_wrapper.dart b/playground/frontend/lib/pages/standalone_playground/widgets/editor_textarea_wrapper.dart index 57c1ba3fa70e7..5ee81cb05cf39 100644 --- a/playground/frontend/lib/pages/standalone_playground/widgets/editor_textarea_wrapper.dart +++ b/playground/frontend/lib/pages/standalone_playground/widgets/editor_textarea_wrapper.dart @@ -19,7 +19,6 @@ import 'package:flutter/material.dart'; import 'package:flutter_gen/gen_l10n/app_localizations.dart'; import 'package:playground_components/playground_components.dart'; -import 'package:provider/provider.dart'; import '../../../components/playground_run_or_cancel_button.dart'; import '../../../constants/sizes.dart'; @@ -29,80 +28,81 @@ import '../../../modules/examples/components/multifile_popover/multifile_popover /// A code editor with controls stacked above it. class CodeTextAreaWrapper extends StatelessWidget { - const CodeTextAreaWrapper({Key? key}) : super(key: key); + final PlaygroundController controller; + + const CodeTextAreaWrapper({ + required this.controller, + }); @override Widget build(BuildContext context) { - return Consumer( - builder: (context, controller, child) { - if (controller.result?.errorMessage?.isNotEmpty ?? false) { - WidgetsBinding.instance.addPostFrameCallback((_) { - _handleError(context, controller); - }); - } + if (controller.result?.errorMessage?.isNotEmpty ?? 
false) { + WidgetsBinding.instance.addPostFrameCallback((_) { + _handleError(context, controller); + }); + } - final snippetController = controller.snippetEditingController; + final snippetController = controller.snippetEditingController; - if (snippetController == null) { - return const LoadingIndicator(); - } + if (snippetController == null) { + return const LoadingIndicator(); + } - return Column( - children: [ - Expanded( - child: Stack( - children: [ - Positioned.fill( - child: SnippetEditor( - controller: snippetController, - isEditable: true, - ), + return Column( + children: [ + Expanded( + child: Stack( + children: [ + Positioned.fill( + child: SnippetEditor( + controller: snippetController, + isEditable: true, ), - Positioned( - right: kXlSpacing, - top: kXlSpacing, - height: kButtonHeight, - child: Row( - children: [ - if (controller.selectedExample != null) ...[ - if (controller.selectedExample?.isMultiFile ?? false) - Semantics( - container: true, - child: MultifilePopoverButton( - example: controller.selectedExample!, - followerAnchor: Alignment.topRight, - targetAnchor: Alignment.bottomRight, - ), - ), + ), + Positioned( + right: kXlSpacing, + top: kXlSpacing, + height: kButtonHeight, + child: Row( + children: [ + if (controller.selectedExample != null) ...[ + if (controller.selectedExample?.isMultiFile ?? false) Semantics( container: true, - child: DescriptionPopoverButton( + child: MultifilePopoverButton( example: controller.selectedExample!, followerAnchor: Alignment.topRight, targetAnchor: Alignment.bottomRight, ), ), - ], Semantics( container: true, - child: ShareButton( - playgroundController: controller, + child: DescriptionPopoverButton( + example: controller.selectedExample!, + followerAnchor: Alignment.topRight, + targetAnchor: Alignment.bottomRight, ), ), - const SizedBox(width: kLgSpacing), - Semantics( - container: true, - child: const PlaygroundRunOrCancelButton(), - ), ], - ), + Semantics( + container: true, + child: ShareButton( + playgroundController: controller, + ), + ), + const SizedBox(width: kLgSpacing), + Semantics( + container: true, + child: const PlaygroundRunOrCancelButton(), + ), + ], ), - ], - ), + ), + ], ), - ], - ); - }); + ), + ], + ); } void _handleError(BuildContext context, PlaygroundController controller) { diff --git a/playground/frontend/lib/pages/standalone_playground/widgets/feedback/feedback_dropdown_content.dart b/playground/frontend/lib/pages/standalone_playground/widgets/feedback/feedback_dropdown_content.dart index 2f161cecc64c9..e997a270be198 100644 --- a/playground/frontend/lib/pages/standalone_playground/widgets/feedback/feedback_dropdown_content.dart +++ b/playground/frontend/lib/pages/standalone_playground/widgets/feedback/feedback_dropdown_content.dart @@ -35,6 +35,10 @@ const String kFeedbackContentText = 'Have feedback? We\'d love to hear it,' '\nHave questions? 
Try help or support.'; class FeedbackDropdownContent extends StatelessWidget { + static const textFieldKey = Key('feedbackTextFieldKey'); + static const cancelButtonKey = Key('cancelButtonKey'); + static const sendButtonKey = Key('sendFeedbackButtonKey'); + final void Function() close; final TextEditingController textController; @@ -46,7 +50,8 @@ class FeedbackDropdownContent extends StatelessWidget { @override Widget build(BuildContext context) { - final borderColor = Theme.of(context).extension()!.borderColor; + final borderColor = + Theme.of(context).extension()!.borderColor; final OutlineInputBorder border = OutlineInputBorder( borderSide: BorderSide(color: borderColor), @@ -110,6 +115,7 @@ class FeedbackDropdownContent extends StatelessWidget { child: ClipRRect( borderRadius: BorderRadius.circular(kMdBorderRadius), child: TextFormField( + key: textFieldKey, controller: textController, decoration: InputDecoration( focusedBorder: border, @@ -147,6 +153,7 @@ class FeedbackDropdownContent extends StatelessWidget { ), ), child: TextButton( + key: cancelButtonKey, onPressed: () { close(); textController.clear(); @@ -162,6 +169,7 @@ class FeedbackDropdownContent extends StatelessWidget { borderRadius: BorderRadius.circular(kSmBorderRadius), ), child: ElevatedButton( + key: sendButtonKey, onPressed: () { if (textController.text.isNotEmpty) { AnalyticsService.get(context).trackClickSendFeedback( diff --git a/playground/frontend/lib/pages/standalone_playground/widgets/feedback/playground_feedback.dart b/playground/frontend/lib/pages/standalone_playground/widgets/feedback/playground_feedback.dart index 4faeaaf492087..eec7d69d64ff4 100644 --- a/playground/frontend/lib/pages/standalone_playground/widgets/feedback/playground_feedback.dart +++ b/playground/frontend/lib/pages/standalone_playground/widgets/feedback/playground_feedback.dart @@ -20,14 +20,17 @@ import 'package:flutter/material.dart'; import 'package:flutter_gen/gen_l10n/app_localizations.dart'; import 'package:provider/provider.dart'; -import '../../../../constants/assets.dart'; import '../../../../constants/font_weight.dart'; import '../../../../modules/analytics/analytics_service.dart'; +import '../../../../src/assets/assets.gen.dart'; import '../../notifiers/feedback_state.dart'; import 'feedback_dropdown_icon_button.dart'; /// A status bar item for feedback. class PlaygroundFeedback extends StatelessWidget { + static const thumbUpKey = Key('thumbUp'); + static const thumbDownKey = Key('thumbDown'); + const PlaygroundFeedback({Key? 
key}) : super(key: key); @override @@ -42,16 +45,18 @@ class PlaygroundFeedback extends StatelessWidget { style: const TextStyle(fontWeight: kBoldWeight), ), FeedbackDropdownIconButton( + key: thumbUpKey, label: appLocale.enjoying, - iconAsset: kThumbUpIconAsset, - filledIconAsset: kThumbUpIconAssetFilled, + iconAsset: Assets.thumbUp, + filledIconAsset: Assets.thumbUpFilled, onClick: _setEnjoying(context, true), isSelected: isEnjoying != null && isEnjoying, ), FeedbackDropdownIconButton( + key: thumbDownKey, label: appLocale.notEnjoying, - iconAsset: kThumbDownIconAsset, - filledIconAsset: kThumbDownIconAssetFilled, + iconAsset: Assets.thumbDown, + filledIconAsset: Assets.thumbDownFilled, onClick: _setEnjoying(context, false), isSelected: isEnjoying != null && !isEnjoying, ), @@ -62,8 +67,7 @@ class PlaygroundFeedback extends StatelessWidget { _setEnjoying(BuildContext context, bool isEnjoying) { return () { _getFeedbackState(context, false).setEnjoying(isEnjoying); - AnalyticsService.get(context) - .trackClickEnjoyPlayground(isEnjoying); + AnalyticsService.get(context).trackClickEnjoyPlayground(isEnjoying); }; } diff --git a/playground/frontend/lib/pages/standalone_playground/widgets/more_actions.dart b/playground/frontend/lib/pages/standalone_playground/widgets/more_actions.dart index 60f4e0ff9b345..f82fb93707f67 100644 --- a/playground/frontend/lib/pages/standalone_playground/widgets/more_actions.dart +++ b/playground/frontend/lib/pages/standalone_playground/widgets/more_actions.dart @@ -19,13 +19,14 @@ import 'package:flutter/material.dart'; import 'package:flutter_gen/gen_l10n/app_localizations.dart'; import 'package:flutter_svg/flutter_svg.dart'; -import 'package:playground/constants/assets.dart'; -import 'package:playground/constants/links.dart'; -import 'package:playground/modules/analytics/analytics_service.dart'; -import 'package:playground/modules/shortcuts/components/shortcuts_modal.dart'; import 'package:playground_components/playground_components.dart'; import 'package:url_launcher/url_launcher.dart'; +import '../../../constants/links.dart'; +import '../../../modules/analytics/analytics_service.dart'; +import '../../../modules/shortcuts/components/shortcuts_modal.dart'; +import '../../../src/assets/assets.gen.dart'; + enum HeaderAction { shortcuts, beamPlaygroundGithub, @@ -63,7 +64,7 @@ class _MoreActionsState extends State { padding: EdgeInsets.zero, value: HeaderAction.shortcuts, child: ListTile( - leading: SvgPicture.asset(kShortcutsIconAsset), + leading: SvgPicture.asset(Assets.shortcuts), title: Text(appLocale.shortcuts), onTap: () { AnalyticsService.get(context).trackOpenShortcutsModal(); @@ -80,7 +81,7 @@ class _MoreActionsState extends State { padding: EdgeInsets.zero, value: HeaderAction.beamPlaygroundGithub, child: ListTile( - leading: SvgPicture.asset(kGithubIconAsset), + leading: SvgPicture.asset(Assets.github), title: Text(appLocale.beamPlaygroundOnGithub), onTap: () => _openLink(kBeamPlaygroundGithubLink, context), ), @@ -89,7 +90,7 @@ class _MoreActionsState extends State { padding: EdgeInsets.zero, value: HeaderAction.apacheBeamGithub, child: ListTile( - leading: SvgPicture.asset(kGithubIconAsset), + leading: SvgPicture.asset(Assets.github), title: Text(appLocale.apacheBeamOnGithub), onTap: () => _openLink(kApacheBeamGithubLink, context), ), @@ -98,7 +99,7 @@ class _MoreActionsState extends State { padding: EdgeInsets.zero, value: HeaderAction.scioGithub, child: ListTile( - leading: SvgPicture.asset(kGithubIconAsset), + leading: 
SvgPicture.asset(Assets.github), title: Text(appLocale.scioOnGithub), onTap: () => _openLink(kScioGithubLink, context), ), @@ -108,7 +109,7 @@ class _MoreActionsState extends State { padding: EdgeInsets.zero, value: HeaderAction.beamWebsite, child: ListTile( - leading: const Image(image: AssetImage(kBeamIconAsset)), + leading: Image(image: AssetImage(Assets.beam.path)), title: Text(appLocale.toApacheBeamWebsite), onTap: () => _openLink(kBeamWebsiteLink, context), ), diff --git a/playground/frontend/lib/pages/standalone_playground/widgets/playground_page_body.dart b/playground/frontend/lib/pages/standalone_playground/widgets/playground_page_body.dart index 83357abb0cd46..2ce17d79c35e1 100644 --- a/playground/frontend/lib/pages/standalone_playground/widgets/playground_page_body.dart +++ b/playground/frontend/lib/pages/standalone_playground/widgets/playground_page_body.dart @@ -20,7 +20,6 @@ import 'package:flutter/material.dart'; import 'package:playground_components/playground_components.dart'; import 'package:provider/provider.dart'; -import '../../../constants/sizes.dart'; import '../../../modules/output/components/output_header/output_placements.dart'; import '../../../modules/output/models/output_placement.dart'; import '../../../modules/output/models/output_placement_state.dart'; @@ -32,13 +31,23 @@ class PlaygroundPageBody extends StatelessWidget { @override Widget build(BuildContext context) { return Consumer2( - builder: (context, outputState, playgroundState, child) { + builder: (context, outputState, controller, child) { + final snippetController = controller.snippetEditingController; + + if (snippetController == null || snippetController.isLoading) { + return const LoadingIndicator(); + } + final output = OutputWidget( graphDirection: outputState.placement.graphDirection, - playgroundController: playgroundState, + playgroundController: controller, trailing: const OutputPlacements(), ); + final codeTextArea = CodeTextAreaWrapper( + controller: controller, + ); + switch (outputState.placement) { case OutputPlacement.bottom: return SplitView( @@ -63,16 +72,4 @@ class PlaygroundPageBody extends StatelessWidget { } }); } - - Widget get codeTextArea => const CodeTextAreaWrapper(); - - Widget getVerticalSeparator(BuildContext context) => Container( - width: kMdSpacing, - color: Theme.of(context).dividerColor, - ); - - Widget getHorizontalSeparator(BuildContext context) => Container( - height: kMdSpacing, - color: Theme.of(context).dividerColor, - ); } diff --git a/playground/frontend/lib/src/assets/assets.gen.dart b/playground/frontend/lib/src/assets/assets.gen.dart new file mode 100644 index 0000000000000..2548bf73f7acf --- /dev/null +++ b/playground/frontend/lib/src/assets/assets.gen.dart @@ -0,0 +1,127 @@ +/// GENERATED CODE - DO NOT MODIFY BY HAND +/// ***************************************************** +/// FlutterGen +/// ***************************************************** + +// coverage:ignore-file +// ignore_for_file: type=lint +// ignore_for_file: directives_ordering,unnecessary_import,implicit_dynamic_list_literal + +import 'package:flutter/widgets.dart'; + +class $AssetsTranslationsGen { + const $AssetsTranslationsGen(); + + /// File path: assets/translations/en.yaml + String get en => 'assets/translations/en.yaml'; + + /// List of all assets + List get values => [en]; +} + +class Assets { + Assets._(); + + static const AssetGenImage beam = AssetGenImage('assets/beam.png'); + static const AssetGenImage beamLg = AssetGenImage('assets/beam_lg.png'); + static const String 
copy = 'assets/copy.svg'; + static const String github = 'assets/github.svg'; + static const String link = 'assets/link.svg'; + static const String multifile = 'assets/multifile.svg'; + static const String outputBottom = 'assets/output_bottom.svg'; + static const String outputLeft = 'assets/output_left.svg'; + static const String outputRight = 'assets/output_right.svg'; + static const String sendFeedback = 'assets/send_feedback.svg'; + static const String shortcuts = 'assets/shortcuts.svg'; + static const String streaming = 'assets/streaming.svg'; + static const String thumbDown = 'assets/thumb_down.svg'; + static const String thumbDownFilled = 'assets/thumb_down_filled.svg'; + static const String thumbUp = 'assets/thumb_up.svg'; + static const String thumbUpFilled = 'assets/thumb_up_filled.svg'; + static const $AssetsTranslationsGen translations = $AssetsTranslationsGen(); + + /// List of all assets + List get values => [ + beam, + beamLg, + copy, + github, + link, + multifile, + outputBottom, + outputLeft, + outputRight, + sendFeedback, + shortcuts, + streaming, + thumbDown, + thumbDownFilled, + thumbUp, + thumbUpFilled + ]; +} + +class AssetGenImage { + const AssetGenImage(this._assetName); + + final String _assetName; + + Image image({ + Key? key, + AssetBundle? bundle, + ImageFrameBuilder? frameBuilder, + ImageErrorWidgetBuilder? errorBuilder, + String? semanticLabel, + bool excludeFromSemantics = false, + double? scale, + double? width, + double? height, + Color? color, + Animation? opacity, + BlendMode? colorBlendMode, + BoxFit? fit, + AlignmentGeometry alignment = Alignment.center, + ImageRepeat repeat = ImageRepeat.noRepeat, + Rect? centerSlice, + bool matchTextDirection = false, + bool gaplessPlayback = false, + bool isAntiAlias = false, + String? package, + FilterQuality filterQuality = FilterQuality.low, + int? cacheWidth, + int? cacheHeight, + }) { + return Image.asset( + _assetName, + key: key, + bundle: bundle, + frameBuilder: frameBuilder, + errorBuilder: errorBuilder, + semanticLabel: semanticLabel, + excludeFromSemantics: excludeFromSemantics, + scale: scale, + width: width, + height: height, + color: color, + opacity: opacity, + colorBlendMode: colorBlendMode, + fit: fit, + alignment: alignment, + repeat: repeat, + centerSlice: centerSlice, + matchTextDirection: matchTextDirection, + gaplessPlayback: gaplessPlayback, + isAntiAlias: isAntiAlias, + package: package, + filterQuality: filterQuality, + cacheWidth: cacheWidth, + cacheHeight: cacheHeight, + ); + } + + ImageProvider provider() => AssetImage(_assetName); + + String get path => _assetName; + + String get keyName => _assetName; +} diff --git a/playground/frontend/playground_components/LICENSE b/playground/frontend/playground_components/LICENSE deleted file mode 100644 index 8c048c96fb529..0000000000000 --- a/playground/frontend/playground_components/LICENSE +++ /dev/null @@ -1,407 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
- - A part of several convenience binary distributions of this software is licensed as follows: - - Google Protobuf: - Copyright 2008 Google Inc. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - * Neither the name of Google Inc. nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - Code generated by the Protocol Buffer compiler is owned by the owner - of the input file used when generating it. This code is not - standalone and requires a support library to be linked with it. This - support library is itself covered by the above license. - - jsr-305: - Copyright (c) 2007-2009, JSR305 expert group - All rights reserved. - - https://opensource.org/licenses/BSD-3-Clause - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of the JSR305 expert group nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. 
- - janino-compiler: - Janino - An embedded Java[TM] compiler - - Copyright (c) 2001-2016, Arno Unkrig - Copyright (c) 2015-2016 TIBCO Software Inc. - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - 3. Neither the name of JANINO nor the names of its contributors - may be used to endorse or promote products derived from this - software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN - IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - jline: - Copyright (c) 2002-2016, the original author or authors. - All rights reserved. - - http://www.opensource.org/licenses/bsd-license.php - - Redistribution and use in source and binary forms, with or - without modification, are permitted provided that the following - conditions are met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with - the distribution. - - Neither the name of JLine nor the names of its contributors - may be used to endorse or promote products derived from this - software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, - BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, - OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING - IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - OF THE POSSIBILITY OF SUCH DAMAGE. - - sqlline: - SQLLine - Shell for issuing SQL to relational databases via JDBC - - Copyright (c) 2002,2003,2004,2005,2006,2007 Marc Prud'hommeaux - Copyright (c) 2004-2010 The Eigenbase Project - Copyright (c) 2013-2017 Julian Hyde - All rights reserved. 
- - =============================================================================== - - Licensed under the Modified BSD License (the "License"); you may not - use this file except in compliance with the License. You may obtain a - copy of the License at: - - http://opensource.org/licenses/BSD-3-Clause - - Redistribution and use in source and binary forms, - with or without modification, are permitted provided - that the following conditions are met: - - (1) Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - (2) Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the - distribution. - - (3) The name of the author may not be used to endorse or promote - products derived from this software without specific prior written - permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - slf4j: - Copyright (c) 2004-2017 QOS.ch - All rights reserved. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE - -See the adjacent LICENSE.python file, if present, for additional licenses that -apply to parts of Apache Beam Python. diff --git a/playground/frontend/playground_components/README.md b/playground/frontend/playground_components/README.md index 9c4ef73d25d0f..6a99b6b645ced 100644 --- a/playground/frontend/playground_components/README.md +++ b/playground/frontend/playground_components/README.md @@ -17,29 +17,7 @@ under the License. --> -TODO: Put a short description of the package here that helps potential users -know whether this package might be useful for them. +# playground_components -## Features - -TODO: List what your package can do. Maybe include images, gifs, or videos. 
- -## Getting started - -TODO: List prerequisites and provide or point to information on how to -start using the package. - -## Usage - -TODO: Include short and useful examples for package users. Add longer examples -to `/example` folder. - -```dart -const like = 'sample'; -``` - -## Additional information - -TODO: Tell users more about the package: where to find more information, how to -contribute to the package, how to file issues, what response they can expect -from the package authors, and more. +This is a non-pub.dev Flutter package that contains common components +for both Beam Playground app and Tour of Beam app. diff --git a/playground/frontend/playground_components/analysis_options.yaml b/playground/frontend/playground_components/analysis_options.yaml index 318f01bfa2fde..fe2e0e8eb952c 100644 --- a/playground/frontend/playground_components/analysis_options.yaml +++ b/playground/frontend/playground_components/analysis_options.yaml @@ -16,6 +16,3 @@ # under the License. include: package:total_lints/app.yaml - -# Additional information about this file can be found at -# https://dart.dev/guides/language/analysis-options diff --git a/playground/frontend/playground_components/assets/symbols/go.g.yaml b/playground/frontend/playground_components/assets/symbols/go.g.yaml index 218eee2710dbe..4df541c1a86e1 100644 --- a/playground/frontend/playground_components/assets/symbols/go.g.yaml +++ b/playground/frontend/playground_components/assets/symbols/go.g.yaml @@ -555,6 +555,7 @@ - NewI - NewImpulse - NewIntervalWindow + - NewIntervalWindowCoder - NewJobServiceClient - NewKV - NewLegacyArtifactRetrievalServiceClient @@ -869,6 +870,7 @@ - UrnToType - UseAutomatedJavaExpansionService - UseAutomatedPythonExpansionService + - UseBatchSize - UseStandardSQL - UserLabels - UserStateCoderID @@ -3580,6 +3582,19 @@ MapTypeValue: - String properties: - Entries +MapWindows: + methods: + - Down + - FinishBundle + - ID + - ProcessElement + - StartBundle + - String + - Up + properties: + - Fn + - Out + - UID MavenPayload: methods: - Descriptor diff --git a/playground/frontend/playground_components/assets/symbols/java.g.yaml b/playground/frontend/playground_components/assets/symbols/java.g.yaml new file mode 100644 index 0000000000000..1fe009d436725 --- /dev/null +++ b/playground/frontend/playground_components/assets/symbols/java.g.yaml @@ -0,0 +1,13371 @@ +AbstractBeamCalcRel: + methods: + - beamComputeSelfCost + - estimateNodeStats + - getLimitCountOfSortRel + - isInputSortRelAndLimitOnly +AbstractSimulator: + methods: + - hasNext + - next + - remove + - results + - resultsPerWindow +ActionFactory: + methods: + - childPartitionsRecordAction + - dataChangeRecordAction + - detectNewPartitionsAction + - heartbeatRecordAction + - queryChangeStreamAction +AdaptableCollector: + methods: + - asContext + - collect + - getCounter + - getHistogram + - getTimer + - setProcessContext +AddFields: + methods: + - create + - expand + - field + - processElement +AddHarnessIdInterceptor: + methods: + - create +AddUuidsTransform: + methods: + - expand +AdvancingPhaser: {} +AfterAll: + methods: + - getWatermarkThatGuaranteesFiring + - of + - toString +AfterEach: + methods: + - getWatermarkThatGuaranteesFiring + - inOrder + - mayFinish + - toString +AfterFirst: + methods: + - getWatermarkThatGuaranteesFiring + - of + - toString +AfterPane: + methods: + - elementCountAtLeast + - equals + - getElementCount + - getWatermarkThatGuaranteesFiring + - hashCode + - isCompatible + - toString +AfterProcessingTime: + methods: + - 
alignedTo + - equals + - getTimestampTransforms + - getWatermarkThatGuaranteesFiring + - hashCode + - isCompatible + - pastFirstElementInPane + - plusDelayOf + - toString +AfterSynchronizedProcessingTime: + methods: + - equals + - getWatermarkThatGuaranteesFiring + - hashCode + - ofFirstElement + - toString +AfterWatermark: + methods: + - equals + - getContinuationTrigger + - getEarlyTrigger + - getLateTrigger + - getWatermarkThatGuaranteesFiring + - hashCode + - mayFinish + - pastEndOfWindow + - toString + - withEarlyFirings + - withLateFirings +AggregationCombineFnAdapter: + methods: + - addInput + - createAccumulator + - createCombineFn + - createCombineFnAnalyticsFunctions + - createConstantCombineFn + - extractOutput + - getAccumulatorCoder + - getDefaultOutputCoder + - mergeAccumulators + properties: + - EMPTY_ROW + - EMPTY_SCHEMA + - INSTANCE +AggregationQuery: + methods: + - apply + - create + - withMongoDbPipeline +AmqpIO: + methods: + - advance + - close + - createReader + - expand + - finalizeCheckpoint + - getCheckpointMark + - getCheckpointMarkCoder + - getCurrent + - getCurrentSource + - getCurrentTimestamp + - getOutputCoder + - getWatermark + - populateDisplayData + - processElement + - read + - setup + - split + - start + - teardown + - withAddresses + - withMaxNumRecords + - withMaxReadTime + - write +AmqpMessageCoder: + methods: + - decode + - encode +AmqpMessageCoderProviderRegistrar: + methods: + - getCoderProviders +AnnotateText: + methods: + - build + - expand + - features + - languageHint + - newBuilder + - processElement + - setFeatures + - setLanguageHint +ApiSurface: + methods: + - classesInPackage + - containsOnlyClassesMatching + - containsOnlyPackages + - describeTo + - empty + - getAnyExposurePath + - getExposedClasses + - getRootClasses + - includingClass + - includingPackage + - ofClass + - ofPackage + - pruningClass + - pruningClassName + - pruningPattern + - pruningPrefix +AppliedCombineFn: + methods: + - getAccumulatorCoder + - getFn + - getKvCoder + - getSideInputViews + - getWindowingStrategy + - withAccumulatorCoder + - withInputCoder +AppliedPTransform: + methods: + - getFullName + - getInputs + - getMainInputs + - getOutputs + - getPipeline + - getResourceHints + - getTransform + - of +ApproximateCountDistinct: + methods: + - build + - expand + - getMapping + - getPrecision + - getUdaf + - globally + - perKey + - populateDisplayData + - setMapping + - setPrecision + - toBuilder + - via + - withPercision +ApproximateDistinct: + methods: + - addInput + - apply + - create + - createAccumulator + - decode + - encode + - expand + - extractOutput + - globally + - isRegisterByteSizeObserverCheap + - mergeAccumulators + - of + - perKey + - populateDisplayData + - precisionForRelativeError + - processElement + - relativeErrorForPrecision + - withPrecision + - withSparsePrecision + - withSparseRepresentation +ApproximateQuantiles: + methods: + - addInput + - create + - createAccumulator + - decode + - empty + - encode + - equals + - extractOutput + - getAccumulatorCoder + - globally + - hasNext + - hashCode + - isEmpty + - mergeAccumulator + - next + - perKey + - populateDisplayData + - registerByteSizeObserver + - singleton + - sizedIterator + - toString + - verifyDeterministic + - withEpsilon + - withMaxInputSize + properties: + - DEFAULT_MAX_NUM_ELEMENTS +ApproximateUnique: + methods: + - add + - addInput + - createAccumulator + - equals + - expand + - extractOutput + - getAccumulatorCoder + - globally + - hashCode + - mergeAccumulators + - perKey + - 
populateDisplayData +ArrayAgg: + methods: + - addInput + - createAccumulator + - extractOutput + - mergeAccumulators +ArrowConversion: + methods: + - arrowSchemaFromInput + - close + - create + - get + - hasNext + - name + - next + - rowsFromRecordBatch + - rowsFromSerializedRecordBatch + - toBeamSchema + - visit +AsJsons: + methods: + - apply + - exceptionsInto + - exceptionsVia + - expand + - of + - withMapper +AssignEventTime: + methods: + - expand + - getEventTimeExtractor + - named + - of + - output + - using +AtomicCoder: + methods: + - equals + - getCoderArguments + - getComponents + - hashCode + - verifyDeterministic +AttributeValueCoder: + methods: + - decode + - encode + - of +AttributeValueCoderProviderRegistrar: + methods: + - getCoderProviders +Auction: + methods: + - decode + - encode + - equals + - hasAnnotation + - hashCode + - sizeInBytes + - structuralValue + - toString + - withAnnotation + - withoutAnnotation + properties: + - CODER + - category + - dateTime + - description + - expires + - extra + - id + - initialBid + - itemName + - reserve + - seller +AuctionBid: + methods: + - decode + - encode + - equals + - hashCode + - sizeInBytes + - structuralValue + - toString + properties: + - CODER + - auction + - bid +AuctionCount: + methods: + - decode + - encode + - equals + - hashCode + - sizeInBytes + - structuralValue + - toString + properties: + - CODER + - auction + - num +AuctionGenerator: + methods: + - lastBase0AuctionId + - nextAuction + - nextBase0AuctionId +AuctionOrBid: + methods: + - apply +AuctionPrice: + methods: + - decode + - encode + - equals + - hashCode + - sizeInBytes + - structuralValue + - toString + properties: + - CODER + - auction + - price +AutoValueSchema: + methods: + - fieldValueGetters + - fieldValueTypeInformations + - get + - schemaFor + - schemaTypeCreator + properties: + - INSTANCE +AutoValueUtils: + methods: + - appender + - getBaseAutoValueClass + - getBuilderCreator + - getConstructorCreator + - prepare +AvroCoder: + methods: + - check + - coderFor + - decode + - encode + - equals + - fromLong + - get + - getCoderProvider + - getConvertedType + - getEncodedTypeDescriptor + - getLogicalTypeName + - getSchema + - getType + - hashCode + - initialValue + - of + - toLong + - useReflectApi + - verifyDeterministic +AvroGenericCoder: + methods: + - of +AvroIO: + methods: + - apply + - constantDestinations + - expand + - flush + - from + - open + - parseAllGenericRecords + - parseFilesGenericRecords + - parseGenericRecords + - populateDisplayData + - read + - readAll + - readAllGenericRecords + - readFiles + - readFilesGenericRecords + - readGenericRecords + - sink + - sinkViaGenericRecords + - to + - toResource + - watchForNewFiles + - withBeamSchemas + - withCodec + - withCoder + - withDatumReaderFactory + - withDatumWriterFactory + - withEmptyMatchTreatment + - withFileExceptionHandler + - withFormatFunction + - withHintMatchesManyFiles + - withMatchConfiguration + - withMetadata + - withNoSpilling + - withNumShards + - withOutputFilenames + - withSchema + - withShardNameTemplate + - withSuffix + - withTempDirectory + - withUsesReshuffle + - withWindowedWrites + - withoutSharding + - write + - writeCustomType + - writeCustomTypeToGenericRecords + - writeGenericRecords +AvroPayloadSerializerProvider: + methods: + - getSerializer + - identifier +AvroRecordSchema: + methods: + - fieldValueGetters + - fieldValueTypeInformations + - schemaFor + - schemaTypeCreator +AvroSchemaIOProvider: + methods: + - buildReader + - buildWriter + - 
configurationSchema + - expand + - from + - identifier + - isBounded + - requiresDataSchema + - schema +AvroSink: + methods: + - createWriteOperation + - createWriter + - getDynamicDestinations + - write +AvroSource: + methods: + - close + - createForSubrangeOfFile + - from + - getCodec + - getCurrentBlock + - getCurrentBlockOffset + - getCurrentBlockSize + - getCurrentRecord + - getCurrentSource + - getFractionOfBlockConsumed + - getOutputCoder + - getSchemaString + - getSplitPointsRemaining + - getSyncMarker + - length + - read + - readNextBlock + - readNextRecord + - seek + - tell + - validate + - withDatumReaderFactory + - withEmptyMatchTreatment + - withMinBundleSize + - withParseFn + - withSchema +AvroTableProvider: + methods: + - getSchemaIOProvider + - getTableType +AvroUtils: + methods: + - apply + - convertAvroFieldStrict + - createGetterConversions + - createSetterConversions + - createTypeConversion + - equals + - fromAvroType + - fromBeamFieldType + - get + - getAvroBytesToRowFunction + - getCreator + - getFieldTypes + - getFromRowFunction + - getGenericRecordToRowFunction + - getGetters + - getRowToAvroBytesFunction + - getRowToGenericRecordFunction + - getSchema + - getSize + - getToRowFunction + - hashCode + - schemaCoder + - toAvroField + - toAvroSchema + - toAvroType + - toBeamField + - toBeamRowStrict + - toBeamSchema + - toBeamType + - toGenericRecord + - withSize + properties: + - nullable + - type +AvroWriteRequest: + methods: + - getElement + - getSchema +AwsBuilderFactory: {} +AwsCoders: + methods: + - awsResponseMetadata + - decode + - encode + - of + - responseMetadata + - sdkHttpMetadata + - sdkHttpMetadataWithoutHeaders + - sdkHttpResponse + - sdkHttpResponseWithoutHeaders + - verifyDeterministic +AwsModule: + methods: + - canCreateUsingDefault + - createUsingDefault + - deserialize + - deserializeWithType + - serialize + - serializeWithType + - setupModule + properties: + - CLIENT_EXECUTION_TIMEOUT + - CONNECTION_MAX_IDLE_TIME + - CONNECTION_TIMEOUT + - CONNECTION_TIME_TO_LIVE + - MAX_CONNECTIONS + - PROXY_HOST + - PROXY_PASSWORD + - PROXY_PORT + - PROXY_USERNAME + - REQUEST_TIMEOUT + - SOCKET_TIMEOUT +AwsPipelineOptionsRegistrar: + methods: + - getPipelineOptions +AwsSchemaProvider: + methods: + - apply + - create + - equals + - fieldValueGetters + - fieldValueTypeInformations + - fromRowFunction + - hashCode + - schemaFor + - schemaTypeCreator +AwsSchemaRegistrar: + methods: + - getSchemaProviders +AwsSerializableUtils: + methods: + - deserialize + - deserializeAwsCredentialsProvider + - serialize + - serializeAwsCredentialsProvider +AwsTypes: {} +AzureBlobStoreFileSystemRegistrar: + methods: + - fromOptions +AzureModule: + methods: + - deserialize + - deserializeWithType + - serialize + - serializeWithType +AzurePipelineOptionsRegistrar: + methods: + - getPipelineOptions +BackOffAdapter: + methods: + - nextBackOffMillis + - reset + - toGcpBackOff +BackOffUtils: + methods: + - next +BagUserState: + methods: + - append + - asyncClose + - clear + - get +BaseBeamTable: + methods: + - buildIOReader + - constructFilter + - getTableStatistics + - supportsProjects +BasicDynamoDBProvider: + methods: + - createDynamoDB +BasicDynamoDbClientProvider: + methods: + - equals + - getDynamoDbClient + - hashCode +BatchContextImpl: + methods: + - addProperties + - addTags + - createDataset + - datasetExists + - discardDataset + - getArguments + - getDataset + - getFailureCollector + - getInputFormatProvider + - getInputSchema + - getInputSchemas + - getLogicalStartTime + - 
getMetadata + - getMetrics + - getNamespace + - getOutputFormatProvider + - getOutputPortSchemas + - getOutputSchema + - getPipelineName + - getPluginProperties + - getServiceURL + - getStageName + - loadPluginClass + - newPluginInstance + - provide + - record + - releaseDataset + - removeMetadata + - removeProperties + - removeTags +BatchSinkContextImpl: + methods: + - addOutput + - isPreviewEnabled +BatchSourceContextImpl: + methods: + - getMaxPreviewRecords + - isPreviewEnabled + - setInput +BeamAccumulatorProvider: + methods: + - add + - create + - get + - getCounter + - getFactory + - getHistogram + - getName + - getNamespace + - getTimer + - increment +BeamAggregateProjectMergeRule: + methods: + - onMatch + properties: + - INSTANCE +BeamAggregationRel: + methods: + - beamComputeSelfCost + - buildPTransform + - copy + - estimateNodeStats + - expand + - explainTerms + - processElement +BeamAggregationRule: + methods: + - onMatch + properties: + - INSTANCE +BeamBasicAggregationRule: + methods: + - onMatch + properties: + - INSTANCE +BeamBigQuerySqlDialect: + methods: + - quoteIdentifier + - unparseCall + - unparseDateTimeLiteral + - unparseSqlIntervalLiteral + properties: + - DEFAULT + - DEFAULT_CONTEXT + - DOUBLE_NAN_WRAPPER + - DOUBLE_NEGATIVE_INF_WRAPPER + - DOUBLE_POSITIVE_INF_WRAPPER + - IN_ARRAY_OPERATOR + - NUMERIC_LITERAL_WRAPPER +BeamBuiltinAggregations: + methods: + - addInput + - apply + - create + - createAccumulator + - createBitXOr + - extractOutput + - getAccumulatorCoder + - identity + - mergeAccumulators + - toBigDecimal + properties: + - BUILTIN_AGGREGATOR_FACTORIES +BeamBuiltinAnalyticFunctions: + methods: + - addInput + - create + - createAccumulator + - extractOutput + - mergeAccumulators + - navigationFirstValue + - navigationLastValue + - numberingDenseRank + - numberingPercentRank + - numberingRank + - numberingRowNumber + properties: + - BUILTIN_ANALYTIC_FACTORIES +BeamBuiltinFunctionProvider: + methods: + - getBuiltinMethods +BeamBuiltinMethods: + properties: + - CHAR_LENGTH_METHOD + - CONCAT_METHOD + - DATE_METHOD + - ENDS_WITH_METHOD + - LIKE_METHOD + - LTRIM_METHOD + - REPLACE_METHOD + - REVERSE_METHOD + - RTRIM_METHOD + - STARTS_WITH_METHOD + - SUBSTR_METHOD + - TIMESTAMP_METHOD + - TRIM_METHOD +BeamCalcMergeRule: + methods: + - onMatch + properties: + - INSTANCE +BeamCalcRel: + methods: + - buildPTransform + - copy + - entrySet + - expand + - field + - get + - getQueryProvider + - getRootSchema + - getTypeFactory + - processElement + - setup + - size +BeamCalcRule: + methods: + - convert + - matches + properties: + - INSTANCE +BeamCalcSplittingRule: + methods: + - matches + - onMatch +BeamCalciteSchema: + methods: + - getExpression + - getFunctionNames + - getFunctions + - getPipelineOptions + - getSubSchema + - getSubSchemaNames + - getTable + - getTableNames + - getTableProvider + - getType + - getTypeNames + - isMutable + - removeAllPipelineOptions + - removePipelineOption + - setPipelineOption + - snapshot +BeamCalciteTable: + methods: + - asQueryable + - getModifiableCollection + - getRowType + - getStatistic + - of + - toModificationRel + - toRel +BeamCoGBKJoinRel: + methods: + - buildPTransform + - copy + - expand +BeamCoGBKJoinRule: + methods: + - matches + - onMatch + properties: + - INSTANCE +BeamCodegenUtils: + methods: + - toStringTimestamp + - toStringUTF8 +BeamCostModel: + methods: + - convertRelOptCost + - divideBy + - equals + - getCpu + - getCpuRate + - getIo + - getRows + - hashCode + - isEqWithEpsilon + - isInfinite + - isLe + - isLt + - 
makeCost + - makeHugeCost + - makeInfiniteCost + - makeTinyCost + - makeZeroCost + - minus + - multiplyBy + - plus + - toString + properties: + - FACTORY +BeamEnumerableConverter: + methods: + - computeSelfCost + - copy + - createPipelineOptions + - implement + - processElement + - startBundle + - toEnumerable + - toRowList + - visitValue +BeamEnumerableConverterRule: + methods: + - convert + properties: + - INSTANCE +BeamFnControlClient: + methods: + - delegateOnInstructionRequestType + - onCompleted + - onError + - onNext + - sendInstructionResponse + - waitForTermination +BeamFnDataGrpcClient: + methods: + - createOutboundAggregator + - registerReceiver + - unregisterReceiver +BeamFnDataGrpcMultiplexer: + methods: + - close + - getInboundObserver + - getOutboundObserver + - onCompleted + - onError + - onNext + - registerConsumer + - toString +BeamFnDataGrpcMultiplexer2: + methods: + - close + - getInboundObserver + - getOutboundObserver + - onCompleted + - onError + - onNext + - registerConsumer + - toString + - unregisterConsumer +BeamFnDataInboundObserver: + methods: + - accept + - awaitCompletion + - cancel + - complete + - fail + - forConsumer + - isDone + - runWhenComplete +BeamFnDataInboundObserver2: + methods: + - accept + - awaitCompletion + - close + - flush + - forConsumers + - getUnfinishedEndpoints + - multiplexElements + - reset + properties: + - INSTANCE +BeamFnDataOutboundAggregator: + methods: + - accept + - bufferedSize + - discard + - equals + - getByteCount + - getElementCount + - hashCode + - registerOutputDataLocation + - registerOutputTimersLocation + - resetStats + - sendElements + - sendOrCollectBufferedDataAndFinishOutboundStreams + - start + - toByteStringAndResetBuffer + - toString + properties: + - DATA_BUFFER_SIZE_LIMIT + - DATA_BUFFER_TIME_LIMIT_MS + - DEFAULT_BUFFER_LIMIT_BYTES + - DEFAULT_BUFFER_LIMIT_TIME_MS +BeamFnDataOutboundObserver: + methods: + - accept + - close + - flush +BeamFnDataReadRunner: + methods: + - blockTillReadFinishes + - createRunnerForPTransform + - forwardElementToConsumer + - getCache + - getCurrentInstructionId + - getPTransformRunnerFactories + - getStateClient + - reset + - trySplit + - updateFinalMonitoringData + - updateIntermediateMonitoringData +BeamFnDataWriteRunner: + methods: + - createRunnerForPTransform + - getCache + - getCurrentInstructionId + - getPTransformRunnerFactories + - getStateClient +BeamFnLoggingClient: + methods: + - beforeStart + - close + - flush + - onCompleted + - onError + - onNext + - publish + - run + - setProcessBundleHandler + - toString +BeamFnLoggingClientBenchmark: + methods: + - logging + - onCompleted + - onError + - onNext + - tearDown + - testLogging + - testLoggingWithAllOptionalParameters + - testSkippedLogging + properties: + - loggingClient + - loggingService + - server +BeamFnLoggingMDC: + methods: + - getInstructionId + - setInstructionId +BeamFnStateGrpcClientCache: + methods: + - forApiServiceDescriptor + - handle + - onCompleted + - onError + - onNext +BeamFnStatusClient: + methods: + - close + - equals + - getInstruction + - getTimeSinceTransition + - getTrackedThreadName + - hashCode + - onCompleted + - onError + - onNext +BeamIOPushDownRule: + methods: + - onMatch + properties: + - INSTANCE +BeamIOSinkRel: + methods: + - beamComputeSelfCost + - buildPTransform + - copy + - estimateNodeStats + - expand + - flattenRel + - getPipelineOptions + - register +BeamIOSinkRule: + methods: + - convert + properties: + - INSTANCE +BeamIOSourceRel: + methods: + - beamComputeSelfCost + - 
buildPTransform + - computeSelfCost + - createPushDownRel + - estimateNodeStats + - estimateRowCount + - expand + - getBeamSqlTable + - getPipelineOptions + - isBounded + properties: + - CONSTANT_WINDOW_SIZE +BeamIntersectRel: + methods: + - beamComputeSelfCost + - buildPTransform + - copy + - estimateNodeStats +BeamIntersectRule: + methods: + - convert + properties: + - INSTANCE +BeamJavaTypeFactory: + methods: + - getJavaClass + properties: + - INSTANCE +BeamJavaUdfCalcRule: + properties: + - INSTANCE +BeamJoinAssociateRule: + methods: + - onMatch + properties: + - INSTANCE +BeamJoinPushThroughJoinRule: + methods: + - onMatch + properties: + - LEFT + - RIGHT +BeamJoinRel: + methods: + - beamComputeSelfCost + - containsSeekableInput + - estimateNodeStats + - getBoundednessOfRelNode + - getPCollectionInputs + - isJoinLegal + - seekable +BeamJoinTransforms: + methods: + - expand + - getJoinColumns + - processElement + - setup + - teardown +BeamKafkaCSVTable: + methods: + - expand + - processElement +BeamKafkaTable: + methods: + - buildIOReader + - buildIOWriter + - getBootstrapServers + - getTableStatistics + - getTopics + - isBounded + - updateConsumerProperties +BeamMatchRel: + methods: + - beamComputeSelfCost + - buildPTransform + - copy + - estimateNodeStats + - expand + - processElement +BeamMatchRule: + methods: + - convert + properties: + - INSTANCE +BeamMinusRel: + methods: + - beamComputeSelfCost + - buildPTransform + - copy + - estimateNodeStats +BeamMinusRule: + methods: + - convert + properties: + - INSTANCE +BeamPCollectionTable: + methods: + - buildIOReader + - buildIOWriter + - isBounded +BeamPushDownIOSourceRel: + methods: + - beamComputeSelfCost + - buildPTransform + - expand + - explainTerms +BeamRelDataTypeSystem: + methods: + - getDefaultPrecision + - getMaxNumericPrecision + - getMaxNumericScale + - getMaxPrecision + - shouldConvertRaggedUnionTypesToVarying + properties: + - INSTANCE +BeamRelMetadataQuery: + methods: + - getNodeStats + - instance +BeamRowToBigtableMutation: + methods: + - apply + - expand +BeamRowToStorageApiProto: + methods: + - messageFromBeamRow +BeamRuleSets: + methods: + - getRuleSets +BeamSetOperatorRelBase: + methods: + - expand +BeamSetOperatorsTransforms: + methods: + - apply + - processElement +BeamSideInputJoinRel: + methods: + - buildPTransform + - copy + - expand + - sideInputJoin +BeamSideInputJoinRule: + methods: + - matches + - onMatch + properties: + - INSTANCE +BeamSideInputLookupJoinRel: + methods: + - buildPTransform + - copy + - expand +BeamSideInputLookupJoinRule: + methods: + - convert + - matches + properties: + - INSTANCE +BeamSortRel: + methods: + - beamComputeSelfCost + - buildPTransform + - compare + - copy + - estimateNodeStats + - expand + - getCount + - isLimitOnly + - processElement +BeamSortRule: + methods: + - convert + properties: + - INSTANCE +BeamSqlCli: + methods: + - execute + - explainQuery + - getMetaStore + - metaStore +BeamSqlDataCatalogExample: + methods: + - main +BeamSqlEnv: + methods: + - addSchema + - addUdaf + - addUdf + - autoLoadUserDefinedFunctions + - build + - builder + - executeDdl + - explain + - getContext + - getPipelineOptions + - inMemory + - isDdl + - parseQuery + - readOnly + - setCurrentSchema + - setPipelineOptions + - setQueryPlannerClassName + - setRuleSets + - withTableProvider +BeamSqlEnvRunner: + methods: + - runUsingBeamSqlEnv +BeamSqlLine: + methods: + - main +BeamSqlOutputToConsoleFn: + methods: + - processElement +BeamSqlParser: + methods: + - getDdlExecutor + - getParser + 
properties: + - DDL_EXECUTOR + - FACTORY +BeamSqlPipelineOptionsRegistrar: + methods: + - getPipelineOptions +BeamSqlRelUtils: + methods: + - getBeamRelInput + - getErrorRowSchema + - getInput + - getNodeStats + - toPCollection + properties: + - ERROR + - ROW +BeamSqlUnparseContext: + methods: + - clone + - equals + - getNullParams + - hashCode + - implementor + - toSql + - unparse +BeamTableFunctionScanRel: + methods: + - beamComputeSelfCost + - buildPTransform + - copy + - estimateNodeStats + - expand + - processElement +BeamTableFunctionScanRule: + methods: + - convert + properties: + - INSTANCE +BeamTableStatistics: + methods: + - createBoundedTableStatistics + - createUnboundedTableStatistics + - getCollations + - getDistribution + - getKeys + - getRate + - getReferentialConstraints + - getRowCount + - isKey + - isUnknown + properties: + - BOUNDED_UNKNOWN + - UNBOUNDED_UNKNOWN +BeamTableUtils: + methods: + - autoCastField + - beamRow2CsvLine + - csvLines2BeamRows +BeamTpcds: + methods: + - main +BeamUncollectRel: + methods: + - beamComputeSelfCost + - buildPTransform + - copy + - estimateNodeStats + - expand + - process +BeamUncollectRule: + methods: + - convert + properties: + - INSTANCE +BeamUnionRel: + methods: + - beamComputeSelfCost + - buildPTransform + - copy + - estimateNodeStats +BeamUnionRule: + methods: + - convert + properties: + - INSTANCE +BeamUnnestRel: + methods: + - beamComputeSelfCost + - buildPTransform + - copy + - estimateNodeStats + - expand + - explainTerms + - process +BeamUnnestRule: + methods: + - onMatch + properties: + - INSTANCE +BeamValuesRel: + methods: + - beamComputeSelfCost + - buildPTransform + - estimateNodeStats + - expand + - getPipelineOptions +BeamValuesRule: + methods: + - convert + properties: + - INSTANCE +BeamWindowRel: + methods: + - beamComputeSelfCost + - buildPTransform + - copy + - estimateNodeStats + - expand + - processElement +BeamWindowRule: + methods: + - convert + properties: + - INSTANCE +BeamZetaSqlCalcMergeRule: + methods: + - onMatch + properties: + - INSTANCE +BeamZetaSqlCalcRel: + methods: + - buildPTransform + - copy + - expand + - finishBundle + - getAllowedTimestampSkew + - output + - outputWithTimestamp + - processElement + - setup + - startBundle + - teardown +BeamZetaSqlCalcRule: + properties: + - INSTANCE +BeamZetaSqlCalcSplittingRule: + properties: + - INSTANCE +BeamZetaSqlCatalog: + properties: + - PRE_DEFINED_WINDOW_FUNCTIONS + - USER_DEFINED_JAVA_AGGREGATE_FUNCTIONS + - USER_DEFINED_JAVA_SCALAR_FUNCTIONS + - USER_DEFINED_SQL_FUNCTIONS + - ZETASQL_FUNCTION_GROUP_NAME +BeamZetaSqlUncollectRel: + methods: + - beamComputeSelfCost + - buildPTransform + - copy + - estimateNodeStats + - expand + - process +BeamZetaSqlUncollectRule: + methods: + - convert + properties: + - INSTANCE +BeamZetaSqlUnnestRel: + methods: + - beamComputeSelfCost + - buildPTransform + - copy + - estimateNodeStats + - expand + - explainTerms + - process +BeamZetaSqlUnnestRule: + methods: + - onMatch + properties: + - INSTANCE +Bid: + methods: + - decode + - encode + - equals + - hasAnnotation + - hashCode + - sizeInBytes + - structuralValue + - toString + - verifyDeterministic + - withAnnotation + - withoutAnnotation + properties: + - ASCENDING_TIME_THEN_PRICE + - CODER + - PRICE_THEN_DESCENDING_TIME + - auction + - bidder + - dateTime + - extra + - price +BidGenerator: + methods: + - nextBid +BidsPerSession: + methods: + - decode + - encode + - equals + - hashCode + - sizeInBytes + - structuralValue + - toString + - verifyDeterministic + 
properties: + - CODER +BigDecimalCoder: + methods: + - consistentWithEquals + - decode + - encode + - isRegisterByteSizeObserverCheap + - of + - verifyDeterministic +BigDecimalConverter: + methods: + - forSqlType +BigEndianIntegerCoder: + methods: + - consistentWithEquals + - decode + - encode + - getEncodedTypeDescriptor + - isRegisterByteSizeObserverCheap + - of + - verifyDeterministic +BigEndianLongCoder: + methods: + - consistentWithEquals + - decode + - encode + - getEncodedTypeDescriptor + - isRegisterByteSizeObserverCheap + - of + - verifyDeterministic +BigEndianShortCoder: + methods: + - consistentWithEquals + - decode + - encode + - getEncodedTypeDescriptor + - isRegisterByteSizeObserverCheap + - of + - verifyDeterministic +BigIntegerCoder: + methods: + - consistentWithEquals + - decode + - encode + - isRegisterByteSizeObserverCheap + - of + - verifyDeterministic +BigQueryClient: + methods: + - create + - createTableIfNotExists + - insertAll + - insertRow +BigQueryCoderProviderRegistrar: + methods: + - getCoderProviders +BigQueryDirectReadSchemaTransformProvider: + methods: + - build + - buildTransform + - builder + - expand + - getBigQueryServices + - getQuery + - getRowRestriction + - getSelectedFields + - getTableSpec + - identifier + - inputCollectionNames + - outputCollectionNames + - setBigQueryServices + - setQuery + - setRowRestriction + - setSelectedFields + - setTableSpec + - validate +BigQueryDlqProvider: + methods: + - expand + - identifier + - newDlqTransform +BigQueryFilter: + methods: + - getNotSupported + - getSupported + - numSupported + - toString +BigQueryHelpers: + methods: + - apply + - fromJsonString + - getNumRows + - parseTableSpec + - parseTableUrn + - stripPartitionDecorator + - toJsonString + - toString + - toTableSpec + properties: + - jobId + - shouldRetry +BigQueryIO: + methods: + - actuateProjectionPushdown + - apply + - expand + - from + - fromQuery + - getTable + - getTableProvider + - ignoreInsertIds + - ignoreUnknownValues + - optimizedWrites + - populateDisplayData + - processElement + - read + - readTableRows + - readTableRowsWithSchema + - readWithDatumReader + - setSchema + - skipInvalidRows + - supportsProjectionPushdown + - to + - useAvroLogicalTypes + - useBeamSchema + - usingStandardSql + - validate + - withAutoSchemaUpdate + - withAutoSharding + - withAvroFormatFunction + - withAvroSchemaFactory + - withAvroWriter + - withBeamRowConverters + - withClustering + - withCoder + - withCreateDisposition + - withCustomGcsTempLocation + - withDeterministicRecordIdFn + - withExtendedErrorInfo + - withFailedInsertRetryPolicy + - withFormat + - withFormatFunction + - withFormatRecordOnFailureFunction + - withJsonSchema + - withJsonTimePartitioning + - withKmsKey + - withLoadJobProjectId + - withMaxBytesPerPartition + - withMaxFilesPerBundle + - withMethod + - withNumFileShards + - withNumStorageWriteApiStreams + - withQueryLocation + - withQueryPriority + - withQueryTempDataset + - withRowRestriction + - withSchema + - withSchemaFromView + - withSchemaUpdateOptions + - withSelectedFields + - withSuccessfulInsertsPropagation + - withTableDescription + - withTemplateCompatibility + - withTestServices + - withTimePartitioning + - withTriggeringFrequency + - withWriteDisposition + - withWriteTempDataset + - withoutResultFlattening + - withoutValidation + - write + - writeTableRows + properties: + - BIGQUERY_JOB_TEMPLATE + - INSTANCE +BigQueryInsertError: + methods: + - equals + - getError + - getRow + - getTable + - hashCode +BigQueryInsertErrorCoder: 
+ methods: + - decode + - encode + - getEncodedTypeDescriptor + - of + - verifyDeterministic +BigQuerySchemaIOProvider: + methods: + - buildReader + - buildWriter + - configurationSchema + - expand + - from + - identifier + - isBounded + - requiresDataSchema + - schema +BigQuerySchemaRetrievalException: {} +BigQuerySchemaTransformReadConfiguration: + methods: + - build + - builder + - getQuery + - getQueryLocation + - getTableSpec + - getUseStandardSql + - setQuery + - setQueryLocation + - setTableSpec + - setUseStandardSql +BigQuerySchemaTransformReadProvider: + methods: + - buildTransform + - expand + - identifier + - inputCollectionNames + - outputCollectionNames +BigQuerySchemaTransformWriteConfiguration: + methods: + - build + - builder + - getCreateDisposition + - getTableSpec + - getWriteDisposition + - setCreateDisposition + - setTableSpec + - setWriteDisposition +BigQuerySchemaTransformWriteProvider: + methods: + - buildTransform + - expand + - identifier + - inputCollectionNames + - outputCollectionNames + - validate +BigQueryStorageApiInsertError: + methods: + - getErrorMessage + - getRow + - toString +BigQueryStorageApiInsertErrorCoder: + methods: + - decode + - encode + - of +BigQueryStorageTableSource: + methods: + - create + - getEstimatedSizeBytes + - populateDisplayData +BigQueryTableProvider: + methods: + - buildBeamSqlTable + - getTableType +BigQueryUtils: + methods: + - apply + - build + - builder + - convertAvroFormat + - convertGenericRecordToTableRow + - fromTableSchema + - getInferMaps + - getTruncateTimestamps + - hashSchemaDescriptorDeterministic + - readCallMetric + - setInferMaps + - setTruncateTimestamps + - tableRowFromBeamRow + - tableRowToBeamRow + - toBeamRow + - toGenericAvroSchema + - toTableReference + - toTableRow + - toTableSchema + - writeCallMetric +BigqueryClient: + methods: + - createNewDataset + - createNewTable + - deleteDataset + - deleteTable + - getClient + - getNewBigqueryClient + - getTableResource + - insertDataToTable + - queryUnflattened + - queryWithRetries + - queryWithRetriesUsingStandardSql +BigqueryMatcher: + methods: + - create + - createQuery + - createQueryUsingStandardSql + - describeMismatchSafely + - describeTo + - getApplicationName + - getProjectId + - getQuery + - getUsingStandardSql + - queryResultHasChecksum +BigtableIO: + methods: + - advance + - close + - createReader + - expand + - finishBundle + - getBigtableOptions + - getCurrent + - getCurrentSource + - getEstimatedSizeBytes + - getFractionConsumed + - getMaxBufferElementCount + - getOutputCoder + - getRanges + - getRowFilter + - getSplitPointsConsumed + - getTableId + - populateDisplayData + - processElement + - read + - split + - splitAtFraction + - start + - startBundle + - tearDown + - toString + - validate + - withBigtableOptions + - withBigtableOptionsConfigurator + - withEmulator + - withInstanceId + - withKeyRange + - withKeyRanges + - withMaxBufferElementCount + - withProjectId + - withRowFilter + - withTableId + - withWriteResults + - withoutValidation + - write +BigtableRowToBeamRow: + methods: + - apply + - expand +BigtableRowToBeamRowFlat: + methods: + - apply + - expand +BigtableTable: + methods: + - buildIOReader + - buildIOWriter + - constructFilter + - isBounded +BigtableTableProvider: + methods: + - buildBeamSqlTable + - getTableType +BigtableUtils: + methods: + - booleanToByteArray + - byteString + - byteStringUtf8 + - doubleToByteArray + - floatToByteArray + - longToByteArray +BigtableWriteResult: + methods: + - create + - getRowsWritten 
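The entries above enumerate the public surface of the Java SDK's BigQuery and Bigtable connectors. As a hedged illustration (an editorial sketch, not part of the generated dictionary or of this diff), the snippet below composes a few of the BigQueryIO methods listed above: readTableRows, from, writeTableRows, to, withSchema, withCreateDisposition and withWriteDisposition. The project, dataset, table names and schema are hypothetical placeholders.

import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import java.util.Collections;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

public class BigQueryIOSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // Read rows from a (hypothetical) input table.
    PCollection<TableRow> rows =
        p.apply("ReadRows",
            BigQueryIO.readTableRows().from("my-project:my_dataset.input_table"));

    // Write them back out with an explicit schema and dispositions.
    TableSchema schema =
        new TableSchema()
            .setFields(
                Collections.singletonList(
                    new TableFieldSchema().setName("word").setType("STRING")));

    rows.apply(
        "WriteRows",
        BigQueryIO.writeTableRows()
            .to("my-project:my_dataset.output_table")
            .withSchema(schema)
            .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
            .withWriteDisposition(WriteDisposition.WRITE_APPEND));

    p.run().waitUntilFinish();
  }
}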
+BigtableWriteResultCoder: + methods: + - decode + - encode + - getCoderProvider + - of +BitSetCoder: + methods: + - consistentWithEquals + - decode + - encode + - of + - verifyDeterministic +BlockBasedSource: + methods: + - getCurrent + - getCurrentBlock + - getCurrentBlockOffset + - getCurrentBlockSize + - getCurrentRecord + - getFractionConsumed + - getFractionOfBlockConsumed + - readNextBlock + - readNextRecord +BlockingCommitterImpl: + methods: + - close + - commitOffset +BooleanCoder: + methods: + - consistentWithEquals + - decode + - encode + - isRegisterByteSizeObserverCheap + - of +BoundedEventSource: + methods: + - advance + - close + - createReader + - getCurrent + - getCurrentSource + - getCurrentTimestamp + - getDefaultOutputCoder + - getEstimatedSizeBytes + - getFractionConsumed + - split + - splitAtFraction + - start + - validate +BoundedReadFromUnboundedSource: + methods: + - expand + - getKindString + - populateDisplayData + - process + - withMaxNumRecords + - withMaxReadTime +BoundedSideInputJoin: + methods: + - expand + - needsSideInput + - processElement +BoundedSideInputJoinModel: + methods: + - simulator +BoundedSource: + methods: + - createReader + - getCurrentSource + - getCurrentTimestamp + - getEstimatedSizeBytes + - getFractionConsumed + - getSplitPointsConsumed + - getSplitPointsRemaining + - split + - splitAtFraction + properties: + - SPLIT_POINTS_UNKNOWN +BoundedWindow: + methods: + - formatTimestamp + - maxTimestamp + - validateTimestampBounds + properties: + - TIMESTAMP_MAX_VALUE + - TIMESTAMP_MIN_VALUE +BroadcastHashJoinTranslator: + methods: + - processElement +BucketingFunction: + methods: + - add + - get + - isSignificant + - remove +BufferedElementCountingOutputStream: + methods: + - close + - finish + - flush + - markElementStart + - write + properties: + - DEFAULT_BUFFER_SIZE +BufferedExternalSorter: + methods: + - add + - create + - getExternalSorterType + - getMemoryMB + - getTempLocation + - options + - sort + - withExternalSorterType + - withMemoryMB + - withTempLocation +BufferingStreamObserver: + methods: + - getBufferSize + - getCause + - onCompleted + - onError + - onNext +Builders: {} +BuiltinHashFunctions: + methods: + - md5Bytes + - md5String + - sha1Bytes + - sha1String + - sha256Bytes + - sha256String + - sha512Bytes + - sha512String +BuiltinStringFunctions: + methods: + - endsWith + - fromHex + - lengthBytes + - lengthString + - lpad + - reverseBytes + - reverseString + - rpad + - startsWith + - toHex +BuiltinTrigonometricFunctions: + methods: + - cosh + - sinh + - tanh +BundleSplitter: {} +ByteArrayCoder: + methods: + - decode + - encode + - encodeAndOwn + - getEncodedTypeDescriptor + - isRegisterByteSizeObserverCheap + - of + - structuralValue + - verifyDeterministic +ByteBuddyUtils: + methods: + - appender + - apply + - clear + - containsKey + - containsValue + - convert + - createGetterConversions + - createSetterConversions + - createTypeConversion + - entrySet + - equals + - get + - getTransformingMap + - hashCode + - isEmpty + - isValid + - keySet + - prepare + - put + - putAll + - remove + - size + - subclassGetterInterface + - subclassSetterInterface + - toString + - transformContainer + - values +ByteCoder: + methods: + - consistentWithEquals + - decode + - encode + - getEncodedTypeDescriptor + - isRegisterByteSizeObserverCheap + - of + - verifyDeterministic +ByteKey: + methods: + - compareTo + - copyFrom + - equals + - getBytes + - getValue + - hashCode + - isEmpty + - of + - toString + properties: + - EMPTY +ByteKeyRange: + 
methods: + - containsKey + - equals + - estimateFractionForKey + - getEndKey + - getStartKey + - hashCode + - interpolateKey + - newTracker + - of + - overlaps + - split + - toString + - withEndKey + - withStartKey + properties: + - ALL_KEYS +ByteKeyRangeTracker: + methods: + - checkDone + - currentRestriction + - getFractionConsumed + - getProgress + - getRange + - getSplitPointsConsumed + - getStartPosition + - getStopPosition + - isBounded + - isDone + - markDone + - of + - toString + - tryClaim + - tryReturnRecordAt + - trySplit + - trySplitAtPosition +ByteMonitor: + methods: + - processElement +ByteStringCoder: + methods: + - consistentWithEquals + - decode + - encode + - getEncodedTypeDescriptor + - isRegisterByteSizeObserverCheap + - of + - verifyDeterministic +ByteStringOutputStream: + methods: + - size + - toByteString + - toByteStringAndReset + - toString + - write +ByteStringOutputStreamBenchmark: + methods: + - setup + - tearDown + - testCopyArray + - testNewArray + - testProtobufByteStringOutputStreamFewLargeWrites + - testProtobufByteStringOutputStreamFewMixedWritesWithReuse + - testProtobufByteStringOutputStreamFewMixedWritesWithoutReuse + - testProtobufByteStringOutputStreamFewSmallWrites + - testProtobufByteStringOutputStreamFewTinyWrites + - testProtobufByteStringOutputStreamManyLargeWrites + - testProtobufByteStringOutputStreamManyMixedWritesWithReuse + - testProtobufByteStringOutputStreamManyMixedWritesWithoutReuse + - testProtobufByteStringOutputStreamManySmallWrites + - testProtobufByteStringOutputStreamManyTinyWrites + - testSdkCoreByteStringOutputStreamFewLargeWrites + - testSdkCoreByteStringOutputStreamFewMixedWritesWithReuse + - testSdkCoreByteStringOutputStreamFewMixedWritesWithoutReuse + - testSdkCoreByteStringOutputStreamFewSmallWrites + - testSdkCoreByteStringOutputStreamFewTinyWrites + - testSdkCoreByteStringOutputStreamManyLargeWrites + - testSdkCoreByteStringOutputStreamManyMixedWritesWithReuse + - testSdkCoreByteStringOutputStreamManyMixedWritesWithoutReuse + - testSdkCoreByteStringOutputStreamManySmallWrites + - testSdkCoreByteStringOutputStreamManyTinyWrites + properties: + - src +BytesThroughputEstimator: + methods: + - addBytes + - get + - getBytes + - getFrom + - getSeconds + - getTimestamp + - update +CEPCall: + methods: + - getOperands + - getOperator + - of + - toString +CEPFieldRef: + methods: + - getAlpha + - getIndex + - of + - toString +CEPLiteral: + methods: + - compareTo + - equals + - getBoolean + - getByte + - getDateTime + - getDecimal + - getDouble + - getFloat + - getInt16 + - getInt32 + - getInt64 + - getString + - getTypeName + - hashCode + - of +CEPMeasure: + methods: + - getField + - getName + - getOperation + - getType +CEPOperation: + methods: + - of +CEPOperator: + methods: + - getCepKind + - of + - toString +CEPPattern: + methods: + - getPatternCondition + - getPatternVar + - getQuantifier + - of + - toString +CEPUtils: + methods: + - getCEPFieldRefFromParKeys + - getCEPPatternFromPattern + - getFieldRef + - getFieldType + - getRegexFromPattern + - makeOrderKeysFromCollation +Caches: + methods: + - clear + - computeIfAbsent + - describeStats + - equals + - eternal + - fromOptions + - getCache + - getWeight + - hashCode + - noop + - onRemoval + - peek + - put + - remove + - subCache + - toString + - weigh + properties: + - REFERENCE_SIZE + - ROOT +CachingFactory: + methods: + - create + - equals + - hashCode +CalcRelSplitter: + methods: + - canImplement + - execute + - getMaxUsingLevelOrdinals + - maxInputFor + - toString + - 
visitCall + - visitDynamicParam + - visitFieldAccess + - visitInputRef + - visitLiteral + - visitLocalRef +CalciteConnectionWrapper: + methods: + - abort + - clearWarnings + - close + - commit + - config + - createArrayOf + - createBlob + - createClob + - createNClob + - createPrepareContext + - createQuery + - createSQLXML + - createStatement + - createStruct + - execute + - executeQuery + - getAutoCommit + - getCatalog + - getClientInfo + - getHoldability + - getMetaData + - getNetworkTimeout + - getProperties + - getRootSchema + - getSchema + - getTransactionIsolation + - getTypeFactory + - getTypeMap + - getWarnings + - isClosed + - isReadOnly + - isValid + - isWrapperFor + - nativeSQL + - prepareCall + - prepareStatement + - releaseSavepoint + - rollback + - setAutoCommit + - setCatalog + - setClientInfo + - setHoldability + - setNetworkTimeout + - setReadOnly + - setSavepoint + - setSchema + - setTransactionIsolation + - setTypeMap + - unwrap +CalciteFactoryWrapper: + methods: + - newConnection + - newDatabaseMetaData + - newPreparedStatement + - newResultSet + - newResultSetMetaData + - newStatement +CalciteQueryPlanner: + methods: + - convertToBeamRel + - createPlanner + - defaultConfig + - getDef + - getNonCumulativeCost + - parse + properties: + - FACTORY + - SOURCE +CalciteUtils: + methods: + - isDateTimeType + - isStringType + - sqlTypeWithAutoCast + - toCalciteRowType + - toField + - toFieldType + - toRelDataType + - toSchema + - toSqlTypeName + properties: + - BIG_INT + - BOOLEAN + - CHAR + - DATE + - DECIMAL + - DOUBLE + - FLOAT + - IDENTIFIER + - INTEGER + - NULLABLE_DATE + - NULLABLE_TIME + - NULLABLE_TIMESTAMP + - NULLABLE_TIMESTAMP_WITH_LOCAL_TZ + - SMALL_INT + - TIME + - TIMESTAMP + - TIMESTAMP_WITH_LOCAL_TZ + - TIME_WITH_LOCAL_TZ + - TINY_INT + - VARBINARY + - VARCHAR +CalendarWindows: + methods: + - assignWindow + - beginningOnDay + - days + - getDayOfMonth + - getMonthOfYear + - getNumber + - getStartDate + - getTimeZone + - isCompatible + - months + - populateDisplayData + - verifyCompatibility + - weeks + - windowCoder + - withStartingDay + - withStartingMonth + - withStartingYear + - withTimeZone + - years +CancellableQueue: + methods: + - cancel + - put + - reset + - take +CannotProvideCoderException: + methods: + - getReason + - getRootCause +CassandraIO: + methods: + - build + - delete + - expand + - finishBundle + - process + - processElement + - read + - readAll + - setup + - teardown + - validate + - withCoder + - withConnectTimeout + - withConsistencyLevel + - withEntity + - withHosts + - withKeyspace + - withLocalDc + - withMapperFactoryFn + - withMinNumberOfSplits + - withPassword + - withPort + - withQuery + - withReadTimeout + - withRingRanges + - withTable + - withUsername + - write +Cast: + methods: + - accept + - accumulate + - apply + - castNumber + - castRow + - castValue + - create + - expand + - isDecimal + - isIntegral + - message + - narrowing + - of + - outputSchema + - path + - process + - toString + - validator + - verifyCompatibility + - widening +CastFunctionImpl: + methods: + - getImplementor + - getParameters + - implement +CategoryPrice: + methods: + - decode + - encode + - equals + - hashCode + - sizeInBytes + - structuralValue + - toString + - verifyDeterministic + properties: + - CODER + - category + - isLast + - price +CdapIO: + methods: + - expand + - read + - withCdapPlugin + - withCdapPluginClass + - withKeyClass + - withLocksDirPath + - withPluginConfig + - withValueClass + - write +ChangeStreamDao: + methods: + - 
changeStreamQuery +ChangeStreamMetrics: + methods: + - decActivePartitionReadCounter + - incActivePartitionReadCounter + - incDataRecordCounter + - incHeartbeatRecordCount + - incPartitionRecordCount + - incPartitionRecordMergeCount + - incPartitionRecordSplitCount + - incQueryCounter + - updateDataRecordCommittedToEmitted + - updatePartitionCreatedToScheduled + - updatePartitionScheduledToRunning + properties: + - ACTIVE_PARTITION_READ_COUNT + - DATA_RECORD_COMMITTED_TO_EMITTED_0MS_TO_1000MS_COUNT + - DATA_RECORD_COMMITTED_TO_EMITTED_1000MS_TO_3000MS_COUNT + - DATA_RECORD_COMMITTED_TO_EMITTED_3000MS_TO_INF_COUNT + - DATA_RECORD_COUNT + - HEARTBEAT_RECORD_COUNT + - PARTITION_CREATED_TO_SCHEDULED_MS + - PARTITION_RECORD_COUNT + - PARTITION_RECORD_MERGE_COUNT + - PARTITION_RECORD_SPLIT_COUNT + - PARTITION_SCHEDULED_TO_RUNNING_MS + - QUERY_COUNT +ChangeStreamRecordMapper: + methods: + - toChangeStreamRecords +ChangeStreamRecordMetadata: + methods: + - build + - equals + - getNumberOfRecordsRead + - getPartitionCreatedAt + - getPartitionEndTimestamp + - getPartitionRunningAt + - getPartitionScheduledAt + - getPartitionStartTimestamp + - getPartitionToken + - getQueryStartedAt + - getRecordReadAt + - getRecordStreamEndedAt + - getRecordStreamStartedAt + - getRecordTimestamp + - getTotalStreamTimeMillis + - hashCode + - newBuilder + - toString + - withNumberOfRecordsRead + - withPartitionCreatedAt + - withPartitionEndTimestamp + - withPartitionRunningAt + - withPartitionScheduledAt + - withPartitionStartTimestamp + - withPartitionToken + - withQueryStartedAt + - withRecordReadAt + - withRecordStreamEndedAt + - withRecordStreamStartedAt + - withRecordTimestamp + - withTotalStreamTimeMillis +ChangeStreamResultSet: + methods: + - close + - getCurrentRowAsStruct + - getMetadata + - getPgJsonb + - next +ChangeStreamResultSetMetadata: + methods: + - getNumberOfRecordsRead + - getQueryStartedAt + - getRecordReadAt + - getRecordStreamEndedAt + - getRecordStreamStartedAt + - getTotalStreamDuration +ChangeStreamsConstants: + properties: + - DEFAULT_CHANGE_STREAM_NAME + - DEFAULT_INCLUSIVE_END_AT + - DEFAULT_INCLUSIVE_START_AT + - DEFAULT_RPC_PRIORITY + - MAX_INCLUSIVE_END_AT + - SAMPLE_PARTITION + - THROUGHPUT_WINDOW_SECONDS +CheckpointMarkImpl: + methods: + - decode + - encode + - finalizeCheckpoint +ChildPartition: + methods: + - equals + - getParentTokens + - getToken + - hashCode + - toString +ChildPartitionsRecord: + methods: + - equals + - getChildPartitions + - getRecordSequence + - getRecordTimestamp + - getStartTimestamp + - hashCode + - toString +ChildPartitionsRecordAction: + methods: + - run +CivilTimeEncoder: + methods: + - decodePacked32TimeSeconds + - decodePacked32TimeSecondsAsJavaTime + - decodePacked64DatetimeMicros + - decodePacked64DatetimeMicrosAsJavaTime + - decodePacked64DatetimeSeconds + - decodePacked64DatetimeSecondsAsJavaTime + - decodePacked64TimeMicros + - decodePacked64TimeMicrosAsJavaTime + - decodePacked64TimeNanos + - decodePacked64TimeNanosAsJavaTime + - encodePacked32TimeSeconds + - encodePacked64DatetimeMicros + - encodePacked64DatetimeSeconds + - encodePacked64TimeMicros + - encodePacked64TimeNanos +ClassLoaderFileSystem: + methods: + - fromOptions + - getCurrentDirectory + - getFilename + - getScheme + - isDirectory + - resolve + properties: + - SCHEMA +CleanUpReadChangeStreamDoFn: + methods: + - processElement +ClickHouseIO: + methods: + - build + - expand + - finishBundle + - getTableSchema + - initialBackoff + - insertDeduplicate + - insertDistributedSync + - 
insertQuorum + - jdbcUrl + - maxCumulativeBackoff + - maxInsertBlockSize + - maxRetries + - processElement + - properties + - schema + - setup + - startBundle + - table + - tableSchema + - tearDown + - withInitialBackoff + - withInsertDeduplicate + - withInsertDistributedSync + - withInsertQuorum + - withMaxCumulativeBackoff + - withMaxInsertBlockSize + - withMaxRetries + - withTableSchema + - write + properties: + - DEFAULT_INITIAL_BACKOFF + - DEFAULT_MAX_CUMULATIVE_BACKOFF + - DEFAULT_MAX_INSERT_BLOCK_SIZE + - DEFAULT_MAX_RETRIES +ClickHouseWriter: {} +ClientConfiguration: + methods: + - build + - builder + - create + - credentialsProvider + - endpoint + - region + - retry + - toBuilder +CloudPubsubTransforms: + methods: + - ensureUsableAsCloudPubsub + - expand + - fromCloudPubsubMessages + - toCloudPubsubMessages +CloudVision: + methods: + - annotateImagesFromBytes + - annotateImagesFromBytesWithContext + - annotateImagesFromGcsUri + - annotateImagesFromGcsUriWithContext + - mapToRequest +CoGbkResult: + methods: + - and + - copy + - decode + - done + - empty + - encode + - equals + - fastForward + - getAll + - getCoderArguments + - getOnly + - getSchema + - getUnionCoder + - hasNext + - hashCode + - isEmpty + - iterator + - next + - observeAt + - of + - peek + - toString + - verifyDeterministic + properties: + - index + - value +CoGbkResultSchema: + methods: + - equals + - getIndex + - getTag + - getTupleTagList + - hashCode + - of + - size + - toString +CoGroup: + methods: + - crossProductJoin + - expand + - fieldAccessDescriptor + - fieldIds + - fieldNames + - join + - process + - withKeyField + - withOptionalParticipation + - withSideInput +CoGroupByKey: + methods: + - create + - expand + - processElement +CoGroupByKeyLoadTest: + methods: + - main + - processElement +Coder: + methods: + - consistentWithEquals + - decode + - encode + - equals + - getCoderArguments + - getEncodedTypeDescriptor + - getMessage + - getReasons + - hashCode + - isRegisterByteSizeObserverCheap + - nested + - registerByteSizeObserver + - structuralValue + - toString + - verifyDeterministic + properties: + - NESTED + - OUTER + - isWholeStream +CoderException: {} +CoderProperties: + methods: + - coderConsistentWithEquals + - coderConsistentWithEqualsInContext + - coderDecodeEncodeContentsEqual + - coderDecodeEncodeContentsEqualInContext + - coderDecodeEncodeContentsInSameOrder + - coderDecodeEncodeContentsInSameOrderInContext + - coderDecodeEncodeEqual + - coderDecodeEncodeEqualInContext + - coderDecodeEncodeInContext + - coderDecodesBase64 + - coderDecodesBase64ContentsEqual + - coderDeterministic + - coderDeterministicInContext + - coderEncodesBase64 + - coderSerializable + - getCount + - getMean + - getSum + - getSumAndReset + - reset + - structuralValueConsistentWithEquals + - structuralValueConsistentWithEqualsInContext + - structuralValueDecodeEncodeEqual + - structuralValueDecodeEncodeEqualInContext + - structuralValueDecodeEncodeEqualIterable + - structuralValueDecodeEncodeEqualIterableInContext + - testByteCount + properties: + - ALL_CONTEXTS +CoderProvider: + methods: + - coderFor +CoderProviders: + methods: + - coderFor + - forCoder + - fromStaticMethods + - toString +CoderRegistry: + methods: + - coderFor + - createDefault + - getCoder + - getOutputCoder + - getType + - registerCoderForClass + - registerCoderForType + - registerCoderProvider +CoderUtils: + methods: + - clone + - decodeFromBase64 + - decodeFromByteArray + - decodeFromByteString + - encodeToBase64 + - encodeToByteArray + - 
getCodedType +CollectionCoder: + methods: + - getEncodedTypeDescriptor + - of +ColumnType: + methods: + - equals + - getName + - getOrdinalPosition + - getType + - hashCode + - isPrimaryKey + - toString +Combine: + methods: + - accum + - addInput + - apply + - asSingletonView + - compact + - createAccumulator + - decode + - defaultValue + - encode + - equals + - expand + - extractOutput + - getAccumulatorCoder + - getAdditionalInputs + - getAppliedFn + - getCoderArguments + - getCombineFn + - getDefaultOutputCoder + - getFanout + - getFn + - getInputType + - getInsertDefault + - getNameOverride + - getOutputType + - getPipelineOptions + - getSideInputs + - globally + - groupedValues + - hashCode + - identity + - input + - isInsertDefault + - mergeAccumulators + - of + - perKey + - populateDisplayData + - processElement + - sideInput + - startBundle + - toString + - verifyDeterministic + - withFanout + - withHotKeyFanout + - withSideInputs + - withoutDefaults + properties: + - accum + - input +CombineContextFactory: + methods: + - createFromStateContext + - getPipelineOptions + - nullContext + - sideInput +CombineFnBase: + methods: + - getAccumTVariable + - getAccumulatorCoder + - getDefaultOutputCoder + - getIncompatibleGlobalWindowErrorMessage + - getInputTVariable + - getOutputTVariable + - populateDisplayData +CombineFnTester: + methods: + - testCombineFn +CombineFnUtil: + methods: + - addInput + - bindContext + - compact + - createAccumulator + - defaultValue + - extractOutput + - getAccumulatorCoder + - getDefaultOutputCoder + - mergeAccumulators + - populateDisplayData + - toFnWithContext +CombineFns: + methods: + - addInput + - compact + - compose + - createAccumulator + - decode + - encode + - equals + - extractOutput + - get + - getAccumulatorCoder + - getCoderArguments + - hasNext + - hashCode + - iterator + - mergeAccumulators + - next + - populateDisplayData + - remove + - verifyDeterministic + - with +CombineLoadTest: + methods: + - apply + - getPerKeyCombiner + - main +CombineRunners: + methods: + - createRunnerForPTransform + - getPTransformRunnerFactories +CombineWithContext: + methods: + - addInput + - apply + - compact + - createAccumulator + - defaultValue + - extractOutput + - getPipelineOptions + - mergeAccumulators + - sideInput +CompletableFutureInboundDataClient: + methods: + - awaitCompletion + - cancel + - complete + - create + - fail + - isDone + - runWhenComplete +CompositeOperatorTranslator: + methods: + - canTranslate + - translate +CompositeProvider: + methods: + - findTranslator + - of +CompositeUnaryFunction: + methods: + - apply + - of +CompressedSource: + methods: + - allowsDynamicSplitting + - close + - createDecompressingChannel + - from + - getChannelFactory + - getCount + - getCurrent + - getCurrentTimestamp + - getOutputCoder + - getSplitPointsConsumed + - getSplitPointsRemaining + - isCompressed + - isOpen + - matches + - populateDisplayData + - read + - validate + - withCompression + - withDecompression +ConfigWrapper: + methods: + - build + - fromJsonFile + - fromJsonString + - setParam + - withParams +ConfluentSchemaRegistryDeserializerProvider: + methods: + - getCoder + - getDeserializer + - of +ConnectionManager: {} +ConsoleResultPublisher: + methods: + - publish +Contextful: + methods: + - fn + - getClosure + - getRequirements + - of + - sideInput + - toString + - wrapProcessContext +ContextualTextIO: + methods: + - apply + - compare + - expand + - from + - populateDisplayData + - processElement + - read + - readFiles + - withCompression + - 
withDelimiter + - withEmptyMatchTreatment + - withHasMultilineCSVRecords + - withHintMatchesManyFiles + - withMatchConfiguration + - withRecordNumMetadata +ConversionContext: + methods: + - of +Convert: + methods: + - expand + - fromRows + - processElement + - to + - toRows +ConvertHelpers: + methods: + - appender + - getConvertPrimitive + - getConvertedSchemaInformation + - prepare + properties: + - outputSchemaCoder + - unboxedType +Count: + methods: + - addInput + - apply + - combineFn + - createAccumulator + - decode + - encode + - equals + - expand + - extractOutput + - getAccumulatorCoder + - getIncompatibleGlobalWindowErrorMessage + - globally + - hashCode + - isRegisterByteSizeObserverCheap + - mergeAccumulators + - perElement + - perKey +CountByKey: + methods: + - accumulationMode + - expand + - keyBy + - named + - of + - output + - triggeredBy + - windowBy + - withAllowedLateness + - withOnTimeBehavior + - withTimestampCombiner +CountIf: + methods: + - addInput + - combineFn + - createAccumulator + - extractOutput + - getAccumulatorCoder + - mergeAccumulators +CountingSource: + methods: + - advance + - apply + - close + - createReader + - createSourceForSubrange + - equals + - finalizeCheckpoint + - getBytesPerOffset + - getCheckpointMark + - getCheckpointMarkCoder + - getCurrent + - getCurrentSource + - getCurrentTimestamp + - getLastEmitted + - getMaxEndOffset + - getOutputCoder + - getSplitBacklogBytes + - getSplitPointsRemaining + - getStartTime + - getWatermark + - hashCode + - split + - start + - unbounded + - unboundedWithTimestampFn + - upTo + - withRate + - withTimestampFn +CovarianceFn: + methods: + - addInput + - createAccumulator + - extractOutput + - getAccumulatorCoder + - mergeAccumulators + - newPopulation + - newSample +CrashingRunner: + methods: + - fromOptions + - run +Create: + methods: + - apply + - close + - createReader + - createSourceForSubrange + - empty + - expand + - fromIterable + - getBytesPerOffset + - getCurrent + - getCurrentSource + - getElements + - getEstimatedSizeBytes + - getMaxEndOffset + - getOutputCoder + - of + - ofProvider + - processElement + - timestamped + - withCoder + - withRowSchema + - withSchema + - withType +CreateOptions: + methods: + - build + - builder + - expectFileToNotExist + - mimeType + - setExpectFileToNotExist + - setMimeType +CreateTableDestinations: + methods: + - expand + - processElement + - startBundle +CreateTableHelpers: {} +CreateTables: + methods: + - expand + - processElement + - startBundle +CrossLanguageConfiguration: + methods: + - getDataSourceConfiguration + - getDatabase + - getOAuthToken + - getPassword + - getPrivateKeyPassphrase + - getPrivateKeyPath + - getQuery + - getRawPrivateKey + - getRole + - getSchema + - getServerName + - getStagingBucketName + - getStorageIntegrationName + - getTable + - getUsername + - getWarehouse + - setDatabase + - setOAuthToken + - setPassword + - setPrivateKeyPassphrase + - setPrivateKeyPath + - setQuery + - setRawPrivateKey + - setRole + - setSchema + - setServerName + - setStagingBucketName + - setStorageIntegrationName + - setTable + - setUsername + - setWarehouse +CsvToRow: + methods: + - expand + - getCsvFormat +CustomCoder: + methods: + - getCoderArguments + - verifyDeterministic +CustomHttpErrors: + methods: + - addErrorForCode + - addErrorForCodeAndUrlContains + - build + - getCustomError + - getMatcher +CustomTimestampPolicyWithLimitedDelay: + methods: + - getTimestampForRecord + - getWatermark +CustomX509TrustManager: + methods: + - checkClientTrusted + - 
checkServerTrusted + - getAcceptedIssuers +Customer: + methods: + - equals + - getCountryOfResidence + - getId + - getName + - hashCode + - setCountryOfResidence + - setId + - setName + - toString +DLPDeidentifyText: + methods: + - build + - expand + - getBatchSizeBytes + - getColumnDelimiter + - getDeidentifyConfig + - getDeidentifyTemplateName + - getHeaderColumns + - getInspectConfig + - getInspectTemplateName + - getProjectId + - newBuilder + - processElement + - setBatchSizeBytes + - setColumnDelimiter + - setDeidentifyConfig + - setDeidentifyTemplateName + - setHeaderColumns + - setInspectConfig + - setInspectTemplateName + - setProjectId + - setup + - teardown + properties: + - DLP_PAYLOAD_LIMIT_BYTES +DLPInspectText: + methods: + - build + - expand + - getBatchSizeBytes + - getColumnDelimiter + - getHeaderColumns + - getInspectConfig + - getInspectTemplateName + - getProjectId + - newBuilder + - processElement + - setBatchSizeBytes + - setColumnDelimiter + - setHeaderColumns + - setInspectConfig + - setInspectTemplateName + - setProjectId + - setup + - teardown + properties: + - DLP_PAYLOAD_LIMIT_BYTES +DLPReidentifyText: + methods: + - build + - expand + - getBatchSizeBytes + - getColumnDelimiter + - getHeaderColumns + - getInspectConfig + - getInspectTemplateName + - getProjectId + - getReidentifyConfig + - getReidentifyTemplateName + - newBuilder + - processElement + - setBatchSizeBytes + - setColumnDelimiter + - setHeaderColumns + - setInspectConfig + - setInspectTemplateName + - setProjectId + - setReidentifyConfig + - setReidentifyTemplateName + - setup + - teardown + properties: + - DLP_PAYLOAD_LIMIT_BYTES +DaoFactory: + methods: + - getChangeStreamDao + - getPartitionMetadataAdminDao + - getPartitionMetadataDao +DataCatalogPipelineOptionsRegistrar: + methods: + - getPipelineOptions +DataCatalogTableProvider: + methods: + - buildBeamSqlTable + - close + - create + - createDataCatalogClient + - createTable + - dropTable + - getTable + - getTableByFullName + - getTableType + - getTables + - setSchemaIfNotPresent +DataChangeRecord: + methods: + - equals + - getCommitTimestamp + - getMetadata + - getModType + - getMods + - getNumberOfPartitionsInTransaction + - getNumberOfRecordsInTransaction + - getPartitionToken + - getRecordSequence + - getRecordTimestamp + - getRowType + - getServerTransactionId + - getTableName + - getTransactionTag + - getValueCaptureType + - hashCode + - isLastRecordInTransactionInPartition + - isSystemTransaction + - toString +DataChangeRecordAction: + methods: + - run +DataEndpoint: + methods: + - create + - getCoder + - getReceiver + - getTransformId +DataStoreV1SchemaIOProvider: + methods: + - buildReader + - buildWriter + - configurationSchema + - expand + - from + - getKind + - getProjectId + - identifier + - isBounded + - requiresDataSchema + - schema + properties: + - KEY_FIELD_PROPERTY +DataStoreV1TableProvider: + methods: + - getSchemaIOProvider + - getTableStatistics + - getTableType +DataStreams: + methods: + - close + - decodeFromChunkBoundaryToChunkBoundary + - delimitElement + - hasNext + - isEof + - isReady + - next + - outbound + - prefetch + - read + - remove + - write + properties: + - DEFAULT_OUTBOUND_BUFFER_LIMIT_BYTES +DataframeTransform: + methods: + - expand + - of + - withExpansionService + - withIndexes +DatastoreIO: + methods: + - v1 +DatastoreV1: + methods: + - addRequestLatency + - apply + - deleteEntity + - deleteKey + - expand + - finishBundle + - from + - getDatastore + - getLiteralGqlQuery + - getLocalhost + - getNamespace 
+ - getNamespaceValueProvider + - getNumEntities + - getNumQuerySplits + - getProjectId + - getProjectValueProvider + - getQuery + - getQuerySplitter + - getReadTime + - nextBatchSize + - populateDisplayData + - processElement + - read + - start + - startBundle + - toString + - withHintNumWorkers + - withLiteralGqlQuery + - withLocalhost + - withNamespace + - withNumQuerySplits + - withProjectId + - withQuery + - withRampupThrottlingDisabled + - withReadTime + - write + properties: + - NUM_QUERY_SPLITS_MAX +Date: + methods: + - getArgument + - getArgumentType + - getBaseType + - getIdentifier + - toBaseType + - toInputType + properties: + - IDENTIFIER +DateFunctions: + methods: + - date +DateTime: + methods: + - getArgument + - getArgumentType + - getBaseType + - getIdentifier + - toBaseType + - toInputType + properties: + - DATETIME_SCHEMA + - DATE_FIELD_NAME + - IDENTIFIER + - TIME_FIELD_NAME +DateTimeUtils: + methods: + - findDateTimePattern + - formatTimestampWithTimeZone + - parseDate + - parseDateToValue + - parseTime + - parseTimeToValue + - parseTimestampWithLocalTimeZone + - parseTimestampWithTZToValue + - parseTimestampWithTimeZone + - parseTimestampWithUTCTimeZone + - parseTimestampWithoutTimeZone + - validateTimeInterval + - validateTimestamp + properties: + - MAX_UNIX_MILLIS + - MILLIS_PER_DAY + - MIN_UNIX_MILLIS +DeadLetteredTransform: + methods: + - expand +DebeziumIO: + methods: + - create + - expand + - getConfigurationMap + - read + - readAsJson + - withCoder + - withConnectionProperties + - withConnectionProperty + - withConnectorClass + - withConnectorConfiguration + - withFormatFunction + - withHostName + - withMaxNumberOfRecords + - withPassword + - withPort + - withSourceConnector + - withUsername +DebeziumTransformRegistrar: + methods: + - buildExternal + - knownBuilders + - setConnectionProperties + - setConnectorClass + - setHost + - setMaxNumberOfRecords + - setPassword + - setPort + - setUsername + properties: + - READ_JSON_URN +DecodingFnDataReceiver: + methods: + - accept + - create +Deduplicate: + methods: + - apply + - expand + - keyedValues + - onExpiry + - processElement + - values + - withDuration + - withRepresentativeCoder + - withRepresentativeType + - withRepresentativeValueFn + - withTimeDomain + properties: + - DEFAULT_DURATION + - DEFAULT_TIME_DOMAIN +DefaultAutoscaler: + methods: + - getTotalBacklogBytes + - start + - stop +DefaultBlobstoreClientBuilderFactory: + methods: + - createBuilder +DefaultFilenamePolicy: + methods: + - decode + - encode + - equals + - fromParams + - fromStandardParameters + - hashCode + - of + - populateDisplayData + - toString + - unwindowedFilename + - windowedFilename + - withBaseFilename + - withShardTemplate + - withSuffix + - withWindowedWrites + properties: + - DEFAULT_UNWINDOWED_SHARD_TEMPLATE + - DEFAULT_WINDOWED_SHARD_TEMPLATE +DefaultPipelineOptionsRegistrar: + methods: + - getPipelineOptions +DefaultS3ClientBuilderFactory: + methods: + - createBuilder +DefaultS3FileSystemSchemeRegistrar: + methods: + - fromOptions +DefaultTableFilter: + methods: + - getNotSupported + - numSupported +DefaultTrigger: + methods: + - getWatermarkThatGuaranteesFiring + - isCompatible + - mayFinish + - of +DelegateCoder: + methods: + - decode + - encode + - equals + - getCoder + - getEncodedTypeDescriptor + - hashCode + - of + - structuralValue + - toString + - verifyDeterministic +DelegatingCounter: + methods: + - dec + - getName + - inc +DelegatingDistribution: + methods: + - getName + - update +DelegatingHistogram: + methods: + - 
getName + - update +DequeCoder: + methods: + - consistentWithEquals + - getEncodedTypeDescriptor + - of + - structuralValue + - verifyDeterministic +DetectNewPartitionsAction: + methods: + - run +DetectNewPartitionsDoFn: + methods: + - getInitialWatermarkEstimatorState + - getSize + - initialRestriction + - newTracker + - newWatermarkEstimator + - processElement + - setAveragePartitionBytesSize + - setup +DetectNewPartitionsRangeTracker: + methods: + - tryClaim +DicomIO: + methods: + - expand + - finishSpecifyingOutput + - getFailedReads + - getPipeline + - getReadResponse + - instantiateHealthcareClient + - processElement + - readStudyMetadata + properties: + - ERROR_MESSAGE + - METADATA +DirectStreamObserver: + methods: + - onCompleted + - onError + - onNext +DisplayData: + methods: + - absolute + - add + - addIfNotDefault + - addIfNotNull + - asMap + - build + - delegate + - equals + - extend + - from + - getComponents + - getKey + - getLabel + - getLinkUrl + - getNamespace + - getPath + - getShortValue + - getType + - getValue + - hashCode + - include + - inferType + - item + - items + - none + - of + - root + - setKey + - setLabel + - setLinkUrl + - setNamespace + - setShortValue + - setType + - setValue + - toString + - withLabel + - withLinkUrl + - withNamespace +Distinct: + methods: + - accumulationMode + - apply + - create + - expand + - named + - of + - output + - processElement + - projected + - triggeredBy + - windowBy + - withAllowedLateness + - withOnTimeBehavior + - withRepresentativeType + - withRepresentativeValueFn + - withTimestampCombiner +DistributionResult: + methods: + - create + - getCount + - getMax + - getMean + - getMin + - getSum + properties: + - IDENTITY_ELEMENT +DlqProvider: + methods: + - expand + - identifier + - newDlqTransform +DoFn: + methods: + - element + - fireTimestamp + - getAllowedTimestampSkew + - getInputTypeDescriptor + - getOutputTypeDescriptor + - getPipelineOptions + - output + - outputWithTimestamp + - pane + - populateDisplayData + - prepareForProcessing + - resume + - resumeDelay + - shouldResume + - sideInput + - stop + - timeDomain + - timestamp + - window + - withResumeDelay +DoFnInfo: + methods: + - forFn + - getDoFn + - getDoFnSchemaInformation + - getInputCoder + - getMainOutput + - getOutputCoders + - getSideInputMapping + - getSideInputViews + - getWindowingStrategy + - withFn +DoFnInvokers: + methods: + - getErrorContext + - invokerFor + - pipelineOptions + - tryInvokeSetupFor +DoFnOutputReceivers: + methods: + - get + - getRowReceiver + - output + - outputWithTimestamp + - rowReceiver + - windowedMultiReceiver + - windowedReceiver +DoFnSchemaInformation: + methods: + - apply + - create + - getElementConverters + - getFieldAccessDescriptor + - of + - toBuilder +DoFnSignature: + methods: + - alwaysFetched + - boundedWindow + - build + - bundleFinalizer + - coderT + - dispatch + - elementParameter + - elementT + - estimatorStateT + - estimatorT + - extraParameters + - field + - fieldAccessDeclarations + - fieldAccessString + - finishBundle + - finishBundleContext + - fnClass + - getInitialRestriction + - getInitialWatermarkEstimatorState + - getMainOutputReceiver + - getRestrictionCoder + - getSchemaElementParameters + - getSideInputParameters + - getSize + - getWatermarkEstimatorStateCoder + - hasReturnValue + - id + - index + - isBoundedPerElement + - isRowReceiver + - isSplittable + - isStateful + - keyT + - match + - newTracker + - newWatermarkEstimator + - onTimerContext + - onTimerFamilyMethods + - onTimerMethods + - 
onWindowExpiration + - onWindowExpirationContext + - outputReceiverParameter + - paneInfoParameter + - pipelineOptions + - processContext + - processElement + - referent + - requiresStableInput + - requiresTimeSortedInput + - restrictionParameter + - restrictionT + - restrictionTracker + - schemaElementParameter + - setElementT + - setFieldAccessString + - setIndex + - setSideInputId + - setup + - sideInputId + - sideInputParameter + - splitRestriction + - startBundle + - startBundleContext + - stateDeclarations + - stateParameter + - stateType + - taggedOutputReceiverParameter + - targetMethod + - teardown + - timeDomainParameter + - timerDeclarations + - timerFamilyDeclarations + - timerFamilyParameter + - timerIdParameter + - timerParameter + - timestampParameter + - toBuilder + - trackerT + - truncateRestriction + - usesState + - usesTimers + - watermarkEstimator + - watermarkEstimatorState + - watermarkEstimatorStateT + - watermarkEstimatorT + - windowT + properties: + - PREFIX +DoFnSignatures: + methods: + - addFieldAccessDeclaration + - addFieldAccessDeclarations + - addParameter + - addStateDeclaration + - addStateDeclarations + - addTimerDeclaration + - addTimerDeclarations + - addTimerFamilyDeclaration + - addTimerFamilyDeclarations + - checkArgument + - checkNotNull + - create + - findParameter + - findParameters + - getAnnotations + - getExtraParameters + - getFieldAccessDeclarations + - getIndex + - getMethod + - getSignature + - getStateDeclarations + - getStateParameters + - getStateSpecOrThrow + - getTimerDeclarations + - getTimerFamilyDeclarations + - getTimerFamilyParameters + - getTimerFamilySpecOrThrow + - getTimerParameters + - getTimerSpecOrThrow + - getType + - hasParameter + - isSplittable + - isStateful + - of + - requiresTimeSortedInput + - setParameter + - signatureForDoFn + - usesBagState + - usesBundleFinalizer + - usesMapState + - usesOrderedListState + - usesSetState + - usesState + - usesTimers + - usesValueState + - usesWatermarkHold +DoFnTester: + methods: + - clearOutputElements + - close + - createProcessContext + - dispatch + - element + - finishBundle + - finishBundleContext + - get + - getCloningBehavior + - getErrorContext + - getMainOutputTag + - getMutableOutput + - getPipelineOptions + - key + - of + - output + - outputReceiver + - outputWithTimestamp + - pane + - paneInfo + - peekOutputElements + - peekOutputElementsInWindow + - peekOutputElementsWithTimestamp + - pipelineOptions + - processBundle + - processContext + - processElement + - processTimestampedElement + - processWindowedElement + - restriction + - restrictionTracker + - setCloningBehavior + - setSideInput + - setSideInputs + - sideInput + - startBundle + - startBundleContext + - taggedOutputReceiver + - takeOutputElements + - takeOutputElementsWithTimestamp + - timeDomain + - timerId + - timestamp + - window +DoFnWithExecutionInformation: + methods: + - getDoFn + - getMainOutputTag + - getSchemaInformation + - getSideInputMapping + - of +Done: + methods: + - decode + - encode + - equals + - hashCode + - sizeInBytes + - structuralValue + - toString + - verifyDeterministic + properties: + - CODER +DoubleCoder: + methods: + - consistentWithEquals + - decode + - encode + - getEncodedTypeDescriptor + - isRegisterByteSizeObserverCheap + - of + - verifyDeterministic +DropFields: + methods: + - expand + - fields +DurationCoder: + methods: + - consistentWithEquals + - decode + - encode + - getEncodedTypeDescriptor + - isRegisterByteSizeObserverCheap + - of + - registerByteSizeObserver + - 
verifyDeterministic +DynamicAvroDestinations: + methods: + - getCodec + - getDatumWriterFactory + - getMetadata + - getSchema +DynamicDestinations: + methods: + - getDestination + - getDestinationCoder + - getSchema + - getSideInputs + - getTable + - sideInput +DynamicFileDestinations: + methods: + - constant + - formatRecord + - getDefaultDestination + - getDestination + - getDestinationCoder + - getFilenamePolicy + - populateDisplayData + - toDefaultPolicies +DynamicProtoCoder: + methods: + - coderFor + - equals + - getCoderProvider + - hashCode + - of + - withExtensionsFrom + properties: + - serialVersionUID +DynamoDBIO: + methods: + - apply + - build + - builder + - create + - expand + - finishBundle + - items + - processElement + - read + - setMaxAttempts + - setMaxDuration + - setup + - startBundle + - tearDown + - test + - withAwsClientsProvider + - withClientConfiguration + - withCoder + - withDeduplicateKeys + - withDynamoDbClientProvider + - withRetryConfiguration + - withScanRequestFn + - withScanResponseMapperFn + - withScanResultMapperFn + - withWriteRequestMapperFn + - write +ElasticsearchIO: + methods: + - advance + - bulkIO + - close + - closeClient + - create + - createReader + - customizeRequestConfig + - decode + - docToBulk + - encode + - expand + - finishBundle + - fromSpec + - getAddresses + - getAllowedTimestampSkew + - getApiKey + - getApiPrefix + - getBearerToken + - getBulkDirective + - getBulkEndPoint + - getBulkIO + - getConnectTimeout + - getCountEndPoint + - getCurrent + - getCurrentSource + - getDocToBulk + - getEstimatedSizeBytes + - getHasError + - getIndex + - getInputDoc + - getKeystorePassword + - getKeystorePath + - getOutputCoder + - getPassword + - getPrefixedEndpoint + - getResponseItemJson + - getSearchEndPoint + - getSocketTimeout + - getTimestamp + - getType + - getUsername + - isTrustSelfSignedCerts + - of + - output + - populateDisplayData + - processElement + - read + - serialize + - setup + - split + - start + - startBundle + - test + - validate + - withAllowableResponseErrors + - withApiKey + - withAppendOnly + - withBackendVersion + - withBatchSize + - withBearerToken + - withBulkDirective + - withConnectTimeout + - withConnectionConfiguration + - withDocVersionFn + - withDocVersionType + - withHasError + - withIdFn + - withIgnoreVersionConflicts + - withIndexFn + - withInputDoc + - withIsDeleteFn + - withKeystorePassword + - withKeystorePath + - withMaxBatchSize + - withMaxBatchSizeBytes + - withMaxBufferingDuration + - withMaxParallelRequests + - withMaxParallelRequestsPerWindow + - withMetadata + - withPassword + - withQuery + - withResponseItemJson + - withRetryConfiguration + - withRoutingFn + - withScrollKeepalive + - withSocketTimeout + - withThrowWriteErrors + - withTimestamp + - withTrustSelfSignedCerts + - withTypeFn + - withUpsertScript + - withUsePartialUpdate + - withUseStatefulBatches + - withUsername + - write + properties: + - FAILED_WRITES + - SUCCESSFUL_WRITES +ElementByteSizeObservableIterable: + methods: + - addObserver + - iterator +ElementByteSizeObservableIterator: {} +ElementByteSizeObserver: + methods: + - advance + - getIsLazy + - setLazy + - setScalingFactor + - update +EmptyOnDeserializationThreadLocal: {} +EncodableThrowable: + methods: + - equals + - forThrowable + - hashCode + - throwable +EncodedBoundedWindow: + methods: + - consistentWithEquals + - decode + - encode + - forEncoding + - getEncodedWindow + - isRegisterByteSizeObserverCheap + - maxTimestamp + properties: + - INSTANCE +EncodingException: {} 
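Of the classes listed in this part of the dictionary, DoFn is the one most user code touches directly. As a minimal, hedged sketch (again an editorial illustration, not part of the generated dictionary or of this diff), the snippet below wires a DoFn through ParDo using the element and output access points enumerated in the DoFn entry above; the UpperCaseFn name and the upper-casing logic are illustrative assumptions.

import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;

public class UpperCaseSketch {
  // Hypothetical DoFn: upper-cases each incoming string.
  static class UpperCaseFn extends DoFn<String, String> {
    @ProcessElement
    public void processElement(@Element String word, OutputReceiver<String> out) {
      out.output(word.toUpperCase());
    }
  }

  // Applies the DoFn to an existing PCollection of strings.
  static PCollection<String> upperCaseAll(PCollection<String> lines) {
    return lines.apply("UpperCase", ParDo.of(new UpperCaseFn()));
  }
}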
+EntityToRow: + methods: + - create + - expand + - processElement +EnumerationType: + methods: + - create + - equals + - getArgument + - getArgumentType + - getBaseType + - getIdentifier + - getValue + - getValues + - getValuesMap + - hashCode + - toBaseType + - toInputType + - toString + - valueOf + properties: + - IDENTIFIER +Event: + methods: + - decode + - encode + - equals + - hasAnnotation + - hashCode + - sizeInBytes + - structuralValue + - toString + - verifyDeterministic + - withAnnotation + properties: + - CODER + - bid + - newAuction + - newPerson +ExecutionStateSampler: + methods: + - activate + - create + - deactivate + - getCurrentThreadsPTransformId + - getLastTransitionTimeMillis + - getPTransformId + - getPTransformUniqueName + - getStatus + - getTrackedThread + - reset + - start + - stop + - takeSample + - updateFinalMonitoringData + - updateIntermediateMonitoringData + - updateMonitoringData +ExecutionStateSamplerBenchmark: + methods: + - setup + - tearDown + - testLargeBundleHarnessStateSampler + - testLargeBundleRunnersCoreStateSampler + - testTinyBundleHarnessStateSampler + - testTinyBundleRunnersCoreStateSampler + properties: + - sampler + - state1 + - state2 + - state3 + - tracker +ExpansionServer: + methods: + - close + - create + - getHost + - getPort +ExpansionService: + methods: + - close + - discoverSchemaTransform + - expand + - fromOptions + - getDependencies + - getTransform + - knownTransforms + - main + - payloadToConfig + - run +ExpansionServiceSchemaTransformProvider: + methods: + - createInput + - extractOutputs + - getTransform + - of +ExplicitShardedFile: + methods: + - readFilesWithRetries + - toString +ExposedByteArrayInputStream: + methods: + - close + - readAll +ExposedByteArrayOutputStream: + methods: + - reset + - toByteArray + - write + - writeAndOwn +ExpressionConverter: + methods: + - convertRelNodeToRexRangeRef + - convertResolvedLiteral + - convertRexNodeFromResolvedExpr + - convertTableValuedFunction + - indexOfProjectionColumnRef + - retrieveRexNode + - retrieveRexNodeFromOrderByScan + - trueLiteral +ExternalRead: + methods: + - buildExternal + - knownBuilders + - setIdLabel + - setSubscription + - setTimestampAttribute + - setTopic + - setWithAttributes + properties: + - URN +ExternalSchemaIOTransformRegistrar: + methods: + - buildExternal + - knownBuilderInstances + - setConfig + - setDataSchema + - setLocation +ExternalSorter: + methods: + - create + - getMemoryMB + - getSorterType + - getTempLocation + - setMemoryMB + - setSorterType + - setTempLocation +ExternalSqlTransformRegistrar: + methods: + - buildExternal + - knownBuilders + - setDialect + - setQuery +ExternalTransformRegistrarImpl: + methods: + - knownBuilderInstances + properties: + - READ_URN + - WRITE_URN +ExternalWorkerService: + methods: + - close + - main + - start + - startWorker + - stopWorker +ExternalWrite: + methods: + - buildExternal + - knownBuilders + - setIdLabel + - setTimestampAttribute + - setTopic + properties: + - URN +FailsafeValueInSingleWindow: + methods: + - decode + - encode + - getCoderArguments + - getComponents + - getFailsafeValue + - getPane + - getTimestamp + - getValue + - getWindow + - of + - verifyDeterministic +Failure: + methods: + - build + - getError + - getPayload + - newBuilder + - setError + - setPayload +FailureCollectorWrapper: + methods: + - addFailure + - getOrThrowException + - getValidationFailures +FakeBigQueryServices: + methods: + - cancel + - convertNumbers + - decodeQueryResult + - encodeQueryResult + - getDatasetService + - 
getJobService + - getStorageClient + - iterator + - withDatasetService + - withJobService + - withStorageClient +FakeDatasetService: + methods: + - appendRows + - close + - commitWriteStreams + - createDataset + - createTable + - createWriteStream + - deleteDataset + - deleteTable + - failOnInsert + - finalizeWriteStream + - flush + - getAllIds + - getAllRows + - getDataset + - getInsertCount + - getStreamAppendClient + - getTable + - getTableImpl + - insertAll + - isTableEmpty + - patchTableDescription + - pin + - setShouldFailRow + - setUp + - unpin + - updateTableSchema +FakeJobService: + methods: + - close + - dryRunQuery + - expectDryRunQuery + - getAllJobs + - getJob + - getNumExtractJobCalls + - pollJob + - setNumFailuresExpected + - setUp + - startCopyJob + - startExtractJob + - startLoadJob + - startQueryJob +FhirBundleParameter: + methods: + - getBundle + - getMetadata + - of +FhirBundleResponse: + methods: + - getFhirBundleParameter + - getResponse + - of +FhirIO: + methods: + - addToFile + - closeFile + - deidentify + - delete + - executeBundles + - expand + - exportResources + - fhirStoresImport + - finishSpecifyingOutput + - getFailedBodies + - getFailedBundles + - getFailedFiles + - getFailedReads + - getFailedSearches + - getFhirStore + - getKeyedResources + - getPatientEverything + - getPipeline + - getResources + - getSuccessfulBodies + - getSuccessfulBundles + - importBatch + - importResources + - in + - init + - initBatch + - initClient + - initFile + - instantiateHealthcareClient + - process + - processElement + - readResources + - searchResources + - searchResourcesWithGenericParameters + properties: + - DEAD_LETTER + - FAILED_BODY + - FAILED_BUNDLES + - FAILED_FILES + - OUT + - SUCCESSFUL_BODY + - SUCCESSFUL_BUNDLES + - TEMP_FILES +FhirIOPatientEverything: + methods: + - expand + - finishSpecifyingOutput + - getFailedReads + - getPatientCompartments + - getPipeline + - instantiateHealthcareClient + - processElement + properties: + - DEAD_LETTER + - OUT +FhirSearchParameter: + methods: + - equals + - getKey + - getQueries + - getResourceType + - hashCode + - of + - toString +FhirSearchParameterCoder: + methods: + - decode + - encode + - of +FieldAccessDescriptor: + methods: + - build + - builder + - create + - fieldIdsAccessed + - fieldNamesAccessed + - getAllFields + - getFieldId + - getFieldName + - getFieldRename + - getFieldsAccessed + - getKind + - getList + - getMap + - getNestedFieldsAccessed + - getQualifiers + - nestedFieldsById + - nestedFieldsByName + - of + - referencesSingleField + - resolve + - setFieldId + - setFieldName + - setFieldRename + - setQualifiers + - toString + - union + - withAllFields + - withFieldIds + - withFieldNameAs + - withFieldNames + - withFieldNamesAs + - withFields + - withNestedField + - withNestedFieldAs +FieldAccessDescriptorParser: + methods: + - getQualifiers + - parse + - visitArrayQualifierList + - visitDotExpression + - visitFieldSpecifier + - visitMapQualifierList + - visitQualifiedComponent + - visitQualifyComponent + - visitSimpleIdentifier + - visitWildcard +FieldTypeDescriptors: + methods: + - fieldTypeForJavaType + - javaTypeForFieldType +FieldValueTypeInformation: + methods: + - forField + - forGetter + - forOneOf + - forSetter + - getElementType + - getField + - getMapKeyType + - getMapValueType + - getMethod + - getName + - getNameOverride + - getNumber + - getNumberOverride + - getOneOfTypes + - getRawType + - getType + - isNullable + - setElementType + - setField + - setMapKeyType + - setMapValueType + - 
setMethod + - setName + - setNullable + - setNumber + - setOneOfTypes + - setRawType + - setType + - withName +FileBasedSink: + methods: + - apply + - cleanup + - close + - convertToFileResourceIfPossible + - create + - createWriteOperation + - createWriter + - decode + - encode + - formatRecord + - fromCanonical + - getCoderArguments + - getComponents + - getDefaultDestination + - getDestination + - getDestinationCoder + - getDestinationFile + - getDynamicDestinations + - getFilenamePolicy + - getMimeType + - getOutputFile + - getPaneInfo + - getShard + - getSideInputs + - getSink + - getSuggestedFilenameSuffix + - getTempDirectory + - getTempDirectoryProvider + - getTempFilename + - getWindow + - getWriteOperation + - of + - open + - populateDisplayData + - removeTemporaryFiles + - setWindowedWrites + - sideInput + - toString + - unwindowedFilename + - validate + - verifyDeterministic + - windowedFilename + - withShard + - write +FileBasedSource: + methods: + - advance + - allowsDynamicSplitting + - close + - createReader + - createSourceForSubrange + - getCurrent + - getCurrentSource + - getCurrentTimestamp + - getEmptyMatchTreatment + - getEstimatedSizeBytes + - getFileOrPatternSpec + - getFileOrPatternSpecProvider + - getFractionConsumed + - getMaxEndOffset + - getMode + - getSingleFileMetadata + - populateDisplayData + - split + - splitAtFraction + - start + - toString + - validate +FileChecksumMatcher: + methods: + - describeMismatchSafely + - describeTo + - fileContentsHaveChecksum + - matchesSafely +FileIO: + methods: + - apply + - by + - continuously + - create + - createWriteOperation + - createWriter + - defaultNaming + - equals + - expand + - filepattern + - formatRecord + - getCompression + - getDefaultDestination + - getDestination + - getDestinationCoder + - getEmptyMatchTreatment + - getFilenamePolicy + - getMatchUpdatedFiles + - getMetadata + - getSideInputs + - getWatchInterval + - hashCode + - match + - matchAll + - open + - openSeekable + - populateDisplayData + - process + - readFullyAsBytes + - readFullyAsUTF8String + - readMatches + - relativeFileNaming + - sideInput + - to + - toString + - unwindowedFilename + - via + - windowedFilename + - withCompression + - withConfiguration + - withDestinationCoder + - withDirectoryTreatment + - withEmptyGlobalWindowDestination + - withEmptyMatchTreatment + - withIgnoreWindowing + - withNaming + - withNoSpilling + - withNumShards + - withPrefix + - withSharding + - withSuffix + - withTempDirectory + - write + - writeDynamic +FilePatternMatchingShardedFile: + methods: + - readFilesWithRetries + - toString +FileSystem: {} +FileSystemUtils: + methods: + - wildcardToRegexp +FileSystems: + methods: + - apply + - copy + - create + - delete + - hasGlobWildcard + - match + - matchNewResource + - matchResources + - matchSingleFileSpec + - open + - rename + - setDefaultPipelineOptions + properties: + - DEFAULT_SCHEME + - filteredExistingSrcs + - resultDestinations + - resultSources +FillGaps: + methods: + - expand + - getNextWindow + - getPreviousWindow + - getValue + - keepEarliest + - keepLatest + - of + - onGcTimer + - onTimer + - process +Filter: + methods: + - by + - create + - equal + - expand + - getPredicate + - getRowSelector + - greaterThan + - greaterThanEq + - lessThan + - lessThanEq + - named + - of + - output + - populateDisplayData + - process + - processElement + - whereFieldId + - whereFieldIds + - whereFieldName + - whereFieldNames +FinalizeBundleHandler: + methods: + - create + - finalizeBundle + - getCallback + - 
getExpiryTime + - registerCallbacks +FindQuery: + methods: + - apply + - create + - withFilters + - withLimit + - withProjection +FirestoreIO: + methods: + - v1 +FirestoreV1: + methods: + - apply + - batchGetDocuments + - batchWrite + - build + - equals + - expand + - getNumBytes + - getNumWrites + - getStatus + - getWrite + - getWriteFailures + - getWriteResult + - hashCode + - listCollectionIds + - listDocuments + - partitionQuery + - populateDisplayData + - processElement + - read + - runQuery + - toBuilder + - toString + - withDeadLetterQueue + - withNameOnlyQuery + - withReadTime + - withRpcQosOptions + - write +FixedBytes: + methods: + - getLength + - getName + - of + - toBaseType + - toInputType + - toString + properties: + - IDENTIFIER +FixedPrecisionNumeric: + methods: + - of + - toInputType + properties: + - BASE_IDENTIFIER + - IDENTIFIER +FixedString: + methods: + - getLength + - getName + - of + - toInputType + - toString + properties: + - IDENTIFIER +FixedWindows: + methods: + - assignWindow + - equals + - getOffset + - getSize + - hashCode + - isCompatible + - of + - populateDisplayData + - verifyCompatibility + - windowCoder + - withOffset +FlatMap: + methods: + - eventTimeBy + - getAllowedTimestampSkew + - getEventTimeExtractor + - getFunctor + - named + - of + - output + - using +FlatMapElements: + methods: + - exceptionsInto + - exceptionsVia + - expand + - getInputTypeDescriptor + - getOutputTypeDescriptor + - getTypeDescriptor + - into + - populateDisplayData + - processElement + - via +FlatMapTranslator: + methods: + - collect + - getAllowedTimestampSkew + - processElement + - translate +Flatten: + methods: + - apply + - expand + - iterables + - pCollections +FlattenRunner: + methods: + - createRunnerForPTransform + - getPTransformRunnerFactories +FloatCoder: + methods: + - consistentWithEquals + - decode + - encode + - getEncodedTypeDescriptor + - isRegisterByteSizeObserverCheap + - of + - verifyDeterministic +FluentBackoff: + methods: + - backoff + - nextBackOffMillis + - reset + - toString + - withExponent + - withInitialBackoff + - withMaxBackoff + - withMaxCumulativeBackoff + - withMaxRetries + properties: + - DEFAULT +FnApiDoFnRunner: + methods: + - accept + - align + - bundleFinalizer + - clear + - createRunnerForPTransform + - element + - finishBundleContext + - fireTimestamp + - forRoots + - get + - getCurrentRelativeTime + - getDownstreamSplit + - getErrorContext + - getNewWindowStopIndex + - getPTransformRunnerFactories + - getPipelineOptions + - getPrimaryInFullyProcessedWindowsRoot + - getPrimarySplitRoot + - getProgress + - getResidualInUnprocessedWindowsRoot + - getResidualSplitRoot + - getWindowSplit + - key + - of + - offset + - onClaimFailed + - onClaimed + - onTimerContext + - output + - outputReceiver + - outputRowReceiver + - outputWithTimestamp + - pane + - paneInfo + - pipelineOptions + - processContext + - reset + - restriction + - restrictionTracker + - schemaElement + - set + - setRelative + - sideInput + - startBundleContext + - state + - taggedOutputReceiver + - timeDomain + - timer + - timerFamily + - timerId + - timestamp + - trySplit + - updateFinalMonitoringData + - updateIntermediateMonitoringData + - watermarkEstimator + - watermarkEstimatorState + - window + - withNoOutputTimestamp + - withOutputTimestamp +FnApiStateAccessor: + methods: + - add + - addAccum + - addIfAbsent + - apply + - bindBag + - bindCombining + - bindCombiningWithContext + - bindMap + - bindOrderedList + - bindSet + - bindValue + - bindWatermark + - clear + - 
computeIfAbsent + - contains + - entries + - finalizeState + - get + - getAccum + - getOrDefault + - getPipelineOptions + - isEmpty + - keys + - mergeAccumulators + - put + - read + - readLater + - remove + - sideInput + - values + - window + - write +FnApiTimerBundleTracker: + methods: + - get + - getBundleModifications + - getModifiedEventTimersOrdered + - getModifiedProcessingTimersOrdered + - getModifiedSynchronizedProcessingTimersOrdered + - getModifiedTimerIds + - getModifiedTimersOrdered + - getTimeDomain + - getTimer + - getTimerFamilyOrId + - of + - outputTimers + - reset + - timerModified +FnHarness: + methods: + - apply + - main +Fold: + methods: + - of +ForwardingClientResponseObserver: + methods: + - beforeStart + - create + - onCompleted + - onError + - onNext +FullJoin: + methods: + - by + - named + - of + - using +FullNameTableProvider: + methods: + - buildBeamSqlTable + - getSubProvider + - getTable + - getTableByFullName + - getTableType + - registerKnownTableNames +GaugeResult: + methods: + - create + - empty + - getTimestamp + - getValue +GceMetadataUtil: + methods: + - fetchDataflowJobId +GcpCredentialFactory: + methods: + - fromOptions + - getCredential +GcpIoPipelineOptionsRegistrar: + methods: + - getPipelineOptions +GcpPipelineOptionsRegistrar: + methods: + - getPipelineOptions +GcsCreateOptions: + methods: + - build + - builder + - gcsUploadBufferSizeBytes + - setGcsUploadBufferSizeBytes +GcsFileSystemRegistrar: + methods: + - fromOptions +GcsPath: + methods: + - compareTo + - endsWith + - equals + - fromComponents + - fromObject + - fromResourceName + - fromUri + - getBucket + - getFileName + - getFileSystem + - getName + - getNameCount + - getObject + - getParent + - getRoot + - hasNext + - hashCode + - isAbsolute + - iterator + - next + - normalize + - register + - relativize + - remove + - resolve + - resolveSibling + - setFileSystem + - startsWith + - subpath + - toAbsolutePath + - toFile + - toRealPath + - toResourceName + - toString + - toUri + properties: + - GCS_URI + - SCHEME +GcsPathValidator: + methods: + - fromOptions + - validateInputFilePatternSupported + - validateOutputFilePrefixSupported + - validateOutputResourceSupported + - verifyPath +GcsResourceId: + methods: + - equals + - getCurrentDirectory + - getFilename + - getScheme + - hashCode + - isDirectory + - resolve + - toString +GcsUtil: + methods: + - bucketAccessible + - bucketOwner + - build + - builder + - copy + - create + - createBucket + - enqueue + - execute + - expand + - fileSize + - getContentType + - getExpectFileToNotExist + - getFrom + - getLastError + - getNonWildcardPrefix + - getObject + - getObjects + - getReadyToEnqueue + - getTo + - getUploadBufferSizeBytes + - ioException + - isWildcard + - listObjects + - onFailure + - onSuccess + - open + - queue + - remove + - rename + - setContentType + - setExpectFileToNotExist + - setUploadBufferSizeBytes + - shouldRetry + - size + - storageObject + - verifyBucketAccessible +GenerateSequence: + methods: + - buildExternal + - expand + - from + - knownBuilders + - populateDisplayData + - setElementsPerPeriod + - setMaxReadTime + - setPeriod + - setStart + - setStop + - to + - withMaxReadTime + - withRate + - withTimestampFn + properties: + - URN +GenerateSequenceTableProvider: + methods: + - buildBeamSqlTable + - getTableType +Generator: + methods: + - compareTo + - copy + - currentInterEventDelayUs + - equals + - getCurrentConfig + - getFractionConsumed + - getNextEventId + - hasNext + - hashCode + - next + - nextEvent + - remove + - 
splitAtEventId + - toCheckpoint + - toString + - withDelay + properties: + - event + - eventTimestamp + - wallclockTimestamp + - watermark +GeneratorCheckpoint: + methods: + - decode + - encode + - finalizeCheckpoint + - toGenerator + - toString + - verifyDeterministic + properties: + - CODER_INSTANCE +GeneratorConfig: + methods: + - copy + - copyWith + - estimatedBytesForEvents + - getAvgAuctionByteSize + - getAvgBidByteSize + - getAvgPersonByteSize + - getEstimatedSizeBytes + - getHotAuctionRatio + - getHotBiddersRatio + - getHotSellersRatio + - getNumActivePeople + - getNumInFlightAuctions + - getOccasionalDelaySec + - getProbDelayedEvent + - getStartEventId + - getStopEventId + - nextAdjustedEventNumber + - nextEventNumber + - nextEventNumberForWatermark + - split + - timestampAndInterEventDelayUsForEvent + - toString + properties: + - AUCTION_PROPORTION + - FIRST_AUCTION_ID + - FIRST_CATEGORY_ID + - FIRST_PERSON_ID + - PERSON_PROPORTION + - PROPORTION_DENOMINATOR + - baseTime + - firstEventId + - firstEventNumber + - maxEvents +GenericDlq: + methods: + - getDlqTransform +GenericTranslatorProvider: + methods: + - build + - createWithDefaultTranslators + - findTranslator + - newBuilder + - register +GetterBasedSchemaProvider: + methods: + - apply + - create + - equals + - fieldValueGetters + - fieldValueTypeInformations + - fromRowFunction + - get + - getRaw + - hashCode + - name + - schemaTypeCreator + - toRowFunction +GetterBasedSchemaProviderBenchmark: + methods: + - processArrayOfNestedStringField + - processArrayOfStringField + - processByteBufferField + - processBytesField + - processDateTimeField + - processIntField + - processMapOfIntField + - processMapOfNestedIntField + - processNestedBytesField + - processNestedIntField + - processStringBuilderField + - processStringField +GlobalWindow: + methods: + - consistentWithEquals + - decode + - encode + - equals + - getCoderArguments + - hashCode + - maxTimestamp + - verifyDeterministic + properties: + - INSTANCE +GlobalWindows: + methods: + - assignWindows + - assignsToOneWindow + - equals + - getDefaultWindowMappingFn + - getSideInputWindow + - hashCode + - isCompatible + - toString + - verifyCompatibility + - windowCoder +Group: + methods: + - aggregate + - aggregateField + - aggregateFieldBaseValue + - aggregateFields + - aggregateFieldsById + - byFieldAccessDescriptor + - byFieldIds + - byFieldNames + - create + - expand + - getToKvs + - globally + - process + - witValueField + - withKeyField + - withValueField +GroupByKey: + methods: + - applicableTo + - create + - expand + - fewKeys + - getInputValueCoder + - getKeyCoder + - getOutputKvCoder + - populateDisplayData + - updateWindowingStrategy + - validate +GroupByKeyLoadTest: + methods: + - main + - processElement +GroupIntoBatches: + methods: + - apply + - create + - expand + - getBatchSize + - getBatchSizeBytes + - getBatchingParams + - getElementByteSize + - getMaxBufferingDuration + - getWeigher + - identity + - ofByteSize + - ofSize + - onBufferingTimer + - onWindowExpiration + - onWindowTimer + - processElement + - withMaxBufferingDuration + - withShardedKey +GrowableOffsetRangeTracker: + methods: + - getProgress + - isBounded + - trySplit +GrpcContextHeaderAccessorProvider: + methods: + - getHeaderAccessor + - getSdkWorkerId + - interceptCall + - interceptor +GrpcFnServer: + methods: + - allocatePortAndCreateFor + - close + - create + - getApiServiceDescriptor + - getServer + - getService +HBaseCoderProviderRegistrar: + methods: + - getCoderProviders +HBaseIO: + 
methods: + - advance + - close + - createReader + - equals + - expand + - finishBundle + - getConfiguration + - getCurrent + - getCurrentSource + - getEstimatedSizeBytes + - getFractionConsumed + - getKeyRange + - getOutputCoder + - getScan + - getSplitPointsConsumed + - getTableId + - hashCode + - populateDisplayData + - processElement + - read + - readAll + - setup + - split + - splitAtFraction + - start + - startBundle + - tearDown + - validate + - withConfiguration + - withFilter + - withKeyRange + - withScan + - withTableId + - write +HCatToRow: + methods: + - expand + - fromSpec + - processElement +HCatalogBeamSchema: + methods: + - create + - getTableSchema + - hasDatabase +HCatalogIO: + methods: + - advance + - close + - createReader + - expand + - finishBundle + - getCurrent + - getCurrentSource + - getEstimatedSizeBytes + - getOutputCoder + - initiateWrite + - populateDisplayData + - processElement + - read + - split + - start + - startBundle + - tearDown + - withBatchSize + - withConfigProperties + - withDatabase + - withFilter + - withPartition + - withPartitionCols + - withPollingInterval + - withTable + - withTerminationCondition + - write +HCatalogTable: + methods: + - buildIOReader + - buildIOWriter + - config + - database + - getSchema + - isBounded + - schema + - table +HCatalogUtils: {} +HDFSSynchronization: + methods: + - acquireTaskAttemptIdLock + - acquireTaskIdLock + - releaseJobIdLock + - tryAcquireJobLock +HL7v2IO: + methods: + - expand + - finishSpecifyingOutput + - getAll + - getEarliestToLatestRestriction + - getFailedInsertsWithErr + - getFailedReads + - getMessages + - getPipeline + - ingestMessages + - initClient + - instantiateHealthcareClient + - listMessages + - of + - processElement + - read + - readAll + - readAllWithFilter + - readWithFilter + - split + - withInitialSplitDuration + - writeMessages + properties: + - DEAD_LETTER + - FAILED + - OUT + - SUCCESS +HL7v2Message: + methods: + - fromModel + - getCreateTime + - getData + - getLabels + - getMessageType + - getName + - getSchematizedData + - getSendFacility + - getSendTime + - setSchematizedData + - toModel + - toString +HL7v2MessageCoder: + methods: + - decode + - encode + - of +HadoopFileSystemModule: + methods: + - deserialize + - serialize +HadoopFileSystemOptionsRegistrar: + methods: + - getPipelineOptions +HadoopFileSystemRegistrar: + methods: + - fromOptions +HadoopFormatIO: + methods: + - advance + - close + - createReader + - decode + - encode + - expand + - finishBundle + - getConfiguration + - getCurrent + - getCurrentSource + - getDefaultCoder + - getEstimatedSizeBytes + - getFractionConsumed + - getKeyCoder + - getKeyTranslationFunction + - getKeyTypeDescriptor + - getOutputCoder + - getSkipKeyClone + - getSkipValueClone + - getSplit + - getSplitPointsRemaining + - getValueCoder + - getValueTranslationFunction + - getValueTypeDescriptor + - getinputFormatClass + - getinputFormatKeyClass + - getinputFormatValueClass + - populateDisplayData + - processElement + - read + - setup + - split + - start + - startBundle + - toBuilder + - toString + - validate + - validateTransform + - withConfiguration + - withConfigurationTransform + - withExternalSynchronization + - withKeyTranslation + - withPartitioning + - withSkipKeyClone + - withSkipValueClone + - withValueTranslation + - withoutPartitioning + - write + properties: + - JOB_ID + - NUM_REDUCES + - OUTPUT_DIR + - OUTPUT_FORMAT_CLASS_ATTR + - OUTPUT_KEY_CLASS + - OUTPUT_VALUE_CLASS + - PARTITIONER_CLASS_ATTR 
+HarnessMonitoringInfosInstructionHandler: + methods: + - harnessMonitoringInfos +HarnessStreamObserverFactories: + methods: + - fromOptions +HealthcareIOError: + methods: + - getDataResource + - getErrorMessage + - getObservedTime + - getStackTrace + - getStatusCode +HealthcareIOErrorCoder: + methods: + - decode + - encode + - of +HealthcareIOErrorToTableRow: + methods: + - apply + properties: + - TABLE_FIELD_SCHEMAS + - TIMESTAMP_FIELD_NAME +HeartbeatRecord: + methods: + - equals + - getRecordTimestamp + - getTimestamp + - hashCode + - toString +HeartbeatRecordAction: + methods: + - run +HistogramData: + methods: + - clear + - equals + - getAccumulatedBucketSize + - getBottomBucketCount + - getBucketIndex + - getBucketSize + - getBucketType + - getCount + - getNumBuckets + - getPercentileString + - getRangeFrom + - getRangeTo + - getStart + - getTopBucketCount + - getTotalCount + - getWidth + - hashCode + - incBottomBucketCount + - incBucketCount + - incTopBucketCount + - linear + - of + - p50 + - p90 + - p99 + - record + - update +HllCount: + methods: + - expand + - forBytes + - forIntegers + - forLongs + - forStrings + - getSketchFromByteBuffer + - globally + - perKey + - processElement + - withPrecision + properties: + - DEFAULT_PRECISION + - MAXIMUM_PRECISION + - MINIMUM_PRECISION +HttpClientConfiguration: + methods: + - build + - builder + - connectionAcquisitionTimeout + - connectionMaxIdleTime + - connectionTimeToLive + - connectionTimeout + - maxConnections + - readTimeout + - socketTimeout + - writeTimeout +HttpHealthcareApiClient: + methods: + - createDicomStore + - createFhirStore + - createHL7v2Message + - createHL7v2Store + - deidentifyFhirStore + - deleteDicomStore + - deleteFhirStore + - deleteHL7v2Message + - deleteHL7v2Store + - executeFhirBundle + - exportFhirResourceToBigQuery + - exportFhirResourceToGcs + - getEarliestHL7v2SendTime + - getEnd + - getHL7v2Message + - getHL7v2Store + - getJsonFactory + - getLatestHL7v2SendTime + - getPatientEverything + - getStart + - hasNext + - importFhirResource + - ingestHL7v2Message + - initialize + - iterator + - listAllFhirStores + - makeHL7v2ListRequest + - makeListRequest + - makeSendTimeBoundHL7v2ListRequest + - next + - ofPatientEverything + - ofSearch + - pollOperation + - readFhirResource + - retrieveDicomStudyMetadata + - searchFhirResource + - uploadToDicomStore +IOITMetrics: + methods: + - publishToInflux +IOUtils: + methods: + - forEach +IdGenerators: + methods: + - decrementingLongs + - incrementingLongs +IdNameReserve: + methods: + - decode + - encode + - equals + - hashCode + - sizeInBytes + - structuralValue + - toString + - verifyDeterministic + properties: + - CODER +IdentityWindowFn: + methods: + - assignWindows + - assignsToOneWindow + - getDefaultWindowMappingFn + - isCompatible + - verifyCompatibility + - windowCoder +IllegalMutationException: + methods: + - getNewValue + - getSavedValue +Impulse: + methods: + - create + - expand +InMemoryMetaStore: + methods: + - buildBeamSqlTable + - createTable + - dropTable + - getTableType + - getTables + - registerProvider +InMemoryMetaTableProvider: + methods: + - createTable + - dropTable + - getTables +InProcessServerFactory: + methods: + - allocateAddressAndCreate + - create +IncompatibleWindowException: + methods: + - getMessage +InferableFunction: + methods: + - apply + - fromProcessFunctionWithOutputType + - getInputTypeDescriptor + - getOutputTypeDescriptor + - populateDisplayData +InfluxDBPublisher: + methods: + - dataPoint + - fields + - measurement + - publish 
+ - publishNexmarkResults + - publishWithSettings + - tags + - timestamp + - timestampUnit + - toString +InfluxDBSettings: + methods: + - builder + - copyWithMeasurement + - get + - withDatabase + - withHost + - withMeasurement + - withRetentionPolicy + properties: + - database + - host + - measurement + - retentionPolicy + - userName + - userPassword +InfluxDbIO: + methods: + - advance + - checkClientTrusted + - checkServerTrusted + - close + - create + - createReader + - expand + - finishBundle + - getAcceptedIssuers + - getConnection + - getCurrent + - getCurrentSource + - getEstimatedSizeBytes + - getOutputCoder + - populateDisplayData + - processElement + - read + - setDisableCertificateValidation + - split + - start + - startBundle + - tearDown + - validate + - withBatchSize + - withConsistencyLevel + - withDataSourceConfiguration + - withDatabase + - withDisableCertificateValidation + - withFromDateTime + - withMetric + - withQuery + - withRetentionPolicy + - withToDateTime + - write +InitialPartition: + methods: + - isInitialPartition + properties: + - PARENT_TOKENS + - PARTITION_TOKEN +InitializeDoFn: + methods: + - processElement +InsertRetryPolicy: + methods: + - alwaysRetry + - getInsertErrors + - neverRetry + - retryTransientErrors + - shouldRetry +InstanceBuilder: + methods: + - build + - fromClass + - fromClassName + - fromFactoryMethod + - ofType + - withArg +InstanceUtils: + methods: + - create + - forName +InstantCoder: + methods: + - consistentWithEquals + - decode + - encode + - getEncodedTypeDescriptor + - isRegisterByteSizeObserverCheap + - of + - verifyDeterministic +InstantDeserializer: + methods: + - close + - configure + - deserialize +InstantSerializer: + methods: + - close + - configure + - serialize +IntervalWindow: + methods: + - compareTo + - consistentWithEquals + - contains + - decode + - encode + - end + - equals + - getCoder + - getCoderArguments + - hashCode + - intersects + - isDisjoint + - isRegisterByteSizeObserverCheap + - maxTimestamp + - of + - registerByteSizeObserver + - span + - start + - toString + - verifyDeterministic +InvalidConfigurationException: {} +InvalidLocationException: {} +InvalidSchemaException: {} +InvalidTableException: {} +IsInf: + methods: + - isInf +IsNan: + methods: + - isNan +IterableCoder: + methods: + - getEncodedTypeDescriptor + - of + - structuralValue +IterableLikeCoder: + methods: + - decode + - encode + - getCoderArguments + - getElemCoder + - isRegisterByteSizeObserverCheap + - registerByteSizeObserver + - update + - verifyDeterministic +IterableSideInput: + methods: + - get +JAXBCoder: + methods: + - close + - decode + - encode + - equals + - getEncodedTypeDescriptor + - getJAXBClass + - hashCode + - of + - write +JavaBeanSchema: + methods: + - create + - equals + - fieldValueGetters + - fieldValueTypeInformations + - get + - hashCode + - schemaFor + - schemaTypeCreator + properties: + - INSTANCE +JavaBeanUtils: + methods: + - appender + - createConstructorCreator + - createGetter + - createSetter + - createStaticCreator + - getConstructorCreator + - getFieldTypes + - getGetters + - getSetters + - getStaticCreator + - prepare + - schemaFromJavaBeanClass + - validateJavaBean + properties: + - CACHED_CREATORS +JavaFieldSchema: + methods: + - fieldValueGetters + - fieldValueTypeInformations + - get + - schemaFor + - schemaTypeCreator + properties: + - INSTANCE +JavaUdfLoader: + methods: + - createClassLoader + - loadAggregateFunction + - loadScalarFunction + - run +JdbcConnection: + methods: + - getCurrentSchemaPlus + 
- getPipelineOptions + - setPipelineOptions + - setPipelineOptionsMap +JdbcDriver: + methods: + - connect + properties: + - CONNECT_STRING_PREFIX + - INSTANCE + - OBJECT_MAPPER +JdbcIO: + methods: + - apply + - create + - expand + - finish + - finishBundle + - of + - populateDisplayData + - process + - processElement + - read + - readAll + - readRows + - readWithPartitions + - setParameters + - setup + - tearDown + - withAutoSharding + - withBatchSize + - withCoder + - withConnectionInitSqls + - withConnectionProperties + - withDataSourceConfiguration + - withDataSourceProviderFn + - withDriverClassLoader + - withFetchSize + - withLowerBound + - withNumPartitions + - withOutputParallelization + - withParameterSetter + - withPartitionColumn + - withPassword + - withPreparedStatementSetter + - withQuery + - withResults + - withRetryConfiguration + - withRetryStrategy + - withRowMapper + - withRowOutput + - withStatement + - withStatementPreparator + - withTable + - withUpperBound + - withUsername + - withWriteResults + - write + - writeVoid +JdbcSchemaIOProvider: + methods: + - buildReader + - buildWriter + - configurationSchema + - expand + - from + - identifier + - isBounded + - requiresDataSchema + - schema +JdbcWriteResult: + methods: + - create +JmsIO: + methods: + - advance + - close + - createReader + - expand + - getCheckpointMark + - getCheckpointMarkCoder + - getCurrent + - getCurrentSource + - getCurrentTimestamp + - getOutputCoder + - getTotalBacklogBytes + - getWatermark + - mapMessage + - populateDisplayData + - processElement + - read + - readMessage + - setup + - split + - start + - teardown + - withAutoScaler + - withCloseTimeout + - withCoder + - withConnectionFactory + - withMaxNumRecords + - withMaxReadTime + - withMessageMapper + - withPassword + - withQueue + - withTopic + - withTopicNameMapper + - withUsername + - withValueMapper + - write +JmsIOException: {} +JmsRecord: + methods: + - equals + - getJmsCorrelationID + - getJmsDeliveryMode + - getJmsDestination + - getJmsExpiration + - getJmsMessageID + - getJmsPriority + - getJmsRedelivered + - getJmsReplyTo + - getJmsTimestamp + - getJmsType + - getPayload + - getProperties + - hashCode +Join: + methods: + - accumulationMode + - by + - expand + - fullOuterJoin + - getJoiner + - getLeftKeyExtractor + - getRightKeyExtractor + - getType + - innerBroadcastJoin + - innerJoin + - left + - leftOuterBroadcastJoin + - leftOuterJoin + - named + - of + - on + - output + - outputValues + - processElement + - right + - rightOuterJoin + - triggeredBy + - using + - windowBy + - with + - withAllowedLateness + - withOnTimeBehavior + - withTimestampCombiner + properties: + - LHS_TAG + - RHS_TAG +JoinRelOptRuleCall: + methods: + - builder + - getChildRels + - getMetadataQuery + - getOperand0 + - getParents + - getPlanner + - getRelList + - getRule + - rel + - transformTo +JoinTranslator: + methods: + - getFnName + - processElement +JsonArrayCoder: + methods: + - decode + - encode + - of +JsonMatcher: + methods: + - describeTo + - jsonBytesLike + - jsonStringLike +JsonPayloadSerializerProvider: + methods: + - getSerializer + - identifier +JsonToRow: + methods: + - build + - create + - expand + - finishSpecifyingOutput + - getFailedToParseLines + - getJsonToRowWithErrFn + - getPipeline + - getResults + - processElement + - resultBuilder + - setErrorField + - setJsonToRowWithErrFn + - setLineField + - toBuilder + - withExceptionReporting + - withExtendedErrorInfo + - withNullBehavior + - withSchema + - withSchemaAndNullBehavior + 
properties: + - ERROR_ROW_SCHEMA + - ERROR_ROW_WITH_ERR_MSG_SCHEMA +JsonUtils: + methods: + - apply + - getJsonBytesToRowFunction + - getJsonStringToRowFunction + - getRowToJsonBytesFunction + - getRowToJsonStringsFunction +JvmInitializers: + methods: + - runBeforeProcessing + - runOnStartup +KV: + methods: + - compare + - equals + - getKey + - getValue + - hashCode + - of + - toString +KafkaCheckpointMark: + methods: + - finalizeCheckpoint + - getNextOffset + - getPartition + - getPartitions + - getTopic + - getWatermarkMillis + - toString +KafkaCommitOffset: + methods: + - expand + - processElement +KafkaIO: + methods: + - apply + - buildExternal + - commitOffsets + - commitOffsetsInFinalize + - decode + - encode + - expand + - externalWithMetadata + - getReplacementTransform + - getTimestamp + - knownBuilders + - mapOutputs + - populateDisplayData + - processElement + - read + - readBytes + - readSourceDescriptors + - setCommitOffsetInFinalize + - setConsumerConfig + - setKeyDeserializer + - setKeySerializer + - setMaxNumRecords + - setMaxReadTime + - setProducerConfig + - setStartReadTime + - setStopReadTime + - setTimestampPolicy + - setTopic + - setTopics + - setValueDeserializer + - setValueSerializer + - toExternalKafkaRecord + - updateConsumerProperties + - updateProducerProperties + - useCreateTime + - useLogAppendTime + - useProcessingTime + - validate + - values + - withBootstrapServers + - withCheckStopReadingFn + - withConsumerConfigOverrides + - withConsumerConfigUpdates + - withConsumerFactoryFn + - withCreatWatermarkEstimatorFn + - withCreateTime + - withDynamicRead + - withEOS + - withExtractOutputTimestampFn + - withInputTimestamp + - withKeyDeserializer + - withKeyDeserializerAndCoder + - withKeyDeserializerProvider + - withKeySerializer + - withLogAppendTime + - withManualWatermarkEstimator + - withMaxNumRecords + - withMaxReadTime + - withMonotonicallyIncreasingWatermarkEstimator + - withOffsetConsumerConfigOverrides + - withProcessingTime + - withProducerConfigUpdates + - withProducerFactoryFn + - withPublishTimestampFunction + - withReadCommitted + - withStartReadTime + - withStopReadTime + - withTimestampFn + - withTimestampFn2 + - withTimestampPolicyFactory + - withTopic + - withTopicPartitions + - withTopics + - withValueDeserializer + - withValueDeserializerAndCoder + - withValueDeserializerProvider + - withValueSerializer + - withWallTimeWatermarkEstimator + - withWatermarkFn + - withWatermarkFn2 + - withoutMetadata + - write + - writeRecords + properties: + - KAFKA_READ_OVERRIDE + - URN + - URN_WITHOUT_METADATA + - URN_WITH_METADATA +KafkaRecord: + methods: + - equals + - getHeaders + - getKV + - getOffset + - getPartition + - getTimestamp + - getTimestampType + - getTopic + - hashCode +KafkaRecordCoder: + methods: + - consistentWithEquals + - decode + - encode + - getCoderArguments + - isRegisterByteSizeObserverCheap + - of + - structuralValue + - verifyDeterministic +KafkaSchemaTransformReadConfiguration: + methods: + - build + - builder + - getAutoOffsetResetConfig + - getAvroSchema + - getBootstrapServers + - getConfluentSchemaRegistrySubject + - getConfluentSchemaRegistryUrl + - getConsumerConfigUpdates + - getDataFormat + - getTopic + - setAutoOffsetResetConfig + - setAvroSchema + - setBootstrapServers + - setConfluentSchemaRegistrySubject + - setConfluentSchemaRegistryUrl + - setConsumerConfigUpdates + - setDataFormat + - setTopic + - validate + properties: + - VALID_DATA_FORMATS + - VALID_START_OFFSET_VALUES +KafkaSchemaTransformReadProvider: + 
methods: + - buildTransform + - expand + - identifier + - inputCollectionNames + - outputCollectionNames +KafkaSourceConsumerFn: + methods: + - checkDone + - configs + - currentRestriction + - exists + - getHashCode + - getInitialRestriction + - getRestrictionCoder + - isBounded + - newTracker + - offset + - offsetStorageReader + - offsets + - process + - start + - storageExists + - tryClaim + - trySplit + properties: + - BEAM_INSTANCE_PROPERTY + - fetchedRecords + - history + - maxRecords + - minutesToRun + - offset +KafkaSourceDescriptor: + methods: + - getTopicPartition + - of +KafkaTableProvider: + methods: + - buildBeamSqlTable + - getTableType +KeyPairUtils: + methods: + - preparePrivateKey + - readPrivateKeyFile +KeyedPCollectionTuple: + methods: + - and + - apply + - empty + - expand + - getCoGbkResultSchema + - getCollection + - getKeyCoder + - getKeyedCollections + - getPipeline + - getTupleTag + - isEmpty + - of +Keys: + methods: + - apply + - create + - expand +KinesisClientThrottledException: {} +KinesisIO: + methods: + - addPutRecordsRequest + - build + - builder + - close + - expand + - finishBundle + - finishSpecifyingOutput + - getPipeline + - maxBufferedTime + - maxBytes + - processElement + - read + - readData + - refreshPeriodically + - setup + - shardAwareHashKey + - shardRefreshInterval + - startBundle + - teardown + - withAWSClientsProvider + - withArrivalTimeWatermarkPolicy + - withBatchMaxBytes + - withBatchMaxRecords + - withClientConfiguration + - withConcurrentRequests + - withCustomRateLimitPolicy + - withCustomWatermarkPolicy + - withDynamicDelayRateLimitPolicy + - withFixedDelayRateLimitPolicy + - withInitialPositionInStream + - withInitialTimestampInStream + - withMaxCapacityPerShard + - withMaxNumRecords + - withMaxReadTime + - withPartitionKey + - withPartitioner + - withProcessingTimeWatermarkPolicy + - withProducerProperties + - withRecordAggregation + - withRecordAggregationDisabled + - withRequestRecordsLimit + - withSerializer + - withStreamName + - withUpToDateThreshold + - write +KinesisRecord: + methods: + - equals + - getApproximateArrivalTimestamp + - getData + - getDataAsBytes + - getExtendedSequenceNumber + - getPartitionKey + - getReadTime + - getSequenceNumber + - getShardId + - getStreamName + - getSubSequenceNumber + - getUniqueId + - hashCode + - toString +KinesisTransformRegistrar: + methods: + - buildExternal + - knownBuilderInstances + - setAwsAccessKey + - setAwsSecretKey + - setInitialPositionInStream + - setInitialTimestampInStream + - setMaxCapacityPerShard + - setMaxNumRecords + - setMaxReadTime + - setPartitionKey + - setProducerProperties + - setRateLimit + - setRegion + - setRequestRecordsLimit + - setServiceEndpoint + - setStreamName + - setUpToDateThreshold + - setVerifyCertificate + - setWatermarkIdleDurationThreshold + - setWatermarkPolicy + properties: + - READ_DATA_URN + - WRITE_URN +KryoCoder: + methods: + - decode + - encode + - equals + - hashCode + - of + - verifyDeterministic + - withRegistrar +KryoCoderProvider: + methods: + - coderFor + - of + - registerTo + - withRegistrar +KuduIO: + methods: + - createReader + - expand + - finishBundle + - getEstimatedSizeBytes + - getOutputCoder + - populateDisplayData + - processElement + - read + - setup + - split + - startBundle + - teardown + - validate + - withBatchSize + - withCoder + - withFaultTolerent + - withFormatFn + - withMasterAddresses + - withParseFn + - withPredicates + - withProjectedColumns + - withTable + - write +KvCoder: + methods: + - consistentWithEquals + 
- decode + - encode + - getCoderArguments + - getEncodedTypeDescriptor + - getKeyCoder + - getValueCoder + - isRegisterByteSizeObserverCheap + - of + - registerByteSizeObserver + - structuralValue + - verifyDeterministic +KvSwap: + methods: + - apply + - create + - expand +LatencyRecordingHttpRequestInitializer: + methods: + - initialize + - intercept + - interceptResponse + properties: + - HISTOGRAM_BUCKET_TYPE +Latest: + methods: + - addInput + - combineFn + - createAccumulator + - expand + - extractOutput + - getAccumulatorCoder + - getDefaultOutputCoder + - globally + - mergeAccumulators + - perKey +LazyAggregateCombineFn: + methods: + - addInput + - createAccumulator + - extractOutput + - getAccumTVariable + - getAccumulatorCoder + - getUdafImpl + - iterator + - mergeAccumulators + - toString +LeftJoin: + methods: + - by + - named + - of + - using +LengthPrefixCoder: + methods: + - consistentWithEquals + - decode + - encode + - getCoderArguments + - getValueCoder + - isRegisterByteSizeObserverCheap + - of + - verifyDeterministic +ListCoder: + methods: + - consistentWithEquals + - getEncodedTypeDescriptor + - of + - structuralValue + - verifyDeterministic +LoadTestResult: + methods: + - getRuntime + - getTotalBytesCount + - toMap +LocalFileSystemRegistrar: + methods: + - fromOptions +LocalResources: + methods: + - fromFile + - fromPath + - fromString +LogicalCalcMergeRule: + methods: + - onMatch + properties: + - INSTANCE +LogicalEndpoint: + methods: + - data + - getInstructionId + - getTimerFamilyId + - getTransformId + - isTimer + - timer +LongGenerator: + methods: + - nextLong +LzoCompression: + methods: + - createLzoInputStream + - createLzoOutputStream + - createLzopInputStream + - createLzopOutputStream +Main: + methods: + - call + - main +ManagedChannelFactory: + methods: + - createDefault + - createEpoll + - createInProcess + - forDescriptor + - withDirectExecutor + - withInterceptors +ManagedFactoryImpl: + methods: + - close + - create +MapCoder: + methods: + - consistentWithEquals + - decode + - encode + - getCoderArguments + - getEncodedTypeDescriptor + - getKeyCoder + - getValueCoder + - of + - registerByteSizeObserver + - structuralValue + - verifyDeterministic +MapElements: + methods: + - exceptionsInto + - exceptionsVia + - expand + - getInputTypeDescriptor + - getMapper + - getOutputTypeDescriptor + - getTypeDescriptor + - into + - named + - of + - output + - populateDisplayData + - processElement + - using + - via +MapFnRunners: + methods: + - create + - createRunnerForPTransform + - forValueMapFnFactory + - forWindowedValueMapFnFactory +MapKeys: + methods: + - exceptionsInto + - exceptionsVia + - expand + - into + - via +MapValues: + methods: + - exceptionsInto + - exceptionsVia + - expand + - into + - via +MapperFactory: + methods: + - changeStreamRecordMapper + - partitionMetadataMapper +MappingUtils: + methods: + - registerStreamingPlugin +MatchResult: + methods: + - build + - builder + - checksum + - create + - isReadSeekEfficient + - lastModifiedMillis + - metadata + - resourceId + - setChecksum + - setIsReadSeekEfficient + - setLastModifiedMillis + - setResourceId + - setSizeBytes + - sizeBytes + - status + - unknown +Materializations: + methods: + - getUrn + - iterable + - multimap + properties: + - ITERABLE_MATERIALIZATION_URN + - MULTIMAP_MATERIALIZATION_URN +Max: + methods: + - apply + - doublesGlobally + - doublesPerKey + - globally + - identity + - integersGlobally + - integersPerKey + - longsGlobally + - longsPerKey + - naturalOrder + - of + - ofDoubles + 
- ofIntegers + - ofLongs + - perKey + - populateDisplayData +Mean: + methods: + - addInput + - createAccumulator + - decode + - encode + - equals + - extractOutput + - getAccumulatorCoder + - globally + - hashCode + - mergeAccumulator + - of + - perKey + - toString + - verifyDeterministic +MemoryMonitor: + methods: + - describeMemory + - dumpHeap + - fromOptions + - isThrashing + - run + - stop + - totalGCTimeMilliseconds + - tryToDumpHeap + - waitForResources + - waitForThrashingState + properties: + - DEFAULT_SLEEP_TIME_MILLIS +MergeOverlappingIntervalWindows: + methods: + - add + - apply + - intersects + - mergeWindows + - toString +MetadataCoder: + methods: + - consistentWithEquals + - decode + - encode + - of +MetadataCoderV2: + methods: + - consistentWithEquals + - decode + - encode + - of +MetricFiltering: + methods: + - matches + - matchesScope + - subPathMatches +MetricKey: + methods: + - create + - metricName + - stepName + - toString +MetricName: + methods: + - getName + - getNamespace + - named + - toString +MetricNameFilter: + methods: + - getName + - getNamespace + - inNamespace + - named +MetricQueryResults: + methods: + - create + - getCounters + - getDistributions + - getGauges + - toString +MetricResult: + methods: + - addAttempted + - addCommitted + - attempted + - create + - getAttempted + - getCommitted + - getCommittedOrNull + - getKey + - getName + - hasCommitted + - transform +MetricResults: + methods: + - allMetrics + - queryMetrics + - toString +Metrics: + methods: + - bundleProcessingThreadCounter + - bundleProcessingThreadDistribution + - counter + - dec + - distribution + - gauge + - getName + - inc + - reset + - set + - update + - updateFinalMonitoringData + - updateIntermediateMonitoringData +MetricsBenchmark: + methods: + - check + - testBundleProcessingThreadCounterMutation + - testBundleProcessingThreadCounterReset + - testCounterCellMutation + - testCounterCellReset + properties: + - bundleCounter + - counterCell +MetricsEnvironment: + methods: + - activate + - close + - getCurrentContainer + - getMetricsEnvironmentStateForCurrentThread + - getProcessWideContainer + - isMetricsSupported + - scopedMetricsContainer + - setCurrentContainer + - setMetricsSupported + - setProcessWideContainer +MetricsFilter: + methods: + - addNameFilter + - addStep + - build + - builder + - names + - steps +MetricsReader: + methods: + - getCounterMetric + - getEndTimeMetric + - getStartTimeMetric + - ofResults + - readAll + - withNamespace +MicrosInstant: + methods: + - getArgumentType + - getBaseType + - getIdentifier + - toBaseType + - toInputType + properties: + - IDENTIFIER +MimeTypes: + properties: + - BINARY + - TEXT +Min: + methods: + - apply + - doublesGlobally + - doublesPerKey + - globally + - identity + - integersGlobally + - integersPerKey + - longsGlobally + - longsPerKey + - naturalOrder + - of + - ofDoubles + - ofIntegers + - ofLongs + - perKey + - populateDisplayData +Mod: + methods: + - equals + - getKeysJson + - getNewValuesJson + - getOldValuesJson + - hashCode + - toString +MongoDbGridFSIO: + methods: + - advance + - close + - createReader + - expand + - finishBundle + - getAllowedTimestampSkew + - getCurrent + - getCurrentSource + - getCurrentTimestamp + - getEstimatedSizeBytes + - getOutputCoder + - output + - populateDisplayData + - processElement + - read + - setup + - split + - start + - startBundle + - teardown + - validate + - withBucket + - withChunkSize + - withCoder + - withDatabase + - withFilename + - withFilter + - withParser + - withSkew + - 
withUri + - write +MongoDbIO: + methods: + - advance + - close + - closeMongoClient + - createMongoClient + - createReader + - expand + - finishBundle + - getCurrent + - getCurrentSource + - getDocumentCount + - getEstimatedSizeBytes + - getOutputCoder + - populateDisplayData + - processElement + - read + - split + - start + - startBundle + - withBatchSize + - withBucketAuto + - withCollection + - withDatabase + - withIgnoreSSLCertificate + - withMaxConnectionIdleTime + - withNumSplits + - withOrdered + - withQueryFn + - withSSLEnabled + - withSSLInvalidHostNameAllowed + - withUpdateConfiguration + - withUri + - write +MongoDbTable: + methods: + - apply + - buildIOReader + - buildIOWriter + - constructFilter + - convert + - create + - expand + - getNotSupported + - getSupported + - getTableStatistics + - isBounded + - numSupported + - processElement + - supportsProjects + - toString + - withSchema +MongoDbTableProvider: + methods: + - buildBeamSqlTable + - getTableType +Monitor: + methods: + - getTransform + - processElement + properties: + - name + - prefix +MoreFutures: + methods: + - allAsList + - allAsListWithExceptions + - exception + - get + - getException + - getResult + - isCancelled + - isDone + - isException + - result + - runAsync + - supplyAsync +MovingFunction: + methods: + - add + - get + - isSignificant +MqttIO: + methods: + - add + - advance + - close + - closeMqttClient + - create + - createMqttClient + - createReader + - equals + - expand + - finalizeCheckpoint + - getCheckpointMark + - getCheckpointMarkCoder + - getCurrent + - getCurrentSource + - getCurrentTimestamp + - getOutputCoder + - getWatermark + - hashCode + - populateDisplayData + - processElement + - read + - split + - start + - withClientId + - withConnectionConfiguration + - withMaxNumRecords + - withMaxReadTime + - withPassword + - withRetained + - withServerUri + - withTopic + - withUsername + - write +MultimapSideInput: + methods: + - get +MultimapUserState: + methods: + - asyncClose + - clear + - createIterator + - get + - hasNext + - isReady + - keys + - next + - prefetch + - put + - remove +MutationDetectors: + methods: + - close + - forValueWithCoder + - noopMutationDetector + - verifyUnmodified +MutationGroup: + methods: + - attached + - create + - equals + - hashCode + - iterator + - primary + - size + - toString +NFA: + methods: + - addPrevEvent + - assignIndex + - atFinal + - canDecrement + - canTrim + - compile + - copy + - decrement + - equals + - getCurState + - getCurrentEvent + - getNewProceedPointer + - getNewTakePointer + - getNextState + - getPatternVar + - getPointer + - getPrevPointer + - getProceedCondition + - getQuantifier + - getRow + - getTakeCondition + - hasProceed + - hasTake + - hashCode + - isKleenePlus + - isKleenePlusSecondary + - isNull + - isProceedPointer + - proceed + - proceedIgnore + - processNewRow + - reset + - setNextState + - take + - toCEPLiteral + - toString + - trim + properties: + - isFinal + - isStart +NameCityStateId: + methods: + - decode + - encode + - equals + - hashCode + - sizeInBytes + - structuralValue + - toString + - verifyDeterministic + properties: + - CODER + - city + - id + - name + - state +NameGenerator: + methods: + - generatePartitionMetadataTableName +NameUtils: + methods: + - approximatePTransformName + - approximateSimpleName +NamedTestResult: + methods: + - create + - fields + - getMetric + - getSchema + - getValue + - tags + - toInfluxDBDataPoint + - toMap +NanosDuration: + methods: + - toBaseType + - toInputType + properties: + - 
IDENTIFIER +NanosInstant: + methods: + - toBaseType + - toInputType + properties: + - IDENTIFIER +Neo4jIO: + methods: + - apply + - create + - expand + - finishBundle + - of + - populateDisplayData + - processElement + - readAll + - setup + - startBundle + - withBatchSize + - withCoder + - withConfig + - withCypher + - withCypherLogging + - withDefaultConfig + - withDriverConfiguration + - withParametersFunction + - withPassword + - withReadTransaction + - withRowMapper + - withSessionConfig + - withTransactionConfig + - withUnwindMapName + - withUrl + - withUrls + - withUsername + - withWriteTransaction + - writeUnwind + properties: + - closed + - driver + - session +Never: + methods: + - ever + - getWatermarkThatGuaranteesFiring +NexmarkConfiguration: + methods: + - copy + - equals + - fromString + - hashCode + - overrideFromOptions + - toShortString + - toString + properties: + - DEFAULT + - auctionSkip + - avgAuctionByteSize + - avgBidByteSize + - avgPersonByteSize + - coderStrategy + - cpuDelayMs + - debug + - diskBusyBytes + - exportSummaryToBigQuery + - fanout + - firstEventRate + - generateEventFilePathPrefix + - hotAuctionRatio + - hotBiddersRatio + - hotSellersRatio + - isRateLimited + - maxAuctionsWaitingTime + - maxLogEvents + - nextEventRate + - numActivePeople + - numEventGenerators + - numEvents + - numInFlightAuctions + - numKeyBuckets + - occasionalDelaySec + - outOfOrderGroupSize + - pardoCPUFactor + - preloadSeconds + - probDelayedEvent + - pubSubMode + - pubsubMessageSerializationMethod + - query + - ratePeriodSec + - rateShape + - rateUnit + - sessionGap + - sideInputNumShards + - sideInputRowCount + - sideInputType + - sideInputUrl + - sinkType + - sourceType + - streamTimeout + - usePubsubPublishTime + - useWallclockEventTime + - watermarkHoldbackSec + - windowPeriodSec + - windowSizeSec +NexmarkLauncher: + methods: + - processElement + - run +NexmarkPerf: + methods: + - anyActivity + - fromString + - toMap + - toString + properties: + - errors + - eventBytesPerSec + - eventsPerSec + - jobId + - numEvents + - numResults + - processingDelaySec + - resultBytesPerSec + - resultsPerSec + - runtimeSec + - shutdownDelaySec + - snapshots + - startupDelaySec +NexmarkQuery: + methods: + - expand + - getTransform + properties: + - eventMonitor + - resultMonitor +NexmarkQueryModel: + methods: + - apply + - assertionFor + - simulator + properties: + - configuration +NexmarkQueryTransform: + methods: + - getSideInput + - needsSideInput + - setSideInput +NexmarkQueryUtil: + methods: + - expand + - processElement + properties: + - AS_AUCTION + - AS_BID + - AS_PERSON + - AUCTION_BY_ID + - AUCTION_BY_SELLER + - AUCTION_TAG + - BID_BY_AUCTION + - BID_TAG + - BID_TO_AUCTION + - BID_TO_PRICE + - EVENT_TIMESTAMP_FROM_DATA + - IS_BID + - IS_NEW_AUCTION + - IS_NEW_PERSON + - JUST_BIDS + - JUST_NEW_AUCTIONS + - JUST_NEW_PERSONS + - PERSON_BY_ID + - PERSON_TAG +NexmarkUtils: + methods: + - apply + - batchEventsSource + - castToKnownSize + - cleanUpSideInput + - console + - cpuDelay + - decode + - devNull + - diskBusy + - encode + - expand + - format + - hash + - info + - interEventDelayUs + - log + - prepareSideInput + - processElement + - processingMode + - rateToPeriodUs + - setupPipeline + - snoop + - stamp + - standardEventIterator + - stepLengthSec + - streamEventsSource + properties: + - BEGINNING_OF_TIME + - END_OF_TIME + - MAPPER + - PUBSUB_ID + - PUBSUB_TIMESTAMP +NoSuchSchemaException: {} +NodeStats: + methods: + - create + - getRate + - getRowCount + - getWindow + - isUnknown + - 
minus + - multiply + - plus + properties: + - UNKNOWN +NonMergingWindowFn: + methods: + - isNonMerging + - mergeWindows +NoopCredentialFactory: + methods: + - fromOptions + - getAuthenticationType + - getCredential + - getRequestMetadata + - hasRequestMetadata + - hasRequestMetadataOnly + - refresh +NoopLock: + methods: + - get + - lock + - lockInterruptibly + - newCondition + - tryLock + - unlock +NoopPathValidator: + methods: + - fromOptions + - validateInputFilePatternSupported + - validateOutputFilePrefixSupported + - validateOutputResourceSupported + - verifyPath +NullCredentialInitializer: + methods: + - handleResponse + - initialize + - throwNullCredentialException +NullThroughputEstimator: + methods: + - getFrom + - update +NullableCoder: + methods: + - consistentWithEquals + - decode + - encode + - getCoderArguments + - getEncodedTypeDescriptor + - getValueCoder + - isRegisterByteSizeObserverCheap + - of + - registerByteSizeObserver + - structuralValue + - verifyDeterministic +NumberedShardedFile: + methods: + - getFilePattern + - readFilesWithRetries + - toString +ObjectPool: + methods: + - equals + - hashCode + - pooledClientFactory + - release + - releaseByKey + - retain +OffsetBasedSource: + methods: + - advance + - allowsDynamicSplitting + - createSourceForSubrange + - getBytesPerOffset + - getCurrentSource + - getEndOffset + - getEstimatedSizeBytes + - getFractionConsumed + - getMaxEndOffset + - getMinBundleSize + - getSplitPointsConsumed + - getSplitPointsRemaining + - getStartOffset + - isDone + - isStarted + - populateDisplayData + - split + - splitAtFraction + - start + - toString + - validate +OffsetByteRangeCoder: + methods: + - decode + - encode + - getCoderProvider +OffsetRange: + methods: + - consistentWithEquals + - decode + - encode + - equals + - getEncodedTypeDescriptor + - getFrom + - getTo + - hashCode + - isRegisterByteSizeObserverCheap + - newTracker + - of + - split + - toString +OffsetRangeTracker: + methods: + - checkDone + - currentRestriction + - getFractionConsumed + - getPositionForFractionConsumed + - getProgress + - getSplitPointsProcessed + - getStartPosition + - getStopPosition + - isBounded + - isDone + - isStarted + - markDone + - toString + - tryClaim + - tryReturnRecordAt + - trySplit + - trySplitAtPosition + properties: + - OFFSET_INFINITY +OneOfType: + methods: + - create + - createValue + - equals + - getArgument + - getArgumentType + - getBaseType + - getCaseEnumType + - getCaseType + - getFieldType + - getIdentifier + - getOneOfSchema + - getValue + - hashCode + - toBaseType + - toInputType + - toString + properties: + - IDENTIFIER +OpenModuleAgent: + methods: + - premain +Operator: + methods: + - getName + - getOutputType + - toString +OperatorTransform: + methods: + - apply + - expand + - getOperator +OrFinallyTrigger: + methods: + - getMainTrigger + - getUntilTrigger + - getWatermarkThatGuaranteesFiring + - mayFinish + - toString +Order: + methods: + - equals + - getCustomerId + - getId + - hashCode + - setCustomerId + - setId + - toString +OrderKey: + methods: + - getDir + - getIndex + - getNullFirst + - of +OutboundObserverFactory: + methods: + - clientBuffered + - clientDirect + - outboundObserverFor + - serverDirect + - trivial +PAssert: + methods: + - apply + - assertFor + - containsInAnyOrder + - countAsserts + - empty + - enterCompositeTransform + - equals + - expand + - from + - hashCode + - inCombinedNonLatePanes + - inEarlyGlobalWindowPanes + - inEarlyPane + - inFinalPane + - inLatePane + - inOnTimePane + - inOnlyPane + - 
inWindow + - isEqualTo + - leaveCompositeTransform + - notEqualTo + - of + - prepareActuals + - processElement + - satisfies + - that + - thatFlattened + - thatList + - thatMap + - thatMultimap + - thatSingleton + - thatSingletonIterable + - visitPrimitiveTransform + - windowActuals + - windowDummy + - wrap + properties: + - FAILURE_COUNTER + - SUCCESS_COUNTER +PBegin: + methods: + - apply + - expand + - getPipeline + - in +PCollection: + methods: + - and + - apply + - createPrimitiveOutputInternal + - expand + - finishSpecifying + - finishSpecifyingOutput + - getCoder + - getFromRowFunction + - getName + - getSchema + - getToRowFunction + - getTypeDescriptor + - getWindowingStrategy + - hasSchema + - isBounded + - setCoder + - setIsBoundedInternal + - setName + - setRowSchema + - setSchema + - setTypeDescriptor + - setWindowingStrategyInternal +PCollectionConsumerRegistry: + methods: + - accept + - finishLazyUpdate + - forConsumer + - getConsumer + - getExecutionState + - getMetricsContainer + - getMultiplexingConsumer + - getPTransformId + - getProgress + - register + - trySplit + - tryUpdate +PCollectionList: + methods: + - and + - apply + - empty + - equals + - expand + - finishSpecifyingOutput + - get + - getAll + - getPipeline + - hashCode + - of + - size +PCollectionLists: + methods: + - getOnlyElement +PCollectionRowTuple: + methods: + - and + - apply + - empty + - equals + - expand + - finishSpecifyingOutput + - get + - getAll + - getPipeline + - has + - hashCode + - of +PCollectionTuple: + methods: + - and + - apply + - empty + - equals + - expand + - finishSpecifyingOutput + - get + - getAll + - getPipeline + - has + - hashCode + - of + - ofPrimitiveOutputsInternal +PCollectionViews: + methods: + - apply + - compare + - contains + - containsKey + - create + - createMetadata + - decode + - encode + - entrySet + - equals + - expand + - get + - getCoderArguments + - getCoderInternal + - getDefaultValue + - getMaterialization + - getMetadata + - getPCollection + - getTagInternal + - getTypeDescriptor + - getViewFn + - getWindowMappingFn + - getWindowingStrategyInternal + - hasDefault + - hashCode + - isMetadata + - iterableView + - iterableViewUsingVoidKey + - iterator + - listIterator + - listView + - listViewUsingVoidKey + - mapView + - mapViewUsingVoidKey + - multimapView + - multimapViewUsingVoidKey + - singletonView + - singletonViewUsingVoidKey + - size + - toAdditionalInputs + - toString + - verifyDeterministic +PDone: + methods: + - expand + - finishSpecifyingOutput + - getPipeline + - in +POJOUtils: + methods: + - appender + - createConstructorCreator + - createStaticCreator + - getConstructorCreator + - getFieldTypes + - getGetters + - getSetFieldCreator + - getSetters + - getStaticCreator + - prepare + - schemaFromPojoClass + properties: + - CACHED_CREATORS +PTransform: + methods: + - compose + - expand + - getAdditionalInputs + - getDefaultOutputCoder + - getName + - getResourceHints + - populateDisplayData + - setResourceHints + - toString + - validate +PTransformFunctionRegistry: + methods: + - getFunctions + - register +PTransformOverride: + methods: + - getMatcher + - getOverrideFactory + - of +PValueBase: + methods: + - finishSpecifying + - finishSpecifyingOutput + - getName + - getPipeline + - setName + - toString +PValues: + methods: + - expandInput + - expandOutput + - expandValue + - fullyExpand +PaneInfo: + methods: + - createPane + - decode + - decodePane + - encode + - equals + - fromTag + - getIndex + - getNonSpeculativeIndex + - getTiming + - hashCode + - 
isFirst + - isLast + - isUnknown + - of + - toString + - verifyDeterministic + properties: + - INSTANCE + - NO_FIRING + - ON_TIME_AND_ONLY_FIRING + - tag +ParDo: + methods: + - dispatchBag + - dispatchCombining + - dispatchMap + - dispatchOrderedList + - dispatchSet + - dispatchValue + - expand + - getAdditionalInputs + - getAdditionalOutputTags + - getDoFnSchemaInformation + - getFn + - getMainOutputTag + - getSideInputs + - of + - populateDisplayData + - toString + - withOutputTags + - withSideInput + - withSideInputs +ParDoLoadTest: + methods: + - main + - processElement +ParquetIO: + methods: + - apply + - close + - create + - createOrOverwrite + - defaultBlockSize + - expand + - flush + - from + - getConfWithModelClass + - getInitialRestriction + - getLength + - getPos + - getProgress + - getRestrictionCoder + - getSize + - makeProgress + - newStream + - newTracker + - open + - parseFilesGenericRecords + - parseGenericRecords + - populateDisplayData + - processElement + - read + - readFiles + - seek + - sink + - split + - splitBlockWithLimit + - supportsBlockSize + - withAvroDataModel + - withBeamSchemas + - withCoder + - withCompressionCodec + - withConfiguration + - withProjection + - withRowGroupSize + - write +ParquetTableProvider: + methods: + - buildBeamSqlTable + - getTableType +ParseException: {} +ParseJsons: + methods: + - apply + - exceptionsInto + - exceptionsVia + - expand + - of + - withMapper +ParseResult: + methods: + - equals + - failure + - getContent + - getError + - getErrorAsString + - getFileLocation + - getMetadata + - hashCode + - isSuccess + - success + - toString +Partition: + methods: + - expand + - getOutputTags + - getSideInputs + - of + - populateDisplayData + - processElement +PartitionMetadata: + methods: + - build + - equals + - getCreatedAt + - getEndTimestamp + - getFinishedAt + - getHeartbeatMillis + - getParentTokens + - getPartitionToken + - getRunningAt + - getScheduledAt + - getStartTimestamp + - getState + - getWatermark + - hashCode + - newBuilder + - setCreatedAt + - setEndTimestamp + - setFinishedAt + - setHeartbeatMillis + - setParentTokens + - setPartitionToken + - setRunningAt + - setScheduledAt + - setStartTimestamp + - setState + - setWatermark + - toBuilder + - toString +PartitionMetadataAdminDao: + methods: + - createPartitionMetadataTable + - deletePartitionMetadataTable + properties: + - COLUMN_CREATED_AT + - COLUMN_END_TIMESTAMP + - COLUMN_FINISHED_AT + - COLUMN_HEARTBEAT_MILLIS + - COLUMN_PARENT_TOKENS + - COLUMN_PARTITION_TOKEN + - COLUMN_RUNNING_AT + - COLUMN_SCHEDULED_AT + - COLUMN_START_TIMESTAMP + - COLUMN_STATE + - COLUMN_WATERMARK +PartitionMetadataDao: + methods: + - countPartitionsCreatedAfter + - getAllPartitionsCreatedAfter + - getCommitTimestamp + - getPartition + - getResult + - getUnfinishedMinWatermark + - insert + - runInTransaction + - tableExists + - toString + - updateToFinished + - updateToRunning + - updateToScheduled + - updateWatermark +PartitionMetadataMapper: + methods: + - from +PartitionPosition: + methods: + - done + - equals + - getMode + - getTimestamp + - hashCode + - queryChangeStream + - stop + - toString + - updateState + - waitForChildPartitions +PartitionRestriction: + methods: + - done + - equals + - getEndTimestamp + - getMetadata + - getMode + - getStartTimestamp + - getStoppedMode + - hashCode + - queryChangeStream + - stop + - toString + - updateState + - waitForChildPartitions + - withMetadata +PartitionRestrictionClaimer: + methods: + - tryClaim +PartitionRestrictionMetadata: + methods: + 
- build + - getPartitionEndTimestamp + - getPartitionStartTimestamp + - getPartitionToken + - newBuilder + - toString + - withPartitionEndTimestamp + - withPartitionStartTimestamp + - withPartitionToken +PartitionRestrictionProgressChecker: + methods: + - getProgress + - setTimeSupplier +PartitionRestrictionSplitter: + methods: + - trySplit +PartitionRestrictionTracker: + methods: + - checkDone + - currentRestriction + - getProgress + - isBounded + - setTimeSupplier + - tryClaim + - trySplit +PartitioningWindowFn: + methods: + - assignWindow + - assignWindows + - assignsToOneWindow + - getDefaultWindowMappingFn + - getSideInputWindow +PassThroughLogicalType: + methods: + - getArgument + - getArgumentType + - getBaseType + - getIdentifier + - toBaseType + - toInputType +PatternCondition: + methods: + - eval +PayloadSerializerKafkaTable: + methods: + - expand +PayloadSerializers: + methods: + - getSerializer +PeriodicImpulse: + methods: + - applyWindowing + - create + - expand + - startAt + - stopAt + - withInterval +PeriodicSequence: + methods: + - checkDone + - create + - currentRestriction + - equals + - expand + - getInitialRange + - getInitialWatermarkState + - getProgress + - hashCode + - isBounded + - newTracker + - newWatermarkEstimator + - processElement + - toString + - tryClaim + - trySplit + properties: + - durationMilliSec + - first + - last +Person: + methods: + - decode + - encode + - equals + - hasAnnotation + - hashCode + - sizeInBytes + - structuralValue + - toString + - verifyDeterministic + - withAnnotation + - withoutAnnotation + properties: + - CODER + - city + - creditCard + - dateTime + - emailAddress + - extra + - id + - name + - state +PersonGenerator: + methods: + - lastBase0PersonId + - nextBase0PersonId + - nextPerson +Pipeline: + methods: + - apply + - applyTransform + - begin + - create + - enterCompositeTransform + - enterPipeline + - forTransformHierarchy + - getCoderRegistry + - getOptions + - getSchemaRegistry + - leaveCompositeTransform + - leavePipeline + - replaceAll + - run + - setCoderRegistry + - toString + - traverseTopologically + - visitPrimitiveTransform + - visitValue +PipelineOptionsFactory: + methods: + - apply + - as + - compare + - create + - describe + - fromArgs + - getRegisteredOptions + - printHelp + - register + - resetCache + - withValidation + - withoutStrictParsing +PipelineOptionsValidator: + methods: + - validate + - validateCli +PipelineRunner: + methods: + - create + - fromOptions + - run +Plugin: + methods: + - build + - builder + - createBatch + - createStreaming + - getContext + - getFormatClass + - getFormatProviderClass + - getHadoopConfiguration + - getPluginClass + - getPluginConfig + - getPluginType + - initContext + - initPluginType + - isUnbounded + - prepareRun + - setContext + - setFormatClass + - setFormatProviderClass + - setPluginClass + - setPluginType + - withConfig + - withHadoopConfiguration +PluginConfigInstantiationUtils: {} +PluginConstants: + methods: + - getFormatClass + - getFormatName + - getFormatProviderClass + - getFormatProviderName + - getKeyClass + - getValueClass +PostProcessingMetricsDoFn: + methods: + - processElement +PrecombineGroupingTable: + methods: + - add + - combining + - combiningAndSampling + - compact + - equals + - estimateSize + - flush + - getAccumulator + - getGroupingKey + - getKey + - getOutputTimestamp + - getStructuralKey + - getWeight + - getWindows + - hashCode + - put + - shrink + - toString +PrecombineGroupingTableBenchmark: + methods: + - setUp + - sumIntegerBinaryCombine 
+ properties: + - distribution + - globallyWindowed +Preconditions: + methods: + - checkArgumentNotNull + - checkStateNotNull +PrefetchableIterables: + methods: + - concat + - createIterator + - emptyIterable + - fromArray + - hasNext + - isReady + - iterator + - limit + - next + - prefetch +PrefetchableIterators: + methods: + - concat + - concatIterators + - emptyIterator + - fromArray + - hasNext + - isReady + - next + - prefetch +PrepareWrite: + methods: + - expand + - processElement +PriceGenerator: + methods: + - nextPrice +ProcessBundleBenchmark: + methods: + - getCacheTokens + - handle + - log + - process + - tearDown + - testLargeBundle + - testStateWithCaching + - testStateWithoutCaching + - testTinyBundle +ProcessBundleHandler: + methods: + - activate + - addBundleProgressReporter + - addFinishBundleFunction + - addIncomingDataEndpoint + - addIncomingTimerEndpoint + - addOutgoingDataEndpoint + - addOutgoingTimersEndpoint + - addPCollectionConsumer + - addResetFunction + - addStartBundleFunction + - addTearDownFunction + - afterBundleCommit + - close + - create + - createRunnerForPTransform + - find + - getActiveBundleProcessors + - getBeamFnDataClient + - getBeamFnStateClient + - getBundleCacheSupplier + - getBundleFinalizer + - getBundleProcessorCache + - getCacheTokensSupplier + - getCoders + - getPCollectionConsumer + - getPCollections + - getPTransform + - getPTransformId + - getPipelineOptions + - getProcessBundleInstructionIdSupplier + - getProcessWideCache + - getRunnerCapabilities + - getShortIdMap + - getSplitListener + - getStateTracker + - getWindowingStrategies + - handle + - hashCode + - load + - processBundle + - progress + - reset + - shutdown + - start + - trySplit + properties: + - JAVA_SOURCE_URN +ProducerRecordCoder: + methods: + - consistentWithEquals + - decode + - encode + - getCoderArguments + - isRegisterByteSizeObserverCheap + - of + - structuralValue + - verifyDeterministic +ProtoCoder: + methods: + - coderFor + - decode + - encode + - equals + - getCoderProvider + - getExtensionHosts + - getExtensionRegistry + - getMessageType + - hashCode + - of + - verifyDeterministic + - withExtensionsFrom + properties: + - serialVersionUID +ProtoDomain: + methods: + - buildFrom + - contains + - equals + - getDescriptor + - getFieldOptionById + - getFileDescriptor + - hashCode + properties: + - serialVersionUID +ProtoDynamicMessageSchema: + methods: + - apply + - forDescriptor + - getBaseClass + - getFromRowFunction + - getSchema + - getSubContext + - getToRowFunction + - invokeNewBuilder + properties: + - serialVersionUID +ProtoFromBytes: + methods: + - apply + - expand +ProtoMessageSchema: + methods: + - fieldValueGetters + - fieldValueTypeInformations + - get + - getProtoBytesToRowFn + - getRowToProtoBytesFn + - schemaFor + - schemaTypeCreator +ProtoPayloadSerializerProvider: + methods: + - getSerializer + - identifier +ProtoSchemaLogicalTypes: + methods: + - toDuration + - toRow + - toTimestamp + properties: + - IDENTIFIER +ProtoToBytes: + methods: + - apply + - expand +ProtobufCoderProviderRegistrar: + methods: + - getCoderProviders +Providers: + methods: + - loadProviders +PubSubPayloadTranslation: + methods: + - getTransformPayloadTranslators + - getUrn + - translate +PublishResponseCoders: + methods: + - decode + - defaultPublishResponse + - encode + - fullPublishResponse + - fullPublishResponseWithoutHeaders + - verifyDeterministic +PublishResultCoders: + methods: + - decode + - defaultPublishResult + - encode + - fullPublishResult + - 
fullPublishResultWithoutHeaders + - verifyDeterministic +PublisherOptions: + methods: + - build + - newBuilder + - setTopicPath + - topicPath +PubsubClient: + methods: + - ackDeadlineSeconds + - ackId + - acknowledge + - createRandomSubscription + - createSubscription + - createTopic + - deleteSubscription + - deleteTopic + - equals + - getFullPath + - getId + - getName + - getPath + - hashCode + - isEOF + - listSubscriptions + - listTopics + - message + - modifyAckDeadline + - of + - projectPathFromId + - projectPathFromPath + - publish + - pull + - recordId + - requestTimeMsSinceEpoch + - subscriptionPathFromName + - subscriptionPathFromPath + - timestampMsSinceEpoch + - toString + - topicPathFromName + - topicPathFromPath +PubsubCoderProviderRegistrar: + methods: + - getCoderProviders +PubsubDlqProvider: + methods: + - expand + - identifier + - newDlqTransform +PubsubGrpcClient: + methods: + - ackDeadlineSeconds + - acknowledge + - close + - createSubscription + - createTopic + - deleteSubscription + - deleteTopic + - getKind + - isEOF + - listSubscriptions + - listTopics + - modifyAckDeadline + - newClient + - publish + - pull + properties: + - FACTORY +PubsubHelper: + methods: + - cleanup + - create + - createOrReuseTopic + - createSubscription + - createTopic + - reuseSubscription + - reuseTopic + - subscriptionExists + - topicExists +PubsubIO: + methods: + - apply + - asPath + - asV1Beta1Path + - asV1Beta2Path + - expand + - finishBundle + - fromPath + - fromSubscription + - fromTopic + - populateDisplayData + - processElement + - readAvroGenericRecords + - readAvros + - readAvrosWithBeamSchema + - readMessages + - readMessagesWithAttributes + - readMessagesWithAttributesAndMessageId + - readMessagesWithAttributesAndMessageIdAndOrderingKey + - readMessagesWithCoderAndParseFn + - readMessagesWithMessageId + - readProtoDynamicMessages + - readProtos + - readStrings + - startBundle + - to + - toString + - withClientFactory + - withCoderAndParseFn + - withDeadLetterTopic + - withIdAttribute + - withMaxBatchBytesSize + - withMaxBatchSize + - withPubsubRootUrl + - withTimestampAttribute + - writeAvros + - writeMessages + - writeProtos + - writeStrings +PubsubJsonClient: + methods: + - ackDeadlineSeconds + - acknowledge + - close + - createSubscription + - createTopic + - deleteSubscription + - deleteTopic + - getKind + - isEOF + - listSubscriptions + - listTopics + - modifyAckDeadline + - newClient + - publish + - pull + properties: + - FACTORY +PubsubLiteIO: + methods: + - addUuids + - deduplicate + - expand + - read + - write +PubsubLiteSink: + methods: + - finishBundle + - processElement + - startBundle +PubsubLiteTableProvider: + methods: + - buildBeamSqlTable + - getTableType +PubsubMessage: + methods: + - equals + - getAttribute + - getAttributeMap + - getMessageId + - getOrderingKey + - getPayload + - hashCode + - toString +PubsubMessagePayloadOnlyCoder: + methods: + - decode + - encode + - of +PubsubMessageWithAttributesAndMessageIdAndOrderingKeyCoder: + methods: + - decode + - encode + - of +PubsubMessageWithAttributesAndMessageIdCoder: + methods: + - decode + - encode + - of +PubsubMessageWithAttributesCoder: + methods: + - decode + - encode + - of +PubsubMessageWithMessageIdCoder: + methods: + - decode + - encode + - of +PubsubMessages: + methods: + - apply + - fromProto + - toProto +PubsubSchemaIOProvider: + methods: + - buildReader + - buildWriter + - configurationSchema + - expand + - from + - identifier + - isBounded + - requiresDataSchema + - schema + properties: + - 
ATTRIBUTE_ARRAY_ENTRY_SCHEMA + - ATTRIBUTE_ARRAY_FIELD_TYPE + - ATTRIBUTE_MAP_FIELD_TYPE +PubsubSchemaTransformReadConfiguration: + methods: + - build + - builder + - getDataSchema + - getDeadLetterQueue + - getFormat + - getIdAttribute + - getProtoClass + - getSubscription + - getThriftClass + - getThriftProtocolFactoryClass + - getTimestampAttribute + - getTopic + - setDataSchema + - setDeadLetterQueue + - setFormat + - setIdAttribute + - setProtoClass + - setSubscription + - setThriftClass + - setThriftProtocolFactoryClass + - setTimestampAttribute + - setTopic +PubsubSchemaTransformReadProvider: + methods: + - buildTransform + - expand + - identifier + - inputCollectionNames + - outputCollectionNames + - validate +PubsubSchemaTransformWriteConfiguration: + methods: + - build + - getFormat + - getIdAttribute + - getTimestampAttribute + - getTopic + - setFormat + - setIdAttribute + - setTimestampAttribute + - setTopic +PubsubTableProvider: + methods: + - getSchemaIOProvider + - getTableType +PubsubTestClient: + methods: + - ackDeadlineSeconds + - acknowledge + - advance + - close + - createFactoryForCreateSubscription + - createFactoryForPublish + - createFactoryForPull + - createFactoryForPullAndPublish + - createSubscription + - createTopic + - deleteSubscription + - deleteTopic + - getKind + - isEOF + - listSubscriptions + - listTopics + - modifyAckDeadline + - newClient + - publish + - pull +PubsubUnboundedSink: + methods: + - decode + - encode + - expand + - finishBundle + - getIdAttribute + - getTimestampAttribute + - getTopic + - getTopicProvider + - populateDisplayData + - processElement + - startBundle + properties: + - outer +PubsubUnboundedSource: + methods: + - advance + - apply + - close + - createReader + - decode + - encode + - expand + - finalizeCheckpoint + - getCheckpointMark + - getCheckpointMarkCoder + - getCurrent + - getCurrentRecordId + - getCurrentSource + - getCurrentTimestamp + - getIdAttribute + - getNeedsAttributes + - getNeedsMessageId + - getNeedsOrderingKey + - getOutputCoder + - getProject + - getSplitBacklogBytes + - getSubscription + - getSubscriptionProvider + - getTimestampAttribute + - getTopic + - getTopicProvider + - getWatermark + - identity + - nackAll + - nackBatch + - of + - populateDisplayData + - processElement + - requiresDeduping + - split + - start + - validate + properties: + - outer +PulsarIO: + methods: + - expand + - read + - useProcessingTime + - usePublishTime + - withAdminUrl + - withClientUrl + - withEndMessageId + - withEndTimestamp + - withExtractOutputTimestampFn + - withProcessingTime + - withPublishTime + - withPulsarClient + - withStartTimestamp + - withTopic + - write +PulsarMessage: + methods: + - getMessageRecord + - getPublishTimestamp + - getTopic + - setMessageRecord +PulsarMessageCoder: + methods: + - decode + - encode + - of +PulsarSourceDescriptor: + methods: + - of +PythonCallable: + methods: + - getArgumentType + - getBaseType + - getIdentifier + - toBaseType + - toInputType + properties: + - IDENTIFIER +PythonCallableSource: + methods: + - getPythonCallableCode + - of +PythonExternalTransform: + methods: + - expand + - from + - withArgs + - withExtraPackages + - withKwarg + - withKwargs + - withOutputCoder + - withOutputCoders + - withTypeHint +PythonMap: + methods: + - expand + - viaFlatMapFn + - viaMapFn + - withExpansionService +PythonService: + methods: + - findAvailablePort + - start + - waitForPort + - withExtraPackages +Quantifier: + methods: + - toString + properties: + - ASTERISK + - ASTERISK_RELUCTANT + - 
NONE + - PLUS + - PLUS_RELUCTANT + - QMARK + - QMARK_RELUCTANT +Query0: + methods: + - expand + - processElement +Query0Model: + methods: + - simulator +Query1: + methods: + - expand + - processElement +Query10: + methods: + - expand + - processElement + - setMaxNumWorkers + - setOutputPath + - toString +Query11: + methods: + - expand + - processElement +Query12: + methods: + - expand + - processElement +Query13: + methods: + - expand + - processElement +Query14: + methods: + - expand +Query1Model: + methods: + - simulator +Query2: + methods: + - expand + - processElement +Query2Model: + methods: + - simulator +Query3: + methods: + - expand + - onTimerCallback + - processElement +Query3Model: + methods: + - simulator +Query4: + methods: + - expand + - processElement +Query4Model: + methods: + - simulator +Query5: + methods: + - addInput + - createAccumulator + - equals + - expand + - extractOutput + - getAccumulatorCoder + - hashCode + - mergeAccumulator + - processElement + properties: + - auctions + - count +Query5Model: + methods: + - run + - simulator +Query6: + methods: + - addInput + - createAccumulator + - expand + - extractOutput + - mergeAccumulators + - processElement +Query6Model: + methods: + - simulator +Query7: + methods: + - expand + - processElement +Query7Model: + methods: + - simulator +Query8: + methods: + - expand + - processElement +Query8Model: + methods: + - run + - simulator +Query9: + methods: + - expand +Query9Model: + methods: + - simulator +QueryChangeStreamAction: + methods: + - run +QueryReader: + methods: + - getQueryIdentifiers + - readQuery +QueryStatementConverter: + methods: + - convert + - convertRootQuery +QueryTrait: + methods: + - addOutputColumnList + - addResolvedTable + - getTablePath + - isTableResolved + - resolveAlias + - retrieveFieldNames + properties: + - outputColumnMap + - resolvedTables + - withEntries +RabbitMqIO: + methods: + - advance + - advanceWatermark + - close + - createReader + - expand + - finalizeCheckpoint + - getChannel + - getCheckpointMark + - getCheckpointMarkCoder + - getCurrent + - getCurrentRecordId + - getCurrentSource + - getCurrentTimestamp + - getOutputCoder + - getWatermark + - processElement + - read + - requiresDeduping + - setup + - split + - start + - stop + - teardown + - withExchange + - withMaxNumRecords + - withMaxReadTime + - withQueue + - withQueueDeclare + - withUri + - withUseCorrelationId + - write +RabbitMqMessage: + methods: + - createProperties + - equals + - getAppId + - getBody + - getClusterId + - getContentEncoding + - getContentType + - getCorrelationId + - getDeliveryMode + - getExpiration + - getHeaders + - getMessageId + - getPriority + - getReplyTo + - getRoutingKey + - getTimestamp + - getType + - getUserId + - hashCode +RampupThrottlingFn: + methods: + - populateDisplayData + - processElement + - setup +RawUnionValue: + methods: + - equals + - getUnionTag + - getValue + - hashCode + - toString +Read: + methods: + - advance + - checkDone + - close + - create + - createReader + - currentRestriction + - decode + - encode + - expand + - from + - getCheckpoint + - getCheckpointMark + - getCheckpointMarkCoder + - getCoderArguments + - getCurrent + - getCurrentSource + - getCurrentTimestamp + - getId + - getInitialWatermarkEstimatorState + - getKindString + - getProgress + - getSize + - getSource + - getTimestamp + - getValue + - getWatermark + - initialRestriction + - isBounded + - newWatermarkEstimator + - populateDisplayData + - processElement + - restrictionCoder + - restrictionTracker + - 
setUp + - split + - splitRestriction + - start + - tryClaim + - trySplit + - verifyDeterministic + - withMaxNumRecords + - withMaxReadTime +ReadAllViaFileBasedSource: + methods: + - apply + - expand + - process +ReadBuilder: + methods: + - buildExternal + - getCsvMapper +ReadChangeStreamPartitionDoFn: + methods: + - getInitialWatermarkEstimatorState + - getSize + - initialRestriction + - newTracker + - newWatermarkEstimator + - processElement + - setThroughputEstimator + - setup +ReadChangeStreamPartitionRangeTracker: + methods: + - tryClaim + - trySplit +ReadFromPulsarDoFn: + methods: + - estimate + - getInitialRestriction + - getInitialWatermarkEstimatorState + - getRestrictionCoder + - getSize + - initPulsarClients + - newReader + - newWatermarkEstimator + - processElement + - restrictionTracker + - teardown +ReadOnlyTableProvider: + methods: + - buildBeamSqlTable + - createTable + - dropTable + - getTableType + - getTables +ReadOperation: + methods: + - create + - getColumns + - getIndex + - getKeySet + - getQuery + - getQueryName + - getTable + - withColumns + - withIndex + - withKeySet + - withPartitionOptions + - withQuery + - withQueryName + - withTable +ReadableFileCoder: + methods: + - decode + - encode + - getCoderArguments + - getMetadataCoder + - of + - verifyDeterministic +ReadableStates: + methods: + - immediate + - read + - readLater +ReaderDelay: + methods: + - delayRecord + - delayStart +ReceiverBuilder: + methods: + - build + - getSparkReceiverClass + - withConstructorArgs +RecommendationAICreateCatalogItem: + methods: + - build + - catalogName + - expand + - processElement + - projectId + - setCatalogName + - setProjectId + - withCatalogName + - withProjectId + properties: + - FAILURE_TAG + - SUCCESS_TAG +RecommendationAIIO: + methods: + - createCatalogItems + - importCatalogItems + - importUserEvents + - predictAll + - writeUserEvent +RecommendationAIImportCatalogItems: + methods: + - batchSize + - build + - catalogName + - expand + - maxBufferingDuration + - processElement + - projectId + - setBatchSize + - setCatalogName + - setMaxBufferingDuration + - setProjectId + - withBatchSize + - withCatalogName + - withProjectId + properties: + - FAILURE_TAG + - SUCCESS_TAG +RecommendationAIImportUserEvents: + methods: + - batchSize + - build + - catalogName + - eventStore + - expand + - maxBufferingDuration + - processElement + - projectId + - setBatchSize + - setCatalogName + - setEventStore + - setMaxBufferingDuration + - setProjectId + - withBatchSize + - withCatalogName + - withEventStore + - withProjectId + properties: + - FAILURE_TAG + - SUCCESS_TAG +RecommendationAIPredict: + methods: + - build + - catalogName + - eventStore + - expand + - placementId + - processElement + - projectId + - setCatalogName + - setEventStore + - setPlacementId + - setProjectId + - withCatalogName + - withEventStore + - withPlacementId + - withProjectId + properties: + - FAILURE_TAG + - SUCCESS_TAG +RecommendationAIWriteUserEvent: + methods: + - build + - catalogName + - eventStore + - expand + - processElement + - projectId + - setCatalogName + - setEventStore + - setProjectId + - withCatalogName + - withEventStore + - withProjectId + properties: + - FAILURE_TAG + - SUCCESS_TAG +RecordWithMetadata: + methods: + - getArgument + - getArgumentType + - getBaseType + - getIdentifier + - getSchema + - toBaseType + - toInputType + properties: + - RANGE_OFFSET + - RECORD_NUM + - RECORD_NUM_IN_OFFSET + - RECORD_OFFSET + - RESOURCE_ID + - VALUE +RedisConnectionConfiguration: + methods: + - connect + 
- create + - enableSSL + - populateDisplayData + - withAuth + - withHost + - withPort + - withSSL + - withTimeout +RedisCursor: + methods: + - compareTo + - equals + - getCursor + - getDbSize + - hashCode + - isStart + - of + properties: + - END_CURSOR + - ZERO_CURSOR + - ZERO_KEY +RedisIO: + methods: + - expand + - finishBundle + - getInitialRestriction + - populateDisplayData + - processElement + - read + - readKeyPatterns + - setup + - startBundle + - teardown + - withApproximateTrim + - withAuth + - withBatchSize + - withConnectionConfiguration + - withEndpoint + - withExpireTime + - withKeyPattern + - withMaxLen + - withMethod + - withOutputParallelization + - withTimeout + - write + - writeStreams +ReduceByKey: + methods: + - accumulationMode + - combineBy + - getAccumulate + - getAccumulatorFactory + - getAccumulatorType + - getMergeAccumulators + - getOutputFn + - getReducer + - getValueComparator + - getValueExtractor + - getValueType + - isCombinable + - isCombineFnStyle + - keyBy + - named + - of + - output + - outputValues + - reduceBy + - triggeredBy + - valueBy + - windowBy + - withAllowedLateness + - withOnTimeBehavior + - withSortedValues + - withTimestampCombiner +ReduceByKeyTranslator: + methods: + - addInput + - apply + - canTranslate + - createAccumulator + - extractOutput + - getAccumulatorCoder + - mergeAccumulators + - processElement + - translate +ReduceWindow: + methods: + - accumulationMode + - combineBy + - expand + - getReducer + - getValueComparator + - getValueExtractor + - getValueType + - isCombinable + - isCombineFnStyle + - named + - of + - output + - reduceBy + - triggeredBy + - valueBy + - windowBy + - withAllowedLateness + - withOnTimeBehavior + - withSortedValues + - withTimestampCombiner +ReflectHelpers: + methods: + - compare + - declaredFieldsWithAnnotation + - declaredMethodsWithAnnotation + - findClassLoader + - formatAnnotation + - formatMethod + - formatMethodWithClass + - getClosureOfMethodsOnInterface + - getClosureOfMethodsOnInterfaces + - loadServicesOrdered + - simpleTypeDescription + properties: + - INSTANCE +ReflectUtils: + methods: + - boxIfPrimitive + - create + - getAnnotatedConstructor + - getAnnotatedCreateMethod + - getClazz + - getFields + - getIterableComponentType + - getMapType + - getMethods + - getMethodsMap + - getSchema + - isGetter + - isSetter + - stripGetterPrefix + - stripPrefix + - stripSetterPrefix +Regex: + methods: + - allMatches + - expand + - find + - findAll + - findKV + - matches + - matchesKV + - processElement + - replaceAll + - replaceFirst + - split +RegexMatcher: + methods: + - describeTo + - matches +Reify: + methods: + - expand + - extractTimestampsFromValues + - getAllowedTimestampSkew + - process + - processElement + - timestamps + - timestampsInValue + - viewAsValues + - viewInGlobalWindow + - windows + - windowsInValue +ReifyAsIterable: + methods: + - expand + - processElement +RelMdNodeStats: + methods: + - getDef + - getNodeStats + properties: + - SOURCE +ReleaseInfo: + methods: + - getDefaultDockerRepoPrefix + - getDefaultDockerRepoRoot + - getName + - getProperties + - getReleaseInfo + - getSdkVersion + - getVersion + - isDevSdkVersion +RemoteGrpcPortRead: + methods: + - fromPTransform + - getPort + - readFromPort + - toPTransform + properties: + - URN +RemoteGrpcPortWrite: + methods: + - fromPTransform + - getPort + - toPTransform + - writeToPort + properties: + - URN +RenameFields: + methods: + - create + - expand + - processElement + - rename +Repeatedly: + methods: + - forever + - 
getRepeatedTrigger + - getWatermarkThatGuaranteesFiring + - mayFinish + - toString +Requirements: + methods: + - empty + - getSideInputs + - isEmpty + - requiresSideInputs + - union +Reshuffle: + methods: + - expand + - of + - processElement + - setup + - viaRandomKey + - withNumBuckets +ReshuffleTrigger: + methods: + - getWatermarkThatGuaranteesFiring + - mayFinish + - toString +ResourceHint: + methods: + - equals + - hashCode + - mergeWithOuter + - toBytes +ResourceHints: + methods: + - create + - equals + - fromOptions + - hashCode + - hints + - mergeWithOuter + - parse + - toBytes + - withAccelerator + - withHint + - withMinRam +ResourceIdCoder: + methods: + - consistentWithEquals + - decode + - encode + - of +ResourceIdTester: + methods: + - runResourceIdBattery +RestrictionTracker: + methods: + - checkDone + - currentRestriction + - from + - getTruncatedRestriction + - getWorkCompleted + - getWorkRemaining + - isBounded + - of + - tryClaim + - trySplit +RestrictionTrackers: + methods: + - checkDone + - currentRestriction + - getProgress + - isBounded + - observe + - tryClaim + - trySplit +RetryConfiguration: + methods: + - baseBackoff + - build + - builder + - convert + - maxBackoff + - numRetries + - throttledBaseBackoff + - toBuilder +RetryHttpRequestInitializer: + methods: + - handleIOException + - handleResponse + - initialize + - setCustomErrors + - setWriteTimeout +RightJoin: + methods: + - by + - named + - of + - using +RingRange: + methods: + - equals + - getEnd + - getStart + - hashCode + - isWrapping + - of + - toString +Row: + methods: + - addArray + - addIterable + - addValue + - addValues + - attachValues + - build + - deepEquals + - deepHashCode + - equals + - fromRow + - getArray + - getBaseValue + - getBaseValues + - getBoolean + - getByte + - getBytes + - getDateTime + - getDecimal + - getDouble + - getFieldCount + - getFloat + - getInt16 + - getInt32 + - getInt64 + - getIterable + - getLogicalTypeValue + - getMap + - getRow + - getSchema + - getString + - getValue + - getValues + - hashCode + - nextFieldId + - nullRow + - toRow + - toString + - withFieldAccessDescriptors + - withFieldValue + - withFieldValueGetters + - withFieldValues + - withSchema +RowBundle: + methods: + - processRows + - setup +RowCoder: + methods: + - equals + - hashCode + - of + - overrideEncodingPositions +RowCoderGenerator: + methods: + - appender + - generate + - overrideEncodingPositions + - prepare +RowJson: + methods: + - deserialize + - forSchema + - serialize + - toString + - verifySchemaSupported + - withDropNullsOnWrite + - withNullBehavior +RowJsonUtils: + methods: + - jsonToRow + - newObjectMapperWith + - rowToJson +RowMessages: + methods: + - apply + - bytesToRowFn + - rowToBytesFn +RowToCsv: + methods: + - expand + - getCsvFormat +RowToEntity: + methods: + - create + - createTest + - expand + - processElement +RowUtils: + methods: + - byteString + - byteStringUtf8 + properties: + - COLUMNS_MAPPING + - KEY + - LABELS + - TIMESTAMP_MICROS + - VALUE +RowWithGetters: + methods: + - equals + - getFieldCount + - getGetterTarget + - getGetters + - getValue + - getValues + - hashCode +RowWithStorage: + methods: + - getFieldCount + - getValue + - getValues +RpcQosOptions: + methods: + - build + - defaultOptions + - equals + - getBatchInitialCount + - getBatchMaxBytes + - getBatchMaxCount + - getBatchTargetLatency + - getHintMaxNumWorkers + - getInitialBackoff + - getMaxAttempts + - getOverloadRatio + - getSamplePeriod + - getSamplePeriodBucketSize + - getThrottleDuration + - hashCode + - 
isShouldReportDiagnosticMetrics + - newBuilder + - populateDisplayData + - toBuilder + - toString + - withBatchInitialCount + - withBatchMaxBytes + - withBatchMaxCount + - withBatchTargetLatency + - withHintMaxNumWorkers + - withInitialBackoff + - withMaxAttempts + - withOverloadRatio + - withReportDiagnosticMetrics + - withSamplePeriod + - withSamplePeriodBucketSize + - withThrottleDuration +RunInference: + methods: + - expand + - of + - ofKVs + - withExpansionService + - withExtraPackages + - withKwarg +S3FileSystemConfiguration: + methods: + - build + - builder + - builderFrom + - fromS3Options + - getBucketKeyEnabled + - getBuilder + - getS3ClientBuilder + - getS3StorageClass + - getS3ThreadPoolSize + - getS3UploadBufferSizeBytes + - getSSEAlgorithm + - getSSEAwsKeyManagementParams + - getSSECustomerKey + - getSSEKMSKeyId + - getScheme + - setBucketKeyEnabled + - setS3ClientBuilder + - setS3StorageClass + - setS3ThreadPoolSize + - setS3UploadBufferSizeBytes + - setSSEAlgorithm + - setSSEAwsKeyManagementParams + - setSSECustomerKey + - setSSEKMSKeyId + - setScheme + - toBuilder + properties: + - MINIMUM_UPLOAD_BUFFER_SIZE_BYTES +S3FileSystemRegistrar: + methods: + - fromOptions +SSECustomerKey: + methods: + - algorithm + - build + - builder + - getAlgorithm + - getKey + - getMD5 + - key + - md5 +Sample: + methods: + - addInput + - any + - anyCombineFn + - anyValueCombineFn + - combineFn + - createAccumulator + - expand + - extractOutput + - fixedSizeGlobally + - fixedSizePerKey + - getAccumulatorCoder + - getDefaultOutputCoder + - mergeAccumulators + - populateDisplayData +SbeLogicalTypes: + methods: + - getArgumentType + - getBaseType + - getIdentifier + - toBaseType + - toInputType + properties: + - IDENTIFIER +SbeSchema: + methods: + - assumeSingleMessageSchema + - build + - builder + - fromIr + - getIr + - getIrOptions + - getSbeFields + - messageId + - messageName + - setMessageId + - setMessageName + - toBuilder + properties: + - DEFAULT +ScalarFn: {} +ScalarFnReflector: + methods: + - getApplyMethod +ScalarFunctionImpl: + methods: + - create + - createAll + - getImplementor + - getJarPath + - getReturnType + - implement +Schema: + methods: + - addArrayField + - addBooleanField + - addByteArrayField + - addByteField + - addDateTimeField + - addDecimalField + - addDoubleField + - addField + - addFields + - addFloatField + - addInt16Field + - addInt32Field + - addInt64Field + - addIterableField + - addLogicalTypeField + - addMapField + - addNullableField + - addOptions + - addRowField + - addStringField + - array + - assignableTo + - assignableToIgnoreNullable + - build + - builder + - equals + - equivalent + - forTypeName + - getAllMetadata + - getCollectionElementType + - getDescription + - getEncodingPositions + - getField + - getFieldCount + - getFieldNames + - getFields + - getLastFieldId + - getLogicalType + - getMapKeyType + - getMapValueType + - getMetadata + - getMetadataString + - getName + - getNullable + - getOptionNames + - getOptions + - getRowSchema + - getType + - getTypeName + - getUUID + - getValue + - getValueOrDefault + - hasField + - hasOption + - hasOptions + - hashCode + - indexOf + - isCollectionType + - isCompositeType + - isDateType + - isEncodingPositionsOverridden + - isLogicalType + - isMapType + - isNumericType + - isPrimitiveType + - isStringType + - isSubtypeOf + - isSupertypeOf + - iterable + - logicalType + - map + - nameOf + - none + - nullable + - of + - row + - setDescription + - setEncodingPositions + - setName + - setOption + - setOptions + - 
setType + - setUUID + - toBuilder + - toSchema + - toString + - typesEqual + - withDescription + - withMetadata + - withName + - withNullable + - withOptions + - withType + properties: + - BOOLEAN + - BYTE + - BYTES + - COLLECTION_TYPES + - COMPOSITE_TYPES + - DATETIME + - DATE_TYPES + - DECIMAL + - DOUBLE + - FLOAT + - INT16 + - INT32 + - INT64 + - MAP_TYPES + - NUMERIC_TYPES + - STRING + - STRING_TYPES +SchemaAndRecord: + methods: + - getRecord + - getTableSchema +SchemaBaseBeamTable: + methods: + - getSchema +SchemaCoder: + methods: + - coderForFieldType + - consistentWithEquals + - decode + - encode + - equals + - getEncodedTypeDescriptor + - getFromRowFunction + - getSchema + - getToRowFunction + - hashCode + - of + - overrideEncodingPositions + - toString + - verifyDeterministic +SchemaIOTableProviderWrapper: + methods: + - buildBeamSqlTable + - buildIOReader + - buildIOWriter + - getSchema + - getSchemaIOProvider + - getTableStatistics + - getTableType + - isBounded + - supportsProjects +SchemaLogicalType: + methods: + - getArgumentType + - getBaseType + - getIdentifier + - toBaseType + - toInputType + properties: + - IDENTIFIER +SchemaRegistry: + methods: + - createDefault + - fromRowFunction + - getFromRowFunction + - getSchema + - getSchemaCoder + - getToRowFunction + - registerJavaBean + - registerPOJO + - registerSchemaForClass + - registerSchemaForType + - registerSchemaProvider + - schemaFor + - toRowFunction +SchemaTranslation: + methods: + - rowFromProto + - rowToProto + - schemaFromProto + - schemaToProto +SchemaUtils: + methods: + - mergeWideningNullable + - toLogicalBaseType + - toLogicalInputType +SchemaVerification: + methods: + - verifyFieldValue +SchemaZipFold: + methods: + - accept + - accumulate + - apply + - create + - parent + - path + - withParent + - withPathPart + properties: + - EMPTY +Select: + methods: + - concatFieldNames + - create + - expand + - fieldAccess + - fieldIds + - fieldNames + - flattenedSchema + - keepMostNestedFieldName + - process + - withFieldNameAs + - withOutputSchema +SelectEvent: + methods: + - expand + - processElement +SelectHelpers: + methods: + - allLeavesDescriptor + - getOutputSchema + - getRowSelector + - getRowSelectorOptimized + - select + properties: + - CONCAT_FIELD_NAMES + - KEEP_NESTED_NAME +SellerPrice: + methods: + - decode + - encode + - equals + - hashCode + - sizeInBytes + - structuralValue + - toString + - verifyDeterministic + properties: + - CODER + - seller +SerializableCoder: + methods: + - coderFor + - consistentWithEquals + - decode + - encode + - equals + - getCoderProvider + - getCoderProviders + - getEncodedTypeDescriptor + - getRecordType + - hashCode + - of + - structuralValue + - toString + - verifyDeterministic +SerializableConfiguration: + methods: + - fromMap + - get + - newConfiguration + - newJob + - readExternal + - writeExternal +SerializableFunctions: + methods: + - apply + - clonesOf + - constant + - identity +SerializableIr: + methods: + - fromIr + - ir +SerializableMatchers: + methods: + - allOf + - anyOf + - anything + - arrayContaining + - arrayContainingInAnyOrder + - arrayWithSize + - closeTo + - contains + - containsInAnyOrder + - containsString + - describeMismatch + - describeTo + - empty + - emptyArray + - emptyIterable + - endsWith + - equalTo + - fromSupplier + - get + - greaterThan + - greaterThanOrEqualTo + - hasItem + - hasSize + - isIn + - isOneOf + - iterableWithSize + - kv + - kvWithKey + - kvWithValue + - lessThan + - lessThanOrEqualTo + - matches + - not + - nullValue + - 
startsWith + - toString +SerializableRexFieldAccess: + methods: + - getIndexes +SerializableRexInputRef: + methods: + - getIndex +SerializableRexNode: + methods: + - build + - builder +SerializableThrowable: + methods: + - equals + - getThrowable + - hashCode +SerializableUtils: + methods: + - clone + - deserializeFromByteArray + - ensureSerializable + - ensureSerializableByCoder + - ensureSerializableRoundTrip + - serializeToByteArray +ServerFactory: + methods: + - allocateAddressAndCreate + - create + - createDefault + - createEpollDomainSocket + - createEpollSocket + - createWithPortSupplier + - createWithUrlFactory + - createWithUrlFactoryAndPortSupplier +SessionSideInputJoin: + methods: + - expand + - needsSideInput + - processElement +SessionSideInputJoinModel: + methods: + - simulator +Sessions: + methods: + - assignWindows + - equals + - getDefaultWindowMappingFn + - getGapDuration + - getWindowTypeDescriptor + - hashCode + - isCompatible + - mergeWindows + - populateDisplayData + - verifyCompatibility + - windowCoder + - withGapDuration +SetCoder: + methods: + - getEncodedTypeDescriptor + - of + - verifyDeterministic +Sets: + methods: + - apply + - exceptAll + - exceptDistinct + - expand + - intersectAll + - intersectDistinct + - processElement + - unionAll + - unionDistinct +ShardNameTemplate: + properties: + - DIRECTORY_CONTAINER + - INDEX_OF_MAX +ShardedKey: + methods: + - consistentWithEquals + - decode + - encode + - equals + - getCoderArguments + - getKey + - getKeyCoder + - getShardNumber + - hashCode + - isRegisterByteSizeObserverCheap + - of + - registerByteSizeObserver + - structuralValue + - toString + - verifyDeterministic +ShardedKeyCoder: + methods: + - decode + - encode + - getCoderArguments + - of + - verifyDeterministic +ShardingWritableByteChannel: + methods: + - addChannel + - close + - getChannel + - getNumShards + - isOpen + - write + - writeToShard + properties: + - ALL_SHARDS +ShuffleOperator: + methods: + - getKeyExtractor + - getKeyType + - getWindow +SideInputLoadTest: + methods: + - main + - processElement +SideInputSpec: + methods: + - create +SimpleFunction: + methods: + - apply + - fromSerializableFunctionWithOutputType + - getOutputTypeDescriptor +SingleStoreIO: + methods: + - create + - expand + - finish + - getDataSource + - getInitialRange + - populateDisplayData + - process + - processElement + - read + - readWithPartitions + - run + - splitRange + - withBatchSize + - withConnectionProperties + - withDataSourceConfiguration + - withDatabase + - withOutputParallelization + - withPassword + - withQuery + - withRowMapper + - withStatementPreparator + - withTable + - withUserDataMapper + - withUsername + - write +SingleValueCollector: + methods: + - asContext + - collect + - get + - getCounter + - getHistogram + - getTimer +SingleValueContext: + methods: + - asContext + - collect + - get + - getAndResetValue + - getCounter + - getHistogram + - getTimer +SinkMetrics: + methods: + - bytesWritten + - elementsWritten +SizeEstimator: + methods: + - sizeOf +SketchFrequencies: + methods: + - add + - addInput + - create + - createAccumulator + - decode + - encode + - estimateCount + - expand + - extractOutput + - getAccumulatorCoder + - globally + - isRegisterByteSizeObserverCheap + - mergeAccumulators + - perKey + - populateDisplayData + - withAccuracy + - withConfidence + - withRelativeError +SlidingWindows: + methods: + - assignWindows + - assignsToOneWindow + - equals + - every + - getDefaultWindowMappingFn + - getOffset + - getPeriod + - 
getSideInputWindow + - getSize + - hashCode + - isCompatible + - of + - populateDisplayData + - verifyCompatibility + - windowCoder + - withOffset +SnappyCoder: + methods: + - decode + - encode + - getCoderArguments + - of + - verifyDeterministic +SnowflakeArray: + methods: + - of + - sql +SnowflakeBatchServiceConfig: + methods: + - getCreateDisposition + - getDataSourceProviderFn + - getDatabase + - getFilesList + - getQuery + - getQuotationMark + - getSchema + - getStagingBucketDir + - getStorageIntegrationName + - getTable + - getTableSchema + - getWriteDisposition +SnowflakeBatchServiceImpl: + methods: + - read + - write +SnowflakeBinary: + methods: + - getSize + - of + - setSize + - sql + properties: + - MAX_SIZE +SnowflakeBoolean: + methods: + - of + - sql +SnowflakeChar: + methods: + - of +SnowflakeColumn: + methods: + - getDataType + - getName + - isNullable + - of + - setDataType + - setName + - setNullable + - sql +SnowflakeDate: + methods: + - of + - sql +SnowflakeDateTime: + methods: + - of +SnowflakeDecimal: + methods: + - of +SnowflakeDouble: + methods: + - of +SnowflakeFloat: + methods: + - of + - sql +SnowflakeGeography: + methods: + - of + - sql +SnowflakeIO: + methods: + - addInput + - apply + - buildDatasource + - create + - createAccumulator + - expand + - extractOutput + - finishBundle + - fromQuery + - fromTable + - getAuthenticator + - getConfig + - getDataSource + - getDatabase + - getLoginTimeout + - getOauthToken + - getPassword + - getPortNumber + - getPrivateKey + - getPrivateKeyPassphrase + - getRawPrivateKey + - getRole + - getSchema + - getServerName + - getSsl + - getUrl + - getUsername + - getWarehouse + - mergeAccumulators + - of + - populateDisplayData + - processElement + - read + - setup + - to + - withAuthenticator + - withCoder + - withCreateDisposition + - withCsvMapper + - withDataSourceConfiguration + - withDataSourceProviderFn + - withDatabase + - withDebugMode + - withFileNameTemplate + - withFlushRowLimit + - withFlushTimeLimit + - withKeyPairAuth + - withKeyPairPathAuth + - withKeyPairRawAuth + - withLoginTimeout + - withOAuth + - withPortNumber + - withQueryTransformation + - withQuotationMark + - withRole + - withSchema + - withServerName + - withShardsNumber + - withSnowPipe + - withSnowflakeServices + - withStagingBucketName + - withStorageIntegrationName + - withTableSchema + - withUrl + - withUserDataMapper + - withUsernamePasswordAuth + - withWarehouse + - withWriteDisposition + - write +SnowflakeInteger: + methods: + - of +SnowflakeNumber: + methods: + - getPrecision + - getScale + - of + - setPrecision + - setScale + - sql +SnowflakeNumeric: + methods: + - of +SnowflakeObject: + methods: + - of + - sql +SnowflakeReal: + methods: + - of +SnowflakeServicesImpl: + methods: + - getBatchService + - getStreamingService +SnowflakeStreamingServiceConfig: + methods: + - getFilesList + - getIngestManager + - getStagingBucketDir +SnowflakeStreamingServiceImpl: + methods: + - read + - write +SnowflakeString: + methods: + - of +SnowflakeTableSchema: + methods: + - getColumns + - of + - setColumns + - sql +SnowflakeText: + methods: + - of +SnowflakeTime: + methods: + - of + - sql +SnowflakeTimestamp: + methods: + - of +SnowflakeTimestampLTZ: + methods: + - of + - sql +SnowflakeTimestampNTZ: + methods: + - of + - sql +SnowflakeTimestampTZ: + methods: + - of + - sql +SnowflakeTransformRegistrar: + methods: + - knownBuilderInstances + properties: + - READ_URN + - WRITE_URN +SnowflakeVarBinary: + methods: + - of +SnowflakeVarchar: + methods: + - 
getLength + - of + - setLength + - sql + properties: + - MAX_LENGTH +SnowflakeVariant: + methods: + - of + - sql +SnsCoderProviderRegistrar: + methods: + - getCoderProviders +SnsIO: + methods: + - create + - expand + - processElement + - setup + - tearDown + - test + - withAWSClientsProvider + - withClientConfiguration + - withCoder + - withFullPublishResponse + - withFullPublishResponseWithoutHeaders + - withFullPublishResult + - withFullPublishResultWithoutHeaders + - withPublishRequestBuilder + - withPublishRequestFn + - withResultOutputTag + - withRetryConfiguration + - withSnsClientProvider + - withTopicArn + - withTopicName + - write + - writeAsync +SocketAddressFactory: + methods: + - createFrom +SolrIO: + methods: + - baseUrl + - closeClient + - coreName + - coreUrl + - create + - expand + - finishBundle + - from + - populateDisplayData + - process + - processElement + - read + - readAll + - setup + - startBundle + - test + - to + - withBasicCredentials + - withConnectionConfiguration + - withMaxBatchSize + - withQuery + - withReplicaInfo + - withRetryConfiguration + - write +SortValues: + methods: + - create + - expand + - hasNext + - iterator + - next + - processElement + - remove +SortedMapCoder: + methods: + - consistentWithEquals + - decode + - encode + - getCoderArguments + - getEncodedTypeDescriptor + - getKeyCoder + - getValueCoder + - of + - registerByteSizeObserver + - structuralValue + - verifyDeterministic +Source: + methods: + - advance + - close + - getCurrent + - getCurrentSource + - getCurrentTimestamp + - getDefaultOutputCoder + - getOutputCoder + - populateDisplayData + - start + - validate +SourceMetrics: + methods: + - backlogBytes + - backlogBytesOfSplit + - backlogElements + - backlogElementsOfSplit + - bytesRead + - bytesReadBySplit + - elementsRead + - elementsReadBySplit +SourceRecordJson: + methods: + - mapSourceRecord + - toJson +SourceTestUtils: + methods: + - advance + - assertSourcesEqualReferenceSource + - assertSplitAtFractionBehavior + - assertSplitAtFractionExhaustive + - assertSplitAtFractionFails + - assertSplitAtFractionSucceedsAndConsistent + - assertUnstartedReaderReadsSameAsItsSource + - close + - createReader + - createStructuralValues + - equals + - getCurrent + - getCurrentSource + - getCurrentTimestamp + - getEstimatedSizeBytes + - getFractionConsumed + - getOutputCoder + - getSplitPointsConsumed + - getSplitPointsRemaining + - hashCode + - populateDisplayData + - readFromSource + - readFromSplitsOfSource + - readFromStartedReader + - readFromUnstartedReader + - readNItemsFromStartedReader + - readNItemsFromUnstartedReader + - readRemainingFromReader + - split + - splitAtFraction + - start + - toString + - toUnsplittableSource + - validate + properties: + - numPrimaryItems + - numResidualItems +SpannerAccessor: + methods: + - close + - getBatchClient + - getDatabaseAdminClient + - getDatabaseClient + - getOrCreate +SpannerConfig: + methods: + - build + - create + - getCommitDeadline + - getCommitRetrySettings + - getDatabaseId + - getDatabaseRole + - getEmulatorHost + - getExecuteStreamingSqlRetrySettings + - getHost + - getInstanceId + - getIsLocalChannelProvider + - getMaxCumulativeBackoff + - getProjectId + - getRetryableCodes + - getRpcPriority + - populateDisplayData + - validate + - withCommitDeadline + - withCommitRetrySettings + - withDatabaseId + - withDatabaseRole + - withEmulatorHost + - withExecuteStreamingSqlRetrySettings + - withHost + - withInstanceId + - withIsLocalChannelProvider + - withMaxCumulativeBackoff + - 
withProjectId + - withRetryableCodes + - withRpcPriority +SpannerIO: + methods: + - build + - compareTo + - createTransaction + - expand + - finishBundle + - grouped + - load + - of + - output + - outputWithTimestamp + - populateDisplayData + - processElement + - read + - readAll + - readChangeStream + - setSpannerConfig + - setTimestampBound + - setup + - teardown + - withBatchSizeBytes + - withBatching + - withChangeStreamName + - withColumns + - withCommitDeadline + - withDatabaseId + - withDialectView + - withEmulatorHost + - withFailureMode + - withGroupingFactor + - withHighPriority + - withHost + - withInclusiveEndAt + - withInclusiveStartAt + - withIndex + - withInstanceId + - withKeySet + - withLowPriority + - withMaxCumulativeBackoff + - withMaxNumMutations + - withMaxNumRows + - withMetadataDatabase + - withMetadataInstance + - withMetadataTable + - withPartitionOptions + - withProjectId + - withQuery + - withQueryName + - withReadOperation + - withRpcPriority + - withSchemaReadySignal + - withSpannerConfig + - withTable + - withTimestamp + - withTimestampBound + - withTraceSampleProbability + - withTransaction + - write +SpannerTransformRegistrar: + methods: + - buildExternal + - getReadOperation + - knownBuilderInstances + - setBatching + - setCommitDeadline + - setDatabaseId + - setEmulatorHost + - setGroupingFactor + - setHost + - setInstanceId + - setMaxBatchSizeBytes + - setMaxCumulativeBackoff + - setMaxNumberMutations + - setMaxNumberRows + - setProjectId + - setReadTimestamp + - setSchema + - setSql + - setStaleness + - setTable + - setTimeUnit + - setTimestampBoundMode + properties: + - DELETE_URN + - INSERT_OR_UPDATE_URN + - INSERT_URN + - READ_URN + - REPLACE_URN + - UPDATE_URN +SpannerWriteResult: + methods: + - expand + - finishSpecifyingOutput + - getFailedMutations + - getOutput + - getPipeline +SpannerWriteSchemaTransformProvider: + methods: + - build + - buildTransform + - builder + - expand + - getDatabaseId + - getInstanceId + - getTableId + - identifier + - inputCollectionNames + - outputCollectionNames + - setDatabaseId + - setInstanceId + - setTableId +SparkReceiverIO: + methods: + - expand + - read + - validateTransform + - withGetOffsetFn + - withSparkReceiverBuilder + - withTimestampFn +Split: + methods: + - named + - negative + - of + - output + - positive + - using +SplitResult: + methods: + - getPrimary + - getResidual + - of +SplunkEvent: + methods: + - create + - event + - host + - index + - newBuilder + - source + - sourceType + - time + - withEvent + - withHost + - withIndex + - withSource + - withSourceType + - withTime +SplunkIO: + methods: + - expand + - processElement + - setup + - withBatchCount + - withDisableCertificateValidation + - withEnableBatchLogs + - withEnableGzipHttpCompression + - withParallelism + - withRootCaCertificatePath + - write +SplunkWriteError: + methods: + - create + - newBuilder + - payload + - statusCode + - statusMessage + - withPayload + - withStatusCode + - withStatusMessage +SqlAnalyzer: {} +SqlBoundedSideInputJoin: + methods: + - calciteSqlBoundedSideInputJoin + - expand + - needsSideInput + - zetaSqlBoundedSideInputJoin +SqlCheckConstraint: + methods: + - getOperandList + - getOperator + - unparse +SqlColumnDeclaration: + methods: + - getOperandList + - getOperator + - unparse +SqlConversionException: {} +SqlCreateExternalTable: + methods: + - execute + - getOperandList + - unparse +SqlCreateFunction: + methods: + - execute + - getOperandList + - getOperator + - unparse +SqlDdlNodes: + methods: + - column + - 
dropTable +SqlDropTable: {} +SqlOperators: + methods: + - createStringAggOperator + - createZetaSqlFunction + - getSyntax + properties: + - ARRAY_AGG_FN + - BIT_XOR + - CAST_OP + - CHAR_LENGTH + - CONCAT + - COUNTIF + - DATE_OP + - ENDS_WITH + - LIKE + - LTRIM + - REPLACE + - REVERSE + - RTRIM + - START_WITHS + - SUBSTR + - TIMESTAMP_OP + - TRIM + - VALIDATE_TIMESTAMP + - VALIDATE_TIME_INTERVAL + - ZETASQL_TIMESTAMP_ADD +SqlQuery0: + methods: + - calciteSqlQuery0 + - expand + - processElement + - zetaSqlQuery0 +SqlQuery1: + methods: + - apply + - expand +SqlQuery2: + methods: + - calciteSqlQuery2 + - expand + - zetaSqlQuery2 +SqlQuery3: + methods: + - calciteSqlQuery3 + - expand + - zetaSqlQuery3 +SqlQuery5: + methods: + - expand +SqlQuery7: + methods: + - expand +SqlSetOptionBeam: + methods: + - execute +SqlTransform: + methods: + - expand + - query + - registerUdaf + - registerUdf + - withAutoLoading + - withDdlString + - withDefaultTableProvider + - withErrorsTransformer + - withNamedParameters + - withPositionalParameters + - withQueryPlannerClass + - withTableProvider +SqlTransformRunner: + methods: + - getIdentifiers + - runUsingSqlTransform + - visit +SqlTypes: + properties: + - DATE + - DATETIME + - TIME + - TIMESTAMP +SqsIO: + methods: + - expand + - processElement + - read + - setup + - teardown + - withClientConfiguration + - withCoder + - withMaxNumRecords + - withMaxReadTime + - withQueueUrl + - withSqsClientProvider + - write +SqsMessage: + methods: + - create + - getBody + - getMessageId + - getReceiptHandle + - getRequestTimeStamp + - getTimeStamp +StateBackedIterable: + methods: + - encode + - fromComponents + - getCoderTranslators + - getCoderURNs + - getComponents + - iterator +StateContexts: + methods: + - getPipelineOptions + - nullContext + - sideInput + - window + - windowOnlyContext +StateFetchingIterators: + methods: + - append + - clearAndAppend + - createIterator + - fromValues + - getBlocks + - getContinuationToken + - getWeight + - hasNext + - isReady + - loadPrefetchedResponse + - mutatedBlock + - next + - prefetch + - readAllAndDecodeStartingFrom + - remove + - seekToContinuationToken + - shrink +StateKeySpec: + methods: + - fields +StateSpecs: + methods: + - bag + - bind + - combining + - combiningFromInputInternal + - convertToBagSpecInternal + - convertToMapSpecInternal + - equals + - finishSpecifying + - hashCode + - map + - match + - offerCoders + - orderedList + - rowBag + - rowMap + - rowOrderedList + - rowSet + - rowValue + - set + - value + - watermarkStateInternal +StaticSchemaInference: + methods: + - fieldFromType + - schemaFromClass + - sortBySchema +StorageApiConvertMessages: + methods: + - expand + - onTeardown + - processElement +StorageApiDynamicDestinationsTableRow: + methods: + - getMessageConverter + - getTableSchema + - toMessage + - toTableRow +StorageApiFlushAndFinalizeDoFn: + methods: + - compareTo + - equals + - hashCode + - onTeardown + - process +StorageApiLoads: + methods: + - expand + - expandInconsistent + - expandTriggered + - expandUntriggered + - processElement + - setup +StorageApiWritePayload: + methods: + - getPayload +StorageApiWriteRecordsInconsistent: + methods: + - expand +StorageApiWriteUnshardedRecords: + methods: + - expand + - finishBundle + - output + - outputWithTimestamp + - process + - startBundle + - teardown +StorageApiWritesShardedRecords: + methods: + - expand + - getAllowedTimestampSkew + - onTeardown + - onTimer + - onWindowExpiration + - process + - startBundle + - toString +StreamUtils: + methods: + - 
getBytesWithoutClosing +StreamingInserts: + methods: + - expand + - withExtendedErrorInfo + - withInsertRetryPolicy +StreamingSourceContextImpl: + methods: + - isPreviewEnabled + - registerLineage +StreamingWriteTables: + methods: + - apply + - expand +StringAgg: + methods: + - addInput + - createAccumulator + - extractOutput + - mergeAccumulators +StringDelegateCoder: + methods: + - decode + - encode + - equals + - getEncodedTypeDescriptor + - hashCode + - of + - structuralValue + - toString + - verifyDeterministic +StringFunctions: + methods: + - charLength + - concat + - endsWith + - like + - ltrim + - replace + - reverse + - rtrim + - startsWith + - substr + - trim + properties: + - SUBSTR_PARAMETER_EXCEED_INTEGER +StringUtf8Coder: + methods: + - consistentWithEquals + - decode + - encode + - getEncodedElementByteSize + - getEncodedTypeDescriptor + - of + - verifyDeterministic +StringUtils: + methods: + - byteArrayToJsonString + - getLevenshteinDistance + - jsonStringToByteArray +StringsGenerator: + methods: + - nextExactString + - nextExtra + - nextString +StructuralByteArray: + methods: + - equals + - getValue + - hashCode + - toString +StructuredCoder: + methods: + - equals + - getComponents + - hashCode + - toString +SubscribeTransform: + methods: + - expand + - getReplacementTransform + - mapOutputs + properties: + - V1_READ_OVERRIDE +SubscriberOptions: + methods: + - build + - newBuilder + - setSubscriptionPath + - subscriptionPath + - toBuilder +SubscriptionPartition: {} +SubscriptionPartitionCoder: + methods: + - decode + - encode + - getCoderProvider +SuccessOrFailure: + methods: + - assertionError + - equals + - failure + - hashCode + - isSuccess + - success + - toString +Sum: + methods: + - apply + - doublesGlobally + - doublesPerKey + - equals + - hashCode + - identity + - integersGlobally + - integersPerKey + - longsGlobally + - longsPerKey + - ofDoubles + - ofIntegers + - ofLongs +SumByKey: + methods: + - accumulationMode + - expand + - getValueExtractor + - keyBy + - named + - of + - output + - triggeredBy + - valueBy + - windowBy + - withAllowedLateness + - withOnTimeBehavior + - withTimestampCombiner +SummaryGenerator: + methods: + - generateTable +Sums: + methods: + - apply + - identity + - ofDoubles + - ofFloats + - ofInts + - ofLongs + - valueDesc +SynchronizedStreamObserver: + methods: + - onCompleted + - onError + - onNext + - wrapping +SyntheticBoundedSource: + methods: + - allowsDynamicSplitting + - close + - createReader + - createSourceForSubrange + - getBytesPerOffset + - getCurrent + - getCurrentSource + - getDefaultOutputCoder + - getFractionConsumed + - getMaxEndOffset + - split + - toString + - validate +SyntheticDataPublisher: + methods: + - apply + - main +SyntheticDelay: + methods: + - delay +SyntheticOptions: + methods: + - deserialize + - fromIntegerDistribution + - fromJsonString + - fromRealDistribution + - genKvPair + - getDistribution + - hashFunction + - nextDelay + - sample + - setSeed + - toString + - validate + properties: + - bytesPerRecord + - cpuUtilizationInMixedDelay + - delayType + - hotKeyFraction + - keySizeBytes + - largeKeyFraction + - largeKeySizeBytes + - numHotKeys + - seed + - valueSizeBytes +SyntheticSourceOptions: + methods: + - genRecord + - nextInitializeDelay + - nextProcessingTimeDelay + - validate + properties: + - bundleSizeDistribution + - forceNumInitialBundles + - kv + - numRecords + - progressShape + - sleepMsec + - splitPointFrequencyRecords + - watermarkDriftMillis + - watermarkSearchInAdvanceCount +SyntheticStep: 
+ methods: + - load + - processElement + - startBundle + - validate + properties: + - maxWorkerThroughput + - outputRecordsPerInputRecord + - perBundleDelay + - perBundleDelayType + - preservesInputKeyDistribution + - reportThrottlingMicros +SyntheticUnboundedSource: + methods: + - advance + - close + - createReader + - getCheckpointMark + - getCheckpointMarkCoder + - getCurrent + - getCurrentSource + - getCurrentTimestamp + - getOutputCoder + - getWatermark + - split + - start + - toString + - validate +TDigestQuantiles: + methods: + - addInput + - create + - createAccumulator + - decode + - encode + - expand + - extractOutput + - getAccumulatorCoder + - getDefaultOutputCoder + - globally + - isRegisterByteSizeObserverCheap + - mergeAccumulators + - perKey + - populateDisplayData + - withCompression +TFRecordIO: + methods: + - allowsDynamicSplitting + - apply + - createWriteOperation + - createWriter + - expand + - flush + - from + - getCurrent + - getOutputCoder + - matches + - open + - populateDisplayData + - read + - readFiles + - recordLength + - sink + - to + - toResource + - withCompression + - withCompressionType + - withNoSpilling + - withNumShards + - withShardNameTemplate + - withSuffix + - withoutSharding + - withoutValidation + - write + properties: + - DEFAULT_BYTE_ARRAY_CODER +TVFSlidingWindowFn: + methods: + - assignWindows + - getDefaultWindowMappingFn + - getPeriod + - getSize + - isCompatible + - of + - windowCoder +TVFStreamingUtils: + properties: + - FIXED_WINDOW_TVF + - SESSION_WINDOW_TVF + - SLIDING_WINDOW_TVF + - WINDOW_END + - WINDOW_START +Table: + methods: + - build + - builder + - comment + - getComment + - getLocation + - getName + - getProperties + - getSchema + - getType + - location + - name + - properties + - schema + - toBuilder + - type +TableAndRecord: + methods: + - getRecord + - getTable +TableDestination: + methods: + - equals + - getClustering + - getJsonClustering + - getJsonTimePartitioning + - getTableDescription + - getTableReference + - getTableSpec + - getTableUrn + - getTimePartitioning + - hashCode + - toString + - withTableReference +TableDestinationCoder: + methods: + - decode + - encode + - of + - verifyDeterministic +TableDestinationCoderV2: + methods: + - decode + - encode + - of + - verifyDeterministic +TableDestinationCoderV3: + methods: + - decode + - encode + - of + - verifyDeterministic +TableName: + methods: + - create + - getPath + - getPrefix + - getTableName + - isCompound + - isSimple + - removePrefix +TableNameExtractionUtils: + methods: + - extractTableNamesFromNode +TableResolution: + methods: + - registerTables + - resolveCalciteTable +TableRowJsonCoder: + methods: + - decode + - encode + - getEncodedElementByteSize + - getEncodedTypeDescriptor + - of + - verifyDeterministic +TableRowToStorageApiProto: + methods: + - getDescriptorFromTableSchema + - getFullName + - getName + - getSchemaForField + - getType + - jsonValueFromMessageValue + - messageFromMap + - messageFromTableRow + - modeToProtoMode + - protoModeToJsonMode + - protoSchemaToTableSchema + - protoTableFieldToTableField + - protoTypeToJsonType + - schemaToProtoTableSchema + - tableFieldToProtoTableField + - tableRowFromMessage + - typeToProtoType +TableSchema: + methods: + - array + - arrayElementType + - build + - builder + - columnType + - columns + - defaultType + - defaultValue + - enum16 + - enum8 + - enumValues + - fixedString + - fixedStringSize + - getEquivalentFieldType + - getEquivalentSchema + - materializedOrAlias + - name + - nullable + - of + - 
parse + - parseDefaultExpression + - typeName + - withNullable + properties: + - DATE + - DATETIME + - FLOAT32 + - FLOAT64 + - INT16 + - INT32 + - INT64 + - INT8 + - STRING + - UINT16 + - UINT32 + - UINT64 + - UINT8 +TableSchemaCache: + methods: + - getSchema + - isSatisfied + - putSchemaIfAbsent + - refreshSchema + - refreshThread +TableSchemaJSONLoader: + methods: + - getAllTableNames + - parseTableSchema +TaggedPValue: + methods: + - getTag + - getValue + - of + - ofExpandedValue +TestBigQuery: + methods: + - apply + - assertThatAllRows + - create + - evaluate + - eventually + - getFlatJsonRows + - insertRows + - now + - tableReference + - tableSpec +TestPipeline: + methods: + - apply + - create + - enableAbandonedNodeEnforcement + - enableAutoRunIfMissing + - evaluate + - fromOptions + - getOptions + - isEmpty + - leaveCompositeTransform + - newProvider + - run + - runWithAdditionalOptionArgs + - testingPipelineOptions + - toString + - verifyPAssertsSucceeded + - visitPrimitiveTransform + properties: + - PROPERTY_BEAM_TEST_PIPELINE_OPTIONS +TestPubsub: + methods: + - apply + - assertSubscriptionEventuallyCreated + - assertThatTopicEventuallyReceives + - checkIfAnySubscriptionExists + - create + - evaluate + - fromOptions + - publish + - subscriptionPath + - topicPath + - waitForNMessages +TestPubsubSignal: + methods: + - apply + - create + - evaluate + - expand + - processElement + - signalStart + - signalSuccessWhen + - waitForStart + - waitForSuccess +TestStream: + methods: + - add + - addElements + - advanceBy + - advanceProcessingTime + - advanceTo + - advanceWatermarkTo + - advanceWatermarkToInfinity + - create + - decode + - encode + - equals + - expand + - fromRawEvents + - getCoderArguments + - getElements + - getEvents + - getProcessingTimeAdvance + - getValueCoder + - getWatermark + - hashCode + - of + - verifyDeterministic +TextIO: + methods: + - apply + - expand + - flush + - from + - matches + - open + - populateDisplayData + - read + - readAll + - readFiles + - sink + - skipIfEmpty + - to + - toResource + - watchForNewFiles + - withCompression + - withCompressionType + - withDelimiter + - withEmptyMatchTreatment + - withFooter + - withFormatFunction + - withHeader + - withHintMatchesManyFiles + - withMatchConfiguration + - withNoSpilling + - withNumShards + - withOutputFilenames + - withShardNameTemplate + - withSuffix + - withTempDirectory + - withWindowedWrites + - withWritableByteChannelFactory + - withoutSharding + - write + - writeCustomType +TextJsonTable: {} +TextMessageMapper: + methods: + - apply +TextRowCountEstimator: + methods: + - build + - builder + - estimateRowCount + - getCompression + - getDelimiters + - getDirectoryTreatment + - getEmptyMatchTreatment + - getFilePattern + - getNumSampledBytesPerFile + - getSamplingStrategy + - setCompression + - setDelimiters + - setDirectoryTreatment + - setEmptyMatchTreatment + - setFilePattern + - setNumSampledBytesPerFile + - setSamplingStrategy + - stopSampling +TextSourceBenchmark: + methods: + - benchmarkHadoopLineReader + - benchmarkTextSource + - createFile + - deleteFile + properties: + - length + - path + - pathString +TextTable: + methods: + - buildIOReader + - buildIOWriter + - getFilePattern + - getTableStatistics + - isBounded +TextTableProvider: + methods: + - buildBeamSqlTable + - create + - deadLetterFile + - expand + - getCsvFormat + - getTableType + - processElement + - schema +TextualIntegerCoder: + methods: + - decode + - encode + - getEncodedTypeDescriptor + - of + - verifyDeterministic 
+ThriftCoder: + methods: + - decode + - encode + - of +ThriftIO: + methods: + - build + - close + - expand + - flush + - open + - populateDisplayData + - processElement + - readFiles + - sink + - withProtocol + - write +ThriftPayloadSerializerProvider: + methods: + - getSerializer + - identifier +ThriftSchema: + methods: + - custom + - fieldValueGetters + - fieldValueTypeInformations + - get + - name + - provider + - schemaFor + - schemaTypeCreator + - toString + - typedef +TikaIO: + methods: + - expand + - filepattern + - parse + - parseFiles + - populateDisplayData + - processElement + - setup + - withContentTypeHint + - withInputMetadata + - withTikaConfigPath +Time: + methods: + - getArgument + - getArgumentType + - getBaseType + - getIdentifier + - toBaseType + - toInputType + properties: + - IDENTIFIER +TimeMonitor: + methods: + - processElement +TimeUtil: + methods: + - toJava + - toJoda +TimerEndpoint: + methods: + - create + - getCoder + - getReceiver + - getTimerFamilyId + - getTransformId +TimerSpecs: + methods: + - getTimeDomain + - timer + - timerMap +TimestampEncoding: {} +TimestampExtractTransform: + methods: + - expand + - of + - processElement +TimestampFunctions: + methods: + - timestamp +TimestampPolicy: + methods: + - getBacklogCheckTime + - getMessageBacklog + - getTimestampForRecord + - getWatermark +TimestampPrefixingWindowCoder: + methods: + - consistentWithEquals + - decode + - encode + - getCoderArguments + - getWindowCoder + - isRegisterByteSizeObserverCheap + - of + - registerByteSizeObserver + - verifyDeterministic +TimestampRange: + methods: + - equals + - getFrom + - getTo + - hashCode + - of + - toString +TimestampRangeTracker: + methods: + - checkDone + - currentRestriction + - getProgress + - isBounded + - setTimeSupplier + - tryClaim + - trySplit +TimestampTransform: + methods: + - alignTo + - delay + - getDelay + - getOffset + - getPeriod +TimestampUtils: + methods: + - next + - previous + - toNanos + - toTimestamp +TimestampedValue: + methods: + - atMinimumTimestamp + - decode + - encode + - equals + - getCoderArguments + - getComponents + - getEncodedTypeDescriptor + - getTimestamp + - getValue + - getValueCoder + - hashCode + - of + - structuralValue + - toString + - verifyDeterministic +ToJson: + methods: + - expand + - of + - processElement +ToString: + methods: + - apply + - elements + - expand + - iterables + - kvs +ToStringFnRunner: + methods: + - getPTransformRunnerFactories +Top: + methods: + - addInput + - compare + - createAccumulator + - decode + - encode + - equals + - extractOutput + - getAccumulatorCoder + - getIncompatibleGlobalWindowErrorMessage + - getNameOverride + - hashCode + - isRegisterByteSizeObserverCheap + - largest + - largestDoublesFn + - largestFn + - largestIntsFn + - largestLongsFn + - largestPerKey + - mergeAccumulator + - of + - perKey + - populateDisplayData + - registerByteSizeObserver + - smallest + - smallestDoublesFn + - smallestFn + - smallestIntsFn + - smallestLongsFn + - smallestPerKey + - verifyDeterministic +TopPerKey: + methods: + - accumulationMode + - expand + - getScoreExtractor + - getScoreType + - getValueExtractor + - getValueType + - keyBy + - named + - of + - output + - scoreBy + - triggeredBy + - valueBy + - windowBy + - withAllowedLateness + - withOnTimeBehavior + - withTimestampCombiner +TopicPartitionCoder: + methods: + - decode + - encode + - getCoderArguments + - verifyDeterministic +TpcdsOptionsRegistrar: + methods: + - getPipelineOptions +TpcdsParametersReader: + methods: + - 
getAndCheckDataSize + - getAndCheckQueryNames + - getAndCheckTpcParallel + properties: + - ALL_QUERY_NAMES +TpcdsRun: + methods: + - call +TpcdsRunResult: + methods: + - getDataSize + - getDialect + - getElapsedTime + - getEndDate + - getIsSuccessful + - getJobName + - getPipelineOptions + - getPipelineResult + - getQueryName + - getStartDate +TpcdsSchemas: + methods: + - getCallCenterSchema + - getCatalogPageSchema + - getCatalogReturnsSchema + - getCatalogSalesSchema + - getCustomerAddressSchema + - getCustomerDemographicsSchema + - getCustomerSchema + - getDateDimSchema + - getHouseholdDemographicsSchema + - getIncomeBandSchema + - getInventorySchema + - getItemSchema + - getPromotionSchema + - getReasonSchema + - getShipModeSchema + - getStoreReturnsSchema + - getStoreSalesSchema + - getStoreSchema + - getTimeDimSchema + - getTpcdsSchemas + - getTpcdsSchemasImmutableMap + - getWarehouseSchema + - getWebReturnsSchema + - getWebSalesSchema + - getWebSiteSchema + - getWebpageSchema +TpcdsUtils: {} +TrackerWithProgress: {} +Transaction: + methods: + - create + - transactionId +TransformHierarchy: + methods: + - addComposite + - finishSpecifyingInput + - getCurrent + - getEnclosingNode + - getFullName + - getInputs + - getOutputs + - getTransform + - isCompositeNode + - isRootNode + - popNode + - pushNode + - replaceChild + - replaceNode + - replaceOutputs + - setOutput + - toAppliedPTransform + - toString + - visit +Transport: + methods: + - getJsonFactory + - getTransport + - newStorageClient + properties: + - rootUrl + - servicePath +Trigger: + methods: + - equals + - getContinuationTrigger + - getWatermarkThatGuaranteesFiring + - hashCode + - isCompatible + - mayFinish + - orFinally + - subTriggers + - toString +Triple: + methods: + - equals + - getFirst + - getSecond + - getThird + - hashCode + - of + - toString +TupleTag: + methods: + - equals + - getId + - getOutName + - getTypeDescriptor + - hashCode + - toString +TupleTagList: + methods: + - and + - empty + - get + - getAll + - of + - size + - toString +TypeAware: {} +TypeAwareness: + methods: + - orObjects +TypeCode: + methods: + - equals + - getCode + - hashCode + - toString +TypeDescriptor: + methods: + - equals + - getArgumentTypes + - getClasses + - getComponentType + - getInterfaces + - getRawType + - getSupertype + - getType + - getTypeParameter + - getTypes + - hasUnresolvedParameters + - hashCode + - isArray + - isSubtypeOf + - isSupertypeOf + - of + - resolveType + - toString + - where +TypeDescriptors: + methods: + - bigdecimals + - bigintegers + - booleans + - bytes + - characters + - doubles + - extractFromTypeParameters + - floats + - inputOf + - integers + - iterables + - kvs + - lists + - longs + - maps + - nulls + - outputOf + - rows + - sets + - shorts + - strings + - voids +TypeParameter: + methods: + - equals + - hashCode + - toString +TypeUtils: + methods: + - keyValues + - triplets +TypedCombineFnDelegate: + methods: + - addInput + - apply + - compact + - createAccumulator + - defaultValue + - extractOutput + - getAccumTVariable + - getAccumulatorCoder + - getDefaultOutputCoder + - getIncompatibleGlobalWindowErrorMessage + - getInputTVariable + - getInputType + - getOutputTVariable + - getOutputType + - mergeAccumulators + - populateDisplayData +TypedSchemaTransformProvider: + methods: + - configurationSchema + - dependencies + - from +UdafImpl: + methods: + - getCombineFn + - getImplementor + - getName + - getOrdinal + - getParameters + - getReturnType + - getType + - isOptional 
+UdfImplReflectiveFunctionBase: + methods: + - add + - addMethodParameters + - build + - builder + - getName + - getOrdinal + - getParameters + - getType + - isOptional + properties: + - method + - parameters +UdfTestProvider: + methods: + - addInput + - createAccumulator + - extractOutput + - helloWorld + - increment + - incrementAll + - isNull + - matches + - mergeAccumulators + - notRegistered + - userDefinedAggregateFunctions + - userDefinedScalarFunctions +UnboundedEventSource: + methods: + - advance + - close + - createReader + - getCheckpointMark + - getCheckpointMarkCoder + - getCurrent + - getCurrentSource + - getCurrentTimestamp + - getDefaultOutputCoder + - getSplitBacklogBytes + - getWatermark + - split + - start + - toString + - validate +UnboundedReaderImpl: + methods: + - advance + - close + - getCheckpointMark + - getCurrent + - getCurrentSource + - getCurrentTimestamp + - getSplitBacklogBytes + - getWatermark + - start +UnboundedScheduledExecutorService: + methods: + - awaitTermination + - call + - cancel + - compareTo + - execute + - getDelay + - invokeAll + - invokeAny + - isPeriodic + - isShutdown + - isTerminated + - run + - schedule + - scheduleAtFixedRate + - scheduleWithFixedDelay + - shutdown + - shutdownNow + - submit +UnboundedSource: + methods: + - advance + - createReader + - finalizeCheckpoint + - getCheckpointMark + - getCheckpointMarkCoder + - getCurrentRecordId + - getCurrentSource + - getSplitBacklogBytes + - getTotalBacklogBytes + - getWatermark + - requiresDeduping + - split + - start + properties: + - BACKLOG_UNKNOWN +UnboundedSourceImpl: + methods: + - createReader + - getCheckpointMarkCoder + - getOutputCoder + - split +Union: + methods: + - named + - of + - output +UnionCoder: + methods: + - decode + - encode + - getCoderArguments + - getComponents + - getElementCoders + - isRegisterByteSizeObserverCheap + - of + - registerByteSizeObserver + - verifyDeterministic +UnionTranslator: + methods: + - translate +UnknownLogicalType: + methods: + - getPayload +UnownedInputStream: + methods: + - close + - equals + - hashCode + - mark + - markSupported + - reset + - toString +UnownedOutputStream: + methods: + - close + - equals + - hashCode + - toString + - write +UnsignedOptions: + methods: + - build + - builder + - setUint16Behavior + - setUint32Behavior + - setUint64Behavior + - setUint8Behavior + - toBuilder + - uint16Behavior + - uint32Behavior + - uint64Behavior + - uint8Behavior + - usingHigherBitSize + - usingSameBitSize +UpdateConfiguration: + methods: + - create + - withFindKey + - withIsUpsert + - withUpdateFields + - withUpdateKey +UpdateField: + methods: + - fieldUpdate + - fullUpdate +UpdateSchemaDestination: + methods: + - finishBundle + - onTeardown + - processElement + - startBundle +UploadIdResponseInterceptor: + methods: + - interceptResponse +UserCodeException: + methods: + - wrap + - wrapIf +UserFunctionDefinitions: + methods: + - build + - create + - jarPath + - javaAggregateFunctions + - javaScalarFunctions + - method + - newBuilder + - setJavaAggregateFunctions + - setJavaScalarFunctions + - setSqlScalarFunctions + - setSqlTableValuedFunctions + - sqlScalarFunctions + - sqlTableValuedFunctions +UsesAttemptedMetrics: {} +UsesCounterMetrics: {} +UsesDistributionMetrics: {} +UsesGaugeMetrics: {} +UsesImpulse: {} +UsesOrderedListState: {} +Uuid: + methods: + - of + - random + - value + properties: + - DEFAULT_ATTRIBUTE +UuidCoder: + methods: + - decode + - encode + - getCoderProvider +UuidDeduplicationOptions: + methods: + - build + - 
deduplicate + - newBuilder + - setDeduplicate + - setUuidExtractor + - uuidExtractor + properties: + - DEFAULT_DEDUPLICATE_DURATION + - DEFAULT_TIME_DOMAIN + - DEFAULT_UUID_EXTRACTOR +UuidDeduplicationTransform: + methods: + - expand +UuidLogicalType: + methods: + - getArgument + - getArgumentType + - getBaseType + - getIdentifier + - toBaseType + - toInputType + properties: + - IDENTIFIER + - LEAST_SIGNIFICANT_BITS_FIELD_NAME + - MOST_SIGNIFICANT_BITS_FIELD_NAME + - UUID_SCHEMA +ValueInSingleWindow: + methods: + - decode + - encode + - getCoderArguments + - getComponents + - getPane + - getTimestamp + - getValue + - getWindow + - of + - verifyDeterministic +ValueProviders: + methods: + - updateSerializedOptions +ValueWithRecordId: + methods: + - decode + - encode + - equals + - getCoderArguments + - getId + - getValue + - getValueCoder + - hashCode + - of + - processElement + - toString + - verifyDeterministic +Values: + methods: + - apply + - create + - expand +VarInt: + methods: + - decodeInt + - decodeLong + - encode + - getLength +VarIntCoder: + methods: + - consistentWithEquals + - decode + - encode + - getEncodedTypeDescriptor + - isRegisterByteSizeObserverCheap + - of + - verifyDeterministic +VarLongCoder: + methods: + - consistentWithEquals + - decode + - encode + - getCoderArguments + - getEncodedTypeDescriptor + - isRegisterByteSizeObserverCheap + - of + - verifyDeterministic +VariableBytes: + methods: + - getMaxLength + - getName + - of + - toInputType + - toString + properties: + - IDENTIFIER +VariableString: + methods: + - getMaxLength + - getName + - of + - toInputType + - toString + properties: + - IDENTIFIER +VarianceFn: + methods: + - addInput + - createAccumulator + - extractOutput + - getAccumulatorCoder + - mergeAccumulators + - newPopulation + - newSample +VideoIntelligence: + methods: + - annotateFromBytes + - annotateFromBytesWithContext + - annotateFromURI + - annotateFromUriWithContext + - expand +View: + methods: + - apply + - asIterable + - asList + - asMap + - asMultimap + - asSingleton + - defaultValue + - expand + - finishBundle + - getView + - hasDefaultValue + - identity + - of + - processElement + - withDefaultValue + - withSingletonValues +ViewFn: + methods: + - apply + - getMaterialization + - getTypeDescriptor +VoidAccumulatorProvider: + methods: + - add + - create + - get + - getCounter + - getFactory + - getHistogram + - getTimer + - increment +VoidCoder: + methods: + - decode + - encode + - getEncodedTypeDescriptor + - isRegisterByteSizeObserverCheap + - of + - structuralValue + - verifyDeterministic +Wait: + methods: + - expand + - finishBundle + - on + - process + - startBundle +Watch: + methods: + - afterIterations + - afterTimeSinceNewOutput + - afterTotalOf + - allOf + - canStopPolling + - checkDone + - complete + - currentRestriction + - decode + - eitherOf + - encode + - equals + - expand + - forNewInput + - getCoderArguments + - getCompleted + - getInitialRestriction + - getInitialWatermarkEstimatorState + - getPending + - getPollWatermark + - getRestrictionCoder + - getStateCoder + - getTerminationState + - growthOf + - hashCode + - ignoreInput + - incomplete + - isBounded + - never + - newTracker + - newWatermarkEstimator + - of + - onPollComplete + - onSeenNewOutput + - process + - processElement + - toString + - tryClaim + - trySplit + - verifyDeterministic + - withOutputCoder + - withOutputKeyCoder + - withOutputs + - withPollInterval + - withTerminationPerInput + - withWatermark +WatermarkEstimators: + methods: + - currentWatermark + - 
getState + - getWatermarkAndState + - observeTimestamp + - setWatermark + - threadSafe +WatermarkLatency: + methods: + - main + - process +WatermarkParameters: + methods: + - builder + - create + - toBuilder + - withTimestampFn + - withWatermarkIdleDurationThreshold +WebPathParser: + methods: + - parseDicomWebpath + properties: + - dataset + - dicomStorePath + - instanceId + - location + - project + - seriesId + - storeId + - studyId +WeightedValue: + methods: + - equals + - getValue + - getWeight + - hashCode + - of + - toString +Window: + methods: + - accumulatingFiredPanes + - apply + - configure + - discardingFiredPanes + - expand + - getOutputStrategyInternal + - getWindowFn + - into + - populateDisplayData + - remerge + - triggering + - withAllowedLateness + - withOnTimeBehavior + - withTimestampCombiner +WindowFn: + methods: + - assignWindows + - assignsToOneWindow + - element + - getDefaultWindowMappingFn + - getWindowTypeDescriptor + - isCompatible + - isNonMerging + - merge + - mergeWindows + - populateDisplayData + - timestamp + - verifyCompatibility + - window + - windowCoder + - windows +WindowFnTestUtils: + methods: + - assignedWindows + - assignedWindowsWithValue + - element + - get + - merge + - put + - runWindowFn + - runWindowFnWithValue + - set + - timestamp + - validateGetOutputTimestamps + - validateGetOutputTimestampsWithValue + - window + - windows +WindowMappingFn: + methods: + - getSideInputWindow + - maximumLookback +WindowMappingFnRunner: + methods: + - getPTransformRunnerFactories +WindowMergingFnRunner: + methods: + - getPTransformRunnerFactories + - merge + - windows +WindowTracing: + methods: + - debug + - trace +WindowedValue: + methods: + - decode + - encode + - equals + - explodeWindows + - fromComponents + - getCoderArguments + - getComponents + - getFullCoder + - getPane + - getParamWindowedValueCoder + - getPayload + - getTimestamp + - getValue + - getValueCoder + - getValueOnlyCoder + - getWindow + - getWindowCoder + - getWindows + - getWindowsCoder + - hashCode + - isSingleWindowedValue + - of + - registerByteSizeObserver + - timestampedValueInGlobalWindow + - toString + - valueInGlobalWindow + - verifyDeterministic + - withValue + - withValueCoder +WindowingStrategy: + methods: + - equals + - fixDefaults + - getAllowedLateness + - getClosingBehavior + - getEnvironmentId + - getMode + - getOnTimeBehavior + - getTimestampCombiner + - getTrigger + - getWindowFn + - globalDefault + - hashCode + - isAllowedLatenessSpecified + - isAlreadyMerged + - isModeSpecified + - isTimestampCombinerSpecified + - isTriggerSpecified + - needsMerge + - of + - toString + - withAllowedLateness + - withAlreadyMerged + - withClosingBehavior + - withEnvironmentId + - withMode + - withOnTimeBehavior + - withTimestampCombiner + - withTrigger + - withWindowFn +WinningBids: + methods: + - assignWindows + - decode + - encode + - equals + - expand + - forAuction + - forBid + - getDefaultWindowMappingFn + - hashCode + - isAuctionWindow + - isCompatible + - mergeWindows + - of + - processElement + - structuralValue + - toString + - verifyDeterministic + - windowCoder + properties: + - auction + - isAuctionWindow +WinningBidsSimulator: {} +WithFailures: + methods: + - apply + - element + - exception + - expand + - failures + - failuresTo + - finishSpecifyingOutput + - getPipeline + - of + - output +WithKeys: + methods: + - apply + - expand + - of + - process + - withKeyType +WithTimestamps: + methods: + - expand + - getAllowedTimestampSkew + - of + - processElement + - 
withAllowedTimestampSkew +WordCount: + methods: + - apply + - expand + - main + - processElement +WrappedSupervisor: + methods: + - createBlockGenerator + - getCurrentRateLimit + - isReceiverStopped + - logInfo + - onReceiverStart + - pushArrayBuffer + - pushBytes + - pushIterator + - pushSingle + - reportError +WritableCoder: + methods: + - coderFor + - decode + - encode + - equals + - getCoderArguments + - getCoderProvider + - getCoderProviders + - hashCode + - of + - verifyDeterministic +WriteBuilder: + methods: + - buildExternal + - getCreateDisposition + - getTableSchema + - getWriteDisposition + - setCreateDisposition + - setTableSchema + - setWriteDisposition +WriteFiles: + methods: + - apply + - assignShardKey + - equals + - expand + - finishBundle + - getAdditionalInputs + - getComputeNumShards + - getNumShardsProvider + - getShardingFunction + - getSink + - getWindowedWrites + - hashCode + - populateDisplayData + - process + - processElement + - startBundle + - to + - validate + - withMaxNumWritersPerBundle + - withNoSpilling + - withNumShards + - withRunnerDeterminedSharding + - withSharding + - withShardingFunction + - withSideInputs + - withSkipIfEmpty + - withWindowedWrites + properties: + - CONCRETE_CLASS +WriteFilesResult: + methods: + - expand + - finishSpecifyingOutput + - getPerDestinationOutputFilenames + - getPipeline +WriteJmsResult: + methods: + - expand + - finishSpecifyingOutput + - getFailedMessages + - getPipeline +WriteResult: + methods: + - expand + - finishSpecifyingOutput + - getFailedInserts + - getFailedInsertsWithErr + - getFailedStorageApiInserts + - getPipeline + - getSuccessfulInserts + - getSuccessfulTableLoads +WriteToPulsarDoFn: + methods: + - processElement + - setup + - teardown +XmlIO: + methods: + - apply + - expand + - flush + - from + - matches + - open + - populateDisplayData + - read + - readFiles + - sink + - to + - withCharset + - withCompression + - withCompressionType + - withMinBundleSize + - withRecordClass + - withRecordElement + - withRootElement + - withValidationEventHandler + - write +XmlSource: + methods: + - getCurrent + - getCurrentSource + - getOutputCoder +ZetaSQLQueryPlanner: + methods: + - convertToBeamRel + - createPlanner + - getDefaultTimezone + - getLanguageOptions + - getZetaSqlRuleSets + - parse + - setDefaultTimezone + properties: + - DEFAULT_CALC + - FACTORY +ZetaSqlBeamTranslationUtils: + methods: + - toBeamObject + - toBeamRow + - toBeamType + - toZetaSqlStructType + - toZetaSqlStructValue + - toZetaSqlType + - toZetaSqlValue +ZetaSqlCalciteTranslationUtils: + methods: + - toCalciteType + - toRexNode + - toZetaSqlType + properties: + - ZETASQL_NUMERIC_MAX_VALUE + - ZETASQL_NUMERIC_MIN_VALUE + - ZETASQL_NUMERIC_SCALE +ZetaSqlException: {} +ZetaSqlScalarFunctionImpl: + methods: + - create + properties: + - functionGroup +ZetaSqlUnnest: + methods: + - copy + - create + - deriveUncollectRowType + - explainTerms + properties: + - withOrdinality +ZetaSqlUserDefinedSQLNativeTableValuedFunction: {} +ZipFiles: + methods: + - iterator + - openStream + - toString + - zipDirectory + - zipDirectoryOverwrite +ZstdCoder: + methods: + - consistentWithEquals + - decode + - encode + - equals + - getCoderArguments + - hashCode + - of + - structuralValue + - toString + - verifyDeterministic diff --git a/playground/frontend/playground_components/assets/symbols/python.g.yaml b/playground/frontend/playground_components/assets/symbols/python.g.yaml index e3edda0b3a4d7..01d5afbf2d89b 100644 --- 
a/playground/frontend/playground_components/assets/symbols/python.g.yaml +++ b/playground/frontend/playground_components/assets/symbols/python.g.yaml @@ -378,11 +378,6 @@ ArtifactRetrievalService: methods: - GetArtifact - ResolveArtifacts -ArtifactRetrievalServiceServicer: - methods: - - GetArtifact - - ResolveArtifacts -ArtifactRetrievalServiceStub: {} Artifacts: properties: - images @@ -392,10 +387,6 @@ ArtifactStagingService: - register_job - resolved_deps - ReverseArtifactRetrievalService -ArtifactStagingServiceServicer: - methods: - - ReverseArtifactRetrievalService -ArtifactStagingServiceStub: {} AsDict: {} AsIter: methods: @@ -556,6 +547,9 @@ AvroTestCoder: properties: - SCHEMA AvroTestRecord: {} +AzureOptions: + methods: + - validate BackgroundCachingJob: methods: - cancel @@ -570,7 +564,11 @@ BagInStateOutputAfterTimer: properties: - EMIT_TIMER - SET_STATE -BagRuntimeState: {} +BagRuntimeState: + methods: + - add + - clear + - read BagStateSpec: methods: - to_runner_api @@ -643,7 +641,6 @@ BatchToElementDoFn: methods: - process_batch BeamAssertException: {} -BeamConstants: {} BeamDataframeDoctestRunner: methods: - fake_pandas_module @@ -657,56 +654,24 @@ BeamFilesystemHandler: methods: - file_reader - file_writer -BeamFnControl: - methods: - - Control - - GetProcessBundleDescriptor BeamFnControlServicer: methods: - Control -BeamFnControlStub: {} -BeamFnData: - methods: - - Data BeamFnDataServicer: methods: - Data - get_conn_by_worker_id -BeamFnDataStub: {} -BeamFnExternalWorkerPool: - methods: - - StartWorker - - StopWorker BeamFnExternalWorkerPoolServicer: methods: - start - StartWorker - StopWorker -BeamFnExternalWorkerPoolStub: {} -BeamFnLogging: - methods: - - Logging BeamFnLoggingServicer: methods: - Logging -BeamFnLoggingStub: {} -BeamFnState: - methods: - - State -BeamFnStateServicer: - methods: - - State -BeamFnStateStub: {} BeamFnStatusServicer: methods: - WorkerStatus -BeamFnWorkerStatus: - methods: - - WorkerStatus -BeamFnWorkerStatusServicer: - methods: - - WorkerStatus -BeamFnWorkerStatusStub: {} BeamIOError: {} BeamJarExpansionService: {} BeamJob: @@ -2184,14 +2149,19 @@ CombiningTriggerDriver: methods: - process_elements - process_timer -CombiningValueRuntimeState: {} +CombiningValueRuntimeState: + methods: + - add + - clear + - commit + - finalize + - read CombiningValueStateSpec: methods: - to_runner_api Command: methods: - run -CommitManifestResponse: {} ComparableValue: methods: - hydrate @@ -2357,7 +2327,9 @@ ConvertToPubSubMessage: - process CopyRequest: {} CorruptMainSessionException: {} -Count: {} +Count: + methods: + - expand Count1: methods: - expand @@ -2370,11 +2342,18 @@ CountAccumulator: CountAndLog: methods: - expand -CountCombineFn: {} +CountCombineFn: + methods: + - add_input + - add_inputs + - create_accumulator + - extract_output + - merge_accumulators Counter: methods: - - dec - - inc + - error + - get + - increment CounterAggregator: methods: - combine @@ -2468,12 +2447,7 @@ CPUTime: - totalMs Create: methods: - - as_read - - expand - - get_output_type - - get_windowing - - infer_output_type - - to_runner_api_parameter + - apply CreateBitbucketServerConfigOperationMetadata: properties: - bitbucketServerConfig @@ -2492,6 +2466,9 @@ CreateDisposition: properties: - CREATE_IF_NEEDED - CREATE_NEVER +CreateFolderFn: + methods: + - process CreateGitHubEnterpriseConfigOperationMetadata: properties: - completeTime @@ -2553,6 +2530,7 @@ CustomCoder: methods: - decode - encode + - is_deterministic CustomCommands: methods: - finalize_options @@ -2662,6 
+2640,7 @@ DataflowPipelineResult: - is_in_terminal_state - job_id - metrics + - monitoring_infos - state - wait_until_finish DataflowProjectsDeleteSnapshotsRequest: @@ -3582,10 +3561,19 @@ Disk: - mountPoint - sizeGb DisplayData: - methods: - - create_from - - create_from_options - - to_proto + properties: + - boolValue + - durationValue + - floatValue + - int64Value + - javaClassValue + - key + - label + - namespace + - shortStrValue + - strValue + - timestampValue + - url DisplayDataItem: methods: - drop_if_default @@ -3701,7 +3689,12 @@ DoFn: - TimestampParam - WatermarkEstimatorParam - WindowParam -DoFnContext: {} +DoFnContext: + methods: + - element + - set_element + - timestamp + - windows DoFnInfo: methods: - create @@ -3791,7 +3784,9 @@ DriverClassName: - MYSQL - ORACLE - POSTGRESQL -DummyClass: {} +DummyClass: + methods: + - func DummyCoder: methods: - decode @@ -3910,20 +3905,24 @@ Entry: - itemCount - predictedLabel Environment: - methods: - - artifacts - - capabilities - - from_options - - from_runner_api - - get_env_cls_from_urn - - register_urn - - register_urn - - register_urn - - register_urn - - register_urn - - resource_hints - - to_runner_api - - to_runner_api_parameter + properties: + - clusterManagerApiService + - dataset + - debugOptions + - experiments + - flexResourceSchedulingGoal + - internalExperiments + - sdkPipelineOptions + - serviceAccountEmail + - serviceKmsKeyName + - serviceOptions + - shuffleMode + - tempStoragePrefix + - userAgent + - version + - workerPools + - workerRegion + - workerZone EOL: properties: - CRLF @@ -4015,14 +4014,9 @@ ExpandStringsProvider: ExpansionAndArtifactRetrievalStub: methods: - artifact_service -ExpansionMethods: {} -ExpansionService: - methods: - - Expand ExpansionServiceServicer: methods: - Expand -ExpansionServiceStub: {} ExpectedSplitOutcome: properties: - MUST_BE_CONSISTENT_IF_SUCCEEDS @@ -4131,6 +4125,7 @@ ExternalTransform: - outer_namespace - replace_named_inputs - replace_named_outputs + - service - to_runner_api_transform - with_output_types ExternalTransformFinder: @@ -4365,6 +4360,7 @@ FileBasedCacheManager: - size - source - write +FileBasedIOTestOptions: {} FileBasedSink: methods: - close @@ -4511,13 +4507,9 @@ FixedWindows: - from_runner_api_parameter - get_window_coder - to_runner_api_parameter -FixedWindowsPayload: {} Flatten: methods: - - expand - - from_runner_api_parameter - - infer_output_type - - to_runner_api_parameter + - apply FlattenAndDouble: methods: - expand @@ -5028,7 +5020,6 @@ GlobalWindows: - windowed_batch - windowed_value - windowed_value_at_end_of_window -GlobalWindowsPayload: {} GoogleCloudOptions: methods: - validate @@ -5059,11 +5050,7 @@ GroupBy: - force_tuple_keys GroupByKey: methods: - - expand - - from_runner_api_parameter - - infer_output_type - - runner_api_requires_keyed_input - - to_runner_api_parameter + - apply GroupingBuffer: methods: - append @@ -5159,8 +5146,9 @@ HdfsUploader: - finish - put Histogram: - methods: - - update + properties: + - bucketCounts + - firstBucketOffset HistogramAggregator: methods: - combine @@ -5714,18 +5702,6 @@ JobServer: - start - stop JobServerOptions: {} -JobService: - methods: - - Cancel - - DescribePipelineOptions - - GetJobMetrics - - GetJobs - - GetMessageStream - - GetPipeline - - GetState - - GetStateStream - - Prepare - - Run JobServiceHandle: methods: - encode_pipeline_options @@ -5734,19 +5710,6 @@ JobServiceHandle: - run - stage - submit -JobServiceServicer: - methods: - - Cancel - - DescribePipelineOptions - - GetJobMetrics - - GetJobs 
- - GetMessageStream - - GetPipeline - - GetState - - GetStateStream - - Prepare - - Run -JobServiceStub: {} JobStatistics: properties: - completionRatio @@ -5838,6 +5801,7 @@ JrhReadPTransformOverride: - matches JsonCoder: methods: + - decode - encode JsonLogFormatter: methods: @@ -5974,24 +5938,6 @@ LeaseWorkItemResponse: properties: - unifiedWorkerResponse - workItems -LegacyArtifactRetrievalService: - methods: - - GetArtifact - - GetManifest -LegacyArtifactRetrievalServiceServicer: - methods: - - GetArtifact - - GetManifest -LegacyArtifactRetrievalServiceStub: {} -LegacyArtifactStagingService: - methods: - - CommitManifest - - PutArtifact -LegacyArtifactStagingServiceServicer: - methods: - - CommitManifest - - PutArtifact -LegacyArtifactStagingServiceStub: {} LengthPrefixCoder: methods: - as_cloud_object @@ -6032,13 +5978,7 @@ LinearRegressionBenchmarkConfig: - starting_point LineSource: methods: - - default_output_coder - - estimate_size - - get_range_tracker - - read - - split - properties: - - TEST_BUNDLE_SIZE + - read_records ListBatchConverter: methods: - combine_batches @@ -6186,7 +6126,6 @@ LogicalTypeRegistry: - get_logical_type_by_language_type - get_logical_type_by_urn - get_urn_by_logial_type -LogicalTypes: {} ManualWatermarkEstimator: methods: - current_watermark @@ -6361,9 +6300,7 @@ MetricResults: Metrics: methods: - counter - - distribution - - gauge - - get_namespace + - histogram MetricsContainer: methods: - get_counter @@ -6390,12 +6327,13 @@ MetricShortId: properties: - metricIndex - shortId +MetricsPublisher: + methods: + - publish MetricsReader: methods: - publish_metrics - publish_values - properties: - - publishers MetricStructuredName: properties: - context @@ -6540,9 +6478,6 @@ MonitorDoFn: - finish_bundle - process - start_bundle -MonitoringInfo: {} -MonitoringInfoSpecs: {} -MonitoringInfoTypeUrns: {} MonitorSuffix: properties: - ELEMENT_COUNTER @@ -6934,12 +6869,28 @@ OffsetRestrictionTracker: - try_split OldClassThatDoesNotImplementLen: {} Operation: - properties: - - done - - error - - metadata - - name - - response + methods: + - add_receiver + - current_element_progress + - execution_time_monitoring_infos + - finalize_bundle + - finish + - get_batching_preference + - get_input_batch_converter + - get_output_batch_converter + - monitoring_infos + - needs_finalization + - output + - pcollection_count_monitoring_infos + - process + - process_batch + - reset + - setup + - start + - str_internal + - teardown + - try_split + - user_monitoring_infos OperationCounters: methods: - do_sample @@ -7146,18 +7097,7 @@ ParamWindowedValueCoderImpl: - get_estimated_size_and_observables ParDo: methods: - - default_type_hints - - display_data - - expand - - from_runner_api_parameter - - get_restriction_coder - - infer_batch_converters - - infer_output_type - - make_fn - - runner_api_requires_keyed_input - - to_runner_api_parameter - - with_exception_handling - - with_outputs + - apply ParDoInstruction: properties: - input @@ -7444,11 +7384,30 @@ PipelineOptionsValidator: - PROJECT_ID_PATTERN - PROJECT_NUMBER_PATTERN - REQUIRED_ENVIRONMENT_OPTIONS +PipelineRenderer: + methods: + - info + - is_leaf + - layout_dot + - page + - page_callback_data + - pcoll_leaf_consumers + - pcoll_leaf_consumers_iter + - render_data + - render_json + - style + - to_dot + - to_dot_iter + - transform_attributes + - transform_node + - transform_to_dot + - update PipelineResult: methods: - cancel - - get - - read + - metrics + - pipeline_state_to_runner_api_state + - 
runner_api_state_to_pipeline_state - state - wait_until_finish PipelineRunner: @@ -7513,10 +7472,10 @@ Point: - value Policy: properties: - - auditConfigs - bindings - etag - - version + - kind + - resourceId PoolOption: properties: - name @@ -7568,6 +7527,9 @@ Position: - key - recordIndex - shufflePosition +Postprocess: + methods: + - process PostProcessor: methods: - process @@ -7582,6 +7544,9 @@ PrefixTransform: - expand - from_runner_api_parameter - to_runner_api_parameter +Preprocess: + methods: + - process PrintFn: methods: - process @@ -7759,13 +7724,6 @@ ProtoPlusMessageB: ProtoPlusMessageWithMap: properties: - field1 -ProvisionService: - methods: - - GetProvisionInfo -ProvisionServiceServicer: - methods: - - GetProvisionInfo -ProvisionServiceStub: {} PTransform: methods: - annotations @@ -8314,9 +8272,9 @@ RecommendationAIIT: - test_predict Record: properties: - - order_id - - product_id - - quantity + - age + - height + - name Recording: methods: - cancel @@ -8363,7 +8321,9 @@ RegressionMetrics: - meanSquaredLogError - medianAbsoluteError - rSquared -Reify: {} +Reify: + methods: + - process ReifyWindowsFn: methods: - process @@ -8373,6 +8333,14 @@ RekeyElements: RemoveBitbucketServerConnectedRepositoryRequest: properties: - connectedRepository +RenderOptions: {} +RenderPipelineResult: + methods: + - monitoring_infos + - wait_until_finish +RenderRunner: + methods: + - run_pipeline Repeatedly: methods: - from_runner_api @@ -8628,6 +8596,7 @@ RunnerIOOperation: {} RunnerResult: methods: - metrics + - monitoring_infos - monitoring_metrics - wait_until_finish RuntimeEnvironment: @@ -8742,12 +8711,19 @@ SampleCombineFn: - setup - teardown SampleOptions: {} +SchemaAwareExternalTransform: + methods: + - discover + - expand SchemaBasedPayloadBuilder: methods: - build SchemaLoadedSqlTransform: methods: - expand +SchemaTransformPayloadBuilder: + methods: + - build SchemaTranslation: methods: - atomic_value_from_runner_api @@ -8937,13 +8913,17 @@ Sessions: SessionsToStringsDoFn: methods: - process -SessionWindowsPayload: {} SetHint: {} SetIamPolicyRequest: properties: - policy - updateMask -SetRuntimeState: {} +SetRuntimeState: + methods: + - add + - clear + - is_modified + - read SetStateSpec: methods: - to_runner_api @@ -9122,7 +9102,6 @@ SlidingWindows: - from_runner_api_parameter - get_window_coder - to_runner_api_parameter -SlidingWindowsPayload: {} SlowCoders: methods: - test_using_slow_impl @@ -9168,11 +9147,9 @@ SortedConcatWithCounters: - merge_accumulators Source: properties: - - baseSpecs - - codec - - doesNotNeedSplitting - - metadata - - spec + - repoSource + - storageSource + - storageSourceManifest SourceBase: methods: - is_bounded @@ -9390,17 +9367,11 @@ StageSummary: - stageId - startTime - state -StandardArtifacts: {} -StandardCoders: {} -StandardDisplayData: {} -StandardEnvironments: {} StandardOptions: properties: - ALL_KNOWN_RUNNERS - DEFAULT_RUNNER - KNOWN_RUNNER_NAMES -StandardProtocols: {} -StandardPTransforms: {} StandardQueryParameters: properties: - access_token @@ -9415,10 +9386,6 @@ StandardQueryParameters: - trace - upload_protocol - uploadType -StandardRequirements: {} -StandardResourceHints: {} -StandardRunnerProtocols: {} -StandardSideInputTypes: {} StandardSqlDataType: properties: - arrayElementType @@ -9431,7 +9398,6 @@ StandardSqlField: StandardSqlStructType: properties: - fields -StandardUserStateTypes: {} StateBackedIterableCoder: methods: - from_runner_api_parameter @@ -10085,6 +10051,9 @@ SynchronousSetRuntimeState: - clear - commit - read 
+SyntheticRecordToStrFn: + methods: + - process SyntheticSDFAsSource: methods: - process @@ -10623,6 +10592,7 @@ TestIamPermissionsRequest: - permissions TestIamPermissionsResponse: properties: + - kind - permissions TestingFileSystem: methods: @@ -10962,18 +10932,11 @@ TestStreamIntegrationTests: - test_basic_execution - test_multiple_outputs - test_multiple_outputs_with_watermark_advancement -TestStreamService: - methods: - - Events TestStreamServiceController: methods: - Events - start - stop -TestStreamServiceServicer: - methods: - - Events -TestStreamServiceStub: {} TestTableReferenceParser: methods: - test_calling_with_all_arguments @@ -11716,12 +11679,9 @@ WatermarkEvent: - to_runner_api WatermarkManager: methods: - - extract_all_timers - - get_watermarks - - update_watermarks - properties: - - WATERMARK_NEG_INF - - WATERMARK_POS_INF + - get_pcoll_node + - get_stage_node + - set_pcoll_watermark WatermarkPolicy: methods: - validate_param @@ -11886,28 +11846,16 @@ WorkerOptions: - validate WorkerPool: properties: - - autoscalingSettings - - dataDisks - - defaultPackageSet - - diskSizeGb - - diskSourceImage - - diskType - - ipConfiguration - - kind - - machineType - - metadata - - network - - numThreadsPerWorker - - numWorkers - - onHostMaintenance - - packages - - poolArgs - - sdkHarnessContainerImages - - subnetwork - - taskrunnerSettings - - teardownPolicy - - workerHarnessContainerImage - - zone + - annotations + - createTime + - deleteTime + - displayName + - etag + - name + - privatePoolV1Config + - state + - uid + - updateTime WorkerSettings: properties: - baseUrl diff --git a/playground/frontend/playground_components/build.gradle.kts b/playground/frontend/playground_components/build.gradle.kts index e231cf7fc9048..1af9d60537652 100644 --- a/playground/frontend/playground_components/build.gradle.kts +++ b/playground/frontend/playground_components/build.gradle.kts @@ -137,6 +137,7 @@ tasks.register("generateCode") { tasks.register("extractBeamSymbols") { dependsOn("ensureSymbolsDirectoryExists") dependsOn("extractBeamSymbolsGo") + dependsOn("extractBeamSymbolsJava") dependsOn("extractBeamSymbolsPython") group = "build" @@ -167,6 +168,10 @@ tasks.register("extractBeamSymbolsGo") { } } +tasks.register("extractBeamSymbolsJava") { + dependsOn("tools:extract_symbols_java:buildJava") +} + tasks.register("extractBeamSymbolsPython") { doLast { exec { diff --git a/playground/frontend/playground_components/lib/playground_components.dart b/playground/frontend/playground_components/lib/playground_components.dart index b2567abbb54d0..2fab3043cb4c7 100644 --- a/playground/frontend/playground_components/lib/playground_components.dart +++ b/playground/frontend/playground_components/lib/playground_components.dart @@ -17,17 +17,13 @@ */ export 'src/cache/example_cache.dart'; - export 'src/constants/colors.dart'; export 'src/constants/links.dart'; export 'src/constants/sizes.dart'; - export 'src/controllers/example_loaders/examples_loader.dart'; export 'src/controllers/playground_controller.dart'; export 'src/controllers/public_notifier.dart'; - export 'src/enums/complexity.dart'; - export 'src/models/category_with_examples.dart'; export 'src/models/example.dart'; export 'src/models/example_base.dart'; @@ -47,21 +43,16 @@ export 'src/models/sdk.dart'; export 'src/models/shortcut.dart'; export 'src/models/toast.dart'; export 'src/models/toast_type.dart'; - export 'src/playground_components.dart'; - export 'src/repositories/code_client/grpc_code_client.dart'; export 
'src/repositories/code_repository.dart'; export 'src/repositories/example_client/grpc_example_client.dart'; export 'src/repositories/example_repository.dart'; - +export 'src/router/router_delegate.dart'; export 'src/services/symbols/loaders/yaml.dart'; - export 'src/theme/switch_notifier.dart'; export 'src/theme/theme.dart'; - export 'src/util/pipeline_options.dart'; - export 'src/widgets/bubble.dart'; export 'src/widgets/clickable.dart'; export 'src/widgets/complexity.dart'; @@ -71,6 +62,9 @@ export 'src/widgets/loading_error.dart'; export 'src/widgets/loading_indicator.dart'; export 'src/widgets/logo.dart'; export 'src/widgets/output/output.dart'; +export 'src/widgets/output/output_area.dart'; +export 'src/widgets/output/output_tab.dart'; +export 'src/widgets/output/output_tabs.dart'; export 'src/widgets/overlay/body.dart'; export 'src/widgets/overlay/dismissible.dart'; export 'src/widgets/overlay/opener.dart'; diff --git a/playground/frontend/playground_components/lib/src/api/v1/api.pb.dart b/playground/frontend/playground_components/lib/src/api/v1/api.pb.dart index 8a6479dbaabbf..2b855ff5de6f0 100644 --- a/playground/frontend/playground_components/lib/src/api/v1/api.pb.dart +++ b/playground/frontend/playground_components/lib/src/api/v1/api.pb.dart @@ -88,6 +88,7 @@ class RunCodeRequest extends $pb.GeneratedMessage { ..e(2, const $core.bool.fromEnvironment('protobuf.omit_field_names') ? '' : 'sdk', $pb.PbFieldType.OE, defaultOrMaker: Sdk.SDK_UNSPECIFIED, valueOf: Sdk.valueOf, enumValues: Sdk.values) ..aOS(3, const $core.bool.fromEnvironment('protobuf.omit_field_names') ? '' : 'pipelineOptions') ..pc(4, const $core.bool.fromEnvironment('protobuf.omit_field_names') ? '' : 'datasets', $pb.PbFieldType.PM, subBuilder: Dataset.create) + ..pc(5, const $core.bool.fromEnvironment('protobuf.omit_field_names') ? '' : 'files', $pb.PbFieldType.PM, subBuilder: SnippetFile.create) ..hasRequiredFields = false ; @@ -97,6 +98,7 @@ class RunCodeRequest extends $pb.GeneratedMessage { Sdk? sdk, $core.String? pipelineOptions, $core.Iterable? datasets, + $core.Iterable? files, }) { final _result = create(); if (code != null) { @@ -111,6 +113,9 @@ class RunCodeRequest extends $pb.GeneratedMessage { if (datasets != null) { _result.datasets.addAll(datasets); } + if (files != null) { + _result.files.addAll(files); + } return _result; } factory RunCodeRequest.fromBuffer($core.List<$core.int> i, [$pb.ExtensionRegistry r = $pb.ExtensionRegistry.EMPTY]) => create()..mergeFromBuffer(i, r); @@ -163,6 +168,9 @@ class RunCodeRequest extends $pb.GeneratedMessage { @$pb.TagNumber(4) $core.List get datasets => $_getList(3); + + @$pb.TagNumber(5) + $core.List get files => $_getList(4); } class RunCodeResponse extends $pb.GeneratedMessage { @@ -1817,17 +1825,22 @@ class GetPrecompiledObjectResponse extends $pb.GeneratedMessage { class GetPrecompiledObjectCodeResponse extends $pb.GeneratedMessage { static final $pb.BuilderInfo _i = $pb.BuilderInfo(const $core.bool.fromEnvironment('protobuf.omit_message_names') ? '' : 'GetPrecompiledObjectCodeResponse', package: const $pb.PackageName(const $core.bool.fromEnvironment('protobuf.omit_message_names') ? '' : 'api.v1'), createEmptyInstance: create) ..aOS(1, const $core.bool.fromEnvironment('protobuf.omit_field_names') ? '' : 'code') + ..pc(2, const $core.bool.fromEnvironment('protobuf.omit_field_names') ? 
'' : 'files', $pb.PbFieldType.PM, subBuilder: SnippetFile.create) ..hasRequiredFields = false ; GetPrecompiledObjectCodeResponse._() : super(); factory GetPrecompiledObjectCodeResponse({ $core.String? code, + $core.Iterable? files, }) { final _result = create(); if (code != null) { _result.code = code; } + if (files != null) { + _result.files.addAll(files); + } return _result; } factory GetPrecompiledObjectCodeResponse.fromBuffer($core.List<$core.int> i, [$pb.ExtensionRegistry r = $pb.ExtensionRegistry.EMPTY]) => create()..mergeFromBuffer(i, r); @@ -1859,6 +1872,9 @@ class GetPrecompiledObjectCodeResponse extends $pb.GeneratedMessage { $core.bool hasCode() => $_has(0); @$pb.TagNumber(1) void clearCode() => clearField(1); + + @$pb.TagNumber(2) + $core.List get files => $_getList(1); } class GetPrecompiledObjectOutputResponse extends $pb.GeneratedMessage { diff --git a/playground/frontend/playground_components/lib/src/api/v1/api.pbjson.dart b/playground/frontend/playground_components/lib/src/api/v1/api.pbjson.dart index 51ee114e13871..92082673e66ec 100644 --- a/playground/frontend/playground_components/lib/src/api/v1/api.pbjson.dart +++ b/playground/frontend/playground_components/lib/src/api/v1/api.pbjson.dart @@ -112,11 +112,12 @@ const RunCodeRequest$json = const { const {'1': 'sdk', '3': 2, '4': 1, '5': 14, '6': '.api.v1.Sdk', '10': 'sdk'}, const {'1': 'pipeline_options', '3': 3, '4': 1, '5': 9, '10': 'pipelineOptions'}, const {'1': 'datasets', '3': 4, '4': 3, '5': 11, '6': '.api.v1.Dataset', '10': 'datasets'}, + const {'1': 'files', '3': 5, '4': 3, '5': 11, '6': '.api.v1.SnippetFile', '10': 'files'}, ], }; /// Descriptor for `RunCodeRequest`. Decode as a `google.protobuf.DescriptorProto`. -final $typed_data.Uint8List runCodeRequestDescriptor = $convert.base64Decode('Cg5SdW5Db2RlUmVxdWVzdBISCgRjb2RlGAEgASgJUgRjb2RlEh0KA3NkaxgCIAEoDjILLmFwaS52MS5TZGtSA3NkaxIpChBwaXBlbGluZV9vcHRpb25zGAMgASgJUg9waXBlbGluZU9wdGlvbnMSKwoIZGF0YXNldHMYBCADKAsyDy5hcGkudjEuRGF0YXNldFIIZGF0YXNldHM='); +final $typed_data.Uint8List runCodeRequestDescriptor = $convert.base64Decode('Cg5SdW5Db2RlUmVxdWVzdBISCgRjb2RlGAEgASgJUgRjb2RlEh0KA3NkaxgCIAEoDjILLmFwaS52MS5TZGtSA3NkaxIpChBwaXBlbGluZV9vcHRpb25zGAMgASgJUg9waXBlbGluZU9wdGlvbnMSKwoIZGF0YXNldHMYBCADKAsyDy5hcGkudjEuRGF0YXNldFIIZGF0YXNldHMSKQoFZmlsZXMYBSADKAsyEy5hcGkudjEuU25pcHBldEZpbGVSBWZpbGVz'); @$core.Deprecated('Use runCodeResponseDescriptor instead') const RunCodeResponse$json = const { '1': 'RunCodeResponse', @@ -445,11 +446,12 @@ const GetPrecompiledObjectCodeResponse$json = const { '1': 'GetPrecompiledObjectCodeResponse', '2': const [ const {'1': 'code', '3': 1, '4': 1, '5': 9, '10': 'code'}, + const {'1': 'files', '3': 2, '4': 3, '5': 11, '6': '.api.v1.SnippetFile', '10': 'files'}, ], }; /// Descriptor for `GetPrecompiledObjectCodeResponse`. Decode as a `google.protobuf.DescriptorProto`. 
-final $typed_data.Uint8List getPrecompiledObjectCodeResponseDescriptor = $convert.base64Decode('CiBHZXRQcmVjb21waWxlZE9iamVjdENvZGVSZXNwb25zZRISCgRjb2RlGAEgASgJUgRjb2Rl'); +final $typed_data.Uint8List getPrecompiledObjectCodeResponseDescriptor = $convert.base64Decode('CiBHZXRQcmVjb21waWxlZE9iamVjdENvZGVSZXNwb25zZRISCgRjb2RlGAEgASgJUgRjb2RlEikKBWZpbGVzGAIgAygLMhMuYXBpLnYxLlNuaXBwZXRGaWxlUgVmaWxlcw=='); @$core.Deprecated('Use getPrecompiledObjectOutputResponseDescriptor instead') const GetPrecompiledObjectOutputResponse$json = const { '1': 'GetPrecompiledObjectOutputResponse', diff --git a/playground/frontend/playground_components/lib/src/assets/assets.gen.dart b/playground/frontend/playground_components/lib/src/assets/assets.gen.dart index 0d02529875f96..2b64543628d70 100644 --- a/playground/frontend/playground_components/lib/src/assets/assets.gen.dart +++ b/playground/frontend/playground_components/lib/src/assets/assets.gen.dart @@ -58,6 +58,9 @@ class $AssetsSymbolsGen { /// File path: assets/symbols/go.g.yaml String get goG => 'assets/symbols/go.g.yaml'; + /// File path: assets/symbols/java.g.yaml + String get javaG => 'assets/symbols/java.g.yaml'; + /// File path: assets/symbols/python.g.yaml String get pythonG => 'assets/symbols/python.g.yaml'; } diff --git a/playground/frontend/playground_components/lib/src/controllers/example_loaders/standard_example_loader.dart b/playground/frontend/playground_components/lib/src/controllers/example_loaders/standard_example_loader.dart index 7a64b8aa818fa..5f180589ea3c7 100644 --- a/playground/frontend/playground_components/lib/src/controllers/example_loaders/standard_example_loader.dart +++ b/playground/frontend/playground_components/lib/src/controllers/example_loaders/standard_example_loader.dart @@ -51,16 +51,23 @@ class StandardExampleLoader extends ExampleLoader { } Future _load() async { - final example = await _loadExampleBase(); + try { + final example = await _loadExampleBase(); - if (example == null) { - _completer.completeError('Example not found: $descriptor'); + if (example == null) { + _completer.completeError('Example not found: $descriptor'); + return; + } + + _completer.complete( + exampleCache.loadExampleInfo(example), + ); + + // ignore: avoid_catches_without_on_clauses + } catch (ex, trace) { + _completer.completeError(ex, trace); return; } - - _completer.complete( - exampleCache.loadExampleInfo(example), - ); } Future _loadExampleBase() async { diff --git a/playground/frontend/playground_components/lib/src/controllers/playground_controller.dart b/playground/frontend/playground_components/lib/src/controllers/playground_controller.dart index decbd366f93a9..463c7607dda3e 100644 --- a/playground/frontend/playground_components/lib/src/controllers/playground_controller.dart +++ b/playground/frontend/playground_components/lib/src/controllers/playground_controller.dart @@ -30,6 +30,7 @@ import '../models/example_base.dart'; import '../models/example_loading_descriptors/empty_example_loading_descriptor.dart'; import '../models/example_loading_descriptors/example_loading_descriptor.dart'; import '../models/example_loading_descriptors/examples_loading_descriptor.dart'; +import '../models/example_loading_descriptors/standard_example_loading_descriptor.dart'; import '../models/example_loading_descriptors/user_shared_example_loading_descriptor.dart'; import '../models/intents.dart'; import '../models/outputs.dart'; @@ -173,6 +174,43 @@ class PlaygroundController with ChangeNotifier { ); } + Future setExampleBase(ExampleBase exampleBase) async 
{ + final snippetEditingController = _getOrCreateSnippetEditingController( + exampleBase.sdk, + loadDefaultIfNot: false, + ); + + if (!snippetEditingController.lockExampleLoading()) { + return; + } + + notifyListeners(); + + try { + final example = await exampleCache.loadExampleInfo(exampleBase); + // TODO(alexeyinkin): setCurrentSdk = false when we do + // per-SDK output and run status. + // Now using true to reset the output and run status. + // https://github.com/apache/beam/issues/23248 + final descriptor = StandardExampleLoadingDescriptor( + sdk: example.sdk, + path: example.path, + ); + + setExample( + example, + descriptor: descriptor, + setCurrentSdk: true, + ); + + // ignore: avoid_catches_without_on_clauses + } catch (ex) { + snippetEditingController.releaseExampleLoading(); + notifyListeners(); + rethrow; + } + } + void setExample( Example example, { required ExampleLoadingDescriptor descriptor, @@ -291,6 +329,7 @@ class PlaygroundController with ChangeNotifier { code: controller.codeController.fullText, sdk: controller.sdk, pipelineOptions: parsedPipelineOptions, + datasets: selectedExample?.datasets ?? [], ); _runSubscription = _codeRepository?.runCode(request).listen((event) { _result = event; @@ -413,6 +452,7 @@ class PlaygroundController with ChangeNotifier { ); final sharedExample = Example( + datasets: controller.selectedExample?.datasets ?? [], source: code, name: name, sdk: controller.sdk, diff --git a/playground/frontend/playground_components/lib/src/controllers/snippet_editing_controller.dart b/playground/frontend/playground_components/lib/src/controllers/snippet_editing_controller.dart index 5963837f8c508..052ee41aea180 100644 --- a/playground/frontend/playground_components/lib/src/controllers/snippet_editing_controller.dart +++ b/playground/frontend/playground_components/lib/src/controllers/snippet_editing_controller.dart @@ -27,6 +27,7 @@ import '../models/example_loading_descriptors/content_example_loading_descriptor import '../models/example_loading_descriptors/empty_example_loading_descriptor.dart'; import '../models/example_loading_descriptors/example_loading_descriptor.dart'; import '../models/example_view_options.dart'; +import '../models/loading_status.dart'; import '../models/sdk.dart'; import '../services/symbols/symbols_notifier.dart'; @@ -39,6 +40,7 @@ class SnippetEditingController extends ChangeNotifier { ExampleLoadingDescriptor? _descriptor; String _pipelineOptions = ''; bool _isChanged = false; + LoadingStatus _exampleLoadingStatus = LoadingStatus.done; SnippetEditingController({ required this.sdk, @@ -65,6 +67,29 @@ class SnippetEditingController extends ChangeNotifier { } } + /// Attempts to acquire a lock for asynchronous example loading. + /// + /// This prevents race condition for quick example switching + /// and allows to show a loading indicator. + /// + /// Returns whether the lock was acquired. + bool lockExampleLoading() { + switch (_exampleLoadingStatus) { + case LoadingStatus.loading: + return false; + case LoadingStatus.done: + case LoadingStatus.error: + _exampleLoadingStatus = LoadingStatus.loading; + return true; + } + } + + void releaseExampleLoading() { + _exampleLoadingStatus = LoadingStatus.done; + } + + bool get isLoading => _exampleLoadingStatus == LoadingStatus.loading; + void setExample( Example example, { ExampleLoadingDescriptor? 
descriptor, @@ -73,6 +98,7 @@ class SnippetEditingController extends ChangeNotifier { _selectedExample = example; _pipelineOptions = example.pipelineOptions; _isChanged = false; + releaseExampleLoading(); final viewOptions = example.viewOptions; diff --git a/sdks/java/io/google-cloud-platform/src/test/resources/org/apache/beam/sdk/io/gcp/pubsub/all_data_types_flat_schema.proto b/playground/frontend/playground_components/lib/src/enums/emulator_type.dart similarity index 81% rename from sdks/java/io/google-cloud-platform/src/test/resources/org/apache/beam/sdk/io/gcp/pubsub/all_data_types_flat_schema.proto rename to playground/frontend/playground_components/lib/src/enums/emulator_type.dart index 0b2bbaccded54..74b9916316afb 100644 --- a/sdks/java/io/google-cloud-platform/src/test/resources/org/apache/beam/sdk/io/gcp/pubsub/all_data_types_flat_schema.proto +++ b/playground/frontend/playground_components/lib/src/enums/emulator_type.dart @@ -15,13 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -syntax = "proto3"; -message Record { - double doubleField = 1; - float floatField = 2; - int32 int32Field = 3; - int64 int64Field = 4; - bool boolField = 5; - string stringField = 6; +enum EmulatorType { + kafka, } diff --git a/playground/frontend/playground_components/lib/src/models/dataset.dart b/playground/frontend/playground_components/lib/src/models/dataset.dart new file mode 100644 index 0000000000000..5ec79d29112e6 --- /dev/null +++ b/playground/frontend/playground_components/lib/src/models/dataset.dart @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import '../enums/emulator_type.dart'; + +class Dataset { + final EmulatorType? 
type; + final Map<String, String> options; + final String datasetPath; + + Dataset({ + required this.type, + required this.options, + required this.datasetPath, + }); +} diff --git a/playground/frontend/playground_components/lib/src/models/example.dart b/playground/frontend/playground_components/lib/src/models/example.dart index f55ee73571515..3f3d89c313b99 100644 --- a/playground/frontend/playground_components/lib/src/models/example.dart +++ b/playground/frontend/playground_components/lib/src/models/example.dart @@ -32,14 +32,15 @@ class Example extends ExampleBase { required super.sdk, required super.type, required super.path, - this.graph, - this.logs, - this.outputs, super.complexity, super.contextLine, + super.datasets, super.description, + this.graph, super.isMultiFile, + this.logs, super.link, + this.outputs, super.pipelineOptions, super.tags, super.viewOptions, @@ -54,6 +55,7 @@ }) : super( complexity: example.complexity, contextLine: example.contextLine, + datasets: example.datasets, description: example.description, isMultiFile: example.isMultiFile, link: example.link, diff --git a/playground/frontend/playground_components/lib/src/models/example_base.dart b/playground/frontend/playground_components/lib/src/models/example_base.dart index 2ca76d107b450..205f60edf2c40 100644 --- a/playground/frontend/playground_components/lib/src/models/example_base.dart +++ b/playground/frontend/playground_components/lib/src/models/example_base.dart @@ -20,6 +20,7 @@ import 'package:equatable/equatable.dart'; import '../enums/complexity.dart'; import '../repositories/example_repository.dart'; +import 'dataset.dart'; import 'example_view_options.dart'; import 'sdk.dart'; @@ -53,6 +54,7 @@ class ExampleBase with Comparable, EquatableMixin { /// Index of the line to focus, 1-based. final int contextLine; + final List<Dataset> datasets; final String description; final bool isMultiFile; final String? 
link; @@ -71,6 +73,7 @@ class ExampleBase with Comparable, EquatableMixin { required this.type, this.complexity, this.contextLine = 1, + this.datasets = const [], this.description = '', this.isMultiFile = false, this.link, @@ -87,4 +90,8 @@ class ExampleBase with Comparable, EquatableMixin { int compareTo(ExampleBase other) { return name.toLowerCase().compareTo(other.name.toLowerCase()); } + + bool get usesEmulatedData => datasets.any( + (dataset) => dataset.type != null, + ); } diff --git a/playground/frontend/playground_components/lib/src/repositories/code_client/grpc_code_client.dart b/playground/frontend/playground_components/lib/src/repositories/code_client/grpc_code_client.dart index 62f0df698bb12..a2ebdf8c14723 100644 --- a/playground/frontend/playground_components/lib/src/repositories/code_client/grpc_code_client.dart +++ b/playground/frontend/playground_components/lib/src/repositories/code_client/grpc_code_client.dart @@ -22,6 +22,7 @@ import '../../api/iis_workaround_channel.dart'; import '../../api/v1/api.pbgrpc.dart' as grpc; import '../../models/sdk.dart'; import '../../util/pipeline_options.dart'; +import '../dataset_grpc_extension.dart'; import '../models/check_status_response.dart'; import '../models/output_response.dart'; import '../models/run_code_error.dart'; @@ -213,10 +214,12 @@ class GrpcCodeClient implements CodeClient { } grpc.RunCodeRequest _grpcRunCodeRequest(RunCodeRequest request) { - return grpc.RunCodeRequest() - ..code = request.code - ..sdk = request.sdk.grpc - ..pipelineOptions = pipelineOptionsToString(request.pipelineOptions); + return grpc.RunCodeRequest( + code: request.code, + sdk: request.sdk.grpc, + pipelineOptions: pipelineOptionsToString(request.pipelineOptions), + datasets: request.datasets.map((e) => e.grpc), + ); } RunCodeStatus _toClientStatus(grpc.Status status) { diff --git a/playground/frontend/playground_components/lib/src/repositories/dataset_grpc_extension.dart b/playground/frontend/playground_components/lib/src/repositories/dataset_grpc_extension.dart new file mode 100644 index 0000000000000..ee8b7539662e6 --- /dev/null +++ b/playground/frontend/playground_components/lib/src/repositories/dataset_grpc_extension.dart @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import '../api/v1/api.pbgrpc.dart' as g; +import '../models/dataset.dart'; +import 'emulator_type_grpc_extension.dart'; + +extension DatasetExtension on Dataset { + g.Dataset get grpc { + return g.Dataset( + type: type?.grpc ?? 
g.EmulatorType.EMULATOR_TYPE_UNSPECIFIED, + options: options, + datasetPath: datasetPath, + ); + } +} + +extension GrpcDatasetExtension on g.Dataset { + Dataset get model { + return Dataset( + type: type.model, + options: options, + datasetPath: datasetPath, + ); + } +} diff --git a/playground/frontend/playground_components/lib/src/repositories/emulator_type_grpc_extension.dart b/playground/frontend/playground_components/lib/src/repositories/emulator_type_grpc_extension.dart new file mode 100644 index 0000000000000..7acbd0abd346d --- /dev/null +++ b/playground/frontend/playground_components/lib/src/repositories/emulator_type_grpc_extension.dart @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import '../api/v1/api.pbgrpc.dart' as g; +import '../enums/emulator_type.dart'; + +extension ExampleTypeExtension on EmulatorType { + g.EmulatorType get grpc { + switch (this) { + case EmulatorType.kafka: + return g.EmulatorType.EMULATOR_TYPE_KAFKA; + } + } +} + +extension GrpcExampleTypeExtension on g.EmulatorType { + EmulatorType? get model { + EmulatorType? 
result; + switch (this) { + case g.EmulatorType.EMULATOR_TYPE_KAFKA: + result = EmulatorType.kafka; + break; + case g.EmulatorType.EMULATOR_TYPE_UNSPECIFIED: + result = null; + break; + } + return result; + } +} diff --git a/playground/frontend/playground_components/lib/src/repositories/example_client/grpc_example_client.dart b/playground/frontend/playground_components/lib/src/repositories/example_client/grpc_example_client.dart index f53c14db0ab64..8df2a6b4204c6 100644 --- a/playground/frontend/playground_components/lib/src/repositories/example_client/grpc_example_client.dart +++ b/playground/frontend/playground_components/lib/src/repositories/example_client/grpc_example_client.dart @@ -24,6 +24,7 @@ import '../../models/category_with_examples.dart'; import '../../models/example_base.dart'; import '../../models/sdk.dart'; import '../complexity_grpc_extension.dart'; +import '../dataset_grpc_extension.dart'; import '../models/get_default_precompiled_object_request.dart'; import '../models/get_precompiled_object_code_response.dart'; import '../models/get_precompiled_object_request.dart'; @@ -324,17 +325,18 @@ class GrpcExampleClient implements ExampleClient { ExampleBase _toExampleModel(Sdk sdk, grpc.PrecompiledObject example) { return ExampleBase( - sdk: sdk, - name: example.name, - description: example.description, - tags: example.tags, - type: _exampleTypeFromString(example.type), - path: example.cloudPath, + complexity: example.complexity.model, contextLine: example.contextLine, - pipelineOptions: example.pipelineOptions, + datasets: example.datasets.map((e) => e.model).toList(growable: false), + description: example.description, isMultiFile: example.multifile, link: example.link, - complexity: example.complexity.model, + name: example.name, + path: example.cloudPath, + pipelineOptions: example.pipelineOptions, + sdk: sdk, + tags: example.tags, + type: _exampleTypeFromString(example.type), ); } diff --git a/playground/frontend/playground_components/lib/src/repositories/models/run_code_request.dart b/playground/frontend/playground_components/lib/src/repositories/models/run_code_request.dart index 16a1e74df4302..d2a49b9ee6d01 100644 --- a/playground/frontend/playground_components/lib/src/repositories/models/run_code_request.dart +++ b/playground/frontend/playground_components/lib/src/repositories/models/run_code_request.dart @@ -16,15 +16,18 @@ * limitations under the License. */ +import '../../models/dataset.dart'; import '../../models/sdk.dart'; class RunCodeRequest { final String code; + final List datasets; final Sdk sdk; final Map pipelineOptions; const RunCodeRequest({ required this.code, + required this.datasets, required this.sdk, required this.pipelineOptions, }); diff --git a/playground/frontend/playground_components/lib/src/repositories/sdk_grpc_extension.dart b/playground/frontend/playground_components/lib/src/repositories/sdk_grpc_extension.dart index d9c1b58633037..e0a3d30dda041 100644 --- a/playground/frontend/playground_components/lib/src/repositories/sdk_grpc_extension.dart +++ b/playground/frontend/playground_components/lib/src/repositories/sdk_grpc_extension.dart @@ -16,7 +16,6 @@ * limitations under the License. */ - import '../api/v1/api.pbgrpc.dart' as g; import '../models/sdk.dart'; @@ -29,8 +28,7 @@ extension SdkExtension on Sdk { }; g.Sdk get grpc => - _idToGrpcEnum[id] ?? - (throw Exception('SDK not supported for GRPS: $id')); + _idToGrpcEnum[id] ?? 
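The two extension files above provide a symmetric mapping between the UI models and the generated protobuf messages, with `EMULATOR_TYPE_UNSPECIFIED` standing in for a missing type. A small round-trip sketch, using the import paths that appear elsewhere in this diff (the option key and path are placeholders):

```dart
import 'package:playground_components/src/api/v1/api.pbgrpc.dart' as g;
import 'package:playground_components/src/enums/emulator_type.dart';
import 'package:playground_components/src/models/dataset.dart';
import 'package:playground_components/src/repositories/dataset_grpc_extension.dart';

void main() {
  final dataset = Dataset(
    type: EmulatorType.kafka,
    options: {'topic': 'dataset'}, // hypothetical option key
    datasetPath: 'MOCK_PATH', // hypothetical path
  );

  // Model -> protobuf: a null type maps to EMULATOR_TYPE_UNSPECIFIED.
  final g.Dataset grpcDataset = dataset.grpc;

  // Protobuf -> model: EMULATOR_TYPE_UNSPECIFIED maps back to a null type.
  final Dataset roundTripped = grpcDataset.model;

  print(roundTripped.datasetPath == dataset.datasetPath); // true
}
```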
(throw Exception('SDK not supported for GRPS: $id')); } extension GrpcSdkExtension on g.Sdk { diff --git a/playground/frontend/playground_components/lib/src/router/router_delegate.dart b/playground/frontend/playground_components/lib/src/router/router_delegate.dart new file mode 100644 index 0000000000000..41857979d9e19 --- /dev/null +++ b/playground/frontend/playground_components/lib/src/router/router_delegate.dart @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import 'package:app_state/app_state.dart'; +import 'package:flutter/material.dart'; + +import '../widgets/toasts/toast_listener.dart'; + +/// Wraps [pageStack] in widgets that must be above [Navigator] and can be +/// below [MaterialApp]. +class BeamRouterDelegate extends PageStackRouterDelegate { + BeamRouterDelegate(super.pageStack); + + @override + Widget build(BuildContext context) { + // Overlay: to float toasts. + // ToastListenerWidget: turns notification events into floating toasts. + return Overlay( + initialEntries: [ + OverlayEntry( + builder: (context) => ToastListenerWidget( + child: super.build(context), + ), + ), + ], + ); + } +} diff --git a/playground/frontend/playground_components/lib/src/services/symbols/loaders/map.dart b/playground/frontend/playground_components/lib/src/services/symbols/loaders/map.dart index 44c03811a3b9e..5c5281ebcf02f 100644 --- a/playground/frontend/playground_components/lib/src/services/symbols/loaders/map.dart +++ b/playground/frontend/playground_components/lib/src/services/symbols/loaders/map.dart @@ -17,20 +17,32 @@ */ import 'package:highlight/languages/go.dart'; +import 'package:highlight/languages/java.dart'; import 'package:highlight/languages/python.dart'; +import 'package:highlight/languages/scala.dart'; import '../../../assets/assets.gen.dart'; import '../../../playground_components.dart'; import 'yaml.dart'; +final _javaLoader = YamlSymbolsLoader( + path: Assets.symbols.javaG, + package: PlaygroundComponents.packageName, + ); + final symbolLoadersByMode = { + // go: YamlSymbolsLoader( path: Assets.symbols.goG, package: PlaygroundComponents.packageName, ), + java: _javaLoader, + python: YamlSymbolsLoader( path: Assets.symbols.pythonG, package: PlaygroundComponents.packageName, ), + + scala: _javaLoader, }; diff --git a/playground/frontend/playground_components/lib/src/theme/theme.dart b/playground/frontend/playground_components/lib/src/theme/theme.dart index 287eef0a14f3b..1ebe89cb89b25 100644 --- a/playground/frontend/playground_components/lib/src/theme/theme.dart +++ b/playground/frontend/playground_components/lib/src/theme/theme.dart @@ -163,7 +163,7 @@ final kLightTheme = ThemeData( fontSize: codeFontSize, ), codeTheme: CodeThemeData( - styles: { + styles: const { 'root': TextStyle( backgroundColor: 
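Since no separate symbol file is extracted for Scala, the `scala` highlight mode reuses the Java loader in the map above. A sketch of resolving a loader by editor mode (the import path is assumed from the file location):

```dart
import 'package:highlight/languages/java.dart';
import 'package:highlight/languages/scala.dart';
import 'package:playground_components/src/services/symbols/loaders/map.dart';

void main() {
  final javaLoader = symbolLoadersByMode[java];
  final scalaLoader = symbolLoadersByMode[scala];

  // Both modes point at the same YamlSymbolsLoader for java.g.yaml.
  print(identical(javaLoader, scalaLoader)); // true
}
```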
BeamLightThemeColors.primaryBackground, color: BeamLightThemeColors.text, @@ -194,8 +194,8 @@ final kLightTheme = ThemeData( 'symbol': TextStyle(color: BeamLightThemeColors.code2), 'bullet': TextStyle(color: BeamLightThemeColors.code2), 'link': TextStyle(color: BeamLightThemeColors.code2), - 'emphasis': const TextStyle(fontStyle: FontStyle.italic), - 'strong': const TextStyle(fontWeight: FontWeight.bold), + 'emphasis': TextStyle(fontStyle: FontStyle.italic), + 'strong': TextStyle(fontWeight: FontWeight.bold), }, ), ), @@ -239,7 +239,7 @@ final kDarkTheme = ThemeData( fontSize: codeFontSize, ), codeTheme: CodeThemeData( - styles: { + styles: const { 'root': TextStyle( backgroundColor: BeamDarkThemeColors.primaryBackground, color: BeamDarkThemeColors.text, @@ -270,8 +270,8 @@ final kDarkTheme = ThemeData( 'symbol': TextStyle(color: BeamDarkThemeColors.code2), 'bullet': TextStyle(color: BeamDarkThemeColors.code2), 'link': TextStyle(color: BeamDarkThemeColors.code2), - 'emphasis': const TextStyle(fontStyle: FontStyle.italic), - 'strong': const TextStyle(fontWeight: FontWeight.bold), + 'emphasis': TextStyle(fontStyle: FontStyle.italic), + 'strong': TextStyle(fontWeight: FontWeight.bold), }, ), ), diff --git a/playground/frontend/playground_components/lib/src/widgets/output/output.dart b/playground/frontend/playground_components/lib/src/widgets/output/output.dart index 194b5d7549371..81a42f2a794e9 100644 --- a/playground/frontend/playground_components/lib/src/widgets/output/output.dart +++ b/playground/frontend/playground_components/lib/src/widgets/output/output.dart @@ -79,11 +79,13 @@ class _OutputWidgetState extends State Row( mainAxisAlignment: MainAxisAlignment.spaceBetween, children: [ - TabHeader( - tabController: tabController, - tabsWidget: OutputTabs( - playgroundController: widget.playgroundController, + Flexible( + child: TabHeader( tabController: tabController, + tabsWidget: OutputTabs( + playgroundController: widget.playgroundController, + tabController: tabController, + ), ), ), if (widget.trailing != null) widget.trailing!, diff --git a/playground/frontend/playground_components/lib/src/widgets/split_view.dart b/playground/frontend/playground_components/lib/src/widgets/split_view.dart index 904d03788d619..6291161f135d0 100644 --- a/playground/frontend/playground_components/lib/src/widgets/split_view.dart +++ b/playground/frontend/playground_components/lib/src/widgets/split_view.dart @@ -48,9 +48,9 @@ class _SplitViewState extends State { double _ratio = defaultRatio; double _maxSize = 0; - get _sizeFirst => _ratio * _maxSize; + int get _sizeFirst => (_ratio * _maxSize).toInt(); - get _sizeSecond => (1 - _ratio) * _maxSize; + int get _sizeSecond => ((1 - _ratio) * _maxSize).toInt(); get _isHorizontal => widget.direction == Axis.horizontal; @@ -78,13 +78,13 @@ class _SplitViewState extends State { width: constraints.maxWidth, child: Row( children: [ - SizedBox( - width: _sizeFirst, + Expanded( + flex: _sizeFirst, child: widget.first, ), _buildSeparator(context), - SizedBox( - width: _sizeSecond, + Expanded( + flex: _sizeSecond, child: widget.second, ), ], @@ -98,13 +98,13 @@ class _SplitViewState extends State { height: constraints.maxHeight, child: Column( children: [ - SizedBox( - height: _sizeFirst, + Expanded( + flex: _sizeFirst, child: widget.first, ), _buildSeparator(context), - SizedBox( - height: _sizeSecond, + Expanded( + flex: _sizeSecond, child: widget.second, ), ], diff --git a/playground/frontend/playground_components/lib/src/widgets/toasts/toast_listener.dart 
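The split view now converts the drag ratio into integer `flex` factors for `Expanded` children instead of fixed-pixel `SizedBox`es, so both panes always fill the available axis proportionally. A minimal standalone sketch of the same idea:

```dart
import 'package:flutter/material.dart';

/// Minimal sketch of the ratio-to-flex approach: the drag ratio is turned
/// into integer flex factors, so the two panes share the axis proportionally.
Widget buildPanes(double ratio, double maxSize, Widget first, Widget second) {
  final flexFirst = (ratio * maxSize).toInt();
  final flexSecond = ((1 - ratio) * maxSize).toInt();

  return Row(
    children: [
      Expanded(flex: flexFirst, child: first),
      const VerticalDivider(width: 1), // stands in for the drag separator
      Expanded(flex: flexSecond, child: second),
    ],
  );
}
```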
b/playground/frontend/playground_components/lib/src/widgets/toasts/toast_listener.dart index 38b76d06008c4..9a83f09389769 100644 --- a/playground/frontend/playground_components/lib/src/widgets/toasts/toast_listener.dart +++ b/playground/frontend/playground_components/lib/src/widgets/toasts/toast_listener.dart @@ -27,6 +27,7 @@ import '../../models/toast.dart'; import '../../services/toast_notifier.dart'; import 'toast.dart'; +/// Turns events from [ToastNotifier] into floating [ToastWidget]s. class ToastListenerWidget extends StatefulWidget { final Widget child; diff --git a/playground/frontend/playground_components/pubspec.yaml b/playground/frontend/playground_components/pubspec.yaml index 419b8ceec8634..e7c2943c9b708 100644 --- a/playground/frontend/playground_components/pubspec.yaml +++ b/playground/frontend/playground_components/pubspec.yaml @@ -26,6 +26,7 @@ environment: dependencies: aligned_dialog: ^0.0.6 + app_state: ^0.8.4 collection: ^1.16.0 easy_localization: ^3.0.1 easy_localization_ext: ^0.1.1 @@ -33,7 +34,7 @@ dependencies: enum_map: ^0.2.1 equatable: ^2.0.5 flutter: { sdk: flutter } - flutter_code_editor: ^0.2.1 + flutter_code_editor: ^0.2.4 flutter_markdown: ^0.6.12 flutter_svg: ^1.0.3 fluttertoast: ^8.1.1 @@ -67,6 +68,7 @@ flutter: - assets/png/ - assets/svg/ - assets/symbols/go.g.yaml + - assets/symbols/java.g.yaml - assets/symbols/python.g.yaml - assets/translations/en.yaml diff --git a/playground/frontend/playground_components/test/src/common/examples.dart b/playground/frontend/playground_components/test/src/common/examples.dart index b12a9bb3bc8d2..50032f0c11274 100644 --- a/playground/frontend/playground_components/test/src/common/examples.dart +++ b/playground/frontend/playground_components/test/src/common/examples.dart @@ -22,47 +22,47 @@ import 'package:playground_components/src/models/example_base.dart'; import 'package:playground_components/src/models/sdk.dart'; const exampleMock1 = Example( + complexity: Complexity.basic, + description: 'description', + name: 'Example X1', + path: 'SDK_PYTHON/Category/Name1', sdk: Sdk.python, source: 'ex1', - name: 'Example X1', tags: ['tag1'], type: ExampleType.example, - description: 'description', - path: 'SDK_PYTHON/Category/Name1', - complexity: Complexity.basic, ); const exampleMock2 = Example( + complexity: Complexity.basic, + description: 'description', + name: 'Kata', + path: 'SDK_PYTHON/Category/Name2', sdk: Sdk.python, source: 'ex2', - name: 'Kata', tags: ['tag2'], type: ExampleType.kata, - description: 'description', - path: 'SDK_PYTHON/Category/Name2', - complexity: Complexity.basic, ); const exampleWithoutSourceMock = ExampleBase( - sdk: Sdk.python, - name: 'Test example', - type: ExampleType.example, + complexity: Complexity.basic, description: 'description', + name: 'Test example', path: 'SDK_PYTHON/Category/Name', - complexity: Complexity.basic, + sdk: Sdk.python, + type: ExampleType.example, ); const exampleWithAllAdditionsMock = Example( - sdk: Sdk.python, - name: 'Test example', - type: ExampleType.example, + complexity: Complexity.basic, description: 'description', + graph: 'test outputs', + logs: 'test outputs', + name: 'Test example', + outputs: 'test outputs', path: 'SDK_PYTHON/Category/Name', + sdk: Sdk.python, source: 'test outputs', - outputs: 'test outputs', - logs: 'test outputs', - graph: 'test outputs', - complexity: Complexity.basic, + type: ExampleType.example, ); const exampleGoPipelineOptions = Example( @@ -79,11 +79,11 @@ const exampleGoPipelineOptions = Example( ); const exampleMockGo = 
Example( + complexity: Complexity.medium, + description: 'description', + name: 'Example', + path: 'SDK_GO/Category/Name', sdk: Sdk.go, source: 'ex1', - name: 'Example', type: ExampleType.example, - description: 'description', - path: 'SDK_GO/Category/Name', - complexity: Complexity.medium, ); diff --git a/playground/frontend/playground_components/test/src/controllers/example_loaders/common.dart b/playground/frontend/playground_components/test/src/controllers/example_loaders/common.dart index 8763c8c0d8a87..324ff9c1e0061 100644 --- a/playground/frontend/playground_components/test/src/controllers/example_loaders/common.dart +++ b/playground/frontend/playground_components/test/src/controllers/example_loaders/common.dart @@ -17,7 +17,6 @@ */ import 'package:playground_components/playground_components.dart'; - import 'package:playground_components/src/controllers/example_loaders/example_loader.dart'; import 'package:playground_components/src/controllers/example_loaders/example_loader_factory.dart'; diff --git a/playground/frontend/playground_components/test/src/controllers/example_loaders/examples_loader_test.mocks.dart b/playground/frontend/playground_components/test/src/controllers/example_loaders/examples_loader_test.mocks.dart index e20ace76e455a..7bcc1204ebd7d 100644 --- a/playground/frontend/playground_components/test/src/controllers/example_loaders/examples_loader_test.mocks.dart +++ b/playground/frontend/playground_components/test/src/controllers/example_loaders/examples_loader_test.mocks.dart @@ -3,7 +3,7 @@ // Do not manually edit this file. // ignore_for_file: no_leading_underscores_for_library_prefixes -import 'dart:async' as _i14; +import 'dart:async' as _i13; import 'dart:ui' as _i15; import 'package:mockito/mockito.dart' as _i1; @@ -19,7 +19,7 @@ import 'package:playground_components/src/models/category_with_examples.dart' import 'package:playground_components/src/models/example.dart' as _i9; import 'package:playground_components/src/models/example_base.dart' as _i8; import 'package:playground_components/src/models/example_loading_descriptors/example_loading_descriptor.dart' - as _i13; + as _i14; import 'package:playground_components/src/models/example_loading_descriptors/examples_loading_descriptor.dart' as _i7; import 'package:playground_components/src/models/example_loading_descriptors/user_shared_example_loading_descriptor.dart' @@ -193,9 +193,19 @@ class MockPlaygroundController extends _i1.Mock returnValueForMissingStub: null, ); @override + _i13.Future setExampleBase(_i8.ExampleBase? exampleBase) => + (super.noSuchMethod( + Invocation.method( + #setExampleBase, + [exampleBase], + ), + returnValue: Future.value(), + returnValueForMissingStub: Future.value(), + ) as _i13.Future); + @override void setExample( _i9.Example? example, { - _i13.ExampleLoadingDescriptor? descriptor, + _i14.ExampleLoadingDescriptor? descriptor, bool? setCurrentSdk, }) => super.noSuchMethod( @@ -288,14 +298,14 @@ class MockPlaygroundController extends _i1.Mock returnValueForMissingStub: null, ); @override - _i14.Future cancelRun() => (super.noSuchMethod( + _i13.Future cancelRun() => (super.noSuchMethod( Invocation.method( #cancelRun, [], ), returnValue: Future.value(), returnValueForMissingStub: Future.value(), - ) as _i14.Future); + ) as _i13.Future); @override void filterOutput(_i11.OutputType? 
type) => super.noSuchMethod( Invocation.method( @@ -305,7 +315,7 @@ class MockPlaygroundController extends _i1.Mock returnValueForMissingStub: null, ); @override - _i14.Future<_i6.UserSharedExampleLoadingDescriptor> saveSnippet() => + _i13.Future<_i6.UserSharedExampleLoadingDescriptor> saveSnippet() => (super.noSuchMethod( Invocation.method( #saveSnippet, @@ -313,7 +323,7 @@ class MockPlaygroundController extends _i1.Mock ), returnValue: Future<_i6.UserSharedExampleLoadingDescriptor>.value( _FakeUserSharedExampleLoadingDescriptor_4()), - ) as _i14.Future<_i6.UserSharedExampleLoadingDescriptor>); + ) as _i13.Future<_i6.UserSharedExampleLoadingDescriptor>); @override _i7.ExamplesLoadingDescriptor getLoadingDescriptor() => (super.noSuchMethod( Invocation.method( @@ -389,10 +399,10 @@ class MockExampleCache extends _i1.Mock implements _i2.ExampleCache { returnValueForMissingStub: null, ); @override - _i14.Future get allExamplesFuture => (super.noSuchMethod( + _i13.Future get allExamplesFuture => (super.noSuchMethod( Invocation.getter(#allExamplesFuture), returnValue: Future.value(), - ) as _i14.Future); + ) as _i13.Future); @override _i17.LoadingStatus get catalogStatus => (super.noSuchMethod( Invocation.getter(#catalogStatus), @@ -404,14 +414,14 @@ class MockExampleCache extends _i1.Mock implements _i2.ExampleCache { returnValue: false, ) as bool); @override - _i14.Future loadAllPrecompiledObjectsIfNot() => (super.noSuchMethod( + _i13.Future loadAllPrecompiledObjectsIfNot() => (super.noSuchMethod( Invocation.method( #loadAllPrecompiledObjectsIfNot, [], ), returnValue: Future.value(), returnValueForMissingStub: Future.value(), - ) as _i14.Future); + ) as _i13.Future); @override List<_i16.CategoryWithExamples> getCategories(_i12.Sdk? sdk) => (super.noSuchMethod( @@ -422,7 +432,7 @@ class MockExampleCache extends _i1.Mock implements _i2.ExampleCache { returnValue: <_i16.CategoryWithExamples>[], ) as List<_i16.CategoryWithExamples>); @override - _i14.Future<_i8.ExampleBase> getPrecompiledObject( + _i13.Future<_i8.ExampleBase> getPrecompiledObject( String? path, _i12.Sdk? sdk, ) => @@ -435,17 +445,17 @@ class MockExampleCache extends _i1.Mock implements _i2.ExampleCache { ], ), returnValue: Future<_i8.ExampleBase>.value(_FakeExampleBase_6()), - ) as _i14.Future<_i8.ExampleBase>); + ) as _i13.Future<_i8.ExampleBase>); @override - _i14.Future<_i9.Example> loadSharedExample(String? id) => (super.noSuchMethod( + _i13.Future<_i9.Example> loadSharedExample(String? id) => (super.noSuchMethod( Invocation.method( #loadSharedExample, [id], ), returnValue: Future<_i9.Example>.value(_FakeExample_7()), - ) as _i14.Future<_i9.Example>); + ) as _i13.Future<_i9.Example>); @override - _i14.Future saveSnippet({ + _i13.Future saveSnippet({ List<_i18.SharedFile>? files, _i12.Sdk? sdk, String? pipelineOptions, @@ -461,16 +471,16 @@ class MockExampleCache extends _i1.Mock implements _i2.ExampleCache { }, ), returnValue: Future.value(''), - ) as _i14.Future); + ) as _i13.Future); @override - _i14.Future<_i9.Example> loadExampleInfo(_i8.ExampleBase? example) => + _i13.Future<_i9.Example> loadExampleInfo(_i8.ExampleBase? example) => (super.noSuchMethod( Invocation.method( #loadExampleInfo, [example], ), returnValue: Future<_i9.Example>.value(_FakeExample_7()), - ) as _i14.Future<_i9.Example>); + ) as _i13.Future<_i9.Example>); @override void setSelectorOpened(bool? 
value) => super.noSuchMethod( Invocation.method( @@ -480,41 +490,41 @@ class MockExampleCache extends _i1.Mock implements _i2.ExampleCache { returnValueForMissingStub: null, ); @override - _i14.Future<_i9.Example?> getDefaultExampleBySdk(_i12.Sdk? sdk) => + _i13.Future<_i9.Example?> getDefaultExampleBySdk(_i12.Sdk? sdk) => (super.noSuchMethod( Invocation.method( #getDefaultExampleBySdk, [sdk], ), returnValue: Future<_i9.Example?>.value(), - ) as _i14.Future<_i9.Example?>); + ) as _i13.Future<_i9.Example?>); @override - _i14.Future loadDefaultPrecompiledObjects() => (super.noSuchMethod( + _i13.Future loadDefaultPrecompiledObjects() => (super.noSuchMethod( Invocation.method( #loadDefaultPrecompiledObjects, [], ), returnValue: Future.value(), returnValueForMissingStub: Future.value(), - ) as _i14.Future); + ) as _i13.Future); @override - _i14.Future loadDefaultPrecompiledObjectsIfNot() => (super.noSuchMethod( + _i13.Future loadDefaultPrecompiledObjectsIfNot() => (super.noSuchMethod( Invocation.method( #loadDefaultPrecompiledObjectsIfNot, [], ), returnValue: Future.value(), returnValueForMissingStub: Future.value(), - ) as _i14.Future); + ) as _i13.Future); @override - _i14.Future<_i8.ExampleBase?> getCatalogExampleByPath(String? path) => + _i13.Future<_i8.ExampleBase?> getCatalogExampleByPath(String? path) => (super.noSuchMethod( Invocation.method( #getCatalogExampleByPath, [path], ), returnValue: Future<_i8.ExampleBase?>.value(), - ) as _i14.Future<_i8.ExampleBase?>); + ) as _i13.Future<_i8.ExampleBase?>); @override void addListener(_i15.VoidCallback? listener) => super.noSuchMethod( Invocation.method( diff --git a/playground/frontend/playground_components/test/src/controllers/example_loaders/user_shared_example_loader_test.dart b/playground/frontend/playground_components/test/src/controllers/example_loaders/user_shared_example_loader_test.dart index b3f29988a3946..a69303b7924e7 100644 --- a/playground/frontend/playground_components/test/src/controllers/example_loaders/user_shared_example_loader_test.dart +++ b/playground/frontend/playground_components/test/src/controllers/example_loaders/user_shared_example_loader_test.dart @@ -16,7 +16,6 @@ * limitations under the License. 
*/ -import 'package:easy_localization/easy_localization.dart'; import 'package:flutter_test/flutter_test.dart'; import 'package:playground_components/playground_components.dart'; import 'package:playground_components/src/controllers/example_loaders/user_shared_example_loader.dart'; @@ -24,7 +23,7 @@ import 'package:playground_components/src/controllers/example_loaders/user_share import '../../common/example_cache.dart'; void main() async { - await EasyLocalization.ensureInitialized(); + TestWidgetsFlutterBinding.ensureInitialized(); group('UserSharedExampleLoader', () { testWidgets('non-existent', (WidgetTester wt) async { diff --git a/playground/frontend/playground_components/test/src/repositories/code_repository_test.dart b/playground/frontend/playground_components/test/src/repositories/code_repository_test.dart index 757e4f57ba3f0..7615fb115d787 100644 --- a/playground/frontend/playground_components/test/src/repositories/code_repository_test.dart +++ b/playground/frontend/playground_components/test/src/repositories/code_repository_test.dart @@ -34,6 +34,7 @@ const kRequestMock = RunCodeRequest( code: 'code', sdk: Sdk.java, pipelineOptions: {}, + datasets: [], ); const kPipelineUuid = '1234'; diff --git a/playground/frontend/playground_components/test/src/repositories/dataset_grpc_extension_test.dart b/playground/frontend/playground_components/test/src/repositories/dataset_grpc_extension_test.dart new file mode 100644 index 0000000000000..ddca310c033b0 --- /dev/null +++ b/playground/frontend/playground_components/test/src/repositories/dataset_grpc_extension_test.dart @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
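`datasets` is now a required parameter of `RunCodeRequest`, which is why the mock above gains `datasets: []`. A request that actually carries emulated data might look like the following sketch (option key and path are hypothetical):

```dart
import 'package:playground_components/src/enums/emulator_type.dart';
import 'package:playground_components/src/models/dataset.dart';
import 'package:playground_components/src/models/sdk.dart';
import 'package:playground_components/src/repositories/models/run_code_request.dart';

final kafkaRequestMock = RunCodeRequest(
  code: 'code',
  sdk: Sdk.java,
  pipelineOptions: {},
  datasets: [
    Dataset(
      type: EmulatorType.kafka,
      options: {'topic': 'dataset'}, // hypothetical option key
      datasetPath: 'MOCK_PATH', // hypothetical path
    ),
  ],
);
```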
+ */ + +import 'package:flutter_test/flutter_test.dart'; +import 'package:playground_components/src/api/v1/api.pbgrpc.dart' as g; +import 'package:playground_components/src/repositories/dataset_grpc_extension.dart'; + +void main() { + final datasets = [ + // + g.Dataset( + datasetPath: 'mockPath1', + options: {'key1': 'value1'}, + type: g.EmulatorType.EMULATOR_TYPE_KAFKA, + ), + + g.Dataset( + datasetPath: 'mockPath2', + options: {'key2': 'value2'}, + type: g.EmulatorType.EMULATOR_TYPE_UNSPECIFIED, + ), + ]; + + group('Dataset extensions test.', () { + for (final dataset in datasets) { + test('Dataset with type ${dataset.type.name} converts to the same value', + () { + expect(dataset.model.grpc, dataset); + }); + } + }); +} diff --git a/playground/frontend/playground_components/test/src/repositories/emulator_type_grpc_extension_test.dart b/playground/frontend/playground_components/test/src/repositories/emulator_type_grpc_extension_test.dart new file mode 100644 index 0000000000000..627ca11a620b5 --- /dev/null +++ b/playground/frontend/playground_components/test/src/repositories/emulator_type_grpc_extension_test.dart @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import 'package:flutter_test/flutter_test.dart'; +import 'package:playground_components/src/api/v1/api.pbgrpc.dart' as g; +import 'package:playground_components/src/repositories/emulator_type_grpc_extension.dart'; + +void main() { + group('Emulator type extensions test', () { + for (final value in g.EmulatorType.values) { + test('Emulator type ${value.name} converts to the same value', () { + expect( + value.model?.grpc ?? g.EmulatorType.EMULATOR_TYPE_UNSPECIFIED, + value, + ); + }); + } + }); +} diff --git a/playground/frontend/playground_components/test/tools/extract_symbols_java/extract_symbols_java_test.dart b/playground/frontend/playground_components/test/tools/extract_symbols_java/extract_symbols_java_test.dart new file mode 100644 index 0000000000000..d944441f10a5e --- /dev/null +++ b/playground/frontend/playground_components/test/tools/extract_symbols_java/extract_symbols_java_test.dart @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import 'dart:io'; + +import 'package:flutter_test/flutter_test.dart'; + +import '../common.dart'; + +const _lang = 'java'; +const _dependenciesDir = 'test/tools/extract_symbols_$_lang/dependencies'; + +void main() { + test('Extract SDK Symbols. $_lang', () async { + final classPath = await _buildClassPath(); + await _compileClasses(classPath); + await testExtractSymbols( + language: _lang, + executables: ['java'], + arguments: [ + '-classpath', + classPath, + 'com.playground.extract_symbols.Main', + '../../test/tools/extract_symbols_$_lang/sdk_mock', + ], + ); + }); +} + +Future _buildClassPath() async { + const dependencies = [ + 'https://repo1.maven.org/maven2/com/github/javaparser/javaparser-core/3.24.9/javaparser-core-3.24.9.jar', + 'https://repo1.maven.org/maven2/com/esotericsoftware/yamlbeans/yamlbeans/1.15/yamlbeans-1.15.jar', + ]; + + await _downloadDependenciesIfNeed(dependencies); + + final workingDirectory = Directory.current.path; + + return [ + '$workingDirectory/tools/extract_symbols_$_lang/build/classes/java/main', + ...dependencies.map( + (f) => '$workingDirectory/$_dependenciesDir/${f.split('/').last}', + ), + ].join(':'); +} + +Future _downloadDependenciesIfNeed(List dependencies) async { + for (final dependency in dependencies) { + final fileName = dependency.split('/').last; + final file = File('$_dependenciesDir/$fileName'); + if (!file.existsSync()) { + final request = await HttpClient().getUrl(Uri.parse(dependency)); + final response = await request.close(); + await file.create(recursive: true); + await response.pipe(file.openWrite()); + } + } +} + +Future _compileClasses(String classPath) async { + await Process.run( + 'mkdir', + ['-p', 'build/classes/java/main'], + workingDirectory: 'tools/extract_symbols_$_lang', + ); + await Process.run( + 'javac', + [ + '-d', + 'build/classes/java/main/', + '-classpath', + classPath, + 'src/main/java/com/playground/extract_symbols/Main.java', + 'src/main/java/com/playground/extract_symbols/ClassInfo.java', + ], + workingDirectory: 'tools/extract_symbols_$_lang', + ); +} diff --git a/playground/frontend/playground_components/test/tools/extract_symbols_java/java.golden.yaml b/playground/frontend/playground_components/test/tools/extract_symbols_java/java.golden.yaml new file mode 100644 index 0000000000000..d881f0c680448 --- /dev/null +++ b/playground/frontend/playground_components/test/tools/extract_symbols_java/java.golden.yaml @@ -0,0 +1,13 @@ +ExtendedPublicClass: + methods: + - extendedPublicMethod + properties: + - extendedPublicField +PublicClass: + methods: + - anotherPublicMethod + - publicMethod + properties: + - anotherPublicField + - publicField +Test: {} diff --git a/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/DefaultClass.java b/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/DefaultClass.java new file mode 100644 index 0000000000000..62c016e83a363 --- /dev/null +++ b/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/DefaultClass.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +class DefaultClass {} diff --git a/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/KotlinClass.kt b/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/KotlinClass.kt new file mode 100644 index 0000000000000..54d56cc4f27e1 --- /dev/null +++ b/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/KotlinClass.kt @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class KotlinClass() { + public fun publicMethod() {} +} diff --git a/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/PrivateClass.java b/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/PrivateClass.java new file mode 100644 index 0000000000000..bc45d6127006c --- /dev/null +++ b/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/PrivateClass.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +private class PrivateClass { + public void publicMethod() {} +} diff --git a/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/PublicClass.java b/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/PublicClass.java new file mode 100644 index 0000000000000..978345eaacc6e --- /dev/null +++ b/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/PublicClass.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class PublicClass { + int defaultField; + public int publicField; + protected int protectedField; + private int privateField; + + void defaultMethod() {} + public void publicMethod() {} + protected void protectedMethod() {} + private void privateMethod() {} +} diff --git a/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/Test.java b/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/Test.java new file mode 100644 index 0000000000000..92029db1cb39c --- /dev/null +++ b/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/Test.java @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// This test file is in the result because it is not in .../test/... . +public class Test {} diff --git a/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/directory/ExtendedPublicClass.java b/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/directory/ExtendedPublicClass.java new file mode 100644 index 0000000000000..924f91cbac2b5 --- /dev/null +++ b/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/directory/ExtendedPublicClass.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class ExtendedPublicClass extends PublicClass { + int extendedDefaultField; + public int extendedPublicField; + protected int protectedField; + private int privateField; + + public void extendedPublicMethod() {} +} diff --git a/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/directory/PublicClass.java b/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/directory/PublicClass.java new file mode 100644 index 0000000000000..5f9eae157e502 --- /dev/null +++ b/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/directory/PublicClass.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class PublicClass { + int defaultField; + public int publicField; + public int anotherPublicField; + protected int protectedField; + private int privateField; + + void defaultMethod() {} + public void publicMethod() {} + public void publicMethod(int i) {} + public void anotherPublicMethod() {} + protected void protectedMethod() {} + private void privateMethod() {} +} diff --git a/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/test/TestFolderPublicClass.java b/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/test/TestFolderPublicClass.java new file mode 100644 index 0000000000000..6ad8cd7894979 --- /dev/null +++ b/playground/frontend/playground_components/test/tools/extract_symbols_java/sdk_mock/test/TestFolderPublicClass.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// This class is skipped because it is in .../test/... . +public class TestFolderPublicClass { + public int publicField; + + public void publicMethod() {} +} diff --git a/playground/frontend/playground_components/tools/extract_symbols_java/build.gradle b/playground/frontend/playground_components/tools/extract_symbols_java/build.gradle new file mode 100644 index 0000000000000..a71f7d4d25390 --- /dev/null +++ b/playground/frontend/playground_components/tools/extract_symbols_java/build.gradle @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +plugins { + id 'java' +} + +group 'com.playground.extract_symbols' + +repositories { + mavenCentral() +} + +ext { + javaMainClass = "com.playground.extract_symbols.Main" +} + +dependencies { + implementation group: 'com.github.javaparser', name: 'javaparser-core', version: '3.23.1' + implementation group: 'com.esotericsoftware.yamlbeans', name: 'yamlbeans', version: '1.15' +} + +tasks.register("buildJava") { + dependsOn "build" + doLast { + exec { + executable "java" + args "-classpath", sourceSets.main.runtimeClasspath.getAsPath(), javaMainClass, "../../../../../sdks/java" + standardOutput = new FileOutputStream("playground/frontend/playground_components/assets/symbols/java.g.yaml") + } + } +} diff --git a/playground/frontend/playground_components/tools/extract_symbols_java/src/main/java/com/playground/extract_symbols/ClassInfo.java b/playground/frontend/playground_components/tools/extract_symbols_java/src/main/java/com/playground/extract_symbols/ClassInfo.java new file mode 100644 index 0000000000000..be1dd88a9b2d8 --- /dev/null +++ b/playground/frontend/playground_components/tools/extract_symbols_java/src/main/java/com/playground/extract_symbols/ClassInfo.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + package com.playground.extract_symbols; + +import java.util.*; +import java.util.stream.Collectors; + +public class ClassInfo { + final Set publicMethods = new HashSet<>(); + final Set publicFields = new HashSet<>(); + + Map> toMap() { + Map> map = new HashMap<>(); + if (!publicMethods.isEmpty()) { + map.put("methods", publicMethods.stream().sorted().collect(Collectors.toList())); + } + if (!publicFields.isEmpty()) { + map.put("properties", publicFields.stream().sorted().collect(Collectors.toList())); + } + return map; + } +} diff --git a/playground/frontend/playground_components/tools/extract_symbols_java/src/main/java/com/playground/extract_symbols/Main.java b/playground/frontend/playground_components/tools/extract_symbols_java/src/main/java/com/playground/extract_symbols/Main.java new file mode 100644 index 0000000000000..7d6f81597fd82 --- /dev/null +++ b/playground/frontend/playground_components/tools/extract_symbols_java/src/main/java/com/playground/extract_symbols/Main.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.playground.extract_symbols; + +import com.esotericsoftware.yamlbeans.YamlConfig; +import com.esotericsoftware.yamlbeans.YamlException; +import com.esotericsoftware.yamlbeans.YamlWriter; +import com.github.javaparser.ParseProblemException; +import com.github.javaparser.StaticJavaParser; +import com.github.javaparser.ast.CompilationUnit; +import com.github.javaparser.ast.body.ClassOrInterfaceDeclaration; +import com.github.javaparser.ast.body.FieldDeclaration; +import com.github.javaparser.ast.body.MethodDeclaration; + +import java.io.File; +import java.io.IOException; +import java.io.StringWriter; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; + +public class Main { + public static void main(String[] args) throws IOException { + final String sdkPath = args[0]; + final HashMap classInfoMap = getDirSymbols(sdkPath); + final String yamlString = buildYamlString(classInfoMap); + System.out.print(yamlString); + } + + private static HashMap getDirSymbols(String sdkPathString) throws IOException { + final HashMap classInfoMap = new HashMap<>(); + final Path sdkPath = new File(sdkPathString).toPath().toAbsolutePath(); + Files.walk(sdkPath).forEach(path -> { + String stringPath = path.toString(); + final String relativePath = sdkPath.relativize(path).toString(); + if (isJavaNonTestFile(relativePath)) { + String fileName = stringPath.substring(stringPath.lastIndexOf("/") + 1).replace(".java", ""); + try { + CompilationUnit unit = StaticJavaParser.parse(path); + if (unit.getClassByName(fileName).isPresent()) { + addClassSymbols(classInfoMap, unit.getClassByName(fileName).get()); + } + } catch (IOException | ParseProblemException ignored) { + } + } + }); + + return classInfoMap; + } + + static boolean isJavaNonTestFile(String stringPath) { + final boolean isInTestFolder = stringPath.contains("/test/") || stringPath.startsWith("test/"); + return stringPath.endsWith(".java") && !isInTestFolder; + } + + private static void addClassSymbols(HashMap classInfoList, ClassOrInterfaceDeclaration cl) { + if (!cl.isPublic()) { + return; + } + + ClassInfo classInfo; + if (classInfoList.containsKey(cl.getNameAsString())) { + classInfo = classInfoList.get(cl.getNameAsString()); + } else { + classInfo = new ClassInfo(); + classInfoList.put(cl.getNameAsString(), classInfo); + } + + cl.findAll(MethodDeclaration.class).forEach(method -> { + if (method.isPublic()) { + classInfo.publicMethods.add(method.getNameAsString()); + } + }); + cl.findAll(FieldDeclaration.class).forEach(field -> { + if (field.isPublic()) { + classInfo.publicFields.add(field.getVariable(0).getNameAsString()); + } + }); + } + + private static String buildYamlString(HashMap classInfoMap) throws YamlException { + final StringWriter stringWriter = new StringWriter(); + final YamlWriter yamlWriter = new YamlWriter(stringWriter); + yamlWriter.getConfig().writeConfig.setIndentSize(2); + yamlWriter.getConfig().writeConfig.setWriteClassname(YamlConfig.WriteClassName.NEVER); + final LinkedHashMap>> yamlMap = new LinkedHashMap<>(); + + classInfoMap.forEach((key, value) -> yamlMap.put(key, value.toMap())); + final LinkedHashMap>> sortedMap = sortMap(yamlMap); + + yamlWriter.write(sortedMap); + + yamlWriter.close(); + return stringWriter.toString(); + } + + private static LinkedHashMap>> sortMap(HashMap>> yamlMap) { + final Comparator> comparator = Comparator.comparing(Map.Entry::getKey); + final ArrayList>>> array = new ArrayList<>(yamlMap.entrySet()); + array.sort(comparator); + + final LinkedHashMap>> sortedMap 
= new LinkedHashMap<>(); + for (Map.Entry>> entry : array) { + sortedMap.put(entry.getKey(), entry.getValue()); + } + return sortedMap; + } +} diff --git a/playground/frontend/playground_components/CHANGELOG.md b/playground/frontend/playground_components_dev/README.md similarity index 83% rename from playground/frontend/playground_components/CHANGELOG.md rename to playground/frontend/playground_components_dev/README.md index 504fa05fe23cf..cf1f2678a28da 100644 --- a/playground/frontend/playground_components/CHANGELOG.md +++ b/playground/frontend/playground_components_dev/README.md @@ -17,6 +17,7 @@ under the License. --> -## 0.0.1 +# playground_components_dev -* TODO: Describe initial release. +This is a non-pub.dev Flutter package that contains +helpers for testing [playground_components](../playground_components) package. diff --git a/playground/frontend/playground_components_dev/analysis_options.yaml b/playground/frontend/playground_components_dev/analysis_options.yaml new file mode 100644 index 0000000000000..fe2e0e8eb952c --- /dev/null +++ b/playground/frontend/playground_components_dev/analysis_options.yaml @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +include: package:total_lints/app.yaml diff --git a/playground/frontend/playground_components_dev/lib/playground_components_dev.dart b/playground/frontend/playground_components_dev/lib/playground_components_dev.dart new file mode 100644 index 0000000000000..19a653be0add2 --- /dev/null +++ b/playground/frontend/playground_components_dev/lib/playground_components_dev.dart @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
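The extractor prints a YAML map from public class names to their public methods and properties, as in `java.golden.yaml` above. A sketch of reading that shape on the Dart side, assuming `package:yaml` is available; the inline document is illustrative only:

```dart
import 'package:yaml/yaml.dart';

// Illustrative document mirroring the golden file's shape.
const _sample = '''
PublicClass:
  methods:
    - publicMethod
  properties:
    - publicField
Test: {}
''';

void main() {
  final doc = loadYaml(_sample) as YamlMap;

  for (final entry in doc.entries) {
    final members = entry.value as YamlMap;
    final methods = members['methods'] as YamlList?;
    print('${entry.key}: ${methods?.length ?? 0} public method(s)');
  }
}
```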
+ */ + +export 'src/common_finders.dart'; +export 'src/example_names.dart'; +export 'src/example_outputs.dart'; +export 'src/example_paths.dart'; +export 'src/examples.dart'; +export 'src/expect.dart'; +export 'src/finder.dart'; +export 'src/string.dart'; +export 'src/widget_tester.dart'; diff --git a/playground/frontend/playground_components_dev/lib/src/code.dart b/playground/frontend/playground_components_dev/lib/src/code.dart new file mode 100644 index 0000000000000..2f85ac9f029db --- /dev/null +++ b/playground/frontend/playground_components_dev/lib/src/code.dart @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import 'package:flutter_code_editor/flutter_code_editor.dart'; +import 'package:highlight/highlight_core.dart'; + +String foldLicenseAndImports(String text, Mode language) { + final controller = CodeController( + text: text, + language: language, + ); + + controller.foldCommentAtLineZero(); + controller.foldImports(); + + return controller.text; +} diff --git a/playground/frontend/playground_components_dev/lib/src/common_finders.dart b/playground/frontend/playground_components_dev/lib/src/common_finders.dart new file mode 100644 index 0000000000000..4180ae02a0d14 --- /dev/null +++ b/playground/frontend/playground_components_dev/lib/src/common_finders.dart @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import 'package:flutter/material.dart'; +import 'package:flutter_code_editor/flutter_code_editor.dart'; +import 'package:flutter_test/flutter_test.dart'; +import 'package:playground_components/playground_components.dart'; + +extension CommonFindersExtension on CommonFinders { + Finder codeField() { + return byType(CodeField); + } + + Finder graphTab() { + // TODO(alexeyinkin): Use keys when output tabs get to use enum, https://github.com/apache/beam/issues/22663 + return widgetWithText(OutputTab, 'Graph'); + } + + Finder outputArea() { + return byType(OutputArea); + } + + Finder outputSelectableText() { + final outputArea = find.outputArea(); + return find.descendant( + of: outputArea, + matching: find.byType(SelectableText), + ); + } + + Finder outputWidget() { + return byType(OutputWidget); + } + + Finder resultTab() { + // TODO(alexeyinkin): Use keys when output tabs get to use enum, https://github.com/apache/beam/issues/22663 + return widgetWithText(OutputTab, 'Result'); + } + + Finder runOrCancelButton() { + return byType(RunOrCancelButton); + } + + Finder splitView() { + return byType(SplitView); + } + + Finder toggleThemeButton() { + return byType(ToggleThemeButton); + } +} diff --git a/playground/frontend/playground_components_dev/lib/src/example_names.dart b/playground/frontend/playground_components_dev/lib/src/example_names.dart new file mode 100644 index 0000000000000..204a6578ecd5d --- /dev/null +++ b/playground/frontend/playground_components_dev/lib/src/example_names.dart @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +class ExampleNames { + static const aggregationMax = 'AggregationMax'; + static const aggregationMean = 'AggregationMean'; +} diff --git a/playground/frontend/playground_components_dev/lib/src/example_outputs.dart b/playground/frontend/playground_components_dev/lib/src/example_outputs.dart new file mode 100644 index 0000000000000..5a548be683c84 --- /dev/null +++ b/playground/frontend/playground_components_dev/lib/src/example_outputs.dart @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +class ExampleOutputs { + static const javaAggregationMaxTail = 'INFO: 10\n'; + + static const pythonAggregationMeanContains = + '16 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]'; + + static const pythonWordCountWithMetricsTail = 'average word length: 4\n'; +} diff --git a/playground/frontend/playground_components_dev/lib/src/example_paths.dart b/playground/frontend/playground_components_dev/lib/src/example_paths.dart new file mode 100644 index 0000000000000..f0f554c79226e --- /dev/null +++ b/playground/frontend/playground_components_dev/lib/src/example_paths.dart @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +class ExamplePaths { + static const javaAggregationMax = + '/learning/katas/java/Common Transforms/Aggregation/Max/src/org/apache/beam/learning/katas/commontransforms/aggregation/max/Task.java'; + static const javaMinimalWordCount = + '/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java'; + + static const pythonAggregationMean = + '/learning/katas/python/Common Transforms/Aggregation/Mean/task.py'; + static const pythonMinimalWordCountWithMetrics = + '/sdks/python/apache_beam/examples/wordcount_with_metrics.py'; +} diff --git a/playground/frontend/playground_components_dev/lib/src/examples.dart b/playground/frontend/playground_components_dev/lib/src/examples.dart new file mode 100644 index 0000000000000..c558133d742e0 --- /dev/null +++ b/playground/frontend/playground_components_dev/lib/src/examples.dart @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import 'package:highlight/highlight_core.dart'; +import 'package:http/http.dart' as http; + +import 'code.dart'; + +class Examples { + static const _repoAndBranch = 'apache/beam/master'; + + static Future<String> getVisibleTextByPath(String path, Mode language) async { + final uri = + Uri.parse('https://raw.githubusercontent.com/$_repoAndBranch$path'); + final response = await http.get(uri); + final content = response.body; + + return foldLicenseAndImports(content, language); + } +} diff --git a/playground/frontend/playground_components_dev/lib/src/expect.dart b/playground/frontend/playground_components_dev/lib/src/expect.dart new file mode 100644 index 0000000000000..34e338dbec7bc --- /dev/null +++ b/playground/frontend/playground_components_dev/lib/src/expect.dart @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import 'package:flutter_test/flutter_test.dart'; + +import 'widget_tester.dart'; + +void expectOutput(String text, WidgetTester wt) { + final actualText = wt.findOutputText(); + expect(actualText, text); +} + +void expectOutputContains(String text, WidgetTester wt) { + final actualText = wt.findOutputText(); + expect(actualText, contains(text)); +} + +void expectOutputEndsWith(String text, WidgetTester wt) { + final actualText = wt.findOutputText(); + expect(actualText, endsWith(text)); +} diff --git a/playground/frontend/playground_components_dev/lib/src/finder.dart b/playground/frontend/playground_components_dev/lib/src/finder.dart new file mode 100644 index 0000000000000..72d2dd86a3895 --- /dev/null +++ b/playground/frontend/playground_components_dev/lib/src/finder.dart @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +import 'package:flutter/widgets.dart'; +import 'package:flutter_test/flutter_test.dart'; + +extension FinderExtension on Finder { + // TODO(alexeyinkin): Push to Flutter or wait for them to make their own, https://github.com/flutter/flutter/issues/117675 + Finder and(Finder another) { + return _AndFinder(this, another); + } +} + +class _AndFinder extends ChainedFinder { + _AndFinder(super.parent, this.another); + + final Finder another; + + @override + String get description => '${parent.description} AND ${another.description}'; + + @override + Iterable<Element> filter(Iterable<Element> parentCandidates) { + return another.apply(parentCandidates); + } +} diff --git a/playground/frontend/playground_components_dev/lib/src/string.dart b/playground/frontend/playground_components_dev/lib/src/string.dart new file mode 100644 index 0000000000000..7f300aeeb980d --- /dev/null +++ b/playground/frontend/playground_components_dev/lib/src/string.dart @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import 'package:flutter/widgets.dart'; +import 'package:flutter_code_editor/flutter_code_editor.dart'; + +extension StringExtension on String { + /// Whether this is different from [another] only by cutting a single range + /// of zero or more characters. + bool isAsIfCutFrom(String another) { + final range = getChangedRange( + another, + attributeChangeTo: TextAffinity.downstream, + ); + + return range.isCollapsed; + } +} diff --git a/playground/frontend/playground_components_dev/lib/src/widget_tester.dart b/playground/frontend/playground_components_dev/lib/src/widget_tester.dart new file mode 100644 index 0000000000000..6a833f0bcb8c1 --- /dev/null +++ b/playground/frontend/playground_components_dev/lib/src/widget_tester.dart @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +import 'package:flutter/material.dart'; +import 'package:flutter_code_editor/flutter_code_editor.dart'; +import 'package:flutter_test/flutter_test.dart'; +import 'package:playground_components/playground_components.dart'; +import 'package:provider/provider.dart'; + +import 'common_finders.dart'; + +extension WidgetTesterExtension on WidgetTester { + CodeController findOneCodeController() { + final codeField = find.codeField(); + expect(codeField, findsOneWidget); + + return widget<CodeField>(codeField).controller; + } + + TabController findOutputTabController() { + final outputTabs = find.byType(OutputTabs); + expect(outputTabs, findsOneWidget); + + return widget<OutputTabs>(outputTabs).tabController; + } + + String? findOutputText() { + final selectableText = find.outputSelectableText(); + expect(selectableText, findsOneWidget); + + return widget<SelectableText>(selectableText).data; + } + + PlaygroundController findPlaygroundController() { + final context = element(find.codeField()); + return context.read<PlaygroundController>(); + } +} diff --git a/playground/frontend/playground_components_dev/pubspec.yaml b/playground/frontend/playground_components_dev/pubspec.yaml new file mode 100644 index 0000000000000..a4998c7c1bfd3 --- /dev/null +++ b/playground/frontend/playground_components_dev/pubspec.yaml @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
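The finders, `WidgetTester` extensions, and `expect*` helpers added above are meant to be composed in the integration tests of the consuming playground app. A minimal sketch of such a test, assuming the app has already been pumped by its own bootstrap code; the test name, timing, and the chosen example constant are illustrative, not part of this patch:

```dart
// Illustrative sketch only: composes the playground_components_dev helpers.
import 'package:flutter_test/flutter_test.dart';
import 'package:integration_test/integration_test.dart';
import 'package:playground_components_dev/playground_components_dev.dart';

void main() {
  IntegrationTestWidgetsFlutterBinding.ensureInitialized();

  testWidgets('runs an example and checks its output', (wt) async {
    // Assumption: the consuming app's own helper has already pumped the app.

    // The code editor must be present and pre-filled with an example.
    final controller = wt.findOneCodeController();
    expect(controller.text, isNotEmpty);

    // Start the run through the standard run/cancel button. A real test
    // would wait for completion with the consuming app's own helpers.
    await wt.tap(find.runOrCancelButton());
    await wt.pumpAndSettle();

    // Compare the run output against an expected constant.
    expectOutputEndsWith(ExampleOutputs.pythonWordCountWithMetricsTail, wt);
  });
}
```

In this patch, the actual wiring lives in `playground/frontend`: `integration_test` and `playground_components_dev` are added as dev dependencies in its `pubspec.yaml`, and `test_driver/integration_test.dart` exposes the standard `integrationDriver()` entry point for `flutter drive`.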
+ +name: playground_components_dev +description: Helpers for testing playground_components package +version: 0.0.1 +publish_to: none + +environment: + sdk: '>=2.18.1 <4.0.0' + flutter: '>=3.3.2' + +dependencies: + flutter: { sdk: flutter } + flutter_code_editor: ^0.2.4 + flutter_test: { sdk: flutter } + highlight: ^0.7.0 + http: ^0.13.5 + playground_components: { path: ../playground_components } + provider: ^6.0.3 + total_lints: ^2.18.0 diff --git a/playground/frontend/pubspec.lock b/playground/frontend/pubspec.lock index af95acc0757f2..db8ae40f11143 100644 --- a/playground/frontend/pubspec.lock +++ b/playground/frontend/pubspec.lock @@ -35,14 +35,14 @@ packages: name: app_state url: "https://pub.dartlang.org" source: hosted - version: "0.8.3" + version: "0.8.4" archive: dependency: transitive description: name: archive url: "https://pub.dartlang.org" source: hosted - version: "3.3.1" + version: "3.3.0" args: dependency: transitive description: @@ -287,12 +287,17 @@ packages: source: sdk version: "0.0.0" flutter_code_editor: - dependency: transitive + dependency: "direct dev" description: name: flutter_code_editor url: "https://pub.dartlang.org" source: hosted - version: "0.2.1" + version: "0.2.4" + flutter_driver: + dependency: transitive + description: flutter + source: sdk + version: "0.0.0" flutter_highlight: dependency: transitive description: @@ -357,6 +362,11 @@ packages: url: "https://pub.dartlang.org" source: hosted version: "2.1.3" + fuchsia_remote_debug_protocol: + dependency: transitive + description: flutter + source: sdk + version: "0.0.0" get_it: dependency: "direct main" description: @@ -448,6 +458,11 @@ packages: url: "https://pub.dartlang.org" source: hosted version: "4.0.1" + integration_test: + dependency: "direct dev" + description: flutter + source: sdk + version: "0.0.0" intl: dependency: "direct main" description: @@ -651,6 +666,13 @@ packages: relative: true source: path version: "0.0.1" + playground_components_dev: + dependency: "direct dev" + description: + path: playground_components_dev + relative: true + source: path + version: "0.0.1" plugin_platform_interface: dependency: transitive description: @@ -831,6 +853,13 @@ packages: url: "https://pub.dartlang.org" source: hosted version: "1.1.1" + sync_http: + dependency: transitive + description: + name: sync_http + url: "https://pub.dartlang.org" + source: hosted + version: "0.3.1" term_glyph: dependency: transitive description: @@ -852,6 +881,13 @@ packages: url: "https://pub.dartlang.org" source: hosted version: "1.0.0" + total_lints: + dependency: transitive + description: + name: total_lints + url: "https://pub.dartlang.org" + source: hosted + version: "2.18.0" tuple: dependency: transitive description: @@ -957,6 +993,13 @@ packages: url: "https://pub.dartlang.org" source: hosted version: "2.1.2" + vm_service: + dependency: transitive + description: + name: vm_service + url: "https://pub.dartlang.org" + source: hosted + version: "9.0.0" watcher: dependency: transitive description: @@ -978,6 +1021,13 @@ packages: url: "https://pub.dartlang.org" source: hosted version: "2.2.0" + webdriver: + dependency: transitive + description: + name: webdriver + url: "https://pub.dartlang.org" + source: hosted + version: "3.0.0" win32: dependency: transitive description: diff --git a/playground/frontend/pubspec.yaml b/playground/frontend/pubspec.yaml index 655948c14426e..10b92e26371f9 100644 --- a/playground/frontend/pubspec.yaml +++ b/playground/frontend/pubspec.yaml @@ -27,7 +27,7 @@ environment: dependencies: 
akvelon_flutter_issue_106664_workaround: ^0.1.2 aligned_dialog: ^0.0.6 - app_state: ^0.8.3 + app_state: ^0.8.4 collection: ^1.15.0 easy_localization: ^3.0.1 easy_localization_ext: ^0.1.1 @@ -53,9 +53,12 @@ dependencies: dev_dependencies: build_runner: ^2.1.4 fake_async: ^1.3.0 + flutter_code_editor: ^0.2.4 flutter_lints: ^2.0.1 flutter_test: { sdk: flutter } + integration_test: { sdk: flutter } mockito: ^5.0.16 + playground_components_dev: { path: playground_components_dev } flutter: assets: @@ -64,3 +67,6 @@ flutter: generate: true uses-material-design: true + +flutter_gen: + output: lib/src/assets/ diff --git a/playground/frontend/test_driver/integration_test.dart b/playground/frontend/test_driver/integration_test.dart new file mode 100644 index 0000000000000..6b59b37dd1291 --- /dev/null +++ b/playground/frontend/test_driver/integration_test.dart @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import 'package:integration_test/integration_test_driver.dart'; + +Future main() => integrationDriver(); diff --git a/playground/infrastructure/api/v1/api_pb2.py b/playground/infrastructure/api/v1/api_pb2.py index 5b2ae00e84388..aaf5ceab13a4e 100644 --- a/playground/infrastructure/api/v1/api_pb2.py +++ b/playground/infrastructure/api/v1/api_pb2.py @@ -1,27 +1,10 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: api.proto +# source: api/v1/api.proto """Generated protocol buffer code.""" -from google.protobuf.internal import enum_type_wrapper +from google.protobuf.internal import builder as _builder from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database # @@protoc_insertion_point(imports) @@ -30,507 +13,114 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\tapi.proto\x12\x06\x61pi.v1\"\xa2\x01\n\x07\x44\x61taset\x12\"\n\x04type\x18\x01 \x01(\x0e\x32\x14.api.v1.EmulatorType\x12-\n\x07options\x18\x02 \x03(\x0b\x32\x1c.api.v1.Dataset.OptionsEntry\x12\x14\n\x0c\x64\x61taset_path\x18\x03 \x01(\t\x1a.\n\x0cOptionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"u\n\x0eRunCodeRequest\x12\x0c\n\x04\x63ode\x18\x01 \x01(\t\x12\x18\n\x03sdk\x18\x02 \x01(\x0e\x32\x0b.api.v1.Sdk\x12\x18\n\x10pipeline_options\x18\x03 \x01(\t\x12!\n\x08\x64\x61tasets\x18\x04 \x03(\x0b\x32\x0f.api.v1.Dataset\"(\n\x0fRunCodeResponse\x12\x15\n\rpipeline_uuid\x18\x01 \x01(\t\"+\n\x12\x43heckStatusRequest\x12\x15\n\rpipeline_uuid\x18\x01 \x01(\t\"5\n\x13\x43heckStatusResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0e\x32\x0e.api.v1.Status\"3\n\x1aGetValidationOutputRequest\x12\x15\n\rpipeline_uuid\x18\x01 \x01(\t\"-\n\x1bGetValidationOutputResponse\x12\x0e\n\x06output\x18\x01 \x01(\t\"4\n\x1bGetPreparationOutputRequest\x12\x15\n\rpipeline_uuid\x18\x01 \x01(\t\".\n\x1cGetPreparationOutputResponse\x12\x0e\n\x06output\x18\x01 \x01(\t\"0\n\x17GetCompileOutputRequest\x12\x15\n\rpipeline_uuid\x18\x01 \x01(\t\"*\n\x18GetCompileOutputResponse\x12\x0e\n\x06output\x18\x01 \x01(\t\",\n\x13GetRunOutputRequest\x12\x15\n\rpipeline_uuid\x18\x01 \x01(\t\"&\n\x14GetRunOutputResponse\x12\x0e\n\x06output\x18\x01 \x01(\t\"+\n\x12GetRunErrorRequest\x12\x15\n\rpipeline_uuid\x18\x01 \x01(\t\"%\n\x13GetRunErrorResponse\x12\x0e\n\x06output\x18\x01 \x01(\t\"\'\n\x0eGetLogsRequest\x12\x15\n\rpipeline_uuid\x18\x01 \x01(\t\"!\n\x0fGetLogsResponse\x12\x0e\n\x06output\x18\x01 \x01(\t\"(\n\x0fGetGraphRequest\x12\x15\n\rpipeline_uuid\x18\x01 \x01(\t\"!\n\x10GetGraphResponse\x12\r\n\x05graph\x18\x01 \x01(\t\"&\n\rCancelRequest\x12\x15\n\rpipeline_uuid\x18\x01 \x01(\t\"\x10\n\x0e\x43\x61ncelResponse\"\xd4\x02\n\x11PrecompiledObject\x12\x12\n\ncloud_path\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x03 \x01(\t\x12+\n\x04type\x18\x04 \x01(\x0e\x32\x1d.api.v1.PrecompiledObjectType\x12\x18\n\x10pipeline_options\x18\x05 \x01(\t\x12\x0c\n\x04link\x18\x06 \x01(\t\x12\x11\n\tmultifile\x18\x07 \x01(\x08\x12\x14\n\x0c\x63ontext_line\x18\x08 \x01(\x05\x12\x17\n\x0f\x64\x65\x66\x61ult_example\x18\t \x01(\x08\x12\x18\n\x03sdk\x18\n \x01(\x0e\x32\x0b.api.v1.Sdk\x12&\n\ncomplexity\x18\x0b \x01(\x0e\x32\x12.api.v1.Complexity\x12\x0c\n\x04tags\x18\x0c \x03(\t\x12!\n\x08\x64\x61tasets\x18\r \x03(\x0b\x32\x0f.api.v1.Dataset\"\xb2\x01\n\nCategories\x12\x18\n\x03sdk\x18\x01 \x01(\x0e\x32\x0b.api.v1.Sdk\x12/\n\ncategories\x18\x02 \x03(\x0b\x32\x1b.api.v1.Categories.Category\x1aY\n\x08\x43\x61tegory\x12\x15\n\rcategory_name\x18\x01 \x01(\t\x12\x36\n\x13precompiled_objects\x18\x02 \x03(\x0b\x32\x19.api.v1.PrecompiledObject\"J\n\x1cGetPrecompiledObjectsRequest\x12\x18\n\x03sdk\x18\x01 \x01(\x0e\x32\x0b.api.v1.Sdk\x12\x10\n\x08\x63\x61tegory\x18\x02 
\x01(\t\"1\n\x1bGetPrecompiledObjectRequest\x12\x12\n\ncloud_path\x18\x01 \x01(\t\"5\n\x1fGetPrecompiledObjectCodeRequest\x12\x12\n\ncloud_path\x18\x01 \x01(\t\"7\n!GetPrecompiledObjectOutputRequest\x12\x12\n\ncloud_path\x18\x01 \x01(\t\"5\n\x1fGetPrecompiledObjectLogsRequest\x12\x12\n\ncloud_path\x18\x01 \x01(\t\"6\n GetPrecompiledObjectGraphRequest\x12\x12\n\ncloud_path\x18\x01 \x01(\t\">\n\"GetDefaultPrecompiledObjectRequest\x12\x18\n\x03sdk\x18\x01 \x01(\x0e\x32\x0b.api.v1.Sdk\"K\n\x1dGetPrecompiledObjectsResponse\x12*\n\x0esdk_categories\x18\x01 \x03(\x0b\x32\x12.api.v1.Categories\"U\n\x1cGetPrecompiledObjectResponse\x12\x35\n\x12precompiled_object\x18\x01 \x01(\x0b\x32\x19.api.v1.PrecompiledObject\"0\n GetPrecompiledObjectCodeResponse\x12\x0c\n\x04\x63ode\x18\x01 \x01(\t\"4\n\"GetPrecompiledObjectOutputResponse\x12\x0e\n\x06output\x18\x01 \x01(\t\"2\n GetPrecompiledObjectLogsResponse\x12\x0e\n\x06output\x18\x01 \x01(\t\"2\n!GetPrecompiledObjectGraphResponse\x12\r\n\x05graph\x18\x01 \x01(\t\"\\\n#GetDefaultPrecompiledObjectResponse\x12\x35\n\x12precompiled_object\x18\x01 \x01(\x0b\x32\x19.api.v1.PrecompiledObject\"=\n\x0bSnippetFile\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t\x12\x0f\n\x07is_main\x18\x03 \x01(\x08\"\x94\x01\n\x12SaveSnippetRequest\x12\"\n\x05\x66iles\x18\x01 \x03(\x0b\x32\x13.api.v1.SnippetFile\x12\x18\n\x03sdk\x18\x02 \x01(\x0e\x32\x0b.api.v1.Sdk\x12\x18\n\x10pipeline_options\x18\x03 \x01(\t\x12&\n\ncomplexity\x18\x04 \x01(\x0e\x32\x12.api.v1.Complexity\"!\n\x13SaveSnippetResponse\x12\n\n\x02id\x18\x01 \x01(\t\"\x1f\n\x11GetSnippetRequest\x12\n\n\x02id\x18\x01 \x01(\t\"\x94\x01\n\x12GetSnippetResponse\x12\"\n\x05\x66iles\x18\x01 \x03(\x0b\x32\x13.api.v1.SnippetFile\x12\x18\n\x03sdk\x18\x02 \x01(\x0e\x32\x0b.api.v1.Sdk\x12\x18\n\x10pipeline_options\x18\x03 \x01(\t\x12&\n\ncomplexity\x18\x04 \x01(\x0e\x32\x12.api.v1.Complexity*R\n\x03Sdk\x12\x13\n\x0fSDK_UNSPECIFIED\x10\x00\x12\x0c\n\x08SDK_JAVA\x10\x01\x12\n\n\x06SDK_GO\x10\x02\x12\x0e\n\nSDK_PYTHON\x10\x03\x12\x0c\n\x08SDK_SCIO\x10\x04*\xb8\x02\n\x06Status\x12\x16\n\x12STATUS_UNSPECIFIED\x10\x00\x12\x15\n\x11STATUS_VALIDATING\x10\x01\x12\x1b\n\x17STATUS_VALIDATION_ERROR\x10\x02\x12\x14\n\x10STATUS_PREPARING\x10\x03\x12\x1c\n\x18STATUS_PREPARATION_ERROR\x10\x04\x12\x14\n\x10STATUS_COMPILING\x10\x05\x12\x18\n\x14STATUS_COMPILE_ERROR\x10\x06\x12\x14\n\x10STATUS_EXECUTING\x10\x07\x12\x13\n\x0fSTATUS_FINISHED\x10\x08\x12\x14\n\x10STATUS_RUN_ERROR\x10\t\x12\x10\n\x0cSTATUS_ERROR\x10\n\x12\x16\n\x12STATUS_RUN_TIMEOUT\x10\x0b\x12\x13\n\x0fSTATUS_CANCELED\x10\x0c*\xae\x01\n\x15PrecompiledObjectType\x12\'\n#PRECOMPILED_OBJECT_TYPE_UNSPECIFIED\x10\x00\x12#\n\x1fPRECOMPILED_OBJECT_TYPE_EXAMPLE\x10\x01\x12 
\n\x1cPRECOMPILED_OBJECT_TYPE_KATA\x10\x02\x12%\n!PRECOMPILED_OBJECT_TYPE_UNIT_TEST\x10\x03*n\n\nComplexity\x12\x1a\n\x16\x43OMPLEXITY_UNSPECIFIED\x10\x00\x12\x14\n\x10\x43OMPLEXITY_BASIC\x10\x01\x12\x15\n\x11\x43OMPLEXITY_MEDIUM\x10\x02\x12\x17\n\x13\x43OMPLEXITY_ADVANCED\x10\x03*F\n\x0c\x45mulatorType\x12\x1d\n\x19\x45MULATOR_TYPE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45MULATOR_TYPE_KAFKA\x10\x01\x32\x8b\r\n\x11PlaygroundService\x12:\n\x07RunCode\x12\x16.api.v1.RunCodeRequest\x1a\x17.api.v1.RunCodeResponse\x12\x46\n\x0b\x43heckStatus\x12\x1a.api.v1.CheckStatusRequest\x1a\x1b.api.v1.CheckStatusResponse\x12I\n\x0cGetRunOutput\x12\x1b.api.v1.GetRunOutputRequest\x1a\x1c.api.v1.GetRunOutputResponse\x12:\n\x07GetLogs\x12\x16.api.v1.GetLogsRequest\x1a\x17.api.v1.GetLogsResponse\x12=\n\x08GetGraph\x12\x17.api.v1.GetGraphRequest\x1a\x18.api.v1.GetGraphResponse\x12\x46\n\x0bGetRunError\x12\x1a.api.v1.GetRunErrorRequest\x1a\x1b.api.v1.GetRunErrorResponse\x12^\n\x13GetValidationOutput\x12\".api.v1.GetValidationOutputRequest\x1a#.api.v1.GetValidationOutputResponse\x12\x61\n\x14GetPreparationOutput\x12#.api.v1.GetPreparationOutputRequest\x1a$.api.v1.GetPreparationOutputResponse\x12U\n\x10GetCompileOutput\x12\x1f.api.v1.GetCompileOutputRequest\x1a .api.v1.GetCompileOutputResponse\x12\x37\n\x06\x43\x61ncel\x12\x15.api.v1.CancelRequest\x1a\x16.api.v1.CancelResponse\x12\x64\n\x15GetPrecompiledObjects\x12$.api.v1.GetPrecompiledObjectsRequest\x1a%.api.v1.GetPrecompiledObjectsResponse\x12\x61\n\x14GetPrecompiledObject\x12#.api.v1.GetPrecompiledObjectRequest\x1a$.api.v1.GetPrecompiledObjectResponse\x12m\n\x18GetPrecompiledObjectCode\x12\'.api.v1.GetPrecompiledObjectCodeRequest\x1a(.api.v1.GetPrecompiledObjectCodeResponse\x12s\n\x1aGetPrecompiledObjectOutput\x12).api.v1.GetPrecompiledObjectOutputRequest\x1a*.api.v1.GetPrecompiledObjectOutputResponse\x12m\n\x18GetPrecompiledObjectLogs\x12\'.api.v1.GetPrecompiledObjectLogsRequest\x1a(.api.v1.GetPrecompiledObjectLogsResponse\x12p\n\x19GetPrecompiledObjectGraph\x12(.api.v1.GetPrecompiledObjectGraphRequest\x1a).api.v1.GetPrecompiledObjectGraphResponse\x12v\n\x1bGetDefaultPrecompiledObject\x12*.api.v1.GetDefaultPrecompiledObjectRequest\x1a+.api.v1.GetDefaultPrecompiledObjectResponse\x12\x46\n\x0bSaveSnippet\x12\x1a.api.v1.SaveSnippetRequest\x1a\x1b.api.v1.SaveSnippetResponse\x12\x43\n\nGetSnippet\x12\x19.api.v1.GetSnippetRequest\x1a\x1a.api.v1.GetSnippetResponseB8Z6beam.apache.org/playground/backend/internal;playgroundb\x06proto3') - -_SDK = DESCRIPTOR.enum_types_by_name['Sdk'] -Sdk = enum_type_wrapper.EnumTypeWrapper(_SDK) -_STATUS = DESCRIPTOR.enum_types_by_name['Status'] -Status = enum_type_wrapper.EnumTypeWrapper(_STATUS) -_PRECOMPILEDOBJECTTYPE = DESCRIPTOR.enum_types_by_name['PrecompiledObjectType'] -PrecompiledObjectType = enum_type_wrapper.EnumTypeWrapper(_PRECOMPILEDOBJECTTYPE) -_COMPLEXITY = DESCRIPTOR.enum_types_by_name['Complexity'] -Complexity = enum_type_wrapper.EnumTypeWrapper(_COMPLEXITY) -_EMULATORTYPE = DESCRIPTOR.enum_types_by_name['EmulatorType'] -EmulatorType = enum_type_wrapper.EnumTypeWrapper(_EMULATORTYPE) -SDK_UNSPECIFIED = 0 -SDK_JAVA = 1 -SDK_GO = 2 -SDK_PYTHON = 3 -SDK_SCIO = 4 -STATUS_UNSPECIFIED = 0 -STATUS_VALIDATING = 1 -STATUS_VALIDATION_ERROR = 2 -STATUS_PREPARING = 3 -STATUS_PREPARATION_ERROR = 4 -STATUS_COMPILING = 5 -STATUS_COMPILE_ERROR = 6 -STATUS_EXECUTING = 7 -STATUS_FINISHED = 8 -STATUS_RUN_ERROR = 9 -STATUS_ERROR = 10 -STATUS_RUN_TIMEOUT = 11 -STATUS_CANCELED = 12 -PRECOMPILED_OBJECT_TYPE_UNSPECIFIED = 0 
-PRECOMPILED_OBJECT_TYPE_EXAMPLE = 1 -PRECOMPILED_OBJECT_TYPE_KATA = 2 -PRECOMPILED_OBJECT_TYPE_UNIT_TEST = 3 -COMPLEXITY_UNSPECIFIED = 0 -COMPLEXITY_BASIC = 1 -COMPLEXITY_MEDIUM = 2 -COMPLEXITY_ADVANCED = 3 -EMULATOR_TYPE_UNSPECIFIED = 0 -EMULATOR_TYPE_KAFKA = 1 - - -_DATASET = DESCRIPTOR.message_types_by_name['Dataset'] -_DATASET_OPTIONSENTRY = _DATASET.nested_types_by_name['OptionsEntry'] -_RUNCODEREQUEST = DESCRIPTOR.message_types_by_name['RunCodeRequest'] -_RUNCODERESPONSE = DESCRIPTOR.message_types_by_name['RunCodeResponse'] -_CHECKSTATUSREQUEST = DESCRIPTOR.message_types_by_name['CheckStatusRequest'] -_CHECKSTATUSRESPONSE = DESCRIPTOR.message_types_by_name['CheckStatusResponse'] -_GETVALIDATIONOUTPUTREQUEST = DESCRIPTOR.message_types_by_name['GetValidationOutputRequest'] -_GETVALIDATIONOUTPUTRESPONSE = DESCRIPTOR.message_types_by_name['GetValidationOutputResponse'] -_GETPREPARATIONOUTPUTREQUEST = DESCRIPTOR.message_types_by_name['GetPreparationOutputRequest'] -_GETPREPARATIONOUTPUTRESPONSE = DESCRIPTOR.message_types_by_name['GetPreparationOutputResponse'] -_GETCOMPILEOUTPUTREQUEST = DESCRIPTOR.message_types_by_name['GetCompileOutputRequest'] -_GETCOMPILEOUTPUTRESPONSE = DESCRIPTOR.message_types_by_name['GetCompileOutputResponse'] -_GETRUNOUTPUTREQUEST = DESCRIPTOR.message_types_by_name['GetRunOutputRequest'] -_GETRUNOUTPUTRESPONSE = DESCRIPTOR.message_types_by_name['GetRunOutputResponse'] -_GETRUNERRORREQUEST = DESCRIPTOR.message_types_by_name['GetRunErrorRequest'] -_GETRUNERRORRESPONSE = DESCRIPTOR.message_types_by_name['GetRunErrorResponse'] -_GETLOGSREQUEST = DESCRIPTOR.message_types_by_name['GetLogsRequest'] -_GETLOGSRESPONSE = DESCRIPTOR.message_types_by_name['GetLogsResponse'] -_GETGRAPHREQUEST = DESCRIPTOR.message_types_by_name['GetGraphRequest'] -_GETGRAPHRESPONSE = DESCRIPTOR.message_types_by_name['GetGraphResponse'] -_CANCELREQUEST = DESCRIPTOR.message_types_by_name['CancelRequest'] -_CANCELRESPONSE = DESCRIPTOR.message_types_by_name['CancelResponse'] -_PRECOMPILEDOBJECT = DESCRIPTOR.message_types_by_name['PrecompiledObject'] -_CATEGORIES = DESCRIPTOR.message_types_by_name['Categories'] -_CATEGORIES_CATEGORY = _CATEGORIES.nested_types_by_name['Category'] -_GETPRECOMPILEDOBJECTSREQUEST = DESCRIPTOR.message_types_by_name['GetPrecompiledObjectsRequest'] -_GETPRECOMPILEDOBJECTREQUEST = DESCRIPTOR.message_types_by_name['GetPrecompiledObjectRequest'] -_GETPRECOMPILEDOBJECTCODEREQUEST = DESCRIPTOR.message_types_by_name['GetPrecompiledObjectCodeRequest'] -_GETPRECOMPILEDOBJECTOUTPUTREQUEST = DESCRIPTOR.message_types_by_name['GetPrecompiledObjectOutputRequest'] -_GETPRECOMPILEDOBJECTLOGSREQUEST = DESCRIPTOR.message_types_by_name['GetPrecompiledObjectLogsRequest'] -_GETPRECOMPILEDOBJECTGRAPHREQUEST = DESCRIPTOR.message_types_by_name['GetPrecompiledObjectGraphRequest'] -_GETDEFAULTPRECOMPILEDOBJECTREQUEST = DESCRIPTOR.message_types_by_name['GetDefaultPrecompiledObjectRequest'] -_GETPRECOMPILEDOBJECTSRESPONSE = DESCRIPTOR.message_types_by_name['GetPrecompiledObjectsResponse'] -_GETPRECOMPILEDOBJECTRESPONSE = DESCRIPTOR.message_types_by_name['GetPrecompiledObjectResponse'] -_GETPRECOMPILEDOBJECTCODERESPONSE = DESCRIPTOR.message_types_by_name['GetPrecompiledObjectCodeResponse'] -_GETPRECOMPILEDOBJECTOUTPUTRESPONSE = DESCRIPTOR.message_types_by_name['GetPrecompiledObjectOutputResponse'] -_GETPRECOMPILEDOBJECTLOGSRESPONSE = DESCRIPTOR.message_types_by_name['GetPrecompiledObjectLogsResponse'] -_GETPRECOMPILEDOBJECTGRAPHRESPONSE = 
DESCRIPTOR.message_types_by_name['GetPrecompiledObjectGraphResponse'] -_GETDEFAULTPRECOMPILEDOBJECTRESPONSE = DESCRIPTOR.message_types_by_name['GetDefaultPrecompiledObjectResponse'] -_SNIPPETFILE = DESCRIPTOR.message_types_by_name['SnippetFile'] -_SAVESNIPPETREQUEST = DESCRIPTOR.message_types_by_name['SaveSnippetRequest'] -_SAVESNIPPETRESPONSE = DESCRIPTOR.message_types_by_name['SaveSnippetResponse'] -_GETSNIPPETREQUEST = DESCRIPTOR.message_types_by_name['GetSnippetRequest'] -_GETSNIPPETRESPONSE = DESCRIPTOR.message_types_by_name['GetSnippetResponse'] -Dataset = _reflection.GeneratedProtocolMessageType('Dataset', (_message.Message,), { - - 'OptionsEntry' : _reflection.GeneratedProtocolMessageType('OptionsEntry', (_message.Message,), { - 'DESCRIPTOR' : _DATASET_OPTIONSENTRY, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.Dataset.OptionsEntry) - }) - , - 'DESCRIPTOR' : _DATASET, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.Dataset) - }) -_sym_db.RegisterMessage(Dataset) -_sym_db.RegisterMessage(Dataset.OptionsEntry) - -RunCodeRequest = _reflection.GeneratedProtocolMessageType('RunCodeRequest', (_message.Message,), { - 'DESCRIPTOR' : _RUNCODEREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.RunCodeRequest) - }) -_sym_db.RegisterMessage(RunCodeRequest) - -RunCodeResponse = _reflection.GeneratedProtocolMessageType('RunCodeResponse', (_message.Message,), { - 'DESCRIPTOR' : _RUNCODERESPONSE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.RunCodeResponse) - }) -_sym_db.RegisterMessage(RunCodeResponse) - -CheckStatusRequest = _reflection.GeneratedProtocolMessageType('CheckStatusRequest', (_message.Message,), { - 'DESCRIPTOR' : _CHECKSTATUSREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.CheckStatusRequest) - }) -_sym_db.RegisterMessage(CheckStatusRequest) - -CheckStatusResponse = _reflection.GeneratedProtocolMessageType('CheckStatusResponse', (_message.Message,), { - 'DESCRIPTOR' : _CHECKSTATUSRESPONSE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.CheckStatusResponse) - }) -_sym_db.RegisterMessage(CheckStatusResponse) - -GetValidationOutputRequest = _reflection.GeneratedProtocolMessageType('GetValidationOutputRequest', (_message.Message,), { - 'DESCRIPTOR' : _GETVALIDATIONOUTPUTREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetValidationOutputRequest) - }) -_sym_db.RegisterMessage(GetValidationOutputRequest) - -GetValidationOutputResponse = _reflection.GeneratedProtocolMessageType('GetValidationOutputResponse', (_message.Message,), { - 'DESCRIPTOR' : _GETVALIDATIONOUTPUTRESPONSE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetValidationOutputResponse) - }) -_sym_db.RegisterMessage(GetValidationOutputResponse) - -GetPreparationOutputRequest = _reflection.GeneratedProtocolMessageType('GetPreparationOutputRequest', (_message.Message,), { - 'DESCRIPTOR' : _GETPREPARATIONOUTPUTREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetPreparationOutputRequest) - }) -_sym_db.RegisterMessage(GetPreparationOutputRequest) - -GetPreparationOutputResponse = _reflection.GeneratedProtocolMessageType('GetPreparationOutputResponse', (_message.Message,), { - 'DESCRIPTOR' : _GETPREPARATIONOUTPUTRESPONSE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetPreparationOutputResponse) - }) 
-_sym_db.RegisterMessage(GetPreparationOutputResponse) - -GetCompileOutputRequest = _reflection.GeneratedProtocolMessageType('GetCompileOutputRequest', (_message.Message,), { - 'DESCRIPTOR' : _GETCOMPILEOUTPUTREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetCompileOutputRequest) - }) -_sym_db.RegisterMessage(GetCompileOutputRequest) - -GetCompileOutputResponse = _reflection.GeneratedProtocolMessageType('GetCompileOutputResponse', (_message.Message,), { - 'DESCRIPTOR' : _GETCOMPILEOUTPUTRESPONSE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetCompileOutputResponse) - }) -_sym_db.RegisterMessage(GetCompileOutputResponse) - -GetRunOutputRequest = _reflection.GeneratedProtocolMessageType('GetRunOutputRequest', (_message.Message,), { - 'DESCRIPTOR' : _GETRUNOUTPUTREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetRunOutputRequest) - }) -_sym_db.RegisterMessage(GetRunOutputRequest) - -GetRunOutputResponse = _reflection.GeneratedProtocolMessageType('GetRunOutputResponse', (_message.Message,), { - 'DESCRIPTOR' : _GETRUNOUTPUTRESPONSE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetRunOutputResponse) - }) -_sym_db.RegisterMessage(GetRunOutputResponse) - -GetRunErrorRequest = _reflection.GeneratedProtocolMessageType('GetRunErrorRequest', (_message.Message,), { - 'DESCRIPTOR' : _GETRUNERRORREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetRunErrorRequest) - }) -_sym_db.RegisterMessage(GetRunErrorRequest) - -GetRunErrorResponse = _reflection.GeneratedProtocolMessageType('GetRunErrorResponse', (_message.Message,), { - 'DESCRIPTOR' : _GETRUNERRORRESPONSE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetRunErrorResponse) - }) -_sym_db.RegisterMessage(GetRunErrorResponse) - -GetLogsRequest = _reflection.GeneratedProtocolMessageType('GetLogsRequest', (_message.Message,), { - 'DESCRIPTOR' : _GETLOGSREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetLogsRequest) - }) -_sym_db.RegisterMessage(GetLogsRequest) - -GetLogsResponse = _reflection.GeneratedProtocolMessageType('GetLogsResponse', (_message.Message,), { - 'DESCRIPTOR' : _GETLOGSRESPONSE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetLogsResponse) - }) -_sym_db.RegisterMessage(GetLogsResponse) - -GetGraphRequest = _reflection.GeneratedProtocolMessageType('GetGraphRequest', (_message.Message,), { - 'DESCRIPTOR' : _GETGRAPHREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetGraphRequest) - }) -_sym_db.RegisterMessage(GetGraphRequest) - -GetGraphResponse = _reflection.GeneratedProtocolMessageType('GetGraphResponse', (_message.Message,), { - 'DESCRIPTOR' : _GETGRAPHRESPONSE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetGraphResponse) - }) -_sym_db.RegisterMessage(GetGraphResponse) - -CancelRequest = _reflection.GeneratedProtocolMessageType('CancelRequest', (_message.Message,), { - 'DESCRIPTOR' : _CANCELREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.CancelRequest) - }) -_sym_db.RegisterMessage(CancelRequest) - -CancelResponse = _reflection.GeneratedProtocolMessageType('CancelResponse', (_message.Message,), { - 'DESCRIPTOR' : _CANCELRESPONSE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.CancelResponse) - }) -_sym_db.RegisterMessage(CancelResponse) - 
-PrecompiledObject = _reflection.GeneratedProtocolMessageType('PrecompiledObject', (_message.Message,), { - 'DESCRIPTOR' : _PRECOMPILEDOBJECT, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.PrecompiledObject) - }) -_sym_db.RegisterMessage(PrecompiledObject) - -Categories = _reflection.GeneratedProtocolMessageType('Categories', (_message.Message,), { - - 'Category' : _reflection.GeneratedProtocolMessageType('Category', (_message.Message,), { - 'DESCRIPTOR' : _CATEGORIES_CATEGORY, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.Categories.Category) - }) - , - 'DESCRIPTOR' : _CATEGORIES, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.Categories) - }) -_sym_db.RegisterMessage(Categories) -_sym_db.RegisterMessage(Categories.Category) - -GetPrecompiledObjectsRequest = _reflection.GeneratedProtocolMessageType('GetPrecompiledObjectsRequest', (_message.Message,), { - 'DESCRIPTOR' : _GETPRECOMPILEDOBJECTSREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetPrecompiledObjectsRequest) - }) -_sym_db.RegisterMessage(GetPrecompiledObjectsRequest) - -GetPrecompiledObjectRequest = _reflection.GeneratedProtocolMessageType('GetPrecompiledObjectRequest', (_message.Message,), { - 'DESCRIPTOR' : _GETPRECOMPILEDOBJECTREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetPrecompiledObjectRequest) - }) -_sym_db.RegisterMessage(GetPrecompiledObjectRequest) - -GetPrecompiledObjectCodeRequest = _reflection.GeneratedProtocolMessageType('GetPrecompiledObjectCodeRequest', (_message.Message,), { - 'DESCRIPTOR' : _GETPRECOMPILEDOBJECTCODEREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetPrecompiledObjectCodeRequest) - }) -_sym_db.RegisterMessage(GetPrecompiledObjectCodeRequest) - -GetPrecompiledObjectOutputRequest = _reflection.GeneratedProtocolMessageType('GetPrecompiledObjectOutputRequest', (_message.Message,), { - 'DESCRIPTOR' : _GETPRECOMPILEDOBJECTOUTPUTREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetPrecompiledObjectOutputRequest) - }) -_sym_db.RegisterMessage(GetPrecompiledObjectOutputRequest) - -GetPrecompiledObjectLogsRequest = _reflection.GeneratedProtocolMessageType('GetPrecompiledObjectLogsRequest', (_message.Message,), { - 'DESCRIPTOR' : _GETPRECOMPILEDOBJECTLOGSREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetPrecompiledObjectLogsRequest) - }) -_sym_db.RegisterMessage(GetPrecompiledObjectLogsRequest) - -GetPrecompiledObjectGraphRequest = _reflection.GeneratedProtocolMessageType('GetPrecompiledObjectGraphRequest', (_message.Message,), { - 'DESCRIPTOR' : _GETPRECOMPILEDOBJECTGRAPHREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetPrecompiledObjectGraphRequest) - }) -_sym_db.RegisterMessage(GetPrecompiledObjectGraphRequest) - -GetDefaultPrecompiledObjectRequest = _reflection.GeneratedProtocolMessageType('GetDefaultPrecompiledObjectRequest', (_message.Message,), { - 'DESCRIPTOR' : _GETDEFAULTPRECOMPILEDOBJECTREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetDefaultPrecompiledObjectRequest) - }) -_sym_db.RegisterMessage(GetDefaultPrecompiledObjectRequest) - -GetPrecompiledObjectsResponse = _reflection.GeneratedProtocolMessageType('GetPrecompiledObjectsResponse', (_message.Message,), { - 'DESCRIPTOR' : _GETPRECOMPILEDOBJECTSRESPONSE, - '__module__' : 'api_pb2' - # 
@@protoc_insertion_point(class_scope:api.v1.GetPrecompiledObjectsResponse) - }) -_sym_db.RegisterMessage(GetPrecompiledObjectsResponse) - -GetPrecompiledObjectResponse = _reflection.GeneratedProtocolMessageType('GetPrecompiledObjectResponse', (_message.Message,), { - 'DESCRIPTOR' : _GETPRECOMPILEDOBJECTRESPONSE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetPrecompiledObjectResponse) - }) -_sym_db.RegisterMessage(GetPrecompiledObjectResponse) - -GetPrecompiledObjectCodeResponse = _reflection.GeneratedProtocolMessageType('GetPrecompiledObjectCodeResponse', (_message.Message,), { - 'DESCRIPTOR' : _GETPRECOMPILEDOBJECTCODERESPONSE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetPrecompiledObjectCodeResponse) - }) -_sym_db.RegisterMessage(GetPrecompiledObjectCodeResponse) - -GetPrecompiledObjectOutputResponse = _reflection.GeneratedProtocolMessageType('GetPrecompiledObjectOutputResponse', (_message.Message,), { - 'DESCRIPTOR' : _GETPRECOMPILEDOBJECTOUTPUTRESPONSE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetPrecompiledObjectOutputResponse) - }) -_sym_db.RegisterMessage(GetPrecompiledObjectOutputResponse) - -GetPrecompiledObjectLogsResponse = _reflection.GeneratedProtocolMessageType('GetPrecompiledObjectLogsResponse', (_message.Message,), { - 'DESCRIPTOR' : _GETPRECOMPILEDOBJECTLOGSRESPONSE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetPrecompiledObjectLogsResponse) - }) -_sym_db.RegisterMessage(GetPrecompiledObjectLogsResponse) - -GetPrecompiledObjectGraphResponse = _reflection.GeneratedProtocolMessageType('GetPrecompiledObjectGraphResponse', (_message.Message,), { - 'DESCRIPTOR' : _GETPRECOMPILEDOBJECTGRAPHRESPONSE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetPrecompiledObjectGraphResponse) - }) -_sym_db.RegisterMessage(GetPrecompiledObjectGraphResponse) - -GetDefaultPrecompiledObjectResponse = _reflection.GeneratedProtocolMessageType('GetDefaultPrecompiledObjectResponse', (_message.Message,), { - 'DESCRIPTOR' : _GETDEFAULTPRECOMPILEDOBJECTRESPONSE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetDefaultPrecompiledObjectResponse) - }) -_sym_db.RegisterMessage(GetDefaultPrecompiledObjectResponse) - -SnippetFile = _reflection.GeneratedProtocolMessageType('SnippetFile', (_message.Message,), { - 'DESCRIPTOR' : _SNIPPETFILE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.SnippetFile) - }) -_sym_db.RegisterMessage(SnippetFile) - -SaveSnippetRequest = _reflection.GeneratedProtocolMessageType('SaveSnippetRequest', (_message.Message,), { - 'DESCRIPTOR' : _SAVESNIPPETREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.SaveSnippetRequest) - }) -_sym_db.RegisterMessage(SaveSnippetRequest) - -SaveSnippetResponse = _reflection.GeneratedProtocolMessageType('SaveSnippetResponse', (_message.Message,), { - 'DESCRIPTOR' : _SAVESNIPPETRESPONSE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.SaveSnippetResponse) - }) -_sym_db.RegisterMessage(SaveSnippetResponse) - -GetSnippetRequest = _reflection.GeneratedProtocolMessageType('GetSnippetRequest', (_message.Message,), { - 'DESCRIPTOR' : _GETSNIPPETREQUEST, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetSnippetRequest) - }) -_sym_db.RegisterMessage(GetSnippetRequest) - -GetSnippetResponse = _reflection.GeneratedProtocolMessageType('GetSnippetResponse', 
(_message.Message,), { - 'DESCRIPTOR' : _GETSNIPPETRESPONSE, - '__module__' : 'api_pb2' - # @@protoc_insertion_point(class_scope:api.v1.GetSnippetResponse) - }) -_sym_db.RegisterMessage(GetSnippetResponse) +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x10\x61pi/v1/api.proto\x12\x06\x61pi.v1\"\xca\x01\n\x07\x44\x61taset\x12(\n\x04type\x18\x01 \x01(\x0e\x32\x14.api.v1.EmulatorTypeR\x04type\x12\x36\n\x07options\x18\x02 \x03(\x0b\x32\x1c.api.v1.Dataset.OptionsEntryR\x07options\x12!\n\x0c\x64\x61taset_path\x18\x03 \x01(\tR\x0b\x64\x61tasetPath\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\xc6\x01\n\x0eRunCodeRequest\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12\x1d\n\x03sdk\x18\x02 \x01(\x0e\x32\x0b.api.v1.SdkR\x03sdk\x12)\n\x10pipeline_options\x18\x03 \x01(\tR\x0fpipelineOptions\x12+\n\x08\x64\x61tasets\x18\x04 \x03(\x0b\x32\x0f.api.v1.DatasetR\x08\x64\x61tasets\x12)\n\x05\x66iles\x18\x05 \x03(\x0b\x32\x13.api.v1.SnippetFileR\x05\x66iles\"6\n\x0fRunCodeResponse\x12#\n\rpipeline_uuid\x18\x01 \x01(\tR\x0cpipelineUuid\"9\n\x12\x43heckStatusRequest\x12#\n\rpipeline_uuid\x18\x01 \x01(\tR\x0cpipelineUuid\"=\n\x13\x43heckStatusResponse\x12&\n\x06status\x18\x01 \x01(\x0e\x32\x0e.api.v1.StatusR\x06status\"A\n\x1aGetValidationOutputRequest\x12#\n\rpipeline_uuid\x18\x01 \x01(\tR\x0cpipelineUuid\"5\n\x1bGetValidationOutputResponse\x12\x16\n\x06output\x18\x01 \x01(\tR\x06output\"B\n\x1bGetPreparationOutputRequest\x12#\n\rpipeline_uuid\x18\x01 \x01(\tR\x0cpipelineUuid\"6\n\x1cGetPreparationOutputResponse\x12\x16\n\x06output\x18\x01 \x01(\tR\x06output\">\n\x17GetCompileOutputRequest\x12#\n\rpipeline_uuid\x18\x01 \x01(\tR\x0cpipelineUuid\"2\n\x18GetCompileOutputResponse\x12\x16\n\x06output\x18\x01 \x01(\tR\x06output\":\n\x13GetRunOutputRequest\x12#\n\rpipeline_uuid\x18\x01 \x01(\tR\x0cpipelineUuid\".\n\x14GetRunOutputResponse\x12\x16\n\x06output\x18\x01 \x01(\tR\x06output\"9\n\x12GetRunErrorRequest\x12#\n\rpipeline_uuid\x18\x01 \x01(\tR\x0cpipelineUuid\"-\n\x13GetRunErrorResponse\x12\x16\n\x06output\x18\x01 \x01(\tR\x06output\"5\n\x0eGetLogsRequest\x12#\n\rpipeline_uuid\x18\x01 \x01(\tR\x0cpipelineUuid\")\n\x0fGetLogsResponse\x12\x16\n\x06output\x18\x01 \x01(\tR\x06output\"6\n\x0fGetGraphRequest\x12#\n\rpipeline_uuid\x18\x01 \x01(\tR\x0cpipelineUuid\"(\n\x10GetGraphResponse\x12\x14\n\x05graph\x18\x01 \x01(\tR\x05graph\"4\n\rCancelRequest\x12#\n\rpipeline_uuid\x18\x01 \x01(\tR\x0cpipelineUuid\"\x10\n\x0e\x43\x61ncelResponse\"\x94\x04\n\x11PrecompiledObject\x12\x1d\n\ncloud_path\x18\x01 \x01(\tR\tcloudPath\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12 \n\x0b\x64\x65scription\x18\x03 \x01(\tR\x0b\x64\x65scription\x12\x31\n\x04type\x18\x04 \x01(\x0e\x32\x1d.api.v1.PrecompiledObjectTypeR\x04type\x12)\n\x10pipeline_options\x18\x05 \x01(\tR\x0fpipelineOptions\x12\x12\n\x04link\x18\x06 \x01(\tR\x04link\x12\x1c\n\tmultifile\x18\x07 \x01(\x08R\tmultifile\x12!\n\x0c\x63ontext_line\x18\x08 \x01(\x05R\x0b\x63ontextLine\x12\'\n\x0f\x64\x65\x66\x61ult_example\x18\t \x01(\x08R\x0e\x64\x65\x66\x61ultExample\x12\x1d\n\x03sdk\x18\n \x01(\x0e\x32\x0b.api.v1.SdkR\x03sdk\x12\x32\n\ncomplexity\x18\x0b \x01(\x0e\x32\x12.api.v1.ComplexityR\ncomplexity\x12\x12\n\x04tags\x18\x0c \x03(\tR\x04tags\x12+\n\x08\x64\x61tasets\x18\r \x03(\x0b\x32\x0f.api.v1.DatasetR\x08\x64\x61tasets\x12\x17\n\x07url_vcs\x18\x0e \x01(\tR\x06urlVcs\x12!\n\x0curl_notebook\x18\x0f \x01(\tR\x0burlNotebook\"\xe5\x01\n\nCategories\x12\x1d\n\x03sdk\x18\x01 
\x01(\x0e\x32\x0b.api.v1.SdkR\x03sdk\x12;\n\ncategories\x18\x02 \x03(\x0b\x32\x1b.api.v1.Categories.CategoryR\ncategories\x1a{\n\x08\x43\x61tegory\x12#\n\rcategory_name\x18\x01 \x01(\tR\x0c\x63\x61tegoryName\x12J\n\x13precompiled_objects\x18\x02 \x03(\x0b\x32\x19.api.v1.PrecompiledObjectR\x12precompiledObjects\"Y\n\x1cGetPrecompiledObjectsRequest\x12\x1d\n\x03sdk\x18\x01 \x01(\x0e\x32\x0b.api.v1.SdkR\x03sdk\x12\x1a\n\x08\x63\x61tegory\x18\x02 \x01(\tR\x08\x63\x61tegory\"<\n\x1bGetPrecompiledObjectRequest\x12\x1d\n\ncloud_path\x18\x01 \x01(\tR\tcloudPath\"@\n\x1fGetPrecompiledObjectCodeRequest\x12\x1d\n\ncloud_path\x18\x01 \x01(\tR\tcloudPath\"B\n!GetPrecompiledObjectOutputRequest\x12\x1d\n\ncloud_path\x18\x01 \x01(\tR\tcloudPath\"@\n\x1fGetPrecompiledObjectLogsRequest\x12\x1d\n\ncloud_path\x18\x01 \x01(\tR\tcloudPath\"A\n GetPrecompiledObjectGraphRequest\x12\x1d\n\ncloud_path\x18\x01 \x01(\tR\tcloudPath\"C\n\"GetDefaultPrecompiledObjectRequest\x12\x1d\n\x03sdk\x18\x01 \x01(\x0e\x32\x0b.api.v1.SdkR\x03sdk\"Z\n\x1dGetPrecompiledObjectsResponse\x12\x39\n\x0esdk_categories\x18\x01 \x03(\x0b\x32\x12.api.v1.CategoriesR\rsdkCategories\"h\n\x1cGetPrecompiledObjectResponse\x12H\n\x12precompiled_object\x18\x01 \x01(\x0b\x32\x19.api.v1.PrecompiledObjectR\x11precompiledObject\"a\n GetPrecompiledObjectCodeResponse\x12\x12\n\x04\x63ode\x18\x01 \x01(\tR\x04\x63ode\x12)\n\x05\x66iles\x18\x02 \x03(\x0b\x32\x13.api.v1.SnippetFileR\x05\x66iles\"<\n\"GetPrecompiledObjectOutputResponse\x12\x16\n\x06output\x18\x01 \x01(\tR\x06output\":\n GetPrecompiledObjectLogsResponse\x12\x16\n\x06output\x18\x01 \x01(\tR\x06output\"9\n!GetPrecompiledObjectGraphResponse\x12\x14\n\x05graph\x18\x01 \x01(\tR\x05graph\"o\n#GetDefaultPrecompiledObjectResponse\x12H\n\x12precompiled_object\x18\x01 \x01(\x0b\x32\x19.api.v1.PrecompiledObjectR\x11precompiledObject\"T\n\x0bSnippetFile\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x18\n\x07\x63ontent\x18\x02 \x01(\tR\x07\x63ontent\x12\x17\n\x07is_main\x18\x03 \x01(\x08R\x06isMain\"\xe6\x01\n\x12SaveSnippetRequest\x12)\n\x05\x66iles\x18\x01 \x03(\x0b\x32\x13.api.v1.SnippetFileR\x05\x66iles\x12\x1d\n\x03sdk\x18\x02 \x01(\x0e\x32\x0b.api.v1.SdkR\x03sdk\x12)\n\x10pipeline_options\x18\x03 \x01(\tR\x0fpipelineOptions\x12\x32\n\ncomplexity\x18\x04 \x01(\x0e\x32\x12.api.v1.ComplexityR\ncomplexity\x12\'\n\x0fpersistence_key\x18\x05 \x01(\tR\x0epersistenceKey\"%\n\x13SaveSnippetResponse\x12\x0e\n\x02id\x18\x01 \x01(\tR\x02id\"#\n\x11GetSnippetRequest\x12\x0e\n\x02id\x18\x01 \x01(\tR\x02id\"\xbd\x01\n\x12GetSnippetResponse\x12)\n\x05\x66iles\x18\x01 \x03(\x0b\x32\x13.api.v1.SnippetFileR\x05\x66iles\x12\x1d\n\x03sdk\x18\x02 \x01(\x0e\x32\x0b.api.v1.SdkR\x03sdk\x12)\n\x10pipeline_options\x18\x03 \x01(\tR\x0fpipelineOptions\x12\x32\n\ncomplexity\x18\x04 
\x01(\x0e\x32\x12.api.v1.ComplexityR\ncomplexity*R\n\x03Sdk\x12\x13\n\x0fSDK_UNSPECIFIED\x10\x00\x12\x0c\n\x08SDK_JAVA\x10\x01\x12\n\n\x06SDK_GO\x10\x02\x12\x0e\n\nSDK_PYTHON\x10\x03\x12\x0c\n\x08SDK_SCIO\x10\x04*\xb8\x02\n\x06Status\x12\x16\n\x12STATUS_UNSPECIFIED\x10\x00\x12\x15\n\x11STATUS_VALIDATING\x10\x01\x12\x1b\n\x17STATUS_VALIDATION_ERROR\x10\x02\x12\x14\n\x10STATUS_PREPARING\x10\x03\x12\x1c\n\x18STATUS_PREPARATION_ERROR\x10\x04\x12\x14\n\x10STATUS_COMPILING\x10\x05\x12\x18\n\x14STATUS_COMPILE_ERROR\x10\x06\x12\x14\n\x10STATUS_EXECUTING\x10\x07\x12\x13\n\x0fSTATUS_FINISHED\x10\x08\x12\x14\n\x10STATUS_RUN_ERROR\x10\t\x12\x10\n\x0cSTATUS_ERROR\x10\n\x12\x16\n\x12STATUS_RUN_TIMEOUT\x10\x0b\x12\x13\n\x0fSTATUS_CANCELED\x10\x0c*\xae\x01\n\x15PrecompiledObjectType\x12\'\n#PRECOMPILED_OBJECT_TYPE_UNSPECIFIED\x10\x00\x12#\n\x1fPRECOMPILED_OBJECT_TYPE_EXAMPLE\x10\x01\x12 \n\x1cPRECOMPILED_OBJECT_TYPE_KATA\x10\x02\x12%\n!PRECOMPILED_OBJECT_TYPE_UNIT_TEST\x10\x03*n\n\nComplexity\x12\x1a\n\x16\x43OMPLEXITY_UNSPECIFIED\x10\x00\x12\x14\n\x10\x43OMPLEXITY_BASIC\x10\x01\x12\x15\n\x11\x43OMPLEXITY_MEDIUM\x10\x02\x12\x17\n\x13\x43OMPLEXITY_ADVANCED\x10\x03*F\n\x0c\x45mulatorType\x12\x1d\n\x19\x45MULATOR_TYPE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45MULATOR_TYPE_KAFKA\x10\x01\x32\x8b\r\n\x11PlaygroundService\x12:\n\x07RunCode\x12\x16.api.v1.RunCodeRequest\x1a\x17.api.v1.RunCodeResponse\x12\x46\n\x0b\x43heckStatus\x12\x1a.api.v1.CheckStatusRequest\x1a\x1b.api.v1.CheckStatusResponse\x12I\n\x0cGetRunOutput\x12\x1b.api.v1.GetRunOutputRequest\x1a\x1c.api.v1.GetRunOutputResponse\x12:\n\x07GetLogs\x12\x16.api.v1.GetLogsRequest\x1a\x17.api.v1.GetLogsResponse\x12=\n\x08GetGraph\x12\x17.api.v1.GetGraphRequest\x1a\x18.api.v1.GetGraphResponse\x12\x46\n\x0bGetRunError\x12\x1a.api.v1.GetRunErrorRequest\x1a\x1b.api.v1.GetRunErrorResponse\x12^\n\x13GetValidationOutput\x12\".api.v1.GetValidationOutputRequest\x1a#.api.v1.GetValidationOutputResponse\x12\x61\n\x14GetPreparationOutput\x12#.api.v1.GetPreparationOutputRequest\x1a$.api.v1.GetPreparationOutputResponse\x12U\n\x10GetCompileOutput\x12\x1f.api.v1.GetCompileOutputRequest\x1a .api.v1.GetCompileOutputResponse\x12\x37\n\x06\x43\x61ncel\x12\x15.api.v1.CancelRequest\x1a\x16.api.v1.CancelResponse\x12\x64\n\x15GetPrecompiledObjects\x12$.api.v1.GetPrecompiledObjectsRequest\x1a%.api.v1.GetPrecompiledObjectsResponse\x12\x61\n\x14GetPrecompiledObject\x12#.api.v1.GetPrecompiledObjectRequest\x1a$.api.v1.GetPrecompiledObjectResponse\x12m\n\x18GetPrecompiledObjectCode\x12\'.api.v1.GetPrecompiledObjectCodeRequest\x1a(.api.v1.GetPrecompiledObjectCodeResponse\x12s\n\x1aGetPrecompiledObjectOutput\x12).api.v1.GetPrecompiledObjectOutputRequest\x1a*.api.v1.GetPrecompiledObjectOutputResponse\x12m\n\x18GetPrecompiledObjectLogs\x12\'.api.v1.GetPrecompiledObjectLogsRequest\x1a(.api.v1.GetPrecompiledObjectLogsResponse\x12p\n\x19GetPrecompiledObjectGraph\x12(.api.v1.GetPrecompiledObjectGraphRequest\x1a).api.v1.GetPrecompiledObjectGraphResponse\x12v\n\x1bGetDefaultPrecompiledObject\x12*.api.v1.GetDefaultPrecompiledObjectRequest\x1a+.api.v1.GetDefaultPrecompiledObjectResponse\x12\x46\n\x0bSaveSnippet\x12\x1a.api.v1.SaveSnippetRequest\x1a\x1b.api.v1.SaveSnippetResponse\x12\x43\n\nGetSnippet\x12\x19.api.v1.GetSnippetRequest\x1a\x1a.api.v1.GetSnippetResponseB8Z6beam.apache.org/playground/backend/internal;playgroundb\x06proto3') -_PLAYGROUNDSERVICE = DESCRIPTOR.services_by_name['PlaygroundService'] +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) 
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'api.v1.api_pb2', globals()) if _descriptor._USE_C_DESCRIPTORS == False: DESCRIPTOR._options = None DESCRIPTOR._serialized_options = b'Z6beam.apache.org/playground/backend/internal;playground' _DATASET_OPTIONSENTRY._options = None _DATASET_OPTIONSENTRY._serialized_options = b'8\001' - _SDK._serialized_start=2961 - _SDK._serialized_end=3043 - _STATUS._serialized_start=3046 - _STATUS._serialized_end=3358 - _PRECOMPILEDOBJECTTYPE._serialized_start=3361 - _PRECOMPILEDOBJECTTYPE._serialized_end=3535 - _COMPLEXITY._serialized_start=3537 - _COMPLEXITY._serialized_end=3647 - _EMULATORTYPE._serialized_start=3649 - _EMULATORTYPE._serialized_end=3719 - _DATASET._serialized_start=22 - _DATASET._serialized_end=184 - _DATASET_OPTIONSENTRY._serialized_start=138 - _DATASET_OPTIONSENTRY._serialized_end=184 - _RUNCODEREQUEST._serialized_start=186 - _RUNCODEREQUEST._serialized_end=303 - _RUNCODERESPONSE._serialized_start=305 - _RUNCODERESPONSE._serialized_end=345 - _CHECKSTATUSREQUEST._serialized_start=347 - _CHECKSTATUSREQUEST._serialized_end=390 - _CHECKSTATUSRESPONSE._serialized_start=392 - _CHECKSTATUSRESPONSE._serialized_end=445 - _GETVALIDATIONOUTPUTREQUEST._serialized_start=447 - _GETVALIDATIONOUTPUTREQUEST._serialized_end=498 - _GETVALIDATIONOUTPUTRESPONSE._serialized_start=500 - _GETVALIDATIONOUTPUTRESPONSE._serialized_end=545 - _GETPREPARATIONOUTPUTREQUEST._serialized_start=547 - _GETPREPARATIONOUTPUTREQUEST._serialized_end=599 - _GETPREPARATIONOUTPUTRESPONSE._serialized_start=601 - _GETPREPARATIONOUTPUTRESPONSE._serialized_end=647 - _GETCOMPILEOUTPUTREQUEST._serialized_start=649 - _GETCOMPILEOUTPUTREQUEST._serialized_end=697 - _GETCOMPILEOUTPUTRESPONSE._serialized_start=699 - _GETCOMPILEOUTPUTRESPONSE._serialized_end=741 - _GETRUNOUTPUTREQUEST._serialized_start=743 - _GETRUNOUTPUTREQUEST._serialized_end=787 - _GETRUNOUTPUTRESPONSE._serialized_start=789 - _GETRUNOUTPUTRESPONSE._serialized_end=827 - _GETRUNERRORREQUEST._serialized_start=829 - _GETRUNERRORREQUEST._serialized_end=872 - _GETRUNERRORRESPONSE._serialized_start=874 - _GETRUNERRORRESPONSE._serialized_end=911 - _GETLOGSREQUEST._serialized_start=913 - _GETLOGSREQUEST._serialized_end=952 - _GETLOGSRESPONSE._serialized_start=954 - _GETLOGSRESPONSE._serialized_end=987 - _GETGRAPHREQUEST._serialized_start=989 - _GETGRAPHREQUEST._serialized_end=1029 - _GETGRAPHRESPONSE._serialized_start=1031 - _GETGRAPHRESPONSE._serialized_end=1064 - _CANCELREQUEST._serialized_start=1066 - _CANCELREQUEST._serialized_end=1104 - _CANCELRESPONSE._serialized_start=1106 - _CANCELRESPONSE._serialized_end=1122 - _PRECOMPILEDOBJECT._serialized_start=1125 - _PRECOMPILEDOBJECT._serialized_end=1465 - _CATEGORIES._serialized_start=1468 - _CATEGORIES._serialized_end=1646 - _CATEGORIES_CATEGORY._serialized_start=1557 - _CATEGORIES_CATEGORY._serialized_end=1646 - _GETPRECOMPILEDOBJECTSREQUEST._serialized_start=1648 - _GETPRECOMPILEDOBJECTSREQUEST._serialized_end=1722 - _GETPRECOMPILEDOBJECTREQUEST._serialized_start=1724 - _GETPRECOMPILEDOBJECTREQUEST._serialized_end=1773 - _GETPRECOMPILEDOBJECTCODEREQUEST._serialized_start=1775 - _GETPRECOMPILEDOBJECTCODEREQUEST._serialized_end=1828 - _GETPRECOMPILEDOBJECTOUTPUTREQUEST._serialized_start=1830 - _GETPRECOMPILEDOBJECTOUTPUTREQUEST._serialized_end=1885 - _GETPRECOMPILEDOBJECTLOGSREQUEST._serialized_start=1887 - _GETPRECOMPILEDOBJECTLOGSREQUEST._serialized_end=1940 - _GETPRECOMPILEDOBJECTGRAPHREQUEST._serialized_start=1942 - _GETPRECOMPILEDOBJECTGRAPHREQUEST._serialized_end=1996 - 
_GETDEFAULTPRECOMPILEDOBJECTREQUEST._serialized_start=1998 - _GETDEFAULTPRECOMPILEDOBJECTREQUEST._serialized_end=2060 - _GETPRECOMPILEDOBJECTSRESPONSE._serialized_start=2062 - _GETPRECOMPILEDOBJECTSRESPONSE._serialized_end=2137 - _GETPRECOMPILEDOBJECTRESPONSE._serialized_start=2139 - _GETPRECOMPILEDOBJECTRESPONSE._serialized_end=2224 - _GETPRECOMPILEDOBJECTCODERESPONSE._serialized_start=2226 - _GETPRECOMPILEDOBJECTCODERESPONSE._serialized_end=2274 - _GETPRECOMPILEDOBJECTOUTPUTRESPONSE._serialized_start=2276 - _GETPRECOMPILEDOBJECTOUTPUTRESPONSE._serialized_end=2328 - _GETPRECOMPILEDOBJECTLOGSRESPONSE._serialized_start=2330 - _GETPRECOMPILEDOBJECTLOGSRESPONSE._serialized_end=2380 - _GETPRECOMPILEDOBJECTGRAPHRESPONSE._serialized_start=2382 - _GETPRECOMPILEDOBJECTGRAPHRESPONSE._serialized_end=2432 - _GETDEFAULTPRECOMPILEDOBJECTRESPONSE._serialized_start=2434 - _GETDEFAULTPRECOMPILEDOBJECTRESPONSE._serialized_end=2526 - _SNIPPETFILE._serialized_start=2528 - _SNIPPETFILE._serialized_end=2589 - _SAVESNIPPETREQUEST._serialized_start=2592 - _SAVESNIPPETREQUEST._serialized_end=2740 - _SAVESNIPPETRESPONSE._serialized_start=2742 - _SAVESNIPPETRESPONSE._serialized_end=2775 - _GETSNIPPETREQUEST._serialized_start=2777 - _GETSNIPPETREQUEST._serialized_end=2808 - _GETSNIPPETRESPONSE._serialized_start=2811 - _GETSNIPPETRESPONSE._serialized_end=2959 - _PLAYGROUNDSERVICE._serialized_start=3722 - _PLAYGROUNDSERVICE._serialized_end=5397 + _SDK._serialized_start=3890 + _SDK._serialized_end=3972 + _STATUS._serialized_start=3975 + _STATUS._serialized_end=4287 + _PRECOMPILEDOBJECTTYPE._serialized_start=4290 + _PRECOMPILEDOBJECTTYPE._serialized_end=4464 + _COMPLEXITY._serialized_start=4466 + _COMPLEXITY._serialized_end=4576 + _EMULATORTYPE._serialized_start=4578 + _EMULATORTYPE._serialized_end=4648 + _DATASET._serialized_start=29 + _DATASET._serialized_end=231 + _DATASET_OPTIONSENTRY._serialized_start=173 + _DATASET_OPTIONSENTRY._serialized_end=231 + _RUNCODEREQUEST._serialized_start=234 + _RUNCODEREQUEST._serialized_end=432 + _RUNCODERESPONSE._serialized_start=434 + _RUNCODERESPONSE._serialized_end=488 + _CHECKSTATUSREQUEST._serialized_start=490 + _CHECKSTATUSREQUEST._serialized_end=547 + _CHECKSTATUSRESPONSE._serialized_start=549 + _CHECKSTATUSRESPONSE._serialized_end=610 + _GETVALIDATIONOUTPUTREQUEST._serialized_start=612 + _GETVALIDATIONOUTPUTREQUEST._serialized_end=677 + _GETVALIDATIONOUTPUTRESPONSE._serialized_start=679 + _GETVALIDATIONOUTPUTRESPONSE._serialized_end=732 + _GETPREPARATIONOUTPUTREQUEST._serialized_start=734 + _GETPREPARATIONOUTPUTREQUEST._serialized_end=800 + _GETPREPARATIONOUTPUTRESPONSE._serialized_start=802 + _GETPREPARATIONOUTPUTRESPONSE._serialized_end=856 + _GETCOMPILEOUTPUTREQUEST._serialized_start=858 + _GETCOMPILEOUTPUTREQUEST._serialized_end=920 + _GETCOMPILEOUTPUTRESPONSE._serialized_start=922 + _GETCOMPILEOUTPUTRESPONSE._serialized_end=972 + _GETRUNOUTPUTREQUEST._serialized_start=974 + _GETRUNOUTPUTREQUEST._serialized_end=1032 + _GETRUNOUTPUTRESPONSE._serialized_start=1034 + _GETRUNOUTPUTRESPONSE._serialized_end=1080 + _GETRUNERRORREQUEST._serialized_start=1082 + _GETRUNERRORREQUEST._serialized_end=1139 + _GETRUNERRORRESPONSE._serialized_start=1141 + _GETRUNERRORRESPONSE._serialized_end=1186 + _GETLOGSREQUEST._serialized_start=1188 + _GETLOGSREQUEST._serialized_end=1241 + _GETLOGSRESPONSE._serialized_start=1243 + _GETLOGSRESPONSE._serialized_end=1284 + _GETGRAPHREQUEST._serialized_start=1286 + _GETGRAPHREQUEST._serialized_end=1340 + _GETGRAPHRESPONSE._serialized_start=1342 + 
_GETGRAPHRESPONSE._serialized_end=1382 + _CANCELREQUEST._serialized_start=1384 + _CANCELREQUEST._serialized_end=1436 + _CANCELRESPONSE._serialized_start=1438 + _CANCELRESPONSE._serialized_end=1454 + _PRECOMPILEDOBJECT._serialized_start=1457 + _PRECOMPILEDOBJECT._serialized_end=1989 + _CATEGORIES._serialized_start=1992 + _CATEGORIES._serialized_end=2221 + _CATEGORIES_CATEGORY._serialized_start=2098 + _CATEGORIES_CATEGORY._serialized_end=2221 + _GETPRECOMPILEDOBJECTSREQUEST._serialized_start=2223 + _GETPRECOMPILEDOBJECTSREQUEST._serialized_end=2312 + _GETPRECOMPILEDOBJECTREQUEST._serialized_start=2314 + _GETPRECOMPILEDOBJECTREQUEST._serialized_end=2374 + _GETPRECOMPILEDOBJECTCODEREQUEST._serialized_start=2376 + _GETPRECOMPILEDOBJECTCODEREQUEST._serialized_end=2440 + _GETPRECOMPILEDOBJECTOUTPUTREQUEST._serialized_start=2442 + _GETPRECOMPILEDOBJECTOUTPUTREQUEST._serialized_end=2508 + _GETPRECOMPILEDOBJECTLOGSREQUEST._serialized_start=2510 + _GETPRECOMPILEDOBJECTLOGSREQUEST._serialized_end=2574 + _GETPRECOMPILEDOBJECTGRAPHREQUEST._serialized_start=2576 + _GETPRECOMPILEDOBJECTGRAPHREQUEST._serialized_end=2641 + _GETDEFAULTPRECOMPILEDOBJECTREQUEST._serialized_start=2643 + _GETDEFAULTPRECOMPILEDOBJECTREQUEST._serialized_end=2710 + _GETPRECOMPILEDOBJECTSRESPONSE._serialized_start=2712 + _GETPRECOMPILEDOBJECTSRESPONSE._serialized_end=2802 + _GETPRECOMPILEDOBJECTRESPONSE._serialized_start=2804 + _GETPRECOMPILEDOBJECTRESPONSE._serialized_end=2908 + _GETPRECOMPILEDOBJECTCODERESPONSE._serialized_start=2910 + _GETPRECOMPILEDOBJECTCODERESPONSE._serialized_end=3007 + _GETPRECOMPILEDOBJECTOUTPUTRESPONSE._serialized_start=3009 + _GETPRECOMPILEDOBJECTOUTPUTRESPONSE._serialized_end=3069 + _GETPRECOMPILEDOBJECTLOGSRESPONSE._serialized_start=3071 + _GETPRECOMPILEDOBJECTLOGSRESPONSE._serialized_end=3129 + _GETPRECOMPILEDOBJECTGRAPHRESPONSE._serialized_start=3131 + _GETPRECOMPILEDOBJECTGRAPHRESPONSE._serialized_end=3188 + _GETDEFAULTPRECOMPILEDOBJECTRESPONSE._serialized_start=3190 + _GETDEFAULTPRECOMPILEDOBJECTRESPONSE._serialized_end=3301 + _SNIPPETFILE._serialized_start=3303 + _SNIPPETFILE._serialized_end=3387 + _SAVESNIPPETREQUEST._serialized_start=3390 + _SAVESNIPPETREQUEST._serialized_end=3620 + _SAVESNIPPETRESPONSE._serialized_start=3622 + _SAVESNIPPETRESPONSE._serialized_end=3659 + _GETSNIPPETREQUEST._serialized_start=3661 + _GETSNIPPETREQUEST._serialized_end=3696 + _GETSNIPPETRESPONSE._serialized_start=3699 + _GETSNIPPETRESPONSE._serialized_end=3888 + _PLAYGROUNDSERVICE._serialized_start=4651 + _PLAYGROUNDSERVICE._serialized_end=6326 # @@protoc_insertion_point(module_scope) diff --git a/playground/infrastructure/api/v1/api_pb2.pyi b/playground/infrastructure/api/v1/api_pb2.pyi new file mode 100644 index 0000000000000..2c4d80ab8b424 --- /dev/null +++ b/playground/infrastructure/api/v1/api_pb2.pyi @@ -0,0 +1,378 @@ +from google.protobuf.internal import containers as _containers +from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union + +COMPLEXITY_ADVANCED: Complexity +COMPLEXITY_BASIC: Complexity +COMPLEXITY_MEDIUM: Complexity +COMPLEXITY_UNSPECIFIED: Complexity +DESCRIPTOR: _descriptor.FileDescriptor +EMULATOR_TYPE_KAFKA: EmulatorType +EMULATOR_TYPE_UNSPECIFIED: EmulatorType +PRECOMPILED_OBJECT_TYPE_EXAMPLE: PrecompiledObjectType +PRECOMPILED_OBJECT_TYPE_KATA: 
PrecompiledObjectType +PRECOMPILED_OBJECT_TYPE_UNIT_TEST: PrecompiledObjectType +PRECOMPILED_OBJECT_TYPE_UNSPECIFIED: PrecompiledObjectType +SDK_GO: Sdk +SDK_JAVA: Sdk +SDK_PYTHON: Sdk +SDK_SCIO: Sdk +SDK_UNSPECIFIED: Sdk +STATUS_CANCELED: Status +STATUS_COMPILE_ERROR: Status +STATUS_COMPILING: Status +STATUS_ERROR: Status +STATUS_EXECUTING: Status +STATUS_FINISHED: Status +STATUS_PREPARATION_ERROR: Status +STATUS_PREPARING: Status +STATUS_RUN_ERROR: Status +STATUS_RUN_TIMEOUT: Status +STATUS_UNSPECIFIED: Status +STATUS_VALIDATING: Status +STATUS_VALIDATION_ERROR: Status + +class CancelRequest(_message.Message): + __slots__ = ["pipeline_uuid"] + PIPELINE_UUID_FIELD_NUMBER: _ClassVar[int] + pipeline_uuid: str + def __init__(self, pipeline_uuid: _Optional[str] = ...) -> None: ... + +class CancelResponse(_message.Message): + __slots__ = [] + def __init__(self) -> None: ... + +class Categories(_message.Message): + __slots__ = ["categories", "sdk"] + class Category(_message.Message): + __slots__ = ["category_name", "precompiled_objects"] + CATEGORY_NAME_FIELD_NUMBER: _ClassVar[int] + PRECOMPILED_OBJECTS_FIELD_NUMBER: _ClassVar[int] + category_name: str + precompiled_objects: _containers.RepeatedCompositeFieldContainer[PrecompiledObject] + def __init__(self, category_name: _Optional[str] = ..., precompiled_objects: _Optional[_Iterable[_Union[PrecompiledObject, _Mapping]]] = ...) -> None: ... + CATEGORIES_FIELD_NUMBER: _ClassVar[int] + SDK_FIELD_NUMBER: _ClassVar[int] + categories: _containers.RepeatedCompositeFieldContainer[Categories.Category] + sdk: Sdk + def __init__(self, sdk: _Optional[_Union[Sdk, str]] = ..., categories: _Optional[_Iterable[_Union[Categories.Category, _Mapping]]] = ...) -> None: ... + +class CheckStatusRequest(_message.Message): + __slots__ = ["pipeline_uuid"] + PIPELINE_UUID_FIELD_NUMBER: _ClassVar[int] + pipeline_uuid: str + def __init__(self, pipeline_uuid: _Optional[str] = ...) -> None: ... + +class CheckStatusResponse(_message.Message): + __slots__ = ["status"] + STATUS_FIELD_NUMBER: _ClassVar[int] + status: Status + def __init__(self, status: _Optional[_Union[Status, str]] = ...) -> None: ... + +class Dataset(_message.Message): + __slots__ = ["dataset_path", "options", "type"] + class OptionsEntry(_message.Message): + __slots__ = ["key", "value"] + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: str + def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ... + DATASET_PATH_FIELD_NUMBER: _ClassVar[int] + OPTIONS_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] + dataset_path: str + options: _containers.ScalarMap[str, str] + type: EmulatorType + def __init__(self, type: _Optional[_Union[EmulatorType, str]] = ..., options: _Optional[_Mapping[str, str]] = ..., dataset_path: _Optional[str] = ...) -> None: ... + +class GetCompileOutputRequest(_message.Message): + __slots__ = ["pipeline_uuid"] + PIPELINE_UUID_FIELD_NUMBER: _ClassVar[int] + pipeline_uuid: str + def __init__(self, pipeline_uuid: _Optional[str] = ...) -> None: ... + +class GetCompileOutputResponse(_message.Message): + __slots__ = ["output"] + OUTPUT_FIELD_NUMBER: _ClassVar[int] + output: str + def __init__(self, output: _Optional[str] = ...) -> None: ... + +class GetDefaultPrecompiledObjectRequest(_message.Message): + __slots__ = ["sdk"] + SDK_FIELD_NUMBER: _ClassVar[int] + sdk: Sdk + def __init__(self, sdk: _Optional[_Union[Sdk, str]] = ...) -> None: ... 
+ +class GetDefaultPrecompiledObjectResponse(_message.Message): + __slots__ = ["precompiled_object"] + PRECOMPILED_OBJECT_FIELD_NUMBER: _ClassVar[int] + precompiled_object: PrecompiledObject + def __init__(self, precompiled_object: _Optional[_Union[PrecompiledObject, _Mapping]] = ...) -> None: ... + +class GetGraphRequest(_message.Message): + __slots__ = ["pipeline_uuid"] + PIPELINE_UUID_FIELD_NUMBER: _ClassVar[int] + pipeline_uuid: str + def __init__(self, pipeline_uuid: _Optional[str] = ...) -> None: ... + +class GetGraphResponse(_message.Message): + __slots__ = ["graph"] + GRAPH_FIELD_NUMBER: _ClassVar[int] + graph: str + def __init__(self, graph: _Optional[str] = ...) -> None: ... + +class GetLogsRequest(_message.Message): + __slots__ = ["pipeline_uuid"] + PIPELINE_UUID_FIELD_NUMBER: _ClassVar[int] + pipeline_uuid: str + def __init__(self, pipeline_uuid: _Optional[str] = ...) -> None: ... + +class GetLogsResponse(_message.Message): + __slots__ = ["output"] + OUTPUT_FIELD_NUMBER: _ClassVar[int] + output: str + def __init__(self, output: _Optional[str] = ...) -> None: ... + +class GetPrecompiledObjectCodeRequest(_message.Message): + __slots__ = ["cloud_path"] + CLOUD_PATH_FIELD_NUMBER: _ClassVar[int] + cloud_path: str + def __init__(self, cloud_path: _Optional[str] = ...) -> None: ... + +class GetPrecompiledObjectCodeResponse(_message.Message): + __slots__ = ["code", "files"] + CODE_FIELD_NUMBER: _ClassVar[int] + FILES_FIELD_NUMBER: _ClassVar[int] + code: str + files: _containers.RepeatedCompositeFieldContainer[SnippetFile] + def __init__(self, code: _Optional[str] = ..., files: _Optional[_Iterable[_Union[SnippetFile, _Mapping]]] = ...) -> None: ... + +class GetPrecompiledObjectGraphRequest(_message.Message): + __slots__ = ["cloud_path"] + CLOUD_PATH_FIELD_NUMBER: _ClassVar[int] + cloud_path: str + def __init__(self, cloud_path: _Optional[str] = ...) -> None: ... + +class GetPrecompiledObjectGraphResponse(_message.Message): + __slots__ = ["graph"] + GRAPH_FIELD_NUMBER: _ClassVar[int] + graph: str + def __init__(self, graph: _Optional[str] = ...) -> None: ... + +class GetPrecompiledObjectLogsRequest(_message.Message): + __slots__ = ["cloud_path"] + CLOUD_PATH_FIELD_NUMBER: _ClassVar[int] + cloud_path: str + def __init__(self, cloud_path: _Optional[str] = ...) -> None: ... + +class GetPrecompiledObjectLogsResponse(_message.Message): + __slots__ = ["output"] + OUTPUT_FIELD_NUMBER: _ClassVar[int] + output: str + def __init__(self, output: _Optional[str] = ...) -> None: ... + +class GetPrecompiledObjectOutputRequest(_message.Message): + __slots__ = ["cloud_path"] + CLOUD_PATH_FIELD_NUMBER: _ClassVar[int] + cloud_path: str + def __init__(self, cloud_path: _Optional[str] = ...) -> None: ... + +class GetPrecompiledObjectOutputResponse(_message.Message): + __slots__ = ["output"] + OUTPUT_FIELD_NUMBER: _ClassVar[int] + output: str + def __init__(self, output: _Optional[str] = ...) -> None: ... + +class GetPrecompiledObjectRequest(_message.Message): + __slots__ = ["cloud_path"] + CLOUD_PATH_FIELD_NUMBER: _ClassVar[int] + cloud_path: str + def __init__(self, cloud_path: _Optional[str] = ...) -> None: ... + +class GetPrecompiledObjectResponse(_message.Message): + __slots__ = ["precompiled_object"] + PRECOMPILED_OBJECT_FIELD_NUMBER: _ClassVar[int] + precompiled_object: PrecompiledObject + def __init__(self, precompiled_object: _Optional[_Union[PrecompiledObject, _Mapping]] = ...) -> None: ... 
+ +class GetPrecompiledObjectsRequest(_message.Message): + __slots__ = ["category", "sdk"] + CATEGORY_FIELD_NUMBER: _ClassVar[int] + SDK_FIELD_NUMBER: _ClassVar[int] + category: str + sdk: Sdk + def __init__(self, sdk: _Optional[_Union[Sdk, str]] = ..., category: _Optional[str] = ...) -> None: ... + +class GetPrecompiledObjectsResponse(_message.Message): + __slots__ = ["sdk_categories"] + SDK_CATEGORIES_FIELD_NUMBER: _ClassVar[int] + sdk_categories: _containers.RepeatedCompositeFieldContainer[Categories] + def __init__(self, sdk_categories: _Optional[_Iterable[_Union[Categories, _Mapping]]] = ...) -> None: ... + +class GetPreparationOutputRequest(_message.Message): + __slots__ = ["pipeline_uuid"] + PIPELINE_UUID_FIELD_NUMBER: _ClassVar[int] + pipeline_uuid: str + def __init__(self, pipeline_uuid: _Optional[str] = ...) -> None: ... + +class GetPreparationOutputResponse(_message.Message): + __slots__ = ["output"] + OUTPUT_FIELD_NUMBER: _ClassVar[int] + output: str + def __init__(self, output: _Optional[str] = ...) -> None: ... + +class GetRunErrorRequest(_message.Message): + __slots__ = ["pipeline_uuid"] + PIPELINE_UUID_FIELD_NUMBER: _ClassVar[int] + pipeline_uuid: str + def __init__(self, pipeline_uuid: _Optional[str] = ...) -> None: ... + +class GetRunErrorResponse(_message.Message): + __slots__ = ["output"] + OUTPUT_FIELD_NUMBER: _ClassVar[int] + output: str + def __init__(self, output: _Optional[str] = ...) -> None: ... + +class GetRunOutputRequest(_message.Message): + __slots__ = ["pipeline_uuid"] + PIPELINE_UUID_FIELD_NUMBER: _ClassVar[int] + pipeline_uuid: str + def __init__(self, pipeline_uuid: _Optional[str] = ...) -> None: ... + +class GetRunOutputResponse(_message.Message): + __slots__ = ["output"] + OUTPUT_FIELD_NUMBER: _ClassVar[int] + output: str + def __init__(self, output: _Optional[str] = ...) -> None: ... + +class GetSnippetRequest(_message.Message): + __slots__ = ["id"] + ID_FIELD_NUMBER: _ClassVar[int] + id: str + def __init__(self, id: _Optional[str] = ...) -> None: ... + +class GetSnippetResponse(_message.Message): + __slots__ = ["complexity", "files", "pipeline_options", "sdk"] + COMPLEXITY_FIELD_NUMBER: _ClassVar[int] + FILES_FIELD_NUMBER: _ClassVar[int] + PIPELINE_OPTIONS_FIELD_NUMBER: _ClassVar[int] + SDK_FIELD_NUMBER: _ClassVar[int] + complexity: Complexity + files: _containers.RepeatedCompositeFieldContainer[SnippetFile] + pipeline_options: str + sdk: Sdk + def __init__(self, files: _Optional[_Iterable[_Union[SnippetFile, _Mapping]]] = ..., sdk: _Optional[_Union[Sdk, str]] = ..., pipeline_options: _Optional[str] = ..., complexity: _Optional[_Union[Complexity, str]] = ...) -> None: ... + +class GetValidationOutputRequest(_message.Message): + __slots__ = ["pipeline_uuid"] + PIPELINE_UUID_FIELD_NUMBER: _ClassVar[int] + pipeline_uuid: str + def __init__(self, pipeline_uuid: _Optional[str] = ...) -> None: ... + +class GetValidationOutputResponse(_message.Message): + __slots__ = ["output"] + OUTPUT_FIELD_NUMBER: _ClassVar[int] + output: str + def __init__(self, output: _Optional[str] = ...) -> None: ... 
+ +class PrecompiledObject(_message.Message): + __slots__ = ["cloud_path", "complexity", "context_line", "datasets", "default_example", "description", "link", "multifile", "name", "pipeline_options", "sdk", "tags", "type", "url_notebook", "url_vcs"] + CLOUD_PATH_FIELD_NUMBER: _ClassVar[int] + COMPLEXITY_FIELD_NUMBER: _ClassVar[int] + CONTEXT_LINE_FIELD_NUMBER: _ClassVar[int] + DATASETS_FIELD_NUMBER: _ClassVar[int] + DEFAULT_EXAMPLE_FIELD_NUMBER: _ClassVar[int] + DESCRIPTION_FIELD_NUMBER: _ClassVar[int] + LINK_FIELD_NUMBER: _ClassVar[int] + MULTIFILE_FIELD_NUMBER: _ClassVar[int] + NAME_FIELD_NUMBER: _ClassVar[int] + PIPELINE_OPTIONS_FIELD_NUMBER: _ClassVar[int] + SDK_FIELD_NUMBER: _ClassVar[int] + TAGS_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] + URL_NOTEBOOK_FIELD_NUMBER: _ClassVar[int] + URL_VCS_FIELD_NUMBER: _ClassVar[int] + cloud_path: str + complexity: Complexity + context_line: int + datasets: _containers.RepeatedCompositeFieldContainer[Dataset] + default_example: bool + description: str + link: str + multifile: bool + name: str + pipeline_options: str + sdk: Sdk + tags: _containers.RepeatedScalarFieldContainer[str] + type: PrecompiledObjectType + url_notebook: str + url_vcs: str + def __init__(self, cloud_path: _Optional[str] = ..., name: _Optional[str] = ..., description: _Optional[str] = ..., type: _Optional[_Union[PrecompiledObjectType, str]] = ..., pipeline_options: _Optional[str] = ..., link: _Optional[str] = ..., multifile: bool = ..., context_line: _Optional[int] = ..., default_example: bool = ..., sdk: _Optional[_Union[Sdk, str]] = ..., complexity: _Optional[_Union[Complexity, str]] = ..., tags: _Optional[_Iterable[str]] = ..., datasets: _Optional[_Iterable[_Union[Dataset, _Mapping]]] = ..., url_vcs: _Optional[str] = ..., url_notebook: _Optional[str] = ...) -> None: ... + +class RunCodeRequest(_message.Message): + __slots__ = ["code", "datasets", "files", "pipeline_options", "sdk"] + CODE_FIELD_NUMBER: _ClassVar[int] + DATASETS_FIELD_NUMBER: _ClassVar[int] + FILES_FIELD_NUMBER: _ClassVar[int] + PIPELINE_OPTIONS_FIELD_NUMBER: _ClassVar[int] + SDK_FIELD_NUMBER: _ClassVar[int] + code: str + datasets: _containers.RepeatedCompositeFieldContainer[Dataset] + files: _containers.RepeatedCompositeFieldContainer[SnippetFile] + pipeline_options: str + sdk: Sdk + def __init__(self, code: _Optional[str] = ..., sdk: _Optional[_Union[Sdk, str]] = ..., pipeline_options: _Optional[str] = ..., datasets: _Optional[_Iterable[_Union[Dataset, _Mapping]]] = ..., files: _Optional[_Iterable[_Union[SnippetFile, _Mapping]]] = ...) -> None: ... + +class RunCodeResponse(_message.Message): + __slots__ = ["pipeline_uuid"] + PIPELINE_UUID_FIELD_NUMBER: _ClassVar[int] + pipeline_uuid: str + def __init__(self, pipeline_uuid: _Optional[str] = ...) -> None: ... 
+ +class SaveSnippetRequest(_message.Message): + __slots__ = ["complexity", "files", "persistence_key", "pipeline_options", "sdk"] + COMPLEXITY_FIELD_NUMBER: _ClassVar[int] + FILES_FIELD_NUMBER: _ClassVar[int] + PERSISTENCE_KEY_FIELD_NUMBER: _ClassVar[int] + PIPELINE_OPTIONS_FIELD_NUMBER: _ClassVar[int] + SDK_FIELD_NUMBER: _ClassVar[int] + complexity: Complexity + files: _containers.RepeatedCompositeFieldContainer[SnippetFile] + persistence_key: str + pipeline_options: str + sdk: Sdk + def __init__(self, files: _Optional[_Iterable[_Union[SnippetFile, _Mapping]]] = ..., sdk: _Optional[_Union[Sdk, str]] = ..., pipeline_options: _Optional[str] = ..., complexity: _Optional[_Union[Complexity, str]] = ..., persistence_key: _Optional[str] = ...) -> None: ... + +class SaveSnippetResponse(_message.Message): + __slots__ = ["id"] + ID_FIELD_NUMBER: _ClassVar[int] + id: str + def __init__(self, id: _Optional[str] = ...) -> None: ... + +class SnippetFile(_message.Message): + __slots__ = ["content", "is_main", "name"] + CONTENT_FIELD_NUMBER: _ClassVar[int] + IS_MAIN_FIELD_NUMBER: _ClassVar[int] + NAME_FIELD_NUMBER: _ClassVar[int] + content: str + is_main: bool + name: str + def __init__(self, name: _Optional[str] = ..., content: _Optional[str] = ..., is_main: bool = ...) -> None: ... + +class Sdk(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = [] + +class Status(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = [] + +class PrecompiledObjectType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = [] + +class Complexity(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = [] + +class EmulatorType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = [] diff --git a/playground/infrastructure/api/v1/api_pb2_grpc.py b/playground/infrastructure/api/v1/api_pb2_grpc.py index 2f5b7f4d9d2d8..108d5b45e7c6d 100644 --- a/playground/infrastructure/api/v1/api_pb2_grpc.py +++ b/playground/infrastructure/api/v1/api_pb2_grpc.py @@ -1,23 +1,8 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 
"""Client and server classes corresponding to protobuf-defined services.""" import grpc -from api.v1 import api_pb2 as api__pb2 +from api.v1 import api_pb2 as api_dot_v1_dot_api__pb2 class PlaygroundServiceStub(object): @@ -31,88 +16,98 @@ def __init__(self, channel): """ self.RunCode = channel.unary_unary( '/api.v1.PlaygroundService/RunCode', - request_serializer=api__pb2.RunCodeRequest.SerializeToString, - response_deserializer=api__pb2.RunCodeResponse.FromString, + request_serializer=api_dot_v1_dot_api__pb2.RunCodeRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.RunCodeResponse.FromString, ) self.CheckStatus = channel.unary_unary( '/api.v1.PlaygroundService/CheckStatus', - request_serializer=api__pb2.CheckStatusRequest.SerializeToString, - response_deserializer=api__pb2.CheckStatusResponse.FromString, + request_serializer=api_dot_v1_dot_api__pb2.CheckStatusRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.CheckStatusResponse.FromString, ) self.GetRunOutput = channel.unary_unary( '/api.v1.PlaygroundService/GetRunOutput', - request_serializer=api__pb2.GetRunOutputRequest.SerializeToString, - response_deserializer=api__pb2.GetRunOutputResponse.FromString, + request_serializer=api_dot_v1_dot_api__pb2.GetRunOutputRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.GetRunOutputResponse.FromString, ) self.GetLogs = channel.unary_unary( '/api.v1.PlaygroundService/GetLogs', - request_serializer=api__pb2.GetLogsRequest.SerializeToString, - response_deserializer=api__pb2.GetLogsResponse.FromString, + request_serializer=api_dot_v1_dot_api__pb2.GetLogsRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.GetLogsResponse.FromString, ) self.GetGraph = channel.unary_unary( '/api.v1.PlaygroundService/GetGraph', - request_serializer=api__pb2.GetGraphRequest.SerializeToString, - response_deserializer=api__pb2.GetGraphResponse.FromString, + request_serializer=api_dot_v1_dot_api__pb2.GetGraphRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.GetGraphResponse.FromString, ) self.GetRunError = channel.unary_unary( '/api.v1.PlaygroundService/GetRunError', - request_serializer=api__pb2.GetRunErrorRequest.SerializeToString, - response_deserializer=api__pb2.GetRunErrorResponse.FromString, + request_serializer=api_dot_v1_dot_api__pb2.GetRunErrorRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.GetRunErrorResponse.FromString, ) self.GetValidationOutput = channel.unary_unary( '/api.v1.PlaygroundService/GetValidationOutput', - request_serializer=api__pb2.GetValidationOutputRequest.SerializeToString, - response_deserializer=api__pb2.GetValidationOutputResponse.FromString, + request_serializer=api_dot_v1_dot_api__pb2.GetValidationOutputRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.GetValidationOutputResponse.FromString, ) self.GetPreparationOutput = channel.unary_unary( '/api.v1.PlaygroundService/GetPreparationOutput', - request_serializer=api__pb2.GetPreparationOutputRequest.SerializeToString, - response_deserializer=api__pb2.GetPreparationOutputResponse.FromString, + request_serializer=api_dot_v1_dot_api__pb2.GetPreparationOutputRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.GetPreparationOutputResponse.FromString, ) self.GetCompileOutput = channel.unary_unary( '/api.v1.PlaygroundService/GetCompileOutput', - request_serializer=api__pb2.GetCompileOutputRequest.SerializeToString, - 
response_deserializer=api__pb2.GetCompileOutputResponse.FromString, + request_serializer=api_dot_v1_dot_api__pb2.GetCompileOutputRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.GetCompileOutputResponse.FromString, ) self.Cancel = channel.unary_unary( '/api.v1.PlaygroundService/Cancel', - request_serializer=api__pb2.CancelRequest.SerializeToString, - response_deserializer=api__pb2.CancelResponse.FromString, + request_serializer=api_dot_v1_dot_api__pb2.CancelRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.CancelResponse.FromString, ) self.GetPrecompiledObjects = channel.unary_unary( '/api.v1.PlaygroundService/GetPrecompiledObjects', - request_serializer=api__pb2.GetPrecompiledObjectsRequest.SerializeToString, - response_deserializer=api__pb2.GetPrecompiledObjectsResponse.FromString, + request_serializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectsRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectsResponse.FromString, ) self.GetPrecompiledObject = channel.unary_unary( '/api.v1.PlaygroundService/GetPrecompiledObject', - request_serializer=api__pb2.GetPrecompiledObjectRequest.SerializeToString, - response_deserializer=api__pb2.GetPrecompiledObjectResponse.FromString, + request_serializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectResponse.FromString, ) self.GetPrecompiledObjectCode = channel.unary_unary( '/api.v1.PlaygroundService/GetPrecompiledObjectCode', - request_serializer=api__pb2.GetPrecompiledObjectCodeRequest.SerializeToString, - response_deserializer=api__pb2.GetPrecompiledObjectCodeResponse.FromString, + request_serializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectCodeRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectCodeResponse.FromString, ) self.GetPrecompiledObjectOutput = channel.unary_unary( '/api.v1.PlaygroundService/GetPrecompiledObjectOutput', - request_serializer=api__pb2.GetPrecompiledObjectOutputRequest.SerializeToString, - response_deserializer=api__pb2.GetPrecompiledObjectOutputResponse.FromString, + request_serializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectOutputRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectOutputResponse.FromString, ) self.GetPrecompiledObjectLogs = channel.unary_unary( '/api.v1.PlaygroundService/GetPrecompiledObjectLogs', - request_serializer=api__pb2.GetPrecompiledObjectLogsRequest.SerializeToString, - response_deserializer=api__pb2.GetPrecompiledObjectLogsResponse.FromString, + request_serializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectLogsRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectLogsResponse.FromString, ) self.GetPrecompiledObjectGraph = channel.unary_unary( '/api.v1.PlaygroundService/GetPrecompiledObjectGraph', - request_serializer=api__pb2.GetPrecompiledObjectGraphRequest.SerializeToString, - response_deserializer=api__pb2.GetPrecompiledObjectGraphResponse.FromString, + request_serializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectGraphRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectGraphResponse.FromString, ) self.GetDefaultPrecompiledObject = channel.unary_unary( '/api.v1.PlaygroundService/GetDefaultPrecompiledObject', - request_serializer=api__pb2.GetDefaultPrecompiledObjectRequest.SerializeToString, - 
response_deserializer=api__pb2.GetDefaultPrecompiledObjectResponse.FromString, + request_serializer=api_dot_v1_dot_api__pb2.GetDefaultPrecompiledObjectRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.GetDefaultPrecompiledObjectResponse.FromString, + ) + self.SaveSnippet = channel.unary_unary( + '/api.v1.PlaygroundService/SaveSnippet', + request_serializer=api_dot_v1_dot_api__pb2.SaveSnippetRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.SaveSnippetResponse.FromString, + ) + self.GetSnippet = channel.unary_unary( + '/api.v1.PlaygroundService/GetSnippet', + request_serializer=api_dot_v1_dot_api__pb2.GetSnippetRequest.SerializeToString, + response_deserializer=api_dot_v1_dot_api__pb2.GetSnippetResponse.FromString, ) @@ -190,14 +185,14 @@ def Cancel(self, request, context): raise NotImplementedError('Method not implemented!') def GetPrecompiledObjects(self, request, context): - """Get all precompiled objects from the cloud storage. + """Get all precompiled objects from the cloud datastore. """ context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') def GetPrecompiledObject(self, request, context): - """Get precompiled object from the cloud storage. + """Get precompiled object from the cloud datastore. """ context.set_code(grpc.StatusCode.UNIMPLEMENTED) context.set_details('Method not implemented!') @@ -238,93 +233,117 @@ def GetDefaultPrecompiledObject(self, request, context): context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') + def SaveSnippet(self, request, context): + """Save the snippet required for the sharing. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def GetSnippet(self, request, context): + """Get the snippet of playground. 
+ """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + def add_PlaygroundServiceServicer_to_server(servicer, server): rpc_method_handlers = { 'RunCode': grpc.unary_unary_rpc_method_handler( servicer.RunCode, - request_deserializer=api__pb2.RunCodeRequest.FromString, - response_serializer=api__pb2.RunCodeResponse.SerializeToString, + request_deserializer=api_dot_v1_dot_api__pb2.RunCodeRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.RunCodeResponse.SerializeToString, ), 'CheckStatus': grpc.unary_unary_rpc_method_handler( servicer.CheckStatus, - request_deserializer=api__pb2.CheckStatusRequest.FromString, - response_serializer=api__pb2.CheckStatusResponse.SerializeToString, + request_deserializer=api_dot_v1_dot_api__pb2.CheckStatusRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.CheckStatusResponse.SerializeToString, ), 'GetRunOutput': grpc.unary_unary_rpc_method_handler( servicer.GetRunOutput, - request_deserializer=api__pb2.GetRunOutputRequest.FromString, - response_serializer=api__pb2.GetRunOutputResponse.SerializeToString, + request_deserializer=api_dot_v1_dot_api__pb2.GetRunOutputRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.GetRunOutputResponse.SerializeToString, ), 'GetLogs': grpc.unary_unary_rpc_method_handler( servicer.GetLogs, - request_deserializer=api__pb2.GetLogsRequest.FromString, - response_serializer=api__pb2.GetLogsResponse.SerializeToString, + request_deserializer=api_dot_v1_dot_api__pb2.GetLogsRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.GetLogsResponse.SerializeToString, ), 'GetGraph': grpc.unary_unary_rpc_method_handler( servicer.GetGraph, - request_deserializer=api__pb2.GetGraphRequest.FromString, - response_serializer=api__pb2.GetGraphResponse.SerializeToString, + request_deserializer=api_dot_v1_dot_api__pb2.GetGraphRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.GetGraphResponse.SerializeToString, ), 'GetRunError': grpc.unary_unary_rpc_method_handler( servicer.GetRunError, - request_deserializer=api__pb2.GetRunErrorRequest.FromString, - response_serializer=api__pb2.GetRunErrorResponse.SerializeToString, + request_deserializer=api_dot_v1_dot_api__pb2.GetRunErrorRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.GetRunErrorResponse.SerializeToString, ), 'GetValidationOutput': grpc.unary_unary_rpc_method_handler( servicer.GetValidationOutput, - request_deserializer=api__pb2.GetValidationOutputRequest.FromString, - response_serializer=api__pb2.GetValidationOutputResponse.SerializeToString, + request_deserializer=api_dot_v1_dot_api__pb2.GetValidationOutputRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.GetValidationOutputResponse.SerializeToString, ), 'GetPreparationOutput': grpc.unary_unary_rpc_method_handler( servicer.GetPreparationOutput, - request_deserializer=api__pb2.GetPreparationOutputRequest.FromString, - response_serializer=api__pb2.GetPreparationOutputResponse.SerializeToString, + request_deserializer=api_dot_v1_dot_api__pb2.GetPreparationOutputRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.GetPreparationOutputResponse.SerializeToString, ), 'GetCompileOutput': grpc.unary_unary_rpc_method_handler( servicer.GetCompileOutput, - request_deserializer=api__pb2.GetCompileOutputRequest.FromString, - response_serializer=api__pb2.GetCompileOutputResponse.SerializeToString, + 
request_deserializer=api_dot_v1_dot_api__pb2.GetCompileOutputRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.GetCompileOutputResponse.SerializeToString, ), 'Cancel': grpc.unary_unary_rpc_method_handler( servicer.Cancel, - request_deserializer=api__pb2.CancelRequest.FromString, - response_serializer=api__pb2.CancelResponse.SerializeToString, + request_deserializer=api_dot_v1_dot_api__pb2.CancelRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.CancelResponse.SerializeToString, ), 'GetPrecompiledObjects': grpc.unary_unary_rpc_method_handler( servicer.GetPrecompiledObjects, - request_deserializer=api__pb2.GetPrecompiledObjectsRequest.FromString, - response_serializer=api__pb2.GetPrecompiledObjectsResponse.SerializeToString, + request_deserializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectsRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectsResponse.SerializeToString, ), 'GetPrecompiledObject': grpc.unary_unary_rpc_method_handler( servicer.GetPrecompiledObject, - request_deserializer=api__pb2.GetPrecompiledObjectRequest.FromString, - response_serializer=api__pb2.GetPrecompiledObjectResponse.SerializeToString, + request_deserializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectResponse.SerializeToString, ), 'GetPrecompiledObjectCode': grpc.unary_unary_rpc_method_handler( servicer.GetPrecompiledObjectCode, - request_deserializer=api__pb2.GetPrecompiledObjectCodeRequest.FromString, - response_serializer=api__pb2.GetPrecompiledObjectCodeResponse.SerializeToString, + request_deserializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectCodeRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectCodeResponse.SerializeToString, ), 'GetPrecompiledObjectOutput': grpc.unary_unary_rpc_method_handler( servicer.GetPrecompiledObjectOutput, - request_deserializer=api__pb2.GetPrecompiledObjectOutputRequest.FromString, - response_serializer=api__pb2.GetPrecompiledObjectOutputResponse.SerializeToString, + request_deserializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectOutputRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectOutputResponse.SerializeToString, ), 'GetPrecompiledObjectLogs': grpc.unary_unary_rpc_method_handler( servicer.GetPrecompiledObjectLogs, - request_deserializer=api__pb2.GetPrecompiledObjectLogsRequest.FromString, - response_serializer=api__pb2.GetPrecompiledObjectLogsResponse.SerializeToString, + request_deserializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectLogsRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectLogsResponse.SerializeToString, ), 'GetPrecompiledObjectGraph': grpc.unary_unary_rpc_method_handler( servicer.GetPrecompiledObjectGraph, - request_deserializer=api__pb2.GetPrecompiledObjectGraphRequest.FromString, - response_serializer=api__pb2.GetPrecompiledObjectGraphResponse.SerializeToString, + request_deserializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectGraphRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.GetPrecompiledObjectGraphResponse.SerializeToString, ), 'GetDefaultPrecompiledObject': grpc.unary_unary_rpc_method_handler( servicer.GetDefaultPrecompiledObject, - request_deserializer=api__pb2.GetDefaultPrecompiledObjectRequest.FromString, - response_serializer=api__pb2.GetDefaultPrecompiledObjectResponse.SerializeToString, + 
request_deserializer=api_dot_v1_dot_api__pb2.GetDefaultPrecompiledObjectRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.GetDefaultPrecompiledObjectResponse.SerializeToString, + ), + 'SaveSnippet': grpc.unary_unary_rpc_method_handler( + servicer.SaveSnippet, + request_deserializer=api_dot_v1_dot_api__pb2.SaveSnippetRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.SaveSnippetResponse.SerializeToString, + ), + 'GetSnippet': grpc.unary_unary_rpc_method_handler( + servicer.GetSnippet, + request_deserializer=api_dot_v1_dot_api__pb2.GetSnippetRequest.FromString, + response_serializer=api_dot_v1_dot_api__pb2.GetSnippetResponse.SerializeToString, ), } generic_handler = grpc.method_handlers_generic_handler( @@ -348,8 +367,8 @@ def RunCode(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/RunCode', - api__pb2.RunCodeRequest.SerializeToString, - api__pb2.RunCodeResponse.FromString, + api_dot_v1_dot_api__pb2.RunCodeRequest.SerializeToString, + api_dot_v1_dot_api__pb2.RunCodeResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -365,8 +384,8 @@ def CheckStatus(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/CheckStatus', - api__pb2.CheckStatusRequest.SerializeToString, - api__pb2.CheckStatusResponse.FromString, + api_dot_v1_dot_api__pb2.CheckStatusRequest.SerializeToString, + api_dot_v1_dot_api__pb2.CheckStatusResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -382,8 +401,8 @@ def GetRunOutput(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/GetRunOutput', - api__pb2.GetRunOutputRequest.SerializeToString, - api__pb2.GetRunOutputResponse.FromString, + api_dot_v1_dot_api__pb2.GetRunOutputRequest.SerializeToString, + api_dot_v1_dot_api__pb2.GetRunOutputResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -399,8 +418,8 @@ def GetLogs(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/GetLogs', - api__pb2.GetLogsRequest.SerializeToString, - api__pb2.GetLogsResponse.FromString, + api_dot_v1_dot_api__pb2.GetLogsRequest.SerializeToString, + api_dot_v1_dot_api__pb2.GetLogsResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -416,8 +435,8 @@ def GetGraph(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/GetGraph', - api__pb2.GetGraphRequest.SerializeToString, - api__pb2.GetGraphResponse.FromString, + api_dot_v1_dot_api__pb2.GetGraphRequest.SerializeToString, + api_dot_v1_dot_api__pb2.GetGraphResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -433,8 +452,8 @@ def GetRunError(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/GetRunError', - api__pb2.GetRunErrorRequest.SerializeToString, - api__pb2.GetRunErrorResponse.FromString, + api_dot_v1_dot_api__pb2.GetRunErrorRequest.SerializeToString, + api_dot_v1_dot_api__pb2.GetRunErrorResponse.FromString, options, channel_credentials, 
insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -450,8 +469,8 @@ def GetValidationOutput(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/GetValidationOutput', - api__pb2.GetValidationOutputRequest.SerializeToString, - api__pb2.GetValidationOutputResponse.FromString, + api_dot_v1_dot_api__pb2.GetValidationOutputRequest.SerializeToString, + api_dot_v1_dot_api__pb2.GetValidationOutputResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -467,8 +486,8 @@ def GetPreparationOutput(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/GetPreparationOutput', - api__pb2.GetPreparationOutputRequest.SerializeToString, - api__pb2.GetPreparationOutputResponse.FromString, + api_dot_v1_dot_api__pb2.GetPreparationOutputRequest.SerializeToString, + api_dot_v1_dot_api__pb2.GetPreparationOutputResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -484,8 +503,8 @@ def GetCompileOutput(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/GetCompileOutput', - api__pb2.GetCompileOutputRequest.SerializeToString, - api__pb2.GetCompileOutputResponse.FromString, + api_dot_v1_dot_api__pb2.GetCompileOutputRequest.SerializeToString, + api_dot_v1_dot_api__pb2.GetCompileOutputResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -501,8 +520,8 @@ def Cancel(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/Cancel', - api__pb2.CancelRequest.SerializeToString, - api__pb2.CancelResponse.FromString, + api_dot_v1_dot_api__pb2.CancelRequest.SerializeToString, + api_dot_v1_dot_api__pb2.CancelResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -518,8 +537,8 @@ def GetPrecompiledObjects(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/GetPrecompiledObjects', - api__pb2.GetPrecompiledObjectsRequest.SerializeToString, - api__pb2.GetPrecompiledObjectsResponse.FromString, + api_dot_v1_dot_api__pb2.GetPrecompiledObjectsRequest.SerializeToString, + api_dot_v1_dot_api__pb2.GetPrecompiledObjectsResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -535,8 +554,8 @@ def GetPrecompiledObject(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/GetPrecompiledObject', - api__pb2.GetPrecompiledObjectRequest.SerializeToString, - api__pb2.GetPrecompiledObjectResponse.FromString, + api_dot_v1_dot_api__pb2.GetPrecompiledObjectRequest.SerializeToString, + api_dot_v1_dot_api__pb2.GetPrecompiledObjectResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -552,8 +571,8 @@ def GetPrecompiledObjectCode(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/GetPrecompiledObjectCode', - api__pb2.GetPrecompiledObjectCodeRequest.SerializeToString, - api__pb2.GetPrecompiledObjectCodeResponse.FromString, + 
api_dot_v1_dot_api__pb2.GetPrecompiledObjectCodeRequest.SerializeToString, + api_dot_v1_dot_api__pb2.GetPrecompiledObjectCodeResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -569,8 +588,8 @@ def GetPrecompiledObjectOutput(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/GetPrecompiledObjectOutput', - api__pb2.GetPrecompiledObjectOutputRequest.SerializeToString, - api__pb2.GetPrecompiledObjectOutputResponse.FromString, + api_dot_v1_dot_api__pb2.GetPrecompiledObjectOutputRequest.SerializeToString, + api_dot_v1_dot_api__pb2.GetPrecompiledObjectOutputResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -586,8 +605,8 @@ def GetPrecompiledObjectLogs(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/GetPrecompiledObjectLogs', - api__pb2.GetPrecompiledObjectLogsRequest.SerializeToString, - api__pb2.GetPrecompiledObjectLogsResponse.FromString, + api_dot_v1_dot_api__pb2.GetPrecompiledObjectLogsRequest.SerializeToString, + api_dot_v1_dot_api__pb2.GetPrecompiledObjectLogsResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -603,8 +622,8 @@ def GetPrecompiledObjectGraph(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/GetPrecompiledObjectGraph', - api__pb2.GetPrecompiledObjectGraphRequest.SerializeToString, - api__pb2.GetPrecompiledObjectGraphResponse.FromString, + api_dot_v1_dot_api__pb2.GetPrecompiledObjectGraphRequest.SerializeToString, + api_dot_v1_dot_api__pb2.GetPrecompiledObjectGraphResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) @@ -620,7 +639,41 @@ def GetDefaultPrecompiledObject(request, timeout=None, metadata=None): return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/GetDefaultPrecompiledObject', - api__pb2.GetDefaultPrecompiledObjectRequest.SerializeToString, - api__pb2.GetDefaultPrecompiledObjectResponse.FromString, + api_dot_v1_dot_api__pb2.GetDefaultPrecompiledObjectRequest.SerializeToString, + api_dot_v1_dot_api__pb2.GetDefaultPrecompiledObjectResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def SaveSnippet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/SaveSnippet', + api_dot_v1_dot_api__pb2.SaveSnippetRequest.SerializeToString, + api_dot_v1_dot_api__pb2.SaveSnippetResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def GetSnippet(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/api.v1.PlaygroundService/GetSnippet', + api_dot_v1_dot_api__pb2.GetSnippetRequest.SerializeToString, + api_dot_v1_dot_api__pb2.GetSnippetResponse.FromString, options, 
channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff --git a/playground/infrastructure/checker.py b/playground/infrastructure/checker.py index c534500dc5287..5d235b415742c 100644 --- a/playground/infrastructure/checker.py +++ b/playground/infrastructure/checker.py @@ -27,10 +27,10 @@ import sys from pathlib import PurePath from typing import List -from api.v1.api_pb2 import Sdk +from api.v1.api_pb2 import Sdk from config import Config -from helper import get_tag +from helper import get_tag, load_supported_categories def parse_args() -> argparse.Namespace: @@ -78,18 +78,26 @@ def check_sdk_examples(paths: List[PurePath], sdk: Sdk, root_dir: str) -> bool: if path.suffix.lstrip(".") != Config.SDK_TO_EXTENSION[sdk]: continue path = PurePath(root_dir, path) + if not os.path.isfile(path): + # TODO: the file has been deleted, which can potentially break multi-file examples + logging.info(f"{path} does not exist, skipping") + continue if get_tag(path) is not None: logging.info(f"{path} is an example, return") return True return False - def main(): args = parse_args() root_dir = os.getenv("BEAM_ROOT_DIR") if root_dir is None: raise KeyError("BEAM_ROOT_DIR environment variable should be specified in os") + categories_file = os.getenv("BEAM_EXAMPLE_CATEGORIES") + if categories_file is None: + raise KeyError("BEAM_EXAMPLE_CATEGORIES environment variable should be specified in os") + + load_supported_categories(categories_file) logging.basicConfig(level=logging.DEBUG if args.verbose else logging.WARNING) diff --git a/playground/infrastructure/ci_cd.py b/playground/infrastructure/ci_cd.py index 35197ef95d3eb..9c3b8842a9aa6 100644 --- a/playground/infrastructure/ci_cd.py +++ b/playground/infrastructure/ci_cd.py @@ -88,9 +88,6 @@ def _run_ci_cd(step: str, raw_sdk: str, origin: Origin, subdirs: List[str]): logging.info("Finish of searching Playground examples") logging.info("Number of found Playground examples: %s", len(examples)) - examples = list(filter(lambda example: example.tag.multifile is False, examples)) - logging.info("Number of sinlge-file Playground examples: %s", len(examples)) - logging.info("Execute Playground examples ...") runner = Verifier(sdk, origin) runner.run_verify(examples) diff --git a/playground/infrastructure/conftest.py b/playground/infrastructure/conftest.py index 3a776a4aac5dc..face74f0e2b4d 100644 --- a/playground/infrastructure/conftest.py +++ b/playground/infrastructure/conftest.py @@ -14,6 +14,7 @@ # limitations under the License.
import os.path import pytest +from pytest_mock import MockerFixture from typing import Optional, List, Dict, Any from models import Example, SdkEnum, Tag @@ -29,22 +30,29 @@ def supported_categories(): @pytest.fixture(autouse=True) -def mock_dataset_file_name(mocker): +def mock_files(mocker: MockerFixture): def _mock_isfile(filepath): if filepath in [ + # mock examples & imports + "MOCK_FILEPATH_0", + "../../examples/MOCK_EXAMPLE/main.java", + "../../examples/MOCK_EXAMPLE/utils.java", + "../../examples/MOCK_EXAMPLE/schema.java", + # datasets "../backend/datasets/dataset_id_1.json", "../backend/datasets/dataset_id_1.avro", ]: return True raise FileNotFoundError(filepath) - mocker.patch('os.path.isfile', side_effect=_mock_isfile) + + mocker.patch("os.path.isfile", side_effect=_mock_isfile) + mocker.patch("builtins.open", mocker.mock_open(read_data="file content")) @pytest.fixture def create_test_example(create_test_tag): def _create_test_example( - with_kafka=False, - tag_meta: Optional[Dict[str, Any]] = None, **example_meta + is_multifile=False, with_kafka=False, tag_meta: Optional[Dict[str, Any]] = None, **example_meta ) -> Example: if tag_meta is None: tag_meta = {} @@ -54,12 +62,14 @@ def _create_test_example( filepath="MOCK_FILEPATH", code="MOCK_CODE", output="MOCK_OUTPUT", + logs="MOCK_LOGS", + graph="MOCK_GRAPH", url_vcs="https://github.com/proj/MOCK_LINK", context_line=132, ) meta.update(**example_meta) return Example( - tag=create_test_tag(with_kafka=with_kafka, **tag_meta), + tag=create_test_tag(is_multifile=is_multifile, with_kafka=with_kafka, **tag_meta), **meta, ) @@ -68,7 +78,7 @@ def _create_test_example( @pytest.fixture def create_test_tag(): - def _create_test_tag(with_kafka=False, **tag_meta) -> Tag: + def _create_test_tag(with_kafka=False, is_multifile=False, **tag_meta) -> Tag: meta = { "name": "MOCK_NAME", "description": "MOCK_DESCRIPTION", @@ -80,16 +90,28 @@ def _create_test_tag(with_kafka=False, **tag_meta) -> Tag: if with_kafka: meta.update( emulators=[ - {"type": "kafka", "topic": {"id": "topic1", "source_dataset": "dataset_id_1"}} + { + "type": "kafka", + "topic": {"id": "topic1", "source_dataset": "dataset_id_1"}, + } ], datasets={"dataset_id_1": {"format": "avro", "location": "local"}}, ) + if is_multifile: + meta.update( + multifile=True, + files=[ + {"name": "utils.java"}, + {"name": "schema.java"} + ] + ) for k, v in tag_meta.items(): if v is None: meta.pop(k, None) else: meta[k] = v return Tag( + filepath="../../examples/MOCK_EXAMPLE/main.java", line_start=10, line_finish=20, context_line=30, diff --git a/playground/infrastructure/datastore_client.py b/playground/infrastructure/datastore_client.py index 77743b1299d14..31065177db6c6 100644 --- a/playground/infrastructure/datastore_client.py +++ b/playground/infrastructure/datastore_client.py @@ -29,7 +29,7 @@ import config from config import Config, Origin, PrecompiledExample, DatastoreProps -from models import Example, SdkEnum, Dataset, Emulator +from models import Example, SdkEnum, Dataset, Emulator, ImportFile from api.v1 import api_pb2 @@ -93,14 +93,21 @@ def save_to_cloud_datastore( ) snippet = self._to_snippet_entity( - example, example_id, sdk_key, now, actual_schema_version_key, origin + example, example_id, sdk_key, now, actual_schema_version_key, origin, ) self._datastore_client.put(snippet) self._datastore_client.put_multi( self._pc_object_entities(example, example_id) ) - # only single-file examples are supported by now - self._datastore_client.put(self._to_file_entity(example, example_id)) + 
self._datastore_client.put(self._to_main_file_entity(example, example_id)) + if example.tag.files: + self._datastore_client.put_multi( + [ + self._to_additional_file_entity(example_id, file, idx) + for idx, file in enumerate(example.tag.files, start=1) + ] + ) + if example.tag.datasets: self._datastore_client.put_multi( [ @@ -125,7 +132,7 @@ def save_to_cloud_datastore( self._datastore_client.delete( self._get_key(DatastoreProps.SNIPPET_KIND, ex_id) ) - self._datastore_client.delete(self._get_files_key(ex_id)) + self._datastore_client.delete(self._get_files_key(ex_id, 0)) pc_objs_keys_for_removing = [] for example_type in [ PrecompiledExample.GRAPH_EXTENSION.upper(), @@ -232,8 +239,8 @@ def _make_example_id(self, origin: Origin, sdk: SdkEnum, name: str): ] ) - def _get_files_key(self, example_id: str): - name = config.DatastoreProps.KEY_NAME_DELIMITER.join([example_id, "0"]) + def _get_files_key(self, example_id: str, idx: int): + name = config.DatastoreProps.KEY_NAME_DELIMITER.join([example_id, str(idx)]) return self._get_key(DatastoreProps.FILES_KIND, name) def _get_pc_objects_key(self, example_id: str, pc_obj_type: str): @@ -258,7 +265,7 @@ def _to_snippet_entity( "pipeOpts": self._get_pipeline_options(example), "created": now, "origin": origin, - "numberOfFiles": 1, + "numberOfFiles": 1 + len(example.tag.files), "schVer": schema_key, "complexity": f"COMPLEXITY_{example.tag.complexity}", } @@ -303,28 +310,25 @@ def _pc_object_entities( self, example: Example, example_id: str ) -> List[datastore.Entity]: entities = [] - if len(example.graph) != 0: - entities.append( - self._pc_obj_entity( - example_id, - example.graph, - PrecompiledExample.GRAPH_EXTENSION.upper(), - ) + entities.append( + self._pc_obj_entity( + example_id, + example.graph, + PrecompiledExample.GRAPH_EXTENSION.upper(), ) - if len(example.output) != 0: - entities.append( - self._pc_obj_entity( - example_id, - example.output, - PrecompiledExample.OUTPUT_EXTENSION.upper(), - ) + ) + entities.append( + self._pc_obj_entity( + example_id, + example.output, + PrecompiledExample.OUTPUT_EXTENSION.upper(), ) - if len(example.logs) != 0: - entities.append( - self._pc_obj_entity( - example_id, example.logs, PrecompiledExample.LOG_EXTENSION.upper() - ) + ) + entities.append( + self._pc_obj_entity( + example_id, example.logs, PrecompiledExample.LOG_EXTENSION.upper() ) + ) return entities def _pc_obj_entity( @@ -337,9 +341,9 @@ def _pc_obj_entity( pc_obj_entity.update({"content": content}) return pc_obj_entity - def _to_file_entity(self, example: Example, example_id: str): + def _to_main_file_entity(self, example: Example, example_id: str): file_entity = datastore.Entity( - self._get_files_key(example_id), exclude_from_indexes=("content",) + self._get_files_key(example_id, 0), exclude_from_indexes=("content",) ) file_entity.update( { @@ -353,6 +357,21 @@ def _to_file_entity(self, example: Example, example_id: str): ) return file_entity + def _to_additional_file_entity(self, example_id: str, file: ImportFile, idx: int): + file_entity = datastore.Entity( + self._get_files_key(example_id, idx), exclude_from_indexes=("content",) + ) + file_entity.update( + { + "name": file.name, + "content": file.content, + "cntxLine": file.context_line, + "isMain": False, + } + ) + return file_entity + + def _to_dataset_entity(self, dataset_id: str, file_name: str): dataset_entity = datastore.Entity(self._get_dataset_key(dataset_id)) dataset_entity.update({"path": file_name}) diff --git a/playground/infrastructure/grpc_client.py 
b/playground/infrastructure/grpc_client.py index b78b94beeb0b9..bf34cf5a0f254 100644 --- a/playground/infrastructure/grpc_client.py +++ b/playground/infrastructure/grpc_client.py @@ -32,8 +32,9 @@ class GRPCClient: """GRPCClient is gRPC client for sending a request to the backend.""" - def __init__(self, timeout=10, wait_for_ready=True): + def __init__(self, wait_for_ready=True): use_webgrpc = os.getenv("BEAM_USE_WEBGRPC", False) + timeout = os.getenv("GRPC_TIMEOUT", 10) if use_webgrpc: self._channel = sonora.aio.insecure_web_channel(Config.SERVER_ADDRESS) else: @@ -51,7 +52,13 @@ async def __aenter__(self): async def __aexit__(self, exc_type, exc_val, exc_tb): await self._channel.__aexit__(exc_type, exc_val, exc_tb) - async def run_code(self, code: str, sdk: SdkEnum, pipeline_options: str, datasets: List[api_pb2.Dataset]) -> str: + async def run_code(self, + code: str, + sdk: SdkEnum, + pipeline_options: str, + datasets: List[api_pb2.Dataset], + files: List[api_pb2.SnippetFile], + ) -> str: """ Run example by his code and SDK @@ -70,7 +77,7 @@ async def run_code(self, code: str, sdk: SdkEnum, pipeline_options: str, dataset raise Exception( f'Incorrect sdk: must be from this pool: {", ".join(sdks)}') request = api_pb2.RunCodeRequest( - code=code, sdk=sdk, pipeline_options=pipeline_options, datasets=datasets) + code=code, sdk=sdk, pipeline_options=pipeline_options, datasets=datasets, files=files) response = await self._stub.RunCode(request, **self._kwargs) return response.pipeline_uuid @@ -104,12 +111,13 @@ async def get_run_error(self, pipeline_uuid: str) -> str: response = await self._stub.GetRunError(request, **self._kwargs) return response.output - async def get_run_output(self, pipeline_uuid: str) -> str: + async def get_run_output(self, pipeline_uuid: str, example_filepath: str) -> str: """ Get the result of pipeline execution. Args: pipeline_uuid: uuid of the pipeline + example_filepath: path to the file of the example Returns: output: contain the result of pipeline execution @@ -117,14 +125,17 @@ async def get_run_output(self, pipeline_uuid: str) -> str: self._verify_pipeline_uuid(pipeline_uuid) request = api_pb2.GetRunOutputRequest(pipeline_uuid=pipeline_uuid) response = await self._stub.GetRunOutput(request, **self._kwargs) + if response.output == "": + logging.info("Run output for %s is empty", example_filepath) return response.output - async def get_log(self, pipeline_uuid: str) -> str: + async def get_log(self, pipeline_uuid: str, example_filepath: str) -> str: """ Get the result of pipeline execution. 
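To make the reworked grpc_client.py surface concrete, here is a hedged usage sketch built only from the signatures in this diff; `example` is a placeholder object, datasets are omitted for brevity, and error handling is left out.

```python
# Hedged sketch of driving the updated client; `example` is a placeholder with the
# fields the verifier uses (filepath, code, sdk, tag.pipeline_options, tag.files).
from api.v1 import api_pb2
from grpc_client import GRPCClient


async def run_and_fetch_output(example):
    files = [api_pb2.SnippetFile(name=example.filepath, content=example.code, is_main=True)]
    files += [
        api_pb2.SnippetFile(name=f.name, content=f.content, is_main=False)
        for f in example.tag.files
    ]
    # The request timeout is now read from GRPC_TIMEOUT instead of a constructor argument.
    async with GRPCClient() as client:
        pipeline_id = await client.run_code(
            example.code, example.sdk, example.tag.pipeline_options, [], files=files
        )
        return await client.get_run_output(pipeline_id, example.filepath)
```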
Args: pipeline_uuid: uuid of the pipeline + example_filepath: path to the file of the example Returns: output: contain the result of pipeline execution @@ -132,6 +143,8 @@ async def get_log(self, pipeline_uuid: str) -> str: self._verify_pipeline_uuid(pipeline_uuid) request = api_pb2.GetLogsRequest(pipeline_uuid=pipeline_uuid) response = await self._stub.GetLogs(request, **self._kwargs) + if response.output == "": + logging.info("Log for %s is empty", example_filepath) return response.output diff --git a/playground/infrastructure/helper.py b/playground/infrastructure/helper.py index d3941f3083e15..c4ed49f78cfca 100644 --- a/playground/infrastructure/helper.py +++ b/playground/infrastructure/helper.py @@ -24,6 +24,7 @@ from typing import List, Optional, Dict from api.v1 import api_pb2 +import pydantic from tqdm.asyncio import tqdm import yaml @@ -98,11 +99,19 @@ def find_examples(root_dir: str, subdirs: List[str], sdk: SdkEnum) -> List[Examp for filename in files: filepath = os.path.join(root, filename) try: - example = _load_example( - filename=filename, filepath=filepath, sdk=sdk - ) - if example is not None: - examples.append(example) + try: + example = _load_example( + filename=filename, filepath=filepath, sdk=sdk + ) + if example is not None: + examples.append(example) + except pydantic.ValidationError as err: + if len(err.errors()) > 1: + raise + if err.errors()[0]["msg"] == "multifile is True but no files defined": + logging.warning("incomplete multifile example ignored %s", filepath) + continue + raise except Exception: logging.exception("error loading example at %s", filepath) has_errors = True @@ -180,7 +189,10 @@ def get_tag(filepath) -> Optional[Tag]: ) yml = yaml.load(embdedded_yaml_content, Loader=yaml.SafeLoader) return Tag( - line_start=line_start, line_finish=line_finish, **yml[Config.BEAM_PLAYGROUND] + filepath=filepath, + line_start=line_start, + line_finish=line_finish, + **yml[Config.BEAM_PLAYGROUND], ) @@ -300,9 +312,16 @@ async def _update_example_status(example: Example, client: GRPCClient): dataset_path=dataset.file_name, ) ) + files: List[api_pb2.SnippetFile] = [ + api_pb2.SnippetFile(name=example.filepath, content=example.code, is_main=True) + ] + for file in example.tag.files: + files.append( + api_pb2.SnippetFile(name=file.name, content=file.content, is_main=False) + ) pipeline_id = await client.run_code( - example.code, example.sdk, example.tag.pipeline_options, datasets + example.code, example.sdk, example.tag.pipeline_options, datasets, files=files, ) example.pipeline_id = pipeline_id status = await client.check_status(pipeline_id) diff --git a/playground/infrastructure/models.py b/playground/infrastructure/models.py index 5479883b83f28..77d9ae26404b7 100644 --- a/playground/infrastructure/models.py +++ b/playground/infrastructure/models.py @@ -14,22 +14,17 @@ # limitations under the License. 
import logging import os.path +import pathlib from enum import Enum, IntEnum from typing import List, Optional, Dict from api.v1 import api_pb2 -from pydantic import ( - BaseModel, - Extra, - Field, - validator, - root_validator, - HttpUrl -) +from pydantic import BaseModel, Extra, Field, validator, root_validator, HttpUrl from config import RepoProps + class ComplexityEnum(str, Enum): BASIC = "BASIC" MEDIUM = "MEDIUM" @@ -66,10 +61,18 @@ class Emulator(BaseModel): topic: Topic +class ImportFile(BaseModel): + name: str = Field(..., min_length=1) + context_line: int = 0 + content: str = "" + + class Tag(BaseModel): """ Tag represents the beam-playground embedded yaml content """ + + filepath: str = Field(..., min_length=1) line_start: int line_finish: int context_line: int @@ -84,6 +87,7 @@ class Tag(BaseModel): default_example: bool = False tags: List[str] = [] url_notebook: Optional[HttpUrl] = None + files: List[ImportFile] = [] class Config: supported_categories = [] @@ -113,24 +117,48 @@ def dataset_defined(cls, v, values, **kwargs): f"Emulator topic {v.topic.id} has undefined dataset {v.topic.source_dataset}" ) - @validator('datasets') + @validator("datasets") def dataset_file_name(cls, datasets): for dataset_id, dataset in datasets.items(): dataset.file_name = f"{dataset_id}.{dataset.format}" if dataset.location == DatasetLocation.LOCAL: - dataset_path = os.path.join(RepoProps.REPO_DATASETS_PATH, dataset.file_name) + dataset_path = os.path.join( + RepoProps.REPO_DATASETS_PATH, dataset.file_name + ) if not os.path.isfile(dataset_path): - logging.error("File not found at the specified path: %s", dataset_path) + logging.error( + "File not found at the specified path: %s", dataset_path + ) raise FileNotFoundError return datasets - @validator("categories", each_item=True) def category_supported(cls, v, values, config, **kwargs): if v not in config.supported_categories: raise ValueError(f"Category {v} not in {config.supported_categories}") return v + @root_validator + def multifile_files(cls, values): + if values.get('multifile', False) and not values.get('files', []): + raise ValueError('multifile is True but no files defined') + return values + + @validator("filepath") + def check_filepath_exists(cls, v: str): + if not os.path.isfile(v): + logging.error("Example file not found: %s", v) + raise FileNotFoundError(v) + return v + + @validator("files", each_item=True) + def check_files(cls, v: ImportFile, values): + local_path = os.path.join(os.path.dirname(values["filepath"]), v.name) + if not os.path.isfile(local_path): + logging.error("Import file not found: %s", local_path) + raise FileNotFoundError(local_path) + v.content = open(local_path).read() + return v class SdkEnum(IntEnum): diff --git a/playground/infrastructure/proxy/allow_list.py b/playground/infrastructure/proxy/allow_list.py index df7d842d532e7..aa7a9f2e08c0d 100644 --- a/playground/infrastructure/proxy/allow_list.py +++ b/playground/infrastructure/proxy/allow_list.py @@ -21,7 +21,8 @@ "logging.googleapis.com", "datastore.googleapis.com", "oauth2.googleapis.com", - "storage.googleapis.com" + "storage.googleapis.com", + "repo1.maven.org" ] # ALLOWED_BUCKET_LIST contains all public Google Cloud Storage buckets diff --git a/playground/infrastructure/requirements.txt b/playground/infrastructure/requirements.txt index 16afc877c7f86..936b077129da8 100644 --- a/playground/infrastructure/requirements.txt +++ b/playground/infrastructure/requirements.txt @@ -15,15 +15,14 @@ # specific language governing permissions and limitations # 
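One way the new `multifile` root validator in models.py above could be exercised, reusing the `create_test_tag` fixture and autouse file mocks from conftest.py earlier in this diff; the test itself is hypothetical.

```python
# Hypothetical test: a tag marked multifile without any `files` entries is rejected.
import pydantic
import pytest


def test_multifile_tag_requires_files(create_test_tag):
    with pytest.raises(
        pydantic.ValidationError, match="multifile is True but no files defined"
    ):
        create_test_tag(multifile=True)  # deliberately no `files` list
```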
under the License. -grpcio-tools==1.41.0 -grpcio==1.41.1 mock==4.0.3 -protobuf==3.19.1 pytest==6.2.5 pytest-asyncio==0.18.2 pytest-mock==3.6.1 PyYAML==6.0 tqdm~=4.62.3 -google-cloud-datastore==2.7.1 sonora==0.2.2 pydantic==1.10.2 +grpcio-tools==1.51.1 +protobuf==4.21.12 +google-cloud-datastore==2.11.0 diff --git a/playground/infrastructure/test_checker.py b/playground/infrastructure/test_checker.py index 777bf2967b7ba..28897565972d1 100644 --- a/playground/infrastructure/test_checker.py +++ b/playground/infrastructure/test_checker.py @@ -13,12 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +from pathlib import PurePath + import mock import pytest -from pathlib import PurePath -from api.v1.api_pb2 import SDK_JAVA import checker +from api.v1.api_pb2 import SDK_JAVA from checker import check_in_allowlist, check_sdk_examples @@ -37,11 +38,19 @@ def test_check_in_allowlist(paths, allowlist, result): @pytest.mark.parametrize( - "paths, sdk, has_tag, result", + "paths, sdk, has_tag, isfile, result", [ - ([PurePath("path"), PurePath("path/path2.java")], SDK_JAVA, True, True), + ([PurePath("path"), PurePath("path/path2.java")], SDK_JAVA, True, True, True), + ([PurePath("path"), PurePath("path/path2.java")], SDK_JAVA, True, False, False), ], ) -def test_check_sdk_examples(paths, sdk, has_tag, result): +@mock.patch('checker.os.path.isfile') +def test_check_sdk_examples(mock_os_path_isfile, paths, sdk, has_tag, isfile, result): checker.get_tag = mock.Mock(return_value=has_tag) + mock_os_path_isfile.return_value = isfile assert result == check_sdk_examples(paths, sdk, "root_dir") + mock_os_path_isfile.assert_has_calls( + [ + mock.call(PurePath("root_dir/path/path2.java")), + ] + ) diff --git a/playground/infrastructure/test_ci_cd.py b/playground/infrastructure/test_ci_cd.py index 5f5b880ba026c..9e5c041e27efa 100644 --- a/playground/infrastructure/test_ci_cd.py +++ b/playground/infrastructure/test_ci_cd.py @@ -31,7 +31,7 @@ def test_ci_step( mock_find_examples.return_value = [ create_test_example(tag_meta=dict(name="Default", default_example=True)), create_test_example(tag_meta=dict(name="Single", multifile=False)), - create_test_example(tag_meta=dict(name="Multi", multifile=True)), + create_test_example(is_multifile=True, tag_meta=dict(name="Multi")), ] _run_ci_cd( step, diff --git a/playground/infrastructure/test_datastore_client.py b/playground/infrastructure/test_datastore_client.py index a494534354220..18879a243e1d4 100644 --- a/playground/infrastructure/test_datastore_client.py +++ b/playground/infrastructure/test_datastore_client.py @@ -59,6 +59,7 @@ def test_save_to_cloud_datastore_when_google_cloud_project_id_not_set(): DatastoreClient() +@pytest.mark.parametrize("is_multifile", [False, True]) @pytest.mark.parametrize("with_kafka", [False, True]) @pytest.mark.parametrize( "origin, key_prefix", @@ -80,6 +81,7 @@ def test_save_to_cloud_datastore_in_the_usual_case( origin, key_prefix, with_kafka, + is_multifile, ): """ Test saving examples to the cloud datastore in the usual case @@ -90,7 +92,7 @@ def test_save_to_cloud_datastore_in_the_usual_case( mock_get_examples.return_value = mock_examples mock_config_project.return_value = "MOCK_PROJECT_ID" - examples = [create_test_example(with_kafka=with_kafka)] + examples = [create_test_example(is_multifile=is_multifile, with_kafka=with_kafka)] client = DatastoreClient() client.save_to_cloud_datastore(examples, SdkEnum.JAVA, origin) mock_client.assert_called_once() @@ -113,12 +115,22 @@ def 
test_save_to_cloud_datastore_in_the_usual_case( calls.extend( [ call().put(ANY), + call().key("pg_pc_objects", key_prefix + "SDK_JAVA_MOCK_NAME_GRAPH"), call().key("pg_pc_objects", key_prefix + "SDK_JAVA_MOCK_NAME_OUTPUT"), - call().put_multi([ANY]), + call().key("pg_pc_objects", key_prefix + "SDK_JAVA_MOCK_NAME_LOG"), + call().put_multi([ANY, ANY, ANY]), call().key("pg_files", key_prefix + "SDK_JAVA_MOCK_NAME_0"), call().put(ANY), ] ) + if is_multifile: + calls.extend( + [ + call().key("pg_files", key_prefix + "SDK_JAVA_MOCK_NAME_1"), + call().key("pg_files", key_prefix + "SDK_JAVA_MOCK_NAME_2"), + call().put_multi([ANY, ANY]), + ] + ) if with_kafka: calls.extend( [ diff --git a/playground/infrastructure/test_helper.py b/playground/infrastructure/test_helper.py index 1b2277f6d8f9f..e7ec797fd74a0 100644 --- a/playground/infrastructure/test_helper.py +++ b/playground/infrastructure/test_helper.py @@ -19,6 +19,7 @@ import pytest import pydantic +from api.v1 import api_pb2 from api.v1.api_pb2 import ( SDK_UNSPECIFIED, STATUS_UNSPECIFIED, @@ -164,12 +165,12 @@ async def test_get_statuses(mock_update_example_status, create_test_example): ) def test_load_example(): example = _load_example( - "kafka.java", "../../examples/path/kafka.java", SdkEnum.JAVA + "kafka.java", "../../examples/MOCK_EXAMPLE/main.java", SdkEnum.JAVA ) assert example == Example( sdk=SdkEnum.JAVA, type=PRECOMPILED_OBJECT_TYPE_EXAMPLE, - filepath="../../examples/path/kafka.java", + filepath="../../examples/MOCK_EXAMPLE/main.java", code=""" // license line 1 // license line 2 @@ -179,9 +180,10 @@ def test_load_example(): code line 2 """, - url_vcs="https://github.com/apache/beam/blob/master/examples/path/kafka.java", # type: ignore + url_vcs="https://github.com/apache/beam/blob/master/examples/MOCK_EXAMPLE/main.java", # type: ignore context_line=5, tag=Tag( + filepath="../../examples/MOCK_EXAMPLE/main.java", line_start=4, line_finish=27, name="KafkaWordCount", @@ -262,6 +264,7 @@ async def test__update_example_status( ): example = Example( tag=Tag( + filepath="../../examples/MOCK_EXAMPLE/main.java", line_start=10, line_finish=20, context_line=100, @@ -278,7 +281,7 @@ async def test__update_example_status( code="code", output="output", status=STATUS_UNSPECIFIED, - url_vcs="https://github.com/link", # type: ignore + url_vcs="https://github.com/link", # type: ignore ) mock_grpc_client_run_code.return_value = "pipeline_id" @@ -289,7 +292,11 @@ async def test__update_example_status( assert example.pipeline_id == "pipeline_id" assert example.status == STATUS_FINISHED mock_grpc_client_run_code.assert_called_once_with( - example.code, example.sdk, "--key value", [] + example.code, example.sdk, "--key value", [], files=[api_pb2.SnippetFile( + name="root/file.extension", + content="code", + is_main=True, + )] ) mock_grpc_client_check_status.assert_has_calls([mock.call("pipeline_id")]) @@ -390,7 +397,9 @@ def test_validate_example_fields_when_emulator_not_set_but_dataset_set(create_te pydantic.ValidationError, match="datasets w/o emulators", ): - create_test_tag(datasets={"dataset_id_1": {"format": "avro", "location": "local"}}) + create_test_tag( + datasets={"dataset_id_1": {"format": "avro", "location": "local"}} + ) def test_validate_example_fields_when_emulator_type_is_invalid(create_test_tag): @@ -480,9 +489,10 @@ def test_validate_example_fields_when_dataset_name_is_invalid(create_test_tag): ), ) def test_get_tag_with_datasets(): - tag = get_tag("filepath") + tag = get_tag("../../examples/MOCK_EXAMPLE/main.java") assert tag == Tag( 
**{ + "filepath": "../../examples/MOCK_EXAMPLE/main.java", "line_start": 2, "line_finish": 25, "name": "KafkaWordCount", @@ -502,6 +512,63 @@ def test_get_tag_with_datasets(): }, ) + +@mock.patch( + "builtins.open", + mock_open( + read_data=""" + +// beam-playground: +// name: MultifileExample +// description: Test example with imports +// multifile: true +// files: +// - name: utils.java +// context_line: 51 +// - name: schema.java +// context_line: 52 +// context_line: 55 +// categories: +// - Filtering +// - Options +// - Quickstart +// complexity: MEDIUM +// tags: +// - filter +// - strings +// - emulator + +""" + ), +) +def test_get_tag_multifile(): + tag = get_tag("../../examples/MOCK_EXAMPLE/main.java") + assert tag == Tag( + **{ + "filepath": "../../examples/MOCK_EXAMPLE/main.java", + "line_start": 2, + "line_finish": 21, + "name": "MultifileExample", + "description": "Test example with imports", + "multifile": True, + "context_line": 55, + "categories": ["Filtering", "Options", "Quickstart"], + "complexity": "MEDIUM", + "tags": ["filter", "strings", "emulator"], + "files": [ + { + "name": "utils.java", + "context_line": 51, + }, + { + "name": "schema.java", + "context_line": 52, + }, + ], + }, + ) + + @mock.patch("os.path.isfile", return_value=True) def test_dataset_path_ok(mock_file_check, create_test_example): example = create_test_example(with_kafka=True) @@ -512,4 +579,4 @@ def test_dataset_path_ok(mock_file_check, create_test_example): @mock.patch("os.path.isfile", return_value=False) def test_dataset_path_notfound(mock_file_check, create_test_example): with pytest.raises(FileNotFoundError): - create_test_example(with_kafka=True) \ No newline at end of file + create_test_example(with_kafka=True) diff --git a/playground/infrastructure/test_utils.py b/playground/infrastructure/test_utils.py index e65e723dccc4e..e52cb63570b0c 100644 --- a/playground/infrastructure/test_utils.py +++ b/playground/infrastructure/test_utils.py @@ -23,6 +23,7 @@ def _get_examples(number_of_examples: int) -> List[Example]: examples = [] for number in range(number_of_examples): tag = Tag( + filepath=f"MOCK_FILEPATH_{number}", line_start=100, line_finish=120, context_line=123, @@ -42,7 +43,7 @@ def _get_examples(number_of_examples: int) -> List[Example]: code=f"MOCK_CODE_{number}", output=f"MOCK_OUTPUT_{number}", status=STATUS_UNSPECIFIED, - url_vcs=f"https://mock.link/{number}", # type: ignore + url_vcs=f"https://mock.link/{number}", # type: ignore ) examples.append(example) return examples diff --git a/playground/infrastructure/verify.py b/playground/infrastructure/verify.py index 9d3784bd60014..c9830cea8b82d 100644 --- a/playground/infrastructure/verify.py +++ b/playground/infrastructure/verify.py @@ -71,8 +71,8 @@ async def _populate_fields(example: Example): example.compile_output = await client.get_compile_output( example.pipeline_id ) - example.output = await client.get_run_output(example.pipeline_id) - example.logs = await client.get_log(example.pipeline_id) + example.output = await client.get_run_output(example.pipeline_id, example.filepath) + example.logs = await client.get_log(example.pipeline_id, example.filepath) if example.sdk in [SDK_JAVA, SDK_PYTHON]: example.graph = await client.get_graph( example.pipeline_id, example.filepath diff --git a/release/src/main/scripts/set_version.sh b/release/src/main/scripts/set_version.sh index 387dd03d69acd..28521dbe66c04 100755 --- a/release/src/main/scripts/set_version.sh +++ b/release/src/main/scripts/set_version.sh @@ -76,6 +76,7 @@ if [[ -z 
"$IS_SNAPSHOT_VERSION" ]] ; then sed -i -e "s/^__version__ = .*/__version__ = '${TARGET_VERSION}'/" sdks/python/apache_beam/version.py sed -i -e "s/sdk_version=.*/sdk_version=$TARGET_VERSION/" gradle.properties sed -i -e "s/SdkVersion = .*/SdkVersion = \"$TARGET_VERSION\"/" sdks/go/pkg/beam/core/core.go + sed -i -e "s/\"version\": .*/\"version\": \"$TARGET_VERSION\",/" sdks/typescript/package.json else # For snapshot version: # Java/gradle appends -SNAPSHOT @@ -87,6 +88,7 @@ else sed -i -e "s/^__version__ = .*/__version__ = '${TARGET_VERSION}.dev'/" sdks/python/apache_beam/version.py sed -i -e "s/sdk_version=.*/sdk_version=$TARGET_VERSION.dev/" gradle.properties sed -i -e "s/SdkVersion = .*/SdkVersion = \"${TARGET_VERSION}.dev\"/" sdks/go/pkg/beam/core/core.go + sed -i -e "s/\"version\": .*/\"version\": \"$TARGET_VERSION-SNAPSHOT\",/" sdks/typescript/package.json fi if [[ "$GIT_ADD" == yes ]] ; then diff --git a/runners/google-cloud-dataflow-java/build.gradle b/runners/google-cloud-dataflow-java/build.gradle index 8a02d878885ad..4b7d11e99f58d 100644 --- a/runners/google-cloud-dataflow-java/build.gradle +++ b/runners/google-cloud-dataflow-java/build.gradle @@ -55,7 +55,7 @@ processResources { 'dataflow.legacy_environment_major_version' : '8', 'dataflow.fnapi_environment_major_version' : '8', 'dataflow.legacy_container_version' : 'beam-master-20220816', - 'dataflow.fnapi_container_version' : 'beam-master-20221022', + 'dataflow.fnapi_container_version' : 'beam-master-20221227', 'dataflow.container_base_repository' : 'gcr.io/cloud-dataflow/v1beta3', ] } @@ -429,8 +429,6 @@ createCrossLanguageValidatesRunnerTask( "--project=${dataflowProject}", "--region=${dataflowRegion}", "--sdk_harness_container_image_overrides=.*java.*,${dockerJavaImageContainer}:${dockerTag}", - // TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved - "--experiments=shuffle_mode=appliance", ], javaPipelineOptions: [ "--runner=TestDataflowRunner", @@ -439,8 +437,6 @@ createCrossLanguageValidatesRunnerTask( "--tempRoot=${dataflowValidatesTempRoot}", "--sdkContainerImage=${dockerJavaImageContainer}:${dockerTag}", "--sdkHarnessContainerImageOverrides=.*python.*,${dockerPythonImageContainer}:${dockerTag}", - // TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved. - "--experiments=shuffle_mode=appliance", ], pytestOptions: [ "--capture=no", @@ -455,7 +451,6 @@ createCrossLanguageValidatesRunnerTask( "--region ${dataflowRegion}", "--tests \"./test/integration/xlang ./test/integration/io/xlang/...\"", "--sdk_overrides \".*java.*,${dockerJavaImageContainer}:${dockerTag}\"", - "--dataflow_worker_jar ${project(":runners:google-cloud-dataflow-java:worker").shadowJar.archivePath}", ], ) @@ -464,8 +459,7 @@ task validatesRunnerV2 { description = "Runs the ValidatesRunner tests on Dataflow Runner V2" dependsOn(createRunnerV2ValidatesRunnerTest( name: 'validatesRunnerV2Test', - // TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved. 
- pipelineOptions: runnerV2PipelineOptions + ['--experiments=shuffle_mode=appliance'], + pipelineOptions: runnerV2PipelineOptions, excludedCategories: [ 'org.apache.beam.sdk.testing.UsesOnWindowExpiration', 'org.apache.beam.sdk.testing.UsesStatefulParDo', diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java index fa0632ebaf404..344490bc003b9 100644 --- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java +++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java @@ -1096,6 +1096,12 @@ public DataflowPipelineJob run(Pipeline pipeline) { } } if (useUnifiedWorker(options)) { + if (hasExperiment(options, "disable_runner_v2") + || hasExperiment(options, "disable_runner_v2_until_2023") + || hasExperiment(options, "disable_prime_runner_v2")) { + throw new IllegalArgumentException( + "Runner V2 both disabled and enabled: at least one of ['beam_fn_api', 'use_unified_worker', 'use_runner_v2', 'use_portable_job_submission'] is set and also one of ['disable_runner_v2', 'disable_runner_v2_until_2023', 'disable_prime_runner_v2'] is set."); + } List experiments = new ArrayList<>(options.getExperiments()); // non-null if useUnifiedWorker is true if (!experiments.contains("use_runner_v2")) { @@ -1116,6 +1122,18 @@ public DataflowPipelineJob run(Pipeline pipeline) { logWarningIfPCollectionViewHasNonDeterministicKeyCoder(pipeline); if (shouldActAsStreaming(pipeline)) { options.setStreaming(true); + + if (useUnifiedWorker(options)) { + options.setEnableStreamingEngine(true); + List experiments = + new ArrayList<>(options.getExperiments()); // non-null if useUnifiedWorker is true + if (!experiments.contains("enable_streaming_engine")) { + experiments.add("enable_streaming_engine"); + } + if (!experiments.contains("enable_windmill_service")) { + experiments.add("enable_windmill_service"); + } + } } if (!ExperimentalOptions.hasExperiment(options, "disable_projection_pushdown")) { @@ -2412,7 +2430,8 @@ static String getDefaultContainerVersion(DataflowPipelineOptions options) { static boolean useUnifiedWorker(DataflowPipelineOptions options) { return hasExperiment(options, "beam_fn_api") || hasExperiment(options, "use_runner_v2") - || hasExperiment(options, "use_unified_worker"); + || hasExperiment(options, "use_unified_worker") + || hasExperiment(options, "use_portable_job_submission"); } static boolean useStreamingEngine(DataflowPipelineOptions options) { diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java index 3737ec27e02a9..8da6748dd3e5c 100644 --- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java +++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java @@ -1745,6 +1745,63 @@ public void testSdkHarnessConfigurationPrime() throws IOException { this.verifySdkHarnessConfiguration(options); } + @Test + public void testSettingAnyFnApiExperimentEnablesUnifiedWorker() throws Exception { + for (String experiment : + ImmutableList.of( + "beam_fn_api", "use_runner_v2", "use_unified_worker", "use_portable_job_submission")) { + DataflowPipelineOptions options = 
buildPipelineOptions(); + ExperimentalOptions.addExperiment(options, experiment); + Pipeline p = Pipeline.create(options); + p.apply(Create.of("A")); + p.run(); + assertFalse(options.isEnableStreamingEngine()); + assertThat( + options.getExperiments(), + containsInAnyOrder( + "beam_fn_api", "use_runner_v2", "use_unified_worker", "use_portable_job_submission")); + } + + for (String experiment : + ImmutableList.of( + "beam_fn_api", "use_runner_v2", "use_unified_worker", "use_portable_job_submission")) { + DataflowPipelineOptions options = buildPipelineOptions(); + options.setStreaming(true); + ExperimentalOptions.addExperiment(options, experiment); + Pipeline p = Pipeline.create(options); + p.apply(Create.of("A")); + p.run(); + assertTrue(options.isEnableStreamingEngine()); + assertThat( + options.getExperiments(), + containsInAnyOrder( + "beam_fn_api", + "use_runner_v2", + "use_unified_worker", + "use_portable_job_submission", + "enable_windmill_service", + "enable_streaming_engine")); + } + } + + @Test + public void testSettingConflictingEnableAndDisableExperimentsThrowsException() throws Exception { + for (String experiment : + ImmutableList.of( + "beam_fn_api", "use_runner_v2", "use_unified_worker", "use_portable_job_submission")) { + for (String disabledExperiment : + ImmutableList.of( + "disable_runner_v2", "disable_runner_v2_until_2023", "disable_prime_runner_v2")) { + DataflowPipelineOptions options = buildPipelineOptions(); + ExperimentalOptions.addExperiment(options, experiment); + ExperimentalOptions.addExperiment(options, disabledExperiment); + Pipeline p = Pipeline.create(options); + p.apply(Create.of("A")); + assertThrows("Runner V2 both disabled and enabled", IllegalArgumentException.class, p::run); + } + } + } + private void verifyMapStateUnsupported(PipelineOptions options) throws Exception { Pipeline p = Pipeline.create(options); p.apply(Create.of(KV.of(13, 42))) diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorker.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorker.java index 05d6a43739ec3..e06a3fb8324c4 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorker.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorker.java @@ -667,11 +667,7 @@ public static StreamingDataflowWorker fromDataflowWorkerHarnessOptions( this.isDoneFuture = new CompletableFuture<>(); this.threadFactory = - r -> { - Thread t = new Thread(r); - t.setDaemon(true); - return t; - }; + new ThreadFactoryBuilder().setNameFormat("DataflowWorkUnits-%d").setDaemon(true).build(); this.workUnitExecutor = new BoundedQueueExecutor( chooseMaximumNumberOfThreads(), @@ -691,7 +687,7 @@ public static StreamingDataflowWorker fromDataflowWorkerHarnessOptions( memoryMonitorThread.setName("MemoryMonitor"); dispatchThread = - threadFactory.newThread( + new Thread( new Runnable() { @Override public void run() { @@ -704,11 +700,12 @@ public void run() { LOG.info("Dispatch done"); } }); + dispatchThread.setDaemon(true); dispatchThread.setPriority(Thread.MIN_PRIORITY); dispatchThread.setName("DispatchThread"); commitThread = - threadFactory.newThread( + new Thread( new Runnable() { @Override public void run() { @@ -719,6 +716,7 @@ public void run() { } } }); + commitThread.setDaemon(true); 
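Restating the DataflowRunner changes earlier in this diff as plain Python pseudocode (purely illustrative, not a Beam API): any single unified-worker experiment now implies the whole set including `use_portable_job_submission`, a conflicting disable experiment is rejected, and streaming with the unified worker also enables Streaming Engine.

```python
# Illustrative restatement of the Java experiment handling; not an actual Beam API.
UNIFIED_WORKER = {
    "beam_fn_api", "use_runner_v2", "use_unified_worker", "use_portable_job_submission",
}
DISABLED_V2 = {
    "disable_runner_v2", "disable_runner_v2_until_2023", "disable_prime_runner_v2",
}


def resolve_experiments(experiments, streaming):
    experiments = set(experiments)
    if experiments & UNIFIED_WORKER:
        if experiments & DISABLED_V2:
            raise ValueError("Runner V2 both disabled and enabled")
        experiments |= UNIFIED_WORKER
        if streaming:
            experiments |= {"enable_streaming_engine", "enable_windmill_service"}
    return experiments
```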
commitThread.setPriority(Thread.MAX_PRIORITY); commitThread.setName("CommitThread"); diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/options/StreamingDataflowWorkerOptions.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/options/StreamingDataflowWorkerOptions.java index 8df42ea42ff56..908221973fae8 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/options/StreamingDataflowWorkerOptions.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/options/StreamingDataflowWorkerOptions.java @@ -121,6 +121,15 @@ public interface StreamingDataflowWorkerOptions extends DataflowWorkerHarnessOpt void setWindmillServiceStreamingRpcHealthCheckPeriodMs(int value); + @Description( + "If positive, the number of messages to send on streaming rpc before checking isReady." + + "Higher values reduce cost of output overhead at the cost of more memory used in grpc " + + "buffers.") + @Default.Integer(10) + int getWindmillMessagesBetweenIsReadyChecks(); + + void setWindmillMessagesBetweenIsReadyChecks(int value); + /** * Factory for creating local Windmill address. Reads from system propery 'windmill.hostport' for * backwards compatibility. diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/DirectStreamObserver.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/DirectStreamObserver.java index 0646aba9c1166..b2e9ec925153b 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/DirectStreamObserver.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/DirectStreamObserver.java @@ -28,7 +28,7 @@ import org.slf4j.LoggerFactory; /** - * A {@link StreamObserver} which uses synchronization on the underlying {@link CallStreamObserver} + * A {@link StreamObserver} which synchronizes access to the underlying {@link CallStreamObserver} * to provide thread safety. * *

Flow control with the underlying {@link CallStreamObserver} is handled with a {@link Phaser} @@ -41,45 +41,66 @@ public final class DirectStreamObserver implements StreamObserver { private static final Logger LOG = LoggerFactory.getLogger(DirectStreamObserver.class); private final Phaser phaser; - @GuardedBy("outboundObserver") + private final Object lock = new Object(); + + @GuardedBy("lock") private final CallStreamObserver outboundObserver; private final long deadlineSeconds; + private final int messagesBetweenIsReadyChecks; - @GuardedBy("outboundObserver") - private boolean firstMessage = true; + @GuardedBy("lock") + private int messagesSinceReady = 0; public DirectStreamObserver( - Phaser phaser, CallStreamObserver outboundObserver, long deadlineSeconds) { + Phaser phaser, + CallStreamObserver outboundObserver, + long deadlineSeconds, + int messagesBetweenIsReadyChecks) { this.phaser = phaser; this.outboundObserver = outboundObserver; this.deadlineSeconds = deadlineSeconds; + // We always let the first message pass through without blocking because it is performed under + // the StreamPool synchronized block and single header message isn't going to cause memory + // issues due to excessive buffering within grpc. + this.messagesBetweenIsReadyChecks = Math.max(1, messagesBetweenIsReadyChecks); } @Override public void onNext(T value) { - final int phase = phaser.getPhase(); + int awaitPhase = -1; long totalSecondsWaited = 0; long waitSeconds = 1; while (true) { try { - synchronized (outboundObserver) { - // We let the first message passthrough without blocking because it is performed under the - // StreamPool synchronized block and single message isn't going to cause memory issues due - // to excessive buffering within grpc. - if (firstMessage || outboundObserver.isReady()) { - firstMessage = false; + synchronized (lock) { + // We only check isReady periodically to effectively allow for increasing the outbound + // buffer periodically. This reduces the overhead of blocking while still restricting + // memory because there is a limited # of streams, and we have a max messages size of 2MB. + if (++messagesSinceReady <= messagesBetweenIsReadyChecks) { + outboundObserver.onNext(value); + return; + } + // If we awaited previously and timed out, wait for the same phase. Otherwise we're + // careful to observe the phase before observing isReady. + if (awaitPhase < 0) { + awaitPhase = phaser.getPhase(); + } + if (outboundObserver.isReady()) { + messagesSinceReady = 0; outboundObserver.onNext(value); return; } } - // A callback has been registered to advance the phaser whenever the observer transitions to - // is ready. Since we are waiting for a phase observed before the outboundObserver.isReady() - // returned false, we expect it to advance after the channel has become ready. This doesn't - // always seem to be the case (despite documentation stating otherwise) so we poll - // periodically and enforce an overall timeout related to the stream deadline. - phaser.awaitAdvanceInterruptibly(phase, waitSeconds, TimeUnit.SECONDS); - synchronized (outboundObserver) { + // A callback has been registered to advance the phaser whenever the observer + // transitions to is ready. Since we are waiting for a phase observed before the + // outboundObserver.isReady() returned false, we expect it to advance after the + // channel has become ready. 
This doesn't always seem to be the case (despite + // documentation stating otherwise) so we poll periodically and enforce an overall + // timeout related to the stream deadline. + phaser.awaitAdvanceInterruptibly(awaitPhase, waitSeconds, TimeUnit.SECONDS); + synchronized (lock) { + messagesSinceReady = 0; outboundObserver.onNext(value); return; } @@ -88,33 +109,33 @@ public void onNext(T value) { if (totalSecondsWaited > deadlineSeconds) { LOG.error( "Exceeded timeout waiting for the outboundObserver to become ready meaning " - + "that the streamdeadline was not respected."); + + "that the stream deadline was not respected."); throw new RuntimeException(e); } + if (totalSecondsWaited > 30) { + LOG.info( + "Output channel stalled for {}s, outbound thread {}.", + totalSecondsWaited, + Thread.currentThread().getName()); + } waitSeconds = waitSeconds * 2; } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException(e); } - if (totalSecondsWaited > 30) { - LOG.info( - "Output channel stalled for {}s, outbound thread {}.", - totalSecondsWaited, - Thread.currentThread().getName()); - } } } @Override public void onError(Throwable t) { - synchronized (outboundObserver) { + synchronized (lock) { outboundObserver.onError(t); } } @Override public void onCompleted() { - synchronized (outboundObserver) { + synchronized (lock) { outboundObserver.onCompleted(); } } diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/GrpcWindmillServer.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/GrpcWindmillServer.java index 6a5e608f5b8f7..d7ae8b2b73471 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/GrpcWindmillServer.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/GrpcWindmillServer.java @@ -625,7 +625,8 @@ private static long uniqueId() { */ private abstract class AbstractWindmillStream implements WindmillStream { private final StreamObserverFactory streamObserverFactory = - StreamObserverFactory.direct(streamDeadlineSeconds * 2); + StreamObserverFactory.direct( + streamDeadlineSeconds * 2, options.getWindmillMessagesBetweenIsReadyChecks()); private final Function, StreamObserver> clientFactory; private final Executor executor = Executors.newSingleThreadExecutor( diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/StreamObserverFactory.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/StreamObserverFactory.java index e3f344c1fb528..fe8878f8f52f5 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/StreamObserverFactory.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/StreamObserverFactory.java @@ -28,8 +28,9 @@ * to use. 
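The DirectStreamObserver rewrite above replaces a per-message isReady() check with a periodic one. Below is a toy, language-agnostic sketch of that pattern, written in Python purely for illustration; the callables stand in for the gRPC observer, and the phaser/timeout handling of the real code is omitted.

```python
# Toy model of the throttled flow-control pattern from the diff above:
# let a batch of messages through, then block until the channel reports ready.
import threading


class ThrottledObserver:
    def __init__(self, send, is_ready, wait_until_ready, messages_between_checks=10):
        self._send = send                          # callable(message): hands off to the channel
        self._is_ready = is_ready                  # callable() -> bool: channel buffer has room
        self._wait_until_ready = wait_until_ready  # callable(): blocks until the channel drains
        self._between_checks = max(1, messages_between_checks)
        self._since_check = 0
        self._lock = threading.Lock()

    def on_next(self, message):
        with self._lock:
            self._since_check += 1
            if self._since_check <= self._between_checks:
                self._send(message)                # fast path: no readiness check
                return
            if self._is_ready():
                self._since_check = 0
                self._send(message)
                return
        self._wait_until_ready()                   # slow path: wait for the channel to drain
        with self._lock:
            self._since_check = 0
            self._send(message)
```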
*/ public abstract class StreamObserverFactory { - public static StreamObserverFactory direct(long deadlineSeconds) { - return new Direct(deadlineSeconds); + public static StreamObserverFactory direct( + long deadlineSeconds, int messagesBetweenIsReadyChecks) { + return new Direct(deadlineSeconds, messagesBetweenIsReadyChecks); } public abstract StreamObserver from( @@ -38,9 +39,11 @@ public abstract StreamObserver from( private static class Direct extends StreamObserverFactory { private final long deadlineSeconds; + private final int messagesBetweenIsReadyChecks; - Direct(long deadlineSeconds) { + Direct(long deadlineSeconds, int messagesBetweenIsReadyChecks) { this.deadlineSeconds = deadlineSeconds; + this.messagesBetweenIsReadyChecks = messagesBetweenIsReadyChecks; } @Override @@ -53,7 +56,8 @@ public StreamObserver from( clientFactory.apply( new ForwardingClientResponseObserver( inboundObserver, phaser::arrive, phaser::forceTermination)); - return new DirectStreamObserver<>(phaser, outboundObserver, deadlineSeconds); + return new DirectStreamObserver<>( + phaser, outboundObserver, deadlineSeconds, messagesBetweenIsReadyChecks); } } } diff --git a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkStructuredStreamingPipelineResult.java b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkStructuredStreamingPipelineResult.java index 404c2bf570431..b490ff875c316 100644 --- a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkStructuredStreamingPipelineResult.java +++ b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkStructuredStreamingPipelineResult.java @@ -18,6 +18,7 @@ package org.apache.beam.runners.spark.structuredstreaming; import static org.apache.beam.runners.core.metrics.MetricsContainerStepMap.asAttemptedOnlyMetricResults; +import static org.sparkproject.guava.base.Objects.firstNonNull; import java.io.IOException; import java.util.concurrent.ExecutionException; @@ -33,21 +34,19 @@ import org.apache.spark.SparkException; import org.joda.time.Duration; -/** Represents a Spark pipeline execution result. */ -@SuppressWarnings({ - "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) public class SparkStructuredStreamingPipelineResult implements PipelineResult { - final Future pipelineExecution; - @Nullable final Runnable onTerminalState; - - PipelineResult.State state; + private final Future pipelineExecution; + private final MetricsAccumulator metrics; + private @Nullable final Runnable onTerminalState; + private PipelineResult.State state; SparkStructuredStreamingPipelineResult( - final Future pipelineExecution, @Nullable final Runnable onTerminalState) { + Future pipelineExecution, + MetricsAccumulator metrics, + @Nullable final Runnable onTerminalState) { this.pipelineExecution = pipelineExecution; + this.metrics = metrics; this.onTerminalState = onTerminalState; // pipelineExecution is expected to have started executing eagerly. this.state = State.RUNNING; @@ -57,21 +56,19 @@ private static RuntimeException runtimeExceptionFrom(final Throwable e) { return (e instanceof RuntimeException) ? 
(RuntimeException) e : new RuntimeException(e); } - private static RuntimeException beamExceptionFrom(final Throwable e) { - // Scala doesn't declare checked exceptions in the bytecode, and the Java compiler - // won't let you catch something that is not declared, so we can't catch - // SparkException directly, instead we do an instanceof check. - - if (e instanceof SparkException) { - if (e.getCause() != null && e.getCause() instanceof UserCodeException) { - UserCodeException userException = (UserCodeException) e.getCause(); - return new Pipeline.PipelineExecutionException(userException.getCause()); - } else if (e.getCause() != null) { - return new Pipeline.PipelineExecutionException(e.getCause()); - } + /** + * Unwrap cause of SparkException or UserCodeException as PipelineExecutionException. Otherwise, + * return {@code exception} as RuntimeException. + */ + private static RuntimeException unwrapCause(Throwable exception) { + Throwable next = exception; + while (next != null && (next instanceof SparkException || next instanceof UserCodeException)) { + exception = next; + next = next.getCause(); } - - return runtimeExceptionFrom(e); + return exception == next + ? runtimeExceptionFrom(exception) + : new Pipeline.PipelineExecutionException(firstNonNull(next, exception)); } private State awaitTermination(Duration duration) @@ -96,15 +93,14 @@ public State waitUntilFinish(final Duration duration) { try { State finishState = awaitTermination(duration); offerNewState(finishState); - } catch (final TimeoutException e) { // ignore. } catch (final ExecutionException e) { offerNewState(PipelineResult.State.FAILED); - throw beamExceptionFrom(e.getCause()); + throw unwrapCause(firstNonNull(e.getCause(), e)); } catch (final Exception e) { offerNewState(PipelineResult.State.FAILED); - throw beamExceptionFrom(e); + throw unwrapCause(e); } return state; @@ -112,7 +108,7 @@ public State waitUntilFinish(final Duration duration) { @Override public MetricResults metrics() { - return asAttemptedOnlyMetricResults(MetricsAccumulator.getInstance().value()); + return asAttemptedOnlyMetricResults(metrics.value()); } @Override @@ -128,7 +124,7 @@ private void offerNewState(State newState) { try { onTerminalState.run(); } catch (Exception e) { - throw beamExceptionFrom(e); + throw unwrapCause(e); } } } diff --git a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkStructuredStreamingRunner.java b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkStructuredStreamingRunner.java index bca1bdc2a2a45..3b9f96cdb7e00 100644 --- a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkStructuredStreamingRunner.java +++ b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/SparkStructuredStreamingRunner.java @@ -28,6 +28,7 @@ import org.apache.beam.runners.core.construction.SplittableParDo; import org.apache.beam.runners.core.construction.graph.ProjectionPushdownOptimizer; import org.apache.beam.runners.core.metrics.MetricsPusher; +import org.apache.beam.runners.core.metrics.NoOpMetricsSink; import org.apache.beam.runners.spark.structuredstreaming.metrics.MetricsAccumulator; import org.apache.beam.runners.spark.structuredstreaming.metrics.SparkBeamMetricSource; import org.apache.beam.runners.spark.structuredstreaming.translation.EvaluationContext; @@ -44,7 +45,6 @@ import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.options.PipelineOptionsValidator; import 
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.ThreadFactoryBuilder; -import org.apache.spark.SparkContext; import org.apache.spark.SparkEnv$; import org.apache.spark.metrics.MetricsSystem; import org.apache.spark.sql.SparkSession; @@ -139,6 +139,7 @@ private SparkStructuredStreamingRunner(SparkStructuredStreamingPipelineOptions o @Override public SparkStructuredStreamingPipelineResult run(final Pipeline pipeline) { MetricsEnvironment.setMetricsSupported(true); + MetricsAccumulator.clear(); LOG.info( "*** SparkStructuredStreamingRunner is based on spark structured streaming framework and is no more \n" @@ -150,23 +151,21 @@ public SparkStructuredStreamingPipelineResult run(final Pipeline pipeline) { checkArgument(!options.isStreaming(), "Streaming is not supported."); final SparkSession sparkSession = SparkSessionFactory.getOrCreateSession(options); - initMetrics(sparkSession.sparkContext()); + final MetricsAccumulator metrics = MetricsAccumulator.getInstance(sparkSession); final Future submissionFuture = runAsync(() -> translatePipeline(sparkSession, pipeline).evaluate()); final SparkStructuredStreamingPipelineResult result = new SparkStructuredStreamingPipelineResult( - submissionFuture, stopSparkSession(sparkSession, options.getUseActiveSparkSession())); + submissionFuture, + metrics, + sparkStopFn(sparkSession, options.getUseActiveSparkSession())); if (options.getEnableSparkMetricSinks()) { - registerMetricsSource(options.getAppName()); + registerMetricsSource(options.getAppName(), metrics); } - - MetricsPusher metricsPusher = - new MetricsPusher( - MetricsAccumulator.getInstance().value(), options.as(MetricsOptions.class), result); - metricsPusher.start(); + startMetricsPusher(result, metrics); if (options.getTestMode()) { result.waitUntilFinish(); @@ -195,19 +194,23 @@ private EvaluationContext translatePipeline(SparkSession sparkSession, Pipeline return pipelineTranslator.translate(pipeline, sparkSession, options); } - private void registerMetricsSource(String appName) { + private void registerMetricsSource(String appName, MetricsAccumulator metrics) { final MetricsSystem metricsSystem = SparkEnv$.MODULE$.get().metricsSystem(); - final SparkBeamMetricSource metricsSource = new SparkBeamMetricSource(appName + ".Beam"); + final SparkBeamMetricSource metricsSource = + new SparkBeamMetricSource(appName + ".Beam", metrics); // re-register the metrics in case of context re-use metricsSystem.removeSource(metricsSource); metricsSystem.registerSource(metricsSource); } - /** Init Metrics/Aggregators accumulators. This method is idempotent. */ - private static void initMetrics(SparkContext sparkContext) { - // Clear and init metrics accumulators - MetricsAccumulator.clear(); - MetricsAccumulator.init(sparkContext); + /** Start {@link MetricsPusher} if sink is set. 
*/ + private void startMetricsPusher( + SparkStructuredStreamingPipelineResult result, MetricsAccumulator metrics) { + MetricsOptions metricsOpts = options.as(MetricsOptions.class); + Class metricsSink = metricsOpts.getMetricsSink(); + if (metricsSink != null && !metricsSink.equals(NoOpMetricsSink.class)) { + new MetricsPusher(metrics.value(), metricsOpts, result).start(); + } } private static Future runAsync(Runnable task) { @@ -222,7 +225,7 @@ private static Future runAsync(Runnable task) { return future; } - private static @Nullable Runnable stopSparkSession(SparkSession session, boolean isProvided) { + private static @Nullable Runnable sparkStopFn(SparkSession session, boolean isProvided) { return !isProvided ? () -> session.stop() : null; } } diff --git a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/MetricsAccumulator.java b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/MetricsAccumulator.java index a07ce967422b6..6edddff5831c6 100644 --- a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/MetricsAccumulator.java +++ b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/MetricsAccumulator.java @@ -19,51 +19,86 @@ import org.apache.beam.runners.core.metrics.MetricsContainerStepMap; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting; -import org.apache.spark.SparkContext; +import org.apache.spark.sql.SparkSession; import org.apache.spark.util.AccumulatorV2; +import org.checkerframework.checker.nullness.qual.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * For resilience, {@link AccumulatorV2 Accumulators} are required to be wrapped in a Singleton. + * {@link AccumulatorV2} for Beam metrics captured in {@link MetricsContainerStepMap}. * * @see accumulatorsV2 + * href="https://spark.apache.org/docs/latest/streaming-programming-guide.html#accumulators-broadcast-variables-and-checkpoints">accumulatorsV2 */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class MetricsAccumulator { +public class MetricsAccumulator + extends AccumulatorV2 { private static final Logger LOG = LoggerFactory.getLogger(MetricsAccumulator.class); - + private static final MetricsContainerStepMap EMPTY = new SparkMetricsContainerStepMap(); private static final String ACCUMULATOR_NAME = "Beam.Metrics"; - private static volatile MetricsContainerStepMapAccumulator instance = null; + private static volatile @Nullable MetricsAccumulator instance = null; - /** Init metrics accumulator if it has not been initiated. This method is idempotent. 
*/ - public static void init(SparkContext sparkContext) { - if (instance == null) { - synchronized (MetricsAccumulator.class) { - if (instance == null) { - MetricsContainerStepMap metricsContainerStepMap = new SparkMetricsContainerStepMap(); - MetricsContainerStepMapAccumulator accumulator = - new MetricsContainerStepMapAccumulator(metricsContainerStepMap); - sparkContext.register(accumulator, ACCUMULATOR_NAME); + private MetricsContainerStepMap value; - instance = accumulator; - } - } - LOG.info("Instantiated metrics accumulator: {}", instance.value()); - } else { - instance.reset(); - } + public MetricsAccumulator() { + value = new SparkMetricsContainerStepMap(); + } + + private MetricsAccumulator(MetricsContainerStepMap value) { + this.value = value; + } + + @Override + public boolean isZero() { + return value.equals(EMPTY); + } + + @Override + public MetricsAccumulator copy() { + MetricsContainerStepMap newContainer = new SparkMetricsContainerStepMap(); + newContainer.updateAll(value); + return new MetricsAccumulator(newContainer); + } + + @Override + public void reset() { + value = new SparkMetricsContainerStepMap(); + } + + @Override + public void add(MetricsContainerStepMap other) { + value.updateAll(other); + } + + @Override + public void merge(AccumulatorV2 other) { + value.updateAll(other.value()); + } + + @Override + public MetricsContainerStepMap value() { + return value; } - public static MetricsContainerStepMapAccumulator getInstance() { - if (instance == null) { - throw new IllegalStateException("Metrics accumulator has not been instantiated"); - } else { - return instance; + /** + * Get the {@link MetricsAccumulator} on this driver. If there's no such accumulator yet, it will + * be created and registered using the provided {@link SparkSession}. + */ + public static MetricsAccumulator getInstance(SparkSession session) { + MetricsAccumulator current = instance; + if (current != null) { + return current; + } + synchronized (MetricsAccumulator.class) { + MetricsAccumulator accumulator = instance; + if (accumulator == null) { + accumulator = new MetricsAccumulator(); + session.sparkContext().register(accumulator, ACCUMULATOR_NAME); + instance = accumulator; + LOG.info("Instantiated metrics accumulator: {}", instance.value()); + } + return accumulator; } } @@ -73,4 +108,26 @@ public static void clear() { instance = null; } } + + /** + * Sole purpose of this class is to override {@link #toString()} of {@link + * MetricsContainerStepMap} in order to show meaningful metrics in Spark Web Interface. + */ + private static class SparkMetricsContainerStepMap extends MetricsContainerStepMap { + + @Override + public String toString() { + return asAttemptedOnlyMetricResults(this).toString(); + } + + @Override + public boolean equals(@Nullable Object o) { + return super.equals(o); + } + + @Override + public int hashCode() { + return super.hashCode(); + } + } } diff --git a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/MetricsContainerStepMapAccumulator.java b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/MetricsContainerStepMapAccumulator.java deleted file mode 100644 index 2d2a4ea175409..0000000000000 --- a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/MetricsContainerStepMapAccumulator.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.spark.structuredstreaming.metrics; - -import org.apache.beam.runners.core.metrics.MetricsContainerStepMap; -import org.apache.spark.util.AccumulatorV2; - -/** {@link AccumulatorV2} implementation for {@link MetricsContainerStepMap}. */ -public class MetricsContainerStepMapAccumulator - extends AccumulatorV2 { - private static final MetricsContainerStepMap empty = new SparkMetricsContainerStepMap(); - - private MetricsContainerStepMap value; - - public MetricsContainerStepMapAccumulator(MetricsContainerStepMap value) { - this.value = value; - } - - @Override - public boolean isZero() { - return value.equals(empty); - } - - @Override - public MetricsContainerStepMapAccumulator copy() { - MetricsContainerStepMap newContainer = new SparkMetricsContainerStepMap(); - newContainer.updateAll(value); - return new MetricsContainerStepMapAccumulator(newContainer); - } - - @Override - public void reset() { - this.value = new SparkMetricsContainerStepMap(); - } - - @Override - public void add(MetricsContainerStepMap other) { - this.value.updateAll(other); - } - - @Override - public void merge(AccumulatorV2 other) { - this.value.updateAll(other.value()); - } - - @Override - public MetricsContainerStepMap value() { - return this.value; - } -} diff --git a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/SparkBeamMetric.java b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/SparkBeamMetric.java index 0cecae4a25b9b..1754ac4d1678e 100644 --- a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/SparkBeamMetric.java +++ b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/SparkBeamMetric.java @@ -28,6 +28,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nullable; +import org.apache.beam.runners.core.metrics.MetricsContainerStepMap; import org.apache.beam.sdk.metrics.DistributionResult; import org.apache.beam.sdk.metrics.GaugeResult; import org.apache.beam.sdk.metrics.MetricKey; @@ -40,17 +41,22 @@ import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Streams; /** - * An adapter between the {@link SparkMetricsContainerStepMap} and the Dropwizard {@link Metric} + * An adapter between the {@link MetricsContainerStepMap} and the Dropwizard {@link Metric} * interface. 
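The adapter queries the accumulator's value and turns the attempted metric results into Dropwizard gauges. Roughly what that query looks like, as a sketch against Beam's MetricResults API (the step and counter names below are invented):

import org.apache.beam.runners.core.metrics.MetricsContainerStepMap;
import org.apache.beam.sdk.metrics.MetricQueryResults;
import org.apache.beam.sdk.metrics.MetricResult;
import org.apache.beam.sdk.metrics.MetricResults;

class ReadAttemptedMetricsSketch {
  static void dump(MetricsContainerStepMap steps) {
    // Attempted-only results, the same asAttemptedOnlyMetricResults helper the
    // adapter uses on the accumulator value.
    MetricResults results = MetricsContainerStepMap.asAttemptedOnlyMetricResults(steps);
    MetricQueryResults all = results.allMetrics();
    for (MetricResult<Long> counter : all.getCounters()) {
      // e.g. "myStep:myNamespace.myCounter = 42"; SparkBeamMetric sanitizes the
      // key and exposes the value as a Dropwizard gauge.
      System.out.println(counter.getKey() + " = " + counter.getAttempted());
    }
  }
}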
*/ class SparkBeamMetric extends BeamMetricSet { private static final String ILLEGAL_CHARACTERS = "[^A-Za-z0-9-]"; + private final MetricsAccumulator metrics; + + SparkBeamMetric(MetricsAccumulator metrics) { + this.metrics = metrics; + } + @Override public Map> getValue(String prefix, MetricFilter filter) { - MetricResults metricResults = - asAttemptedOnlyMetricResults(MetricsAccumulator.getInstance().value()); + MetricResults metricResults = asAttemptedOnlyMetricResults(metrics.value()); Map> metrics = new HashMap<>(); MetricQueryResults allMetrics = metricResults.allMetrics(); for (MetricResult metricResult : allMetrics.getCounters()) { diff --git a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/SparkBeamMetricSource.java b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/SparkBeamMetricSource.java index ed938ac841381..8a1e980ae0c5e 100644 --- a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/SparkBeamMetricSource.java +++ b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/SparkBeamMetricSource.java @@ -29,9 +29,9 @@ public class SparkBeamMetricSource implements Source { private final MetricRegistry metricRegistry = new MetricRegistry(); - public SparkBeamMetricSource(final String name) { + public SparkBeamMetricSource(String name, MetricsAccumulator metrics) { this.name = name; - metricRegistry.register(name, new SparkBeamMetric()); + metricRegistry.register(name, new SparkBeamMetric(metrics)); } @Override diff --git a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/EvaluationContext.java b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/EvaluationContext.java index e0f353e5076af..547425546d7cf 100644 --- a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/EvaluationContext.java +++ b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/EvaluationContext.java @@ -22,7 +22,6 @@ import org.apache.beam.sdk.annotations.Internal; import org.apache.beam.sdk.util.WindowedValue; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Throwables; -import org.apache.spark.api.java.function.ForeachFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.execution.ExplainMode; @@ -82,8 +81,8 @@ public void evaluate() { public static void evaluate(String name, Dataset ds) { long startMs = System.currentTimeMillis(); try { - // force evaluation using a dummy foreach action - ds.foreach(NOOP); + // force computation using noop format + ds.write().mode("overwrite").format("noop").save(); LOG.info("Evaluated dataset {} in {}", name, durationSince(startMs)); } catch (RuntimeException e) { LOG.error("Failed to evaluate dataset {}: {}", name, Throwables.getRootCause(e).getMessage()); @@ -114,7 +113,4 @@ public SparkSession getSparkSession() { private static String durationSince(long startMs) { return Utils.msDurationToString(System.currentTimeMillis() - startMs); } - - @SuppressWarnings("rawtypes") - private static final ForeachFunction NOOP = obj -> {}; } diff --git a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/PipelineTranslator.java b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/PipelineTranslator.java index 
8d751d5d81733..05f542702f19f 100644 --- a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/PipelineTranslator.java +++ b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/PipelineTranslator.java @@ -152,6 +152,8 @@ public String name() { public interface TranslationState extends EncoderProvider { Dataset> getDataset(PCollection pCollection); + boolean isLeave(PCollection pCollection); + void putDataset( PCollection pCollection, Dataset> dataset, boolean cache); @@ -256,6 +258,11 @@ public void putDataset( } } + @Override + public boolean isLeave(PCollection pCollection) { + return getResult(pCollection).dependentTransforms.isEmpty(); + } + @Override public Broadcast> getSideInputBroadcast( PCollection pCollection, SideInputValues.Loader loader) { diff --git a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/SparkSessionFactory.java b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/SparkSessionFactory.java index a9fcd8bde74bb..6632f4d707a1b 100644 --- a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/SparkSessionFactory.java +++ b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/SparkSessionFactory.java @@ -79,6 +79,7 @@ import org.apache.spark.serializer.KryoSerializer; import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTaskResult; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -158,6 +159,9 @@ public void registerClasses(Kryo kryo) { kryo.register(HashMap.class); kryo.register(ArrayList.class); + // support writing noop format + kryo.register(DataWritingSparkTaskResult.class); + // TODO find more efficient ways kryo.register(SerializablePipelineOptions.class, new JavaSerializer()); diff --git a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TransformTranslator.java b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TransformTranslator.java index 8a3c7579f541f..e0bbb2af820e4 100644 --- a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TransformTranslator.java +++ b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/TransformTranslator.java @@ -149,6 +149,11 @@ public void putDataset( state.putDataset(pCollection, dataset, cache); } + @Override + public boolean isLeave(PCollection pCollection) { + return state.isLeave(pCollection); + } + @Override public Supplier getOptionsSupplier() { return state.getOptionsSupplier(); diff --git a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DoFnPartitionIteratorFactory.java b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DoFnPartitionIteratorFactory.java index c760efd229c8e..df844cc9f116e 100644 --- a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DoFnPartitionIteratorFactory.java +++ b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DoFnPartitionIteratorFactory.java @@ -19,7 +19,6 @@ import static org.apache.beam.runners.spark.structuredstreaming.translation.utils.ScalaInterop.scalaIterator; import static 
org.apache.beam.runners.spark.structuredstreaming.translation.utils.ScalaInterop.tuple; -import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; import java.io.Serializable; import java.util.ArrayDeque; @@ -61,23 +60,26 @@ */ abstract class DoFnPartitionIteratorFactory implements Function1>, Iterator>, Serializable { - private final String stepName; - private final DoFn doFn; - private final DoFnSchemaInformation doFnSchema; - private final Supplier options; - private final Coder coder; - private final WindowingStrategy windowingStrategy; - private final TupleTag mainOutput; - private final List> additionalOutputs; - private final Map, Coder> outputCoders; - private final Map> sideInputs; - private final SideInputReader sideInputReader; + protected final String stepName; + protected final DoFn doFn; + protected final DoFnSchemaInformation doFnSchema; + protected final Supplier options; + protected final Coder coder; + protected final WindowingStrategy windowingStrategy; + protected final TupleTag mainOutput; + protected final List> additionalOutputs; + protected final Map, Coder> outputCoders; + protected final Map> sideInputs; + protected final SideInputReader sideInputReader; + + private final MetricsAccumulator metrics; private DoFnPartitionIteratorFactory( AppliedPTransform, ?, MultiOutput> appliedPT, Supplier options, PCollection input, - SideInputReader sideInputReader) { + SideInputReader sideInputReader, + MetricsAccumulator metrics) { this.stepName = appliedPT.getFullName(); this.doFn = appliedPT.getTransform().getFn(); this.doFnSchema = ParDoTranslation.getSchemaInformation(appliedPT); @@ -89,6 +91,7 @@ private DoFnPartitionIteratorFactory( this.outputCoders = outputCoders(appliedPT.getOutputs()); this.sideInputs = appliedPT.getTransform().getSideInputs(); this.sideInputReader = sideInputReader; + this.metrics = metrics; } /** @@ -99,8 +102,9 @@ private DoFnPartitionIteratorFactory( AppliedPTransform, ?, MultiOutput> appliedPT, Supplier options, PCollection input, - SideInputReader sideInputReader) { - return new SingleOut<>(appliedPT, options, input, sideInputReader); + SideInputReader sideInputReader, + MetricsAccumulator metrics) { + return new SingleOut<>(appliedPT, options, input, sideInputReader, metrics); } /** @@ -114,8 +118,9 @@ private DoFnPartitionIteratorFactory( Supplier options, PCollection input, SideInputReader sideInputReader, + MetricsAccumulator metrics, Map tagColIdx) { - return new MultiOut<>(appliedPT, options, input, sideInputReader, tagColIdx); + return new MultiOut<>(appliedPT, options, input, sideInputReader, metrics, tagColIdx); } @Override @@ -138,8 +143,9 @@ private SingleOut( AppliedPTransform, ?, MultiOutput> appliedPT, Supplier options, PCollection input, - SideInputReader sideInputReader) { - super(appliedPT, options, input, sideInputReader); + SideInputReader sideInputReader, + MetricsAccumulator metrics) { + super(appliedPT, options, input, sideInputReader, metrics); } @Override @@ -147,7 +153,11 @@ DoFnRunners.OutputManager outputManager(Deque> buffer) { return new DoFnRunners.OutputManager() { @Override public void output(TupleTag tag, WindowedValue output) { - buffer.add((WindowedValue) output); + // SingleOut will only ever emmit the mainOutput. Though, there might be additional + // outputs which are skipped if unused to avoid caching. 
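For context, the case this comment describes is a ParDo whose additional output is declared but never consumed. A hedged sketch of that pipeline shape (the DoFn and all names are invented for illustration, not taken from this change):

import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionTuple;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.sdk.values.TupleTagList;

class UnusedOutputSketch {
  static PCollection<Integer> mainOnly(PCollection<Integer> input) {
    TupleTag<Integer> main = new TupleTag<Integer>() {};
    TupleTag<String> debug = new TupleTag<String>() {}; // declared, never consumed downstream
    PCollectionTuple out =
        input.apply(
            ParDo.of(
                    new DoFn<Integer, Integer>() {
                      @ProcessElement
                      public void process(@Element Integer i, MultiOutputReceiver r) {
                        r.get(main).output(i + 1);
                        r.get(debug).output("saw " + i);
                      }
                    })
                .withOutputTags(main, TupleTagList.of(debug)));
    // Only the main output is used; "debug" has no dependent transforms, so the
    // runner can drop its elements instead of caching the multi-output Dataset.
    return out.get(main);
  }
}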
+ if (mainOutput.equals(tag)) { + buffer.add((WindowedValue) output); + } } }; } @@ -167,8 +177,9 @@ public MultiOut( Supplier options, PCollection input, SideInputReader sideInputReader, + MetricsAccumulator metrics, Map tagColIdx) { - super(appliedPT, options, input, sideInputReader); + super(appliedPT, options, input, sideInputReader, metrics); this.tagColIdx = tagColIdx; } @@ -177,8 +188,11 @@ DoFnRunners.OutputManager outputManager(Deque void output(TupleTag tag, WindowedValue output) { - Integer columnIdx = checkStateNotNull(tagColIdx.get(tag.getId()), "Unknown tag %s", tag); - buffer.add(tuple(columnIdx, (WindowedValue) output)); + // Additional unused outputs can be skipped here. In that case columnIdx is null. + Integer columnIdx = tagColIdx.get(tag.getId()); + if (columnIdx != null) { + buffer.add(tuple(columnIdx, (WindowedValue) output)); + } } }; } @@ -240,7 +254,7 @@ protected OutT computeNext() { private DoFnRunner simpleRunner(Deque buffer) { return DoFnRunners.simpleRunner( options.get(), - (DoFn) doFn, + doFn, CachedSideInputReader.of(sideInputReader, sideInputs.values()), outputManager(buffer), mainOutput, @@ -254,7 +268,7 @@ private DoFnRunner simpleRunner(Deque buffer) { } private DoFnRunner metricsRunner(DoFnRunner runner) { - return new DoFnRunnerWithMetrics<>(stepName, runner, MetricsAccumulator.getInstance()); + return new DoFnRunnerWithMetrics<>(stepName, runner, metrics); } private static Map, Coder> outputCoders(Map, PCollection> outputs) { diff --git a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DoFnRunnerWithMetrics.java b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DoFnRunnerWithMetrics.java index b80ec87d3c56d..f6b98a61e3d07 100644 --- a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DoFnRunnerWithMetrics.java +++ b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/DoFnRunnerWithMetrics.java @@ -20,8 +20,7 @@ import java.io.Closeable; import java.io.IOException; import org.apache.beam.runners.core.DoFnRunner; -import org.apache.beam.runners.core.metrics.MetricsContainerImpl; -import org.apache.beam.runners.spark.structuredstreaming.metrics.MetricsContainerStepMapAccumulator; +import org.apache.beam.runners.spark.structuredstreaming.metrics.MetricsAccumulator; import org.apache.beam.sdk.metrics.MetricsContainer; import org.apache.beam.sdk.metrics.MetricsEnvironment; import org.apache.beam.sdk.state.TimeDomain; @@ -30,19 +29,19 @@ import org.apache.beam.sdk.util.WindowedValue; import org.joda.time.Instant; -/** DoFnRunner decorator which registers {@link MetricsContainerImpl}. */ +/** DoFnRunner decorator which registers {@link MetricsContainer}. 
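The decorator below wraps every bundle-lifecycle call in MetricsEnvironment.scopedMetricsContainer(...), which is what routes Metrics.counter(...) calls from user DoFns into the step's container in the accumulator. A minimal sketch of that mechanism (step and counter names are illustrative):

import java.io.Closeable;
import java.io.IOException;
import org.apache.beam.runners.core.metrics.MetricsContainerStepMap;
import org.apache.beam.sdk.metrics.Metrics;
import org.apache.beam.sdk.metrics.MetricsContainer;
import org.apache.beam.sdk.metrics.MetricsEnvironment;

class ScopedMetricsSketch {
  static void recordOne(MetricsContainerStepMap steps) throws IOException {
    MetricsContainer container = steps.getContainer("myStep");
    // While the container is scoped, metrics reported through the SDK facade
    // are attributed to "myStep" in this step map.
    try (Closeable ignored = MetricsEnvironment.scopedMetricsContainer(container)) {
      Metrics.counter("sketch", "elements").inc();
    }
  }
}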
*/ class DoFnRunnerWithMetrics implements DoFnRunner { private final DoFnRunner delegate; - private final String stepName; - private final MetricsContainerStepMapAccumulator metricsAccum; + private final MetricsContainer metrics; DoFnRunnerWithMetrics( - String stepName, - DoFnRunner delegate, - MetricsContainerStepMapAccumulator metricsAccum) { + String stepName, DoFnRunner delegate, MetricsAccumulator metricsAccum) { + this(delegate, metricsAccum.value().getContainer(stepName)); + } + + private DoFnRunnerWithMetrics(DoFnRunner delegate, MetricsContainer metrics) { this.delegate = delegate; - this.stepName = stepName; - this.metricsAccum = metricsAccum; + this.metrics = metrics; } @Override @@ -52,7 +51,7 @@ public DoFn getFn() { @Override public void startBundle() { - try (Closeable ignored = MetricsEnvironment.scopedMetricsContainer(metricsContainer())) { + try (Closeable ignored = MetricsEnvironment.scopedMetricsContainer(metrics)) { delegate.startBundle(); } catch (IOException e) { throw new RuntimeException(e); @@ -61,7 +60,7 @@ public void startBundle() { @Override public void processElement(final WindowedValue elem) { - try (Closeable ignored = MetricsEnvironment.scopedMetricsContainer(metricsContainer())) { + try (Closeable ignored = MetricsEnvironment.scopedMetricsContainer(metrics)) { delegate.processElement(elem); } catch (IOException e) { throw new RuntimeException(e); @@ -77,7 +76,7 @@ public void onTimer( final Instant timestamp, final Instant outputTimestamp, final TimeDomain timeDomain) { - try (Closeable ignored = MetricsEnvironment.scopedMetricsContainer(metricsContainer())) { + try (Closeable ignored = MetricsEnvironment.scopedMetricsContainer(metrics)) { delegate.onTimer(timerId, timerFamilyId, key, window, timestamp, outputTimestamp, timeDomain); } catch (IOException e) { throw new RuntimeException(e); @@ -86,7 +85,7 @@ public void onTimer( @Override public void finishBundle() { - try (Closeable ignored = MetricsEnvironment.scopedMetricsContainer(metricsContainer())) { + try (Closeable ignored = MetricsEnvironment.scopedMetricsContainer(metrics)) { delegate.finishBundle(); } catch (IOException e) { throw new RuntimeException(e); @@ -97,8 +96,4 @@ public void finishBundle() { public void onWindowExpiration(BoundedWindow window, Instant timestamp, KeyT key) { delegate.onWindowExpiration(window, timestamp, key); } - - private MetricsContainer metricsContainer() { - return metricsAccum.value().getContainer(stepName); - } } diff --git a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ParDoTranslatorBatch.java b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ParDoTranslatorBatch.java index 3083ff5101b93..442522379305d 100644 --- a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ParDoTranslatorBatch.java +++ b/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ParDoTranslatorBatch.java @@ -34,6 +34,7 @@ import org.apache.beam.runners.core.DoFnRunners; import org.apache.beam.runners.core.SideInputReader; import org.apache.beam.runners.spark.SparkCommonPipelineOptions; +import org.apache.beam.runners.spark.structuredstreaming.metrics.MetricsAccumulator; import org.apache.beam.runners.spark.structuredstreaming.translation.TransformTranslator; import org.apache.beam.runners.spark.structuredstreaming.translation.batch.functions.SideInputValues; import 
org.apache.beam.runners.spark.structuredstreaming.translation.batch.functions.SparkSideInputReader; @@ -110,11 +111,19 @@ public void translate(ParDo.MultiOutput transform, Context cxt) throws IOException { PCollection input = (PCollection) cxt.getInput(); - Map, PCollection> outputs = cxt.getOutputs(); Dataset> inputDs = cxt.getDataset(input); SideInputReader sideInputReader = createSideInputReader(transform.getSideInputs().values(), cxt); + MetricsAccumulator metrics = MetricsAccumulator.getInstance(cxt.getSparkSession()); + + TupleTag mainOut = transform.getMainOutputTag(); + // Filter out unconsumed PCollections (except mainOut) to potentially avoid the costs of caching + // if not really beneficial. + Map, PCollection> outputs = + Maps.filterEntries( + cxt.getOutputs(), + e -> e != null && (e.getKey().equals(mainOut) || !cxt.isLeave(e.getValue()))); if (outputs.size() > 1) { // In case of multiple outputs / tags, map each tag to a column by index. @@ -128,6 +137,7 @@ public void translate(ParDo.MultiOutput transform, Context cxt) cxt.getOptionsSupplier(), input, sideInputReader, + metrics, tagColIdx); // FIXME What's the strategy to unpersist Datasets / RDDs? @@ -176,10 +186,10 @@ public void translate(ParDo.MultiOutput transform, Context cxt) } } } else { - PCollection output = cxt.getOutput(transform.getMainOutputTag()); + PCollection output = cxt.getOutput(mainOut); DoFnPartitionIteratorFactory> doFnMapper = DoFnPartitionIteratorFactory.singleOutput( - cxt.getCurrentTransform(), cxt.getOptionsSupplier(), input, sideInputReader); + cxt.getCurrentTransform(), cxt.getOptionsSupplier(), input, sideInputReader, metrics); Dataset> mainDS = inputDs.mapPartitions(doFnMapper, cxt.windowedEncoder(output.getCoder())); diff --git a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/SparkSessionRule.java b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/SparkSessionRule.java index 33eef26ddddac..278fd012d77e4 100644 --- a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/SparkSessionRule.java +++ b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/SparkSessionRule.java @@ -18,6 +18,7 @@ package org.apache.beam.runners.spark.structuredstreaming; import static java.util.stream.Collectors.toMap; +import static org.apache.beam.runners.spark.structuredstreaming.translation.utils.ScalaInterop.fun1; import java.io.Serializable; import java.util.Arrays; @@ -29,6 +30,7 @@ import org.apache.beam.sdk.values.KV; import org.apache.spark.sql.SparkSession; import org.junit.rules.ExternalResource; +import org.junit.rules.TestRule; import org.junit.runner.Description; import org.junit.runners.model.Statement; @@ -69,6 +71,24 @@ public PipelineOptions configure(PipelineOptions options) { return opts; } + /** {@code true} if sessions contains cached Datasets or RDDs. 
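The Maps.filterEntries(...) call in ParDoTranslatorBatch above keeps the main output plus any additional output that something downstream still consumes ("isLeave" meaning no dependent transforms). A toy illustration of that predicate using plain Guava (the map contents and names are invented):

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import java.util.Map;

class FilterOutputsSketch {
  public static void main(String[] args) {
    // Key = output tag name, value = whether anything consumes it downstream.
    Map<String, Boolean> outputs = ImmutableMap.of("main", true, "debug", false);
    Map<String, Boolean> kept =
        Maps.filterEntries(
            outputs, e -> e != null && (e.getKey().equals("main") || e.getValue()));
    // kept == {main=true}: with a single output remaining, the translator takes
    // the cheaper single-output path and nothing needs to be cached.
    System.out.println(kept);
  }
}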
*/ + public boolean hasCachedData() { + return !session.sharedState().cacheManager().isEmpty() + || !session.sparkContext().getPersistentRDDs().isEmpty(); + } + + public TestRule clearCache() { + return new ExternalResource() { + @Override + protected void after() { + // clear cached datasets + session.sharedState().cacheManager().clearCache(); + // clear cached RDDs + session.sparkContext().getPersistentRDDs().foreach(fun1(t -> t._2.unpersist(true))); + } + }; + } + @Override public Statement apply(Statement base, Description description) { builder.appName(description.getDisplayName()); diff --git a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/CombineGloballyTest.java b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/CombineGloballyTest.java index dca8b664bd3d3..cca192df9de35 100644 --- a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/CombineGloballyTest.java +++ b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/CombineGloballyTest.java @@ -18,10 +18,7 @@ package org.apache.beam.runners.spark.structuredstreaming.translation.batch; import java.io.Serializable; -import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingPipelineOptions; -import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingRunner; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.runners.spark.structuredstreaming.SparkSessionRule; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Combine; @@ -38,6 +35,7 @@ import org.apache.beam.sdk.values.TimestampedValue; import org.joda.time.Duration; import org.joda.time.Instant; +import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; @@ -48,15 +46,11 @@ */ @RunWith(JUnit4.class) public class CombineGloballyTest implements Serializable { - @Rule public transient TestPipeline pipeline = TestPipeline.fromOptions(testOptions()); + @ClassRule public static final SparkSessionRule SESSION = new SparkSessionRule(); - private static PipelineOptions testOptions() { - SparkStructuredStreamingPipelineOptions options = - PipelineOptionsFactory.create().as(SparkStructuredStreamingPipelineOptions.class); - options.setRunner(SparkStructuredStreamingRunner.class); - options.setTestMode(true); - return options; - } + @Rule + public transient TestPipeline pipeline = + TestPipeline.fromOptions(SESSION.createPipelineOptions()); @Test public void testCombineGlobally() { diff --git a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/CombineGroupedValuesTest.java b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/CombineGroupedValuesTest.java index cce3199d2c375..774186c1821c4 100644 --- a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/CombineGroupedValuesTest.java +++ b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/CombineGroupedValuesTest.java @@ -18,14 +18,11 @@ package org.apache.beam.runners.spark.structuredstreaming.translation.batch; import java.io.Serializable; -import 
org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingPipelineOptions; -import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingRunner; +import org.apache.beam.runners.spark.structuredstreaming.SparkSessionRule; import org.apache.beam.sdk.coders.IterableCoder; import org.apache.beam.sdk.coders.KvCoder; import org.apache.beam.sdk.coders.StringUtf8Coder; import org.apache.beam.sdk.coders.VarIntCoder; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Combine; @@ -34,6 +31,7 @@ import org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; +import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; @@ -42,15 +40,11 @@ /** Test class for beam to spark {@link Combine#groupedValues} translation. */ @RunWith(JUnit4.class) public class CombineGroupedValuesTest implements Serializable { - @Rule public transient TestPipeline pipeline = TestPipeline.fromOptions(testOptions()); + @ClassRule public static final SparkSessionRule SESSION = new SparkSessionRule(); - private static PipelineOptions testOptions() { - SparkStructuredStreamingPipelineOptions options = - PipelineOptionsFactory.create().as(SparkStructuredStreamingPipelineOptions.class); - options.setRunner(SparkStructuredStreamingRunner.class); - options.setTestMode(true); - return options; - } + @Rule + public transient TestPipeline pipeline = + TestPipeline.fromOptions(SESSION.createPipelineOptions()); @Test public void testCombineGroupedValues() { diff --git a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/CombinePerKeyTest.java b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/CombinePerKeyTest.java index c8b25b3355dd0..5a2335a154eb1 100644 --- a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/CombinePerKeyTest.java +++ b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/CombinePerKeyTest.java @@ -20,10 +20,7 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.List; -import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingPipelineOptions; -import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingRunner; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.runners.spark.structuredstreaming.SparkSessionRule; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.CombineFnBase; @@ -41,6 +38,7 @@ import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists; import org.joda.time.Duration; import org.joda.time.Instant; +import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; @@ -52,15 +50,11 @@ */ @RunWith(JUnit4.class) public class CombinePerKeyTest implements Serializable { - @Rule public transient TestPipeline pipeline = TestPipeline.fromOptions(testOptions()); + @ClassRule public static final SparkSessionRule SESSION = new SparkSessionRule(); - private static 
PipelineOptions testOptions() { - SparkStructuredStreamingPipelineOptions options = - PipelineOptionsFactory.create().as(SparkStructuredStreamingPipelineOptions.class); - options.setRunner(SparkStructuredStreamingRunner.class); - options.setTestMode(true); - return options; - } + @Rule + public transient TestPipeline pipeline = + TestPipeline.fromOptions(SESSION.createPipelineOptions()); @Test public void testCombinePerKey() { diff --git a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ComplexSourceTest.java b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ComplexSourceTest.java index 582a31a05a6af..4ba356f6ce750 100644 --- a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ComplexSourceTest.java +++ b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ComplexSourceTest.java @@ -25,11 +25,8 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.List; -import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingPipelineOptions; -import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingRunner; +import org.apache.beam.runners.spark.structuredstreaming.SparkSessionRule; import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.values.PCollection; @@ -48,15 +45,11 @@ public class ComplexSourceTest implements Serializable { private static File file; private static List lines = createLines(30); - @Rule public transient TestPipeline pipeline = TestPipeline.fromOptions(testOptions()); + @ClassRule public static final SparkSessionRule SESSION = new SparkSessionRule(); - private static PipelineOptions testOptions() { - SparkStructuredStreamingPipelineOptions options = - PipelineOptionsFactory.create().as(SparkStructuredStreamingPipelineOptions.class); - options.setRunner(SparkStructuredStreamingRunner.class); - options.setTestMode(true); - return options; - } + @Rule + public transient TestPipeline pipeline = + TestPipeline.fromOptions(SESSION.createPipelineOptions()); @BeforeClass public static void beforeClass() throws IOException { diff --git a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/FlattenTest.java b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/FlattenTest.java index 50b443da9ae64..bf3774ba29ece 100644 --- a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/FlattenTest.java +++ b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/FlattenTest.java @@ -18,16 +18,14 @@ package org.apache.beam.runners.spark.structuredstreaming.translation.batch; import java.io.Serializable; -import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingPipelineOptions; -import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingRunner; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.runners.spark.structuredstreaming.SparkSessionRule; import org.apache.beam.sdk.testing.PAssert; import 
org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Create; import org.apache.beam.sdk.transforms.Flatten; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PCollectionList; +import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; @@ -36,20 +34,18 @@ /** Test class for beam to spark flatten translation. */ @RunWith(JUnit4.class) public class FlattenTest implements Serializable { - @Rule public transient TestPipeline pipeline = TestPipeline.fromOptions(testOptions()); + @ClassRule public static final SparkSessionRule SESSION = new SparkSessionRule(); - private static PipelineOptions testOptions() { - SparkStructuredStreamingPipelineOptions options = - PipelineOptionsFactory.create().as(SparkStructuredStreamingPipelineOptions.class); - options.setRunner(SparkStructuredStreamingRunner.class); - options.setTestMode(true); - return options; - } + @Rule + public transient TestPipeline pipeline = + TestPipeline.fromOptions(SESSION.createPipelineOptions()); @Test public void testFlatten() { - PCollection input1 = pipeline.apply(Create.of(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); - PCollection input2 = pipeline.apply(Create.of(11, 12, 13, 14, 15, 16, 17, 18, 19, 20)); + PCollection input1 = + pipeline.apply("input1", Create.of(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); + PCollection input2 = + pipeline.apply("input2", Create.of(11, 12, 13, 14, 15, 16, 17, 18, 19, 20)); PCollectionList pcs = PCollectionList.of(input1).and(input2); PCollection input = pcs.apply(Flatten.pCollections()); PAssert.that(input) diff --git a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/GroupByKeyTest.java b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/GroupByKeyTest.java index 1a84466b319b1..b1aa300fc27a3 100644 --- a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/GroupByKeyTest.java +++ b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/GroupByKeyTest.java @@ -29,10 +29,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; -import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingPipelineOptions; -import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingRunner; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.runners.spark.structuredstreaming.SparkSessionRule; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.SerializableMatcher; import org.apache.beam.sdk.testing.TestPipeline; @@ -51,6 +48,7 @@ import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists; import org.joda.time.Duration; import org.joda.time.Instant; +import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; @@ -59,15 +57,11 @@ /** Test class for beam to spark {@link ParDo} translation. 
*/ @RunWith(JUnit4.class) public class GroupByKeyTest implements Serializable { - @Rule public transient TestPipeline pipeline = TestPipeline.fromOptions(testOptions()); + @ClassRule public static final SparkSessionRule SESSION = new SparkSessionRule(); - private static PipelineOptions testOptions() { - SparkStructuredStreamingPipelineOptions options = - PipelineOptionsFactory.create().as(SparkStructuredStreamingPipelineOptions.class); - options.setRunner(SparkStructuredStreamingRunner.class); - options.setTestMode(true); - return options; - } + @Rule + public transient TestPipeline pipeline = + TestPipeline.fromOptions(SESSION.createPipelineOptions()); @Test public void testGroupByKeyPreservesWindowing() { diff --git a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ParDoTest.java b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ParDoTest.java index f319173ed2bb0..672a2db4fe1e7 100644 --- a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ParDoTest.java +++ b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/ParDoTest.java @@ -17,14 +17,13 @@ */ package org.apache.beam.runners.spark.structuredstreaming.translation.batch; +import static org.junit.Assert.assertTrue; + import java.io.Serializable; import java.util.List; import java.util.Map; import org.apache.beam.runners.spark.SparkCommonPipelineOptions; -import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingPipelineOptions; -import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingRunner; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.runners.spark.structuredstreaming.SparkSessionRule; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Create; @@ -37,23 +36,23 @@ import org.apache.beam.sdk.values.PCollectionView; import org.apache.beam.sdk.values.TupleTag; import org.apache.beam.sdk.values.TupleTagList; +import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TestRule; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; /** Test class for beam to spark {@link ParDo} translation. 
*/ @RunWith(JUnit4.class) public class ParDoTest implements Serializable { - @Rule public transient TestPipeline pipeline = TestPipeline.fromOptions(testOptions()); - - private static PipelineOptions testOptions() { - SparkStructuredStreamingPipelineOptions options = - PipelineOptionsFactory.create().as(SparkStructuredStreamingPipelineOptions.class); - options.setRunner(SparkStructuredStreamingRunner.class); - options.setTestMode(true); - return options; - } + @ClassRule public static final SparkSessionRule SESSION = new SparkSessionRule(); + + @Rule + public transient TestPipeline pipeline = + TestPipeline.fromOptions(SESSION.createPipelineOptions()); + + @Rule public transient TestRule clearCache = SESSION.clearCache(); @Test public void testPardo() { @@ -61,32 +60,42 @@ public void testPardo() { pipeline.apply(Create.of(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)).apply(ParDo.of(PLUS_ONE_DOFN)); PAssert.that(input).containsInAnyOrder(2, 3, 4, 5, 6, 7, 8, 9, 10, 11); pipeline.run(); + + assertTrue("No usage of cache expected", !SESSION.hasCachedData()); } @Test public void testPardoWithOutputTagsCachedRDD() { - pardoWithOutputTags("MEMORY_ONLY"); + pardoWithOutputTags("MEMORY_ONLY", true); + assertTrue("Expected cached data", SESSION.hasCachedData()); } @Test public void testPardoWithOutputTagsCachedDataset() { - pardoWithOutputTags("MEMORY_AND_DISK"); + pardoWithOutputTags("MEMORY_AND_DISK", true); + assertTrue("Expected cached data", SESSION.hasCachedData()); + } + + @Test + public void testPardoWithUnusedOutputTags() { + pardoWithOutputTags("MEMORY_AND_DISK", false); + assertTrue("No usage of cache expected", !SESSION.hasCachedData()); } - private void pardoWithOutputTags(String storageLevel) { + private void pardoWithOutputTags(String storageLevel, boolean evaluateAdditionalOutputs) { pipeline.getOptions().as(SparkCommonPipelineOptions.class).setStorageLevel(storageLevel); - TupleTag even = new TupleTag() {}; - TupleTag unevenAsString = new TupleTag() {}; + TupleTag mainTag = new TupleTag() {}; + TupleTag additionalUnevenTag = new TupleTag() {}; DoFn doFn = new DoFn() { @ProcessElement public void processElement(@Element Integer i, MultiOutputReceiver out) { if (i % 2 == 0) { - out.get(even).output(i); + out.get(mainTag).output(i); } else { - out.get(unevenAsString).output(i.toString()); + out.get(additionalUnevenTag).output(i.toString()); } } }; @@ -94,10 +103,12 @@ public void processElement(@Element Integer i, MultiOutputReceiver out) { PCollectionTuple outputs = pipeline .apply(Create.of(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) - .apply(ParDo.of(doFn).withOutputTags(even, TupleTagList.of(unevenAsString))); + .apply(ParDo.of(doFn).withOutputTags(mainTag, TupleTagList.of(additionalUnevenTag))); - PAssert.that(outputs.get(even)).containsInAnyOrder(2, 4, 6, 8, 10); - PAssert.that(outputs.get(unevenAsString)).containsInAnyOrder("1", "3", "5", "7", "9"); + PAssert.that(outputs.get(mainTag)).containsInAnyOrder(2, 4, 6, 8, 10); + if (evaluateAdditionalOutputs) { + PAssert.that(outputs.get(additionalUnevenTag)).containsInAnyOrder("1", "3", "5", "7", "9"); + } pipeline.run(); } @@ -106,10 +117,12 @@ public void testTwoPardoInRow() { PCollection input = pipeline .apply(Create.of(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) - .apply(ParDo.of(PLUS_ONE_DOFN)) - .apply(ParDo.of(PLUS_ONE_DOFN)); + .apply("Plus 1 (1st)", ParDo.of(PLUS_ONE_DOFN)) + .apply("Plus 1 (2nd)", ParDo.of(PLUS_ONE_DOFN)); PAssert.that(input).containsInAnyOrder(3, 4, 5, 6, 7, 8, 9, 10, 11, 12); pipeline.run(); + + assertTrue("No usage of cache expected", 
!SESSION.hasCachedData()); } @Test @@ -133,6 +146,8 @@ public void processElement(ProcessContext c) { .withSideInputs(sideInputView)); PAssert.that(input).containsInAnyOrder(4, 5, 6, 7, 8, 9, 10); pipeline.run(); + + assertTrue("No usage of cache expected", !SESSION.hasCachedData()); } @Test @@ -158,6 +173,8 @@ public void processElement(ProcessContext c) { PAssert.that(input).containsInAnyOrder(2, 3, 4, 5, 6, 7, 8, 9, 10); pipeline.run(); + + assertTrue("No usage of cache expected", !SESSION.hasCachedData()); } @Test @@ -183,6 +200,8 @@ public void processElement(ProcessContext c) { .withSideInputs(sideInputView)); PAssert.that(input).containsInAnyOrder(3, 4, 5, 6, 7, 8, 9, 10); pipeline.run(); + + assertTrue("No usage of cache expected", !SESSION.hasCachedData()); } private static final DoFn PLUS_ONE_DOFN = diff --git a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/SimpleSourceTest.java b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/SimpleSourceTest.java index 0f16b6442221a..d70293d505602 100644 --- a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/SimpleSourceTest.java +++ b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/SimpleSourceTest.java @@ -18,14 +18,12 @@ package org.apache.beam.runners.spark.structuredstreaming.translation.batch; import java.io.Serializable; -import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingPipelineOptions; -import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingRunner; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.runners.spark.structuredstreaming.SparkSessionRule; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Create; import org.apache.beam.sdk.values.PCollection; +import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; @@ -34,15 +32,11 @@ /** Test class for beam to spark source translation. 
*/ @RunWith(JUnit4.class) public class SimpleSourceTest implements Serializable { - @Rule public transient TestPipeline pipeline = TestPipeline.fromOptions(testOptions()); + @ClassRule public static final SparkSessionRule SESSION = new SparkSessionRule(); - private static PipelineOptions testOptions() { - SparkStructuredStreamingPipelineOptions options = - PipelineOptionsFactory.create().as(SparkStructuredStreamingPipelineOptions.class); - options.setRunner(SparkStructuredStreamingRunner.class); - options.setTestMode(true); - return options; - } + @Rule + public transient TestPipeline pipeline = + TestPipeline.fromOptions(SESSION.createPipelineOptions()); @Test public void testBoundedSource() { diff --git a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/WindowAssignTest.java b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/WindowAssignTest.java index 28efe754ddf63..ecb3e7ebdb5b2 100644 --- a/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/WindowAssignTest.java +++ b/runners/spark/3/src/test/java/org/apache/beam/runners/spark/structuredstreaming/translation/batch/WindowAssignTest.java @@ -18,10 +18,7 @@ package org.apache.beam.runners.spark.structuredstreaming.translation.batch; import java.io.Serializable; -import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingPipelineOptions; -import org.apache.beam.runners.spark.structuredstreaming.SparkStructuredStreamingRunner; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.runners.spark.structuredstreaming.SparkSessionRule; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Create; @@ -32,6 +29,7 @@ import org.apache.beam.sdk.values.TimestampedValue; import org.joda.time.Duration; import org.joda.time.Instant; +import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; @@ -40,15 +38,11 @@ /** Test class for beam to spark window assign translation. 
*/ @RunWith(JUnit4.class) public class WindowAssignTest implements Serializable { - @Rule public transient TestPipeline pipeline = TestPipeline.fromOptions(testOptions()); + @ClassRule public static final SparkSessionRule SESSION = new SparkSessionRule(); - private static PipelineOptions testOptions() { - SparkStructuredStreamingPipelineOptions options = - PipelineOptionsFactory.create().as(SparkStructuredStreamingPipelineOptions.class); - options.setRunner(SparkStructuredStreamingRunner.class); - options.setTestMode(true); - return options; - } + @Rule + public transient TestPipeline pipeline = + TestPipeline.fromOptions(SESSION.createPipelineOptions()); @Test public void testWindowAssign() { diff --git a/scripts/ci/pr-bot/processPrUpdate.ts b/scripts/ci/pr-bot/processPrUpdate.ts index 8c9d0b3c03499..f9aa15713216e 100644 --- a/scripts/ci/pr-bot/processPrUpdate.ts +++ b/scripts/ci/pr-bot/processPrUpdate.ts @@ -75,20 +75,19 @@ async function processPrComment( const commentContents = payload.comment.body; const commentAuthor = payload.sender.login; const pullAuthor = getPullAuthorFromPayload(payload); + console.log(commentContents); + const processedCommand = await processCommand( + payload, + commentAuthor, + commentContents, + stateClient, + reviewerConfig + ); // If there's been a comment by a non-author, we can remove the slow review label if (commentAuthor !== pullAuthor && commentAuthor !== BOT_NAME) { await removeSlowReviewLabel(payload); } - console.log(commentContents); - if ( - await processCommand( - payload, - commentAuthor, - commentContents, - stateClient, - reviewerConfig - ) - ) { + if (processedCommand) { // If we've processed a command, don't worry about trying to change the attention set. // This is not a meaningful push or comment from the author. 
console.log("Processed command"); diff --git a/scripts/ci/pr-bot/shared/persistentState.ts b/scripts/ci/pr-bot/shared/persistentState.ts index 76e23f790cd04..9c277f14d4aba 100644 --- a/scripts/ci/pr-bot/shared/persistentState.ts +++ b/scripts/ci/pr-bot/shared/persistentState.ts @@ -40,14 +40,20 @@ async function commitStateToRepo() { ); } // Print changes for observability - await exec.exec("git status", [], {ignoreReturnCode: true}); + await exec.exec("git status", [], { ignoreReturnCode: true }); await exec.exec("git add state/*"); - const changes = await exec.exec("git diff --quiet --cached origin/pr-bot-state state", [], {ignoreReturnCode: true}); + const changes = await exec.exec( + "git diff --quiet --cached origin/pr-bot-state state", + [], + { ignoreReturnCode: true } + ); if (changes == 1) { await exec.exec(`git commit -m "Updating config from bot" --allow-empty`); await exec.exec("git push origin pr-bot-state"); } else { - console.log("Skipping updating state branch since there are no changes to commit"); + console.log( + "Skipping updating state branch since there are no changes to commit" + ); } } diff --git a/sdks/go.mod b/sdks/go.mod index c7ef57eedbee2..487bdd345338e 100644 --- a/sdks/go.mod +++ b/sdks/go.mod @@ -32,7 +32,7 @@ require ( github.com/aws/aws-sdk-go-v2/config v1.18.7 github.com/aws/aws-sdk-go-v2/credentials v1.13.7 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.46 - github.com/aws/aws-sdk-go-v2/service/s3 v1.29.6 + github.com/aws/aws-sdk-go-v2/service/s3 v1.30.0 github.com/aws/smithy-go v1.13.5 github.com/docker/go-connections v0.4.0 github.com/dustin/go-humanize v1.0.0 @@ -48,13 +48,14 @@ require ( github.com/testcontainers/testcontainers-go v0.15.0 github.com/xitongsys/parquet-go v1.6.2 github.com/xitongsys/parquet-go-source v0.0.0-20220315005136-aec0fe3e777c - golang.org/x/net v0.4.0 + go.mongodb.org/mongo-driver v1.11.1 + golang.org/x/net v0.5.0 golang.org/x/oauth2 v0.0.0-20221014153046-6fdb5e3db783 golang.org/x/sync v0.1.0 - golang.org/x/sys v0.3.0 - golang.org/x/text v0.5.0 - google.golang.org/api v0.105.0 - google.golang.org/genproto v0.0.0-20221206210731-b1a01be3a5f6 + golang.org/x/sys v0.4.0 + golang.org/x/text v0.6.0 + google.golang.org/api v0.106.0 + google.golang.org/genproto v0.0.0-20221227171554-f9683d7f8bef google.golang.org/grpc v1.51.0 google.golang.org/protobuf v1.28.1 gopkg.in/retry.v1 v1.0.3 @@ -65,13 +66,13 @@ require cloud.google.com/go/spanner v1.42.0 require ( cloud.google.com/go/bigtable v1.18.1 - github.com/tetratelabs/wazero v1.0.0-pre.4 + github.com/tetratelabs/wazero v1.0.0-pre.7 ) require ( cloud.google.com/go v0.107.0 // indirect - cloud.google.com/go/compute v1.13.0 // indirect - cloud.google.com/go/compute/metadata v0.2.2 // indirect + cloud.google.com/go/compute v1.14.0 // indirect + cloud.google.com/go/compute/metadata v0.2.3 // indirect cloud.google.com/go/iam v0.8.0 // indirect cloud.google.com/go/longrunning v0.3.0 // indirect github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect @@ -79,7 +80,7 @@ require ( github.com/Microsoft/hcsshim v0.9.4 // indirect github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516 // indirect github.com/apache/thrift v0.14.2 // indirect - github.com/aws/aws-sdk-go v1.30.19 // indirect + github.com/aws/aws-sdk-go v1.33.0 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.21 // indirect github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.27 // indirect @@ -109,15 
+110,16 @@ require ( github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/snappy v0.0.4 // indirect github.com/google/pprof v0.0.0-20221103000818-d260c55eee4c // indirect - github.com/googleapis/enterprise-certificate-proxy v0.2.0 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.2.1 // indirect github.com/googleapis/gax-go/v2 v2.7.0 // indirect github.com/inconshreveable/mousetrap v1.0.1 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect - github.com/klauspost/compress v1.13.1 // indirect + github.com/klauspost/compress v1.13.6 // indirect github.com/magiconair/properties v1.8.6 // indirect github.com/moby/sys/mount v0.3.3 // indirect github.com/moby/sys/mountinfo v0.6.2 // indirect github.com/moby/term v0.0.0-20210619224110-3f7ff695adc6 // indirect + github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe // indirect github.com/morikuni/aec v1.0.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799 // indirect @@ -128,7 +130,12 @@ require ( github.com/shabbyrobe/gocovmerge v0.0.0-20180507124511-f6ea450bfb63 // indirect github.com/sirupsen/logrus v1.8.1 // indirect github.com/spf13/pflag v1.0.5 // indirect + github.com/xdg-go/pbkdf2 v1.0.0 // indirect + github.com/xdg-go/scram v1.1.1 // indirect + github.com/xdg-go/stringprep v1.0.3 // indirect + github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect go.opencensus.io v0.24.0 // indirect + golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d // indirect golang.org/x/tools v0.1.12 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/appengine v1.6.7 // indirect diff --git a/sdks/go.sum b/sdks/go.sum index 3d7c21812c068..dea9d0bb4e87c 100644 --- a/sdks/go.sum +++ b/sdks/go.sum @@ -20,10 +20,10 @@ cloud.google.com/go/bigquery v1.44.0 h1:Wi4dITi+cf9VYp4VH2T9O41w0kCW0uQTELq2Z6tu cloud.google.com/go/bigquery v1.44.0/go.mod h1:0Y33VqXTEsbamHJvJHdFmtqHvMIY28aK1+dFsvaChGc= cloud.google.com/go/bigtable v1.18.1 h1:SxQk9Bj6OKxeiuvevG/KBjqGn/7X8heZbWfK0tYkFd8= cloud.google.com/go/bigtable v1.18.1/go.mod h1:NAVyfJot9jlo+KmgWLUJ5DJGwNDoChzAcrecLpmuAmY= -cloud.google.com/go/compute v1.13.0 h1:AYrLkB8NPdDRslNp4Jxmzrhdr03fUAIDbiGFjLWowoU= -cloud.google.com/go/compute v1.13.0/go.mod h1:5aPTS0cUNMIc1CE546K+Th6weJUNQErARyZtRXDJ8GE= -cloud.google.com/go/compute/metadata v0.2.2 h1:aWKAjYaBaOSrpKl57+jnS/3fJRQnxL7TvR/u1VVbt6k= -cloud.google.com/go/compute/metadata v0.2.2/go.mod h1:jgHgmJd2RKBGzXqF5LR2EZMGxBkeanZ9wwa75XHJgOM= +cloud.google.com/go/compute v1.14.0 h1:hfm2+FfxVmnRlh6LpB7cg1ZNU+5edAHmW679JePztk0= +cloud.google.com/go/compute v1.14.0/go.mod h1:YfLtxrj9sU4Yxv+sXzZkyPjEyPBZfXHUvjxega5vAdo= +cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= +cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= cloud.google.com/go/datacatalog v1.8.0 h1:6kZ4RIOW/uT7QWC5SfPfq/G8sYzr/v+UOmOAxy4Z1TE= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= @@ -113,8 +113,9 @@ github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5 github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= github.com/aws/aws-sdk-go v1.15.11/go.mod 
h1:mFuSZ37Z9YOHbQEwBWztmVzqXrEkub65tZoCYDt7FT0= github.com/aws/aws-sdk-go v1.17.4/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= -github.com/aws/aws-sdk-go v1.30.19 h1:vRwsYgbUvC25Cb3oKXTyTYk3R5n1LRVk8zbvL4inWsc= github.com/aws/aws-sdk-go v1.30.19/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0= +github.com/aws/aws-sdk-go v1.33.0 h1:Bq5Y6VTLbfnJp1IV8EL/qUU5qO1DYHda/zis/sqevkY= +github.com/aws/aws-sdk-go v1.33.0/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0= github.com/aws/aws-sdk-go-v2 v1.7.1/go.mod h1:L5LuPC1ZgDr2xQS7AmIec/Jlc7O/Y1u2KxJyNVab250= github.com/aws/aws-sdk-go-v2 v1.17.3 h1:shN7NlnVzvDUgPQ+1rLMSxY8OWRNDRYtiqe0p/PgrhY= github.com/aws/aws-sdk-go-v2 v1.17.3/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw= @@ -153,8 +154,9 @@ github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.5.1/go.mod h1:6EQZIwNN github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.21 h1:vY5siRXvW5TrOKm2qKEf9tliBfdLxdfy0i02LOcmqUo= github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.21/go.mod h1:WZvNXT1XuH8dnJM0HvOlvk+RNn7NbAPvA/ACO0QarSc= github.com/aws/aws-sdk-go-v2/service/s3 v1.11.1/go.mod h1:XLAGFrEjbvMCLvAtWLLP32yTv8GpBquCApZEycDLunI= -github.com/aws/aws-sdk-go-v2/service/s3 v1.29.6 h1:W8pLcSn6Uy0eXgDBUUl8M8Kxv7JCoP68ZKTD04OXLEA= github.com/aws/aws-sdk-go-v2/service/s3 v1.29.6/go.mod h1:L2l2/q76teehcW7YEsgsDjqdsDTERJeX3nOMIFlgGUE= +github.com/aws/aws-sdk-go-v2/service/s3 v1.30.0 h1:wddsyuESfviaiXk3w9N6/4iRwTg/a3gktjODY6jYQBo= +github.com/aws/aws-sdk-go-v2/service/s3 v1.30.0/go.mod h1:L2l2/q76teehcW7YEsgsDjqdsDTERJeX3nOMIFlgGUE= github.com/aws/aws-sdk-go-v2/service/sso v1.3.1/go.mod h1:J3A3RGUvuCZjvSuZEcOpHDnzZP/sKbhDWV2T1EOzFIM= github.com/aws/aws-sdk-go-v2/service/sso v1.11.28 h1:gItLq3zBYyRDPmqAClgzTH8PBjDQGeyptYGHIwtYYNA= github.com/aws/aws-sdk-go-v2/service/sso v1.11.28/go.mod h1:wo/B7uUm/7zw/dWhBJ4FXuw1sySU5lyIhVg1Bu2yL9A= @@ -469,6 +471,7 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= @@ -513,8 +516,8 @@ github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/googleapis/enterprise-certificate-proxy v0.2.0 h1:y8Yozv7SZtlU//QXbezB6QkpuE6jMD2/gfzk4AftXjs= -github.com/googleapis/enterprise-certificate-proxy v0.2.0/go.mod h1:8C0jb7/mgJe/9KK8Lm7X9ctZC2t60YyIpYEI16jx0Qg= +github.com/googleapis/enterprise-certificate-proxy v0.2.1 h1:RY7tHKZcRlk788d5WSo/e83gOyyy742E8GSs771ySpg= +github.com/googleapis/enterprise-certificate-proxy v0.2.1/go.mod h1:AwSRAtLfXpU5Nm3pW+v7rGDHp09LsPtGY9MduiEsR9k= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod 
h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/gax-go/v2 v2.7.0 h1:IcsPKeInNvYi7eqSaDjiZqDDKu5rsmunY0Y1YupQSSQ= @@ -580,8 +583,9 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o github.com/klauspost/compress v1.9.7/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= -github.com/klauspost/compress v1.13.1 h1:wXr2uRxZTJXHLly6qhJabee5JqIhTRoLBhDOA74hDEQ= github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= +github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc= +github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -639,6 +643,8 @@ github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe h1:iruDEfMl2E6fbMZ9s0scYfZQ84/6SPL6zC8ACM2oIL0= +github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= @@ -814,8 +820,10 @@ github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG github.com/tchap/go-patricia v2.2.6+incompatible/go.mod h1:bmLyhP68RS6kStMGxByiQ23RP/odRBOTVjwp2cDyi6I= github.com/testcontainers/testcontainers-go v0.15.0 h1:3Ex7PUGFv0b2bBsdOv6R42+SK2qoZnWBd21LvZYhUtQ= github.com/testcontainers/testcontainers-go v0.15.0/go.mod h1:PkohMRH2X8Hib0IWtifVexDfLPVT+tb5E9hsf7cW12w= -github.com/tetratelabs/wazero v1.0.0-pre.4 h1:RBJQT5OzmORkSp6MmZDWoFEr0zXjk4pmvMKAdeUnsaI= -github.com/tetratelabs/wazero v1.0.0-pre.4/go.mod h1:u8wrFmpdrykiFK0DFPiFm5a4+0RzsdmXYVtijBKqUVo= +github.com/tetratelabs/wazero v1.0.0-pre.7 h1:WI5N14XxoXw+ZWhcjSazJ6rEowhJbH/x8hglxC5gN7k= +github.com/tetratelabs/wazero v1.0.0-pre.7/go.mod h1:u8wrFmpdrykiFK0DFPiFm5a4+0RzsdmXYVtijBKqUVo= +github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4= +github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= @@ -831,6 +839,12 @@ github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17 
github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= github.com/willf/bitset v1.1.11-0.20200630133818-d5bec3311243/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI= +github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= +github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= +github.com/xdg-go/scram v1.1.1 h1:VOMT+81stJgXW3CpHyqHN3AXDYIMsx56mEFrB37Mb/E= +github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g= +github.com/xdg-go/stringprep v1.0.3 h1:kdwGpVNwPFtjs98xCGkHjQtGKh86rDcRZN17QEMCOIs= +github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= github.com/xeipuuv/gojsonschema v0.0.0-20180618132009-1d523034197f/go.mod h1:5yf86TLmAcydyeJq5YvxkGPE2fm/u4myDekKRoLuqhs= @@ -843,6 +857,8 @@ github.com/xitongsys/parquet-go-source v0.0.0-20200817004010-026bad9b25d0/go.mod github.com/xitongsys/parquet-go-source v0.0.0-20220315005136-aec0fe3e777c h1:UDtocVeACpnwauljUbeHD9UOjjcvF5kLUHruww7VT9A= github.com/xitongsys/parquet-go-source v0.0.0-20220315005136-aec0fe3e777c/go.mod h1:qLb2Itmdcp7KPa5KZKvhE9U1q5bYSOmgeOckF/H2rQA= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d h1:splanxYIlg+5LfHAM6xpdFEAYOk8iySO56hMFq6uLyA= +github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= @@ -854,6 +870,8 @@ go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ= go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= go.etcd.io/etcd v0.5.0-alpha.5.0.20200910180754-dd1b699fc489/go.mod h1:yVHk9ub3CSBatqGNg7GRmsnfLWtoW60w4eDYfh7vHDg= +go.mongodb.org/mongo-driver v1.11.1 h1:QP0znIRTuL0jf1oBQoAoM0C6ZJfBK4kx0Uumtv1A7w8= +go.mongodb.org/mongo-driver v1.11.1/go.mod h1:s7p5vEtfbeR1gYi6pnj3c3/urpbLv2T5Sfd6Rp2HBB8= go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1/go.mod h1:SNgMg+EgDFwmvSmLRTNKC5fegJjB7v23qTQ0XLGUNHk= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= @@ -881,6 +899,8 @@ golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8= +golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d h1:sK3txAijHtOK88l68nt020reeT1ZdKLIYetKl95FzVY= +golang.org/x/crypto 
v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -953,8 +973,9 @@ golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20210825183410-e898025ed96a/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.4.0 h1:Q5QPcMlvfxFTAPV0+07Xz/MpK9NTXu2VDUuy0FeMfaU= -golang.org/x/net v0.4.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.5.0 h1:GyT4nK/YDHSqa1c4753ouYCDajOYKTja9Xb/OHtgvSw= +golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -1044,14 +1065,15 @@ golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210426230700-d19ff857e887/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.3.0 h1:w8ZOecv6NaNa/zC8944JTU3vz4u6Lagfk4RPQxv92NQ= -golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.4.0 h1:Zr2JFtRQNX3BCZ8YtxRE9hNJYC8J6I1MVbMg6owUp18= +golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -1060,8 +1082,9 @@ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text 
v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.5.0 h1:OLmvp0KP+FVG99Ct/qFiL/Fhk4zp4QQnZ7b2U+5piUM= -golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.6.0 h1:3XmdazWV+ubf7QgHSTWeykHOci5oeekaGJBLkrkaw4k= +golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -1134,8 +1157,8 @@ google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/ google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.105.0 h1:t6P9Jj+6XTn4U9I2wycQai6Q/Kz7iOT+QzjJ3G2V4x8= -google.golang.org/api v0.105.0/go.mod h1:qh7eD5FJks5+BcE+cjBIm6Gz8vioK7EHvnlniqXBnqI= +google.golang.org/api v0.106.0 h1:ffmW0faWCwKkpbbtvlY/K/8fUl+JKvNS5CVzRoyfCv8= +google.golang.org/api v0.106.0/go.mod h1:2Ts0XTHNVWxypznxWOYUeI4g3WdP9Pk2Qk58+a/O9MY= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -1170,8 +1193,8 @@ google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfG google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20200527145253-8367513e4ece/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= google.golang.org/genproto v0.0.0-20201110150050-8816d57aaa9a/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20221206210731-b1a01be3a5f6 h1:AGXp12e/9rItf6/4QymU7WsAUwCf+ICW75cuR91nJIc= -google.golang.org/genproto v0.0.0-20221206210731-b1a01be3a5f6/go.mod h1:1dOng4TWOomJrDGhpXjfCD35wQC6jnC7HpRmOFRqEV0= +google.golang.org/genproto v0.0.0-20221227171554-f9683d7f8bef h1:uQ2vjV/sHTsWSqdKeLqmwitzgvjMl7o4IdtHwUDXSJY= +google.golang.org/genproto v0.0.0-20221227171554-f9683d7f8bef/go.mod h1:RGgjbofJ8xD9Sq1VVhDM1Vok1vRONV+rg+CjzG4SZKM= google.golang.org/grpc v0.0.0-20160317175043-d3ddb4469d5a/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= diff --git a/sdks/go/examples/wasm/wasm.go b/sdks/go/examples/wasm/wasm.go index b918f41feeeb5..3d51d55bb57bc 100644 --- a/sdks/go/examples/wasm/wasm.go +++ b/sdks/go/examples/wasm/wasm.go @@ -27,6 +27,7 @@ import ( _ "embed" "flag" "fmt" + "github.com/apache/beam/sdks/v2/go/pkg/beam" "github.com/apache/beam/sdks/v2/go/pkg/beam/io/textio" "github.com/apache/beam/sdks/v2/go/pkg/beam/log" @@ -155,9 +156,9 @@ func (fn *embeddedWasmFn) ProcessElement(ctx context.Context, s string) (string, defer fn.deallocate.Call(ctx, ptr, size) // The 
pointer is a linear memory offset, which is where we write the value of the DoFn's input element s. - if !fn.mod.Memory().Write(ctx, uint32(ptr), []byte(s)) { + if !fn.mod.Memory().Write(uint32(ptr), []byte(s)) { return "", fmt.Errorf("Memory.Write(%d, %d) out of range of memory size %d", - ptr, size, fn.mod.Memory().Size(ctx)) + ptr, size, fn.mod.Memory().Size()) } // Finally, we get the greeting message "Hello" concatenated to the DoFn's input element s. @@ -171,10 +172,10 @@ func (fn *embeddedWasmFn) ProcessElement(ctx context.Context, s string) (string, defer fn.deallocate.Call(ctx, uint64(resultPtr), uint64(resultSize)) // The pointer is a linear memory offset, which is where we wrote the results of the string concatenation. - bytes, ok := fn.mod.Memory().Read(ctx, resultPtr, resultSize) + bytes, ok := fn.mod.Memory().Read(resultPtr, resultSize) if !ok { return "", fmt.Errorf("Memory.Read(%d, %d) out of range of memory size %d", - resultPtr, resultSize, fn.mod.Memory().Size(ctx)) + resultPtr, resultSize, fn.mod.Memory().Size()) } // bytes contains our final result that we emit into the output PCollection @@ -193,7 +194,7 @@ func (fn *embeddedWasmFn) Teardown(ctx context.Context) error { // logString is an exported function to the wasm module that logs to console output. func logString(ctx context.Context, m api.Module, offset, byteCount uint32) { - buf, ok := m.Memory().Read(ctx, offset, byteCount) + buf, ok := m.Memory().Read(offset, byteCount) if !ok { log.Fatalf(ctx, "Memory.Read(%d, %d) out of range", offset, byteCount) } diff --git a/sdks/go/pkg/beam/core/metrics/metrics.go b/sdks/go/pkg/beam/core/metrics/metrics.go index 678a34a647d33..93dd9d0070e0f 100644 --- a/sdks/go/pkg/beam/core/metrics/metrics.go +++ b/sdks/go/pkg/beam/core/metrics/metrics.go @@ -49,6 +49,7 @@ package metrics import ( "context" "fmt" + "hash" "hash/fnv" "sort" "sync" @@ -221,28 +222,33 @@ func newName(ns, n string) name { // We hash the name to a uint64 so we avoid using go's native string hashing for // every use of a metrics. uint64s have faster lookup than strings as a result. // Collisions are possible, but statistically unlikely as namespaces and names -// are usually short enough to avoid this. +// are usually short enough to avoid this. A sync.Pool is used because it can provide +// goroutine-local values that reduce contention and profiling shows hashName from NewCounter +// can be a contention hotspot. See parallel benches metrics_test.go:BenchmarkMetrics/* var ( - hasherMu sync.Mutex - hasher = fnv.New64a() + hashPool = sync.Pool{ + New: func() interface{} { + return fnv.New64a() + }, + } ) func hashName(ns, n string) nameHash { - hasherMu.Lock() + hasher := hashPool.Get().(hash.Hash64) hasher.Reset() var buf [64]byte b := buf[:] - hashString(ns, b) - hashString(n, b) + hashString(hasher, ns, b) + hashString(hasher, n, b) h := hasher.Sum64() - hasherMu.Unlock() + hashPool.Put(hasher) return nameHash(h) } // hashString hashes a string with the package level hasher // and requires posession of the hasherMu lock. The byte // slice is assumed to be backed by a [64]byte. 
-func hashString(s string, b []byte) { +func hashString(hasher hash.Hash64, s string, b []byte) { l := len(s) i := 0 for len(s)-i > 64 { diff --git a/sdks/go/pkg/beam/core/metrics/metrics_test.go b/sdks/go/pkg/beam/core/metrics/metrics_test.go index 75b483184ab28..33d1b2c7ddd18 100644 --- a/sdks/go/pkg/beam/core/metrics/metrics_test.go +++ b/sdks/go/pkg/beam/core/metrics/metrics_test.go @@ -617,21 +617,23 @@ func TestPcolQueryResult(t *testing.T) { } } -// Run on @lostluck's desktop (2020/01/21) go1.13.4 +// Run on @shanemhansen's desktop (2022/01/03) go1.20 RC1 after changing hashName to use a pool of hashers +// sync.Pool can return thread-local results eliminating the need for a lock and increasing throughput. +// There are users in the wild who create an excessive number of Counters so a 4x improvement in throughput at the expense of +// creating GOMAXPROCS hasher values seems reasonable. // -// Allocs & bytes should be consistent within go versions, but ns/op is relative to the running machine. -// -// BenchmarkMetrics/counter_inplace-12 6054129 208 ns/op 48 B/op 1 allocs/op -// BenchmarkMetrics/distribution_inplace-12 5707147 228 ns/op 48 B/op 1 allocs/op -// BenchmarkMetrics/gauge_inplace-12 4742331 259 ns/op 48 B/op 1 allocs/op -// BenchmarkMetrics/counter_predeclared-12 90147133 12.7 ns/op 0 B/op 0 allocs/op -// BenchmarkMetrics/distribution_predeclared-12 55396678 21.6 ns/op 0 B/op 0 allocs/op -// BenchmarkMetrics/gauge_predeclared-12 18535839 60.5 ns/op 0 B/op 0 allocs/op -// BenchmarkMetrics/counter_raw-12 159581343 7.18 ns/op 0 B/op 0 allocs/op -// BenchmarkMetrics/distribution_raw-12 82724314 14.7 ns/op 0 B/op 0 allocs/op -// BenchmarkMetrics/gauge_raw-12 23292386 55.2 ns/op 0 B/op 0 allocs/op -// BenchmarkMetrics/getStore-12 309361303 3.78 ns/op 0 B/op 0 allocs/op -// BenchmarkMetrics/getCounterSet-12 287720998 3.98 ns/op 0 B/op 0 allocs/op +// name old time/op new time/op delta +// Metrics/counter_inplace-12 376ns ±17% 88ns ± 7% -76.66% (p=0.008 n=5+5) +// Metrics/distribution_inplace-12 394ns ± 3% 153ns ± 8% -61.17% (p=0.008 n=5+5) +// Metrics/gauge_inplace-12 371ns ± 4% 258ns ± 1% -30.37% (p=0.008 n=5+5) +// Metrics/counter_predeclared-12 16.9ns ± 6% 17.0ns ± 3% ~ (p=0.595 n=5+5) +// Metrics/distribution_predeclared-12 83.2ns ± 2% 84.9ns ± 1% ~ (p=0.056 n=5+5) +// Metrics/gauge_predeclared-12 105ns ± 6% 110ns ± 5% +4.81% (p=0.032 n=5+5) +// Metrics/counter_raw-12 10.8ns ± 4% 12.0ns ±28% ~ (p=0.151 n=5+5) +// Metrics/distribution_raw-12 77.6ns ± 7% 78.8ns ± 5% ~ (p=0.841 n=5+5) +// Metrics/gauge_raw-12 78.9ns ± 1% 77.3ns ± 4% ~ (p=0.151 n=5+5) +// Metrics/getStore-12 0.27ns ± 3% 0.27ns ± 2% ~ (p=0.841 n=5+5) +// Metrics/getCounterSet-12 0.32ns ± 3% 0.31ns ± 0% -1.28% (p=0.048 n=5+4) func BenchmarkMetrics(b *testing.B) { pt, c, d, g := "bench.bundle.data", "counter", "distribution", "gauge" aBundleID := "benchBID" @@ -664,9 +666,11 @@ func BenchmarkMetrics(b *testing.B) { } for _, test := range tests { b.Run(test.name, func(b *testing.B) { - for i := 0; i < b.N; i++ { - test.call() - } + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + test.call() + } + }) }) } } diff --git a/sdks/go/pkg/beam/core/runtime/exec/sdf.go b/sdks/go/pkg/beam/core/runtime/exec/sdf.go index be4f491b04cf8..d4b1b32d257d4 100644 --- a/sdks/go/pkg/beam/core/runtime/exec/sdf.go +++ b/sdks/go/pkg/beam/core/runtime/exec/sdf.go @@ -159,13 +159,13 @@ func (n *SplitAndSizeRestrictions) StartBundle(ctx context.Context, id string, d // Input Diagram: // // *FullValue { -// Elm: *FullValue (original 
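The metrics change above replaces a mutex-guarded, package-level FNV hasher with a sync.Pool, so concurrent hashName calls no longer serialize on a single lock. A minimal standalone sketch of that pooled-hasher pattern, using only the standard library (hashKey and hasherPool are illustrative names, not Beam's):

package main

import (
	"fmt"
	"hash"
	"hash/fnv"
	"sync"
)

// hasherPool hands out FNV-64a hashers on demand instead of sharing one
// hasher behind a mutex, which removes the contention point under parallel load.
var hasherPool = sync.Pool{
	New: func() interface{} { return fnv.New64a() },
}

// hashKey hashes a namespace/name pair the way the pooled hashName does:
// get a hasher, reset it, write both strings, take the sum, return it to the pool.
func hashKey(ns, n string) uint64 {
	h := hasherPool.Get().(hash.Hash64)
	h.Reset()
	h.Write([]byte(ns))
	h.Write([]byte(n))
	sum := h.Sum64()
	hasherPool.Put(h)
	return sum
}

func main() {
	fmt.Println(hashKey("beam", "element_count"))
}

Under steady parallel load the pool holds roughly one hasher per processor, which is the trade-off the benchmark notes above accept in exchange for the roughly 4x throughput gain on the inplace counter benchmarks.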
input) -// Elm2: *FullValue { -// Elm: Restriction -// Elm2: Watermark estimator state -// } -// Windows -// Timestamps +// Elm: *FullValue (original input) +// Elm2: *FullValue { +// Elm: Restriction +// Elm2: Watermark estimator state +// } +// Windows +// Timestamps // } // // ProcessElement splits the given restriction into one or more restrictions and @@ -175,22 +175,25 @@ func (n *SplitAndSizeRestrictions) StartBundle(ctx context.Context, id string, d // // Output Diagram: // -// *FullValue { -// Elm: *FullValue { -// Elm: *FullValue (original input) -// Elm2: *FullValue { -// Elm: Restriction -// Elm2: Watermark estimator state -// } -// } -// Elm2: float64 (size) -// Windows -// Timestamps -// } +// *FullValue { +// Elm: *FullValue { +// Elm: *FullValue (original input) +// Elm2: *FullValue { +// Elm: Restriction +// Elm2: Watermark estimator state +// } +// } +// Elm2: float64 (size) +// Windows +// Timestamps +// } func (n *SplitAndSizeRestrictions) ProcessElement(ctx context.Context, elm *FullValue, values ...ReStream) error { rest := elm.Elm2.(*FullValue).Elm ws := elm.Elm2.(*FullValue).Elm2 - mainElm := elm.Elm.(*FullValue) + + // If receiving directly from a datasource, + // the element may not be wrapped in a *FullValue + mainElm := convertIfNeeded(elm.Elm, &FullValue{}) splitRests := n.splitInv.Invoke(mainElm, rest) diff --git a/sdks/go/pkg/beam/core/runtime/exec/translate_test.go b/sdks/go/pkg/beam/core/runtime/exec/translate_test.go index dca9e43e8c921..c6a70fe07a020 100644 --- a/sdks/go/pkg/beam/core/runtime/exec/translate_test.go +++ b/sdks/go/pkg/beam/core/runtime/exec/translate_test.go @@ -17,7 +17,9 @@ package exec import ( "fmt" + fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1" "reflect" + "strings" "testing" "time" @@ -318,3 +320,197 @@ func makeWindowMappingFn(w *window.Fn) (*pipepb.FunctionSpec, error) { } return wFn, nil } + +func TestInputIdToIndex(t *testing.T) { + tests := []struct { + in string + want int + }{ + { // does not start with i + "90", + 0, + }, + { // start with i + "i0", + 0, + }, + { + "i1", + 1, + }, + { + "i10", + 10, + }, + } + + for _, test := range tests { + got, err := inputIdToIndex(test.in) + if !strings.HasPrefix(test.in, "i") { + if err == nil { + t.Errorf("should return err when string does not has a prefix of i, but didn't. inputIdToIndex(%v) = (%v, %v)", test.in, got, err) + } + } else { + if got != test.want { + t.Errorf("can not correctly convert inputId to index. inputIdToIndex(%v) = (%v, %v), want %v", test.in, got, err, test.want) + } + } + } +} + +func TestIndexToInputId(t *testing.T) { + tests := []struct { + in int + want string + }{ + { + 1, + "i1", + }, + { + 1000, + "i1000", + }, + } + + for _, test := range tests { + got := indexToInputId(test.in) + if got != test.want { + t.Errorf("can not correctly convert index to inputId. indexToInputId(%v) = (%v), want %v", test.in, got, test.want) + } + } +} + +func TestUnmarshalPort(t *testing.T) { + var port fnpb.RemoteGrpcPort + + tests := []struct { + inputData []byte + outputPort Port + outputStr string + outputError error + }{ + { + inputData: []byte{}, + outputPort: Port{URL: port.GetApiServiceDescriptor().GetUrl()}, + outputStr: fnpb.RemoteGrpcPort{}.CoderId, + outputError: nil, + }, + } + + for _, test := range tests { + port, str, err := unmarshalPort(test.inputData) + if err != nil && test.outputError == nil { + t.Errorf("there is an error where should not be. 
unmarshalPort(%v) = (%v, %v, %v), want (%v, %v, %v)", test.inputData, port, str, err, test.outputPort, test.outputStr, test.outputError) + } else if err != nil && err != test.outputError { + t.Errorf("got an unexpected error: %v, want: %v", err, test.outputError) + } else if port != test.outputPort { + t.Errorf("the output port is not right. unmarshalPort(%v) = (%v, %v, %v), want (%v, %v, %v)", test.inputData, port, str, err, test.outputPort, test.outputStr, test.outputError) + } else if str != test.outputStr { + t.Errorf("the output string is not right. unmarshalPort(%v) = (%v, %v, %v), want (%v, %v, %v)", test.inputData, port, str, err, test.outputPort, test.outputStr, test.outputError) + } + } +} + +func TestUnmarshalPlan(t *testing.T) { + transform := pipepb.PTransform{ + Spec: &pipepb.FunctionSpec{ + Urn: urnDataSource, + }, + Outputs: map[string]string{}, + } + tests := []struct { + name string + inputDesc *fnpb.ProcessBundleDescriptor + outputPlan *Plan + outputError error + }{ + { + name: "test_no_root_units", + inputDesc: &fnpb.ProcessBundleDescriptor{ + Id: "", + Transforms: map[string]*pipepb.PTransform{}, + }, + outputPlan: nil, + outputError: errors.New("no root units"), + }, + { + name: "test_zero_transform", + inputDesc: &fnpb.ProcessBundleDescriptor{ + Id: "", + Transforms: map[string]*pipepb.PTransform{ + "": {}, + }, + }, + outputPlan: nil, + outputError: errors.New("no root units"), + }, + { + name: "test_transform_outputs_length_not_one", + inputDesc: &fnpb.ProcessBundleDescriptor{ + Id: "", + Transforms: map[string]*pipepb.PTransform{ + "": &transform, + }, + }, + outputPlan: nil, + outputError: errors.Errorf("expected one output from DataSource, got %v", transform.GetOutputs()), + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + plan, err := UnmarshalPlan(test.inputDesc) + if err != nil && test.outputError == nil { + t.Errorf("there is an error where should not be. UnmarshalPlan(%v) = (%v, %v), want (%v, %v)", test.inputDesc, plan, err, test.outputPlan, test.outputError) + } else if err != nil && !reflect.DeepEqual(err, test.outputError) { + t.Errorf("got an unexpected error: %v, want: %v", err, test.outputError) + } else if !reflect.DeepEqual(plan, test.outputPlan) { + t.Errorf("the output builder is not right. UnmarshalPlan(%v) = (%v, %v), want (%v, %v)", test.inputDesc, plan, err, test.outputPlan, test.outputError) + } + }) + } +} + +func TestNewBuilder(t *testing.T) { + descriptor := fnpb.ProcessBundleDescriptor{ + Id: "", + Transforms: map[string]*pipepb.PTransform{}, + } + tests := []struct { + name string + inputDesc *fnpb.ProcessBundleDescriptor + outputBuilder *builder + outputError error + }{ + { + name: "test_1", + inputDesc: &descriptor, + outputBuilder: &builder{ + desc: &descriptor, + coders: graphx.NewCoderUnmarshaller(descriptor.GetCoders()), + prev: make(map[string]int), + succ: make(map[string][]linkID), + windowing: make(map[string]*window.WindowingStrategy), + nodes: make(map[string]*PCollection), + links: make(map[linkID]Node), + units: nil, + idgen: &GenID{}, + }, + outputError: nil, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + b, err := newBuilder(test.inputDesc) + if err != nil && test.outputError == nil { + t.Errorf("There is an error where should not be. 
newBuilder(%v) = (%v, %v), want (%v, %v)", test.inputDesc, b, err, test.outputBuilder, test.outputError) + } else if err != nil && err != test.outputError { + t.Errorf("got an unexpected error: %v, want: %v", err, test.outputError) + } else if !reflect.DeepEqual(b, test.outputBuilder) { + t.Errorf("The output builder is not right. newBuilder(%v) = (%v, %v), want (%v, %v)", test.inputDesc, b, err, test.outputBuilder, test.outputError) + } + }) + } +} diff --git a/sdks/go/pkg/beam/core/runtime/exec/userstate_test.go b/sdks/go/pkg/beam/core/runtime/exec/userstate_test.go index 9220e04f66ed1..e25e4019562c7 100644 --- a/sdks/go/pkg/beam/core/runtime/exec/userstate_test.go +++ b/sdks/go/pkg/beam/core/runtime/exec/userstate_test.go @@ -138,3 +138,68 @@ func (t *testIoWriter) Write(b []byte) (int, error) { t.b = b return len(b), nil } + +func TestNewUserStateAdapter(t *testing.T) { + testCoder := &coder.Coder{ + Kind: coder.WindowedValue, + T: nil, + Components: []*coder.Coder{ + { + Kind: coder.KV, + Components: []*coder.Coder{ + { + Kind: coder.Double, + }, + { + Kind: coder.Bool, + }, + }, + }, + }, + Custom: nil, + Window: &coder.WindowCoder{ + Kind: coder.GlobalWindow, + Payload: "", + }, + ID: "", + } + tests := []struct { + name string + sid StreamID + c *coder.Coder + stateIDToCoder map[string]*coder.Coder + stateIDToKeyCoder map[string]*coder.Coder + stateIDToCombineFn map[string]*graph.CombineFn + adapter UserStateAdapter + }{ + { + name: "", + sid: StreamID{ + Port: Port{}, + PtransformID: "", + }, + c: testCoder, + stateIDToCoder: nil, + stateIDToKeyCoder: nil, + stateIDToCombineFn: nil, + adapter: &userStateAdapter{ + sid: StreamID{}, + wc: &globalWindowEncoder{}, + kc: MakeElementEncoder(coder.SkipW(testCoder).Components[0]), + stateIDToCoder: nil, + stateIDToKeyCoder: nil, + stateIDToCombineFn: nil, + c: testCoder, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + adapter := NewUserStateAdapter(test.sid, test.c, test.stateIDToCoder, test.stateIDToKeyCoder, test.stateIDToCombineFn) + if !reflect.DeepEqual(adapter, test.adapter) { + t.Errorf("NewUserStateAdapter(%v, %v, %v, %v, %v)=%v, want %v", test.sid, test.c, test.stateIDToCoder, test.stateIDToKeyCoder, test.stateIDToCombineFn, adapter, test.adapter) + } + }) + } +} diff --git a/sdks/go/pkg/beam/io/mongodbio/coder.go b/sdks/go/pkg/beam/io/mongodbio/coder.go new file mode 100644 index 0000000000000..c140f9a8a257c --- /dev/null +++ b/sdks/go/pkg/beam/io/mongodbio/coder.go @@ -0,0 +1,68 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package mongodbio + +import ( + "fmt" + "reflect" + + "github.com/apache/beam/sdks/v2/go/pkg/beam" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" +) + +func init() { + beam.RegisterCoder( + reflect.TypeOf((*bson.M)(nil)).Elem(), + encodeBSONMap, + decodeBSONMap, + ) + beam.RegisterCoder( + reflect.TypeOf((*primitive.ObjectID)(nil)).Elem(), + encodeObjectID, + decodeObjectID, + ) +} + +func encodeBSONMap(m bson.M) ([]byte, error) { + bytes, err := bson.Marshal(m) + if err != nil { + return nil, fmt.Errorf("error encoding BSON: %w", err) + } + + return bytes, nil +} + +func decodeBSONMap(bytes []byte) (bson.M, error) { + var out bson.M + if err := bson.Unmarshal(bytes, &out); err != nil { + return nil, fmt.Errorf("error decoding BSON: %w", err) + } + + return out, nil +} + +func encodeObjectID(objectID primitive.ObjectID) []byte { + return objectID[:] +} + +func decodeObjectID(bytes []byte) primitive.ObjectID { + var out primitive.ObjectID + + copy(out[:], bytes[:]) + + return out +} diff --git a/sdks/go/pkg/beam/io/mongodbio/coder_test.go b/sdks/go/pkg/beam/io/mongodbio/coder_test.go new file mode 100644 index 0000000000000..d5e3bb2974d1d --- /dev/null +++ b/sdks/go/pkg/beam/io/mongodbio/coder_test.go @@ -0,0 +1,160 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
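coder.go above registers BSON-based encode/decode functions for bson.M and primitive.ObjectID through beam.RegisterCoder. The same pattern extends to user document types; a hedged sketch, assuming a hypothetical Event struct and package name that are not part of this change:

package myio

import (
	"reflect"

	"github.com/apache/beam/sdks/v2/go/pkg/beam"
	"go.mongodb.org/mongo-driver/bson"
)

// Event is a hypothetical document type; any struct with bson tags works the same way.
type Event struct {
	Timestamp int64 `bson:"timestamp"`
	EventType int32 `bson:"event_type"`
}

func init() {
	// Register BSON marshalling as the Beam coder for Event, mirroring the
	// bson.M and primitive.ObjectID registrations above.
	beam.RegisterCoder(reflect.TypeOf((*Event)(nil)).Elem(), encodeEvent, decodeEvent)
}

func encodeEvent(e Event) ([]byte, error) { return bson.Marshal(e) }

func decodeEvent(b []byte) (Event, error) {
	var out Event
	err := bson.Unmarshal(b, &out)
	return out, err
}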
+ +package mongodbio + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" +) + +func Test_encodeBSONMap(t *testing.T) { + tests := []struct { + name string + m bson.M + want []byte + wantErr bool + }{ + { + name: "Encode bson.M", + m: bson.M{"key": "val"}, + want: []byte{18, 0, 0, 0, 2, 107, 101, 121, 0, 4, 0, 0, 0, 118, 97, 108, 0, 0}, + wantErr: false, + }, + { + name: "Encode empty bson.M", + m: bson.M{}, + want: []byte{5, 0, 0, 0, 0}, + wantErr: false, + }, + { + name: "Encode nil bson.M", + m: bson.M(nil), + want: []byte{5, 0, 0, 0, 0}, + wantErr: false, + }, + { + name: "Error - invalid bson.M", + m: bson.M{"key": make(chan int)}, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := encodeBSONMap(tt.m) + if (err != nil) != tt.wantErr { + t.Fatalf("encodeBSONMap() error = %v, wantErr %v", err, tt.wantErr) + } + + if !cmp.Equal(got, tt.want) { + t.Errorf("encodeBSONMap() got = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_decodeBSONMap(t *testing.T) { + tests := []struct { + name string + bytes []byte + want bson.M + wantErr bool + }{ + { + name: "Decode bson.M", + bytes: []byte{18, 0, 0, 0, 2, 107, 101, 121, 0, 4, 0, 0, 0, 118, 97, 108, 0, 0}, + want: bson.M{"key": "val"}, + wantErr: false, + }, + { + name: "Decode empty bson.M", + bytes: []byte{5, 0, 0, 0, 0}, + want: bson.M{}, + wantErr: false, + }, + { + name: "Error - invalid bson.M", + bytes: []byte{}, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := decodeBSONMap(tt.bytes) + if (err != nil) != tt.wantErr { + t.Fatalf("decodeBSONMap() error = %v, wantErr %v", err, tt.wantErr) + } + + if !cmp.Equal(got, tt.want) { + t.Errorf("decodeBSONMap() got = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_encodeObjectID(t *testing.T) { + tests := []struct { + name string + objectID primitive.ObjectID + want []byte + }{ + { + name: "Encode object ID", + objectID: objectIDFromHex(t, "5f1b2c3d4e5f60708090a0b0"), + want: []byte{95, 27, 44, 61, 78, 95, 96, 112, 128, 144, 160, 176}, + }, + { + name: "Encode nil object ID", + objectID: primitive.NilObjectID, + want: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := encodeObjectID(tt.objectID); !cmp.Equal(got, tt.want) { + t.Errorf("encodeObjectID() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_decodeObjectID(t *testing.T) { + tests := []struct { + name string + bytes []byte + want primitive.ObjectID + }{ + { + name: "Decode object ID", + bytes: []byte{95, 27, 44, 61, 78, 95, 96, 112, 128, 144, 160, 176}, + want: objectIDFromHex(t, "5f1b2c3d4e5f60708090a0b0"), + }, + { + name: "Decode nil object ID", + bytes: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + want: primitive.NilObjectID, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := decodeObjectID(tt.bytes); !cmp.Equal(got, tt.want) { + t.Errorf("decodeObjectID() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/sdks/go/pkg/beam/io/mongodbio/common.go b/sdks/go/pkg/beam/io/mongodbio/common.go new file mode 100644 index 0000000000000..9d6ffbeaa9576 --- /dev/null +++ b/sdks/go/pkg/beam/io/mongodbio/common.go @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. 
See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package mongodbio contains transforms for reading from and writing to MongoDB. +package mongodbio + +import ( + "context" + "fmt" + + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/options" + "go.mongodb.org/mongo-driver/mongo/readpref" +) + +const ( + bsonTag = "bson" +) + +type mongoDBFn struct { + URI string + Database string + Collection string + client *mongo.Client + collection *mongo.Collection +} + +func (fn *mongoDBFn) Setup(ctx context.Context) error { + client, err := newClient(ctx, fn.URI) + if err != nil { + return err + } + + fn.client = client + fn.collection = client.Database(fn.Database).Collection(fn.Collection) + + return nil +} + +func newClient(ctx context.Context, uri string) (*mongo.Client, error) { + opts := options.Client().ApplyURI(uri) + + client, err := mongo.Connect(ctx, opts) + if err != nil { + return nil, fmt.Errorf("error connecting to MongoDB: %w", err) + } + + if err := client.Ping(ctx, readpref.Primary()); err != nil { + return nil, fmt.Errorf("error pinging MongoDB: %w", err) + } + + return client, nil +} + +func (fn *mongoDBFn) Teardown(ctx context.Context) error { + if err := fn.client.Disconnect(ctx); err != nil { + return fmt.Errorf("error disconnecting from MongoDB: %w", err) + } + + return nil +} diff --git a/sdks/go/pkg/beam/io/mongodbio/example_test.go b/sdks/go/pkg/beam/io/mongodbio/example_test.go new file mode 100644 index 0000000000000..3e77303843e54 --- /dev/null +++ b/sdks/go/pkg/beam/io/mongodbio/example_test.go @@ -0,0 +1,180 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package mongodbio_test + +import ( + "context" + "log" + "reflect" + "time" + + "github.com/apache/beam/sdks/v2/go/pkg/beam" + "github.com/apache/beam/sdks/v2/go/pkg/beam/io/mongodbio" + "github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx" + "github.com/apache/beam/sdks/v2/go/pkg/beam/x/debug" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" +) + +func ExampleRead_default() { + type Event struct { + ID primitive.ObjectID `bson:"_id"` + Timestamp int64 `bson:"timestamp"` + EventType int32 `bson:"event_type"` + } + + beam.Init() + p, s := beam.NewPipelineWithRoot() + + col := mongodbio.Read( + s, + "mongodb://localhost:27017", + "demo", + "events", + reflect.TypeOf(Event{}), + ) + debug.Print(s, col) + + if err := beamx.Run(context.Background(), p); err != nil { + log.Fatalf("Failed to execute job: %v", err) + } +} + +func ExampleRead_options() { + type Event struct { + ID primitive.ObjectID `bson:"_id"` + Timestamp int64 `bson:"timestamp"` + EventType int32 `bson:"event_type"` + } + + beam.Init() + p, s := beam.NewPipelineWithRoot() + + col := mongodbio.Read( + s, + "mongodb://localhost:27017", + "demo", + "events", + reflect.TypeOf(Event{}), + mongodbio.WithReadBucketAuto(true), + mongodbio.WithReadBundleSize(32*1024*1024), + mongodbio.WithReadFilter(bson.M{"timestamp": bson.M{"$gt": 1640995200000}}), + ) + debug.Print(s, col) + + if err := beamx.Run(context.Background(), p); err != nil { + log.Fatalf("Failed to execute job: %v", err) + } +} + +func ExampleWrite_default() { + type Event struct { + ID primitive.ObjectID `bson:"_id"` + Timestamp int64 `bson:"timestamp"` + EventType int32 `bson:"event_type"` + } + + beam.Init() + p, s := beam.NewPipelineWithRoot() + + input := []Event{ + { + ID: primitive.NewObjectIDFromTimestamp(time.UnixMilli(1640995200001)), + Timestamp: 1640995200001, + EventType: 1, + }, + { + ID: primitive.NewObjectIDFromTimestamp(time.UnixMilli(1640995200002)), + Timestamp: 1640995200002, + EventType: 2, + }, + } + + col := beam.CreateList(s, input) + mongodbio.Write(s, "mongodb://localhost:27017", "demo", "events", col) + + if err := beamx.Run(context.Background(), p); err != nil { + log.Fatalf("Failed to execute job: %v", err) + } +} + +func ExampleWrite_options() { + type Event struct { + ID primitive.ObjectID `bson:"_id"` + Timestamp int64 `bson:"timestamp"` + EventType int32 `bson:"event_type"` + } + + beam.Init() + p, s := beam.NewPipelineWithRoot() + + input := []Event{ + { + ID: primitive.NewObjectIDFromTimestamp(time.UnixMilli(1640995200001)), + Timestamp: 1640995200001, + EventType: 1, + }, + { + ID: primitive.NewObjectIDFromTimestamp(time.UnixMilli(1640995200002)), + Timestamp: 1640995200002, + EventType: 2, + }, + } + + col := beam.CreateList(s, input) + mongodbio.Write( + s, + "mongodb://localhost:27017", + "demo", + "events", + col, + mongodbio.WithWriteBatchSize(500), + mongodbio.WithWriteOrdered(false), + ) + + if err := beamx.Run(context.Background(), p); err != nil { + log.Fatalf("Failed to execute job: %v", err) + } +} + +func ExampleWrite_generateID() { + type Event struct { + Timestamp int64 `bson:"timestamp"` + EventType int32 `bson:"event_type"` + } + + beam.Init() + p, s := beam.NewPipelineWithRoot() + + input := []Event{ + { + Timestamp: 1640995200001, + EventType: 1, + }, + { + Timestamp: 1640995200002, + EventType: 1, + }, + } + + col := beam.CreateList(s, input) + ids := mongodbio.Write(s, "mongodb://localhost:27017", "demo", "events", col) + debug.Print(s, ids) + + if err := 
beamx.Run(context.Background(), p); err != nil { + log.Fatalf("Failed to execute job: %v", err) + } +} diff --git a/playground/backend/internal/db/entity/common.go b/sdks/go/pkg/beam/io/mongodbio/helper_test.go similarity index 71% rename from playground/backend/internal/db/entity/common.go rename to sdks/go/pkg/beam/io/mongodbio/helper_test.go index 1fc48c553408d..c5a63c15adb46 100644 --- a/playground/backend/internal/db/entity/common.go +++ b/sdks/go/pkg/beam/io/mongodbio/helper_test.go @@ -13,13 +13,21 @@ // See the License for the specific language governing permissions and // limitations under the License. -package entity +package mongodbio -type IDMeta struct { - Salt string - IdLength int8 -} +import ( + "testing" + + "go.mongodb.org/mongo-driver/bson/primitive" +) + +func objectIDFromHex(t *testing.T, hex string) primitive.ObjectID { + t.Helper() + + id, err := primitive.ObjectIDFromHex(hex) + if err != nil { + t.Fatalf("error parsing hex string to primitive.ObjectID: %v", err) + } -type DatastoreEntity interface { - ExampleEntity | SnippetEntity + return id } diff --git a/sdks/go/pkg/beam/io/mongodbio/read.go b/sdks/go/pkg/beam/io/mongodbio/read.go new file mode 100644 index 0000000000000..e09f2f1e1af84 --- /dev/null +++ b/sdks/go/pkg/beam/io/mongodbio/read.go @@ -0,0 +1,492 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mongodbio + +import ( + "context" + "fmt" + "math" + "reflect" + + "github.com/apache/beam/sdks/v2/go/pkg/beam" + "github.com/apache/beam/sdks/v2/go/pkg/beam/log" + "github.com/apache/beam/sdks/v2/go/pkg/beam/register" + "github.com/apache/beam/sdks/v2/go/pkg/beam/util/structx" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/options" + "go.mongodb.org/mongo-driver/mongo/readpref" +) + +const ( + defaultReadBundleSize = 64 * 1024 * 1024 + + minSplitVectorChunkSize = 1024 * 1024 + maxSplitVectorChunkSize = 1024 * 1024 * 1024 + + maxBucketCount = math.MaxInt32 +) + +func init() { + register.DoFn3x1[context.Context, []byte, func(bson.M), error](&bucketAutoFn{}) + register.DoFn3x1[context.Context, []byte, func(bson.M), error](&splitVectorFn{}) + register.Emitter1[bson.M]() + + register.DoFn3x1[context.Context, bson.M, func(beam.Y), error](&readFn{}) + register.Emitter1[beam.Y]() +} + +// Read reads a MongoDB collection and returns a PCollection for a given type T. T must be a +// struct with exported fields that should have a "bson" tag. By default, the transform uses the +// MongoDB internal splitVector command to split the collection into bundles. The transform can be +// configured to use the $bucketAuto aggregation instead to support reading from MongoDB Atlas +// where the splitVector command is not allowed. 
This is enabled by passing the ReadOptionFn +// WithReadBucketAuto(true). +// +// The Read transform has the required parameters: +// - s: the scope of the pipeline +// - uri: the MongoDB connection string +// - database: the MongoDB database to read from +// - collection: the MongoDB collection to read from +// - t: the type of the elements in the collection +// +// The Read transform takes a variadic number of ReadOptionFn which can set the ReadOption fields: +// - BucketAuto: whether to use the bucketAuto aggregation to split the collection into bundles. +// Defaults to false +// - Filter: a bson.M map that is used to filter the documents in the collection. Defaults to nil, +// which means no filter is applied +// - BundleSize: the size in bytes to bundle the documents into when reading. Defaults to +// 64 * 1024 * 1024 (64 MB) +func Read( + s beam.Scope, + uri string, + database string, + collection string, + t reflect.Type, + opts ...ReadOptionFn, +) beam.PCollection { + s = s.Scope("mongodbio.Read") + + option := &ReadOption{ + BundleSize: defaultReadBundleSize, + } + + for _, opt := range opts { + if err := opt(option); err != nil { + panic(fmt.Sprintf("mongodbio.Read: invalid option: %v", err)) + } + } + + imp := beam.Impulse(s) + + var bundled beam.PCollection + + if option.BucketAuto { + bundled = beam.ParDo(s, newBucketAutoFn(uri, database, collection, option), imp) + } else { + bundled = beam.ParDo(s, newSplitVectorFn(uri, database, collection, option), imp) + } + + return beam.ParDo( + s, + newReadFn(uri, database, collection, t, option), + bundled, + beam.TypeDefinition{Var: beam.YType, T: t}, + ) +} + +type bucketAutoFn struct { + mongoDBFn + BundleSize int64 +} + +func newBucketAutoFn( + uri string, + database string, + collection string, + option *ReadOption, +) *bucketAutoFn { + return &bucketAutoFn{ + mongoDBFn: mongoDBFn{ + URI: uri, + Database: database, + Collection: collection, + }, + BundleSize: option.BundleSize, + } +} + +func (fn *bucketAutoFn) ProcessElement( + ctx context.Context, + _ []byte, + emit func(bson.M), +) error { + collectionSize, err := fn.getCollectionSize(ctx) + if err != nil { + return err + } + + if collectionSize == 0 { + return nil + } + + bucketCount := calculateBucketCount(collectionSize, fn.BundleSize) + + buckets, err := fn.getBuckets(ctx, bucketCount) + if err != nil { + return err + } + + idFilters := idFiltersFromBuckets(buckets) + + for _, filter := range idFilters { + emit(filter) + } + + return nil +} + +type collStats struct { + Size int64 `bson:"size"` +} + +func (fn *bucketAutoFn) getCollectionSize(ctx context.Context) (int64, error) { + cmd := bson.M{"collStats": fn.Collection} + opts := options.RunCmd().SetReadPreference(readpref.Primary()) + + var stats collStats + if err := fn.collection.Database().RunCommand(ctx, cmd, opts).Decode(&stats); err != nil { + return 0, fmt.Errorf("error executing collStats command: %w", err) + } + + return stats.Size, nil +} + +func calculateBucketCount(collectionSize int64, bundleSize int64) int32 { + if bundleSize < 0 { + panic("mongodbio.calculateBucketCount: bundle size must be greater than 0") + } + + count := collectionSize / bundleSize + if collectionSize%bundleSize != 0 { + count++ + } + + if count > int64(maxBucketCount) { + count = maxBucketCount + } + + return int32(count) +} + +type bucket struct { + ID minMax `bson:"_id"` +} + +type minMax struct { + Min any `bson:"min"` + Max any `bson:"max"` +} + +func (fn *bucketAutoFn) getBuckets(ctx context.Context, count int32) ([]bucket, error)
{ + pipeline := mongo.Pipeline{bson.D{{ + Key: "$bucketAuto", + Value: bson.M{ + "groupBy": "$_id", + "buckets": count, + }, + }}} + + cursor, err := fn.collection.Aggregate(ctx, pipeline) + if err != nil { + return nil, fmt.Errorf("error executing bucketAuto aggregation: %w", err) + } + + var buckets []bucket + if err = cursor.All(ctx, &buckets); err != nil { + return nil, fmt.Errorf("error decoding buckets: %w", err) + } + + return buckets, nil +} + +func idFiltersFromBuckets(buckets []bucket) []bson.M { + idFilters := make([]bson.M, len(buckets)) + + for i := 0; i < len(buckets); i++ { + filter := bson.M{} + + if i != 0 { + filter["$gt"] = buckets[i].ID.Min + } + + if i != len(buckets)-1 { + filter["$lte"] = buckets[i].ID.Max + } + + if len(filter) == 0 { + idFilters[i] = filter + } else { + idFilters[i] = bson.M{"_id": filter} + } + } + + return idFilters +} + +type splitVectorFn struct { + mongoDBFn + BundleSize int64 +} + +func newSplitVectorFn( + uri string, + database string, + collection string, + option *ReadOption, +) *splitVectorFn { + return &splitVectorFn{ + mongoDBFn: mongoDBFn{ + URI: uri, + Database: database, + Collection: collection, + }, + BundleSize: option.BundleSize, + } +} + +func (fn *splitVectorFn) ProcessElement( + ctx context.Context, + _ []byte, + emit func(bson.M), +) error { + chunkSize := getChunkSize(fn.BundleSize) + + splitKeys, err := fn.getSplitKeys(ctx, chunkSize) + if err != nil { + return err + } + + idFilters := idFiltersFromSplits(splitKeys) + + for _, filter := range idFilters { + emit(filter) + } + + return nil +} + +func getChunkSize(bundleSize int64) int64 { + var chunkSize int64 + + if bundleSize < minSplitVectorChunkSize { + chunkSize = minSplitVectorChunkSize + } else if bundleSize > maxSplitVectorChunkSize { + chunkSize = maxSplitVectorChunkSize + } else { + chunkSize = bundleSize + } + + return chunkSize +} + +type splitVector struct { + SplitKeys []splitKey `bson:"splitKeys"` +} + +type splitKey struct { + ID any `bson:"_id"` +} + +func (fn *splitVectorFn) getSplitKeys(ctx context.Context, chunkSize int64) ([]splitKey, error) { + cmd := bson.D{ + {Key: "splitVector", Value: fmt.Sprintf("%s.%s", fn.Database, fn.Collection)}, + {Key: "keyPattern", Value: bson.D{{Key: "_id", Value: 1}}}, + {Key: "maxChunkSizeBytes", Value: chunkSize}, + } + + opts := options.RunCmd().SetReadPreference(readpref.Primary()) + + var vector splitVector + if err := fn.collection.Database().RunCommand(ctx, cmd, opts).Decode(&vector); err != nil { + return nil, fmt.Errorf("error executing splitVector command: %w", err) + } + + return vector.SplitKeys, nil +} + +func idFiltersFromSplits(splitKeys []splitKey) []bson.M { + idFilters := make([]bson.M, len(splitKeys)+1) + + for i := 0; i < len(splitKeys)+1; i++ { + filter := bson.M{} + + if i > 0 { + filter["$gt"] = splitKeys[i-1].ID + } + + if i < len(splitKeys) { + filter["$lte"] = splitKeys[i].ID + } + + if len(filter) == 0 { + idFilters[i] = filter + } else { + idFilters[i] = bson.M{"_id": filter} + } + } + + return idFilters +} + +type readFn struct { + mongoDBFn + Filter []byte + Type beam.EncodedType + projection bson.D + filter bson.M +} + +func newReadFn( + uri string, + database string, + collection string, + t reflect.Type, + option *ReadOption, +) *readFn { + filter, err := encodeBSONMap(option.Filter) + if err != nil { + panic(fmt.Sprintf("mongodbio.newReadFn: %v", err)) + } + + return &readFn{ + mongoDBFn: mongoDBFn{ + URI: uri, + Database: database, + Collection: collection, + }, + Filter: filter, + Type: 
beam.EncodedType{T: t}, + } +} + +func (fn *readFn) Setup(ctx context.Context) error { + if err := fn.mongoDBFn.Setup(ctx); err != nil { + return err + } + + filter, err := decodeBSONMap(fn.Filter) + if err != nil { + return err + } + + fn.filter = filter + fn.projection = inferProjection(fn.Type.T, bsonTag) + + return nil +} + +func inferProjection(t reflect.Type, tagKey string) bson.D { + names := structx.InferFieldNames(t, tagKey) + if len(names) == 0 { + panic("mongodbio.inferProjection: no names to infer projection from") + } + + projection := make(bson.D, len(names)) + + for i, name := range names { + projection[i] = bson.E{Key: name, Value: 1} + } + + return projection +} + +func (fn *readFn) ProcessElement( + ctx context.Context, + elem bson.M, + emit func(beam.Y), +) (err error) { + mergedFilter := mergeFilters(elem, fn.filter) + + cursor, err := fn.findDocuments(ctx, fn.projection, mergedFilter) + if err != nil { + return err + } + + defer func() { + closeErr := cursor.Close(ctx) + + if err != nil { + if closeErr != nil { + log.Errorf(ctx, "error closing cursor: %v", closeErr) + } + return + } + + err = closeErr + }() + + for cursor.Next(ctx) { + value, err := decodeDocument(cursor, fn.Type.T) + if err != nil { + return err + } + + emit(value) + } + + return cursor.Err() +} + +func mergeFilters(idFilter bson.M, customFilter bson.M) bson.M { + if len(idFilter) == 0 { + return customFilter + } + + if len(customFilter) == 0 { + return idFilter + } + + return bson.M{ + "$and": []bson.M{idFilter, customFilter}, + } +} + +func (fn *readFn) findDocuments( + ctx context.Context, + projection bson.D, + filter bson.M, +) (*mongo.Cursor, error) { + opts := options.Find().SetProjection(projection).SetAllowDiskUse(true) + + cursor, err := fn.collection.Find(ctx, filter, opts) + if err != nil { + return nil, fmt.Errorf("error finding documents: %w", err) + } + + return cursor, nil +} + +func decodeDocument(cursor *mongo.Cursor, t reflect.Type) (any, error) { + out := reflect.New(t).Interface() + if err := cursor.Decode(out); err != nil { + return nil, fmt.Errorf("error decoding document: %w", err) + } + + value := reflect.ValueOf(out).Elem().Interface() + + return value, nil +} diff --git a/sdks/go/pkg/beam/io/mongodbio/read_option.go b/sdks/go/pkg/beam/io/mongodbio/read_option.go new file mode 100644 index 0000000000000..b724c306a817c --- /dev/null +++ b/sdks/go/pkg/beam/io/mongodbio/read_option.go @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mongodbio + +import ( + "errors" + + "go.mongodb.org/mongo-driver/bson" +) + +// ReadOption represents options for reading from MongoDB. +type ReadOption struct { + BucketAuto bool + Filter bson.M + BundleSize int64 +} + +// ReadOptionFn is a function that configures a ReadOption. 
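read.go above sizes bundles and then turns the splitVector (or $bucketAuto) boundaries into per-bundle _id range filters: with the default 64 MB bundle size a 200 MB collection yields ceil(200/64) = 4 bundles, and n split keys always become n+1 filters. The helpers are unexported, so the sketch below restates the filter construction on its own, with made-up int32 split keys, purely for illustration:

package main

import (
	"fmt"

	"go.mongodb.org/mongo-driver/bson"
)

// rangesFromSplits restates the idFiltersFromSplits logic: each bundle reads
// documents with _id in (previous split, current split], with open ends for
// the first and last bundle.
func rangesFromSplits(splits []any) []bson.M {
	filters := make([]bson.M, len(splits)+1)
	for i := range filters {
		f := bson.M{}
		if i > 0 {
			f["$gt"] = splits[i-1]
		}
		if i < len(splits) {
			f["$lte"] = splits[i]
		}
		if len(f) == 0 {
			filters[i] = f
		} else {
			filters[i] = bson.M{"_id": f}
		}
	}
	return filters
}

func main() {
	// Split keys 10 and 20 yield three bundles: _id <= 10, 10 < _id <= 20, _id > 20.
	for _, f := range rangesFromSplits([]any{int32(10), int32(20)}) {
		fmt.Printf("%v\n", f)
	}
}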
+type ReadOptionFn func(option *ReadOption) error + +// WithReadBucketAuto configures the ReadOption whether to use the bucketAuto aggregation stage. +func WithReadBucketAuto(bucketAuto bool) ReadOptionFn { + return func(o *ReadOption) error { + o.BucketAuto = bucketAuto + return nil + } +} + +// WithReadFilter configures the ReadOption to use the provided filter. +func WithReadFilter(filter bson.M) ReadOptionFn { + return func(o *ReadOption) error { + o.Filter = filter + return nil + } +} + +// WithReadBundleSize configures the ReadOption to use the provided bundle size in bytes. +func WithReadBundleSize(bundleSize int64) ReadOptionFn { + return func(o *ReadOption) error { + if bundleSize <= 0 { + return errors.New("bundle size must be greater than 0") + } + + o.BundleSize = bundleSize + return nil + } +} diff --git a/sdks/go/pkg/beam/io/mongodbio/read_option_test.go b/sdks/go/pkg/beam/io/mongodbio/read_option_test.go new file mode 100644 index 0000000000000..d4fe4dfa63a37 --- /dev/null +++ b/sdks/go/pkg/beam/io/mongodbio/read_option_test.go @@ -0,0 +1,115 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
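// Illustrative sketch (not part of this patch): how the ReadOptionFn values
// defined above compose. The patch does not show how the Read transform
// applies them, so the apply loop and the starting BundleSize below are
// assumptions made for the example.
package main

import (
	"fmt"
	"log"

	"github.com/apache/beam/sdks/v2/go/pkg/beam/io/mongodbio"
	"go.mongodb.org/mongo-driver/bson"
)

func main() {
	option := mongodbio.ReadOption{BundleSize: 64 * 1024 * 1024}

	opts := []mongodbio.ReadOptionFn{
		mongodbio.WithReadBucketAuto(true),
		mongodbio.WithReadFilter(bson.M{"status": "active"}),
		mongodbio.WithReadBundleSize(32 * 1024 * 1024),
	}

	// Apply each option in order; WithReadBundleSize rejects non-positive sizes.
	for _, opt := range opts {
		if err := opt(&option); err != nil {
			log.Fatalf("invalid read option: %v", err)
		}
	}

	fmt.Printf("bucketAuto=%v filter=%v bundleSize=%d\n",
		option.BucketAuto, option.Filter, option.BundleSize)
}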
+ +package mongodbio + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + "go.mongodb.org/mongo-driver/bson" +) + +func TestWithReadBucketAuto(t *testing.T) { + tests := []struct { + name string + bucketAuto bool + want bool + wantErr bool + }{ + { + name: "Set bucket auto to true", + bucketAuto: true, + want: true, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var option ReadOption + + if err := WithReadBucketAuto(tt.bucketAuto)(&option); (err != nil) != tt.wantErr { + t.Fatalf("WithReadBucketAuto() error = %v, wantErr %v", err, tt.wantErr) + } + + if option.BucketAuto != tt.want { + t.Errorf("option.BucketAuto = %v, want %v", option.BucketAuto, tt.want) + } + }) + } +} + +func TestWithReadFilter(t *testing.T) { + tests := []struct { + name string + filter bson.M + want bson.M + wantErr bool + }{ + { + name: "Set filter to {\"key\": \"value\"}", + filter: bson.M{"key": "value"}, + want: bson.M{"key": "value"}, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var option ReadOption + + if err := WithReadFilter(tt.filter)(&option); (err != nil) != tt.wantErr { + t.Fatalf("WithReadFilter() error = %v, wantErr %v", err, tt.wantErr) + } + + if !cmp.Equal(option.Filter, tt.want) { + t.Errorf("option.Filter = %v, want %v", option.Filter, tt.want) + } + }) + } +} + +func TestWithReadBundleSize(t *testing.T) { + tests := []struct { + name string + bundleSize int64 + want int64 + wantErr bool + }{ + { + name: "Set bundle size to 1024", + bundleSize: 1024, + want: 1024, + wantErr: false, + }, + { + name: "Error - bundle size must be greater than 0", + bundleSize: 0, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var option ReadOption + + if err := WithReadBundleSize(tt.bundleSize)(&option); (err != nil) != tt.wantErr { + t.Fatalf("WithReadBundleSize() error = %v, wantErr %v", err, tt.wantErr) + } + + if option.BundleSize != tt.want { + t.Errorf("option.BundleSize = %v, want %v", option.BundleSize, tt.want) + } + }) + } +} diff --git a/sdks/go/pkg/beam/io/mongodbio/read_test.go b/sdks/go/pkg/beam/io/mongodbio/read_test.go new file mode 100644 index 0000000000000..5899457d5a86c --- /dev/null +++ b/sdks/go/pkg/beam/io/mongodbio/read_test.go @@ -0,0 +1,393 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
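// Illustrative sketch (not part of this patch): running the same $bucketAuto
// aggregation that the bucketAuto read path above issues, to inspect the _id
// boundaries it turns into range filters. The URI, database, and collection
// names are placeholders, and the example assumes the collection's _id values
// are ObjectIDs.
package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"go.mongodb.org/mongo-driver/bson"
	"go.mongodb.org/mongo-driver/bson/primitive"
	"go.mongodb.org/mongo-driver/mongo"
	"go.mongodb.org/mongo-driver/mongo/options"
)

// bucketDoc mirrors the shape of a $bucketAuto output document.
type bucketDoc struct {
	ID struct {
		Min primitive.ObjectID `bson:"min"`
		Max primitive.ObjectID `bson:"max"`
	} `bson:"_id"`
	Count int32 `bson:"count"`
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	client, err := mongo.Connect(ctx, options.Client().ApplyURI("mongodb://localhost:27017"))
	if err != nil {
		log.Fatal(err)
	}
	defer client.Disconnect(ctx)

	coll := client.Database("demo").Collection("events")

	// Group documents by _id into 4 roughly equal-sized buckets, as in read.go.
	pipeline := mongo.Pipeline{bson.D{{
		Key:   "$bucketAuto",
		Value: bson.M{"groupBy": "$_id", "buckets": 4},
	}}}

	cursor, err := coll.Aggregate(ctx, pipeline)
	if err != nil {
		log.Fatal(err)
	}

	var buckets []bucketDoc
	if err := cursor.All(ctx, &buckets); err != nil {
		log.Fatal(err)
	}

	for _, b := range buckets {
		fmt.Printf("min=%s max=%s count=%d\n", b.ID.Min.Hex(), b.ID.Max.Hex(), b.Count)
	}
}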
+ +package mongodbio + +import ( + "math" + "reflect" + "testing" + + "github.com/google/go-cmp/cmp" + "go.mongodb.org/mongo-driver/bson" +) + +func Test_calculateBucketCount(t *testing.T) { + tests := []struct { + name string + collectionSize int64 + bundleSize int64 + want int32 + }{ + { + name: "Return ceiling of collection size / bundle size", + collectionSize: 3 * 1024 * 1024, + bundleSize: 2 * 1024 * 1024, + want: 2, + }, + { + name: "Return max int32 when calculated count is greater than max int32", + collectionSize: 1024 * 1024 * 1024 * 1024, + bundleSize: 1, + want: math.MaxInt32, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := calculateBucketCount(tt.collectionSize, tt.bundleSize); got != tt.want { + t.Errorf("calculateBucketCount() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_calculateBucketCountPanic(t *testing.T) { + t.Run("Panic when bundleSize is not greater than 0", func(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Errorf("calculateBucketCount() does not panic") + } + }() + + calculateBucketCount(1024, 0) + }) +} + +func Test_idFiltersFromBuckets(t *testing.T) { + tests := []struct { + name string + buckets []bucket + want []bson.M + }{ + { + name: "Create one $lte filter for start range, one $gt filter for end range, and filters with both " + + "$lte and $gt for ranges in between when there are three or more bucket elements", + buckets: []bucket{ + { + ID: minMax{ + Min: objectIDFromHex(t, "6384e03f24f854c1a8ce5378"), + Max: objectIDFromHex(t, "6384e03f24f854c1a8ce5380"), + }, + }, + { + ID: minMax{ + Min: objectIDFromHex(t, "6384e03f24f854c1a8ce5380"), + Max: objectIDFromHex(t, "6384e03f24f854c1a8ce5382"), + }, + }, + { + ID: minMax{ + Min: objectIDFromHex(t, "6384e03f24f854c1a8ce5382"), + Max: objectIDFromHex(t, "6384e03f24f854c1a8ce5384"), + }, + }, + }, + want: []bson.M{ + { + "_id": bson.M{ + "$lte": objectIDFromHex(t, "6384e03f24f854c1a8ce5380"), + }, + }, + { + "_id": bson.M{ + "$gt": objectIDFromHex(t, "6384e03f24f854c1a8ce5380"), + "$lte": objectIDFromHex(t, "6384e03f24f854c1a8ce5382"), + }, + }, + { + "_id": bson.M{ + "$gt": objectIDFromHex(t, "6384e03f24f854c1a8ce5382"), + }, + }, + }, + }, + { + name: "Create one $lte filter for start range and one $gt filter for end range when there are two " + + "bucket elements", + buckets: []bucket{ + { + ID: minMax{ + Min: objectIDFromHex(t, "6384e03f24f854c1a8ce5378"), + Max: objectIDFromHex(t, "6384e03f24f854c1a8ce5380"), + }, + }, + { + ID: minMax{ + Min: objectIDFromHex(t, "6384e03f24f854c1a8ce5380"), + Max: objectIDFromHex(t, "6384e03f24f854c1a8ce5382"), + }, + }, + }, + want: []bson.M{ + { + "_id": bson.M{ + "$lte": objectIDFromHex(t, "6384e03f24f854c1a8ce5380"), + }, + }, + { + "_id": bson.M{ + "$gt": objectIDFromHex(t, "6384e03f24f854c1a8ce5380"), + }, + }, + }, + }, + { + name: "Create an empty filter when there is one bucket element", + buckets: []bucket{ + { + ID: minMax{ + Min: objectIDFromHex(t, "6384e03f24f854c1a8ce5378"), + Max: objectIDFromHex(t, "6384e03f24f854c1a8ce5380"), + }, + }, + }, + want: []bson.M{{}}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := idFiltersFromBuckets(tt.buckets); !cmp.Equal(got, tt.want) { + t.Errorf("idFiltersFromBuckets() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_getChunkSize(t *testing.T) { + tests := []struct { + name string + bundleSize int64 + want int64 + }{ + { + name: "Return 1 MB if bundle size is less than 1 MB", + bundleSize: 1024, + 
want: 1024 * 1024, + }, + { + name: "Return 1 GB if bundle size is greater than 1 GB", + bundleSize: 2 * 1024 * 1024 * 1024, + want: 1024 * 1024 * 1024, + }, + { + name: "Return bundle size if bundle size is between 1 MB and 1 GB", + bundleSize: 4 * 1024 * 1024, + want: 4 * 1024 * 1024, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := getChunkSize(tt.bundleSize); got != tt.want { + t.Errorf("getChunkSize() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_idFiltersFromSplits(t *testing.T) { + tests := []struct { + name string + splitKeys []splitKey + want []bson.M + }{ + { + name: "Create one $lte filter for start range, one $gt filter for end range, and filters with both " + + "$lte and $gt for ranges in between when there are two or more splitKey elements", + splitKeys: []splitKey{ + { + ID: objectIDFromHex(t, "6384e03f24f854c1a8ce5380"), + }, + { + ID: objectIDFromHex(t, "6384e03f24f854c1a8ce5382"), + }, + }, + want: []bson.M{ + { + "_id": bson.M{ + "$lte": objectIDFromHex(t, "6384e03f24f854c1a8ce5380"), + }, + }, + { + "_id": bson.M{ + "$gt": objectIDFromHex(t, "6384e03f24f854c1a8ce5380"), + "$lte": objectIDFromHex(t, "6384e03f24f854c1a8ce5382"), + }, + }, + { + "_id": bson.M{ + "$gt": objectIDFromHex(t, "6384e03f24f854c1a8ce5382"), + }, + }, + }, + }, + { + name: "Create one $lte filter for start range and one $gt filter for end range when there is one " + + "splitKey element", + splitKeys: []splitKey{ + { + ID: objectIDFromHex(t, "6384e03f24f854c1a8ce5380"), + }, + }, + want: []bson.M{ + { + "_id": bson.M{ + "$lte": objectIDFromHex(t, "6384e03f24f854c1a8ce5380"), + }, + }, + { + "_id": bson.M{ + "$gt": objectIDFromHex(t, "6384e03f24f854c1a8ce5380"), + }, + }, + }, + }, + { + name: "Create an empty filter when there are no splitKey elements", + splitKeys: nil, + want: []bson.M{{}}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := idFiltersFromSplits(tt.splitKeys); !cmp.Equal(got, tt.want) { + t.Errorf("idFiltersFromSplits() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_inferProjection(t *testing.T) { + type doc struct { + Field1 string `bson:"field1"` + Field2 string `bson:"field2"` + Field3 string `bson:"-"` + } + + tests := []struct { + name string + t reflect.Type + tagKey string + want bson.D + }{ + { + name: "Infer projection from struct bson tags", + t: reflect.TypeOf(doc{}), + tagKey: "bson", + want: bson.D{ + {Key: "field1", Value: 1}, + {Key: "field2", Value: 1}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := inferProjection(tt.t, tt.tagKey); !cmp.Equal(got, tt.want) { + t.Errorf("inferProjection() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_inferProjectionPanic(t *testing.T) { + type doc struct{} + + t.Run("Panic when type has no fields to infer", func(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Errorf("inferProjection() does not panic") + } + }() + + inferProjection(reflect.TypeOf(doc{}), "bson") + }) +} + +func Test_mergeFilters(t *testing.T) { + tests := []struct { + name string + idFilter bson.M + filter bson.M + want bson.M + }{ + { + name: "Returned merged ID filter and custom filter in an $and filter", + idFilter: bson.M{ + "_id": bson.M{ + "$gte": 10, + }, + }, + filter: bson.M{ + "key": bson.M{ + "$ne": "value", + }, + }, + want: bson.M{ + "$and": []bson.M{ + { + "_id": bson.M{ + "$gte": 10, + }, + }, + { + "key": bson.M{ + "$ne": "value", + }, + }, + }, + }, + }, + { + name: "Return 
only ID filter when custom filter is empty", + idFilter: bson.M{ + "_id": bson.M{ + "$gte": 10, + }, + }, + filter: bson.M{}, + want: bson.M{ + "_id": bson.M{ + "$gte": 10, + }, + }, + }, + { + name: "Return only custom filter when ID filter is empty", + idFilter: bson.M{}, + filter: bson.M{ + "key": bson.M{ + "$ne": "value", + }, + }, + want: bson.M{ + "key": bson.M{ + "$ne": "value", + }, + }, + }, + { + name: "Return empty filter when both ID filter and custom filter are empty", + idFilter: bson.M{}, + filter: bson.M{}, + want: bson.M{}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := mergeFilters(tt.idFilter, tt.filter); !cmp.Equal(got, tt.want) { + t.Errorf("mergeFilters() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/sdks/go/pkg/beam/io/mongodbio/write.go b/sdks/go/pkg/beam/io/mongodbio/write.go new file mode 100644 index 0000000000000..2332e3ba98136 --- /dev/null +++ b/sdks/go/pkg/beam/io/mongodbio/write.go @@ -0,0 +1,204 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mongodbio + +import ( + "context" + "fmt" + "reflect" + + "github.com/apache/beam/sdks/v2/go/pkg/beam" + "github.com/apache/beam/sdks/v2/go/pkg/beam/register" + "github.com/apache/beam/sdks/v2/go/pkg/beam/util/structx" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/options" +) + +const ( + defaultWriteBatchSize = 1000 + defaultWriteOrdered = true +) + +func init() { + register.Function1x2(createIDFn) + register.Emitter2[primitive.ObjectID, beam.Y]() + + register.DoFn3x0[context.Context, beam.Y, func(beam.X, beam.Y)]( + &extractIDFn{}, + ) + register.Emitter2[beam.X, beam.Y]() + + register.DoFn4x1[context.Context, beam.X, beam.Y, func(beam.X), error]( + &writeFn{}, + ) + register.Emitter1[primitive.ObjectID]() +} + +// Write writes a PCollection of a type T to MongoDB. T must be a struct with exported fields +// that should have a "bson" tag. If the struct has a field with the bson tag "_id", the value of +// that field will be used as the id of the document. Otherwise, a new id field of type +// primitive.ObjectID will be generated for each document. Write returns a PCollection of the +// inserted id values with type K. +// +// The Write transform has the required parameters: +// - s: the scope of the pipeline +// - uri: the MongoDB connection string +// - database: the MongoDB database to write to +// - collection: the MongoDB collection to write to +// - col: the PCollection to write to MongoDB +// +// The Write transform takes a variadic number of WriteOptionFn which can set the WriteOption +// fields: +// - BatchSize: the number of documents to write in a single batch. 
Defaults to 1000 +// - Ordered: whether to execute the writes in order. Defaults to true +func Write( + s beam.Scope, + uri string, + database string, + collection string, + col beam.PCollection, + opts ...WriteOptionFn, +) beam.PCollection { + s = s.Scope("mongodbio.Write") + + option := &WriteOption{ + BatchSize: defaultWriteBatchSize, + Ordered: defaultWriteOrdered, + } + + for _, opt := range opts { + if err := opt(option); err != nil { + panic(fmt.Sprintf("mongodbio.Write: invalid option: %v", err)) + } + } + + t := col.Type().Type() + idIndex := structx.FieldIndexByTag(t, bsonTag, "_id") + + var keyed beam.PCollection + + if idIndex == -1 { + pre := beam.ParDo(s, createIDFn, col) + keyed = beam.Reshuffle(s, pre) + } else { + keyed = beam.ParDo( + s, + newExtractIDFn(idIndex), + col, + beam.TypeDefinition{Var: beam.XType, T: t.Field(idIndex).Type}, + ) + } + + return beam.ParDo( + s, + newWriteFn(uri, database, collection, option), + keyed, + ) +} + +func createIDFn(elem beam.Y) (primitive.ObjectID, beam.Y) { + id := primitive.NewObjectID() + return id, elem +} + +type extractIDFn struct { + IDIndex int +} + +func newExtractIDFn(idIndex int) *extractIDFn { + return &extractIDFn{ + IDIndex: idIndex, + } +} + +func (fn *extractIDFn) ProcessElement( + _ context.Context, + elem beam.Y, + emit func(beam.X, beam.Y), +) { + id := reflect.ValueOf(elem).Field(fn.IDIndex).Interface() + emit(id, elem) +} + +type writeFn struct { + mongoDBFn + BatchSize int64 + Ordered bool + models []mongo.WriteModel +} + +func newWriteFn( + uri string, + database string, + collection string, + option *WriteOption, +) *writeFn { + return &writeFn{ + mongoDBFn: mongoDBFn{ + URI: uri, + Database: database, + Collection: collection, + }, + BatchSize: option.BatchSize, + Ordered: option.Ordered, + } +} + +func (fn *writeFn) ProcessElement( + ctx context.Context, + key beam.X, + value beam.Y, + emit func(beam.X), +) error { + model := mongo.NewReplaceOneModel(). + SetFilter(bson.M{"_id": key}). + SetUpsert(true). + SetReplacement(value) + + fn.models = append(fn.models, model) + + if len(fn.models) >= int(fn.BatchSize) { + if err := fn.flush(ctx); err != nil { + return err + } + } + + emit(key) + + return nil +} + +func (fn *writeFn) FinishBundle(ctx context.Context, _ func(beam.X)) error { + if len(fn.models) > 0 { + return fn.flush(ctx) + } + + return nil +} + +func (fn *writeFn) flush(ctx context.Context) error { + opts := options.BulkWrite().SetOrdered(fn.Ordered) + + if _, err := fn.collection.BulkWrite(ctx, fn.models, opts); err != nil { + return fmt.Errorf("error bulk writing to MongoDB: %w", err) + } + + fn.models = nil + + return nil +} diff --git a/sdks/go/pkg/beam/io/mongodbio/write_option.go b/sdks/go/pkg/beam/io/mongodbio/write_option.go new file mode 100644 index 0000000000000..8d54b6052b824 --- /dev/null +++ b/sdks/go/pkg/beam/io/mongodbio/write_option.go @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mongodbio + +import ( + "errors" +) + +// WriteOption represents options for writing to MongoDB. +type WriteOption struct { + BatchSize int64 + Ordered bool +} + +// WriteOptionFn is a function that configures a WriteOption. +type WriteOptionFn func(option *WriteOption) error + +// WithWriteBatchSize configures the WriteOption to use the provided batch size when writing +// documents. +func WithWriteBatchSize(batchSize int64) WriteOptionFn { + return func(o *WriteOption) error { + if batchSize <= 0 { + return errors.New("batch size must be greater than 0") + } + + o.BatchSize = batchSize + return nil + } +} + +// WithWriteOrdered configures the WriteOption whether to apply an ordered bulk write. +func WithWriteOrdered(ordered bool) WriteOptionFn { + return func(o *WriteOption) error { + o.Ordered = ordered + return nil + } +} diff --git a/sdks/go/pkg/beam/io/mongodbio/write_option_test.go b/sdks/go/pkg/beam/io/mongodbio/write_option_test.go new file mode 100644 index 0000000000000..1e4e66bfbc41e --- /dev/null +++ b/sdks/go/pkg/beam/io/mongodbio/write_option_test.go @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
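// Illustrative sketch (not part of this patch): wiring the Write transform
// documented above into a small pipeline with the options from
// write_option.go. The event type, connection URI, and the use of
// beam.CreateList and beamx.Run are assumptions for the example; because the
// struct has a field tagged bson:"_id", Write upserts by that key instead of
// generating ObjectIDs.
package main

import (
	"context"
	"log"
	"reflect"

	"github.com/apache/beam/sdks/v2/go/pkg/beam"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/io/mongodbio"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx"
)

type event struct {
	ID   int32  `bson:"_id"`
	Name string `bson:"name"`
}

func init() {
	beam.RegisterType(reflect.TypeOf((*event)(nil)).Elem())
}

func main() {
	beam.Init()

	p, s := beam.NewPipelineWithRoot()

	col := beam.CreateList(s, []event{
		{ID: 1, Name: "created"},
		{ID: 2, Name: "updated"},
	})

	// Write returns a PCollection of the upserted _id values; it is ignored here.
	mongodbio.Write(s, "mongodb://localhost:27017", "demo", "events", col,
		mongodbio.WithWriteBatchSize(500),
		mongodbio.WithWriteOrdered(false),
	)

	if err := beamx.Run(context.Background(), p); err != nil {
		log.Fatalf("pipeline failed: %v", err)
	}
}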
+ +package mongodbio + +import ( + "testing" +) + +func TestWithWriteBatchSize(t *testing.T) { + tests := []struct { + name string + batchSize int64 + want int64 + wantErr bool + }{ + { + name: "Set batch size to 500", + batchSize: 500, + want: 500, + wantErr: false, + }, + { + name: "Error - batch size must be greater than 0", + batchSize: 0, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var option WriteOption + + if err := WithWriteBatchSize(tt.batchSize)(&option); (err != nil) != tt.wantErr { + t.Fatalf("WithWriteBatchSize() error = %v, wantErr %v", err, tt.wantErr) + } + + if option.BatchSize != tt.want { + t.Errorf("option.BatchSize = %v, want %v", option.BatchSize, tt.want) + } + }) + } +} + +func TestWithWriteOrdered(t *testing.T) { + tests := []struct { + name string + ordered bool + want bool + wantErr bool + }{ + { + name: "Set ordered to true", + ordered: true, + want: true, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var option WriteOption + + if err := WithWriteOrdered(tt.ordered)(&option); (err != nil) != tt.wantErr { + t.Fatalf("WithWriteOrdered() err = %v, wantErr %v", err, tt.wantErr) + } + + if option.Ordered != tt.want { + t.Errorf("option.Ordered = %v, want %v", option.Ordered, tt.want) + } + }) + } +} diff --git a/sdks/go/pkg/beam/io/mongodbio/write_test.go b/sdks/go/pkg/beam/io/mongodbio/write_test.go new file mode 100644 index 0000000000000..6608df8362b63 --- /dev/null +++ b/sdks/go/pkg/beam/io/mongodbio/write_test.go @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mongodbio + +import ( + "testing" + + "github.com/apache/beam/sdks/v2/go/pkg/beam" + "github.com/google/go-cmp/cmp" +) + +func Test_createIDFn(t *testing.T) { + type doc struct { + Field1 int32 `bson:"field1"` + } + + tests := []struct { + name string + elem beam.Y + want beam.Y + }{ + { + name: "Create key-value pair of a new object ID and element", + elem: doc{Field1: 1}, + want: doc{Field1: 1}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotKey, gotValue := createIDFn(tt.elem) + + if gotKey.IsZero() { + t.Error("createIDFn() gotKey is zero") + } + + if !cmp.Equal(gotValue, tt.want) { + t.Errorf("createIDFn() gotValue = %v, want %v", gotValue, tt.want) + } + }) + } +} diff --git a/sdks/go/pkg/beam/log/log.go b/sdks/go/pkg/beam/log/log.go index feae77b6c971b..4c1f5dddb0181 100644 --- a/sdks/go/pkg/beam/log/log.go +++ b/sdks/go/pkg/beam/log/log.go @@ -22,6 +22,7 @@ import ( "context" "fmt" "os" + "sync/atomic" ) // Severity is the severity of the log message. 
@@ -44,9 +45,17 @@ type Logger interface { Log(ctx context.Context, sev Severity, calldepth int, msg string) } -var ( - logger Logger = &Standard{} -) +var logger atomic.Value + +// concreteLogger works around atomic.Value's requirement that the type +// be identical for all callers. +type concreteLogger struct { + Logger +} + +func init() { + logger.Store(&concreteLogger{&Standard{}}) +} // SetLogger sets the global Logger. Intended to be called during initialization // only. @@ -54,13 +63,13 @@ func SetLogger(l Logger) { if l == nil { panic("Logger cannot be nil") } - logger = l + logger.Store(&concreteLogger{l}) } // Output logs the given message to the global logger. Calldepth is the count // of the number of frames to skip when computing the file name and line number. func Output(ctx context.Context, sev Severity, calldepth int, msg string) { - logger.Log(ctx, sev, calldepth+1, msg) // +1 for this frame + logger.Load().(Logger).Log(ctx, sev, calldepth+1, msg) // +1 for this frame } // User-facing logging functions. diff --git a/sdks/go/pkg/beam/runners/dataflow/dataflow.go b/sdks/go/pkg/beam/runners/dataflow/dataflow.go index 598ec4c1aaa58..51f0eff189ce8 100644 --- a/sdks/go/pkg/beam/runners/dataflow/dataflow.go +++ b/sdks/go/pkg/beam/runners/dataflow/dataflow.go @@ -35,6 +35,7 @@ import ( "cloud.google.com/go/storage" "github.com/apache/beam/sdks/v2/go/pkg/beam" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/graphx" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/pipelinex" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/util/hooks" @@ -71,7 +72,6 @@ var ( tempLocation = flag.String("temp_location", "", "Temp location (optional)") machineType = flag.String("worker_machine_type", "", "GCE machine type (optional)") minCPUPlatform = flag.String("min_cpu_platform", "", "GCE minimum cpu platform (optional)") - workerJar = flag.String("dataflow_worker_jar", "", "Dataflow worker jar (optional)") workerRegion = flag.String("worker_region", "", "Dataflow worker region (optional)") workerZone = flag.String("worker_zone", "", "Dataflow worker zone (optional)") dataflowServiceOptions = flag.String("dataflow_service_options", "", "Comma separated list of additional job modes and configurations (optional)") @@ -177,25 +177,26 @@ func Execute(ctx context.Context, p *beam.Pipeline) (beam.PipelineResult, error) panic("Beam has not been initialized. Call beam.Init() before pipeline construction.") } + edges, nodes, err := p.Build() + if err != nil { + return nil, err + } + streaming := !graph.Bounded(nodes) + beam.PipelineOptions.LoadOptionsFromFlags(flagFilter) - opts, err := getJobOptions(ctx) + opts, err := getJobOptions(ctx, streaming) if err != nil { return nil, err } // (1) Build and submit - // NOTE(herohde) 10/8/2018: the last segment of the names must be "worker" and "dataflow-worker.jar". + // NOTE(herohde) 10/8/2018: the last segment of the names must be "worker". 
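// Illustrative sketch (not part of this patch) of the constraint behind the
// concreteLogger wrapper above: atomic.Value panics if successive Stores use
// different concrete types, so every Logger is boxed in a single wrapper type
// before being stored. The greeter interface below is a stand-in, not the
// beam Logger interface.
package main

import (
	"fmt"
	"sync/atomic"
)

type greeter interface{ Greet() string }

type english struct{}

func (english) Greet() string { return "hello" }

type french struct{}

func (french) Greet() string { return "bonjour" }

// box gives every stored value the same concrete type, like concreteLogger.
type box struct{ greeter }

func main() {
	var v atomic.Value

	v.Store(box{english{}})
	v.Store(box{french{}}) // fine: the concrete type is always box

	fmt.Println(v.Load().(greeter).Greet()) // prints "bonjour"

	var bad atomic.Value
	bad.Store(english{})
	// bad.Store(french{}) // would panic: "store of inconsistently typed value"
}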
id := fmt.Sprintf("go-%v-%v", atomic.AddInt32(&unique, 1), time.Now().UnixNano()) modelURL := gcsx.Join(*stagingLocation, id, "model") workerURL := gcsx.Join(*stagingLocation, id, "worker") - jarURL := gcsx.Join(*stagingLocation, id, "dataflow-worker.jar") xlangURL := gcsx.Join(*stagingLocation, id, "xlang") - edges, _, err := p.Build() - if err != nil { - return nil, err - } artifactURLs, err := dataflowlib.ResolveXLangArtifacts(ctx, edges, opts.Project, xlangURL) if err != nil { return nil, errors.WithContext(err, "resolving cross-language artifacts") @@ -221,7 +222,7 @@ func Execute(ctx context.Context, p *beam.Pipeline) (beam.PipelineResult, error) log.Info(ctx, "Dry-run: not submitting job!") log.Info(ctx, proto.MarshalTextString(model)) - job, err := dataflowlib.Translate(ctx, model, opts, workerURL, jarURL, modelURL) + job, err := dataflowlib.Translate(ctx, model, opts, workerURL, modelURL) if err != nil { return nil, err } @@ -229,17 +230,17 @@ func Execute(ctx context.Context, p *beam.Pipeline) (beam.PipelineResult, error) return nil, nil } - return dataflowlib.Execute(ctx, model, opts, workerURL, jarURL, modelURL, *endpoint, *jobopts.Async) + return dataflowlib.Execute(ctx, model, opts, workerURL, modelURL, *endpoint, *jobopts.Async) } -func getJobOptions(ctx context.Context) (*dataflowlib.JobOptions, error) { +func getJobOptions(ctx context.Context, streaming bool) (*dataflowlib.JobOptions, error) { project := gcpopts.GetProjectFromFlagOrEnvironment(ctx) if project == "" { return nil, errors.New("no Google Cloud project specified. Use --project=") } region := gcpopts.GetRegion(ctx) if region == "" { - return nil, errors.New("No Google Cloud region specified. Use --region=. See https://cloud.google.com/dataflow/docs/concepts/regional-endpoints") + return nil, errors.New("no Google Cloud region specified. Use --region=. See https://cloud.google.com/dataflow/docs/concepts/regional-endpoints") } if *stagingLocation == "" { return nil, errors.New("no GCS staging location specified. Use --staging_location=gs:///") @@ -269,6 +270,9 @@ func getJobOptions(ctx context.Context) (*dataflowlib.JobOptions, error) { return nil, errors.Errorf("invalid flex resource scheduling goal. Got %q; Use --flexrs_goal=(FLEXRS_UNSPECIFIED|FLEXRS_SPEED_OPTIMIZED|FLEXRS_COST_OPTIMIZED)", *flexRSGoal) } } + if !streaming && *transformMapping != "" { + return nil, errors.New("provided transform_name_mapping for a batch pipeline, did you mean to construct a streaming pipeline?") + } if !*update && *transformMapping != "" { return nil, errors.New("provided transform_name_mapping without setting the --update flag, so the pipeline would not be updated") } @@ -282,24 +286,51 @@ func getJobOptions(ctx context.Context) (*dataflowlib.JobOptions, error) { hooks.SerializeHooksToOptions() experiments := jobopts.GetExperiments() - // Always use runner v2, unless set already. - var v2set, portaSubmission bool + // Ensure that we enable the same set of experiments across all SDKs + // for runner v2. 
+ var fnApiSet, v2set, uwSet, portaSubmission, seSet, wsSet bool for _, e := range experiments { - if strings.Contains(e, "use_runner_v2") || strings.Contains(e, "use_unified_worker") { + if strings.Contains(e, "beam_fn_api") { + fnApiSet = true + } + if strings.Contains(e, "use_runner_v2") { v2set = true } + if strings.Contains(e, "use_unified_worker") { + uwSet = true + } if strings.Contains(e, "use_portable_job_submission") { portaSubmission = true } + if strings.Contains(e, "enable_streaming_engine") { + seSet = true + } + if strings.Contains(e, "enable_windmill_service") { + wsSet = true + } + if strings.Contains(e, "disable_runner_v2") || strings.Contains(e, "disable_runner_v2_until_2023") || strings.Contains(e, "disable_prime_runner_v2") { + return nil, errors.New("detected one of the following experiments: disable_runner_v2 | disable_runner_v2_until_2023 | disable_prime_runner_v2. Disabling runner v2 is no longer supported as of Beam version 2.45.0+") + } + } + // Enable default experiments. + if !fnApiSet { + experiments = append(experiments, "beam_fn_api") } - // Enable by default unified worker, and portable job submission. if !v2set { + experiments = append(experiments, "use_runner_v2") + } + if !uwSet { experiments = append(experiments, "use_unified_worker") } if !portaSubmission { experiments = append(experiments, "use_portable_job_submission") } + // Ensure that streaming specific experiments are set for streaming pipelines + // since runner v2 only supports using streaming engine. + if streaming { + if !seSet { + experiments = append(experiments, "enable_streaming_engine") + } + if !wsSet { + experiments = append(experiments, "enable_windmill_service") + } + } + if *minCPUPlatform != "" { experiments = append(experiments, fmt.Sprintf("min_cpu_platform=%v", *minCPUPlatform)) } @@ -312,6 +343,7 @@ func getJobOptions(ctx context.Context) (*dataflowlib.JobOptions, error) { beam.PipelineOptions.LoadOptionsFromFlags(flagFilter) opts := &dataflowlib.JobOptions{ Name: jobopts.GetJobName(), + Streaming: streaming, Experiments: experiments, DataflowServiceOptions: dfServiceOptions, Options: beam.PipelineOptions.Export(), @@ -335,7 +367,6 @@ func getJobOptions(ctx context.Context) (*dataflowlib.JobOptions, error) { TempLocation: *tempLocation, TemplateLocation: *templateLocation, Worker: *jobopts.WorkerBinary, - WorkerJar: *workerJar, WorkerRegion: *workerRegion, WorkerZone: *workerZone, TeardownPolicy: *teardownPolicy, diff --git a/sdks/go/pkg/beam/runners/dataflow/dataflow_test.go b/sdks/go/pkg/beam/runners/dataflow/dataflow_test.go index 737c3ec7b5b5b..a8611ecb2ad07 100644 --- a/sdks/go/pkg/beam/runners/dataflow/dataflow_test.go +++ b/sdks/go/pkg/beam/runners/dataflow/dataflow_test.go @@ -49,21 +49,94 @@ func TestGetJobOptions(t *testing.T) { *jobopts.Experiments = "use_runner_v2,use_portable_job_submission" *jobopts.JobName = "testJob" - opts, err := getJobOptions(context.Background()) + opts, err := getJobOptions(context.Background(), false) if err != nil { t.Fatalf("getJobOptions() returned error %q, want %q", err, "nil") } + if got, want := opts.Streaming, false; got != want { + t.Errorf("getJobOptions().Streaming = %t, want %t", got, want) + } + if got, want := opts.Name, "testJob"; got != want { + t.Errorf("getJobOptions().Name = %q, want %q", got, want) + } + if got, want := len(opts.Experiments), 5; got != want { + t.Errorf("len(getJobOptions().Experiments) = %q, want %q", got, want) + } else { + sort.Strings(opts.Experiments) + expectedExperiments := []string{"beam_fn_api", "min_cpu_platform=testPlatform", "use_portable_job_submission", "use_runner_v2", "use_unified_worker"} + for i := 0; i < 3; i++ { + if got, want
:= opts.Experiments[i], expectedExperiments[i]; got != want { + t.Errorf("getJobOptions().Experiments[%d] = %q, want %q", i, got, want) + } + } + } + if got, want := len(opts.DataflowServiceOptions), 2; got != want { + t.Errorf("len(getJobOptions().DataflowServiceOptions) = %q, want %q", got, want) + } else { + sort.Strings(opts.DataflowServiceOptions) + expectedOptions := []string{"opt1", "opt2"} + for i := 0; i < 2; i++ { + if got, want := opts.DataflowServiceOptions[i], expectedOptions[i]; got != want { + t.Errorf("getJobOptions().DataflowServiceOptions = %q, want %q", got, want) + } + } + } + if got, want := opts.Project, "testProject"; got != want { + t.Errorf("getJobOptions().Project = %q, want %q", got, want) + } + if got, want := opts.Region, "testRegion"; got != want { + t.Errorf("getJobOptions().Region = %q, want %q", got, want) + } + if got, want := len(opts.Labels), 2; got != want { + t.Errorf("len(getJobOptions().Labels) = %q, want %q", got, want) + } else { + if got, want := opts.Labels["label1"], "val1"; got != want { + t.Errorf("getJobOptions().Labels[\"label1\"] = %q, want %q", got, want) + } + if got, want := opts.Labels["label2"], "val2"; got != want { + t.Errorf("getJobOptions().Labels[\"label2\"] = %q, want %q", got, want) + } + } + if got, want := opts.TempLocation, "gs://testStagingLocation/tmp"; got != want { + t.Errorf("getJobOptions().TempLocation = %q, want %q", got, want) + } + if got, want := opts.FlexRSGoal, "FLEXRS_SPEED_OPTIMIZED"; got != want { + t.Errorf("getJobOptions().FlexRSGoal = %q, want %q", got, want) + } +} + +func TestGetJobOptions_Streaming(t *testing.T) { + resetGlobals() + *labels = `{"label1": "val1", "label2": "val2"}` + *stagingLocation = "gs://testStagingLocation" + *minCPUPlatform = "testPlatform" + *flexRSGoal = "FLEXRS_SPEED_OPTIMIZED" + *dataflowServiceOptions = "opt1,opt2" + + *gcpopts.Project = "testProject" + *gcpopts.Region = "testRegion" + + *jobopts.Experiments = "use_runner_v2,use_portable_job_submission" + *jobopts.JobName = "testJob" + + opts, err := getJobOptions(context.Background(), true) + if err != nil { + t.Fatalf("getJobOptions() returned error %q, want %q", err, "nil") + } + if got, want := opts.Streaming, true; got != want { + t.Errorf("getJobOptions().Streaming = %t, want %t", got, want) + } if got, want := opts.Name, "testJob"; got != want { t.Errorf("getJobOptions().Name = %q, want %q", got, want) } - if got, want := len(opts.Experiments), 3; got != want { + if got, want := len(opts.Experiments), 7; got != want { t.Errorf("len(getJobOptions().Experiments) = %q, want %q", got, want) } else { sort.Strings(opts.Experiments) - expectedExperiments := []string{"min_cpu_platform=testPlatform", "use_portable_job_submission", "use_runner_v2"} + expectedExperiments := []string{"beam_fn_api", "enable_streaming_engine", "enable_windmill_service", "min_cpu_platform=testPlatform", "use_portable_job_submission", "use_runner_v2", "use_unified_worker"} for i := 0; i < 3; i++ { if got, want := opts.Experiments[i], expectedExperiments[i]; got != want { - t.Errorf("getJobOptions().Experiments = %q, want %q", got, want) + t.Errorf("getJobOptions().Experiments[%d] = %q, want %q", i, got, want) } } } @@ -109,30 +182,71 @@ func TestGetJobOptions_NoExperimentsSet(t *testing.T) { *gcpopts.Region = "testRegion" *jobopts.Experiments = "" - opts, err := getJobOptions(context.Background()) + opts, err := getJobOptions(context.Background(), false) + + if err != nil { + t.Fatalf("getJobOptions() returned error %q, want %q", err, "nil") + } + if 
got, want := len(opts.Experiments), 4; got != want { + t.Fatalf("len(getJobOptions().Experiments) = %q, want %q", got, want) + } + sort.Strings(opts.Experiments) + expectedExperiments := []string{"beam_fn_api", "use_portable_job_submission", "use_unified_worker", "use_runner_v2"} + for i := 0; i < 2; i++ { + if got, want := opts.Experiments[i], expectedExperiments[i]; got != want { + t.Errorf("getJobOptions().Experiments[%d] = %q, want %q", i, got, want) + } + } +} + +func TestGetJobOptions_NoExperimentsSetStreaming(t *testing.T) { + resetGlobals() + *stagingLocation = "gs://testStagingLocation" + *gcpopts.Project = "testProject" + *gcpopts.Region = "testRegion" + *jobopts.Experiments = "" + + opts, err := getJobOptions(context.Background(), true) if err != nil { t.Fatalf("getJobOptions() returned error %q, want %q", err, "nil") } - if got, want := len(opts.Experiments), 2; got != want { + if got, want := len(opts.Experiments), 6; got != want { t.Fatalf("len(getJobOptions().Experiments) = %q, want %q", got, want) } sort.Strings(opts.Experiments) - expectedExperiments := []string{"use_portable_job_submission", "use_unified_worker"} + expectedExperiments := []string{"beam_fn_api", "enable_streaming_engine", "enable_windmill_service", "use_portable_job_submission", "use_unified_worker", "use_runner_v2"} for i := 0; i < 2; i++ { if got, want := opts.Experiments[i], expectedExperiments[i]; got != want { - t.Errorf("getJobOptions().Experiments = %q, want %q", got, want) + t.Errorf("getJobOptions().Experiments[%d] = %q, want %q", i, got, want) } } } +func TestGetJobOptions_DisableRunnerV2ExperimentsSet(t *testing.T) { + resetGlobals() + *stagingLocation = "gs://testStagingLocation" + *gcpopts.Project = "testProject" + *gcpopts.Region = "testRegion" + *jobopts.Experiments = "disable_runner_v2" + + opts, err := getJobOptions(context.Background(), false) + + if err == nil { + t.Error("getJobOptions() returned error nil, want an error") + } + if opts != nil { + t.Errorf("getJobOptions() returned JobOptions when it should not have, got %#v, want nil", opts) + } +} + func TestGetJobOptions_NoStagingLocation(t *testing.T) { resetGlobals() *stagingLocation = "" *gcpopts.Project = "testProject" *gcpopts.Region = "testRegion" - _, err := getJobOptions(context.Background()) + _, err := getJobOptions(context.Background(), false) if err == nil { t.Fatalf("getJobOptions() returned error nil, want an error") } @@ -145,7 +259,7 @@ func TestGetJobOptions_InvalidAutoscaling(t *testing.T) { *gcpopts.Project = "testProject" *gcpopts.Region = "testRegion" - _, err := getJobOptions(context.Background()) + _, err := getJobOptions(context.Background(), false) if err == nil { t.Fatalf("getJobOptions() returned error nil, want an error") } @@ -158,7 +272,7 @@ func TestGetJobOptions_InvalidRsGoal(t *testing.T) { *gcpopts.Project = "testProject" *gcpopts.Region = "testRegion" - _, err := getJobOptions(context.Background()) + _, err := getJobOptions(context.Background(), false) if err == nil { t.Fatalf("getJobOptions() returned error nil, want an error") } @@ -204,7 +318,7 @@ func TestGetJobOptions_TransformMapping(t *testing.T) { *update = true *transformMapping = `{"transformOne": "transformTwo"}` - opts, err := getJobOptions(context.Background()) + opts, err := getJobOptions(context.Background(), true) if err != nil { t.Errorf("getJobOptions() returned error, got %v", err) } @@ -217,6 +331,23 @@ func TestGetJobOptions_TransformMapping(t *testing.T) { } +func TestGetJobOptions_TransformMappingNotStreaming(t *testing.T) { + 
resetGlobals() + *stagingLocation = "gs://testStagingLocation" + *gcpopts.Project = "testProject" + *gcpopts.Region = "testRegion" + *update = true + *transformMapping = `{"transformOne": "transformTwo"}` + + opts, err := getJobOptions(context.Background(), false) + if err == nil { + t.Error("getJobOptions() returned error nil, want an error") + } + if opts != nil { + t.Errorf("getJobOptions() returned JobOptions when it should not have, got %#v, want nil", opts) + } +} + func TestGetJobOptions_TransformMappingNoUpdate(t *testing.T) { resetGlobals() *stagingLocation = "gs://testStagingLocation" @@ -224,7 +355,7 @@ func TestGetJobOptions_TransformMappingNoUpdate(t *testing.T) { *gcpopts.Region = "testRegion" *transformMapping = `{"transformOne": "transformTwo"}` - opts, err := getJobOptions(context.Background()) + opts, err := getJobOptions(context.Background(), true) if err == nil { t.Error("getJobOptions() returned error nil, want an error") } @@ -241,7 +372,7 @@ func TestGetJobOptions_InvalidMapping(t *testing.T) { *update = true *transformMapping = "not a JSON-encoded string" - opts, err := getJobOptions(context.Background()) + opts, err := getJobOptions(context.Background(), true) if err == nil { t.Error("getJobOptions() returned error nil, want an error") } diff --git a/sdks/go/pkg/beam/runners/dataflow/dataflowlib/execute.go b/sdks/go/pkg/beam/runners/dataflow/dataflowlib/execute.go index 1cff2359d35a5..67ed337bed575 100644 --- a/sdks/go/pkg/beam/runners/dataflow/dataflowlib/execute.go +++ b/sdks/go/pkg/beam/runners/dataflow/dataflowlib/execute.go @@ -35,7 +35,7 @@ import ( ) // Execute submits a pipeline as a Dataflow job. -func Execute(ctx context.Context, raw *pipepb.Pipeline, opts *JobOptions, workerURL, jarURL, modelURL, endpoint string, async bool) (*dataflowPipelineResult, error) { +func Execute(ctx context.Context, raw *pipepb.Pipeline, opts *JobOptions, workerURL, modelURL, endpoint string, async bool) (*dataflowPipelineResult, error) { // (1) Upload Go binary to GCS. presult := &dataflowPipelineResult{} @@ -75,15 +75,6 @@ func Execute(ctx context.Context, raw *pipepb.Pipeline, opts *JobOptions, worker return presult, err } - if opts.WorkerJar != "" { - log.Infof(ctx, "Staging Dataflow worker jar: %v", opts.WorkerJar) - - if _, err := stageFile(ctx, opts.Project, jarURL, opts.WorkerJar); err != nil { - return presult, err - } - log.Infof(ctx, "Staged worker jar: %v", jarURL) - } - // (2) Upload model to GCS log.Info(ctx, proto.MarshalTextString(raw)) @@ -94,7 +85,7 @@ func Execute(ctx context.Context, raw *pipepb.Pipeline, opts *JobOptions, worker // (3) Translate to v1b3 and submit - job, err := Translate(ctx, raw, opts, workerURL, jarURL, modelURL) + job, err := Translate(ctx, raw, opts, workerURL, modelURL) if err != nil { return presult, err } diff --git a/sdks/go/pkg/beam/runners/dataflow/dataflowlib/job.go b/sdks/go/pkg/beam/runners/dataflow/dataflowlib/job.go index c8a00b58621fc..5499347a26751 100644 --- a/sdks/go/pkg/beam/runners/dataflow/dataflowlib/job.go +++ b/sdks/go/pkg/beam/runners/dataflow/dataflowlib/job.go @@ -46,6 +46,7 @@ type JobOptions struct { // Pipeline options Options runtime.RawOptions + Streaming bool Project string Region string Zone string @@ -80,8 +81,6 @@ type JobOptions struct { // Worker is the worker binary override. Worker string - // WorkerJar is a custom worker jar. - WorkerJar string // -- Internal use only. Not supported in public Dataflow. 
-- @@ -89,14 +88,13 @@ type JobOptions struct { } // Translate translates a pipeline to a Dataflow job. -func Translate(ctx context.Context, p *pipepb.Pipeline, opts *JobOptions, workerURL, jarURL, modelURL string) (*df.Job, error) { +func Translate(ctx context.Context, p *pipepb.Pipeline, opts *JobOptions, workerURL, modelURL string) (*df.Job, error) { // (1) Translate pipeline to v1b3 speak. jobType := "JOB_TYPE_BATCH" apiJobType := "FNAPI_BATCH" - streaming := !pipelinex.Bounded(p) - if streaming { + if opts.Streaming { jobType = "JOB_TYPE_STREAMING" apiJobType = "FNAPI_STREAMING" } @@ -114,16 +112,6 @@ func Translate(ctx context.Context, p *pipepb.Pipeline, opts *JobOptions, worker Name: "worker", Location: workerURL, }} - experiments := append(opts.Experiments, "beam_fn_api") - - if opts.WorkerJar != "" { - jar := &df.Package{ - Name: "dataflow-worker.jar", - Location: jarURL, - } - packages = append(packages, jar) - experiments = append(experiments, "use_staged_dataflow_worker_jar") - } for _, url := range opts.ArtifactURLs { name := url[strings.LastIndexAny(url, "/")+1:] @@ -166,7 +154,7 @@ func Translate(ctx context.Context, p *pipepb.Pipeline, opts *JobOptions, worker Options: dataflowOptions{ PipelineURL: modelURL, Region: opts.Region, - Experiments: experiments, + Experiments: opts.Experiments, TempLocation: opts.TempLocation, }, GoOptions: opts.Options, @@ -193,7 +181,7 @@ func Translate(ctx context.Context, p *pipepb.Pipeline, opts *JobOptions, worker WorkerRegion: opts.WorkerRegion, WorkerZone: opts.WorkerZone, TempStoragePrefix: opts.TempLocation, - Experiments: experiments, + Experiments: opts.Experiments, }, Labels: opts.Labels, TransformNameMapping: opts.TransformNameMapping, @@ -217,10 +205,6 @@ func Translate(ctx context.Context, p *pipepb.Pipeline, opts *JobOptions, worker if opts.TeardownPolicy != "" { workerPool.TeardownPolicy = opts.TeardownPolicy } - if streaming { - // Add separate data disk for streaming jobs - workerPool.DataDisks = []*df.Disk{{}} - } return job, nil } diff --git a/sdks/go/pkg/beam/runners/direct/buffer.go b/sdks/go/pkg/beam/runners/direct/buffer.go index e831930a6185a..383db3db306ba 100644 --- a/sdks/go/pkg/beam/runners/direct/buffer.go +++ b/sdks/go/pkg/beam/runners/direct/buffer.go @@ -72,7 +72,22 @@ func (n *buffer) NewIterable(ctx context.Context, reader exec.StateReader, w typ } func (n *buffer) NewKeyedIterable(ctx context.Context, reader exec.StateReader, w typex.Window, iterKey any) (exec.ReStream, error) { - return n.NewIterable(ctx, reader, w) + if !n.done { + panic(fmt.Sprintf("buffer[%v] incomplete: %v", n.uid, len(n.buf))) + } + s := &exec.FixedReStream{Buf: make([]exec.FullValue, 0)} + for _, v := range n.buf { + if v.Elm == iterKey { + s.Buf = append(s.Buf, exec.FullValue{ + Elm: v.Elm2, + Timestamp: v.Timestamp, + Windows: v.Windows, + Pane: v.Pane, + Continuation: v.Continuation, + }) + } + } + return s, nil } func (n *buffer) String() string { diff --git a/sdks/go/pkg/beam/runners/direct/direct_test.go b/sdks/go/pkg/beam/runners/direct/direct_test.go index ac1eeecb64b36..a8108580aa2e9 100644 --- a/sdks/go/pkg/beam/runners/direct/direct_test.go +++ b/sdks/go/pkg/beam/runners/direct/direct_test.go @@ -29,6 +29,7 @@ import ( "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/mtime" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/window" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/metrics" + "github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/filter" "github.com/google/go-cmp/cmp" ) @@ -44,6 +45,7 @@ func init() 
{ beam.RegisterFunction(dofn2x1) beam.RegisterFunction(dofn3x1) beam.RegisterFunction(dofn2x2KV) + beam.RegisterFunction(dofnMultiMap) beam.RegisterFunction(dofn2) beam.RegisterFunction(dofnKV) beam.RegisterFunction(dofnKV2) @@ -121,6 +123,16 @@ func dofn2x2KV(imp []byte, iter func(*string, *int64) bool, emitK func(string), emitV(sum) } +func dofnMultiMap(key string, lookup func(string) func(*int64) bool, emitK func(string), emitV func(int64)) { + var v, sum int64 + iter := lookup(key) + for iter(&v) { + sum += v + } + emitK(key) + emitV(sum) +} + // int64Check validates that within a single bundle, // we received the expected int64 values. type int64Check struct { @@ -446,6 +458,24 @@ func TestRunner_Pipelines(t *testing.T) { t.Fatal(err) } }) + t.Run("sideinput_multimap", func(t *testing.T) { + p, s := beam.NewPipelineWithRoot() + imp := beam.Impulse(s) + col1 := beam.ParDo(s, dofnKV, imp) + keys := filter.Distinct(s, beam.DropValue(s, col1)) + ks, sum := beam.ParDo2(s, dofnMultiMap, keys, beam.SideInput{Input: col1}) + beam.ParDo(s, &stringCheck{ + Name: "iterKV sideinput check K", + Want: []string{"a", "b"}, + }, ks) + beam.ParDo(s, &int64Check{ + Name: "iterKV sideinput check V", + Want: []int{9, 12}, + }, sum) + if _, err := executeWithT(context.Background(), t, p); err != nil { + t.Fatal(err) + } + }) // Validates the waiting on side input readiness in buffer. t.Run("sideinput_2iterable", func(t *testing.T) { p, s := beam.NewPipelineWithRoot() diff --git a/sdks/go/pkg/beam/runners/universal/extworker/extworker.go b/sdks/go/pkg/beam/runners/universal/extworker/extworker.go index dc75c7c8ca5b5..ffc8f8e47c09f 100644 --- a/sdks/go/pkg/beam/runners/universal/extworker/extworker.go +++ b/sdks/go/pkg/beam/runners/universal/extworker/extworker.go @@ -65,6 +65,12 @@ func (s *Loopback) StartWorker(ctx context.Context, req *fnpb.StartWorkerRequest log.Infof(ctx, "starting worker %v", req.GetWorkerId()) s.mu.Lock() defer s.mu.Unlock() + if s.workers == nil { + return &fnpb.StartWorkerResponse{ + Error: "worker pool shutting down", + }, nil + } + if _, ok := s.workers[req.GetWorkerId()]; ok { return &fnpb.StartWorkerResponse{ Error: fmt.Sprintf("worker with ID %q already exists", req.GetWorkerId()), @@ -92,6 +98,10 @@ func (s *Loopback) StopWorker(ctx context.Context, req *fnpb.StopWorkerRequest) log.Infof(ctx, "stopping worker %v", req.GetWorkerId()) s.mu.Lock() defer s.mu.Unlock() + if s.workers == nil { + // Worker pool is already shutting down, so no action is needed. + return &fnpb.StopWorkerResponse{}, nil + } if cancelfn, ok := s.workers[req.GetWorkerId()]; ok { cancelfn() delete(s.workers, req.GetWorkerId()) @@ -106,12 +116,15 @@ func (s *Loopback) StopWorker(ctx context.Context, req *fnpb.StopWorkerRequest) // Stop terminates the service and stops all workers. func (s *Loopback) Stop(ctx context.Context) error { s.mu.Lock() - defer s.mu.Unlock() log.Infof(ctx, "stopping Loopback, and %d workers", len(s.workers)) - s.workers = map[string]context.CancelFunc{} - s.lis.Close() + s.workers = nil s.rootCancel() + + // There can be a deadlock between the StopWorker RPC and GracefulStop + // which waits for all RPCs to finish, so it must be outside the critical section. 
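+	// (Both StartWorker and StopWorker take s.mu, and GracefulStop blocks until
+	// those in-flight RPCs return, so the lock must be released first.)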
+ s.mu.Unlock() + s.grpcServer.GracefulStop() return nil } diff --git a/sdks/go/pkg/beam/transforms/sql/sql_test.go b/sdks/go/pkg/beam/transforms/sql/sql_test.go new file mode 100644 index 0000000000000..58d801f45f0b8 --- /dev/null +++ b/sdks/go/pkg/beam/transforms/sql/sql_test.go @@ -0,0 +1,96 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sql + +import ( + "github.com/apache/beam/sdks/v2/go/pkg/beam" + "github.com/apache/beam/sdks/v2/go/pkg/beam/transforms/sql/sqlx" + "reflect" + "testing" +) + +func TestOptions_Add(t *testing.T) { + test := struct { + opt sqlx.Option + }{ + opt: sqlx.Option{ + Urn: "this is a string", + Payload: []byte{1, 2, 3, 4}, + }, + } + + o := options{} + o.Add(test.opt) + if o.customs == nil || !reflect.DeepEqual(o.customs[len(o.customs)-1], test.opt) { + t.Errorf("options.Add(%v) failed. For the customs field in options, got %v, want %v", test.opt, o.customs, test.opt) + } +} + +func TestInput(t *testing.T) { + test := struct { + inputName string + inputIn beam.PCollection + }{ + inputName: "this is a string", + inputIn: beam.PCollection{}, + } + + o := &options{inputs: make(map[string]beam.PCollection)} + option := Input(test.inputName, test.inputIn) + if option == nil { + t.Errorf("Input(%v, %v) = %v, want not nil", test.inputName, test.inputIn, option) + } + option(o) + if o.inputs == nil || !reflect.DeepEqual(o.inputs[test.inputName], test.inputIn) { + t.Errorf("The function that Input(%v, %v) returned did not work correctly. For the inputs field in options, got %v, want %v", test.inputName, test.inputIn, o.inputs, test.inputIn) + } +} + +func TestDialect(t *testing.T) { + test := struct { + dialect string + }{ + dialect: "this is a string", + } + + o := &options{} + option := Dialect(test.dialect) + if option == nil { + t.Errorf("Dialect(%v) = %v, want not nil", test.dialect, option) + } + option(o) + if !reflect.DeepEqual(o.dialect, test.dialect) { + t.Errorf("The function that Input(%v) returned did not work correctly. For the dialect field in options, got %v, want %v", test.dialect, o.dialect, test.dialect) + } +} + +func TestExpansionAddr(t *testing.T) { + test := struct { + addr string + }{ + addr: "this is a string", + } + + o := &options{} + option := ExpansionAddr(test.addr) + if option == nil { + t.Errorf("ExpansionAddr(%v) = %v, want not nil", test.addr, option) + } + option(o) + if !reflect.DeepEqual(o.expansionAddr, test.addr) { + t.Errorf("The function that ExpansionAddr(%v) returned did not work correctly. 
For the expansionAddr field in options, got %v, want %v", test.addr, o.expansionAddr, test.addr) + } +} diff --git a/sdks/go/pkg/beam/util/structx/struct.go b/sdks/go/pkg/beam/util/structx/struct.go index 2659191d38d4e..aec8a63652d9c 100644 --- a/sdks/go/pkg/beam/util/structx/struct.go +++ b/sdks/go/pkg/beam/util/structx/struct.go @@ -56,3 +56,22 @@ func InferFieldNames(t reflect.Type, key string) []string { return names } + +// FieldIndexByTag returns the index of the field with the given tag key and value. Returns -1 if +// the field is not found. Panics if the type's kind is not a struct. +func FieldIndexByTag(t reflect.Type, key string, value string) int { + if t.Kind() != reflect.Struct { + panic(fmt.Sprintf("structx: FieldIndexByTag of non-struct type %s", t)) + } + + for i := 0; i < t.NumField(); i++ { + values := t.Field(i).Tag.Get(key) + name := strings.Split(values, ",")[0] + + if name == value { + return i + } + } + + return -1 +} diff --git a/sdks/go/pkg/beam/util/structx/struct_test.go b/sdks/go/pkg/beam/util/structx/struct_test.go index 6aac5869604ef..ab6c7278f628f 100644 --- a/sdks/go/pkg/beam/util/structx/struct_test.go +++ b/sdks/go/pkg/beam/util/structx/struct_test.go @@ -142,3 +142,63 @@ func TestInferFieldNamesPanic(t *testing.T) { InferFieldNames(reflect.TypeOf(""), "key") }) } + +func TestFieldIndexByTag(t *testing.T) { + tests := []struct { + name string + t reflect.Type + key string + value string + want int + }{ + { + name: "Return index of field with matching tag key and value", + t: reflect.TypeOf(struct { + Field1 string `key:"field1"` + Field2 string `key:"field2"` + }{}), + key: "key", + value: "field2", + want: 1, + }, + { + name: "Return -1 for non-existent tag key", + t: reflect.TypeOf(struct { + Field1 string `key:"field1"` + Field2 string `key:"field2"` + }{}), + key: "other", + value: "field1", + want: -1, + }, + { + name: "Return -1 for non-existent tag value", + t: reflect.TypeOf(struct { + Field1 string `key:"field1"` + Field2 string `key:"field2"` + }{}), + key: "key", + value: "field3", + want: -1, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := FieldIndexByTag(tt.t, tt.key, tt.value); got != tt.want { + t.Errorf("FieldIndexByTag() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestFieldIndexByTagPanic(t *testing.T) { + t.Run("Panic for non-struct type", func(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Errorf("FieldIndexByTag() does not panic") + } + }() + + FieldIndexByTag(reflect.TypeOf(""), "key", "field1") + }) +} diff --git a/sdks/go/test/build.gradle b/sdks/go/test/build.gradle index 5d34f9c72c8a6..d1048b49c0bde 100644 --- a/sdks/go/test/build.gradle +++ b/sdks/go/test/build.gradle @@ -28,7 +28,6 @@ task dataflowValidatesRunner() { group = "Verification" dependsOn ":sdks:go:test:goBuild" - dependsOn ":runners:google-cloud-dataflow-java:worker:shadowJar" dependsOn ":sdks:java:testing:expansion-service:buildTestExpansionServiceJar" doLast { @@ -37,7 +36,6 @@ task dataflowValidatesRunner() { ] def options = [ "--runner dataflow", - "--dataflow_worker_jar ${project(":runners:google-cloud-dataflow-java:worker").shadowJar.archivePath}", "--pipeline_opts \"${pipelineOptions.join(' ')}\"", ] exec { diff --git a/sdks/go/test/integration/integration.go b/sdks/go/test/integration/integration.go index 2253df597bb48..c13d8b1669233 100644 --- a/sdks/go/test/integration/integration.go +++ b/sdks/go/test/integration/integration.go @@ -223,6 +223,7 @@ var dataflowFilters = []string{ 
"TestJDBCIO_BasicReadWrite", "TestJDBCIO_PostgresReadWrite", "TestDebeziumIO_BasicRead", + "TestMongoDBIO.*", // TODO(BEAM-11576): TestFlattenDup failing on this runner. "TestFlattenDup", // The Dataflow runner does not support the TestStream primitive diff --git a/sdks/go/test/integration/internal/containers/containers.go b/sdks/go/test/integration/internal/containers/containers.go new file mode 100644 index 0000000000000..d897c59fc52c8 --- /dev/null +++ b/sdks/go/test/integration/internal/containers/containers.go @@ -0,0 +1,121 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package containers contains utilities for running test containers in integration tests. +package containers + +import ( + "context" + "testing" + "time" + + "github.com/docker/go-connections/nat" + "github.com/testcontainers/testcontainers-go" + "github.com/testcontainers/testcontainers-go/wait" + "gopkg.in/retry.v1" +) + +type ContainerOptionFn func(*testcontainers.ContainerRequest) + +func WithEnv(env map[string]string) ContainerOptionFn { + return func(option *testcontainers.ContainerRequest) { + option.Env = env + } +} + +func WithHostname(hostname string) ContainerOptionFn { + return func(option *testcontainers.ContainerRequest) { + option.Hostname = hostname + } +} + +func WithPorts(ports []string) ContainerOptionFn { + return func(option *testcontainers.ContainerRequest) { + option.ExposedPorts = ports + } +} + +func WithWaitStrategy(waitStrategy wait.Strategy) ContainerOptionFn { + return func(option *testcontainers.ContainerRequest) { + option.WaitingFor = waitStrategy + } +} + +func NewContainer( + ctx context.Context, + t *testing.T, + image string, + maxRetries int, + opts ...ContainerOptionFn, +) testcontainers.Container { + t.Helper() + + request := testcontainers.ContainerRequest{Image: image} + + for _, opt := range opts { + opt(&request) + } + + genericRequest := testcontainers.GenericContainerRequest{ + ContainerRequest: request, + Started: true, + } + + strategy := retry.LimitCount( + maxRetries, + retry.Exponential{ + Initial: time.Second, + Factor: 2, + }, + ) + + var container testcontainers.Container + var err error + + for attempt := retry.Start(strategy, nil); attempt.Next(); { + container, err = testcontainers.GenericContainer(ctx, genericRequest) + if err == nil { + break + } + + if attempt.Count() == maxRetries { + t.Fatalf("failed to start container with %v retries: %v", maxRetries, err) + } + } + + t.Cleanup(func() { + if err := container.Terminate(ctx); err != nil { + t.Fatalf("error terminating container: %v", err) + } + }) + + return container +} + +func Port( + ctx context.Context, + t *testing.T, + container testcontainers.Container, + port nat.Port, +) string { + t.Helper() + + mappedPort, err := container.MappedPort(ctx, port) + if 
err != nil { + t.Fatalf("error getting mapped port: %v", err) + } + + return mappedPort.Port() +} diff --git a/sdks/go/test/integration/io/mongodbio/helper_test.go b/sdks/go/test/integration/io/mongodbio/helper_test.go new file mode 100644 index 0000000000000..0551be62225a7 --- /dev/null +++ b/sdks/go/test/integration/io/mongodbio/helper_test.go @@ -0,0 +1,123 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mongodbio + +import ( + "context" + "testing" + + "github.com/apache/beam/sdks/v2/go/test/integration/internal/containers" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" + "go.mongodb.org/mongo-driver/mongo" + "go.mongodb.org/mongo-driver/mongo/options" + "go.mongodb.org/mongo-driver/mongo/readpref" +) + +const ( + mongoImage = "mongo:6.0.3" + mongoPort = "27017/tcp" + maxRetries = 5 +) + +func setUpTestContainer(ctx context.Context, t *testing.T) string { + t.Helper() + + container := containers.NewContainer( + ctx, + t, + mongoImage, + maxRetries, + containers.WithPorts([]string{mongoPort}), + ) + + return containers.Port(ctx, t, container, mongoPort) +} + +func objectIDFromHex(t *testing.T, hex string) primitive.ObjectID { + t.Helper() + + id, err := primitive.ObjectIDFromHex(hex) + if err != nil { + t.Fatalf("error parsing hex string to primitive.ObjectID: %v", err) + } + + return id +} + +func newClient(ctx context.Context, t *testing.T, uri string) *mongo.Client { + t.Helper() + + opts := options.Client().ApplyURI(uri) + + client, err := mongo.Connect(ctx, opts) + if err != nil { + t.Fatalf("error connecting to MongoDB: %v", err) + } + + t.Cleanup(func() { + if err := client.Disconnect(ctx); err != nil { + t.Fatalf("error disconnecting from MongoDB: %v", err) + } + }) + + if err := client.Ping(ctx, readpref.Primary()); err != nil { + t.Fatalf("error pinging MongoDB: %v", err) + } + + return client +} + +func dropCollection(ctx context.Context, t *testing.T, collection *mongo.Collection) { + t.Helper() + + if err := collection.Drop(ctx); err != nil { + t.Fatalf("error dropping collection: %v", err) + } +} + +func readDocuments( + ctx context.Context, + t *testing.T, + collection *mongo.Collection, +) []bson.M { + t.Helper() + + cursor, err := collection.Find(ctx, bson.M{}) + if err != nil { + t.Fatalf("error finding documents: %v", err) + } + + var documents []bson.M + if err = cursor.All(ctx, &documents); err != nil { + t.Fatalf("error decoding documents: %v", err) + } + + return documents +} + +func writeDocuments( + ctx context.Context, + t *testing.T, + collection *mongo.Collection, + documents []any, +) { + t.Helper() + + if _, err := collection.InsertMany(ctx, documents); err != nil { + t.Fatalf("error inserting documents: %v", err) + } +} diff --git 
a/sdks/go/test/integration/io/mongodbio/mongodbio_test.go b/sdks/go/test/integration/io/mongodbio/mongodbio_test.go new file mode 100644 index 0000000000000..b8885e7c728dc --- /dev/null +++ b/sdks/go/test/integration/io/mongodbio/mongodbio_test.go @@ -0,0 +1,237 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mongodbio + +import ( + "context" + "flag" + "fmt" + "reflect" + "testing" + + "github.com/apache/beam/sdks/v2/go/pkg/beam" + "github.com/apache/beam/sdks/v2/go/pkg/beam/io/mongodbio" + _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow" + _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/flink" + _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/samza" + _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/spark" + "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert" + "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" + "github.com/apache/beam/sdks/v2/go/test/integration" + "github.com/google/go-cmp/cmp" + "go.mongodb.org/mongo-driver/bson" + "go.mongodb.org/mongo-driver/bson/primitive" +) + +func init() { + beam.RegisterType(reflect.TypeOf((*docWithObjectID)(nil)).Elem()) + beam.RegisterType(reflect.TypeOf((*docWithStringID)(nil)).Elem()) +} + +type docWithObjectID struct { + ID primitive.ObjectID `bson:"_id"` + Field1 int32 `bson:"field1"` +} + +type docWithStringID struct { + ID string `bson:"_id"` + Field1 int32 `bson:"field1"` +} + +func TestMongoDBIO_Read(t *testing.T) { + integration.CheckFilters(t) + + ctx := context.Background() + port := setUpTestContainer(ctx, t) + uri := fmt.Sprintf("mongodb://%s:%s", "localhost", port) + + tests := []struct { + name string + input []any + t reflect.Type + options []mongodbio.ReadOptionFn + want []any + }{ + { + name: "Read documents from MongoDB with id of type primitive.ObjectID", + input: []any{ + bson.M{"_id": objectIDFromHex(t, "61cf9980dd2d24dc5cf28620"), "field1": int32(0)}, + bson.M{"_id": objectIDFromHex(t, "61cf9980dd2d24dc5cf28621"), "field1": int32(1)}, + bson.M{"_id": objectIDFromHex(t, "61cf9980dd2d24dc5cf28622"), "field1": int32(2)}, + }, + t: reflect.TypeOf(docWithObjectID{}), + want: []any{ + docWithObjectID{ID: objectIDFromHex(t, "61cf9980dd2d24dc5cf28620"), Field1: 0}, + docWithObjectID{ID: objectIDFromHex(t, "61cf9980dd2d24dc5cf28621"), Field1: 1}, + docWithObjectID{ID: objectIDFromHex(t, "61cf9980dd2d24dc5cf28622"), Field1: 2}, + }, + }, + { + name: "Read documents from MongoDB with id of type string", + input: []any{ + bson.M{"_id": "id01", "field1": int32(0)}, + bson.M{"_id": "id02", "field1": int32(1)}, + bson.M{"_id": "id03", "field1": int32(2)}, + }, + t: reflect.TypeOf(docWithStringID{}), + want: []any{ + docWithStringID{ID: "id01", Field1: 0}, + docWithStringID{ID: "id02", Field1: 1}, + docWithStringID{ID: "id03", 
Field1: 2}, + }, + }, + { + name: "Read documents from MongoDB where filter matches", + input: []any{ + bson.M{"_id": objectIDFromHex(t, "61cf9980dd2d24dc5cf28620"), "field1": int32(0)}, + bson.M{"_id": objectIDFromHex(t, "61cf9980dd2d24dc5cf28621"), "field1": int32(1)}, + bson.M{"_id": objectIDFromHex(t, "61cf9980dd2d24dc5cf28622"), "field1": int32(2)}, + }, + t: reflect.TypeOf(docWithObjectID{}), + options: []mongodbio.ReadOptionFn{ + mongodbio.WithReadFilter(bson.M{"field1": bson.M{"$gt": 0}}), + }, + want: []any{ + docWithObjectID{ID: objectIDFromHex(t, "61cf9980dd2d24dc5cf28621"), Field1: 1}, + docWithObjectID{ID: objectIDFromHex(t, "61cf9980dd2d24dc5cf28622"), Field1: 2}, + }, + }, + { + name: "Read documents from MongoDB with bucketAuto aggregation", + input: []any{ + bson.M{"_id": objectIDFromHex(t, "61cf9980dd2d24dc5cf28620"), "field1": int32(0)}, + bson.M{"_id": objectIDFromHex(t, "61cf9980dd2d24dc5cf28621"), "field1": int32(1)}, + bson.M{"_id": objectIDFromHex(t, "61cf9980dd2d24dc5cf28622"), "field1": int32(2)}, + }, + t: reflect.TypeOf(docWithObjectID{}), + options: []mongodbio.ReadOptionFn{ + mongodbio.WithReadBucketAuto(true), + }, + want: []any{ + docWithObjectID{ID: objectIDFromHex(t, "61cf9980dd2d24dc5cf28620"), Field1: 0}, + docWithObjectID{ID: objectIDFromHex(t, "61cf9980dd2d24dc5cf28621"), Field1: 1}, + docWithObjectID{ID: objectIDFromHex(t, "61cf9980dd2d24dc5cf28622"), Field1: 2}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + database := "db" + collection := "coll" + + client := newClient(ctx, t, uri) + mongoCollection := client.Database(database).Collection(collection) + + t.Cleanup(func() { + dropCollection(ctx, t, mongoCollection) + }) + + writeDocuments(ctx, t, mongoCollection, tt.input) + + p, s := beam.NewPipelineWithRoot() + + got := mongodbio.Read(s, uri, database, collection, tt.t, tt.options...) + + passert.Equals(s, got, tt.want...) 
+ ptest.RunAndValidate(t, p) + }) + } +} + +func TestMongoDBIO_Write(t *testing.T) { + integration.CheckFilters(t) + + ctx := context.Background() + port := setUpTestContainer(ctx, t) + uri := fmt.Sprintf("mongodb://%s:%s", "localhost", port) + + tests := []struct { + name string + input []any + options []mongodbio.WriteOptionFn + wantIDs []any + wantDocs []bson.M + }{ + { + name: "Write documents to MongoDB with id of type primitive.ObjectID", + input: []any{ + docWithObjectID{ID: objectIDFromHex(t, "61cf9980dd2d24dc5cf28620"), Field1: 0}, + docWithObjectID{ID: objectIDFromHex(t, "61cf9980dd2d24dc5cf28621"), Field1: 1}, + docWithObjectID{ID: objectIDFromHex(t, "61cf9980dd2d24dc5cf28622"), Field1: 2}, + }, + wantIDs: []any{ + objectIDFromHex(t, "61cf9980dd2d24dc5cf28620"), + objectIDFromHex(t, "61cf9980dd2d24dc5cf28621"), + objectIDFromHex(t, "61cf9980dd2d24dc5cf28622"), + }, + wantDocs: []bson.M{ + {"_id": objectIDFromHex(t, "61cf9980dd2d24dc5cf28620"), "field1": int32(0)}, + {"_id": objectIDFromHex(t, "61cf9980dd2d24dc5cf28621"), "field1": int32(1)}, + {"_id": objectIDFromHex(t, "61cf9980dd2d24dc5cf28622"), "field1": int32(2)}, + }, + }, + { + name: "Write documents to MongoDB with id of type string", + input: []any{ + docWithStringID{ID: "id01", Field1: 0}, + docWithStringID{ID: "id02", Field1: 1}, + docWithStringID{ID: "id03", Field1: 2}, + }, + wantIDs: []any{ + "id01", + "id02", + "id03", + }, + wantDocs: []bson.M{ + {"_id": "id01", "field1": int32(0)}, + {"_id": "id02", "field1": int32(1)}, + {"_id": "id03", "field1": int32(2)}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + database := "db" + collection := "coll" + + client := newClient(ctx, t, uri) + mongoCollection := client.Database(database).Collection(collection) + + t.Cleanup(func() { + dropCollection(ctx, t, mongoCollection) + }) + + p, s := beam.NewPipelineWithRoot() + + col := beam.CreateList(s, tt.input) + gotIDs := mongodbio.Write(s, uri, database, collection, col, tt.options...) + + passert.Equals(s, gotIDs, tt.wantIDs...) + ptest.RunAndValidate(t, p) + + if gotDocs := readDocuments(ctx, t, mongoCollection); !cmp.Equal(gotDocs, tt.wantDocs) { + t.Errorf("readDocuments() = %v, want %v", gotDocs, tt.wantDocs) + } + }) + } +} + +func TestMain(m *testing.M) { + flag.Parse() + beam.Init() + + ptest.MainRet(m) +} diff --git a/sdks/go/test/integration/io/xlang/debezium/debezium_test.go b/sdks/go/test/integration/io/xlang/debezium/debezium_test.go index d347e4436f2e8..24c2b513b2b2f 100644 --- a/sdks/go/test/integration/io/xlang/debezium/debezium_test.go +++ b/sdks/go/test/integration/io/xlang/debezium/debezium_test.go @@ -29,9 +29,14 @@ import ( _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/spark" "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" "github.com/apache/beam/sdks/v2/go/test/integration" - "github.com/docker/go-connections/nat" + "github.com/apache/beam/sdks/v2/go/test/integration/internal/containers" _ "github.com/lib/pq" - "github.com/testcontainers/testcontainers-go" +) + +const ( + debeziumImage = "debezium/example-postgres:latest" + debeziumPort = "5432/tcp" + maxRetries = 5 ) var expansionAddr string // Populate with expansion address labelled "debeziumio". 
@@ -42,35 +47,25 @@ func checkFlags(t *testing.T) { } } -func setupTestContainer(t *testing.T, dbname, username, password string) string { +func setupTestContainer(ctx context.Context, t *testing.T, dbname, username, password string) string { t.Helper() - var env = map[string]string{ + env := map[string]string{ "POSTGRES_PASSWORD": password, "POSTGRES_USER": username, "POSTGRES_DB": dbname, } - var port = "5432/tcp" - req := testcontainers.GenericContainerRequest{ - ContainerRequest: testcontainers.ContainerRequest{ - Image: "debezium/example-postgres:latest", - ExposedPorts: []string{port}, - Env: env, - }, - Started: true, - } - ctx := context.Background() - container, err := testcontainers.GenericContainer(ctx, req) - if err != nil { - t.Fatalf("failed to start container: %v", err) - } + container := containers.NewContainer( + ctx, + t, + debeziumImage, + maxRetries, + containers.WithEnv(env), + containers.WithPorts([]string{debeziumPort}), + ) - mappedPort, err := container.MappedPort(ctx, nat.Port(port)) - if err != nil { - t.Fatalf("failed to get container external port: %v", err) - } - return mappedPort.Port() + return containers.Port(ctx, t, container, debeziumPort) } // TestDebeziumIO_BasicRead tests basic read transform from Debezium. @@ -78,10 +73,11 @@ func TestDebeziumIO_BasicRead(t *testing.T) { integration.CheckFilters(t) checkFlags(t) + ctx := context.Background() dbname := "inventory" username := "debezium" password := "dbz" - port := setupTestContainer(t, dbname, username, password) + port := setupTestContainer(ctx, t, dbname, username, password) host := "localhost" connectionProperties := []string{ "database.dbname=inventory", diff --git a/sdks/go/test/integration/io/xlang/jdbc/jdbc_test.go b/sdks/go/test/integration/io/xlang/jdbc/jdbc_test.go index 27021308a781e..0eddc3e788d24 100644 --- a/sdks/go/test/integration/io/xlang/jdbc/jdbc_test.go +++ b/sdks/go/test/integration/io/xlang/jdbc/jdbc_test.go @@ -30,16 +30,20 @@ import ( _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/spark" "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" "github.com/apache/beam/sdks/v2/go/test/integration" + "github.com/apache/beam/sdks/v2/go/test/integration/internal/containers" "github.com/docker/go-connections/nat" _ "github.com/go-sql-driver/mysql" _ "github.com/lib/pq" - "github.com/testcontainers/testcontainers-go" "github.com/testcontainers/testcontainers-go/wait" - "gopkg.in/retry.v1" +) + +const ( + postgresImage = "postgres" + postgresPort = "5432/tcp" + maxRetries = 5 ) var expansionAddr string // Populate with expansion address labelled "schemaio". 
-const maxRetryCount = 5 func checkFlags(t *testing.T) { if expansionAddr == "" { @@ -47,55 +51,35 @@ func checkFlags(t *testing.T) { } } -func setupTestContainer(t *testing.T, ctx context.Context, dbname, username, password string) (testcontainers.Container, int) { +func setupTestContainer(ctx context.Context, t *testing.T, dbname, username, password string) string { t.Helper() - var env = map[string]string{ + env := map[string]string{ "POSTGRES_PASSWORD": password, "POSTGRES_USER": username, "POSTGRES_DB": dbname, } + hostname := "localhost" - var port = "5432/tcp" dbURL := func(host string, port nat.Port) string { return fmt.Sprintf("postgres://%s:%s@%s:%s/%s?sslmode=disable", username, password, host, port.Port(), dbname) } - - req := testcontainers.GenericContainerRequest{ - ContainerRequest: testcontainers.ContainerRequest{ - Image: "postgres", - ExposedPorts: []string{port}, - Env: env, - Hostname: "localhost", - WaitingFor: wait.ForSQL(nat.Port(port), "postgres", dbURL).Timeout(time.Second * 5), - }, - Started: true, - } - - strategy := retry.LimitCount(maxRetryCount, - retry.Exponential{ - Initial: time.Second, - Factor: 2, - }, + waitStrategy := wait.ForSQL(postgresPort, "postgres", dbURL).WithStartupTimeout(time.Second * 5) + + container := containers.NewContainer( + ctx, + t, + postgresImage, + maxRetries, + containers.WithPorts([]string{postgresPort}), + containers.WithEnv(env), + containers.WithHostname(hostname), + containers.WithWaitStrategy(waitStrategy), ) - var container testcontainers.Container - var err error - for r := retry.Start(strategy, nil); r.Next(); { - container, err = testcontainers.GenericContainer(ctx, req) - if err == nil { - break - } - if r.Count() == maxRetryCount { - t.Fatalf("failed to start container with %v retries: %v", maxRetryCount, err) - } - } - mappedPort, err := container.MappedPort(ctx, nat.Port(port)) - if err != nil { - t.Fatalf("failed to get container external port: %s", err) - } + mappedPort := containers.Port(ctx, t, container, postgresPort) - url := fmt.Sprintf("postgres://%s:%s@localhost:%s/%s?sslmode=disable", username, password, mappedPort.Port(), dbname) + url := fmt.Sprintf("postgres://%s:%s@localhost:%s/%s?sslmode=disable", username, password, mappedPort, dbname) db, err := sql.Open("postgres", url) if err != nil { t.Fatalf("failed to establish database connection: %s", err) @@ -106,7 +90,8 @@ func setupTestContainer(t *testing.T, ctx context.Context, dbname, username, pas if err != nil { t.Fatalf("can't create table, check command and access level") } - return container, mappedPort.Int() + + return mappedPort } // TestJDBCIO_BasicReadWrite tests basic read and write transform from JDBC. 
@@ -119,11 +104,10 @@ func TestJDBCIO_BasicReadWrite(t *testing.T) { username := "newuser" password := "password" - cont, port := setupTestContainer(t, ctx, dbname, username, password) - defer cont.Terminate(ctx) + port := setupTestContainer(ctx, t, dbname, username, password) tableName := "roles" host := "localhost" - jdbcUrl := fmt.Sprintf("jdbc:postgresql://%s:%d/%s", host, port, dbname) + jdbcUrl := fmt.Sprintf("jdbc:postgresql://%s:%s/%s", host, port, dbname) write := WritePipeline(expansionAddr, tableName, "org.postgresql.Driver", jdbcUrl, username, password) ptest.RunAndValidate(t, write) @@ -141,11 +125,10 @@ func TestJDBCIO_PostgresReadWrite(t *testing.T) { username := "newuser" password := "password" ctx := context.Background() - cont, port := setupTestContainer(t, ctx, dbname, username, password) - defer cont.Terminate(ctx) + port := setupTestContainer(ctx, t, dbname, username, password) tableName := "roles" host := "localhost" - jdbcUrl := fmt.Sprintf("jdbc:postgresql://%s:%d/%s", host, port, dbname) + jdbcUrl := fmt.Sprintf("jdbc:postgresql://%s:%s/%s", host, port, dbname) write := WriteToPostgres(expansionAddr, tableName, jdbcUrl, username, password) ptest.RunAndValidate(t, write) diff --git a/sdks/go/test/run_validatesrunner_tests.sh b/sdks/go/test/run_validatesrunner_tests.sh index 0d0531c823471..444dc1ae39a71 100755 --- a/sdks/go/test/run_validatesrunner_tests.sh +++ b/sdks/go/test/run_validatesrunner_tests.sh @@ -65,9 +65,6 @@ # example in the format "us.gcr.io/". # --region -> GCP region to run Dataflow jobs on. # --gcs_location -> GCS URL for storing temporary files for Dataflow jobs. -# --dataflow_worker_jar -> The Dataflow worker jar to use when running jobs. -# If not specified, the script attempts to retrieve a previously built -# jar from the appropriate gradle module, which may not succeed. set -e trap '! [[ "$BASH_COMMAND" =~ ^(echo|read|if|ARGS|shift|SOCKET_SCRIPT|\[\[) ]] && \ @@ -162,11 +159,6 @@ case $key in shift # past argument shift # past value ;; - --dataflow_worker_jar) - DATAFLOW_WORKER_JAR="$2" - shift # past argument - shift # past value - ;; --flink_job_server_jar) FLINK_JOB_SERVER_JAR="$2" shift # past argument @@ -266,12 +258,7 @@ s.close() " # Set up environment based on runner. 
-if [[ "$RUNNER" == "dataflow" ]]; then - if [[ -z "$DATAFLOW_WORKER_JAR" ]]; then - DATAFLOW_WORKER_JAR=$(find $(pwd)/runners/google-cloud-dataflow-java/worker/build/libs/beam-runners-google-cloud-dataflow-java-fn-api-worker-*.jar) - fi - echo "Using Dataflow worker jar: $DATAFLOW_WORKER_JAR" -elif [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "samza" || "$RUNNER" == "portable" ]]; then +if [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "samza" || "$RUNNER" == "portable" ]]; then if [[ -z "$ENDPOINT" ]]; then JOB_PORT=$(python3 -c "$SOCKET_SCRIPT") ENDPOINT="localhost:$JOB_PORT" @@ -417,7 +404,6 @@ ARGS="$ARGS --environment_type=DOCKER" ARGS="$ARGS --environment_config=$CONTAINER:$TAG" ARGS="$ARGS --staging_location=$GCS_LOCATION/staging-validatesrunner-test/$GCS_SUBFOLDER" ARGS="$ARGS --temp_location=$GCS_LOCATION/temp-validatesrunner-test/$GCS_SUBFOLDER" -ARGS="$ARGS --dataflow_worker_jar=$DATAFLOW_WORKER_JAR" ARGS="$ARGS --endpoint=$ENDPOINT" if [[ -n "$TEST_EXPANSION_ADDR" ]]; then ARGS="$ARGS --test_expansion_addr=$TEST_EXPANSION_ADDR" diff --git a/sdks/java/container/Dockerfile b/sdks/java/container/Dockerfile index c29b7f7910be9..b941aee453d40 100644 --- a/sdks/java/container/Dockerfile +++ b/sdks/java/container/Dockerfile @@ -23,6 +23,9 @@ ARG pull_licenses ADD target/slf4j-api.jar /opt/apache/beam/jars/ ADD target/slf4j-jdk14.jar /opt/apache/beam/jars/ +ADD target/jcl-over-slf4j.jar /opt/apache/beam/jars/ +ADD target/log4j-over-slf4j.jar /opt/apache/beam/jars/ +ADD target/log4j-to-slf4j.jar /opt/apache/beam/jars/ ADD target/beam-sdks-java-harness.jar /opt/apache/beam/jars/ # Required to run cross-language pipelines with KafkaIO diff --git a/sdks/java/container/boot.go b/sdks/java/container/boot.go index 5fa85c77dd5b2..63dd1176d3703 100644 --- a/sdks/java/container/boot.go +++ b/sdks/java/container/boot.go @@ -137,6 +137,9 @@ func main() { cp := []string{ filepath.Join(jarsDir, "slf4j-api.jar"), filepath.Join(jarsDir, "slf4j-jdk14.jar"), + filepath.Join(jarsDir, "jcl-over-slf4j.jar"), + filepath.Join(jarsDir, "log4j-over-slf4j.jar"), + filepath.Join(jarsDir, "log4j-to-slf4j.jar"), filepath.Join(jarsDir, "beam-sdks-java-harness.jar"), filepath.Join(jarsDir, "beam-sdks-java-io-kafka.jar"), filepath.Join(jarsDir, "kafka-clients.jar"), diff --git a/sdks/java/container/build.gradle b/sdks/java/container/build.gradle index 552cc8de98cd3..98a6265956037 100644 --- a/sdks/java/container/build.gradle +++ b/sdks/java/container/build.gradle @@ -38,6 +38,9 @@ configurations { dependencies { dockerDependency library.java.slf4j_api dockerDependency library.java.slf4j_jdk14 + dockerDependency library.java.jcl_over_slf4j + dockerDependency library.java.log4j_over_slf4j + dockerDependency library.java.log4j2_to_slf4j dockerDependency project(path: ":sdks:java:harness", configuration: "shadow") // For executing KafkaIO, e.g. 
as an external transform dockerDependency project(":sdks:java:io:kafka") diff --git a/sdks/java/container/common.gradle b/sdks/java/container/common.gradle index 265d14fbe9c7d..1ec0da0098b52 100644 --- a/sdks/java/container/common.gradle +++ b/sdks/java/container/common.gradle @@ -48,6 +48,9 @@ task copyDockerfileDependencies(type: Copy) { from configurations.dockerDependency rename 'slf4j-api.*', 'slf4j-api.jar' rename 'slf4j-jdk14.*', 'slf4j-jdk14.jar' + rename 'jcl-over-slf4j.*', 'jcl-over-slf4j.jar' + rename 'log4j-over-slf4j.*', 'log4j-over-slf4j.jar' + rename 'log4j-to-slf4j.*', 'log4j-to-slf4j.jar' if (imageJavaVersion == "11" || imageJavaVersion == "17") { rename 'beam-sdks-java-container-agent.*.jar', 'open-module-agent.jar' } @@ -80,8 +83,10 @@ task copyGolangLicenses(type: Copy) { task copyJdkOptions(type: Copy) { if (imageJavaVersion == "17" || imageJavaVersion == "11") { from "option-jamm.json" - into "build/target/options" } + from "java${imageJavaVersion}-security.properties" + from "option-java${imageJavaVersion}-security.json" + into "build/target/options" } task skipPullLicenses(type: Exec) { @@ -129,4 +134,4 @@ dockerPrepare.dependsOn copySdkHarnessLauncher dockerPrepare.dependsOn copyDockerfileDependencies dockerPrepare.dependsOn ":sdks:java:container:downloadCloudProfilerAgent" dockerPrepare.dependsOn copyJdkOptions -dockerPrepare.dependsOn validateJavaHome \ No newline at end of file +dockerPrepare.dependsOn validateJavaHome diff --git a/sdks/java/container/java11/java11-security.properties b/sdks/java/container/java11/java11-security.properties new file mode 100644 index 0000000000000..caf64592c400a --- /dev/null +++ b/sdks/java/container/java11/java11-security.properties @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Java 11 java.security properties file override for JVM +# base properties derived from: +# openjdk version "11.0.16" 2022-07-19 +# OpenJDK Runtime Environment 18.9 (build 11.0.16+8) +# OpenJDK 64-Bit Server VM 18.9 (build 11.0.16+8, mixed mode, sharing) + +# Java has now disabled TLSv1 and TLSv1.1. We specifically put it in the +# legacy algorithms list to allow it to be used if something better is not +# available (e.g. TLSv1.2). This will prevent breakages for existing users +# (for example JDBC with MySQL). See +# https://bugs.java.com/bugdatabase/view_bug.do?bug_id=JDK-8202343 +# for additional details. 
+jdk.tls.disabledAlgorithms=SSLv3, RC4, DES, MD5withRSA, \ + DH keySize < 1024, EC keySize < 224, 3DES_EDE_CBC, anon, NULL, \ + include jdk.disabled.namedCurves + +jdk.tls.legacyAlgorithms= \ + K_NULL, C_NULL, M_NULL, \ + DH_anon, ECDH_anon, \ + RC4_128, RC4_40, DES_CBC, DES40_CBC, \ + 3DES_EDE_CBC, TLSv1, TLSv1.1 + +# /dev/random blocks in virtualized environments due to lack of +# good entropy sources, which makes SecureRandom use impractical. +# In particular, that affects the performance of HTTPS that relies +# on SecureRandom. +# +# Due to that, /dev/urandom is used as the default. +# +# See http://www.2uo.de/myths-about-urandom/ for some background +# on security of /dev/urandom on Linux. +securerandom.source=file:/dev/./urandom \ No newline at end of file diff --git a/sdks/java/container/java11/option-java11-security.json b/sdks/java/container/java11/option-java11-security.json new file mode 100644 index 0000000000000..a8ad9672a3fc1 --- /dev/null +++ b/sdks/java/container/java11/option-java11-security.json @@ -0,0 +1,9 @@ +{ + "name": "java-security", + "enabled": true, + "options": { + "properties": { + "java.security.properties": "/opt/apache/beam/options/java11-security.properties" + } + } +} diff --git a/sdks/java/container/java17/java17-security.properties b/sdks/java/container/java17/java17-security.properties new file mode 100644 index 0000000000000..ec2a5c039cb90 --- /dev/null +++ b/sdks/java/container/java17/java17-security.properties @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Java 17 java.security properties file override for JVM +# base properties derived from: +# openjdk version "17.0.2" 2022-01-18 +# OpenJDK Runtime Environment (build 17.0.2+8-86) +# OpenJDK 64-Bit Server VM (build 17.0.2+8-86, mixed mode, sharing) + +# Java has now disabled TLSv1 and TLSv1.1. We specifically put it in the +# legacy algorithms list to allow it to be used if something better is not +# available (e.g. TLSv1.2). This will prevent breakages for existing users +# (for example JDBC with MySQL). See +# https://bugs.java.com/bugdatabase/view_bug.do?bug_id=JDK-8202343 +# for additional details. +jdk.tls.disabledAlgorithms=SSLv3, RC4, DES, MD5withRSA, \ + DH keySize < 1024, EC keySize < 224, 3DES_EDE_CBC, anon, NULL + +# The raw value from 17.0.2 for legacyAlgorithms is +# NULL, anon, RC4, DES, 3DES_EDE_CBC +# Because these values are in disabledAlgorithms, it is erroneous to include +# them in legacy (they are disabled in Java 8 and Java 11 as well). Here we +# only include TLSv1 and TLSv1.1 which were removed from disabledAlgorithms +jdk.tls.legacyAlgorithms=TLSv1, TLSv1.1 + +# /dev/random blocks in virtualized environments due to lack of +# good entropy sources, which makes SecureRandom use impractical. 
+# In particular, that affects the performance of HTTPS that relies +# on SecureRandom. +# +# Due to that, /dev/urandom is used as the default. +# +# See http://www.2uo.de/myths-about-urandom/ for some background +# on security of /dev/urandom on Linux. +securerandom.source=file:/dev/./urandom \ No newline at end of file diff --git a/sdks/java/container/java17/option-java17-security.json b/sdks/java/container/java17/option-java17-security.json new file mode 100644 index 0000000000000..979d4be90d1e4 --- /dev/null +++ b/sdks/java/container/java17/option-java17-security.json @@ -0,0 +1,9 @@ +{ + "name": "java-security", + "enabled": true, + "options": { + "properties": { + "java.security.properties": "/opt/apache/beam/options/java17-security.properties" + } + } +} diff --git a/sdks/java/container/java8/java8-security.properties b/sdks/java/container/java8/java8-security.properties new file mode 100644 index 0000000000000..f637d3ef75674 --- /dev/null +++ b/sdks/java/container/java8/java8-security.properties @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Java 8 java.security properties file override for JVM +# base properties derived from: +# openjdk version "1.8.0_342" +# OpenJDK Runtime Environment (build 1.8.0_342-b07) +# OpenJDK 64-Bit Server VM (build 25.342-b07, mixed mode) + +# Java has now disabled TLSv1 and TLSv1.1. We specifically put it in the +# legacy algorithms list to allow it to be used if something better is not +# available (e.g. TLSv1.2). This will prevent breakages for existing users +# (for example JDBC with MySQL). See +# https://bugs.java.com/bugdatabase/view_bug.do?bug_id=JDK-8202343 +# for additional details. +jdk.tls.disabledAlgorithms=SSLv3, RC4, DES, MD5withRSA, \ + DH keySize < 1024, EC keySize < 224, 3DES_EDE_CBC, anon, NULL, \ + include jdk.disabled.namedCurves + +jdk.tls.legacyAlgorithms= \ + K_NULL, C_NULL, M_NULL, \ + DH_anon, ECDH_anon, \ + RC4_128, RC4_40, DES_CBC, DES40_CBC, \ + 3DES_EDE_CBC, TLSv1, TLSv1.1 + +# /dev/random blocks in virtualized environments due to lack of +# good entropy sources, which makes SecureRandom use impractical. +# In particular, that affects the performance of HTTPS that relies +# on SecureRandom. +# +# Due to that, /dev/urandom is used as the default. +# +# See http://www.2uo.de/myths-about-urandom/ for some background +# on security of /dev/urandom on Linux. 
+securerandom.source=file:/dev/./urandom \ No newline at end of file diff --git a/sdks/java/container/java8/option-java8-security.json b/sdks/java/container/java8/option-java8-security.json new file mode 100644 index 0000000000000..47f2938bf7cd3 --- /dev/null +++ b/sdks/java/container/java8/option-java8-security.json @@ -0,0 +1,9 @@ +{ + "name": "java-security", + "enabled": true, + "options": { + "properties": { + "java.security.properties": "/opt/apache/beam/options/java8-security.properties" + } + } +} diff --git a/sdks/java/core/build.gradle b/sdks/java/core/build.gradle index 890d44a28e192..2e172ec50c078 100644 --- a/sdks/java/core/build.gradle +++ b/sdks/java/core/build.gradle @@ -26,11 +26,13 @@ applyJavaNature( ], shadowClosure: { dependencies { - include(dependency("org.apache.commons:.*")) + include(dependency(library.java.commons_compress)) + include(dependency(library.java.commons_lang3)) include(dependency(library.java.antlr_runtime)) } relocate "com.google.thirdparty", getJavaRelocatedPath("com.google.thirdparty") - relocate "org.apache.commons", getJavaRelocatedPath("org.apache.commons") + relocate "org.apache.commons.compress", getJavaRelocatedPath("org.apache.commons.compress") + relocate "org.apache.commons.lang3", getJavaRelocatedPath("org.apache.commons.lang3") relocate "org.antlr.v4", getJavaRelocatedPath("org.antlr.v4") }, ) @@ -114,6 +116,9 @@ dependencies { shadowTest library.java.quickcheck_generators shadowTest library.java.avro_tests shadowTest library.java.zstd_jni + shadowTest library.java.commons_logging + shadowTest library.java.log4j + shadowTest library.java.log4j2_api shadowTest library.java.jamm testRuntimeOnly library.java.slf4j_jdk14 } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BlockBasedSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BlockBasedSource.java index 1f8501b571e05..e2c6262280349 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BlockBasedSource.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/BlockBasedSource.java @@ -140,7 +140,7 @@ protected abstract static class Block { * byte of the block is within the range {@code [start, end)}. */ @Experimental(Kind.SOURCE_SINK) - protected abstract static class BlockBasedReader extends FileBasedReader { + public abstract static class BlockBasedReader extends FileBasedReader { private boolean atSplitPoint; protected BlockBasedReader(BlockBasedSource source) { @@ -195,7 +195,7 @@ public final T getCurrent() throws NoSuchElementException { * block boundaries. */ @Override - protected boolean isAtSplitPoint() { + public boolean isAtSplitPoint() { return atSplitPoint; } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/DefaultFilenamePolicy.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/DefaultFilenamePolicy.java index 7556c32d2a651..5803f450aeaa4 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/DefaultFilenamePolicy.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/DefaultFilenamePolicy.java @@ -269,7 +269,7 @@ public static DefaultFilenamePolicy fromParams(Params params) { * ".txt", with shardNum = 1 and numShards = 100, the following is produced: * "path/to/output-001-of-100.txt". 
*/ - static ResourceId constructName( + public static ResourceId constructName( ResourceId baseFilename, String shardTemplate, String suffix, diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/ReadAllViaFileBasedSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/ReadAllViaFileBasedSource.java index 82eca9193fbf8..35819b60ebf97 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/ReadAllViaFileBasedSource.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/ReadAllViaFileBasedSource.java @@ -47,7 +47,7 @@ public class ReadAllViaFileBasedSource extends PTransform, PCollection> { - protected static final boolean DEFAULT_USES_RESHUFFLE = true; + public static final boolean DEFAULT_USES_RESHUFFLE = true; private final long desiredBundleSizeBytes; private final SerializableFunction> createSource; private final Coder coder; diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/Schema.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/Schema.java index df370b4f85422..bf0b537ad9350 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/Schema.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/Schema.java @@ -999,6 +999,13 @@ public final String toString() { builder.append(getMapValueType().toString()); builder.append(">"); break; + case LOGICAL_TYPE: + builder.append("LOGICAL_TYPE<"); + if (getLogicalType() != null) { + builder.append(getLogicalType().getIdentifier()); + } + builder.append(">"); + break; default: builder.append(getTypeName().toString()); } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaTranslation.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaTranslation.java index f79db31bf7ec1..0a96801856261 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaTranslation.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaTranslation.java @@ -56,6 +56,7 @@ import org.apache.beam.sdk.util.SerializableUtils; import org.apache.beam.sdk.values.Row; import org.apache.beam.vendor.grpc.v1p48p1.com.google.protobuf.ByteString; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables; @@ -63,6 +64,8 @@ import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.io.ByteStreams; import org.apache.commons.lang3.ClassUtils; import org.checkerframework.checker.nullness.qual.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** Utility methods for translating schemas. 
*/ @Experimental(Kind.SCHEMAS) @@ -71,6 +74,7 @@ "rawtypes" }) public class SchemaTranslation { + private static final Logger LOG = LoggerFactory.getLogger(SchemaTranslation.class); private static final String URN_BEAM_LOGICAL_DECIMAL = FixedPrecisionNumeric.BASE_IDENTIFIER; private static final String URN_BEAM_LOGICAL_JAVASDK = "beam:logical_type:javasdk:v1"; @@ -124,8 +128,8 @@ private static SchemaApi.Field fieldToProto( .build(); } - private static SchemaApi.FieldType fieldTypeToProto( - FieldType fieldType, boolean serializeLogicalType) { + @VisibleForTesting + static SchemaApi.FieldType fieldTypeToProto(FieldType fieldType, boolean serializeLogicalType) { SchemaApi.FieldType.Builder builder = SchemaApi.FieldType.newBuilder(); switch (fieldType.getTypeName()) { case ROW: @@ -297,7 +301,8 @@ private static Field fieldFromProto(SchemaApi.Field protoField) { .withDescription(protoField.getDescription()); } - private static FieldType fieldTypeFromProto(SchemaApi.FieldType protoFieldType) { + @VisibleForTesting + static FieldType fieldTypeFromProto(SchemaApi.FieldType protoFieldType) { FieldType fieldType = fieldTypeFromProtoWithoutNullable(protoFieldType); if (protoFieldType.getNullable()) { @@ -426,26 +431,32 @@ private static FieldType fieldTypeFromProtoWithoutNullable(SchemaApi.FieldType p return FieldType.DATETIME; } else if (urn.equals(URN_BEAM_LOGICAL_DECIMAL)) { return FieldType.DECIMAL; - } else if (urn.equals(URN_BEAM_LOGICAL_JAVASDK)) { - return FieldType.logicalType( - (LogicalType) - SerializableUtils.deserializeFromByteArray( - logicalType.getPayload().toByteArray(), "logicalType")); - } else { - @Nullable FieldType argumentType = null; - @Nullable Object argumentValue = null; - if (logicalType.hasArgumentType()) { - argumentType = fieldTypeFromProto(logicalType.getArgumentType()); - argumentValue = fieldValueFromProto(argumentType, logicalType.getArgument()); + } else if (urn.startsWith("beam:logical_type:")) { + try { + return FieldType.logicalType( + (LogicalType) + SerializableUtils.deserializeFromByteArray( + logicalType.getPayload().toByteArray(), "logicalType")); + } catch (IllegalArgumentException e) { + LOG.warn( + "Unable to deserialize the logical type {} from proto. 
Mark as UnknownLogicalType.", + urn); } - return FieldType.logicalType( - new UnknownLogicalType( - urn, - logicalType.getPayload().toByteArray(), - argumentType, - argumentValue, - fieldTypeFromProto(logicalType.getRepresentation()))); } + // assemble an UnknownLogicalType + @Nullable FieldType argumentType = null; + @Nullable Object argumentValue = null; + if (logicalType.hasArgumentType()) { + argumentType = fieldTypeFromProto(logicalType.getArgumentType()); + argumentValue = fieldValueFromProto(argumentType, logicalType.getArgument()); + } + return FieldType.logicalType( + new UnknownLogicalType( + urn, + logicalType.getPayload().toByteArray(), + argumentType, + argumentValue, + fieldTypeFromProto(logicalType.getRepresentation()))); default: throw new IllegalArgumentException( "Unexpected type_info: " + protoFieldType.getTypeInfoCase()); diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/state/ValueState.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/state/ValueState.java index 0562c89dde448..ae6b5381af830 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/state/ValueState.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/state/ValueState.java @@ -19,6 +19,7 @@ import org.apache.beam.sdk.annotations.Experimental; import org.apache.beam.sdk.annotations.Experimental.Kind; +import org.checkerframework.checker.nullness.qual.Nullable; /** * A {@link ReadableState} cell containing a single value. @@ -26,10 +27,19 @@ * @param The type of value being stored. */ @Experimental(Kind.STATE) -public interface ValueState extends ReadableState, State { +public interface ValueState extends ReadableState<@Nullable T>, State { /** Set the value. */ void write(T input); + /** + * {@inheritDoc} + * + *
<p>
Note that {@code null} will be returned if the value has never been written. + */ + @Override + @Nullable + T read(); + @Override ValueState readLater(); } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/SdkHarnessEnvironmentTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/SdkHarnessEnvironmentTest.java index dd2d469fd4bee..d2735265a3914 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/SdkHarnessEnvironmentTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/SdkHarnessEnvironmentTest.java @@ -17,10 +17,24 @@ */ package org.apache.beam.sdk; +import static org.apache.beam.sdk.testing.ExpectedLogs.verifyLogged; +import static org.apache.beam.sdk.testing.ExpectedLogs.verifyNotLogged; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.hasItemInArray; +import static org.hamcrest.Matchers.not; +import static org.junit.Assert.assertNotNull; +import java.security.Security; +import java.util.logging.Level; +import java.util.logging.LogManager; +import javax.net.ssl.SSLContext; import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.options.SdkHarnessOptions; +import org.apache.beam.sdk.options.SdkHarnessOptions.LogLevel; +import org.apache.beam.sdk.options.SdkHarnessOptions.SdkHarnessLogLevelOverrides; +import org.apache.beam.sdk.testing.ExpectedLogs; +import org.apache.beam.sdk.testing.ExpectedLogs.LogSaver; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.testing.UsesSdkHarnessEnvironment; @@ -66,4 +80,120 @@ public void testJammAgentAvailable() throws Exception { PAssert.that(output).containsInAnyOrder("measured"); p.run().waitUntilFinish(); } + + /** {@link DoFn} used to validate that TLS was enabled as part of java security properties. */ + private static class TLSDoFn extends DoFn { + @ProcessElement + public void processElement(ProcessContext c) throws Exception { + String[] disabledAlgorithms = + Security.getProperty("jdk.tls.disabledAlgorithms").trim().split("\\s*,\\s*"); + String[] legacyAlgorithms = + Security.getProperty("jdk.tls.legacyAlgorithms").trim().split("\\s*,\\s*"); + assertThat(disabledAlgorithms, not(hasItemInArray("TLSv1"))); + assertThat(disabledAlgorithms, not(hasItemInArray("TLSv1.1"))); + assertThat(legacyAlgorithms, hasItemInArray("TLSv1")); + assertThat(legacyAlgorithms, hasItemInArray("TLSv1.1")); + + // getDefaultSSLParameters() shows all protocols that JSSE implements that are allowed. + // getSupportedSSLParameters() shows all protocols that JSSE implements including those that + // are disabled. 
+ SSLContext context = SSLContext.getInstance("TLS"); + context.init(null, null, null); + assertNotNull(context); + String[] defaultProtocols = context.getDefaultSSLParameters().getProtocols(); + assertThat(defaultProtocols, hasItemInArray("TLSv1")); + assertThat(defaultProtocols, hasItemInArray("TLSv1.1")); + + c.output("TLSv1-TLSv1.1 enabled"); + } + } + + @Test + @Category({ValidatesRunner.class, UsesSdkHarnessEnvironment.class}) + public void testTlsAvailable() throws Exception { + PCollection input = p.apply(Create.of("TLS").withCoder(StringUtf8Coder.of())); + + PCollection output = input.apply(ParDo.of(new TLSDoFn())); + + PAssert.that(output).containsInAnyOrder("TLSv1-TLSv1.1 enabled"); + + p.run().waitUntilFinish(); + } + + private static class LoggingDoFn extends DoFn { + @ProcessElement + public void processElement(@Element String element, OutputReceiver output) { + LogSaver logSaver = new LogSaver(); + LogManager.getLogManager().getLogger("").addHandler(logSaver); + + try { + Exception fooException = new RuntimeException("a.Foo-RuntimeException"); + // Test the different log levels for various named loggers. + final org.slf4j.Logger fooLogger = org.slf4j.LoggerFactory.getLogger("a.Foo"); + fooLogger.trace("a.Foo-Trace"); + fooLogger.debug("a.Foo-Debug"); + fooLogger.info("a.Foo-Info"); + fooLogger.warn("a.Foo-Warn"); + fooLogger.error("a.Foo-Error", fooException); + + Exception barException = new RuntimeException("a.b.Bar-RuntimeException"); + final org.slf4j.Logger barLogger = org.slf4j.LoggerFactory.getLogger("a.b.Bar"); + barLogger.trace("a.b.Bar-Trace"); + barLogger.debug("a.b.Bar-Debug"); + barLogger.info("a.b.Bar-Info"); + barLogger.warn("a.b.Bar-Warn"); + barLogger.error("a.b.Bar-Error", barException); + + // Test the different types of loggers (e.g. 
slf4j, jcl, jul, log4j, log4jc) + final org.slf4j.Logger slf4jLogger = org.slf4j.LoggerFactory.getLogger("logger.slf4j"); + slf4jLogger.info("SLF4J log messages work"); + final org.apache.commons.logging.Log jclLogger = + org.apache.commons.logging.LogFactory.getLog("logger.jcl"); + jclLogger.info("JCL log messages work"); + final java.util.logging.Logger julLogger = java.util.logging.Logger.getLogger("logger.jul"); + julLogger.info("JUL log messages work"); + final org.apache.log4j.Logger log4jLogger = + org.apache.log4j.Logger.getLogger("logger.log4j"); + log4jLogger.info("Log4j log messages work"); + final org.apache.logging.log4j.Logger log4j2Logger = + org.apache.logging.log4j.LogManager.getLogger("logger.log4j2"); + log4j2Logger.info("Log4j2 log messages work"); + + verifyNotLogged(ExpectedLogs.matcher(Level.FINEST, "a.Foo-Trace"), logSaver); + verifyLogged(ExpectedLogs.matcher(Level.FINE, "a.Foo-Debug"), logSaver); + verifyLogged(ExpectedLogs.matcher(Level.INFO, "a.Foo-Info"), logSaver); + verifyLogged(ExpectedLogs.matcher(Level.WARNING, "a.Foo-Warn"), logSaver); + verifyLogged(ExpectedLogs.matcher(Level.SEVERE, "a.Foo-Error", fooException), logSaver); + + verifyNotLogged(ExpectedLogs.matcher(Level.FINEST, "a.Foo-Trace"), logSaver); + verifyNotLogged(ExpectedLogs.matcher(Level.FINE, "a.b.Bar-Debug"), logSaver); + verifyNotLogged(ExpectedLogs.matcher(Level.INFO, "a.b.Bar-Info"), logSaver); + verifyLogged(ExpectedLogs.matcher(Level.WARNING, "a.b.Bar-Warn"), logSaver); + verifyLogged(ExpectedLogs.matcher(Level.SEVERE, "a.b.Bar-Error", barException), logSaver); + + verifyLogged(ExpectedLogs.matcher(Level.INFO, "SLF4J log messages work"), logSaver); + verifyLogged(ExpectedLogs.matcher(Level.INFO, "JCL log messages work"), logSaver); + verifyLogged(ExpectedLogs.matcher(Level.INFO, "JUL log messages work"), logSaver); + verifyLogged(ExpectedLogs.matcher(Level.INFO, "Log4j log messages work"), logSaver); + verifyLogged(ExpectedLogs.matcher(Level.INFO, "Log4j2 log messages work"), logSaver); + output.output(element); + } finally { + LogManager.getLogManager().getLogger("").removeHandler(logSaver); + } + } + } + + @Test + @Category({ValidatesRunner.class, UsesSdkHarnessEnvironment.class}) + public void testLogging() throws Exception { + p.getOptions().as(SdkHarnessOptions.class).setDefaultSdkHarnessLogLevel(LogLevel.DEBUG); + p.getOptions() + .as(SdkHarnessOptions.class) + .setSdkHarnessLogLevelOverrides( + new SdkHarnessLogLevelOverrides().addOverrideForName("a.b.Bar", LogLevel.WARN)); + PCollection input = p.apply(Create.of("Logging Works").withCoder(StringUtf8Coder.of())); + PCollection output = input.apply(ParDo.of(new LoggingDoFn())); + PAssert.that(output).containsInAnyOrder("Logging Works"); + p.run().waitUntilFinish(); + } } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaTranslationTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaTranslationTest.java index 2c1ed474a0769..a648e5d662ef6 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaTranslationTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaTranslationTest.java @@ -40,9 +40,16 @@ import org.apache.beam.sdk.schemas.Schema.FieldType; import org.apache.beam.sdk.schemas.logicaltypes.DateTime; import org.apache.beam.sdk.schemas.logicaltypes.FixedBytes; +import org.apache.beam.sdk.schemas.logicaltypes.FixedPrecisionNumeric; +import org.apache.beam.sdk.schemas.logicaltypes.FixedString; import 
org.apache.beam.sdk.schemas.logicaltypes.MicrosInstant; +import org.apache.beam.sdk.schemas.logicaltypes.NanosDuration; +import org.apache.beam.sdk.schemas.logicaltypes.NanosInstant; import org.apache.beam.sdk.schemas.logicaltypes.PythonCallable; import org.apache.beam.sdk.schemas.logicaltypes.SchemaLogicalType; +import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes; +import org.apache.beam.sdk.schemas.logicaltypes.VariableBytes; +import org.apache.beam.sdk.schemas.logicaltypes.VariableString; import org.apache.beam.sdk.values.Row; import org.apache.beam.vendor.grpc.v1p48p1.com.google.protobuf.ByteString; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Charsets; @@ -395,6 +402,45 @@ public void typeInfoNotSet() { } } + /** Test schema translation of logical types. */ + @RunWith(Parameterized.class) + public static class LogicalTypesTest { + @Parameters(name = "{index}: {0}") + public static Iterable data() { + return ImmutableList.builder() + .add(FieldType.logicalType(SqlTypes.DATE)) + .add(FieldType.logicalType(SqlTypes.TIME)) + .add(FieldType.logicalType(SqlTypes.DATETIME)) + .add(FieldType.logicalType(SqlTypes.TIMESTAMP)) + .add(FieldType.logicalType(new NanosInstant())) + .add(FieldType.logicalType(new NanosDuration())) + .add(FieldType.logicalType(FixedBytes.of(10))) + .add(FieldType.logicalType(VariableBytes.of(10))) + .add(FieldType.logicalType(FixedString.of(10))) + .add(FieldType.logicalType(VariableString.of(10))) + .add(FieldType.logicalType(FixedPrecisionNumeric.of(10))) + .build(); + } + + @Parameter(0) + public Schema.FieldType fieldType; + + @Test + public void testPortableLogicalTypeSerializeDeserilizeCorrectly() { + SchemaApi.FieldType proto = SchemaTranslation.fieldTypeToProto(fieldType, true); + Schema.FieldType translated = SchemaTranslation.fieldTypeFromProto(proto); + + assertThat( + translated.getLogicalType().getClass(), equalTo(fieldType.getLogicalType().getClass())); + assertThat( + translated.getLogicalType().getArgumentType(), + equalTo(fieldType.getLogicalType().getArgumentType())); + assertThat( + translated.getLogicalType().getArgument(), + equalTo(fieldType.getLogicalType().getArgument())); + } + } + /** A simple logical type that has no argument. */ private static class NullArgumentLogicalType implements Schema.LogicalType { public static final String IDENTIFIER = "beam:logical_type:null_argument:v1"; diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/ExpectedLogs.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/ExpectedLogs.java index ad976531620a5..1e11d6ac77ec5 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/ExpectedLogs.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/ExpectedLogs.java @@ -162,7 +162,7 @@ public void verifyError(String substring, Throwable t) { * @param substring The message to match against. 
*/ public void verifyNotLogged(String substring) { - verifyNotLogged(matcher(substring)); + verifyNotLogged(matcher(substring), logSaver); } /** @@ -187,10 +187,10 @@ public void verifyLogRecords(Matcher> matcher) { } private void verify(final Level level, final String substring) { - verifyLogged(matcher(level, substring)); + verifyLogged(matcher(level, substring), logSaver); } - private TypeSafeMatcher matcher(final String substring) { + public static TypeSafeMatcher matcher(final String substring) { return new TypeSafeMatcher() { @Override public void describeTo(Description description) { @@ -204,7 +204,7 @@ protected boolean matchesSafely(LogRecord item) { }; } - private TypeSafeMatcher matcher(final Level level, final String substring) { + public static TypeSafeMatcher matcher(final Level level, final String substring) { return new TypeSafeMatcher() { @Override public void describeTo(Description description) { @@ -220,14 +220,14 @@ protected boolean matchesSafely(LogRecord item) { } private void verify(final Level level, final String substring, final Throwable throwable) { - verifyLogged(matcher(level, substring, throwable)); + verifyLogged(matcher(level, substring, throwable), logSaver); } private void verifyNo(final Level level, final String substring, final Throwable throwable) { - verifyNotLogged(matcher(level, substring, throwable)); + verifyNotLogged(matcher(level, substring, throwable), logSaver); } - private TypeSafeMatcher matcher( + public static TypeSafeMatcher matcher( final Level level, final String substring, final Throwable throwable) { return new TypeSafeMatcher() { @Override @@ -249,7 +249,7 @@ protected boolean matchesSafely(LogRecord item) { }; } - private void verifyLogged(Matcher matcher) { + public static void verifyLogged(Matcher matcher, LogSaver logSaver) { for (LogRecord record : logSaver.getLogs()) { if (matcher.matches(record)) { return; @@ -259,17 +259,18 @@ private void verifyLogged(Matcher matcher) { fail(String.format("Missing match for [%s]", matcher)); } - private void verifyNotLogged(Matcher matcher) { + public static void verifyNotLogged(Matcher matcher, LogSaver logSaver) { // Don't use Matchers.everyItem(Matchers.not(matcher)) because it doesn't format the logRecord for (LogRecord record : logSaver.getLogs()) { if (matcher.matches(record)) { - fail(String.format("Unexpected match of [%s]: [%s]", matcher, logFormatter.format(record))); + fail( + String.format("Unexpected match of [%s]: [%s]", matcher, LOG_FORMATTER.format(record))); } } } @Override - protected void before() throws Throwable { + protected void before() { previousLevel = log.getLevel(); log.setLevel(Level.ALL); log.addHandler(logSaver); @@ -282,9 +283,9 @@ protected void after() { logSaver.reset(); } + private static final Formatter LOG_FORMATTER = new SimpleFormatter(); private final Logger log; private final LogSaver logSaver; - private final Formatter logFormatter = new SimpleFormatter(); private Level previousLevel; private ExpectedLogs(String name) { @@ -294,7 +295,7 @@ private ExpectedLogs(String name) { /** A JUL logging {@link Handler} that records all logging events that are passed to it. 
*/ @ThreadSafe - private static class LogSaver extends Handler { + public static class LogSaver extends Handler { private final Collection logRecords = new ConcurrentLinkedDeque<>(); @Override diff --git a/sdks/java/extensions/avro/build.gradle b/sdks/java/extensions/avro/build.gradle new file mode 100644 index 0000000000000..dae13cd997283 --- /dev/null +++ b/sdks/java/extensions/avro/build.gradle @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +plugins { id 'org.apache.beam.module' } +applyJavaNature( + automaticModuleName: 'org.apache.beam.sdk.extensions.avro', + disableLintWarnings: ['rawtypes'], // Avro-generated test code has raw-type errors + publish: false, + exportJavadoc: false, +) +applyAvroNature() + +description = "Apache Beam :: SDKs :: Java :: Extensions :: Avro" + +// Exclude tests that need a runner +test { + systemProperty "beamUseDummyRunner", "true" + useJUnit { + excludeCategories "org.apache.beam.sdk.testing.NeedsRunner" + } +} + +dependencies { + implementation library.java.byte_buddy + implementation library.java.vendored_guava_26_0_jre + implementation (project(path: ":sdks:java:core", configuration: "shadow")) { + // Exclude Avro dependencies from "core" since Avro support moved to this extension + exclude group: "org.apache.avro", module: "avro" + } + implementation library.java.error_prone_annotations + implementation library.java.avro + implementation library.java.joda_time + testImplementation (project(path: ":sdks:java:core", configuration: "shadowTest")) { + // Exclude Avro dependencies from "core" since Avro support moved to this extension + exclude group: "org.apache.avro", module: "avro" + } + testImplementation library.java.avro_tests + testImplementation library.java.junit + testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow") + testRuntimeOnly library.java.slf4j_jdk14 +} \ No newline at end of file diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/coders/AvroCoder.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/coders/AvroCoder.java new file mode 100644 index 0000000000000..4687eb5664248 --- /dev/null +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/coders/AvroCoder.java @@ -0,0 +1,820 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.avro.coders; + +import com.google.errorprone.annotations.FormatMethod; +import com.google.errorprone.annotations.FormatString; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Serializable; +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import org.apache.avro.AvroRuntimeException; +import org.apache.avro.Conversion; +import org.apache.avro.LogicalType; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.avro.io.BinaryDecoder; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.reflect.AvroEncode; +import org.apache.avro.reflect.AvroName; +import org.apache.avro.reflect.AvroSchema; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.reflect.ReflectDatumReader; +import org.apache.avro.reflect.ReflectDatumWriter; +import org.apache.avro.reflect.Union; +import org.apache.avro.specific.SpecificData; +import org.apache.avro.specific.SpecificDatumReader; +import org.apache.avro.specific.SpecificDatumWriter; +import org.apache.avro.specific.SpecificRecord; +import org.apache.avro.util.ClassUtils; +import org.apache.avro.util.Utf8; +import org.apache.beam.sdk.coders.CannotProvideCoderException; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.CoderProvider; +import org.apache.beam.sdk.coders.CustomCoder; +import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.util.EmptyOnDeserializationThreadLocal; +import org.apache.beam.sdk.values.TypeDescriptor; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Supplier; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Suppliers; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; + +/** + * A {@link Coder} using Avro binary format. + * + *

Each instance of {@code AvroCoder} encapsulates an Avro schema for objects of type {@code + * T}. + * + *

The Avro schema may be provided explicitly via {@link AvroCoder#of(Class, Schema)} or omitted + * via {@link AvroCoder#of(Class)}, in which case it will be inferred using Avro's {@link + * ReflectData}. + * + *

For complete details about schema generation and how it can be controlled please see the + * {@link org.apache.avro.reflect} package. Only concrete classes with a no-argument constructor can + * be mapped to Avro records. All inherited fields that are not static or transient are included. + * Fields are not permitted to be null unless annotated by {@link Nullable} or a {@link Union} + * schema containing {@code "null"}. + * + *

To use, specify the {@code Coder} type on a PCollection: + * + *

{@code
+ * PCollection<MyCustomElement> records =
+ *     input.apply(...)
+ *          .setCoder(AvroCoder.of(MyCustomElement.class));
+ * }
+ * + *
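Where the reflective schema is not wanted, the explicit-schema overload {@code AvroCoder.of(Class, Schema)} mentioned above can be used. A minimal sketch, assuming a hypothetical MyCustomElement class and a schema file named element.avsc (both illustrative, not part of this change):

// Parse an Avro schema from a file and pass it alongside the element class.
org.apache.avro.Schema explicitSchema =
    new org.apache.avro.Schema.Parser().parse(new java.io.File("element.avsc"));
PCollection<MyCustomElement> records =
    input.apply(...)
         .setCoder(AvroCoder.of(MyCustomElement.class, explicitSchema));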

or annotate the element class using {@code @DefaultCoder}. + * + *

{@code @DefaultCoder(AvroCoder.class)
+ * public class MyCustomElement {
+ *     ...
+ * }
+ * }
+ * + *
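A sketch of an element class that satisfies the reflection rules described above: a concrete class with a no-argument constructor, non-static and non-transient fields, and any nullable field explicitly annotated. The names are illustrative assumptions:

@DefaultCoder(AvroCoder.class)
public class MyCustomElement {
  int id;                                          // primitive, never null
  @org.apache.avro.reflect.Nullable String label;  // explicitly allowed to be null

  public MyCustomElement() {}                      // required by Avro's ReflectData
}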

The implementation attempts to determine if the Avro encoding of the given type will satisfy + * the criteria of {@link Coder#verifyDeterministic} by inspecting both the type and the Schema + * provided or generated by Avro. Only coders that are deterministic can be used in {@link + * org.apache.beam.sdk.transforms.GroupByKey} operations. + * + * @param the type of elements handled by this coder + */ +@SuppressWarnings({ + "nullness" // TODO(https://github.com/apache/beam/issues/20497) +}) +public class AvroCoder extends CustomCoder { + + /** + * Returns an {@code AvroCoder} instance for the provided element type. + * + * @param the element type + */ + public static AvroCoder of(TypeDescriptor type) { + return of(type, true); + } + + /** + * Returns an {@code AvroCoder} instance for the provided element type, respecting whether to use + * Avro's Reflect* or Specific* suite for encoding and decoding. + * + * @param the element type + */ + public static AvroCoder of(TypeDescriptor type, boolean useReflectApi) { + @SuppressWarnings("unchecked") + Class clazz = (Class) type.getRawType(); + return of(clazz, useReflectApi); + } + + /** + * Returns an {@code AvroCoder} instance for the provided element class. + * + * @param the element type + */ + public static AvroCoder of(Class clazz) { + return of(clazz, true); + } + + /** + * Returns an {@code AvroGenericCoder} instance for the Avro schema. The implicit type is + * GenericRecord. + */ + public static AvroGenericCoder of(Schema schema) { + return AvroGenericCoder.of(schema); + } + + /** + * Returns an {@code AvroCoder} instance for the given class, respecting whether to use Avro's + * Reflect* or Specific* suite for encoding and decoding. + * + * @param the element type + */ + public static AvroCoder of(Class type, boolean useReflectApi) { + ClassLoader cl = type.getClassLoader(); + SpecificData data = useReflectApi ? new ReflectData(cl) : new SpecificData(cl); + return of(type, data.getSchema(type), useReflectApi); + } + + /** + * Returns an {@code AvroCoder} instance for the provided element type using the provided Avro + * schema. + * + *

The schema must correspond to the type provided. + * + * @param the element type + */ + public static AvroCoder of(Class type, Schema schema) { + return of(type, schema, true); + } + + /** + * Returns an {@code AvroCoder} instance for the given class and schema, respecting whether to use + * Avro's Reflect* or Specific* suite for encoding and decoding. + * + * @param the element type + */ + public static AvroCoder of(Class type, Schema schema, boolean useReflectApi) { + return new AvroCoder<>(type, schema, useReflectApi); + } + + /** + * Returns a {@link CoderProvider} which uses the {@link AvroCoder} if possible for all types. + * + *

It is unsafe to register this as a {@link CoderProvider} because Avro will reflectively + * accept dangerous types such as {@link Object}. + * + *

This method is invoked reflectively from {@link DefaultCoder}. + */ + @SuppressWarnings("unused") + public static CoderProvider getCoderProvider() { + return new AvroCoderProvider(); + } + + /** + * A {@link CoderProvider} that constructs an {@link AvroCoder} for Avro compatible classes. + * + *

It is unsafe to register this as a {@link CoderProvider} because Avro will reflectively + * accept dangerous types such as {@link Object}. + */ + static class AvroCoderProvider extends CoderProvider { + @Override + public Coder coderFor( + TypeDescriptor typeDescriptor, List> componentCoders) + throws CannotProvideCoderException { + try { + return AvroCoder.of(typeDescriptor); + } catch (AvroRuntimeException e) { + throw new CannotProvideCoderException( + String.format("%s is not compatible with Avro", typeDescriptor), e); + } + } + } + + private final Class type; + private final boolean useReflectApi; + private final SerializableSchemaSupplier schemaSupplier; + private final TypeDescriptor typeDescriptor; + + private final List nonDeterministicReasons; + + // Factories allocated by .get() are thread-safe and immutable. + private static final EncoderFactory ENCODER_FACTORY = EncoderFactory.get(); + private static final DecoderFactory DECODER_FACTORY = DecoderFactory.get(); + + /** + * A {@link Serializable} object that holds the {@link String} version of a {@link Schema}. This + * is paired with the {@link SerializableSchemaSupplier} via {@link Serializable}'s usage of the + * {@link #readResolve} method. + */ + private static class SerializableSchemaString implements Serializable { + private final String schema; + + private SerializableSchemaString(String schema) { + this.schema = schema; + } + + private Object readResolve() throws IOException, ClassNotFoundException { + return new SerializableSchemaSupplier(new Schema.Parser().parse(schema)); + } + } + + /** + * A {@link Serializable} object that delegates to the {@link SerializableSchemaString} via {@link + * Serializable}'s usage of the {@link #writeReplace} method. Kryo doesn't utilize Java's + * serialization and hence is able to encode the {@link Schema} object directly. + */ + private static class SerializableSchemaSupplier implements Serializable, Supplier { + // writeReplace makes this object serializable. This is a limitation of FindBugs as discussed + // here: + // http://stackoverflow.com/questions/26156523/is-writeobject-not-neccesary-using-the-serialization-proxy-pattern + @SuppressFBWarnings("SE_BAD_FIELD") + private final Schema schema; + + private SerializableSchemaSupplier(Schema schema) { + this.schema = schema; + } + + private Object writeReplace() { + return new SerializableSchemaString(schema.toString()); + } + + @Override + public Schema get() { + return schema; + } + } + + /** + * A {@link Serializable} object that lazily supplies a {@link ReflectData} built from the + * appropriate {@link ClassLoader} for the type encoded by this {@link AvroCoder}. + */ + private static class SerializableReflectDataSupplier + implements Serializable, Supplier { + + private final Class clazz; + + private SerializableReflectDataSupplier(Class clazz) { + this.clazz = clazz; + } + + @Override + public ReflectData get() { + ReflectData reflectData = new ReflectData(clazz.getClassLoader()); + reflectData.addLogicalTypeConversion(new JodaTimestampConversion()); + return reflectData; + } + } + + // Cache the old encoder/decoder and let the factories reuse them when possible. To be threadsafe, + // these are ThreadLocal. This code does not need to be re-entrant as AvroCoder does not use + // an inner coder. 
+ private final EmptyOnDeserializationThreadLocal decoder; + private final EmptyOnDeserializationThreadLocal encoder; + private final EmptyOnDeserializationThreadLocal> writer; + private final EmptyOnDeserializationThreadLocal> reader; + + // Lazily re-instantiated after deserialization + private final Supplier reflectData; + + protected AvroCoder(Class type, Schema schema) { + this(type, schema, false); + } + + protected AvroCoder(Class type, Schema schema, boolean useReflectApi) { + this.type = type; + this.useReflectApi = useReflectApi; + this.schemaSupplier = new SerializableSchemaSupplier(schema); + typeDescriptor = TypeDescriptor.of(type); + nonDeterministicReasons = new AvroDeterminismChecker().check(TypeDescriptor.of(type), schema); + + // Decoder and Encoder start off null for each thread. They are allocated and potentially + // reused inside encode/decode. + this.decoder = new EmptyOnDeserializationThreadLocal<>(); + this.encoder = new EmptyOnDeserializationThreadLocal<>(); + + this.reflectData = Suppliers.memoize(new SerializableReflectDataSupplier(getType())); + + // Reader and writer are allocated once per thread per Coder + this.reader = + new EmptyOnDeserializationThreadLocal>() { + private final AvroCoder myCoder = AvroCoder.this; + + @Override + public DatumReader initialValue() { + if (myCoder.getType().equals(GenericRecord.class)) { + return new GenericDatumReader<>(myCoder.getSchema()); + } else if (SpecificRecord.class.isAssignableFrom(myCoder.getType()) && !useReflectApi) { + return new SpecificDatumReader<>(myCoder.getType()); + } + return new ReflectDatumReader<>( + myCoder.getSchema(), myCoder.getSchema(), myCoder.reflectData.get()); + } + }; + + this.writer = + new EmptyOnDeserializationThreadLocal>() { + private final AvroCoder myCoder = AvroCoder.this; + + @Override + public DatumWriter initialValue() { + if (myCoder.getType().equals(GenericRecord.class)) { + return new GenericDatumWriter<>(myCoder.getSchema()); + } else if (SpecificRecord.class.isAssignableFrom(myCoder.getType()) && !useReflectApi) { + return new SpecificDatumWriter<>(myCoder.getType()); + } + return new ReflectDatumWriter<>(myCoder.getSchema(), myCoder.reflectData.get()); + } + }; + } + + /** Returns the type this coder encodes/decodes. */ + public Class getType() { + return type; + } + + public boolean useReflectApi() { + return useReflectApi; + } + + @Override + public void encode(T value, OutputStream outStream) throws IOException { + // Get a BinaryEncoder instance from the ThreadLocal cache and attempt to reuse it. + BinaryEncoder encoderInstance = ENCODER_FACTORY.directBinaryEncoder(outStream, encoder.get()); + // Save the potentially-new instance for reuse later. + encoder.set(encoderInstance); + writer.get().write(value, encoderInstance); + // Direct binary encoder does not buffer any data and need not be flushed. + } + + @Override + public T decode(InputStream inStream) throws IOException { + // Get a BinaryDecoder instance from the ThreadLocal cache and attempt to reuse it. + BinaryDecoder decoderInstance = DECODER_FACTORY.directBinaryDecoder(inStream, decoder.get()); + // Save the potentially-new instance for later. + decoder.set(decoderInstance); + return reader.get().read(null, decoderInstance); + } + + /** + * @throws NonDeterministicException when the type may not be deterministically encoded using the + * given {@link Schema}, the {@code directBinaryEncoder}, and the {@link ReflectDatumWriter} + * or {@link GenericDatumWriter}. 
+ */ + @Override + public void verifyDeterministic() throws NonDeterministicException { + if (!nonDeterministicReasons.isEmpty()) { + throw new NonDeterministicException(this, nonDeterministicReasons); + } + } + + /** Returns the schema used by this coder. */ + public Schema getSchema() { + return schemaSupplier.get(); + } + + @Override + public TypeDescriptor getEncodedTypeDescriptor() { + return typeDescriptor; + } + + /** + * Helper class encapsulating the various pieces of state maintained by the recursive walk used + * for checking if the encoding will be deterministic. + */ + private static class AvroDeterminismChecker { + + // Reasons that the original type are not deterministic. This accumulates + // the actual output. + private List reasons = new ArrayList<>(); + + // Types that are currently "open". Used to make sure we don't have any + // recursive types. Note that we assume that all occurrences of a given type + // are equal, rather than tracking pairs of type + schema. + private Set> activeTypes = new HashSet<>(); + + // Similarly to how we record active types, we record the schemas we visit + // to make sure we don't encounter recursive fields. + private Set activeSchemas = new HashSet<>(); + + /** Report an error in the current context. */ + @FormatMethod + private void reportError(String context, @FormatString String fmt, Object... args) { + String message = String.format(fmt, args); + reasons.add(context + ": " + message); + } + + /** + * Classes that are serialized by Avro as a String include + * + *

    + *
  • Subtypes of CharSequence (including String, Avro's mutable Utf8, etc.) + *
  • Several predefined classes (BigDecimal, BigInteger, URI, URL) + *
  • Classes annotated with @Stringable (uses their #toString() and a String constructor) + *
+ * + *

Rather than determine which of these cases are deterministic, we list some classes that + * definitely are, and treat any others as non-deterministic. + */ + private static final Set> DETERMINISTIC_STRINGABLE_CLASSES = new HashSet<>(); + + static { + // CharSequences: + DETERMINISTIC_STRINGABLE_CLASSES.add(String.class); + DETERMINISTIC_STRINGABLE_CLASSES.add(Utf8.class); + + // Explicitly Stringable: + DETERMINISTIC_STRINGABLE_CLASSES.add(java.math.BigDecimal.class); + DETERMINISTIC_STRINGABLE_CLASSES.add(java.math.BigInteger.class); + DETERMINISTIC_STRINGABLE_CLASSES.add(java.net.URI.class); + DETERMINISTIC_STRINGABLE_CLASSES.add(java.net.URL.class); + + // Classes annotated with @Stringable: + } + + /** Return true if the given type token is a subtype of *any* of the listed parents. */ + private static boolean isSubtypeOf(TypeDescriptor type, Class... parents) { + for (Class parent : parents) { + if (type.isSubtypeOf(TypeDescriptor.of(parent))) { + return true; + } + } + return false; + } + + protected AvroDeterminismChecker() {} + + // The entry point for the check. Should not be recursively called. + public List check(TypeDescriptor type, Schema schema) { + recurse(type.getRawType().getName(), type, schema); + return reasons; + } + + // This is the method that should be recursively called. It sets up the path + // and visited types correctly. + private void recurse(String context, TypeDescriptor type, Schema schema) { + if (type.getRawType().isAnnotationPresent(AvroSchema.class)) { + reportError(context, "Custom schemas are not supported -- remove @AvroSchema."); + return; + } + + if (!activeTypes.add(type)) { + reportError(context, "%s appears recursively", type); + return; + } + + // If the record isn't a true class, but rather a GenericRecord, SpecificRecord, etc. + // with a specified schema, then we need to make the decision based on the generated + // implementations. + if (isSubtypeOf(type, IndexedRecord.class)) { + checkIndexedRecord(context, schema, null); + } else { + doCheck(context, type, schema); + } + + activeTypes.remove(type); + } + + private void doCheck(String context, TypeDescriptor type, Schema schema) { + switch (schema.getType()) { + case ARRAY: + checkArray(context, type, schema); + break; + case ENUM: + // Enums should be deterministic, since they depend only on the ordinal. + break; + case FIXED: + // Depending on the implementation of GenericFixed, we don't know how + // the given field will be encoded. So, we assume that it isn't + // deterministic. + reportError(context, "FIXED encodings are not guaranteed to be deterministic"); + break; + case MAP: + checkMap(context, type, schema); + break; + case RECORD: + if (!(type.getType() instanceof Class)) { + reportError(context, "Cannot determine type from generic %s due to erasure", type); + return; + } + checkRecord(type, schema); + break; + case UNION: + checkUnion(context, type, schema); + break; + case STRING: + checkString(context, type); + break; + case BOOLEAN: + case BYTES: + case DOUBLE: + case INT: + case FLOAT: + case LONG: + case NULL: + // For types that Avro encodes using one of the above primitives, we assume they are + // deterministic. + break; + default: + // In any other case (eg., new types added to Avro) we cautiously return + // false. 
+ reportError(context, "Unknown schema type %s may be non-deterministic", schema.getType()); + break; + } + } + + private void checkString(String context, TypeDescriptor type) { + // For types that are encoded as strings, we need to make sure they're in an approved + // list. For other types that are annotated @Stringable, Avro will just use the + // #toString() methods, which has no guarantees of determinism. + if (!DETERMINISTIC_STRINGABLE_CLASSES.contains(type.getRawType())) { + reportError(context, "%s may not have deterministic #toString()", type); + } + } + + private static final Schema AVRO_NULL_SCHEMA = Schema.create(Schema.Type.NULL); + + private void checkUnion(String context, TypeDescriptor type, Schema schema) { + final List unionTypes = schema.getTypes(); + + if (!type.getRawType().isAnnotationPresent(Union.class)) { + // First check for @Nullable field, which shows up as a union of field type and null. + if (unionTypes.size() == 2 && unionTypes.contains(AVRO_NULL_SCHEMA)) { + // Find the Schema that is not NULL and recursively check that it is deterministic. + Schema nullableFieldSchema = + unionTypes.get(0).equals(AVRO_NULL_SCHEMA) ? unionTypes.get(1) : unionTypes.get(0); + doCheck(context, type, nullableFieldSchema); + return; + } + + // Otherwise report a schema error. + reportError(context, "Expected type %s to have @Union annotation", type); + return; + } + + // Errors associated with this union will use the base class as their context. + String baseClassContext = type.getRawType().getName(); + + // For a union, we need to make sure that each possible instantiation is deterministic. + for (Schema concrete : unionTypes) { + @SuppressWarnings("unchecked") + TypeDescriptor unionType = TypeDescriptor.of(ReflectData.get().getClass(concrete)); + + recurse(baseClassContext, unionType, concrete); + } + } + + private void checkRecord(TypeDescriptor type, Schema schema) { + // For a record, we want to make sure that all the fields are deterministic. + Class clazz = type.getRawType(); + for (Schema.Field fieldSchema : schema.getFields()) { + Field field = getField(clazz, fieldSchema.name()); + String fieldContext = field.getDeclaringClass().getName() + "#" + field.getName(); + + if (field.isAnnotationPresent(AvroEncode.class)) { + reportError( + fieldContext, "Custom encoders may be non-deterministic -- remove @AvroEncode"); + continue; + } + + if (!IndexedRecord.class.isAssignableFrom(field.getType()) + && field.isAnnotationPresent(AvroSchema.class)) { + // TODO: We should be able to support custom schemas on POJO fields, but we shouldn't + // need to, so we just allow it in the case of IndexedRecords. + reportError( + fieldContext, "Custom schemas are only supported for subtypes of IndexedRecord."); + continue; + } + + TypeDescriptor fieldType = type.resolveType(field.getGenericType()); + recurse(fieldContext, fieldType, fieldSchema.schema()); + } + } + + private void checkIndexedRecord( + String context, Schema schema, @Nullable String specificClassStr) { + + if (!activeSchemas.add(schema)) { + reportError(context, "%s appears recursively", schema.getName()); + return; + } + + switch (schema.getType()) { + case ARRAY: + // Generic Records use GenericData.Array to implement arrays, which is + // essentially an ArrayList, and therefore ordering is deterministic. + // The array is thus deterministic if the elements are deterministic. 
+ checkIndexedRecord(context, schema.getElementType(), null); + break; + case ENUM: + // Enums are deterministic because they encode as a single integer. + break; + case FIXED: + // In the case of GenericRecords, FIXED is deterministic because it + // encodes/decodes as a Byte[]. + break; + case MAP: + reportError( + context, + "GenericRecord and SpecificRecords use a HashMap to represent MAPs," + + " so it is non-deterministic"); + break; + case RECORD: + for (Schema.Field field : schema.getFields()) { + checkIndexedRecord( + schema.getName() + "." + field.name(), + field.schema(), + field.getProp(SpecificData.CLASS_PROP)); + } + break; + case STRING: + // GenericDatumWriter#findStringClass will use a CharSequence or a String + // for each string, so it is deterministic. + + // SpecificCompiler#getStringType will use java.lang.String, org.apache.avro.util.Utf8, + // or java.lang.CharSequence, unless SpecificData.CLASS_PROP overrides that. + if (specificClassStr != null) { + Class specificClass; + try { + specificClass = ClassUtils.forName(specificClassStr); + if (!DETERMINISTIC_STRINGABLE_CLASSES.contains(specificClass)) { + reportError( + context, + "Specific class %s is not known to be deterministic", + specificClassStr); + } + } catch (ClassNotFoundException e) { + reportError( + context, "Specific class %s is not known to be deterministic", specificClassStr); + } + } + break; + case UNION: + for (Schema subschema : schema.getTypes()) { + checkIndexedRecord(subschema.getName(), subschema, null); + } + break; + case BOOLEAN: + case BYTES: + case DOUBLE: + case INT: + case FLOAT: + case LONG: + case NULL: + // For types that Avro encodes using one of the above primitives, we assume they are + // deterministic. + break; + default: + reportError(context, "Unknown schema type %s may be non-deterministic", schema.getType()); + break; + } + + activeSchemas.remove(schema); + } + + private void checkMap(String context, TypeDescriptor type, Schema schema) { + if (!isSubtypeOf(type, SortedMap.class)) { + reportError(context, "%s may not be deterministically ordered", type); + } + + // Avro (currently) asserts that all keys are strings. + // In case that changes, we double check that the key was a string: + Class keyType = type.resolveType(Map.class.getTypeParameters()[0]).getRawType(); + if (!String.class.equals(keyType)) { + reportError(context, "map keys should be Strings, but was %s", keyType); + } + + recurse(context, type.resolveType(Map.class.getTypeParameters()[1]), schema.getValueType()); + } + + private void checkArray(String context, TypeDescriptor type, Schema schema) { + TypeDescriptor elementType = null; + if (type.isArray()) { + // The type is an array (with ordering)-> deterministic iff the element is deterministic. + elementType = type.getComponentType(); + } else if (isSubtypeOf(type, Collection.class)) { + if (isSubtypeOf(type, List.class, SortedSet.class)) { + // Ordered collection -> deterministic iff the element is deterministic + elementType = type.resolveType(Collection.class.getTypeParameters()[0]); + } else { + // Not an ordered collection -> not deterministic + reportError(context, "%s may not be deterministically ordered", type); + return; + } + } else { + // If it was an unknown type encoded as an array, be conservative and assume + // that we don't know anything about the order. + reportError(context, "encoding %s as an ARRAY was unexpected", type); + return; + } + + // If we get here, it's either a deterministically-ordered Collection, or + // an array. 
Either way, the type is deterministic iff the element type is + // deterministic. + recurse(context, elementType, schema.getElementType()); + } + + /** + * Extract a field from a class. We need to look at the declared fields so that we can see + * private fields. We may need to walk up to the parent to get classes from the parent. + */ + private static Field getField(Class originalClazz, String name) { + Class clazz = originalClazz; + while (clazz != null) { + for (Field field : clazz.getDeclaredFields()) { + AvroName avroName = field.getAnnotation(AvroName.class); + if (avroName != null && name.equals(avroName.value())) { + return field; + } else if (avroName == null && name.equals(field.getName())) { + return field; + } + } + clazz = clazz.getSuperclass(); + } + + throw new IllegalArgumentException("Unable to get field " + name + " from " + originalClazz); + } + } + + @Override + public boolean equals(@Nullable Object other) { + if (other == this) { + return true; + } + if (!(other instanceof AvroCoder)) { + return false; + } + AvroCoder that = (AvroCoder) other; + return Objects.equals(this.schemaSupplier.get(), that.schemaSupplier.get()) + && Objects.equals(this.typeDescriptor, that.typeDescriptor) + && this.useReflectApi == that.useReflectApi; + } + + @Override + public int hashCode() { + return Objects.hash(schemaSupplier.get(), typeDescriptor, useReflectApi); + } + + /** + * Conversion for DateTime. + * + *

This is a copy from Avro 1.8's TimestampConversion, which is renamed in Avro 1.9. Defining + * own copy gives flexibility for Beam Java SDK to work with Avro 1.8 and 1.9 at runtime. + * + * @see BEAM-9144: Beam's own Avro + * TimeConversion class in beam-sdk-java-core + */ + public static class JodaTimestampConversion extends Conversion { + @Override + public Class getConvertedType() { + return DateTime.class; + } + + @Override + public String getLogicalTypeName() { + return "timestamp-millis"; + } + + @Override + public DateTime fromLong(Long millisFromEpoch, Schema schema, LogicalType type) { + return new DateTime(millisFromEpoch, DateTimeZone.UTC); + } + + @Override + public Long toLong(DateTime timestamp, Schema schema, LogicalType type) { + return timestamp.getMillis(); + } + } +} diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/coders/AvroGenericCoder.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/coders/AvroGenericCoder.java new file mode 100644 index 0000000000000..46e0b9715b240 --- /dev/null +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/coders/AvroGenericCoder.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.avro.coders; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; + +/** AvroCoder specialisation for GenericRecord. */ +public class AvroGenericCoder extends AvroCoder { + AvroGenericCoder(Schema schema) { + super(GenericRecord.class, schema); + } + + public static AvroGenericCoder of(Schema schema) { + return new AvroGenericCoder(schema); + } +} diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/coders/package-info.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/coders/package-info.java new file mode 100644 index 0000000000000..639856878e573 --- /dev/null +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/coders/package-info.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Defines {@link org.apache.beam.sdk.coders.Coder Coders} to specify how data is encoded to and + * decoded from byte strings using Apache Avro. + */ +@DefaultAnnotation(NonNull.class) +@Experimental(Kind.EXTENSION) +package org.apache.beam.sdk.extensions.avro.coders; + +import edu.umd.cs.findbugs.annotations.DefaultAnnotation; +import org.apache.beam.sdk.annotations.Experimental; +import org.apache.beam.sdk.annotations.Experimental.Kind; +import org.checkerframework.checker.nullness.qual.NonNull; diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/AvroIO.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/AvroIO.java new file mode 100644 index 0000000000000..b159d38c603ac --- /dev/null +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/AvroIO.java @@ -0,0 +1,2043 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.avro.io; + +import static org.apache.beam.sdk.io.FileIO.ReadMatches.DirectoryTreatment; +import static org.apache.beam.sdk.io.ReadAllViaFileBasedSource.ReadFileRangesFnExceptionHandler; +import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument; +import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull; + +import com.google.auto.value.AutoValue; +import java.io.IOException; +import java.io.Serializable; +import java.nio.channels.Channels; +import java.nio.channels.WritableByteChannel; +import java.util.Map; +import org.apache.avro.Schema; +import org.apache.avro.file.CodecFactory; +import org.apache.avro.file.DataFileConstants; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.reflect.ReflectDatumWriter; +import org.apache.beam.sdk.annotations.Experimental; +import org.apache.beam.sdk.annotations.Experimental.Kind; +import org.apache.beam.sdk.coders.CannotProvideCoderException; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.CoderRegistry; +import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; +import org.apache.beam.sdk.io.DefaultFilenamePolicy; +import org.apache.beam.sdk.io.FileBasedSink; +import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy; +import org.apache.beam.sdk.io.FileBasedSource; +import org.apache.beam.sdk.io.FileIO; +import org.apache.beam.sdk.io.FileIO.MatchConfiguration; +import org.apache.beam.sdk.io.FileIO.ReadableFile; +import org.apache.beam.sdk.io.FileSystems; +import org.apache.beam.sdk.io.ReadAllViaFileBasedSource; +import org.apache.beam.sdk.io.ShardNameTemplate; +import org.apache.beam.sdk.io.WriteFiles; +import org.apache.beam.sdk.io.WriteFilesResult; +import org.apache.beam.sdk.io.fs.EmptyMatchTreatment; +import org.apache.beam.sdk.io.fs.ResourceId; +import org.apache.beam.sdk.options.ValueProvider; +import org.apache.beam.sdk.options.ValueProvider.NestedValueProvider; +import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider; +import org.apache.beam.sdk.transforms.Create; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.SerializableFunction; +import org.apache.beam.sdk.transforms.SerializableFunctions; +import org.apache.beam.sdk.transforms.Watch.Growth.TerminationCondition; +import org.apache.beam.sdk.transforms.display.DisplayData; +import org.apache.beam.sdk.values.PBegin; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.PDone; +import org.apache.beam.sdk.values.TypeDescriptors; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Function; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.MoreObjects; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Supplier; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Suppliers; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps; +import 
org.checkerframework.checker.nullness.qual.Nullable; +import org.joda.time.Duration; + +/** + * {@link PTransform}s for reading and writing Avro files. + * + *

Reading Avro files

+ * + *

To read a {@link PCollection} from one or more Avro files with the same schema known at + * pipeline construction time, use {@link #read}, using {@link Read#from} to specify the filename or + * filepattern to read from. If the filepatterns to be read are themselves in a {@link PCollection} + * you can use {@link FileIO} to match them and {@link AvroIO#readFiles} to read them. If the schema + * is unknown at pipeline construction time, use {@link #parseGenericRecords} or {@link + * #parseFilesGenericRecords}. + * + *

Many configuration options below apply to several or all of these transforms. + * + *

See {@link FileSystems} for information on supported file systems and filepatterns. + * + *

Filepattern expansion and watching

+ * + *

By default, the filepatterns are expanded only once. {@link Read#watchForNewFiles} or the + * combination of {@link FileIO.Match#continuously(Duration, TerminationCondition)} and {@link + * AvroIO#readFiles(Class)} allow streaming of new files matching the filepattern(s). + * + *

By default, {@link #read} prohibits filepatterns that match no files, and {@link + * AvroIO#readFiles(Class)} allows them in case the filepattern contains a glob wildcard character. + * Use {@link Read#withEmptyMatchTreatment} or {@link + * FileIO.Match#withEmptyMatchTreatment(EmptyMatchTreatment)} plus {@link AvroIO#readFiles(Class)} + * to configure this behavior. + * + *
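A hedged sketch of the two configuration paths just described; the Foo class, paths, and durations are illustrative assumptions:

// One-shot read that tolerates an empty match.
PCollection<Foo> batch =
    p.apply(AvroIO.read(Foo.class)
        .from("gs://my_bucket/path/to/*.avro")
        .withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW));

// Continuous expansion of the pattern via FileIO.Match#continuously plus readFiles.
PCollection<Foo> streamed =
    p.apply(FileIO.match()
            .filepattern("gs://my_bucket/path/to/*.avro")
            .continuously(
                Duration.standardMinutes(1),
                Watch.Growth.afterTimeSinceNewOutput(Duration.standardHours(1))))
        .apply(FileIO.readMatches())
        .apply(AvroIO.readFiles(Foo.class));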

Reading records of a known schema

+ * + *

To read specific records, such as Avro-generated classes, use {@link #read(Class)}. To read + * {@link GenericRecord GenericRecords}, use {@link #readGenericRecords(Schema)} which takes a + * {@link Schema} object, or {@link #readGenericRecords(String)} which takes an Avro schema in a + * JSON-encoded string form. An exception will be thrown if a record doesn't match the specified + * schema. Likewise, to read a {@link PCollection} of filepatterns, apply {@link FileIO} matching + * plus {@link #readFilesGenericRecords}. + * + *

For example: + * + *

{@code
+ * Pipeline p = ...;
+ *
+ * // Read Avro-generated classes from files on GCS
+ * PCollection<AvroAutoGenClass> records =
+ *     p.apply(AvroIO.read(AvroAutoGenClass.class).from("gs://my_bucket/path/to/records-*.avro"));
+ *
+ * // Read GenericRecord's of the given schema from files on GCS
+ * Schema schema = new Schema.Parser().parse(new File("schema.avsc"));
+ * PCollection<GenericRecord> records =
+ *     p.apply(AvroIO.readGenericRecords(schema)
+ *                .from("gs://my_bucket/path/to/records-*.avro"));
+ * }
+ * + *
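The JSON-string overload works the same way; a minimal sketch, where the schema literal and path are illustrative assumptions:

String jsonSchema =
    "{\"type\":\"record\",\"name\":\"TestRecord\","
        + "\"fields\":[{\"name\":\"name\",\"type\":\"string\"}]}";
PCollection<GenericRecord> records =
    p.apply(AvroIO.readGenericRecords(jsonSchema)
        .from("gs://my_bucket/path/to/records-*.avro"));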

Reading records of an unknown schema

+ * + *

To read records from files whose schema is unknown at pipeline construction time or differs + * between files, use {@link #parseGenericRecords} - in this case, you will need to specify a + * parsing function for converting each {@link GenericRecord} into a value of your custom type. + * Likewise, to read a {@link PCollection} of filepatterns with unknown schema, use {@link FileIO} + * matching plus {@link #parseFilesGenericRecords(SerializableFunction)}. + * + *

For example: + * + *

{@code
+ * Pipeline p = ...;
+ *
+ * PCollection<Foo> records =
+ *     p.apply(AvroIO.parseGenericRecords(new SerializableFunction<GenericRecord, Foo>() {
+ *       public Foo apply(GenericRecord record) {
+ *         // If needed, access the schema of the record using record.getSchema()
+ *         return ...;
+ *       }
+ *     }));
+ * }
+ * + *

Reading from a {@link PCollection} of filepatterns

+ * + *
{@code
+ * Pipeline p = ...;
+ *
+ * PCollection<String> filepatterns = p.apply(...);
+ * PCollection<AvroAutoGenClass> records =
+ *     filepatterns.apply(AvroIO.readAll(AvroAutoGenClass.class));
+ * PCollection<AvroAutoGenClass> records =
+ *     filepatterns
+ *         .apply(FileIO.matchAll())
+ *         .apply(FileIO.readMatches())
+ *         .apply(AvroIO.readFiles(AvroAutoGenClass.class));
+ * PCollection<GenericRecord> genericRecords =
+ *     filepatterns.apply(AvroIO.readGenericRecords(schema));
+ * PCollection<Foo> records =
+ *     filepatterns
+ *         .apply(FileIO.matchAll())
+ *         .apply(FileIO.readMatches())
+ *         .apply(AvroIO.parseFilesGenericRecords(new SerializableFunction...);
+ * }
+ * + *

Streaming new files matching a filepattern

+ * + *
{@code
+ * Pipeline p = ...;
+ *
+ * PCollection<AvroAutoGenClass> lines = p.apply(AvroIO
+ *     .read(AvroAutoGenClass.class)
+ *     .from("gs://my_bucket/path/to/records-*.avro")
+ *     .watchForNewFiles(
+ *       // Check for new files every minute
+ *       Duration.standardMinutes(1),
+ *       // Stop watching the filepattern if no new files appear within an hour
+ *       afterTimeSinceNewOutput(Duration.standardHours(1))));
+ * }
+ * + *

Reading a very large number of files

+ * + *

If it is known that the filepattern will match a very large number of files (e.g. tens of + * thousands or more), use {@link Read#withHintMatchesManyFiles} for better performance and + * scalability. Note that the hint may decrease performance if the filepattern matches only a small number + * of files. + * + *
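A minimal sketch of the hint, assuming an illustrative bucket layout:

PCollection<AvroAutoGenClass> records =
    p.apply(AvroIO.read(AvroAutoGenClass.class)
        .from("gs://my_bucket/many/shards/records-*.avro")
        .withHintMatchesManyFiles());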

Inferring Beam schemas from Avro files

+ * + *

If you want to use SQL or schema-based operations on an Avro-based PCollection, you must + * configure the read transform to infer the Beam schema and automatically set up the Beam-related + * coders by doing: + * + *

{@code
+ * PCollection<AvroAutoGenClass> records =
+ *     p.apply(AvroIO.read(...).from(...).withBeamSchemas(true));
+ * }
+ * + *

Inferring Beam schemas from Avro PCollections

+ * + *

If you created an Avro-based PCollection by other means, e.g. reading records from Kafka or as + * the output of another PTransform, you may be interested in making your PCollection schema-aware + * so you can use the Schema-based APIs or Beam's SqlTransform. + * + *

If you are using Avro specific records (generated classes from an Avro schema), you can + * register a schema provider for the specific Avro class to make any PCollection of these objects + * schema-aware. + * + *

{@code
+ * pipeline.getSchemaRegistry().registerSchemaProvider(AvroAutoGenClass.class, AvroAutoGenClass.getClassSchema());
+ * }
+ * + * You can also manually set an Avro-backed Schema coder for a PCollection using {@link + * AvroUtils#schemaCoder(Class, Schema)} to make it schema-aware. + * + *
{@code
+ * PCollection<AvroAutoGenClass> records = ...
+ * AvroCoder<AvroAutoGenClass> coder = (AvroCoder<AvroAutoGenClass>) records.getCoder();
+ * records.setCoder(AvroUtils.schemaCoder(coder.getType(), coder.getSchema()));
+ * }
+ * + *

If you are using GenericRecords, you may need to set a specific Beam schema coder for each + * PCollection to match its internal Avro schema. + * + *

{@code
+ * org.apache.avro.Schema avroSchema = ...
+ * PCollection<GenericRecord> records = ...
+ * records.setCoder(AvroUtils.schemaCoder(avroSchema));
+ * }
+ * + *

Writing Avro files

+ * + *

To write a {@link PCollection} to one or more Avro files, use {@link Write}, using {@code + * AvroIO.write().to(String)} to specify the output filename prefix. The default {@link + * DefaultFilenamePolicy} will use this prefix, in conjunction with a {@link ShardNameTemplate} (set + * via {@link Write#withShardNameTemplate(String)}) and an optional filename suffix (set via {@link + * Write#withSuffix(String)}), to generate output filenames in a sharded way. You can override this + * default write filename policy using {@link Write#to(FilenamePolicy)} to specify a custom file + * naming policy. + * + *
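A sketch combining the prefix, shard-name template, and suffix knobs described above; the path and template are illustrative assumptions:

records.apply(
    AvroIO.write(AvroAutoGenClass.class)
        .to("gs://my_bucket/path/to/events")
        .withShardNameTemplate("-SSS-of-NNN")
        .withSuffix(".avro"));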

By default, {@link Write} produces output files that are compressed using the {@link + * org.apache.avro.file.Codec CodecFactory.snappyCodec()}. This default can be changed or overridden + * using {@link Write#withCodec}. + * + *
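For example, switching from the default snappy codec to deflate could look like this sketch; the compression level and path are illustrative assumptions:

records.apply(
    AvroIO.write(AvroAutoGenClass.class)
        .to("gs://my_bucket/path/to/events")
        .withSuffix(".avro")
        .withCodec(CodecFactory.deflateCodec(9)));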

Writing specific or generic records

+ * + *

To write specific records, such as Avro-generated classes, use {@link #write(Class)}. To write + * {@link GenericRecord GenericRecords}, use either {@link #writeGenericRecords(Schema)} which takes + * a {@link Schema} object, or {@link #writeGenericRecords(String)} which takes a schema in a + * JSON-encoded string form. An exception will be thrown if a record doesn't match the specified + * schema. + * + *

For example: + * + *

{@code
+ * // A simple Write to a local file (only runs locally):
+ * PCollection<AvroAutoGenClass> records = ...;
+ * records.apply(AvroIO.write(AvroAutoGenClass.class).to("/path/to/file.avro"));
+ *
+ * // A Write to a sharded GCS file (runs locally and using remote execution):
+ * Schema schema = new Schema.Parser().parse(new File("schema.avsc"));
+ * PCollection<GenericRecord> records = ...;
+ * records.apply("WriteToAvro", AvroIO.writeGenericRecords(schema)
+ *     .to("gs://my_bucket/path/to/numbers")
+ *     .withSuffix(".avro"));
+ * }
+ * + *

Writing windowed or unbounded data

+ * + *

By default, all input is put into the global window before writing. If per-window writes are + * desired - for example, when using a streaming runner - {@link Write#withWindowedWrites()} will + * cause windowing and triggering to be preserved. When producing windowed writes with a streaming + * runner that supports triggers, the number of output shards must be set explicitly using {@link + * Write#withNumShards(int)}; some runners may set this for you to a runner-chosen value, so you may + * not need to set it yourself. A {@link FilenamePolicy} must be set, and unique windows and triggers + * must produce unique filenames. + * + *
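A hedged sketch of a windowed, explicitly sharded write for a streaming pipeline; the window size, shard count, and path are illustrative assumptions, and whether the default filename policy suffices depends on the windows and triggers in use:

records
    .apply(Window.<AvroAutoGenClass>into(FixedWindows.of(Duration.standardMinutes(5))))
    .apply(
        AvroIO.write(AvroAutoGenClass.class)
            .to("gs://my_bucket/path/to/windowed-events")
            .withWindowedWrites()
            .withNumShards(3)
            .withSuffix(".avro"));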

Writing data to multiple destinations

+ * + *

The following shows a more-complex example of AvroIO.Write usage, generating dynamic file + * destinations as well as a dynamic Avro schema per file. In this example, a PCollection of user + * events (e.g. actions on a website) is written out to Avro files. Each event contains the user id + * as an integer field. We want events for each user to go into a specific directory for that user, + * and each user's data should be written with a specific schema for that user; a side input is + * used, so the schema can be calculated in a different stage. + * + *

{@code
+ * // This is the user class that controls dynamic destinations for this avro write. The input to
+ * // AvroIO.Write will be UserEvent, and we will be writing GenericRecords to the file (in order
+ * // to have dynamic schemas). Everything is per userid, so we define a dynamic destination type
+ * // of Integer.
+ * class UserDynamicAvroDestinations
+ *     extends DynamicAvroDestinations<UserEvent, Integer, GenericRecord> {
+ *   private final PCollectionView<Map<Integer, String>> userToSchemaMap;
+ *   public UserDynamicAvroDestinations( PCollectionView<Map<Integer, String>> userToSchemaMap) {
+ *     this.userToSchemaMap = userToSchemaMap;
+ *   }
+ *   public GenericRecord formatRecord(UserEvent record) {
+ *     return formatUserRecord(record, getSchema(record.getUserId()));
+ *   }
+ *   public Schema getSchema(Integer userId) {
+ *     return new Schema.Parser().parse(sideInput(userToSchemaMap).get(userId));
+ *   }
+ *   public Integer getDestination(UserEvent record) {
+ *     return record.getUserId();
+ *   }
+ *   public Integer getDefaultDestination() {
+ *     return 0;
+ *   }
+ *   public FilenamePolicy getFilenamePolicy(Integer userId) {
+ *     return DefaultFilenamePolicy.fromParams(new Params().withBaseFilename(baseDir + "/user-"
+ *     + userId + "/events"));
+ *   }
+ *   public List<PCollectionView<?>> getSideInputs() {
+ *     return ImmutableList.<PCollectionView<?>>of(userToSchemaMap);
+ *   }
+ * }
+ * PCollection<UserEvent> events = ...;
+ * PCollectionView<Map<Integer, String>> userToSchemaMap = events.apply(
+ *     "ComputePerUserSchemas", new ComputePerUserSchemas());
+ * events.apply("WriteAvros", AvroIO.writeCustomTypeToGenericRecords()
+ *     .to(new UserDynamicAvroDestinations(userToSchemaMap)));
+ * }
+ */ +@SuppressWarnings({ + "nullness" // TODO(https://github.com/apache/beam/issues/20497) +}) +public class AvroIO { + /** + * Reads records of the given type from an Avro file (or multiple Avro files matching a pattern). + * + *

The schema must be specified using one of the {@code withSchema} functions. + */ + public static Read read(Class recordClass) { + return new AutoValue_AvroIO_Read.Builder() + .setMatchConfiguration(MatchConfiguration.create(EmptyMatchTreatment.DISALLOW)) + .setRecordClass(recordClass) + .setSchema(ReflectData.get().getSchema(recordClass)) + .setInferBeamSchema(false) + .setHintMatchesManyFiles(false) + .build(); + } + + /** + * Like {@link #read}, but reads each file in a {@link PCollection} of {@link ReadableFile}, + * returned by {@link FileIO#readMatches}. + * + *

You can read {@link GenericRecord} by using {@code #readFiles(GenericRecord.class)} or + * {@code #readFiles(new Schema.Parser().parse(schema))} if the schema is a String. + */ + public static ReadFiles readFiles(Class recordClass) { + return new AutoValue_AvroIO_ReadFiles.Builder() + .setRecordClass(recordClass) + .setSchema(ReflectData.get().getSchema(recordClass)) + .setInferBeamSchema(false) + .setDesiredBundleSizeBytes(DEFAULT_BUNDLE_SIZE_BYTES) + .setUsesReshuffle(ReadAllViaFileBasedSource.DEFAULT_USES_RESHUFFLE) + .setFileExceptionHandler(new ReadFileRangesFnExceptionHandler()) + .build(); + } + + /** + * Like {@link #read}, but reads each filepattern in the input {@link PCollection}. + * + * @deprecated You can achieve The functionality of {@link #readAll} using {@link FileIO} matching + * plus {@link #readFiles(Class)}. This is the preferred method to make composition explicit. + * {@link ReadAll} will not receive upgrades and will be removed in a future version of Beam. + */ + @Deprecated + public static ReadAll readAll(Class recordClass) { + return new AutoValue_AvroIO_ReadAll.Builder() + .setMatchConfiguration(MatchConfiguration.create(EmptyMatchTreatment.ALLOW_IF_WILDCARD)) + .setRecordClass(recordClass) + .setSchema(ReflectData.get().getSchema(recordClass)) + .setInferBeamSchema(false) + .setDesiredBundleSizeBytes(DEFAULT_BUNDLE_SIZE_BYTES) + .build(); + } + + /** Reads Avro file(s) containing records of the specified schema. */ + public static Read readGenericRecords(Schema schema) { + return new AutoValue_AvroIO_Read.Builder() + .setMatchConfiguration(MatchConfiguration.create(EmptyMatchTreatment.DISALLOW)) + .setRecordClass(GenericRecord.class) + .setSchema(schema) + .setInferBeamSchema(false) + .setHintMatchesManyFiles(false) + .build(); + } + + /** + * Like {@link #readGenericRecords(Schema)}, but for a {@link PCollection} of {@link + * ReadableFile}, for example, returned by {@link FileIO#readMatches}. + */ + public static ReadFiles readFilesGenericRecords(Schema schema) { + return new AutoValue_AvroIO_ReadFiles.Builder() + .setRecordClass(GenericRecord.class) + .setSchema(schema) + .setInferBeamSchema(false) + .setDesiredBundleSizeBytes(DEFAULT_BUNDLE_SIZE_BYTES) + .setUsesReshuffle(ReadAllViaFileBasedSource.DEFAULT_USES_RESHUFFLE) + .setFileExceptionHandler(new ReadFileRangesFnExceptionHandler()) + .build(); + } + + /** + * Like {@link #readGenericRecords(Schema)}, but for a {@link PCollection} of {@link + * ReadableFile}, for example, returned by {@link FileIO#readMatches}. + * + * @deprecated You can achieve The functionality of {@link #readAllGenericRecords(Schema)} using + * {@link FileIO} matching plus {@link #readFilesGenericRecords(Schema)}. This is the + * preferred method to make composition explicit. {@link ReadAll} will not receive upgrades + * and will be removed in a future version of Beam. + */ + @Deprecated + public static ReadAll readAllGenericRecords(Schema schema) { + return new AutoValue_AvroIO_ReadAll.Builder() + .setMatchConfiguration(MatchConfiguration.create(EmptyMatchTreatment.ALLOW_IF_WILDCARD)) + .setRecordClass(GenericRecord.class) + .setSchema(schema) + .setInferBeamSchema(false) + .setDesiredBundleSizeBytes(DEFAULT_BUNDLE_SIZE_BYTES) + .build(); + } + + /** + * Reads Avro file(s) containing records of the specified schema. The schema is specified as a + * JSON-encoded string. 
+ */ + public static Read readGenericRecords(String schema) { + return readGenericRecords(new Schema.Parser().parse(schema)); + } + + /** Like {@link #readGenericRecords(String)}, but for {@link ReadableFile} collections. */ + public static ReadFiles readFilesGenericRecords(String schema) { + return readFilesGenericRecords(new Schema.Parser().parse(schema)); + } + + /** + * Like {@link #readGenericRecords(String)}, but reads each filepattern in the input {@link + * PCollection}. + * + * @deprecated You can achieve The functionality of {@link #readAllGenericRecords(String)} using + * {@link FileIO} matching plus {@link #readFilesGenericRecords(String)}. This is the + * preferred method to make composition explicit. {@link ReadAll} will not receive upgrades + * and will be removed in a future version of Beam. + */ + @Deprecated + public static ReadAll readAllGenericRecords(String schema) { + return readAllGenericRecords(new Schema.Parser().parse(schema)); + } + + /** + * Reads Avro file(s) containing records of an unspecified schema and converting each record to a + * custom type. + */ + public static Parse parseGenericRecords(SerializableFunction parseFn) { + return new AutoValue_AvroIO_Parse.Builder() + .setMatchConfiguration(MatchConfiguration.create(EmptyMatchTreatment.DISALLOW)) + .setParseFn(parseFn) + .setHintMatchesManyFiles(false) + .build(); + } + + /** + * Like {@link #parseGenericRecords(SerializableFunction)}, but reads each {@link ReadableFile} in + * the input {@link PCollection}. + */ + public static ParseFiles parseFilesGenericRecords( + SerializableFunction parseFn) { + return new AutoValue_AvroIO_ParseFiles.Builder() + .setParseFn(parseFn) + .setDesiredBundleSizeBytes(DEFAULT_BUNDLE_SIZE_BYTES) + .setUsesReshuffle(ReadAllViaFileBasedSource.DEFAULT_USES_RESHUFFLE) + .setFileExceptionHandler(new ReadFileRangesFnExceptionHandler()) + .build(); + } + + /** + * Like {@link #parseGenericRecords(SerializableFunction)}, but reads each filepattern in the + * input {@link PCollection}. + * + * @deprecated You can achieve The functionality of {@link + * #parseAllGenericRecords(SerializableFunction)} using {@link FileIO} matching plus {@link + * #parseFilesGenericRecords(SerializableFunction)} ()}. This is the preferred method to make + * composition explicit. {@link ParseAll} will not receive upgrades and will be removed in a + * future version of Beam. + */ + @Deprecated + public static ParseAll parseAllGenericRecords( + SerializableFunction parseFn) { + return new AutoValue_AvroIO_ParseAll.Builder() + .setMatchConfiguration(MatchConfiguration.create(EmptyMatchTreatment.ALLOW_IF_WILDCARD)) + .setParseFn(parseFn) + .setDesiredBundleSizeBytes(DEFAULT_BUNDLE_SIZE_BYTES) + .build(); + } + + /** + * Writes a {@link PCollection} to an Avro file (or multiple Avro files matching a sharding + * pattern). + */ + public static Write write(Class recordClass) { + return new Write<>( + AvroIO.defaultWriteBuilder() + .setGenericRecords(false) + .setSchema(ReflectData.get().getSchema(recordClass)) + .build()); + } + + /** Writes Avro records of the specified schema. */ + public static Write writeGenericRecords(Schema schema) { + return new Write<>( + AvroIO.defaultWriteBuilder() + .setGenericRecords(true) + .setSchema(schema) + .build()); + } + + /** + * A {@link PTransform} that writes a {@link PCollection} to an avro file (or multiple avro files + * matching a sharding pattern), with each element of the input collection encoded into its own + * record of type OutputT. + * + *
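A short sketch of `parseGenericRecords` as introduced above, converting each record to a custom type without declaring a schema up front. The field names (`name`, `clicks`) and the path are hypothetical, and the explicit coder is shown because a coder usually cannot be inferred from a lambda.

```java
import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.coders.VarLongCoder;
import org.apache.beam.sdk.extensions.avro.io.AvroIO;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;

public class AvroParseSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create();

    // Convert each GenericRecord to a custom type; field names here are assumptions.
    SerializableFunction<GenericRecord, KV<String, Long>> parseFn =
        record -> KV.of(String.valueOf(record.get("name")), (Long) record.get("clicks"));

    PCollection<KV<String, Long>> clicks =
        p.apply(
            AvroIO.parseGenericRecords(parseFn)
                .from("gs://my-bucket/events/*.avro")
                // Set the output coder explicitly; it cannot be inferred from the lambda.
                .withCoder(KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of())));

    p.run().waitUntilFinish();
  }
}
```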

This version allows you to apply {@link AvroIO} writes to a PCollection of a custom type + * {@link UserT}. You must specify a format mechanism that converts the input type {@link UserT} to + * the output type that will be written to the file. If using a custom {@link + * DynamicAvroDestinations} object, this is done using {@link + * DynamicAvroDestinations#formatRecord}; otherwise, {@link TypedWrite#withFormatFunction} can + * be used to specify a format function. + * + *

The advantage of using a custom type is that it allows a user-provided {@link + * DynamicAvroDestinations} object, set via {@link Write#to(DynamicAvroDestinations)}, to examine + * the custom type when choosing a destination. + * + *

If the output type is {@link GenericRecord} use {@link #writeCustomTypeToGenericRecords()} + * instead. + */ + public static TypedWrite writeCustomType() { + return AvroIO.defaultWriteBuilder().setGenericRecords(false).build(); + } + + /** + * Similar to {@link #writeCustomType()}, but specialized for the case where the output type is + * {@link GenericRecord}. A schema must be specified either in {@link + * DynamicAvroDestinations#getSchema} or if not using dynamic destinations, by using {@link + * TypedWrite#withSchema(Schema)}. + */ + public static TypedWrite writeCustomTypeToGenericRecords() { + return AvroIO.defaultWriteBuilder().setGenericRecords(true).build(); + } + + /** + * Writes Avro records of the specified schema. The schema is specified as a JSON-encoded string. + */ + public static Write writeGenericRecords(String schema) { + return writeGenericRecords(new Schema.Parser().parse(schema)); + } + + private static TypedWrite.Builder defaultWriteBuilder() { + return new AutoValue_AvroIO_TypedWrite.Builder() + .setFilenameSuffix(null) + .setShardTemplate(null) + .setNumShards(0) + .setCodec(TypedWrite.DEFAULT_SERIALIZABLE_CODEC) + .setMetadata(ImmutableMap.of()) + .setWindowedWrites(false) + .setNoSpilling(false) + .setSyncInterval(DataFileConstants.DEFAULT_SYNC_INTERVAL); + } + + @Experimental(Kind.SCHEMAS) + private static PCollection setBeamSchema( + PCollection pc, Class clazz, @Nullable Schema schema) { + return pc.setCoder(AvroUtils.schemaCoder(clazz, schema)); + } + + /** + * 64MB is a reasonable value that allows to amortize the cost of opening files, but is not so + * large as to exhaust a typical runner's maximum amount of output per ProcessElement call. + */ + private static final long DEFAULT_BUNDLE_SIZE_BYTES = 64 * 1024 * 1024L; + + /** Implementation of {@link #read} and {@link #readGenericRecords}. */ + @AutoValue + public abstract static class Read extends PTransform> { + + abstract @Nullable ValueProvider getFilepattern(); + + abstract MatchConfiguration getMatchConfiguration(); + + abstract @Nullable Class getRecordClass(); + + abstract @Nullable Schema getSchema(); + + abstract boolean getInferBeamSchema(); + + abstract boolean getHintMatchesManyFiles(); + + abstract Builder toBuilder(); + + @AutoValue.Builder + abstract static class Builder { + abstract Builder setFilepattern(ValueProvider filepattern); + + abstract Builder setMatchConfiguration(MatchConfiguration matchConfiguration); + + abstract Builder setRecordClass(Class recordClass); + + abstract Builder setSchema(Schema schema); + + abstract Builder setInferBeamSchema(boolean infer); + + abstract Builder setHintMatchesManyFiles(boolean hintManyFiles); + + abstract Read build(); + } + + /** + * Reads from the given filename or filepattern. + * + *
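The custom-type write described above is easiest to see with `writeCustomTypeToGenericRecords()`. A minimal sketch follows, assuming a placeholder schema, path, and input elements; the format function re-parses the schema from a JSON string so the lambda captures only a `String`.

```java
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.extensions.avro.io.AvroIO;
import org.apache.beam.sdk.transforms.Create;

public class AvroCustomTypeWriteSketch {
  public static void main(String[] args) {
    String jsonSchema =
        "{\"type\":\"record\",\"name\":\"Event\","
            + "\"fields\":[{\"name\":\"id\",\"type\":\"string\"}]}";
    Schema schema = new Schema.Parser().parse(jsonSchema);
    Pipeline p = Pipeline.create();

    p.apply(Create.of("a", "b", "c"))
        .apply(
            AvroIO.<String>writeCustomTypeToGenericRecords()
                .to("gs://my-bucket/output/events")
                .withSchema(schema)
                // Format function: converts each custom input element to the GenericRecord
                // that gets written. Re-parse from the JSON string so only a String is
                // captured; production code would typically memoize the parsed schema.
                .withFormatFunction(
                    id -> {
                      Schema s = new Schema.Parser().parse(jsonSchema);
                      GenericRecord record = new GenericData.Record(s);
                      record.put("id", id);
                      return record;
                    })
                .withSuffix(".avro"));

    p.run().waitUntilFinish();
  }
}
```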

If it is known that the filepattern will match a very large number of files (at least tens + * of thousands), use {@link #withHintMatchesManyFiles} for better performance and scalability. + */ + public Read from(ValueProvider filepattern) { + return toBuilder().setFilepattern(filepattern).build(); + } + + /** Like {@link #from(ValueProvider)}. */ + public Read from(String filepattern) { + return from(StaticValueProvider.of(filepattern)); + } + + /** Sets the {@link MatchConfiguration}. */ + public Read withMatchConfiguration(MatchConfiguration matchConfiguration) { + return toBuilder().setMatchConfiguration(matchConfiguration).build(); + } + + /** Configures whether or not a filepattern matching no files is allowed. */ + public Read withEmptyMatchTreatment(EmptyMatchTreatment treatment) { + return withMatchConfiguration(getMatchConfiguration().withEmptyMatchTreatment(treatment)); + } + + /** + * Continuously watches for new files matching the filepattern, polling it at the given + * interval, until the given termination condition is reached. The returned {@link PCollection} + * is unbounded. If {@code matchUpdatedFiles} is set, also watches for files with timestamp + * change. + * + *

This works only in runners supporting splittable {@link + * org.apache.beam.sdk.transforms.DoFn}. + */ + public Read watchForNewFiles( + Duration pollInterval, + TerminationCondition terminationCondition, + boolean matchUpdatedFiles) { + return withMatchConfiguration( + getMatchConfiguration() + .continuously(pollInterval, terminationCondition, matchUpdatedFiles)); + } + + /** + * Same as {@link Read#watchForNewFiles(Duration, TerminationCondition, boolean)} with {@code + * matchUpdatedFiles=false}. + */ + public Read watchForNewFiles( + Duration pollInterval, TerminationCondition terminationCondition) { + return watchForNewFiles(pollInterval, terminationCondition, false); + } + + /** + * Hints that the filepattern specified in {@link #from(String)} matches a very large number of + * files. + * + *
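A sketch of the continuous-watch read described above, assuming a placeholder schema and path. The poll interval and termination condition are illustrative values; the result is an unbounded PCollection, so a runner with splittable DoFn support is required.

```java
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.extensions.avro.io.AvroIO;
import org.apache.beam.sdk.transforms.Watch;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Duration;

public class AvroWatchSketch {
  public static void main(String[] args) {
    Schema schema =
        new Schema.Parser()
            .parse(
                "{\"type\":\"record\",\"name\":\"User\","
                    + "\"fields\":[{\"name\":\"name\",\"type\":\"string\"}]}");
    Pipeline p = Pipeline.create();

    // Poll the filepattern every 30 seconds; stop watching one hour after the last new file.
    PCollection<GenericRecord> records =
        p.apply(
            AvroIO.readGenericRecords(schema)
                .from("gs://my-bucket/incoming/*.avro")
                .watchForNewFiles(
                    Duration.standardSeconds(30),
                    Watch.Growth.afterTimeSinceNewOutput(Duration.standardHours(1))));

    p.run().waitUntilFinish();
  }
}
```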

This hint may cause a runner to execute the transform differently, in a way that improves + * performance for this case, but it may worsen performance if the filepattern matches only a + * small number of files (e.g., in a runner that supports dynamic work rebalancing, it will + * happen less efficiently within individual files). + */ + public Read withHintMatchesManyFiles() { + return toBuilder().setHintMatchesManyFiles(true).build(); + } + + /** + * If set to true, a Beam schema will be inferred from the AVRO schema. This allows the output + * to be used by SQL and by the schema-transform library. + */ + @Experimental(Kind.SCHEMAS) + public Read withBeamSchemas(boolean withBeamSchemas) { + return toBuilder().setInferBeamSchema(withBeamSchemas).build(); + } + + @Override + @SuppressWarnings("unchecked") + public PCollection expand(PBegin input) { + checkNotNull(getFilepattern(), "filepattern"); + checkNotNull(getSchema(), "schema"); + + if (getMatchConfiguration().getWatchInterval() == null && !getHintMatchesManyFiles()) { + PCollection read = + input.apply( + "Read", + org.apache.beam.sdk.io.Read.from( + createSource( + getFilepattern(), + getMatchConfiguration().getEmptyMatchTreatment(), + getRecordClass(), + getSchema(), + null))); + return getInferBeamSchema() ? setBeamSchema(read, getRecordClass(), getSchema()) : read; + } + + // All other cases go through FileIO + ReadFiles + ReadFiles readFiles = + (getRecordClass() == GenericRecord.class) + ? (ReadFiles) readFilesGenericRecords(getSchema()) + : readFiles(getRecordClass()); + return input + .apply("Create filepattern", Create.ofProvider(getFilepattern(), StringUtf8Coder.of())) + .apply("Match All", FileIO.matchAll().withConfiguration(getMatchConfiguration())) + .apply( + "Read Matches", + FileIO.readMatches().withDirectoryTreatment(DirectoryTreatment.PROHIBIT)) + .apply("Via ReadFiles", readFiles); + } + + @Override + public void populateDisplayData(DisplayData.Builder builder) { + super.populateDisplayData(builder); + builder + .add( + DisplayData.item("inferBeamSchema", getInferBeamSchema()) + .withLabel("Infer Beam Schema")) + .addIfNotNull(DisplayData.item("schema", String.valueOf(getSchema()))) + .addIfNotNull(DisplayData.item("recordClass", getRecordClass()).withLabel("Record Class")) + .addIfNotNull( + DisplayData.item("filePattern", getFilepattern()).withLabel("Input File Pattern")) + .include("matchConfiguration", getMatchConfiguration()); + } + + @SuppressWarnings("unchecked") + private static AvroSource createSource( + ValueProvider filepattern, + EmptyMatchTreatment emptyMatchTreatment, + Class recordClass, + Schema schema, + AvroSource.@Nullable DatumReaderFactory readerFactory) { + AvroSource source = + AvroSource.from(filepattern).withEmptyMatchTreatment(emptyMatchTreatment); + + if (readerFactory != null) { + source = source.withDatumReaderFactory(readerFactory); + } + return recordClass == GenericRecord.class + ? (AvroSource) source.withSchema(schema) + : source.withSchema(recordClass); + } + } + + ///////////////////////////////////////////////////////////////////////////// + + /** Implementation of {@link #readFiles}. 
*/ + @AutoValue + public abstract static class ReadFiles + extends PTransform, PCollection> { + + abstract @Nullable Class getRecordClass(); + + abstract @Nullable Schema getSchema(); + + abstract boolean getUsesReshuffle(); + + abstract ReadFileRangesFnExceptionHandler getFileExceptionHandler(); + + abstract long getDesiredBundleSizeBytes(); + + abstract boolean getInferBeamSchema(); + + abstract AvroSource.@Nullable DatumReaderFactory getDatumReaderFactory(); + + abstract Builder toBuilder(); + + @AutoValue.Builder + abstract static class Builder { + abstract Builder setRecordClass(Class recordClass); + + abstract Builder setSchema(Schema schema); + + abstract Builder setUsesReshuffle(boolean usesReshuffle); + + abstract Builder setFileExceptionHandler( + ReadFileRangesFnExceptionHandler exceptionHandler); + + abstract Builder setDesiredBundleSizeBytes(long desiredBundleSizeBytes); + + abstract Builder setInferBeamSchema(boolean infer); + + abstract Builder setDatumReaderFactory(AvroSource.DatumReaderFactory factory); + + abstract ReadFiles build(); + } + + @VisibleForTesting + ReadFiles withDesiredBundleSizeBytes(long desiredBundleSizeBytes) { + return toBuilder().setDesiredBundleSizeBytes(desiredBundleSizeBytes).build(); + } + + /** Specifies if a Reshuffle should run before file reads occur. */ + @Experimental(Kind.FILESYSTEM) + public ReadFiles withUsesReshuffle(boolean usesReshuffle) { + return toBuilder().setUsesReshuffle(usesReshuffle).build(); + } + + /** Specifies if exceptions should be logged only for streaming pipelines. */ + @Experimental(Kind.FILESYSTEM) + public ReadFiles withFileExceptionHandler( + ReadFileRangesFnExceptionHandler exceptionHandler) { + return toBuilder().setFileExceptionHandler(exceptionHandler).build(); + } + + /** + * If set to true, a Beam schema will be inferred from the AVRO schema. This allows the output + * to be used by SQL and by the schema-transform library. + */ + @Experimental(Kind.SCHEMAS) + public ReadFiles withBeamSchemas(boolean withBeamSchemas) { + return toBuilder().setInferBeamSchema(withBeamSchemas).build(); + } + + public ReadFiles withDatumReaderFactory(AvroSource.DatumReaderFactory factory) { + return toBuilder().setDatumReaderFactory(factory).build(); + } + + @Override + public PCollection expand(PCollection input) { + checkNotNull(getSchema(), "schema"); + PCollection read = + input.apply( + "Read all via FileBasedSource", + new ReadAllViaFileBasedSource<>( + getDesiredBundleSizeBytes(), + new CreateSourceFn<>( + getRecordClass(), getSchema().toString(), getDatumReaderFactory()), + AvroCoder.of(getRecordClass(), getSchema()), + getUsesReshuffle(), + getFileExceptionHandler())); + return getInferBeamSchema() ? setBeamSchema(read, getRecordClass(), getSchema()) : read; + } + + @Override + public void populateDisplayData(DisplayData.Builder builder) { + super.populateDisplayData(builder); + builder + .add( + DisplayData.item("inferBeamSchema", getInferBeamSchema()) + .withLabel("Infer Beam Schema")) + .addIfNotNull(DisplayData.item("schema", String.valueOf(getSchema()))) + .addIfNotNull( + DisplayData.item("recordClass", getRecordClass()).withLabel("Record Class")); + } + } + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Implementation of {@link #readAll}. + * + * @deprecated See {@link #readAll(Class)} for details. 
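A sketch of the `readFiles` options above in a `FileIO` pipeline, assuming a hypothetical reflect-compatible `User` class and placeholder path; `withBeamSchemas` and `withUsesReshuffle` are shown as per the methods added in this class.

```java
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.extensions.avro.io.AvroIO;
import org.apache.beam.sdk.io.FileIO;
import org.apache.beam.sdk.values.PCollection;

public class AvroReadFilesSketch {
  // Hypothetical reflect-compatible record type.
  public static class User {
    public String name;
  }

  public static void main(String[] args) {
    Pipeline p = Pipeline.create();

    PCollection<User> users =
        p.apply(FileIO.match().filepattern("gs://my-bucket/users/*.avro"))
            .apply(FileIO.readMatches())
            .apply(
                AvroIO.readFiles(User.class)
                    // Attach an inferred Beam schema so the output works with SQL and
                    // schema-aware transforms.
                    .withBeamSchemas(true)
                    // Redistribute matched files before reading them.
                    .withUsesReshuffle(true));

    p.run().waitUntilFinish();
  }
}
```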
+ */ + @Deprecated + @AutoValue + public abstract static class ReadAll extends PTransform, PCollection> { + abstract MatchConfiguration getMatchConfiguration(); + + abstract @Nullable Class getRecordClass(); + + abstract @Nullable Schema getSchema(); + + abstract long getDesiredBundleSizeBytes(); + + abstract boolean getInferBeamSchema(); + + abstract Builder toBuilder(); + + @AutoValue.Builder + abstract static class Builder { + abstract Builder setMatchConfiguration(MatchConfiguration matchConfiguration); + + abstract Builder setRecordClass(Class recordClass); + + abstract Builder setSchema(Schema schema); + + abstract Builder setDesiredBundleSizeBytes(long desiredBundleSizeBytes); + + abstract Builder setInferBeamSchema(boolean infer); + + abstract ReadAll build(); + } + + /** Sets the {@link MatchConfiguration}. */ + public ReadAll withMatchConfiguration(MatchConfiguration configuration) { + return toBuilder().setMatchConfiguration(configuration).build(); + } + + /** Like {@link Read#withEmptyMatchTreatment}. */ + public ReadAll withEmptyMatchTreatment(EmptyMatchTreatment treatment) { + return withMatchConfiguration(getMatchConfiguration().withEmptyMatchTreatment(treatment)); + } + + /** Like {@link Read#watchForNewFiles}. */ + public ReadAll watchForNewFiles( + Duration pollInterval, TerminationCondition terminationCondition) { + return withMatchConfiguration( + getMatchConfiguration().continuously(pollInterval, terminationCondition)); + } + + @VisibleForTesting + ReadAll withDesiredBundleSizeBytes(long desiredBundleSizeBytes) { + return toBuilder().setDesiredBundleSizeBytes(desiredBundleSizeBytes).build(); + } + + /** + * If set to true, a Beam schema will be inferred from the AVRO schema. This allows the output + * to be used by SQL and by the schema-transform library. + */ + @Experimental(Kind.SCHEMAS) + public ReadAll withBeamSchemas(boolean withBeamSchemas) { + return toBuilder().setInferBeamSchema(withBeamSchemas).build(); + } + + @Override + public PCollection expand(PCollection input) { + checkNotNull(getSchema(), "schema"); + PCollection read = + input + .apply(FileIO.matchAll().withConfiguration(getMatchConfiguration())) + .apply(FileIO.readMatches().withDirectoryTreatment(DirectoryTreatment.PROHIBIT)) + .apply(readFiles(getRecordClass())); + return getInferBeamSchema() ? 
setBeamSchema(read, getRecordClass(), getSchema()) : read; + } + + @Override + public void populateDisplayData(DisplayData.Builder builder) { + super.populateDisplayData(builder); + builder + .add( + DisplayData.item("inferBeamSchema", getInferBeamSchema()) + .withLabel("Infer Beam Schema")) + .addIfNotNull(DisplayData.item("schema", String.valueOf(getSchema()))) + .addIfNotNull(DisplayData.item("recordClass", getRecordClass()).withLabel("Record Class")) + .include("matchConfiguration", getMatchConfiguration()); + } + } + + private static class CreateSourceFn + implements SerializableFunction> { + private final Class recordClass; + private final Supplier schemaSupplier; + private final AvroSource.DatumReaderFactory readerFactory; + + CreateSourceFn( + Class recordClass, String jsonSchema, AvroSource.DatumReaderFactory readerFactory) { + this.recordClass = recordClass; + this.schemaSupplier = + Suppliers.memoize( + Suppliers.compose(new JsonToSchema(), Suppliers.ofInstance(jsonSchema))); + this.readerFactory = readerFactory; + } + + @Override + public FileBasedSource apply(String input) { + return Read.createSource( + StaticValueProvider.of(input), + EmptyMatchTreatment.DISALLOW, + recordClass, + schemaSupplier.get(), + readerFactory); + } + + private static class JsonToSchema implements Function, Serializable { + @Override + public Schema apply(String input) { + return new Schema.Parser().parse(input); + } + } + } + + ///////////////////////////////////////////////////////////////////////////// + + /** Implementation of {@link #parseGenericRecords}. */ + @AutoValue + public abstract static class Parse extends PTransform> { + + abstract @Nullable ValueProvider getFilepattern(); + + abstract MatchConfiguration getMatchConfiguration(); + + abstract SerializableFunction getParseFn(); + + abstract @Nullable Coder getCoder(); + + abstract boolean getHintMatchesManyFiles(); + + abstract Builder toBuilder(); + + @AutoValue.Builder + abstract static class Builder { + abstract Builder setFilepattern(ValueProvider filepattern); + + abstract Builder setMatchConfiguration(MatchConfiguration matchConfiguration); + + abstract Builder setParseFn(SerializableFunction parseFn); + + abstract Builder setCoder(Coder coder); + + abstract Builder setHintMatchesManyFiles(boolean hintMatchesManyFiles); + + abstract Parse build(); + } + + /** Reads from the given filename or filepattern. */ + public Parse from(String filepattern) { + return from(StaticValueProvider.of(filepattern)); + } + + /** Like {@link #from(String)}. */ + public Parse from(ValueProvider filepattern) { + return toBuilder().setFilepattern(filepattern).build(); + } + + /** Sets the {@link MatchConfiguration}. */ + public Parse withMatchConfiguration(MatchConfiguration configuration) { + return toBuilder().setMatchConfiguration(configuration).build(); + } + + /** Like {@link Read#withEmptyMatchTreatment}. */ + public Parse withEmptyMatchTreatment(EmptyMatchTreatment treatment) { + return withMatchConfiguration(getMatchConfiguration().withEmptyMatchTreatment(treatment)); + } + + /** Like {@link Read#watchForNewFiles}. */ + public Parse watchForNewFiles( + Duration pollInterval, TerminationCondition terminationCondition) { + return withMatchConfiguration( + getMatchConfiguration().continuously(pollInterval, terminationCondition)); + } + + /** Sets a coder for the result of the parse function. */ + public Parse withCoder(Coder coder) { + return toBuilder().setCoder(coder).build(); + } + + /** Like {@link Read#withHintMatchesManyFiles()}. 
*/ + public Parse withHintMatchesManyFiles() { + return toBuilder().setHintMatchesManyFiles(true).build(); + } + + @Override + public PCollection expand(PBegin input) { + checkNotNull(getFilepattern(), "filepattern"); + Coder coder = inferCoder(getCoder(), getParseFn(), input.getPipeline().getCoderRegistry()); + + if (getMatchConfiguration().getWatchInterval() == null && !getHintMatchesManyFiles()) { + return input.apply( + org.apache.beam.sdk.io.Read.from( + AvroSource.from(getFilepattern()).withParseFn(getParseFn(), coder))); + } + + // All other cases go through FileIO + ParseFilesGenericRecords. + return input + .apply("Create filepattern", Create.ofProvider(getFilepattern(), StringUtf8Coder.of())) + .apply("Match All", FileIO.matchAll().withConfiguration(getMatchConfiguration())) + .apply( + "Read Matches", + FileIO.readMatches().withDirectoryTreatment(DirectoryTreatment.PROHIBIT)) + .apply("Via ParseFiles", parseFilesGenericRecords(getParseFn()).withCoder(coder)); + } + + private static Coder inferCoder( + @Nullable Coder explicitCoder, + SerializableFunction parseFn, + CoderRegistry coderRegistry) { + if (explicitCoder != null) { + return explicitCoder; + } + // If a coder was not specified explicitly, infer it from parse fn. + try { + return coderRegistry.getCoder(TypeDescriptors.outputOf(parseFn)); + } catch (CannotProvideCoderException e) { + throw new IllegalArgumentException( + "Unable to infer coder for output of parseFn. Specify it explicitly using withCoder().", + e); + } + } + + @Override + public void populateDisplayData(DisplayData.Builder builder) { + super.populateDisplayData(builder); + builder + .addIfNotNull( + DisplayData.item("filePattern", getFilepattern()).withLabel("Input File Pattern")) + .add(DisplayData.item("parseFn", getParseFn().getClass()).withLabel("Parse function")) + .include("matchConfiguration", getMatchConfiguration()); + } + } + + ///////////////////////////////////////////////////////////////////////////// + + /** Implementation of {@link #parseFilesGenericRecords}. */ + @AutoValue + public abstract static class ParseFiles + extends PTransform, PCollection> { + abstract SerializableFunction getParseFn(); + + abstract @Nullable Coder getCoder(); + + abstract boolean getUsesReshuffle(); + + abstract ReadFileRangesFnExceptionHandler getFileExceptionHandler(); + + abstract long getDesiredBundleSizeBytes(); + + abstract Builder toBuilder(); + + @AutoValue.Builder + abstract static class Builder { + abstract Builder setParseFn(SerializableFunction parseFn); + + abstract Builder setCoder(Coder coder); + + abstract Builder setUsesReshuffle(boolean usesReshuffle); + + abstract Builder setFileExceptionHandler( + ReadFileRangesFnExceptionHandler exceptionHandler); + + abstract Builder setDesiredBundleSizeBytes(long desiredBundleSizeBytes); + + abstract ParseFiles build(); + } + + /** Specifies the coder for the result of the {@code parseFn}. */ + public ParseFiles withCoder(Coder coder) { + return toBuilder().setCoder(coder).build(); + } + + /** Specifies if a Reshuffle should run before file reads occur. */ + @Experimental(Kind.FILESYSTEM) + public ParseFiles withUsesReshuffle(boolean usesReshuffle) { + return toBuilder().setUsesReshuffle(usesReshuffle).build(); + } + + /** Specifies if exceptions should be logged only for streaming pipelines. 
*/ + @Experimental(Kind.FILESYSTEM) + public ParseFiles withFileExceptionHandler( + ReadFileRangesFnExceptionHandler exceptionHandler) { + return toBuilder().setFileExceptionHandler(exceptionHandler).build(); + } + + @VisibleForTesting + ParseFiles withDesiredBundleSizeBytes(long desiredBundleSizeBytes) { + return toBuilder().setDesiredBundleSizeBytes(desiredBundleSizeBytes).build(); + } + + @Override + public PCollection expand(PCollection input) { + final Coder coder = + Parse.inferCoder(getCoder(), getParseFn(), input.getPipeline().getCoderRegistry()); + final SerializableFunction parseFn = getParseFn(); + final SerializableFunction> createSource = + new CreateParseSourceFn<>(parseFn, coder); + return input.apply( + "Parse Files via FileBasedSource", + new ReadAllViaFileBasedSource<>( + getDesiredBundleSizeBytes(), + createSource, + coder, + getUsesReshuffle(), + getFileExceptionHandler())); + } + + @Override + public void populateDisplayData(DisplayData.Builder builder) { + super.populateDisplayData(builder); + builder.add(DisplayData.item("parseFn", getParseFn().getClass()).withLabel("Parse function")); + } + + private static class CreateParseSourceFn + implements SerializableFunction> { + private final SerializableFunction parseFn; + private final Coder coder; + + CreateParseSourceFn(SerializableFunction parseFn, Coder coder) { + this.parseFn = parseFn; + this.coder = coder; + } + + @Override + public FileBasedSource apply(String input) { + return AvroSource.from(input).withParseFn(parseFn, coder); + } + } + } + + ///////////////////////////////////////////////////////////////////////////// + + /** + * Implementation of {@link #parseAllGenericRecords}. + * + * @deprecated See {@link #parseAllGenericRecords(SerializableFunction)} for details. + */ + @Deprecated + @AutoValue + public abstract static class ParseAll extends PTransform, PCollection> { + abstract MatchConfiguration getMatchConfiguration(); + + abstract SerializableFunction getParseFn(); + + abstract @Nullable Coder getCoder(); + + abstract long getDesiredBundleSizeBytes(); + + abstract Builder toBuilder(); + + @AutoValue.Builder + abstract static class Builder { + abstract Builder setMatchConfiguration(MatchConfiguration matchConfiguration); + + abstract Builder setParseFn(SerializableFunction parseFn); + + abstract Builder setCoder(Coder coder); + + abstract Builder setDesiredBundleSizeBytes(long desiredBundleSizeBytes); + + abstract ParseAll build(); + } + + /** Sets the {@link MatchConfiguration}. */ + public ParseAll withMatchConfiguration(MatchConfiguration configuration) { + return toBuilder().setMatchConfiguration(configuration).build(); + } + + /** Like {@link Read#withEmptyMatchTreatment}. */ + public ParseAll withEmptyMatchTreatment(EmptyMatchTreatment treatment) { + return withMatchConfiguration(getMatchConfiguration().withEmptyMatchTreatment(treatment)); + } + + /** Like {@link Read#watchForNewFiles(Duration, TerminationCondition, boolean)}. */ + public ParseAll watchForNewFiles( + Duration pollInterval, + TerminationCondition terminationCondition, + boolean matchUpdatedFiles) { + return withMatchConfiguration( + getMatchConfiguration() + .continuously(pollInterval, terminationCondition, matchUpdatedFiles)); + } + + /** Like {@link Read#watchForNewFiles(Duration, TerminationCondition)}. 
*/ + public ParseAll watchForNewFiles( + Duration pollInterval, TerminationCondition terminationCondition) { + return watchForNewFiles(pollInterval, terminationCondition, false); + } + + /** Specifies the coder for the result of the {@code parseFn}. */ + public ParseAll withCoder(Coder coder) { + return toBuilder().setCoder(coder).build(); + } + + @VisibleForTesting + ParseAll withDesiredBundleSizeBytes(long desiredBundleSizeBytes) { + return toBuilder().setDesiredBundleSizeBytes(desiredBundleSizeBytes).build(); + } + + @Override + public PCollection expand(PCollection input) { + return input + .apply(FileIO.matchAll().withConfiguration(getMatchConfiguration())) + .apply(FileIO.readMatches().withDirectoryTreatment(DirectoryTreatment.PROHIBIT)) + .apply( + "Parse all via FileBasedSource", + parseFilesGenericRecords(getParseFn()).withCoder(getCoder())); + } + + @Override + public void populateDisplayData(DisplayData.Builder builder) { + super.populateDisplayData(builder); + builder + .add(DisplayData.item("parseFn", getParseFn().getClass()).withLabel("Parse function")) + .include("matchConfiguration", getMatchConfiguration()); + } + } + + ///////////////////////////////////////////////////////////////////////////// + + /** Implementation of {@link #write}. */ + @AutoValue + public abstract static class TypedWrite + extends PTransform, WriteFilesResult> { + static final CodecFactory DEFAULT_CODEC = CodecFactory.snappyCodec(); + static final SerializableAvroCodecFactory DEFAULT_SERIALIZABLE_CODEC = + new SerializableAvroCodecFactory(DEFAULT_CODEC); + + abstract @Nullable SerializableFunction getFormatFunction(); + + abstract @Nullable ValueProvider getFilenamePrefix(); + + abstract @Nullable String getShardTemplate(); + + abstract @Nullable String getFilenameSuffix(); + + abstract @Nullable ValueProvider getTempDirectory(); + + abstract int getNumShards(); + + abstract boolean getGenericRecords(); + + abstract int getSyncInterval(); + + abstract @Nullable Schema getSchema(); + + abstract boolean getWindowedWrites(); + + abstract boolean getNoSpilling(); + + abstract @Nullable FilenamePolicy getFilenamePolicy(); + + abstract @Nullable DynamicAvroDestinations + getDynamicDestinations(); + + abstract AvroSink.@Nullable DatumWriterFactory getDatumWriterFactory(); + + /** + * The codec used to encode the blocks in the Avro file. String value drawn from those in + * https://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/file/CodecFactory.html + */ + abstract SerializableAvroCodecFactory getCodec(); + /** Avro file metadata. 
*/ + abstract ImmutableMap getMetadata(); + + abstract Builder toBuilder(); + + @AutoValue.Builder + abstract static class Builder { + abstract Builder setFormatFunction( + @Nullable SerializableFunction formatFunction); + + abstract Builder setFilenamePrefix( + ValueProvider filenamePrefix); + + abstract Builder setFilenameSuffix( + @Nullable String filenameSuffix); + + abstract Builder setTempDirectory( + ValueProvider tempDirectory); + + abstract Builder setNumShards(int numShards); + + abstract Builder setShardTemplate( + @Nullable String shardTemplate); + + abstract Builder setGenericRecords(boolean genericRecords); + + abstract Builder setSyncInterval(int syncInterval); + + abstract Builder setSchema(Schema schema); + + abstract Builder setWindowedWrites(boolean windowedWrites); + + abstract Builder setNoSpilling(boolean noSpilling); + + abstract Builder setFilenamePolicy( + FilenamePolicy filenamePolicy); + + abstract Builder setCodec(SerializableAvroCodecFactory codec); + + abstract Builder setMetadata( + ImmutableMap metadata); + + abstract Builder setDynamicDestinations( + DynamicAvroDestinations dynamicDestinations); + + abstract Builder setDatumWriterFactory( + AvroSink.DatumWriterFactory datumWriterFactory); + + abstract TypedWrite build(); + } + + /** + * Writes to file(s) with the given output prefix. See {@link FileSystems} for information on + * supported file systems. + * + *

The name of the output files will be determined by the {@link FilenamePolicy} used. + * + *

By default, a {@link DefaultFilenamePolicy} will build output filenames using the + * specified prefix, a shard name template (see {@link #withShardNameTemplate(String)}), and a + * common suffix (if supplied using {@link #withSuffix(String)}). This default can be overridden + * using {@link #to(FilenamePolicy)}. + */ + public TypedWrite to(String outputPrefix) { + return to(FileBasedSink.convertToFileResourceIfPossible(outputPrefix)); + } + + /** + * Writes to file(s) with the given output prefix. See {@link FileSystems} for information on + * supported file systems. This prefix is used by the {@link DefaultFilenamePolicy} to generate + * filenames. + * + *

By default, a {@link DefaultFilenamePolicy} will build output filenames using the + * specified prefix, a shard name template (see {@link #withShardNameTemplate(String)}), and a + * common suffix (if supplied using {@link #withSuffix(String)}). This default can be overridden + * using {@link #to(FilenamePolicy)}. + * + *

This default policy can be overridden using {@link #to(FilenamePolicy)}, in which case + * {@link #withShardNameTemplate(String)} and {@link #withSuffix(String)} should not be set. + * Custom filename policies do not automatically see this prefix - you should explicitly pass + * the prefix into your {@link FilenamePolicy} object if you need this. + * + *
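A sketch of the default filename policy described above: output prefix, shard name template, and suffix. The `User` class, element values, and path are placeholders; the record type implements `Serializable` only so that `Create` can infer a coder in this standalone example.

```java
import java.io.Serializable;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.extensions.avro.io.AvroIO;
import org.apache.beam.sdk.transforms.Create;

public class AvroWritePrefixSketch {
  // Hypothetical reflect-compatible record type.
  public static class User implements Serializable {
    public String name;

    public User(String name) {
      this.name = name;
    }
  }

  public static void main(String[] args) {
    Pipeline p = Pipeline.create();

    p.apply(Create.of(new User("alice"), new User("bob")))
        .apply(
            AvroIO.write(User.class)
                .to("gs://my-bucket/output/users")   // filename prefix
                .withShardNameTemplate("-SS-of-NN")  // shard name template
                .withSuffix(".avro"));               // common suffix

    p.run().waitUntilFinish();
  }
}
```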

If {@link #withTempDirectory} has not been called, this filename prefix will be used to + * infer a directory for temporary files. + */ + @Experimental(Kind.FILESYSTEM) + public TypedWrite to(ResourceId outputPrefix) { + return toResource(StaticValueProvider.of(outputPrefix)); + } + + private static class OutputPrefixToResourceId + implements SerializableFunction { + @Override + public ResourceId apply(String input) { + return FileBasedSink.convertToFileResourceIfPossible(input); + } + } + + /** Like {@link #to(String)}. */ + public TypedWrite to(ValueProvider outputPrefix) { + return toResource( + NestedValueProvider.of( + outputPrefix, + // The function cannot be created as an anonymous class here since the enclosed class + // may contain unserializable members. + new OutputPrefixToResourceId())); + } + + /** Like {@link #to(ResourceId)}. */ + @Experimental(Kind.FILESYSTEM) + public TypedWrite toResource( + ValueProvider outputPrefix) { + return toBuilder().setFilenamePrefix(outputPrefix).build(); + } + + /** + * Writes to files named according to the given {@link FilenamePolicy}. A directory for + * temporary files must be specified using {@link #withTempDirectory}. + */ + @Experimental(Kind.FILESYSTEM) + public TypedWrite to(FilenamePolicy filenamePolicy) { + return toBuilder().setFilenamePolicy(filenamePolicy).build(); + } + + /** + * Use a {@link DynamicAvroDestinations} object to vend {@link FilenamePolicy} objects. These + * objects can examine the input record when creating a {@link FilenamePolicy}. A directory for + * temporary files must be specified using {@link #withTempDirectory}. + * + * @deprecated Use {@link FileIO#write()} or {@link FileIO#writeDynamic()} instead. + */ + @Experimental(Kind.FILESYSTEM) + @Deprecated + public TypedWrite to( + DynamicAvroDestinations dynamicDestinations) { + return toBuilder() + .setDynamicDestinations((DynamicAvroDestinations) dynamicDestinations) + .build(); + } + + /** + * Sets the approximate number of uncompressed bytes to write in each block for the AVRO + * container format. + */ + public TypedWrite withSyncInterval(int syncInterval) { + return toBuilder().setSyncInterval(syncInterval).build(); + } + + /** + * Sets the output schema. Can only be used when the output type is {@link GenericRecord} and + * when not using {@link #to(DynamicAvroDestinations)}. + */ + public TypedWrite withSchema(Schema schema) { + return toBuilder().setSchema(schema).build(); + } + + /** + * Specifies a format function to convert {@link UserT} to the output type. If {@link + * #to(DynamicAvroDestinations)} is used, {@link DynamicAvroDestinations#formatRecord} must be + * used instead. + */ + public TypedWrite withFormatFunction( + @Nullable SerializableFunction formatFunction) { + return toBuilder().setFormatFunction(formatFunction).build(); + } + + /** Set the base directory used to generate temporary files. */ + @Experimental(Kind.FILESYSTEM) + public TypedWrite withTempDirectory( + ValueProvider tempDirectory) { + return toBuilder().setTempDirectory(tempDirectory).build(); + } + + /** Set the base directory used to generate temporary files. */ + @Experimental(Kind.FILESYSTEM) + public TypedWrite withTempDirectory(ResourceId tempDirectory) { + return withTempDirectory(StaticValueProvider.of(tempDirectory)); + } + + /** + * Uses the given {@link ShardNameTemplate} for naming output files. This option may only be + * used when using one of the default filename-prefix to() overrides. + * + *

See {@link DefaultFilenamePolicy} for how the prefix, shard name template, and suffix are + * used. + */ + public TypedWrite withShardNameTemplate(String shardTemplate) { + return toBuilder().setShardTemplate(shardTemplate).build(); + } + + /** + * Configures the filename suffix for written files. This option may only be used when using one + * of the default filename-prefix to() overrides. + * + *

See {@link DefaultFilenamePolicy} for how the prefix, shard name template, and suffix are + * used. + */ + public TypedWrite withSuffix(String filenameSuffix) { + return toBuilder().setFilenameSuffix(filenameSuffix).build(); + } + + /** + * Configures the number of output shards produced overall (when using unwindowed writes) or + * per-window (when using windowed writes). + * + *

For unwindowed writes, constraining the number of shards is likely to reduce the + * performance of a pipeline. Setting this value is not recommended unless you require a + * specific number of output files. + * + * @param numShards the number of shards to use, or 0 to let the system decide. + */ + public TypedWrite withNumShards(int numShards) { + checkArgument(numShards >= 0); + return toBuilder().setNumShards(numShards).build(); + } + + /** + * Forces a single file as output and empty shard name template. This option is only compatible + * with unwindowed writes. + * + *

For unwindowed writes, constraining the number of shards is likely to reduce the + * performance of a pipeline. Setting this value is not recommended unless you require a + * specific number of output files. + * + *

This is equivalent to {@code .withNumShards(1).withShardNameTemplate("")} + */ + public TypedWrite withoutSharding() { + return withNumShards(1).withShardNameTemplate(""); + } + + /** + * Preserves windowing of input elements and writes them to files based on the element's window. + * + *

If using {@link #to(FilenamePolicy)}, filenames will be generated using {@link + * FilenamePolicy#windowedFilename}. See also {@link WriteFiles#withWindowedWrites()}. + */ + public TypedWrite withWindowedWrites() { + return toBuilder().setWindowedWrites(true).build(); + } + + /** See {@link WriteFiles#withNoSpilling()}. */ + public TypedWrite withNoSpilling() { + return toBuilder().setNoSpilling(true).build(); + } + + /** Writes to Avro file(s) compressed using the specified codec. */ + public TypedWrite withCodec(CodecFactory codec) { + return toBuilder().setCodec(new SerializableAvroCodecFactory(codec)).build(); + } + + /** + * Specifies an {@link AvroSink.DatumWriterFactory} to use for creating {@link + * org.apache.avro.io.DatumWriter} instances. + */ + public TypedWrite withDatumWriterFactory( + AvroSink.DatumWriterFactory datumWriterFactory) { + return toBuilder().setDatumWriterFactory(datumWriterFactory).build(); + } + + /** + * Writes to Avro file(s) with the specified metadata. + * + *

Supported value types are String, Long, and byte[]. + */ + public TypedWrite withMetadata(Map metadata) { + Map badKeys = Maps.newLinkedHashMap(); + for (Map.Entry entry : metadata.entrySet()) { + Object v = entry.getValue(); + if (!(v instanceof String || v instanceof Long || v instanceof byte[])) { + badKeys.put(entry.getKey(), v.getClass().getSimpleName()); + } + } + checkArgument( + badKeys.isEmpty(), + "Metadata value type must be one of String, Long, or byte[]. Found %s", + badKeys); + return toBuilder().setMetadata(ImmutableMap.copyOf(metadata)).build(); + } + + DynamicAvroDestinations resolveDynamicDestinations() { + DynamicAvroDestinations dynamicDestinations = + getDynamicDestinations(); + if (dynamicDestinations == null) { + // In this case DestinationT is Void. + FilenamePolicy usedFilenamePolicy = getFilenamePolicy(); + if (usedFilenamePolicy == null) { + usedFilenamePolicy = + DefaultFilenamePolicy.fromStandardParameters( + getFilenamePrefix(), + getShardTemplate(), + getFilenameSuffix(), + getWindowedWrites()); + } + dynamicDestinations = + (DynamicAvroDestinations) + constantDestinations( + usedFilenamePolicy, + getSchema(), + getMetadata(), + getCodec().getCodec(), + getFormatFunction(), + getDatumWriterFactory()); + } + return dynamicDestinations; + } + + @Override + public WriteFilesResult expand(PCollection input) { + checkArgument( + getFilenamePrefix() != null || getTempDirectory() != null, + "Need to set either the filename prefix or the tempDirectory of a AvroIO.Write " + + "transform."); + if (getFilenamePolicy() != null) { + checkArgument( + getShardTemplate() == null && getFilenameSuffix() == null, + "shardTemplate and filenameSuffix should only be used with the default " + + "filename policy"); + } + if (getDynamicDestinations() != null) { + checkArgument( + getFormatFunction() == null, + "A format function should not be specified " + + "with DynamicDestinations. Use DynamicDestinations.formatRecord instead"); + } else { + checkArgument( + getSchema() != null, "Unless using DynamicDestinations, .withSchema() is required."); + } + + ValueProvider tempDirectory = getTempDirectory(); + if (tempDirectory == null) { + tempDirectory = getFilenamePrefix(); + } + WriteFiles write = + WriteFiles.to( + new AvroSink<>( + tempDirectory, + resolveDynamicDestinations(), + getGenericRecords(), + getSyncInterval())); + if (getNumShards() > 0) { + write = write.withNumShards(getNumShards()); + } + if (getWindowedWrites()) { + write = write.withWindowedWrites(); + } + if (getNoSpilling()) { + write = write.withNoSpilling(); + } + return input.apply("Write", write); + } + + @Override + public void populateDisplayData(DisplayData.Builder builder) { + super.populateDisplayData(builder); + resolveDynamicDestinations().populateDisplayData(builder); + builder + .addIfNotDefault( + DisplayData.item("numShards", getNumShards()).withLabel("Maximum Output Shards"), 0) + .addIfNotNull( + DisplayData.item("tempDirectory", getTempDirectory()) + .withLabel("Directory for temporary files")); + } + } + + /** + * This class is used as the default return value of {@link AvroIO#write} + * + *
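A sketch combining the windowed-write, codec, and metadata options described above on a small timestamped input. The schema, path, and metadata key are placeholders, and the `AvroCoder` import path assumes the new `extensions/avro` module layout.

```java
import java.util.HashMap;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.extensions.avro.coders.AvroCoder;
import org.apache.beam.sdk.extensions.avro.io.AvroIO;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.TimestampedValue;
import org.joda.time.Duration;
import org.joda.time.Instant;

public class AvroWindowedWriteSketch {
  public static void main(String[] args) {
    Schema schema =
        new Schema.Parser()
            .parse(
                "{\"type\":\"record\",\"name\":\"Event\","
                    + "\"fields\":[{\"name\":\"id\",\"type\":\"string\"}]}");
    GenericRecord record = new GenericData.Record(schema);
    record.put("id", "e1");

    Map<String, Object> metadata = new HashMap<>();
    metadata.put("origin", "example-pipeline"); // values may be String, Long, or byte[]

    Pipeline p = Pipeline.create();
    p.apply(
            Create.timestamped(TimestampedValue.of(record, Instant.now()))
                .withCoder(AvroCoder.of(schema)))
        .apply(Window.<GenericRecord>into(FixedWindows.of(Duration.standardMinutes(5))))
        .apply(
            AvroIO.writeGenericRecords(schema)
                .to("gs://my-bucket/output/windowed/events")
                .withWindowedWrites()
                .withNumShards(3)
                .withCodec(CodecFactory.deflateCodec(6))
                .withMetadata(metadata));

    p.run().waitUntilFinish();
  }
}
```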

All methods in this class delegate to the appropriate method of {@link TypedWrite}. This + * class exists for backwards compatibility, and will be removed in Beam 3.0. + */ + public static class Write extends PTransform, PDone> { + @VisibleForTesting final TypedWrite inner; + + Write(TypedWrite inner) { + this.inner = inner; + } + + /** See {@link TypedWrite#to(String)}. */ + public Write to(String outputPrefix) { + return new Write<>( + inner + .to(FileBasedSink.convertToFileResourceIfPossible(outputPrefix)) + .withFormatFunction(SerializableFunctions.identity())); + } + + /** See {@link TypedWrite#to(ResourceId)} . */ + @Experimental(Kind.FILESYSTEM) + public Write to(ResourceId outputPrefix) { + return new Write<>( + inner.to(outputPrefix).withFormatFunction(SerializableFunctions.identity())); + } + + /** See {@link TypedWrite#to(ValueProvider)}. */ + public Write to(ValueProvider outputPrefix) { + return new Write<>( + inner.to(outputPrefix).withFormatFunction(SerializableFunctions.identity())); + } + + /** See {@link TypedWrite#to(ResourceId)}. */ + @Experimental(Kind.FILESYSTEM) + public Write toResource(ValueProvider outputPrefix) { + return new Write<>( + inner.toResource(outputPrefix).withFormatFunction(SerializableFunctions.identity())); + } + + /** See {@link TypedWrite#to(FilenamePolicy)}. */ + public Write to(FilenamePolicy filenamePolicy) { + return new Write<>( + inner.to(filenamePolicy).withFormatFunction(SerializableFunctions.identity())); + } + + /** + * See {@link TypedWrite#to(DynamicAvroDestinations)}. + * + * @deprecated Use {@link FileIO#write()} or {@link FileIO#writeDynamic()} instead. + */ + @Deprecated + public Write to(DynamicAvroDestinations dynamicDestinations) { + return new Write<>(inner.to(dynamicDestinations).withFormatFunction(null)); + } + + /** See {@link TypedWrite#withSyncInterval}. */ + public Write withSyncInterval(int syncInterval) { + return new Write<>(inner.withSyncInterval(syncInterval)); + } + + /** See {@link TypedWrite#withSchema}. */ + public Write withSchema(Schema schema) { + return new Write<>(inner.withSchema(schema)); + } + + /** See {@link TypedWrite#withTempDirectory(ValueProvider)}. */ + @Experimental(Kind.FILESYSTEM) + public Write withTempDirectory(ValueProvider tempDirectory) { + return new Write<>(inner.withTempDirectory(tempDirectory)); + } + + /** See {@link TypedWrite#withTempDirectory(ResourceId)}. */ + public Write withTempDirectory(ResourceId tempDirectory) { + return new Write<>(inner.withTempDirectory(tempDirectory)); + } + + /** See {@link TypedWrite#withShardNameTemplate}. */ + public Write withShardNameTemplate(String shardTemplate) { + return new Write<>(inner.withShardNameTemplate(shardTemplate)); + } + + /** See {@link TypedWrite#withSuffix}. */ + public Write withSuffix(String filenameSuffix) { + return new Write<>(inner.withSuffix(filenameSuffix)); + } + + /** See {@link TypedWrite#withNumShards}. */ + public Write withNumShards(int numShards) { + return new Write<>(inner.withNumShards(numShards)); + } + + /** See {@link TypedWrite#withoutSharding}. */ + public Write withoutSharding() { + return new Write<>(inner.withoutSharding()); + } + + /** See {@link TypedWrite#withWindowedWrites}. */ + public Write withWindowedWrites() { + return new Write<>(inner.withWindowedWrites()); + } + + /** See {@link TypedWrite#withCodec}. */ + public Write withCodec(CodecFactory codec) { + return new Write<>(inner.withCodec(codec)); + } + + /** See {@link TypedWrite#withDatumWriterFactory}. 
*/ + public Write withDatumWriterFactory(AvroSink.DatumWriterFactory datumWriterFactory) { + return new Write<>(inner.withDatumWriterFactory(datumWriterFactory)); + } + + /** + * Specify that output filenames are wanted. + * + *

The nested {@link TypedWrite} transform always has access to output filenames; however, due + * to backwards-compatibility concerns, {@link Write} cannot return them. This method simply + * returns the inner {@link TypedWrite} transform, which has {@link WriteFilesResult} as its + * output type, allowing access to output files. + * + *

The supplied {@code DestinationT} type must be: the same as that supplied in {@link + * #to(DynamicAvroDestinations)} if that method was used, or {@code Void} otherwise. + */ + public TypedWrite withOutputFilenames() { + return (TypedWrite) inner; + } + + /** See {@link TypedWrite#withMetadata} . */ + public Write withMetadata(Map metadata) { + return new Write<>(inner.withMetadata(metadata)); + } + + @Override + public PDone expand(PCollection input) { + input.apply(inner); + return PDone.in(input.getPipeline()); + } + + @Override + public void populateDisplayData(DisplayData.Builder builder) { + inner.populateDisplayData(builder); + } + } + + /** + * Returns a {@link DynamicAvroDestinations} that always returns the same {@link FilenamePolicy}, + * schema, metadata, and codec. + */ + public static DynamicAvroDestinations constantDestinations( + FilenamePolicy filenamePolicy, + Schema schema, + Map metadata, + CodecFactory codec, + SerializableFunction formatFunction) { + return constantDestinations(filenamePolicy, schema, metadata, codec, formatFunction, null); + } + + /** + * Returns a {@link DynamicAvroDestinations} that always returns the same {@link FilenamePolicy}, + * schema, metadata, and codec. + */ + public static DynamicAvroDestinations constantDestinations( + FilenamePolicy filenamePolicy, + Schema schema, + Map metadata, + CodecFactory codec, + SerializableFunction formatFunction, + AvroSink.@Nullable DatumWriterFactory datumWriterFactory) { + return new ConstantAvroDestination<>( + filenamePolicy, schema, metadata, codec, formatFunction, datumWriterFactory); + } + ///////////////////////////////////////////////////////////////////////////// + + /** + * Formats an element of a user type into a record with the given schema. + * + * @deprecated Users can achieve the same by providing this transform in a {@link + * org.apache.beam.sdk.transforms.ParDo} before using write in AvroIO {@link #write(Class)}. + */ + @Deprecated + public interface RecordFormatter extends Serializable { + GenericRecord formatRecord(ElementT element, Schema schema); + } + + /** + * A {@link Sink} for use with {@link FileIO#write} and {@link FileIO#writeDynamic}, writing + * elements of the given generated class, like {@link #write(Class)}. + */ + public static Sink sink(final Class clazz) { + return new AutoValue_AvroIO_Sink.Builder() + .setJsonSchema(ReflectData.get().getSchema(clazz).toString()) + .setMetadata(ImmutableMap.of()) + .setCodec(TypedWrite.DEFAULT_SERIALIZABLE_CODEC) + .build(); + } + + /** + * A {@link Sink} for use with {@link FileIO#write} and {@link FileIO#writeDynamic}, writing + * elements with a given (common) schema, like {@link #writeGenericRecords(Schema)}. + */ + @Experimental(Kind.SOURCE_SINK) + public static Sink sink(Schema schema) { + return sink(schema.toString()); + } + + /** + * A {@link Sink} for use with {@link FileIO#write} and {@link FileIO#writeDynamic}, writing + * elements with a given (common) schema, like {@link #writeGenericRecords(String)}. + */ + @Experimental(Kind.SOURCE_SINK) + public static Sink sink(String jsonSchema) { + return new AutoValue_AvroIO_Sink.Builder() + .setJsonSchema(jsonSchema) + .setMetadata(ImmutableMap.of()) + .setCodec(TypedWrite.DEFAULT_SERIALIZABLE_CODEC) + .build(); + } + + /** + * A {@link Sink} for use with {@link FileIO#write} and {@link FileIO#writeDynamic}, writing + * elements by converting each one to a {@link GenericRecord} with a given (common) schema, like + * {@link #writeCustomTypeToGenericRecords()}. 
+ * + * @deprecated RecordFormatter will be removed in future versions. + */ + @Deprecated + public static Sink sinkViaGenericRecords( + Schema schema, RecordFormatter formatter) { + return new AutoValue_AvroIO_Sink.Builder() + .setRecordFormatter(formatter) + .setJsonSchema(schema.toString()) + .setMetadata(ImmutableMap.of()) + .setCodec(TypedWrite.DEFAULT_SERIALIZABLE_CODEC) + .build(); + } + + /** Implementation of {@link #sink} and {@link #sinkViaGenericRecords}. */ + @AutoValue + public abstract static class Sink implements FileIO.Sink { + /** @deprecated RecordFormatter will be removed in future versions. */ + @Deprecated + abstract @Nullable RecordFormatter getRecordFormatter(); + + abstract @Nullable String getJsonSchema(); + + abstract Map getMetadata(); + + abstract SerializableAvroCodecFactory getCodec(); + + abstract Builder toBuilder(); + + @AutoValue.Builder + abstract static class Builder { + /** @deprecated RecordFormatter will be removed in future versions. */ + @Deprecated + abstract Builder setRecordFormatter(RecordFormatter formatter); + + abstract Builder setJsonSchema(String jsonSchema); + + abstract Builder setMetadata(Map metadata); + + abstract Builder setCodec(SerializableAvroCodecFactory codec); + + abstract Sink build(); + } + + /** Specifies to put the given metadata into each generated file. By default, empty. */ + public Sink withMetadata(Map metadata) { + return toBuilder().setMetadata(metadata).build(); + } + + /** + * Specifies to use the given {@link CodecFactory} for each generated file. By default, {@code + * CodecFactory.snappyCodec()}. + */ + public Sink withCodec(CodecFactory codec) { + return toBuilder().setCodec(new SerializableAvroCodecFactory(codec)).build(); + } + + private transient @Nullable Schema schema; + private transient @Nullable DataFileWriter reflectWriter; + private transient @Nullable DataFileWriter genericWriter; + + @Override + public void open(WritableByteChannel channel) throws IOException { + this.schema = new Schema.Parser().parse(getJsonSchema()); + DataFileWriter writer; + if (getRecordFormatter() == null) { + writer = reflectWriter = new DataFileWriter<>(new ReflectDatumWriter<>(schema)); + } else { + writer = genericWriter = new DataFileWriter<>(new GenericDatumWriter<>(schema)); + } + writer.setCodec(getCodec().getCodec()); + for (Map.Entry entry : getMetadata().entrySet()) { + Object v = entry.getValue(); + if (v instanceof String) { + writer.setMeta(entry.getKey(), (String) v); + } else if (v instanceof Long) { + writer.setMeta(entry.getKey(), (Long) v); + } else if (v instanceof byte[]) { + writer.setMeta(entry.getKey(), (byte[]) v); + } else { + throw new IllegalStateException( + "Metadata value type must be one of String, Long, or byte[]. Found " + + v.getClass().getSimpleName()); + } + } + writer.create(schema, Channels.newOutputStream(channel)); + } + + @Override + public void write(ElementT element) throws IOException { + if (getRecordFormatter() == null) { + reflectWriter.append(element); + } else { + genericWriter.append(getRecordFormatter().formatRecord(element, schema)); + } + } + + @Override + public void flush() throws IOException { + MoreObjects.firstNonNull(reflectWriter, genericWriter).flush(); + } + } + + /** Disallow construction of utility class. 
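A sketch of the {@code FileIO} sink described above, writing generic records through {@code FileIO.write().via(AvroIO.sink(...))}. The schema, element, and output directory are placeholders, and the `AvroCoder` import path assumes the new `extensions/avro` module layout.

```java
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.extensions.avro.coders.AvroCoder;
import org.apache.beam.sdk.extensions.avro.io.AvroIO;
import org.apache.beam.sdk.io.FileIO;
import org.apache.beam.sdk.transforms.Create;

public class AvroSinkSketch {
  public static void main(String[] args) {
    Schema schema =
        new Schema.Parser()
            .parse(
                "{\"type\":\"record\",\"name\":\"User\","
                    + "\"fields\":[{\"name\":\"name\",\"type\":\"string\"}]}");
    GenericRecord user = new GenericData.Record(schema);
    user.put("name", "alice");

    Pipeline p = Pipeline.create();
    p.apply(Create.of(user).withCoder(AvroCoder.of(schema)))
        .apply(
            FileIO.<GenericRecord>write()
                .via(AvroIO.sink(schema))          // Avro sink with a common schema
                .to("gs://my-bucket/output/users") // output directory
                .withSuffix(".avro"));

    p.run().waitUntilFinish();
  }
}
```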
*/ + private AvroIO() {} +} diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/AvroSchemaIOProvider.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/AvroSchemaIOProvider.java new file mode 100644 index 0000000000000..08a9f3a2946b0 --- /dev/null +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/AvroSchemaIOProvider.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.avro.io; + +import com.google.auto.service.AutoService; +import java.io.Serializable; +import org.apache.avro.generic.GenericRecord; +import org.apache.beam.sdk.annotations.Internal; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.schemas.Schema.FieldType; +import org.apache.beam.sdk.schemas.io.SchemaIO; +import org.apache.beam.sdk.schemas.io.SchemaIOProvider; +import org.apache.beam.sdk.schemas.transforms.Convert; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.windowing.FixedWindows; +import org.apache.beam.sdk.transforms.windowing.Window; +import org.apache.beam.sdk.values.PBegin; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.PCollection.IsBounded; +import org.apache.beam.sdk.values.PDone; +import org.apache.beam.sdk.values.POutput; +import org.apache.beam.sdk.values.Row; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.joda.time.Duration; + +/** + * An implementation of {@link SchemaIOProvider} for reading and writing Avro files with {@link + * AvroIO}. + */ +@Internal +@AutoService(SchemaIOProvider.class) +@SuppressWarnings({ + "nullness" // TODO(https://github.com/apache/beam/issues/20497) +}) +public class AvroSchemaIOProvider implements SchemaIOProvider { + /** Returns an id that uniquely represents this IO. */ + @Override + public String identifier() { + return "avro"; + } + + /** + * Returns the expected schema of the configuration object. Note this is distinct from the schema + * of the data source itself. No configuration expected for Avro. + */ + @Override + public Schema configurationSchema() { + return Schema.builder().addNullableField("writeWindowSizeSeconds", FieldType.INT64).build(); + } + + /** + * Produce a SchemaIO given a String representing the data's location, the schema of the data that + * resides there, and some IO-specific configuration object. 
+ */ + @Override + public AvroSchemaIO from(String location, Row configuration, Schema dataSchema) { + return new AvroSchemaIO(location, dataSchema, configuration); + } + + @Override + public boolean requiresDataSchema() { + return true; + } + + @Override + public IsBounded isBounded() { + // This supports streaming now as well but there's no option for this. The move to + // SchemaTransform will remove the need to provide this. + return IsBounded.BOUNDED; + } + + /** An abstraction to create schema aware IOs. */ + private static class AvroSchemaIO implements SchemaIO, Serializable { + protected final Schema dataSchema; + protected final String location; + protected final @Nullable Duration windowSize; + + private AvroSchemaIO(String location, Schema dataSchema, Row configuration) { + this.dataSchema = dataSchema; + this.location = location; + if (configuration.getInt64("writeWindowSizeSeconds") != null) { + windowSize = Duration.standardSeconds(configuration.getInt64("writeWindowSizeSeconds")); + } else { + windowSize = null; + } + } + + @Override + public Schema schema() { + return dataSchema; + } + + @Override + public PTransform> buildReader() { + return new PTransform>() { + @Override + public PCollection expand(PBegin begin) { + return begin + .apply( + "AvroIORead", + AvroIO.readGenericRecords(AvroUtils.toAvroSchema(dataSchema, null, null)) + .withBeamSchemas(true) + .from(location)) + .apply("ToRows", Convert.toRows()); + } + }; + } + + @Override + public PTransform, POutput> buildWriter() { + return new PTransform, POutput>() { + @Override + public PDone expand(PCollection input) { + PCollection asRecords = + input.apply("ToGenericRecords", Convert.to(GenericRecord.class)); + AvroIO.Write avroWrite = + AvroIO.writeGenericRecords(AvroUtils.toAvroSchema(dataSchema, null, null)) + .to(location); + if (input.isBounded() == IsBounded.UNBOUNDED || windowSize != null) { + asRecords = + asRecords.apply( + Window.into( + FixedWindows.of( + windowSize == null ? Duration.standardMinutes(1) : windowSize))); + avroWrite = avroWrite.withWindowedWrites().withNumShards(1); + } else { + avroWrite = avroWrite.withoutSharding(); + } + return asRecords.apply("AvroIOWrite", avroWrite); + } + }; + } + } +} diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/AvroSink.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/AvroSink.java new file mode 100644 index 0000000000000..c14de88dd4ee3 --- /dev/null +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/AvroSink.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.avro.io; + +import java.io.Serializable; +import java.nio.channels.Channels; +import java.nio.channels.WritableByteChannel; +import java.util.Map; +import org.apache.avro.Schema; +import org.apache.avro.file.CodecFactory; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.reflect.ReflectDatumWriter; +import org.apache.beam.sdk.io.Compression; +import org.apache.beam.sdk.io.FileBasedSink; +import org.apache.beam.sdk.io.fs.ResourceId; +import org.apache.beam.sdk.options.ValueProvider; +import org.apache.beam.sdk.util.MimeTypes; +import org.checkerframework.checker.nullness.qual.Nullable; + +/** A {@link FileBasedSink} for Avro files. */ +@SuppressWarnings({ + "nullness" // TODO(https://github.com/apache/beam/issues/20497) +}) +public class AvroSink + extends FileBasedSink { + private final boolean genericRecords; + private final int syncInterval; + + @FunctionalInterface + public interface DatumWriterFactory extends Serializable { + DatumWriter apply(Schema writer); + } + + AvroSink( + ValueProvider outputPrefix, + DynamicAvroDestinations dynamicDestinations, + boolean genericRecords, + int syncInterval) { + // Avro handles compression internally using the codec. + super(outputPrefix, dynamicDestinations, Compression.UNCOMPRESSED); + this.genericRecords = genericRecords; + this.syncInterval = syncInterval; + } + + @Override + public DynamicAvroDestinations getDynamicDestinations() { + return (DynamicAvroDestinations) super.getDynamicDestinations(); + } + + @Override + public WriteOperation createWriteOperation() { + return new AvroWriteOperation<>(this, genericRecords, syncInterval); + } + + /** A {@link WriteOperation WriteOperation} for Avro files. */ + private static class AvroWriteOperation + extends WriteOperation { + private final DynamicAvroDestinations dynamicDestinations; + private final boolean genericRecords; + private final int syncInterval; + + private AvroWriteOperation( + AvroSink sink, boolean genericRecords, int syncInterval) { + super(sink); + this.dynamicDestinations = sink.getDynamicDestinations(); + this.genericRecords = genericRecords; + this.syncInterval = syncInterval; + } + + @Override + public Writer createWriter() throws Exception { + return new AvroWriter<>(this, dynamicDestinations, genericRecords, syncInterval); + } + } + + /** A {@link Writer Writer} for Avro files. */ + private static class AvroWriter extends Writer { + + // Initialized in prepareWrite + private @Nullable DataFileWriter dataFileWriter; + + private final DynamicAvroDestinations dynamicDestinations; + private final boolean genericRecords; + private final int syncInterval; + + public AvroWriter( + WriteOperation writeOperation, + DynamicAvroDestinations dynamicDestinations, + boolean genericRecords, + int syncInterval) { + super(writeOperation, MimeTypes.BINARY); + this.dynamicDestinations = dynamicDestinations; + this.genericRecords = genericRecords; + this.syncInterval = syncInterval; + } + + @SuppressWarnings("deprecation") // uses internal test functionality. 
+ @Override + protected void prepareWrite(WritableByteChannel channel) throws Exception { + DestinationT destination = getDestination(); + CodecFactory codec = dynamicDestinations.getCodec(destination); + Schema schema = dynamicDestinations.getSchema(destination); + Map metadata = dynamicDestinations.getMetadata(destination); + DatumWriter datumWriter; + DatumWriterFactory datumWriterFactory = + dynamicDestinations.getDatumWriterFactory(destination); + + if (datumWriterFactory == null) { + datumWriter = + genericRecords ? new GenericDatumWriter<>(schema) : new ReflectDatumWriter<>(schema); + } else { + datumWriter = datumWriterFactory.apply(schema); + } + + dataFileWriter = new DataFileWriter<>(datumWriter).setCodec(codec); + for (Map.Entry entry : metadata.entrySet()) { + Object v = entry.getValue(); + if (v instanceof String) { + dataFileWriter.setMeta(entry.getKey(), (String) v); + } else if (v instanceof Long) { + dataFileWriter.setMeta(entry.getKey(), (Long) v); + } else if (v instanceof byte[]) { + dataFileWriter.setMeta(entry.getKey(), (byte[]) v); + } else { + throw new IllegalStateException( + "Metadata value type must be one of String, Long, or byte[]. Found " + + v.getClass().getSimpleName()); + } + } + dataFileWriter.setSyncInterval(syncInterval); + dataFileWriter.create(schema, Channels.newOutputStream(channel)); + } + + @Override + public void write(OutputT value) throws Exception { + dataFileWriter.append(value); + } + + @Override + protected void finishWrite() throws Exception { + dataFileWriter.flush(); + } + } +} diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/AvroSource.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/AvroSource.java new file mode 100644 index 0000000000000..aaa05bdc1739e --- /dev/null +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/AvroSource.java @@ -0,0 +1,777 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.avro.io; + +import static org.apache.beam.sdk.io.FileBasedSource.Mode.SINGLE_FILE_OR_SUBRANGE; +import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument; +import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull; +import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InvalidObjectException; +import java.io.ObjectInputStream; +import java.io.ObjectStreamException; +import java.io.Serializable; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.SeekableByteChannel; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Iterator; +import java.util.Map; +import java.util.WeakHashMap; +import javax.annotation.concurrent.GuardedBy; +import org.apache.avro.Schema; +import org.apache.avro.file.DataFileConstants; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.file.SeekableInput; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.BinaryDecoder; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.reflect.ReflectDatumReader; +import org.apache.beam.sdk.PipelineRunner; +import org.apache.beam.sdk.annotations.Experimental; +import org.apache.beam.sdk.annotations.Experimental.Kind; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; +import org.apache.beam.sdk.io.BlockBasedSource; +import org.apache.beam.sdk.io.FileBasedSource; +import org.apache.beam.sdk.io.FileSystems; +import org.apache.beam.sdk.io.OffsetBasedSource; +import org.apache.beam.sdk.io.Read; +import org.apache.beam.sdk.io.fs.EmptyMatchTreatment; +import org.apache.beam.sdk.io.fs.MatchResult.Metadata; +import org.apache.beam.sdk.io.fs.ResourceId; +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.options.ValueProvider; +import org.apache.beam.sdk.transforms.SerializableFunction; +import org.apache.beam.sdk.util.VarInt; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting; +import org.checkerframework.checker.nullness.qual.Nullable; + +// CHECKSTYLE.OFF: JavadocStyle +/** + * Do not use in pipelines directly: most users should use {@link AvroIO.Read}. + * + *

A {@link FileBasedSource} for reading Avro files. + * + *

To read a {@link PCollection} of objects from one or more Avro files, use {@link + * AvroSource#from} to specify the path(s) of the files to read. The {@link AvroSource} that is + * returned will read objects of type {@link GenericRecord} with the schema(s) that were written at + * file creation. To further configure the {@link AvroSource} to read with a user-defined schema, or + * to return records of a type other than {@link GenericRecord}, use {@link + * AvroSource#withSchema(Schema)} (using an Avro {@link Schema}), {@link + * AvroSource#withSchema(String)} (using a JSON schema), or {@link AvroSource#withSchema(Class)} (to + * return objects of the Avro-generated class specified). + * + *

An {@link AvroSource} can be read from using the {@link Read} transform. For example: + * + *

+ * <pre>{@code
+ * AvroSource<MyType> source = AvroSource.from(file.toPath().toString()).withSchema(MyType.class);
+ * PCollection<MyType> records = pipeline.apply(Read.from(source));
+ * }</pre>
+ * + *

This class's implementation is based on the Avro 1.7.7 specification and implements + * parsing of some parts of Avro Object Container Files. The rationale for doing so is that the Avro + * API does not provide efficient ways of computing the precise offsets of blocks within a file, + * which is necessary to support dynamic work rebalancing. However, whenever it is possible to use + * the Avro API in a way that supports maintaining precise offsets, this class uses the Avro API. + * + *

Avro Object Container files store records in blocks. Each block contains a collection of + * records. Blocks may be encoded (e.g., with bzip2, deflate, snappy, etc.). Blocks are delineated + * from one another by a 16-byte sync marker. + * + *

An {@link AvroSource} for a subrange of a single file contains records in the blocks such that + * the start offset of the block is greater than or equal to the start offset of the source and less + * than the end offset of the source. + * + *

To use XZ-encoded Avro files, please include an explicit dependency on {@code xz-1.8.jar}, + * which has been marked as optional in the Maven {@code sdk/pom.xml}. + * + *

+ * <pre>{@code
+ * <dependency>
+ *   <groupId>org.tukaani</groupId>
+ *   <artifactId>xz</artifactId>
+ *   <version>1.8</version>
+ * </dependency>
+ * }</pre>
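+ *
+ * <p>For Gradle-based builds, the same dependency can be declared as {@code
+ * implementation "org.tukaani:xz:1.8"}.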
+ * + *

+ * <h3>Permissions</h3>

+ * + *

Permission requirements depend on the {@link PipelineRunner} that is used to execute the + * pipeline. Please refer to the documentation of corresponding {@link PipelineRunner}s for more + * details. + * + * @param The type of records to be read from the source. + */ +// CHECKSTYLE.ON: JavadocStyle +@Experimental(Kind.SOURCE_SINK) +@SuppressWarnings({ + "nullness" // TODO(https://github.com/apache/beam/issues/20497) +}) +public class AvroSource extends BlockBasedSource { + // Default minimum bundle size (chosen as two default-size Avro blocks to attempt to + // ensure that every source has at least one block of records). + // The default sync interval is 64k. + private static final long DEFAULT_MIN_BUNDLE_SIZE = 2L * DataFileConstants.DEFAULT_SYNC_INTERVAL; + + @FunctionalInterface + public interface DatumReaderFactory extends Serializable { + DatumReader apply(Schema writer, Schema reader); + } + + private static final DatumReaderFactory GENERIC_DATUM_READER_FACTORY = GenericDatumReader::new; + + private static final DatumReaderFactory REFLECT_DATUM_READER_FACTORY = ReflectDatumReader::new; + + // Use cases of AvroSource are: + // 1) AvroSource Reading GenericRecord records with a specified schema. + // 2) AvroSource Reading records of a generated Avro class Foo. + // 3) AvroSource Reading GenericRecord records with an unspecified schema + // and converting them to type T. + // | Case 1 | Case 2 | Case 3 | + // type | GenericRecord | Foo | GenericRecord | + // readerSchemaString | non-null | non-null | null | + // parseFn | null | null | non-null | + // outputCoder | null | null | non-null | + // readerFactory | either | either | either | + private static class Mode implements Serializable { + private final Class type; + + // The JSON schema used to decode records. + private @Nullable String readerSchemaString; + + private final @Nullable SerializableFunction parseFn; + + private final @Nullable Coder outputCoder; + + private final @Nullable DatumReaderFactory readerFactory; + + private Mode( + Class type, + @Nullable String readerSchemaString, + @Nullable SerializableFunction parseFn, + @Nullable Coder outputCoder, + @Nullable DatumReaderFactory readerFactory) { + this.type = type; + this.readerSchemaString = internSchemaString(readerSchemaString); + this.parseFn = parseFn; + this.outputCoder = outputCoder; + this.readerFactory = readerFactory; + } + + private void readObject(ObjectInputStream is) throws IOException, ClassNotFoundException { + is.defaultReadObject(); + readerSchemaString = internSchemaString(readerSchemaString); + } + + private Coder getOutputCoder() { + if (parseFn == null) { + return AvroCoder.of((Class) type, internOrParseSchemaString(readerSchemaString)); + } else { + return outputCoder; + } + } + + private void validate() { + if (parseFn == null) { + checkArgument( + readerSchemaString != null, + "schema must be specified using withSchema() when not using a parse fn"); + } + } + + private Mode withReaderFactory(DatumReaderFactory factory) { + return new Mode<>(type, readerSchemaString, parseFn, outputCoder, factory); + } + + private DatumReader createReader(Schema writerSchema, Schema readerSchema) { + DatumReaderFactory factory = this.readerFactory; + if (factory == null) { + factory = + (type == GenericRecord.class) + ? 
GENERIC_DATUM_READER_FACTORY + : REFLECT_DATUM_READER_FACTORY; + } + return factory.apply(writerSchema, readerSchema); + } + } + + private static Mode readGenericRecordsWithSchema( + String schema, @Nullable DatumReaderFactory factory) { + return new Mode<>(GenericRecord.class, schema, null, null, factory); + } + + private static Mode readGeneratedClasses( + Class clazz, @Nullable DatumReaderFactory factory) { + return new Mode<>(clazz, ReflectData.get().getSchema(clazz).toString(), null, null, factory); + } + + private static Mode parseGenericRecords( + SerializableFunction parseFn, + Coder outputCoder, + @Nullable DatumReaderFactory factory) { + return new Mode<>(GenericRecord.class, null, parseFn, outputCoder, factory); + } + + private final Mode mode; + + /** + * Reads from the given file name or pattern ("glob"). The returned source needs to be further + * configured by calling {@link #withSchema} to return a type other than {@link GenericRecord}. + */ + public static AvroSource from(ValueProvider fileNameOrPattern) { + return new AvroSource<>( + fileNameOrPattern, + EmptyMatchTreatment.DISALLOW, + DEFAULT_MIN_BUNDLE_SIZE, + readGenericRecordsWithSchema(null /* will need to be specified in withSchema */, null)); + } + + public static AvroSource from(Metadata metadata) { + return new AvroSource<>( + metadata, + DEFAULT_MIN_BUNDLE_SIZE, + 0, + metadata.sizeBytes(), + readGenericRecordsWithSchema(null /* will need to be specified in withSchema */, null)); + } + + /** Like {@link #from(ValueProvider)}. */ + public static AvroSource from(String fileNameOrPattern) { + return from(ValueProvider.StaticValueProvider.of(fileNameOrPattern)); + } + + public AvroSource withEmptyMatchTreatment(EmptyMatchTreatment emptyMatchTreatment) { + return new AvroSource<>( + getFileOrPatternSpecProvider(), emptyMatchTreatment, getMinBundleSize(), mode); + } + + /** Reads files containing records that conform to the given schema. */ + public AvroSource withSchema(String schema) { + checkArgument(schema != null, "schema can not be null"); + return new AvroSource<>( + getFileOrPatternSpecProvider(), + getEmptyMatchTreatment(), + getMinBundleSize(), + readGenericRecordsWithSchema(schema, mode.readerFactory)); + } + + /** Like {@link #withSchema(String)}. */ + public AvroSource withSchema(Schema schema) { + checkArgument(schema != null, "schema can not be null"); + return withSchema(schema.toString()); + } + + /** Reads files containing records of the given class. */ + public AvroSource withSchema(Class clazz) { + checkArgument(clazz != null, "clazz can not be null"); + if (getMode() == SINGLE_FILE_OR_SUBRANGE) { + return new AvroSource<>( + getSingleFileMetadata(), + getMinBundleSize(), + getStartOffset(), + getEndOffset(), + readGeneratedClasses(clazz, mode.readerFactory)); + } + return new AvroSource<>( + getFileOrPatternSpecProvider(), + getEmptyMatchTreatment(), + getMinBundleSize(), + readGeneratedClasses(clazz, mode.readerFactory)); + } + + /** + * Reads {@link GenericRecord} of unspecified schema and maps them to instances of a custom type + * using the given {@code parseFn} and encoded using the given coder. 
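+ *
+ * <p>A minimal sketch, assuming the records carry a string field named {@code "name"} (the file
+ * pattern and field name are hypothetical):
+ *
+ * <pre>{@code
+ * AvroSource<String> source =
+ *     AvroSource.from("/path/to/records-*.avro")
+ *         .withParseFn(record -> record.get("name").toString(), StringUtf8Coder.of());
+ * }</pre>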
+ */ + public AvroSource withParseFn( + SerializableFunction parseFn, Coder coder) { + checkArgument(parseFn != null, "parseFn can not be null"); + checkArgument(coder != null, "coder can not be null"); + if (getMode() == SINGLE_FILE_OR_SUBRANGE) { + return new AvroSource<>( + getSingleFileMetadata(), + getMinBundleSize(), + getStartOffset(), + getEndOffset(), + parseGenericRecords(parseFn, coder, mode.readerFactory)); + } + return new AvroSource<>( + getFileOrPatternSpecProvider(), + getEmptyMatchTreatment(), + getMinBundleSize(), + parseGenericRecords(parseFn, coder, mode.readerFactory)); + } + + /** + * Sets the minimum bundle size. Refer to {@link OffsetBasedSource} for a description of {@code + * minBundleSize} and its use. + */ + public AvroSource withMinBundleSize(long minBundleSize) { + if (getMode() == SINGLE_FILE_OR_SUBRANGE) { + return new AvroSource<>( + getSingleFileMetadata(), minBundleSize, getStartOffset(), getEndOffset(), mode); + } + return new AvroSource<>( + getFileOrPatternSpecProvider(), getEmptyMatchTreatment(), minBundleSize, mode); + } + + public AvroSource withDatumReaderFactory(DatumReaderFactory factory) { + Mode newMode = mode.withReaderFactory(factory); + if (getMode() == SINGLE_FILE_OR_SUBRANGE) { + return new AvroSource<>( + getSingleFileMetadata(), getMinBundleSize(), getStartOffset(), getEndOffset(), newMode); + } + return new AvroSource<>( + getFileOrPatternSpecProvider(), getEmptyMatchTreatment(), getMinBundleSize(), newMode); + } + + /** Constructor for FILEPATTERN mode. */ + private AvroSource( + ValueProvider fileNameOrPattern, + EmptyMatchTreatment emptyMatchTreatment, + long minBundleSize, + Mode mode) { + super(fileNameOrPattern, emptyMatchTreatment, minBundleSize); + this.mode = mode; + } + + /** Constructor for SINGLE_FILE_OR_SUBRANGE mode. */ + private AvroSource( + Metadata metadata, long minBundleSize, long startOffset, long endOffset, Mode mode) { + super(metadata, minBundleSize, startOffset, endOffset); + this.mode = mode; + } + + @Override + public void validate() { + super.validate(); + mode.validate(); + } + + /** + * Used by the Dataflow worker. Do not introduce new usages. Do not delete without confirming that + * Dataflow ValidatesRunner tests pass. + * + * @deprecated Used by Dataflow worker + */ + @Deprecated + public BlockBasedSource createForSubrangeOfFile(String fileName, long start, long end) + throws IOException { + return createForSubrangeOfFile(FileSystems.matchSingleFileSpec(fileName), start, end); + } + + @Override + public BlockBasedSource createForSubrangeOfFile(Metadata fileMetadata, long start, long end) { + return new AvroSource<>(fileMetadata, getMinBundleSize(), start, end, mode); + } + + @Override + protected BlockBasedReader createSingleFileReader(PipelineOptions options) { + return new AvroReader<>(this); + } + + @Override + public Coder getOutputCoder() { + return mode.getOutputCoder(); + } + + @VisibleForTesting + @Nullable + String getReaderSchemaString() { + return mode.readerSchemaString; + } + + /** Avro file metadata. */ + @VisibleForTesting + static class AvroMetadata { + private final byte[] syncMarker; + private final String codec; + private final String schemaString; + + AvroMetadata(byte[] syncMarker, String codec, String schemaString) { + this.syncMarker = checkNotNull(syncMarker, "syncMarker"); + this.codec = checkNotNull(codec, "codec"); + this.schemaString = internSchemaString(checkNotNull(schemaString, "schemaString")); + } + + /** + * The JSON-encoded schema + * string for the file. 
+ */ + public String getSchemaString() { + return schemaString; + } + + /** + * The codec of the + * file. + */ + public String getCodec() { + return codec; + } + + /** + * The 16-byte sync marker for the file. See the documentation for Object Container + * File for more information. + */ + public byte[] getSyncMarker() { + return syncMarker; + } + } + + /** + * Reads the {@link AvroMetadata} from the header of an Avro file. + * + *

This method parses the header of an Avro Object Container + * File. + * + * @throws IOException if the file is an invalid format. + */ + @VisibleForTesting + static AvroMetadata readMetadataFromFile(ResourceId fileResource) throws IOException { + String codec = null; + String schemaString = null; + byte[] syncMarker; + try (InputStream stream = Channels.newInputStream(FileSystems.open(fileResource))) { + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(stream, null); + + // The header of an object container file begins with a four-byte magic number, followed + // by the file metadata (including the schema and codec), encoded as a map. Finally, the + // header ends with the file's 16-byte sync marker. + // See https://avro.apache.org/docs/1.7.7/spec.html#Object+Container+Files for details on + // the encoding of container files. + + // Read the magic number. + byte[] magic = new byte[DataFileConstants.MAGIC.length]; + decoder.readFixed(magic); + if (!Arrays.equals(magic, DataFileConstants.MAGIC)) { + throw new IOException("Missing Avro file signature: " + fileResource); + } + + // Read the metadata to find the codec and schema. + ByteBuffer valueBuffer = ByteBuffer.allocate(512); + long numRecords = decoder.readMapStart(); + while (numRecords > 0) { + for (long recordIndex = 0; recordIndex < numRecords; recordIndex++) { + String key = decoder.readString(); + // readBytes() clears the buffer and returns a buffer where: + // - position is the start of the bytes read + // - limit is the end of the bytes read + valueBuffer = decoder.readBytes(valueBuffer); + byte[] bytes = new byte[valueBuffer.remaining()]; + valueBuffer.get(bytes); + if (key.equals(DataFileConstants.CODEC)) { + codec = new String(bytes, StandardCharsets.UTF_8); + } else if (key.equals(DataFileConstants.SCHEMA)) { + schemaString = new String(bytes, StandardCharsets.UTF_8); + } + } + numRecords = decoder.mapNext(); + } + if (codec == null) { + codec = DataFileConstants.NULL_CODEC; + } + + // Finally, read the sync marker. + syncMarker = new byte[DataFileConstants.SYNC_SIZE]; + decoder.readFixed(syncMarker); + } + checkState(schemaString != null, "No schema present in Avro file metadata %s", fileResource); + return new AvroMetadata(syncMarker, codec, schemaString); + } + + // A logical reference cache used to store schemas and schema strings to allow us to + // "intern" values and reduce the number of copies of equivalent objects. + private static final Map schemaLogicalReferenceCache = new WeakHashMap<>(); + private static final Map schemaStringLogicalReferenceCache = new WeakHashMap<>(); + + // We avoid String.intern() because depending on the JVM, these may be added to the PermGenSpace + // which we want to avoid otherwise we could run out of PermGenSpace. 
+ private static synchronized String internSchemaString(String schema) { + String internSchema = schemaStringLogicalReferenceCache.get(schema); + if (internSchema != null) { + return internSchema; + } + schemaStringLogicalReferenceCache.put(schema, schema); + return schema; + } + + static synchronized Schema internOrParseSchemaString(String schemaString) { + Schema schema = schemaLogicalReferenceCache.get(schemaString); + if (schema != null) { + return schema; + } + Schema.Parser parser = new Schema.Parser(); + schema = parser.parse(schemaString); + schemaLogicalReferenceCache.put(schemaString, schema); + return schema; + } + + // Reading the object from Java serialization typically does not go through the constructor, + // we use readResolve to replace the constructed instance with one which uses the constructor + // allowing us to intern any schemas. + @SuppressWarnings("unused") + private Object readResolve() throws ObjectStreamException { + switch (getMode()) { + case SINGLE_FILE_OR_SUBRANGE: + return new AvroSource<>( + getSingleFileMetadata(), getMinBundleSize(), getStartOffset(), getEndOffset(), mode); + case FILEPATTERN: + return new AvroSource<>( + getFileOrPatternSpecProvider(), getEmptyMatchTreatment(), getMinBundleSize(), mode); + default: + throw new InvalidObjectException( + String.format("Unknown mode %s for AvroSource %s", getMode(), this)); + } + } + + /** + * A {@link Block} of Avro records. + * + * @param The type of records stored in the block. + */ + @Experimental(Kind.SOURCE_SINK) + static class AvroBlock extends Block { + + // The current record in the block. Initialized in readNextRecord. + private @Nullable T currentRecord; + + // The index of the current record in the block. + private long currentRecordIndex = 0; + + private final Iterator iterator; + + private final SerializableFunction parseFn; + + private final long numRecordsInBlock; + + AvroBlock( + Iterator iter, SerializableFunction parseFn, long numRecordsInBlock) { + this.iterator = iter; + this.parseFn = parseFn; + this.numRecordsInBlock = numRecordsInBlock; + } + + @Override + public T getCurrentRecord() { + return currentRecord; + } + + @Override + public boolean readNextRecord() { + if (currentRecordIndex >= numRecordsInBlock) { + return false; + } + + Object record = iterator.next(); + currentRecord = (parseFn == null) ? ((T) record) : parseFn.apply((GenericRecord) record); + currentRecordIndex++; + return true; + } + + @Override + public double getFractionOfBlockConsumed() { + return ((double) currentRecordIndex) / numRecordsInBlock; + } + } + + /** + * A {@link BlockBasedReader} for reading blocks from Avro files. + * + *

An Avro Object Container File consists of a header followed by a 16-bit sync marker and then + * a sequence of blocks, where each block begins with two encoded longs representing the total + * number of records in the block and the block's size in bytes, followed by the block's + * (optionally-encoded) records. Each block is terminated by a 16-bit sync marker. + * + * @param The type of records contained in the block. + */ + @Experimental(Kind.SOURCE_SINK) + public static class AvroReader extends BlockBasedReader { + + private static class SeekableChannelInput implements SeekableInput { + + private final SeekableByteChannel channel; + private final InputStream input; + + SeekableChannelInput(SeekableByteChannel channel) { + this.channel = channel; + this.input = Channels.newInputStream(channel); + } + + @Override + public void seek(long p) throws IOException { + channel.position(p); + } + + @Override + public long tell() throws IOException { + return channel.position(); + } + + @Override + public long length() throws IOException { + return channel.size(); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + return input.read(b, off, len); + } + + @Override + public void close() throws IOException { + channel.close(); + } + } + + // The current block. + // Initialized in readNextRecord. + private @Nullable AvroBlock currentBlock; + + private @Nullable DataFileReader dataFileReader; + + // A lock used to synchronize block offsets for getRemainingParallelism + private final Object progressLock = new Object(); + + // Offset of the current block. + @GuardedBy("progressLock") + private long currentBlockOffset = 0; + + // Size of the current block. + @GuardedBy("progressLock") + private long currentBlockSizeBytes = 0; + + /** Reads Avro records of type {@code T} from the specified source. */ + public AvroReader(AvroSource source) { + super(source); + } + + @Override + public synchronized AvroSource getCurrentSource() { + return (AvroSource) super.getCurrentSource(); + } + + // Precondition: the stream is positioned after the sync marker in the current (about to be + // previous) block. currentBlockSize equals the size of the current block, or zero if this + // reader was just started. + // + // Postcondition: same as above, but for the new current (formerly next) block. + @Override + public boolean readNextBlock() { + if (!dataFileReader.hasNext()) { + return false; + } + + long headerLength = + (long) VarInt.getLength(dataFileReader.getBlockCount()) + + VarInt.getLength(dataFileReader.getBlockSize()) + + DataFileConstants.SYNC_SIZE; + + currentBlock = + new AvroBlock<>( + dataFileReader, getCurrentSource().mode.parseFn, dataFileReader.getBlockCount()); + + // Atomically update both the position and offset of the new block. + synchronized (progressLock) { + currentBlockOffset = dataFileReader.previousSync(); + // Total block size includes the header, block content, and trailing sync marker. 
+ currentBlockSizeBytes = dataFileReader.getBlockSize() + headerLength; + } + + return true; + } + + @Override + public AvroBlock getCurrentBlock() { + return currentBlock; + } + + @Override + public long getCurrentBlockOffset() { + synchronized (progressLock) { + return currentBlockOffset; + } + } + + @Override + public long getCurrentBlockSize() { + synchronized (progressLock) { + return currentBlockSizeBytes; + } + } + + @Override + public long getSplitPointsRemaining() { + if (isDone()) { + return 0; + } + synchronized (progressLock) { + if (currentBlockOffset + currentBlockSizeBytes >= getCurrentSource().getEndOffset()) { + // This block is known to be the last block in the range. + return 1; + } + } + return super.getSplitPointsRemaining(); + } + + // Postcondition: the stream is positioned at the beginning of the first block after the start + // of the current source, and currentBlockOffset is that position. Additionally, + // currentBlockSizeBytes will be set to 0 indicating that the previous block was empty. + @Override + protected void startReading(ReadableByteChannel channel) throws IOException { + SeekableChannelInput seekableChannelInput = + new SeekableChannelInput((SeekableByteChannel) channel); + // the channel needs to be at the beginning of the file in order for the DataFileReader to + // read the header, etc, we'll seek it back to where it should be after creating the DFR. + seekableChannelInput.seek(0); + + Schema readerSchema = null; + String readerSchemaString = this.getCurrentSource().getReaderSchemaString(); + if (readerSchemaString != null) { + readerSchema = AvroSource.internOrParseSchemaString(readerSchemaString); + } + // the DataFileReader will call setSchema with the writer schema when created. + DatumReader reader = this.getCurrentSource().mode.createReader(readerSchema, readerSchema); + + dataFileReader = new DataFileReader<>(seekableChannelInput, reader); + + long startOffset = getCurrentSource().getStartOffset(); + if (startOffset != 0) { + // the start offset may be in the middle of a sync marker, by rewinding SYNC_SIZE bytes we + // ensure that we won't miss the block if so. + dataFileReader.sync(Math.max(0, startOffset - DataFileConstants.SYNC_SIZE)); + } + + synchronized (progressLock) { + currentBlockOffset = dataFileReader.previousSync(); + currentBlockSizeBytes = 0; + } + } + } +} diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/ConstantAvroDestination.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/ConstantAvroDestination.java new file mode 100644 index 0000000000000..601c65935bec4 --- /dev/null +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/ConstantAvroDestination.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.avro.io; + +import java.io.Serializable; +import java.util.Map; +import org.apache.avro.Schema; +import org.apache.avro.file.CodecFactory; +import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy; +import org.apache.beam.sdk.transforms.SerializableFunction; +import org.apache.beam.sdk.transforms.display.DisplayData; +import org.apache.beam.sdk.transforms.display.HasDisplayData; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Function; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Supplier; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Suppliers; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.io.BaseEncoding; +import org.checkerframework.checker.nullness.qual.Nullable; + +/** Always returns a constant {@link FilenamePolicy}, {@link Schema}, metadata, and codec. */ +@SuppressWarnings({ + "nullness" // TODO(https://github.com/apache/beam/issues/20497) +}) +class ConstantAvroDestination + extends DynamicAvroDestinations { + private static class SchemaFunction implements Serializable, Function { + @Override + public Schema apply(String input) { + return new Schema.Parser().parse(input); + } + } + + // This should be a multiple of 4 to not get a partial encoded byte. + private static final int METADATA_BYTES_MAX_LENGTH = 40; + private final FilenamePolicy filenamePolicy; + private final Supplier schema; + private final Map metadata; + private final SerializableAvroCodecFactory codec; + private final SerializableFunction formatFunction; + private final AvroSink.DatumWriterFactory datumWriterFactory; + + private class Metadata implements HasDisplayData { + @Override + public void populateDisplayData(DisplayData.Builder builder) { + for (Map.Entry entry : metadata.entrySet()) { + DisplayData.Type type = DisplayData.inferType(entry.getValue()); + if (type != null) { + builder.add(DisplayData.item(entry.getKey(), type, entry.getValue())); + } else { + String base64 = BaseEncoding.base64().encode((byte[]) entry.getValue()); + String repr = + base64.length() <= METADATA_BYTES_MAX_LENGTH + ? 
base64 + : base64.substring(0, METADATA_BYTES_MAX_LENGTH) + "..."; + builder.add(DisplayData.item(entry.getKey(), repr)); + } + } + } + } + + public ConstantAvroDestination( + FilenamePolicy filenamePolicy, + Schema schema, + Map metadata, + CodecFactory codec, + SerializableFunction formatFunction) { + this(filenamePolicy, schema, metadata, codec, formatFunction, null); + } + + public ConstantAvroDestination( + FilenamePolicy filenamePolicy, + Schema schema, + Map metadata, + CodecFactory codec, + SerializableFunction formatFunction, + AvroSink.@Nullable DatumWriterFactory datumWriterFactory) { + this.filenamePolicy = filenamePolicy; + this.schema = Suppliers.compose(new SchemaFunction(), Suppliers.ofInstance(schema.toString())); + this.metadata = metadata; + this.codec = new SerializableAvroCodecFactory(codec); + this.formatFunction = formatFunction; + this.datumWriterFactory = datumWriterFactory; + } + + @Override + public OutputT formatRecord(UserT record) { + return formatFunction.apply(record); + } + + @Override + public @Nullable Void getDestination(UserT element) { + return (Void) null; + } + + @Override + public @Nullable Void getDefaultDestination() { + return (Void) null; + } + + @Override + public FilenamePolicy getFilenamePolicy(Void destination) { + return filenamePolicy; + } + + @Override + public Schema getSchema(Void destination) { + return schema.get(); + } + + @Override + public Map getMetadata(Void destination) { + return metadata; + } + + @Override + public CodecFactory getCodec(Void destination) { + return codec.getCodec(); + } + + @Override + public AvroSink.@Nullable DatumWriterFactory getDatumWriterFactory(Void destination) { + return datumWriterFactory; + } + + @Override + public void populateDisplayData(DisplayData.Builder builder) { + filenamePolicy.populateDisplayData(builder); + builder.add(DisplayData.item("schema", schema.get().toString()).withLabel("Record Schema")); + builder.addIfNotDefault( + DisplayData.item("codec", codec.getCodec().toString()).withLabel("Avro Compression Codec"), + AvroIO.TypedWrite.DEFAULT_SERIALIZABLE_CODEC.toString()); + builder.include("Metadata", new Metadata()); + } +} diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/DynamicAvroDestinations.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/DynamicAvroDestinations.java new file mode 100644 index 0000000000000..c74c98ed72712 --- /dev/null +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/DynamicAvroDestinations.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.avro.io; + +import java.util.Map; +import org.apache.avro.Schema; +import org.apache.avro.file.CodecFactory; +import org.apache.beam.sdk.io.FileBasedSink.DynamicDestinations; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap; +import org.checkerframework.checker.nullness.qual.Nullable; + +/** + * A specialization of {@link DynamicDestinations} for {@link AvroIO}. In addition to dynamic file + * destinations, this allows specifying other AVRO properties (schema, metadata, codec, datum + * writer) per destination. + */ +public abstract class DynamicAvroDestinations + extends DynamicDestinations { + /** Return an AVRO schema for a given destination. */ + public abstract Schema getSchema(DestinationT destination); + + /** Return AVRO file metadata for a given destination. */ + public Map getMetadata(DestinationT destination) { + return ImmutableMap.of(); + } + + /** Return an AVRO codec for a given destination. */ + public CodecFactory getCodec(DestinationT destination) { + return AvroIO.TypedWrite.DEFAULT_CODEC; + } + + /** + * Return a {@link AvroSink.DatumWriterFactory} for a given destination. If provided, it will be + * used to created {@link org.apache.avro.io.DatumWriter} instances as required. + */ + public AvroSink.@Nullable DatumWriterFactory getDatumWriterFactory( + DestinationT destinationT) { + return null; + } +} diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/SerializableAvroCodecFactory.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/SerializableAvroCodecFactory.java new file mode 100644 index 0000000000000..8a82ffcbcd422 --- /dev/null +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/SerializableAvroCodecFactory.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.avro.io; + +import static org.apache.avro.file.DataFileConstants.BZIP2_CODEC; +import static org.apache.avro.file.DataFileConstants.DEFLATE_CODEC; +import static org.apache.avro.file.DataFileConstants.NULL_CODEC; +import static org.apache.avro.file.DataFileConstants.SNAPPY_CODEC; +import static org.apache.avro.file.DataFileConstants.XZ_CODEC; +import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull; +import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState; + +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.util.Arrays; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.avro.file.CodecFactory; +import org.checkerframework.checker.nullness.qual.Nullable; + +/** + * A wrapper that allows {@link CodecFactory}s to be serialized using Java's standard serialization + * mechanisms. + */ +@SuppressWarnings({ + "nullness" // TODO(https://github.com/apache/beam/issues/20497) +}) +class SerializableAvroCodecFactory implements Externalizable { + private static final long serialVersionUID = 7445324844109564303L; + private static final List noOptAvroCodecs = + Arrays.asList(NULL_CODEC, SNAPPY_CODEC, BZIP2_CODEC); + private static final Pattern deflatePattern = Pattern.compile(DEFLATE_CODEC + "-(?-?\\d)"); + private static final Pattern xzPattern = Pattern.compile(XZ_CODEC + "-(?\\d)"); + + private @Nullable CodecFactory codecFactory; + + // For java.io.Externalizable + public SerializableAvroCodecFactory() {} + + public SerializableAvroCodecFactory(CodecFactory codecFactory) { + checkNotNull(codecFactory, "Codec can't be null"); + checkState(checkIsSupportedCodec(codecFactory), "%s is not supported", codecFactory); + this.codecFactory = codecFactory; + } + + private boolean checkIsSupportedCodec(CodecFactory codecFactory) { + final String codecStr = codecFactory.toString(); + return noOptAvroCodecs.contains(codecStr) + || deflatePattern.matcher(codecStr).matches() + || xzPattern.matcher(codecStr).matches(); + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + out.writeUTF(codecFactory.toString()); + } + + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + final String codecStr = in.readUTF(); + + switch (codecStr) { + case NULL_CODEC: + case SNAPPY_CODEC: + case BZIP2_CODEC: + codecFactory = CodecFactory.fromString(codecStr); + return; + } + + Matcher deflateMatcher = deflatePattern.matcher(codecStr); + if (deflateMatcher.find()) { + codecFactory = CodecFactory.deflateCodec(Integer.parseInt(deflateMatcher.group("level"))); + return; + } + + Matcher xzMatcher = xzPattern.matcher(codecStr); + if (xzMatcher.find()) { + codecFactory = CodecFactory.xzCodec(Integer.parseInt(xzMatcher.group("level"))); + return; + } + + throw new IllegalStateException(codecStr + " is not supported"); + } + + public CodecFactory getCodec() { + return codecFactory; + } + + @Override + public String toString() { + checkNotNull(codecFactory, "Inner CodecFactory is null, please use non default constructor"); + return codecFactory.toString(); + } +} diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/package-info.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/package-info.java new file mode 100644 
index 0000000000000..8d6938347a442 --- /dev/null +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/io/package-info.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** Defines transforms for reading and writing Avro storage format. */ +@DefaultAnnotation(NonNull.class) +@Experimental(Kind.EXTENSION) +package org.apache.beam.sdk.extensions.avro.io; + +import edu.umd.cs.findbugs.annotations.DefaultAnnotation; +import org.apache.beam.sdk.annotations.Experimental; +import org.apache.beam.sdk.annotations.Experimental.Kind; +import org.checkerframework.checker.nullness.qual.NonNull; diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/AvroRecordSchema.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/AvroRecordSchema.java new file mode 100644 index 0000000000000..12b81be54c13f --- /dev/null +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/AvroRecordSchema.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.avro.schemas; + +import static org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils.toBeamSchema; + +import java.util.List; +import org.apache.avro.reflect.ReflectData; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; +import org.apache.beam.sdk.schemas.FieldValueGetter; +import org.apache.beam.sdk.schemas.FieldValueTypeInformation; +import org.apache.beam.sdk.schemas.GetterBasedSchemaProvider; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.schemas.SchemaProvider; +import org.apache.beam.sdk.schemas.SchemaUserTypeCreator; +import org.apache.beam.sdk.values.TypeDescriptor; + +/** + * A {@link SchemaProvider} for AVRO generated SpecificRecords and POJOs. + * + *
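+ * <p>One way to attach this provider to a user type (the POJO name below is hypothetical) is to
+ * register it explicitly:
+ *
+ * <pre>{@code
+ * pipeline.getSchemaRegistry().registerSchemaProvider(MyAvroPojo.class, new AvroRecordSchema());
+ * }</pre>
+ *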

This provider infers a schema from generated SpecificRecord objects, and creates schemas and + * rows that bind to the appropriate fields. This provider also infers schemas from Java POJO + * objects, creating a schema that matches that inferred by the AVRO libraries. + */ +@SuppressWarnings({ + "rawtypes" // TODO(https://github.com/apache/beam/issues/20447) +}) +public class AvroRecordSchema extends GetterBasedSchemaProvider { + @Override + public Schema schemaFor(TypeDescriptor typeDescriptor) { + return toBeamSchema(ReflectData.get().getSchema(typeDescriptor.getRawType())); + } + + @Override + public List fieldValueGetters(Class targetClass, Schema schema) { + return AvroUtils.getGetters(targetClass, schema); + } + + @Override + public List fieldValueTypeInformations( + Class targetClass, Schema schema) { + return AvroUtils.getFieldTypes(targetClass, schema); + } + + @Override + public SchemaUserTypeCreator schemaTypeCreator(Class targetClass, Schema schema) { + return AvroUtils.getCreator(targetClass, schema); + } +} diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/io/payloads/AvroPayloadSerializerProvider.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/io/payloads/AvroPayloadSerializerProvider.java new file mode 100644 index 0000000000000..7245d4a75e0d2 --- /dev/null +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/io/payloads/AvroPayloadSerializerProvider.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.avro.schemas.io.payloads; + +import com.google.auto.service.AutoService; +import java.util.Map; +import org.apache.beam.sdk.annotations.Experimental; +import org.apache.beam.sdk.annotations.Experimental.Kind; +import org.apache.beam.sdk.annotations.Internal; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.schemas.io.payloads.PayloadSerializer; +import org.apache.beam.sdk.schemas.io.payloads.PayloadSerializerProvider; + +@Internal +@Experimental(Kind.SCHEMAS) +@AutoService(PayloadSerializerProvider.class) +public class AvroPayloadSerializerProvider implements PayloadSerializerProvider { + @Override + public String identifier() { + return "avro"; + } + + @Override + public PayloadSerializer getSerializer(Schema schema, Map tableParams) { + return PayloadSerializer.of( + AvroUtils.getRowToAvroBytesFunction(schema), AvroUtils.getAvroBytesToRowFunction(schema)); + } +} diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/io/payloads/package-info.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/io/payloads/package-info.java new file mode 100644 index 0000000000000..01d48f89ec72f --- /dev/null +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/io/payloads/package-info.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Provides abstractions for schema-aware AvroIO. */ +@DefaultAnnotation(NonNull.class) +@Experimental(Kind.EXTENSION) +package org.apache.beam.sdk.extensions.avro.schemas.io.payloads; + +import edu.umd.cs.findbugs.annotations.DefaultAnnotation; +import org.apache.beam.sdk.annotations.Experimental; +import org.apache.beam.sdk.annotations.Experimental.Kind; +import org.checkerframework.checker.nullness.qual.NonNull; diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/package-info.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/package-info.java new file mode 100644 index 0000000000000..6428c686400e9 --- /dev/null +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/package-info.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Defines {@link org.apache.beam.sdk.schemas.Schema} and other classes for representing schema'd + * data in a {@link org.apache.beam.sdk.Pipeline} using Apache Avro. + */ +@DefaultAnnotation(NonNull.class) +@Experimental(Kind.SCHEMAS) +package org.apache.beam.sdk.extensions.avro.schemas; + +import edu.umd.cs.findbugs.annotations.DefaultAnnotation; +import org.apache.beam.sdk.annotations.Experimental; +import org.apache.beam.sdk.annotations.Experimental.Kind; +import org.checkerframework.checker.nullness.qual.NonNull; diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroByteBuddyUtils.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroByteBuddyUtils.java new file mode 100644 index 0000000000000..a7ff6a581e68f --- /dev/null +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroByteBuddyUtils.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.avro.schemas.utils; + +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Type; +import java.util.Map; +import net.bytebuddy.ByteBuddy; +import net.bytebuddy.asm.AsmVisitorWrapper; +import net.bytebuddy.description.type.TypeDescription.ForLoadedType; +import net.bytebuddy.dynamic.DynamicType; +import net.bytebuddy.dynamic.loading.ClassLoadingStrategy; +import net.bytebuddy.implementation.MethodCall; +import net.bytebuddy.implementation.bytecode.StackManipulation; +import net.bytebuddy.implementation.bytecode.assign.TypeCasting; +import net.bytebuddy.implementation.bytecode.collection.ArrayAccess; +import net.bytebuddy.implementation.bytecode.constant.IntegerConstant; +import net.bytebuddy.implementation.bytecode.member.MethodVariableAccess; +import net.bytebuddy.jar.asm.ClassWriter; +import net.bytebuddy.matcher.ElementMatchers; +import org.apache.avro.specific.SpecificRecord; +import org.apache.beam.sdk.annotations.Experimental; +import org.apache.beam.sdk.annotations.Experimental.Kind; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.schemas.SchemaUserTypeCreator; +import org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.InjectPackageStrategy; +import org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.TypeConversion; +import org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.TypeConversionsFactory; +import org.apache.beam.sdk.schemas.utils.ReflectUtils.ClassWithSchema; +import org.apache.beam.sdk.util.common.ReflectHelpers; +import org.apache.beam.sdk.values.TypeDescriptor; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps; + +@Experimental(Kind.SCHEMAS) +@SuppressWarnings({ + "nullness", // TODO(https://github.com/apache/beam/issues/20497) + "rawtypes" +}) +class AvroByteBuddyUtils { + private static final ByteBuddy BYTE_BUDDY = new ByteBuddy(); + + // Cache the generated constructors. + private static final Map CACHED_CREATORS = + Maps.newConcurrentMap(); + + static SchemaUserTypeCreator getCreator( + Class clazz, Schema schema) { + return CACHED_CREATORS.computeIfAbsent( + ClassWithSchema.create(clazz, schema), c -> createCreator(clazz, schema)); + } + + private static SchemaUserTypeCreator createCreator(Class clazz, Schema schema) { + Constructor baseConstructor = null; + Constructor[] constructors = clazz.getDeclaredConstructors(); + for (Constructor constructor : constructors) { + // TODO: This assumes that Avro only generates one constructor with this many fields. + if (constructor.getParameterCount() == schema.getFieldCount()) { + baseConstructor = constructor; + } + } + if (baseConstructor == null) { + throw new RuntimeException("No matching constructor found for class " + clazz); + } + + // Generate a method call to create and invoke the SpecificRecord's constructor. . 
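Before the byte-code generation below, it may help to see what the generated creator is equivalent to; a hand-written sketch (MyAvroRecord is hypothetical, standing in for an Avro-generated SpecificRecord with one constructor parameter per schema field):

import org.apache.beam.sdk.schemas.SchemaUserTypeCreator;

public class CreatorSketch {
  // Stand-in for an Avro-generated class with a constructor taking one value per schema field.
  public static class MyAvroRecord {
    final String name;
    final Integer count;

    public MyAvroRecord(String name, Integer count) {
      this.name = name;
      this.count = count;
    }
  }

  public static void main(String[] args) {
    // The ByteBuddy-generated creator effectively does this: read the positional values
    // handed to create(...) and pass them, cast, to the matching constructor.
    SchemaUserTypeCreator creator =
        params -> new MyAvroRecord((String) params[0], (Integer) params[1]);
    MyAvroRecord record = (MyAvroRecord) creator.create("beam", 42);
    System.out.println(record.name + ": " + record.count);
  }
}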
+ MethodCall construct = MethodCall.construct(baseConstructor); + for (int i = 0; i < baseConstructor.getParameterTypes().length; ++i) { + Class baseType = baseConstructor.getParameterTypes()[i]; + construct = construct.with(readAndConvertParameter(baseType, i), baseType); + } + + try { + DynamicType.Builder builder = + BYTE_BUDDY + .with(new InjectPackageStrategy(clazz)) + .subclass(SchemaUserTypeCreator.class) + .method(ElementMatchers.named("create")) + .intercept(construct); + + return builder + .visit(new AsmVisitorWrapper.ForDeclaredMethods().writerFlags(ClassWriter.COMPUTE_FRAMES)) + .make() + .load( + ReflectHelpers.findClassLoader(clazz.getClassLoader()), + ClassLoadingStrategy.Default.INJECTION) + .getLoaded() + .getDeclaredConstructor() + .newInstance(); + } catch (InstantiationException + | IllegalAccessException + | NoSuchMethodException + | InvocationTargetException e) { + throw new RuntimeException( + "Unable to generate a getter for class " + clazz + " with schema " + schema); + } + } + + private static StackManipulation readAndConvertParameter( + Class constructorParameterType, int index) { + TypeConversionsFactory typeConversionsFactory = new AvroUtils.AvroTypeConversionFactory(); + + // The types in the AVRO-generated constructor might be the types returned by Beam's Row class, + // so we have to convert the types used by Beam's Row class. + // We know that AVRO generates constructor parameters in the same order as fields + // in the schema, so we can just add the parameters sequentially. + TypeConversion convertType = typeConversionsFactory.createTypeConversion(true); + + // Map the AVRO-generated type to the one Beam will use. + ForLoadedType convertedType = + new ForLoadedType((Class) convertType.convert(TypeDescriptor.of(constructorParameterType))); + + // This will run inside the generated creator. Read the parameter and convert it to the + // type required by the SpecificRecord constructor. + StackManipulation readParameter = + new StackManipulation.Compound( + MethodVariableAccess.REFERENCE.loadFrom(1), + IntegerConstant.forValue(index), + ArrayAccess.REFERENCE.load(), + TypeCasting.to(convertedType)); + + // Convert to the parameter accepted by the SpecificRecord constructor. + return typeConversionsFactory + .createSetterConversions(readParameter) + .convert(TypeDescriptor.of(constructorParameterType)); + } +} diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroUtils.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroUtils.java new file mode 100644 index 0000000000000..ef014af530356 --- /dev/null +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroUtils.java @@ -0,0 +1,1382 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.avro.schemas.utils; + +import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument; +import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.lang.reflect.Method; +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import net.bytebuddy.description.type.TypeDescription.ForLoadedType; +import net.bytebuddy.implementation.bytecode.Duplication; +import net.bytebuddy.implementation.bytecode.StackManipulation; +import net.bytebuddy.implementation.bytecode.StackManipulation.Compound; +import net.bytebuddy.implementation.bytecode.TypeCreation; +import net.bytebuddy.implementation.bytecode.assign.TypeCasting; +import net.bytebuddy.implementation.bytecode.member.MethodInvocation; +import net.bytebuddy.matcher.ElementMatchers; +import org.apache.avro.AvroRuntimeException; +import org.apache.avro.Conversions; +import org.apache.avro.LogicalType; +import org.apache.avro.LogicalTypes; +import org.apache.avro.Schema.Type; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericFixed; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.GenericRecordBuilder; +import org.apache.avro.reflect.AvroIgnore; +import org.apache.avro.reflect.AvroName; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.specific.SpecificData; +import org.apache.avro.specific.SpecificRecord; +import org.apache.avro.util.Utf8; +import org.apache.beam.sdk.annotations.Experimental; +import org.apache.beam.sdk.annotations.Experimental.Kind; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.schemas.AvroRecordSchema; +import org.apache.beam.sdk.schemas.FieldValueGetter; +import org.apache.beam.sdk.schemas.FieldValueTypeInformation; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.schemas.Schema.Field; +import org.apache.beam.sdk.schemas.Schema.FieldType; +import org.apache.beam.sdk.schemas.Schema.TypeName; +import org.apache.beam.sdk.schemas.SchemaCoder; +import org.apache.beam.sdk.schemas.SchemaUserTypeCreator; +import org.apache.beam.sdk.schemas.logicaltypes.EnumerationType; +import org.apache.beam.sdk.schemas.logicaltypes.FixedBytes; +import org.apache.beam.sdk.schemas.logicaltypes.FixedString; +import org.apache.beam.sdk.schemas.logicaltypes.OneOfType; +import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes; +import org.apache.beam.sdk.schemas.logicaltypes.VariableBytes; +import org.apache.beam.sdk.schemas.logicaltypes.VariableString; +import org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.ConvertType; +import org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.ConvertValueForGetter; +import org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.ConvertValueForSetter; +import org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.TypeConversion; +import org.apache.beam.sdk.schemas.utils.ByteBuddyUtils.TypeConversionsFactory; 
+import org.apache.beam.sdk.schemas.utils.FieldValueTypeSupplier; +import org.apache.beam.sdk.schemas.utils.JavaBeanUtils; +import org.apache.beam.sdk.schemas.utils.POJOUtils; +import org.apache.beam.sdk.schemas.utils.ReflectUtils; +import org.apache.beam.sdk.schemas.utils.StaticSchemaInference; +import org.apache.beam.sdk.transforms.SerializableFunction; +import org.apache.beam.sdk.transforms.SimpleFunction; +import org.apache.beam.sdk.values.Row; +import org.apache.beam.sdk.values.TypeDescriptor; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.CaseFormat; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Strings; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.joda.time.Days; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.joda.time.ReadableInstant; + +/** + * Utils to convert AVRO records to Beam rows. Imposes a mapping between common avro types and Beam + * portable schemas (https://s.apache.org/beam-schemas): + * + *

+ *   Avro                Beam Field Type
+ *   INT         <-----> INT32
+ *   LONG        <-----> INT64
+ *   FLOAT       <-----> FLOAT
+ *   DOUBLE      <-----> DOUBLE
+ *   BOOLEAN     <-----> BOOLEAN
+ *   STRING      <-----> STRING
+ *   BYTES       <-----> BYTES
+ *               <------ LogicalType(urn="beam:logical_type:var_bytes:v1")
+ *   FIXED       <-----> LogicalType(urn="beam:logical_type:fixed_bytes:v1")
+ *   ARRAY       <-----> ARRAY
+ *   ENUM        <-----> LogicalType(EnumerationType)
+ *   MAP         <-----> MAP
+ *   RECORD      <-----> ROW
+ *   UNION       <-----> LogicalType(OneOfType)
+ *   LogicalTypes.Date              <-----> LogicalType(DATE)
+ *                                  <------ LogicalType(urn="beam:logical_type:date:v1")
+ *   LogicalTypes.TimestampMillis   <-----> DATETIME
+ *   LogicalTypes.Decimal           <-----> DECIMAL
+ * </pre>
+ * + * For SQL CHAR/VARCHAR types, an Avro schema + * + * <pre>
+ *   LogicalType({"type":"string","logicalType":"char","maxLength":MAX_LENGTH}) or
+ *   LogicalType({"type":"string","logicalType":"varchar","maxLength":MAX_LENGTH})
+ * </pre>
+ * + * is used. + */ +@Experimental(Kind.SCHEMAS) +@SuppressWarnings({ + "nullness", // TODO(https://github.com/apache/beam/issues/20497) + "rawtypes" +}) +public class AvroUtils { + static { + // This works around a bug in the Avro library (AVRO-1891) around SpecificRecord's handling + // of DateTime types. + SpecificData.get().addLogicalTypeConversion(new AvroCoder.JodaTimestampConversion()); + GenericData.get().addLogicalTypeConversion(new AvroCoder.JodaTimestampConversion()); + } + + // Unwrap an AVRO schema into the base type an whether it is nullable. + static class TypeWithNullability { + public final org.apache.avro.Schema type; + public final boolean nullable; + + TypeWithNullability(org.apache.avro.Schema avroSchema) { + if (avroSchema.getType() == Type.UNION) { + List types = avroSchema.getTypes(); + + // optional fields in AVRO have form of: + // {"name": "foo", "type": ["null", "something"]} + + // don't need recursion because nested unions aren't supported in AVRO + List nonNullTypes = + types.stream().filter(x -> x.getType() != Type.NULL).collect(Collectors.toList()); + + if (nonNullTypes.size() == types.size() || nonNullTypes.isEmpty()) { + // union without `null` or all 'null' union, keep as is. + type = avroSchema; + nullable = false; + } else if (nonNullTypes.size() > 1) { + type = org.apache.avro.Schema.createUnion(nonNullTypes); + nullable = true; + } else { + // One non-null type. + type = nonNullTypes.get(0); + nullable = true; + } + } else { + type = avroSchema; + nullable = false; + } + } + } + + /** Wrapper for fixed byte fields. */ + public static class FixedBytesField { + private final int size; + + private FixedBytesField(int size) { + this.size = size; + } + + /** Create a {@link FixedBytesField} with the specified size. */ + public static FixedBytesField withSize(int size) { + return new FixedBytesField(size); + } + + /** Create a {@link FixedBytesField} from a Beam {@link FieldType}. */ + public static @Nullable FixedBytesField fromBeamFieldType(FieldType fieldType) { + if (fieldType.getTypeName().isLogicalType() + && fieldType.getLogicalType().getIdentifier().equals(FixedBytes.IDENTIFIER)) { + int length = fieldType.getLogicalType(FixedBytes.class).getLength(); + return new FixedBytesField(length); + } else { + return null; + } + } + + /** Create a {@link FixedBytesField} from an AVRO type. */ + public static @Nullable FixedBytesField fromAvroType(org.apache.avro.Schema type) { + if (type.getType().equals(Type.FIXED)) { + return new FixedBytesField(type.getFixedSize()); + } else { + return null; + } + } + + /** Get the size. */ + public int getSize() { + return size; + } + + /** Convert to a Beam type. */ + public FieldType toBeamType() { + return FieldType.logicalType(FixedBytes.of(size)); + } + + /** Convert to an AVRO type. 
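As a small illustration of the FIXED mapping described in the table above (hypothetical names; not part of the patch):

import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils.FixedBytesField;
import org.apache.beam.sdk.schemas.Schema.FieldType;
import org.apache.beam.sdk.schemas.logicaltypes.FixedBytes;

public class FixedBytesSketch {
  public static void main(String[] args) {
    // Beam's FixedBytes(4) logical type corresponds to an Avro FIXED of size 4.
    FieldType beamType = FieldType.logicalType(FixedBytes.of(4));
    FixedBytesField fixed = FixedBytesField.fromBeamFieldType(beamType);
    System.out.println(fixed.getSize());                           // 4
    System.out.println(fixed.toAvroType("fixed4", "com.example")); // an Avro schema of type "fixed", size 4
  }
}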
*/ + public org.apache.avro.Schema toAvroType(String name, String namespace) { + return org.apache.avro.Schema.createFixed(name, null, namespace, size); + } + } + + public static class AvroConvertType extends ConvertType { + public AvroConvertType(boolean returnRawType) { + super(returnRawType); + } + + @Override + protected java.lang.reflect.Type convertDefault(TypeDescriptor type) { + if (type.isSubtypeOf(TypeDescriptor.of(GenericFixed.class))) { + return byte[].class; + } else { + return super.convertDefault(type); + } + } + } + + public static class AvroConvertValueForGetter extends ConvertValueForGetter { + AvroConvertValueForGetter(StackManipulation readValue) { + super(readValue); + } + + @Override + protected TypeConversionsFactory getFactory() { + return new AvroTypeConversionFactory(); + } + + @Override + protected StackManipulation convertDefault(TypeDescriptor type) { + if (type.isSubtypeOf(TypeDescriptor.of(GenericFixed.class))) { + // Generate the following code: + // return value.bytes(); + return new Compound( + readValue, + MethodInvocation.invoke( + new ForLoadedType(GenericFixed.class) + .getDeclaredMethods() + .filter( + ElementMatchers.named("bytes") + .and(ElementMatchers.returns(new ForLoadedType(byte[].class)))) + .getOnly())); + } + return super.convertDefault(type); + } + } + + public static class AvroConvertValueForSetter extends ConvertValueForSetter { + AvroConvertValueForSetter(StackManipulation readValue) { + super(readValue); + } + + @Override + protected TypeConversionsFactory getFactory() { + return new AvroTypeConversionFactory(); + } + + @Override + protected StackManipulation convertDefault(TypeDescriptor type) { + final ForLoadedType byteArrayType = new ForLoadedType(byte[].class); + if (type.isSubtypeOf(TypeDescriptor.of(GenericFixed.class))) { + // Generate the following code: + // return new T((byte[]) value); + ForLoadedType loadedType = new ForLoadedType(type.getRawType()); + return new Compound( + TypeCreation.of(loadedType), + Duplication.SINGLE, + // Load the parameter and cast it to a byte[]. + readValue, + TypeCasting.to(byteArrayType), + // Create a new instance that wraps this byte[]. + MethodInvocation.invoke( + loadedType + .getDeclaredMethods() + .filter( + ElementMatchers.isConstructor() + .and(ElementMatchers.takesArguments(byteArrayType))) + .getOnly())); + } + return super.convertDefault(type); + } + } + + static class AvroTypeConversionFactory implements TypeConversionsFactory { + + @Override + public TypeConversion createTypeConversion(boolean returnRawTypes) { + return new AvroConvertType(returnRawTypes); + } + + @Override + public TypeConversion createGetterConversions(StackManipulation readValue) { + return new AvroConvertValueForGetter(readValue); + } + + @Override + public TypeConversion createSetterConversions(StackManipulation readValue) { + return new AvroConvertValueForSetter(readValue); + } + } + + /** Get Beam Field from avro Field. */ + public static Field toBeamField(org.apache.avro.Schema.Field field) { + TypeWithNullability nullableType = new TypeWithNullability(field.schema()); + FieldType beamFieldType = toFieldType(nullableType); + return Field.of(field.name(), beamFieldType); + } + + /** Get Avro Field from Beam Field. 
*/ + public static org.apache.avro.Schema.Field toAvroField(Field field, String namespace) { + org.apache.avro.Schema fieldSchema = + getFieldSchema(field.getType(), field.getName(), namespace); + return new org.apache.avro.Schema.Field( + field.getName(), fieldSchema, field.getDescription(), (Object) null); + } + + private AvroUtils() {} + + /** + * Converts AVRO schema to Beam row schema. + * + * @param schema schema of type RECORD + */ + public static Schema toBeamSchema(org.apache.avro.Schema schema) { + Schema.Builder builder = Schema.builder(); + + for (org.apache.avro.Schema.Field field : schema.getFields()) { + Field beamField = toBeamField(field); + if (field.doc() != null) { + beamField = beamField.withDescription(field.doc()); + } + builder.addField(beamField); + } + + return builder.build(); + } + + /** Converts a Beam Schema into an AVRO schema. */ + public static org.apache.avro.Schema toAvroSchema( + Schema beamSchema, @Nullable String name, @Nullable String namespace) { + final String schemaName = Strings.isNullOrEmpty(name) ? "topLevelRecord" : name; + final String schemaNamespace = namespace == null ? "" : namespace; + String childNamespace = + !"".equals(schemaNamespace) ? schemaNamespace + "." + schemaName : schemaName; + List fields = Lists.newArrayList(); + for (Field field : beamSchema.getFields()) { + org.apache.avro.Schema.Field recordField = toAvroField(field, childNamespace); + fields.add(recordField); + } + return org.apache.avro.Schema.createRecord(schemaName, null, schemaNamespace, false, fields); + } + + public static org.apache.avro.Schema toAvroSchema(Schema beamSchema) { + return toAvroSchema(beamSchema, null, null); + } + + /** + * Strict conversion from AVRO to Beam, strict because it doesn't do widening or narrowing during + * conversion. If Schema is not provided, one is inferred from the AVRO schema. + */ + public static Row toBeamRowStrict(GenericRecord record, @Nullable Schema schema) { + if (schema == null) { + schema = toBeamSchema(record.getSchema()); + } + + Row.Builder builder = Row.withSchema(schema); + org.apache.avro.Schema avroSchema = record.getSchema(); + + for (Field field : schema.getFields()) { + Object value = record.get(field.getName()); + org.apache.avro.Schema fieldAvroSchema = avroSchema.getField(field.getName()).schema(); + builder.addValue(convertAvroFieldStrict(value, fieldAvroSchema, field.getType())); + } + + return builder.build(); + } + + /** + * Convert from a Beam Row to an AVRO GenericRecord. The Avro Schema is inferred from the Beam + * schema on the row. + */ + public static GenericRecord toGenericRecord(Row row) { + return toGenericRecord(row, null); + } + + /** + * Convert from a Beam Row to an AVRO GenericRecord. If a Schema is not provided, one is inferred + * from the Beam schema on the row. + */ + public static GenericRecord toGenericRecord( + Row row, org.apache.avro.@Nullable Schema avroSchema) { + Schema beamSchema = row.getSchema(); + // Use the provided AVRO schema if present, otherwise infer an AVRO schema from the row + // schema. + if (avroSchema != null && avroSchema.getFields().size() != beamSchema.getFieldCount()) { + throw new IllegalArgumentException( + "AVRO schema doesn't match row schema. Row schema " + + beamSchema + + ". 
AVRO schema + " + + avroSchema); + } + if (avroSchema == null) { + avroSchema = toAvroSchema(beamSchema); + } + + GenericRecordBuilder builder = new GenericRecordBuilder(avroSchema); + for (int i = 0; i < beamSchema.getFieldCount(); ++i) { + Field field = beamSchema.getField(i); + builder.set( + field.getName(), + genericFromBeamField( + field.getType(), avroSchema.getField(field.getName()).schema(), row.getValue(i))); + } + return builder.build(); + } + + @SuppressWarnings("unchecked") + public static SerializableFunction getToRowFunction( + Class clazz, org.apache.avro.@Nullable Schema schema) { + if (GenericRecord.class.equals(clazz)) { + Schema beamSchema = toBeamSchema(schema); + return (SerializableFunction) getGenericRecordToRowFunction(beamSchema); + } else { + return new AvroRecordSchema().toRowFunction(TypeDescriptor.of(clazz)); + } + } + + @SuppressWarnings("unchecked") + public static SerializableFunction getFromRowFunction(Class clazz) { + return GenericRecord.class.equals(clazz) + ? (SerializableFunction) getRowToGenericRecordFunction(null) + : new AvroRecordSchema().fromRowFunction(TypeDescriptor.of(clazz)); + } + + public static @Nullable Schema getSchema( + Class clazz, org.apache.avro.@Nullable Schema schema) { + if (schema != null) { + return schema.getType().equals(Type.RECORD) ? toBeamSchema(schema) : null; + } + if (GenericRecord.class.equals(clazz)) { + throw new IllegalArgumentException("No schema provided for getSchema(GenericRecord)"); + } + return new AvroRecordSchema().schemaFor(TypeDescriptor.of(clazz)); + } + + /** Returns a function mapping encoded AVRO {@link GenericRecord}s to Beam {@link Row}s. */ + public static SimpleFunction getAvroBytesToRowFunction(Schema beamSchema) { + return new AvroBytesToRowFn(beamSchema); + } + + private static class AvroBytesToRowFn extends SimpleFunction { + private final AvroCoder coder; + private final Schema beamSchema; + + AvroBytesToRowFn(Schema beamSchema) { + org.apache.avro.Schema avroSchema = toAvroSchema(beamSchema); + coder = AvroCoder.of(avroSchema); + this.beamSchema = beamSchema; + } + + @Override + public Row apply(byte[] bytes) { + try { + ByteArrayInputStream inputStream = new ByteArrayInputStream(bytes); + GenericRecord record = coder.decode(inputStream); + return AvroUtils.toBeamRowStrict(record, beamSchema); + } catch (Exception e) { + throw new AvroRuntimeException( + "Could not decode avro record from given bytes " + + new String(bytes, StandardCharsets.UTF_8), + e); + } + } + } + + /** Returns a function mapping Beam {@link Row}s to encoded AVRO {@link GenericRecord}s. 
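A minimal round-trip sketch for the conversion utilities above (the schema and values are invented for illustration):

import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.values.Row;

public class AvroUtilsRoundTripSketch {
  public static void main(String[] args) {
    Schema beamSchema = Schema.builder().addStringField("name").addInt64Field("count").build();
    org.apache.avro.Schema avroSchema = AvroUtils.toAvroSchema(beamSchema);

    Row row = Row.withSchema(beamSchema).addValues("beam", 7L).build();
    GenericRecord record = AvroUtils.toGenericRecord(row, avroSchema);  // Row -> GenericRecord
    Row back = AvroUtils.toBeamRowStrict(record, beamSchema);           // GenericRecord -> Row
    System.out.println(row.equals(back));                               // expected: true
  }
}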
*/ + public static SimpleFunction getRowToAvroBytesFunction(Schema beamSchema) { + return new RowToAvroBytesFn(beamSchema); + } + + private static class RowToAvroBytesFn extends SimpleFunction { + private final transient org.apache.avro.Schema avroSchema; + private final AvroCoder coder; + + RowToAvroBytesFn(Schema beamSchema) { + avroSchema = toAvroSchema(beamSchema); + coder = AvroCoder.of(avroSchema); + } + + @Override + public byte[] apply(Row row) { + try { + GenericRecord record = toGenericRecord(row, avroSchema); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + coder.encode(record, outputStream); + return outputStream.toByteArray(); + } catch (Exception e) { + throw new AvroRuntimeException( + String.format("Could not encode avro from given row: %s", row), e); + } + } + } + + /** + * Returns a function mapping AVRO {@link GenericRecord}s to Beam {@link Row}s for use in {@link + * org.apache.beam.sdk.values.PCollection#setSchema}. + */ + public static SerializableFunction getGenericRecordToRowFunction( + @Nullable Schema schema) { + return new GenericRecordToRowFn(schema); + } + + private static class GenericRecordToRowFn implements SerializableFunction { + private final Schema schema; + + GenericRecordToRowFn(Schema schema) { + this.schema = schema; + } + + @Override + public Row apply(GenericRecord input) { + return toBeamRowStrict(input, schema); + } + + @Override + public boolean equals(@Nullable Object other) { + if (this == other) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + GenericRecordToRowFn that = (GenericRecordToRowFn) other; + return Objects.equals(this.schema, that.schema); + } + + @Override + public int hashCode() { + return Objects.hash(schema); + } + } + + /** + * Returns a function mapping Beam {@link Row}s to AVRO {@link GenericRecord}s for use in {@link + * org.apache.beam.sdk.values.PCollection#setSchema}. + */ + public static SerializableFunction getRowToGenericRecordFunction( + org.apache.avro.@Nullable Schema avroSchema) { + return new RowToGenericRecordFn(avroSchema); + } + + private static class RowToGenericRecordFn implements SerializableFunction { + private transient org.apache.avro.Schema avroSchema; + + RowToGenericRecordFn(org.apache.avro.@Nullable Schema avroSchema) { + this.avroSchema = avroSchema; + } + + @Override + public GenericRecord apply(Row input) { + return toGenericRecord(input, avroSchema); + } + + @Override + public boolean equals(@Nullable Object other) { + if (this == other) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + RowToGenericRecordFn that = (RowToGenericRecordFn) other; + return Objects.equals(this.avroSchema, that.avroSchema); + } + + @Override + public int hashCode() { + return Objects.hash(avroSchema); + } + + private void writeObject(ObjectOutputStream out) throws IOException { + final String avroSchemaAsString = (avroSchema == null) ? null : avroSchema.toString(); + out.writeObject(avroSchemaAsString); + } + + private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { + final String avroSchemaAsString = (String) in.readObject(); + avroSchema = + (avroSchemaAsString == null) + ? null + : new org.apache.avro.Schema.Parser().parse(avroSchemaAsString); + } + } + + /** + * Returns an {@code SchemaCoder} instance for the provided element type. 
+ * + * @param the element type + */ + public static SchemaCoder schemaCoder(TypeDescriptor type) { + @SuppressWarnings("unchecked") + Class clazz = (Class) type.getRawType(); + org.apache.avro.Schema avroSchema = new ReflectData(clazz.getClassLoader()).getSchema(clazz); + Schema beamSchema = toBeamSchema(avroSchema); + return SchemaCoder.of( + beamSchema, type, getToRowFunction(clazz, avroSchema), getFromRowFunction(clazz)); + } + + /** + * Returns an {@code SchemaCoder} instance for the provided element class. + * + * @param the element type + */ + public static SchemaCoder schemaCoder(Class clazz) { + return schemaCoder(TypeDescriptor.of(clazz)); + } + + /** + * Returns an {@code SchemaCoder} instance for the Avro schema. The implicit type is + * GenericRecord. + */ + public static SchemaCoder schemaCoder(org.apache.avro.Schema schema) { + Schema beamSchema = toBeamSchema(schema); + return SchemaCoder.of( + beamSchema, + TypeDescriptor.of(GenericRecord.class), + getGenericRecordToRowFunction(beamSchema), + getRowToGenericRecordFunction(schema)); + } + + /** + * Returns an {@code SchemaCoder} instance for the provided element type using the provided Avro + * schema. + * + *

If the type argument is GenericRecord, the schema may be arbitrary. Otherwise, the schema + * must correspond to the type provided. + * + * @param the element type + */ + public static SchemaCoder schemaCoder(Class clazz, org.apache.avro.Schema schema) { + return SchemaCoder.of( + getSchema(clazz, schema), + TypeDescriptor.of(clazz), + getToRowFunction(clazz, schema), + getFromRowFunction(clazz)); + } + + /** + * Returns an {@code SchemaCoder} instance based on the provided AvroCoder for the element type. + * + * @param the element type + */ + public static SchemaCoder schemaCoder(AvroCoder avroCoder) { + return schemaCoder(avroCoder.getType(), avroCoder.getSchema()); + } + + private static final class AvroSpecificRecordFieldValueTypeSupplier + implements FieldValueTypeSupplier { + @Override + public List get(Class clazz) { + throw new RuntimeException("Unexpected call."); + } + + @Override + public List get(Class clazz, Schema schema) { + Map mapping = getMapping(schema); + List methods = ReflectUtils.getMethods(clazz); + List types = Lists.newArrayList(); + for (int i = 0; i < methods.size(); ++i) { + Method method = methods.get(i); + if (ReflectUtils.isGetter(method)) { + FieldValueTypeInformation fieldValueTypeInformation = + FieldValueTypeInformation.forGetter(method, i); + String name = mapping.get(fieldValueTypeInformation.getName()); + if (name != null) { + types.add(fieldValueTypeInformation.withName(name)); + } + } + } + + // Return the list ordered by the schema fields. + return StaticSchemaInference.sortBySchema(types, schema); + } + + private Map getMapping(Schema schema) { + Map mapping = Maps.newHashMap(); + for (Field field : schema.getFields()) { + String fieldName = field.getName(); + String getter; + if (fieldName.contains("_")) { + if (Character.isLowerCase(fieldName.charAt(0))) { + // field_name -> fieldName + getter = CaseFormat.LOWER_UNDERSCORE.to(CaseFormat.LOWER_CAMEL, fieldName); + } else { + // FIELD_NAME -> fIELDNAME + // must remove underscore and then convert to match compiled Avro schema getter name + getter = CaseFormat.UPPER_CAMEL.to(CaseFormat.LOWER_CAMEL, fieldName.replace("_", "")); + } + } else if (Character.isUpperCase(fieldName.charAt(0))) { + // FieldName -> fieldName + getter = CaseFormat.UPPER_CAMEL.to(CaseFormat.LOWER_CAMEL, fieldName); + } else { + // If the field is in camel case already, then it's the identity mapping. + getter = fieldName; + } + mapping.put(getter, fieldName); + // The Avro compiler might add a $ at the end of a getter to disambiguate. + mapping.put(getter + "$", fieldName); + } + return mapping; + } + } + + private static final class AvroPojoFieldValueTypeSupplier implements FieldValueTypeSupplier { + @Override + public List get(Class clazz) { + List classFields = ReflectUtils.getFields(clazz); + Map types = Maps.newHashMap(); + for (int i = 0; i < classFields.size(); ++i) { + java.lang.reflect.Field f = classFields.get(i); + if (!f.isAnnotationPresent(AvroIgnore.class)) { + FieldValueTypeInformation typeInformation = FieldValueTypeInformation.forField(f, i); + AvroName avroname = f.getAnnotation(AvroName.class); + if (avroname != null) { + typeInformation = typeInformation.withName(avroname.value()); + } + types.put(typeInformation.getName(), typeInformation); + } + } + return Lists.newArrayList(types.values()); + } + } + + /** Get field types for an AVRO-generated SpecificRecord or a POJO. 
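A short, hypothetical example of the schemaCoder overloads above, using a GenericRecord schema (any record schema would do):

import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils;
import org.apache.beam.sdk.schemas.SchemaCoder;

public class SchemaCoderSketch {
  public static void main(String[] args) {
    org.apache.avro.Schema avroSchema =
        new org.apache.avro.Schema.Parser()
            .parse(
                "{\"type\":\"record\",\"name\":\"User\",\"fields\":"
                    + "[{\"name\":\"name\",\"type\":\"string\"}]}");

    // The coder carries both the Avro encoding and the inferred Beam schema.
    SchemaCoder<GenericRecord> coder = AvroUtils.schemaCoder(avroSchema);
    System.out.println(coder.getSchema()); // expected: a Beam schema with one STRING field "name"
  }
}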
*/ + public static List getFieldTypes(Class clazz, Schema schema) { + if (TypeDescriptor.of(clazz).isSubtypeOf(TypeDescriptor.of(SpecificRecord.class))) { + return JavaBeanUtils.getFieldTypes( + clazz, schema, new AvroSpecificRecordFieldValueTypeSupplier()); + } else { + return POJOUtils.getFieldTypes(clazz, schema, new AvroPojoFieldValueTypeSupplier()); + } + } + + /** Get generated getters for an AVRO-generated SpecificRecord or a POJO. */ + public static List getGetters(Class clazz, Schema schema) { + if (TypeDescriptor.of(clazz).isSubtypeOf(TypeDescriptor.of(SpecificRecord.class))) { + return JavaBeanUtils.getGetters( + clazz, + schema, + new AvroSpecificRecordFieldValueTypeSupplier(), + new AvroTypeConversionFactory()); + } else { + return POJOUtils.getGetters( + clazz, schema, new AvroPojoFieldValueTypeSupplier(), new AvroTypeConversionFactory()); + } + } + + /** Get an object creator for an AVRO-generated SpecificRecord. */ + public static SchemaUserTypeCreator getCreator(Class clazz, Schema schema) { + if (TypeDescriptor.of(clazz).isSubtypeOf(TypeDescriptor.of(SpecificRecord.class))) { + return AvroByteBuddyUtils.getCreator((Class) clazz, schema); + } else { + return POJOUtils.getSetFieldCreator( + clazz, schema, new AvroPojoFieldValueTypeSupplier(), new AvroTypeConversionFactory()); + } + } + + /** Converts AVRO schema to Beam field. */ + private static FieldType toFieldType(TypeWithNullability type) { + FieldType fieldType = null; + org.apache.avro.Schema avroSchema = type.type; + + LogicalType logicalType = LogicalTypes.fromSchema(avroSchema); + if (logicalType != null) { + if (logicalType instanceof LogicalTypes.Decimal) { + fieldType = FieldType.DECIMAL; + } else if (logicalType instanceof LogicalTypes.TimestampMillis) { + // TODO: There is a desire to move Beam schema DATETIME to a micros representation. When + // this is done, this logical type needs to be changed. 
+ fieldType = FieldType.DATETIME; + } else if (logicalType instanceof LogicalTypes.Date) { + fieldType = FieldType.DATETIME; + } + } + + if (fieldType == null) { + switch (type.type.getType()) { + case RECORD: + fieldType = FieldType.row(toBeamSchema(avroSchema)); + break; + + case ENUM: + fieldType = FieldType.logicalType(EnumerationType.create(type.type.getEnumSymbols())); + break; + + case ARRAY: + FieldType elementType = toFieldType(new TypeWithNullability(avroSchema.getElementType())); + fieldType = FieldType.array(elementType); + break; + + case MAP: + fieldType = + FieldType.map( + FieldType.STRING, + toFieldType(new TypeWithNullability(avroSchema.getValueType()))); + break; + + case FIXED: + fieldType = FixedBytesField.fromAvroType(type.type).toBeamType(); + break; + + case STRING: + fieldType = FieldType.STRING; + break; + + case BYTES: + fieldType = FieldType.BYTES; + break; + + case INT: + fieldType = FieldType.INT32; + break; + + case LONG: + fieldType = FieldType.INT64; + break; + + case FLOAT: + fieldType = FieldType.FLOAT; + break; + + case DOUBLE: + fieldType = FieldType.DOUBLE; + break; + + case BOOLEAN: + fieldType = FieldType.BOOLEAN; + break; + + case UNION: + fieldType = + FieldType.logicalType( + OneOfType.create( + avroSchema.getTypes().stream() + .map(x -> Field.of(x.getName(), toFieldType(new TypeWithNullability(x)))) + .collect(Collectors.toList()))); + break; + case NULL: + throw new IllegalArgumentException("Can't convert 'null' to FieldType"); + + default: + throw new AssertionError("Unexpected AVRO Schema.Type: " + avroSchema.getType()); + } + } + fieldType = fieldType.withNullable(type.nullable); + return fieldType; + } + + private static org.apache.avro.Schema getFieldSchema( + FieldType fieldType, String fieldName, String namespace) { + org.apache.avro.Schema baseType; + switch (fieldType.getTypeName()) { + case BYTE: + case INT16: + case INT32: + baseType = org.apache.avro.Schema.create(Type.INT); + break; + + case INT64: + baseType = org.apache.avro.Schema.create(Type.LONG); + break; + + case DECIMAL: + baseType = + LogicalTypes.decimal(Integer.MAX_VALUE) + .addToSchema(org.apache.avro.Schema.create(Type.BYTES)); + break; + + case FLOAT: + baseType = org.apache.avro.Schema.create(Type.FLOAT); + break; + + case DOUBLE: + baseType = org.apache.avro.Schema.create(Type.DOUBLE); + break; + + case STRING: + baseType = org.apache.avro.Schema.create(Type.STRING); + break; + + case DATETIME: + // TODO: There is a desire to move Beam schema DATETIME to a micros representation. When + // this is done, this logical type needs to be changed. + baseType = + LogicalTypes.timestampMillis().addToSchema(org.apache.avro.Schema.create(Type.LONG)); + break; + + case BOOLEAN: + baseType = org.apache.avro.Schema.create(Type.BOOLEAN); + break; + + case BYTES: + baseType = org.apache.avro.Schema.create(Type.BYTES); + break; + + case LOGICAL_TYPE: + String identifier = fieldType.getLogicalType().getIdentifier(); + if (FixedBytes.IDENTIFIER.equals(identifier)) { + FixedBytesField fixedBytesField = + checkNotNull(FixedBytesField.fromBeamFieldType(fieldType)); + baseType = fixedBytesField.toAvroType("fixed", namespace + "." 
+ fieldName); + } else if (VariableBytes.IDENTIFIER.equals(identifier)) { + // treat VARBINARY as bytes as that is what avro supports + baseType = org.apache.avro.Schema.create(Type.BYTES); + } else if (FixedString.IDENTIFIER.equals(identifier) + || "CHAR".equals(identifier) + || "NCHAR".equals(identifier)) { + baseType = + buildHiveLogicalTypeSchema("char", (int) fieldType.getLogicalType().getArgument()); + } else if (VariableString.IDENTIFIER.equals(identifier) + || "NVARCHAR".equals(identifier) + || "VARCHAR".equals(identifier) + || "LONGNVARCHAR".equals(identifier) + || "LONGVARCHAR".equals(identifier)) { + baseType = + buildHiveLogicalTypeSchema("varchar", (int) fieldType.getLogicalType().getArgument()); + } else if (EnumerationType.IDENTIFIER.equals(identifier)) { + EnumerationType enumerationType = fieldType.getLogicalType(EnumerationType.class); + baseType = + org.apache.avro.Schema.createEnum(fieldName, "", "", enumerationType.getValues()); + } else if (OneOfType.IDENTIFIER.equals(identifier)) { + OneOfType oneOfType = fieldType.getLogicalType(OneOfType.class); + baseType = + org.apache.avro.Schema.createUnion( + oneOfType.getOneOfSchema().getFields().stream() + .map(x -> getFieldSchema(x.getType(), x.getName(), namespace)) + .collect(Collectors.toList())); + } else if ("DATE".equals(identifier) || SqlTypes.DATE.getIdentifier().equals(identifier)) { + baseType = LogicalTypes.date().addToSchema(org.apache.avro.Schema.create(Type.INT)); + } else if ("TIME".equals(identifier)) { + baseType = LogicalTypes.timeMillis().addToSchema(org.apache.avro.Schema.create(Type.INT)); + } else { + throw new RuntimeException( + "Unhandled logical type " + fieldType.getLogicalType().getIdentifier()); + } + break; + + case ARRAY: + case ITERABLE: + baseType = + org.apache.avro.Schema.createArray( + getFieldSchema(fieldType.getCollectionElementType(), fieldName, namespace)); + break; + + case MAP: + if (fieldType.getMapKeyType().getTypeName().isStringType()) { + // Avro only supports string keys in maps. + baseType = + org.apache.avro.Schema.createMap( + getFieldSchema(fieldType.getMapValueType(), fieldName, namespace)); + } else { + throw new IllegalArgumentException("Avro only supports maps with string keys"); + } + break; + + case ROW: + baseType = toAvroSchema(fieldType.getRowSchema(), fieldName, namespace); + break; + + default: + throw new IllegalArgumentException("Unexpected type " + fieldType); + } + return fieldType.getNullable() ? 
ReflectData.makeNullable(baseType) : baseType; + } + + private static @Nullable Object genericFromBeamField( + FieldType fieldType, org.apache.avro.Schema avroSchema, @Nullable Object value) { + TypeWithNullability typeWithNullability = new TypeWithNullability(avroSchema); + if (!fieldType.getNullable().equals(typeWithNullability.nullable)) { + throw new IllegalArgumentException( + "FieldType " + + fieldType + + " and AVRO schema " + + avroSchema + + " don't have matching nullability"); + } + + if (value == null) { + return value; + } + + switch (fieldType.getTypeName()) { + case BYTE: + case INT16: + case INT32: + case INT64: + case FLOAT: + case DOUBLE: + case BOOLEAN: + return value; + + case STRING: + return new Utf8((String) value); + + case DECIMAL: + BigDecimal decimal = (BigDecimal) value; + LogicalType logicalType = typeWithNullability.type.getLogicalType(); + return new Conversions.DecimalConversion().toBytes(decimal, null, logicalType); + + case DATETIME: + if (typeWithNullability.type.getType() == Type.INT) { + ReadableInstant instant = (ReadableInstant) value; + return (int) Days.daysBetween(Instant.EPOCH, instant).getDays(); + } else if (typeWithNullability.type.getType() == Type.LONG) { + ReadableInstant instant = (ReadableInstant) value; + return (long) instant.getMillis(); + } else { + throw new IllegalArgumentException( + "Can't represent " + fieldType + " as " + typeWithNullability.type.getType()); + } + + case BYTES: + return ByteBuffer.wrap((byte[]) value); + + case LOGICAL_TYPE: + String identifier = fieldType.getLogicalType().getIdentifier(); + if (FixedBytes.IDENTIFIER.equals(identifier)) { + FixedBytesField fixedBytesField = + checkNotNull(FixedBytesField.fromBeamFieldType(fieldType)); + byte[] byteArray = (byte[]) value; + if (byteArray.length != fixedBytesField.getSize()) { + throw new IllegalArgumentException("Incorrectly sized byte array."); + } + return GenericData.get().createFixed(null, (byte[]) value, typeWithNullability.type); + } else if (VariableBytes.IDENTIFIER.equals(identifier)) { + return GenericData.get().createFixed(null, (byte[]) value, typeWithNullability.type); + } else if (FixedString.IDENTIFIER.equals(identifier) + || "CHAR".equals(identifier) + || "NCHAR".equals(identifier)) { + return new Utf8((String) value); + } else if (VariableString.IDENTIFIER.equals(identifier) + || "NVARCHAR".equals(identifier) + || "VARCHAR".equals(identifier) + || "LONGNVARCHAR".equals(identifier) + || "LONGVARCHAR".equals(identifier)) { + return new Utf8((String) value); + } else if (EnumerationType.IDENTIFIER.equals(identifier)) { + EnumerationType enumerationType = fieldType.getLogicalType(EnumerationType.class); + return GenericData.get() + .createEnum( + enumerationType.toString((EnumerationType.Value) value), + typeWithNullability.type); + } else if (OneOfType.IDENTIFIER.equals(identifier)) { + OneOfType oneOfType = fieldType.getLogicalType(OneOfType.class); + OneOfType.Value oneOfValue = (OneOfType.Value) value; + FieldType innerFieldType = oneOfType.getFieldType(oneOfValue); + if (typeWithNullability.nullable && oneOfValue.getValue() == null) { + return null; + } else { + return genericFromBeamField( + innerFieldType.withNullable(false), + typeWithNullability.type.getTypes().get(oneOfValue.getCaseType().getValue()), + oneOfValue.getValue()); + } + } else if ("DATE".equals(identifier)) { + // "Date" is backed by joda.time.Instant + return Days.daysBetween(Instant.EPOCH, (Instant) value).getDays(); + } else if (SqlTypes.DATE.getIdentifier().equals(identifier)) 
{ + // portable SqlTypes.DATE is backed by java.time.LocalDate + return ((java.time.LocalDate) value).toEpochDay(); + } else if ("TIME".equals(identifier)) { + return (int) ((Instant) value).getMillis(); + } else { + throw new RuntimeException("Unhandled logical type " + identifier); + } + + case ARRAY: + case ITERABLE: + Iterable iterable = (Iterable) value; + List translatedArray = Lists.newArrayListWithExpectedSize(Iterables.size(iterable)); + + for (Object arrayElement : iterable) { + translatedArray.add( + genericFromBeamField( + fieldType.getCollectionElementType(), + typeWithNullability.type.getElementType(), + arrayElement)); + } + return translatedArray; + + case MAP: + Map map = Maps.newHashMap(); + Map valueMap = (Map) value; + for (Map.Entry entry : valueMap.entrySet()) { + Utf8 key = new Utf8((String) entry.getKey()); + map.put( + key, + genericFromBeamField( + fieldType.getMapValueType(), + typeWithNullability.type.getValueType(), + entry.getValue())); + } + return map; + + case ROW: + return toGenericRecord((Row) value, typeWithNullability.type); + + default: + throw new IllegalArgumentException("Unsupported type " + fieldType); + } + } + + /** + * Strict conversion from AVRO to Beam, strict because it doesn't do widening or narrowing during + * conversion. + * + * @param value {@link GenericRecord} or any nested value + * @param avroSchema schema for value + * @param fieldType target beam field type + * @return value converted for {@link Row} + */ + @SuppressWarnings("unchecked") + public static @Nullable Object convertAvroFieldStrict( + @Nullable Object value, + @Nonnull org.apache.avro.Schema avroSchema, + @Nonnull FieldType fieldType) { + if (value == null) { + return null; + } + + TypeWithNullability type = new TypeWithNullability(avroSchema); + LogicalType logicalType = LogicalTypes.fromSchema(type.type); + if (logicalType != null) { + if (logicalType instanceof LogicalTypes.Decimal) { + ByteBuffer byteBuffer = (ByteBuffer) value; + BigDecimal bigDecimal = + new Conversions.DecimalConversion() + .fromBytes(byteBuffer.duplicate(), type.type, logicalType); + return convertDecimal(bigDecimal, fieldType); + } else if (logicalType instanceof LogicalTypes.TimestampMillis) { + if (value instanceof ReadableInstant) { + return convertDateTimeStrict(((ReadableInstant) value).getMillis(), fieldType); + } else { + return convertDateTimeStrict((Long) value, fieldType); + } + } else if (logicalType instanceof LogicalTypes.Date) { + if (value instanceof ReadableInstant) { + int epochDays = Days.daysBetween(Instant.EPOCH, (ReadableInstant) value).getDays(); + return convertDateStrict(epochDays, fieldType); + } else if (value instanceof java.time.LocalDate) { + return convertDateStrict((int) ((java.time.LocalDate) value).toEpochDay(), fieldType); + } else { + return convertDateStrict((Integer) value, fieldType); + } + } + } + + switch (type.type.getType()) { + case FIXED: + return convertFixedStrict((GenericFixed) value, fieldType); + + case BYTES: + return convertBytesStrict((ByteBuffer) value, fieldType); + + case STRING: + return convertStringStrict((CharSequence) value, fieldType); + + case INT: + return convertIntStrict((Integer) value, fieldType); + + case LONG: + return convertLongStrict((Long) value, fieldType); + + case FLOAT: + return convertFloatStrict((Float) value, fieldType); + + case DOUBLE: + return convertDoubleStrict((Double) value, fieldType); + + case BOOLEAN: + return convertBooleanStrict((Boolean) value, fieldType); + + case RECORD: + return 
convertRecordStrict((GenericRecord) value, fieldType); + + case ENUM: + // enums are either Java enums, or GenericEnumSymbol, + // they don't share common interface, but override toString() + return convertEnumStrict(value, fieldType); + + case ARRAY: + return convertArrayStrict((List) value, type.type.getElementType(), fieldType); + + case MAP: + return convertMapStrict( + (Map) value, type.type.getValueType(), fieldType); + + case UNION: + return convertUnionStrict(value, type.type, fieldType); + + case NULL: + throw new IllegalArgumentException("Can't convert 'null' to non-nullable field"); + + default: + throw new AssertionError("Unexpected AVRO Schema.Type: " + type.type.getType()); + } + } + + private static Object convertRecordStrict(GenericRecord record, FieldType fieldType) { + checkTypeName(fieldType.getTypeName(), TypeName.ROW, "record"); + return toBeamRowStrict(record, fieldType.getRowSchema()); + } + + private static Object convertBytesStrict(ByteBuffer bb, FieldType fieldType) { + checkTypeName(fieldType.getTypeName(), TypeName.BYTES, "bytes"); + + byte[] bytes = new byte[bb.remaining()]; + bb.duplicate().get(bytes); + return bytes; + } + + private static Object convertFixedStrict(GenericFixed fixed, FieldType fieldType) { + checkTypeName(fieldType.getTypeName(), TypeName.LOGICAL_TYPE, "fixed"); + checkArgument(FixedBytes.IDENTIFIER.equals(fieldType.getLogicalType().getIdentifier())); + return fixed.bytes().clone(); // clone because GenericFixed is mutable + } + + private static Object convertStringStrict(CharSequence value, FieldType fieldType) { + checkTypeName(fieldType.getTypeName(), TypeName.STRING, "string"); + return value.toString(); + } + + private static Object convertIntStrict(Integer value, FieldType fieldType) { + checkTypeName(fieldType.getTypeName(), TypeName.INT32, "int"); + return value; + } + + private static Object convertLongStrict(Long value, FieldType fieldType) { + checkTypeName(fieldType.getTypeName(), TypeName.INT64, "long"); + return value; + } + + private static Object convertDecimal(BigDecimal value, FieldType fieldType) { + checkTypeName(fieldType.getTypeName(), TypeName.DECIMAL, "decimal"); + return value; + } + + private static Object convertDateStrict(Integer epochDays, FieldType fieldType) { + checkTypeName(fieldType.getTypeName(), TypeName.DATETIME, "date"); + return Instant.EPOCH.plus(Duration.standardDays(epochDays)); + } + + private static Object convertDateTimeStrict(Long value, FieldType fieldType) { + checkTypeName(fieldType.getTypeName(), TypeName.DATETIME, "dateTime"); + return new Instant(value); + } + + private static Object convertFloatStrict(Float value, FieldType fieldType) { + checkTypeName(fieldType.getTypeName(), TypeName.FLOAT, "float"); + return value; + } + + private static Object convertDoubleStrict(Double value, FieldType fieldType) { + checkTypeName(fieldType.getTypeName(), TypeName.DOUBLE, "double"); + return value; + } + + private static Object convertBooleanStrict(Boolean value, FieldType fieldType) { + checkTypeName(fieldType.getTypeName(), TypeName.BOOLEAN, "boolean"); + return value; + } + + private static Object convertEnumStrict(Object value, FieldType fieldType) { + checkTypeName(fieldType.getTypeName(), TypeName.LOGICAL_TYPE, "enum"); + checkArgument(fieldType.getLogicalType().getIdentifier().equals(EnumerationType.IDENTIFIER)); + EnumerationType enumerationType = fieldType.getLogicalType(EnumerationType.class); + return enumerationType.valueOf(value.toString()); + } + + private static Object 
convertUnionStrict( + Object value, org.apache.avro.Schema unionAvroSchema, FieldType fieldType) { + checkTypeName(fieldType.getTypeName(), TypeName.LOGICAL_TYPE, "oneOfType"); + checkArgument(fieldType.getLogicalType().getIdentifier().equals(OneOfType.IDENTIFIER)); + OneOfType oneOfType = fieldType.getLogicalType(OneOfType.class); + int fieldNumber = GenericData.get().resolveUnion(unionAvroSchema, value); + FieldType baseFieldType = oneOfType.getOneOfSchema().getField(fieldNumber).getType(); + Object convertedValue = + convertAvroFieldStrict(value, unionAvroSchema.getTypes().get(fieldNumber), baseFieldType); + return oneOfType.createValue(fieldNumber, convertedValue); + } + + private static Object convertArrayStrict( + List values, org.apache.avro.Schema elemAvroSchema, FieldType fieldType) { + checkTypeName(fieldType.getTypeName(), TypeName.ARRAY, "array"); + + List ret = new ArrayList<>(values.size()); + FieldType elemFieldType = fieldType.getCollectionElementType(); + + for (Object value : values) { + ret.add(convertAvroFieldStrict(value, elemAvroSchema, elemFieldType)); + } + + return ret; + } + + private static Object convertMapStrict( + Map values, + org.apache.avro.Schema valueAvroSchema, + FieldType fieldType) { + checkTypeName(fieldType.getTypeName(), TypeName.MAP, "map"); + checkNotNull(fieldType.getMapKeyType()); + checkNotNull(fieldType.getMapValueType()); + + if (!fieldType.getMapKeyType().equals(FieldType.STRING)) { + throw new IllegalArgumentException( + "Can't convert 'string' map keys to " + fieldType.getMapKeyType()); + } + + Map ret = new HashMap<>(); + + for (Map.Entry value : values.entrySet()) { + ret.put( + convertStringStrict(value.getKey(), fieldType.getMapKeyType()), + convertAvroFieldStrict(value.getValue(), valueAvroSchema, fieldType.getMapValueType())); + } + + return ret; + } + + private static void checkTypeName(TypeName got, TypeName expected, String label) { + checkArgument( + got.equals(expected), "Can't convert '%s' to %s, expected: %s", label, got, expected); + } + + /** + * Helper factory to build Avro Logical types schemas for SQL *CHAR types. This method represents + * the logical as Hive does. + */ + private static org.apache.avro.Schema buildHiveLogicalTypeSchema( + String hiveLogicalType, int size) { + String schemaJson = + String.format( + "{\"type\": \"string\", \"logicalType\": \"%s\", \"maxLength\": %s}", + hiveLogicalType, size); + return new org.apache.avro.Schema.Parser().parse(schemaJson); + } +} diff --git a/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/utils/package-info.java b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/utils/package-info.java new file mode 100644 index 0000000000000..df84a556c28c5 --- /dev/null +++ b/sdks/java/extensions/avro/src/main/java/org/apache/beam/sdk/extensions/avro/schemas/utils/package-info.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** Defines utilities for deailing with schemas using Apache Avro. */ +@DefaultAnnotation(NonNull.class) +@Experimental(Kind.EXTENSION) +package org.apache.beam.sdk.extensions.avro.schemas.utils; + +import edu.umd.cs.findbugs.annotations.DefaultAnnotation; +import org.apache.beam.sdk.annotations.Experimental; +import org.apache.beam.sdk.annotations.Experimental.Kind; +import org.checkerframework.checker.nullness.qual.NonNull; diff --git a/sdks/java/extensions/avro/src/test/avro/org/apache/beam/sdk/extensions/avro/io/user.avsc b/sdks/java/extensions/avro/src/test/avro/org/apache/beam/sdk/extensions/avro/io/user.avsc new file mode 100644 index 0000000000000..134829746e496 --- /dev/null +++ b/sdks/java/extensions/avro/src/test/avro/org/apache/beam/sdk/extensions/avro/io/user.avsc @@ -0,0 +1,10 @@ +{ + "namespace": "org.apache.beam.sdk.extensions.avro.io", + "type": "record", + "name": "AvroGeneratedUser", + "fields": [ + { "name": "name", "type": "string"}, + { "name": "favorite_number", "type": ["int", "null"]}, + { "name": "favorite_color", "type": ["string", "null"]} + ] +} diff --git a/sdks/java/extensions/avro/src/test/avro/org/apache/beam/sdk/extensions/avro/schemas/test.avsc b/sdks/java/extensions/avro/src/test/avro/org/apache/beam/sdk/extensions/avro/schemas/test.avsc new file mode 100644 index 0000000000000..a7d13e4ce451b --- /dev/null +++ b/sdks/java/extensions/avro/src/test/avro/org/apache/beam/sdk/extensions/avro/schemas/test.avsc @@ -0,0 +1,30 @@ +{ + "namespace": "org.apache.beam.sdk.extensions.avro.schemas", + "type": "record", + "name": "TestAvro", + "fields": [ + { "name": "bool_non_nullable", "type": "boolean"}, + { "name": "int", "type": ["int", "null"]}, + { "name": "long", "type": ["long", "null"]}, + { "name": "float", "type": ["float", "null"]}, + { "name": "double", "type": ["double", "null"]}, + { "name": "string", "type": ["string", "null"]}, + { "name": "bytes", "type": ["bytes", "null"]}, + { "name": "fixed", "type": {"type": "fixed", "size": 4, "name": "fixed4"} }, + { "name": "date", "type": {"type": "int", "logicalType": "date"} }, + { "name": "timestampMillis", "type": {"type": "long", "logicalType": "timestamp-millis"} }, + { "name": "TestEnum", "type": {"name": "TestEnum", "type": "enum", "symbols": ["abc","cde"] } }, + { "name": "row", "type": ["null", { + "type": "record", + "name": "TestAvroNested", + "fields": [ + { "name": "BOOL_NON_NULLABLE", "type": "boolean"}, + { "name": "int", "type": ["int", "null"]} + ] + }] + }, + { "name": "array", "type":["null", {"type": "array", "items": ["null", "TestAvroNested"] }]}, + { "name": "map", "type": ["null", {"type": "map", "values": ["null", "TestAvroNested"]}]} + ] +} + diff --git a/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/coders/AvroCoderTest.java b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/coders/AvroCoderTest.java new file mode 100644 index 0000000000000..730ccf60e0b90 --- /dev/null +++ b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/coders/AvroCoderTest.java @@ -0,0 +1,1108 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.avro.coders; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.io.Input; +import com.esotericsoftware.kryo.io.Output; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; +import org.apache.avro.AvroRuntimeException; +import org.apache.avro.Schema; +import org.apache.avro.SchemaBuilder; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.reflect.AvroName; +import org.apache.avro.reflect.AvroSchema; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.reflect.Stringable; +import org.apache.avro.reflect.Union; +import org.apache.avro.specific.SpecificData; +import org.apache.avro.specific.SpecificRecord; +import org.apache.avro.util.Utf8; +import org.apache.beam.sdk.coders.Coder.Context; +import org.apache.beam.sdk.coders.Coder.NonDeterministicException; +import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.coders.SerializableCoder; +import org.apache.beam.sdk.extensions.avro.schemas.TestAvro; +import org.apache.beam.sdk.extensions.avro.schemas.TestAvroNested; +import org.apache.beam.sdk.extensions.avro.schemas.TestEnum; +import org.apache.beam.sdk.extensions.avro.schemas.fixed4; +import org.apache.beam.sdk.testing.CoderProperties; +import org.apache.beam.sdk.testing.InterceptingUrlClassLoader; +import org.apache.beam.sdk.testing.NeedsRunner; +import org.apache.beam.sdk.testing.PAssert; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.transforms.Create; +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.util.CoderUtils; +import org.apache.beam.sdk.util.InstanceBuilder; +import org.apache.beam.sdk.util.SerializableUtils; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.TypeDescriptor; +import 
org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.hamcrest.Description; +import org.hamcrest.Matcher; +import org.hamcrest.Matchers; +import org.hamcrest.TypeSafeMatcher; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.joda.time.LocalDate; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.objenesis.strategy.StdInstantiatorStrategy; + +/** Tests for {@link AvroCoder}. */ +@RunWith(JUnit4.class) +public class AvroCoderTest { + + public static final DateTime DATETIME_A = + new DateTime().withDate(1994, 10, 31).withZone(DateTimeZone.UTC); + public static final DateTime DATETIME_B = + new DateTime().withDate(1997, 4, 25).withZone(DateTimeZone.UTC); + private static final TestAvroNested AVRO_NESTED_SPECIFIC_RECORD = new TestAvroNested(true, 42); + private static final TestAvro AVRO_SPECIFIC_RECORD = + new TestAvro( + true, + 43, + 44L, + 44.1f, + 44.2d, + "mystring", + ByteBuffer.wrap(new byte[] {1, 2, 3, 4}), + new fixed4(new byte[] {1, 2, 3, 4}), + new LocalDate(1979, 3, 14), + new DateTime().withDate(1979, 3, 14).withTime(1, 2, 3, 4), + TestEnum.abc, + AVRO_NESTED_SPECIFIC_RECORD, + ImmutableList.of(AVRO_NESTED_SPECIFIC_RECORD, AVRO_NESTED_SPECIFIC_RECORD), + ImmutableMap.of("k1", AVRO_NESTED_SPECIFIC_RECORD, "k2", AVRO_NESTED_SPECIFIC_RECORD)); + + @DefaultCoder(AvroCoder.class) + private static class Pojo { + public String text; + public int count; + + @AvroSchema("{\"type\": \"long\", \"logicalType\": \"timestamp-millis\"}") + public DateTime timestamp; + + // Empty constructor required for Avro decoding. + @SuppressWarnings("unused") + public Pojo() {} + + public Pojo(String text, int count, DateTime timestamp) { + this.text = text; + this.count = count; + this.timestamp = timestamp; + } + + // auto-generated + @Override + public boolean equals(@Nullable Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + Pojo pojo = (Pojo) o; + + if (count != pojo.count) { + return false; + } + if (text != null ? !text.equals(pojo.text) : pojo.text != null) { + return false; + } + if (timestamp != null ? 
!timestamp.equals(pojo.timestamp) : pojo.timestamp != null) { + return false; + } + + return true; + } + + @Override + public int hashCode() { + return 0; + } + + @Override + public String toString() { + return "Pojo{" + + "text='" + + text + + '\'' + + ", count=" + + count + + ", timestamp=" + + timestamp + + '}'; + } + } + + private static class GetTextFn extends DoFn { + @ProcessElement + public void processElement(ProcessContext c) { + c.output(c.element().text); + } + } + + @Rule public TestPipeline pipeline = TestPipeline.create(); + + @Test + public void testAvroCoderEncoding() throws Exception { + AvroCoder coder = AvroCoder.of(Pojo.class); + CoderProperties.coderSerializable(coder); + AvroCoder copy = SerializableUtils.clone(coder); + + Pojo pojo = new Pojo("foo", 3, DATETIME_A); + Pojo equalPojo = new Pojo("foo", 3, DATETIME_A); + Pojo otherPojo = new Pojo("bar", -19, DATETIME_B); + CoderProperties.coderConsistentWithEquals(coder, pojo, equalPojo); + CoderProperties.coderConsistentWithEquals(copy, pojo, equalPojo); + CoderProperties.coderConsistentWithEquals(coder, pojo, otherPojo); + CoderProperties.coderConsistentWithEquals(copy, pojo, otherPojo); + } + + /** + * Tests that {@link AvroCoder} works around issues in Avro where cache classes might be from the + * wrong ClassLoader, causing confusing "Cannot cast X to X" error messages. + */ + @SuppressWarnings("ReturnValueIgnored") + @Test + public void testTwoClassLoaders() throws Exception { + ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader(); + ClassLoader loader1 = + new InterceptingUrlClassLoader(contextClassLoader, AvroCoderTestPojo.class.getName()); + ClassLoader loader2 = + new InterceptingUrlClassLoader(contextClassLoader, AvroCoderTestPojo.class.getName()); + + Class pojoClass1 = loader1.loadClass(AvroCoderTestPojo.class.getName()); + Class pojoClass2 = loader2.loadClass(AvroCoderTestPojo.class.getName()); + + Object pojo1 = InstanceBuilder.ofType(pojoClass1).withArg(String.class, "hello").build(); + Object pojo2 = InstanceBuilder.ofType(pojoClass2).withArg(String.class, "goodbye").build(); + + // Confirm incompatibility + try { + pojoClass2.cast(pojo1); + fail("Expected ClassCastException; without it, this test is vacuous"); + } catch (ClassCastException e) { + // g2g + } + + // The first coder is expected to populate the Avro SpecificData cache + // The second coder is expected to be corrupted if the caching is done wrong. + AvroCoder avroCoder1 = (AvroCoder) AvroCoder.of(pojoClass1); + AvroCoder avroCoder2 = (AvroCoder) AvroCoder.of(pojoClass2); + + Object cloned1 = CoderUtils.clone(avroCoder1, pojo1); + Object cloned2 = CoderUtils.clone(avroCoder2, pojo2); + + // Confirming that the uncorrupted coder is fine + pojoClass1.cast(cloned1); + + // Confirmed to fail prior to the fix + pojoClass2.cast(cloned2); + } + + /** + * Confirm that we can serialize and deserialize an AvroCoder object and still decode after. + * (https://github.com/apache/beam/issues/18022). 
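The coder tests in this file all reduce to the same round trip; as a minimal sketch (the generic helper below is illustrative, not part of the test), encoding a value with AvroCoder and decoding the bytes back is expected to yield an equal value:

import org.apache.beam.sdk.extensions.avro.coders.AvroCoder;
import org.apache.beam.sdk.util.CoderUtils;

public class AvroCoderRoundTripSketch {
  // Encodes a value to Avro binary with AvroCoder and decodes it back;
  // for a well-behaved type the result equals the input.
  public static <T> T roundTrip(Class<T> clazz, T value) throws Exception {
    AvroCoder<T> coder = AvroCoder.of(clazz);
    byte[] bytes = CoderUtils.encodeToByteArray(coder, value);
    return CoderUtils.decodeFromByteArray(coder, bytes);
  }
}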
+ * + * @throws Exception + */ + @Test + public void testTransientFieldInitialization() throws Exception { + Pojo value = new Pojo("Hello", 42, DATETIME_A); + AvroCoder coder = AvroCoder.of(Pojo.class); + + // Serialization of object + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ObjectOutputStream out = new ObjectOutputStream(bos); + out.writeObject(coder); + + // De-serialization of object + ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); + ObjectInputStream in = new ObjectInputStream(bis); + AvroCoder copied = (AvroCoder) in.readObject(); + + CoderProperties.coderDecodeEncodeEqual(copied, value); + } + + /** + * Confirm that we can serialize and deserialize an AvroCoder object using Kryo. (BEAM-626). + * + * @throws Exception + */ + @Test + public void testKryoSerialization() throws Exception { + Pojo value = new Pojo("Hello", 42, DATETIME_A); + AvroCoder coder = AvroCoder.of(Pojo.class); + + // Kryo instantiation + Kryo kryo = new Kryo(); + kryo.setInstantiatorStrategy(new StdInstantiatorStrategy()); + + // Serialization of object without any memoization + ByteArrayOutputStream coderWithoutMemoizationBos = new ByteArrayOutputStream(); + try (Output output = new Output(coderWithoutMemoizationBos)) { + kryo.writeObject(output, coder); + } + + // Force thread local memoization to store values. + CoderProperties.coderDecodeEncodeEqual(coder, value); + + // Serialization of object with memoized fields + ByteArrayOutputStream coderWithMemoizationBos = new ByteArrayOutputStream(); + try (Output output = new Output(coderWithMemoizationBos)) { + kryo.writeObject(output, coder); + } + + // Copy empty and memoized variants of the Coder + ByteArrayInputStream bisWithoutMemoization = + new ByteArrayInputStream(coderWithoutMemoizationBos.toByteArray()); + AvroCoder copiedWithoutMemoization = + (AvroCoder) kryo.readObject(new Input(bisWithoutMemoization), AvroCoder.class); + ByteArrayInputStream bisWithMemoization = + new ByteArrayInputStream(coderWithMemoizationBos.toByteArray()); + AvroCoder copiedWithMemoization = + (AvroCoder) kryo.readObject(new Input(bisWithMemoization), AvroCoder.class); + + CoderProperties.coderDecodeEncodeEqual(copiedWithoutMemoization, value); + CoderProperties.coderDecodeEncodeEqual(copiedWithMemoization, value); + } + + @Test + public void testPojoEncoding() throws Exception { + Pojo value = new Pojo("Hello", 42, DATETIME_A); + AvroCoder coder = AvroCoder.of(Pojo.class); + + CoderProperties.coderDecodeEncodeEqual(coder, value); + } + + @Test + public void testSpecificRecordEncoding() throws Exception { + AvroCoder coder = + AvroCoder.of(TestAvro.class, AVRO_SPECIFIC_RECORD.getSchema(), false); + + assertTrue(SpecificRecord.class.isAssignableFrom(coder.getType())); + CoderProperties.coderDecodeEncodeEqual(coder, AVRO_SPECIFIC_RECORD); + } + + @Test + public void testReflectRecordEncoding() throws Exception { + AvroCoder coder = AvroCoder.of(TestAvro.class, true); + AvroCoder coderWithSchema = + AvroCoder.of(TestAvro.class, AVRO_SPECIFIC_RECORD.getSchema(), true); + + assertTrue(SpecificRecord.class.isAssignableFrom(coder.getType())); + assertTrue(SpecificRecord.class.isAssignableFrom(coderWithSchema.getType())); + + CoderProperties.coderDecodeEncodeEqual(coder, AVRO_SPECIFIC_RECORD); + CoderProperties.coderDecodeEncodeEqual(coderWithSchema, AVRO_SPECIFIC_RECORD); + } + + @Test + public void testDisableReflectionEncoding() { + try { + AvroCoder.of(Pojo.class, false); + fail("When userReclectApi is disable, schema should not be generated 
through reflection"); + } catch (AvroRuntimeException e) { + String message = + "avro.shaded.com.google.common.util.concurrent.UncheckedExecutionException: " + + "org.apache.avro.AvroRuntimeException: " + + "Not a Specific class: class org.apache.beam.sdk.extensions.avro.coders.AvroCoderTest$Pojo"; + assertEquals(message, e.getMessage()); + } + } + + @Test + public void testGenericRecordEncoding() throws Exception { + String schemaString = + "{\"namespace\": \"example.avro\",\n" + + " \"type\": \"record\",\n" + + " \"name\": \"User\",\n" + + " \"fields\": [\n" + + " {\"name\": \"name\", \"type\": \"string\"},\n" + + " {\"name\": \"favorite_number\", \"type\": [\"int\", \"null\"]},\n" + + " {\"name\": \"favorite_color\", \"type\": [\"string\", \"null\"]}\n" + + " ]\n" + + "}"; + Schema schema = new Schema.Parser().parse(schemaString); + + GenericRecord before = new GenericData.Record(schema); + before.put("name", "Bob"); + before.put("favorite_number", 256); + // Leave favorite_color null + + AvroCoder coder = AvroCoder.of(GenericRecord.class, schema); + + CoderProperties.coderDecodeEncodeEqual(coder, before); + assertEquals(schema, coder.getSchema()); + } + + @Test + public void testEncodingNotBuffered() throws Exception { + // This test ensures that the coder doesn't read ahead and buffer data. + // Reading ahead causes a problem if the stream consists of records of different + // types. + Pojo before = new Pojo("Hello", 42, DATETIME_A); + + AvroCoder coder = AvroCoder.of(Pojo.class); + SerializableCoder intCoder = SerializableCoder.of(Integer.class); + + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + + Context context = Context.NESTED; + coder.encode(before, outStream, context); + intCoder.encode(10, outStream, context); + + ByteArrayInputStream inStream = new ByteArrayInputStream(outStream.toByteArray()); + + Pojo after = coder.decode(inStream, context); + assertEquals(before, after); + + Integer intAfter = intCoder.decode(inStream, context); + assertEquals(Integer.valueOf(10), intAfter); + } + + @Test + @Category(NeedsRunner.class) + public void testDefaultCoder() throws Exception { + // Use MyRecord as input and output types without explicitly specifying + // a coder (this uses the default coders, which may not be AvroCoder). + PCollection output = + pipeline + .apply(Create.of(new Pojo("hello", 1, DATETIME_A), new Pojo("world", 2, DATETIME_B))) + .apply(ParDo.of(new GetTextFn())); + + PAssert.that(output).containsInAnyOrder("hello", "world"); + pipeline.run(); + } + + @Test + public void testAvroCoderIsSerializable() throws Exception { + AvroCoder coder = AvroCoder.of(Pojo.class); + + // Check that the coder is serializable using the regular JSON approach. + SerializableUtils.ensureSerializable(coder); + } + + @Test + public void testAvroSpecificCoderIsSerializable() throws Exception { + AvroCoder coder = AvroCoder.of(TestAvro.class, false); + + // Check that the coder is serializable using the regular JSON approach. 
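As a side note, the GenericRecord assembled field-by-field in testGenericRecordEncoding above could equally be built with Avro's GenericRecordBuilder, which validates field names and applies schema defaults; a small sketch assuming the same three-field user schema:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;

public class GenericRecordBuilderSketch {
  public static GenericRecord buildUser(Schema userSchema) {
    // Every field without a schema default must be set, so the nullable
    // favorite_color is assigned null explicitly here.
    return new GenericRecordBuilder(userSchema)
        .set("name", "Bob")
        .set("favorite_number", 256)
        .set("favorite_color", null)
        .build();
  }
}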
+ SerializableUtils.ensureSerializable(coder); + } + + private void assertDeterministic(AvroCoder coder) { + try { + coder.verifyDeterministic(); + } catch (NonDeterministicException e) { + fail("Expected " + coder + " to be deterministic, but got:\n" + e); + } + } + + private void assertNonDeterministic(AvroCoder coder, Matcher reason1) { + try { + coder.verifyDeterministic(); + fail("Expected " + coder + " to be non-deterministic."); + } catch (NonDeterministicException e) { + assertThat(e.getReasons(), Matchers.iterableWithSize(1)); + assertThat(e.getReasons(), Matchers.contains(reason1)); + } + } + + @Test + public void testDeterministicInteger() { + assertDeterministic(AvroCoder.of(Integer.class)); + } + + @Test + public void testDeterministicInt() { + assertDeterministic(AvroCoder.of(int.class)); + } + + private static class SimpleDeterministicClass { + @SuppressWarnings("unused") + private Integer intField; + + @SuppressWarnings("unused") + private char charField; + + @SuppressWarnings("unused") + private Integer[] intArray; + + @SuppressWarnings("unused") + private Utf8 utf8field; + } + + @Test + public void testDeterministicSimple() { + assertDeterministic(AvroCoder.of(SimpleDeterministicClass.class)); + } + + private static class UnorderedMapClass { + @SuppressWarnings("unused") + private Map mapField; + } + + private Matcher reason(final String prefix, final String messagePart) { + return new TypeSafeMatcher(String.class) { + @Override + public void describeTo(Description description) { + description.appendText( + String.format("Reason starting with '%s:' containing '%s'", prefix, messagePart)); + } + + @Override + protected boolean matchesSafely(String item) { + return item.startsWith(prefix + ":") && item.contains(messagePart); + } + }; + } + + private Matcher reasonClass(Class clazz, String message) { + return reason(clazz.getName(), message); + } + + private Matcher reasonField(Class clazz, String field, String message) { + return reason(clazz.getName() + "#" + field, message); + } + + @Test + public void testDeterministicUnorderedMap() { + assertNonDeterministic( + AvroCoder.of(UnorderedMapClass.class), + reasonField( + UnorderedMapClass.class, + "mapField", + "java.util.Map " + + "may not be deterministically ordered")); + } + + private static class NonDeterministicArray { + @SuppressWarnings("unused") + private UnorderedMapClass[] arrayField; + } + + @Test + public void testDeterministicNonDeterministicArray() { + assertNonDeterministic( + AvroCoder.of(NonDeterministicArray.class), + reasonField( + UnorderedMapClass.class, + "mapField", + "java.util.Map" + + " may not be deterministically ordered")); + } + + private static class SubclassOfUnorderedMapClass extends UnorderedMapClass {} + + @Test + public void testDeterministicNonDeterministicChild() { + // Super class has non deterministic fields. + assertNonDeterministic( + AvroCoder.of(SubclassOfUnorderedMapClass.class), + reasonField(UnorderedMapClass.class, "mapField", "may not be deterministically ordered")); + } + + private static class SubclassHidingParent extends UnorderedMapClass { + @SuppressWarnings("unused") + @AvroName("mapField2") // AvroName is not enough + private int mapField; + } + + @Test + public void testAvroProhibitsShadowing() { + // This test verifies that Avro won't serialize a class with two fields of + // the same name. This is important for our error reporting, and also how + // we lookup a field. 
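The determinism checks exercised in this block matter beyond the unit tests themselves, since Beam needs a deterministic key coder for grouping; a minimal sketch (a hypothetical helper, not part of the test) of the same verifyDeterministic call made up front in user code:

import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.extensions.avro.coders.AvroCoder;

public class DeterminismCheckSketch {
  // Returns true when AvroCoder believes the class encodes deterministically,
  // e.g. no unordered Maps, HashSets, or recursive fields.
  public static boolean encodesDeterministically(Class<?> clazz) {
    try {
      AvroCoder.of(clazz).verifyDeterministic();
      return true;
    } catch (Coder.NonDeterministicException e) {
      // e.getReasons() names the offending fields, as asserted in the tests above.
      return false;
    }
  }
}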
+ try { + ReflectData.get().getSchema(SubclassHidingParent.class); + fail("Expected AvroTypeException"); + } catch (AvroRuntimeException e) { + assertThat(e.getMessage(), containsString("mapField")); + assertThat(e.getMessage(), containsString("two fields named")); + } + } + + private static class FieldWithAvroName { + @AvroName("name") + @SuppressWarnings("unused") + private int someField; + } + + @Test + public void testDeterministicWithAvroName() { + assertDeterministic(AvroCoder.of(FieldWithAvroName.class)); + } + + @Test + public void testDeterminismSortedMap() { + assertDeterministic(AvroCoder.of(StringSortedMapField.class)); + } + + private static class StringSortedMapField { + @SuppressWarnings("unused") + SortedMap sortedMapField; + } + + @Test + public void testDeterminismTreeMapValue() { + // The value is non-deterministic, so we should fail. + assertNonDeterministic( + AvroCoder.of(TreeMapNonDetValue.class), + reasonField( + UnorderedMapClass.class, + "mapField", + "java.util.Map " + + "may not be deterministically ordered")); + } + + private static class TreeMapNonDetValue { + @SuppressWarnings("unused") + TreeMap nonDeterministicField; + } + + @Test + public void testDeterminismUnorderedMap() { + // LinkedHashMap is not deterministically ordered, so we should fail. + assertNonDeterministic( + AvroCoder.of(LinkedHashMapField.class), + reasonField( + LinkedHashMapField.class, + "nonDeterministicMap", + "java.util.LinkedHashMap " + + "may not be deterministically ordered")); + } + + private static class LinkedHashMapField { + @SuppressWarnings("unused") + LinkedHashMap nonDeterministicMap; + } + + @Test + public void testDeterminismCollection() { + assertNonDeterministic( + AvroCoder.of(StringCollection.class), + reasonField( + StringCollection.class, + "stringCollection", + "java.util.Collection may not be deterministically ordered")); + } + + private static class StringCollection { + @SuppressWarnings("unused") + Collection stringCollection; + } + + @Test + public void testDeterminismList() { + assertDeterministic(AvroCoder.of(StringList.class)); + assertDeterministic(AvroCoder.of(StringArrayList.class)); + } + + private static class StringList { + @SuppressWarnings("unused") + List stringCollection; + } + + private static class StringArrayList { + @SuppressWarnings("unused") + ArrayList stringCollection; + } + + @Test + public void testDeterminismSet() { + assertDeterministic(AvroCoder.of(StringSortedSet.class)); + assertDeterministic(AvroCoder.of(StringTreeSet.class)); + assertNonDeterministic( + AvroCoder.of(StringHashSet.class), + reasonField( + StringHashSet.class, + "stringCollection", + "java.util.HashSet may not be deterministically ordered")); + } + + private static class StringSortedSet { + @SuppressWarnings("unused") + SortedSet stringCollection; + } + + private static class StringTreeSet { + @SuppressWarnings("unused") + TreeSet stringCollection; + } + + private static class StringHashSet { + @SuppressWarnings("unused") + HashSet stringCollection; + } + + @Test + public void testDeterminismCollectionValue() { + assertNonDeterministic( + AvroCoder.of(OrderedSetOfNonDetValues.class), + reasonField(UnorderedMapClass.class, "mapField", "may not be deterministically ordered")); + assertNonDeterministic( + AvroCoder.of(ListOfNonDetValues.class), + reasonField(UnorderedMapClass.class, "mapField", "may not be deterministically ordered")); + } + + private static class OrderedSetOfNonDetValues { + @SuppressWarnings("unused") + SortedSet set; + } + + private static class 
ListOfNonDetValues { + @SuppressWarnings("unused") + List set; + } + + @Test + public void testDeterminismUnion() { + assertDeterministic(AvroCoder.of(DeterministicUnionBase.class)); + assertNonDeterministic( + AvroCoder.of(NonDeterministicUnionBase.class), + reasonField(UnionCase3.class, "mapField", "may not be deterministically ordered")); + } + + @Test + public void testDeterminismStringable() { + assertDeterministic(AvroCoder.of(String.class)); + assertNonDeterministic( + AvroCoder.of(StringableClass.class), + reasonClass(StringableClass.class, "may not have deterministic #toString()")); + } + + @Stringable + private static class StringableClass {} + + @Test + public void testDeterminismCyclicClass() { + assertNonDeterministic( + AvroCoder.of(Cyclic.class), + reasonField(Cyclic.class, "cyclicField", "appears recursively")); + assertNonDeterministic( + AvroCoder.of(CyclicField.class), + reasonField(Cyclic.class, "cyclicField", Cyclic.class.getName() + " appears recursively")); + assertNonDeterministic( + AvroCoder.of(IndirectCycle1.class), + reasonField( + IndirectCycle2.class, + "field2", + IndirectCycle1.class.getName() + " appears recursively")); + } + + private static class Cyclic { + @SuppressWarnings("unused") + int intField; + + @SuppressWarnings("unused") + Cyclic cyclicField; + } + + private static class CyclicField { + @SuppressWarnings("unused") + Cyclic cyclicField2; + } + + private static class IndirectCycle1 { + @SuppressWarnings("unused") + IndirectCycle2 field1; + } + + private static class IndirectCycle2 { + @SuppressWarnings("unused") + IndirectCycle1 field2; + } + + @Test + public void testDeterminismHasGenericRecord() { + assertDeterministic(AvroCoder.of(HasGenericRecord.class)); + } + + private static class HasGenericRecord { + @AvroSchema( + "{\"name\": \"bar\", \"type\": \"record\", \"fields\": [" + + "{\"name\": \"foo\", \"type\": \"int\"}]}") + GenericRecord genericRecord; + } + + @Test + public void testDeterminismHasCustomSchema() { + assertNonDeterministic( + AvroCoder.of(HasCustomSchema.class), + reasonField( + HasCustomSchema.class, + "withCustomSchema", + "Custom schemas are only supported for subtypes of IndexedRecord.")); + } + + private static class HasCustomSchema { + @AvroSchema( + "{\"name\": \"bar\", \"type\": \"record\", \"fields\": [" + + "{\"name\": \"foo\", \"type\": \"int\"}]}") + int withCustomSchema; + } + + @Test + public void testAvroCoderTreeMapDeterminism() throws Exception, NonDeterministicException { + TreeMapField size1 = new TreeMapField(); + TreeMapField size2 = new TreeMapField(); + + // Different order for entries + size1.field.put("hello", "world"); + size1.field.put("another", "entry"); + + size2.field.put("another", "entry"); + size2.field.put("hello", "world"); + + AvroCoder coder = AvroCoder.of(TreeMapField.class); + coder.verifyDeterministic(); + + ByteArrayOutputStream outStream1 = new ByteArrayOutputStream(); + ByteArrayOutputStream outStream2 = new ByteArrayOutputStream(); + + Context context = Context.NESTED; + coder.encode(size1, outStream1, context); + coder.encode(size2, outStream2, context); + + assertArrayEquals(outStream1.toByteArray(), outStream2.toByteArray()); + } + + private static class TreeMapField { + private TreeMap field = new TreeMap<>(); + } + + @Union({UnionCase1.class, UnionCase2.class}) + private abstract static class DeterministicUnionBase {} + + @Union({UnionCase1.class, UnionCase2.class, UnionCase3.class}) + private abstract static class NonDeterministicUnionBase {} + + private static class 
UnionCase1 extends DeterministicUnionBase {} + + private static class UnionCase2 extends DeterministicUnionBase { + @SuppressWarnings("unused") + String field; + } + + private static class UnionCase3 extends NonDeterministicUnionBase { + @SuppressWarnings("unused") + private Map mapField; + } + + @Test + public void testAvroCoderSimpleSchemaDeterminism() { + assertDeterministic(AvroCoder.of(SchemaBuilder.record("someRecord").fields().endRecord())); + assertDeterministic( + AvroCoder.of( + SchemaBuilder.record("someRecord") + .fields() + .name("int") + .type() + .intType() + .noDefault() + .endRecord())); + assertDeterministic( + AvroCoder.of( + SchemaBuilder.record("someRecord") + .fields() + .name("string") + .type() + .stringType() + .noDefault() + .endRecord())); + + assertNonDeterministic( + AvroCoder.of( + SchemaBuilder.record("someRecord") + .fields() + .name("map") + .type() + .map() + .values() + .stringType() + .noDefault() + .endRecord()), + reason("someRecord.map", "HashMap to represent MAPs")); + + assertDeterministic( + AvroCoder.of( + SchemaBuilder.record("someRecord") + .fields() + .name("array") + .type() + .array() + .items() + .stringType() + .noDefault() + .endRecord())); + + assertDeterministic( + AvroCoder.of( + SchemaBuilder.record("someRecord") + .fields() + .name("enum") + .type() + .enumeration("anEnum") + .symbols("s1", "s2") + .enumDefault("s1") + .endRecord())); + + assertDeterministic( + AvroCoder.of( + SchemaBuilder.unionOf() + .intType() + .and() + .record("someRecord") + .fields() + .nullableString("someField", "") + .endRecord() + .endUnion())); + } + + @Test + public void testAvroCoderStrings() { + // Custom Strings in Records + assertDeterministic( + AvroCoder.of( + SchemaBuilder.record("someRecord") + .fields() + .name("string") + .prop(SpecificData.CLASS_PROP, "java.lang.String") + .type() + .stringType() + .noDefault() + .endRecord())); + assertNonDeterministic( + AvroCoder.of( + SchemaBuilder.record("someRecord") + .fields() + .name("string") + .prop(SpecificData.CLASS_PROP, "unknownString") + .type() + .stringType() + .noDefault() + .endRecord()), + reason("someRecord.string", "unknownString is not known to be deterministic")); + + // Custom Strings in Unions + assertNonDeterministic( + AvroCoder.of( + SchemaBuilder.unionOf() + .intType() + .and() + .record("someRecord") + .fields() + .name("someField") + .prop(SpecificData.CLASS_PROP, "unknownString") + .type() + .stringType() + .noDefault() + .endRecord() + .endUnion()), + reason("someRecord.someField", "unknownString is not known to be deterministic")); + } + + @Test + public void testAvroCoderNestedRecords() { + // Nested Record + assertDeterministic( + AvroCoder.of( + SchemaBuilder.record("nestedRecord") + .fields() + .name("subRecord") + .type() + .record("subRecord") + .fields() + .name("innerField") + .type() + .stringType() + .noDefault() + .endRecord() + .noDefault() + .endRecord())); + } + + @Test + public void testAvroCoderCyclicRecords() { + // Recursive record + assertNonDeterministic( + AvroCoder.of( + SchemaBuilder.record("cyclicRecord") + .fields() + .name("cycle") + .type("cyclicRecord") + .noDefault() + .endRecord()), + reason("cyclicRecord.cycle", "cyclicRecord appears recursively")); + } + + private static class NullableField { + @SuppressWarnings("unused") + private @Nullable String nullable; + } + + @Test + public void testNullableField() { + assertDeterministic(AvroCoder.of(NullableField.class)); + } + + private static class NullableNonDeterministicField { + 
@SuppressWarnings("unused") + private @Nullable NonDeterministicArray nullableNonDetArray; + } + + private static class NullableCyclic { + @SuppressWarnings("unused") + private @Nullable NullableCyclic nullableNullableCyclicField; + } + + private static class NullableCyclicField { + @SuppressWarnings("unused") + private @Nullable Cyclic nullableCyclicField; + } + + @Test + public void testNullableNonDeterministicField() { + assertNonDeterministic( + AvroCoder.of(NullableCyclic.class), + reasonField( + NullableCyclic.class, + "nullableNullableCyclicField", + NullableCyclic.class.getName() + " appears recursively")); + assertNonDeterministic( + AvroCoder.of(NullableCyclicField.class), + reasonField(Cyclic.class, "cyclicField", Cyclic.class.getName() + " appears recursively")); + assertNonDeterministic( + AvroCoder.of(NullableNonDeterministicField.class), + reasonField(UnorderedMapClass.class, "mapField", " may not be deterministically ordered")); + } + + /** + * Tests that a parameterized class can have an automatically generated schema if the generic + * field is annotated with a union tag. + */ + @Test + public void testGenericClassWithUnionAnnotation() throws Exception { + // Cast is safe as long as the same coder is used for encoding and decoding. + @SuppressWarnings({"unchecked", "rawtypes"}) + AvroCoder> coder = + (AvroCoder) AvroCoder.of(GenericWithAnnotation.class); + + assertThat( + coder.getSchema().getField("onlySomeTypesAllowed").schema().getType(), + equalTo(Schema.Type.UNION)); + + CoderProperties.coderDecodeEncodeEqual(coder, new GenericWithAnnotation<>("hello")); + } + + private static class GenericWithAnnotation { + @AvroSchema("[\"string\", \"int\"]") + private T onlySomeTypesAllowed; + + public GenericWithAnnotation(T value) { + onlySomeTypesAllowed = value; + } + + // For deserialization only + @SuppressWarnings("unused") + protected GenericWithAnnotation() {} + + @Override + public boolean equals(@Nullable Object other) { + return other instanceof GenericWithAnnotation + && onlySomeTypesAllowed.equals(((GenericWithAnnotation) other).onlySomeTypesAllowed); + } + + @Override + public int hashCode() { + return Objects.hash(getClass(), onlySomeTypesAllowed); + } + } + + @Test + public void testAvroCoderForGenerics() throws Exception { + Schema fooSchema = AvroCoder.of(Foo.class).getSchema(); + Schema schema = + new Schema.Parser() + .parse( + "{" + + "\"type\":\"record\"," + + "\"name\":\"SomeGeneric\"," + + "\"namespace\":\"ns\"," + + "\"fields\":[" + + " {\"name\":\"foo\", \"type\":" + + fooSchema.toString() + + "}" + + "]}"); + @SuppressWarnings("rawtypes") + AvroCoder coder = AvroCoder.of(SomeGeneric.class, schema); + + assertNonDeterministic(coder, reasonField(SomeGeneric.class, "foo", "erasure")); + } + + @Test + public void testEncodedTypeDescriptor() throws Exception { + AvroCoder coder = AvroCoder.of(Pojo.class); + assertThat(coder.getEncodedTypeDescriptor(), equalTo(TypeDescriptor.of(Pojo.class))); + } + + private static class SomeGeneric { + @SuppressWarnings("unused") + private T foo; + } + + private static class Foo { + @SuppressWarnings("unused") + String id; + } +} diff --git a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/SparkMetricsContainerStepMap.java b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/coders/AvroCoderTestPojo.java similarity index 57% rename from runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/SparkMetricsContainerStepMap.java 
rename to sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/coders/AvroCoderTestPojo.java index 533dceb42e26e..9d1700313dfaa 100644 --- a/runners/spark/3/src/main/java/org/apache/beam/runners/spark/structuredstreaming/metrics/SparkMetricsContainerStepMap.java +++ b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/coders/AvroCoderTestPojo.java @@ -15,29 +15,37 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.beam.runners.spark.structuredstreaming.metrics; +package org.apache.beam.sdk.extensions.avro.coders; -import org.apache.beam.runners.core.metrics.MetricsContainerStepMap; +import java.util.Objects; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.MoreObjects; import org.checkerframework.checker.nullness.qual.Nullable; -/** - * Sole purpose of this class is to override {@link #toString()} of {@link MetricsContainerStepMap} - * in order to show meaningful metrics in Spark Web Interface. - */ -class SparkMetricsContainerStepMap extends MetricsContainerStepMap { +/** A Pojo at the top level for use in tests. */ +class AvroCoderTestPojo { - @Override - public String toString() { - return asAttemptedOnlyMetricResults(this).toString(); + public String text; + + // Empty constructor required for Avro decoding. + @SuppressWarnings("unused") + public AvroCoderTestPojo() {} + + public AvroCoderTestPojo(String text) { + this.text = text; } @Override - public boolean equals(@Nullable Object o) { - return super.equals(o); + public boolean equals(@Nullable Object other) { + return (other instanceof AvroCoderTestPojo) && ((AvroCoderTestPojo) other).text.equals(text); } @Override public int hashCode() { - return super.hashCode(); + return Objects.hash(AvroCoderTestPojo.class, text); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this).add("text", text).toString(); } } diff --git a/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/io/AvroIOTest.java b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/io/AvroIOTest.java new file mode 100644 index 0000000000000..ac2200b6dc4b2 --- /dev/null +++ b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/io/AvroIOTest.java @@ -0,0 +1,1587 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.avro.io; + +import static org.apache.avro.file.DataFileConstants.SNAPPY_CODEC; +import static org.apache.beam.sdk.io.Compression.AUTO; +import static org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions.RESOLVE_FILE; +import static org.apache.beam.sdk.transforms.Contextful.fn; +import static org.apache.beam.sdk.transforms.Requirements.requiresSideInputs; +import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem; +import static org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.MoreObjects.firstNonNull; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Serializable; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Random; +import org.apache.avro.Schema; +import org.apache.avro.SchemaBuilder; +import org.apache.avro.file.CodecFactory; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.file.DataFileStream; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.GenericRecordBuilder; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.io.Encoder; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.reflect.ReflectDatumReader; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.CoderException; +import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; +import org.apache.beam.sdk.io.DefaultFilenamePolicy; +import org.apache.beam.sdk.io.FileBasedSink; +import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy; +import org.apache.beam.sdk.io.FileBasedSink.OutputFileHints; +import org.apache.beam.sdk.io.FileIO; +import org.apache.beam.sdk.io.FileSystems; +import org.apache.beam.sdk.io.GenerateSequence; +import org.apache.beam.sdk.io.WriteFilesResult; +import org.apache.beam.sdk.io.fs.ResourceId; +import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider; +import org.apache.beam.sdk.testing.NeedsRunner; +import org.apache.beam.sdk.testing.PAssert; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.testing.TestStream; +import org.apache.beam.sdk.testing.UsesTestStream; +import org.apache.beam.sdk.testing.UsesUnboundedSplittableParDo; +import org.apache.beam.sdk.transforms.Create; +import org.apache.beam.sdk.transforms.MapElements; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.SerializableFunction; +import org.apache.beam.sdk.transforms.SimpleFunction; +import org.apache.beam.sdk.transforms.View; +import org.apache.beam.sdk.transforms.Watch; +import org.apache.beam.sdk.transforms.display.DisplayData; +import org.apache.beam.sdk.transforms.windowing.AfterPane; +import 
org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.FixedWindows; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow; +import org.apache.beam.sdk.transforms.windowing.PaneInfo; +import org.apache.beam.sdk.transforms.windowing.Repeatedly; +import org.apache.beam.sdk.transforms.windowing.Window; +import org.apache.beam.sdk.util.SerializableUtils; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.PCollectionView; +import org.apache.beam.sdk.values.TimestampedValue; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Charsets; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.MoreObjects; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ArrayListMultimap; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterators; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Multimap; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.junit.runners.Parameterized; + +/** Tests for AvroIO Read and Write transforms. */ +@SuppressWarnings({ + "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) +}) +public class AvroIOTest implements Serializable { + /** Unit tests. 
*/ + @RunWith(JUnit4.class) + public static class SimpleTests implements Serializable { + @Test + public void testAvroIOGetName() { + assertEquals("AvroIO.Read", AvroIO.read(String.class).from("/tmp/foo*/baz").getName()); + assertEquals("AvroIO.Write", AvroIO.write(String.class).to("/tmp/foo/baz").getName()); + } + + @Test + public void testWriteWithDefaultCodec() { + AvroIO.Write write = AvroIO.write(String.class).to("/tmp/foo/baz"); + assertEquals(CodecFactory.snappyCodec().toString(), write.inner.getCodec().toString()); + } + + @Test + public void testWriteWithCustomCodec() { + AvroIO.Write write = + AvroIO.write(String.class).to("/tmp/foo/baz").withCodec(CodecFactory.snappyCodec()); + assertEquals(SNAPPY_CODEC, write.inner.getCodec().toString()); + } + + @Test + public void testWriteWithSerDeCustomDeflateCodec() { + AvroIO.Write write = + AvroIO.write(String.class).to("/tmp/foo/baz").withCodec(CodecFactory.deflateCodec(9)); + + assertEquals( + CodecFactory.deflateCodec(9).toString(), + SerializableUtils.clone(write.inner.getCodec()).getCodec().toString()); + } + + @Test + public void testWriteWithSerDeCustomXZCodec() { + AvroIO.Write write = + AvroIO.write(String.class).to("/tmp/foo/baz").withCodec(CodecFactory.xzCodec(9)); + + assertEquals( + CodecFactory.xzCodec(9).toString(), + SerializableUtils.clone(write.inner.getCodec()).getCodec().toString()); + } + + @Test + public void testReadDisplayData() { + AvroIO.Read read = AvroIO.read(String.class).from("/foo.*"); + + DisplayData displayData = DisplayData.from(read); + assertThat(displayData, hasDisplayItem("filePattern", "/foo.*")); + } + } + + /** NeedsRunner tests. */ + @RunWith(Parameterized.class) + @Category(NeedsRunner.class) + public static class NeedsRunnerTests implements Serializable { + @Rule public transient TestPipeline writePipeline = TestPipeline.create(); + + @Rule public transient TestPipeline readPipeline = TestPipeline.create(); + + @Rule public transient TestPipeline windowedAvroWritePipeline = TestPipeline.create(); + + @Rule public transient TemporaryFolder tmpFolder = new TemporaryFolder(); + + @Rule public transient ExpectedException expectedException = ExpectedException.none(); + + @Parameterized.Parameters(name = "{index}: {0}") + public static Collection params() { + return Arrays.asList(new Object[][] {{true}, {false}}); + } + + @Parameterized.Parameter public boolean withBeamSchemas; + + @DefaultCoder(AvroCoder.class) + static class GenericClass { + int intField; + String stringField; + + GenericClass() {} + + GenericClass(int intField, String stringField) { + this.intField = intField; + this.stringField = stringField; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(getClass()) + .add("intField", intField) + .add("stringField", stringField) + .toString(); + } + + @Override + public int hashCode() { + return Objects.hash(intField, stringField); + } + + @Override + public boolean equals(@Nullable Object other) { + if (other == null || !(other instanceof GenericClass)) { + return false; + } + GenericClass o = (GenericClass) other; + return intField == o.intField && Objects.equals(stringField, o.stringField); + } + } + + private static class ParseGenericClass + implements SerializableFunction { + @Override + public GenericClass apply(GenericRecord input) { + return new GenericClass((int) input.get("intField"), input.get("stringField").toString()); + } + + @Test + public void testWriteDisplayData() { + AvroIO.Write write = + AvroIO.write(GenericClass.class) + .to("/foo") + 
.withShardNameTemplate("-SS-of-NN-") + .withSuffix("bar") + .withNumShards(100) + .withCodec(CodecFactory.deflateCodec(6)); + + DisplayData displayData = DisplayData.from(write); + + assertThat(displayData, hasDisplayItem("filePrefix", "/foo")); + assertThat(displayData, hasDisplayItem("shardNameTemplate", "-SS-of-NN-")); + assertThat(displayData, hasDisplayItem("fileSuffix", "bar")); + assertThat( + displayData, + hasDisplayItem( + "schema", + "{\"type\":\"record\",\"name\":\"GenericClass\",\"namespace\":\"org.apache.beam.sdk.extensions.avro.io" + + ".AvroIOTest$\",\"fields\":[{\"name\":\"intField\",\"type\":\"int\"}," + + "{\"name\":\"stringField\",\"type\":\"string\"}]}")); + assertThat(displayData, hasDisplayItem("numShards", 100)); + assertThat(displayData, hasDisplayItem("codec", CodecFactory.deflateCodec(6).toString())); + } + } + + private enum Sharding { + RUNNER_DETERMINED, + WITHOUT_SHARDING, + FIXED_3_SHARDS + } + + private enum WriteMethod { + AVROIO_WRITE, + AVROIO_SINK_WITH_CLASS, + AVROIO_SINK_WITH_SCHEMA, + /** @deprecated Test code for the deprecated {AvroIO.RecordFormatter}. */ + @Deprecated + AVROIO_SINK_WITH_FORMATTER + } + + private static final String SCHEMA_STRING = + "{\"namespace\": \"example.avro\",\n" + + " \"type\": \"record\",\n" + + " \"name\": \"AvroGeneratedUser\",\n" + + " \"fields\": [\n" + + " {\"name\": \"name\", \"type\": \"string\"},\n" + + " {\"name\": \"favorite_number\", \"type\": [\"int\", \"null\"]},\n" + + " {\"name\": \"favorite_color\", \"type\": [\"string\", \"null\"]}\n" + + " ]\n" + + "}"; + + private static final Schema SCHEMA = new Schema.Parser().parse(SCHEMA_STRING); + + @Test + @Category(NeedsRunner.class) + public void testWriteThenReadJavaClass() throws Throwable { + List values = + ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar")); + File outputFile = tmpFolder.newFile("output.avro"); + + writePipeline + .apply(Create.of(values)) + .apply( + AvroIO.write(GenericClass.class) + .to(writePipeline.newProvider(outputFile.getAbsolutePath())) + .withoutSharding()); + writePipeline.run(); + + PAssert.that( + readPipeline.apply( + "Read", + AvroIO.read(GenericClass.class) + .withBeamSchemas(withBeamSchemas) + .from(readPipeline.newProvider(outputFile.getAbsolutePath())))) + .containsInAnyOrder(values); + + readPipeline.run(); + } + + @Test + @Category(NeedsRunner.class) + public void testWriteThenReadCustomType() throws Throwable { + List values = Arrays.asList(0L, 1L, 2L); + File outputFile = tmpFolder.newFile("output.avro"); + + writePipeline + .apply(Create.of(values)) + .apply( + AvroIO.writeCustomType() + .to(writePipeline.newProvider(outputFile.getAbsolutePath())) + .withFormatFunction(new CreateGenericClass()) + .withSchema(ReflectData.get().getSchema(GenericClass.class)) + .withoutSharding()); + writePipeline.run(); + + PAssert.that( + readPipeline + .apply( + "Read", + AvroIO.read(GenericClass.class) + .withBeamSchemas(withBeamSchemas) + .from(readPipeline.newProvider(outputFile.getAbsolutePath()))) + .apply( + MapElements.via( + new SimpleFunction() { + @Override + public Long apply(GenericClass input) { + return (long) input.intField; + } + }))) + .containsInAnyOrder(values); + + readPipeline.run(); + } + + private void testWriteThenReadGeneratedClass( + AvroIO.Write writeTransform, AvroIO.Read readTransform) throws Exception { + File outputFile = tmpFolder.newFile("output.avro"); + + List values = + ImmutableList.of( + (T) new AvroGeneratedUser("Bob", 256, null), + (T) new AvroGeneratedUser("Alice", 128, 
null), + (T) new AvroGeneratedUser("Ted", null, "white")); + + writePipeline + .apply(Create.of(values)) + .apply( + writeTransform + .to(writePipeline.newProvider(outputFile.getAbsolutePath())) + .withoutSharding()); + writePipeline.run(); + + PAssert.that( + readPipeline.apply( + "Read", + readTransform.from(readPipeline.newProvider(outputFile.getAbsolutePath())))) + .containsInAnyOrder(values); + + readPipeline.run(); + } + + @Test + @Category(NeedsRunner.class) + public void testWriteThenReadGeneratedClassWithClass() throws Throwable { + testWriteThenReadGeneratedClass( + AvroIO.write(AvroGeneratedUser.class), + AvroIO.read(AvroGeneratedUser.class).withBeamSchemas(withBeamSchemas)); + } + + @Test + @Category(NeedsRunner.class) + public void testWriteThenReadGeneratedClassWithSchema() throws Throwable { + testWriteThenReadGeneratedClass( + AvroIO.writeGenericRecords(SCHEMA), + AvroIO.readGenericRecords(SCHEMA).withBeamSchemas(withBeamSchemas)); + } + + @Test + @Category(NeedsRunner.class) + public void testWriteThenReadGeneratedClassWithSchemaString() throws Throwable { + testWriteThenReadGeneratedClass( + AvroIO.writeGenericRecords(SCHEMA.toString()), + AvroIO.readGenericRecords(SCHEMA.toString()).withBeamSchemas(withBeamSchemas)); + } + + @Test + @Category(NeedsRunner.class) + public void testWriteSingleFileThenReadUsingAllMethods() throws Throwable { + List values = + ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar")); + File outputFile = tmpFolder.newFile("output.avro"); + + writePipeline + .apply(Create.of(values)) + .apply( + AvroIO.write(GenericClass.class).to(outputFile.getAbsolutePath()).withoutSharding()); + writePipeline.run(); + + // Test the same data using all versions of read(). + PCollection path = + readPipeline.apply("Create path", Create.of(outputFile.getAbsolutePath())); + PAssert.that( + readPipeline.apply( + "Read", + AvroIO.read(GenericClass.class) + .withBeamSchemas(withBeamSchemas) + .from(outputFile.getAbsolutePath()))) + .containsInAnyOrder(values); + PAssert.that( + readPipeline.apply( + "Read withHintMatchesManyFiles", + AvroIO.read(GenericClass.class) + .withBeamSchemas(withBeamSchemas) + .from(outputFile.getAbsolutePath()) + .withHintMatchesManyFiles())) + .containsInAnyOrder(values); + PAssert.that( + path.apply("MatchAllReadFiles", FileIO.matchAll()) + .apply("ReadMatchesReadFiles", FileIO.readMatches().withCompression(AUTO)) + .apply( + "ReadFiles", + AvroIO.readFiles(GenericClass.class) + .withBeamSchemas(withBeamSchemas) + .withDesiredBundleSizeBytes(10))) + .containsInAnyOrder(values); + PAssert.that( + path.apply( + "ReadAll", + AvroIO.readAll(GenericClass.class) + .withBeamSchemas(withBeamSchemas) + .withDesiredBundleSizeBytes(10))) + .containsInAnyOrder(values); + PAssert.that( + readPipeline.apply( + "Parse", + AvroIO.parseGenericRecords(new ParseGenericClass()) + .from(outputFile.getAbsolutePath()) + .withCoder(AvroCoder.of(GenericClass.class)))) + .containsInAnyOrder(values); + PAssert.that( + readPipeline.apply( + "Parse withHintMatchesManyFiles", + AvroIO.parseGenericRecords(new ParseGenericClass()) + .from(outputFile.getAbsolutePath()) + .withCoder(AvroCoder.of(GenericClass.class)) + .withHintMatchesManyFiles())) + .containsInAnyOrder(values); + PAssert.that( + path.apply("MatchAllParseFilesGenericRecords", FileIO.matchAll()) + .apply( + "ReadMatchesParseFilesGenericRecords", + FileIO.readMatches() + .withDirectoryTreatment(FileIO.ReadMatches.DirectoryTreatment.PROHIBIT)) + .apply( + "ParseFilesGenericRecords", + 
AvroIO.parseFilesGenericRecords(new ParseGenericClass()) + .withCoder(AvroCoder.of(GenericClass.class)) + .withUsesReshuffle(false) + .withDesiredBundleSizeBytes(10))) + .containsInAnyOrder(values); + PAssert.that( + path.apply("MatchAllParseFilesGenericRecordsWithShuffle", FileIO.matchAll()) + .apply( + "ReadMatchesParseFilesGenericRecordsWithShuffle", + FileIO.readMatches() + .withDirectoryTreatment(FileIO.ReadMatches.DirectoryTreatment.PROHIBIT)) + .apply( + "ParseFilesGenericRecordsWithShuffle", + AvroIO.parseFilesGenericRecords(new ParseGenericClass()) + .withCoder(AvroCoder.of(GenericClass.class)) + .withUsesReshuffle(true) + .withDesiredBundleSizeBytes(10))) + .containsInAnyOrder(values); + PAssert.that( + path.apply( + "ParseAllGenericRecords", + AvroIO.parseAllGenericRecords(new ParseGenericClass()) + .withCoder(AvroCoder.of(GenericClass.class)) + .withDesiredBundleSizeBytes(10))) + .containsInAnyOrder(values); + + readPipeline.run(); + } + + @Test + @Category(NeedsRunner.class) + public void testWriteThenReadMultipleFilepatterns() { + List firstValues = new ArrayList<>(); + List secondValues = new ArrayList<>(); + for (int i = 0; i < 10; ++i) { + firstValues.add(new GenericClass(i, "a" + i)); + secondValues.add(new GenericClass(i, "b" + i)); + } + writePipeline + .apply("Create first", Create.of(firstValues)) + .apply( + "Write first", + AvroIO.write(GenericClass.class) + .to(tmpFolder.getRoot().getAbsolutePath() + "/first") + .withNumShards(2)); + writePipeline + .apply("Create second", Create.of(secondValues)) + .apply( + "Write second", + AvroIO.write(GenericClass.class) + .to(tmpFolder.getRoot().getAbsolutePath() + "/second") + .withNumShards(3)); + writePipeline.run(); + + // Test readFiles(), readAll(), parseFilesGenericRecords() and parseAllGenericRecords(). 
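Stripped of the assertions, the write-then-read pattern these cases exercise is only a few transforms in user code; a minimal sketch (the paths are placeholders) against the generic-record flavor of the API:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.extensions.avro.io.AvroIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

public class AvroIOBatchSketch {
  public static void run(Schema schema) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());
    // Read every shard matching the glob, then rewrite the records as 3 shards.
    PCollection<GenericRecord> records =
        p.apply(AvroIO.readGenericRecords(schema).from("/tmp/input/*.avro"));
    records.apply(AvroIO.writeGenericRecords(schema).to("/tmp/output/part").withNumShards(3));
    p.run().waitUntilFinish();
  }
}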
+ PCollection paths = + readPipeline.apply( + "Create paths", + Create.of( + tmpFolder.getRoot().getAbsolutePath() + "/first*", + tmpFolder.getRoot().getAbsolutePath() + "/second*")); + PAssert.that( + paths + .apply("MatchAllReadFiles", FileIO.matchAll()) + .apply("ReadMatchesReadFiles", FileIO.readMatches().withCompression(AUTO)) + .apply( + "ReadFiles", + AvroIO.readFiles(GenericClass.class) + .withBeamSchemas(withBeamSchemas) + .withDesiredBundleSizeBytes(10))) + .containsInAnyOrder(Iterables.concat(firstValues, secondValues)); + PAssert.that( + paths.apply( + "ReadAll", + AvroIO.readAll(GenericClass.class) + .withBeamSchemas(withBeamSchemas) + .withDesiredBundleSizeBytes(10))) + .containsInAnyOrder(Iterables.concat(firstValues, secondValues)); + PAssert.that( + paths + .apply("MatchAllParseFilesGenericRecords", FileIO.matchAll()) + .apply( + "ReadMatchesParseFilesGenericRecords", + FileIO.readMatches() + .withDirectoryTreatment(FileIO.ReadMatches.DirectoryTreatment.PROHIBIT)) + .apply( + "ParseFilesGenericRecords", + AvroIO.parseFilesGenericRecords(new ParseGenericClass()) + .withCoder(AvroCoder.of(GenericClass.class)) + .withDesiredBundleSizeBytes(10))) + .containsInAnyOrder(Iterables.concat(firstValues, secondValues)); + PAssert.that( + paths.apply( + "ParseAllGenericRecords", + AvroIO.parseAllGenericRecords(new ParseGenericClass()) + .withCoder(AvroCoder.of(GenericClass.class)) + .withDesiredBundleSizeBytes(10))) + .containsInAnyOrder(Iterables.concat(firstValues, secondValues)); + + readPipeline.run(); + } + + private static class CreateGenericClass extends SimpleFunction { + @Override + public GenericClass apply(Long i) { + return new GenericClass(i.intValue(), "value" + i); + } + } + + @Test + @Category({NeedsRunner.class, UsesUnboundedSplittableParDo.class}) + public void testContinuouslyWriteAndReadMultipleFilepatterns() { + SimpleFunction mapFn = new CreateGenericClass(); + List firstValues = new ArrayList<>(); + List secondValues = new ArrayList<>(); + for (int i = 0; i < 7; ++i) { + (i < 3 ? firstValues : secondValues).add(mapFn.apply((long) i)); + } + // Configure windowing of the input so that it fires every time a new element is generated, + // so that files are written continuously. + Window window = + Window.into(FixedWindows.of(Duration.millis(100))) + .withAllowedLateness(Duration.ZERO) + .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1))) + .discardingFiredPanes(); + readPipeline + .apply("Sequence first", GenerateSequence.from(0).to(3).withRate(1, Duration.millis(300))) + .apply("Window first", window) + .apply("Map first", MapElements.via(mapFn)) + .apply( + "Write first", + AvroIO.write(GenericClass.class) + .to(tmpFolder.getRoot().getAbsolutePath() + "/first") + .withNumShards(2) + .withWindowedWrites()); + readPipeline + .apply( + "Sequence second", GenerateSequence.from(3).to(7).withRate(1, Duration.millis(300))) + .apply("Window second", window) + .apply("Map second", MapElements.via(mapFn)) + .apply( + "Write second", + AvroIO.write(GenericClass.class) + .to(tmpFolder.getRoot().getAbsolutePath() + "/second") + .withNumShards(3) + .withWindowedWrites()); + + // Test read(), readFiles(), readAll(), parse(), parseFilesGenericRecords() and + // parseAllGenericRecords() with watchForNewFiles(). 
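In user code, the continuous variant tested here is just the bounded read plus watchForNewFiles; a sketch in which the poll interval and termination condition are arbitrary illustrative choices:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.extensions.avro.io.AvroIO;
import org.apache.beam.sdk.transforms.Watch;
import org.joda.time.Duration;

public class AvroWatchSketch {
  // Polls the glob every 30 seconds and terminates after an hour with no new files.
  public static AvroIO.Read<GenericRecord> continuousRead(Schema schema) {
    return AvroIO.readGenericRecords(schema)
        .from("/tmp/incoming/*.avro")
        .watchForNewFiles(
            Duration.standardSeconds(30),
            Watch.Growth.afterTimeSinceNewOutput(Duration.standardHours(1)));
  }
}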
+ PAssert.that( + readPipeline.apply( + "Read", + AvroIO.read(GenericClass.class) + .withBeamSchemas(withBeamSchemas) + .from(tmpFolder.getRoot().getAbsolutePath() + "/first*") + .watchForNewFiles( + Duration.millis(100), + Watch.Growth.afterTimeSinceNewOutput(Duration.standardSeconds(3))))) + .containsInAnyOrder(firstValues); + PAssert.that( + readPipeline.apply( + "Parse", + AvroIO.parseGenericRecords(new ParseGenericClass()) + .from(tmpFolder.getRoot().getAbsolutePath() + "/first*") + .watchForNewFiles( + Duration.millis(100), + Watch.Growth.afterTimeSinceNewOutput(Duration.standardSeconds(3))))) + .containsInAnyOrder(firstValues); + + PCollection paths = + readPipeline.apply( + "Create paths", + Create.of( + tmpFolder.getRoot().getAbsolutePath() + "/first*", + tmpFolder.getRoot().getAbsolutePath() + "/second*")); + PAssert.that( + paths + .apply( + "Match All Read files", + FileIO.matchAll() + .continuously( + Duration.millis(100), + Watch.Growth.afterTimeSinceNewOutput(Duration.standardSeconds(3)))) + .apply( + "Read Matches Read files", + FileIO.readMatches() + .withDirectoryTreatment(FileIO.ReadMatches.DirectoryTreatment.PROHIBIT)) + .apply( + "Read files", + AvroIO.readFiles(GenericClass.class) + .withBeamSchemas(withBeamSchemas) + .withDesiredBundleSizeBytes(10))) + .containsInAnyOrder(Iterables.concat(firstValues, secondValues)); + PAssert.that( + paths.apply( + "Read all", + AvroIO.readAll(GenericClass.class) + .withBeamSchemas(withBeamSchemas) + .watchForNewFiles( + Duration.millis(100), + Watch.Growth.afterTimeSinceNewOutput(Duration.standardSeconds(3))) + .withDesiredBundleSizeBytes(10))) + .containsInAnyOrder(Iterables.concat(firstValues, secondValues)); + PAssert.that( + paths + .apply( + "Match All ParseFilesGenericRecords", + FileIO.matchAll() + .continuously( + Duration.millis(100), + Watch.Growth.afterTimeSinceNewOutput(Duration.standardSeconds(3)))) + .apply( + "Match Matches ParseFilesGenericRecords", + FileIO.readMatches() + .withDirectoryTreatment(FileIO.ReadMatches.DirectoryTreatment.PROHIBIT)) + .apply( + "ParseFilesGenericRecords", + AvroIO.parseFilesGenericRecords(new ParseGenericClass()) + .withCoder(AvroCoder.of(GenericClass.class)) + .withDesiredBundleSizeBytes(10))) + .containsInAnyOrder(Iterables.concat(firstValues, secondValues)); + PAssert.that( + paths.apply( + "ParseAllGenericRecords", + AvroIO.parseAllGenericRecords(new ParseGenericClass()) + .withCoder(AvroCoder.of(GenericClass.class)) + .watchForNewFiles( + Duration.millis(100), + Watch.Growth.afterTimeSinceNewOutput(Duration.standardSeconds(3))) + .withDesiredBundleSizeBytes(10))) + .containsInAnyOrder(Iterables.concat(firstValues, secondValues)); + readPipeline.run(); + } + + @Test + @SuppressWarnings("unchecked") + @Category(NeedsRunner.class) + public void testCompressedWriteAndReadASingleFile() throws Throwable { + List values = + ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar")); + File outputFile = tmpFolder.newFile("output.avro"); + + writePipeline + .apply(Create.of(values)) + .apply( + AvroIO.write(GenericClass.class) + .to(outputFile.getAbsolutePath()) + .withoutSharding() + .withCodec(CodecFactory.deflateCodec(9))); + writePipeline.run(); + + PAssert.that( + readPipeline.apply( + AvroIO.read(GenericClass.class) + .withBeamSchemas(withBeamSchemas) + .from(outputFile.getAbsolutePath()))) + .containsInAnyOrder(values); + readPipeline.run(); + + try (DataFileStream dataFileStream = + new DataFileStream(new FileInputStream(outputFile), new GenericDatumReader())) { + 
assertEquals("deflate", dataFileStream.getMetaString("avro.codec")); + } + } + + @Test + @SuppressWarnings("unchecked") + @Category(NeedsRunner.class) + public void testWriteThenReadASingleFileWithNullCodec() throws Throwable { + List values = + ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar")); + File outputFile = tmpFolder.newFile("output.avro"); + + writePipeline + .apply(Create.of(values)) + .apply( + AvroIO.write(GenericClass.class) + .to(outputFile.getAbsolutePath()) + .withoutSharding() + .withCodec(CodecFactory.nullCodec())); + writePipeline.run(); + + PAssert.that( + readPipeline.apply( + AvroIO.read(GenericClass.class) + .withBeamSchemas(withBeamSchemas) + .from(outputFile.getAbsolutePath()))) + .containsInAnyOrder(values); + readPipeline.run(); + + try (DataFileStream dataFileStream = + new DataFileStream(new FileInputStream(outputFile), new GenericDatumReader())) { + assertEquals("null", dataFileStream.getMetaString("avro.codec")); + } + } + + @DefaultCoder(AvroCoder.class) + static class GenericClassV2 { + int intField; + String stringField; + @org.apache.avro.reflect.Nullable String nullableField; + + GenericClassV2() {} + + GenericClassV2(int intValue, String stringValue, String nullableValue) { + this.intField = intValue; + this.stringField = stringValue; + this.nullableField = nullableValue; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(getClass()) + .add("intField", intField) + .add("stringField", stringField) + .add("nullableField", nullableField) + .toString(); + } + + @Override + public int hashCode() { + return Objects.hash(intField, stringField, nullableField); + } + + @Override + public boolean equals(@Nullable Object other) { + if (!(other instanceof GenericClassV2)) { + return false; + } + GenericClassV2 o = (GenericClassV2) other; + return intField == o.intField + && Objects.equals(stringField, o.stringField) + && Objects.equals(nullableField, o.nullableField); + } + } + + /** + * Tests that {@code AvroIO} can read an upgraded version of an old class, as long as the schema + * resolution process succeeds. This test covers the case when a new, {@code @Nullable} field + * has been added. + * + *
<p>
For more information, see http://avro.apache.org/docs/1.7.7/spec.html#Schema+Resolution + */ + @Test + @Category(NeedsRunner.class) + public void testWriteThenReadSchemaUpgrade() throws Throwable { + List values = + ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar")); + File outputFile = tmpFolder.newFile("output.avro"); + + writePipeline + .apply(Create.of(values)) + .apply( + AvroIO.write(GenericClass.class).to(outputFile.getAbsolutePath()).withoutSharding()); + writePipeline.run(); + + List expected = + ImmutableList.of(new GenericClassV2(3, "hi", null), new GenericClassV2(5, "bar", null)); + + PAssert.that( + readPipeline.apply( + AvroIO.read(GenericClassV2.class) + .withBeamSchemas(withBeamSchemas) + .from(outputFile.getAbsolutePath()))) + .containsInAnyOrder(expected); + readPipeline.run(); + } + + private static class WindowedFilenamePolicy extends FilenamePolicy { + final ResourceId outputFilePrefix; + + WindowedFilenamePolicy(ResourceId outputFilePrefix) { + this.outputFilePrefix = outputFilePrefix; + } + + @Override + public ResourceId windowedFilename( + int shardNumber, + int numShards, + BoundedWindow window, + PaneInfo paneInfo, + OutputFileHints outputFileHints) { + String filenamePrefix = + outputFilePrefix.isDirectory() ? "" : firstNonNull(outputFilePrefix.getFilename(), ""); + + IntervalWindow interval = (IntervalWindow) window; + String windowStr = + String.format("%s-%s", interval.start().toString(), interval.end().toString()); + String filename = + String.format( + "%s-%s-%s-of-%s-pane-%s%s%s.avro", + filenamePrefix, + windowStr, + shardNumber, + numShards, + paneInfo.getIndex(), + paneInfo.isLast() ? "-last" : "", + outputFileHints.getSuggestedFilenameSuffix()); + return outputFilePrefix.getCurrentDirectory().resolve(filename, RESOLVE_FILE); + } + + @Override + public ResourceId unwindowedFilename( + int shardNumber, int numShards, OutputFileHints outputFileHints) { + throw new UnsupportedOperationException("Expecting windowed outputs only"); + } + + @Override + public void populateDisplayData(DisplayData.Builder builder) { + builder.add( + DisplayData.item("fileNamePrefix", outputFilePrefix.toString()) + .withLabel("File Name Prefix")); + } + } + + @Test + @Category({NeedsRunner.class, UsesTestStream.class}) + public void testWriteWindowed() throws Throwable { + testWindowedAvroIOWriteUsingMethod(WriteMethod.AVROIO_WRITE); + } + + @Test + @Category({NeedsRunner.class, UsesTestStream.class}) + public void testWindowedAvroIOWriteViaSink() throws Throwable { + testWindowedAvroIOWriteUsingMethod(WriteMethod.AVROIO_SINK_WITH_CLASS); + } + + void testWindowedAvroIOWriteUsingMethod(WriteMethod method) throws IOException { + Path baseDir = Files.createTempDirectory(tmpFolder.getRoot().toPath(), "testwrite"); + final String baseFilename = baseDir.resolve("prefix").toString(); + + Instant base = new Instant(0); + ArrayList allElements = new ArrayList<>(); + ArrayList> firstWindowElements = new ArrayList<>(); + ArrayList firstWindowTimestamps = + Lists.newArrayList( + base.plus(Duration.ZERO), base.plus(Duration.standardSeconds(10)), + base.plus(Duration.standardSeconds(20)), base.plus(Duration.standardSeconds(30))); + + Random random = new Random(); + for (int i = 0; i < 100; ++i) { + GenericClass item = new GenericClass(i, String.valueOf(i)); + allElements.add(item); + firstWindowElements.add( + TimestampedValue.of( + item, firstWindowTimestamps.get(random.nextInt(firstWindowTimestamps.size())))); + } + + ArrayList> secondWindowElements = new 
ArrayList<>(); + ArrayList secondWindowTimestamps = + Lists.newArrayList( + base.plus(Duration.standardSeconds(60)), base.plus(Duration.standardSeconds(70)), + base.plus(Duration.standardSeconds(80)), base.plus(Duration.standardSeconds(90))); + for (int i = 100; i < 200; ++i) { + GenericClass item = new GenericClass(i, String.valueOf(i)); + allElements.add(new GenericClass(i, String.valueOf(i))); + secondWindowElements.add( + TimestampedValue.of( + item, secondWindowTimestamps.get(random.nextInt(secondWindowTimestamps.size())))); + } + + TimestampedValue[] firstWindowArray = + firstWindowElements.toArray(new TimestampedValue[100]); + TimestampedValue[] secondWindowArray = + secondWindowElements.toArray(new TimestampedValue[100]); + + TestStream values = + TestStream.create(AvroCoder.of(GenericClass.class)) + .advanceWatermarkTo(new Instant(0)) + .addElements( + firstWindowArray[0], + Arrays.copyOfRange(firstWindowArray, 1, firstWindowArray.length)) + .advanceWatermarkTo(new Instant(0).plus(Duration.standardMinutes(1))) + .addElements( + secondWindowArray[0], + Arrays.copyOfRange(secondWindowArray, 1, secondWindowArray.length)) + .advanceWatermarkToInfinity(); + + final PTransform, WriteFilesResult> write; + switch (method) { + case AVROIO_WRITE: + { + FilenamePolicy policy = + new WindowedFilenamePolicy( + FileBasedSink.convertToFileResourceIfPossible(baseFilename)); + write = + AvroIO.write(GenericClass.class) + .to(policy) + .withTempDirectory( + StaticValueProvider.of( + FileSystems.matchNewResource(baseDir.toString(), true))) + .withWindowedWrites() + .withNumShards(2) + .withOutputFilenames(); + break; + } + + case AVROIO_SINK_WITH_CLASS: + { + write = + FileIO.write() + .via(AvroIO.sink(GenericClass.class)) + .to(baseDir.toString()) + .withPrefix("prefix") + .withSuffix(".avro") + .withTempDirectory(baseDir.toString()) + .withNumShards(2); + break; + } + + default: + throw new UnsupportedOperationException(); + } + windowedAvroWritePipeline + .apply(values) + .apply(Window.into(FixedWindows.of(Duration.standardMinutes(1)))) + .apply(write); + windowedAvroWritePipeline.run(); + + // Validate that the data written matches the expected elements in the expected order + List expectedFiles = new ArrayList<>(); + for (int shard = 0; shard < 2; shard++) { + for (int window = 0; window < 2; window++) { + Instant windowStart = new Instant(0).plus(Duration.standardMinutes(window)); + IntervalWindow iw = new IntervalWindow(windowStart, Duration.standardMinutes(1)); + String baseAndWindow = baseFilename + "-" + iw.start() + "-" + iw.end(); + switch (method) { + case AVROIO_WRITE: + expectedFiles.add(new File(baseAndWindow + "-" + shard + "-of-2-pane-0-last.avro")); + break; + case AVROIO_SINK_WITH_CLASS: + expectedFiles.add(new File(baseAndWindow + "-0000" + shard + "-of-00002.avro")); + break; + default: + throw new UnsupportedOperationException("Unknown write method " + method); + } + } + } + + List actualElements = new ArrayList<>(); + for (File outputFile : expectedFiles) { + assertTrue("Expected output file " + outputFile.getAbsolutePath(), outputFile.exists()); + try (DataFileReader reader = + new DataFileReader<>( + outputFile, + new ReflectDatumReader<>(ReflectData.get().getSchema(GenericClass.class)))) { + Iterators.addAll(actualElements, reader); + } + outputFile.delete(); + } + assertThat(actualElements, containsInAnyOrder(allElements.toArray())); + } + + private static final String SCHEMA_TEMPLATE_STRING = + "{\"namespace\": \"example.avro\",\n" + + " \"type\": \"record\",\n" + + " 
\"name\": \"$$TestTemplateSchema\",\n" + + " \"fields\": [\n" + + " {\"name\": \"$$full\", \"type\": \"string\"},\n" + + " {\"name\": \"$$suffix\", \"type\": [\"string\", \"null\"]}\n" + + " ]\n" + + "}"; + + private static String schemaFromPrefix(String prefix) { + return SCHEMA_TEMPLATE_STRING.replace("$$", prefix); + } + + private static GenericRecord createRecord(String record, String prefix, Schema schema) { + GenericRecord genericRecord = new GenericData.Record(schema); + genericRecord.put(prefix + "full", record); + genericRecord.put(prefix + "suffix", record.substring(1)); + return genericRecord; + } + + private static class TestDynamicDestinations + extends DynamicAvroDestinations { + final ResourceId baseDir; + final PCollectionView> schemaView; + + TestDynamicDestinations(ResourceId baseDir, PCollectionView> schemaView) { + this.baseDir = baseDir; + this.schemaView = schemaView; + } + + @Override + public Schema getSchema(String destination) { + // Return a per-destination schema. + String schema = sideInput(schemaView).get(destination); + return new Schema.Parser().parse(schema); + } + + @Override + public List> getSideInputs() { + return ImmutableList.of(schemaView); + } + + @Override + public GenericRecord formatRecord(String record) { + String prefix = record.substring(0, 1); + return createRecord(record, prefix, getSchema(prefix)); + } + + @Override + public String getDestination(String element) { + // Destination is based on first character of string. + return element.substring(0, 1); + } + + @Override + public String getDefaultDestination() { + return ""; + } + + @Override + public FilenamePolicy getFilenamePolicy(String destination) { + return DefaultFilenamePolicy.fromStandardParameters( + StaticValueProvider.of(baseDir.resolve("file_" + destination, RESOLVE_FILE)), + "-SSSSS-of-NNNNN", + ".avro", + false); + } + } + + /** + * Example of a {@link Coder} for a collection of Avro records with different schemas. + * + *
<p>
All the schemas are known at pipeline construction, and are keyed internally on the prefix + * character (lower byte only for UTF-8 data). + */ + private static class AvroMultiplexCoder extends Coder { + + /** Lookup table for the possible schemas, keyed on the prefix character. */ + private final Map> coderMap = Maps.newHashMap(); + + protected AvroMultiplexCoder(Map schemaMap) { + for (Map.Entry entry : schemaMap.entrySet()) { + coderMap.put( + entry.getKey().charAt(0), AvroCoder.of(new Schema.Parser().parse(entry.getValue()))); + } + } + + @Override + public void encode(GenericRecord value, OutputStream outStream) throws IOException { + char prefix = value.getSchema().getName().charAt(0); + outStream.write(prefix); // Only reads and writes the low byte. + coderMap.get(prefix).encode(value, outStream); + } + + @Override + public GenericRecord decode(InputStream inStream) throws CoderException, IOException { + char prefix = (char) inStream.read(); + return coderMap.get(prefix).decode(inStream); + } + + @Override + public List> getCoderArguments() { + return Collections.emptyList(); + } + + @Override + public void verifyDeterministic() throws NonDeterministicException { + for (AvroCoder internalCoder : coderMap.values()) { + internalCoder.verifyDeterministic(); + } + } + } + + private void testDynamicDestinationsUnwindowedWithSharding( + WriteMethod writeMethod, Sharding sharding) throws Exception { + final ResourceId baseDir = + FileSystems.matchNewResource( + Files.createTempDirectory(tmpFolder.getRoot().toPath(), "testDynamicDestinations") + .toString(), + true); + + List elements = Lists.newArrayList("aaaa", "aaab", "baaa", "baab", "caaa", "caab"); + Multimap expectedElements = ArrayListMultimap.create(); + Map schemaMap = Maps.newHashMap(); + for (String element : elements) { + String prefix = element.substring(0, 1); + String jsonSchema = schemaFromPrefix(prefix); + schemaMap.put(prefix, jsonSchema); + expectedElements.put( + prefix, createRecord(element, prefix, new Schema.Parser().parse(jsonSchema))); + } + final PCollectionView> schemaView = + writePipeline.apply("createSchemaView", Create.of(schemaMap)).apply(View.asMap()); + + PCollection input = + writePipeline.apply("createInput", Create.of(elements).withCoder(StringUtf8Coder.of())); + + switch (writeMethod) { + case AVROIO_WRITE: + { + AvroIO.TypedWrite write = + AvroIO.writeCustomTypeToGenericRecords() + .to(new TestDynamicDestinations(baseDir, schemaView)) + .withTempDirectory(baseDir); + + switch (sharding) { + case RUNNER_DETERMINED: + break; + case WITHOUT_SHARDING: + write = write.withoutSharding(); + break; + case FIXED_3_SHARDS: + write = write.withNumShards(3); + break; + default: + throw new IllegalArgumentException("Unknown sharding " + sharding); + } + + input.apply(write); + break; + } + + case AVROIO_SINK_WITH_SCHEMA: + { + FileIO.Write write = + FileIO.writeDynamic() + .by( + fn( + (element, c) -> { + c.sideInput(schemaView); // Ignore result + return element.getSchema().getName().substring(0, 1); + }, + requiresSideInputs(schemaView))) + .via( + fn( + (dest, c) -> { + Schema schema = + new Schema.Parser().parse(c.sideInput(schemaView).get(dest)); + return AvroIO.sink(schema); + }, + requiresSideInputs(schemaView))) + .to(baseDir.toString()) + .withNaming( + fn( + (dest, c) -> { + c.sideInput(schemaView); // Ignore result + return FileIO.Write.defaultNaming("file_" + dest, ".avro"); + }, + requiresSideInputs(schemaView))) + .withTempDirectory(baseDir.toString()) + .withDestinationCoder(StringUtf8Coder.of()) + 
.withIgnoreWindowing(); + switch (sharding) { + case RUNNER_DETERMINED: + break; + case WITHOUT_SHARDING: + write = write.withNumShards(1); + break; + case FIXED_3_SHARDS: + write = write.withNumShards(3); + break; + default: + throw new IllegalArgumentException("Unknown sharding " + sharding); + } + + MapElements toRecord = + MapElements.via( + new SimpleFunction() { + @Override + public GenericRecord apply(String element) { + String prefix = element.substring(0, 1); + GenericRecord record = + new GenericData.Record( + new Schema.Parser().parse(schemaFromPrefix(prefix))); + record.put(prefix + "full", element); + record.put(prefix + "suffix", element.substring(1)); + return record; + } + }); + + input.apply(toRecord).setCoder(new AvroMultiplexCoder(schemaMap)).apply(write); + break; + } + + case AVROIO_SINK_WITH_FORMATTER: + { + final AvroIO.RecordFormatter formatter = + (element, schema) -> { + String prefix = element.substring(0, 1); + GenericRecord record = new GenericData.Record(schema); + record.put(prefix + "full", element); + record.put(prefix + "suffix", element.substring(1)); + return record; + }; + FileIO.Write write = + FileIO.writeDynamic() + .by( + fn( + (element, c) -> { + c.sideInput(schemaView); // Ignore result + return element.substring(0, 1); + }, + requiresSideInputs(schemaView))) + .via( + fn( + (dest, c) -> { + Schema schema = + new Schema.Parser().parse(c.sideInput(schemaView).get(dest)); + return AvroIO.sinkViaGenericRecords(schema, formatter); + }, + requiresSideInputs(schemaView))) + .to(baseDir.toString()) + .withNaming( + fn( + (dest, c) -> { + c.sideInput(schemaView); // Ignore result + return FileIO.Write.defaultNaming("file_" + dest, ".avro"); + }, + requiresSideInputs(schemaView))) + .withTempDirectory(baseDir.toString()) + .withDestinationCoder(StringUtf8Coder.of()) + .withIgnoreWindowing(); + switch (sharding) { + case RUNNER_DETERMINED: + break; + case WITHOUT_SHARDING: + write = write.withNumShards(1); + break; + case FIXED_3_SHARDS: + write = write.withNumShards(3); + break; + default: + throw new IllegalArgumentException("Unknown sharding " + sharding); + } + + input.apply(write); + break; + } + default: + throw new UnsupportedOperationException("Unknown write method " + writeMethod); + } + + writePipeline.run(); + + // Validate that the data written matches the expected elements in the expected order. 
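+ // Each prefix was written under its own schema to its own "file_" destination, so read every filepattern back with the matching schema and compare per prefix.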
+ + for (String prefix : expectedElements.keySet()) { + String shardPattern; + switch (sharding) { + case RUNNER_DETERMINED: + shardPattern = "-*"; + break; + case WITHOUT_SHARDING: + shardPattern = "-00000-of-00001"; + break; + case FIXED_3_SHARDS: + shardPattern = "-*-of-00003"; + break; + default: + throw new IllegalArgumentException("Unknown sharding " + sharding); + } + String expectedFilepattern = + baseDir.resolve("file_" + prefix + shardPattern + ".avro", RESOLVE_FILE).toString(); + + PCollection records = + readPipeline.apply( + "read_" + prefix, + AvroIO.readGenericRecords(schemaFromPrefix(prefix)) + .withBeamSchemas(withBeamSchemas) + .from(expectedFilepattern)); + PAssert.that(records).containsInAnyOrder(expectedElements.get(prefix)); + } + readPipeline.run(); + } + + @Test + @Category(NeedsRunner.class) + public void testDynamicDestinationsRunnerDeterminedSharding() throws Exception { + testDynamicDestinationsUnwindowedWithSharding( + WriteMethod.AVROIO_WRITE, Sharding.RUNNER_DETERMINED); + } + + @Test + @Category(NeedsRunner.class) + public void testDynamicDestinationsWithoutSharding() throws Exception { + testDynamicDestinationsUnwindowedWithSharding( + WriteMethod.AVROIO_WRITE, Sharding.WITHOUT_SHARDING); + } + + @Test + @Category(NeedsRunner.class) + public void testDynamicDestinationsWithNumShards() throws Exception { + testDynamicDestinationsUnwindowedWithSharding( + WriteMethod.AVROIO_WRITE, Sharding.FIXED_3_SHARDS); + } + + @Test + @Category(NeedsRunner.class) + public void testDynamicDestinationsViaSinkRunnerDeterminedSharding() throws Exception { + testDynamicDestinationsUnwindowedWithSharding( + WriteMethod.AVROIO_SINK_WITH_SCHEMA, Sharding.RUNNER_DETERMINED); + } + + @Test + @Category(NeedsRunner.class) + public void testDynamicDestinationsViaSinkWithoutSharding() throws Exception { + testDynamicDestinationsUnwindowedWithSharding( + WriteMethod.AVROIO_SINK_WITH_SCHEMA, Sharding.WITHOUT_SHARDING); + } + + @Test + @Category(NeedsRunner.class) + public void testDynamicDestinationsViaSinkWithNumShards() throws Exception { + testDynamicDestinationsUnwindowedWithSharding( + WriteMethod.AVROIO_SINK_WITH_SCHEMA, Sharding.FIXED_3_SHARDS); + } + + @Test + @Category(NeedsRunner.class) + public void testDynamicDestinationsViaSinkWithFormatterRunnerDeterminedSharding() + throws Exception { + testDynamicDestinationsUnwindowedWithSharding( + WriteMethod.AVROIO_SINK_WITH_FORMATTER, Sharding.RUNNER_DETERMINED); + } + + @Test + @Category(NeedsRunner.class) + public void testDynamicDestinationsViaSinkWithFormatterWithoutSharding() throws Exception { + testDynamicDestinationsUnwindowedWithSharding( + WriteMethod.AVROIO_SINK_WITH_FORMATTER, Sharding.WITHOUT_SHARDING); + } + + @Test + @Category(NeedsRunner.class) + public void testDynamicDestinationsViaSinkWithFormatterWithNumShards() throws Exception { + testDynamicDestinationsUnwindowedWithSharding( + WriteMethod.AVROIO_SINK_WITH_FORMATTER, Sharding.FIXED_3_SHARDS); + } + + @Test + @SuppressWarnings("unchecked") + @Category(NeedsRunner.class) + public void testMetadata() throws Exception { + List values = + ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar")); + File outputFile = tmpFolder.newFile("output.avro"); + + writePipeline + .apply(Create.of(values)) + .apply( + AvroIO.write(GenericClass.class) + .to(outputFile.getAbsolutePath()) + .withoutSharding() + .withMetadata( + ImmutableMap.of( + "stringKey", + "stringValue", + "longKey", + 100L, + "bytesKey", + "bytesValue".getBytes(Charsets.UTF_8)))); + 
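+ // The metadata entries above are stored in the Avro file header; the DataFileStream assertions below read them back by key.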
writePipeline.run(); + + try (DataFileStream dataFileStream = + new DataFileStream(new FileInputStream(outputFile), new GenericDatumReader())) { + assertEquals("stringValue", dataFileStream.getMetaString("stringKey")); + assertEquals(100L, dataFileStream.getMetaLong("longKey")); + assertArrayEquals( + "bytesValue".getBytes(Charsets.UTF_8), dataFileStream.getMeta("bytesKey")); + } + } + + // using AvroCoder#createDatumReader for tests. + private void runTestWrite(String[] expectedElements, int numShards) throws IOException { + File baseOutputFile = new File(tmpFolder.getRoot(), "prefix"); + String outputFilePrefix = baseOutputFile.getAbsolutePath(); + + AvroIO.Write write = + AvroIO.write(String.class).to(outputFilePrefix).withSuffix(".avro"); + if (numShards > 1) { + write = write.withNumShards(numShards); + } else { + write = write.withoutSharding(); + } + writePipeline.apply(Create.of(ImmutableList.copyOf(expectedElements))).apply(write); + writePipeline.run(); + + String shardNameTemplate = + firstNonNull( + write.inner.getShardTemplate(), + DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE); + + assertTestOutputs(expectedElements, numShards, outputFilePrefix, shardNameTemplate); + } + + static void assertTestOutputs( + String[] expectedElements, int numShards, String outputFilePrefix, String shardNameTemplate) + throws IOException { + // Validate that the data written matches the expected elements in the expected order + List expectedFiles = new ArrayList<>(); + for (int i = 0; i < numShards; i++) { + expectedFiles.add( + new File( + DefaultFilenamePolicy.constructName( + FileBasedSink.convertToFileResourceIfPossible(outputFilePrefix), + shardNameTemplate, + ".avro", + i, + numShards, + null, + null) + .toString())); + } + + List actualElements = new ArrayList<>(); + for (File outputFile : expectedFiles) { + assertTrue("Expected output file " + outputFile.getName(), outputFile.exists()); + try (DataFileReader reader = + new DataFileReader<>( + outputFile, new ReflectDatumReader(ReflectData.get().getSchema(String.class)))) { + Iterators.addAll(actualElements, reader); + } + } + assertThat(actualElements, containsInAnyOrder(expectedElements)); + } + + @Test + @Category(NeedsRunner.class) + public void testAvroSinkWrite() throws Exception { + String[] expectedElements = new String[] {"first", "second", "third"}; + + runTestWrite(expectedElements, 1); + } + + @Test + @Category(NeedsRunner.class) + public void testAvroSinkShardedWrite() throws Exception { + String[] expectedElements = new String[] {"first", "second", "third", "fourth", "fifth"}; + + runTestWrite(expectedElements, 4); + } + + @Test + @Category(NeedsRunner.class) + public void testAvroSinkWriteWithCustomFactory() throws Exception { + Integer[] expectedElements = new Integer[] {1, 2, 3, 4, 5}; + + File baseOutputFile = new File(tmpFolder.getRoot(), "prefix"); + String outputFilePrefix = baseOutputFile.getAbsolutePath(); + + Schema recordSchema = SchemaBuilder.record("root").fields().requiredInt("i1").endRecord(); + + AvroIO.TypedWrite write = + AvroIO.writeCustomType() + .to(outputFilePrefix) + .withSchema(recordSchema) + .withFormatFunction(f -> f) + .withDatumWriterFactory( + f -> + new DatumWriter() { + private DatumWriter inner = new GenericDatumWriter<>(f); + + @Override + public void setSchema(Schema schema) { + inner.setSchema(schema); + } + + @Override + public void write(Integer datum, Encoder out) throws IOException { + GenericRecord record = + new GenericRecordBuilder(f).set("i1", datum).build(); + 
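+ // Delegate the wrapped one-field record to the inner GenericDatumWriter so each Integer is written as a GenericRecord.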
inner.write(record, out); + } + }) + .withSuffix(".avro"); + + write = write.withoutSharding(); + + writePipeline.apply(Create.of(ImmutableList.copyOf(expectedElements))).apply(write); + writePipeline.run(); + + File expectedFile = + new File( + DefaultFilenamePolicy.constructName( + FileBasedSink.convertToFileResourceIfPossible(outputFilePrefix), + "", + ".avro", + 1, + 1, + null, + null) + .toString()); + + assertTrue("Expected output file " + expectedFile.getName(), expectedFile.exists()); + DataFileReader dataFileReader = + new DataFileReader<>(expectedFile, new GenericDatumReader<>(recordSchema)); + + List actualRecords = new ArrayList<>(); + Iterators.addAll(actualRecords, dataFileReader); + + GenericRecord[] expectedRecords = + Arrays.stream(expectedElements) + .map(i -> new GenericRecordBuilder(recordSchema).set("i1", i).build()) + .toArray(GenericRecord[]::new); + + assertThat(actualRecords, containsInAnyOrder(expectedRecords)); + } + + // TODO: for Write only, test withSuffix, + // withShardNameTemplate and withoutSharding. + } +} diff --git a/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/io/AvroSchemaIOProviderTest.java b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/io/AvroSchemaIOProviderTest.java new file mode 100644 index 0000000000000..b003597200eb2 --- /dev/null +++ b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/io/AvroSchemaIOProviderTest.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.avro.io; + +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.time.Duration; +import java.util.Arrays; +import java.util.List; +import org.apache.beam.sdk.coders.RowCoder; +import org.apache.beam.sdk.io.FileSystems; +import org.apache.beam.sdk.io.fs.MatchResult; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.schemas.io.SchemaIO; +import org.apache.beam.sdk.testing.NeedsRunner; +import org.apache.beam.sdk.testing.PAssert; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.testing.TestStream; +import org.apache.beam.sdk.transforms.Create; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.Row; +import org.apache.beam.sdk.values.TimestampedValue; +import org.joda.time.Instant; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Test for AvroSchemaIOProvider. 
*/ +@RunWith(JUnit4.class) +public class AvroSchemaIOProviderTest { + @Rule public TestPipeline writePipeline = TestPipeline.create(); + @Rule public TestPipeline readPipeline = TestPipeline.create(); + @Rule public TemporaryFolder tempFolder = new TemporaryFolder(); + + private static final Schema SCHEMA = + Schema.builder().addInt64Field("age").addStringField("age_str").build(); + + private Row createRow(long l) { + return Row.withSchema(SCHEMA).addValues(l, Long.valueOf(l).toString()).build(); + } + + @Test + @Category({NeedsRunner.class}) + public void testWriteAndReadTable() { + File destinationFile = new File(tempFolder.getRoot(), "person-info.avro"); + + AvroSchemaIOProvider provider = new AvroSchemaIOProvider(); + Row configuration = Row.withSchema(provider.configurationSchema()).addValue(null).build(); + SchemaIO io = provider.from(destinationFile.getAbsolutePath(), configuration, SCHEMA); + + List rowsList = Arrays.asList(createRow(1L), createRow(3L), createRow(4L)); + PCollection rows = + writePipeline.apply("Create", Create.of(rowsList).withCoder(RowCoder.of(SCHEMA))); + rows.apply(io.buildWriter()); + writePipeline.run(); + + PCollection read = readPipeline.begin().apply(io.buildReader()); + PAssert.that(read).containsInAnyOrder(rowsList); + readPipeline.run(); + } + + @Test + @Category({NeedsRunner.class}) + public void testStreamingWriteDefault() throws Exception { + File destinationFile = new File(tempFolder.getRoot(), "person-info"); + + AvroSchemaIOProvider provider = new AvroSchemaIOProvider(); + Row config = Row.withSchema(provider.configurationSchema()).addValue(null).build(); + SchemaIO writeIO = provider.from(destinationFile.getAbsolutePath(), config, SCHEMA); + + TestStream createEvents = + TestStream.create(RowCoder.of(SCHEMA)) + .addElements(TimestampedValue.of(createRow(1L), new Instant(1L))) + .addElements(TimestampedValue.of(createRow(2L), Instant.ofEpochSecond(120L))) + .advanceWatermarkToInfinity(); + + writePipeline.apply("create", createEvents).apply("write", writeIO.buildWriter()); + writePipeline.run(); + + // Verify we wrote two files. + String wildcardPath = destinationFile.getAbsolutePath() + "*"; + MatchResult result = FileSystems.match(wildcardPath); + assertEquals(2, result.metadata().size()); + + // Verify results of the files. + SchemaIO readIO = provider.from(wildcardPath, config, SCHEMA); + PCollection read = readPipeline.begin().apply("read", readIO.buildReader()); + PAssert.that(read).containsInAnyOrder(createRow(1L), createRow(2L)); + readPipeline.run(); + } + + @Test + @Category({NeedsRunner.class}) + public void testStreamingCustomWindowSize() throws Exception { + File destinationFile = new File(tempFolder.getRoot(), "person-info"); + + AvroSchemaIOProvider provider = new AvroSchemaIOProvider(); + Row config = + Row.withSchema(provider.configurationSchema()) + .addValue(Duration.ofMinutes(4).getSeconds()) + .build(); + SchemaIO writeIO = provider.from(destinationFile.getAbsolutePath(), config, SCHEMA); + + TestStream createEvents = + TestStream.create(RowCoder.of(SCHEMA)) + .addElements(TimestampedValue.of(createRow(1L), new Instant(1L))) + .addElements(TimestampedValue.of(createRow(2L), Instant.ofEpochSecond(120L))) + .advanceWatermarkToInfinity(); + + writePipeline.apply("create", createEvents).apply("write", writeIO.buildWriter()); + writePipeline.run(); + + // Verify we wrote one file. 
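+ // With a four-minute window configured, both elements (timestamps ~0 s and 120 s) fall into the same window, so only one output file should be produced.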
+ String wildcardPath = destinationFile.getAbsolutePath() + "*"; + MatchResult result = FileSystems.match(wildcardPath); + assertEquals(1, result.metadata().size()); + + // Verify results of the files. + SchemaIO readIO = provider.from(wildcardPath, config, SCHEMA); + PCollection read = readPipeline.begin().apply("read", readIO.buildReader()); + PAssert.that(read).containsInAnyOrder(createRow(1L), createRow(2L)); + readPipeline.run(); + } + + @Test + @Category({NeedsRunner.class}) + public void testBatchCustomWindowSize() throws Exception { + File destinationFile = new File(tempFolder.getRoot(), "person-info"); + + AvroSchemaIOProvider provider = new AvroSchemaIOProvider(); + Row config = + Row.withSchema(provider.configurationSchema()) + .addValue(Duration.ofMinutes(4).getSeconds()) + .build(); + SchemaIO writeIO = provider.from(destinationFile.getAbsolutePath(), config, SCHEMA); + + List rowsList = Arrays.asList(createRow(1L), createRow(3L), createRow(4L)); + PCollection rows = + writePipeline.apply("Create", Create.of(rowsList).withCoder(RowCoder.of(SCHEMA))); + + rows.apply("write", writeIO.buildWriter()); + writePipeline.run(); + + // Verify we wrote one file. + String wildcardPath = destinationFile.getAbsolutePath() + "*"; + MatchResult result = FileSystems.match(wildcardPath); + assertEquals(1, result.metadata().size()); + + // Verify results of the files. + SchemaIO readIO = provider.from(wildcardPath, config, SCHEMA); + PCollection read = readPipeline.begin().apply("read", readIO.buildReader()); + PAssert.that(read).containsInAnyOrder(rowsList); + readPipeline.run(); + } +} diff --git a/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/io/AvroSourceTest.java b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/io/AvroSourceTest.java new file mode 100644 index 0000000000000..df382d86f2150 --- /dev/null +++ b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/io/AvroSourceTest.java @@ -0,0 +1,846 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.avro.io; + +import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Objects; +import java.util.Random; +import java.util.stream.Collectors; +import org.apache.avro.Schema; +import org.apache.avro.file.CodecFactory; +import org.apache.avro.file.DataFileConstants; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.io.Decoder; +import org.apache.avro.reflect.AvroDefault; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.reflect.ReflectDatumWriter; +import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; +import org.apache.beam.sdk.io.BlockBasedSource; +import org.apache.beam.sdk.io.BlockBasedSource.BlockBasedReader; +import org.apache.beam.sdk.io.BoundedSource; +import org.apache.beam.sdk.io.BoundedSource.BoundedReader; +import org.apache.beam.sdk.io.FileBasedSource; +import org.apache.beam.sdk.io.FileSystems; +import org.apache.beam.sdk.io.fs.MatchResult.Metadata; +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.testing.SourceTestUtils; +import org.apache.beam.sdk.transforms.display.DisplayData; +import org.apache.beam.sdk.util.SerializableUtils; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.MoreObjects; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.hamcrest.Matchers; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for AvroSource. */ +@RunWith(JUnit4.class) +public class AvroSourceTest { + @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); + + @Rule public ExpectedException expectedException = ExpectedException.none(); + + private enum SyncBehavior { + SYNC_REGULAR, // Sync at regular, user defined intervals + SYNC_RANDOM, // Sync at random intervals + SYNC_DEFAULT // Sync at default intervals (i.e., no manual syncing). + } + + private static final int DEFAULT_RECORD_COUNT = 1000; + + /** + * Generates an input Avro file containing the given records in the temporary directory and + * returns the full path of the file. + */ + private String generateTestFile( + String filename, + List elems, + SyncBehavior syncBehavior, + int syncInterval, + AvroCoder coder, + String codec) + throws IOException { + Random random = new Random(0); + File tmpFile = tmpFolder.newFile(filename); + String path = tmpFile.toString(); + + FileOutputStream os = new FileOutputStream(tmpFile); + DatumWriter datumWriter = + coder.getType().equals(GenericRecord.class) + ? 
new GenericDatumWriter<>(coder.getSchema()) + : new ReflectDatumWriter<>(coder.getSchema()); + try (DataFileWriter writer = new DataFileWriter<>(datumWriter)) { + writer.setCodec(CodecFactory.fromString(codec)); + writer.create(coder.getSchema(), os); + + int recordIndex = 0; + int syncIndex = syncBehavior == SyncBehavior.SYNC_RANDOM ? random.nextInt(syncInterval) : 0; + + for (T elem : elems) { + writer.append(elem); + recordIndex++; + + switch (syncBehavior) { + case SYNC_REGULAR: + if (recordIndex == syncInterval) { + recordIndex = 0; + writer.sync(); + } + break; + case SYNC_RANDOM: + if (recordIndex == syncIndex) { + recordIndex = 0; + writer.sync(); + syncIndex = random.nextInt(syncInterval); + } + break; + case SYNC_DEFAULT: + default: + } + } + } + return path; + } + + @Test + public void testReadWithDifferentCodecs() throws Exception { + // Test reading files generated using all codecs. + String[] codecs = { + DataFileConstants.NULL_CODEC, + DataFileConstants.BZIP2_CODEC, + DataFileConstants.DEFLATE_CODEC, + DataFileConstants.SNAPPY_CODEC, + DataFileConstants.XZ_CODEC, + }; + // As Avro's default block size is 64KB, write 64K records to ensure at least one full block. + // We could make this smaller than 64KB assuming each record is at least B bytes, but then the + // test could silently stop testing the failure condition from BEAM-422. + List expected = createRandomRecords(1 << 16); + + for (String codec : codecs) { + String filename = + generateTestFile( + codec, expected, SyncBehavior.SYNC_DEFAULT, 0, AvroCoder.of(Bird.class), codec); + AvroSource source = AvroSource.from(filename).withSchema(Bird.class); + List actual = SourceTestUtils.readFromSource(source, null); + assertThat(expected, containsInAnyOrder(actual.toArray())); + } + } + + @Test + public void testSplitAtFraction() throws Exception { + // A reduced dataset is enough here. + List expected = createFixedRecords(DEFAULT_RECORD_COUNT); + // Create an AvroSource where each block is 1/10th of the total set of records. + String filename = + generateTestFile( + "tmp.avro", + expected, + SyncBehavior.SYNC_REGULAR, + DEFAULT_RECORD_COUNT / 10 /* max records per block */, + AvroCoder.of(FixedRecord.class), + DataFileConstants.NULL_CODEC); + File file = new File(filename); + + AvroSource source = AvroSource.from(filename).withSchema(FixedRecord.class); + List> splits = source.split(file.length() / 3, null); + for (BoundedSource subSource : splits) { + int items = SourceTestUtils.readFromSource(subSource, null).size(); + // Shouldn't split while unstarted. 
+ SourceTestUtils.assertSplitAtFractionFails(subSource, 0, 0.0, null); + SourceTestUtils.assertSplitAtFractionFails(subSource, 0, 0.7, null); + SourceTestUtils.assertSplitAtFractionSucceedsAndConsistent(subSource, 1, 0.7, null); + SourceTestUtils.assertSplitAtFractionSucceedsAndConsistent( + subSource, DEFAULT_RECORD_COUNT / 100, 0.7, null); + SourceTestUtils.assertSplitAtFractionSucceedsAndConsistent( + subSource, DEFAULT_RECORD_COUNT / 10, 0.1, null); + SourceTestUtils.assertSplitAtFractionFails( + subSource, DEFAULT_RECORD_COUNT / 10 + 1, 0.1, null); + SourceTestUtils.assertSplitAtFractionFails(subSource, DEFAULT_RECORD_COUNT / 3, 0.3, null); + SourceTestUtils.assertSplitAtFractionFails(subSource, items, 0.9, null); + SourceTestUtils.assertSplitAtFractionFails(subSource, items, 1.0, null); + SourceTestUtils.assertSplitAtFractionSucceedsAndConsistent(subSource, items, 0.999, null); + } + } + + @Test + public void testGetProgressFromUnstartedReader() throws Exception { + List records = createFixedRecords(DEFAULT_RECORD_COUNT); + String filename = + generateTestFile( + "tmp.avro", + records, + SyncBehavior.SYNC_DEFAULT, + 1000, + AvroCoder.of(FixedRecord.class), + DataFileConstants.NULL_CODEC); + File file = new File(filename); + + AvroSource source = AvroSource.from(filename).withSchema(FixedRecord.class); + try (BoundedReader reader = source.createReader(null)) { + assertEquals(Double.valueOf(0.0), reader.getFractionConsumed()); + } + + List> splits = source.split(file.length() / 3, null); + for (BoundedSource subSource : splits) { + try (BoundedReader reader = subSource.createReader(null)) { + assertEquals(Double.valueOf(0.0), reader.getFractionConsumed()); + } + } + } + + @Test + public void testProgress() throws Exception { + // 5 records, 2 per block. + List records = createFixedRecords(5); + String filename = + generateTestFile( + "tmp.avro", + records, + SyncBehavior.SYNC_REGULAR, + 2, + AvroCoder.of(FixedRecord.class), + DataFileConstants.NULL_CODEC); + + AvroSource source = AvroSource.from(filename).withSchema(FixedRecord.class); + try (BoundedReader readerOrig = source.createReader(null)) { + assertThat(readerOrig, Matchers.instanceOf(BlockBasedReader.class)); + BlockBasedReader reader = (BlockBasedReader) readerOrig; + + // Before starting + assertEquals(0.0, reader.getFractionConsumed(), 1e-6); + assertEquals(0, reader.getSplitPointsConsumed()); + assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining()); + + // First 2 records are in the same block. + assertTrue(reader.start()); + assertTrue(reader.isAtSplitPoint()); + assertEquals(0, reader.getSplitPointsConsumed()); + assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining()); + // continued + assertTrue(reader.advance()); + assertFalse(reader.isAtSplitPoint()); + assertEquals(0, reader.getSplitPointsConsumed()); + assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining()); + + // Second block -> parallelism consumed becomes 1. + assertTrue(reader.advance()); + assertTrue(reader.isAtSplitPoint()); + assertEquals(1, reader.getSplitPointsConsumed()); + assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining()); + // continued + assertTrue(reader.advance()); + assertFalse(reader.isAtSplitPoint()); + assertEquals(1, reader.getSplitPointsConsumed()); + assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining()); + + // Third and final block -> parallelism consumed becomes 2, remaining becomes 1. 
+ assertTrue(reader.advance()); + assertTrue(reader.isAtSplitPoint()); + assertEquals(2, reader.getSplitPointsConsumed()); + assertEquals(1, reader.getSplitPointsRemaining()); + + // Done + assertFalse(reader.advance()); + assertEquals(3, reader.getSplitPointsConsumed()); + assertEquals(0, reader.getSplitPointsRemaining()); + assertEquals(1.0, reader.getFractionConsumed(), 1e-6); + } + } + + @Test + public void testProgressEmptySource() throws Exception { + // 0 records, 20 per block. + List records = Collections.emptyList(); + String filename = + generateTestFile( + "tmp.avro", + records, + SyncBehavior.SYNC_REGULAR, + 2, + AvroCoder.of(FixedRecord.class), + DataFileConstants.NULL_CODEC); + + AvroSource source = AvroSource.from(filename).withSchema(FixedRecord.class); + try (BoundedReader readerOrig = source.createReader(null)) { + assertThat(readerOrig, Matchers.instanceOf(BlockBasedReader.class)); + BlockBasedReader reader = (BlockBasedReader) readerOrig; + + // before starting + assertEquals(0.0, reader.getFractionConsumed(), 1e-6); + assertEquals(0, reader.getSplitPointsConsumed()); + assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining()); + + // confirm empty + assertFalse(reader.start()); + + // after reading empty source + assertEquals(0, reader.getSplitPointsConsumed()); + assertEquals(0, reader.getSplitPointsRemaining()); + assertEquals(1.0, reader.getFractionConsumed(), 1e-6); + } + } + + @Test + public void testGetCurrentFromUnstartedReader() throws Exception { + List records = createFixedRecords(DEFAULT_RECORD_COUNT); + String filename = + generateTestFile( + "tmp.avro", + records, + SyncBehavior.SYNC_DEFAULT, + 1000, + AvroCoder.of(FixedRecord.class), + DataFileConstants.NULL_CODEC); + + AvroSource source = AvroSource.from(filename).withSchema(FixedRecord.class); + try (BlockBasedSource.BlockBasedReader reader = + (BlockBasedSource.BlockBasedReader) source.createReader(null)) { + assertEquals(null, reader.getCurrentBlock()); + + expectedException.expect(NoSuchElementException.class); + expectedException.expectMessage("No block has been successfully read from"); + reader.getCurrent(); + } + } + + @Test + public void testSplitAtFractionExhaustive() throws Exception { + // A small-sized input is sufficient, because the test verifies that splitting is non-vacuous. + List expected = createFixedRecords(20); + String filename = + generateTestFile( + "tmp.avro", + expected, + SyncBehavior.SYNC_REGULAR, + 5, + AvroCoder.of(FixedRecord.class), + DataFileConstants.NULL_CODEC); + + AvroSource source = AvroSource.from(filename).withSchema(FixedRecord.class); + SourceTestUtils.assertSplitAtFractionExhaustive(source, null); + } + + @Test + public void testSplitsWithSmallBlocks() throws Exception { + PipelineOptions options = PipelineOptionsFactory.create(); + // Test reading from an object file with many small random-sized blocks. + // The file itself doesn't have to be big; we can use a decreased record count. 
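+ // The source is split three ways below: at the 100-byte minimum bundle size, at a quarter of the file length, and at the full file length (which should produce a single split).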
+ List expected = createRandomRecords(DEFAULT_RECORD_COUNT); + String filename = + generateTestFile( + "tmp.avro", + expected, + SyncBehavior.SYNC_RANDOM, + DEFAULT_RECORD_COUNT / 20 /* max records/block */, + AvroCoder.of(Bird.class), + DataFileConstants.NULL_CODEC); + File file = new File(filename); + + // Small minimum bundle size + AvroSource source = + AvroSource.from(filename).withSchema(Bird.class).withMinBundleSize(100L); + + // Assert that the source produces the expected records + assertEquals(expected, SourceTestUtils.readFromSource(source, options)); + + List> splits; + int nonEmptySplits; + + // Split with the minimum bundle size + splits = source.split(100L, options); + assertTrue(splits.size() > 2); + SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options); + nonEmptySplits = 0; + for (BoundedSource subSource : splits) { + if (SourceTestUtils.readFromSource(subSource, options).size() > 0) { + nonEmptySplits += 1; + } + } + assertTrue(nonEmptySplits > 2); + + // Split with larger bundle size + splits = source.split(file.length() / 4, options); + assertTrue(splits.size() > 2); + SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options); + nonEmptySplits = 0; + for (BoundedSource subSource : splits) { + if (SourceTestUtils.readFromSource(subSource, options).size() > 0) { + nonEmptySplits += 1; + } + } + assertTrue(nonEmptySplits > 2); + + // Split with the file length + splits = source.split(file.length(), options); + assertTrue(splits.size() == 1); + SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options); + } + + @Test + public void testMultipleFiles() throws Exception { + String baseName = "tmp-"; + List expected = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List contents = createRandomRecords(DEFAULT_RECORD_COUNT / 10); + expected.addAll(contents); + generateTestFile( + baseName + i, + contents, + SyncBehavior.SYNC_DEFAULT, + 0, + AvroCoder.of(Bird.class), + DataFileConstants.NULL_CODEC); + } + + AvroSource source = + AvroSource.from(new File(tmpFolder.getRoot().toString(), baseName + "*").toString()) + .withSchema(Bird.class); + List actual = SourceTestUtils.readFromSource(source, null); + assertThat(actual, containsInAnyOrder(expected.toArray())); + } + + @Test + public void testCreationWithSchema() throws Exception { + List expected = createRandomRecords(100); + String filename = + generateTestFile( + "tmp.avro", + expected, + SyncBehavior.SYNC_DEFAULT, + 0, + AvroCoder.of(Bird.class), + DataFileConstants.NULL_CODEC); + + // Create a source with a schema object + Schema schema = ReflectData.get().getSchema(Bird.class); + AvroSource source = AvroSource.from(filename).withSchema(schema); + List records = SourceTestUtils.readFromSource(source, null); + assertEqualsWithGeneric(expected, records); + + // Create a source with a JSON schema + String schemaString = ReflectData.get().getSchema(Bird.class).toString(); + source = AvroSource.from(filename).withSchema(schemaString); + records = SourceTestUtils.readFromSource(source, null); + assertEqualsWithGeneric(expected, records); + } + + @Test + public void testSchemaUpdate() throws Exception { + List birds = createRandomRecords(100); + String filename = + generateTestFile( + "tmp.avro", + birds, + SyncBehavior.SYNC_DEFAULT, + 0, + AvroCoder.of(Bird.class), + DataFileConstants.NULL_CODEC); + + AvroSource source = AvroSource.from(filename).withSchema(FancyBird.class); + List actual = SourceTestUtils.readFromSource(source, null); + + List expected = new 
ArrayList<>(); + for (Bird bird : birds) { + expected.add( + new FancyBird( + bird.number, bird.species, bird.quality, bird.quantity, null, "MAXIMUM OVERDRIVE")); + } + + assertThat(actual, containsInAnyOrder(expected.toArray())); + } + + @Test + public void testSchemaStringIsInterned() throws Exception { + List birds = createRandomRecords(100); + String filename = + generateTestFile( + "tmp.avro", + birds, + SyncBehavior.SYNC_DEFAULT, + 0, + AvroCoder.of(Bird.class), + DataFileConstants.NULL_CODEC); + Metadata fileMetadata = FileSystems.matchSingleFileSpec(filename); + String schema = AvroSource.readMetadataFromFile(fileMetadata.resourceId()).getSchemaString(); + // Add "" to the schema to make sure it is not interned. + AvroSource sourceA = AvroSource.from(filename).withSchema("" + schema); + AvroSource sourceB = AvroSource.from(filename).withSchema("" + schema); + assertSame(sourceA.getReaderSchemaString(), sourceB.getReaderSchemaString()); + + // Ensure that deserialization still goes through interning + AvroSource sourceC = SerializableUtils.clone(sourceB); + assertSame(sourceA.getReaderSchemaString(), sourceC.getReaderSchemaString()); + } + + @Test + public void testParseFn() throws Exception { + List expected = createRandomRecords(100); + String filename = + generateTestFile( + "tmp.avro", + expected, + SyncBehavior.SYNC_DEFAULT, + 0, + AvroCoder.of(Bird.class), + DataFileConstants.NULL_CODEC); + + AvroSource source = + AvroSource.from(filename) + .withParseFn( + input -> + new Bird( + (long) input.get("number"), + input.get("species").toString(), + input.get("quality").toString(), + (long) input.get("quantity")), + AvroCoder.of(Bird.class)); + List actual = SourceTestUtils.readFromSource(source, null); + assertThat(actual, containsInAnyOrder(expected.toArray())); + } + + @Test + public void testDatumReaderFactoryWithGenericRecord() throws Exception { + List inputBirds = createRandomRecords(100); + + String filename = + generateTestFile( + "tmp.avro", + inputBirds, + SyncBehavior.SYNC_DEFAULT, + 0, + AvroCoder.of(Bird.class), + DataFileConstants.NULL_CODEC); + + AvroSource.DatumReaderFactory factory = + (writer, reader) -> + new GenericDatumReader(writer, reader) { + @Override + protected Object readString(Object old, Decoder in) throws IOException { + return super.readString(old, in) + "_custom"; + } + }; + + AvroSource source = + AvroSource.from(filename) + .withParseFn( + input -> + new Bird( + (long) input.get("number"), + input.get("species").toString(), + input.get("quality").toString(), + (long) input.get("quantity")), + AvroCoder.of(Bird.class)) + .withDatumReaderFactory(factory); + List actual = SourceTestUtils.readFromSource(source, null); + List expected = + inputBirds.stream() + .map(b -> new Bird(b.number, b.species + "_custom", b.quality + "_custom", b.quantity)) + .collect(Collectors.toList()); + + assertThat(actual, containsInAnyOrder(expected.toArray())); + } + + private void assertEqualsWithGeneric(List expected, List actual) { + assertEquals(expected.size(), actual.size()); + for (int i = 0; i < expected.size(); i++) { + Bird fixed = expected.get(i); + GenericRecord generic = actual.get(i); + assertEquals(fixed.number, generic.get("number")); + assertEquals(fixed.quality, generic.get("quality").toString()); // From Avro util.Utf8 + assertEquals(fixed.quantity, generic.get("quantity")); + assertEquals(fixed.species, generic.get("species").toString()); + } + } + + @Test + public void testDisplayData() { + AvroSource source = + 
AvroSource.from("foobar.txt").withSchema(Bird.class).withMinBundleSize(1234); + + DisplayData displayData = DisplayData.from(source); + assertThat(displayData, hasDisplayItem("filePattern", "foobar.txt")); + assertThat(displayData, hasDisplayItem("minBundleSize", 1234)); + } + + @Test + public void testReadMetadataWithCodecs() throws Exception { + // Test reading files generated using all codecs. + String[] codecs = { + DataFileConstants.NULL_CODEC, + DataFileConstants.BZIP2_CODEC, + DataFileConstants.DEFLATE_CODEC, + DataFileConstants.SNAPPY_CODEC, + DataFileConstants.XZ_CODEC + }; + List expected = createRandomRecords(DEFAULT_RECORD_COUNT); + + for (String codec : codecs) { + String filename = + generateTestFile( + codec, expected, SyncBehavior.SYNC_DEFAULT, 0, AvroCoder.of(Bird.class), codec); + + Metadata fileMeta = FileSystems.matchSingleFileSpec(filename); + AvroSource.AvroMetadata metadata = AvroSource.readMetadataFromFile(fileMeta.resourceId()); + assertEquals(codec, metadata.getCodec()); + } + } + + @Test + public void testReadSchemaString() throws Exception { + List expected = createRandomRecords(DEFAULT_RECORD_COUNT); + String codec = DataFileConstants.NULL_CODEC; + String filename = + generateTestFile( + codec, expected, SyncBehavior.SYNC_DEFAULT, 0, AvroCoder.of(Bird.class), codec); + Metadata fileMeta = FileSystems.matchSingleFileSpec(filename); + AvroSource.AvroMetadata metadata = AvroSource.readMetadataFromFile(fileMeta.resourceId()); + // By default, parse validates the schema, which is what we want. + Schema schema = new Schema.Parser().parse(metadata.getSchemaString()); + assertEquals(4, schema.getFields().size()); + } + + @Test + public void testCreateFromMetadata() throws Exception { + List expected = createRandomRecords(DEFAULT_RECORD_COUNT); + String codec = DataFileConstants.NULL_CODEC; + String filename = + generateTestFile( + codec, expected, SyncBehavior.SYNC_DEFAULT, 0, AvroCoder.of(Bird.class), codec); + Metadata fileMeta = FileSystems.matchSingleFileSpec(filename); + + AvroSource source = AvroSource.from(fileMeta); + AvroSource sourceWithSchema = source.withSchema(Bird.class); + AvroSource sourceWithSchemaWithMinBundleSize = sourceWithSchema.withMinBundleSize(1234); + + assertEquals(FileBasedSource.Mode.SINGLE_FILE_OR_SUBRANGE, source.getMode()); + assertEquals(FileBasedSource.Mode.SINGLE_FILE_OR_SUBRANGE, sourceWithSchema.getMode()); + assertEquals( + FileBasedSource.Mode.SINGLE_FILE_OR_SUBRANGE, sourceWithSchemaWithMinBundleSize.getMode()); + } + + /** + * Class that will encode to a fixed size: 16 bytes. + * + *

Each object has a 15-byte array. Avro encodes an object of this type as a byte array, so + * each encoded object will consist of 1 byte that encodes the length of the array, followed by 15 + * bytes. + */ + @DefaultCoder(AvroCoder.class) + public static class FixedRecord { + private byte[] value = new byte[15]; + + public FixedRecord() { + this(0); + } + + public FixedRecord(int i) { + value[0] = (byte) i; + value[1] = (byte) (i >> 8); + value[2] = (byte) (i >> 16); + value[3] = (byte) (i >> 24); + } + + public int asInt() { + return value[0] | (value[1] << 8) | (value[2] << 16) | (value[3] << 24); + } + + @Override + public boolean equals(@Nullable Object o) { + if (o instanceof FixedRecord) { + FixedRecord other = (FixedRecord) o; + return this.asInt() == other.asInt(); + } + return false; + } + + @Override + public int hashCode() { + return toString().hashCode(); + } + + @Override + public String toString() { + return Integer.toString(this.asInt()); + } + } + + /** Create a list of count 16-byte records. */ + private static List createFixedRecords(int count) { + List records = new ArrayList<>(); + for (int i = 0; i < count; i++) { + records.add(new FixedRecord(i)); + } + return records; + } + + /** Class used as the record type in tests. */ + @DefaultCoder(AvroCoder.class) + static class Bird { + long number; + String species; + String quality; + long quantity; + + public Bird() {} + + public Bird(long number, String species, String quality, long quantity) { + this.number = number; + this.species = species; + this.quality = quality; + this.quantity = quantity; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(Bird.class) + .addValue(number) + .addValue(species) + .addValue(quantity) + .addValue(quality) + .toString(); + } + + @Override + public boolean equals(@Nullable Object obj) { + if (obj instanceof Bird) { + Bird other = (Bird) obj; + return Objects.equals(species, other.species) + && Objects.equals(quality, other.quality) + && quantity == other.quantity + && number == other.number; + } + return false; + } + + @Override + public int hashCode() { + return Objects.hash(number, species, quality, quantity); + } + } + + /** + * Class used as the record type in tests. + * + *

Contains nullable fields and fields with default values. Can be read using a file written + * with the Bird schema. + */ + @DefaultCoder(AvroCoder.class) + public static class FancyBird { + long number; + String species; + String quality; + long quantity; + + @org.apache.avro.reflect.Nullable String habitat; + + @AvroDefault("\"MAXIMUM OVERDRIVE\"") + String fancinessLevel; + + public FancyBird() {} + + public FancyBird( + long number, + String species, + String quality, + long quantity, + String habitat, + String fancinessLevel) { + this.number = number; + this.species = species; + this.quality = quality; + this.quantity = quantity; + this.habitat = habitat; + this.fancinessLevel = fancinessLevel; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(FancyBird.class) + .addValue(number) + .addValue(species) + .addValue(quality) + .addValue(quantity) + .addValue(habitat) + .addValue(fancinessLevel) + .toString(); + } + + @Override + public boolean equals(@Nullable Object obj) { + if (obj instanceof FancyBird) { + FancyBird other = (FancyBird) obj; + return Objects.equals(species, other.species) + && Objects.equals(quality, other.quality) + && quantity == other.quantity + && number == other.number + && Objects.equals(fancinessLevel, other.fancinessLevel) + && Objects.equals(habitat, other.habitat); + } + return false; + } + + @Override + public int hashCode() { + return Objects.hash(number, species, quality, quantity, habitat, fancinessLevel); + } + } + + /** Create a list of n random records. */ + private static List createRandomRecords(long n) { + String[] qualities = { + "miserable", "forelorn", "fidgity", "squirrelly", "fanciful", "chipper", "lazy" + }; + String[] species = {"pigeons", "owls", "gulls", "hawks", "robins", "jays"}; + Random random = new Random(0); + + List records = new ArrayList<>(); + for (long i = 0; i < n; i++) { + Bird bird = new Bird(); + bird.quality = qualities[random.nextInt(qualities.length)]; + bird.species = species[random.nextInt(species.length)]; + bird.number = i; + bird.quantity = random.nextLong(); + records.add(bird); + } + return records; + } +} diff --git a/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/io/SerializableAvroCodecFactoryTest.java b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/io/SerializableAvroCodecFactoryTest.java new file mode 100644 index 0000000000000..241ad11635a8b --- /dev/null +++ b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/io/SerializableAvroCodecFactoryTest.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.avro.io; + +import static org.apache.avro.file.DataFileConstants.BZIP2_CODEC; +import static org.apache.avro.file.DataFileConstants.DEFLATE_CODEC; +import static org.apache.avro.file.DataFileConstants.NULL_CODEC; +import static org.apache.avro.file.DataFileConstants.SNAPPY_CODEC; +import static org.apache.avro.file.DataFileConstants.XZ_CODEC; +import static org.junit.Assert.assertEquals; + +import java.util.Arrays; +import java.util.List; +import org.apache.avro.file.CodecFactory; +import org.apache.beam.sdk.util.SerializableUtils; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests of SerializableAvroCodecFactory. */ +@RunWith(JUnit4.class) +public class SerializableAvroCodecFactoryTest { + private final List avroCodecs = + Arrays.asList(NULL_CODEC, SNAPPY_CODEC, DEFLATE_CODEC, XZ_CODEC, BZIP2_CODEC); + + @Test + public void testDefaultCodecsIn() throws Exception { + for (String codec : avroCodecs) { + SerializableAvroCodecFactory codecFactory = + new SerializableAvroCodecFactory(CodecFactory.fromString(codec)); + + assertEquals(CodecFactory.fromString(codec).toString(), codecFactory.getCodec().toString()); + } + } + + @Test + public void testDefaultCodecsSerDe() throws Exception { + for (String codec : avroCodecs) { + SerializableAvroCodecFactory codecFactory = + new SerializableAvroCodecFactory(CodecFactory.fromString(codec)); + + SerializableAvroCodecFactory serdeC = SerializableUtils.clone(codecFactory); + + assertEquals(CodecFactory.fromString(codec).toString(), serdeC.getCodec().toString()); + } + } + + @Test + public void testDeflateCodecSerDeWithLevels() throws Exception { + for (int i = 0; i < 10; ++i) { + SerializableAvroCodecFactory codecFactory = + new SerializableAvroCodecFactory(CodecFactory.deflateCodec(i)); + + SerializableAvroCodecFactory serdeC = SerializableUtils.clone(codecFactory); + + assertEquals(CodecFactory.deflateCodec(i).toString(), serdeC.getCodec().toString()); + } + } + + @Test + public void testXZCodecSerDeWithLevels() throws Exception { + for (int i = 0; i < 10; ++i) { + SerializableAvroCodecFactory codecFactory = + new SerializableAvroCodecFactory(CodecFactory.xzCodec(i)); + + SerializableAvroCodecFactory serdeC = SerializableUtils.clone(codecFactory); + + assertEquals(CodecFactory.xzCodec(i).toString(), serdeC.getCodec().toString()); + } + } + + @Test(expected = NullPointerException.class) + public void testNullCodecToString() throws Exception { + // use default CTR (available cause Serializable) + SerializableAvroCodecFactory codec = new SerializableAvroCodecFactory(); + assertEquals("null", codec.toString()); + } +} diff --git a/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/AvroSchemaTest.java b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/AvroSchemaTest.java new file mode 100644 index 0000000000000..066739ade69f8 --- /dev/null +++ b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/AvroSchemaTest.java @@ -0,0 +1,497 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.avro.schemas; + +import static org.junit.Assert.assertEquals; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.GenericRecordBuilder; +import org.apache.avro.reflect.AvroIgnore; +import org.apache.avro.reflect.AvroName; +import org.apache.avro.reflect.AvroSchema; +import org.apache.avro.util.Utf8; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.schemas.Schema.FieldType; +import org.apache.beam.sdk.schemas.logicaltypes.EnumerationType; +import org.apache.beam.sdk.schemas.logicaltypes.FixedBytes; +import org.apache.beam.sdk.schemas.transforms.Group; +import org.apache.beam.sdk.testing.PAssert; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.testing.ValidatesRunner; +import org.apache.beam.sdk.transforms.Create; +import org.apache.beam.sdk.transforms.SerializableFunction; +import org.apache.beam.sdk.util.SerializableUtils; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.Row; +import org.apache.beam.sdk.values.TypeDescriptor; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.joda.time.Days; +import org.joda.time.LocalDate; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** Tests for AVRO schema classes. */ +public class AvroSchemaTest { + /** A test POJO that corresponds to our AVRO schema. */ + public static class AvroSubPojo { + @AvroName("BOOL_NON_NULLABLE") + public boolean boolNonNullable; + + @AvroName("int") + @org.apache.avro.reflect.Nullable + public Integer anInt; + + public AvroSubPojo(boolean boolNonNullable, Integer anInt) { + this.boolNonNullable = boolNonNullable; + this.anInt = anInt; + } + + public AvroSubPojo() {} + + @Override + public boolean equals(@Nullable Object o) { + if (this == o) { + return true; + } + if (!(o instanceof AvroSubPojo)) { + return false; + } + AvroSubPojo that = (AvroSubPojo) o; + return boolNonNullable == that.boolNonNullable && Objects.equals(anInt, that.anInt); + } + + @Override + public int hashCode() { + return Objects.hash(boolNonNullable, anInt); + } + + @Override + public String toString() { + return "AvroSubPojo{" + "boolNonNullable=" + boolNonNullable + ", anInt=" + anInt + '}'; + } + } + + /** A test POJO that corresponds to our AVRO schema. 
*/ + public static class AvroPojo { + public @AvroName("bool_non_nullable") boolean boolNonNullable; + + @org.apache.avro.reflect.Nullable + public @AvroName("int") Integer anInt; + + @org.apache.avro.reflect.Nullable + public @AvroName("long") Long aLong; + + @AvroName("float") + @org.apache.avro.reflect.Nullable + public Float aFloat; + + @AvroName("double") + @org.apache.avro.reflect.Nullable + public Double aDouble; + + @org.apache.avro.reflect.Nullable public String string; + @org.apache.avro.reflect.Nullable public ByteBuffer bytes; + + @AvroSchema("{\"type\": \"fixed\", \"size\": 4, \"name\": \"fixed4\"}") + public byte[] fixed; + + @AvroSchema("{\"type\": \"int\", \"logicalType\": \"date\"}") + public LocalDate date; + + @AvroSchema("{\"type\": \"long\", \"logicalType\": \"timestamp-millis\"}") + public DateTime timestampMillis; + + @AvroSchema("{\"name\": \"TestEnum\", \"type\": \"enum\", \"symbols\": [\"abc\",\"cde\"] }") + public TestEnum testEnum; + + @org.apache.avro.reflect.Nullable public AvroSubPojo row; + @org.apache.avro.reflect.Nullable public List array; + @org.apache.avro.reflect.Nullable public Map map; + @AvroIgnore String extraField; + + @Override + public boolean equals(@Nullable Object o) { + if (this == o) { + return true; + } + if (!(o instanceof AvroPojo)) { + return false; + } + AvroPojo avroPojo = (AvroPojo) o; + return boolNonNullable == avroPojo.boolNonNullable + && Objects.equals(anInt, avroPojo.anInt) + && Objects.equals(aLong, avroPojo.aLong) + && Objects.equals(aFloat, avroPojo.aFloat) + && Objects.equals(aDouble, avroPojo.aDouble) + && Objects.equals(string, avroPojo.string) + && Objects.equals(bytes, avroPojo.bytes) + && Arrays.equals(fixed, avroPojo.fixed) + && Objects.equals(date, avroPojo.date) + && Objects.equals(timestampMillis, avroPojo.timestampMillis) + && Objects.equals(testEnum, avroPojo.testEnum) + && Objects.equals(row, avroPojo.row) + && Objects.equals(array, avroPojo.array) + && Objects.equals(map, avroPojo.map); + } + + @Override + public int hashCode() { + return Objects.hash( + boolNonNullable, + anInt, + aLong, + aFloat, + aDouble, + string, + bytes, + Arrays.hashCode(fixed), + date, + timestampMillis, + testEnum, + row, + array, + map); + } + + public AvroPojo( + boolean boolNonNullable, + int anInt, + long aLong, + float aFloat, + double aDouble, + String string, + ByteBuffer bytes, + byte[] fixed, + LocalDate date, + DateTime timestampMillis, + TestEnum testEnum, + AvroSubPojo row, + List array, + Map map) { + this.boolNonNullable = boolNonNullable; + this.anInt = anInt; + this.aLong = aLong; + this.aFloat = aFloat; + this.aDouble = aDouble; + this.string = string; + this.bytes = bytes; + this.fixed = fixed; + this.date = date; + this.timestampMillis = timestampMillis; + this.testEnum = testEnum; + this.row = row; + this.array = array; + this.map = map; + this.extraField = ""; + } + + public AvroPojo() {} + + @Override + public String toString() { + return "AvroPojo{" + + "boolNonNullable=" + + boolNonNullable + + ", anInt=" + + anInt + + ", aLong=" + + aLong + + ", aFloat=" + + aFloat + + ", aDouble=" + + aDouble + + ", string='" + + string + + '\'' + + ", bytes=" + + bytes + + ", fixed=" + + Arrays.toString(fixed) + + ", date=" + + date + + ", timestampMillis=" + + timestampMillis + + ", testEnum=" + + testEnum + + ", row=" + + row + + ", array=" + + array + + ", map=" + + map + + ", extraField='" + + extraField + + '\'' + + '}'; + } + } + + private static final Schema SUBSCHEMA = + Schema.builder() + 
.addField("BOOL_NON_NULLABLE", FieldType.BOOLEAN) + .addNullableField("int", FieldType.INT32) + .build(); + private static final FieldType SUB_TYPE = FieldType.row(SUBSCHEMA).withNullable(true); + + private static final EnumerationType TEST_ENUM_TYPE = EnumerationType.create("abc", "cde"); + + private static final Schema SCHEMA = + Schema.builder() + .addField("bool_non_nullable", FieldType.BOOLEAN) + .addNullableField("int", FieldType.INT32) + .addNullableField("long", FieldType.INT64) + .addNullableField("float", FieldType.FLOAT) + .addNullableField("double", FieldType.DOUBLE) + .addNullableField("string", FieldType.STRING) + .addNullableField("bytes", FieldType.BYTES) + .addField("fixed", FieldType.logicalType(FixedBytes.of(4))) + .addField("date", FieldType.DATETIME) + .addField("timestampMillis", FieldType.DATETIME) + .addField("TestEnum", FieldType.logicalType(TEST_ENUM_TYPE)) + .addNullableField("row", SUB_TYPE) + .addNullableField("array", FieldType.array(SUB_TYPE)) + .addNullableField("map", FieldType.map(FieldType.STRING, SUB_TYPE)) + .build(); + + private static final Schema POJO_SCHEMA = + Schema.builder() + .addField("bool_non_nullable", FieldType.BOOLEAN) + .addNullableField("int", FieldType.INT32) + .addNullableField("long", FieldType.INT64) + .addNullableField("float", FieldType.FLOAT) + .addNullableField("double", FieldType.DOUBLE) + .addNullableField("string", FieldType.STRING) + .addNullableField("bytes", FieldType.BYTES) + .addField("fixed", FieldType.logicalType(FixedBytes.of(4))) + .addField("date", FieldType.DATETIME) + .addField("timestampMillis", FieldType.DATETIME) + .addField("testEnum", FieldType.logicalType(TEST_ENUM_TYPE)) + .addNullableField("row", SUB_TYPE) + .addNullableField("array", FieldType.array(SUB_TYPE.withNullable(false))) + .addNullableField("map", FieldType.map(FieldType.STRING, SUB_TYPE.withNullable(false))) + .build(); + + private static final byte[] BYTE_ARRAY = new byte[] {1, 2, 3, 4}; + private static final DateTime DATE_TIME = + new DateTime().withDate(1979, 3, 14).withTime(1, 2, 3, 4); + private static final LocalDate DATE = new LocalDate(1979, 3, 14); + private static final TestAvroNested AVRO_NESTED_SPECIFIC_RECORD = new TestAvroNested(true, 42); + private static final TestAvro AVRO_SPECIFIC_RECORD = + new TestAvro( + true, + 43, + 44L, + (float) 44.1, + (double) 44.2, + "mystring", + ByteBuffer.wrap(BYTE_ARRAY), + new fixed4(BYTE_ARRAY), + DATE, + DATE_TIME, + TestEnum.abc, + AVRO_NESTED_SPECIFIC_RECORD, + ImmutableList.of(AVRO_NESTED_SPECIFIC_RECORD, AVRO_NESTED_SPECIFIC_RECORD), + ImmutableMap.of("k1", AVRO_NESTED_SPECIFIC_RECORD, "k2", AVRO_NESTED_SPECIFIC_RECORD)); + private static final GenericRecord AVRO_NESTED_GENERIC_RECORD = + new GenericRecordBuilder(TestAvroNested.SCHEMA$) + .set("BOOL_NON_NULLABLE", true) + .set("int", 42) + .build(); + private static final GenericRecord AVRO_GENERIC_RECORD = + new GenericRecordBuilder(TestAvro.SCHEMA$) + .set("bool_non_nullable", true) + .set("int", 43) + .set("long", 44L) + .set("float", (float) 44.1) + .set("double", (double) 44.2) + .set("string", new Utf8("mystring")) + .set("bytes", ByteBuffer.wrap(BYTE_ARRAY)) + .set( + "fixed", + GenericData.get() + .createFixed( + null, BYTE_ARRAY, org.apache.avro.Schema.createFixed("fixed4", "", "", 4))) + .set("date", (int) Days.daysBetween(new LocalDate(1970, 1, 1), DATE).getDays()) + .set("timestampMillis", DATE_TIME.getMillis()) + .set("TestEnum", TestEnum.abc) + .set("row", AVRO_NESTED_GENERIC_RECORD) + .set("array", 
ImmutableList.of(AVRO_NESTED_GENERIC_RECORD, AVRO_NESTED_GENERIC_RECORD)) + .set( + "map", + ImmutableMap.of( + new Utf8("k1"), AVRO_NESTED_GENERIC_RECORD, + new Utf8("k2"), AVRO_NESTED_GENERIC_RECORD)) + .build(); + + private static final Row NESTED_ROW = Row.withSchema(SUBSCHEMA).addValues(true, 42).build(); + private static final Row ROW = + Row.withSchema(SCHEMA) + .addValues( + true, + 43, + 44L, + (float) 44.1, + (double) 44.2, + "mystring", + ByteBuffer.wrap(BYTE_ARRAY), + BYTE_ARRAY, + DATE.toDateTimeAtStartOfDay(DateTimeZone.UTC), + DATE_TIME, + TEST_ENUM_TYPE.valueOf("abc"), + NESTED_ROW, + ImmutableList.of(NESTED_ROW, NESTED_ROW), + ImmutableMap.of("k1", NESTED_ROW, "k2", NESTED_ROW)) + .build(); + + @Test + public void testSpecificRecordSchema() { + assertEquals(SCHEMA, new AvroRecordSchema().schemaFor(TypeDescriptor.of(TestAvro.class))); + } + + @Test + public void testPojoSchema() { + assertEquals(POJO_SCHEMA, new AvroRecordSchema().schemaFor(TypeDescriptor.of(AvroPojo.class))); + } + + @Test + public void testSpecificRecordToRow() { + SerializableFunction toRow = + new AvroRecordSchema().toRowFunction(TypeDescriptor.of(TestAvro.class)); + assertEquals(ROW, toRow.apply(AVRO_SPECIFIC_RECORD)); + } + + @Test + public void testRowToSpecificRecord() { + SerializableFunction fromRow = + new AvroRecordSchema().fromRowFunction(TypeDescriptor.of(TestAvro.class)); + assertEquals(AVRO_SPECIFIC_RECORD, fromRow.apply(ROW)); + } + + @Test + public void testGenericRecordToRow() { + SerializableFunction toRow = + AvroUtils.getGenericRecordToRowFunction(SCHEMA); + assertEquals(ROW, toRow.apply(AVRO_GENERIC_RECORD)); + } + + @Test + public void testRowToGenericRecord() { + SerializableFunction fromRow = + AvroUtils.getRowToGenericRecordFunction(TestAvro.SCHEMA$); + assertEquals(AVRO_GENERIC_RECORD, fromRow.apply(ROW)); + } + + private static final AvroSubPojo SUB_POJO = new AvroSubPojo(true, 42); + private static final AvroPojo AVRO_POJO = + new AvroPojo( + true, + 43, + 44L, + (float) 44.1, + (double) 44.2, + "mystring", + ByteBuffer.wrap(BYTE_ARRAY), + BYTE_ARRAY, + DATE, + DATE_TIME, + TestEnum.abc, + SUB_POJO, + ImmutableList.of(SUB_POJO, SUB_POJO), + ImmutableMap.of("k1", SUB_POJO, "k2", SUB_POJO)); + + private static final Row ROW_FOR_POJO = + Row.withSchema(POJO_SCHEMA) + .addValues( + true, + 43, + 44L, + (float) 44.1, + (double) 44.2, + "mystring", + ByteBuffer.wrap(BYTE_ARRAY), + BYTE_ARRAY, + DATE.toDateTimeAtStartOfDay(DateTimeZone.UTC), + DATE_TIME, + TEST_ENUM_TYPE.valueOf("abc"), + NESTED_ROW, + ImmutableList.of(NESTED_ROW, NESTED_ROW), + ImmutableMap.of("k1", NESTED_ROW, "k2", NESTED_ROW)) + .build(); + + @Test + public void testPojoRecordToRow() { + SerializableFunction toRow = + new AvroRecordSchema().toRowFunction(TypeDescriptor.of(AvroPojo.class)); + assertEquals(ROW_FOR_POJO, toRow.apply(AVRO_POJO)); + } + + @Test + public void testRowToPojo() { + SerializableFunction fromRow = + new AvroRecordSchema().fromRowFunction(TypeDescriptor.of(AvroPojo.class)); + assertEquals(AVRO_POJO, fromRow.apply(ROW_FOR_POJO)); + } + + @Test + public void testPojoRecordToRowSerializable() { + SerializableUtils.ensureSerializableRoundTrip( + new AvroRecordSchema().toRowFunction(TypeDescriptor.of(AvroPojo.class))); + } + + @Test + public void testPojoRecordFromRowSerializable() { + SerializableUtils.ensureSerializableRoundTrip( + new AvroRecordSchema().fromRowFunction(TypeDescriptor.of(AvroPojo.class))); + } + + @Rule public final transient TestPipeline pipeline = TestPipeline.create(); + + 
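  // Editor's note: illustrative sketch, not part of the original patch. It combines the two
  // conversion directions verified separately above: mapping the POJO to a Row and back through
  // AvroRecordSchema should return an equal POJO. The test name below is a placeholder.
  @Test
  public void testPojoRoundTripThroughRow() {
    SerializableFunction<AvroPojo, Row> toRow =
        new AvroRecordSchema().toRowFunction(TypeDescriptor.of(AvroPojo.class));
    SerializableFunction<Row, AvroPojo> fromRow =
        new AvroRecordSchema().fromRowFunction(TypeDescriptor.of(AvroPojo.class));
    // Round-tripping through the Row representation should preserve equality.
    assertEquals(AVRO_POJO, fromRow.apply(toRow.apply(AVRO_POJO)));
  }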
@Test + @Category(ValidatesRunner.class) + public void testAvroPipelineGroupBy() { + PCollection input = pipeline.apply(Create.of(ROW_FOR_POJO).withRowSchema(POJO_SCHEMA)); + + PCollection output = input.apply(Group.byFieldNames("string")); + Schema keySchema = Schema.builder().addStringField("string").build(); + Schema outputSchema = + Schema.builder() + .addRowField("key", keySchema) + .addIterableField("value", FieldType.row(POJO_SCHEMA)) + .build(); + PAssert.that(output) + .containsInAnyOrder( + Row.withSchema(outputSchema) + .addValue(Row.withSchema(keySchema).addValue("mystring").build()) + .addIterable(ImmutableList.of(ROW_FOR_POJO)) + .build()); + + pipeline.run(); + } +} diff --git a/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/io/AvroPayloadSerializerProviderTest.java b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/io/AvroPayloadSerializerProviderTest.java new file mode 100644 index 0000000000000..9c56ffcdc0842 --- /dev/null +++ b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/io/AvroPayloadSerializerProviderTest.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.avro.schemas.io; + +import static org.junit.Assert.assertEquals; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.GenericRecordBuilder; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.schemas.io.payloads.AvroPayloadSerializerProvider; +import org.apache.beam.sdk.extensions.avro.schemas.utils.AvroUtils; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.values.Row; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class AvroPayloadSerializerProviderTest { + private static final Schema SCHEMA = + Schema.builder().addInt64Field("abc").addStringField("xyz").build(); + private static final org.apache.avro.Schema AVRO_SCHEMA = AvroUtils.toAvroSchema(SCHEMA); + private static final AvroCoder AVRO_CODER = AvroCoder.of(AVRO_SCHEMA); + private static final Row DESERIALIZED = + Row.withSchema(SCHEMA).withFieldValue("abc", 3L).withFieldValue("xyz", "qqq").build(); + private static final GenericRecord SERIALIZED = + new GenericRecordBuilder(AVRO_SCHEMA).set("abc", 3L).set("xyz", "qqq").build(); + + private final AvroPayloadSerializerProvider provider = new AvroPayloadSerializerProvider(); + + @Test + public void serialize() throws Exception { + byte[] bytes = provider.getSerializer(SCHEMA, ImmutableMap.of()).serialize(DESERIALIZED); + GenericRecord record = AVRO_CODER.decode(new ByteArrayInputStream(bytes)); + assertEquals(3L, record.get("abc")); + assertEquals("qqq", record.get("xyz").toString()); + } + + @Test + public void deserialize() throws Exception { + ByteArrayOutputStream os = new ByteArrayOutputStream(); + AVRO_CODER.encode(SERIALIZED, os); + Row row = provider.getSerializer(SCHEMA, ImmutableMap.of()).deserialize(os.toByteArray()); + assertEquals(DESERIALIZED, row); + } +} diff --git a/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroGenerators.java b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroGenerators.java new file mode 100644 index 0000000000000..fa7d7cceecce1 --- /dev/null +++ b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroGenerators.java @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.avro.schemas.utils; + +import com.pholser.junit.quickcheck.generator.GenerationStatus; +import com.pholser.junit.quickcheck.generator.Generator; +import com.pholser.junit.quickcheck.random.SourceOfRandomness; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.apache.avro.Schema; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Joiner; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ObjectArrays; + +/** QuickCheck generators for AVRO. */ +class AvroGenerators { + + /** Generates arbitrary AVRO schemas. */ + public static class SchemaGenerator extends BaseSchemaGenerator { + + public static final SchemaGenerator INSTANCE = new SchemaGenerator(); + + private static final ImmutableList PRIMITIVE_TYPES = + ImmutableList.of( + Schema.Type.STRING, + Schema.Type.BYTES, + Schema.Type.INT, + Schema.Type.LONG, + Schema.Type.FLOAT, + Schema.Type.DOUBLE, + Schema.Type.BOOLEAN); + + private static final ImmutableList ALL_TYPES = + ImmutableList.builder() + .addAll(PRIMITIVE_TYPES) + .add(Schema.Type.FIXED) + .add(Schema.Type.ENUM) + .add(Schema.Type.RECORD) + .add(Schema.Type.ARRAY) + .add(Schema.Type.MAP) + .add(Schema.Type.UNION) + .add(Schema.Type.ARRAY) + .build(); + + private static final int MAX_NESTING = 10; + + @Override + public Schema generate(SourceOfRandomness random, GenerationStatus status) { + Schema.Type type; + + if (nesting(status) >= MAX_NESTING) { + type = random.choose(PRIMITIVE_TYPES); + } else { + type = random.choose(ALL_TYPES); + } + + if (PRIMITIVE_TYPES.contains(type)) { + return Schema.create(type); + } else { + nestingInc(status); + + if (type == Schema.Type.FIXED) { + int size = random.choose(Arrays.asList(1, 5, 12)); + return Schema.createFixed("fixed_" + branch(status), "", "", size); + } else if (type == Schema.Type.UNION) { + // only nullable fields, everything else isn't supported in row conversion code + return UnionSchemaGenerator.INSTANCE.generate(random, status); + } else if (type == Schema.Type.ENUM) { + return EnumSchemaGenerator.INSTANCE.generate(random, status); + } else if (type == Schema.Type.RECORD) { + return RecordSchemaGenerator.INSTANCE.generate(random, status); + } else if (type == Schema.Type.MAP) { + return Schema.createMap(generate(random, status)); + } else if (type == Schema.Type.ARRAY) { + return Schema.createArray(generate(random, status)); + } else { + throw new AssertionError("Unexpected AVRO type: " + type); + } + } + } + } + + public static class RecordSchemaGenerator extends BaseSchemaGenerator { + + public static final RecordSchemaGenerator INSTANCE = new RecordSchemaGenerator(); + + @Override + public Schema generate(SourceOfRandomness random, GenerationStatus status) { + List fields = + IntStream.range(0, random.nextInt(0, status.size()) + 1) + .mapToObj( + i -> { + // deterministically avoid collisions in record names + branchPush(status, String.valueOf(i)); + Schema.Field field = + createField(i, SchemaGenerator.INSTANCE.generate(random, status)); + branchPop(status); + return field; + }) + .collect(Collectors.toList()); + + return Schema.createRecord("record_" + branch(status), "", "example", false, fields); + } + + private Schema.Field createField(int i, 
Schema schema) { + return new Schema.Field("field_" + i, schema, null, (Object) null); + } + } + + static class UnionSchemaGenerator extends BaseSchemaGenerator { + + public static final UnionSchemaGenerator INSTANCE = new UnionSchemaGenerator(); + + @Override + public Schema generate(SourceOfRandomness random, GenerationStatus status) { + Map schemaMap = + IntStream.range(0, random.nextInt(0, status.size()) + 1) + .mapToObj( + i -> { + // deterministically avoid collisions in record names + branchPush(status, String.valueOf(i)); + Schema schema = + SchemaGenerator.INSTANCE + // nested unions aren't supported in AVRO + .filter(x -> x.getType() != Schema.Type.UNION) + .generate(random, status); + branchPop(status); + return schema; + }) + // AVRO requires uniqueness by full name + .collect(Collectors.toMap(Schema::getFullName, Function.identity(), (x, y) -> x)); + + List schemas = new ArrayList<>(schemaMap.values()); + + if (random.nextBoolean()) { + Schema nullSchema = Schema.create(Schema.Type.NULL); + schemas.add(nullSchema); + Collections.shuffle(schemas, random.toJDKRandom()); + } + + return Schema.createUnion(schemas); + } + } + + static class EnumSchemaGenerator extends BaseSchemaGenerator { + + public static final EnumSchemaGenerator INSTANCE = new EnumSchemaGenerator(); + + private static final Schema FRUITS = + Schema.createEnum("Fruit", "", "example", Arrays.asList("banana", "apple", "pear")); + + private static final Schema STATUS = + Schema.createEnum("Status", "", "example", Arrays.asList("OK", "ERROR", "WARNING")); + + @Override + public Schema generate(final SourceOfRandomness random, final GenerationStatus status) { + return random.choose(Arrays.asList(FRUITS, STATUS)); + } + } + + abstract static class BaseSchemaGenerator extends Generator { + + private static final GenerationStatus.Key NESTING_KEY = + new GenerationStatus.Key<>("nesting", Integer.class); + + private static final GenerationStatus.Key BRANCH_KEY = + new GenerationStatus.Key<>("branch", String[].class); + + BaseSchemaGenerator() { + super(Schema.class); + } + + void branchPush(GenerationStatus status, String value) { + String[] current = status.valueOf(BRANCH_KEY).orElse(new String[0]); + String[] next = ObjectArrays.concat(current, value); + + status.setValue(BRANCH_KEY, next); + } + + void branchPop(GenerationStatus status) { + String[] current = status.valueOf(BRANCH_KEY).orElse(new String[0]); + String[] next = Arrays.copyOf(current, current.length - 1); + + status.setValue(BRANCH_KEY, next); + } + + String branch(GenerationStatus status) { + return Joiner.on("_").join(status.valueOf(BRANCH_KEY).orElse(new String[0])); + } + + int nesting(GenerationStatus status) { + return status.valueOf(NESTING_KEY).orElse(0); + } + + void nestingInc(GenerationStatus status) { + status.setValue(NESTING_KEY, nesting(status) + 1); + } + } +} diff --git a/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroUtilsTest.java b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroUtilsTest.java new file mode 100644 index 0000000000000..4e282fb7094b7 --- /dev/null +++ b/sdks/java/extensions/avro/src/test/java/org/apache/beam/sdk/extensions/avro/schemas/utils/AvroUtilsTest.java @@ -0,0 +1,915 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.avro.schemas.utils; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import com.pholser.junit.quickcheck.From; +import com.pholser.junit.quickcheck.Property; +import com.pholser.junit.quickcheck.runner.JUnitQuickcheck; +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.sql.JDBCType; +import java.util.List; +import java.util.Map; +import org.apache.avro.Conversions; +import org.apache.avro.LogicalType; +import org.apache.avro.LogicalTypes; +import org.apache.avro.RandomData; +import org.apache.avro.Schema.Type; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.GenericRecordBuilder; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.util.Utf8; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; +import org.apache.beam.sdk.extensions.avro.io.AvroGeneratedUser; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.schemas.Schema.Field; +import org.apache.beam.sdk.schemas.Schema.FieldType; +import org.apache.beam.sdk.schemas.logicaltypes.EnumerationType; +import org.apache.beam.sdk.schemas.logicaltypes.OneOfType; +import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes; +import org.apache.beam.sdk.testing.CoderProperties; +import org.apache.beam.sdk.transforms.Create; +import org.apache.beam.sdk.transforms.SimpleFunction; +import org.apache.beam.sdk.util.SerializableUtils; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.Row; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps; +import org.checkerframework.checker.nullness.qual.NonNull; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.joda.time.Days; +import org.joda.time.Instant; +import org.joda.time.LocalTime; +import org.junit.Test; +import org.junit.runner.RunWith; + +/** Tests for conversion between AVRO records and Beam rows. 
*/ +@RunWith(JUnitQuickcheck.class) +@SuppressWarnings({ + "rawtypes", // TODO(https://github.com/apache/beam/issues/20447) +}) +public class AvroUtilsTest { + + private static final org.apache.avro.Schema NULL_SCHEMA = + org.apache.avro.Schema.create(Type.NULL); + + @Property(trials = 1000) + @SuppressWarnings("unchecked") + public void supportsAnyAvroSchema( + @From(AvroGenerators.RecordSchemaGenerator.class) org.apache.avro.Schema avroSchema) { + + Schema schema = AvroUtils.toBeamSchema(avroSchema); + Iterable iterable = new RandomData(avroSchema, 10); + List records = Lists.newArrayList((Iterable) iterable); + + for (GenericRecord record : records) { + AvroUtils.toBeamRowStrict(record, schema); + } + } + + @Property(trials = 1000) + @SuppressWarnings("unchecked") + public void avroToBeamRoundTrip( + @From(AvroGenerators.RecordSchemaGenerator.class) org.apache.avro.Schema avroSchema) { + + Schema schema = AvroUtils.toBeamSchema(avroSchema); + Iterable iterable = new RandomData(avroSchema, 10); + List records = Lists.newArrayList((Iterable) iterable); + + for (GenericRecord record : records) { + Row row = AvroUtils.toBeamRowStrict(record, schema); + GenericRecord out = AvroUtils.toGenericRecord(row, avroSchema); + assertEquals(record, out); + } + } + + @Test + public void testUnwrapNullableSchema() { + org.apache.avro.Schema avroSchema = + org.apache.avro.Schema.createUnion( + org.apache.avro.Schema.create(Type.NULL), org.apache.avro.Schema.create(Type.STRING)); + + AvroUtils.TypeWithNullability typeWithNullability = + new AvroUtils.TypeWithNullability(avroSchema); + assertTrue(typeWithNullability.nullable); + assertEquals(org.apache.avro.Schema.create(Type.STRING), typeWithNullability.type); + } + + @Test + public void testUnwrapNullableSchemaReordered() { + org.apache.avro.Schema avroSchema = + org.apache.avro.Schema.createUnion( + org.apache.avro.Schema.create(Type.STRING), org.apache.avro.Schema.create(Type.NULL)); + + AvroUtils.TypeWithNullability typeWithNullability = + new AvroUtils.TypeWithNullability(avroSchema); + assertTrue(typeWithNullability.nullable); + assertEquals(org.apache.avro.Schema.create(Type.STRING), typeWithNullability.type); + } + + @Test + public void testUnwrapNullableSchemaToUnion() { + org.apache.avro.Schema avroSchema = + org.apache.avro.Schema.createUnion( + org.apache.avro.Schema.create(Type.STRING), + org.apache.avro.Schema.create(Type.LONG), + org.apache.avro.Schema.create(Type.NULL)); + + AvroUtils.TypeWithNullability typeWithNullability = + new AvroUtils.TypeWithNullability(avroSchema); + assertTrue(typeWithNullability.nullable); + assertEquals( + org.apache.avro.Schema.createUnion( + org.apache.avro.Schema.create(Type.STRING), org.apache.avro.Schema.create(Type.LONG)), + typeWithNullability.type); + } + + @Test + public void testNullableArrayFieldToBeamArrayField() { + org.apache.avro.Schema.Field avroField = + new org.apache.avro.Schema.Field( + "arrayField", + ReflectData.makeNullable( + org.apache.avro.Schema.createArray(org.apache.avro.Schema.create(Type.INT))), + "", + null); + + Field expectedBeamField = Field.nullable("arrayField", FieldType.array(FieldType.INT32)); + + Field beamField = AvroUtils.toBeamField(avroField); + assertEquals(expectedBeamField, beamField); + } + + @Test + public void testNullableBeamArrayFieldToAvroField() { + Field beamField = Field.nullable("arrayField", FieldType.array(FieldType.INT32)); + + org.apache.avro.Schema.Field expectedAvroField = + new org.apache.avro.Schema.Field( + "arrayField", + 
ReflectData.makeNullable( + org.apache.avro.Schema.createArray(org.apache.avro.Schema.create(Type.INT))), + "", + null); + + org.apache.avro.Schema.Field avroField = AvroUtils.toAvroField(beamField, "ignored"); + assertEquals(expectedAvroField, avroField); + } + + private static List getAvroSubSchemaFields() { + List fields = Lists.newArrayList(); + fields.add( + new org.apache.avro.Schema.Field( + "bool", org.apache.avro.Schema.create(Type.BOOLEAN), "", null)); + fields.add( + new org.apache.avro.Schema.Field("int", org.apache.avro.Schema.create(Type.INT), "", null)); + return fields; + } + + private static org.apache.avro.Schema getAvroSubSchema(String name) { + return org.apache.avro.Schema.createRecord( + name, null, "topLevelRecord", false, getAvroSubSchemaFields()); + } + + private static org.apache.avro.Schema getAvroSchema() { + List fields = Lists.newArrayList(); + fields.add( + new org.apache.avro.Schema.Field( + "bool", org.apache.avro.Schema.create(Type.BOOLEAN), "", (Object) null)); + fields.add( + new org.apache.avro.Schema.Field( + "int", org.apache.avro.Schema.create(Type.INT), "", (Object) null)); + fields.add( + new org.apache.avro.Schema.Field( + "long", org.apache.avro.Schema.create(Type.LONG), "", (Object) null)); + fields.add( + new org.apache.avro.Schema.Field( + "float", org.apache.avro.Schema.create(Type.FLOAT), "", (Object) null)); + fields.add( + new org.apache.avro.Schema.Field( + "double", org.apache.avro.Schema.create(Type.DOUBLE), "", (Object) null)); + fields.add( + new org.apache.avro.Schema.Field( + "string", org.apache.avro.Schema.create(Type.STRING), "", (Object) null)); + fields.add( + new org.apache.avro.Schema.Field( + "bytes", org.apache.avro.Schema.create(Type.BYTES), "", (Object) null)); + fields.add( + new org.apache.avro.Schema.Field( + "decimal", + LogicalTypes.decimal(Integer.MAX_VALUE) + .addToSchema(org.apache.avro.Schema.create(Type.BYTES)), + "", + (Object) null)); + fields.add( + new org.apache.avro.Schema.Field( + "timestampMillis", + LogicalTypes.timestampMillis().addToSchema(org.apache.avro.Schema.create(Type.LONG)), + "", + (Object) null)); + fields.add(new org.apache.avro.Schema.Field("row", getAvroSubSchema("row"), "", (Object) null)); + fields.add( + new org.apache.avro.Schema.Field( + "array", + org.apache.avro.Schema.createArray(getAvroSubSchema("array")), + "", + (Object) null)); + fields.add( + new org.apache.avro.Schema.Field( + "map", org.apache.avro.Schema.createMap(getAvroSubSchema("map")), "", (Object) null)); + return org.apache.avro.Schema.createRecord("topLevelRecord", null, null, false, fields); + } + + private static Schema getBeamSubSchema() { + return new Schema.Builder() + .addField(Field.of("bool", FieldType.BOOLEAN)) + .addField(Field.of("int", FieldType.INT32)) + .build(); + } + + private Schema getBeamSchema() { + Schema subSchema = getBeamSubSchema(); + return new Schema.Builder() + .addField(Field.of("bool", FieldType.BOOLEAN)) + .addField(Field.of("int", FieldType.INT32)) + .addField(Field.of("long", FieldType.INT64)) + .addField(Field.of("float", FieldType.FLOAT)) + .addField(Field.of("double", FieldType.DOUBLE)) + .addField(Field.of("string", FieldType.STRING)) + .addField(Field.of("bytes", FieldType.BYTES)) + .addField(Field.of("decimal", FieldType.DECIMAL)) + .addField(Field.of("timestampMillis", FieldType.DATETIME)) + .addField(Field.of("row", FieldType.row(subSchema))) + .addField(Field.of("array", FieldType.array(FieldType.row(subSchema)))) + .addField(Field.of("map", FieldType.map(FieldType.STRING, 
FieldType.row(subSchema)))) + .build(); + } + + private static final byte[] BYTE_ARRAY = new byte[] {1, 2, 3, 4}; + private static final DateTime DATE_TIME = + new DateTime().withDate(1979, 3, 14).withTime(1, 2, 3, 4).withZone(DateTimeZone.UTC); + private static final BigDecimal BIG_DECIMAL = new BigDecimal(3600); + + private Row getBeamRow() { + Row subRow = Row.withSchema(getBeamSubSchema()).addValues(true, 42).build(); + return Row.withSchema(getBeamSchema()) + .addValue(true) + .addValue(43) + .addValue(44L) + .addValue((float) 44.1) + .addValue((double) 44.2) + .addValue("string") + .addValue(BYTE_ARRAY) + .addValue(BIG_DECIMAL) + .addValue(DATE_TIME) + .addValue(subRow) + .addValue(ImmutableList.of(subRow, subRow)) + .addValue(ImmutableMap.of("k1", subRow, "k2", subRow)) + .build(); + } + + private static GenericRecord getSubGenericRecord(String name) { + return new GenericRecordBuilder(getAvroSubSchema(name)) + .set("bool", true) + .set("int", 42) + .build(); + } + + private static GenericRecord getGenericRecord() { + + LogicalType decimalType = + LogicalTypes.decimal(Integer.MAX_VALUE) + .addToSchema(org.apache.avro.Schema.create(Type.BYTES)) + .getLogicalType(); + ByteBuffer encodedDecimal = + new Conversions.DecimalConversion().toBytes(BIG_DECIMAL, null, decimalType); + + return new GenericRecordBuilder(getAvroSchema()) + .set("bool", true) + .set("int", 43) + .set("long", 44L) + .set("float", (float) 44.1) + .set("double", (double) 44.2) + .set("string", new Utf8("string")) + .set("bytes", ByteBuffer.wrap(BYTE_ARRAY)) + .set("decimal", encodedDecimal) + .set("timestampMillis", DATE_TIME.getMillis()) + .set("row", getSubGenericRecord("row")) + .set("array", ImmutableList.of(getSubGenericRecord("array"), getSubGenericRecord("array"))) + .set( + "map", + ImmutableMap.of( + new Utf8("k1"), + getSubGenericRecord("map"), + new Utf8("k2"), + getSubGenericRecord("map"))) + .build(); + } + + @Test + public void testFromAvroSchema() { + assertEquals(getBeamSchema(), AvroUtils.toBeamSchema(getAvroSchema())); + } + + @Test + public void testFromBeamSchema() { + Schema beamSchema = getBeamSchema(); + org.apache.avro.Schema avroSchema = AvroUtils.toAvroSchema(beamSchema); + assertEquals(getAvroSchema(), avroSchema); + } + + @Test + public void testAvroSchemaFromBeamSchemaCanBeParsed() { + org.apache.avro.Schema convertedSchema = AvroUtils.toAvroSchema(getBeamSchema()); + org.apache.avro.Schema validatedSchema = + new org.apache.avro.Schema.Parser().parse(convertedSchema.toString()); + assertEquals(convertedSchema, validatedSchema); + } + + @Test + public void testAvroSchemaFromBeamSchemaWithFieldCollisionCanBeParsed() { + + // Two similar schemas, the only difference is the "street" field type in the nested record. 
+ Schema contact = + new Schema.Builder() + .addField(Field.of("name", FieldType.STRING)) + .addField( + Field.of( + "address", + FieldType.row( + new Schema.Builder() + .addField(Field.of("street", FieldType.STRING)) + .addField(Field.of("city", FieldType.STRING)) + .build()))) + .build(); + + Schema contactMultiline = + new Schema.Builder() + .addField(Field.of("name", FieldType.STRING)) + .addField( + Field.of( + "address", + FieldType.row( + new Schema.Builder() + .addField(Field.of("street", FieldType.array(FieldType.STRING))) + .addField(Field.of("city", FieldType.STRING)) + .build()))) + .build(); + + // Ensure that no collisions happen between two sibling fields with same-named child fields + // (with different schemas, between a parent field and a sub-record field with the same name, + // and artificially with the generated field name. + Schema beamSchema = + new Schema.Builder() + .addField(Field.of("home", FieldType.row(contact))) + .addField(Field.of("work", FieldType.row(contactMultiline))) + .addField(Field.of("address", FieldType.row(contact))) + .addField(Field.of("topLevelRecord", FieldType.row(contactMultiline))) + .build(); + + org.apache.avro.Schema convertedSchema = AvroUtils.toAvroSchema(beamSchema); + org.apache.avro.Schema validatedSchema = + new org.apache.avro.Schema.Parser().parse(convertedSchema.toString()); + assertEquals(convertedSchema, validatedSchema); + } + + @Test + public void testNullableFieldInAvroSchema() { + List fields = Lists.newArrayList(); + fields.add( + new org.apache.avro.Schema.Field( + "int", ReflectData.makeNullable(org.apache.avro.Schema.create(Type.INT)), "", null)); + fields.add( + new org.apache.avro.Schema.Field( + "array", + org.apache.avro.Schema.createArray( + ReflectData.makeNullable(org.apache.avro.Schema.create(Type.BYTES))), + "", + null)); + fields.add( + new org.apache.avro.Schema.Field( + "map", + org.apache.avro.Schema.createMap( + ReflectData.makeNullable(org.apache.avro.Schema.create(Type.INT))), + "", + null)); + fields.add( + new org.apache.avro.Schema.Field( + "enum", + ReflectData.makeNullable( + org.apache.avro.Schema.createEnum( + "fruit", "", "", ImmutableList.of("banana", "apple", "pear"))), + "", + null)); + + org.apache.avro.Schema avroSchema = + org.apache.avro.Schema.createRecord("topLevelRecord", null, null, false, fields); + + Schema expectedSchema = + Schema.builder() + .addNullableField("int", FieldType.INT32) + .addArrayField("array", FieldType.BYTES.withNullable(true)) + .addMapField("map", FieldType.STRING, FieldType.INT32.withNullable(true)) + .addField( + "enum", + FieldType.logicalType(EnumerationType.create("banana", "apple", "pear")) + .withNullable(true)) + .build(); + assertEquals(expectedSchema, AvroUtils.toBeamSchema(avroSchema)); + + Map nullMap = Maps.newHashMap(); + nullMap.put("k1", null); + GenericRecord genericRecord = + new GenericRecordBuilder(avroSchema) + .set("int", null) + .set("array", Lists.newArrayList((Object) null)) + .set("map", nullMap) + .set("enum", null) + .build(); + Row expectedRow = + Row.withSchema(expectedSchema) + .addValue(null) + .addValue(Lists.newArrayList((Object) null)) + .addValue(nullMap) + .addValue(null) + .build(); + assertEquals(expectedRow, AvroUtils.toBeamRowStrict(genericRecord, expectedSchema)); + } + + @Test + public void testNullableFieldsInBeamSchema() { + Schema beamSchema = + Schema.builder() + .addNullableField("int", FieldType.INT32) + .addArrayField("array", FieldType.INT32.withNullable(true)) + .addMapField("map", FieldType.STRING, 
FieldType.INT32.withNullable(true)) + .build(); + + List fields = Lists.newArrayList(); + fields.add( + new org.apache.avro.Schema.Field( + "int", ReflectData.makeNullable(org.apache.avro.Schema.create(Type.INT)), "", null)); + fields.add( + new org.apache.avro.Schema.Field( + "array", + org.apache.avro.Schema.createArray( + ReflectData.makeNullable(org.apache.avro.Schema.create(Type.INT))), + "", + null)); + fields.add( + new org.apache.avro.Schema.Field( + "map", + org.apache.avro.Schema.createMap( + ReflectData.makeNullable(org.apache.avro.Schema.create(Type.INT))), + "", + null)); + org.apache.avro.Schema avroSchema = + org.apache.avro.Schema.createRecord("topLevelRecord", null, null, false, fields); + assertEquals(avroSchema, AvroUtils.toAvroSchema(beamSchema)); + + Map nullMapUtf8 = Maps.newHashMap(); + nullMapUtf8.put(new Utf8("k1"), null); + Map nullMapString = Maps.newHashMap(); + nullMapString.put("k1", null); + + GenericRecord expectedGenericRecord = + new GenericRecordBuilder(avroSchema) + .set("int", null) + .set("array", Lists.newArrayList((Object) null)) + .set("map", nullMapUtf8) + .build(); + Row row = + Row.withSchema(beamSchema) + .addValue(null) + .addValue(Lists.newArrayList((Object) null)) + .addValue(nullMapString) + .build(); + assertEquals(expectedGenericRecord, AvroUtils.toGenericRecord(row, avroSchema)); + } + + @Test + public void testUnionFieldInAvroSchema() { + + List fields = Lists.newArrayList(); + List unionFields = Lists.newArrayList(); + + unionFields.add(org.apache.avro.Schema.create(Type.INT)); + unionFields.add(org.apache.avro.Schema.create(Type.STRING)); + + fields.add( + new org.apache.avro.Schema.Field( + "union", org.apache.avro.Schema.createUnion(unionFields), "", null)); + org.apache.avro.Schema avroSchema = + org.apache.avro.Schema.createRecord("topLevelRecord", null, null, false, fields); + OneOfType oneOfType = + OneOfType.create(Field.of("int", FieldType.INT32), Field.of("string", FieldType.STRING)); + + Schema expectedSchema = Schema.builder().addLogicalTypeField("union", oneOfType).build(); + assertEquals(expectedSchema, AvroUtils.toBeamSchema(avroSchema)); + GenericRecord genericRecord = new GenericRecordBuilder(avroSchema).set("union", 23423).build(); + Row expectedRow = + Row.withSchema(expectedSchema).addValue(oneOfType.createValue(0, 23423)).build(); + assertEquals(expectedRow, AvroUtils.toBeamRowStrict(genericRecord, expectedSchema)); + } + + @Test + public void testUnionFieldInBeamSchema() { + OneOfType oneOfType = + OneOfType.create(Field.of("int", FieldType.INT32), Field.of("string", FieldType.STRING)); + + Schema beamSchema = Schema.builder().addLogicalTypeField("union", oneOfType).build(); + List fields = Lists.newArrayList(); + List unionFields = Lists.newArrayList(); + + unionFields.add(org.apache.avro.Schema.create(Type.INT)); + unionFields.add(org.apache.avro.Schema.create(Type.STRING)); + fields.add( + new org.apache.avro.Schema.Field( + "union", org.apache.avro.Schema.createUnion(unionFields), "", null)); + org.apache.avro.Schema avroSchema = + org.apache.avro.Schema.createRecord("topLevelRecord", null, null, false, fields); + GenericRecord expectedGenericRecord = + new GenericRecordBuilder(avroSchema).set("union", 23423).build(); + Row row = Row.withSchema(beamSchema).addValue(oneOfType.createValue(0, 23423)).build(); + assertEquals(expectedGenericRecord, AvroUtils.toGenericRecord(row, avroSchema)); + } + + @Test + public void testJdbcLogicalVarCharRowDataToAvroSchema() { + String expectedAvroSchemaJson = + "{ " + + " 
\"name\": \"topLevelRecord\", " + + " \"type\": \"record\", " + + " \"fields\": [{ " + + " \"name\": \"my_varchar_field\", " + + " \"type\": {\"type\": \"string\", \"logicalType\": \"varchar\", \"maxLength\": 10}" + + " }, " + + " { " + + " \"name\": \"my_longvarchar_field\", " + + " \"type\": {\"type\": \"string\", \"logicalType\": \"varchar\", \"maxLength\": 50}" + + " }, " + + " { " + + " \"name\": \"my_nvarchar_field\", " + + " \"type\": {\"type\": \"string\", \"logicalType\": \"varchar\", \"maxLength\": 10}" + + " }, " + + " { " + + " \"name\": \"my_longnvarchar_field\", " + + " \"type\": {\"type\": \"string\", \"logicalType\": \"varchar\", \"maxLength\": 50}" + + " }, " + + " { " + + " \"name\": \"fixed_length_char_field\", " + + " \"type\": {\"type\": \"string\", \"logicalType\": \"char\", \"maxLength\": 25}" + + " } " + + " ] " + + "}"; + + Schema beamSchema = + Schema.builder() + .addField( + Field.of( + "my_varchar_field", FieldType.logicalType(JdbcType.StringType.varchar(10)))) + .addField( + Field.of( + "my_longvarchar_field", + FieldType.logicalType(JdbcType.StringType.longvarchar(50)))) + .addField( + Field.of( + "my_nvarchar_field", FieldType.logicalType(JdbcType.StringType.nvarchar(10)))) + .addField( + Field.of( + "my_longnvarchar_field", + FieldType.logicalType(JdbcType.StringType.longnvarchar(50)))) + .addField( + Field.of( + "fixed_length_char_field", + FieldType.logicalType(JdbcType.StringType.fixedLengthChar(25)))) + .build(); + + assertEquals( + new org.apache.avro.Schema.Parser().parse(expectedAvroSchemaJson), + AvroUtils.toAvroSchema(beamSchema)); + } + + @Test + public void testJdbcLogicalVarCharRowDataToGenericRecord() { + Schema beamSchema = + Schema.builder() + .addField( + Field.of( + "my_varchar_field", FieldType.logicalType(JdbcType.StringType.varchar(10)))) + .addField( + Field.of( + "my_longvarchar_field", + FieldType.logicalType(JdbcType.StringType.longvarchar(50)))) + .addField( + Field.of( + "my_nvarchar_field", FieldType.logicalType(JdbcType.StringType.nvarchar(10)))) + .addField( + Field.of( + "my_longnvarchar_field", + FieldType.logicalType(JdbcType.StringType.longnvarchar(50)))) + .build(); + + Row rowData = + Row.withSchema(beamSchema) + .addValue("varchar_value") + .addValue("longvarchar_value") + .addValue("nvarchar_value") + .addValue("longnvarchar_value") + .build(); + + org.apache.avro.Schema avroSchema = AvroUtils.toAvroSchema(beamSchema); + GenericRecord expectedRecord = + new GenericRecordBuilder(avroSchema) + .set("my_varchar_field", "varchar_value") + .set("my_longvarchar_field", "longvarchar_value") + .set("my_nvarchar_field", "nvarchar_value") + .set("my_longnvarchar_field", "longnvarchar_value") + .build(); + + assertEquals(expectedRecord, AvroUtils.toGenericRecord(rowData, avroSchema)); + } + + @Test + public void testJdbcLogicalDateAndTimeRowDataToAvroSchema() { + String expectedAvroSchemaJson = + "{ " + + " \"name\": \"topLevelRecord\", " + + " \"type\": \"record\", " + + " \"fields\": [{ " + + " \"name\": \"my_date_field\", " + + " \"type\": { \"type\": \"int\", \"logicalType\": \"date\" }" + + " }, " + + " { " + + " \"name\": \"my_time_field\", " + + " \"type\": { \"type\": \"int\", \"logicalType\": \"time-millis\" }" + + " }" + + " ] " + + "}"; + + Schema beamSchema = + Schema.builder() + .addField(Field.of("my_date_field", FieldType.logicalType(JdbcType.DATE))) + .addField(Field.of("my_time_field", FieldType.logicalType(JdbcType.TIME))) + .build(); + + assertEquals( + new 
org.apache.avro.Schema.Parser().parse(expectedAvroSchemaJson), + AvroUtils.toAvroSchema(beamSchema)); + } + + @Test + public void testJdbcLogicalDateAndTimeRowDataToGenericRecord() { + // Test Fixed clock at + DateTime testDateTime = DateTime.parse("2021-05-29T11:15:16.234Z"); + + Schema beamSchema = + Schema.builder() + .addField(Field.of("my_date_field", FieldType.logicalType(JdbcType.DATE))) + .addField(Field.of("my_time_field", FieldType.logicalType(JdbcType.TIME))) + .build(); + + Row rowData = + Row.withSchema(beamSchema) + .addValue(testDateTime.toLocalDate().toDateTime(LocalTime.MIDNIGHT).toInstant()) + .addValue(Instant.ofEpochMilli(testDateTime.toLocalTime().millisOfDay().get())) + .build(); + + int daysFromEpoch = + Days.daysBetween( + Instant.EPOCH, + testDateTime.toLocalDate().toDateTime(LocalTime.MIDNIGHT).toInstant()) + .getDays(); + int timeSinceMidNight = testDateTime.toLocalTime().getMillisOfDay(); + + org.apache.avro.Schema avroSchema = AvroUtils.toAvroSchema(beamSchema); + GenericRecord expectedRecord = + new GenericRecordBuilder(avroSchema) + .set("my_date_field", daysFromEpoch) + .set("my_time_field", timeSinceMidNight) + .build(); + + assertEquals(expectedRecord, AvroUtils.toGenericRecord(rowData, avroSchema)); + } + + @Test + public void testSqlTypesToGenericRecord() { + // SqlTypes to LogicalTypes.date conversion is one direction + java.time.LocalDate localDate = java.time.LocalDate.of(1979, 3, 14); + + Schema beamSchema = + Schema.builder() + .addField(Field.of("local_date", FieldType.logicalType(SqlTypes.DATE))) + .build(); + + Row rowData = Row.withSchema(beamSchema).addValue(localDate).build(); + + org.apache.avro.Schema avroSchema = AvroUtils.toAvroSchema(beamSchema); + GenericRecord expectedRecord = + new GenericRecordBuilder(avroSchema).set("local_date", localDate.toEpochDay()).build(); + + assertEquals(expectedRecord, AvroUtils.toGenericRecord(rowData, avroSchema)); + } + + @Test + public void testBeamRowToGenericRecord() { + GenericRecord genericRecord = AvroUtils.toGenericRecord(getBeamRow(), null); + assertEquals(getAvroSchema(), genericRecord.getSchema()); + assertEquals(getGenericRecord(), genericRecord); + } + + @Test + public void testBeamRowToGenericRecordInferSchema() { + GenericRecord genericRecord = AvroUtils.toGenericRecord(getBeamRow()); + assertEquals(getAvroSchema(), genericRecord.getSchema()); + assertEquals(getGenericRecord(), genericRecord); + } + + @Test + public void testRowToGenericRecordFunction() { + SerializableUtils.ensureSerializable(AvroUtils.getRowToGenericRecordFunction(NULL_SCHEMA)); + SerializableUtils.ensureSerializable(AvroUtils.getRowToGenericRecordFunction(null)); + } + + @Test + public void testGenericRecordToBeamRow() { + GenericRecord genericRecord = getGenericRecord(); + Row row = AvroUtils.toBeamRowStrict(getGenericRecord(), null); + assertEquals(getBeamRow(), row); + + // Alternatively, a timestamp-millis logical type can have a joda datum. 
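As the comment above notes, AvroUtils accepts a timestamp-millis datum either as the underlying epoch-millis long or as a Joda DateTime. The JDBC-style date and time conversions exercised in the preceding tests likewise reduce to Avro's int-backed date and time-millis logical types: days since the Unix epoch and milliseconds since midnight. A minimal java.time sketch of those two encodings, shown only for illustration (the tests themselves compute the same values with Joda-Time):

    import java.time.LocalDateTime;

    public class AvroDateTimeEncodings {
      public static void main(String[] args) {
        // Avro "date": an int of days since 1970-01-01.
        // Avro "time-millis": an int of milliseconds since midnight.
        LocalDateTime ts = LocalDateTime.parse("2021-05-29T11:15:16.234");
        int daysFromEpoch = (int) ts.toLocalDate().toEpochDay();               // 18776
        int millisOfDay = (int) (ts.toLocalTime().toNanoOfDay() / 1_000_000L); // 40516234
        System.out.println(daysFromEpoch + " " + millisOfDay);
      }
    }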
+ genericRecord.put("timestampMillis", new DateTime(genericRecord.get("timestampMillis"))); + row = AvroUtils.toBeamRowStrict(getGenericRecord(), null); + assertEquals(getBeamRow(), row); + } + + @Test + public void testGenericRecordToRowFunction() { + SerializableUtils.ensureSerializable(AvroUtils.getGenericRecordToRowFunction(Schema.of())); + SerializableUtils.ensureSerializable(AvroUtils.getGenericRecordToRowFunction(null)); + } + + @Test + public void testAvroSchemaCoders() { + Pipeline pipeline = Pipeline.create(); + org.apache.avro.Schema schema = + org.apache.avro.Schema.createRecord( + "TestSubRecord", + "TestSubRecord doc", + "org.apache.beam.sdk.extensions.avro.schemas.utils", + false, + getAvroSubSchemaFields()); + GenericRecord record = + new GenericRecordBuilder(getAvroSubSchema("simple")) + .set("bool", true) + .set("int", 42) + .build(); + + PCollection records = + pipeline.apply(Create.of(record).withCoder(AvroCoder.of(schema))); + assertFalse(records.hasSchema()); + records.setCoder(AvroUtils.schemaCoder(schema)); + assertTrue(records.hasSchema()); + CoderProperties.coderSerializable(records.getCoder()); + + AvroGeneratedUser user = new AvroGeneratedUser("foo", 42, "green"); + PCollection users = + pipeline.apply(Create.of(user).withCoder(AvroCoder.of(AvroGeneratedUser.class))); + assertFalse(users.hasSchema()); + users.setCoder(AvroUtils.schemaCoder((AvroCoder) users.getCoder())); + assertTrue(users.hasSchema()); + CoderProperties.coderSerializable(users.getCoder()); + } + + @Test + public void testAvroBytesToRowAndRowToAvroBytesFunctions() { + Schema schema = + Schema.builder() + .addInt32Field("f_int") + .addInt64Field("f_long") + .addDoubleField("f_double") + .addStringField("f_string") + .build(); + + SimpleFunction toBytesFn = AvroUtils.getRowToAvroBytesFunction(schema); + SimpleFunction toRowFn = AvroUtils.getAvroBytesToRowFunction(schema); + + Row row = Row.withSchema(schema).attachValues(1, 1L, 1d, "string"); + + byte[] serializedRow = toBytesFn.apply(row); + Row deserializedRow = toRowFn.apply(serializedRow); + + assertEquals(row, deserializedRow); + } + + @Test + public void testNullSchemas() { + assertEquals( + AvroUtils.getFromRowFunction(GenericRecord.class), + AvroUtils.getFromRowFunction(GenericRecord.class)); + } + + /** Helper class that simulate JDBC Logical types. 
*/ + private static class JdbcType implements Schema.LogicalType { + + private static final JdbcType DATE = + new JdbcType<>(JDBCType.DATE, FieldType.STRING, FieldType.DATETIME, ""); + private static final JdbcType TIME = + new JdbcType<>(JDBCType.TIME, FieldType.STRING, FieldType.DATETIME, ""); + + private final String identifier; + private final FieldType argumentType; + private final FieldType baseType; + private final Object argument; + + private static class StringType extends JdbcType { + + private static StringType fixedLengthChar(int size) { + return new StringType(JDBCType.CHAR, size); + } + + private static StringType varchar(int size) { + return new StringType(JDBCType.VARCHAR, size); + } + + private static StringType longvarchar(int size) { + return new StringType(JDBCType.LONGVARCHAR, size); + } + + private static StringType nvarchar(int size) { + return new StringType(JDBCType.NVARCHAR, size); + } + + private static StringType longnvarchar(int size) { + return new StringType(JDBCType.LONGNVARCHAR, size); + } + + private StringType(JDBCType type, int size) { + super(type, FieldType.INT32, FieldType.STRING, size); + } + } + + private JdbcType( + JDBCType jdbcType, FieldType argumentType, FieldType baseType, Object argument) { + this.identifier = jdbcType.getName(); + this.argumentType = argumentType; + this.baseType = baseType; + this.argument = argument; + } + + @Override + public String getIdentifier() { + return identifier; + } + + @Override + public @Nullable FieldType getArgumentType() { + return argumentType; + } + + @Override + public FieldType getBaseType() { + return baseType; + } + + @Override + @SuppressWarnings("TypeParameterUnusedInFormals") + public @Nullable T1 getArgument() { + return (T1) argument; + } + + @Override + public @NonNull T toBaseType(@NonNull T input) { + return input; + } + + @Override + public @NonNull T toInputType(@NonNull T base) { + return base; + } + } +} diff --git a/sdks/java/extensions/sql/perf-tests/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryIOPushDownIT.java b/sdks/java/extensions/sql/perf-tests/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryIOPushDownIT.java index 34842fa8c8db7..4de32cd1fdeed 100644 --- a/sdks/java/extensions/sql/perf-tests/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryIOPushDownIT.java +++ b/sdks/java/extensions/sql/perf-tests/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryIOPushDownIT.java @@ -27,7 +27,6 @@ import java.util.Set; import java.util.UUID; import java.util.function.Function; -import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.PipelineResult; import org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv; import org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode; @@ -39,6 +38,7 @@ import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.TypedRead.Method; import org.apache.beam.sdk.options.Description; import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.testutils.NamedTestResult; import org.apache.beam.sdk.testutils.metrics.IOITMetrics; import org.apache.beam.sdk.testutils.metrics.MetricsReader; @@ -51,6 +51,7 @@ import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; import org.junit.Before; import org.junit.BeforeClass; +import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -94,7 +95,7 @@ 
public class BigQueryIOPushDownIT { @SuppressWarnings("initialization.static.fields.uninitialized") private static InfluxDBSettings settings; - private Pipeline pipeline = Pipeline.create(options); + @Rule public TestPipeline pipeline = TestPipeline.create(); @SuppressWarnings("initialization.fields.uninitialized") private BeamSqlEnv sqlEnv; @@ -117,7 +118,7 @@ public void before() { @Test public void readUsingDirectReadMethodPushDown() { - sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ.toString())); + sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ)); BeamRelNode beamRelNode = sqlEnv.parseQuery(SELECT_STATEMENT); BeamSqlRelUtils.toPCollection(pipeline, beamRelNode) @@ -147,7 +148,7 @@ public void readUsingDirectReadMethod() { .setPipelineOptions(PipelineOptionsFactory.create()) .setRuleSets(ImmutableList.of(RuleSets.ofList(ruleList))) .build(); - sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ.toString())); + sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DIRECT_READ)); BeamRelNode beamRelNode = sqlEnv.parseQuery(SELECT_STATEMENT); BeamSqlRelUtils.toPCollection(pipeline, beamRelNode) @@ -162,7 +163,7 @@ public void readUsingDirectReadMethod() { @Test public void readUsingDefaultMethod() { - sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DEFAULT.toString())); + sqlEnv.executeDdl(String.format(CREATE_TABLE_STATEMENT, Method.DEFAULT)); BeamRelNode beamRelNode = sqlEnv.parseQuery(SELECT_STATEMENT); BeamSqlRelUtils.toPCollection(pipeline, beamRelNode) diff --git a/sdks/java/extensions/sql/perf-tests/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryPerfTable.java b/sdks/java/extensions/sql/perf-tests/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryPerfTable.java index 98a1330af2ec2..4807ff4b9e8be 100644 --- a/sdks/java/extensions/sql/perf-tests/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryPerfTable.java +++ b/sdks/java/extensions/sql/perf-tests/src/test/java/org/apache/beam/sdk/extensions/sql/meta/provider/bigquery/BigQueryPerfTable.java @@ -41,14 +41,17 @@ public class BigQueryPerfTable extends BigQueryTable { @Override public PCollection buildIOReader(PBegin begin) { - return super.buildIOReader(begin).apply(ParDo.of(new RowMonitor(namespace, metric))); + return super.buildIOReader(begin) + .apply(ParDo.of(new RowMonitor(namespace, metric))) + .setRowSchema(this.schema); } @Override public PCollection buildIOReader( PBegin begin, BeamSqlTableFilter filters, List fieldNames) { return super.buildIOReader(begin, filters, fieldNames) - .apply(ParDo.of(new RowMonitor(namespace, metric))); + .apply(ParDo.of(new RowMonitor(namespace, metric))) + .setRowSchema(this.schema); } /** Monitor that records the number of Fields in each Row read from an IO. 
*/ diff --git a/sdks/java/io/bigquery-io-perf-tests/src/test/java/org/apache/beam/sdk/bigqueryioperftests/BigQueryIOIT.java b/sdks/java/io/bigquery-io-perf-tests/src/test/java/org/apache/beam/sdk/bigqueryioperftests/BigQueryIOIT.java index 0dfc7addc6f72..ce4dc10aa87d1 100644 --- a/sdks/java/io/bigquery-io-perf-tests/src/test/java/org/apache/beam/sdk/bigqueryioperftests/BigQueryIOIT.java +++ b/sdks/java/io/bigquery-io-perf-tests/src/test/java/org/apache/beam/sdk/bigqueryioperftests/BigQueryIOIT.java @@ -204,7 +204,7 @@ private void testWrite(BigQueryIO.Write writeIO, String metricName) { Duration.standardSeconds(options.getPipelineTimeout())); extractAndPublishTime(pipelineResult, metricName); // Fail the test if pipeline failed. - assertNotEquals(pipelineState, PipelineResult.State.FAILED); + assertNotEquals(PipelineResult.State.FAILED, pipelineState); // set back streaming options.as(StreamingOptions.class).setStreaming(false); @@ -223,7 +223,7 @@ private void testRead() { sourceOptions.numRecords, readElementMetric(result, NAMESPACE, READ_ELEMENT_METRIC_NAME)); extractAndPublishTime(result, READ_TIME_METRIC_NAME); // Fail the test if pipeline failed. - assertNotEquals(pipelineState, PipelineResult.State.FAILED); + assertNotEquals(PipelineResult.State.FAILED, pipelineState); } private void extractAndPublishTime(PipelineResult pipelineResult, String writeTimeMetricName) { diff --git a/sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/ReadFn.java b/sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/ReadFn.java index 6bca1cf3d1773..3bb5360291879 100644 --- a/sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/ReadFn.java +++ b/sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/ReadFn.java @@ -141,6 +141,9 @@ private static String buildInitialQuery(Read spec, Boolean hasRingRange) { return (spec.query() == null) ? String.format("SELECT * FROM %s.%s", spec.keyspace().get(), spec.table().get()) + " WHERE " - : spec.query().get() + (hasRingRange ? " AND " : ""); + : spec.query().get() + + (hasRingRange + ? spec.query().get().toUpperCase().contains("WHERE") ? 
" AND " : " WHERE " + : ""); } } diff --git a/sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java b/sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java index 7196556abc7cf..a472b9ee1c3a2 100644 --- a/sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java +++ b/sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java @@ -458,6 +458,11 @@ public KV apply(Scientist scientist) { @Test public void testReadWithQuery() throws Exception { + String query = + String.format( + "select person_id, writetime(person_name) from %s.%s where person_id=10 AND person_department='logic'", + CASSANDRA_KEYSPACE, CASSANDRA_TABLE); + PCollection output = pipeline.apply( CassandraIO.read() @@ -466,8 +471,7 @@ public void testReadWithQuery() throws Exception { .withKeyspace(CASSANDRA_KEYSPACE) .withTable(CASSANDRA_TABLE) .withMinNumberOfSplits(20) - .withQuery( - "select person_id, writetime(person_name) from beam_ks.scientist where person_id=10 AND person_department='logic'") + .withQuery(query) .withCoder(SerializableCoder.of(Scientist.class)) .withEntity(Scientist.class)); @@ -485,6 +489,39 @@ public void testReadWithQuery() throws Exception { pipeline.run(); } + @Test + public void testReadWithUnfilteredQuery() throws Exception { + String query = + String.format( + "select person_id, writetime(person_name) from %s.%s", + CASSANDRA_KEYSPACE, CASSANDRA_TABLE); + + PCollection output = + pipeline.apply( + CassandraIO.read() + .withHosts(Collections.singletonList(CASSANDRA_HOST)) + .withPort(cassandraPort) + .withKeyspace(CASSANDRA_KEYSPACE) + .withTable(CASSANDRA_TABLE) + .withMinNumberOfSplits(20) + .withQuery(query) + .withCoder(SerializableCoder.of(Scientist.class)) + .withEntity(Scientist.class)); + + PAssert.thatSingleton(output.apply("Count", Count.globally())).isEqualTo(NUM_ROWS); + PAssert.that(output) + .satisfies( + input -> { + for (Scientist sci : input) { + assertNull(sci.name); + assertTrue(sci.nameTs != null && sci.nameTs > 0); + } + return null; + }); + + pipeline.run(); + } + @Test public void testWrite() { ArrayList data = new ArrayList<>(); diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-5/build.gradle b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-5/build.gradle index f6da77aa8e806..affc9db08828e 100644 --- a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-5/build.gradle +++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-5/build.gradle @@ -27,18 +27,8 @@ enableJavaPerformanceTesting() description = "Apache Beam :: SDKs :: Java :: IO :: Elasticsearch-Tests :: 5.x" ext.summary = "Tests of ElasticsearchIO on Elasticsearch 5.x" -def log4j_version = "2.17.1" def elastic_search_version = "5.6.3" -configurations.all { - resolutionStrategy { - // Make sure the log4j versions for api and core match instead of taking the default - // Gradle rule of using the latest. 
- force "org.apache.logging.log4j:log4j-core:$log4j_version" - force "org.apache.logging.log4j:log4j-api:$log4j_version" - } -} - dependencies { testImplementation project(path: ":sdks:java:io:elasticsearch-tests:elasticsearch-tests-common", configuration: "testRuntimeMigration") testImplementation library.java.testcontainers_elasticsearch @@ -50,8 +40,8 @@ dependencies { testImplementation library.java.hamcrest testImplementation library.java.junit testImplementation "org.elasticsearch.client:elasticsearch-rest-client:$elastic_search_version" - testRuntimeOnly "org.apache.logging.log4j:log4j-api:$log4j_version" - testRuntimeOnly "org.apache.logging.log4j:log4j-core:$log4j_version" + testRuntimeOnly library.java.log4j2_api + testRuntimeOnly library.java.log4j2_core testRuntimeOnly library.java.slf4j_jdk14 testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow") } diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-6/build.gradle b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-6/build.gradle index 69c48ad4ff4bf..4b8f457ad5006 100644 --- a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-6/build.gradle +++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-6/build.gradle @@ -27,18 +27,8 @@ enableJavaPerformanceTesting() description = "Apache Beam :: SDKs :: Java :: IO :: Elasticsearch-Tests :: 6.x" ext.summary = "Tests of ElasticsearchIO on Elasticsearch 6.x" -def log4j_version = "2.17.1" def elastic_search_version = "6.4.0" -configurations.all { - resolutionStrategy { - // Make sure the log4j versions for api and core match instead of taking the default - // Gradle rule of using the latest. - force "org.apache.logging.log4j:log4j-core:$log4j_version" - force "org.apache.logging.log4j:log4j-api:$log4j_version" - } -} - dependencies { testImplementation project(path: ":sdks:java:io:elasticsearch-tests:elasticsearch-tests-common", configuration: "testRuntimeMigration") testImplementation library.java.testcontainers_elasticsearch @@ -49,8 +39,8 @@ dependencies { testImplementation library.java.hamcrest testImplementation library.java.junit testImplementation "org.elasticsearch.client:elasticsearch-rest-client:$elastic_search_version" - testRuntimeOnly "org.apache.logging.log4j:log4j-api:$log4j_version" - testRuntimeOnly "org.apache.logging.log4j:log4j-core:$log4j_version" + testRuntimeOnly library.java.log4j2_api + testRuntimeOnly library.java.log4j2_core testRuntimeOnly library.java.slf4j_jdk14 testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow") } \ No newline at end of file diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/build.gradle b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/build.gradle index 90239086f1551..325bc04428177 100644 --- a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/build.gradle +++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-7/build.gradle @@ -27,18 +27,8 @@ enableJavaPerformanceTesting() description = "Apache Beam :: SDKs :: Java :: IO :: Elasticsearch-Tests :: 7.x" ext.summary = "Tests of ElasticsearchIO on Elasticsearch 7.x" -def log4j_version = "2.17.1" def elastic_search_version = "7.13.4" -configurations.all { - resolutionStrategy { - // Make sure the log4j versions for api and core match instead of taking the default - // Gradle rule of using the latest. 
- force "org.apache.logging.log4j:log4j-core:$log4j_version" - force "org.apache.logging.log4j:log4j-api:$log4j_version" - } -} - dependencies { testImplementation project(path: ":sdks:java:io:elasticsearch-tests:elasticsearch-tests-common", configuration: "testRuntimeMigration") testImplementation library.java.testcontainers_elasticsearch @@ -50,8 +40,8 @@ dependencies { testImplementation library.java.hamcrest testImplementation library.java.junit testImplementation "org.elasticsearch.client:elasticsearch-rest-client:$elastic_search_version" - testRuntimeOnly "org.apache.logging.log4j:log4j-api:$log4j_version" - testRuntimeOnly "org.apache.logging.log4j:log4j-core:$log4j_version" + testRuntimeOnly library.java.log4j2_api + testRuntimeOnly library.java.log4j2_core testRuntimeOnly library.java.slf4j_jdk14 testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow") } diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-8/build.gradle b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-8/build.gradle index c46be10385066..b90bc0b2ef4fc 100644 --- a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-8/build.gradle +++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-8/build.gradle @@ -27,18 +27,8 @@ enableJavaPerformanceTesting() description = "Apache Beam :: SDKs :: Java :: IO :: Elasticsearch-Tests :: 8.x" ext.summary = "Tests of ElasticsearchIO on Elasticsearch 8.x" -def log4j_version = "2.17.1" def elastic_search_version = "8.0.0" -configurations.all { - resolutionStrategy { - // Make sure the log4j versions for api and core match instead of taking the default - // Gradle rule of using the latest. - force "org.apache.logging.log4j:log4j-core:$log4j_version" - force "org.apache.logging.log4j:log4j-api:$log4j_version" - } -} - test { maxParallelForks = 1 } @@ -54,8 +44,8 @@ dependencies { testImplementation library.java.hamcrest testImplementation library.java.junit testImplementation "org.elasticsearch.client:elasticsearch-rest-client:$elastic_search_version" - testRuntimeOnly "org.apache.logging.log4j:log4j-api:$log4j_version" - testRuntimeOnly "org.apache.logging.log4j:log4j-core:$log4j_version" + testRuntimeOnly library.java.log4j2_api + testRuntimeOnly library.java.log4j2_core testRuntimeOnly library.java.slf4j_jdk14 testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow") } \ No newline at end of file diff --git a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/build.gradle b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/build.gradle index ba899173811a2..e5be6ca079b22 100644 --- a/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/build.gradle +++ b/sdks/java/io/elasticsearch-tests/elasticsearch-tests-common/build.gradle @@ -25,18 +25,8 @@ applyJavaNature( description = "Apache Beam :: SDKs :: Java :: IO :: Elasticsearch-Tests :: Common" ext.summary = "Common test classes for ElasticsearchIO" -def log4j_version = "2.17.1" def elastic_search_version = "7.9.2" -configurations.all { - resolutionStrategy { - // Make sure the log4j versions for api and core match instead of taking the default - // Gradle rule of using the latest. 
- force "org.apache.logging.log4j:log4j-core:$log4j_version" - force "org.apache.logging.log4j:log4j-api:$log4j_version" - } -} - dependencies { testImplementation library.java.jackson_databind testImplementation project(path: ":sdks:java:core", configuration: "shadow") @@ -49,8 +39,8 @@ dependencies { testImplementation "org.elasticsearch.client:elasticsearch-rest-high-level-client:${elastic_search_version}" testImplementation library.java.testcontainers_elasticsearch - testRuntimeOnly "org.apache.logging.log4j:log4j-api:$log4j_version" - testRuntimeOnly "org.apache.logging.log4j:log4j-core:$log4j_version" + testRuntimeOnly library.java.log4j2_api + testRuntimeOnly library.java.log4j2_core testRuntimeOnly library.java.slf4j_jdk14 testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow") } diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java index 7f677dab3834d..ab0cc1e7613a2 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java @@ -733,9 +733,9 @@ private PCollection> writeTempTables( // with one that makes this happen. // In the case schemaUpdateOptions are specified by the user, matching does not occur in order // to respect those options. - DynamicDestinations destinations = dynamicDestinations; + DynamicDestinations destinationsWithMatching = dynamicDestinations; if (schemaUpdateOptions.isEmpty()) { - destinations = + destinationsWithMatching = DynamicDestinationsHelpers.matchTableDynamicDestinations( dynamicDestinations, bigQueryServices); } @@ -758,7 +758,7 @@ private PCollection> writeTempTables( WriteDisposition.WRITE_EMPTY, CreateDisposition.CREATE_IF_NEEDED, sideInputs, - destinations, + destinationsWithMatching, loadJobProjectId, maxRetryJobs, ignoreUnknownValues, diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BeamRowToStorageApiProto.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BeamRowToStorageApiProto.java index 4ef88e01c760b..6028d8b9016e3 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BeamRowToStorageApiProto.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BeamRowToStorageApiProto.java @@ -24,9 +24,11 @@ import com.google.protobuf.Descriptors.FieldDescriptor; import com.google.protobuf.DynamicMessage; import java.math.BigDecimal; +import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; import java.time.LocalTime; +import java.time.temporal.ChronoUnit; import java.util.List; import java.util.Map; import java.util.function.BiFunction; @@ -119,7 +121,7 @@ public class BeamRowToStorageApiProto { CivilTimeEncoder.encodePacked64DatetimeSeconds((LocalDateTime) value)) .put( SqlTypes.TIMESTAMP.getIdentifier(), - (logicalType, value) -> ((java.time.Instant) value).toEpochMilli() * 1000) + (logicalType, value) -> (ChronoUnit.MICROS.between(Instant.EPOCH, (Instant) value))) .put( EnumerationType.IDENTIFIER, (logicalType, value) -> diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java 
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java index fbea947d05609..773b3af9673cf 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java @@ -263,6 +263,7 @@ public abstract static class Builder { .put(SqlTypes.DATE.getIdentifier(), StandardSQLTypeName.DATE) .put(SqlTypes.TIME.getIdentifier(), StandardSQLTypeName.TIME) .put(SqlTypes.DATETIME.getIdentifier(), StandardSQLTypeName.DATETIME) + .put(SqlTypes.TIMESTAMP.getIdentifier(), StandardSQLTypeName.TIMESTAMP) .put("SqlTimeWithLocalTzType", StandardSQLTypeName.TIME) .put("Enum", StandardSQLTypeName.STRING) .build(); diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StorageApiWritesShardedRecords.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StorageApiWritesShardedRecords.java index df7a23b68b0e6..191ad864407d9 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StorageApiWritesShardedRecords.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StorageApiWritesShardedRecords.java @@ -318,7 +318,7 @@ String getOrCreateStream( DatasetService datasetService) { try { String stream = streamName.read(); - if (Strings.isNullOrEmpty(stream)) { + if (stream == null || "".equals(stream)) { // In a buffered stream, data is only visible up to the offset to which it was flushed. stream = datasetService.createWriteStream(tableId, Type.BUFFERED).getName(); streamName.write(stream); @@ -426,14 +426,17 @@ public void process( appendClientInfo.get().createAppendClient(datasetService, getOrCreateStream, false); StreamAppendClient streamAppendClient = Preconditions.checkArgumentNotNull(appendClientInfo.get().streamAppendClient); + String streamNameRead = Preconditions.checkArgumentNotNull(streamName.read()); + long currentOffset = Preconditions.checkArgumentNotNull(streamOffset.read()); for (AppendRowsContext context : contexts) { - context.streamName = streamName.read(); + context.streamName = streamNameRead; streamAppendClient.pin(); context.client = appendClientInfo.get().streamAppendClient; - context.offset = streamOffset.read(); + context.offset = currentOffset; ++context.tryIteration; - streamOffset.write(context.offset + context.protoRows.getSerializedRowsCount()); + currentOffset = context.offset + context.protoRows.getSerializedRowsCount(); } + streamOffset.write(currentOffset); } catch (Exception e) { throw new RuntimeException(e); } diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/UpdateSchemaDestination.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/UpdateSchemaDestination.java index 5ee604cba55ab..90bd99c7d6170 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/UpdateSchemaDestination.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/UpdateSchemaDestination.java @@ -256,8 +256,13 @@ private BigQueryHelpers.PendingJob startZeroLoadJob( LOG.warn("Failed to get table {} with {}", tableReference, e.toString()); throw new RuntimeException(e); } - if (destinationTable.getSchema() == null || destinationTable.getSchema().equals(schema)) { - return null; // no need to 
update schema ahead if schema is already the same + // no need to update schema ahead if provided schema already matches destination schema + // or when destination schema is null (the write will set the schema) + // or when provided schema is null (e.g. when using CREATE_NEVER disposition) + if (destinationTable.getSchema() == null + || destinationTable.getSchema().equals(schema) + || schema == null) { + return null; } if (timePartitioning != null) { loadConfig.setTimePartitioning(timePartitioning); diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubClient.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubClient.java index b30bea862998f..e864e649e699d 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubClient.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubClient.java @@ -506,9 +506,9 @@ public abstract List listSubscriptions(ProjectPath project, To */ public abstract boolean isEOF(); - /** Create {@link com.google.api.services.pubsub.model.Schema} from resource path. */ + /** Create {@link com.google.api.services.pubsub.model.Schema} from Schema definition content. */ public abstract void createSchema( - SchemaPath schemaPath, String resourcePath, com.google.pubsub.v1.Schema.Type type) + SchemaPath schemaPath, String schemaContent, com.google.pubsub.v1.Schema.Type type) throws IOException; /** Delete {@link SchemaPath}. */ diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubGrpcClient.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubGrpcClient.java index 3f3ecbcfdbdc3..60c096f72f816 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubGrpcClient.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubGrpcClient.java @@ -59,14 +59,9 @@ import io.grpc.netty.NegotiationType; import io.grpc.netty.NettyChannelBuilder; import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.FileSystems; -import java.nio.file.Files; -import java.nio.file.Path; import java.util.ArrayList; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import org.apache.beam.sdk.schemas.Schema; @@ -436,19 +431,12 @@ public boolean isEOF() { return false; } - /** Create {@link com.google.pubsub.v1.Schema} from resource path. */ + /** Create {@link com.google.pubsub.v1.Schema} from Schema definition content. 
*/ @Override public void createSchema( - SchemaPath schemaPath, String resourcePath, com.google.pubsub.v1.Schema.Type type) + SchemaPath schemaPath, String schemaContent, com.google.pubsub.v1.Schema.Type type) throws IOException { - Path path = - FileSystems.getDefault() - .getPath( - Objects.requireNonNull(PubsubGrpcClient.class.getResource(resourcePath)).getPath()); - byte[] b = Files.readAllBytes(path); - String definition = new String(b, StandardCharsets.UTF_8); - CreateSchemaRequest request = CreateSchemaRequest.newBuilder() .setSchemaId(schemaPath.getId()) @@ -456,7 +444,7 @@ public void createSchema( .setSchema( com.google.pubsub.v1.Schema.newBuilder() .setType(type) - .setDefinition(definition) + .setDefinition(schemaContent) .build()) .build(); diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClient.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClient.java index 613ca4581a5e0..ab6b6533343e1 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClient.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubJsonClient.java @@ -365,10 +365,10 @@ public boolean isEOF() { return false; } - /** Create {@link com.google.api.services.pubsub.model.Schema} from resource path. */ + /** Create {@link com.google.api.services.pubsub.model.Schema} from Schema definition content. */ @Override public void createSchema( - SchemaPath schemaPath, String resourcePath, com.google.pubsub.v1.Schema.Type type) + SchemaPath schemaPath, String schemaContent, com.google.pubsub.v1.Schema.Type type) throws IOException { throw new UnsupportedOperationException(); } diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubTestClient.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubTestClient.java index 4ad8f9fab3978..43dc244f5c252 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubTestClient.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubTestClient.java @@ -626,7 +626,7 @@ public boolean isEOF() { @Override public void createSchema( - SchemaPath schemaPath, String resourcePath, com.google.pubsub.v1.Schema.Type type) + SchemaPath schemaPath, String schemaContent, com.google.pubsub.v1.Schema.Type type) throws IOException { throw new UnsupportedOperationException(); } diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BeamRowToStorageApiProtoTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BeamRowToStorageApiProtoTest.java index fefe4d29a2ea1..ca82dc9dae6ba 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BeamRowToStorageApiProtoTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BeamRowToStorageApiProtoTest.java @@ -32,6 +32,7 @@ import java.time.LocalDate; import java.time.LocalDateTime; import java.time.LocalTime; +import java.time.temporal.ChronoUnit; import java.util.Map; import java.util.stream.Collectors; import org.apache.beam.sdk.schemas.Schema; @@ -228,7 +229,7 @@ public class BeamRowToStorageApiProtoTest { .withFieldValue("sqlDateValue", LocalDate.now()) .withFieldValue("sqlTimeValue", LocalTime.now()) 
.withFieldValue("sqlDatetimeValue", LocalDateTime.now()) - .withFieldValue("sqlTimestampValue", java.time.Instant.now()) + .withFieldValue("sqlTimestampValue", java.time.Instant.now().plus(123, ChronoUnit.MICROS)) .withFieldValue("enumValue", TEST_ENUM.valueOf("RED")) .build(); private static final Map BASE_PROTO_EXPECTED_FIELDS = @@ -261,10 +262,9 @@ public class BeamRowToStorageApiProtoTest { BASE_ROW.getLogicalTypeValue("sqlDatetimeValue", LocalDateTime.class))) .put( "sqltimestampvalue", - BASE_ROW - .getLogicalTypeValue("sqlTimestampValue", java.time.Instant.class) - .toEpochMilli() - * 1000) + ChronoUnit.MICROS.between( + java.time.Instant.EPOCH, + BASE_ROW.getLogicalTypeValue("sqlTimestampValue", java.time.Instant.class))) .put("enumvalue", "RED") .build(); diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java index b3105da9da6c9..b654b84d6a75e 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java @@ -774,6 +774,50 @@ public void testTriggeredFileLoadsWithTempTablesDefaultProject() throws Exceptio testTriggeredFileLoadsWithTempTables("dataset-id.table-id"); } + @Test + public void testTriggeredFileLoadsWithTempTablesCreateNever() throws Exception { + assumeTrue(!useStorageApi); + assumeTrue(!useStreaming); + + // Create table and give it a schema + TableSchema schema = + new TableSchema() + .setFields( + ImmutableList.of( + new TableFieldSchema().setName("str").setType("STRING"), + new TableFieldSchema().setName("num").setType("INTEGER"))); + Table fakeTable = new Table(); + TableReference ref = + new TableReference() + .setProjectId("project-id") + .setDatasetId("dataset-id") + .setTableId("table-id"); + fakeTable.setSchema(schema); + fakeTable.setTableReference(ref); + fakeDatasetService.createTable(fakeTable); + + List elements = Lists.newArrayList(); + for (int i = 1; i < 10; i++) { + elements.add(new TableRow().set("str", "a").set("num", i)); + } + + // Write to table with CREATE_NEVER and with no schema + p.apply(Create.of(elements)) + .apply( + BigQueryIO.writeTableRows() + .to("project-id:dataset-id.table-id") + .withCreateDisposition(CreateDisposition.CREATE_NEVER) + .withTestServices(fakeBqServices) + .withMaxBytesPerPartition(1) + .withMaxFilesPerPartition(1) + .withoutValidation()); + p.run(); + + assertThat( + fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"), + containsInAnyOrder(Iterables.toArray(elements, TableRow.class))); + } + @Test public void testTriggeredFileLoadsWithAutoSharding() throws Exception { assumeTrue(!useStorageApi); diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubSchemaIT.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubSchemaIT.java index 7adc0ca907d01..d9bc548b11486 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubSchemaIT.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubSchemaIT.java @@ -43,11 +43,8 @@ public class PubsubSchemaIT { private static final String HAS_PROTO_SCHEMA = "has-proto-schema"; private static final String AVRO_PRIMITIVE_TYPES_FLAT = 
"avro-primitive-types-flat"; - private static final String AVRO_SCHEMA_FILE = "avro_all_data_types_flat_schema.json"; private static final String PROTO_PRIMITIVE_TYPES_FLAT = "proto-primitive-types-flat"; - private static final String PROTO_SCHEMA_FILE = "all_data_types_flat_schema.proto"; - private static PubsubClient pubsubClient; private static TopicPath hasNoSchemaTopic; @@ -83,9 +80,11 @@ public static void setup() throws IOException { PubsubClient.schemaPathFromId(project, PROTO_PRIMITIVE_TYPES_FLAT + postFix); pubsubClient.createSchema( - hasAvroSchemaPath, AVRO_SCHEMA_FILE, com.google.pubsub.v1.Schema.Type.AVRO); + hasAvroSchemaPath, AVRO_ALL_DATA_TYPES_FLAT_SCHEMA, com.google.pubsub.v1.Schema.Type.AVRO); pubsubClient.createSchema( - hasProtoSchemaPath, PROTO_SCHEMA_FILE, com.google.pubsub.v1.Schema.Type.PROTOCOL_BUFFER); + hasProtoSchemaPath, + PROTO_ALL_DATA_TYPES_FLAT_SCHEMA, + com.google.pubsub.v1.Schema.Type.PROTOCOL_BUFFER); pubsubClient.createTopic(hasNoSchemaTopic); pubsubClient.createTopic(hasAvroSchemaTopic, hasAvroSchemaPath); pubsubClient.createTopic(hasProtoSchemaTopic, hasProtoSchemaPath); @@ -121,4 +120,48 @@ public void testGetSchema() throws IOException { IllegalArgumentException.class, () -> pubsubClient.getSchema(hasProtoSchemaPath)); } + + private static final String PROTO_ALL_DATA_TYPES_FLAT_SCHEMA = + "syntax = \"proto3\";\n" + + "\n" + + "message Record {\n" + + " double doubleField = 1;\n" + + " float floatField = 2;\n" + + " int32 int32Field = 3;\n" + + " int64 int64Field = 4;\n" + + " bool boolField = 5;\n" + + " string stringField = 6;\n" + + "}"; + + private static final String AVRO_ALL_DATA_TYPES_FLAT_SCHEMA = + "{\n" + + " \"type\": \"record\",\n" + + " \"name\": \"Avro\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\": \"BooleanField\",\n" + + " \"type\": \"boolean\"\n" + + " },\n" + + " {\n" + + " \"name\": \"IntField\",\n" + + " \"type\": \"int\"\n" + + " },\n" + + " {\n" + + " \"name\": \"LongField\",\n" + + " \"type\": \"long\"\n" + + " },\n" + + " {\n" + + " \"name\": \"FloatField\",\n" + + " \"type\": \"float\"\n" + + " },\n" + + " {\n" + + " \"name\": \"DoubleField\",\n" + + " \"type\": \"double\"\n" + + " },\n" + + " {\n" + + " \"name\": \"StringField\",\n" + + " \"type\": \"string\"\n" + + " }\n" + + " ]\n" + + "}"; } diff --git a/sdks/java/io/google-cloud-platform/src/test/resources/org/apache/beam/sdk/io/gcp/pubsub/avro_all_data_types_flat_schema.json b/sdks/java/io/google-cloud-platform/src/test/resources/org/apache/beam/sdk/io/gcp/pubsub/avro_all_data_types_flat_schema.json deleted file mode 100644 index 41f3fab416721..0000000000000 --- a/sdks/java/io/google-cloud-platform/src/test/resources/org/apache/beam/sdk/io/gcp/pubsub/avro_all_data_types_flat_schema.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "type": "record", - "name": "Avro", - "fields": [ - { - "name": "BooleanField", - "type": "boolean" - }, - { - "name": "IntField", - "type": "int" - }, - { - "name": "LongField", - "type": "long" - }, - { - "name": "FloatField", - "type": "float" - }, - { - "name": "DoubleField", - "type": "double" - }, - { - "name": "StringField", - "type": "string" - } - ] -} diff --git a/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/HadoopFormatIOIT.java b/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/HadoopFormatIOIT.java index 3c3c4398b94a2..d61a14a47f839 100644 --- a/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/HadoopFormatIOIT.java +++ 
b/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/HadoopFormatIOIT.java @@ -230,8 +230,8 @@ public void writeAndReadUsingHadoopFormat() { collectAndPublishMetrics(writeResult, readResult); } // Fail the test if pipeline failed. - assertNotEquals(writeState, PipelineResult.State.FAILED); - assertNotEquals(readState, PipelineResult.State.FAILED); + assertNotEquals(PipelineResult.State.FAILED, writeState); + assertNotEquals(PipelineResult.State.FAILED, readState); } private void collectAndPublishMetrics(PipelineResult writeResult, PipelineResult readResult) { diff --git a/sdks/java/io/influxdb/src/test/java/org/apache/beam/sdk/io/influxdb/InfluxDbIOIT.java b/sdks/java/io/influxdb/src/test/java/org/apache/beam/sdk/io/influxdb/InfluxDbIOIT.java index 77042b882dc54..da725ab541b31 100644 --- a/sdks/java/io/influxdb/src/test/java/org/apache/beam/sdk/io/influxdb/InfluxDbIOIT.java +++ b/sdks/java/io/influxdb/src/test/java/org/apache/beam/sdk/io/influxdb/InfluxDbIOIT.java @@ -28,10 +28,15 @@ import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Count; import org.apache.beam.sdk.transforms.Create; +import org.apache.beam.sdk.util.BackOff; +import org.apache.beam.sdk.util.BackOffUtils; +import org.apache.beam.sdk.util.FluentBackoff; +import org.apache.beam.sdk.util.Sleeper; import org.apache.beam.sdk.values.PCollection; import org.influxdb.InfluxDB; import org.influxdb.InfluxDB.ConsistencyLevel; import org.influxdb.InfluxDBFactory; +import org.influxdb.InfluxDBIOException; import org.influxdb.dto.Query; import org.junit.After; import org.junit.Before; @@ -90,13 +95,28 @@ public void clear() { } @Before - public void initTest() { + public void initTest() throws IOException, InterruptedException { + BackOff backOff = FluentBackoff.DEFAULT.withMaxRetries(4).backoff(); + Query createQuery = new Query(String.format("CREATE DATABASE %s", options.getDatabaseName())); try (InfluxDB connection = InfluxDBFactory.connect( options.getInfluxDBURL(), options.getInfluxDBUserName(), options.getInfluxDBPassword())) { - connection.query(new Query(String.format("CREATE DATABASE %s", options.getDatabaseName()))); + InfluxDBIOException lastException; + // retry create database + do { + try { + connection.query(createQuery); + lastException = null; + break; + } catch (InfluxDBIOException e) { + lastException = e; + } + } while (BackOffUtils.next(Sleeper.DEFAULT, backOff)); + if (lastException != null) { + throw lastException; + } } } diff --git a/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcIOIT.java b/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcIOIT.java index 288f1467fa308..301e96496c5ec 100644 --- a/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcIOIT.java +++ b/sdks/java/io/jdbc/src/test/java/org/apache/beam/sdk/io/jdbc/JdbcIOIT.java @@ -140,8 +140,8 @@ public void testWriteThenRead() throws SQLException { PipelineResult.State readState = readResult.waitUntilFinish(); gatherAndPublishMetrics(writeResult, readResult); // Fail the test if pipeline failed. 
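These assertion swaps (here and in the similar IO integration tests elsewhere in this patch) follow org.junit.Assert's parameter order: the expected or unexpected value comes first and the observed value second, so failure messages attribute the values the right way round. A minimal sketch of the convention:

    import static org.junit.Assert.assertNotEquals;

    public class AssertionOrderSketch {
      public static void main(String[] args) {
        String pipelineState = "DONE"; // stand-in for a PipelineResult.State
        // Signature is assertNotEquals(unexpected, actual): the constant we must not
        // see goes first, the value observed at runtime goes second.
        assertNotEquals("FAILED", pipelineState);
      }
    }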
- assertNotEquals(writeState, PipelineResult.State.FAILED); - assertNotEquals(readState, PipelineResult.State.FAILED); + assertNotEquals(PipelineResult.State.FAILED, writeState); + assertNotEquals(PipelineResult.State.FAILED, readState); } finally { DatabaseTestHelper.deleteTable(dataSource, tableName); } diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java index 497496db1b3ce..bd6273ae251d8 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java @@ -21,11 +21,13 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; import com.google.cloud.Timestamp; import java.io.IOException; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Map; @@ -72,6 +74,7 @@ import org.apache.beam.sdk.transforms.Keys; import org.apache.beam.sdk.transforms.MapElements; import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.transforms.Reshuffle; import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.sdk.transforms.SimpleFunction; import org.apache.beam.sdk.transforms.Values; @@ -194,6 +197,7 @@ public void testKafkaIOReadsAndWritesCorrectlyInStreaming() throws IOException { // Use batch pipeline to write records. writePipeline .apply("Generate records", Read.from(new SyntheticBoundedSource(sourceOptions))) + .apply("Avoid fusion", Reshuffle.viaRandomKey()) .apply("Measure write time", ParDo.of(new TimeMonitor<>(NAMESPACE, WRITE_TIME_METRIC_NAME))) .apply("Write to Kafka", writeToKafka().withTopic(options.getKafkaTopic())); @@ -213,18 +217,23 @@ public void testKafkaIOReadsAndWritesCorrectlyInStreaming() throws IOException { readResult.waitUntilFinish(Duration.standardSeconds(options.getReadTimeout())); cancelIfTimeouted(readResult, readState); + // Delete the kafka topic after test pipeline run. + tearDownTopic(options.getKafkaTopic()); - assertEquals( - sourceOptions.numRecords, - readElementMetric(readResult, NAMESPACE, READ_ELEMENT_METRIC_NAME)); + long actualRecords = readElementMetric(readResult, NAMESPACE, READ_ELEMENT_METRIC_NAME); + assertTrue( + String.format( + "actual number of records %d smaller than expected: %d.", + actualRecords, sourceOptions.numRecords), + sourceOptions.numRecords <= actualRecords); if (!options.isWithTestcontainers()) { Set metrics = readMetrics(writeResult, readResult); IOITMetrics.publishToInflux(TEST_ID, TIMESTAMP, metrics, settings); } // Fail the test if pipeline failed. 
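The "Avoid fusion" step added to the Kafka write pipeline above inserts Reshuffle.viaRandomKey() between the synthetic source and the Kafka sink. Without it, a runner may fuse generation and writing into a single stage, so the write inherits the source's parallelism; the reshuffle forces a redistribution so the write can scale independently. A minimal sketch of the pattern (the source and the final step are stand-ins, not the transforms used in the test):

    import org.apache.beam.sdk.Pipeline;
    import org.apache.beam.sdk.options.PipelineOptionsFactory;
    import org.apache.beam.sdk.transforms.Create;
    import org.apache.beam.sdk.transforms.MapElements;
    import org.apache.beam.sdk.transforms.Reshuffle;
    import org.apache.beam.sdk.values.TypeDescriptors;

    public class AvoidFusionSketch {
      public static void main(String[] args) {
        Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
        p.apply("Generate records", Create.of("a", "b", "c"))
            // Breaks producer/consumer fusion: elements are redistributed across
            // workers before the downstream step instead of being processed by the
            // same bundles that produced them.
            .apply("Avoid fusion", Reshuffle.viaRandomKey())
            .apply("Write (stand-in)",
                MapElements.into(TypeDescriptors.strings()).via((String s) -> s));
        p.run().waitUntilFinish();
      }
    }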
- assertNotEquals(writeState, PipelineResult.State.FAILED); - assertNotEquals(readState, PipelineResult.State.FAILED); + assertNotEquals(PipelineResult.State.FAILED, writeState); + assertNotEquals(PipelineResult.State.FAILED, readState); } @Test @@ -237,6 +246,7 @@ public void testKafkaIOReadsAndWritesCorrectlyInBatch() throws IOException { expectedHashcode = getHashForRecordCount(sourceOptions.numRecords, expectedHashes); writePipeline .apply("Generate records", Read.from(new SyntheticBoundedSource(sourceOptions))) + .apply("Avoid fusion", Reshuffle.viaRandomKey()) .apply("Measure write time", ParDo.of(new TimeMonitor<>(NAMESPACE, WRITE_TIME_METRIC_NAME))) .apply("Write to Kafka", writeToKafka().withTopic(options.getKafkaTopic())); @@ -260,8 +270,11 @@ public void testKafkaIOReadsAndWritesCorrectlyInBatch() throws IOException { readResult.waitUntilFinish(Duration.standardSeconds(options.getReadTimeout())); cancelIfTimeouted(readResult, readState); + // Delete the kafka topic after test pipeline run. + tearDownTopic(options.getKafkaTopic()); + // Fail the test if pipeline failed. - assertEquals(readState, PipelineResult.State.DONE); + assertEquals(PipelineResult.State.DONE, readState); if (!options.isWithTestcontainers()) { Set metrics = readMetrics(writeResult, readResult); @@ -479,7 +492,7 @@ public void testKafkaWithDynamicPartitions() throws IOException { cancelIfTimeouted(readResult, readState); // Fail the test if pipeline failed. - assertNotEquals(readState, PipelineResult.State.FAILED); + assertNotEquals(PipelineResult.State.FAILED, readState); } finally { client.deleteTopics(ImmutableSet.of(topicName)); } @@ -734,6 +747,14 @@ private void cancelIfTimeouted(PipelineResult readResult, PipelineResult.State r } } + /** Delete the topic after test run. */ + private void tearDownTopic(String topicName) { + AdminClient client = + AdminClient.create( + ImmutableMap.of("bootstrap.servers", options.getKafkaBootstrapServerAddresses())); + client.deleteTopics(Collections.singleton(topicName)); + } + private KafkaIO.Write writeToKafka() { return KafkaIO.write() .withBootstrapServers(options.getKafkaBootstrapServerAddresses()) diff --git a/sdks/python/apache_beam/examples/complete/game/user_score.py b/sdks/python/apache_beam/examples/complete/game/user_score.py index 0aff0ce6ef86b..aaab4c2a1c3e6 100644 --- a/sdks/python/apache_beam/examples/complete/game/user_score.py +++ b/sdks/python/apache_beam/examples/complete/game/user_score.py @@ -61,7 +61,7 @@ # description: batch processing; reading input from Google Cloud Storage or a # from a local text file, and writing output to a text file; using # standalone DoFns; use of the CombinePerKey transform. 
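One note on the tearDownTopic helper added above: Kafka's AdminClient implements AutoCloseable and deleteTopics is asynchronous. A variant that releases the client and waits for the broker to confirm the deletion could look like the following sketch (class and method names are illustrative):

    import java.util.Collections;
    import java.util.Map;
    import org.apache.kafka.clients.admin.AdminClient;

    public class TopicCleanupSketch {
      /** Deletes a topic, closing the admin client and blocking until the call completes. */
      static void deleteTopic(String bootstrapServers, String topic) throws Exception {
        Map<String, Object> config =
            Collections.singletonMap("bootstrap.servers", (Object) bootstrapServers);
        try (AdminClient client = AdminClient.create(config)) {
          // deleteTopics returns futures; all().get() waits for the deletion to be acknowledged.
          client.deleteTopics(Collections.singleton(topic)).all().get();
        }
      }
    }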
-# multifile: true +# multifile: false # pipeline_options: --output output.txt # context_line: 81 # categories: @@ -72,7 +72,6 @@ # tags: # - batch # - combine -# - combine # - io # - strings @@ -159,7 +158,7 @@ def run(argv=None, save_main_session=True): parser.add_argument( '--input', type=str, - default='gs://apache-beam-samples/game/gaming_data*.csv', + default='gs://apache-beam-samples/game/small/gaming_data.csv', help='Path to the data file(s) containing game data.') parser.add_argument( '--output', type=str, required=True, help='Path to the output file(s).') diff --git a/sdks/python/apache_beam/io/external/xlang_kafkaio_perf_test.py b/sdks/python/apache_beam/io/external/xlang_kafkaio_perf_test.py new file mode 100644 index 0000000000000..72e2e63e41229 --- /dev/null +++ b/sdks/python/apache_beam/io/external/xlang_kafkaio_perf_test.py @@ -0,0 +1,140 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import sys +import typing + +import apache_beam as beam +from apache_beam.io import iobase +from apache_beam.io import kafka +from apache_beam.testing.load_tests.load_test import LoadTest +from apache_beam.testing.load_tests.load_test import LoadTestOptions +from apache_beam.testing.load_tests.load_test_metrics_utils import CountMessages +from apache_beam.testing.load_tests.load_test_metrics_utils import MeasureTime +from apache_beam.testing.synthetic_pipeline import SyntheticSource +from apache_beam.testing.test_pipeline import TestPipeline +from apache_beam.transforms.util import Reshuffle + +WRITE_NAMESPACE = 'write' +READ_NAMESPACE = 'read' + +_LOGGER = logging.getLogger(__name__) + + +class KafkaIOTestOptions(LoadTestOptions): + @classmethod + def _add_argparse_args(cls, parser): + parser.add_argument( + '--test_class', required=True, help='Test class to run.') + + parser.add_argument('--kafka_topic', required=True, help='Kafka topic.') + + parser.add_argument( + '--bootstrap_servers', help='URL TO Kafka Bootstrap service.') + + parser.add_argument( + '--read_timeout', + type=int, + required=True, + help='Time to wait for the events to be processed by the read pipeline' + ' (in seconds)') + + +class KafkaIOPerfTest: + """Performance test for cross-language Kafka IO pipeline.""" + def run(self): + write_test = _KafkaIOBatchWritePerfTest() + read_test = _KafkaIOSDFReadPerfTest() + write_test.run() + read_test.run() + + +class _KafkaIOBatchWritePerfTest(LoadTest): + def __init__(self): + super().__init__(WRITE_NAMESPACE) + self.test_options = self.pipeline.get_pipeline_options().view_as( + KafkaIOTestOptions) + self.kafka_topic = self.test_options.kafka_topic + # otherwise see 'ValueError: Unexpected DoFn type: beam:dofn:javasdk:0.1' + self.pipeline.not_use_test_runner_api = True + + def test(self): + _ = ( + self.pipeline + | 'Generate 
records' >> iobase.Read( + SyntheticSource(self.parse_synthetic_source_options())) \ + .with_output_types(typing.Tuple[bytes, bytes]) + | 'Count records' >> beam.ParDo(CountMessages(self.metrics_namespace)) + | 'Avoid Fusion' >> Reshuffle() + | 'Measure time' >> beam.ParDo(MeasureTime(self.metrics_namespace)) + | 'WriteToKafka' >> kafka.WriteToKafka( + producer_config={ + 'bootstrap.servers': self.test_options.bootstrap_servers + }, + topic=self.kafka_topic)) + + def cleanup(self): + pass + + +class _KafkaIOSDFReadPerfTest(LoadTest): + def __init__(self): + super().__init__(READ_NAMESPACE) + self.test_options = self.pipeline.get_pipeline_options().view_as( + KafkaIOTestOptions) + self.timeout_ms = self.test_options.read_timeout * 1000 + self.kafka_topic = self.test_options.kafka_topic + # otherwise see 'ValueError: Unexpected DoFn type: beam:dofn:javasdk:0.1' + self.pipeline.not_use_test_runner_api = True + + def test(self): + _ = ( + self.pipeline + | 'ReadFromKafka' >> kafka.ReadFromKafka( + consumer_config={ + 'bootstrap.servers': self.test_options.bootstrap_servers, + 'auto.offset.reset': 'earliest' + }, + topics=[self.kafka_topic], + expansion_service=kafka.default_io_expansion_service()) + | 'Count records' >> beam.ParDo(CountMessages(self.metrics_namespace)) + | 'Measure time' >> beam.ParDo(MeasureTime(self.metrics_namespace))) + + def cleanup(self): + # assert number of records after test pipeline run + total_messages = self._metrics_monitor.get_counter_metric( + self.result, CountMessages.LABEL) + assert total_messages == self.input_options['num_records'] + + +if __name__ == '__main__': + logging.basicConfig(level=logging.INFO) + + test_options = TestPipeline().get_pipeline_options().view_as( + KafkaIOTestOptions) + supported_test_classes = list( + filter( + lambda s: s.endswith('PerfTest') and not s.startswith('_'), + dir(sys.modules[__name__]))) + + if test_options.test_class not in supported_test_classes: + raise RuntimeError( + f'Test {test_options.test_class} not found. 
' + f'Supported tests are {supported_test_classes}') + + getattr(sys.modules[__name__], test_options.test_class)().run() diff --git a/sdks/python/apache_beam/io/filebasedio_perf_test.py b/sdks/python/apache_beam/io/filebasedio_perf_test.py index 7d5b673098d51..78a390d9bed4e 100644 --- a/sdks/python/apache_beam/io/filebasedio_perf_test.py +++ b/sdks/python/apache_beam/io/filebasedio_perf_test.py @@ -36,6 +36,7 @@ from apache_beam.testing.test_pipeline import TestPipeline from apache_beam.testing.util import assert_that from apache_beam.testing.util import equal_to +from apache_beam.transforms.util import Reshuffle WRITE_NAMESPACE = 'write' READ_NAMESPACE = 'read' @@ -126,6 +127,7 @@ def test(self): SyntheticSource(self.parse_synthetic_source_options())) | 'Count records' >> beam.ParDo(CountMessages(self.metrics_namespace)) | 'Format' >> beam.ParDo(SyntheticRecordToStrFn()) + | 'Avoid Fusion' >> Reshuffle() | 'Measure time' >> beam.ParDo(MeasureTime(self.metrics_namespace)) | 'Write Text' >> WriteToText( file_path_prefix=FileSystems.join(self.output_folder, 'test'), diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools.py b/sdks/python/apache_beam/io/gcp/bigquery_tools.py index 05c752b3cf5f1..edffd86e5c179 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_tools.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_tools.py @@ -794,6 +794,12 @@ def get_or_create_dataset( return dataset except HttpError as exn: if exn.status_code == 404: + _LOGGER.info( + 'Dataset %s:%s does not exist so we will create it as temporary ' + 'with location=%s', + project_id, + dataset_id, + location) dataset_reference = bigquery.DatasetReference( projectId=project_id, datasetId=dataset_id) dataset = bigquery.Dataset(datasetReference=dataset_reference) @@ -873,29 +879,16 @@ def is_user_configured_dataset(self): num_retries=MAX_RETRIES, retry_filter=retry.retry_on_server_errors_and_timeout_filter) def create_temporary_dataset(self, project_id, location, labels=None): - # Check if dataset exists to make sure that the temporary id is unique - try: - self.client.datasets.Get( - bigquery.BigqueryDatasetsGetRequest( - projectId=project_id, datasetId=self.temp_dataset_id)) - if project_id is not None and not self.is_user_configured_dataset(): - # Unittests don't pass projectIds so they can be run without error - # User configured datasets are allowed to pre-exist. - raise RuntimeError( - 'Dataset %s:%s already exists so cannot be used as temporary.' % - (project_id, self.temp_dataset_id)) - except HttpError as exn: - if exn.status_code == 404: - _LOGGER.warning( - 'Dataset %s:%s does not exist so we will create it as temporary ' - 'with location=%s', - project_id, - self.temp_dataset_id, - location) - self.get_or_create_dataset( - project_id, self.temp_dataset_id, location=location, labels=labels) - else: - raise + self.get_or_create_dataset( + project_id, self.temp_dataset_id, location=location, labels=labels) + + if (project_id is not None and not self.is_user_configured_dataset() and + not self.created_temp_dataset): + # Unittests don't pass projectIds so they can be run without error + # User configured datasets are allowed to pre-exist. + raise RuntimeError( + 'Dataset %s:%s already exists so cannot be used as temporary.'
% + (project_id, self.temp_dataset_id)) @retry.with_exponential_backoff( num_retries=MAX_RETRIES, diff --git a/sdks/python/apache_beam/testing/benchmarks/cloudml/__init__.py b/sdks/python/apache_beam/testing/benchmarks/cloudml/__init__.py new file mode 100644 index 0000000000000..cce3acad34a49 --- /dev/null +++ b/sdks/python/apache_beam/testing/benchmarks/cloudml/__init__.py @@ -0,0 +1,16 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/sdks/python/apache_beam/testing/benchmarks/cloudml/cloudml_benchmark_constants_lib.py b/sdks/python/apache_beam/testing/benchmarks/cloudml/cloudml_benchmark_constants_lib.py new file mode 100644 index 0000000000000..ad41dcc504ec3 --- /dev/null +++ b/sdks/python/apache_beam/testing/benchmarks/cloudml/cloudml_benchmark_constants_lib.py @@ -0,0 +1,29 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""A common file for CloudML benchmarks. + +This file contains constants for pipeline paths, dependency locations and +test data paths. +""" + +INPUT_CRITEO_SMALL = 'train10.tsv' +INPUT_CRITEO_SMALL_100MB = '100mb/train.txt' +INPUT_CRITEO_10GB = '10gb/train.txt' + +FREQUENCY_THRESHOLD = '5' +ENABLE_SHUFFLE = True diff --git a/sdks/python/apache_beam/testing/benchmarks/cloudml/cloudml_benchmark_test.py b/sdks/python/apache_beam/testing/benchmarks/cloudml/cloudml_benchmark_test.py new file mode 100644 index 0000000000000..1561f3b179195 --- /dev/null +++ b/sdks/python/apache_beam/testing/benchmarks/cloudml/cloudml_benchmark_test.py @@ -0,0 +1,90 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import unittest +import uuid + +import pytest + +try: + import apache_beam.testing.benchmarks.cloudml.cloudml_benchmark_constants_lib as lib + from apache_beam.testing.benchmarks.cloudml.pipelines import workflow + from apache_beam.testing.test_pipeline import TestPipeline +except ImportError: # pylint: disable=bare-except + raise unittest.SkipTest('Dependencies are not installed') + +_INPUT_GCS_BUCKET_ROOT = 'gs://apache-beam-ml/datasets/cloudml/criteo' +_CRITEO_FEATURES_FILE = 'testdata/criteo/expected/features.tfrecord.gz' +_OUTPUT_GCS_BUCKET_ROOT = 'gs://temp-storage-for-end-to-end-tests/tft/' + + +@pytest.mark.uses_tft +class CloudMLTFTBenchmarkTest(unittest.TestCase): + def test_cloudml_benchmark_criteo_small(self): + test_pipeline = TestPipeline(is_integration_test=True) + extra_opts = {} + extra_opts['input'] = os.path.join( + _INPUT_GCS_BUCKET_ROOT, lib.INPUT_CRITEO_SMALL) + extra_opts['benchmark_type'] = 'tft' + extra_opts['classifier'] = 'criteo' + extra_opts['frequency_threshold'] = 0 + extra_opts['output'] = os.path.join( + _OUTPUT_GCS_BUCKET_ROOT, uuid.uuid4().hex) + workflow.run(test_pipeline.get_full_options_as_args(**extra_opts)) + + def test_cloudml_benchmark_cirteo_no_shuffle_10GB(self): + test_pipeline = TestPipeline(is_integration_test=True) + extra_opts = {} + extra_opts['input'] = os.path.join( + _INPUT_GCS_BUCKET_ROOT, lib.INPUT_CRITEO_10GB) + extra_opts['benchmark_type'] = 'tft' + extra_opts['classifier'] = 'criteo' + extra_opts['frequency_threshold'] = 0 + extra_opts['output'] = os.path.join( + _OUTPUT_GCS_BUCKET_ROOT, uuid.uuid4().hex) + extra_opts['shuffle'] = False + workflow.run(test_pipeline.get_full_options_as_args(**extra_opts)) + + def test_cloudml_benchmark_criteo_10GB(self): + test_pipeline = TestPipeline(is_integration_test=True) + extra_opts = {} + extra_opts['input'] = os.path.join( + _INPUT_GCS_BUCKET_ROOT, lib.INPUT_CRITEO_10GB) + extra_opts['benchmark_type'] = 'tft' + extra_opts['classifier'] = 'criteo' + extra_opts['frequency_threshold'] = 0 + extra_opts['output'] = os.path.join( + _OUTPUT_GCS_BUCKET_ROOT, uuid.uuid4().hex) + workflow.run(test_pipeline.get_full_options_as_args(**extra_opts)) + + def test_cloud_ml_benchmark_criteo_fixed_workers_10GB(self): + test_pipeline = TestPipeline(is_integration_test=True) + extra_opts = {} + extra_opts['input'] = os.path.join( + _INPUT_GCS_BUCKET_ROOT, lib.INPUT_CRITEO_10GB) + extra_opts['benchmark_type'] = 'tft' + extra_opts['classifier'] = 'criteo' + extra_opts['frequency_threshold'] = 0 + extra_opts['output'] = os.path.join( + _OUTPUT_GCS_BUCKET_ROOT, uuid.uuid4().hex) + extra_opts['num_workers'] = 50 + extra_opts['machine_type'] = 'n1-standard-4' + workflow.run(test_pipeline.get_full_options_as_args(**extra_opts)) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdks/python/apache_beam/testing/benchmarks/cloudml/criteo_tft/__init__.py b/sdks/python/apache_beam/testing/benchmarks/cloudml/criteo_tft/__init__.py new file mode 100644 index 0000000000000..cce3acad34a49 --- /dev/null +++ b/sdks/python/apache_beam/testing/benchmarks/cloudml/criteo_tft/__init__.py @@ -0,0 +1,16 
@@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/sdks/python/apache_beam/testing/benchmarks/cloudml/criteo_tft/criteo.py b/sdks/python/apache_beam/testing/benchmarks/cloudml/criteo_tft/criteo.py new file mode 100644 index 0000000000000..cd14bd9e659f6 --- /dev/null +++ b/sdks/python/apache_beam/testing/benchmarks/cloudml/criteo_tft/criteo.py @@ -0,0 +1,158 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Schema and transform definition for the Criteo dataset.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +import tensorflow_transform as tft + + +def _get_raw_categorical_column_name(column_idx): + return 'categorical-feature-{}'.format(column_idx) + + +def get_transformed_categorical_column_name(column_name_or_id): + if isinstance(column_name_or_id, bytes): + # assume the input is column name + column_name = column_name_or_id + else: + # assume the input is column id + column_name = _get_raw_categorical_column_name(column_name_or_id) + return column_name + '_id' + + +_INTEGER_COLUMN_NAMES = [ + 'int-feature-{}'.format(column_idx) for column_idx in range(1, 14) +] +_CATEGORICAL_COLUMN_NAMES = [ + _get_raw_categorical_column_name(column_idx) + for column_idx in range(14, 40) +] +DEFAULT_DELIMITER = '\t' +# Number of buckets for integer columns. +_NUM_BUCKETS = 10 + +# Schema annotations aren't supported in this build. +tft.common.IS_ANNOTATIONS_PB_AVAILABLE = False + + +def make_ordered_column_names(include_label=True): + """Returns the column names in the dataset in the order in which they appear. + + Args: + include_label: Indicates whether the label feature should be included. + Returns: + A list of column names in the dataset. + """ + result = ['clicked'] if include_label else [] + for name in _INTEGER_COLUMN_NAMES: + result.append(name) + for name in _CATEGORICAL_COLUMN_NAMES: + result.append(name) + return result + + +def make_legacy_input_feature_spec(include_label=True): + """Input schema definition.
+ + Args: + include_label: Indicates whether the label feature should be included. + Returns: + A `Schema` object. + """ + result = {} + if include_label: + result['clicked'] = tf.io.FixedLenFeature(shape=[], dtype=tf.int64) + for name in _INTEGER_COLUMN_NAMES: + result[name] = tf.io.FixedLenFeature( + shape=[], dtype=tf.int64, default_value=-1) + for name in _CATEGORICAL_COLUMN_NAMES: + result[name] = tf.io.FixedLenFeature( + shape=[], dtype=tf.string, default_value='') + return result + + +def make_input_feature_spec(include_label=True): + """Input schema definition. + + Args: + include_label: Indicates whether the label feature should be included. + + Returns: + A `Schema` object. + """ + result = {} + if include_label: + result['clicked'] = tf.io.FixedLenFeature(shape=[], dtype=tf.int64) + for name in _INTEGER_COLUMN_NAMES: + result[name] = tf.io.VarLenFeature(dtype=tf.int64) + + for name in _CATEGORICAL_COLUMN_NAMES: + result[name] = tf.io.VarLenFeature(dtype=tf.string) + + return result + + +def make_preprocessing_fn(frequency_threshold): + """Creates a preprocessing function for criteo. + + Args: + frequency_threshold: The frequency_threshold used when generating + vocabularies for the categorical features. + + Returns: + A preprocessing function. + """ + def preprocessing_fn(inputs): + """User defined preprocessing function for criteo columns. + + Args: + inputs: dictionary of input `tensorflow_transform.Column`. + Returns: + A dictionary of `tensorflow_transform.Column` representing the transformed + columns. + """ + result = {'clicked': inputs['clicked']} + for name in _INTEGER_COLUMN_NAMES: + feature = inputs[name] + # TODO(https://github.com/apache/beam/issues/24902): + # Replace this boilerplate with a helper function. + # This is a SparseTensor because it is optional. Here we fill in a + # default value when it is missing. + feature = tft.sparse_tensor_to_dense_with_shape( + feature, [None, 1], default_value=-1) + # Reshaping from a batch of vectors of size 1 to a batch of scalars and + # adding a bucketized version. + feature = tf.squeeze(feature, axis=1) + result[name] = feature + result[name + '_bucketized'] = tft.bucketize(feature, _NUM_BUCKETS) + for name in _CATEGORICAL_COLUMN_NAMES: + feature = inputs[name] + # Similar to for integer columns, but use '' as default. + feature = tft.sparse_tensor_to_dense_with_shape( + feature, [None, 1], default_value='') + feature = tf.squeeze(feature, axis=1) + result[get_transformed_categorical_column_name( + name)] = tft.compute_and_apply_vocabulary( + feature, frequency_threshold=frequency_threshold) + + return result + + return preprocessing_fn diff --git a/sdks/python/apache_beam/testing/benchmarks/cloudml/pipelines/__init__.py b/sdks/python/apache_beam/testing/benchmarks/cloudml/pipelines/__init__.py new file mode 100644 index 0000000000000..cce3acad34a49 --- /dev/null +++ b/sdks/python/apache_beam/testing/benchmarks/cloudml/pipelines/__init__.py @@ -0,0 +1,16 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/sdks/python/apache_beam/testing/benchmarks/cloudml/pipelines/workflow.py b/sdks/python/apache_beam/testing/benchmarks/cloudml/pipelines/workflow.py new file mode 100644 index 0000000000000..e60e3a47c0d1c --- /dev/null +++ b/sdks/python/apache_beam/testing/benchmarks/cloudml/pipelines/workflow.py @@ -0,0 +1,215 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import logging +import os + +import apache_beam as beam +import tensorflow_transform as tft +import tensorflow_transform.beam as tft_beam +from apache_beam.testing.benchmarks.cloudml.criteo_tft import criteo +from tensorflow_transform import coders +from tensorflow_transform.tf_metadata import dataset_metadata +from tensorflow_transform.tf_metadata import schema_utils +from tfx_bsl.public import tfxio + +# Name of the column for the synthetic version of the benchmark. +_SYNTHETIC_COLUMN = 'x' + + +class _RecordBatchToPyDict(beam.PTransform): + """Converts PCollections of pa.RecordBatch to python dicts.""" + def __init__(self, input_feature_spec): + self._input_feature_spec = input_feature_spec + + def expand(self, pcoll): + def format_values(instance): + return { + k: v.squeeze(0).tolist() + if v is not None else self._input_feature_spec[k].default_value + for k, + v in instance.items() + } + + return ( + pcoll + | 'RecordBatchToDicts' >> + beam.FlatMap(lambda x: x.to_pandas().to_dict(orient='records')) + | 'FormatPyDictValues' >> beam.Map(format_values)) + + +def _synthetic_preprocessing_fn(inputs): + return { + _SYNTHETIC_COLUMN: tft.compute_and_apply_vocabulary( + inputs[_SYNTHETIC_COLUMN], + + # Execute more codepaths but do no frequency filtration. + frequency_threshold=1, + + # Execute more codepaths but do no top filtration. + top_k=2**31 - 1, + + # Execute more codepaths + num_oov_buckets=10) + } + + +class _PredictionHistogramFn(beam.DoFn): + def __init__(self): + # Beam Metrics API for Distributions only works with integers but + # predictions are floating point numbers. We thus store a "quantized" + # distribution of the prediction with sufficient granularity and for ease + # of human interpretation (eg as a percentage for logistic regression). 
+ self._prediction_distribution = beam.metrics.Metrics.distribution( + self.__class__, 'int(scores[0]*100)') + + def process(self, element): + self._prediction_distribution.update(int(element['scores'][0] * 100)) + + +def setup_pipeline(p, args): + if args.classifier == 'criteo': + input_feature_spec = criteo.make_input_feature_spec() + input_schema = schema_utils.schema_from_feature_spec(input_feature_spec) + input_tfxio = tfxio.BeamRecordCsvTFXIO( + physical_format='text', + column_names=criteo.make_ordered_column_names(), + schema=input_schema, + delimiter=criteo.DEFAULT_DELIMITER, + telemetry_descriptors=['CriteoCloudMLBenchmark']) + preprocessing_fn = criteo.make_preprocessing_fn(args.frequency_threshold) + else: + assert False, 'Unknown args classifier <{}>'.format(args.classifier) + + input_data = p | 'ReadFromText' >> beam.io.textio.ReadFromText( + args.input, coder=beam.coders.BytesCoder()) + + if args.benchmark_type == 'tft': + logging.info('TFT benchmark') + + # Setting TFXIO output format only for Criteo benchmarks to make sure that + # both codepaths are covered. + output_record_batches = args.classifier == 'criteo' + + # pylint: disable=expression-not-assigned + input_metadata = dataset_metadata.DatasetMetadata(schema=input_schema) + ( + input_metadata + | 'WriteInputMetadata' >> tft_beam.WriteMetadata( + os.path.join(args.output, 'raw_metadata'), pipeline=p)) + + with tft_beam.Context(temp_dir=os.path.join(args.output, 'tmp'), + use_deep_copy_optimization=True): + decoded_input_data = ( + input_data | 'DecodeForAnalyze' >> input_tfxio.BeamSource()) + transform_fn = ((decoded_input_data, input_tfxio.TensorAdapterConfig()) + | 'Analyze' >> tft_beam.AnalyzeDataset(preprocessing_fn)) + + if args.shuffle: + # Shuffle the data before any decoding (more compact representation). + input_data |= 'Shuffle' >> beam.transforms.Reshuffle() # pylint: disable=no-value-for-parameter + + decoded_input_data = ( + input_data | 'DecodeForTransform' >> input_tfxio.BeamSource()) + (dataset, + metadata) = ((decoded_input_data, input_tfxio.TensorAdapterConfig()), + transform_fn) | 'Transform' >> tft_beam.TransformDataset( + output_record_batches=output_record_batches) + + if output_record_batches: + + def record_batch_to_examples(batch, unary_passthrough_features): + """Encodes transformed data as tf.Examples.""" + # Ignore unary pass-through features. + del unary_passthrough_features + # From beam: "imports, functions and other variables defined in the + # global context of your __main__ file of your Dataflow pipeline are, by + # default, not available in the worker execution environment, and such + # references will cause a NameError, unless the --save_main_session + # pipeline option is set to True. Please see + # https://cloud.google.com/dataflow/faq#how-do-i-handle-nameerrors ." + from tfx_bsl.coders.example_coder import RecordBatchToExamples + return RecordBatchToExamples(batch) + + encode_ptransform = beam.FlatMapTuple(record_batch_to_examples) + else: + example_coder = coders.ExampleProtoCoder(metadata.schema) + encode_ptransform = beam.Map(example_coder.encode) + + # TODO: Use WriteDataset instead when it becomes available. + ( + dataset + | 'Encode' >> encode_ptransform + | 'Write' >> beam.io.WriteToTFRecord( + os.path.join(args.output, 'features_train'), + file_name_suffix='.tfrecord.gz')) + # transform_fn | beam.Map(print) + transform_fn | 'WriteTransformFn' >> tft_beam.WriteTransformFn(args.output) + + # TODO: Remember to eventually also save the statistics. 
+ else: + logging.fatal('Unknown benchmark type: %s', args.benchmark_type) + + +def parse_known_args(argv): + """Parses args for this workflow.""" + parser = argparse.ArgumentParser() + parser.add_argument( + '--input', + dest='input', + required=True, + help='Input path for input files.') + parser.add_argument( + '--output', + dest='output', + required=True, + help='Output path for output files.') + parser.add_argument( + '--classifier', + dest='classifier', + required=True, + help='Name of classifier to use.') + parser.add_argument( + '--frequency_threshold', + dest='frequency_threshold', + default=5, # TODO: Align default with TFT (ie 0). + help='Threshold for minimum number of unique values for a category.') + parser.add_argument( + '--shuffle', + action='store_false', + dest='shuffle', + default=True, + help='Skips shuffling the data.') + parser.add_argument( + '--benchmark_type', + dest='benchmark_type', + required=True, + help='Type of benchmark to run.') + + return parser.parse_known_args(argv) + + +def run(argv=None): + """Main entry point; defines and runs the pipeline.""" + known_args, pipeline_args = parse_known_args(argv) + with beam.Pipeline(argv=pipeline_args) as p: + setup_pipeline(p, known_args) + + +if __name__ == '__main__': + run() diff --git a/sdks/typescript/build_container.sh b/sdks/python/apache_beam/testing/benchmarks/cloudml/requirements.txt old mode 100755 new mode 100644 similarity index 77% rename from sdks/typescript/build_container.sh rename to sdks/python/apache_beam/testing/benchmarks/cloudml/requirements.txt index 3cd58e3eb75ea..8ddfddece5473 --- a/sdks/typescript/build_container.sh +++ b/sdks/python/apache_beam/testing/benchmarks/cloudml/requirements.txt @@ -1,4 +1,3 @@ -#!/bin/sh # # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with @@ -16,9 +15,5 @@ # limitations under the License. # - -# TODO: Do this via a gradle rather than manually. -env GOOS=linux GOARCH=amd64 go build boot.go -npm run build -npm pack -docker build . 
-t apache/beam_typescript_sdk:latest -t gcr.io/apache-beam-testing/beam_typescript_sdk:dev +tfx_bsl +tensorflow-transform diff --git a/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py b/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py index 60595ed02e08e..92a5f68351fe0 100644 --- a/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py +++ b/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py @@ -44,6 +44,9 @@ import apache_beam as beam from apache_beam.metrics import Metrics +from apache_beam.metrics.metric import MetricResults +from apache_beam.metrics.metric import MetricsFilter +from apache_beam.runners.runner import PipelineResult from apache_beam.transforms.window import TimestampedValue from apache_beam.utils.timestamp import Timestamp @@ -205,14 +208,17 @@ def __init__( """ self._namespace = namespace self.publishers: List[MetricsPublisher] = [] + # publish to console output self.publishers.append(ConsoleMetricsPublisher()) - check = project_name and bq_table and bq_dataset and publish_to_bq - if check: + bq_check = project_name and bq_table and bq_dataset and publish_to_bq + if bq_check: + # publish to BigQuery bq_publisher = BigQueryMetricsPublisher( project_name, bq_table, bq_dataset) self.publishers.append(bq_publisher) if influxdb_options and influxdb_options.validate(): + # publish to InfluxDB self.publishers.append(InfluxDBMetricsPublisher(influxdb_options)) else: _LOGGER.info( @@ -220,7 +226,27 @@ def __init__( 'InfluxDB') self.filters = filters - def publish_metrics(self, result, extra_metrics: Optional[dict] = None): + def get_counter_metric(self, result: PipelineResult, name: str) -> int: + """ + Return the current value for a long counter, or -1 if can't be retrieved. + Note this uses only attempted metrics because some runners don't support + committed metrics. + """ + filters = MetricsFilter().with_namespace(self._namespace).with_name(name) + counters = result.metrics().query(filters)[MetricResults.COUNTERS] + num_results = len(counters) + if num_results > 1: + raise ValueError( + f"More than one metric result matches name: {name} in namespace "\ + f"{self._namespace}. 
Metric results count: {num_results}") + elif num_results == 0: + return -1 + else: + return counters[0].attempted + + def publish_metrics( + self, result: PipelineResult, extra_metrics: Optional[dict] = None): + """Publish metrics from pipeline result to registered publishers.""" metric_id = uuid.uuid4().hex metrics = result.metrics().query(self.filters) diff --git a/sdks/python/apache_beam/transforms/util.py b/sdks/python/apache_beam/transforms/util.py index d91ce112471dd..5f99d68054699 100644 --- a/sdks/python/apache_beam/transforms/util.py +++ b/sdks/python/apache_beam/transforms/util.py @@ -832,6 +832,7 @@ def from_runner_api_parameter( def fn_takes_side_inputs(fn): + fn = getattr(fn, '_argspec_fn', fn) try: signature = get_signature(fn) except TypeError: diff --git a/sdks/python/apache_beam/typehints/trivial_inference.py b/sdks/python/apache_beam/typehints/trivial_inference.py index a3a5b70b5e648..f69a87192e3ac 100644 --- a/sdks/python/apache_beam/typehints/trivial_inference.py +++ b/sdks/python/apache_beam/typehints/trivial_inference.py @@ -35,6 +35,7 @@ from apache_beam.typehints import Any from apache_beam.typehints import row_type from apache_beam.typehints import typehints +from apache_beam.utils import python_callable class TypeInferenceError(ValueError): @@ -314,6 +315,10 @@ def infer_return_type(c, input_types, debug=False, depth=5): isinstance(input_types[1], Const)): from apache_beam.typehints import opcodes return opcodes._getattr(input_types[0], input_types[1].value) + elif isinstance(c, python_callable.PythonCallableWithSource): + # TODO(BEAM-24755): This can be removed once support for + # inference across *args and **kwargs is implemented. + return infer_return_type(c._callable, input_types, debug, depth) else: return Any except TypeInferenceError: @@ -439,8 +444,8 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): else: return_type = Any elif opname == 'CALL_FUNCTION_KW': - # TODO(udim): Handle keyword arguments. Requires passing them by name - # to infer_return_type. + # TODO(BEAM-24755): Handle keyword arguments. Requires passing them by + # name to infer_return_type. pop_count = arg + 2 if isinstance(state.stack[-pop_count], Const): from apache_beam.pvalue import Row @@ -462,7 +467,7 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): has_kwargs = arg & 1 # type: int pop_count = has_kwargs + 2 if has_kwargs: - # TODO(udim): Unimplemented. Requires same functionality as a + # TODO(BEAM-24755): Unimplemented. Requires same functionality as a # CALL_FUNCTION_KW implementation. 
return_type = Any + else: diff --git a/sdks/python/apache_beam/typehints/trivial_inference_test.py b/sdks/python/apache_beam/typehints/trivial_inference_test.py index e2c3951d4ae2c..d8cc2ab19a03d 100644 --- a/sdks/python/apache_beam/typehints/trivial_inference_test.py +++ b/sdks/python/apache_beam/typehints/trivial_inference_test.py @@ -26,6 +26,7 @@ from apache_beam.typehints import row_type from apache_beam.typehints import trivial_inference from apache_beam.typehints import typehints +from apache_beam.utils import python_callable global_int = 1 @@ -458,6 +459,12 @@ def testRowAttr(self): lambda row: (row.x, getattr(row, 'y')), [row_type.RowTypeConstraint.from_fields([('x', int), ('y', str)])]) + def testPyCallable(self): + self.assertReturnType( + typehints.Tuple[int, str], + python_callable.PythonCallableWithSource("lambda x: (x, str(x))"), + [int]) + if __name__ == '__main__': unittest.main() diff --git a/sdks/python/apache_beam/utils/python_callable.py b/sdks/python/apache_beam/utils/python_callable.py index f4d32d41641d8..a7de214ec926c 100644 --- a/sdks/python/apache_beam/utils/python_callable.py +++ b/sdks/python/apache_beam/utils/python_callable.py @@ -106,6 +106,19 @@ def load_from_script(source): exec('\n'.join(lines), exec_globals) return exec_globals[name] + def default_label(self): + src = self._source.strip() + last_line = src.split('\n')[-1] + if last_line[0] != ' ' and len(last_line) < 72: + return last_line + # Avoid circular import. + from apache_beam.transforms.ptransform import label_from_callable + return label_from_callable(self._callable) + + @property + def _argspec_fn(self): + return self._callable + def get_source(self): # type: () -> str return self._source diff --git a/sdks/python/apache_beam/utils/python_callable_test.py b/sdks/python/apache_beam/utils/python_callable_test.py index 72d32f145098a..6fc6a1f04a69e 100644 --- a/sdks/python/apache_beam/utils/python_callable_test.py +++ b/sdks/python/apache_beam/utils/python_callable_test.py @@ -17,6 +17,7 @@ import os import unittest +import apache_beam as beam from apache_beam.utils.python_callable import PythonCallableWithSource @@ -82,6 +83,14 @@ def __init__(self, x): """)(10).x, 10) + def test_pycallable_map(self): + p = beam.Pipeline() + result = ( + p + | beam.Create([1, 2, 3]) + | beam.Map(PythonCallableWithSource("lambda x: x"))) + self.assertEqual(result.element_type, int) + if __name__ == '__main__': unittest.main() diff --git a/sdks/python/pytest.ini b/sdks/python/pytest.ini index e2ffe74ac2759..915b49c8f16ae 100644 --- a/sdks/python/pytest.ini +++ b/sdks/python/pytest.ini @@ -47,9 +47,11 @@ markers = no_xdist: run without pytest-xdist plugin # We run these tests with multiple major pyarrow versions (BEAM-11211) uses_pyarrow: tests that utilize pyarrow in some way + # ML tests uses_pytorch: tests that utilize pytorch in some way uses_sklearn: tests that utilize scikit-learn in some way uses_tensorflow: tests that utilize tensorflow in some way + uses_tft: tests that utilize tensorflow transforms in some way # Default timeout intended for unit tests.
# If certain tests need a different value, please see the docs on how to diff --git a/sdks/python/scripts/generate_pydoc.sh b/sdks/python/scripts/generate_pydoc.sh index 376dc123cef53..9a0dc99613dd4 100755 --- a/sdks/python/scripts/generate_pydoc.sh +++ b/sdks/python/scripts/generate_pydoc.sh @@ -65,6 +65,7 @@ excluded_patterns=( 'apache_beam/runners/test/' 'apache_beam/runners/worker/' 'apache_beam/testing/benchmarks/chicago_taxi/' + 'apache_beam/testing/benchmarks/cloudml/' 'apache_beam/testing/benchmarks/inference/' 'apache_beam/testing/benchmarks/data/' 'apache_beam/testing/benchmarks/load_tests/' diff --git a/sdks/python/scripts/run_integration_test.sh b/sdks/python/scripts/run_integration_test.sh index d38f1bf3baa2b..508d9f50421ef 100755 --- a/sdks/python/scripts/run_integration_test.sh +++ b/sdks/python/scripts/run_integration_test.sh @@ -247,8 +247,6 @@ if [[ -z $PIPELINE_OPTS ]]; then # Add --runner_v2 if provided if [[ "$RUNNER_V2" = true ]]; then opts+=("--experiments=use_runner_v2") - # TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved. - opts+=("--experiments=shuffle_mode=appliance") if [[ "$STREAMING" = true ]]; then # Dataflow Runner V2 only supports streaming engine. opts+=("--enable_streaming_engine") diff --git a/sdks/python/test-suites/dataflow/build.gradle b/sdks/python/test-suites/dataflow/build.gradle index d16111679da7b..548a50246a28e 100644 --- a/sdks/python/test-suites/dataflow/build.gradle +++ b/sdks/python/test-suites/dataflow/build.gradle @@ -84,3 +84,9 @@ task examplesPostCommit { dependsOn.add(":sdks:python:test-suites:dataflow:py${getVersionSuffix(it)}:examples") } } + +task tftTests { + getVersionsAsList('dataflow_cloudml_benchmark_tests_py_versions').each { + dependsOn.add(":sdks:python:test-suites:dataflow:py${getVersionSuffix(it)}:tftTests") + } +} diff --git a/sdks/python/test-suites/dataflow/common.gradle b/sdks/python/test-suites/dataflow/common.gradle index 5e2fa3d7f7f94..a879421e93944 100644 --- a/sdks/python/test-suites/dataflow/common.gradle +++ b/sdks/python/test-suites/dataflow/common.gradle @@ -362,6 +362,40 @@ def tensorRTTests = tasks.create("tensorRTtests") { } } +task installTFTRequirements { + dependsOn 'installGcpTest' + doLast { + exec { + workingDir "$rootProject.projectDir/sdks/python/apache_beam/testing/benchmarks/cloudml/" + executable 'sh' + args '-c', ". ${envdir}/bin/activate && pip install -r requirements.txt" + } + } +} + +// Tensorflow transform integration and benchmarking tests on Apache Beam. +task tftTests { + dependsOn 'installGcpTest' + dependsOn ':sdks:python:sdist' + dependsOn "installTFTRequirements" + + doLast { + def testOpts = basicPytestOpts + ["--numprocesses=8", "--dist=loadfile"] + def argMap = [ + "test_opts": testOpts, + "sdk_location": files(configurations.distTarBall.files).singleFile, + "suite": "TFTransformTests-df${pythonVersionSuffix}", + "collect": "uses_tft", + "requirements_file": "apache_beam/testing/benchmarks/cloudml/requirements.txt" + ] + def cmdArgs = mapToArgString(argMap) + exec { + executable 'sh' + args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs" + } + } +} + // add all RunInference E2E tests that run on DataflowRunner // As of now, this test suite is enable in py38 suite as the base NVIDIA image used for Tensor RT // contains Python 3.8. 
diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle index c8db6388305d1..763e96ff6d216 100644 --- a/sdks/python/test-suites/direct/common.gradle +++ b/sdks/python/test-suites/direct/common.gradle @@ -229,6 +229,14 @@ task torchInferenceTest { task sklearnInferenceTest { dependsOn 'installGcpTest' dependsOn ':sdks:python:sdist' + // TODO(https://github.com/apache/beam/issues/24787) + // Scikit learn tests fails from version 1.2.0 + doFirst { + exec { + executable 'sh' + args '-c', ". ${envdir}/bin/activate && pip install 'scikit-learn<1.2.0'" + } + } doLast { def testOpts = basicTestOpts def argMap = [ diff --git a/sdks/python/test-suites/gradle.properties b/sdks/python/test-suites/gradle.properties index 050ed442f618c..055bb484bfa6e 100644 --- a/sdks/python/test-suites/gradle.properties +++ b/sdks/python/test-suites/gradle.properties @@ -37,7 +37,8 @@ dataflow_validates_container_tests=3.7,3.8,3.9,3.10 dataflow_validates_runner_batch_tests_V2=3.7,3.10 dataflow_validates_runner_streaming_tests_V2=3.7,3.10 dataflow_examples_postcommit_py_versions=3.10 - +# TFX_BSL is not yet supported on Python 3.10. +dataflow_cloudml_benchmark_tests_py_versions=3.9 # direct runner test-suites direct_mongodbio_it_task_py_versions=3.10 direct_examples_postcommit_py_versions=3.7,3.8,3.9,3.10 diff --git a/sdks/typescript/build.gradle b/sdks/typescript/build.gradle new file mode 100644 index 0000000000000..1c7ffab4bb492 --- /dev/null +++ b/sdks/typescript/build.gradle @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +plugins { id 'org.apache.beam.module' } + +description = "Apache Beam :: SDKs :: Typescript" + +configurations { + packTarball +} + +def tarballName = "apache-beam.tgz" + +def typescriptSdkDeps = project.files( + project.fileTree( + dir: "${project.rootDir}", + include: ['model/**', 'sdks/typescript/src/**', 'sdks/typescript/*.json'], + // Exclude temporary directories and files that are generated + // during build and test. 
+ exclude: [ + '**/build/**', + '**/dist/**', + '**/target/**', + '**/.gogradle/**', + '**/reports/test/index.html', + ]) + ) + +def pack = tasks.register("pack") { + + doLast { + // Build artifact + exec { + executable 'sh' + args '-c', "npm install && npm run build && npm pack --pack-destination ${buildDir}" + } + + def collection = fileTree(buildDir){ include "**/*${project.version}*.tgz" exclude 'srcs/**'} + + // we need a fixed name for the artifact + copy { from collection.singleFile; into buildDir; rename { tarballName } } + logger.info('Create distribution tar file {} in {}', tarballName, buildDir) + } + inputs.files(typescriptSdkDeps) + .withPropertyName('typescriptSdkDeps') + .withPathSensitivity(PathSensitivity.RELATIVE) + outputs.file "${buildDir}/${tarballName}" +} + +artifacts { + packTarball file: file("${buildDir}/${tarballName}"), builtBy: pack +} diff --git a/sdks/typescript/build.sh b/sdks/typescript/build.sh index f3884a63ba099..f3f1b4e19d233 100755 --- a/sdks/typescript/build.sh +++ b/sdks/typescript/build.sh @@ -22,7 +22,12 @@ set -e +# Make the packaging version available to the code. +echo "export const version = \"$npm_package_version\";" > src/apache_beam/version.ts + # Using npx to execute ttsc from the local node_modules environment. npx ttsc -p . + +# Copy the python bootstrap script. mkdir -p dist/resources cp ../java/extensions/python/src/main/resources/org/apache/beam/sdk/extensions/python/bootstrap_beam_venv.py dist/resources diff --git a/sdks/typescript/Dockerfile b/sdks/typescript/container/Dockerfile similarity index 86% rename from sdks/typescript/Dockerfile rename to sdks/typescript/container/Dockerfile index cc43614c4b465..60019950e194b 100644 --- a/sdks/typescript/Dockerfile +++ b/sdks/typescript/container/Dockerfile @@ -24,13 +24,10 @@ ENV NODE_ENV=development WORKDIR /app # Copy beam package. -COPY apache-beam-0.38.0.tgz ./apache-beam-0.38.0.tgz +COPY target/apache-beam.tgz ./apache-beam.tgz # Install dependencies and compile -RUN npm install apache-beam-0.38.0.tgz +RUN npm install apache-beam.tgz -# Check that filesystem is set up as expected -RUN ls -a - -COPY boot /opt/apache/beam/ +COPY target/launcher/linux_amd64/boot /opt/apache/beam/ ENTRYPOINT ["/opt/apache/beam/boot"] diff --git a/sdks/typescript/boot.go b/sdks/typescript/container/boot.go similarity index 92% rename from sdks/typescript/boot.go rename to sdks/typescript/container/boot.go index 7bace0b1b716f..b3462a7d1497d 100644 --- a/sdks/typescript/boot.go +++ b/sdks/typescript/container/boot.go @@ -23,6 +23,7 @@ import ( "os" "path/filepath" "strings" + "sync" "github.com/apache/beam/sdks/v2/go/pkg/beam/artifact" "github.com/apache/beam/sdks/v2/go/pkg/beam/provision" @@ -120,7 +121,7 @@ func main() { } defer f.Close() f.WriteString("{\n") - f.WriteString(" \"name\": \"beam-worker\",\n \"version\": \"1.0\",\n") + f.WriteString(" \"name\": \"beam-worker\",\n \"version\": \"1.0.0\",\n") f.WriteString(" \"overrides\": {\n") needsComma := false for pkg, path := range npmOverrides { @@ -158,7 +159,6 @@ func main() { args := []string{ entrypoint, - "--id=" + *id, "--logging_endpoint=" + *loggingEndpoint, "--control_endpoint=" + *controlEndpoint, "--semi_persist_dir=" + *semiPersistDir, @@ -169,5 +169,15 @@ func main() { args = append(args, "--status_endpoint="+info.GetStatusEndpoint().GetUrl()) } - log.Fatalf("User program exited: %v", execx.Execute("npx", args...)) + workerIds := append([]string{*id}, info.GetSiblingWorkerIds()...) 
+ var wg sync.WaitGroup + wg.Add(len(workerIds)) + for _, workerId := range workerIds { + go func(workerId string) { + workerArgs := append(append([]string{}, args...), "--id="+workerId) + log.Printf("Executing: npx %v", strings.Join(workerArgs, " ")) + log.Fatalf("User program exited: %v", execx.Execute("npx", workerArgs...)) + }(workerId) + } + wg.Wait() } diff --git a/sdks/typescript/container/build.gradle b/sdks/typescript/container/build.gradle new file mode 100644 index 0000000000000..2978d23ab6f58 --- /dev/null +++ b/sdks/typescript/container/build.gradle @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +plugins { id 'org.apache.beam.module' } +applyDockerNature() +applyGoNature() + +description = "Apache Beam :: SDKs :: Typescript :: Container" + +configurations { + packTarball +} + +dependencies { + packTarball project(path: ":sdks:typescript", configuration: "packTarball") +} + +goBuild { + goTargets = '*.go' // only build the immediate directory. + outputLocation = './build/target/launcher/${GOOS}_${GOARCH}/boot' +} + +def copyDockerfileDependencies = tasks.register("copyDockerfileDependencies", Copy) { + from configurations.packTarball + into "build/target" + if(configurations.packTarball.isEmpty()) { + throw new StopExecutionException("Empty sdk tarball."); + } +} + +docker { + name containerImageName( + name: project.docker_image_default_repo_prefix + "typescript_sdk", + root: project.rootProject.hasProperty(["docker-repository-root"]) ? + project.rootProject["docker-repository-root"] : + project.docker_image_default_repo_root, + tag: project.rootProject.hasProperty(["docker-tag"]) ? 
+ project.rootProject["docker-tag"] : project.sdk_version) + // tags used by dockerTag task + tags containerImageTags() + files "./build" +} + +dockerPrepare.dependsOn goBuild +dockerPrepare.dependsOn copyDockerfileDependencies + +if (project.rootProject.hasProperty(["docker-pull-licenses"])) { + def copyGolangLicenses = tasks.register("copyGolangLicenses", Copy) { + from "${project(':release:go-licenses:py').buildDir}/output" + into "build/target/go-licenses" + dependsOn ':release:go-licenses:py:createLicenses' + } + dockerPrepare.dependsOn copyGolangLicenses +} else { + def skipPullLicenses = tasks.register("skipPullLicenses", Exec) { + executable "sh" + args "-c", "mkdir -p build/target/go-licenses" + } + dockerPrepare.dependsOn skipPullLicenses +} + +task pushAll { + dependsOn ":sdks:typescript:container:dockerPush" +} diff --git a/sdks/typescript/package-lock.json b/sdks/typescript/package-lock.json index f1a5a93a3f55a..a8a15b33f092e 100644 --- a/sdks/typescript/package-lock.json +++ b/sdks/typescript/package-lock.json @@ -1,12 +1,12 @@ { - "name": "apache_beam", - "version": "0.38.0", + "name": "apache-beam", + "version": "2.45.0.dev", "lockfileVersion": 2, "requires": true, "packages": { "": { - "name": "apache_beam", - "version": "0.38.0", + "name": "apache-beam", + "version": "2.45.0.dev", "dependencies": { "@google-cloud/pubsub": "^2.19.4", "@grpc/grpc-js": "^1.4.6", @@ -26,6 +26,9 @@ "ttypescript": "^1.5.13", "uuid": "^8.3.2" }, + "bin": { + "apache-beam-worker": "dist/src/apache_beam/worker/worker_main.js" + }, "devDependencies": { "@google-cloud/bigquery": "^5.12.0", "@types/mocha": "^9.0.0", diff --git a/sdks/typescript/package.json b/sdks/typescript/package.json index a09c141f61139..53baca78e0dad 100644 --- a/sdks/typescript/package.json +++ b/sdks/typescript/package.json @@ -1,6 +1,6 @@ { "name": "apache-beam", - "version": "0.38.0", + "version": "2.45.0-SNAPSHOT", "devDependencies": { "@google-cloud/bigquery": "^5.12.0", "@types/mocha": "^9.0.0", @@ -53,6 +53,8 @@ "main": "./dist/src/apache_beam/index.js", "exports": { ".": "./dist/src/apache_beam/index.js", - "./": "./dist/src/apache_beam/" + "./transforms": "./dist/src/apache_beam/transforms/index.js", + "./runners": "./dist/src/apache_beam/index.js", + "./*": "./dist/src/apache_beam/*.js" } } diff --git a/sdks/typescript/src/apache_beam/internal/environments.ts b/sdks/typescript/src/apache_beam/internal/environments.ts index 4ffc455539726..4e45800839ea5 100644 --- a/sdks/typescript/src/apache_beam/internal/environments.ts +++ b/sdks/typescript/src/apache_beam/internal/environments.ts @@ -21,8 +21,12 @@ import * as runnerApi from "../proto/beam_runner_api"; export const TYPESCRIPT_DEFAULT_ENVIRONMENT_URN = "js_default"; function javascriptCapabilities(): string[] { - // XXX This is needed for sessions to work... - return ["beam:coder:interval_window:v1"]; // TODO: Cleanup. Actually populate. + // TODO: Cleanup. Actually populate. + return [ + // This is needed for sessions to work... 
+ "beam:coder:interval_window:v1", + "beam:protocol:sibling_workers:v1", + ]; } export function defaultJsEnvironment() { diff --git a/sdks/typescript/src/apache_beam/runners/dataflow.ts b/sdks/typescript/src/apache_beam/runners/dataflow.ts index 1bdd250cb1e6f..950e630d82d9a 100644 --- a/sdks/typescript/src/apache_beam/runners/dataflow.ts +++ b/sdks/typescript/src/apache_beam/runners/dataflow.ts @@ -32,13 +32,15 @@ export function dataflowRunner(runnerOptions: { pipeline: Pipeline, options: Object = {} ): Promise { + var augmentedOptions = { experiments: [] as string[], ...options }; + augmentedOptions.experiments.push("use_sibling_sdk_workers"); return new PortableRunner( runnerOptions as any, PythonService.forModule( "apache_beam.runners.dataflow.dataflow_job_service", ["--port", "{{PORT}}"] ) - ).runPipeline(pipeline, options); + ).runPipeline(pipeline, augmentedOptions); } })(); } diff --git a/sdks/typescript/src/apache_beam/runners/flink.ts b/sdks/typescript/src/apache_beam/runners/flink.ts index 878112162fdcf..db6c7e8b0a1f8 100644 --- a/sdks/typescript/src/apache_beam/runners/flink.ts +++ b/sdks/typescript/src/apache_beam/runners/flink.ts @@ -61,7 +61,7 @@ export function flinkRunner(runnerOptions: Object = {}): Runner { const jobServerJar = allOptions.flinkJobServerJar || (await JavaJarService.cachedJar( - JavaJarService.gradleToJar( + await JavaJarService.gradleToJar( `runners:flink:${allOptions.flinkVersion}:job-server:shadowJar` ) )); diff --git a/sdks/typescript/src/apache_beam/runners/portable_runner/runner.ts b/sdks/typescript/src/apache_beam/runners/portable_runner/runner.ts index 3220852ac7401..9cfb27957b0c5 100644 --- a/sdks/typescript/src/apache_beam/runners/portable_runner/runner.ts +++ b/sdks/typescript/src/apache_beam/runners/portable_runner/runner.ts @@ -19,6 +19,7 @@ const childProcess = require("child_process"); const crypto = require("crypto"); const fs = require("fs"); +const os = require("os"); const path = require("path"); import { ChannelCredentials } from "@grpc/grpc-js"; @@ -42,6 +43,7 @@ import * as artifacts from "../artifacts"; import { Service as JobService } from "../../utils/service"; import * as serialization from "../../serialization"; +import { version } from "../../version"; const TERMINAL_STATES = [ JobState_Enum.DONE, @@ -51,6 +53,10 @@ const TERMINAL_STATES = [ JobState_Enum.DRAINED, ]; +// TODO(robertwb): Change this to docker.io/apache/beam_typescript_sdk +// once we push images there. +const DOCKER_BASE = "gcr.io/apache-beam-testing/beam_typescript_sdk"; + type completionCallback = (terminalState: JobStateEvent) => Promise; class PortableRunnerPipelineResult extends PipelineResult { @@ -229,20 +235,27 @@ export class PortableRunner extends Runner { environments.asDockerEnvironment( env, (options as any)?.sdkContainerImage || - "gcr.io/apache-beam-testing/beam_typescript_sdk:dev" + DOCKER_BASE + ":" + version.replace("-SNAPSHOT", ".dev") ); const deps = pipeline.components!.environments[envId].dependencies; // Package up this code as a dependency. 
- const result = childProcess.spawnSync("npm", ["pack"], { - encoding: "latin1", - }); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "beam-pack-")); + const result = childProcess.spawnSync( + "npm", + ["pack", "--pack-destination", tmpDir], + { + encoding: "latin1", + } + ); if (result.status === 0) { console.debug(result.stdout); } else { throw new Error(result.output); } - const packFile = path.resolve(result.stdout.trim()); + const packFile = path.resolve( + path.join(tmpDir, result.stdout.trim()) + ); deps.push(fileArtifact(packFile, "beam:artifact:type:npm:v1")); // If any dependencies are files, package them up as well. diff --git a/sdks/typescript/src/apache_beam/utils/service.ts b/sdks/typescript/src/apache_beam/utils/service.ts index 8be90616a0653..b53dbd626fb8a 100644 --- a/sdks/typescript/src/apache_beam/utils/service.ts +++ b/sdks/typescript/src/apache_beam/utils/service.ts @@ -216,7 +216,7 @@ export function serviceProviderFromJavaGradleTarget( } } else { jar = await JavaJarService.cachedJar( - JavaJarService.gradleToJar(gradleTarget) + await JavaJarService.gradleToJar(gradleTarget) ); } @@ -281,18 +281,18 @@ export class JavaJarService extends SubprocessService { } } - static gradleToJar( + static async gradleToJar( gradleTarget: string, appendix: string | undefined = undefined, version: string = beamVersion - ): string { + ): Promise { if (version.startsWith("0.")) { // node-ts 0.x corresponds to Beam 2.x. version = "2" + version.substring(1); } const gradlePackage = gradleTarget.match(/^:?(.*):[^:]+:?$/)![1]; const artifactId = "beam-" + gradlePackage.replaceAll(":", "-"); - const projectRoot = getProjectRoot(); + const projectRoot = getBeamProjectRoot(); const localPath = !projectRoot ? undefined : path.join( @@ -302,16 +302,20 @@ export class JavaJarService extends SubprocessService { "libs", JavaJarService.jarName( artifactId, - version.replace(".dev", ""), + version.replace("-SNAPSHOT", ""), "SNAPSHOT", appendix ) ); + if (version.includes("SNAPSHOT") && !projectRoot) { + version = "latest"; + } + if (localPath && fs.existsSync(localPath)) { console.info("Using pre-built snapshot at", localPath); return localPath; - } else if (version.includes(".dev")) { + } else if (version.includes("SNAPSHOT")) { throw new Error( `${localPath} not found. 
Please build the server with cd ${projectRoot}; ./gradlew ${gradleTarget})` @@ -326,14 +330,37 @@ export class JavaJarService extends SubprocessService { } } - static mavenJarUrl( + static async mavenJarUrl( artifactId: string, version: string, classifier: string | undefined = undefined, appendix: string | undefined = undefined, repo: string = JavaJarService.APACHE_REPOSITORY, groupId: string = JavaJarService.BEAM_GROUP_ID - ): string { + ): Promise { + if (version == "latest") { + const medatadataUrl = [ + repo, + groupId.replaceAll(".", "/"), + artifactId, + "maven-metadata.xml", + ].join("/"); + const metadata = await new Promise((resolve, reject) => { + let data = ""; + https.get(medatadataUrl, (res) => { + res.on("data", (chunk) => { + data += chunk; + }); + res.on("end", () => { + resolve(data); + }); + res.on("error", (e) => { + reject(e); + }); + }); + }); + version = metadata.match(/(.*)<\/latest>/)![1]; + } return [ repo, groupId.replaceAll(".", "/"), @@ -450,9 +477,18 @@ function serviceOverrideFor(name: string): string | undefined { } } -function getProjectRoot(): string | undefined { +function getBeamProjectRoot(): string | undefined { try { - return path.dirname(findGitRoot(__dirname)); + const projectRoot = path.dirname(findGitRoot(__dirname)); + if ( + fs.existsSync( + path.join(projectRoot, "sdks", "typescript", "src", "apache_beam") + ) + ) { + return projectRoot; + } else { + return undefined; + } } catch (Error) { return undefined; } diff --git a/sdks/typescript/src/apache_beam/worker/data.ts b/sdks/typescript/src/apache_beam/worker/data.ts index a4b2c3f732362..53a98ac4cc16d 100644 --- a/sdks/typescript/src/apache_beam/worker/data.ts +++ b/sdks/typescript/src/apache_beam/worker/data.ts @@ -45,7 +45,6 @@ export class MultiplexingDataChannel { ); this.dataChannel = this.dataClient.data(metadata); this.dataChannel.on("data", async (elements) => { - console.debug("data", elements); for (const data of elements.data) { const consumer = this.getConsumer(data.instructionId, data.transformId); try { diff --git a/sdks/typescript/src/apache_beam/worker/operators.ts b/sdks/typescript/src/apache_beam/worker/operators.ts index a4e7724cb2da5..6338fc0dab58c 100644 --- a/sdks/typescript/src/apache_beam/worker/operators.ts +++ b/sdks/typescript/src/apache_beam/worker/operators.ts @@ -270,7 +270,6 @@ export class DataSourceOperator implements IOperator { this.lastToProcessElement < Infinity ? this.lastToProcessElement : Number(desiredSplit.estimatedInputElements) - 1; - console.log(this.lastToProcessElement, this.lastProcessedElement, end); if (this.lastProcessedElement >= end) { return undefined; } diff --git a/sdks/typescript/src/apache_beam/worker/worker_main.ts b/sdks/typescript/src/apache_beam/worker/worker_main.ts index c457e36fd3370..e70e056abb631 100644 --- a/sdks/typescript/src/apache_beam/worker/worker_main.ts +++ b/sdks/typescript/src/apache_beam/worker/worker_main.ts @@ -41,14 +41,26 @@ async function main() { options["beam:option:registered_node_modules:v1"] || options["registered_node_modules"] || [] - ).forEach(require); + ).forEach((m) => { + try { + require(m); + } catch (error) { + console.error( + `**ERROR** + Unable to require module '${m}' used in requireForSerialization: + please ensure that it is available in the package exports.` + ); + // Explicitly exit the process to avoid the error getting swallowed + // by a long traceback. 
+ process.exit(1); + } + }); console.info("Starting worker", argv.id); const worker = new Worker( argv.id, { controlUrl: argv.control_endpoint, - //loggingUrl: argv.logging_endpoint, }, options ); diff --git a/settings.gradle.kts b/settings.gradle.kts index a9f4de327ca35..0829c523843ba 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -54,6 +54,7 @@ include(":examples:java") include(":examples:java:twitter") include(":examples:java:cdap") include(":examples:java:cdap:hubspot") +include(":examples:java:cdap:salesforce") include(":examples:java:cdap:servicenow") include(":examples:java:cdap:zendesk") include(":examples:kotlin") @@ -65,6 +66,7 @@ include(":playground") include(":playground:backend") include(":playground:frontend") include(":playground:frontend:playground_components") +include(":playground:frontend:playground_components:tools:extract_symbols_java") include(":playground:backend:containers") include(":playground:backend:containers:java") include(":playground:backend:containers:go") @@ -129,6 +131,7 @@ include(":sdks:java:core:jmh") include(":sdks:java:expansion-service") include(":sdks:java:expansion-service:app") include(":sdks:java:extensions:arrow") +include(":sdks:java:extensions:avro") include(":sdks:java:extensions:euphoria") include(":sdks:java:extensions:kryo") include(":sdks:java:extensions:google-cloud-platform-core") @@ -250,6 +253,8 @@ include(":sdks:python:test-suites:tox:py37") include(":sdks:python:test-suites:tox:py38") include(":sdks:python:test-suites:tox:py39") include(":sdks:python:test-suites:tox:py310") +include(":sdks:typescript") +include(":sdks:typescript:container") include(":vendor:bytebuddy-1_12_8") include(":vendor:grpc-1_48_1") include(":vendor:calcite-1_28_0") diff --git a/website/www/check-links.sh b/website/www/check-links.sh index 8599ecd48bf4c..c724df271373a 100755 --- a/website/www/check-links.sh +++ b/website/www/check-links.sh @@ -34,6 +34,38 @@ function redraw_progress_bar { # int barsize, int base, int current, int top echo -n "] $current / $top " $'\r' } +function check_absolute_links { + local env="$1" + local exception_link="$2" + local -n env_exception_links="$3" + + echo "Checking $env links." + for external_link in "${external_links[@]}"; + do + processed_external_link=`awk -F/ '{print $3}' <<<"$external_link"` + if [[ "$processed_external_link" == "$exception_link" ]]; + then + checked_word=`awk -F/ '{print $4}' <<<"$external_link"` + if [[ $checked_word != "releases" ]]; then + env_exception_links+=("${external_link}") + echo "${external_link}" + fi + fi + done +} + +function report_absolute_links { + local -n env_exception_links="$1" + local env="$2" + + if [[ ${#env_exception_links[@]} -ne 0 ]]; then + echo "Found ${#env_exception_links[@]} link(s) leading to $env site. Recommended to use relative links to Apache Beam website. Absolute links to Apache Beam $env website:" + printf '%s\n' ${env_exception_links[@]} + else + echo "No absolute $env links" + fi +} + if ! command -v lynx; then echo "This script requires lynx to work properly." echo @@ -67,25 +99,54 @@ mapfile -t external_links < <(printf '%s\n' "${links[@]}" | grep "^https\?://" | echo "Found ${#links[@]} links including ${#external_links[@]} unique external links." echo "Checking links." 
+prod_exception_links=() +staging_exception_links=() +check_absolute_links "production" "beam.apache.org" prod_exception_links +check_absolute_links "staging" "apache-beam-website-pull-requests.storage.googleapis.com" staging_exception_links + +echo "Checking working links." +verified_list="https://reporter.apache.org/addrelease.html?beam,https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork,https://help.github.com/articles/securing-your-account-with-two-factor-authentication-2fa/,https://qwiklabs.com/focuses/608?locale=en&parent=catalog,https://www.artstation.com/jbruno,https://www.qwiklabs.com/focuses/1098?parent=catalog,https://www.infoworld.com/article/3336072/infoworlds-2019-technology-of-the-year-award-winners.html,https://be.linkedin.com/in/mattcasters,https://www.linkedin.com/company/beam-summit/,https://www.linkedin.com/company/beam-summit/?viewAsMember=true,https://www.ricardo.ch/,https://www.linkedin.com/company/apache-beam/,https://repository.apache.org/content/repositories/orgapachebeam-NNNN/),https://www.meetup.com/Apache-Beam-Stockholm/?_cookie-check=v_YHSSjYcT9rpm61,https://www.meetup.com/Apache-Beam-Stockholm/events/260634514" invalid_links=() -i=1 -for external_link in "${external_links[@]}" -do - redraw_progress_bar 50 1 $i ${#external_links[@]} - if ! curl -sSfL --max-filesize 1000000 --max-time 10 --connect-timeout 10 --retry 2 -4 "${external_link}" > /dev/null ; then - invalid_links+=("${external_link}") - echo "${external_link}" - fi - i=$((i+1)) -done -# Clear line - hide progress bar -echo -n -e "\033[2K" +function handle_urls { + i=1 + for external_link in "${external_links[@]}" + do + redraw_progress_bar 50 1 $i ${#external_links[@]} + + curl_result=$(curl -sSfL --max-filesize 1000000 --max-time 10 --connect-timeout 10 --retry 2 -4 "$external_link" 2>&1 > /dev/null) && status=$? || status=$? + if [ $status -ne 0 ] ; then + if [[ $curl_result =~ (error: )([0-9]{3}) ]]; then + error_code=${BASH_REMATCH[0]} + + # Check if link is in verified_list + if [[ $verified_list =~ "$external_link" ]]; then + continue + fi + + invalid_links+=("${error_code} ${external_link}") + echo "${external_link}" + fi + fi + i=$((i+1)) + done + # Clear line - hide progress bar + echo -n -e "\033[2K" +} + +handle_urls + +report_absolute_links prod_exception_links "production" +report_absolute_links staging_exception_links "staging" +# Sort invalid links by error status +IFS=$'\n' +sorted_invalid_links=($(sort <<<"${invalid_links[*]}")); +unset IFS -if [[ ${#invalid_links[@]} -ne 0 ]]; then - echo "Found ${#invalid_links[@]} invalid links: " - printf '%s\n' "${invalid_links[@]}" +if [[ ${#sorted_invalid_links[@]} -ne 0 ]]; then + echo "Found ${#sorted_invalid_links[@]} invalid links: " + printf '%s\n' "${sorted_invalid_links[@]}" else echo "All links work" fi diff --git a/website/www/site/content/en/blog/ApachePlayground.md b/website/www/site/content/en/blog/ApachePlayground.md index 1e29481abf4fa..bc12aed5def74 100644 --- a/website/www/site/content/en/blog/ApachePlayground.md +++ b/website/www/site/content/en/blog/ApachePlayground.md @@ -35,13 +35,13 @@ limitations under the License. 
* Displays pipeline execution graph (DAG) * Code editor to modify examples or try your own custom pipeline with a Direct Runner * Code editor with code highlighting, flexible layout, color schemes, and other features to provide responsive UX in desktop browsers -* Embedding a Playground example on a web page prompts the web page readers to try the example pipeline in the Playground - e.g., [Playground Quickstart](https://beam.apache.org/get-started/try-beam-playground/) page +* Embedding a Playground example on a web page prompts the web page readers to try the example pipeline in the Playground - e.g., [Playground Quickstart](/get-started/try-beam-playground/) page ### **What’s Next** * Try examples in [Apache Beam Playground](https://play.beam.apache.org/) * Submit your feedback using “Enjoying Playground?” in Apache Beam Playground or via [this form](https://docs.google.com/forms/d/e/1FAIpQLSd5_5XeOwwW2yjEVHUXmiBad8Lxk-4OtNcgG45pbyAZzd4EbA/viewform?usp=pp_url) -* Join the Beam [users@](https://beam.apache.org/community/contact-us) mailing list -* Contribute to the Apache Beam Playground codebase by following a few steps in this [Contribution Guide](https://beam.apache.org/contribute) +* Join the Beam [users@](/community/contact-us) mailing list +* Contribute to the Apache Beam Playground codebase by following a few steps in this [Contribution Guide](/contribute) -Please [reach out](https://beam.apache.org/community/contact-us) if you have any feedback or encounter any issues! +Please [reach out](/community/contact-us) if you have any feedback or encounter any issues! diff --git a/website/www/site/content/en/blog/adding-data-sources-to-sql.md b/website/www/site/content/en/blog/adding-data-sources-to-sql.md index 4cfbf05748d15..d0a45d3ec6305 100644 --- a/website/www/site/content/en/blog/adding-data-sources-to-sql.md +++ b/website/www/site/content/en/blog/adding-data-sources-to-sql.md @@ -148,7 +148,7 @@ class GenerateSequenceTable extends BaseBeamTable implements Serializable { Now that we have implemented the two basic classes (a `BaseBeamTable`, and a `TableProvider`), we can start playing with them. After building the -[SQL CLI](https://beam.apache.org/documentation/dsls/sql/shell/), we +[SQL CLI](/documentation/dsls/sql/shell/), we can now perform selections on the table: ``` diff --git a/website/www/site/content/en/blog/beam-2.21.0.md b/website/www/site/content/en/blog/beam-2.21.0.md index c23115a44b9bf..edaaf7b47d419 100644 --- a/website/www/site/content/en/blog/beam-2.21.0.md +++ b/website/www/site/content/en/blog/beam-2.21.0.md @@ -46,9 +46,9 @@ for example usage. for that function. More details can be found in - [Ensuring Python Type Safety](https://beam.apache.org/documentation/sdks/python-type-safety/) + [Ensuring Python Type Safety](/documentation/sdks/python-type-safety/) and the Python SDK Typing Changes - [blog post](https://beam.apache.org/blog/python-typing/). + [blog post](/blog/python-typing/). * Java SDK: Introducing the concept of options in Beam Schema’s. These options add extra context to fields and schemas. 
This replaces the current Beam metadata that is present diff --git a/website/www/site/content/en/blog/beam-2.25.0.md b/website/www/site/content/en/blog/beam-2.25.0.md index 403eabafc1241..a6e08627e286e 100644 --- a/website/www/site/content/en/blog/beam-2.25.0.md +++ b/website/www/site/content/en/blog/beam-2.25.0.md @@ -39,9 +39,9 @@ For more information on changes in 2.25.0, check out the * Support for repeatable fields in JSON decoder for `ReadFromBigQuery` added. (Python) ([BEAM-10524](https://issues.apache.org/jira/browse/BEAM-10524)) * Added an opt-in, performance-driven runtime type checking system for the Python SDK ([BEAM-10549](https://issues.apache.org/jira/browse/BEAM-10549)). - More details will be in an upcoming [blog post](https://beam.apache.org/blog/python-performance-runtime-type-checking/index.html). + More details will be in an upcoming [blog post](/blog/python-performance-runtime-type-checking/index.html). * Added support for Python 3 type annotations on PTransforms using typed PCollections ([BEAM-10258](https://issues.apache.org/jira/browse/BEAM-10258)). - More details will be in an upcoming [blog post](https://beam.apache.org/blog/python-improved-annotations/index.html). + More details will be in an upcoming [blog post](/blog/python-improved-annotations/index.html). * Improved the Interactive Beam API where recording streaming jobs now start a long running background recording job. Running ib.show() or ib.collect() samples from the recording ([BEAM-10603](https://issues.apache.org/jira/browse/BEAM-10603)). * In Interactive Beam, ib.show() and ib.collect() now have "n" and "duration" as parameters. These mean read only up to "n" elements and up to "duration" seconds of data read from the recording ([BEAM-10603](https://issues.apache.org/jira/browse/BEAM-10603)). * Initial preview of [Dataframes](https://s.apache.org/simpler-python-pipelines-2020#slide=id.g905ac9257b_1_21) support. diff --git a/website/www/site/content/en/blog/beam-2.32.0.md b/website/www/site/content/en/blog/beam-2.32.0.md index da252b6694489..7c6d297cc5a44 100644 --- a/website/www/site/content/en/blog/beam-2.32.0.md +++ b/website/www/site/content/en/blog/beam-2.32.0.md @@ -46,9 +46,9 @@ For more information on changes in 2.32.0, check out the [detailed release notes ## Highlights * The [Beam DataFrame - API](https://beam.apache.org/documentation/dsls/dataframes/overview/) is no + API](/documentation/dsls/dataframes/overview/) is no longer experimental! We've spent the time since the [2.26.0 preview - announcement](https://beam.apache.org/blog/dataframe-api-preview-available/) + announcement](/blog/dataframe-api-preview-available/) implementing the most frequently used pandas operations ([BEAM-9547](https://issues.apache.org/jira/browse/BEAM-9547)), improving [documentation](https://beam.apache.org/releases/pydoc/current/apache_beam.dataframe.html) @@ -62,7 +62,7 @@ For more information on changes in 2.32.0, check out the [detailed release notes Leaving experimental just means that we now have high confidence in the API and recommend its use for production workloads. We will continue to improve the API, guided by your - [feedback](https://beam.apache.org/community/contact-us/). + [feedback](/community/contact-us/). 
## I/Os diff --git a/website/www/site/content/en/blog/beam-2.38.0.md b/website/www/site/content/en/blog/beam-2.38.0.md index 4075b981a1e65..d59f4673abd12 100644 --- a/website/www/site/content/en/blog/beam-2.38.0.md +++ b/website/www/site/content/en/blog/beam-2.38.0.md @@ -29,7 +29,7 @@ See the [download page](/get-started/downloads/#2380-2022-04-20) for this releas For more information on changes in 2.38.0 check out the [detailed release notes](https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12319527&version=12351169). ## I/Os -* Introduce projection pushdown optimizer to the Java SDK ([BEAM-12976](https://issues.apache.org/jira/browse/BEAM-12976)). The optimizer currently only works on the [BigQuery Storage API](https://beam.apache.org/documentation/io/built-in/google-bigquery/#storage-api), but more I/Os will be added in future releases. If you encounter a bug with the optimizer, please file a JIRA and disable the optimizer using pipeline option `--experiments=disable_projection_pushdown`. +* Introduce projection pushdown optimizer to the Java SDK ([BEAM-12976](https://issues.apache.org/jira/browse/BEAM-12976)). The optimizer currently only works on the [BigQuery Storage API](/documentation/io/built-in/google-bigquery/#storage-api), but more I/Os will be added in future releases. If you encounter a bug with the optimizer, please file a JIRA and disable the optimizer using pipeline option `--experiments=disable_projection_pushdown`. * A new IO for Neo4j graph databases was added. ([BEAM-1857](https://issues.apache.org/jira/browse/BEAM-1857)) It has the ability to update nodes and relationships using UNWIND statements and to read data using cypher statements with parameters. * `amazon-web-services2` has reached feature parity and is finally recommended over the earlier `amazon-web-services` and `kinesis` modules (Java). These will be deprecated in one of the next releases ([BEAM-13174](https://issues.apache.org/jira/browse/BEAM-13174)). * Long outstanding write support for `Kinesis` was added ([BEAM-13175](https://issues.apache.org/jira/browse/BEAM-13175)). diff --git a/website/www/site/content/en/blog/beam-2.42.0.md b/website/www/site/content/en/blog/beam-2.42.0.md index 08b749621179b..6d7499df5cc43 100644 --- a/website/www/site/content/en/blog/beam-2.42.0.md +++ b/website/www/site/content/en/blog/beam-2.42.0.md @@ -32,7 +32,7 @@ For more information on changes in 2.42.0, check out the [detailed release notes * Added support for stateful DoFns to the Go SDK. * Added support for [Batched - DoFns](https://beam.apache.org/documentation/programming-guide/#batched-dofns) + DoFns](/documentation/programming-guide/#batched-dofns) to the Python SDK. ## New Features / Improvements diff --git a/website/www/site/content/en/blog/beam-2.43.0.md b/website/www/site/content/en/blog/beam-2.43.0.md index 83c3bda293e5f..227538390f2f6 100644 --- a/website/www/site/content/en/blog/beam-2.43.0.md +++ b/website/www/site/content/en/blog/beam-2.43.0.md @@ -37,7 +37,7 @@ For more information on changes in 2.43.0, check out the [detailed release notes * Decreased TextSource CPU utilization by 2.3x (Java) ([#23193](https://github.com/apache/beam/issues/23193)). * Fixed bug when using SpannerIO with RuntimeValueProvider options (Java) ([#22146](https://github.com/apache/beam/issues/22146)). 
-* Fixed issue for unicode rendering on WriteToBigQuery ([#10785](https://github.com/apache/beam/issues/10785)) +* Fixed issue for unicode rendering on WriteToBigQuery ([#22312](https://github.com/apache/beam/issues/22312)) * Remove obsolete variants of BigQuery Read and Write, always using Beam-native variant ([#23564](https://github.com/apache/beam/issues/23564) and [#23559](https://github.com/apache/beam/issues/23559)). * Bumped google-cloud-spanner dependency version to 3.x for Python SDK ([#21198](https://github.com/apache/beam/issues/21198)). diff --git a/website/www/site/content/en/blog/beam-2.8.0.md b/website/www/site/content/en/blog/beam-2.8.0.md index b2c7163609f65..cd2e91889007d 100644 --- a/website/www/site/content/en/blog/beam-2.8.0.md +++ b/website/www/site/content/en/blog/beam-2.8.0.md @@ -50,7 +50,7 @@ For more information on changes in 2.8.0, check out the ### Portability -* [Python on Flink MVP](https://beam.apache.org/roadmap/portability/#python-on-flink) completed. +* [Python on Flink MVP](/roadmap/portability/#python-on-flink) completed. ### I/Os diff --git a/website/www/site/content/en/blog/beam-katas-kotlin-release.md b/website/www/site/content/en/blog/beam-katas-kotlin-release.md index c0faef3c72092..1f84f96bc58af 100644 --- a/website/www/site/content/en/blog/beam-katas-kotlin-release.md +++ b/website/www/site/content/en/blog/beam-katas-kotlin-release.md @@ -29,7 +29,7 @@ Today, we are happy to announce a new addition to the Beam Katas family: Kotlin! Apache Beam and Kotlin Shaking Hands -You may remember [a post from last year](https://beam.apache.org/blog/beam-kata-release) that informed everyone of the wonderful Beam Katas available on [Stepik](https://stepik.org) +You may remember [a post from last year](/blog/beam-kata-release) that informed everyone of the wonderful Beam Katas available on [Stepik](https://stepik.org) for learning more about writing Apache Beam applications, working with its various APIs and programming model hands-on, all from the comfort of your favorite IDEs. As of today, you can now work through all of the progressive exercises to learn about the fundamentals of Beam in Kotlin. @@ -41,7 +41,7 @@ as one of the most beloved programming languages in the annual Stack Overflow De just our word for it. The relationship between Apache Beam and Kotlin isn't a new one. You can find examples scattered across the web -of engineering teams embracing the two technologies including [a series of samples announced on this very blog](https://beam.apache.org/blog/beam-kotlin/). +of engineering teams embracing the two technologies including [a series of samples announced on this very blog](/blog/beam-kotlin/). If you are new to Beam or are an experienced veteran looking for a change of pace, we'd encourage you to give Kotlin a try. diff --git a/website/www/site/content/en/blog/beam-sql-with-notebooks.md b/website/www/site/content/en/blog/beam-sql-with-notebooks.md index 872a2a3004df2..4f7c428613a12 100644 --- a/website/www/site/content/en/blog/beam-sql-with-notebooks.md +++ b/website/www/site/content/en/blog/beam-sql-with-notebooks.md @@ -22,7 +22,7 @@ limitations under the License. ## Intro -[Beam SQL](https://beam.apache.org/documentation/dsls/sql/overview/) allows a +[Beam SQL](/documentation/dsls/sql/overview/) allows a Beam user to query PCollections with SQL statements. 
[Interactive Beam](https://github.com/apache/beam/tree/master/sdks/python/apache_beam/runners/interactive#interactive-beam) provides an integration between Apache Beam and @@ -174,7 +174,7 @@ element_type like `BeamSchema_...(id: int32, str: str, flt: float64)`. PCollection because the `beam_sql` magic always implicitly creates a pipeline to execute your SQL query. To hold the elements with each field's type info, Beam automatically creates a -[schema](https://beam.apache.org/documentation/programming-guide/#what-is-a-schema) +[schema](/documentation/programming-guide/#what-is-a-schema) as the `element_type` for the created PCollection. You will learn more about schema-aware PCollections later. @@ -221,7 +221,7 @@ always check the content of a PCollection by invoking `ib.show(pcoll_name)` or The `beam_sql` magic provides the flexibility to seamlessly mix SQL and non-SQL Beam statements to build pipelines and even run them on Dataflow. However, each PCollection queried by Beam SQL needs to have a -[schema](https://beam.apache.org/documentation/programming-guide/#what-is-a-schema). +[schema](/documentation/programming-guide/#what-is-a-schema). For the `beam_sql` magic, it’s recommended to use `typing.NamedTuple` when a schema is desired. You can go through the below example to learn more details about schema-aware PCollections. @@ -788,7 +788,7 @@ you to learn Beam SQL and mix Beam SQL into prototyping and productionizing ( e.g., to Dataflow) your Beam pipelines with minimum setups. For more details about the Beam SQL syntax, check out the Beam Calcite SQL -[compatibility](https://beam.apache.org/documentation/dsls/sql/calcite/overview/) +[compatibility](/documentation/dsls/sql/calcite/overview/) and the Apache Calcite SQL [syntax](https://calcite.apache.org/docs/reference.html). diff --git a/website/www/site/content/en/blog/beam-starter-projects.md b/website/www/site/content/en/blog/beam-starter-projects.md index a1b7e995c97f9..9606cd94c8eba 100644 --- a/website/www/site/content/en/blog/beam-starter-projects.md +++ b/website/www/site/content/en/blog/beam-starter-projects.md @@ -72,6 +72,6 @@ Here are the starter projects; you can choose your favorite language: * **[Kotlin]** [github.com/apache/beam-starter-kotlin](https://github.com/apache/beam-starter-kotlin) – Adapted to idiomatic Kotlin * **[Scala]** [github.com/apache/beam-starter-scala](https://github.com/apache/beam-starter-scala) – Coming soon! -We have updated the [Java quickstart](https://beam.apache.org/get-started/quickstart/java/) to use the new starter project, and we're working on updating the Python and Go quickstarts as well. +We have updated the [Java quickstart](/get-started/quickstart/java/) to use the new starter project, and we're working on updating the Python and Go quickstarts as well. We hope you find this useful. Feedback and contributions are always welcome! So feel free to create a GitHub issue, or open a Pull Request to any of the starter project repositories. diff --git a/website/www/site/content/en/blog/beam-summit-europe-2019.md b/website/www/site/content/en/blog/beam-summit-europe-2019.md index 7f5351f9efd20..fcd630c1f258c 100644 --- a/website/www/site/content/en/blog/beam-summit-europe-2019.md +++ b/website/www/site/content/en/blog/beam-summit-europe-2019.md @@ -54,7 +54,7 @@ and [Stockholm](https://www.meetup.com/Apache-Beam-Stockholm/events/260634514) h Keep an eye out for a meetup in [Paris](https://www.meetup.com/Paris-Apache-Beam-Meetup). 
-If you are interested in starting your own meetup, feel free [to reach out](https://beam.apache.org/community/contact-us)! Good places to start include our Slack channel, the dev and user mailing lists, or the Apache Beam Twitter. +If you are interested in starting your own meetup, feel free [to reach out](/community/contact-us)! Good places to start include our Slack channel, the dev and user mailing lists, or the Apache Beam Twitter. Even if you can’t travel to these meetups, you can stay informed on the happenings of the community. The talks and sessions from previous conferences and meetups are archived on the [Apache Beam YouTube channel](https://www.youtube.com/c/ApacheBeamYT). If you want your session added to the channel, don’t hesitate to get in touch! @@ -63,7 +63,7 @@ The first summit of the year will be held in Berlin: Beam Summit Europe Banner -You can find more info on the [website](https://beamsummit.org) and read about the inaugural edition of the Beam Summit Europe [here](https://beam.apache.org/blog/2018/10/31/beam-summit-aftermath.html). At these summits, you have the opportunity to meet with other Apache Beam creators and users, get expert advice, learn from the speaker sessions, and participate in workshops. +You can find more info on the [website](https://beamsummit.org) and read about the inaugural edition of the Beam Summit Europe [here](/blog/2018/10/31/beam-summit-aftermath.html). At these summits, you have the opportunity to meet with other Apache Beam creators and users, get expert advice, learn from the speaker sessions, and participate in workshops. We strongly encourage you to get involved again this year! You can participate in the following ways for the upcoming summit in Europe: diff --git a/website/www/site/content/en/blog/dataframe-api-preview-available.md b/website/www/site/content/en/blog/dataframe-api-preview-available.md index 81ffc5fe6a17e..a48f02ff82e31 100644 --- a/website/www/site/content/en/blog/dataframe-api-preview-available.md +++ b/website/www/site/content/en/blog/dataframe-api-preview-available.md @@ -23,7 +23,7 @@ limitations under the License. We're excited to announce that a preview of the Beam Python SDK's new DataFrame API is now available in [Beam -2.26.0](https://beam.apache.org/blog/beam-2.26.0/). Much like `SqlTransform` +2.26.0](/blog/beam-2.26.0/). Much like `SqlTransform` ([Java](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/extensions/sql/SqlTransform.html), [Python](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.sql.html#apache_beam.transforms.sql.SqlTransform)), the DataFrame API gives Beam users a way to express complex @@ -76,7 +76,7 @@ as much as possible. ## DataFrames as a DSL You may already be aware of [Beam -SQL](https://beam.apache.org/documentation/dsls/sql/overview/), which is +SQL](/documentation/dsls/sql/overview/), which is a Domain-Specific Language (DSL) built with Beam's Java SDK. SQL is considered a DSL because it's possible to express a full pipeline, including IOs and complex operations, entirely with SQL.  @@ -91,7 +91,7 @@ implementations (`pd.read_{csv,parquet,...}` and `pd.DataFrame.to_{csv,parquet,. Like SQL, it's also possible to embed the DataFrame API into a larger pipeline by using -[schemas](https://beam.apache.org/documentation/programming-guide/#what-is-a-schema). +[schemas](/documentation/programming-guide/#what-is-a-schema). 
A schema-aware PCollection can be converted to a DataFrame, processed, and the result converted back to another schema-aware PCollection. For example, if you wanted to use traditional Beam IOs rather than one of the DataFrame IOs you diff --git a/website/www/site/content/en/blog/go-2.40.md b/website/www/site/content/en/blog/go-2.40.md index 977790019d796..d2615b6b7c603 100644 --- a/website/www/site/content/en/blog/go-2.40.md +++ b/website/www/site/content/en/blog/go-2.40.md @@ -29,15 +29,15 @@ some of the biggest changes coming with this important release! 2.40 marks the release of one of our most anticipated feature sets yet: native streaming Go pipelines. This includes adding support for: -- [Self Checkpointing](https://beam.apache.org/documentation/programming-guide/#user-initiated-checkpoint) -- [Watermark Estimation](https://beam.apache.org/documentation/programming-guide/#watermark-estimation) -- [Pipeline Drain/Truncation](https://beam.apache.org/documentation/programming-guide/#truncating-during-drain) -- [Bundle Finalization](https://beam.apache.org/documentation/programming-guide/#bundle-finalization) (added in 2.39) +- [Self Checkpointing](/documentation/programming-guide/#user-initiated-checkpoint) +- [Watermark Estimation](/documentation/programming-guide/#watermark-estimation) +- [Pipeline Drain/Truncation](/documentation/programming-guide/#truncating-during-drain) +- [Bundle Finalization](/documentation/programming-guide/#bundle-finalization) (added in 2.39) With all of these features, it is now possible to write your own streaming pipeline source DoFns in Go without relying on cross-language transforms from Java or Python. We encourage you to try out all of these new features -in your streaming pipelines! The [programming guide](https://beam.apache.org/documentation/programming-guide/#splittable-dofns) +in your streaming pipelines! The [programming guide](/documentation/programming-guide/#splittable-dofns) has additional information on getting started with native Go streaming DoFns. # Generic Registration (Make Your Pipelines 3x Faster) @@ -61,7 +61,7 @@ gains, check out the [registration doc page](https://pkg.go.dev/github.com/apach Moving forward, we remain focused on improving the streaming experience and leveraging generics to improve the SDK. Specific improvements we are considering -include adding [State & Timers](https://beam.apache.org/documentation/programming-guide/#state-and-timers) +include adding [State & Timers](/documentation/programming-guide/#state-and-timers) support, introducing a Go expansion service so that Go DoFns can be used in other languages, and wrapping more Java and Python IOs so that they can be easily used in Go. As always, please let us know what changes you would like to see by diff --git a/website/www/site/content/en/blog/gsoc-19.md b/website/www/site/content/en/blog/gsoc-19.md index 4c6c4a431f2cd..5ec7801dfd088 100644 --- a/website/www/site/content/en/blog/gsoc-19.md +++ b/website/www/site/content/en/blog/gsoc-19.md @@ -49,8 +49,8 @@ I wanted to explore Data Engineering, so for GSoC, I wanted to work on a project I had already read the [Streaming Systems book](http://streamingsystems.net/). So, I had an idea of the concepts that Beam is built on, but had never actually used Beam. Before actually submitting a proposal, I went through a bunch of resources to make sure I had a concrete understanding of Beam. 
I read the [Streaming 101](https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-101) and [Streaming 102](https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-102) blogs by Tyler Akidau. They are the perfect introduction to Beam’s unified model for Batch and Streaming. -In addition, I watched all Beam talks on YouTube. You can find them on the [Beam Website](https://beam.apache.org/get-started/resources/videos-and-podcasts/). -Beam has really good documentation. The [Programming Guide](https://beam.apache.org/documentation/programming-guide/) lays out all of Beam’s concepts really well. [Beam’s execution model](https://beam.apache.org/documentation/runtime/model) is also documented well and is a must-read to understand how Beam processes data. +In addition, I watched all Beam talks on YouTube. You can find them on the [Beam Website](/get-started/resources/videos-and-podcasts/). +Beam has really good documentation. The [Programming Guide](/documentation/programming-guide/) lays out all of Beam’s concepts really well. [Beam’s execution model](/documentation/runtime/model) is also documented well and is a must-read to understand how Beam processes data. [waitingforcode.com](https://www.waitingforcode.com/apache-beam) also has good blog posts about Beam concepts. To get a better sense of the Beam codebase, I played around with it and worked on some PRs to understand Beam better and got familiar with the test suite and workflows. diff --git a/website/www/site/content/en/blog/hop-web-cloud.md b/website/www/site/content/en/blog/hop-web-cloud.md index 34e1aabff7803..8da6fc4a67317 100644 --- a/website/www/site/content/en/blog/hop-web-cloud.md +++ b/website/www/site/content/en/blog/hop-web-cloud.md @@ -22,7 +22,7 @@ limitations under the License. Hop is a codeless visual development environment for Apache Beam pipelines that can run jobs in any Beam runner, such as Dataflow, Flink or Spark. [In a -previous post](https://beam.apache.org/blog/apache-hop-with-dataflow/), we +previous post](/blog/apache-hop-with-dataflow/), we introduced the desktop version of Apache Hop. Hop also has a web environment, Hop Web, that you can run from a container, so you don't have to install anything on your computer to use it. @@ -234,7 +234,7 @@ access your Apache Hop instance. You are now ready to use Apache Hop in a web browser! You can try to replicate the example that was given [in a previous -post](https://beam.apache.org/blog/apache-hop-with-dataflow/) using Hop web, or +post](/blog/apache-hop-with-dataflow/) using Hop web, or just try to launch any other project from the samples included with Hop: ![Sample projects in Hop](/images/blog/hop-web-cloud/hop-web-cloud-image5.png) @@ -300,5 +300,5 @@ nothing, just your favourite web browser. If you followed the instructions in this post, head over to the post [Running Apache Hop visual pipelines with Google Cloud -Dataflow](https://beam.apache.org/blog/apache-hop-with-dataflow/) to run a +Dataflow](/blog/apache-hop-with-dataflow/) to run a Dataflow pipeline right from your web browser! diff --git a/website/www/site/content/en/blog/kafka-to-pubsub-example.md b/website/www/site/content/en/blog/kafka-to-pubsub-example.md index 31972d95ffd4a..df2439e2062d4 100644 --- a/website/www/site/content/en/blog/kafka-to-pubsub-example.md +++ b/website/www/site/content/en/blog/kafka-to-pubsub-example.md @@ -31,8 +31,8 @@ simple yet powerful pipelines and also provides an out-of-the-box solution that plug'n'play"_. 
This end-to-end example is included -in [Apache Beam release 2.27](https://beam.apache.org/blog/beam-2.27.0/) -and can be downloaded [here](https://beam.apache.org/get-started/downloads/#2270-2020-12-22). +in [Apache Beam release 2.27](/blog/beam-2.27.0/) +and can be downloaded [here](/get-started/downloads/#2270-2020-12-22). We hope you will find this example useful for setting up data pipelines between Kafka and Pub/Sub. @@ -85,5 +85,5 @@ you more understanding on how pipelines work and look like. If you are already u some code samples in it will be useful for your use cases. Please -[let us know](https://beam.apache.org/community/contact-us/) if you encounter any issues. +[let us know](/community/contact-us/) if you encounter any issues. diff --git a/website/www/site/content/en/blog/ml-resources.md b/website/www/site/content/en/blog/ml-resources.md index e4e14b7ba4828..3048fff87f1d2 100644 --- a/website/www/site/content/en/blog/ml-resources.md +++ b/website/www/site/content/en/blog/ml-resources.md @@ -34,10 +34,10 @@ documentation and notebooks to make it easier to use these new features and to show how Beam can be used to solve common Machine Learning problems. We're now happy to present this new and improved Beam ML experience! -To get started, we encourage you to visit Beam's new [AI/ML landing page](https://beam.apache.org/documentation/ml/overview/). -We've got plenty of content on things like [multi-model pipelines](https://beam.apache.org/documentation/ml/multi-model-pipelines/), -[performing inference with metrics](https://beam.apache.org/documentation/ml/runinference-metrics/), -[online training](https://beam.apache.org/documentation/ml/online-clustering/), and much more. +To get started, we encourage you to visit Beam's new [AI/ML landing page](/documentation/ml/overview/). +We've got plenty of content on things like [multi-model pipelines](/documentation/ml/multi-model-pipelines/), +[performing inference with metrics](/documentation/ml/runinference-metrics/), +[online training](/documentation/ml/online-clustering/), and much more. Seznam started migrating their key workloads to Apache Beam. -They decided to merge the [Euphoria API](https://beam.apache.org/documentation/sdks/java/euphoria/) +They decided to merge the [Euphoria API](/documentation/sdks/java/euphoria/) as a high-level DSL for Apache Beam Java SDK. This significant contribution to Apache Beam was a starting point for Seznam’s active participation in the community, later presenting their unique experience and findings at [Beam Summit Europe 2019](https://www.youtube.com/watch?v=ZIFtmx8nBow) @@ -121,8 +121,8 @@ Apache Beam enabled Seznam to execute batch and stream jobs much faster without thus maximizing scalability, performance, and efficiency. Apache Beam offers a variety of ways to distribute skewed data evenly. -[Windowing](https://beam.apache.org/documentation/programming-guide/#windowing) -for processing unbounded and [Partition](https://beam.apache.org/documentation/transforms/java/elementwise/partition/) +[Windowing](/documentation/programming-guide/#windowing) +for processing unbounded and [Partition](/documentation/transforms/java/elementwise/partition/) for bounded data sets transform input into finite collections of elements that can be reshuffled. Apache Beam provides a byte-based shuffle that can be executed by Spark runner or Flink runner, without requiring Apache Spark or Apache Flink to deserialize the full key. 
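The Seznam case study above leans on `Partition` to spread skewed keys evenly. As a concrete illustration, here is a minimal, hypothetical Beam Python sketch (not Seznam's code; the key names, partition count, and hashing choice are made up for the example) that fans a keyed, bounded PCollection out into a fixed number of partitions:

```python
import apache_beam as beam

NUM_PARTITIONS = 4  # hypothetical partition count


def by_key(element, num_partitions):
    # Route each keyed element to a partition. A production pipeline would use
    # a stable hash (e.g. zlib.crc32 of the encoded key) rather than Python's
    # built-in hash(), which is not stable across worker processes.
    key, _value = element
    return hash(key) % num_partitions


with beam.Pipeline() as pipeline:
    partitions = (
        pipeline
        | beam.Create([("hot-key", i) for i in range(100)] + [("cold-key", 0)])
        | beam.Partition(by_key, NUM_PARTITIONS)
    )
    # Each partition is an ordinary PCollection that can be reshuffled or
    # processed independently of the others.
    for i, part in enumerate(partitions):
        part | f"Count{i}" >> beam.combiners.Count.Globally() | f"Print{i}" >> beam.Map(print)
```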
@@ -197,7 +197,7 @@ Apache Beam offered a unified model for Seznam’s stream and batch processing t Apache Beam supported multiple runners, language SDKs, and built-in and custom pluggable I/O transforms, thus eliminating the need to invest into the development and support of proprietary runners and solutions. After evaluation, Seznam transitioned their workloads to Apache Beam and integrated -[Euphoria API](https://beam.apache.org/documentation/sdks/java/euphoria/) +[Euphoria API](/documentation/sdks/java/euphoria/) (a fast prototyping framework developed by Seznam), contributing to the Apache Beam open source community. The Apache Beam abstraction and execution model allowed Seznam to robustly scale their data processing. diff --git a/website/www/site/content/en/case-studies/snowflake.md b/website/www/site/content/en/case-studies/snowflake.md index ebcfee1678c17..e9140b3e73561 100644 --- a/website/www/site/content/en/case-studies/snowflake.md +++ b/website/www/site/content/en/case-studies/snowflake.md @@ -2,7 +2,7 @@ title: "Snowflake" icon: /images/logos/powered-by/snowflake.png hasNav: true -hasLink: "https://beam.apache.org/documentation/io/built-in/snowflake/" +hasLink: "/documentation/io/built-in/snowflake/" --- # RunInference -In Apache Beam 2.40.0, Beam introduced the RunInference API, which lets you deploy a machine learning model in a Beam pipeline. A `RunInference` transform performs inference on a `PCollection` of examples using a machine learning (ML) model. The transform outputs a PCollection that contains the input examples and output predictions. For more information, see RunInference [here](https://beam.apache.org/documentation/transforms/python/elementwise/runinference/). You can also find [inference examples on GitHub](https://github.com/apache/beam/tree/master/sdks/python/apache_beam/examples/inference). +In Apache Beam 2.40.0, Beam introduced the RunInference API, which lets you deploy a machine learning model in a Beam pipeline. A `RunInference` transform performs inference on a `PCollection` of examples using a machine learning (ML) model. The transform outputs a PCollection that contains the input examples and output predictions. For more information, see RunInference [here](/documentation/transforms/python/elementwise/runinference/). You can also find [inference examples on GitHub](https://github.com/apache/beam/tree/master/sdks/python/apache_beam/examples/inference). ## Using RunInference with very large models diff --git a/website/www/site/content/en/documentation/ml/multi-model-pipelines.md b/website/www/site/content/en/documentation/ml/multi-model-pipelines.md index 8bc9cd0d416b5..569a51b8db55f 100644 --- a/website/www/site/content/en/documentation/ml/multi-model-pipelines.md +++ b/website/www/site/content/en/documentation/ml/multi-model-pipelines.md @@ -23,7 +23,7 @@ into a second model. This page explains how multi-model pipelines work and gives you need to know to build one. Before reading this section, it is recommended that you become familiar with the information in -the [Pipeline development lifecycle](https://beam.apache.org/documentation/pipelines/design-your-pipeline/). +the [Pipeline development lifecycle](/documentation/pipelines/design-your-pipeline/). ## How to build a Multi-model pipeline with Beam @@ -33,7 +33,7 @@ all of those steps together by encapsulating them in a single Apache Beam Direct resilient and scalable end-to-end machine learning systems. 
To deploy your machine learning model in an Apache Beam pipeline, use -the [`RunInferenceAPI`](https://beam.apache.org/documentation/sdks/python-machine-learning/), which +the [`RunInferenceAPI`](/documentation/sdks/python-machine-learning/), which facilitates the integration of your model as a `PTransform` step in your DAG. Composing multiple `RunInference` transforms within a single DAG makes it possible to build a pipeline that consists of multiple ML models. In this way, Apache Beam supports the development of complex ML systems. @@ -72,7 +72,7 @@ model_b_predictions = userset_b_traffic | RunInference() Where `beam.partition` is used to split the data source into 50/50 split partitions. For more information about data partitioning, -see [Partition](https://beam.apache.org/documentation/transforms/python/elementwise/partition/). +see [Partition](/documentation/transforms/python/elementwise/partition/). ### Cascade Pattern diff --git a/website/www/site/content/en/documentation/ml/online-clustering.md b/website/www/site/content/en/documentation/ml/online-clustering.md index f4c67bfb0e9da..fa63664fb6716 100644 --- a/website/www/site/content/en/documentation/ml/online-clustering.md +++ b/website/www/site/content/en/documentation/ml/online-clustering.md @@ -140,7 +140,7 @@ The next sections examine three important pipeline steps: 1. Tokenize the text. 2. Feed the tokenized text to get embedding from a transformer-based language model. -3. Perform clustering using [stateful processing](https://beam.apache.org/blog/stateful-processing/). +3. Perform clustering using [stateful processing](/blog/stateful-processing/). ### Get Embedding from a Language Model @@ -173,7 +173,7 @@ To make better clusters, after getting the embedding for each piece of Twitter t ### StatefulOnlineClustering -Because the data is streaming, you need to use an iterative clustering algorithm, like BIRCH. And because the algorithm is iterative, you need a mechanism to store the previous state so that when Twitter text arrives, it can be updated. **Stateful processing** enables a `DoFn` to have persistent state, which can be read and written during the processing of each element. For more information about stateful processing, see [Stateful processing with Apache Beam](https://beam.apache.org/blog/stateful-processing/). +Because the data is streaming, you need to use an iterative clustering algorithm, like BIRCH. And because the algorithm is iterative, you need a mechanism to store the previous state so that when Twitter text arrives, it can be updated. **Stateful processing** enables a `DoFn` to have persistent state, which can be read and written during the processing of each element. For more information about stateful processing, see [Stateful processing with Apache Beam](/blog/stateful-processing/). In this example, every time a new message is read from Pub/Sub, you retrieve the existing state of the clustering model, update it, and write it back to the state. 
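For readers skimming the online-clustering hunk above, the read/update/write-back cycle it describes is Beam's standard stateful-processing pattern. The snippet below is a deliberately tiny, hypothetical stand-in for the tutorial's `StatefulOnlineClustering` DoFn: it keeps only a running per-key count rather than a BIRCH model, but it exercises the same machinery (a state spec, `DoFn.StateParam`, `read()`, and `write()`) on keyed input:

```python
import apache_beam as beam
from apache_beam.coders import VarIntCoder
from apache_beam.transforms.userstate import ReadModifyWriteStateSpec


class RunningCountFn(beam.DoFn):
    # One integer of persistent state per key, mirroring the
    # read -> update -> write-back cycle described above.
    COUNT_STATE = ReadModifyWriteStateSpec("count", VarIntCoder())

    def process(self, element, count=beam.DoFn.StateParam(COUNT_STATE)):
        key, _value = element
        current = count.read() or 0  # read the previous state (None on the first element)
        current += 1                 # update it
        count.write(current)         # write it back for the next element with this key
        yield key, current


with beam.Pipeline() as pipeline:
    (
        pipeline
        | beam.Create([("tweets", "a"), ("tweets", "b"), ("tweets", "c")])
        | beam.ParDo(RunningCountFn())
        | beam.Map(print)
    )
```

In the tutorial the persistent value is the clustering model rather than a counter; the shape of the DoFn stays the same.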
diff --git a/website/www/site/content/en/documentation/ml/orchestration.md b/website/www/site/content/en/documentation/ml/orchestration.md index c1f47320d6aed..6411b0f724429 100644 --- a/website/www/site/content/en/documentation/ml/orchestration.md +++ b/website/www/site/content/en/documentation/ml/orchestration.md @@ -26,7 +26,7 @@ Apache Beam is an open source, unified model for defining both batch and streami ![A standalone beam pipeline](/images/standalone-beam-pipeline.svg) -Defining a pipeline and the corresponding DAG does not mean that data starts flowing through the pipeline. To run the pipeline, you need to deploy it to one of the [supported Beam runners](https://beam.apache.org/documentation/runners/capability-matrix/). These distributed processing backends include Apache Flink, Apache Spark, and Google Cloud Dataflow. To run the pipeline locally on your machine for development and debugging purposes, a [Direct Runner](https://beam.apache.org/documentation/runners/direct/) is also provided. View the [runner capability matrix](https://beam.apache.org/documentation/runners/capability-matrix/) to verify that your chosen runner supports the data processing steps defined in your pipeline, especially when using the Direct Runner. +Defining a pipeline and the corresponding DAG does not mean that data starts flowing through the pipeline. To run the pipeline, you need to deploy it to one of the [supported Beam runners](/documentation/runners/capability-matrix/). These distributed processing backends include Apache Flink, Apache Spark, and Google Cloud Dataflow. To run the pipeline locally on your machine for development and debugging purposes, a [Direct Runner](/documentation/runners/direct/) is also provided. View the [runner capability matrix](/documentation/runners/capability-matrix/) to verify that your chosen runner supports the data processing steps defined in your pipeline, especially when using the Direct Runner. ## Orchestrating frameworks diff --git a/website/www/site/content/en/documentation/ml/overview.md b/website/www/site/content/en/documentation/ml/overview.md index d2737f5fe383b..ed77da115c2f9 100644 --- a/website/www/site/content/en/documentation/ml/overview.md +++ b/website/www/site/content/en/documentation/ml/overview.md @@ -43,9 +43,9 @@ You can use Apache Beam for data validation, data preprocessing, and model deplo ## Data processing -You can use Apache Beam for data validation and preprocessing by setting up data pipelines that transform your data and output metrics computed from your data. Beam has a rich set of [I/O connectors](https://beam.apache.org/documentation/io/built-in/) for ingesting and writing data, which allows you to integrate it with your existing file system, database, or messaging queue. +You can use Apache Beam for data validation and preprocessing by setting up data pipelines that transform your data and output metrics computed from your data. Beam has a rich set of [I/O connectors](/documentation/io/built-in/) for ingesting and writing data, which allows you to integrate it with your existing file system, database, or messaging queue. -When developing your ML model, you can also first explore your data with the [Beam DataFrame API](https://beam.apache.org/documentation/dsls/dataframes/overview/). The DataFrom API lets you identify and implement the required preprocessing steps, making it easier for you to move your pipeline to production. 
+When developing your ML model, you can also first explore your data with the [Beam DataFrame API](/documentation/dsls/dataframes/overview/). The DataFrame API lets you identify and implement the required preprocessing steps, making it easier for you to move your pipeline to production. Steps executed during preprocessing often also need to be applied before running inference, in which case you can use the same Beam implementation twice. Lastly, when you need to do postprocessing after running inference, Apache Beam allows you to incorporate the postprocessing into your model inference pipeline. @@ -58,7 +58,7 @@ Beam provides different ways to implement inference as part of your pipeline. Yo ### RunInference -The recommended way to implement inference is by using the [RunInference API](https://beam.apache.org/documentation/sdks/python-machine-learning/). RunInference takes advantage of existing Apache Beam concepts, such as the `BatchElements` transform and the `Shared` class, to enable you to use models in your pipelines to create transforms optimized for machine learning inferences. The ability to create arbitrarily complex workflow graphs also allows you to build multi-model pipelines. +The recommended way to implement inference is by using the [RunInference API](/documentation/sdks/python-machine-learning/). RunInference takes advantage of existing Apache Beam concepts, such as the `BatchElements` transform and the `Shared` class, to enable you to use models in your pipelines to create transforms optimized for machine learning inferences. The ability to create arbitrarily complex workflow graphs also allows you to build multi-model pipelines. You can integrate your model in your pipeline by using the corresponding model handlers. A `ModelHandler` is an object that wraps the underlying model and allows you to configure its parameters. Model handlers are available for PyTorch, scikit-learn, and TensorFlow. Examples of how to use RunInference for PyTorch, scikit-learn, and TensorFlow are shown in this [notebook](https://github.com/apache/beam/blob/master/examples/notebooks/beam-ml/run_inference_pytorch_tensorflow_sklearn.ipynb). diff --git a/website/www/site/content/en/documentation/ml/runinference-metrics.md b/website/www/site/content/en/documentation/ml/runinference-metrics.md index b31d1bb25705d..8bf4d713c3ad9 100644 --- a/website/www/site/content/en/documentation/ml/runinference-metrics.md +++ b/website/www/site/content/en/documentation/ml/runinference-metrics.md @@ -17,7 +17,7 @@ limitations under the License. # RunInference Metrics -This example demonstrates and explains different metrics that are available when using the [RunInference](https://beam.apache.org/documentation/transforms/python/elementwise/runinference/) transform to perform inference using a machine learning model. The example uses a pipeline that reads a list of sentences, tokenizes the text, and uses the transformer-based model `distilbert-base-uncased-finetuned-sst-2-english` with `RunInference` to classify the pieces of text into two classes. +This example demonstrates and explains different metrics that are available when using the [RunInference](/documentation/transforms/python/elementwise/runinference/) transform to perform inference using a machine learning model. The example uses a pipeline that reads a list of sentences, tokenizes the text, and uses the transformer-based model `distilbert-base-uncased-finetuned-sst-2-english` with `RunInference` to classify the pieces of text into two classes.
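As a companion to the `RunInference`/`ModelHandler` description patched above, here is a minimal, hypothetical Beam Python sketch of plugging a scikit-learn model handler into `RunInference`. The bucket path and model file are placeholders (nothing shipped with Beam), and the inputs are toy NumPy examples:

```python
import apache_beam as beam
import numpy
from apache_beam.ml.inference.base import RunInference
from apache_beam.ml.inference.sklearn_inference import SklearnModelHandlerNumpy

# Placeholder URI; point this at your own pickled scikit-learn model.
model_handler = SklearnModelHandlerNumpy(model_uri="gs://my-bucket/my_model.pkl")

with beam.Pipeline() as pipeline:
    (
        pipeline
        | beam.Create([numpy.array([1.0, 2.0]), numpy.array([3.0, 4.0])])
        | RunInference(model_handler)                # emits PredictionResult objects
        | beam.Map(lambda result: result.inference)  # keep just the model's output
        | beam.Map(print)
    )
```

Swapping in a PyTorch or TensorFlow handler only changes how the `ModelHandler` is constructed; the rest of the pipeline stays the same.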
When you run the pipeline with the Dataflow runner, different RunInference metrics are available with CPU and with GPU. This example demonstrates both types of metrics. diff --git a/website/www/site/content/en/documentation/patterns/bqml.md b/website/www/site/content/en/documentation/patterns/bqml.md index e56802fb64004..1ec70ab233857 100644 --- a/website/www/site/content/en/documentation/patterns/bqml.md +++ b/website/www/site/content/en/documentation/patterns/bqml.md @@ -60,7 +60,7 @@ bq extract -m bqml_tutorial.sample_model gs://some/gcs/path ## Create an Apache Beam transform that uses your BigQuery ML model -In this section we will construct an Apache Beam pipeline that will use the BigQuery ML model we just created and exported. The model can be served using Google Cloud AI Platform Prediction - for this please refer to the [AI Platform patterns](https://beam.apache.org/documentation/patterns/ai-platform/). In this case, we'll be illustrating how to use the tfx_bsl library to do local predictions (on your Apache Beam workers). +In this section we will construct an Apache Beam pipeline that will use the BigQuery ML model we just created and exported. The model can be served using Google Cloud AI Platform Prediction - for this please refer to the [AI Platform patterns](/documentation/patterns/ai-platform/). In this case, we'll be illustrating how to use the tfx_bsl library to do local predictions (on your Apache Beam workers). First, the model needs to be downloaded to a local directory where you will be developing the rest of your pipeline (e.g. to `serving_dir/sample_model/1`). diff --git a/website/www/site/content/en/documentation/patterns/grouping-elements-for-efficient-external-service-calls.md b/website/www/site/content/en/documentation/patterns/grouping-elements-for-efficient-external-service-calls.md index b0081ee620123..2c8a99b5cca77 100644 --- a/website/www/site/content/en/documentation/patterns/grouping-elements-for-efficient-external-service-calls.md +++ b/website/www/site/content/en/documentation/patterns/grouping-elements-for-efficient-external-service-calls.md @@ -26,7 +26,7 @@ State is kept on a per-key and per-windows basis, and as such, the input to your Examples of use cases are: assigning a unique ID to each element, joining streams of data in 'more exotic' ways, or batching up API calls to external services. In this section we'll go over the last one in particular. -Make sure to check the [docs](https://beam.apache.org/documentation/programming-guide/#state-and-timers) for deeper understanding on state and timers. +Make sure to check the [docs](/documentation/programming-guide/#state-and-timers) for deeper understanding on state and timers. The `GroupIntoBatches`-transform uses state and timers under the hood to allow the user to exercise tight control over the following parameters: diff --git a/website/www/site/content/en/documentation/programming-guide.md b/website/www/site/content/en/documentation/programming-guide.md index f6585127a8a5d..cd829d6caf56c 100644 --- a/website/www/site/content/en/documentation/programming-guide.md +++ b/website/www/site/content/en/documentation/programming-guide.md @@ -38,11 +38,11 @@ programming guide, take a look at the The Python SDK supports Python 3.7, 3.8, 3.9, and 3.10. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} The Go SDK supports Go v1.18+. SDK release 2.32.0 is the last experimental version. 
{{< /paragraph >}} -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} The Typescript SDK supports Node v16+ and is still experimental. {{< /paragraph >}} @@ -168,7 +168,7 @@ configuration required by the chosen runner. Your pipeline options will potentially include information such as your project ID or a location for storing files. -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} When you run the pipeline on a runner of your choice, a copy of the PipelineOptions will be available to your code. For example, if you add a PipelineOptions parameter to a DoFn's `@ProcessElement` method, it will be populated by the system. @@ -187,12 +187,12 @@ To read options from the command-line, construct your `PipelineOptions` object as demonstrated in the following example code: {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Use Go flags to parse command line arguments to configure your pipeline. Flags must be parsed before `beam.Init()` is called. {{< /paragraph >}} -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} Any Javascript object can be used as pipeline options. One can either construct one manually, but it is also common to pass an object created from command line options such as `yargs.argv`. @@ -233,7 +233,7 @@ Building your `PipelineOptions` this way lets you specify any of the options as a command-line argument. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Defining flag variables this way lets you specify any of the options as a command-line argument. {{< /paragraph >}} @@ -245,7 +245,7 @@ Defining flag variables this way lets you specify any of the options as a comman You can add your own custom options in addition to the standard `PipelineOptions`. -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} To add your own options, define an interface with getter and setter methods for each option. {{< /paragraph >}} @@ -308,7 +308,7 @@ For Python, you can also simply parse your custom options with argparse; there is no need to create a separate PipelineOptions subclass. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} It's recommended that you register your interface with `PipelineOptionsFactory` and then pass the interface when creating the `PipelineOptions` object. When you register your interface with `PipelineOptionsFactory`, the `--help` can find @@ -317,7 +317,7 @@ your custom options interface and add it to the output of the `--help` command. compatible with all other registered options. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} The following example code shows how to register your custom options interface with `PipelineOptionsFactory`: {{< /paragraph >}} @@ -409,13 +409,13 @@ various data sources supported by the Beam SDK. #### 3.1.2. Creating a PCollection from in-memory data {#creating-pcollection-in-memory} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} To create a `PCollection` from an in-memory Java `Collection`, you use the Beam-provided `Create` transform. Much like a data adapter's `Read`, you apply `Create` directly to your `Pipeline` object itself. 
{{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} As parameters, `Create` accepts the Java `Collection` and a `Coder` object. The `Coder` specifies how the elements in the `Collection` should be [encoded](#element-type). @@ -427,12 +427,12 @@ To create a `PCollection` from an in-memory `list`, you use the Beam-provided itself. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} To create a `PCollection` from an in-memory `slice`, you use the Beam-provided `beam.CreateList` transform. Pass the pipeline `scope`, and the `slice` to this transform. {{< /paragraph >}} -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} To create a `PCollection` from an in-memory `array`, you use the Beam-provided `Create` transform. Apply this transform directly to your `Root` object. {{< /paragraph >}} @@ -627,7 +627,7 @@ nested within (called [composite transforms](#composite-transforms) in the Beam SDKs). {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} It's recommended to create a new variable for each new `PCollection` to sequentially transform input data. `Scope`s can be used to create functions that contain other transforms @@ -718,13 +718,13 @@ nest multiple transforms inside a single, larger transform. Composite transforms are particularly useful for building a reusable sequence of simple steps that get used in a lot of different places. -{{< paragraph class="language-python" >}} +{{< paragraph class="language-python">}} The pipe syntax allows one to apply PTransforms to `tuple`s and `dict`s of PCollections as well for those transforms accepting multiple inputs (such as `Flatten` and `CoGroupByKey`). {{< /paragraph >}} -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} PTransforms can also be applied to any `PValue`, which include the Root object, PCollections, arrays of `PValue`s, and objects with `PValue` values. One can apply transforms to these composite types by wrapping them with @@ -732,7 +732,7 @@ One can apply transforms to these composite types by wrapping them with `beam.P({left: pcollA, right: pcollB}).apply(transformExpectingTwoPCollections)`. {{< /paragraph >}} -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} PTransforms come in two flavors, synchronous and asynchronous, depending on whether their *application** involves asynchronous invocations. An `AsyncTransform` must be applied with `applyAsync` and returns a `Promise` @@ -751,7 +751,7 @@ processing paradigm: * `Flatten` * `Partition` -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} The Typescript SDK provides some of the most basic of these transforms as methods on `PCollection` itself. {{< /paragraph >}} @@ -799,7 +799,7 @@ processing function. -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} All DoFns should be registered using a generic `register.DoFnXxY[...]` function. 
This allows the Go SDK to infer an encoding from any inputs/outputs, registers the DoFn for execution on remote runners, and optimizes the runtime @@ -832,7 +832,7 @@ input `PCollection` and passing `ParDo` as an argument, as shown in the following example code: {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} `beam.ParDo` applies the passed in `DoFn` argument to the input `PCollection`, as shown in the following example code: {{< /paragraph >}} @@ -894,7 +894,7 @@ define your pipeline's exact data processing tasks. > for writing user code for Beam transforms](#requirements-for-writing-user-code-for-beam-transforms) > and ensure that your code follows them. -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} A `DoFn` processes one element at a time from the input `PCollection`. When you create a subclass of `DoFn`, you'll need to provide type parameters that match the types of the input and output elements. If your `DoFn` processes incoming @@ -903,7 +903,7 @@ the types of the input and output elements. If your `DoFn` processes incoming look like this: {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} A `DoFn` processes one element at a time from the input `PCollection`. When you create a `DoFn` struct, you'll need to provide type parameters that match the types of the input and output elements in a ProcessElement method. @@ -936,7 +936,7 @@ func init() { } {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} Inside your `DoFn` subclass, you'll write a method annotated with `@ProcessElement` where you provide the actual processing logic. You don't need to manually extract the elements from the input collection; the Beam SDKs handle @@ -959,7 +959,7 @@ elements with `yield` statements. You can also use a `return` statement with an iterable, like a list or a generator. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} For your `DoFn` type, you'll write a method `ProcessElement` where you provide the actual processing logic. You don't need to manually extract the elements from the input collection; the Beam SDKs handle that for you. Your `ProcessElement` method @@ -991,7 +991,7 @@ static class ComputeWordLengthFn extends DoFn { {{< code_sample "sdks/typescript/test/docs/programming_guide.ts" model_pardo_pardo >}} {{< /highlight >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Simple DoFns can also be written as functions. {{< /paragraph >}} @@ -1007,7 +1007,7 @@ func init() { } {{< /highlight >}} - + > **Note:** Whether using a structural `DoFn` type or a functional `DoFn`, they should be registered with > beam in an `init` block. Otherwise they may not execute on distributed runners. @@ -1172,14 +1172,14 @@ words = ... {{< code_sample "sdks/typescript/test/docs/programming_guide.ts" model_pardo_using_map >}} {{< /highlight >}} - + > **Note:** You can use Java 8 lambda functions with several other Beam > transforms, including `Filter`, `FlatMapElements`, and `Partition`. - + > **Note:** Anonymous function DoFns may not work on distributed runners. > It's recommended to use named functions and register them with `register.FunctionXxY` in @@ -1343,7 +1343,7 @@ windowing](#setting-your-pcollections-windowing-function) or an [GroupByKey and unbounded PCollections](#groupbykey-and-unbounded-pcollections) for more details. 
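A small sketch of the Java `GroupByKey` usage discussed above, followed by a `ParDo` that consumes the grouped values. This is a fragment in the guide's own style; the `wordsPerDoc` input and the per-word totalling are assumptions for illustration.

```java
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.GroupByKey;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;

// Assumed input: one (word, occurrences-in-one-document) pair per element.
PCollection<KV<String, Integer>> wordsPerDoc = ...;

// GroupByKey collects every value observed for a key into a single Iterable.
PCollection<KV<String, Iterable<Integer>>> grouped =
    wordsPerDoc.apply(GroupByKey.<String, Integer>create());

// Consume the grouped values, e.g. to total the occurrences of each word across documents.
PCollection<KV<String, Integer>> totals =
    grouped.apply(
        ParDo.of(
            new DoFn<KV<String, Iterable<Integer>>, KV<String, Integer>>() {
              @ProcessElement
              public void process(
                  @Element KV<String, Iterable<Integer>> element,
                  OutputReceiver<KV<String, Integer>> out) {
                int sum = 0;
                for (int count : element.getValue()) {
                  sum += count;
                }
                out.output(KV.of(element.getKey(), sum));
              }
            }));
```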
-{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} In the Beam SDK for Java, `CoGroupByKey` accepts a tuple of keyed `PCollection`s (`PCollection>`) as input. For type safety, the SDK requires you to pass each `PCollection` as part of a `KeyedPCollectionTuple`. @@ -1364,7 +1364,7 @@ In the Beam SDK for Python, `CoGroupByKey` accepts a dictionary of keyed iterable of the values under they key in the corresponding `PCollection`. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} In the Beam Go SDK, `CoGroupByKey` accepts an arbitrary number of `PCollection`s as input. As output, `CoGroupByKey` creates a single output `PCollection` that groups each key with value iterator functions for each @@ -1375,7 +1375,7 @@ input `PCollection`. The iterator functions map to input `PCollections` in The following conceptual examples use two input collections to show the mechanics of `CoGroupByKey`. -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} The first set of data has a `TupleTag` called `emailsTag` and contains names and email addresses. The second set of data has a `TupleTag` called `phonesTag` and contains names and phone numbers. @@ -1429,7 +1429,7 @@ followed by a `ParDo` to consume the result. Then, the code uses tags to look up and format data from each collection. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} The following code example joins the two `PCollection`s with `CoGroupByKey`, followed by a `ParDo` to consume the result. The ordering of the `DoFn` iterator parameters maps to the ordering of the `CoGroupByKey` inputs. @@ -1545,7 +1545,7 @@ public static class SumInts implements SerializableFunction, I {{< code_sample "sdks/typescript/test/docs/programming_guide.ts" combine_simple_sum >}} {{< /highlight >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} All Combiners should be registered using a generic `register.CombinerX[...]` function. This allows the Go SDK to infer an encoding from any inputs/outputs, registers the Combiner for execution on remote runners, and optimizes the runtime @@ -1737,7 +1737,7 @@ applying `Combine`: -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} If your `PCollection` uses any non-global windowing function, the Beam Go SDK behaves the same way as with global windowing. Windows that are empty in the input `PCollection` will likewise be empty in the output collection. @@ -1874,7 +1874,7 @@ is a Beam transform for `PCollection` objects that store the same data type. `Partition` splits a single `PCollection` into a fixed number of smaller collections. -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} Often in the Typescript SDK the `Split` transform is more natural to use. {{< /paragraph >}} @@ -2052,7 +2052,7 @@ processing each element in the input `PCollection`, but the additional data needs to be determined at runtime (and not hard-coded). Such values might be determined by the input data, or depend on a different branch of your pipeline. -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} All side input iterables should be registered using a generic `register.IterX[...]` function. This optimizes runtime execution of the iterable. {{< /paragraph >}} @@ -2172,7 +2172,7 @@ returns all of the output `PCollection`s (including the main output) bundled together. 
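A hedged sketch of the Java multiple-output pattern described above, using two hypothetical tags to split words by length; the tag names and the length threshold are illustrative.

```java
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionTuple;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.sdk.values.TupleTagList;

// Assumed input of words.
PCollection<String> words = ...;

final TupleTag<String> shortWordsTag = new TupleTag<String>() {};
final TupleTag<String> longWordsTag = new TupleTag<String>() {};

PCollectionTuple results =
    words.apply(
        ParDo.of(
                new DoFn<String, String>() {
                  @ProcessElement
                  public void process(@Element String word, MultiOutputReceiver out) {
                    if (word.length() <= 5) {
                      out.get(shortWordsTag).output(word); // main output
                    } else {
                      out.get(longWordsTag).output(word); // additional output
                    }
                  }
                })
            .withOutputTags(shortWordsTag, TupleTagList.of(longWordsTag)));

// The returned tuple bundles all outputs, including the main one.
PCollection<String> shortWords = results.get(shortWordsTag);
PCollection<String> longWords = results.get(longWordsTag);
```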
{{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} While `beam.ParDo` always produces an output `PCollection`, your `DoFn` can produce any number of additional output `PCollections`s, or even none at all. If you choose to have multiple outputs, your `DoFn` needs to be called with the `ParDo` @@ -2181,7 +2181,7 @@ function that matches the number of outputs. `beam.ParDo2` for two output `PColl use `beam.ParDoN` which will return a `[]beam.PCollection`. {{< /paragraph >}} -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} While `ParDo` always produces a main output `PCollection` (as the return value from `apply`). If you want to have multiple outputs, emit an object with distinct properties in your `ParDo` operation and follow this operation with a `Split` @@ -2190,7 +2190,7 @@ to break it into multiple `PCollection`s. #### 4.5.1. Tags for multiple outputs {#output-tags} -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} The `Split` PTransform will take a PCollection of elements of the form `{tagA?: A, tagB?: B, ...}` and return a object `{tagA: PCollection, tagB: PCollection, ...}`. @@ -2199,7 +2199,7 @@ unknown tags are handled can be specified by passing a non-default `SplitOptions` instance. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} The Go SDK doesn't use output tags, and instead uses positional ordering for multiple output PCollections. {{< /paragraph >}} @@ -2277,18 +2277,18 @@ multiple output PCollections. #### 4.5.2. Emitting to multiple outputs in your DoFn {#multiple-outputs-dofn} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Call emitter functions as needed to produce 0 or more elements for its matching `PCollection`. The same value can be emitted with multiple emitters. As normal, do not mutate values after emitting them from any emitter. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} All emitters should be registered using a generic `register.EmitterX[...]` function. This optimizes runtime execution of the emitter. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} DoFns can also return a single element via the standard return. The standard return is always the first PCollection returned from beam.ParDo. Other emitters output to their own PCollections in their defined parameter order. @@ -2340,7 +2340,7 @@ Other emitters output to their own PCollections in their defined parameter order #### 4.5.3. Accessing additional parameters in your DoFn {#other-dofn-parameters} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} In addition to the element and the `OutputReceiver`, Beam will populate other parameters to your DoFn's `@ProcessElement` method. Any combination of these parameters can be added to your process method in any order. {{< /paragraph >}} @@ -2350,17 +2350,17 @@ In addition to the element, Beam will populate other parameters to your DoFn's ` Any combination of these parameters can be added to your process method in any order. {{< /paragraph >}} -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} In addition to the element, Beam will populate other parameters to your DoFn's `process` method. These are available by placing accessors in the context argument, just as for side inputs. 
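For the Java case above, a sketch of a `@ProcessElement` method that requests several of the extra parameters at once; the class name and the output string are placeholders.

```java
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.joda.time.Instant;

public class LogContextFn extends DoFn<String, String> {
  @ProcessElement
  public void process(
      @Element String word,
      @Timestamp Instant timestamp,  // event-time timestamp of the element
      BoundedWindow window,          // window the element falls into
      PipelineOptions options,       // the pipeline's options
      OutputReceiver<String> out) {
    out.output(
        word + " @ " + timestamp + " in " + window + " (job: " + options.getJobName() + ")");
  }
}
```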
{{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} In addition to the element, Beam will populate other parameters to your DoFn's `ProcessElement` method. Any combination of these parameters can be added to your process method in a standard order. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} **context.Context:** To support consolidated logging and user defined metrics, a `context.Context` parameter can be requested. Per Go conventions, if present it's required to be the first parameter of the `DoFn` method. @@ -2370,7 +2370,7 @@ Per Go conventions, if present it's required to be the first parameter of the `D func MyDoFn(ctx context.Context, word string) string { ... } {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} **Timestamp:** To access the timestamp of an input element, add a parameter annotated with `@Timestamp` of type `Instant`. For example: {{< /paragraph >}} @@ -2380,12 +2380,12 @@ To access the timestamp of an input element, add a parameter annotated with `@Ti To access the timestamp of an input element, add a keyword parameter default to `DoFn.TimestampParam`. For example: {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} **Timestamp:** To access the timestamp of an input element, add a `beam.EventTime` parameter before the element. For example: {{< /paragraph >}} -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} **Timestamp:** To access the window an input element falls into, add a `pardo.windowParam()` to the context argument. {{< /paragraph >}} @@ -2415,7 +2415,7 @@ func MyDoFn(ts beam.EventTime, word string) string { ... } {{< code_sample "sdks/typescript/test/docs/programming_guide.ts" timestamp_param >}} {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} **Window:** To access the window an input element falls into, add a parameter of the type of the window used for the input `PCollection`. If the parameter is a window type (a subclass of `BoundedWindow`) that does not match the input `PCollection`, then an error @@ -2431,7 +2431,7 @@ If an element falls in multiple windows (for example, this will happen when usin `process` method will be invoked multiple time for the element, once for each window. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} **Window:** To access the window an input element falls into, add a `beam.Window` parameter before the element. If an element falls in multiple windows (for example, this will happen when using SlidingWindows), @@ -2440,7 +2440,7 @@ Since `beam.Window` is an interface it's possible to type assert to the concrete For example, when fixed windows are being used, the window is of type `window.IntervalWindow`. {{< /paragraph >}} -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} **Window:** To access the window an input element falls into, add a `pardo.windowParam()` to the context argument. 
If an element falls in multiple windows (for example, this will happen when using `SlidingWindows`), then the @@ -2475,7 +2475,7 @@ func MyDoFn(w beam.Window, word string) string { {{< code_sample "sdks/typescript/test/docs/programming_guide.ts" window_param >}} {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} **PaneInfo:** When triggers are used, Beam provides a `PaneInfo` object that contains information about the current firing. Using `PaneInfo` you can determine whether this is an early or a late firing, and how many times this window has already fired for this key. @@ -2488,13 +2488,13 @@ you can determine whether this is an early or a late firing, and how many times This feature implementation in Python SDK is not fully completed; see more at [Issue 17821](https://github.com/apache/beam/issues/18721). {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} **PaneInfo:** When triggers are used, Beam provides `beam.PaneInfo` object that contains information about the current firing. Using `beam.PaneInfo` you can determine whether this is an early or a late firing, and how many times this window has already fired for this key. {{< /paragraph >}} -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} **Window:** To access the window an input element falls into, add a `pardo.paneInfoParam()` to the context argument. Using `beam.PaneInfo` you can determine whether this is an early or a late firing, @@ -2526,7 +2526,7 @@ class ProcessRecord(beam.DoFn): {{< code_sample "sdks/typescript/test/docs/programming_guide.ts" pane_info_param >}} {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} **PipelineOptions:** The `PipelineOptions` for the current pipeline can always be accessed in a process method by adding it as a parameter: @@ -2538,7 +2538,7 @@ as a parameter: }}) {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} `@OnTimer` methods can also access many of these parameters. Timestamp, Window, key, `PipelineOptions`, `OutputReceiver`, and `MultiOutputReceiver` parameters can all be accessed in an `@OnTimer` method. In addition, an `@OnTimer` method can take a parameter of type `TimeDomain` which tells whether the timer is based on event time or processing time. @@ -2553,7 +2553,7 @@ Timers and States are explained in more detail in the [Timely (and Stateful) Processing with Apache Beam](/blog/2017/08/28/timely-processing.html) blog post. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} **Timer and State:** User defined State parameters can be used in a stateful DoFn. Timers aren't implemented in the Go SDK yet; see more at [Issue 22737](https://github.com/apache/beam/issues/22737). Once implemented, user defined Timer @@ -2562,10 +2562,10 @@ Timers and States are explained in more detail in the [Timely (and Stateful) Processing with Apache Beam](/blog/2017/08/28/timely-processing.html) blog post. {{< /paragraph >}} -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} **Timer and State:** This feature isn't yet implemented in the Typescript SDK, -but we welcome [contributions](https://beam.apache.org/contribute/). +but we welcome [contributions](/contribute/). 
In the meantime, Typescript pipelines wishing to use state and timers can do so using [cross-language transforms](#use-x-lang-transforms). {{< /paragraph >}} @@ -2704,7 +2704,7 @@ transform operations: #### 4.6.2. Creating a composite transform {#composite-transform-creation} -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} A PTransform in the Typescript SDK is simply a function that accepts and returns `PValue`s such as `PCollection`s. {{< /paragraph >}} @@ -2716,20 +2716,20 @@ You can then use this transform just as you would a built-in transform from the Beam SDK. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} For the `PTransform` class type parameters, you pass the `PCollection` types that your transform takes as input, and produces as output. To take multiple `PCollection`s as input, or produce multiple `PCollection`s as output, use one of the multi-collection types for the relevant type parameter. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} To create your own composite `PTransform` call the `Scope` method on the current pipeline scope variable. Transforms passed this new sub-`Scope` will be a part of the same composite `PTransform`. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} To be able to re-use your Composite, build it inside a normal Go function or method. This function is passed a scope and input PCollections, and returns any output PCollections it produces. **Note:** Such functions cannot be passed directly to @@ -2769,7 +2769,7 @@ The following code sample shows how to override `expand` for the `ComputeWordLengths` class declared in the previous example: {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} The following code sample shows how to call the `CountWords` composite PTransform, adding it to your pipeline: {{< /paragraph >}} @@ -2802,7 +2802,7 @@ transforms can include core transforms, composite transforms, or the transforms included in the Beam SDK libraries. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Your composite `PTransform`s can include as many transforms as you want. These transforms can include core transforms, other composite transforms, or the transforms included in the Beam SDK libraries. They can also consume and return as many @@ -3075,7 +3075,7 @@ While schemas themselves are language independent, they are designed to embed na of the Beam SDK being used. This allows Beam users to continue using native types while reaping the advantage of having Beam understand their element schemas. -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} In Java you could use the following set of classes to represent the purchase schema. Beam will automatically infer the correct schema based on the members of the class. {{< /paragraph >}} @@ -3084,12 +3084,12 @@ infer the correct schema based on the members of the class. In Python you can use the following set of classes to represent the purchase schema. Beam will automatically infer the correct schema based on the members of the class. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} In Go, schema encoding is used by default for struct types, with Exported fields becoming part of the schema. 
Beam will automatically infer the schema based on the fields and field tags of the struct, and their order. {{< /paragraph >}} -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} In Typescript, JSON objects are used to represent schema'd data. Unfortunately type information in Typescript is not propagated to the runtime layer, so it needs to be manually specified in some places (e.g. when using cross-language pipelines). @@ -3168,7 +3168,7 @@ class Transaction(typing.NamedTuple): {{< code_sample "sdks/typescript/test/docs/programming_guide.ts" schema_def >}} {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} Using JavaBean classes as above is one way to map a schema to Java classes. However multiple Java classes might have the same schema, in which case the different Java types can often be used interchangeably. Beam will add implicit conversions between types that have matching schemas. For example, the above @@ -3183,7 +3183,7 @@ public class TransactionPojo { } {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} So if we had two `PCollection`s as follows {{< /paragraph >}} @@ -3192,7 +3192,7 @@ PCollection transactionBeans = readTransactionsAsJavaBean(); PCollection transactionPojos = readTransactionsAsPojo(); {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} Then these two `PCollection`s would have the same schema, even though their Java types would be different. This means for example the following two code snippets are valid: {{< /paragraph >}} @@ -3205,7 +3205,7 @@ transactionBeans.apply(ParDo.of(new DoFn<...>() { })); {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} and {{< /paragraph >}} @@ -3216,7 +3216,7 @@ transactionPojos.apply(ParDo.of(new DoFn<...>() { })); {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} Even though the in both cases the `@Element` parameter differs from the `PCollection`'s Java type, since the schemas are the same Beam will automatically make the conversion. The built-in `Convert` transform can also be used to translate between Java types of equivalent schemas, as detailed below. @@ -3320,18 +3320,18 @@ Logical types are also specified by an argument, which allows creating a class o limited-precision decimal type would have an integer argument indicating how many digits of precision are represented. The argument is represented by a schema type, so can itself be a complex type. -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} In Java, a logical type is specified as a subclass of the `LogicalType` class. A custom Java class can be specified to represent the logical type and conversion functions must be supplied to convert back and forth between this Java class and the underlying Schema type representation. For example, the logical type representing nanosecond timestamp might be implemented as follows {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} In Go, a logical type is specified with a custom implementation of the `beam.SchemaProvider` interface. 
For example, the logical type provider representing nanosecond timestamps might be implemented as follows {{< /paragraph >}} -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} In Typescript, a logical type defined by the [LogicalTypeInfo](https://github.com/apache/beam/blob/master/sdks/typescript/src/apache_beam/coders/row_coder.ts) interface which associates a logical type's URN with its representation and its conversion to and from this representation. @@ -3382,7 +3382,7 @@ Currently the Python SDK provides minimal convenience logical types, other than to handle `MicrosInstant`. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Currently the Go SDK provides minimal convenience logical types, other than to handle additional integer primitives, and `time.Time`. {{< /paragraph >}} @@ -3393,11 +3393,11 @@ other than to handle additional integer primitives, and `time.Time`. This convenience builder doesn't yet exist for the Python SDK. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} This convenience builder doesn't yet exist for the Go SDK. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} This logical type allows creating an enumeration type consisting of a set of named constants. {{< /paragraph >}} @@ -3408,7 +3408,7 @@ Schema schema = Schema.builder() .build(); {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} The value of this field is stored in the row as an INT32 type, however the logical type defines a value type that lets you access the enumeration either as a string or a value. For example: {{< /paragraph >}} @@ -3419,7 +3419,7 @@ enumValue.getValue(); // Returns 0, the integer value of the constant. enumValue.toString(); // Returns "RED", the string value of the constant {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} Given a row object with an enumeration field, you can also extract the field as the enumeration value. {{< /paragraph >}} @@ -3427,7 +3427,7 @@ Given a row object with an enumeration field, you can also extract the field as EnumerationType.Value enumValue = row.getLogicalTypeValue("color", EnumerationType.Value.class); {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} Automatic schema inference from Java POJOs and JavaBeans automatically converts Java enums to EnumerationType logical types. {{< /paragraph >}} @@ -3438,11 +3438,11 @@ types. This convenience builder doesn't yet exist for the Python SDK. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} This convenience builder doesn't yet exist for the Go SDK. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} OneOfType allows creating a disjoint union type over a set of schema fields. For example: {{< /paragraph >}} @@ -3456,7 +3456,7 @@ Schema schema = Schema.builder() .build(); {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} The value of this field is stored in the row as another Row type, where all the fields are marked as nullable. 
The logical type however defines a Value object that contains an enumeration value indicating which field was set and allows getting just that field: @@ -3482,7 +3482,7 @@ switch (oneOfValue.getCaseEnumType().toString()) { } {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} In the above example we used the field names in the switch statement for clarity, however the enum integer values could also be used. {{< /paragraph >}} @@ -3499,36 +3499,36 @@ In addition, often Beam pipelines have intermediate stages and types, and those {{< language-switcher java py go typescript >}} -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} Unfortunately, Beam is unable to access Typescript's type information at runtime. Schemas must be manually declared with `beam.withRowCoder`. On the other hand, schema-aware operations such as `GroupBy` can be used without an explicit schema declared. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} Beam is able to infer schemas from a variety of common Java types. The `@DefaultSchema` annotation can be used to tell Beam to infer schemas from a specific type. The annotation takes a `SchemaProvider` as an argument, and `SchemaProvider` classes are already built in for common Java types. The `SchemaRegistry` can also be invoked programmatically for cases where it is not practical to annotate the Java type itself. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} **Java POJOs** {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} A POJO (Plain Old Java Object) is a Java object that is not bound by any restriction other than the Java Language Specification. A POJO can contain member variables that are primitives, that are other POJOs, or are collections maps or arrays thereof. POJOs do not have to extend prespecified classes or extend any specific interfaces. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} If a POJO class is annotated with `@DefaultSchema(JavaFieldSchema.class)`, Beam will automatically infer a schema for this class. Nested classes are supported as are classes with `List`, array, and `Map` fields. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} For example, annotating the following class tells Beam to infer a schema from this POJO class and apply it to any `PCollection`. {{< /paragraph >}} @@ -3548,14 +3548,14 @@ public class TransactionPojo { PCollection pojos = readPojos(); {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} The `@SchemaCreate` annotation tells Beam that this constructor can be used to create instances of TransactionPojo, assuming that constructor parameters have the same names as the field names. `@SchemaCreate` can also be used to annotate static factory methods on the class, allowing the constructor to remain private. If there is no `@SchemaCreate` annotation then all the fields must be non-final and the class must have a zero-argument constructor. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} There are a couple of other useful annotations that affect how Beam infers schemas. By default the schema field names inferred will match that of the class field names. 
However `@SchemaFieldName` can be used to specify a different name to be used for the schema field. `@SchemaIgnore` can be used to mark specific class fields as excluded from the inferred @@ -3564,7 +3564,7 @@ schema. For example, it’s common to have ephemeral fields in a class that shou exclude these fields. Note that ignored fields will not be included in the encoding of these records. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} In some cases it is not convenient to annotate the POJO class, for example if the POJO is in a different package that is not owned by the Beam pipeline author. In these cases the schema inference can be triggered programmatically in pipeline’s main function as follows: @@ -3574,11 +3574,11 @@ pipeline’s main function as follows: pipeline.getSchemaRegistry().registerPOJO(TransactionPOJO.class); {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} **Java Beans** {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} Java Beans are a de-facto standard for creating reusable property classes in Java. While the full standard has many characteristics, the key ones are that all properties are accessed via getter and setter classes, and the name format for these getters and setters is standardized. A Java Bean class can be annotated with @@ -3598,7 +3598,7 @@ public class TransactionBean { PCollection beans = readBeans(); {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} The `@SchemaCreate` annotation can be used to specify a constructor or a static factory method, in which case the setters and zero-argument constructor can be omitted. {{< /paragraph >}} @@ -3613,21 +3613,21 @@ public class TransactionBean { } {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} `@SchemaFieldName` and `@SchemaIgnore` can be used to alter the schema inferred, just like with POJO classes. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} **AutoValue** {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} Java value classes are notoriously difficult to generate correctly. There is a lot of boilerplate you must create in order to properly implement a value class. AutoValue is a popular library for easily generating such classes by implementing a simple abstract base class. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} Beam can infer a schema from an AutoValue class. For example: {{< /paragraph >}} @@ -3640,12 +3640,12 @@ public abstract class TransactionValue { } {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} This is all that’s needed to generate a simple AutoValue class, and the above `@DefaultSchema` annotation tells Beam to infer a schema from it. This also allows AutoValue elements to be used inside of `PCollection`s. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} `@SchemaFieldName` and `@SchemaIgnore` can be used to alter the schema inferred. 
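A sketch showing `@SchemaFieldName` and `@SchemaIgnore` on the POJO form described above; the class and field names are illustrative.

```java
import org.apache.beam.sdk.schemas.JavaFieldSchema;
import org.apache.beam.sdk.schemas.annotations.DefaultSchema;
import org.apache.beam.sdk.schemas.annotations.SchemaFieldName;
import org.apache.beam.sdk.schemas.annotations.SchemaIgnore;

@DefaultSchema(JavaFieldSchema.class)
public class TransactionRecord {
  // The inferred schema field will be named "bank_name" instead of "bank".
  @SchemaFieldName("bank_name")
  public String bank;

  public double purchaseAmount;

  // Excluded from the inferred schema (and therefore from the encoding of the record).
  @SchemaIgnore
  public String debugNote;
}
```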
{{< /paragraph >}} @@ -3713,15 +3713,15 @@ output_pc = input_pc | beam.Map(lambda item: beam.Row(bank=str(item["bank"]), purchase_amount=float(item["purchase_amount"]))) {{< /highlight >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Beam currently only infers schemas for exported fields in Go structs. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} **Structs** {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Beam will automatically infer schemas for all Go structs used as PCollection elements, and default to encoding them using schema encoding. @@ -3736,14 +3736,14 @@ type Transaction struct{ } {{< /highlight >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Unexported fields are ignored, and cannot be automatically inferred as part of the schema. Fields of type func, channel, unsafe.Pointer, or uintptr will be ignored by inference. Fields of interface types are ignored, unless a schema provider is registered for them. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} By default, schema field names will match the exported struct field names. In the above example, "Bank" and "PurchaseAmount" are the schema field names. A schema field name can be overridden with a struct tag for the field. @@ -3756,7 +3756,7 @@ type Transaction struct{ } {{< /highlight >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Overriding schema field names is useful for compatibility cross language transforms, as schema fields may have different requirements or restrictions from Go exported fields. {{< /paragraph >}} @@ -3767,7 +3767,7 @@ A schema on a `PCollection` enables a rich variety of relational transforms. The named fields allows for simple and readable aggregations that reference fields by name, similar to the aggregations in a SQL expression. -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Beam does not yet support Schema transforms natively in Go. However, it will be implemented with the following behavior. {{< /paragraph >}} @@ -3805,11 +3805,11 @@ output_pc = input_pc | beam.Select("user_id") Support for Nested fields hasn't been developed for the Python SDK yet. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Support for Nested fields hasn't been developed for the Go SDK yet. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} Individual nested fields can be specified using the dot operator. For example, to select just the postal code from the shipping address one would write {{< /paragraph >}} @@ -3828,11 +3828,11 @@ output_pc = input_pc | beam.Select(post_code=lambda item: str(item["shipping_add Support for wildcards hasn't been developed for the Python SDK yet. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Support for wildcards hasn't been developed for the Go SDK yet. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} The * operator can be specified at any nesting level to represent all fields at that level. 
For example, to select all shipping-address fields one would write {{< /paragraph >}} @@ -3849,7 +3849,7 @@ output_pc = input_pc | beam.Select("shipping_address.*")) {{< /highlight >}} --> ##### **Arrays** -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} An array field, where the array element type is a row, can also have subfields of the element type addressed. When selected, the result is an array of the selected subfield type. For example {{< /paragraph >}} @@ -3858,7 +3858,7 @@ selected, the result is an array of the selected subfield type. For example Support for Array fields hasn't been developed for the Python SDK yet. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Support for Array fields hasn't been developed for the Go SDK yet. {{< /paragraph >}} @@ -3866,12 +3866,12 @@ Support for Array fields hasn't been developed for the Go SDK yet. purchases.apply(Select.fieldNames("transactions[].bank")); {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} Will result in a row containing an array field with element-type string, containing the list of banks for each transaction. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} While the use of [] brackets in the selector is recommended, to make it clear that array elements are being selected, they can be omitted for brevity. In the future, array slicing will be supported, allowing selection of portions of the array. @@ -3914,7 +3914,7 @@ purchasesByType.apply(Select.fieldNames("purchases{}.userId")); Support for Map fields hasn't been developed for the Python SDK yet. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Support for Map fields hasn't been developed for the Go SDK yet. {{< /paragraph >}} @@ -3946,7 +3946,7 @@ purchases.apply(Select.fieldNames("userId", "shippingAddress.streetAddress")); Support for Nested fields hasn't been developed for the Python SDK yet. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Support for Nested fields hasn't been developed for the Go SDK yet. {{< /paragraph >}} @@ -3982,7 +3982,7 @@ purchases.apply(Select.fieldNames("userId", "shippingAddress.*")); Support for Wildcards hasn't been developed for the Python SDK yet. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Support for Wildcards hasn't been developed for the Go SDK yet. {{< /paragraph >}} @@ -4036,11 +4036,11 @@ purchases.apply(Select.fieldNames( "transactions.bank", "transactions.purchaseAm Support for nested fields hasn't been developed for the Python SDK yet. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Support for nested fields hasn't been developed for the Go SDK yet. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} Will result in the following schema @@ -4083,11 +4083,11 @@ purchases.apply(Select.flattenedSchema()); Support for nested fields hasn't been developed for the Python SDK yet. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Support for nested fields hasn't been developed for the Go SDK yet. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} Will result in the following schema
@@ -4146,7 +4146,7 @@ Will result in the following schema ##### **Grouping aggregations** -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} The `Group` transform allows simply grouping data by any number of fields in the input schema, applying aggregations to those groupings, and storing the result of those aggregations in a new schema field. The output of the `Group` transform has a schema with one field corresponding to each aggregation performed. @@ -4158,7 +4158,7 @@ those groupings, and storing the result of those aggregations in a new schema fi has a schema with one field corresponding to each aggregation performed. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} The simplest usage of `Group` specifies no aggregations, in which case all inputs matching the provided set of fields are grouped together into an `ITERABLE` field. For example {{< /paragraph >}} @@ -4177,7 +4177,7 @@ input_pc = ... # {"user_id": ...,"bank": ..., "purchase_amount": ...} output_pc = input_pc | beam.GroupBy('user_id','bank') {{< /highlight >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Support for schema-aware grouping hasn't been developed for the Go SDK yet. {{< /paragraph >}} @@ -4236,7 +4236,7 @@ output_pc = input_pc | beam.GroupBy("user_id") .aggregate_field("cost_cents", TopCombineFn, "top_purchases") {{< /highlight >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Support for schema-aware grouping hasn't been developed for the Go SDK yet. {{< /paragraph >}} @@ -4275,7 +4275,7 @@ and is specified with the `using` keyword: Support for joins hasn't been developed for the Python SDK yet. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Support for joins hasn't been developed for the Go SDK yet. {{< /paragraph >}} @@ -4286,7 +4286,7 @@ PCollection joined = transactions.apply( Join.innerJoin(reviews).using("userId", "productId")); {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} The resulting schema is the following:
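Editorial aside on the `Group` aggregation covered earlier in this hunk: a hedged Java sketch, assuming the guide's schema'd `Purchase` type with `userId` and `costCents` fields; the output field name is an assumption.

```java
import org.apache.beam.sdk.schemas.transforms.Group;
import org.apache.beam.sdk.transforms.Sum;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.Row;

// Assumed schema'd input.
PCollection<Purchase> purchases = ...;

// Group by user and sum the purchase cost; each aggregation becomes a field in the output schema.
PCollection<Row> totalsPerUser =
    purchases.apply(
        Group.<Purchase>byFieldNames("userId")
            .aggregateField("costCents", Sum.ofLongs(), "totalCostCents"));
```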
@@ -4318,7 +4318,7 @@ Review schema named those fields differently than the Transaction schema, then w Support for joins hasn't been developed for the Python SDK yet. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Support for joins hasn't been developed for the Go SDK yet. {{< /paragraph >}} @@ -4346,7 +4346,7 @@ that key. Support for joins hasn't been developed for the Python SDK yet. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Support for joins hasn't been developed for the Go SDK yet. {{< /paragraph >}} @@ -4456,7 +4456,7 @@ A `PCollection` with a schema can apply a `ParDo`, just like any other `PCollect ##### **Input conversion** -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Beam does not yet support input conversion in Go. {{< /paragraph >}} @@ -4538,7 +4538,7 @@ the elements of a given `PCollection` may be encoded and decoded. > typically be done explicitly, using transforms such as `ParDo` or > `MapElements`. -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} In the Beam SDK for Java, the type `Coder` provides the methods required for encoding and decoding data. The SDK for Java provides a number of Coder subclasses that work with a variety of standard Java types, such as Integer, @@ -4556,7 +4556,7 @@ Coder subclasses in the package. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Standard Go types like `int`, `int64` `float64`, `[]byte`, and `string` and more are coded using builtin coders. Structs and pointers to structs default using Beam Schema Row encoding. However, users can build and register custom coders with `beam.RegisterCoder`. @@ -4565,7 +4565,7 @@ You can find available Coder functions in the package. {{< /paragraph >}} -{{< paragraph class="language-typescript" >}} +{{< paragraph class="language-typescript">}} Standard Typescript types like `number`, `UInt8Array` and `string` and more are coded using builtin coders. Json objects and arrays are encoded via a BSON encoding. For these types, coders need not be specified unless interacting with cross-language transforms. @@ -4587,13 +4587,13 @@ based on its element type or the transform that produces it, however, in some cases the pipeline author will need to specify a `Coder` explicitly, or develop a `Coder` for their custom type. -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} You can explicitly set the coder for an existing `PCollection` by using the method `PCollection.setCoder`. Note that you cannot call `setCoder` on a `PCollection` that has been finalized (e.g. by calling `.apply` on it). {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} You can get the coder for an existing `PCollection` by using the method `getCoder`. This method will fail with an `IllegalStateException` if a coder has not been set and cannot be inferred for the given `PCollection`. @@ -4602,7 +4602,7 @@ not been set and cannot be inferred for the given `PCollection`. Beam SDKs use a variety of mechanisms when attempting to automatically infer the `Coder` for a `PCollection`. -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} Each pipeline object has a `CoderRegistry`. The `CoderRegistry` represents a mapping of Java types to the default coders that the pipeline should use for `PCollection`s of each type. 
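A sketch of interacting with the Java `CoderRegistry` described above; `MyCustomType` and `MyCustomTypeCoder` are hypothetical names, not part of the patch.

```java
import org.apache.beam.sdk.coders.CannotProvideCoderException;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.CoderRegistry;

CoderRegistry registry = pipeline.getCoderRegistry();

// Look up the default coder the pipeline would use for Integer elements.
try {
  Coder<Integer> intCoder = registry.getCoder(Integer.class);
} catch (CannotProvideCoderException e) {
  // No default coder is registered or inferable for this type.
}

// Register a default coder for a user-defined type (both names are hypothetical).
registry.registerCoderForClass(MyCustomType.class, new MyCustomTypeCoder());
```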
@@ -4614,12 +4614,12 @@ Python types to the default coder that should be used for `PCollection`s of each type. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} The Beam SDK for Go allows users to register default coder implementations with `beam.RegisterCoder`. {{< /paragraph >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} By default, the Beam SDK for Java automatically infers the `Coder` for the elements of a `PCollection` produced by a `PTransform` using the type parameter from the transform's function object, such as `DoFn`. In the case of `ParDo`, @@ -4641,7 +4641,7 @@ Python will automatically infer the default `Coder` for the output `PCollection` (in the default pipeline `CoderRegistry`, this is `BytesCoder`). {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} By default, the Beam SDK for Go automatically infers the `Coder` for the elements of an output `PCollection` by the output of the transform's function object, such as a `DoFn`. In the case of `ParDo`, for example a `DoFn` with the parameters of `v int, emit func(string)` accepts an input element of type `int` @@ -4659,7 +4659,7 @@ In such a case, the Beam SDK for Go will automatically infer the default `Coder` -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} When using `Create`, the simplest way to ensure that you have the correct coder is by invoking `withCoder` when you apply the `Create` transform. {{< /paragraph >}} @@ -4677,7 +4677,7 @@ types for any pipeline you create using the Beam SDK for JavaPython. The following table shows the standard mapping: -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}}
@@ -4779,7 +4779,7 @@ The following table shows the standard mapping: #### 7.2.1. Looking up a default coder {#default-coder-lookup} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} You can use the method `CoderRegistry.getCoder` to determine the default Coder for a Java type. You can access the `CoderRegistry` for a given pipeline by using the method `Pipeline.getCoderRegistry`. This allows you to determine @@ -4794,7 +4794,7 @@ for a Python type. You can use `coders.registry` to access the `CoderRegistry`. This allows you to determine (or set) the default Coder for a Python type. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} You can use the `beam.NewCoder` function to determine the default Coder for a Go type. {{< /paragraph >}} @@ -4813,7 +4813,7 @@ to get the `CoderRegistry` object, and then use the method to register a new `Coder` for the target type. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} To set the default Coder for a Go type you use the function `beam.RegisterCoder` to register a encoder and decoder functions for the target type. However, built in types like `int`, `string`, `float64`, etc cannot have their coders override. {{< /paragraph >}} @@ -4825,7 +4825,7 @@ The following example code demonstrates how to set a default Coder, in this case values for a pipeline. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} The following example code demonstrates how to set a custom Coder for `MyCustomType` elements. {{< /paragraph >}} @@ -4886,7 +4886,7 @@ public class MyCustomDataType { } {{< /highlight >}} -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} If you've created a custom coder to match your data type, and you want to use the `@DefaultCoder` annotation, your coder class must implement a static `Coder.of(Class)` factory method. @@ -5496,7 +5496,7 @@ When you set a windowing function for a `PCollection` by using the `Window``WindowInto``beam.WindowInto` transform, you can also specify a trigger. -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} You set the trigger(s) for a `PCollection` by invoking the method `.triggering()` on the result of your `Window.into()` transform. This code sample sets a time-based trigger for a `PCollection`, which emits results one @@ -5513,7 +5513,7 @@ element in that window has been processed. The `accumulation_mode` parameter sets the window's **accumulation mode**. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} You set the trigger(s) for a `PCollection` by passing in the `beam.Trigger` parameter when you use the `beam.WindowInto` transform. This code sample sets a time-based trigger for a `PCollection`, which emits results one minute after the first @@ -5548,7 +5548,7 @@ pane. Since a trigger can fire multiple times, the accumulation mode determines whether the system *accumulates* the window panes as the trigger fires, or *discards* them. -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} To set a window to accumulate the panes that are produced when the trigger fires, invoke`.accumulatingFiredPanes()` when you set the trigger. To set a window to discard fired panes, invoke `.discardingFiredPanes()`. @@ -5561,7 +5561,7 @@ trigger. To set a window to discard fired panes, set `accumulation_mode` to `DISCARDING`. 
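Pulling the Java trigger and accumulation-mode pieces above together into one hedged sketch; the window size, early-firing delay, and allowed lateness are arbitrary values.

```java
import org.apache.beam.sdk.transforms.windowing.AfterProcessingTime;
import org.apache.beam.sdk.transforms.windowing.AfterWatermark;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Duration;

// Assumed input.
PCollection<String> input = ...;

PCollection<String> triggered =
    input.apply(
        Window.<String>into(FixedWindows.of(Duration.standardMinutes(1)))
            .triggering(
                AfterWatermark.pastEndOfWindow()
                    // Speculative (early) firings while the window is still open.
                    .withEarlyFirings(
                        AfterProcessingTime.pastFirstElementInPane()
                            .plusDelayOf(Duration.standardSeconds(30))))
            .withAllowedLateness(Duration.standardMinutes(10))
            // Each firing emits the full contents of the pane accumulated so far.
            .accumulatingFiredPanes());
```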
{{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} To set a window to accumulate the panes that are produced when the trigger fires, set the `beam.AccumulationMode` parameter to `beam.PanesAccumulate()` when you set the trigger. To set a window to discard fired panes, set `beam.AccumulationMode` to @@ -5703,7 +5703,7 @@ example trigger code fires on the following conditions: * Any time late data arrives, after a ten-minute delay -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} * After two days, we assume no more data of interest will arrive, and the trigger stops executing {{< /paragraph >}} @@ -5775,7 +5775,7 @@ after jobs have completed. There are three types of metrics that are supported for the moment: `Counter`, `Distribution` and `Gauge`. -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} In the Beam SDK for Go, a `context.Context` provided by the framework must be passed to the metric or the metric value will not be recorded. The framework will automatically provide a valid `context.Context` to `ProcessElement` and similar methods when it's the first parameter. @@ -5968,7 +5968,7 @@ care must be taken to remember that the elements in input PCollection have no gu program logic is resilient to this. Unit tests written using the DirectRunner will shuffle the order of element processing, and are recommended to test for correctness. -{{< paragraph class="language-java" >}} +{{< paragraph class="language-java">}} In Java, DoFn declares states to be accessed by creating final `StateSpec` member variables representing each state. Each state must be named using the `StateId` annotation; this name is unique to a ParDo in the graph and has no relation to other nodes in the graph. A `DoFn` can declare multiple state variables. @@ -5980,7 +5980,7 @@ In Python, DoFn declares states to be accessed by creating `StateSpec` class mem to other nodes in the graph. A `DoFn` can declare multiple state variables. {{< /paragraph >}} -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} In Go, DoFn declares states to be accessed by creating state struct member variables representing each state. Each state variable is initialized with a key, this key is unique to a ParDo in the graph and has no relation to other nodes in the graph. If no name is supplied, the key defaults to the member variable's name. @@ -7146,7 +7146,7 @@ unbounded restrictions finish processing at the next SDF-initiated checkpoint or You are able to override this default behavior by defining the appropriate method on the restriction provider. -{{< paragraph class="language-go" >}} +{{< paragraph class="language-go">}} Note: Once the pipeline drain starts and truncate restriction transform is triggered, the `sdf.ProcessContinuation` will not be rescheduled. {{< /paragraph >}} @@ -7298,7 +7298,7 @@ You can use the Java class directly from your Python pipeline using a stub trans Constructor and method parameter types are mapped between Python and Java using a Beam schema. The schema is auto-generated using the object types provided on the Python side. If the Java class constructor method or builder method accepts any complex object types, make sure that the Beam schema for these objects is registered and available for the Java expansion service. 
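Editorial aside on the Java state declaration covered earlier in this hunk: a minimal stateful `DoFn` sketch. The state id, coder choice, and counting logic are illustrative.

```java
import org.apache.beam.sdk.coders.VarIntCoder;
import org.apache.beam.sdk.state.StateSpec;
import org.apache.beam.sdk.state.StateSpecs;
import org.apache.beam.sdk.state.ValueState;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.values.KV;

public class CountPerKeyFn extends DoFn<KV<String, String>, KV<String, Integer>> {
  // Each state cell is declared as a final StateSpec member named with @StateId.
  @StateId("count")
  private final StateSpec<ValueState<Integer>> countSpec = StateSpecs.value(VarIntCoder.of());

  @ProcessElement
  public void process(
      @Element KV<String, String> element,
      @StateId("count") ValueState<Integer> count,
      OutputReceiver<KV<String, Integer>> out) {
    Integer stored = count.read();
    int updated = (stored == null ? 0 : stored) + 1;
    count.write(updated);
    out.output(KV.of(element.getKey(), updated));
  }
}
```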
If a schema has not been registered, the Java expansion service will -try to register a schema using [JavaFieldSchema](https://beam.apache.org/documentation/programming-guide/#creating-schemas). In Python, arbitrary objects +try to register a schema using [JavaFieldSchema](/documentation/programming-guide/#creating-schemas). In Python, arbitrary objects can be represented using `NamedTuple`s, which will be represented as Beam rows in the schema. Here is a Python stub transform that represents the above mentioned Java transform: @@ -7503,7 +7503,7 @@ An expansion service can be used with multiple transforms in the same pipeline. Perform the following steps to start up the default Python expansion service directly: -1. Create a virtual environment and [install the Apache Beam SDK](https://beam.apache.org/get-started/quickstart-py/). +1. Create a virtual environment and [install the Apache Beam SDK](/get-started/quickstart-py/). 2. Start the Python SDK’s expansion service with a specified port. {{< highlight >}} diff --git a/website/www/site/content/en/documentation/runners/direct.md b/website/www/site/content/en/documentation/runners/direct.md index ef5c43c42ec3b..26e23b4bd09ff 100644 --- a/website/www/site/content/en/documentation/runners/direct.md +++ b/website/www/site/content/en/documentation/runners/direct.md @@ -81,7 +81,7 @@ If your pipeline uses an unbounded data source or sink, you must set the `stream ### Parallel execution {{< paragraph class="language-py" >}} -Python [FnApiRunner](https://beam.apache.org/contribute/runner-guide/#the-fn-api) supports multi-threading and multi-processing mode. +Python [FnApiRunner](/contribute/runner-guide/#the-fn-api) supports multi-threading and multi-processing mode. {{< /paragraph >}} #### Setting parallelism diff --git a/website/www/site/content/en/documentation/runners/spark.md b/website/www/site/content/en/documentation/runners/spark.md index b7283f0cbe1b6..15cf6cf5ac7c9 100644 --- a/website/www/site/content/en/documentation/runners/spark.md +++ b/website/www/site/content/en/documentation/runners/spark.md @@ -243,7 +243,7 @@ See [here](/roadmap/portability/#sdk-harness-config) for details.) ### Running on Dataproc cluster (YARN backed) -To run Beam jobs written in Python, Go, and other supported languages, you can use the `SparkRunner` and `PortableRunner` as described on the Beam's [Spark Runner](https://beam.apache.org/documentation/runners/spark/) page (also see [Portability Framework Roadmap](https://beam.apache.org/roadmap/portability/)). +To run Beam jobs written in Python, Go, and other supported languages, you can use the `SparkRunner` and `PortableRunner` as described on the Beam's [Spark Runner](/documentation/runners/spark/) page (also see [Portability Framework Roadmap](/roadmap/portability/)). The following example runs a portable Beam job in Python from the Dataproc cluster's master node with Yarn backed. diff --git a/website/www/site/content/en/documentation/runtime/model.md b/website/www/site/content/en/documentation/runtime/model.md index 5078b36ede73b..6ed57b64cdc70 100644 --- a/website/www/site/content/en/documentation/runtime/model.md +++ b/website/www/site/content/en/documentation/runtime/model.md @@ -50,7 +50,7 @@ ways, such as: This may allow the runner to avoid serializing elements; instead, the runner can just pass the elements in memory. This is done as part of an optimization that is known as - [fusion](https://beam.apache.org/documentation/glossary/#fusion). + [fusion](/documentation/glossary/#fusion). 
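Stepping back to the Direct Runner hunk above, a minimal sketch of how its multi-processing mode can be selected through pipeline options follows; the worker count of 4 and the toy transform are arbitrary choices for illustration:

```python
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions

# direct_running_mode accepts 'in_memory', 'multi_threading', or 'multi_processing'.
options = PipelineOptions([
    '--runner=DirectRunner',
    '--direct_num_workers=4',
    '--direct_running_mode=multi_processing',
])

with beam.Pipeline(options=options) as pipeline:
    _ = (
        pipeline
        | beam.Create(range(100))
        | beam.Map(lambda x: x * x))
```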
Some situations where the runner may serialize and persist elements are: diff --git a/website/www/site/content/en/documentation/sdks/java-multi-language-pipelines.md b/website/www/site/content/en/documentation/sdks/java-multi-language-pipelines.md index e84dcfdb849ba..1ce3f60060bb6 100644 --- a/website/www/site/content/en/documentation/sdks/java-multi-language-pipelines.md +++ b/website/www/site/content/en/documentation/sdks/java-multi-language-pipelines.md @@ -142,7 +142,7 @@ cases, [start the expansion service](#advanced-start-an-expansion-service) before running your pipeline. Before running the pipeline, make sure to perform the -[runner specific setup](https://beam.apache.org/get-started/quickstart-java/#run-a-pipeline) for your selected Beam runner. +[runner specific setup](/get-started/quickstart-java/#run-a-pipeline) for your selected Beam runner. ### Run with Dataflow runner using a Maven Archetype (Beam 2.43.0 and later) @@ -260,7 +260,7 @@ For example, to start the standard expansion service for a Python transform, follow these steps: 1. Activate a new virtual environment following -[these instructions](https://beam.apache.org/get-started/quickstart-py/#create-and-activate-a-virtual-environment). +[these instructions](/get-started/quickstart-py/#create-and-activate-a-virtual-environment). 2. Install Apache Beam with `gcp` and `dataframe` packages. diff --git a/website/www/site/content/en/documentation/sdks/python-machine-learning.md b/website/www/site/content/en/documentation/sdks/python-machine-learning.md index 98dc0c6ca839c..e24abdf7e0cc4 100644 --- a/website/www/site/content/en/documentation/sdks/python-machine-learning.md +++ b/website/www/site/content/en/documentation/sdks/python-machine-learning.md @@ -157,7 +157,7 @@ with pipeline as p: accelerator="type:nvidia-tesla-k80;count:1;install-nvidia-driver") ``` -For more information on resource hints, see [Resource hints](https://beam.apache.org/documentation/runtime/resource-hints/). +For more information on resource hints, see [Resource hints](/documentation/runtime/resource-hints/). ### Use a keyed ModelHandler @@ -219,7 +219,7 @@ For detailed instructions explaining how to build and run a pipeline that uses M ## Beam Java SDK support -The RunInference API is available with the Beam Java SDK versions 2.41.0 and later through Apache Beam's [Multi-language Pipelines framework](https://beam.apache.org/documentation/programming-guide/#multi-language-pipelines). For information about the Java wrapper transform, see [RunInference.java](https://github.com/apache/beam/blob/master/sdks/java/extensions/python/src/main/java/org/apache/beam/sdk/extensions/python/transforms/RunInference.java). To try it out, see the [Java Sklearn Mnist Classification example](https://github.com/apache/beam/tree/master/examples/multi-language). +The RunInference API is available with the Beam Java SDK versions 2.41.0 and later through Apache Beam's [Multi-language Pipelines framework](/documentation/programming-guide/#multi-language-pipelines). For information about the Java wrapper transform, see [RunInference.java](https://github.com/apache/beam/blob/master/sdks/java/extensions/python/src/main/java/org/apache/beam/sdk/extensions/python/transforms/RunInference.java). To try it out, see the [Java Sklearn Mnist Classification example](https://github.com/apache/beam/tree/master/examples/multi-language). 
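As a hedged companion to the keyed `ModelHandler` discussion above, here is a sketch using the built-in scikit-learn handler. The `model.pkl` path and the input arrays are placeholders; a real pipeline would point `model_uri` at an actual pickled model:

```python
import numpy as np

import apache_beam as beam
from apache_beam.ml.inference.base import KeyedModelHandler, RunInference
from apache_beam.ml.inference.sklearn_inference import SklearnModelHandlerNumpy

# Wrapping the handler keeps each prediction associated with its key.
keyed_handler = KeyedModelHandler(
    SklearnModelHandlerNumpy(model_uri='model.pkl'))  # placeholder path

with beam.Pipeline() as pipeline:
    _ = (
        pipeline
        | beam.Create([('example_1', np.array([1.0, 2.0])),
                       ('example_2', np.array([3.0, 4.0]))])
        | RunInference(keyed_handler)
        | beam.Map(print))
```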
## Troubleshooting diff --git a/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md b/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md index 330a8af8e449d..efef407981cf4 100644 --- a/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md +++ b/website/www/site/content/en/documentation/sdks/python-pipeline-dependencies.md @@ -48,7 +48,7 @@ If your pipeline uses public packages from the [Python Package Index](https://py ## Custom Containers {#custom-containers} -You can pass a [container](https://hub.docker.com/search?q=apache%2Fbeam&type=image) image with all the dependencies that are needed for the pipeline instead of `requirements.txt`. [Follow the instructions on how to run pipeline with Custom Container images](https://beam.apache.org/documentation/runtime/environments/#running-pipelines). +You can pass a [container](https://hub.docker.com/search?q=apache%2Fbeam&type=image) image with all the dependencies that are needed for the pipeline instead of `requirements.txt`. [Follow the instructions on how to run pipeline with Custom Container images](/documentation/runtime/environments/#running-pipelines). 1. If you are using a custom container image, we recommend that you install the dependencies from the `--requirements_file` directly into your image at build time. In this case, you do not need to pass `--requirements_file` option at runtime, which will reduce the pipeline startup time. diff --git a/website/www/site/content/en/documentation/sdks/python-streaming.md b/website/www/site/content/en/documentation/sdks/python-streaming.md index d2d3e13ca11ba..2d0bdfa9500b2 100644 --- a/website/www/site/content/en/documentation/sdks/python-streaming.md +++ b/website/www/site/content/en/documentation/sdks/python-streaming.md @@ -127,11 +127,11 @@ python -m apache_beam.examples.streaming_wordcount \ {{< /runner >}} {{< runner flink >}} -See https://beam.apache.org/documentation/runners/flink/ for more information. +See /documentation/runners/flink/ for more information. {{< /runner >}} {{< runner spark >}} -See https://beam.apache.org/documentation/runners/spark/ for more information. +See /documentation/runners/spark/ for more information. {{< /runner >}} {{< runner dataflow >}} diff --git a/website/www/site/content/en/documentation/transforms/java/aggregation/approximatequantiles.md b/website/www/site/content/en/documentation/transforms/java/aggregation/approximatequantiles.md index 6ab1d5beeccb7..3f543a8dea097 100644 --- a/website/www/site/content/en/documentation/transforms/java/aggregation/approximatequantiles.md +++ b/website/www/site/content/en/documentation/transforms/java/aggregation/approximatequantiles.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/aggregation/approximateunique.md b/website/www/site/content/en/documentation/transforms/java/aggregation/approximateunique.md index a5e9b59318ab2..c0bf79aa3d615 100644 --- a/website/www/site/content/en/documentation/transforms/java/aggregation/approximateunique.md +++ b/website/www/site/content/en/documentation/transforms/java/aggregation/approximateunique.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/aggregation/cogroupbykey.md b/website/www/site/content/en/documentation/transforms/java/aggregation/cogroupbykey.md index 4aded7986f4c1..90c0984f3df2c 100644 --- a/website/www/site/content/en/documentation/transforms/java/aggregation/cogroupbykey.md +++ b/website/www/site/content/en/documentation/transforms/java/aggregation/cogroupbykey.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/aggregation/combine.md b/website/www/site/content/en/documentation/transforms/java/aggregation/combine.md index 6daf89a20c617..f40c694692bcc 100644 --- a/website/www/site/content/en/documentation/transforms/java/aggregation/combine.md +++ b/website/www/site/content/en/documentation/transforms/java/aggregation/combine.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/aggregation/combinewithcontext.md b/website/www/site/content/en/documentation/transforms/java/aggregation/combinewithcontext.md index 573a66e1f3a09..6e78770a3a472 100644 --- a/website/www/site/content/en/documentation/transforms/java/aggregation/combinewithcontext.md +++ b/website/www/site/content/en/documentation/transforms/java/aggregation/combinewithcontext.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/aggregation/count.md b/website/www/site/content/en/documentation/transforms/java/aggregation/count.md index fdb855d92fdf7..0b84ead8391a7 100644 --- a/website/www/site/content/en/documentation/transforms/java/aggregation/count.md +++ b/website/www/site/content/en/documentation/transforms/java/aggregation/count.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/aggregation/distinct.md b/website/www/site/content/en/documentation/transforms/java/aggregation/distinct.md index 3a7e6dbf0112a..7c5cbd2e9316f 100644 --- a/website/www/site/content/en/documentation/transforms/java/aggregation/distinct.md +++ b/website/www/site/content/en/documentation/transforms/java/aggregation/distinct.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/aggregation/groupbykey.md b/website/www/site/content/en/documentation/transforms/java/aggregation/groupbykey.md index 6eb389586a391..c9986f72e99fc 100644 --- a/website/www/site/content/en/documentation/transforms/java/aggregation/groupbykey.md +++ b/website/www/site/content/en/documentation/transforms/java/aggregation/groupbykey.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/aggregation/groupintobatches.md b/website/www/site/content/en/documentation/transforms/java/aggregation/groupintobatches.md index e80682b48bc49..6d1963fd07605 100644 --- a/website/www/site/content/en/documentation/transforms/java/aggregation/groupintobatches.md +++ b/website/www/site/content/en/documentation/transforms/java/aggregation/groupintobatches.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/aggregation/hllcount.md b/website/www/site/content/en/documentation/transforms/java/aggregation/hllcount.md index 1f1ec6793d81d..89ed66415dce1 100644 --- a/website/www/site/content/en/documentation/transforms/java/aggregation/hllcount.md +++ b/website/www/site/content/en/documentation/transforms/java/aggregation/hllcount.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/aggregation/latest.md b/website/www/site/content/en/documentation/transforms/java/aggregation/latest.md index 7476c0c591d8b..454d39bf14e41 100644 --- a/website/www/site/content/en/documentation/transforms/java/aggregation/latest.md +++ b/website/www/site/content/en/documentation/transforms/java/aggregation/latest.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/aggregation/max.md b/website/www/site/content/en/documentation/transforms/java/aggregation/max.md index 9b5cff487042c..edc07d2edb55b 100644 --- a/website/www/site/content/en/documentation/transforms/java/aggregation/max.md +++ b/website/www/site/content/en/documentation/transforms/java/aggregation/max.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/aggregation/mean.md b/website/www/site/content/en/documentation/transforms/java/aggregation/mean.md index d23aecc52c688..88f90d585d225 100644 --- a/website/www/site/content/en/documentation/transforms/java/aggregation/mean.md +++ b/website/www/site/content/en/documentation/transforms/java/aggregation/mean.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/aggregation/min.md b/website/www/site/content/en/documentation/transforms/java/aggregation/min.md index 71490e42e73a1..e5ecf67cd5d09 100644 --- a/website/www/site/content/en/documentation/transforms/java/aggregation/min.md +++ b/website/www/site/content/en/documentation/transforms/java/aggregation/min.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/aggregation/sample.md b/website/www/site/content/en/documentation/transforms/java/aggregation/sample.md index 79eb73d0fd367..e3328af66f06d 100644 --- a/website/www/site/content/en/documentation/transforms/java/aggregation/sample.md +++ b/website/www/site/content/en/documentation/transforms/java/aggregation/sample.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/aggregation/sum.md b/website/www/site/content/en/documentation/transforms/java/aggregation/sum.md index 72d807165919b..2c49cbe4c0393 100644 --- a/website/www/site/content/en/documentation/transforms/java/aggregation/sum.md +++ b/website/www/site/content/en/documentation/transforms/java/aggregation/sum.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/aggregation/top.md b/website/www/site/content/en/documentation/transforms/java/aggregation/top.md index dbf8fe26a7248..0181544376810 100644 --- a/website/www/site/content/en/documentation/transforms/java/aggregation/top.md +++ b/website/www/site/content/en/documentation/transforms/java/aggregation/top.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/elementwise/filter.md b/website/www/site/content/en/documentation/transforms/java/elementwise/filter.md index 9735c7b78a26b..8bdce38b05acc 100644 --- a/website/www/site/content/en/documentation/transforms/java/elementwise/filter.md +++ b/website/www/site/content/en/documentation/transforms/java/elementwise/filter.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/elementwise/flatmapelements.md b/website/www/site/content/en/documentation/transforms/java/elementwise/flatmapelements.md index 3b0e2fca7bb02..bfbc3e1f88b0a 100644 --- a/website/www/site/content/en/documentation/transforms/java/elementwise/flatmapelements.md +++ b/website/www/site/content/en/documentation/transforms/java/elementwise/flatmapelements.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/elementwise/keys.md b/website/www/site/content/en/documentation/transforms/java/elementwise/keys.md index f194c069c0bd9..c62efd30abb72 100644 --- a/website/www/site/content/en/documentation/transforms/java/elementwise/keys.md +++ b/website/www/site/content/en/documentation/transforms/java/elementwise/keys.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/elementwise/kvswap.md b/website/www/site/content/en/documentation/transforms/java/elementwise/kvswap.md index 5d028bc68ec52..b0f8b5eb4b57f 100644 --- a/website/www/site/content/en/documentation/transforms/java/elementwise/kvswap.md +++ b/website/www/site/content/en/documentation/transforms/java/elementwise/kvswap.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/elementwise/mapelements.md b/website/www/site/content/en/documentation/transforms/java/elementwise/mapelements.md index b0505e091dd5c..5b900baf9690b 100644 --- a/website/www/site/content/en/documentation/transforms/java/elementwise/mapelements.md +++ b/website/www/site/content/en/documentation/transforms/java/elementwise/mapelements.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/elementwise/pardo.md b/website/www/site/content/en/documentation/transforms/java/elementwise/pardo.md index 905f17a7f5227..05b1990ffdef4 100644 --- a/website/www/site/content/en/documentation/transforms/java/elementwise/pardo.md +++ b/website/www/site/content/en/documentation/transforms/java/elementwise/pardo.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/elementwise/partition.md b/website/www/site/content/en/documentation/transforms/java/elementwise/partition.md index 5234dc97781c0..66c27019b5fd5 100644 --- a/website/www/site/content/en/documentation/transforms/java/elementwise/partition.md +++ b/website/www/site/content/en/documentation/transforms/java/elementwise/partition.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/elementwise/regex.md b/website/www/site/content/en/documentation/transforms/java/elementwise/regex.md index ff554db26446f..60545f26e597d 100644 --- a/website/www/site/content/en/documentation/transforms/java/elementwise/regex.md +++ b/website/www/site/content/en/documentation/transforms/java/elementwise/regex.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/elementwise/reify.md b/website/www/site/content/en/documentation/transforms/java/elementwise/reify.md index 706dc7a1d7efc..4c708f8eebf8b 100644 --- a/website/www/site/content/en/documentation/transforms/java/elementwise/reify.md +++ b/website/www/site/content/en/documentation/transforms/java/elementwise/reify.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/elementwise/tostring.md b/website/www/site/content/en/documentation/transforms/java/elementwise/tostring.md index 33edf7d005d73..fd5329ff1c814 100644 --- a/website/www/site/content/en/documentation/transforms/java/elementwise/tostring.md +++ b/website/www/site/content/en/documentation/transforms/java/elementwise/tostring.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/elementwise/values.md b/website/www/site/content/en/documentation/transforms/java/elementwise/values.md index 6dbd654c9d883..5e6f1cb0975f1 100644 --- a/website/www/site/content/en/documentation/transforms/java/elementwise/values.md +++ b/website/www/site/content/en/documentation/transforms/java/elementwise/values.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/elementwise/withkeys.md b/website/www/site/content/en/documentation/transforms/java/elementwise/withkeys.md index 1ecbf0fa6f32e..c6281b6ddf933 100644 --- a/website/www/site/content/en/documentation/transforms/java/elementwise/withkeys.md +++ b/website/www/site/content/en/documentation/transforms/java/elementwise/withkeys.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/elementwise/withtimestamps.md b/website/www/site/content/en/documentation/transforms/java/elementwise/withtimestamps.md index 37606a72a2fce..b2595d8bc36a6 100644 --- a/website/www/site/content/en/documentation/transforms/java/elementwise/withtimestamps.md +++ b/website/www/site/content/en/documentation/transforms/java/elementwise/withtimestamps.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/other/create.md b/website/www/site/content/en/documentation/transforms/java/other/create.md index c318ae1276995..13bdd0789b364 100644 --- a/website/www/site/content/en/documentation/transforms/java/other/create.md +++ b/website/www/site/content/en/documentation/transforms/java/other/create.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/other/flatten.md b/website/www/site/content/en/documentation/transforms/java/other/flatten.md index d99e5b9cf61d5..ffb2d0573d54c 100644 --- a/website/www/site/content/en/documentation/transforms/java/other/flatten.md +++ b/website/www/site/content/en/documentation/transforms/java/other/flatten.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/other/passert.md b/website/www/site/content/en/documentation/transforms/java/other/passert.md index 0830657d54fd0..95c62f213b200 100644 --- a/website/www/site/content/en/documentation/transforms/java/other/passert.md +++ b/website/www/site/content/en/documentation/transforms/java/other/passert.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/other/view.md b/website/www/site/content/en/documentation/transforms/java/other/view.md index fc70fba297d93..a4a31efb8f56e 100644 --- a/website/www/site/content/en/documentation/transforms/java/other/view.md +++ b/website/www/site/content/en/documentation/transforms/java/other/view.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/java/other/window.md b/website/www/site/content/en/documentation/transforms/java/other/window.md index c96275c622630..439f484697f8e 100644 --- a/website/www/site/content/en/documentation/transforms/java/other/window.md +++ b/website/www/site/content/en/documentation/transforms/java/other/window.md @@ -18,7 +18,7 @@ limitations under the License.
- Javadoc Javadoc diff --git a/website/www/site/content/en/documentation/transforms/python/elementwise/pardo.md b/website/www/site/content/en/documentation/transforms/python/elementwise/pardo.md index 19157d2c70adb..9c54a83dd24ee 100644 --- a/website/www/site/content/en/documentation/transforms/python/elementwise/pardo.md +++ b/website/www/site/content/en/documentation/transforms/python/elementwise/pardo.md @@ -86,7 +86,7 @@ A [`DoFn`](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms can be customized with a number of methods that can help create more complex behaviors. You can customize what a worker does when it starts and shuts down with `setup` and `teardown`. You can also customize what to do when a -[*bundle of elements*](https://beam.apache.org/documentation/runtime/model/#bundling-and-persistence) +[*bundle of elements*](/documentation/runtime/model/#bundling-and-persistence) starts and finishes with `start_bundle` and `finish_bundle`. * [`DoFn.setup()`](https://beam.apache.org/releases/pydoc/current/apache_beam.transforms.core.html#apache_beam.transforms.core.DoFn.setup): diff --git a/website/www/site/content/en/documentation/transforms/python/other/create.md b/website/www/site/content/en/documentation/transforms/python/other/create.md index 53a6f91f839e5..0ad28d022dc41 100644 --- a/website/www/site/content/en/documentation/transforms/python/other/create.md +++ b/website/www/site/content/en/documentation/transforms/python/other/create.md @@ -18,7 +18,7 @@ limitations under the License.
- Pydoc Pydoc diff --git a/website/www/site/content/en/documentation/transforms/python/other/flatten.md b/website/www/site/content/en/documentation/transforms/python/other/flatten.md index a150841c5005a..d76b5b817ec94 100644 --- a/website/www/site/content/en/documentation/transforms/python/other/flatten.md +++ b/website/www/site/content/en/documentation/transforms/python/other/flatten.md @@ -19,7 +19,7 @@ limitations under the License.
- Pydoc Pydoc diff --git a/website/www/site/content/en/documentation/transforms/python/other/reshuffle.md b/website/www/site/content/en/documentation/transforms/python/other/reshuffle.md index d2264f039b457..dd8c1f3114068 100644 --- a/website/www/site/content/en/documentation/transforms/python/other/reshuffle.md +++ b/website/www/site/content/en/documentation/transforms/python/other/reshuffle.md @@ -19,7 +19,7 @@ limitations under the License.
- Pydoc Pydoc diff --git a/website/www/site/content/en/documentation/transforms/python/other/windowinto.md b/website/www/site/content/en/documentation/transforms/python/other/windowinto.md index 035e34ad43841..121d5e4551ae3 100644 --- a/website/www/site/content/en/documentation/transforms/python/other/windowinto.md +++ b/website/www/site/content/en/documentation/transforms/python/other/windowinto.md @@ -19,7 +19,7 @@ limitations under the License.
- Pydoc
+ Pydoc
 Pydoc
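The Python `ParDo` page touched a few hunks above describes the `DoFn` lifecycle methods (`setup`, `start_bundle`, `finish_bundle`, `teardown`). A hedged sketch of how they are commonly combined for batched writes follows; `ExternalSink` is a stand-in for whatever client the pipeline actually talks to and is not a Beam API:

```python
import apache_beam as beam

class ExternalSink:
    """Hypothetical client, used only to show where each lifecycle hook fits."""
    def write(self, records):
        print(f'writing {len(records)} records')

    def close(self):
        print('closing connection')

class BatchedWriteDoFn(beam.DoFn):
    def setup(self):
        # Called once per DoFn instance, before any bundle: open connections here.
        self.sink = ExternalSink()

    def start_bundle(self):
        # Called at the start of every bundle: reset per-bundle state.
        self.batch = []

    def process(self, element):
        self.batch.append(element)

    def finish_bundle(self):
        # Called at the end of every bundle: flush whatever was buffered.
        self.sink.write(self.batch)

    def teardown(self):
        # Best-effort cleanup before the instance is discarded.
        self.sink.close()

with beam.Pipeline() as pipeline:
    _ = pipeline | beam.Create(['a', 'b', 'c']) | beam.ParDo(BatchedWriteDoFn())
```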
diff --git a/website/www/site/content/en/get-started/from-spark.md b/website/www/site/content/en/get-started/from-spark.md index b1659b02cfcaf..26a615304b3c4 100644 --- a/website/www/site/content/en/get-started/from-spark.md +++ b/website/www/site/content/en/get-started/from-spark.md @@ -87,7 +87,7 @@ closed. > it implicitly calls `pipeline.run()` which triggers the computation to happen. The pipeline is then sent to your -[runner of choice](https://beam.apache.org/documentation/runners/capability-matrix/) +[runner of choice](/documentation/runners/capability-matrix/) and it processes the data. > ℹ️ The pipeline can run locally with the _DirectRunner_, diff --git a/website/www/site/content/en/get-started/quickstart/java.md b/website/www/site/content/en/get-started/quickstart/java.md index 101586d8ca5f8..3eeeff0bf89de 100644 --- a/website/www/site/content/en/get-started/quickstart/java.md +++ b/website/www/site/content/en/get-started/quickstart/java.md @@ -42,8 +42,8 @@ Use [`sdkman`](https://sdkman.io/) to install the Java Development Kit (JDK). # Install sdkman curl -s "https://get.sdkman.io" | bash -# Install Java 11 -sdk install java 11.0.16-tem +# Install Java 17 +sdk install java 17.0.5-tem {{< /highlight >}} You can use either [Gradle](https://gradle.org/) or @@ -168,13 +168,13 @@ process any data yet. To process data, you run the pipeline: pipeline.run().waitUntilFinish(); ``` -A Beam [runner](https://beam.apache.org/documentation/basics/#runner) runs a +A Beam [runner](/documentation/basics/#runner) runs a Beam pipeline on a specific platform. This example uses the [Direct Runner](https://beam.apache.org/releases/javadoc/2.3.0/org/apache/beam/runners/direct/DirectRunner.html), which is the default runner if you don't specify one. The Direct Runner runs the pipeline locally on your machine. It is meant for testing and development, rather than being optimized for efficiency. For more information, see -[Using the Direct Runner](https://beam.apache.org/documentation/runners/direct/). +[Using the Direct Runner](/documentation/runners/direct/). For production workloads, you typically use a distributed runner that runs the pipeline on a big data processing system such as Apache Flink, Apache Spark, or diff --git a/website/www/site/content/en/get-started/resources/learning-resources.md b/website/www/site/content/en/get-started/resources/learning-resources.md index 689da7d60ddf9..e435a07b2874a 100644 --- a/website/www/site/content/en/get-started/resources/learning-resources.md +++ b/website/www/site/content/en/get-started/resources/learning-resources.md @@ -30,23 +30,23 @@ If you have additional material that you would like to see here, please let us k ### Quickstart -* **[Java Quickstart](https://beam.apache.org/get-started/quickstart-java/)** - How to set up and run a WordCount pipeline on the Java SDK. -* **[Python Quickstart](https://beam.apache.org/get-started/quickstart-py/)** - How to set up and run a WordCount pipeline on the Python SDK. -* **[Go Quickstart](https://beam.apache.org/get-started/quickstart-go/)** - How to set up and run a WordCount pipeline on the Go SDK. +* **[Java Quickstart](/get-started/quickstart-java/)** - How to set up and run a WordCount pipeline on the Java SDK. +* **[Python Quickstart](/get-started/quickstart-py/)** - How to set up and run a WordCount pipeline on the Python SDK. +* **[Go Quickstart](/get-started/quickstart-go/)** - How to set up and run a WordCount pipeline on the Go SDK. 
* **[Java Development Environment](https://medium.com/google-cloud/setting-up-a-java-development-environment-for-apache-beam-on-google-cloud-platform-ec0c6c9fbb39)** - Setting up a Java development environment for Apache Beam using IntelliJ and Maven. * **[Python Development Environment](https://medium.com/google-cloud/python-development-environments-for-apache-beam-on-google-cloud-platform-b6f276b344df)** - Setting up a Python development environment for Apache Beam using PyCharm. ### Learning the Basics -* **[WordCount](https://beam.apache.org/get-started/wordcount-example/)** - Walks you through the code of a simple WordCount pipeline. This is a very basic pipeline intended to show the most basic concepts of data processing. WordCount is the "Hello World" for data processing. -* **[Mobile Gaming](https://beam.apache.org/get-started/mobile-gaming-example/)** - Introduces how to consider time while processing data, user defined transforms, windowing, filtering data, streaming pipelines, triggers, and session analysis. This is a great place to start once you get the hang of WordCount. +* **[WordCount](/get-started/wordcount-example/)** - Walks you through the code of a simple WordCount pipeline. This is a very basic pipeline intended to show the most basic concepts of data processing. WordCount is the "Hello World" for data processing. +* **[Mobile Gaming](/get-started/mobile-gaming-example/)** - Introduces how to consider time while processing data, user defined transforms, windowing, filtering data, streaming pipelines, triggers, and session analysis. This is a great place to start once you get the hang of WordCount. ### Fundamentals -* **[Programming Guide](https://beam.apache.org/documentation/programming-guide/)** - The Programming Guide contains more in-depth information on most topics in the Apache Beam SDK. These include descriptions on how everything works as well as code snippets to see how to use every part. This can be used as a reference guidebook. +* **[Programming Guide](/documentation/programming-guide/)** - The Programming Guide contains more in-depth information on most topics in the Apache Beam SDK. These include descriptions on how everything works as well as code snippets to see how to use every part. This can be used as a reference guidebook. * **[The world beyond batch: Streaming 101](https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-101)** - Covers some basic background information, terminology, time domains, batch processing, and streaming. * **[The world beyond batch: Streaming 102](https://www.oreilly.com/ideas/the-world-beyond-batch-streaming-102)** - Tour of the unified batch and streaming programming model in Beam, alongside with an example to explain many of the concepts. -* **[Apache Beam Execution Model](https://beam.apache.org/documentation/runtime/model)** - Explanation on how runners execute an Apache Beam pipeline. This includes why serialization is important, and how a runner might distribute the work in parallel to multiple machines. +* **[Apache Beam Execution Model](/documentation/runtime/model)** - Explanation on how runners execute an Apache Beam pipeline. This includes why serialization is important, and how a runner might distribute the work in parallel to multiple machines. 
### Common Patterns @@ -76,8 +76,8 @@ If you have additional material that you would like to see here, please let us k ### Advanced Concepts * **[Running on AppEngine](https://amygdala.github.io/dataflow/app_engine/2017/10/24/gae_dataflow.html)** - Use a Dataflow template to launch a pipeline from Google AppEngine, and how to run the pipeline periodically via a cron job. -* **[Stateful Processing](https://beam.apache.org/blog/2017/02/13/stateful-processing.html)** - Learn how to access a persistent mutable state while processing input elements, this allows for _side effects_ in a `DoFn`. This can be used for arbitrary-but-consistent index assignment, if you want to assign a unique incrementing index to each incoming element where order doesn't matter. -* **[Timely and Stateful Processing](https://beam.apache.org/blog/2017/08/28/timely-processing.html)** - An example on how to do batched RPC calls. The call requests are stored in a mutable state as they are received. Once there are either enough requests or a certain time has passed, the batch of requests is triggered to be sent. +* **[Stateful Processing](/blog/2017/02/13/stateful-processing.html)** - Learn how to access a persistent mutable state while processing input elements, this allows for _side effects_ in a `DoFn`. This can be used for arbitrary-but-consistent index assignment, if you want to assign a unique incrementing index to each incoming element where order doesn't matter. +* **[Timely and Stateful Processing](/blog/2017/08/28/timely-processing.html)** - An example on how to do batched RPC calls. The call requests are stored in a mutable state as they are received. Once there are either enough requests or a certain time has passed, the batch of requests is triggered to be sent. * **[Running External Libraries](https://cloud.google.com/blog/products/gcp/running-external-libraries-with-cloud-dataflow-for-grid-computing-workloads)** - Call an external library written in a language that does not have a native SDK in Apache Beam such as C++. ## Books {#books} @@ -148,20 +148,20 @@ complexity. Beam Katas are available for both Java and Python SDKs. * [Beam Playground](https://play.beam.apache.org) is an interactive environment to try out Beam transforms and examples without having to install Apache Beam in your environment. You can try the available Apache Beam examples at [Beam Playground](https://play.beam.apache.org). -* Learn more about how to add an Apache Beam example/test/kata into Beam Playground catalog [here](https://beam.apache.org/get-started/try-beam-playground/#how-to-add-new-examples). +* Learn more about how to add an Apache Beam example/test/kata into Beam Playground catalog [here](/get-started/try-beam-playground/#how-to-add-new-examples). ## API Reference {#api-reference} -* **[Java API Reference](https://beam.apache.org/documentation/sdks/javadoc/)** - Official API Reference for the Java SDK. -* **[Python API Reference](https://beam.apache.org/documentation/sdks/pydoc/)** - Official API Reference for the Python SDK. +* **[Java API Reference](/documentation/sdks/javadoc/)** - Official API Reference for the Java SDK. +* **[Python API Reference](/documentation/sdks/pydoc/)** - Official API Reference for the Python SDK. * **[Go API Reference](https://pkg.go.dev/github.com/apache/beam/sdks/v2/go/pkg/beam)** - Official API Reference for the Go SDK. 
## Feedback and Suggestions {#feedback-and-suggestions} -We are open for feedback and suggestions, you can find different ways to reach out to the community in the [Contact Us](https://beam.apache.org/community/contact-us/) page. +We are open for feedback and suggestions, you can find different ways to reach out to the community in the [Contact Us](/community/contact-us/) page. If you have a bug report or want to suggest a new feature, you can let us know by [submitting a new issue](https://github.com/apache/beam/issues/new/choose). ## How to Contribute {#how-to-contribute} -We welcome contributions from everyone! To learn more on how to contribute, check our [Contribution Guide](https://beam.apache.org/contribute/). +We welcome contributions from everyone! To learn more on how to contribute, check our [Contribution Guide](/contribute/). diff --git a/website/www/site/content/en/get-started/tour-of-beam.md b/website/www/site/content/en/get-started/tour-of-beam.md index b2f1484e0d5bc..80dcb7eb21def 100644 --- a/website/www/site/content/en/get-started/tour-of-beam.md +++ b/website/www/site/content/en/get-started/tour-of-beam.md @@ -54,7 +54,7 @@ We introduce the `GlobalWindow`, `FixedWindows`, `SlidingWindows`, and `Sessions Beam DataFrames provide a pandas-like [DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html) API to declare Beam pipelines. To learn more about Beam DataFrames, take a look at the -[Beam DataFrames overview](https://beam.apache.org/documentation/dsls/dataframes/overview) page. +[Beam DataFrames overview](/documentation/dsls/dataframes/overview) page. {{< button-colab url="https://colab.research.google.com/github/apache/beam/blob/master/examples/notebooks/tour-of-beam/dataframes.ipynb" >}} diff --git a/website/www/site/content/en/get-started/wordcount-example.md b/website/www/site/content/en/get-started/wordcount-example.md index 8d2d5d5521aa1..332473d367dd7 100644 --- a/website/www/site/content/en/get-started/wordcount-example.md +++ b/website/www/site/content/en/get-started/wordcount-example.md @@ -400,7 +400,7 @@ python -m apache_beam.examples.wordcount --input /path/to/inputfile \ {{< runner flinkCluster >}} # Running Beam Python on a distributed Flink cluster requires additional configuration. -# See https://beam.apache.org/documentation/runners/flink/ for more information. +# See /documentation/runners/flink/ for more information. {{< /runner >}} {{< runner spark >}} diff --git a/website/www/site/content/en/roadmap/connectors-multi-sdk.md b/website/www/site/content/en/roadmap/connectors-multi-sdk.md index 69a00a02b0159..3a404b22becf9 100644 --- a/website/www/site/content/en/roadmap/connectors-multi-sdk.md +++ b/website/www/site/content/en/roadmap/connectors-multi-sdk.md @@ -21,7 +21,7 @@ Connector-related efforts that will benefit multiple SDKs. Splittable DoFn is the next generation sources framework for Beam that will replace current frameworks for developing bounded and unbounded sources. Splittable DoFn is being developed along side current Beam portability -efforts. See [Beam portability framework roadmap](https://beam.apache.org/roadmap/portability/) for more details. +efforts. See [Beam portability framework roadmap](/roadmap/portability/) for more details. # Cross-language transforms @@ -35,7 +35,7 @@ As an added benefit of Beam portability effort, we are able to utilize Beam tran + Go SDK, will be able to utilize connectors currently available for Java and Python SDKs. 
* Ease of developing and maintaining Beam transforms - in general, with cross-language transforms, Beam transform authors will be able to implement new Beam transforms using a language of their choice and utilize these transforms from other languages, reducing maintenance and support overhead. -* [Beam SQL](https://beam.apache.org/documentation/dsls/sql/overview/), that is currently only available to Java SDK, will become available to Python and Go SDKs. +* [Beam SQL](/documentation/dsls/sql/overview/), which is currently only available to the Java SDK, will become available to the Python and Go SDKs. * [Beam TFX transforms](https://www.tensorflow.org/tfx/transform/get_started), which are currently only available to Beam Python SDK pipelines, will become available to the Java and Go SDKs. ## Completed and Ongoing Efforts