From fa62325a2d1fdfb4a00bee8c3fcbd57235239365 Mon Sep 17 00:00:00 2001 From: Andrey Devyatkin Date: Mon, 11 Sep 2023 10:58:05 +0200 Subject: [PATCH 1/3] Add beam_PostCommit_Java_ValidatesRunner jobs to GitHub Actions Don't improperly filter newly-added elements that overlap with a delete. --- .../arc/environments/beam.env | 4 +- .github/workflows/README.md | 39 +- .../workflows/beam_PostCommit_Go_VR_Flink.yml | 77 ++ .../workflows/beam_PostCommit_Go_VR_Spark.yml | 78 ++ .../beam_PostCommit_Java_Avro_Versions.yml | 77 ++ .../beam_PostCommit_Java_DataflowV1.yml | 89 +++ .../beam_PostCommit_Java_DataflowV2.yml | 82 +++ ...m_PostCommit_Java_IO_Performance_Tests.yml | 114 +++ ...m_PostCommit_Java_Jpms_Dataflow_Java11.yml | 87 +++ ...m_PostCommit_Java_Jpms_Dataflow_Java17.yml | 92 +++ ...eam_PostCommit_Java_Jpms_Direct_Java11.yml | 87 +++ ...eam_PostCommit_Java_Jpms_Direct_Java17.yml | 92 +++ ...beam_PostCommit_Java_Jpms_Flink_Java11.yml | 87 +++ ...beam_PostCommit_Java_Jpms_Spark_Java11.yml | 87 +++ .../beam_PostCommit_Java_Sickbay.yml | 82 +++ ...stCommit_Java_ValidatesRunner_Dataflow.yml | 94 +++ ..._ValidatesRunner_Dataflow_JavaVersions.yml | 114 +++ ...ava_ValidatesRunner_Dataflow_Streaming.yml | 94 +++ ...ommit_Java_ValidatesRunner_Dataflow_V2.yml | 94 +++ ..._ValidatesRunner_Dataflow_V2_Streaming.yml | 94 +++ ...PostCommit_Java_ValidatesRunner_Direct.yml | 93 +++ ...va_ValidatesRunner_Direct_JavaVersions.yml | 109 +++ ..._PostCommit_Java_ValidatesRunner_Flink.yml | 99 +++ ...mmit_Java_ValidatesRunner_Flink_Java11.yml | 112 +++ ..._PostCommit_Java_ValidatesRunner_Samza.yml | 91 +++ ..._PostCommit_Java_ValidatesRunner_Spark.yml | 91 +++ ...lidatesRunner_SparkStructuredStreaming.yml | 91 +++ ...mmit_Java_ValidatesRunner_Spark_Java11.yml | 112 +++ ...stCommit_Java_ValidatesRunner_Twister2.yml | 91 +++ ...am_PostCommit_Java_ValidatesRunner_ULR.yml | 95 +++ ...am_PostCommit_Python_Examples_Dataflow.yml | 1 - ...eam_PostCommit_TransformService_Direct.yml | 98 +++ .../beam_PostCommit_Website_Publish.yml | 63 ++ ...am_PostCommit_XVR_GoUsingJava_Dataflow.yml | 87 +++ .../beam_PreCommit_Java_GCP_IO_Direct.yml | 122 ++++ .../beam_PreCommit_Java_Kafka_IO_Direct.yml | 109 +++ .../workflows/beam_PreCommit_SQL_Java11.yml | 8 +- .github/workflows/beam_PreCommit_Spotless.yml | 9 +- .../workflows/beam_PreCommit_Whitespace.yml | 4 +- .../job_PostCommit_Website_Publish.groovy | 41 -- .../jenkins/job_PreCommit_PythonDocs.groovy | 33 - .../jenkins/job_PreCommit_PythonLint.groovy | 30 - .test-infra/jenkins/job_PreCommit_RAT.groovy | 26 - .../jenkins/job_PreCommit_Spotless.groovy | 45 -- .../jenkins/job_PreCommit_Website.groovy | 27 - .../jenkins/job_PreCommit_Whitespace.groovy | 31 - .../jenkins/job_sonarqube_report.groovy | 55 -- CHANGES.md | 5 +- .../gradle/IoPerformanceTestUtilities.groovy | 45 ++ examples/notebooks/beam-ml/README.md | 1 + .../notebooks/beam-ml/mltransform_basic.ipynb | 679 ++++++++++++++++++ .../beam-ml/run_inference_huggingface.ipynb | 534 ++++++++++++++ .../build.gradle | 26 +- it/google-cloud-platform/build.gradle | 6 +- .../google-cloud-dataflow-java/build.gradle | 3 + .../worker/build.gradle | 135 ++-- .../windmill/AbstractWindmillStream.java | 17 +- .../ForwardingClientResponseObserver.java | 14 +- .../windmill/StreamObserverFactory.java | 18 +- .../worker/windmill/WindmillEndpoints.java | 221 ++++++ .../windmill/WindmillServiceAddress.java | 45 ++
.../worker/windmill/WindmillStream.java | 4 + .../grpcclient/GrpcCommitWorkStream.java | 31 +- .../grpcclient/GrpcGetDataStream.java | 41 +- .../grpcclient/GrpcGetWorkStream.java | 25 +- .../GrpcGetWorkerMetadataStream.java | 170 +++++ .../grpcclient/GrpcWindmillServer.java | 40 +- .../GrpcGetWorkerMetadataStreamTest.java | 328 +++++++++ .../windmill/src/main/proto/windmill.proto | 11 +- .../src/main/proto/windmill_service.proto | 2 +- sdks/go.mod | 24 +- sdks/go.sum | 70 +- sdks/go/container/tools/buffered_logging.go | 36 +- .../container/tools/buffered_logging_test.go | 72 ++ .../beam/checkstyle/suppressions.xml | 1 + .../beam/sdk/io/gcp/bigquery/BatchLoads.java | 11 +- .../sdk/io/gcp/bigquery/BigQueryUtils.java | 4 + .../io/gcp/bigquery/BigQueryUtilsTest.java | 27 +- .../io/gcp/bigquery/FileLoadsStreamingIT.java | 497 +++++++++++++ .../runners/dataflow/internal/names.py | 2 +- .../runners/worker/data_sampler.py | 10 +- .../apache_beam/transforms/ptransform.py | 52 ++ .../typehints/trivial_inference.py | 15 +- .../typehints/trivial_inference_test.py | 19 + .../yaml/cache_provider_artifacts.py | 46 ++ sdks/python/apache_beam/yaml/yaml_provider.py | 76 +- .../apache_beam/yaml/yaml_transform_test.py | 30 + .../base_image_requirements_manual.txt | 1 - sdks/python/container/boot.go | 13 +- sdks/python/container/piputil.go | 22 +- .../py310/base_image_requirements.txt | 80 +-- .../py311/base_image_requirements.txt | 74 +- .../py38/base_image_requirements.txt | 80 +-- .../py39/base_image_requirements.txt | 82 +-- sdks/python/setup.py | 13 +- .../www/site/content/en/blog/beam-2.50.0.md | 1 + .../content/en/contribute/release-guide.md | 8 +- 97 files changed, 6414 insertions(+), 790 deletions(-) create mode 100644 .github/workflows/beam_PostCommit_Go_VR_Flink.yml create mode 100644 .github/workflows/beam_PostCommit_Go_VR_Spark.yml create mode 100644 .github/workflows/beam_PostCommit_Java_Avro_Versions.yml create mode 100644 .github/workflows/beam_PostCommit_Java_DataflowV1.yml create mode 100644 .github/workflows/beam_PostCommit_Java_DataflowV2.yml create mode 100644 .github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml create mode 100644 .github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml create mode 100644 .github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml create mode 100644 .github/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml create mode 100644 .github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml create mode 100644 .github/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml create mode 100644 .github/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml create mode 100644 .github/workflows/beam_PostCommit_Java_Sickbay.yml create mode 100644 .github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml create mode 100644 .github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions.yml create mode 100644 .github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml create mode 100644 .github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml create mode 100644 .github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml create mode 100644 .github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml create mode 100644 .github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions.yml create mode 100644 .github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml create mode 100644 .github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml 
create mode 100644 .github/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml create mode 100644 .github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml create mode 100644 .github/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml create mode 100644 .github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml create mode 100644 .github/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml create mode 100644 .github/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml create mode 100644 .github/workflows/beam_PostCommit_TransformService_Direct.yml create mode 100644 .github/workflows/beam_PostCommit_Website_Publish.yml create mode 100644 .github/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml create mode 100644 .github/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml create mode 100644 .github/workflows/beam_PreCommit_Java_Kafka_IO_Direct.yml delete mode 100644 .test-infra/jenkins/job_PostCommit_Website_Publish.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_PythonDocs.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_PythonLint.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_RAT.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Spotless.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Website.groovy delete mode 100644 .test-infra/jenkins/job_PreCommit_Whitespace.groovy delete mode 100644 .test-infra/jenkins/job_sonarqube_report.groovy create mode 100644 buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy create mode 100644 examples/notebooks/beam-ml/mltransform_basic.ipynb create mode 100644 examples/notebooks/beam-ml/run_inference_huggingface.ipynb rename .test-infra/jenkins/job_PreCommit_Website_Stage_GCS.groovy => it/build.gradle (52%) create mode 100644 runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/WindmillEndpoints.java create mode 100644 runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/WindmillServiceAddress.java create mode 100644 runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcGetWorkerMetadataStream.java create mode 100644 runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcGetWorkerMetadataStreamTest.java create mode 100644 sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FileLoadsStreamingIT.java create mode 100644 sdks/python/apache_beam/yaml/cache_provider_artifacts.py diff --git a/.github/gh-actions-self-hosted-runners/arc/environments/beam.env b/.github/gh-actions-self-hosted-runners/arc/environments/beam.env index bfabf721796ca..62a15edf5d6cb 100644 --- a/.github/gh-actions-self-hosted-runners/arc/environments/beam.env +++ b/.github/gh-actions-self-hosted-runners/arc/environments/beam.env @@ -34,9 +34,9 @@ main_runner = { runner_image = "us-central1-docker.pkg.dev/apache-beam-testing/beam-github-actions/beam-arc-runner:60d397ecfbd2b10a1929615c70d500eb71a2c053" machine_type = "e2-standard-16" min_node_count = "1" - max_node_count = "16" + max_node_count = "24" min_replicas = "1" - max_replicas = "128" + max_replicas = "200" webhook_scaling = true disk_size_gb = 200 requests = { diff --git a/.github/workflows/README.md b/.github/workflows/README.md index f51476acbc83c..7b41a53fbadb8 100644 --- a/.github/workflows/README.md +++ 
b/.github/workflows/README.md @@ -172,16 +172,47 @@ Please note that jobs with matrix need to have matrix element in the comment. Ex | Workflow name | Matrix | Trigger Phrase | Cron Status | |:-------------:|:------:|:--------------:|:-----------:| | [ PostCommit BeamMetrics Publish ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml) | N/A |`Run Beam Metrics Deployment`| [![.github/workflows/beam_PostCommit_BeamMetrics_Publish](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) +| [ PostCommit TransformService Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) | N/A |`Run TransformService_Direct PostCommit`| [![.github/workflows/beam_PostCommit_TransformService_Direct](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) | [ PostCommit Go Dataflow ARM](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) | N/A |`Run Go PostCommit Dataflow ARM`| [![.github/workflows/beam_PostCommit_Go_Dataflow_ARM](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) | +| [ PostCommit Go VR Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml) | N/A |`Run Go Flink ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml) | | [ PostCommit Go VR Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml) | N/A |`Run Go Samza ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml) | +| [ PostCommit Go VR Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml) | N/A |`Run Go Spark ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml) | | [ PostCommit Java Examples Dataflow Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | N/A |`Run Java examples on Dataflow Java 11`| [![PostCommit Java Examples Dataflow Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | +| [ PostCommit Java Avro Versions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml) | N/A |`Run Java Avro Versions PostCommit`| [![PostCommit Java Avro 
Versions](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml) | +| [ PostCommit Java Dataflow V1 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml) | N/A |`Run PostCommit_Java_Dataflow`| [![PostCommit Java Dataflow V1](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml) | +| [ PostCommit Java Dataflow V2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml) | N/A |`Run PostCommit_Java_DataflowV2`| [![PostCommit Java Dataflow V2](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml) | | [ PostCommit Java Examples Dataflow ARM ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml) | N/A |`Run Java_Examples_Dataflow_ARM PostCommit`| [![PostCommit Java Examples Dataflow ARM](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml) | +| [ PostCommit Java Examples Dataflow Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | N/A |`Run Java examples on Dataflow Java 11`| [![PostCommit Java Examples Dataflow Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | | [ PostCommit Java Examples Dataflow Java17 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml) | N/A |`Run Java examples on Dataflow Java 17`| [![PostCommit Java Examples Dataflow Java17](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml) | +| [ PostCommit Java Jpms Dataflow Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml) | N/A |`Run Jpms Dataflow Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml) | +| [ PostCommit Java Jpms Dataflow Java17 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml) | N/A |`Run Jpms Dataflow Java 17 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml) | +| [ PostCommit Java Jpms Direct Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml) | N/A |`Run Jpms Direct Java 11 PostCommit`| 
[![.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml) | +| [ PostCommit Java Jpms Direct Java17 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml) | N/A |`Run Jpms Direct Java 17 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml) | +| [ PostCommit Java Jpms Flink Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml) | N/A |`Run Jpms Flink Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Flink_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml) | +| [ PostCommit Java Jpms Spark Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml) | N/A |`Run Jpms Spark Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Spark_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml) | +| [ PostCommit Java Sickbay ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml) | N/A |`Run Java Sickbay`| [![PostCommit Java Sickbay](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml) | +| [ PostCommit Java ValidatesRunner Dataflow JavaVersions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions.yml) | ['11','17'] |`Run Dataflow ValidatesRunner Java (matrix_element)`| [![PostCommit Java ValidatesRunner Dataflow JavaVersions](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions.yml) | +| [ PostCommit Java ValidatesRunner Dataflow Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml) | N/A |`Run Dataflow Streaming ValidatesRunner`| [![PostCommit Java ValidatesRunner Dataflow Streaming](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml) | +| [ PostCommit Java ValidatesRunner Dataflow V2 Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml) | N/A |`Run Java Dataflow V2 ValidatesRunner Streaming`| [![PostCommit Java ValidatesRunner Dataflow V2 
Streaming](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml) | +| [ PostCommit Java ValidatesRunner Dataflow V2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml) | N/A |`Run Java Dataflow V2 ValidatesRunner`| [![PostCommit Java ValidatesRunner Dataflow V2](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml) | +| [ PostCommit Java ValidatesRunner Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml) | N/A |`Run Dataflow ValidatesRunner`| [![PostCommit Java ValidatesRunner Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml) | +| [ PostCommit Java ValidatesRunner Direct JavaVersions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions.yml) | ['11','17'] |`Run Direct ValidatesRunner Java (matrix_element)`| [![PostCommit Java ValidatesRunner Direct JavaVersions](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions.yml) | +| [ PostCommit Java ValidatesRunner Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml) | N/A |`Run Direct ValidatesRunner`| [![PostCommit Java ValidatesRunner Direct](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml) | +| [ PostCommit Java ValidatesRunner Flink Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml) | N/A |`Run Flink ValidatesRunner Java 11`| [![PostCommit Java ValidatesRunner Flink Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml) | +| [ PostCommit Java ValidatesRunner Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml) | N/A |`Run Flink ValidatesRunner`| [![PostCommit Java ValidatesRunner Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml) | +| [ PostCommit Java ValidatesRunner Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml) | N/A |`Run Samza ValidatesRunner`| [![PostCommit Java ValidatesRunner Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml) | +| [ 
PostCommit Java ValidatesRunner Spark Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml) | N/A |`Run Spark ValidatesRunner Java 11`| [![PostCommit Java ValidatesRunner Spark Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml) | +| [ PostCommit Java ValidatesRunner Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml) | N/A |`Run Spark ValidatesRunner`| [![PostCommit Java ValidatesRunner Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml) | +| [ PostCommit Java ValidatesRunner SparkStructuredStreaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml) | N/A |`Run Spark StructuredStreaming ValidatesRunner`| [![PostCommit Java ValidatesRunner SparkStructuredStreaming](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml) | +| [ PostCommit Java ValidatesRunner Twister2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml) | N/A |`Run Twister2 ValidatesRunner`| [![PostCommit Java ValidatesRunner Twister2](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml) | +| [ PostCommit Java ValidatesRunner ULR ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml) | N/A |`Run ULR Loopback ValidatesRunner`| [![PostCommit Java ValidatesRunner ULR](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml) | | [ PostCommit Python Examples Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml) | N/A |`Run Python Examples_Dataflow`| [![PostCommit Python Examples Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml) | -| [ PostCommit Python Examples Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) | N/A |`Run Python Examples_Direct`| [![PostCommit Python Examples Direct](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) | -| [ PostCommit Python Examples Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) | N/A |`Run Python Examples_Flink`| [![PostCommit Python Examples 
Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) | -| [ PostCommit Python Examples Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml) | N/A |`Run Python Examples_Spark`| [![PostCommit Python Examples Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml) | +| [ PostCommit Python Examples Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python Examples_Direct (matrix_element)`| [![PostCommit Python Examples Direct](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) | +| [ PostCommit Python Examples Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) | ['3.8','3.11'] |`Run Python Examples_Flink (matrix_element)`| [![PostCommit Python Examples Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) | +| [ PostCommit Python Examples Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml) | ['3.8','3.11'] |`Run Python Examples_Spark (matrix_element)`| [![PostCommit Python Examples Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml) | | [ PostCommit Sickbay Python ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python PostCommit Sickbay tests (matrix_element)`| [![PostCommit Sickbay Python ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml) | +| [ PostCommit Website Publish ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml) | N/A | N/A | [![PostCommit Website Publish](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml) | +| [ PostCommit XVR GoUsingJava Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml) | N/A |`Run XVR_GoUsingJava_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml) | | [ PreCommit Community Metrics ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_CommunityMetrics.yml) | N/A |`Run CommunityMetrics PreCommit`| 
[![.github/workflows/beam_PreCommit_CommunityMetrics.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_CommunityMetrics.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_CommunityMetrics.yml) | | [ PreCommit Go ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Go.yml) | N/A |`Run Go PreCommit`| [![.github/workflows/beam_PreCommit_Go.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Go.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Go.yml) | | [ PreCommit Java ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java.yml) | N/A |`Run Java PreCommit`| [![.github/workflows/beam_PreCommit_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java.yml) | @@ -197,12 +228,14 @@ Please note that jobs with matrix need to have matrix element in the comment. Ex | [ PreCommit Java ElasticSearch IO Direct ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_ElasticSearch_IO_Direct.yml) | N/A |`Run Java_ElasticSearch_IO_Direct PreCommit`| [![.github/workflows/beam_PreCommit_Java_ElasticSearch_IO_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_ElasticSearch_IO_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_ElasticSearch_IO_Direct.yml) | | [ PreCommit Java Examples Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_Examples_Dataflow.yml) | N/A |`Run Java_Examples_Dataflow PreCommit`| [![.github/workflows/beam_PreCommit_Java_Examples_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_Examples_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_Examples_Dataflow.yml) | | [ PreCommit Java Flink Versions ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_Flink_Versions.yml) | N/A |`Run Java_Flink_Versions PreCommit`| [![.github/workflows/beam_PreCommit_Java_Flink_Versions.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_Flink_Versions.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_Flink_Versions.yml) | +| [ PreCommit Java GCP IO Direct ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml) | N/A |`Run Java_GCP_IO_Direct PreCommit`| [![.github/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml) | | [ PreCommit Java Examples Dataflow Java11 ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_Examples_Dataflow_Java11.yml) | N/A | `Run Java_Examples_Dataflow_Java11 PreCommit` | [![.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java11.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_Examples_Dataflow_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_Examples_Dataflow_Java11.yml) | | [ PreCommit Java Examples Dataflow Java17 ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_Examples_Dataflow_Java17.yml) | N/A | `Run Java_Examples_Dataflow_Java17 PreCommit` | 
[![.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java17.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_Examples_Dataflow_Java17.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_Examples_Dataflow_Java17.yml) | | [ PreCommit Java File-schema-transform IO Direct ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_File-schema-transform_IO_Direct.yml) | N/A |`Run Java_File-schema-transform_IO_Direct PreCommit`| [![.github/workflows/beam_PreCommit_Java_File-schema-transform_IO_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_File-schema-transform_IO_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_File-schema-transform_IO_Direct.yml) | | [ PreCommit Java Hadoop IO Direct ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_Hadoop_IO_Direct.yml) | N/A |`Run Java_Hadoop_IO_Direct PreCommit`| [![.github/workflows/beam_PreCommit_Java_Hadoop_IO_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_Hadoop_IO_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_Hadoop_IO_Direct.yml) | | [ PreCommit Java HBase IO Direct ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_HBase_IO_Direct.yml) | N/A |`Run Java_HBase_IO_Direct PreCommit`| [![.github/workflows/beam_PreCommit_Java_HBase_IO_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_HBase_IO_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_HBase_IO_Direct.yml) | | [ PreCommit Java HCatalog IO Direct ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_HCatalog_IO_Direct.yml) | N/A |`Run Java_HCatalog_IO_Direct PreCommit`| [![.github/workflows/beam_PreCommit_Java_HCatalog_IO_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_HCatalog_IO_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_HCatalog_IO_Direct.yml) | +| [ PreCommit Java Kafka IO Direct ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_Kafka_IO_Direct.yml) | N/A |`Run Java_Kafka_IO_Direct PreCommit`| [![.github/workflows/beam_PreCommit_Java_Kafka_IO_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_Kafka_IO_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_Kafka_IO_Direct.yml) | | [ PreCommit Java InfluxDb IO Direct ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_InfluxDb_IO_Direct.yml) | N/A |`Run Java_InfluxDb_IO_Direct PreCommit`| [![.github/workflows/beam_PreCommit_Java_InfluxDb_IO_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_InfluxDb_IO_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_InfluxDb_IO_Direct.yml) | | [ PreCommit Java IOs Direct ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_IOs_Direct.yml) | N/A |`Run Java_IOs_Direct PreCommit`| N/A | | [ PreCommit Java JDBC IO Direct ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_JDBC_IO_Direct.yml) | N/A |`Run Java_JDBC_IO_Direct PreCommit`| 
[![.github/workflows/beam_PreCommit_Java_JDBC_IO_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_JDBC_IO_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java_JDBC_IO_Direct.yml) | diff --git a/.github/workflows/beam_PostCommit_Go_VR_Flink.yml b/.github/workflows/beam_PostCommit_Go_VR_Flink.yml new file mode 100644 index 0000000000000..0f7d9b0c5355c --- /dev/null +++ b/.github/workflows/beam_PostCommit_Go_VR_Flink.yml @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Go VR Flink + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Go_VR_Flink: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Go Flink ValidatesRunner' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Go_VR_Flink"] + job_phrase: ["Run Go Flink ValidatesRunner"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run Go Flink ValidatesRunner script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:go:test:flinkValidatesRunner \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Go_VR_Spark.yml b/.github/workflows/beam_PostCommit_Go_VR_Spark.yml new file mode 100644 index 0000000000000..eed15c1fe9662 --- /dev/null +++ 
b/.github/workflows/beam_PostCommit_Go_VR_Spark.yml @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Go VR Spark + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Go_VR_Spark: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Go Spark ValidatesRunner' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Go_VR_Spark"] + job_phrase: ["Run Go Spark ValidatesRunner"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run Go Spark ValidatesRunner script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:go:test:sparkValidatesRunner + \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Avro_Versions.yml b/.github/workflows/beam_PostCommit_Java_Avro_Versions.yml new file mode 100644 index 0000000000000..de50553ede7a2 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Avro_Versions.yml @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Avro Versions + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +jobs: + beam_PostCommit_Java_Avro_Versions: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + matrix: + job_name: [beam_PostCommit_Java_Avro_Versions] + job_phrase: [Run Java Avro Versions PostCommit] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Avro Versions PostCommit' + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Avro Versions script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :javaAvroVersionsTest + - name: Upload test report + uses: actions/upload-artifact@v3 + with: + name: java-code-coverage-report + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_DataflowV1.yml b/.github/workflows/beam_PostCommit_Java_DataflowV1.yml new file mode 100644 index 0000000000000..c1f5278494263 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_DataflowV1.yml @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Dataflow V1 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_DataflowV1: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + matrix: + job_name: [beam_PostCommit_Java_DataflowV1] + job_phrase: [Run PostCommit_Java_Dataflow] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run PostCommit_Java_Dataflow' + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Set up Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'temurin' + java-version: | + 11 + 8 + - name: run PostCommit Java Dataflow V1 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:postCommit + - name: Upload test report + uses: actions/upload-artifact@v3 + with: + name: java-code-coverage-report + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_DataflowV2.yml b/.github/workflows/beam_PostCommit_Java_DataflowV2.yml new file mode 100644 index 0000000000000..b10cabdd1e774 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_DataflowV2.yml @@ -0,0 +1,82 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
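+# +# Note on the shared trigger pattern used by these PostCommit workflows: in +# addition to the 6-hourly cron and manual workflow_dispatch, every PR comment +# fires an issue_comment event; the `if:` guard on the job below only lets it +# run when the comment body equals the trigger phrase, and the one-element +# job_name/job_phrase matrix passes that phrase to the +# ./.github/actions/setup-action step as comment_phrase so the run is tied to +# the matching comment.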
+ +name: PostCommit Java Dataflow V2 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_DataflowV2: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + matrix: + job_name: [beam_PostCommit_Java_DataflowV2] + job_phrase: [Run PostCommit_Java_DataflowV2] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run PostCommit_Java_DataflowV2' + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Dataflow V2 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:postCommitRunnerV2 + - name: Upload test report + uses: actions/upload-artifact@v3 + with: + name: java-code-coverage-report + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml new file mode 100644 index 0000000000000..ffd5751fd8b3e --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml @@ -0,0 +1,114 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
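+# +# Unlike the other PostCommit workflows in this change, this one also runs on +# pushes to master, release-* branches, and v* tags that touch the paths listed +# below. Scheduled runs additionally check out the v2.50.0 release tag and +# export metrics to the public io_performance_metrics table, while comment- and +# push-triggered runs export to io_performance_metrics_test instead.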
+ +name: PostCommit Java IO Performance Tests + +on: + push: + tags: ['v*'] + branches: ['master', 'release-*'] + paths: ['it/google-cloud-platform/**','.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml'] + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.sender.login }}-${{ github.event.schedule }}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_IO_Performance_Tests: + if: | + github.event_name == 'push' || + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java PostCommit IO Performance Tests' + runs-on: [self-hosted, ubuntu-20.04, main] + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.test_case }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Java_IO_Performance_Tests"] + job_phrase: ["Run Java PostCommit IO Performance Tests"] + test_case: ["GCSPerformanceTest", "BigTablePerformanceTest"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.test_case }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.test_case }}) + - name: Checkout release branch + if: github.event_name == 'schedule' #This has scheduled runs run against the latest release + uses: actions/checkout@v3 + with: + ref: v2.50.0 #TODO(https://github.com/apache/beam/issues/28330) automate updating this + repository: apache/beam + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: Setup Gradle + uses: gradle/gradle-build-action@v2 + with: + cache-read-only: false + - name: Authenticate on GCP + uses: google-github-actions/setup-gcloud@v0 + with: + service_account_email: ${{ secrets.GCP_SA_EMAIL }} + service_account_key: ${{ secrets.GCP_SA_KEY }} + project_id: ${{ secrets.GCP_PROJECT_ID }} + export_default_credentials: true + - name: run scheduled javaPostcommitIOPerformanceTests script + if: github.event_name == 'schedule' #This ensures only scheduled runs publish metrics publicly by changing which exportTable is configured + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :it:${{ matrix.test_case }} + env: + exportDataset: performance_tests + exportTable: io_performance_metrics + - name: run triggered javaPostcommitIOPerformanceTests script + if: github.event_name != 'schedule' + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :it:${{ matrix.test_case }} + env: + exportDataset: performance_tests + exportTable: 
io_performance_metrics_test \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml new file mode 100644 index 0000000000000..53e7aafd93218 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java Jpms Dataflow Java11 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Jpms_Dataflow_Java11: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Jpms Dataflow Java 11 PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Java_Jpms_Dataflow_Java11"] + job_phrase: ["Run Jpms Dataflow Java 11 PostCommit"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Set up Java 11 + uses: actions/setup-java@v3.11.0 + with: + distribution: 'temurin' + java-version: '11' + - name: run PostCommit Java Jpms Dataflow Java11 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:jpms-tests:dataflowRunnerIntegrationTest + arguments: -Dorg.gradle.java.home=$JAVA_HOME_11_X64 + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of 
file diff --git a/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml new file mode 100644 index 0000000000000..d1b67ea9e92fe --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml @@ -0,0 +1,92 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java Jpms Dataflow Java17 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Jpms_Dataflow_Java17: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Jpms Dataflow Java 17 PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Java_Jpms_Dataflow_Java17"] + job_phrase: ["Run Jpms Dataflow Java 17 PostCommit"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Set up Java + uses: actions/setup-java@v3.11.0 + with: + distribution: 'temurin' + java-version: | + 17 + 8 + - name: run PostCommit Java Jpms Dataflow Java17 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:jpms-tests:dataflowRunnerIntegrationTest + arguments: + -PskipCheckerFramework + -PcompileAndRunTestsWithJava17 + -Pjava17Home=$JAVA_HOME_17_X64 + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end 
of file diff --git a/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml new file mode 100644 index 0000000000000..4cb0d9af89a63 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java Jpms Direct Java11 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Jpms_Direct_Java11: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Jpms Direct Java 11 PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Java_Jpms_Direct_Java11"] + job_phrase: ["Run Jpms Direct Java 11 PostCommit"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Set up Java 11 + uses: actions/setup-java@v3.11.0 + with: + distribution: 'temurin' + java-version: '11' + - name: run PostCommit Java Jpms Direct Java11 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:jpms-tests:directRunnerIntegrationTest + arguments: -Dorg.gradle.java.home=$JAVA_HOME_11_X64 + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git 
a/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml new file mode 100644 index 0000000000000..f4e917453dde2 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml @@ -0,0 +1,92 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java Jpms Direct Java17 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Jpms_Direct_Java17: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Jpms Direct Java 17 PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Java_Jpms_Direct_Java17"] + job_phrase: ["Run Jpms Direct Java 17 PostCommit"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Set up Java + uses: actions/setup-java@v3.11.0 + with: + distribution: 'temurin' + java-version: | + 17 + 8 + - name: run PostCommit Java Jpms Direct Java17 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:jpms-tests:directRunnerIntegrationTest + arguments: + -PskipCheckerFramework + -PcompileAndRunTestsWithJava17 + -Pjava17Home=$JAVA_HOME_17_X64 + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git 
a/.github/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml new file mode 100644 index 0000000000000..e1a51075cf2da --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java Jpms Flink Java11 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Jpms_Flink_Java11: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Jpms Flink Java 11 PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Java_Jpms_Flink_Java11"] + job_phrase: ["Run Jpms Flink Java 11 PostCommit"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Set up Java 11 + uses: actions/setup-java@v3.11.0 + with: + distribution: 'temurin' + java-version: '11' + - name: run PostCommit Java Jpms Flink Java11 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:jpms-tests:flinkRunnerIntegrationTest + arguments: -Dorg.gradle.java.home=$JAVA_HOME_11_X64 + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml 
b/.github/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml new file mode 100644 index 0000000000000..4d5bf97ed18dd --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java Jpms Spark Java11 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Jpms_Spark_Java11: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Jpms Spark Java 11 PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Java_Jpms_Spark_Java11"] + job_phrase: ["Run Jpms Spark Java 11 PostCommit"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Set up Java 11 + uses: actions/setup-java@v3.11.0 + with: + distribution: 'temurin' + java-version: '11' + - name: run PostCommit Java Jpms Spark Java11 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:jpms-tests:sparkRunnerIntegrationTest + arguments: -Dorg.gradle.java.home=$JAVA_HOME_11_X64 + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Sickbay.yml b/.github/workflows/beam_PostCommit_Java_Sickbay.yml new file mode 100644 index 
0000000000000..166170c47ce8a --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Sickbay.yml @@ -0,0 +1,82 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Sickbay + +on: + issue_comment: + types: [created] + schedule: + - cron: '29 6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Sickbay: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_Sickbay] + job_phrase: [Run Java Sickbay] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Sickbay' + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Sickbay script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :javaPostCommitSickbay + - name: Upload test report + uses: actions/upload-artifact@v3 + with: + name: java-code-coverage-report + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml new file mode 100644 index 0000000000000..b7f87fe201335 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml @@ -0,0 +1,94 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java ValidatesRunner Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Dataflow: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 480 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Dataflow] + job_phrase: [Run Dataflow ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Dataflow ValidatesRunner' + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesRunner + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions.yml new file mode 100644 index 0000000000000..08740fad64ad6 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions.yml @@ -0,0 +1,114 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java ValidatesRunner Dataflow JavaVersions + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions: + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{matrix.java_version}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 480 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions] + job_phrase: [Run Dataflow ValidatesRunner Java] + java_version: ['11','17'] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + startswith(github.event.comment.body, 'Run Dataflow ValidatesRunner Java') + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.java_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.java_version }}) + - name: Set up Java${{ matrix.java_version }} + uses: actions/setup-java@v3.8.0 + with: + distribution: 'temurin' + java-version: | + ${{ matrix.java_version }} + 8 + - name: run jar Java${{ matrix.java_version }} script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:testJar :runners:google-cloud-dataflow-java:worker:shadowJar + arguments: | + -Dorg.gradle.java.home=$JAVA_HOME_8_X64 \ + - name: run validatesRunner Java${{ matrix.java_version }} script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesRunner + arguments: | + -x shadowJar \ + -x shadowTestJar \ + -x compileJava \ + -x compileTestJava \ + -x jar \ + -x testJar \ + -x classes \ + -x testClasses \ +
-Dorg.gradle.java.home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml new file mode 100644 index 0000000000000..b58ffb72ed76c --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml @@ -0,0 +1,94 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java ValidatesRunner Dataflow Streaming + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 720 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming] + job_phrase: [Run Dataflow Streaming ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Dataflow Streaming ValidatesRunner' + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + 
java-version: '8' + - name: run validatesRunnerStreaming script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesRunnerStreaming + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml new file mode 100644 index 0000000000000..eef664fdf0aa2 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml @@ -0,0 +1,94 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java ValidatesRunner Dataflow V2 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */8 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Dataflow_V2: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 390 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Dataflow_V2] + job_phrase: [Run Java Dataflow V2 ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Dataflow V2 ValidatesRunner' + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) 
+ - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesRunnerV2 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesRunnerV2 + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml new file mode 100644 index 0000000000000..df71c3079a5c3 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml @@ -0,0 +1,94 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
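+# Note: the concurrency group below keys on the triggering issue, PR, sha, or +# ref, combined with the schedule, comment body, or sender login, so each +# trigger source cancels only its own earlier runs.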
+ +name: PostCommit Java ValidatesRunner Dataflow V2 Streaming + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */8 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 510 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming] + job_phrase: [Run Java Dataflow V2 ValidatesRunner Streaming] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Dataflow V2 ValidatesRunner Streaming' + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesRunnerV2Streaming script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesRunnerV2Streaming + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml new file mode 100644 index 0000000000000..6a260ad4595ee --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java ValidatesRunner Direct + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Direct: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 180 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Direct] + job_phrase: [Run Direct ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Direct ValidatesRunner' + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:direct-java:validatesRunner + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions.yml new file mode 100644 index 0000000000000..bb73a15b71b98 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions.yml @@ -0,0 +1,109 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java ValidatesRunner Direct JavaVersions + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions: + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{matrix.java_version}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 480 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions] + job_phrase: [Run Direct ValidatesRunner Java] + java_version: ['11','17'] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + startswith(github.event.comment.body, 'Run Direct ValidatesRunner Java') + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.java_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.java_version }}) + - name: Set up Java${{ matrix.java_version }} + uses: actions/setup-java@v3.8.0 + with: + distribution: 'temurin' + java-version: | + ${{ matrix.java_version }} + 8 + - name: run jar Java${{ matrix.java_version }} script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:direct-java:shadowJar :runners:direct-java:shadowTestJar + arguments: | + -Dorg.gradle.java.home=$JAVA_HOME_8_X64 \ + - name: run validatesRunner Java${{ matrix.java_version }} script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:direct-java:validatesRunner + arguments: | + -x shadowJar \ + -x shadowTestJar \ + -x compileJava \ + -x compileTestJava \ + -Dorg.gradle.java.home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name:
Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml new file mode 100644 index 0000000000000..0c273713def38 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml @@ -0,0 +1,99 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java ValidatesRunner Flink + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Flink: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Flink] + job_phrase: [Run Flink ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Flink ValidatesRunner' + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: '3.8' + - name: Setup Gradle + uses: gradle/gradle-build-action@v2 + with: + cache-read-only: false + - name: run validatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:flink:1.15:validatesRunner + - 
name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml new file mode 100644 index 0000000000000..68486868756ba --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml @@ -0,0 +1,112 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java ValidatesRunner Flink Java11 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Flink_Java11: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 270 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Flink_Java11] + job_phrase: [Run Flink ValidatesRunner Java 11] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + startswith(github.event.comment.body, 'Run Flink ValidatesRunner Java 11') + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Set up Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'temurin' + java-version: | + 11 + 8 + - name: run jar Java8 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + 
gradle-command: :runners:flink:1.15:jar :runners:flink:1.15:testJar + arguments: | + -Dorg.gradle.java.home=$JAVA_HOME_8_X64 \ + - name: run validatesRunner Java11 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:flink:1.15:validatesRunner + arguments: | + -x shadowJar \ + -x shadowTestJar \ + -x compileJava \ + -x compileTestJava \ + -x jar \ + -x testJar \ + -x classes \ + -x testClasses \ + -Dorg.gradle.java.home=$JAVA_HOME_11_X64 \ + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml new file mode 100644 index 0000000000000..5d17fbc61346e --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml @@ -0,0 +1,91 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
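Note on the Flink Java 11 job above: it splits the work into two Gradle invocations. The jars are built once under Java 8, then the ValidatesRunner suite runs under Java 11 with every compile and packaging task excluded, so the Java 8 artifacts are reused unchanged. As a rough illustration only (this patch drives Gradle through the self-hosted action, not a plain run step), the same sequence in a single step would look like:

      # Illustrative equivalent of the two steps above; not part of this patch.
      - name: Build on Java 8, validate on Java 11
        run: |
          ./gradlew :runners:flink:1.15:jar :runners:flink:1.15:testJar \
            -Dorg.gradle.java.home=$JAVA_HOME_8_X64
          ./gradlew :runners:flink:1.15:validatesRunner \
            -x shadowJar -x shadowTestJar -x compileJava -x compileTestJava \
            -x jar -x testJar -x classes -x testClasses \
            -Dorg.gradle.java.home=$JAVA_HOME_11_X64 --max-workers=12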
+ +name: PostCommit Java ValidatesRunner Samza + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Samza: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Samza] + job_phrase: [Run Samza ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Samza ValidatesRunner' + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:samza:validatesRunner + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml new file mode 100644 index 0000000000000..3204d9ad6ed57 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml @@ -0,0 +1,91 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
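Every workflow in this batch shares the same concurrency stanza: the group key joins the workflow name with whichever event identifier is non-empty, so a newly queued run for the same comment or branch cancels the one already in flight. As a worked example for the Samza workflow above (the issue number 12345 is hypothetical), the group key evaluates to roughly:

      # issue_comment trigger: 'PostCommit Java ValidatesRunner Samza @ 12345-Run Samza ValidatesRunner'
      # scheduled run:         'PostCommit Java ValidatesRunner Samza @ <head sha>-0 */6 * * *'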
+ +name: PostCommit Java ValidatesRunner Spark + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Spark: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Spark] + job_phrase: [Run Spark ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Spark ValidatesRunner' + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:validatesRunner + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml new file mode 100644 index 0000000000000..7e97f50dcac3b --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml @@ -0,0 +1,91 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java ValidatesRunner SparkStructuredStreaming + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming] + job_phrase: [Run Spark StructuredStreaming ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Spark StructuredStreaming ValidatesRunner' + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesStructuredStreamingRunnerBatch script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:validatesStructuredStreamingRunnerBatch + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml new file mode 100644 index 0000000000000..ce63b24d2c337 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml @@ -0,0 +1,112 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java ValidatesRunner Spark Java11 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Spark_Java11: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 270 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Spark_Java11] + job_phrase: [Run Spark ValidatesRunner Java 11] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + startswith(github.event.comment.body, 'Run Spark ValidatesRunner Java 11') + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Set up Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'temurin' + java-version: | + 11 + 8 + - name: run jar Java8 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:jar :runners:spark:3:testJar + arguments: | + -Dorg.gradle.java.home=$JAVA_HOME_8_X64 \ + - name: run validatesRunner Java11 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:validatesRunner + arguments: | + -x shadowJar \ + -x shadowTestJar \ + -x compileJava \ + -x compileTestJava \ + -x jar \ + -x testJar \ + -x classes \ + -x testClasses \ + -Dorg.gradle.java.home=$JAVA_HOME_11_X64 \ + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml new file mode 100644 index 
0000000000000..1f13269e7b95f --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml @@ -0,0 +1,91 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java ValidatesRunner Twister2 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Twister2: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Twister2] + job_phrase: [Run Twister2 ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Twister2 ValidatesRunner' + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:twister2:validatesRunner + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml new file mode 100644 index 
0000000000000..65b20d5e2a662 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java ValidatesRunner ULR + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_ULR: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 180 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_ULR] + job_phrase: [Run ULR Loopback ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run ULR Loopback ValidatesRunner' + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: '3.8' + - name: run ulrLoopbackValidatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:portability:java:ulrLoopbackValidatesRunner + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git 
a/.github/workflows/beam_PostCommit_Python_Examples_Dataflow.yml b/.github/workflows/beam_PostCommit_Python_Examples_Dataflow.yml index 2aba1760606c3..811f1f098ebfe 100644 --- a/.github/workflows/beam_PostCommit_Python_Examples_Dataflow.yml +++ b/.github/workflows/beam_PostCommit_Python_Examples_Dataflow.yml @@ -58,7 +58,6 @@ jobs: runs-on: [self-hosted, ubuntu-20.04, main] timeout-minutes: 180 strategy: - fail-fast: false matrix: job_name: ["beam_PostCommit_Python_Examples_Dataflow"] job_phrase: ["Run Python Examples_Dataflow"] diff --git a/.github/workflows/beam_PostCommit_TransformService_Direct.yml b/.github/workflows/beam_PostCommit_TransformService_Direct.yml new file mode 100644 index 0000000000000..346abcc9951a9 --- /dev/null +++ b/.github/workflows/beam_PostCommit_TransformService_Direct.yml @@ -0,0 +1,98 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit TransformService Direct + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_TransformService_Direct: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run TransformService_Direct PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_TransformService_Direct"] + job_phrase: ["Run TransformService_Direct PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase 
}} ${{ matrix.python_version }}) + - name: Set up Java 11 + uses: actions/setup-java@v3.11.0 + with: + distribution: 'temurin' + java-version: '11' + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run TransformService Direct script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:direct:xlang:transformServicePythonUsingJava + arguments: | + -PcompileAndRunTestsWithJava11 \ + -Pjava11Home=$JAVA_HOME_11_X64 \ + -PuseWheelDistribution \ + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Website_Publish.yml b/.github/workflows/beam_PostCommit_Website_Publish.yml new file mode 100644 index 0000000000000..1493c506772f3 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Website_Publish.yml @@ -0,0 +1,63 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
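The transform service job above fans out over Python versions through the matrix and appends the version to the trigger phrase handed to the setup action, so each leg reports as its own check. For the 3.8 leg, the Gradle invocation resolves to roughly the following (illustrative expansion, not part of this patch):

      # :sdks:python:test-suites:direct:xlang:transformServicePythonUsingJava \
      #   -PcompileAndRunTestsWithJava11 \
      #   -Pjava11Home=$JAVA_HOME_11_X64 \
      #   -PuseWheelDistribution \
      #   -PpythonVersion=3.8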
+ +name: PostCommit Website Publish + +on: + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: write + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Website_Publish: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 30 + name: beam_PostCommit_Website_Publish + steps: + - uses: actions/checkout@v3 + - name: run PostCommit Website Publish script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :website:clean :website:publishWebsite + arguments: -PgitPublishRemote="https://github.com/apache/beam.git" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml b/.github/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml new file mode 100644 index 0000000000000..ef5a4e5c82c0d --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
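The website publish job above is the GitHub Actions replacement for the Jenkins job deleted later in this patch: it runs only on schedule or manual dispatch (there is no comment trigger), and it needs contents: write because, per the description of the Jenkins job it replaces, :website:publishWebsite pushes the generated site into the asf-site branch of the remote named by -PgitPublishRemote. A hedged sketch of the equivalent local invocation:

      # Assumes a checkout of apache/beam with push rights to the remote.
      # ./gradlew :website:clean :website:publishWebsite \
      #   -PgitPublishRemote="https://github.com/apache/beam.git"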
+ +name: PostCommit XVR GoUsingJava Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_GoUsingJava_Dataflow: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_GoUsingJava_Dataflow PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_GoUsingJava_Dataflow"] + job_phrase: ["Run XVR_GoUsingJava_Dataflow PostCommit"] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: '3.8' + - name: run XVR GoUsingJava Dataflow script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesCrossLanguageRunnerGoUsingJava + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: java-code-coverage-report + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml new file mode 100644 index 0000000000000..8c6195d2608c6 --- /dev/null +++ b/.github/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml @@ -0,0 +1,122 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
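The two pre-commit workflows that follow gate on paths: filters, so each suite runs only when a file that can affect it changes. The push trigger also lists the workflow file itself, while the pull_request_target trigger does not, presumably because pull_request_target executes the workflow definition from the base branch anyway. A minimal sketch of the pattern (the module path here is hypothetical):

      on:
        pull_request_target:
          branches: ['master', 'release-*']
          paths:
            - "sdks/java/io/some-io/**"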
+ +name: PreCommit Java GCP IO Direct + +on: + push: + tags: ['v*'] + branches: ['master', 'release-*'] + paths: + - "runners/core-construction-java/**" + - "runners/core-java/**" + - "sdks/java/core/src/main/**" + - "sdks/java/extensions/arrow/**" + - "sdks/java/extensions/google-cloud-platform-core/**" + - "sdks/java/extensions/protobuf/**" + - "sdks/java/testing/test-utils/**" + - "sdks/java/io/common/**" + - "sdks/java/io/expansion-service/**" + - "sdks/java/io/google-cloud-platform/**" + - ".github/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml" + pull_request_target: + branches: ['master', 'release-*'] + paths: + - "runners/core-construction-java/**" + - "runners/core-java/**" + - "sdks/java/core/src/main/**" + - "sdks/java/extensions/arrow/**" + - "sdks/java/extensions/google-cloud-platform-core/**" + - "sdks/java/extensions/protobuf/**" + - "sdks/java/testing/test-utils/**" + - "sdks/java/io/common/**" + - "sdks/java/io/expansion-service/**" + - "sdks/java/io/google-cloud-platform/**" + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PreCommit_Java_GCP_IO_Direct: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PreCommit_Java_GCP_IO_Direct"] + job_phrase: ["Run Java_GCP_IO_Direct PreCommit"] + timeout-minutes: 120 + if: | + github.event_name == 'push' || + github.event_name == 'pull_request_target' || + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + github.event.comment.body == 'Run Java_GCP_IO_Direct PreCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PreCommit Java GCP IO Direct script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: | + :sdks:java:io:google-cloud-platform:build \ + :sdks:java:io:google-cloud-platform:expansion-service:build \ + :sdks:java:io:google-cloud-platform:postCommit \ + arguments: | + -PdisableSpotlessCheck=true \ + -PdisableCheckStyle=true \ + -PenableJacocoReport \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: java-code-coverage-report + path: "**/build/test-results/**/*.xml" + - name: Archive SpotBugs Results + uses: actions/upload-artifact@v3 + with: + name: SpotBugs Results + path: 
"**/build/reports/spotbugs/*.html" \ No newline at end of file diff --git a/.github/workflows/beam_PreCommit_Java_Kafka_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Kafka_IO_Direct.yml new file mode 100644 index 0000000000000..1c67c0b7ec446 --- /dev/null +++ b/.github/workflows/beam_PreCommit_Java_Kafka_IO_Direct.yml @@ -0,0 +1,109 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PreCommit Java Kafka IO Direct + +on: + push: + tags: ['v*'] + branches: ['master', 'release-*'] + paths: + - "sdks/java/io/kafka/**" + - "sdks/java/testing/test-utils/**" + - "sdks/java/expansion-service/**" + - "sdks/java/io/synthetic/**" + - "sdks/java/io/expansion-service/**" + - ".github/workflows/beam_PreCommit_Java_Kafka_IO_Direct.yml" + pull_request_target: + branches: ['master', 'release-*'] + paths: + - "sdks/java/io/kafka/**" + - "sdks/java/testing/test-utils/**" + - "sdks/java/expansion-service/**" + - "sdks/java/io/synthetic/**" + - "sdks/java/io/expansion-service/**" + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PreCommit_Java_Kafka_IO_Direct: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PreCommit_Java_Kafka_IO_Direct"] + job_phrase: ["Run Java_Kafka_IO_Direct PreCommit"] + timeout-minutes: 60 + if: | + github.event_name == 'push' || + github.event_name == 'pull_request_target' || + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + github.event.comment.body == 'Run Java_Kafka_IO_Direct PreCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + steps: + - uses: actions/checkout@v3 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ 
matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run Kafka IO build script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:io:kafka:build :sdks:java:io:kafka:kafkaVersionsCompatibilityTest + arguments: | + -PdisableSpotlessCheck=true \ + -PdisableCheckStyle=true \ + --no-parallel \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: java-code-coverage-report + path: "**/build/test-results/**/*.xml" + - name: Archive SpotBugs Results + uses: actions/upload-artifact@v3 + with: + name: SpotBugs Results + path: "**/build/reports/spotbugs/*.html" \ No newline at end of file diff --git a/.github/workflows/beam_PreCommit_SQL_Java11.yml b/.github/workflows/beam_PreCommit_SQL_Java11.yml index fdbb68f940a4d..e2c666dafa819 100644 --- a/.github/workflows/beam_PreCommit_SQL_Java11.yml +++ b/.github/workflows/beam_PreCommit_SQL_Java11.yml @@ -43,7 +43,7 @@ env: permissions: actions: write pull-requests: read - checks: read + checks: write contents: read deployments: read id-token: none @@ -115,4 +115,10 @@ jobs: uses: actions/upload-artifact@v3 with: name: SpotBugs Results + path: '**/build/reports/spotbugs/*.html' + - name: Publish SpotBugs Results + uses: jwgmeligmeyling/spotbugs-github-action@v1.2 + if: always() + with: + name: SpotBugs path: '**/build/reports/spotbugs/*.html' \ No newline at end of file diff --git a/.github/workflows/beam_PreCommit_Spotless.yml b/.github/workflows/beam_PreCommit_Spotless.yml index 0300ac07b4c65..a703454a74fed 100644 --- a/.github/workflows/beam_PreCommit_Spotless.yml +++ b/.github/workflows/beam_PreCommit_Spotless.yml @@ -50,7 +50,7 @@ concurrency: permissions: actions: write pull-requests: read - checks: read + checks: write contents: read deployments: read id-token: none @@ -94,4 +94,9 @@ jobs: uses: actions/upload-artifact@v3 with: name: java-code-coverage-report - path: "**/build/reports/checkstyle/*.xml" \ No newline at end of file + path: "**/build/reports/checkstyle/*.xml" + - name: Publish checkstyle check + uses: jwgmeligmeyling/checkstyle-github-action@v1 + if: always() + with: + path: '**/build/reports/checkstyle/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PreCommit_Whitespace.yml b/.github/workflows/beam_PreCommit_Whitespace.yml index 4443c1d0d1a0f..04705d49928c3 100644 --- a/.github/workflows/beam_PreCommit_Whitespace.yml +++ b/.github/workflows/beam_PreCommit_Whitespace.yml @@ -19,10 +19,10 @@ on: push: tags: ['v*'] branches: ['master', 'release-*'] - paths: ['*.md', '*.build.gradle','.github/workflows/beam_PreCommit_Whitespace.yml'] + paths: ['**.md', '**.build.gradle', 'build.gradle.kts', '.github/workflows/beam_PreCommit_Whitespace.yml'] pull_request_target: branches: ['master', 'release-*'] - paths: ['*.md', '*.build.gradle'] + paths: ['**.md', '**.build.gradle', 'build.gradle.kts'] issue_comment: types: [created] schedule: diff --git a/.test-infra/jenkins/job_PostCommit_Website_Publish.groovy b/.test-infra/jenkins/job_PostCommit_Website_Publish.groovy deleted file mode 100644 index fcf22e2b98983..0000000000000 --- a/.test-infra/jenkins/job_PostCommit_Website_Publish.groovy +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import CommonJobProperties as commonJobProperties -import PostcommitJobBuilder - - -// This job builds and publishes the website into the asf-site branch of the beam repo. -PostcommitJobBuilder.postCommitJob('beam_PostCommit_Website_Publish', '', - 'Website Publish', this) { - - description('Publish generated website content into asf-site branch for hosting.') - - // Set common parameters. - commonJobProperties.setTopLevelMainJobProperties(delegate, 'master', 30, true, 'git-websites') - - // Gradle goals for this job. - steps { - gradle { - rootBuildScriptDir(commonJobProperties.checkoutDir) - tasks(':website:clean') - tasks(':website:publishWebsite') - commonJobProperties.setGradleSwitches(delegate) - } - } - } diff --git a/.test-infra/jenkins/job_PreCommit_PythonDocs.groovy b/.test-infra/jenkins/job_PreCommit_PythonDocs.groovy deleted file mode 100644 index 17202263493c6..0000000000000 --- a/.test-infra/jenkins/job_PreCommit_PythonDocs.groovy +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder -import CommonJobProperties as common - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'PythonDocs', - gradleTask: ':pythonDocsPreCommit', - timeoutMins: 30, - triggerPathPatterns: [ - '^sdks/python/.*$', - ] - ) -builder.build { - publishers {} -} diff --git a/.test-infra/jenkins/job_PreCommit_PythonLint.groovy b/.test-infra/jenkins/job_PreCommit_PythonLint.groovy deleted file mode 100644 index 118ca7b412b71..0000000000000 --- a/.test-infra/jenkins/job_PreCommit_PythonLint.groovy +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'PythonLint', - gradleTask: ':pythonLintPreCommit', - triggerPathPatterns: [ - '^sdks/python/.*$', - '^release/.*$', - ] - ) -builder.build() diff --git a/.test-infra/jenkins/job_PreCommit_RAT.groovy b/.test-infra/jenkins/job_PreCommit_RAT.groovy deleted file mode 100644 index 613caa9af0de6..0000000000000 --- a/.test-infra/jenkins/job_PreCommit_RAT.groovy +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'RAT', - gradleTask: ':rat' - ) -builder.build() diff --git a/.test-infra/jenkins/job_PreCommit_Spotless.groovy b/.test-infra/jenkins/job_PreCommit_Spotless.groovy deleted file mode 100644 index a9da1ad5491a8..0000000000000 --- a/.test-infra/jenkins/job_PreCommit_Spotless.groovy +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Spotless', - gradleTask: 'spotlessCheck checkStyleMain checkStyleTest', - triggerPathPatterns: [ - '^buildSrc/.*$', - '^sdks/java/.*$', - '^runners/.*$', - '^examples/java/.*$', - '^examples/kotlin/.*$', - '^.test-infra/jenkins/.*$', - ] - ) -builder.build { - publishers { - recordIssues { - tools { - checkStyle { - pattern('**/build/reports/checkstyle/*.xml') - } - } - enabledForFailure(true) - } - } -} diff --git a/.test-infra/jenkins/job_PreCommit_Website.groovy b/.test-infra/jenkins/job_PreCommit_Website.groovy deleted file mode 100644 index 73014819ed004..0000000000000 --- a/.test-infra/jenkins/job_PreCommit_Website.groovy +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Website', - gradleTask: ':websitePreCommit', - triggerPathPatterns: ['^website/.*$']) -builder.build() - diff --git a/.test-infra/jenkins/job_PreCommit_Whitespace.groovy b/.test-infra/jenkins/job_PreCommit_Whitespace.groovy deleted file mode 100644 index 0221cf72917d6..0000000000000 --- a/.test-infra/jenkins/job_PreCommit_Whitespace.groovy +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Whitespace', - gradleTask: ':whitespacePreCommit', - triggerPathPatterns: [ - '.*\\.md$', - '.*build\\.gradle$', - '.*build\\.gradle.kts$', - ] - ) -builder.build() diff --git a/.test-infra/jenkins/job_sonarqube_report.groovy b/.test-infra/jenkins/job_sonarqube_report.groovy deleted file mode 100644 index 515b7e43061e6..0000000000000 --- a/.test-infra/jenkins/job_sonarqube_report.groovy +++ /dev/null @@ -1,55 +0,0 @@ -/* * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import CommonJobProperties as commonJobProperties - -job('beam_sonarqube_report') { - commonJobProperties.setTopLevelMainJobProperties( - delegate, 'master', 120, - true) - - /** - * https://issues.jenkins-ci.org/browse/JENKINS-42741 - */ - wrappers { - withSonarQubeEnv { - installationName('ASF Sonar Analysis') - } - } - - - // TODO(https://github.com/apache/beam/issues/24768) remove or fix this job. - // commonJobProperties.setAutoJob delegate - - publishers { - archiveJunit('**/build/test-results/**/*.xml') - } - - steps { - gradle { - rootBuildScriptDir(commonJobProperties.checkoutDir) - tasks("test") - tasks("jacocoTestReport") - tasks("sonarqube") - switches("--continue") - switches("-PdisableSpotlessCheck=true") - switches("-PdisableCheckStyle=true") - // disable parallelization to avoid output collisions - switches("--no-parallel") - } - } -} diff --git a/CHANGES.md b/CHANGES.md index e9a3044b6ea91..fb1a14fb1287b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -71,8 +71,8 @@ ## Breaking Changes -* X behavior was changed ([#X](https://github.com/apache/beam/issues/X)). * Removed fastjson library dependency for Beam SQL. Table property is changed to be based on jackson ObjectNode (Java) ([#24154](https://github.com/apache/beam/issues/24154)). +* Removed TensorFlow from Beam Python container images [PR](https://github.com/apache/beam/pull/28424). If you have been negatively affected by this change, please comment on [#20605](https://github.com/apache/beam/issues/20605). ## Deprecations @@ -83,7 +83,7 @@ * Fixed X (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). ## Security Fixes -* Fixed (CVE-YYYY-NNNN)[https://www.cve.org/CVERecord?id=CVE-YYYY-NNNN] (Java/Python/Go) ([#X](https://github.com/apache/beam/issues/X)). +* Python containers updated, fixing [CVE-2021-30474](https://nvd.nist.gov/vuln/detail/CVE-2021-30474), [CVE-2021-30475](https://nvd.nist.gov/vuln/detail/CVE-2021-30475), [CVE-2021-30473](https://nvd.nist.gov/vuln/detail/CVE-2021-30473), [CVE-2020-36133](https://nvd.nist.gov/vuln/detail/CVE-2020-36133), [CVE-2020-36131](https://nvd.nist.gov/vuln/detail/CVE-2020-36131), [CVE-2020-36130](https://nvd.nist.gov/vuln/detail/CVE-2020-36130), and [CVE-2020-36135](https://nvd.nist.gov/vuln/detail/CVE-2020-36135) ## Known Issues @@ -146,6 +146,7 @@ * Long-running Python pipelines might experience a memory leak: [#28246](https://github.com/apache/beam/issues/28246). * Python Pipelines using BigQuery IO or `orjson` dependency might experience segmentation faults or get stuck: [#28318](https://github.com/apache/beam/issues/28318). 
+* Beam Python containers rely on a version of Debian/aom that has several security vulnerabilities: [CVE-2021-30474](https://nvd.nist.gov/vuln/detail/CVE-2021-30474), [CVE-2021-30475](https://nvd.nist.gov/vuln/detail/CVE-2021-30475), [CVE-2021-30473](https://nvd.nist.gov/vuln/detail/CVE-2021-30473), [CVE-2020-36133](https://nvd.nist.gov/vuln/detail/CVE-2020-36133), [CVE-2020-36131](https://nvd.nist.gov/vuln/detail/CVE-2020-36131), [CVE-2020-36130](https://nvd.nist.gov/vuln/detail/CVE-2020-36130), and [CVE-2020-36135](https://nvd.nist.gov/vuln/detail/CVE-2020-36135) # [2.49.0] - 2023-07-17 diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy new file mode 100644 index 0000000000000..844afd75f008b --- /dev/null +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.gradle + +import org.gradle.api.Project +import org.gradle.api.tasks.testing.Test + +import javax.inject.Inject + +class IoPerformanceTestUtilities { + abstract static class IoPerformanceTest extends Test { + @Inject + IoPerformanceTest(Project runningProject, String module, String testClass, Map systemProperties){ + group = "Verification" + description = "Runs IO Performance Test for $testClass" + outputs.upToDateWhen { false } + testClassesDirs = runningProject.findProject(":it:${module}").sourceSets.test.output.classesDirs + classpath = runningProject.sourceSets.test.runtimeClasspath + runningProject.findProject(":it:${module}").sourceSets.test.runtimeClasspath + + include "**/${testClass}.class" + + systemProperty 'exportDataset', System.getenv('exportDataset') + systemProperty 'exportTable', System.getenv('exportTable') + + for (entry in systemProperties){ + systemProperty entry.key, entry.value + } + } + } +} diff --git a/examples/notebooks/beam-ml/README.md b/examples/notebooks/beam-ml/README.md index 3a1ff935eb518..77bf3fc99f155 100644 --- a/examples/notebooks/beam-ml/README.md +++ b/examples/notebooks/beam-ml/README.md @@ -54,6 +54,7 @@ This section contains the following example notebooks. 
* [Apache Beam RunInference for scikit-learn](https://github.com/apache/beam/blob/master/examples/notebooks/beam-ml/run_inference_sklearn.ipynb) * [Apache Beam RunInference with TensorFlow](https://github.com/apache/beam/blob/master/examples/notebooks/beam-ml/run_inference_tensorflow.ipynb) * [Use RunInference with a model from TensorFlow Hub](https://github.com/apache/beam/blob/master/examples/notebooks/beam-ml/run_inference_with_tensorflow_hub.ipynb) +* [Apache Beam RunInference with Hugging Face](https://github.com/apache/beam/blob/master/examples/notebooks/beam-ml/run_inference_huggingface.ipynb) * [Apache Beam RunInference with XGBoost](https://github.com/apache/beam/blob/master/examples/notebooks/beam-ml/run_inference_xgboost.ipynb) * [Use RunInference with TFX](https://github.com/apache/beam/blob/master/examples/notebooks/beam-ml/run_inference_tensorflow_with_tfx.ipynb) * [Use RunInference in Apache Beam](https://github.com/apache/beam/blob/master/examples/notebooks/beam-ml/run_inference_pytorch_tensorflow_sklearn.ipynb) diff --git a/examples/notebooks/beam-ml/mltransform_basic.ipynb b/examples/notebooks/beam-ml/mltransform_basic.ipynb new file mode 100644 index 0000000000000..820bc3400b580 --- /dev/null +++ b/examples/notebooks/beam-ml/mltransform_basic.ipynb @@ -0,0 +1,679 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "source": [ + "# @title ###### Licensed to the Apache Software Foundation (ASF), Version 2.0 (the \"License\")\n", + "\n", + "# Licensed to the Apache Software Foundation (ASF) under one\n", + "# or more contributor license agreements. See the NOTICE file\n", + "# distributed with this work for additional information\n", + "# regarding copyright ownership. The ASF licenses this file\n", + "# to you under the Apache License, Version 2.0 (the\n", + "# \"License\"); you may not use this file except in compliance\n", + "# with the License. You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing,\n", + "# software distributed under the License is distributed on an\n", + "# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n", + "# KIND, either express or implied. See the License for the\n", + "# specific language governing permissions and limitations\n", + "# under the License" + ], + "metadata": { + "id": "34gTXZ7BIArp" + }, + "id": "34gTXZ7BIArp", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Preprocess data with MLTransform\n", + "\n", + "\n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + "
\n" + ], + "metadata": { + "id": "0n0YAd-0KQyi" + }, + "id": "0n0YAd-0KQyi" + }, + { + "cell_type": "markdown", + "id": "d3b81cf2-8603-42bd-995e-9e14631effd0", + "metadata": { + "id": "d3b81cf2-8603-42bd-995e-9e14631effd0" + }, + "source": [ + "This notebook demonstrates how to use `MLTransform` to preprocess your data for machine learning models. `MLTransform` is a `PTransform` that wraps multiple Apache Beam data processing transforms. As a result, `MLTransform` gives you the ability to preprocess different types of data in multiple ways with one transform.\n", + "\n", + "This notebook uses data processing transforms defined in the [apache_beam/ml/transforms/tft](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.transforms.tft.html) module." + ] + }, + { + "cell_type": "markdown", + "id": "f0097dbd-2657-4cbe-a334-e0401816db01", + "metadata": { + "id": "f0097dbd-2657-4cbe-a334-e0401816db01" + }, + "source": [ + "## Import the requried modules\n", + "\n", + "To use `MLTransfrom`, install `tensorflow_transform` and the Apache Beam SDK version 2.50.0 or later.\n" + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install tensorflow_transform --quiet\n", + "!pip install apache_beam>=2.50.0 --quiet" + ], + "metadata": { + "id": "MRWkC-n2DmjM" + }, + "id": "MRWkC-n2DmjM", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "88ddd3a4-3643-4731-b99e-a5d697fbc165", + "metadata": { + "id": "88ddd3a4-3643-4731-b99e-a5d697fbc165" + }, + "outputs": [], + "source": [ + "import apache_beam as beam\n", + "from apache_beam.ml.transforms.base import MLTransform\n", + "from apache_beam.ml.transforms.tft import ComputeAndApplyVocabulary\n", + "from apache_beam.options.pipeline_options import PipelineOptions\n", + "from apache_beam.ml.transforms.utils import ArtifactsFetcher" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Artifacts are additional data elements created by data transformations. Examples of artifacts are the `minimum` and `maximum` values from a `ScaleTo01` transformation, or the `mean` and `variance` from a `ScaleToZScore` transformation. For more information about artifacts, see [Artifacts](https://beam.apache.org/documentation/ml/preprocess-data/#artifacts).\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "90nXXc_A4Bmf" + }, + "id": "90nXXc_A4Bmf" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bdabbc57-ec98-4113-b37e-61962f488d61", + "metadata": { + "id": "bdabbc57-ec98-4113-b37e-61962f488d61" + }, + "outputs": [], + "source": [ + "# Store artifacts generated by MLTransform.\n", + "# Each MLTransform instance requires an empty artifact location.\n", + "# This method deletes and refreshes the artifact location for each example.\n", + "artifact_location = './my_artifacts'\n", + "def delete_artifact_location(artifact_location):\n", + " import shutil\n", + " import os\n", + " if os.path.exists(artifact_location):\n", + " shutil.rmtree(artifact_location)" + ] + }, + { + "cell_type": "markdown", + "id": "28b1719c-7287-4cec-870b-9fabc4c4a4ef", + "metadata": { + "id": "28b1719c-7287-4cec-870b-9fabc4c4a4ef" + }, + "source": [ + "## Compute and map the vocabulary\n", + "\n", + "\n", + "[ComputeAndApplyVocabulary](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.transforms.tft.html#apache_beam.ml.transforms.tft.ComputeAndApplyVocabulary) is a data processing transform that computes a unique vocabulary from a dataset and then maps each word or token to a distinct integer index. 
It facilitates transforming textual data into numerical representations for machine learning tasks.\n", + "\n", + "Use `ComputeAndApplyVocabulary` with `MLTransform`.\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "56d6d09a-8d34-444f-a1e4-a75624b36932", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "56d6d09a-8d34-444f-a1e4-a75624b36932", + "outputId": "2eb99e87-fb23-498c-ed08-775befa3a823" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Row(x=array([1, 0, 4]))\n", + "Row(x=array([1, 0, 6, 2, 3, 5]))\n" + ] + } + ], + "source": [ + "delete_artifact_location(artifact_location)\n", + "\n", + "data = [\n", + " {'x': ['I', 'love', 'pie']},\n", + " {'x': ['I', 'love', 'going', 'to', 'the', 'park']}\n", + "]\n", + "options = PipelineOptions()\n", + "with beam.Pipeline(options=options) as p:\n", + " data = (\n", + " p\n", + " | 'CreateData' >> beam.Create(data)\n", + " | 'MLTransform' >> MLTransform(write_artifact_location=artifact_location).with_transform(ComputeAndApplyVocabulary(columns=['x']))\n", + " | 'PrintResults' >> beam.Map(print)\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "1e133002-7229-459d-8e3c-b41f4d65e76d", + "metadata": { + "id": "1e133002-7229-459d-8e3c-b41f4d65e76d" + }, + "source": [ + "### Fetch vocabulary artifacts\n", + "\n", + "This example generates a file with all the vocabulary in the dataset, referred to in `MLTransform` as an artifact. To fetch artifacts generated by the `ComputeAndApplyVocabulary` transform, use the `ArtifactsFetcher` class. This class fetches both a vocabulary list and a path to the vocabulary file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c5fe46a-c718-4a82-bad8-aa091c0b0538", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9c5fe46a-c718-4a82-bad8-aa091c0b0538", + "outputId": "cd8b6cf3-6093-4b1b-a063-ff327c090a92" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "['love', 'I', 'to', 'the', 'pie', 'park', 'going']\n", + "./my_artifacts/transform_fn/assets/compute_and_apply_vocab\n", + "7\n" + ] + } + ], + "source": [ + "fetcher = ArtifactsFetcher(artifact_location=artifact_location)\n", + "# get vocab list\n", + "vocab_list = fetcher.get_vocab_list()\n", + "print(vocab_list)\n", + "# get vocab file path\n", + "vocab_file_path = fetcher.get_vocab_filepath()\n", + "print(vocab_file_path)\n", + "# get vocab size\n", + "vocab_size = fetcher.get_vocab_size()\n", + "print(vocab_size)" + ] + }, + { + "cell_type": "markdown", + "id": "5f955f3d-3192-42f7-aa55-48249223418d", + "metadata": { + "id": "5f955f3d-3192-42f7-aa55-48249223418d" + }, + "source": [ + "## Use TF-IDF to weight terms\n", + "\n", + "[TF-IDF](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.transforms.tft.html#apache_beam.ml.transforms.tft.TFIDF) (Term Frequency-Inverse Document Frequency) is a numerical statistic used in text processing to reflect how important a word is to a document in a collection or corpus. It balances the frequency of a word in a document against its frequency in the entire corpus, giving higher value to more specific terms.\n", + "\n", + "To use `TF-IDF` with `MLTransform`, follow these steps:\n", + "\n", + "1. Compute the vocabulary of the dataset by using `ComputeAndApplyVocabulary`.\n", + "2. 
Use the output of `ComputeAndApplyVocabulary` to calculate the `TF-IDF` weights.\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a8cb94b-57eb-4c4c-aa4c-22cf3193ea85", + "metadata": { + "id": "8a8cb94b-57eb-4c4c-aa4c-22cf3193ea85" + }, + "outputs": [], + "source": [ + "from apache_beam.ml.transforms.tft import TFIDF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "970d7222-194e-460e-b698-a00f1fcafb95", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "970d7222-194e-460e-b698-a00f1fcafb95", + "outputId": "e87409ed-5e33-43fa-d3b6-a0c012636cef" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Row(x=array([1, 0, 4]), x_tfidf_weight=array([0.33333334, 0.33333334, 0.4684884 ], dtype=float32), x_vocab_index=array([0, 1, 4]))\n", + "Row(x=array([1, 0, 6, 2, 3, 5]), x_tfidf_weight=array([0.16666667, 0.16666667, 0.2342442 , 0.2342442 , 0.2342442 ,\n", + " 0.2342442 ], dtype=float32), x_vocab_index=array([0, 1, 2, 3, 5, 6]))\n" + ] + } + ], + "source": [ + "data = [\n", + " {'x': ['I', 'love', 'pie']},\n", + " {'x': ['I', 'love', 'going', 'to', 'the', 'park']}\n", + "]\n", + "delete_artifact_location(artifact_location)\n", + "options = PipelineOptions()\n", + "with beam.Pipeline(options=options) as p:\n", + " data = (\n", + " p\n", + " | beam.Create(data)\n", + " | MLTransform(write_artifact_location=artifact_location\n", + " ).with_transform(ComputeAndApplyVocabulary(columns=['x'])\n", + " ).with_transform(TFIDF(columns=['x']))\n", + " )\n", + " _ = data | beam.Map(print)" + ] + }, + { + "cell_type": "markdown", + "id": "7b1feb4f-bb0b-4f61-8349-e1ba411858cf", + "metadata": { + "id": "7b1feb4f-bb0b-4f61-8349-e1ba411858cf" + }, + "source": [ + "### TF-IDF output\n", + "\n", + "`TF-IDF` produces two output columns for a given input. For example, if you input `x`, the output column names in the dictionary are `x_vocab_index` and `x_tfidf_weight`.\n", + "\n", + "- `vocab_index`: indices of the words computed in the `ComputeAndApplyVocabulary` transform.\n", + "- `tfidf_weight`: the weight for each vocabulary index. The weight represents how important the word present at that `vocab_index` is to the document.\n", + "\n", + "For example, in the output above, `I` and `love` appear in both documents, so their weights (about 0.33) are lower than the weight of `pie` (about 0.47), which appears in only one document.\n" + ] + }, + { + "cell_type": "markdown", + "id": "d3b5b9dd-ed35-460b-9fb3-0ffb5c3633db", + "metadata": { + "id": "d3b5b9dd-ed35-460b-9fb3-0ffb5c3633db" + }, + "source": [ + "## Scale the data\n", + "\n", + "The following examples show two ways to scale data:\n", + "\n", + "* Scale data between 0 and 1.\n", + "* Scale data using z-score.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "3bd20692-6d14-4ece-a2e7-69a2a6fac5d4", + "metadata": { + "id": "3bd20692-6d14-4ece-a2e7-69a2a6fac5d4" + }, + "source": [ + "### Scale the data between 0 and 1\n", + "\n", + "Scale the data so that it's in the range of 0 to 1. To scale the data, the transform calculates `minimum` and `maximum` values on the whole dataset, and then performs the following calculation:\n", + "\n", + "`x = (x - x_min) / (x_max - x_min)`\n", + "\n", + "To scale the data, use the [ScaleTo01](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.transforms.tft.html#apache_beam.ml.transforms.tft.ScaleTo01) data processing transform in `MLTransform`.\n",
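+ "\n", + "As a quick check of the formula against the dataset below: `x_min = 1` and `x_max = 100`, so the value `2` maps to `(2 - 1) / (100 - 1) ≈ 0.0101`, which matches the `0.01010101` in the output."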
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "841a8e1f-2f5b-4fd9-bb35-12a2393922de", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "841a8e1f-2f5b-4fd9-bb35-12a2393922de", + "outputId": "efcae38d-96f6-4394-e5f5-c36644d3a9ff" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Row(x=array([0. , 0.01010101, 0.02020202], dtype=float32), x_max=array([100.], dtype=float32), x_min=array([1.], dtype=float32))\n", + "Row(x=array([0.03030303, 0.04040404, 0.06060606], dtype=float32), x_max=array([100.], dtype=float32), x_min=array([1.], dtype=float32))\n", + "Row(x=array([0.09090909, 0.01010101, 0.09090909, 0.33333334, 1. ,\n", + " 0.53535354, 0.1919192 , 0.09090909, 0.01010101, 0.02020202,\n", + " 0.1010101 , 0.11111111], dtype=float32), x_max=array([100.], dtype=float32), x_min=array([1.], dtype=float32))\n" + ] + } + ], + "source": [ + "delete_artifact_location(artifact_location)\n", + "\n", + "from apache_beam.ml.transforms.tft import ScaleTo01\n", + "data = [\n", + " {'x': [1, 2, 3]}, {'x': [4, 5, 7]}, {'x': [10, 2, 10, 34, 100, 54, 20, 10, 2, 3, 11, 12]}]\n", + "\n", + "with beam.Pipeline() as p:\n", + " _ = (\n", + " p\n", + " | 'CreateData' >> beam.Create(data)\n", + " | 'MLTransform' >> MLTransform(write_artifact_location=artifact_location).with_transform(ScaleTo01(columns=['x']))\n", + " | 'PrintResults' >> beam.Map(print)\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "id": "b1838ecb-2168-45f8-bdf2-41ae0007cb71", + "metadata": { + "id": "b1838ecb-2168-45f8-bdf2-41ae0007cb71" + }, + "source": [ + "The output contains artifacts such as `x_max` and `x_min`, which represent the maximum and minimum values of the entire dataset.\n" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Scale the data by using the z-score\n", + "\n", + "Similar to `ScaleTo01`, use [ScaleToZScore](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.transforms.tft.html#apache_beam.ml.transforms.tft.ScaleToZScore) to scale the values by using the [z-score](https://www.tensorflow.org/tfx/transform/api_docs/python/tft/scale_to_z_score#:~:text=Scaling%20to%20z%2Dscore%20subtracts%20out%20the%20mean%20and%20divides%20by%20standard%20deviation.%20Note%20that%20the%20standard%20deviation%20computed%20here%20is%20based%20on%20the%20biased%20variance%20(0%20delta%20degrees%20of%20freedom)%2C%20as%20computed%20by%20analyzers.var.).\n" + ], + "metadata": { + "id": "_bHdYkuF74Fe" + }, + "id": "_bHdYkuF74Fe" + }, + { + "cell_type": "code", + "source": [ + "delete_artifact_location(artifact_location)\n", + "\n", + "from apache_beam.ml.transforms.tft import ScaleToZScore\n", + "data = [\n", + " {'x': [1, 2, 3]}, {'x': [4, 5, 7]}, {'x': [10, 2, 10, 34, 100, 54, 20, 10, 2, 3, 11, 12]}]\n", + "\n", + "# delete_artifact_location(artifact_location)\n", + "with beam.Pipeline() as p:\n", + " _ = (\n", + " p\n", + " | 'CreateData' >> beam.Create(data)\n", + " | 'MLTransform' >> MLTransform(write_artifact_location=artifact_location).with_transform(ScaleToZScore(columns=['x']))\n", + " | 'PrintResults' >> beam.Map(print)\n", + " )\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aHK6zdfE732A", + "outputId": "8b4f5082-35a2-42c4-9342-a77f99338e17" + }, + "id": "aHK6zdfE732A", + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Row(x=array([-0.62608355, -0.5846515 ,
-0.54321957], dtype=float32), x_mean=array([16.11111], dtype=float32), x_var=array([582.5432], dtype=float32))\n", + "Row(x=array([-0.50178754, -0.46035555, -0.37749153], dtype=float32), x_mean=array([16.11111], dtype=float32), x_var=array([582.5432], dtype=float32))\n", + "Row(x=array([-0.25319555, -0.5846515 , -0.25319555, 0.7411725 , 3.4756844 ,\n", + " 1.5698125 , 0.16112447, -0.25319555, -0.5846515 , -0.54321957,\n", + " -0.21176355, -0.17033154], dtype=float32), x_mean=array([16.11111], dtype=float32), x_var=array([582.5432], dtype=float32))\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Use multiple transforms in a single MLTransform\n", + "\n", + "Apply the same transform to multiple columns. For example, the columns `x` and\n", + "`y` require scaling between 0 and 1. For the column `s`, compute the vocabulary. You can use a single `MLTransform` for both of these tasks.\n", + "\n", + "When you use multiple data processing transforms, either chain the transforms or pass them directly as a list. Both approaches produce the same output." + ], + "metadata": { + "id": "FNoWfyMR8JI-" + }, + "id": "FNoWfyMR8JI-" + }, + { + "cell_type": "markdown", + "source": [ + "### Use multiple data processing transforms in a single MLTransform\n", + "\n", + "The following example shows multiple data processing transforms chained to `MLTransform`." + ], + "metadata": { + "id": "Mj6hd3jZ9-nr" + }, + "id": "Mj6hd3jZ9-nr" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e382cca-cfd3-4ac1-956a-16480603dd5b", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6e382cca-cfd3-4ac1-956a-16480603dd5b", + "outputId": "7f185d92-ad91-4067-c11f-66150968ec97" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Row(s=array([1, 0, 4]), x=array([0. , 0.16666667, 0.33333334], dtype=float32), x_max=array([7.], dtype=float32), x_min=array([1.], dtype=float32), y=array([0. , 0.8910891, 1. ], dtype=float32), y_max=array([111.], dtype=float32), y_min=array([10.], dtype=float32))\n", + "Row(s=array([1, 0, 6, 2, 3, 5]), x=array([0.5 , 0.6666667, 1. ], dtype=float32), x_max=array([7.], dtype=float32), x_min=array([1.], dtype=float32), y=array([0.00990099, 0.10891089, 0.3960396 ], dtype=float32), y_max=array([111.], dtype=float32), y_min=array([10.], dtype=float32))\n" + ] + } + ], + "source": [ + "delete_artifact_location(artifact_location)\n", + "\n", + "from apache_beam.ml.transforms.tft import ScaleTo01\n", + "from apache_beam.ml.transforms.tft import ComputeAndApplyVocabulary\n", + "\n", + "data = [\n", + " {'x': [1, 2, 3], 'y': [10, 100, 111], 's': ['I', 'love', 'pie']},\n", + " {'x': [4, 5, 7], 'y': [11, 21, 50], 's': ['I', 'love', 'going', 'to', 'the', 'park']}\n", + "]\n", + "\n", + "# delete_artifact_location(artifact_location)\n", + "with beam.Pipeline() as p:\n", + " _ = (\n", + " p\n", + " | 'CreateData' >> beam.Create(data)\n", + " | 'MLTransform' >> MLTransform(write_artifact_location=artifact_location).with_transform(\n", + " ScaleTo01(columns=['x', 'y'])).with_transform(ComputeAndApplyVocabulary(columns=['s']))\n", + " | 'PrintResults' >> beam.Map(print)\n", + " )" + ] + }, + { + "cell_type": "markdown", + "source": [ + "The following example shows multiple data processing transforms passed in as a list to `MLTransform`."
+ ], + "metadata": { + "id": "IIrL13uEG3mH" + }, + "id": "IIrL13uEG3mH" + }, + { + "cell_type": "code", + "source": [ + "delete_artifact_location(artifact_location)\n", + "\n", + "from apache_beam.ml.transforms.tft import ScaleTo01\n", + "from apache_beam.ml.transforms.tft import ComputeAndApplyVocabulary\n", + "\n", + "data = [\n", + " {'x': [1, 2, 3], 'y': [10, 100, 111], 's': ['I', 'love', 'pie']},\n", + " {'x': [4, 5, 7], 'y': [11, 21, 50], 's': ['I', 'love', 'going', 'to', 'the', 'park']}\n", + "]\n", + "\n", + "transforms = [\n", + " ScaleTo01(columns=['x', 'y']),\n", + " ComputeAndApplyVocabulary(columns=['s'])\n", + "]\n", + "\n", + "with beam.Pipeline() as p:\n", + " _ = (\n", + " p\n", + " | 'CreateData' >> beam.Create(data)\n", + " | 'MLTransform' >> MLTransform(write_artifact_location=artifact_location,\n", + " transforms=transforms)\n", + " | 'PrintResults' >> beam.Map(print)\n", + " )" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "equV7ptY-FKL", + "outputId": "9c3f9461-31e9-41de-cc5d-96e7ff5a3600" + }, + "id": "equV7ptY-FKL", + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Row(s=array([1, 0, 4]), x=array([0. , 0.16666667, 0.33333334], dtype=float32), x_max=array([7.], dtype=float32), x_min=array([1.], dtype=float32), y=array([0. , 0.8910891, 1. ], dtype=float32), y_max=array([111.], dtype=float32), y_min=array([10.], dtype=float32))\n", + "Row(s=array([1, 0, 6, 2, 3, 5]), x=array([0.5 , 0.6666667, 1. ], dtype=float32), x_max=array([7.], dtype=float32), x_min=array([1.], dtype=float32), y=array([0.00990099, 0.10891089, 0.3960396 ], dtype=float32), y_max=array([111.], dtype=float32), y_min=array([10.], dtype=float32))\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### MLTransform for inference workloads\n", + "\n", + "The previous examples show how to preprocess data for model training. This example applies the same preprocessing steps to the inference data, which keeps the inference inputs consistent with the inputs used during training.\n", + "\n", + "To do this with `MLTransform`, pass the artifact location from the previous transforms to the `read_artifact_location` parameter. `MLTransform` uses the values and artifacts produced in the previous steps. 
You don't need to provide the transforms, because they are saved with the artifacts in the artifact location.\n" + ], + "metadata": { + "id": "kcnQSwkA-eSA" + }, + "id": "kcnQSwkA-eSA" + }, + { + "cell_type": "code", + "source": [ + "data = [\n", + " {'x': [2], 'y': [59, 91, 85], 's': ['love']},\n", + " {'x': [4, 5, 7], 'y': [111, 26, 30], 's': ['I', 'love', 'parks', 'and', 'dogs']}\n", + "]\n", + "\n", + "with beam.Pipeline() as p:\n", + " _ = (\n", + " p\n", + " | 'CreateData' >> beam.Create(data)\n", + " | 'MLTransform' >> MLTransform(read_artifact_location=artifact_location)\n", + " | 'PrintResults' >> beam.Map(print)\n", + " )" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "m0HpQ-Ff-Xmz", + "outputId": "1631e0f6-ee58-4c90-f90d-0e183aaaf3c2" + }, + "id": "m0HpQ-Ff-Xmz", + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Row(s=array([0]), x=array([0.16666667], dtype=float32), x_max=array([7.], dtype=float32), x_min=array([1.], dtype=float32), y=array([0.48514852, 0.8019802 , 0.7425743 ], dtype=float32), y_max=array([111.], dtype=float32), y_min=array([10.], dtype=float32))\n", + "Row(s=array([ 1, 0, -1, -1, -1]), x=array([0.5 , 0.6666667, 1. ], dtype=float32), x_max=array([7.], dtype=float32), x_min=array([1.], dtype=float32), y=array([1. , 0.15841584, 0.1980198 ], dtype=float32), y_max=array([111.], dtype=float32), y_min=array([10.], dtype=float32))\n" + ] + } + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + }, + "colab": { + "provenance": [], + "include_colab_link": true + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/examples/notebooks/beam-ml/run_inference_huggingface.ipynb b/examples/notebooks/beam-ml/run_inference_huggingface.ipynb new file mode 100644 index 0000000000000..71f7e3f0a3fb9 --- /dev/null +++ b/examples/notebooks/beam-ml/run_inference_huggingface.ipynb @@ -0,0 +1,534 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "source": [ + "# @title ###### Licensed to the Apache Software Foundation (ASF), Version 2.0 (the \"License\")\n", + "\n", + "# Licensed to the Apache Software Foundation (ASF) under one\n", + "# or more contributor license agreements. See the NOTICE file\n", + "# distributed with this work for additional information\n", + "# regarding copyright ownership. The ASF licenses this file\n", + "# to you under the Apache License, Version 2.0 (the\n", + "# \"License\"); you may not use this file except in compliance\n", + "# with the License. You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing,\n", + "# software distributed under the License is distributed on an\n", + "# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n", + "# KIND, either express or implied. 
See the License for the\n", + "# specific language governing permissions and limitations\n", + "# under the License" + ], + "metadata": { + "id": "SGjEjVxwudf2" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Apache Beam RunInference with Hugging Face\n", + "\n", + "\n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + "
" + ], + "metadata": { + "id": "BJ0mTOhFucGg" + } + }, + { + "cell_type": "markdown", + "source": [ + "This notebook shows how to use models from [Hugging Face](https://huggingface.co/) and [Hugging Face pipeline](https://huggingface.co/docs/transformers/main_classes/pipelines) in Apache Beam pipelines that uses the [RunInference](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.inference.base.html#apache_beam.ml.inference.base.RunInference) transform.\n", + "\n", + "Apache Beam has built-in support for Hugging Face model handlers. Hugging Face has three model handlers:\n", + "\n", + "\n", + "\n", + "* Use the [`HuggingFacePipelineModelHandler`](https://github.com/apache/beam/blob/926774dd02be5eacbe899ee5eceab23afb30abca/sdks/python/apache_beam/ml/inference/huggingface_inference.py#L567) model handler to run inference with [Hugging Face pipelines](https://huggingface.co/docs/transformers/main_classes/pipelines#pipelines).\n", + "* Use the [`HuggingFaceModelHandlerKeyedTensor`](https://github.com/apache/beam/blob/926774dd02be5eacbe899ee5eceab23afb30abca/sdks/python/apache_beam/ml/inference/huggingface_inference.py#L208) model handler to run inference with models that uses keyed tensors as inputs. For example, you might use this model handler with language modeling tasks.\n", + "* Use the [`HuggingFaceModelHandlerTensor`](https://github.com/apache/beam/blob/926774dd02be5eacbe899ee5eceab23afb30abca/sdks/python/apache_beam/ml/inference/huggingface_inference.py#L392) model handler to run inference with models that uses tensor inputs, such as `tf.Tensor` or `torch.Tensor`. \n", + "\n", + "\n", + "For more information about using RunInference, see [Get started with AI/ML pipelines](https://beam.apache.org/documentation/ml/overview/) in the Apache Beam documentation." + ], + "metadata": { + "id": "GBloorZevCXC" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Install dependencies" + ], + "metadata": { + "id": "xpylrB7_2Jzk" + } + }, + { + "cell_type": "markdown", + "source": [ + "Install both Apache Beam and the required dependencies for Hugging Face." 
+ ], + "metadata": { + "id": "IBQLg8on2S40" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yrqNSBB3qsI1" + }, + "outputs": [], + "source": [ + "!pip install torch --quiet\n", + "!pip install tensorflow --quiet\n", + "!pip install transformers==4.30.0 --quiet\n", + "!pip install 'apache-beam[gcp]>=2.50' --quiet" + ] + }, + { + "cell_type": "code", + "source": [ + "from typing import Dict\n", + "from typing import Iterable\n", + "from typing import Tuple\n", + "\n", + "import tensorflow as tf\n", + "import torch\n", + "from transformers import AutoTokenizer\n", + "from transformers import TFAutoModelForMaskedLM\n", + "\n", + "import apache_beam as beam\n", + "from apache_beam.ml.inference.base import KeyedModelHandler\n", + "from apache_beam.ml.inference.base import PredictionResult\n", + "from apache_beam.ml.inference.base import RunInference\n", + "from apache_beam.ml.inference.huggingface_inference import HuggingFacePipelineModelHandler\n", + "from apache_beam.ml.inference.huggingface_inference import HuggingFaceModelHandlerKeyedTensor\n", + "from apache_beam.ml.inference.huggingface_inference import HuggingFaceModelHandlerTensor\n", + "from apache_beam.ml.inference.huggingface_inference import PipelineTask\n" + ], + "metadata": { + "id": "BIDZLGFRrAmF" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Use RunInference with Hugging Face pipelines" + ], + "metadata": { + "id": "OQ1wv6xk3UeV" + } + }, + { + "cell_type": "markdown", + "source": [ + "You can use [Hugging Face pipelines](https://huggingface.co/docs/transformers/main_classes/pipelines#pipelines) with `RunInference` by using the `HuggingFacePipelineModelHandler` model handler. As with Hugging Face pipelines, to instantiate the model handler, provide either the pipeline `task` or a `model` that defines the task. To pass any optional arguments used to load the pipeline, use `load_pipeline_args`. To pass optional arguments for inference, use `inference_args`.\n", + "\n", + "\n", + "\n", + "You can define the pipeline task in one of the following two ways:\n", + "\n", + "\n", + "\n", + "* In the form of a string, for example `\"translation\"`. This option is similar to how the pipeline task is defined when using Hugging Face.\n", + "* In the form of a [`PipelineTask`](https://github.com/apache/beam/blob/ac936b0b89a92d836af59f3fc04f5733ad6819b3/sdks/python/apache_beam/ml/inference/huggingface_inference.py#L75) enum object defined in Apache Beam, such as `PipelineTask.Translation`.\n" + ], + "metadata": { + "id": "hWZQ49Pt3ojg" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Create a model handler\n", + "\n", + "This example demonstrates a task that translates text from English to Spanish." + ], + "metadata": { + "id": "pVVg9RfET86L" + } + }, + { + "cell_type": "code", + "source": [ + "model_handler = HuggingFacePipelineModelHandler(\n", + " task=PipelineTask.Translation_XX_to_YY,\n", + " model = \"google/flan-t5-small\",\n", + " load_pipeline_args={'framework': 'pt'},\n", + " inference_args={'max_length': 200}\n", + ")" + ], + "metadata": { + "id": "aF_BDPXk3UG4" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Define the input examples\n", + "\n", + "Use this code to define the input examples."
+ ], + "metadata": { + "id": "lxTImFGJUBIw" + } + }, + { + "cell_type": "code", + "source": [ + "text = [\"translate English to Spanish: How are you doing?\",\n", + " \"translate English to Spanish: This is the Apache Beam project.\"]" + ], + "metadata": { + "id": "POAuIFS_UDgE" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Postprocess the results\n", + "\n", + "The output from the `RunInference` transform is a `PredictionResult` object. Use that output to extract inferences, and then format and print the results." + ], + "metadata": { + "id": "Ay6-7DD5TZLn" + } + }, + { + "cell_type": "code", + "source": [ + "class FormatOutput(beam.DoFn):\n", + " \"\"\"\n", + " Extract the results from PredictionResult and print the results.\n", + " \"\"\"\n", + " def process(self, element):\n", + " example = element.example\n", + " translated_text = element.inference[0]['translation_text']\n", + " print(f'Example: {example}')\n", + " print(f'Translated text: {translated_text}')\n", + " print('-' * 80)\n" + ], + "metadata": { + "id": "74I3U1JsrG0R" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Run the pipeline\n", + "\n", + "Use the following code to run the pipeline." + ], + "metadata": { + "id": "Ve2cpHZ_UH0o" + } + }, + { + "cell_type": "code", + "source": [ + "with beam.Pipeline() as beam_pipeline:\n", + " examples = (\n", + " beam_pipeline\n", + " | \"CreateExamples\" >> beam.Create(text)\n", + " )\n", + " inferences = (\n", + " examples\n", + " | \"RunInference\" >> RunInference(model_handler)\n", + " | \"Print\" >> beam.ParDo(FormatOutput())\n", + " )" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_xStwO3qubqr", + "outputId": "5aeef601-c3e5-4b0f-e982-183ff36dc56e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Example: translate English to Spanish: How are you doing?\n", + "Translated text: Cómo está acerca?\n", + "--------------------------------------------------------------------------------\n", + "Example: translate English to Spanish: This is the Apache Beam project.\n", + "Translated text: Esto es el proyecto Apache Beam.\n", + "--------------------------------------------------------------------------------\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## RunInference with a pretrained model from Hugging Face Hub\n" + ], + "metadata": { + "id": "KJEsPkXnUS5y" + } + }, + { + "cell_type": "markdown", + "source": [ + "To use pretrained models directly from [Hugging Face Hub](https://huggingface.co/docs/hub/models), use either the `HuggingFaceModelHandlerTensor` model handler or the `HuggingFaceModelHandlerKeyedTensor` model handler. 
Which model handler you use depends on your input type:\n", + "\n", + "\n", + "* Use `HuggingFaceModelHandlerKeyedTensor` to run inference with models that use keyed tensors as inputs.\n", + "* Use `HuggingFaceModelHandlerTensor` to run inference with models that use tensor inputs, such as `tf.Tensor` or `torch.Tensor`.\n", + "\n", + "When you construct your pipeline, you might also need to use the following items:\n", + "\n", + "\n", + "* Use `load_model_args` to provide optional arguments for loading the model.\n", + "* Use `inference_args` to pass optional arguments for inference.\n", + "* For TensorFlow models, specify `framework='tf'`.\n", + "* For PyTorch models, specify `framework='pt'`.\n", + "\n", + "\n", + "\n", + "The following language modeling task predicts the masked word in a sentence." + ], + "metadata": { + "id": "mDcpG78tWcBN" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Create a model handler\n", + "\n", + "This example shows a masked language modeling task. These models take keyed tensors as inputs." + ], + "metadata": { + "id": "dU6NDE4DaRuF" + } + }, + { + "cell_type": "code", + "source": [ + "model_handler = HuggingFaceModelHandlerKeyedTensor(\n", + " model_uri=\"stevhliu/my_awesome_eli5_mlm_model\",\n", + " model_class=TFAutoModelForMaskedLM,\n", + " framework='tf',\n", + " load_model_args={'from_pt': True},\n", + " max_batch_size=1\n", + ")" + ], + "metadata": { + "id": "Zx1ep1UXWYrq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Define the input examples\n", + "\n", + "Use this code to define the input examples." + ], + "metadata": { + "id": "D18eQZfgcIM6" + } + }, + { + "cell_type": "code", + "source": [ + "text = ['The capital of France is Paris .',\n", + " 'It is raining cats and dogs .',\n", + " 'He looked up and saw the sun and stars .',\n", + " 'Today is Monday and tomorrow is Tuesday .',\n", + " 'There are 5 coconuts on this palm tree .']" + ], + "metadata": { + "id": "vWI_A6VrcH-m" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Preprocess the input\n", + "\n", + "Edit the given input to replace the last word with a `<mask>` token. Then, tokenize the input for inference.\n",
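+ "\n", + "For example, assuming the model's mask token is the literal string `<mask>`, the sentence `'The capital of France is Paris .'` becomes `'The capital of France is <mask>.'`."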
+ ], + "metadata": { + "id": "-62nvMSbeNBy" + } + }, + { + "cell_type": "code", + "source": [ + "def add_mask_to_last_word(text: str) -> Tuple[str, str]:\n", + " \"\"\"Replace the last word of sentence with <mask> and return\n", + " the original sentence and the masked sentence.\"\"\"\n", + " text_list = text.split()\n", + " masked = ' '.join(text_list[:-2] + ['<mask>' + text_list[-1]])\n", + " return text, masked\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained(\"stevhliu/my_awesome_eli5_mlm_model\")\n", + "\n", + "def tokenize_sentence(\n", + " text_and_mask: Tuple[str, str],\n", + " tokenizer) -> Tuple[str, Dict[str, tf.Tensor]]:\n", + " \"\"\"Convert string examples to tensors.\"\"\"\n", + " text, masked_text = text_and_mask\n", + " tokenized_sentence = tokenizer.encode_plus(\n", + " masked_text, return_tensors=\"tf\")\n", + "\n", + " # Workaround to manually remove batch dim until we have the feature to\n", + " # add optional batching flag.\n", + " # TODO(https://github.com/apache/beam/issues/21863): Remove when optional\n", + " # batching flag added\n", + " return text, {\n", + " k: tf.squeeze(v)\n", + " for k, v in dict(tokenized_sentence).items()\n", + " }" + ], + "metadata": { + "id": "d6TXVfWhWRzz" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Postprocess the results\n", + "\n", + "Extract the result from the `PredictionResult` object. Then, format the output to print the actual sentence and the word predicted for the last word in the sentence." + ], + "metadata": { + "id": "KnLtuYyTfC-g" + } + }, + { + "cell_type": "code", + "source": [ + "class PostProcessor(beam.DoFn):\n", + " \"\"\"Processes the PredictionResult to get the predicted word.\n", + "\n", + " The logits are the output of the BERT Model. To get the word with the highest\n", + " probability of being the masked word, take the argmax.\n", + " \"\"\"\n", + " def __init__(self, tokenizer):\n", + " super().__init__()\n", + " self.tokenizer = tokenizer\n", + "\n", + " def process(self, element: Tuple[str, PredictionResult]) -> Iterable[str]:\n", + " text, prediction_result = element\n", + " inputs = prediction_result.example\n", + " logits = prediction_result.inference['logits']\n", + " mask_token_index = tf.where(inputs[\"input_ids\"] == self.tokenizer.mask_token_id)[0]\n", + " predicted_token_id = tf.math.argmax(logits[mask_token_index[0]], axis=-1)\n", + " decoded_word = self.tokenizer.decode(predicted_token_id)\n", + " print(f\"Actual Sentence: {text}\\nPredicted last word: {decoded_word}\")\n", + " print('-' * 80)" + ], + "metadata": { + "id": "DnWlNV1kelnq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Run the pipeline\n", + "\n", + "Use the following code to run the pipeline."
+ ], + "metadata": { + "id": "scepcVUpgD63" + } + }, + { + "cell_type": "code", + "source": [ + "with beam.Pipeline() as beam_pipeline:\n", + " tokenized_examples = (\n", + " beam_pipeline\n", + " | \"CreateExamples\" >> beam.Create(text)\n", + " | 'AddMask' >> beam.Map(add_mask_to_last_word)\n", + " | 'TokenizeSentence' >>\n", + " beam.Map(lambda x: tokenize_sentence(x, tokenizer)))\n", + "\n", + " result = (\n", + " tokenized_examples\n", + " | \"RunInference\" >> RunInference(KeyedModelHandler(model_handler))\n", + " | \"PostProcess\" >> beam.ParDo(PostProcessor(tokenizer))\n", + " )" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IEPrQGEWgBCo", + "outputId": "218cc1f4-2613-4bf1-9666-782df020536b" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Actual Sentence: The capital of France is Paris .\n", + "Predicted last word: Paris\n", + "--------------------------------------------------------------------------------\n", + "Actual Sentence: It is raining cats and dogs .\n", + "Predicted last word: dogs\n", + "--------------------------------------------------------------------------------\n", + "Actual Sentence: He looked up and saw the sun and stars .\n", + "Predicted last word: stars\n", + "--------------------------------------------------------------------------------\n", + "Actual Sentence: Today is Monday and tomorrow is Tuesday .\n", + "Predicted last word: Tuesday\n", + "--------------------------------------------------------------------------------\n", + "Actual Sentence: There are 5 coconuts on this palm tree .\n", + "Predicted last word: tree\n", + "--------------------------------------------------------------------------------\n" + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/.test-infra/jenkins/job_PreCommit_Website_Stage_GCS.groovy b/it/build.gradle similarity index 52% rename from .test-infra/jenkins/job_PreCommit_Website_Stage_GCS.groovy rename to it/build.gradle index e2f7202d14eb4..35ccbba4c3606 100644 --- a/.test-infra/jenkins/job_PreCommit_Website_Stage_GCS.groovy +++ b/it/build.gradle @@ -4,28 +4,30 @@ * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance + * License); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, + * distributed under the License is distributed on an AS IS BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ +plugins { id 'org.apache.beam.module' } +applyJavaNature( + automaticModuleName: 'org.apache.beam.it', +) -import PrecommitJobBuilder +description = "Apache Beam :: IT" +ext.summary = "Integration test utilities suites." 
-PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Website_Stage_GCS', - gradleTask: ':website:stageWebsite', - triggerPathPatterns: ['^website/.*$']) -builder.build { - publishers { - buildDescription(/Website published to (http:\/\/.+\/index.html)/) - } +//These registrations exist to make our matrix Github Action simple to configure +tasks.register('GCSPerformanceTest') { + dependsOn(":it:google-cloud-platform:GCSPerformanceTest") } +tasks.register('BigTablePerformanceTest') { + dependsOn(":it:google-cloud-platform:BigTablePerformanceTest") +} \ No newline at end of file diff --git a/it/google-cloud-platform/build.gradle b/it/google-cloud-platform/build.gradle index f43b3f25720b0..0917ddd3e21aa 100644 --- a/it/google-cloud-platform/build.gradle +++ b/it/google-cloud-platform/build.gradle @@ -15,6 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +import org.apache.beam.gradle.IoPerformanceTestUtilities plugins { id 'org.apache.beam.module' } applyJavaNature( @@ -74,4 +75,7 @@ dependencies { testImplementation library.java.mockito_inline testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadowTest") testRuntimeOnly library.java.slf4j_simple -} \ No newline at end of file +} + +tasks.register("GCSPerformanceTest", IoPerformanceTestUtilities.IoPerformanceTest, project, 'google-cloud-platform', 'FileBasedIOLT', ['configuration':'large','project':'apache-beam-testing', 'artifactBucket':'io-performance-temp']) +tasks.register("BigTablePerformanceTest", IoPerformanceTestUtilities.IoPerformanceTest, project, 'google-cloud-platform', 'BigTableIOLT', ['configuration':'large','project':'apache-beam-testing', 'artifactBucket':'io-performance-temp']) \ No newline at end of file diff --git a/runners/google-cloud-dataflow-java/build.gradle b/runners/google-cloud-dataflow-java/build.gradle index f6e2b9b147c54..2acc30455e225 100644 --- a/runners/google-cloud-dataflow-java/build.gradle +++ b/runners/google-cloud-dataflow-java/build.gradle @@ -612,6 +612,9 @@ task googleCloudPlatformRunnerV2IntegrationTest(type: Test) { exclude '**/FhirIOLROIT.class' exclude '**/FhirIOSearchIT.class' exclude '**/FhirIOPatientEverythingIT.class' + // failing due to pane index not incrementing after Reshuffle: + // https://github.com/apache/beam/issues/28219 + exclude '**/FileLoadsStreamingIT.class' maxParallelForks 4 classpath = configurations.googleCloudPlatformIntegrationTest diff --git a/runners/google-cloud-dataflow-java/worker/build.gradle b/runners/google-cloud-dataflow-java/worker/build.gradle index e1448e313c607..ce06063c9b52d 100644 --- a/runners/google-cloud-dataflow-java/worker/build.gradle +++ b/runners/google-cloud-dataflow-java/worker/build.gradle @@ -67,90 +67,91 @@ def excluded_dependencies = [ library.java.error_prone_annotations, // Provided scope added in worker library.java.hamcrest, // Test only library.java.junit, // Test only - library.java.jsonassert // Test only + library.java.jsonassert, // Test only + library.java.truth // Test only ] applyJavaNature( automaticModuleName: 'org.apache.beam.runners.dataflow.worker', archivesBaseName: 'beam-runners-google-cloud-dataflow-java-legacy-worker', classesTriggerCheckerBugs: [ - 'BatchGroupAlsoByWindowAndCombineFn': 'TODO: file a bug report', - 'AssignWindowsParDoFnFactory': 'TODO: file a bug report', - 'FetchAndFilterStreamingSideInputsOperation': 'https://github.com/typetools/checker-framework/issues/5436', + 
'BatchGroupAlsoByWindowAndCombineFn' : 'TODO: file a bug report', + 'AssignWindowsParDoFnFactory' : 'TODO: file a bug report', + 'FetchAndFilterStreamingSideInputsOperation': 'https://github.com/typetools/checker-framework/issues/5436', ], exportJavadoc: false, enableSpotbugs: false /* TODO(BEAM-5658): enable spotbugs */, shadowJarValidationExcludes: [ - "org/apache/beam/runners/dataflow/worker/**", - "org/apache/beam/repackaged/beam_runners_google_cloud_dataflow_java_legacy_worker/**", - // TODO(https://github.com/apache/beam/issues/19114): Move DataflowRunnerHarness class under org.apache.beam.runners.dataflow.worker namespace - "com/google/cloud/dataflow/worker/DataflowRunnerHarness.class", - // Allow slf4j implementation worker for logging during pipeline execution - "org/slf4j/impl/**" + "org/apache/beam/runners/dataflow/worker/**", + "org/apache/beam/repackaged/beam_runners_google_cloud_dataflow_java_legacy_worker/**", + // TODO(https://github.com/apache/beam/issues/19114): Move DataflowRunnerHarness class under org.apache.beam.runners.dataflow.worker namespace + "com/google/cloud/dataflow/worker/DataflowRunnerHarness.class", + // Allow slf4j implementation worker for logging during pipeline execution + "org/slf4j/impl/**" ], shadowClosure: { - // Each included dependency must also include all of its necessary transitive dependencies - // or have them provided by the users pipeline during job submission. Typically a users - // pipeline includes :runners:google-cloud-dataflow-java and its transitive dependencies - // so those dependencies don't need to be shaded (bundled and relocated) away. All other - // dependencies needed to run the worker must be shaded (bundled and relocated) to prevent - // ClassNotFound and/or MethodNotFound errors during pipeline execution. - // - // Each included dependency should have a matching relocation rule below that ensures - // that the shaded jar is correctly built. + // Each included dependency must also include all of its necessary transitive dependencies + // or have them provided by the users pipeline during job submission. Typically a users + // pipeline includes :runners:google-cloud-dataflow-java and its transitive dependencies + // so those dependencies don't need to be shaded (bundled and relocated) away. All other + // dependencies needed to run the worker must be shaded (bundled and relocated) to prevent + // ClassNotFound and/or MethodNotFound errors during pipeline execution. + // + // Each included dependency should have a matching relocation rule below that ensures + // that the shaded jar is correctly built. 
- dependencies { - include(dependency(library.java.slf4j_jdk14)) - } + dependencies { + include(dependency(library.java.slf4j_jdk14)) + } - dependencies { - include(project(path: ":model:fn-execution", configuration: "shadow")) - } - relocate("org.apache.beam.model.fnexecution.v1", getWorkerRelocatedPath("org.apache.beam.model.fnexecution.v1")) + dependencies { + include(project(path: ":model:fn-execution", configuration: "shadow")) + } + relocate("org.apache.beam.model.fnexecution.v1", getWorkerRelocatedPath("org.apache.beam.model.fnexecution.v1")) - dependencies { - include(project(":runners:core-construction-java")) - include(project(":runners:core-java")) - } - relocate("org.apache.beam.runners.core", getWorkerRelocatedPath("org.apache.beam.runners.core")) - relocate("org.apache.beam.repackaged.beam_runners_core_construction_java", getWorkerRelocatedPath("org.apache.beam.repackaged.beam_runners_core_construction_java")) - relocate("org.apache.beam.repackaged.beam_runners_core_java", getWorkerRelocatedPath("org.apache.beam.repackaged.beam_runners_core_java")) + dependencies { + include(project(":runners:core-construction-java")) + include(project(":runners:core-java")) + } + relocate("org.apache.beam.runners.core", getWorkerRelocatedPath("org.apache.beam.runners.core")) + relocate("org.apache.beam.repackaged.beam_runners_core_construction_java", getWorkerRelocatedPath("org.apache.beam.repackaged.beam_runners_core_construction_java")) + relocate("org.apache.beam.repackaged.beam_runners_core_java", getWorkerRelocatedPath("org.apache.beam.repackaged.beam_runners_core_java")) - dependencies { - include(project(":runners:java-fn-execution")) - } - relocate("org.apache.beam.runners.fnexecution", getWorkerRelocatedPath("org.apache.beam.runners.fnexecution")) - relocate("org.apache.beam.repackaged.beam_runners_java_fn_execution", getWorkerRelocatedPath("org.apache.beam.repackaged.beam_runners_java_fn_execution")) + dependencies { + include(project(":runners:java-fn-execution")) + } + relocate("org.apache.beam.runners.fnexecution", getWorkerRelocatedPath("org.apache.beam.runners.fnexecution")) + relocate("org.apache.beam.repackaged.beam_runners_java_fn_execution", getWorkerRelocatedPath("org.apache.beam.repackaged.beam_runners_java_fn_execution")) - dependencies { - include(project(":sdks:java:fn-execution")) - } - relocate("org.apache.beam.sdk.fn", getWorkerRelocatedPath("org.apache.beam.sdk.fn")) - relocate("org.apache.beam.repackaged.beam_sdks_java_fn_execution", getWorkerRelocatedPath("org.apache.beam.repackaged.beam_sdks_java_fn_execution")) + dependencies { + include(project(":sdks:java:fn-execution")) + } + relocate("org.apache.beam.sdk.fn", getWorkerRelocatedPath("org.apache.beam.sdk.fn")) + relocate("org.apache.beam.repackaged.beam_sdks_java_fn_execution", getWorkerRelocatedPath("org.apache.beam.repackaged.beam_sdks_java_fn_execution")) - dependencies { - // We have to include jetty-server/jetty-servlet and all of its transitive dependencies - // which includes several org.eclipse.jetty artifacts + servlet-api - include(dependency("org.eclipse.jetty:.*:9.2.10.v20150310")) - include(dependency("javax.servlet:javax.servlet-api:3.1.0")) - } - relocate("org.eclipse.jetty", getWorkerRelocatedPath("org.eclipse.jetty")) - relocate("javax.servlet", getWorkerRelocatedPath("javax.servlet")) + dependencies { + // We have to include jetty-server/jetty-servlet and all of its transitive dependencies + // which includes several org.eclipse.jetty artifacts + servlet-api + 
include(dependency("org.eclipse.jetty:.*:9.2.10.v20150310")) + include(dependency("javax.servlet:javax.servlet-api:3.1.0")) + } + relocate("org.eclipse.jetty", getWorkerRelocatedPath("org.eclipse.jetty")) + relocate("javax.servlet", getWorkerRelocatedPath("javax.servlet")) - // We don't relocate windmill since it is already underneath the org.apache.beam.runners.dataflow.worker namespace and never - // expect a user pipeline to include it. There is also a JNI component that windmill server relies on which makes - // arbitrary relocation more difficult. - dependencies { - include(project(path: ":runners:google-cloud-dataflow-java:worker:windmill", configuration: "shadow")) - } + // We don't relocate windmill since it is already underneath the org.apache.beam.runners.dataflow.worker namespace and never + // expect a user pipeline to include it. There is also a JNI component that windmill server relies on which makes + // arbitrary relocation more difficult. + dependencies { + include(project(path: ":runners:google-cloud-dataflow-java:worker:windmill", configuration: "shadow")) + } - // Include original source files extracted under - // '$buildDir/original_sources_to_package' to jar - from "$buildDir/original_sources_to_package" + // Include original source files extracted under + // '$buildDir/original_sources_to_package' to jar + from "$buildDir/original_sources_to_package" - exclude "META-INF/LICENSE.txt" - exclude "about.html" -}) + exclude "META-INF/LICENSE.txt" + exclude "about.html" + }) /******************************************************************************/ // Configure the worker root project @@ -219,6 +220,10 @@ dependencies { // as well and placed within the testImplementation configuration. Otherwise we can place it within // the shadowTest configuration. testImplementation project(path: ":runners:core-java", configuration: "testRuntimeMigration") + // TODO: excluding Guava until Truth updates it to >32.1.x + testImplementation(library.java.truth) { + exclude group: 'com.google.guava', module: 'guava' + } shadowTest project(path: ":sdks:java:extensions:google-cloud-platform-core", configuration: "testRuntimeMigration") shadowTest project(path: ":runners:direct-java", configuration: "shadow") shadowTest project(path: ":sdks:java:harness", configuration: "shadowTest") @@ -232,8 +237,8 @@ dependencies { project.task('validateShadedJarContainsSlf4jJdk14', dependsOn: 'shadowJar') { ext.outFile = project.file("${project.reportsDir}/${name}.out") inputs.files(project.configurations.shadow.artifacts.files) - .withPropertyName("shadowArtifactsFiles") - .withPathSensitivity(PathSensitivity.RELATIVE) + .withPropertyName("shadowArtifactsFiles") + .withPathSensitivity(PathSensitivity.RELATIVE) outputs.files outFile doLast { project.configurations.shadow.artifacts.files.each { diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/AbstractWindmillStream.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/AbstractWindmillStream.java index d3e7de58931f4..ea7efff7a06d9 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/AbstractWindmillStream.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/AbstractWindmillStream.java @@ -60,21 +60,16 @@ * synchronizing on this. 
*/ public abstract class AbstractWindmillStream implements WindmillStream { - protected static final long DEFAULT_STREAM_RPC_DEADLINE_SECONDS = 300; + public static final long DEFAULT_STREAM_RPC_DEADLINE_SECONDS = 300; // Default gRPC streams to 2MB chunks, which has shown to be a large enough chunk size to reduce // per-chunk overhead, and small enough that we can still perform granular flow-control. protected static final int RPC_STREAM_CHUNK_SIZE = 2 << 20; - private static final Logger LOG = LoggerFactory.getLogger(AbstractWindmillStream.class); - protected final AtomicBoolean clientClosed; - + private final AtomicLong lastSendTimeMs; private final Executor executor; private final BackOff backoff; - // Indicates if the current stream in requestObserver is closed by calling close() method - private final AtomicBoolean streamClosed; private final AtomicLong startTimeMs; - private final AtomicLong lastSendTimeMs; private final AtomicLong lastResponseTimeMs; private final AtomicInteger errorCount; private final AtomicReference lastError; @@ -83,6 +78,8 @@ public abstract class AbstractWindmillStream implements Win private final Set> streamRegistry; private final int logEveryNStreamFailures; private final Supplier> requestObserverSupplier; + // Indicates if the current stream in requestObserver is closed by calling close() method + private final AtomicBoolean streamClosed; private @Nullable StreamObserver requestObserver; protected AbstractWindmillStream( @@ -132,9 +129,9 @@ private static long debugDuration(long nowMs, long startMs) { protected abstract boolean hasPendingRequests(); /** - * Called when the stream is throttled due to resource exhausted errors. Will be called for each - * resource exhausted error not just the first. onResponse() must stop throttling on receipt of - * the first good message. + * Called when the client side stream is throttled due to resource exhausted errors. Will be + * called for each resource exhausted error not just the first. onResponse() must stop throttling + * on receipt of the first good message. */ protected abstract void startThrottleTimer(); diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/ForwardingClientResponseObserver.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/ForwardingClientResponseObserver.java index 3737e29efb133..a1f80598d89a8 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/ForwardingClientResponseObserver.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/ForwardingClientResponseObserver.java @@ -27,23 +27,23 @@ *

<p>Used to wrap existing {@link StreamObserver}s to be able to install an {@link * ClientCallStreamObserver#setOnReadyHandler(Runnable) onReadyHandler}. * - * <p>This is as thread-safe as the undering stream observer that is being wrapped. + * <p>
This is as thread-safe as the underlying stream observer that is being wrapped. */ -final class ForwardingClientResponseObserver - implements ClientResponseObserver { +final class ForwardingClientResponseObserver + implements ClientResponseObserver { private final Runnable onReadyHandler; private final Runnable onDoneHandler; - private final StreamObserver inboundObserver; + private final StreamObserver inboundObserver; ForwardingClientResponseObserver( - StreamObserver inboundObserver, Runnable onReadyHandler, Runnable onDoneHandler) { + StreamObserver inboundObserver, Runnable onReadyHandler, Runnable onDoneHandler) { this.inboundObserver = inboundObserver; this.onReadyHandler = onReadyHandler; this.onDoneHandler = onDoneHandler; } @Override - public void onNext(ReqT value) { + public void onNext(ResponseT value) { inboundObserver.onNext(value); } @@ -60,7 +60,7 @@ public void onCompleted() { } @Override - public void beforeStart(ClientCallStreamObserver stream) { + public void beforeStart(ClientCallStreamObserver stream) { stream.setOnReadyHandler(onReadyHandler); } } diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/StreamObserverFactory.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/StreamObserverFactory.java index a046f2fd46ac3..e0878b7b0b91b 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/StreamObserverFactory.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/StreamObserverFactory.java @@ -33,9 +33,9 @@ public static StreamObserverFactory direct( return new Direct(deadlineSeconds, messagesBetweenIsReadyChecks); } - public abstract StreamObserver from( - Function, StreamObserver> clientFactory, - StreamObserver responseObserver); + public abstract StreamObserver from( + Function, StreamObserver> clientFactory, + StreamObserver responseObserver); private static class Direct extends StreamObserverFactory { private final long deadlineSeconds; @@ -47,14 +47,14 @@ private static class Direct extends StreamObserverFactory { } @Override - public StreamObserver from( - Function, StreamObserver> clientFactory, - StreamObserver inboundObserver) { + public StreamObserver from( + Function, StreamObserver> clientFactory, + StreamObserver inboundObserver) { AdvancingPhaser phaser = new AdvancingPhaser(1); - CallStreamObserver outboundObserver = - (CallStreamObserver) + CallStreamObserver outboundObserver = + (CallStreamObserver) clientFactory.apply( - new ForwardingClientResponseObserver( + new ForwardingClientResponseObserver( inboundObserver, phaser::arrive, phaser::forceTermination)); return new DirectStreamObserver<>( phaser, outboundObserver, deadlineSeconds, messagesBetweenIsReadyChecks); diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/WindmillEndpoints.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/WindmillEndpoints.java new file mode 100644 index 0000000000000..64b6e675ef5ff --- /dev/null +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/WindmillEndpoints.java @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.dataflow.worker.windmill; + +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList.toImmutableList; +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap.toImmutableMap; + +import com.google.auto.value.AutoValue; +import java.net.Inet6Address; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.Map; +import java.util.Optional; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.net.HostAndPort; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Value class for holding endpoints used for communicating with Windmill service. Corresponds + * directly with {@link Windmill.WorkerMetadataResponse}. + */ +@AutoValue +public abstract class WindmillEndpoints { + private static final Logger LOG = LoggerFactory.getLogger(WindmillEndpoints.class); + + /** + * Used by GetData GlobalDataRequest(s) to support Beam side inputs. Returns a map where the key + * is a global data tag and the value is the endpoint where the data associated with the global + * data tag resides. + * + * @see Beam Side + * Inputs + */ + public abstract ImmutableMap globalDataEndpoints(); + + /** + * Used by GetWork/GetData/CommitWork calls to send, receive, and commit work directly to/from + * Windmill servers. Returns a list of endpoints used to communicate with the corresponding + * Windmill servers. + */ + public abstract ImmutableList windmillEndpoints(); + + public static WindmillEndpoints from( + Windmill.WorkerMetadataResponse workerMetadataResponseProto) { + ImmutableMap globalDataServers = + workerMetadataResponseProto.getGlobalDataEndpointsMap().entrySet().stream() + .collect( + toImmutableMap( + Map.Entry::getKey, // global data key + endpoint -> WindmillEndpoints.Endpoint.from(endpoint.getValue()))); + + ImmutableList windmillServers = + workerMetadataResponseProto.getWorkEndpointsList().stream() + .map(WindmillEndpoints.Endpoint::from) + .collect(toImmutableList()); + + return WindmillEndpoints.builder() + .setGlobalDataEndpoints(globalDataServers) + .setWindmillEndpoints(windmillServers) + .build(); + } + + public static WindmillEndpoints.Builder builder() { + return new AutoValue_WindmillEndpoints.Builder(); + } + + /** + * Representation of an endpoint in {@link Windmill.WorkerMetadataResponse.Endpoint} proto with + * the worker_token field, and direct_endpoint field parsed into a {@link WindmillServiceAddress} + * which holds either a {@link Inet6Address} or {@link HostAndPort} used to connect to Streaming + * Engine. 
{@link Inet6Address}(s) represent direct Windmill worker connections, and {@link + * HostAndPort}(s) represent connections to the Windmill Dispatcher. + */ + @AutoValue + public abstract static class Endpoint { + /** + * {@link WindmillServiceAddress} representation of {@link + * Windmill.WorkerMetadataResponse.Endpoint#getDirectEndpoint()}. The proto's direct_endpoint + * string can be converted to either {@link Inet6Address} or {@link HostAndPort}. + */ + public abstract Optional directEndpoint(); + + /** + * Corresponds to {@link Windmill.WorkerMetadataResponse.Endpoint#getWorkerToken()} in the + * windmill.proto file. + */ + public abstract Optional workerToken(); + + public static Endpoint.Builder builder() { + return new AutoValue_WindmillEndpoints_Endpoint.Builder(); + } + + public static Endpoint from(Windmill.WorkerMetadataResponse.Endpoint endpointProto) { + Endpoint.Builder endpointBuilder = Endpoint.builder(); + if (endpointProto.hasDirectEndpoint() && !endpointProto.getDirectEndpoint().isEmpty()) { + parseDirectEndpoint(endpointProto.getDirectEndpoint()) + .ifPresent(endpointBuilder::setDirectEndpoint); + } + if (endpointProto.hasWorkerToken() && !endpointProto.getWorkerToken().isEmpty()) { + endpointBuilder.setWorkerToken(endpointProto.getWorkerToken()); + } + + Endpoint endpoint = endpointBuilder.build(); + + if (!endpoint.directEndpoint().isPresent() && !endpoint.workerToken().isPresent()) { + throw new IllegalArgumentException( + String.format( + "direct_endpoint=[%s] not present or could not be parsed, and worker_token" + + " not present. At least one of these fields is required.", + endpointProto.getDirectEndpoint())); + } + + return endpoint; + } + + @AutoValue.Builder + public abstract static class Builder { + public abstract Builder setDirectEndpoint(WindmillServiceAddress directEndpoint); + + public abstract Builder setWorkerToken(String workerToken); + + public abstract Endpoint build(); + } + } + + @AutoValue.Builder + public abstract static class Builder { + public abstract Builder setGlobalDataEndpoints( + ImmutableMap globalDataServers); + + public abstract Builder setWindmillEndpoints( + ImmutableList windmillServers); + + abstract ImmutableList.Builder windmillEndpointsBuilder(); + + public final Builder addWindmillEndpoint(WindmillEndpoints.Endpoint endpoint) { + windmillEndpointsBuilder().add(endpoint); + return this; + } + + public final Builder addAllWindmillEndpoints(Iterable endpoints) { + windmillEndpointsBuilder().addAll(endpoints); + return this; + } + + abstract ImmutableMap.Builder globalDataEndpointsBuilder(); + + public final Builder addGlobalDataEndpoint( + String globalDataKey, WindmillEndpoints.Endpoint endpoint) { + globalDataEndpointsBuilder().put(globalDataKey, endpoint); + return this; + } + + public final Builder addAllGlobalDataEndpoints( + Map globalDataEndpoints) { + globalDataEndpointsBuilder().putAll(globalDataEndpoints); + return this; + } + + public abstract WindmillEndpoints build(); + } + + private static Optional parseDirectEndpoint(String directEndpoint) { + Optional directEndpointIpV6Address = + tryParseDirectEndpointIntoIpV6Address(directEndpoint).map(WindmillServiceAddress::create); + + return directEndpointIpV6Address.isPresent() + ? 
directEndpointIpV6Address + : tryParseEndpointIntoHostAndPort(directEndpoint).map(WindmillServiceAddress::create); + } + + private static Optional tryParseEndpointIntoHostAndPort(String directEndpoint) { + try { + return Optional.of(HostAndPort.fromString(directEndpoint)); + } catch (IllegalArgumentException e) { + LOG.warn("{} cannot be parsed into a gcpServiceAddress", directEndpoint); + return Optional.empty(); + } + } + + private static Optional tryParseDirectEndpointIntoIpV6Address( + String directEndpoint) { + InetAddress directEndpointAddress = null; + try { + directEndpointAddress = Inet6Address.getByName(directEndpoint); + } catch (UnknownHostException e) { + LOG.warn( + "Error occurred trying to parse direct_endpoint={} into IPv6 address. Exception={}", + directEndpoint, + e.toString()); + } + + // Inet6Address.getByAddress returns either an IPv4 or an IPv6 address depending on the format + // of the direct_endpoint string. + if (!(directEndpointAddress instanceof Inet6Address)) { + LOG.warn( + "{} is not an IPv6 address. Direct endpoints are expected to be in IPv6 format.", + directEndpoint); + return Optional.empty(); + } + + return Optional.ofNullable((Inet6Address) directEndpointAddress); + } +} diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/WindmillServiceAddress.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/WindmillServiceAddress.java new file mode 100644 index 0000000000000..3ebda8fab8edb --- /dev/null +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/WindmillServiceAddress.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.dataflow.worker.windmill; + +import com.google.auto.value.AutoOneOf; +import java.net.Inet6Address; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.net.HostAndPort; + +/** Used to create channels to communicate with Streaming Engine via gRpc. 
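+ * A minimal usage sketch (the host and port here are hypothetical):
+ *
+ * <pre>{@code
+ * WindmillServiceAddress address =
+ *     WindmillServiceAddress.create(HostAndPort.fromString("windmill.example.com:443"));
+ * boolean viaDispatcher =
+ *     address.getKind() == WindmillServiceAddress.Kind.GCP_SERVICE_ADDRESS;
+ * }</pre>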
*/ +@AutoOneOf(WindmillServiceAddress.Kind.class) +public abstract class WindmillServiceAddress { + public static WindmillServiceAddress create(Inet6Address ipv6Address) { + return AutoOneOf_WindmillServiceAddress.ipv6(ipv6Address); + } + + public static WindmillServiceAddress create(HostAndPort gcpServiceAddress) { + return AutoOneOf_WindmillServiceAddress.gcpServiceAddress(gcpServiceAddress); + } + + public abstract Kind getKind(); + + public abstract Inet6Address ipv6(); + + public abstract HostAndPort gcpServiceAddress(); + + public enum Kind { + IPV6, + GCP_SERVICE_ADDRESS + } +} diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/WindmillStream.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/WindmillStream.java index 70c7cc36ba315..4dd4164fc4efd 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/WindmillStream.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/WindmillStream.java @@ -86,4 +86,8 @@ boolean commitWorkItem( /** Flushes any pending work items to the wire. */ void flush(); } + + /** Interface for streaming GetWorkerMetadata requests to Windmill. */ + @ThreadSafe + interface GetWorkerMetadataStream extends WindmillStream {} } diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcCommitWorkStream.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcCommitWorkStream.java index 74bd93a5474fa..1bba40805dec4 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcCommitWorkStream.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcCommitWorkStream.java @@ -17,16 +17,17 @@ */ package org.apache.beam.runners.dataflow.worker.windmill.grpcclient; +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; + import java.io.PrintWriter; import java.util.HashMap; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Consumer; +import java.util.function.Function; import org.apache.beam.runners.dataflow.worker.windmill.AbstractWindmillStream; -import org.apache.beam.runners.dataflow.worker.windmill.CloudWindmillServiceV1Alpha1Grpc; import org.apache.beam.runners.dataflow.worker.windmill.StreamObserverFactory; import org.apache.beam.runners.dataflow.worker.windmill.Windmill.CommitStatus; import org.apache.beam.runners.dataflow.worker.windmill.Windmill.JobHeader; @@ -37,7 +38,7 @@ import org.apache.beam.runners.dataflow.worker.windmill.WindmillStream.CommitWorkStream; import org.apache.beam.sdk.util.BackOff; import org.apache.beam.vendor.grpc.v1p54p0.com.google.protobuf.ByteString; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.stub.StreamObserver; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -56,7 +57,8 @@ final class GrpcCommitWorkStream private final int streamingRpcBatchLimit; private GrpcCommitWorkStream( - 
CloudWindmillServiceV1Alpha1Grpc.CloudWindmillServiceV1Alpha1Stub stub, + Function, StreamObserver> + startCommitWorkRpcFn, BackOff backoff, StreamObserverFactory streamObserverFactory, Set> streamRegistry, @@ -66,10 +68,7 @@ private GrpcCommitWorkStream( AtomicLong idGenerator, int streamingRpcBatchLimit) { super( - responseObserver -> - stub.withDeadlineAfter( - AbstractWindmillStream.DEFAULT_STREAM_RPC_DEADLINE_SECONDS, TimeUnit.SECONDS) - .commitWorkStream(responseObserver), + startCommitWorkRpcFn, backoff, streamObserverFactory, streamRegistry, @@ -83,7 +82,8 @@ private GrpcCommitWorkStream( } static GrpcCommitWorkStream create( - CloudWindmillServiceV1Alpha1Grpc.CloudWindmillServiceV1Alpha1Stub stub, + Function, StreamObserver> + startCommitWorkRpcFn, BackOff backoff, StreamObserverFactory streamObserverFactory, Set> streamRegistry, @@ -94,7 +94,7 @@ static GrpcCommitWorkStream create( int streamingRpcBatchLimit) { GrpcCommitWorkStream commitWorkStream = new GrpcCommitWorkStream( - stub, + startCommitWorkRpcFn, backoff, streamObserverFactory, streamRegistry, @@ -252,7 +252,7 @@ private void issueBatchedRequest(Map requests) { } private void issueMultiChunkRequest(final long id, PendingRequest pendingRequest) { - Preconditions.checkNotNull(pendingRequest.computation); + checkNotNull(pendingRequest.computation); final ByteString serializedCommit = pendingRequest.request.toByteString(); synchronized (this) { @@ -306,8 +306,13 @@ long getBytes() { private class Batcher { - final Map queue = new HashMap<>(); - long queuedBytes = 0; + private final Map queue; + private long queuedBytes; + + private Batcher() { + this.queuedBytes = 0; + this.queue = new HashMap<>(); + } boolean canAccept(PendingRequest request) { return queue.isEmpty() diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcGetDataStream.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcGetDataStream.java index b51daabb1a2bf..238cc771dce8b 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcGetDataStream.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcGetDataStream.java @@ -17,6 +17,9 @@ */ package org.apache.beam.runners.dataflow.worker.windmill.grpcclient; +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Verify.verify; + import java.io.IOException; import java.io.InputStream; import java.io.PrintWriter; @@ -28,10 +31,9 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedDeque; import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Function; import org.apache.beam.runners.dataflow.worker.windmill.AbstractWindmillStream; -import org.apache.beam.runners.dataflow.worker.windmill.CloudWindmillServiceV1Alpha1Grpc; import org.apache.beam.runners.dataflow.worker.windmill.StreamObserverFactory; import org.apache.beam.runners.dataflow.worker.windmill.Windmill.ComputationGetDataRequest; import org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalData; @@ -45,8 +47,7 @@ import 
org.apache.beam.runners.dataflow.worker.windmill.grpcclient.GrpcGetDataStreamRequests.QueuedBatch; import org.apache.beam.runners.dataflow.worker.windmill.grpcclient.GrpcGetDataStreamRequests.QueuedRequest; import org.apache.beam.sdk.util.BackOff; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Verify; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.stub.StreamObserver; import org.joda.time.Instant; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -64,7 +65,8 @@ final class GrpcGetDataStream private final int streamingRpcBatchLimit; private GrpcGetDataStream( - CloudWindmillServiceV1Alpha1Grpc.CloudWindmillServiceV1Alpha1Stub stub, + Function, StreamObserver> + startGetDataRpcFn, BackOff backoff, StreamObserverFactory streamObserverFactory, Set> streamRegistry, @@ -74,14 +76,7 @@ private GrpcGetDataStream( AtomicLong idGenerator, int streamingRpcBatchLimit) { super( - responseObserver -> - stub.withDeadlineAfter( - AbstractWindmillStream.DEFAULT_STREAM_RPC_DEADLINE_SECONDS, TimeUnit.SECONDS) - .getDataStream(responseObserver), - backoff, - streamObserverFactory, - streamRegistry, - logEveryNStreamFailures); + startGetDataRpcFn, backoff, streamObserverFactory, streamRegistry, logEveryNStreamFailures); this.idGenerator = idGenerator; this.getDataThrottleTimer = getDataThrottleTimer; this.jobHeader = jobHeader; @@ -91,7 +86,8 @@ private GrpcGetDataStream( } static GrpcGetDataStream create( - CloudWindmillServiceV1Alpha1Grpc.CloudWindmillServiceV1Alpha1Stub stub, + Function, StreamObserver> + startGetDataRpcFn, BackOff backoff, StreamObserverFactory streamObserverFactory, Set> streamRegistry, @@ -102,7 +98,7 @@ static GrpcGetDataStream create( int streamingRpcBatchLimit) { GrpcGetDataStream getDataStream = new GrpcGetDataStream( - stub, + startGetDataRpcFn, backoff, streamObserverFactory, streamRegistry, @@ -122,7 +118,7 @@ protected synchronized void onNewStream() { // We rely on close only occurring after all methods on the stream have returned. // Since the requestKeyedData and requestGlobalData methods are blocking this // means there should be no pending requests. 
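+    // verify is Guava's Verify.verify, statically imported above; behavior is
+    // unchanged, only the qualification is dropped.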
- Verify.verify(!hasPendingRequests()); + verify(!hasPendingRequests()); } else { for (AppendableInputStream responseStream : pending.values()) { responseStream.cancel(); @@ -138,14 +134,13 @@ protected boolean hasPendingRequests() { @Override @SuppressWarnings("dereference.of.nullable") protected void onResponse(StreamingGetDataResponse chunk) { - Preconditions.checkArgument(chunk.getRequestIdCount() == chunk.getSerializedResponseCount()); - Preconditions.checkArgument( - chunk.getRemainingBytesForResponse() == 0 || chunk.getRequestIdCount() == 1); + checkArgument(chunk.getRequestIdCount() == chunk.getSerializedResponseCount()); + checkArgument(chunk.getRemainingBytesForResponse() == 0 || chunk.getRequestIdCount() == 1); getDataThrottleTimer.stop(); for (int i = 0; i < chunk.getRequestIdCount(); ++i) { AppendableInputStream responseStream = pending.get(chunk.getRequestId(i)); - Verify.verify(responseStream != null, "No pending response stream"); + verify(responseStream != null, "No pending response stream"); responseStream.append(chunk.getSerializedResponse(i).newInput()); if (chunk.getRemainingBytesForResponse() == 0) { responseStream.complete(); @@ -283,12 +278,12 @@ private void queueRequestAndWait(QueuedRequest request) throws InterruptedExcept // Finalize the batch so that no additional requests will be added. Leave the batch in the // queue so that a subsequent batch will wait for it's completion. synchronized (batches) { - Verify.verify(batch == batches.peekFirst()); + verify(batch == batches.peekFirst()); batch.markFinalized(); } sendBatch(batch.requests()); synchronized (batches) { - Verify.verify(batch == batches.pollFirst()); + verify(batch == batches.pollFirst()); } // Notify all waiters with requests in this batch as well as the sender // of the next batch (if one exists). @@ -308,7 +303,7 @@ private void sendBatch(List requests) { for (QueuedRequest request : requests) { // Map#put returns null if there was no previous mapping for the key, meaning we have not // seen it before. 
- Verify.verify(pending.put(request.id(), request.getResponseStream()) == null); + verify(pending.put(request.id(), request.getResponseStream()) == null); } try { send(batchedRequest); diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcGetWorkStream.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcGetWorkStream.java index 6e35beccdb6aa..4660fe25b13b3 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcGetWorkStream.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcGetWorkStream.java @@ -23,12 +23,11 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Function; import javax.annotation.Nullable; import org.apache.beam.runners.dataflow.worker.WindmillTimeUtils; import org.apache.beam.runners.dataflow.worker.windmill.AbstractWindmillStream; -import org.apache.beam.runners.dataflow.worker.windmill.CloudWindmillServiceV1Alpha1Grpc; import org.apache.beam.runners.dataflow.worker.windmill.StreamObserverFactory; import org.apache.beam.runners.dataflow.worker.windmill.Windmill; import org.apache.beam.runners.dataflow.worker.windmill.Windmill.GetWorkRequest; @@ -40,6 +39,7 @@ import org.apache.beam.runners.dataflow.worker.windmill.WindmillStream.GetWorkStream.WorkItemReceiver; import org.apache.beam.sdk.util.BackOff; import org.apache.beam.vendor.grpc.v1p54p0.com.google.protobuf.ByteString; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.stub.StreamObserver; import org.joda.time.Instant; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -58,7 +58,10 @@ final class GrpcGetWorkStream private final AtomicLong inflightBytes; private GrpcGetWorkStream( - CloudWindmillServiceV1Alpha1Grpc.CloudWindmillServiceV1Alpha1Stub stub, + Function< + StreamObserver, + StreamObserver> + startGetWorkRpcFn, GetWorkRequest request, BackOff backoff, StreamObserverFactory streamObserverFactory, @@ -67,14 +70,7 @@ private GrpcGetWorkStream( ThrottleTimer getWorkThrottleTimer, WorkItemReceiver receiver) { super( - responseObserver -> - stub.withDeadlineAfter( - AbstractWindmillStream.DEFAULT_STREAM_RPC_DEADLINE_SECONDS, TimeUnit.SECONDS) - .getWorkStream(responseObserver), - backoff, - streamObserverFactory, - streamRegistry, - logEveryNStreamFailures); + startGetWorkRpcFn, backoff, streamObserverFactory, streamRegistry, logEveryNStreamFailures); this.request = request; this.getWorkThrottleTimer = getWorkThrottleTimer; this.receiver = receiver; @@ -84,7 +80,10 @@ private GrpcGetWorkStream( } static GrpcGetWorkStream create( - CloudWindmillServiceV1Alpha1Grpc.CloudWindmillServiceV1Alpha1Stub stub, + Function< + StreamObserver, + StreamObserver> + startGetWorkRpcFn, GetWorkRequest request, BackOff backoff, StreamObserverFactory streamObserverFactory, @@ -94,7 +93,7 @@ static GrpcGetWorkStream create( WorkItemReceiver receiver) { GrpcGetWorkStream getWorkStream = new GrpcGetWorkStream( - stub, + startGetWorkRpcFn, request, backoff, streamObserverFactory, diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcGetWorkerMetadataStream.java 
b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcGetWorkerMetadataStream.java new file mode 100644 index 0000000000000..427fd412ec7f4 --- /dev/null +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcGetWorkerMetadataStream.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.dataflow.worker.windmill.grpcclient; + +import com.google.errorprone.annotations.concurrent.GuardedBy; +import java.io.PrintWriter; +import java.util.Optional; +import java.util.Set; +import java.util.function.Consumer; +import java.util.function.Function; +import org.apache.beam.runners.dataflow.worker.windmill.AbstractWindmillStream; +import org.apache.beam.runners.dataflow.worker.windmill.StreamObserverFactory; +import org.apache.beam.runners.dataflow.worker.windmill.Windmill.JobHeader; +import org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkerMetadataRequest; +import org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkerMetadataResponse; +import org.apache.beam.runners.dataflow.worker.windmill.WindmillEndpoints; +import org.apache.beam.runners.dataflow.worker.windmill.WindmillStream.GetWorkerMetadataStream; +import org.apache.beam.sdk.util.BackOff; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.stub.StreamObserver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +final class GrpcGetWorkerMetadataStream + extends AbstractWindmillStream + implements GetWorkerMetadataStream { + private static final Logger LOG = LoggerFactory.getLogger(GrpcGetWorkerMetadataStream.class); + private static final WorkerMetadataRequest HEALTH_CHECK_REQUEST = + WorkerMetadataRequest.getDefaultInstance(); + private final WorkerMetadataRequest workerMetadataRequest; + private final ThrottleTimer getWorkerMetadataThrottleTimer; + private final Consumer serverMappingConsumer; + private final Object metadataLock; + + @GuardedBy("metadataLock") + private long metadataVersion; + + @GuardedBy("metadataLock") + private WorkerMetadataResponse latestResponse; + + private GrpcGetWorkerMetadataStream( + Function, StreamObserver> + startGetWorkerMetadataRpcFn, + BackOff backoff, + StreamObserverFactory streamObserverFactory, + Set> streamRegistry, + int logEveryNStreamFailures, + JobHeader jobHeader, + long metadataVersion, + ThrottleTimer getWorkerMetadataThrottleTimer, + Consumer serverMappingConsumer) { + super( + startGetWorkerMetadataRpcFn, + backoff, + streamObserverFactory, + streamRegistry, + logEveryNStreamFailures); + this.workerMetadataRequest = WorkerMetadataRequest.newBuilder().setHeader(jobHeader).build(); + this.metadataVersion = metadataVersion; + 
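+    // metadataVersion records the newest version seen so far; responses whose version
+    // is not strictly greater are dropped in extractWindmillEndpointsFrom below.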
this.getWorkerMetadataThrottleTimer = getWorkerMetadataThrottleTimer; + this.serverMappingConsumer = serverMappingConsumer; + this.latestResponse = WorkerMetadataResponse.getDefaultInstance(); + this.metadataLock = new Object(); + } + + public static GrpcGetWorkerMetadataStream create( + Function, StreamObserver> + startGetWorkerMetadataRpcFn, + BackOff backoff, + StreamObserverFactory streamObserverFactory, + Set> streamRegistry, + int logEveryNStreamFailures, + JobHeader jobHeader, + int metadataVersion, + ThrottleTimer getWorkerMetadataThrottleTimer, + Consumer serverMappingUpdater) { + GrpcGetWorkerMetadataStream getWorkerMetadataStream = + new GrpcGetWorkerMetadataStream( + startGetWorkerMetadataRpcFn, + backoff, + streamObserverFactory, + streamRegistry, + logEveryNStreamFailures, + jobHeader, + metadataVersion, + getWorkerMetadataThrottleTimer, + serverMappingUpdater); + getWorkerMetadataStream.startStream(); + return getWorkerMetadataStream; + } + + /** + * Each instance of {@link AbstractWindmillStream} owns its own responseObserver that calls + * onResponse(). + */ + @Override + protected void onResponse(WorkerMetadataResponse response) { + extractWindmillEndpointsFrom(response).ifPresent(serverMappingConsumer); + } + + /** + * Acquires the {@link #metadataLock} Returns {@link Optional} if the + * metadataVersion in the response is not stale (older or equal to {@link #metadataVersion}), else + * returns empty {@link Optional}. + */ + private Optional extractWindmillEndpointsFrom( + WorkerMetadataResponse response) { + synchronized (metadataLock) { + if (response.getMetadataVersion() > this.metadataVersion) { + this.metadataVersion = response.getMetadataVersion(); + this.latestResponse = response; + return Optional.of(WindmillEndpoints.from(response)); + } else { + // If the currentMetadataVersion is greater than or equal to one in the response, the + // response data is stale, and we do not want to do anything. + LOG.info( + "Received WorkerMetadataResponse={}; Received metadata version={}; Current metadata version={}. 
" + + "Skipping update because received stale metadata", + response, + response.getMetadataVersion(), + this.metadataVersion); + } + } + + return Optional.empty(); + } + + @Override + protected synchronized void onNewStream() { + send(workerMetadataRequest); + } + + @Override + protected boolean hasPendingRequests() { + return false; + } + + @Override + protected void startThrottleTimer() { + getWorkerMetadataThrottleTimer.start(); + } + + @Override + protected void sendHealthCheck() { + send(HEALTH_CHECK_REQUEST); + } + + @Override + protected void appendSpecificHtml(PrintWriter writer) { + synchronized (metadataLock) { + writer.format( + "GetWorkerMetadataStream: version=[%d] , job_header=[%s], latest_response=[%s]", + this.metadataVersion, workerMetadataRequest.getHeader(), this.latestResponse); + } + } +} diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcWindmillServer.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcWindmillServer.java index e8745e265eea8..19cb90297df5b 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcWindmillServer.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcWindmillServer.java @@ -107,7 +107,6 @@ public final class GrpcWindmillServer extends WindmillServerStub { private final ThrottleTimer commitWorkThrottleTimer; private final Random rand; private final Set> streamRegistry; - private ImmutableSet endpoints; private int logEveryNStreamFailures; private Duration maxBackoff = MAX_BACKOFF; @@ -301,14 +300,21 @@ private Channel remoteChannel(HostAndPort endpoint) throws IOException { .build(); } + /** + * Stubs returned from this method do not (and should not) have {@link + * org.apache.beam.vendor.grpc.v1p54p0.io.grpc.Deadline}(s) set since they represent an absolute + * point in time. {@link org.apache.beam.vendor.grpc.v1p54p0.io.grpc.Deadline}(s) should not be + * treated as a timeout which represents a relative point in time. + * + * @see Official gRPC deadline documentation for more + * details. + */ private synchronized CloudWindmillServiceV1Alpha1Grpc.CloudWindmillServiceV1Alpha1Stub stub() { if (stubList.isEmpty()) { throw new RuntimeException("windmillServiceEndpoint has not been set"); } - if (stubList.size() == 1) { - return stubList.get(0); - } - return stubList.get(rand.nextInt(stubList.size())); + + return stubList.size() == 1 ? stubList.get(0) : stubList.get(rand.nextInt(stubList.size())); } @Override @@ -398,7 +404,13 @@ public GetWorkStream getWorkStream(GetWorkRequest request, WorkItemReceiver rece .build(); return GrpcGetWorkStream.create( - stub(), + responseObserver -> + stub() + // Deadlines are absolute points in time, so generate a new one everytime this + // function is called. + .withDeadlineAfter( + AbstractWindmillStream.DEFAULT_STREAM_RPC_DEADLINE_SECONDS, TimeUnit.SECONDS) + .getWorkStream(responseObserver), getWorkRequest, grpcBackoff(), newStreamObserverFactory(), @@ -411,7 +423,13 @@ public GetWorkStream getWorkStream(GetWorkRequest request, WorkItemReceiver rece @Override public GetDataStream getDataStream() { return GrpcGetDataStream.create( - stub(), + responseObserver -> + stub() + // Deadlines are absolute points in time, so generate a new one everytime this + // function is called. 
+ .withDeadlineAfter( + AbstractWindmillStream.DEFAULT_STREAM_RPC_DEADLINE_SECONDS, TimeUnit.SECONDS) + .getDataStream(responseObserver), grpcBackoff(), newStreamObserverFactory(), streamRegistry, @@ -425,7 +443,13 @@ public GetDataStream getDataStream() { @Override public CommitWorkStream commitWorkStream() { return GrpcCommitWorkStream.create( - stub(), + responseObserver -> + stub() + // Deadlines are absolute points in time, so generate a new one everytime this + // function is called. + .withDeadlineAfter( + AbstractWindmillStream.DEFAULT_STREAM_RPC_DEADLINE_SECONDS, TimeUnit.SECONDS) + .commitWorkStream(responseObserver), grpcBackoff(), newStreamObserverFactory(), streamRegistry, diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcGetWorkerMetadataStreamTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcGetWorkerMetadataStreamTest.java new file mode 100644 index 0000000000000..45ed3381a8bfe --- /dev/null +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/windmill/grpcclient/GrpcGetWorkerMetadataStreamTest.java @@ -0,0 +1,328 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.runners.dataflow.worker.windmill.grpcclient; + +import static com.google.common.truth.Truth.assertThat; +import static org.apache.beam.runners.dataflow.worker.windmill.AbstractWindmillStream.DEFAULT_STREAM_RPC_DEADLINE_SECONDS; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; + +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import javax.annotation.Nullable; +import org.apache.beam.runners.dataflow.worker.windmill.AbstractWindmillStream; +import org.apache.beam.runners.dataflow.worker.windmill.CloudWindmillServiceV1Alpha1Grpc; +import org.apache.beam.runners.dataflow.worker.windmill.StreamObserverFactory; +import org.apache.beam.runners.dataflow.worker.windmill.Windmill.JobHeader; +import org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkerMetadataRequest; +import org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkerMetadataResponse; +import org.apache.beam.runners.dataflow.worker.windmill.WindmillEndpoints; +import org.apache.beam.sdk.util.FluentBackoff; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.ManagedChannel; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.Server; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.inprocess.InProcessChannelBuilder; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.inprocess.InProcessServerBuilder; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.stub.StreamObserver; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.testing.GrpcCleanupRule; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.util.MutableHandlerRegistry; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Maps; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.Mockito; + +@RunWith(JUnit4.class) +public class GrpcGetWorkerMetadataStreamTest { + private static final String IPV6_ADDRESS_1 = "2001:db8:0000:bac5:0000:0000:fed0:81a2"; + private static final String IPV6_ADDRESS_2 = "2001:db8:0000:bac5:0000:0000:fed0:82a3"; + private static final List DIRECT_PATH_ENDPOINTS = + Lists.newArrayList( + WorkerMetadataResponse.Endpoint.newBuilder() + .setDirectEndpoint(IPV6_ADDRESS_1) + .setWorkerToken("worker_token") + .build()); + private static final Map GLOBAL_DATA_ENDPOINTS = + Maps.newHashMap(); + private static final JobHeader TEST_JOB_HEADER = + JobHeader.newBuilder() + .setJobId("test_job") + .setWorkerId("test_worker") + .setProjectId("test_project") + .build(); + private static final String FAKE_SERVER_NAME = "Fake server for GrpcGetWorkerMetadataStreamTest"; + @Rule public final GrpcCleanupRule grpcCleanup = new GrpcCleanupRule(); + private final MutableHandlerRegistry serviceRegistry = new MutableHandlerRegistry(); + private final Set> streamRegistry = new HashSet<>(); + private ManagedChannel inProcessChannel; + private GrpcGetWorkerMetadataStream stream; + + private GrpcGetWorkerMetadataStream getWorkerMetadataTestStream( + GetWorkerMetadataTestStub getWorkerMetadataTestStub, + int metadataVersion, + Consumer endpointsConsumer) { + 
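+    // Test helper: registers the fake Windmill service and opens a
+    // GetWorkerMetadataStream against the in-process channel, starting from the given
+    // metadata version.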
serviceRegistry.addService(getWorkerMetadataTestStub); + return GrpcGetWorkerMetadataStream.create( + responseObserver -> + CloudWindmillServiceV1Alpha1Grpc.newStub(inProcessChannel) + .getWorkerMetadataStream(responseObserver), + FluentBackoff.DEFAULT.backoff(), + StreamObserverFactory.direct(DEFAULT_STREAM_RPC_DEADLINE_SECONDS * 2, 1), + streamRegistry, + 1, // logEveryNStreamFailures + TEST_JOB_HEADER, + metadataVersion, + new ThrottleTimer(), + endpointsConsumer); + } + + @Before + public void setUp() throws IOException { + Server server = + InProcessServerBuilder.forName(FAKE_SERVER_NAME) + .fallbackHandlerRegistry(serviceRegistry) + .directExecutor() + .build() + .start(); + + inProcessChannel = + grpcCleanup.register( + InProcessChannelBuilder.forName(FAKE_SERVER_NAME).directExecutor().build()); + grpcCleanup.register(server); + grpcCleanup.register(inProcessChannel); + GLOBAL_DATA_ENDPOINTS.put( + "global_data", + WorkerMetadataResponse.Endpoint.newBuilder() + .setDirectEndpoint(IPV6_ADDRESS_1) + .setWorkerToken("worker_token") + .build()); + } + + @After + public void cleanUp() { + inProcessChannel.shutdownNow(); + } + + @Test + public void testGetWorkerMetadata() { + WorkerMetadataResponse mockResponse = + WorkerMetadataResponse.newBuilder() + .setMetadataVersion(1) + .addAllWorkEndpoints(DIRECT_PATH_ENDPOINTS) + .putAllGlobalDataEndpoints(GLOBAL_DATA_ENDPOINTS) + .build(); + TestWindmillEndpointsConsumer testWindmillEndpointsConsumer = + new TestWindmillEndpointsConsumer(); + GetWorkerMetadataTestStub testStub = + new GetWorkerMetadataTestStub(new TestGetWorkMetadataRequestObserver()); + int metadataVersion = -1; + stream = getWorkerMetadataTestStream(testStub, metadataVersion, testWindmillEndpointsConsumer); + testStub.injectWorkerMetadata(mockResponse); + + assertThat(testWindmillEndpointsConsumer.globalDataEndpoints.keySet()) + .containsExactlyElementsIn(GLOBAL_DATA_ENDPOINTS.keySet()); + assertThat(testWindmillEndpointsConsumer.windmillEndpoints) + .containsExactlyElementsIn( + DIRECT_PATH_ENDPOINTS.stream() + .map(WindmillEndpoints.Endpoint::from) + .collect(Collectors.toList())); + } + + @Test + public void testGetWorkerMetadata_consumesSubsequentResponseMetadata() { + WorkerMetadataResponse initialResponse = + WorkerMetadataResponse.newBuilder() + .setMetadataVersion(1) + .addAllWorkEndpoints(DIRECT_PATH_ENDPOINTS) + .putAllGlobalDataEndpoints(GLOBAL_DATA_ENDPOINTS) + .build(); + TestWindmillEndpointsConsumer testWindmillEndpointsConsumer = + Mockito.spy(new TestWindmillEndpointsConsumer()); + + GetWorkerMetadataTestStub testStub = + new GetWorkerMetadataTestStub(new TestGetWorkMetadataRequestObserver()); + int metadataVersion = 0; + stream = getWorkerMetadataTestStream(testStub, metadataVersion, testWindmillEndpointsConsumer); + testStub.injectWorkerMetadata(initialResponse); + + List newDirectPathEndpoints = + Lists.newArrayList( + WorkerMetadataResponse.Endpoint.newBuilder().setDirectEndpoint(IPV6_ADDRESS_2).build()); + Map newGlobalDataEndpoints = new HashMap<>(); + newGlobalDataEndpoints.put( + "new_global_data", + WorkerMetadataResponse.Endpoint.newBuilder().setDirectEndpoint(IPV6_ADDRESS_2).build()); + + WorkerMetadataResponse newWorkMetadataResponse = + WorkerMetadataResponse.newBuilder() + .setMetadataVersion(initialResponse.getMetadataVersion() + 1) + .addAllWorkEndpoints(newDirectPathEndpoints) + .putAllGlobalDataEndpoints(newGlobalDataEndpoints) + .build(); + + testStub.injectWorkerMetadata(newWorkMetadataResponse); + + 
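+    // The newer (version 2) response should fully replace the endpoints delivered by
+    // the initial response.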
assertThat(newGlobalDataEndpoints.keySet()) + .containsExactlyElementsIn(testWindmillEndpointsConsumer.globalDataEndpoints.keySet()); + assertThat(testWindmillEndpointsConsumer.windmillEndpoints) + .containsExactlyElementsIn( + newDirectPathEndpoints.stream() + .map(WindmillEndpoints.Endpoint::from) + .collect(Collectors.toList())); + } + + @Test + public void testGetWorkerMetadata_doesNotConsumeResponseIfMetadataStale() { + WorkerMetadataResponse freshEndpoints = + WorkerMetadataResponse.newBuilder() + .setMetadataVersion(2) + .addAllWorkEndpoints(DIRECT_PATH_ENDPOINTS) + .putAllGlobalDataEndpoints(GLOBAL_DATA_ENDPOINTS) + .build(); + + TestWindmillEndpointsConsumer testWindmillEndpointsConsumer = + Mockito.spy(new TestWindmillEndpointsConsumer()); + GetWorkerMetadataTestStub testStub = + new GetWorkerMetadataTestStub(new TestGetWorkMetadataRequestObserver()); + int metadataVersion = 0; + stream = getWorkerMetadataTestStream(testStub, metadataVersion, testWindmillEndpointsConsumer); + testStub.injectWorkerMetadata(freshEndpoints); + + List staleDirectPathEndpoints = + Lists.newArrayList( + WorkerMetadataResponse.Endpoint.newBuilder() + .setDirectEndpoint("staleWindmillEndpoint") + .build()); + Map staleGlobalDataEndpoints = new HashMap<>(); + staleGlobalDataEndpoints.put( + "stale_global_data", + WorkerMetadataResponse.Endpoint.newBuilder().setDirectEndpoint("staleGlobalData").build()); + + testStub.injectWorkerMetadata( + WorkerMetadataResponse.newBuilder() + .setMetadataVersion(1) + .addAllWorkEndpoints(staleDirectPathEndpoints) + .putAllGlobalDataEndpoints(staleGlobalDataEndpoints) + .build()); + + // Should have ignored the stale update and only used initial. + verify(testWindmillEndpointsConsumer).accept(WindmillEndpoints.from(freshEndpoints)); + verifyNoMoreInteractions(testWindmillEndpointsConsumer); + } + + @Test + public void testGetWorkerMetadata_correctlyAddsAndRemovesStreamFromRegistry() { + GetWorkerMetadataTestStub testStub = + new GetWorkerMetadataTestStub(new TestGetWorkMetadataRequestObserver()); + stream = getWorkerMetadataTestStream(testStub, 0, new TestWindmillEndpointsConsumer()); + testStub.injectWorkerMetadata( + WorkerMetadataResponse.newBuilder() + .setMetadataVersion(1) + .addAllWorkEndpoints(DIRECT_PATH_ENDPOINTS) + .putAllGlobalDataEndpoints(GLOBAL_DATA_ENDPOINTS) + .build()); + + assertTrue(streamRegistry.contains(stream)); + stream.close(); + assertFalse(streamRegistry.contains(stream)); + } + + @Test + public void testSendHealthCheck() { + TestGetWorkMetadataRequestObserver requestObserver = + Mockito.spy(new TestGetWorkMetadataRequestObserver()); + GetWorkerMetadataTestStub testStub = new GetWorkerMetadataTestStub(requestObserver); + stream = getWorkerMetadataTestStream(testStub, 0, new TestWindmillEndpointsConsumer()); + stream.sendHealthCheck(); + + verify(requestObserver).onNext(WorkerMetadataRequest.getDefaultInstance()); + } + + private static class GetWorkerMetadataTestStub + extends CloudWindmillServiceV1Alpha1Grpc.CloudWindmillServiceV1Alpha1ImplBase { + private final TestGetWorkMetadataRequestObserver requestObserver; + private @Nullable StreamObserver responseObserver; + + private GetWorkerMetadataTestStub(TestGetWorkMetadataRequestObserver requestObserver) { + this.requestObserver = requestObserver; + } + + @Override + public StreamObserver getWorkerMetadataStream( + StreamObserver responseObserver) { + if (this.responseObserver == null) { + this.responseObserver = responseObserver; + requestObserver.responseObserver = this.responseObserver; + } 
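+      // Only the first stream's response observer is retained, so injected metadata is
+      // always delivered to the original test stream.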
+ + return requestObserver; + } + + private void injectWorkerMetadata(WorkerMetadataResponse response) { + if (responseObserver != null) { + responseObserver.onNext(response); + } + } + } + + @SuppressWarnings("UnusedVariable") + private static class TestGetWorkMetadataRequestObserver + implements StreamObserver { + private @Nullable StreamObserver responseObserver; + + @Override + public void onNext(WorkerMetadataRequest workerMetadataRequest) {} + + @Override + public void onError(Throwable throwable) {} + + @Override + public void onCompleted() { + responseObserver.onCompleted(); + } + } + + private static class TestWindmillEndpointsConsumer implements Consumer { + private final Map globalDataEndpoints; + private final Set windmillEndpoints; + + private TestWindmillEndpointsConsumer() { + this.globalDataEndpoints = new HashMap<>(); + this.windmillEndpoints = new HashSet<>(); + } + + @Override + public void accept(WindmillEndpoints windmillEndpoints) { + this.globalDataEndpoints.clear(); + this.windmillEndpoints.clear(); + this.globalDataEndpoints.putAll(windmillEndpoints.globalDataEndpoints()); + this.windmillEndpoints.addAll(windmillEndpoints.windmillEndpoints()); + } + } +} diff --git a/runners/google-cloud-dataflow-java/worker/windmill/src/main/proto/windmill.proto b/runners/google-cloud-dataflow-java/worker/windmill/src/main/proto/windmill.proto index f66b2bed48c65..1759185911d49 100644 --- a/runners/google-cloud-dataflow-java/worker/windmill/src/main/proto/windmill.proto +++ b/runners/google-cloud-dataflow-java/worker/windmill/src/main/proto/windmill.proto @@ -746,6 +746,8 @@ message WorkerMetadataRequest { optional JobHeader header = 1; } +// Converted into org.apache.beam.runners.dataflow.worker.windmill.WindmillEndpoints +// used to connect to Streaming Engine. message WorkerMetadataResponse { // The metadata version increases with every modification. Within a single // stream it will always be increasing. The version may be used across streams @@ -758,7 +760,9 @@ message WorkerMetadataResponse { // CommitWorkStream. Each response on this stream replaces the previous, and // connections to endpoints that are no longer present should be closed. message Endpoint { - optional string endpoint = 1; + // IPv6 address of a streaming engine windmill worker. + optional string direct_endpoint = 1; + optional string worker_token = 2; } repeated Endpoint work_endpoints = 2; @@ -766,10 +770,7 @@ message WorkerMetadataResponse { // calls to retrieve that global data. map global_data_endpoints = 3; - // DirectPath endpoints to be used by user workers for streaming engine jobs. - // DirectPath endpoints here are virtual IPv6 addresses of the windmill - // workers. - repeated Endpoint direct_path_endpoints = 4; + reserved 4; } service WindmillAppliance { diff --git a/runners/google-cloud-dataflow-java/worker/windmill/src/main/proto/windmill_service.proto b/runners/google-cloud-dataflow-java/worker/windmill/src/main/proto/windmill_service.proto index 803766d1a4646..d9183e54e0dd3 100644 --- a/runners/google-cloud-dataflow-java/worker/windmill/src/main/proto/windmill_service.proto +++ b/runners/google-cloud-dataflow-java/worker/windmill/src/main/proto/windmill_service.proto @@ -34,7 +34,7 @@ service CloudWindmillServiceV1Alpha1 { returns (stream .windmill.StreamingGetWorkResponseChunk); // Gets worker metadata. Response is a stream. 
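+  // The request side becomes a stream as well, making the RPC bidirectional so the
+  // client can keep the stream open and send periodic health checks on it.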
- rpc GetWorkerMetadataStream(.windmill.WorkerMetadataRequest) + rpc GetWorkerMetadataStream(stream .windmill.WorkerMetadataRequest) returns (stream .windmill.WorkerMetadataResponse); // Gets data from Windmill. diff --git a/sdks/go.mod b/sdks/go.mod index 8435590ad4771..5e91aea021f8d 100644 --- a/sdks/go.mod +++ b/sdks/go.mod @@ -23,13 +23,13 @@ module github.com/apache/beam/sdks/v2 go 1.20 require ( - cloud.google.com/go/bigquery v1.54.0 + cloud.google.com/go/bigquery v1.55.0 cloud.google.com/go/bigtable v1.19.0 - cloud.google.com/go/datastore v1.13.0 + cloud.google.com/go/datastore v1.14.0 cloud.google.com/go/profiler v0.3.1 cloud.google.com/go/pubsub v1.33.0 cloud.google.com/go/spanner v1.49.0 - cloud.google.com/go/storage v1.32.0 + cloud.google.com/go/storage v1.33.0 github.com/aws/aws-sdk-go-v2 v1.21.0 github.com/aws/aws-sdk-go-v2/config v1.18.39 github.com/aws/aws-sdk-go-v2/credentials v1.13.37 @@ -57,9 +57,9 @@ require ( golang.org/x/sync v0.3.0 golang.org/x/sys v0.12.0 golang.org/x/text v0.13.0 - google.golang.org/api v0.138.0 - google.golang.org/genproto v0.0.0-20230803162519-f966b187b2e5 - google.golang.org/grpc v1.57.0 + google.golang.org/api v0.140.0 + google.golang.org/genproto v0.0.0-20230821184602-ccc8af3d0e93 + google.golang.org/grpc v1.58.0 google.golang.org/protobuf v1.31.0 gopkg.in/retry.v1 v1.0.3 gopkg.in/yaml.v2 v2.4.0 @@ -74,7 +74,7 @@ require ( require dario.cat/mergo v1.0.0 // indirect require ( - cloud.google.com/go v0.110.6 // indirect + cloud.google.com/go v0.110.7 // indirect cloud.google.com/go/compute v1.23.0 // indirect cloud.google.com/go/compute/metadata v0.2.3 // indirect cloud.google.com/go/iam v1.1.1 // indirect @@ -109,8 +109,8 @@ require ( github.com/docker/distribution v2.8.2+incompatible // indirect github.com/docker/docker v24.0.5+incompatible // indirect; but required to resolve issue docker has with go1.20 github.com/docker/go-units v0.5.0 // indirect - github.com/envoyproxy/go-control-plane v0.11.1-0.20230524094728-9239064ad72f // indirect - github.com/envoyproxy/protoc-gen-validate v0.10.1 // indirect + github.com/envoyproxy/go-control-plane v0.11.1 // indirect + github.com/envoyproxy/protoc-gen-validate v1.0.2 // indirect github.com/felixge/httpsnoop v1.0.2 // indirect github.com/goccy/go-json v0.9.11 // indirect github.com/gogo/protobuf v1.3.2 // indirect @@ -119,7 +119,7 @@ require ( github.com/google/flatbuffers v2.0.8+incompatible // indirect github.com/google/pprof v0.0.0-20221103000818-d260c55eee4c // indirect github.com/google/renameio/v2 v2.0.0 // indirect - github.com/google/s2a-go v0.1.5 // indirect + github.com/google/s2a-go v0.1.7 // indirect github.com/googleapis/enterprise-certificate-proxy v0.2.5 // indirect github.com/googleapis/gax-go/v2 v2.12.0 // indirect github.com/gorilla/handlers v1.5.1 // indirect @@ -156,9 +156,9 @@ require ( go.opencensus.io v0.24.0 // indirect golang.org/x/crypto v0.13.0 // indirect golang.org/x/mod v0.11.0 // indirect - golang.org/x/tools v0.9.1 // indirect + golang.org/x/tools v0.10.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20230803162519-f966b187b2e5 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20230807174057-1744710a1577 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20230911183012-2d3300fd4832 // indirect ) diff --git a/sdks/go.sum b/sdks/go.sum index 49534a1f99381..c30891294dbd6 100644 --- a/sdks/go.sum +++ b/sdks/go.sum @@ 
-8,13 +8,13 @@ cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4= cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= -cloud.google.com/go v0.110.6 h1:8uYAkj3YHTP/1iwReuHPxLSbdcyc+dSBbzFMrVwDR6Q= -cloud.google.com/go v0.110.6/go.mod h1:+EYjdK8e5RME/VY/qLCAtuyALQ9q67dvuum8i+H5xsI= +cloud.google.com/go v0.110.7 h1:rJyC7nWRg2jWGZ4wSJ5nY65GTdYJkg0cd/uXb+ACI6o= +cloud.google.com/go v0.110.7/go.mod h1:+EYjdK8e5RME/VY/qLCAtuyALQ9q67dvuum8i+H5xsI= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= -cloud.google.com/go/bigquery v1.54.0 h1:ify6s7sy+kQuAimRnVTrPUzaeY0+X5GEsKt2C5CiA8w= -cloud.google.com/go/bigquery v1.54.0/go.mod h1:9Y5I3PN9kQWuid6183JFhOGOW3GcirA5LpsKCUn+2ec= +cloud.google.com/go/bigquery v1.55.0 h1:hs44Xxov3XLWQiCx2J8lK5U/ihLqnpm4RVVl5fdtLLI= +cloud.google.com/go/bigquery v1.55.0/go.mod h1:9Y5I3PN9kQWuid6183JFhOGOW3GcirA5LpsKCUn+2ec= cloud.google.com/go/bigtable v1.19.0 h1:wiq9LT0kukfInzvy1joMDijCw/OD1UChpSbORXYn0LI= cloud.google.com/go/bigtable v1.19.0/go.mod h1:xl5kPa8PTkJjdBxg6qdGH88464nNqmbISHSRU+D2yFE= cloud.google.com/go/compute v1.23.0 h1:tP41Zoavr8ptEqaW6j+LQOnyBBhO7OkOMAGrgLopTwY= @@ -24,8 +24,8 @@ cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2Aawl cloud.google.com/go/datacatalog v1.16.0 h1:qVeQcw1Cz93/cGu2E7TYUPh8Lz5dn5Ws2siIuQ17Vng= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= -cloud.google.com/go/datastore v1.13.0 h1:ktbC66bOQB3HJPQe8qNI1/aiQ77PMu7hD4mzE6uxe3w= -cloud.google.com/go/datastore v1.13.0/go.mod h1:KjdB88W897MRITkvWWJrg2OUtrR5XVj1EoLgSp6/N70= +cloud.google.com/go/datastore v1.14.0 h1:Mq0ApTRdLW3/dyiw+DkjTk0+iGIUvkbzaC8sfPwWTH4= +cloud.google.com/go/datastore v1.14.0/go.mod h1:GAeStMBIt9bPS7jMJA85kgkpsMkvseWWXiaHya9Jes8= cloud.google.com/go/iam v1.1.1 h1:lW7fzj15aVIXYHREOqjRBV9PsH0Z6u8Y46a1YGvQP4Y= cloud.google.com/go/iam v1.1.1/go.mod h1:A5avdyVL2tCppe4unb0951eI9jreack+RJ0/d+KUZOU= cloud.google.com/go/kms v1.15.0 h1:xYl5WEaSekKYN5gGRyhjvZKM22GVBBCzegGNVPy+aIs= @@ -43,8 +43,8 @@ cloud.google.com/go/spanner v1.49.0/go.mod h1:eGj9mQGK8+hkgSVbHNQ06pQ4oS+cyc4tXX cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= -cloud.google.com/go/storage v1.32.0 h1:5w6DxEGOnktmJHarxAOUywxVW9lbNWIzlzzUltG/3+o= -cloud.google.com/go/storage v1.32.0/go.mod h1:Hhh/dogNRGca7IWv1RC2YqEn0c0G77ctA/OxflYkiD8= +cloud.google.com/go/storage v1.33.0 h1:PVrDOkIC8qQVa1P3SXGpQvfuJhN2LHOoyZvWs8D2X5M= +cloud.google.com/go/storage v1.33.0/go.mod h1:Hhh/dogNRGca7IWv1RC2YqEn0c0G77ctA/OxflYkiD8= dario.cat/mergo v1.0.0 h1:AGCNq9Evsj31mOgNPcLyXc+4PNABt905YmuqPYYpBWk= dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= @@ -67,7 +67,6 @@ 
github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5 github.com/Microsoft/hcsshim v0.10.0-rc.8 h1:YSZVvlIIDD1UxQpJp0h+dnpLUw+TrY0cx8obKsp3bek= github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= -github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516 h1:byKBBF2CKWBjjA4J1ZL2JXttJULvWSl50LegTyRZ728= github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516/go.mod h1:QNYViu/X0HXDHw7m3KXzWSVXIbfUvJqBFe6Gj8/pYA0= github.com/apache/arrow/go/v12 v12.0.0 h1:xtZE63VWl7qLdB0JObIXvvhGjoVNrQ9ciIHG2OK5cmc= @@ -136,7 +135,6 @@ github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyY github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g= github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= -github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E= @@ -146,13 +144,9 @@ github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMn github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= -github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe h1:QQ3GSy+MqSHxm/d8nCtnAiZdYFd45cYZPs8vOOIYKfk= github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= -github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 h1:/inchEIKaYC1Akx+H+gqO04wryn5h75LSazbRlnya1k= github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/colinmarc/hdfs/v2 v2.1.1/go.mod h1:M3x+k8UKKmxtFu++uAZ0OtDU8jR3jnaZIAc6yK4Ue0c= @@ -185,13 +179,11 @@ github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+m github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= -github.com/envoyproxy/go-control-plane 
v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= -github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= -github.com/envoyproxy/go-control-plane v0.11.1-0.20230524094728-9239064ad72f h1:7T++XKzy4xg7PKy+bM+Sa9/oe1OC88yz2hXQUISoXfA= -github.com/envoyproxy/go-control-plane v0.11.1-0.20230524094728-9239064ad72f/go.mod h1:sfYdkwUW4BA3PbKjySwjJy+O4Pu0h62rlqCMHNk+K+Q= +github.com/envoyproxy/go-control-plane v0.11.1 h1:wSUXTlLfiAQRWs2F+p+EKOY9rUyis1MyGqJ2DIk5HpM= +github.com/envoyproxy/go-control-plane v0.11.1/go.mod h1:uhMcXKCQMEJHiAb0w+YGefQLaTEw+YhGluxZkrTmD0g= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/envoyproxy/protoc-gen-validate v0.10.1 h1:c0g45+xCJhdgFGw7a5QAfdS4byAbud7miNWJ1WwEVf8= -github.com/envoyproxy/protoc-gen-validate v0.10.1/go.mod h1:DRjgyB0I43LtJapqN6NiRwroiAU2PaFuvk/vjgh61ss= +github.com/envoyproxy/protoc-gen-validate v1.0.2 h1:QkIBuU5k+x7/QXPvPPnWXWlCdaBFApVqftFV6k087DA= +github.com/envoyproxy/protoc-gen-validate v1.0.2/go.mod h1:GpiZQP3dDbg4JouG/NNS7QWXpgx6x8QiMKdmN72jogE= github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/felixge/httpsnoop v1.0.2 h1:+nS9g82KMXccJ/wp0zyRW9ZBHFETmMGtkk+2CTTrW4o= github.com/felixge/httpsnoop v1.0.2/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= @@ -201,7 +193,6 @@ github.com/frankban/quicktest v1.11.3 h1:8sXhOn0uLys67V8EsXLc6eszDs8VXWxL3iRvebP github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= github.com/fsouza/fake-gcs-server v1.47.4 h1:gfBhBxEra20/Om02cvcyL8EnekV8KDb01Yffjat6AKQ= github.com/fsouza/fake-gcs-server v1.47.4/go.mod h1:vqUZbI12uy9IkRQ54Q4p5AniQsSiUq8alO9Nv2egMmA= -github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= @@ -239,10 +230,8 @@ github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrU github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= -github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= @@ -282,8 +271,8 @@ github.com/google/pprof v0.0.0-20221103000818-d260c55eee4c/go.mod h1:dDKJzRmX4S3 github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/renameio/v2 
v2.0.0 h1:UifI23ZTGY8Tt29JbYFiuyIU3eX+RNFtUwefq9qAhxg= github.com/google/renameio/v2 v2.0.0/go.mod h1:BtmJXm5YlszgC+TD4HOEEUFgkJP3nLxehU6hfe7jRt4= -github.com/google/s2a-go v0.1.5 h1:8IYp3w9nysqv3JH+NJgXJzGbDHzLOTj43BmSkp+O7qg= -github.com/google/s2a-go v0.1.5/go.mod h1:Ej+mSEMGRnqRzjc7VtF+jdBwYG5fuJfiZ8ELkjEwM0A= +github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o= +github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= @@ -298,7 +287,6 @@ github.com/gorilla/handlers v1.5.1 h1:9lRY6j8DEeeBT10CvO9hGW0gmky0BprnvDI5vfhUHH github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q= github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI= github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= -github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/hashicorp/go-uuid v0.0.0-20180228145832-27454136f036/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= @@ -392,7 +380,6 @@ github.com/proullon/ramsql v0.1.2 h1:PTtsy2iml/CW3Lsopyr86dlIs7JyYEmfLrfYvQVXD2U github.com/proullon/ramsql v0.1.2/go.mod h1:CFGqeQHQpdRfWqYmWD3yXqPTEaHkF4zgXy1C6qDWc9E= github.com/rogpeppe/clock v0.0.0-20190514195947-2896927a307a h1:3QH7VyOaaiUHNrA9Se4YQIRkDTCw1EJls9xTUCaCeRM= github.com/rogpeppe/clock v0.0.0-20190514195947-2896927a307a/go.mod h1:4r5QyqhjIWCcK8DO4KMclc5Iknq5qVBAlbYYzAbUScQ= -github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc= @@ -465,7 +452,6 @@ go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= -go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= golang.org/x/crypto v0.0.0-20180723164146-c126467f60eb/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -475,7 +461,6 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.0.0-20220314234659-1baeb1ce4c0b/go.mod 
h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.13.0 h1:mvySKfSWJ+UKUii46M40LOvyWfN0s2U+46/jDd0e6Ck= golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= @@ -531,7 +516,6 @@ golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= @@ -578,7 +562,6 @@ golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200828194041-157a740278f4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -646,8 +629,8 @@ golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapK golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo= -golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= +golang.org/x/tools v0.10.0 h1:tvDr/iQoUqNdohiYm0LmmKcBk+q86lb9EprIUFhHHGg= +golang.org/x/tools v0.10.0/go.mod h1:UJwyiVBsOA2uwvK/e5OY3GTpDUJriEd+/YlqAwLPmyM= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -664,8 +647,8 @@ google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsb google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.138.0 h1:K/tVp05MxNVbHShRw9m7e9VJGdagNeTdMzqPH7AUqr0= -google.golang.org/api v0.138.0/go.mod 
h1:4xyob8CxC+0GChNBvEUAk8VBKNvYOTWM9T3v3UfRxuY= +google.golang.org/api v0.140.0 h1:CaXNdYOH5oQQI7l6iKTHHiMTdxZca4/02hRg2U8c2hM= +google.golang.org/api v0.140.0/go.mod h1:aGbCiFgtwb2P6badchFbSBUurV6oR5d50Af4iNJtDdI= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -690,14 +673,13 @@ google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvx google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA= google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20230803162519-f966b187b2e5 h1:L6iMMGrtzgHsWofoFcihmDEMYeDR9KN/ThbPWGrh++g= -google.golang.org/genproto v0.0.0-20230803162519-f966b187b2e5/go.mod h1:oH/ZOT02u4kWEp7oYBGYFFkCdKS/uYR9Z7+0/xuuFp8= +google.golang.org/genproto v0.0.0-20230821184602-ccc8af3d0e93 h1:zv6ieVm8jNcN33At1+APsRISkRgynuWUxUhv6G123jY= +google.golang.org/genproto v0.0.0-20230821184602-ccc8af3d0e93/go.mod h1:yZTlhN0tQnXo3h00fuXNCxJdLdIdnVFVBaRJ5LWBbw4= google.golang.org/genproto/googleapis/api v0.0.0-20230803162519-f966b187b2e5 h1:nIgk/EEq3/YlnmVVXVnm14rC2oxgs1o0ong4sD/rd44= google.golang.org/genproto/googleapis/api v0.0.0-20230803162519-f966b187b2e5/go.mod h1:5DZzOUPCLYL3mNkQ0ms0F3EuUNZ7py1Bqeq6sxzI7/Q= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230807174057-1744710a1577 h1:wukfNtZmZUurLN/atp2hiIeTKn7QJWIQdHzqmsOnAOk= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230807174057-1744710a1577/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230911183012-2d3300fd4832 h1:o4LtQxebKIJ4vkzyhtD2rfUNZ20Zf0ik5YVP5E7G7VE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230911183012-2d3300fd4832/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -706,12 +688,9 @@ google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQ google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.45.0/go.mod h1:lN7owxKUQEqMfSyQikvvk5tf/6zMPsrK+ONuO11+0rQ= -google.golang.org/grpc v1.57.0 h1:kfzNeI/klCGD2YPMUlaGNT3pxvYfga7smW3Vth8Zsiw= -google.golang.org/grpc v1.57.0/go.mod h1:Sd+9RMTACXwmub0zcNY2c4arhtrbBYD1AUHI/dt16Mo= 
+google.golang.org/grpc v1.58.0 h1:32JY8YpPMSR45K+c3o6b8VL73V+rR8k+DeMIr4vRH8o= +google.golang.org/grpc v1.58.0/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -741,7 +720,6 @@ gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3M gopkg.in/retry.v1 v1.0.3 h1:a9CArYczAVv6Qs6VGoLMio99GEs7kY9UzSF9+LD+iGs= gopkg.in/retry.v1 v1.0.3/go.mod h1:FJkXmWiMaAo7xB+xhvDF59zhfjDWyzmyAxiT4dB688g= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= diff --git a/sdks/go/container/tools/buffered_logging.go b/sdks/go/container/tools/buffered_logging.go index ef5e8310c3b68..445d19fabfdc8 100644 --- a/sdks/go/container/tools/buffered_logging.go +++ b/sdks/go/container/tools/buffered_logging.go @@ -17,8 +17,11 @@ package tools import ( "context" + "log" + "math" "os" "strings" + "time" ) const initialLogSize int = 255 @@ -27,14 +30,24 @@ const initialLogSize int = 255 // in place of stdout and stderr in bootloader subprocesses. Not intended for // Beam end users. type BufferedLogger struct { - logger *Logger - builder strings.Builder - logs []string + logger *Logger + builder strings.Builder + logs []string + lastFlush time.Time + flushInterval time.Duration + periodicFlushContext context.Context + now func() time.Time } // NewBufferedLogger returns a new BufferedLogger type by reference. func NewBufferedLogger(logger *Logger) *BufferedLogger { - return &BufferedLogger{logger: logger} + return &BufferedLogger{logger: logger, lastFlush: time.Now(), flushInterval: time.Duration(math.MaxInt64), periodicFlushContext: context.Background(), now: time.Now} +} + +// NewBufferedLoggerWithFlushInterval returns a new BufferedLogger type by reference. This type will +// flush logs periodically on Write() calls as well as when Flush*() functions are called. +func NewBufferedLoggerWithFlushInterval(ctx context.Context, logger *Logger, interval time.Duration) *BufferedLogger { + return &BufferedLogger{logger: logger, lastFlush: time.Now(), flushInterval: interval, periodicFlushContext: ctx, now: time.Now} } // Write implements the io.Writer interface, converting input to a string @@ -50,6 +63,9 @@ func (b *BufferedLogger) Write(p []byte) (int, error) { } b.logs = append(b.logs, b.builder.String()) b.builder.Reset() + if b.now().Sub(b.lastFlush) > b.flushInterval { + b.FlushAtDebug(b.periodicFlushContext) + } return n, err } @@ -63,6 +79,7 @@ func (b *BufferedLogger) FlushAtError(ctx context.Context) { b.logger.Errorf(ctx, message) } b.logs = nil + b.lastFlush = time.Now() } // FlushAtDebug flushes the contents of the buffer to the logging @@ -75,4 +92,15 @@ func (b *BufferedLogger) FlushAtDebug(ctx context.Context) { b.logger.Printf(ctx, message) } b.logs = nil + b.lastFlush = time.Now() +} + +// Prints directly to the logging service. If the logger is nil, prints directly to the +// console. 
Used for the container pre-build workflow. +func (b *BufferedLogger) Printf(ctx context.Context, format string, args ...any) { + if b.logger == nil { + log.Printf(format, args...) + return + } + b.logger.Printf(ctx, format, args...) } diff --git a/sdks/go/container/tools/buffered_logging_test.go b/sdks/go/container/tools/buffered_logging_test.go index 8feef7b413d3d..9f542d2d5ab6b 100644 --- a/sdks/go/container/tools/buffered_logging_test.go +++ b/sdks/go/container/tools/buffered_logging_test.go @@ -18,6 +18,7 @@ package tools import ( "context" "testing" + "time" fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1" ) @@ -166,4 +167,75 @@ func TestBufferedLogger(t *testing.T) { } } }) + + t.Run("direct print", func(t *testing.T) { + catcher := &logCatcher{} + l := &Logger{client: catcher} + bl := NewBufferedLogger(l) + + bl.Printf(ctx, "foo %v", "bar") + + received := catcher.msgs[0].GetLogEntries()[0] + + if got, want := received.Message, "foo bar"; got != want { + t.Errorf("l.Printf(\"foo %%v\", \"bar\"): got message %q, want %q", got, want) + } + + if got, want := received.Severity, fnpb.LogEntry_Severity_DEBUG; got != want { + t.Errorf("l.Printf(\"foo %%v\", \"bar\"): got severity %v, want %v", got, want) + } + }) + + t.Run("debug flush at interval", func(t *testing.T) { + catcher := &logCatcher{} + l := &Logger{client: catcher} + interval := 5 * time.Second + bl := NewBufferedLoggerWithFlushInterval(context.Background(), l, interval) + + startTime := time.Now() + bl.now = func() time.Time { return startTime } + + messages := []string{"foo", "bar"} + + for i, message := range messages { + if i > 1 { + bl.now = func() time.Time { return startTime.Add(6 * time.Second) } + } + messBytes := []byte(message) + n, err := bl.Write(messBytes) + + if err != nil { + t.Errorf("got error %v", err) + } + if got, want := n, len(messBytes); got != want { + t.Errorf("got %d bytes written, want %d", got, want) + } + } + + lastMessage := "baz" + bl.now = func() time.Time { return startTime.Add(6 * time.Second) } + messBytes := []byte(lastMessage) + n, err := bl.Write(messBytes) + + if err != nil { + t.Errorf("got error %v", err) + } + if got, want := n, len(messBytes); got != want { + t.Errorf("got %d bytes written, want %d", got, want) + } + + // Type should have auto-flushed at debug after the third message + received := catcher.msgs[0].GetLogEntries() + messages = append(messages, lastMessage) + + for i, message := range received { + if got, want := message.Message, messages[i]; got != want { + t.Errorf("got message %q, want %q", got, want) + } + + if got, want := message.Severity, fnpb.LogEntry_Severity_DEBUG; got != want { + t.Errorf("got severity %v, want %v", got, want) + } + } + }) } diff --git a/sdks/java/build-tools/src/main/resources/beam/checkstyle/suppressions.xml b/sdks/java/build-tools/src/main/resources/beam/checkstyle/suppressions.xml index bb8954839d506..7037f0543f4fa 100644 --- a/sdks/java/build-tools/src/main/resources/beam/checkstyle/suppressions.xml +++ b/sdks/java/build-tools/src/main/resources/beam/checkstyle/suppressions.xml @@ -20,6 +20,7 @@ + diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java index f5f193aecb747..32ee29738bf85 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java +++ 
b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
@@ -34,7 +34,6 @@ import org.apache.beam.sdk.coders.KvCoder;
 import org.apache.beam.sdk.coders.NullableCoder;
 import org.apache.beam.sdk.coders.ShardedKeyCoder;
-import org.apache.beam.sdk.coders.VoidCoder;
 import org.apache.beam.sdk.extensions.gcp.options.GcsOptions;
 import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath;
 import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
@@ -399,10 +398,12 @@ private WriteResult expandTriggered(PCollection<KV<DestinationT, ElementT>> input) {
             "Window Into Global Windows",
             Window.<KV<DestinationT, WriteTables.Result>>into(new GlobalWindows())
                 .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1))))
-        .apply("Add Void Key", WithKeys.of((Void) null))
-        .setCoder(KvCoder.of(VoidCoder.of(), tempTables.getCoder()))
-        .apply("GroupByKey", GroupByKey.create())
-        .apply("Extract Values", Values.create())
+        // We use this and the following GBK to aggregate by final destination.
+        // This way, each destination has its own pane sequence.
+        .apply("AddDestinationKeys", WithKeys.of(result -> result.getKey()))
+        .setCoder(KvCoder.of(destinationCoder, tempTables.getCoder()))
+        .apply("GroupTempTablesByFinalDestination", GroupByKey.create())
+        .apply("ExtractTempTables", Values.create())
         .apply(
             ParDo.of(
                 new UpdateSchemaDestination(
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java
index 0063952d8b132..00ee815c3c930 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java
@@ -689,6 +689,10 @@ public static Row toBeamRow(Schema rowSchema, TableSchema bqSchema, TableRow jsonBqRow) {
       }
     }
 
+    if (jsonBQValue instanceof byte[] && fieldType.getTypeName() == TypeName.BYTES) {
+      return jsonBQValue;
+    }
+
     if (jsonBQValue instanceof List) {
       if (fieldType.getCollectionElementType() == null) {
         throw new IllegalArgumentException(
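[Editor's note, not part of the patch: an illustrative sketch of the conversion the new branch enables. A TableRow that already carries a raw byte[] for a BYTES field is now passed through to the Beam Row unchanged, rather than being treated as a Base64 string; the schema and values below are invented for the example.]

import com.google.api.services.bigquery.model.TableRow;
import java.nio.charset.StandardCharsets;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryUtils;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.values.Row;

public class BytesPassThroughSketch {
  public static void main(String[] args) {
    Schema schema = Schema.builder().addByteArrayField("raw_bytes").build();

    // Some read paths hand back raw byte[] values instead of Base64-encoded strings.
    TableRow tableRow = new TableRow();
    tableRow.set("raw_bytes", "hello".getBytes(StandardCharsets.UTF_8));

    // With the change above, the byte[] is returned as-is for BYTES fields.
    Row row = BigQueryUtils.toBeamRow(schema, tableRow);
    System.out.println(row.getBytes("raw_bytes").length); // prints 5
  }
}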
TableFieldSchema().setName("numeric").setType(StandardSQLTypeName.NUMERIC.toString()); @@ -246,6 +250,7 @@ public class BigQueryUtilsTest { TIME_0S_0NS, VALID, BINARY, + RAW_BYTES, NUMERIC, BOOLEAN, LONG, @@ -276,6 +281,7 @@ public class BigQueryUtilsTest { TIME_0S_0NS, VALID, BINARY, + RAW_BYTES, NUMERIC, BOOLEAN, LONG, @@ -316,6 +322,7 @@ public class BigQueryUtilsTest { LocalTime.parse("12:34"), false, Base64.getDecoder().decode("ABCD1234"), + Base64.getDecoder().decode("ABCD1234"), new BigDecimal("123.456").setScale(3, RoundingMode.HALF_UP), true, 123L, @@ -346,6 +353,7 @@ public class BigQueryUtilsTest { .set("time0s_0ns", "12:34:00") .set("valid", "false") .set("binary", "ABCD1234") + .set("raw_bytes", Base64.getDecoder().decode("ABCD1234")) .set("numeric", "123.456") .set("boolean", true) .set("long", 123L) @@ -355,7 +363,7 @@ public class BigQueryUtilsTest { Row.withSchema(FLAT_TYPE) .addValues( null, null, null, null, null, null, null, null, null, null, null, null, null, null, - null, null, null, null, null, null, null, null) + null, null, null, null, null, null, null, null, null) .build(); private static final TableRow BQ_NULL_FLAT_ROW = @@ -378,6 +386,7 @@ public class BigQueryUtilsTest { .set("time0s_0ns", null) .set("valid", null) .set("binary", null) + .set("raw_bytes", null) .set("numeric", null) .set("boolean", null) .set("long", null) @@ -457,6 +466,7 @@ public class BigQueryUtilsTest { TIME_0S_0NS, VALID, BINARY, + RAW_BYTES, NUMERIC, BOOLEAN, LONG, @@ -512,6 +522,7 @@ public void testToTableSchema_flat() { TIME_0S_0NS, VALID, BINARY, + RAW_BYTES, NUMERIC, BOOLEAN, LONG, @@ -562,6 +573,7 @@ public void testToTableSchema_row() { TIME_0S_0NS, VALID, BINARY, + RAW_BYTES, NUMERIC, BOOLEAN, LONG, @@ -598,6 +610,7 @@ public void testToTableSchema_array_row() { TIME_0S_0NS, VALID, BINARY, + RAW_BYTES, NUMERIC, BOOLEAN, LONG, @@ -620,7 +633,7 @@ public void testToTableSchema_map() { public void testToTableRow_flat() { TableRow row = toTableRow().apply(FLAT_ROW); - assertThat(row.size(), equalTo(22)); + assertThat(row.size(), equalTo(23)); assertThat(row, hasEntry("id", "123")); assertThat(row, hasEntry("value", "123.456")); assertThat(row, hasEntry("datetime", "2020-11-02T12:34:56.789876")); @@ -635,6 +648,7 @@ public void testToTableRow_flat() { assertThat(row, hasEntry("name", "test")); assertThat(row, hasEntry("valid", "false")); assertThat(row, hasEntry("binary", "ABCD1234")); + assertThat(row, hasEntry("raw_bytes", "ABCD1234")); assertThat(row, hasEntry("numeric", "123.456")); assertThat(row, hasEntry("boolean", "true")); assertThat(row, hasEntry("long", "123")); @@ -674,7 +688,7 @@ public void testToTableRow_row() { assertThat(row.size(), equalTo(1)); row = (TableRow) row.get("row"); - assertThat(row.size(), equalTo(22)); + assertThat(row.size(), equalTo(23)); assertThat(row, hasEntry("id", "123")); assertThat(row, hasEntry("value", "123.456")); assertThat(row, hasEntry("datetime", "2020-11-02T12:34:56.789876")); @@ -689,6 +703,7 @@ public void testToTableRow_row() { assertThat(row, hasEntry("name", "test")); assertThat(row, hasEntry("valid", "false")); assertThat(row, hasEntry("binary", "ABCD1234")); + assertThat(row, hasEntry("raw_bytes", "ABCD1234")); assertThat(row, hasEntry("numeric", "123.456")); assertThat(row, hasEntry("boolean", "true")); assertThat(row, hasEntry("long", "123")); @@ -701,7 +716,7 @@ public void testToTableRow_array_row() { assertThat(row.size(), equalTo(1)); row = ((List) row.get("rows")).get(0); - assertThat(row.size(), equalTo(22)); + 
assertThat(row.size(), equalTo(23)); assertThat(row, hasEntry("id", "123")); assertThat(row, hasEntry("value", "123.456")); assertThat(row, hasEntry("datetime", "2020-11-02T12:34:56.789876")); @@ -716,6 +731,7 @@ public void testToTableRow_array_row() { assertThat(row, hasEntry("name", "test")); assertThat(row, hasEntry("valid", "false")); assertThat(row, hasEntry("binary", "ABCD1234")); + assertThat(row, hasEntry("raw_bytes", "ABCD1234")); assertThat(row, hasEntry("numeric", "123.456")); assertThat(row, hasEntry("boolean", "true")); assertThat(row, hasEntry("long", "123")); @@ -726,7 +742,7 @@ public void testToTableRow_array_row() { public void testToTableRow_null_row() { TableRow row = toTableRow().apply(NULL_FLAT_ROW); - assertThat(row.size(), equalTo(22)); + assertThat(row.size(), equalTo(23)); assertThat(row, hasEntry("id", null)); assertThat(row, hasEntry("value", null)); assertThat(row, hasEntry("name", null)); @@ -745,6 +761,7 @@ public void testToTableRow_null_row() { assertThat(row, hasEntry("time0s_0ns", null)); assertThat(row, hasEntry("valid", null)); assertThat(row, hasEntry("binary", null)); + assertThat(row, hasEntry("raw_bytes", null)); assertThat(row, hasEntry("numeric", null)); assertThat(row, hasEntry("boolean", null)); assertThat(row, hasEntry("long", null)); diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FileLoadsStreamingIT.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FileLoadsStreamingIT.java new file mode 100644 index 0000000000000..012afed6fb436 --- /dev/null +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FileLoadsStreamingIT.java @@ -0,0 +1,497 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assume.assumeTrue;
+
+import com.google.api.services.bigquery.model.Table;
+import com.google.api.services.bigquery.model.TableFieldSchema;
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+import java.util.stream.Collectors;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.extensions.gcp.options.GcpOptions;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition;
+import org.apache.beam.sdk.io.gcp.testing.BigqueryClient;
+import org.apache.beam.sdk.options.ExperimentalOptions;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.beam.sdk.testing.TestPipelineOptions;
+import org.apache.beam.sdk.transforms.MapElements;
+import org.apache.beam.sdk.transforms.PeriodicImpulse;
+import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.sdk.values.TypeDescriptor;
+import org.apache.beam.sdk.values.TypeDescriptors;
+import org.apache.beam.sdk.values.ValueInSingleWindow;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Splitter;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables;
+import org.checkerframework.checker.nullness.qual.Nullable;
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@RunWith(Parameterized.class)
+public class FileLoadsStreamingIT {
+  private static final Logger LOG = LoggerFactory.getLogger(FileLoadsStreamingIT.class);
+
+  @Parameterized.Parameters
+  public static Iterable<Object[]> data() {
+    return ImmutableList.of(new Object[] {false}, new Object[] {true});
+  }
+
+  @Parameterized.Parameter(0)
+  public boolean useInputSchema;
+
+  @Rule public TestName testName = new TestName();
+
+  private static final BigqueryClient BQ_CLIENT = new BigqueryClient("FileLoadsStreamingIT");
+  private static final String PROJECT =
+      TestPipeline.testingPipelineOptions().as(GcpOptions.class).getProject();
+  private static final String BIG_QUERY_DATASET_ID =
+      "file_loads_streaming_it_" + System.nanoTime();
+
+  private static final String[] FIELDS = {
+    "BOOL",
+    "BOOLEAN",
+    "BYTES",
+    "INT64",
+    "INTEGER",
+    "FLOAT",
+    "FLOAT64",
+    "NUMERIC",
+    "STRING",
+    "DATE",
+    "TIMESTAMP"
+  };
+
+  private static final int TOTAL_N = 50;
+
+  private final Random randomGenerator = new Random();
+
+  @BeforeClass
+  public static void setUpTestEnvironment() throws IOException, InterruptedException {
+    // Create one BQ dataset for all test cases.
+    cleanUp();
+    BQ_CLIENT.createNewDataset(PROJECT, BIG_QUERY_DATASET_ID);
+  }
+
+  @AfterClass
+  public static void cleanUp() {
+    BQ_CLIENT.deleteDataset(PROJECT, BIG_QUERY_DATASET_ID);
+  }
+
+  static class GenerateRowFunc implements SerializableFunction<Long, TableRow> {
+    private final List<String> fieldNames;
+
+    public GenerateRowFunc(List<String> fieldNames) {
+      this.fieldNames = fieldNames;
+    }
+
+    @Override
+    public TableRow apply(Long rowId) {
+      TableRow row = new TableRow();
+      row.set("id", rowId);
+
+      for (String name : fieldNames) {
+        String type = Iterables.get(Splitter.on('_').split(name), 0);
+        switch (type) {
+          case "BOOL":
+          case "BOOLEAN":
+            if (rowId % 2 == 0) {
+              row.set(name, false);
+            } else {
+              row.set(name, true);
+            }
+            break;
+          case "BYTES":
+            row.set(name, String.format("test_blob_%s", rowId).getBytes(StandardCharsets.UTF_8));
+            break;
+          case "INT64":
+          case "INTEGER":
+            row.set(name, String.valueOf(rowId + 10));
+            break;
+          case "FLOAT":
+          case "FLOAT64":
+            row.set(name, String.valueOf(0.5 + rowId));
+            break;
+          case "NUMERIC":
+            row.set(name, String.valueOf(rowId + 0.12345));
+            break;
+          case "DATE":
+            row.set(name, "2022-01-01");
+            break;
+          case "TIMESTAMP":
+            row.set(name, "2022-01-01 10:10:10.012 UTC");
+            break;
+          case "STRING":
+            row.set(name, "test_string" + rowId);
+            break;
+          default:
+            row.set(name, "unknown" + rowId);
+            break;
+        }
+      }
+      return row;
+    }
+  }
+
+  private static TableSchema makeTableSchemaFromTypes(List<String> fieldNames) {
+    ImmutableList.Builder<TableFieldSchema> builder = ImmutableList.builder();
+
+    // Add an id field for verification of correctness.
+    builder.add(new TableFieldSchema().setType("INTEGER").setName("id").setMode("REQUIRED"));
+
+    // The field name is prefixed with its type.
+    for (String name : fieldNames) {
+      String mode = "REQUIRED";
+      builder.add(new TableFieldSchema().setType(name).setName(name).setMode(mode));
+    }
+
+    return new TableSchema().setFields(builder.build());
+  }
+
+  private String maybeCreateTable(TableSchema tableSchema, String suffix)
+      throws IOException, InterruptedException {
+    String tableId = Iterables.get(Splitter.on('[').split(testName.getMethodName()), 0);
+
+    BQ_CLIENT.deleteTable(PROJECT, BIG_QUERY_DATASET_ID, tableId + suffix);
+    if (!useInputSchema) {
+      BQ_CLIENT.createNewTable(
+          PROJECT,
+          BIG_QUERY_DATASET_ID,
+          new Table()
+              .setSchema(tableSchema)
+              .setTableReference(
+                  new TableReference()
+                      .setTableId(tableId + suffix)
+                      .setDatasetId(BIG_QUERY_DATASET_ID)
+                      .setProjectId(PROJECT)));
+    } else {
+      tableId += "WithInputSchema";
+    }
+    return String.format("%s.%s.%s", PROJECT, BIG_QUERY_DATASET_ID, tableId + suffix);
+  }
+
+  private void runStreaming(int numFileShards, boolean useCopyJobs)
+      throws IOException, InterruptedException {
+    TestPipelineOptions opts = TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
+    opts.setTempLocation(opts.getTempRoot());
+    Pipeline p = Pipeline.create(opts);
+
+    // Only run the most relevant test case on Dataflow.
+    // Testing this dimension on DirectRunner is sufficient.
+    if (p.getOptions().getRunner().getName().contains("DataflowRunner")) {
+      assumeTrue("Skipping in favor of more relevant test case", useInputSchema);
+      // Need to manually enable streaming engine for legacy dataflow runner
+      ExperimentalOptions.addExperiment(
+          p.getOptions().as(ExperimentalOptions.class), GcpOptions.STREAMING_ENGINE_EXPERIMENT);
+    }
+
+    List<String> fieldNamesOrigin = Arrays.asList(FIELDS);
+    // Shuffle the fields in the write schema to do fuzz testing on field order
+    List<String> fieldNamesShuffled = new ArrayList<>(fieldNamesOrigin);
+    Collections.shuffle(fieldNamesShuffled, randomGenerator);
+
+    TableSchema bqTableSchema = makeTableSchemaFromTypes(fieldNamesOrigin);
+    TableSchema inputSchema = makeTableSchemaFromTypes(fieldNamesShuffled);
+    String tableSpec = maybeCreateTable(bqTableSchema, "");
+
+    // set up and build pipeline
+    Instant start = new Instant(0);
+    GenerateRowFunc generateRowFunc = new GenerateRowFunc(fieldNamesShuffled);
+    PCollection<Instant> instants =
+        p.apply(
+            "Generate Instants",
+            PeriodicImpulse.create()
+                .startAt(start)
+                .stopAt(start.plus(Duration.standardSeconds(TOTAL_N - 1)))
+                .withInterval(Duration.standardSeconds(1))
+                .catchUpToNow(false));
+    PCollection<TableRow> rows =
+        instants.apply(
+            "Create TableRows",
+            MapElements.into(TypeDescriptor.of(TableRow.class))
+                .via(instant -> generateRowFunc.apply(instant.getMillis() / 1000)));
+    // build write transform
+    Write<TableRow> write =
+        BigQueryIO.writeTableRows()
+            .to(tableSpec)
+            .withMethod(Write.Method.FILE_LOADS)
+            .withTriggeringFrequency(Duration.standardSeconds(10));
+    if (useCopyJobs) {
+      write = write.withMaxBytesPerPartition(250);
+    }
+    if (useInputSchema) {
+      // we're creating the table with the input schema
+      write =
+          write
+              .withSchema(inputSchema)
+              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
+              .withWriteDisposition(WriteDisposition.WRITE_TRUNCATE);
+    } else {
+      // table already exists with a schema, no need to create it
+      write =
+          write
+              .withCreateDisposition(CreateDisposition.CREATE_NEVER)
+              .withWriteDisposition(WriteDisposition.WRITE_APPEND);
+    }
+    write = numFileShards == 0 ? write.withAutoSharding() : write.withNumFileShards(numFileShards);
+
+    rows.apply("Stream loads to BigQuery", write);
+    p.run().waitUntilFinish();
+
+    List<TableRow> expectedRows = new ArrayList<>();
+    for (long i = 0; i < TOTAL_N; i++) {
+      expectedRows.add(generateRowFunc.apply(i));
+    }
+
+    // Perform checks
+    checkRowCompleteness(tableSpec, inputSchema, expectedRows);
+  }
+
+  // Check that the expected rows reached the table.
+  private static void checkRowCompleteness(
+      String tableSpec, TableSchema schema, List<TableRow> expectedRows)
+      throws IOException, InterruptedException {
+    List<TableRow> actualTableRows =
+        BQ_CLIENT.queryUnflattened(
+            String.format("SELECT * FROM [%s]", tableSpec), PROJECT, true, false);
+
+    Schema rowSchema = BigQueryUtils.fromTableSchema(schema);
+    List<Row> actualBeamRows =
+        actualTableRows.stream()
+            .map(tableRow -> BigQueryUtils.toBeamRow(rowSchema, tableRow))
+            .collect(Collectors.toList());
+    List<Row> expectedBeamRows =
+        expectedRows.stream()
+            .map(tableRow -> BigQueryUtils.toBeamRow(rowSchema, tableRow))
+            .collect(Collectors.toList());
+    LOG.info(
+        "Actual rows number: {}, expected: {}", actualBeamRows.size(), expectedBeamRows.size());
+
+    assertThat(
+        "Comparing expected rows with actual rows",
+        actualBeamRows,
+        containsInAnyOrder(expectedBeamRows.toArray()));
+    assertEquals(
+        "Checking there is no duplication", expectedBeamRows.size(), actualBeamRows.size());
+  }
+
+  @Test
+  public void testLoadWithFixedShards() throws IOException, InterruptedException {
+    runStreaming(5, false);
+  }
+
+  @Test
+  public void testLoadWithAutoShardingAndCopyJobs() throws IOException, InterruptedException {
+    runStreaming(0, true);
+  }
+
+  @Test
+  public void testDynamicDestinationsWithFixedShards() throws IOException, InterruptedException {
+    runStreamingToDynamicDestinations(6, false);
+  }
+
+  @Test
+  public void testDynamicDestinationsWithAutoShardingAndCopyJobs()
+      throws IOException, InterruptedException {
+    runStreamingToDynamicDestinations(0, true);
+  }
+
+  private void runStreamingToDynamicDestinations(int numFileShards, boolean useCopyJobs)
+      throws IOException, InterruptedException {
+    TestPipelineOptions opts = TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
+    opts.setTempLocation(opts.getTempRoot());
+    Pipeline p = Pipeline.create(opts);
+    // Only run the most relevant test cases on Dataflow. Testing this dimension on DirectRunner is
+    // sufficient.
+    if (p.getOptions().getRunner().getName().contains("DataflowRunner")) {
+      assumeTrue("Skipping in favor of more relevant test case", useInputSchema);
+      // Need to manually enable streaming engine for legacy dataflow runner
+      ExperimentalOptions.addExperiment(
+          p.getOptions().as(ExperimentalOptions.class), GcpOptions.STREAMING_ENGINE_EXPERIMENT);
+    }
+
+    List<String> allFields = Arrays.asList(FIELDS);
+    List<String> subFields0 = new ArrayList<>(allFields.subList(0, 4));
+    List<String> subFields1 = new ArrayList<>(allFields.subList(4, 8));
+    List<String> subFields2 = new ArrayList<>(allFields.subList(8, 11));
+    TableSchema table0Schema = makeTableSchemaFromTypes(subFields0);
+    TableSchema table1Schema = makeTableSchemaFromTypes(subFields1);
+    TableSchema table2Schema = makeTableSchemaFromTypes(subFields2);
+    String table0Id = maybeCreateTable(table0Schema, "-0");
+    String table1Id = maybeCreateTable(table1Schema, "-1");
+    String table2Id = maybeCreateTable(table2Schema, "-2");
+    GenerateRowFunc generateRowFunc0 = new GenerateRowFunc(subFields0);
+    GenerateRowFunc generateRowFunc1 = new GenerateRowFunc(subFields1);
+    GenerateRowFunc generateRowFunc2 = new GenerateRowFunc(subFields2);
+
+    String tablePrefix = table0Id.substring(0, table0Id.length() - 2);
+
+    // set up and build pipeline
+    Instant start = new Instant(0);
+    PCollection<Instant> instants =
+        p.apply(
+            "Generate Instants",
+            PeriodicImpulse.create()
+                .startAt(start)
+                .stopAt(start.plus(Duration.standardSeconds(TOTAL_N - 1)))
+                .withInterval(Duration.standardSeconds(1))
+                .catchUpToNow(false));
+    PCollection<Long> longs =
+        instants.apply(
+            "Create TableRows",
+            MapElements.into(TypeDescriptors.longs()).via(instant -> instant.getMillis() / 1000));
+    // build write transform
+    Write<Long> write =
+        BigQueryIO.<Long>write()
+            .to(
+                new TestDynamicDest(
+                    tablePrefix, subFields0, subFields1, subFields2, useInputSchema))
+            .withFormatFunction(
+                id -> {
+                  long dest = id % 3;
+                  TableRow row;
+                  if (dest == 0) {
+                    row = generateRowFunc0.apply(id);
+                  } else if (dest == 1) {
+                    row = generateRowFunc1.apply(id);
+                  } else {
+                    row = generateRowFunc2.apply(id);
+                  }
+                  return row;
+                })
+            .withMethod(Write.Method.FILE_LOADS)
+            .withTriggeringFrequency(Duration.standardSeconds(10));
+    if (useCopyJobs) {
+      write = write.withMaxBytesPerPartition(150);
+    }
+    if (useInputSchema) {
+      // we're creating the table with the input schema
+      write =
+          write
+              .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
+              .withWriteDisposition(WriteDisposition.WRITE_TRUNCATE);
+    } else {
+      // table already exists with a schema, no need to create it
+      write =
+          write
+              .withCreateDisposition(CreateDisposition.CREATE_NEVER)
+              .withWriteDisposition(WriteDisposition.WRITE_APPEND);
+    }
+    write = numFileShards == 0 ? write.withAutoSharding() : write.withNumFileShards(numFileShards);
+
+    longs.apply("Stream loads to dynamic destinations", write);
+    p.run().waitUntilFinish();
+
+    List<TableRow> expectedRows0 = new ArrayList<>();
+    List<TableRow> expectedRows1 = new ArrayList<>();
+    List<TableRow> expectedRows2 = new ArrayList<>();
+    for (long i = 0; i < TOTAL_N; i++) {
+      long dest = i % 3;
+      if (dest == 0) {
+        expectedRows0.add(generateRowFunc0.apply(i));
+      } else if (dest == 1) {
+        expectedRows1.add(generateRowFunc1.apply(i));
+      } else {
+        expectedRows2.add(generateRowFunc2.apply(i));
+      }
+    }
+    // Perform checks
+    checkRowCompleteness(table0Id, makeTableSchemaFromTypes(subFields0), expectedRows0);
+    checkRowCompleteness(table1Id, makeTableSchemaFromTypes(subFields1), expectedRows1);
+    checkRowCompleteness(table2Id, makeTableSchemaFromTypes(subFields2), expectedRows2);
+  }
+
+  static class TestDynamicDest extends DynamicDestinations<Long, Long> {
+    String tablePrefix;
+    List<String> table0Fields;
+    List<String> table1Fields;
+    List<String> table2Fields;
+    boolean useInputSchema;
+
+    public TestDynamicDest(
+        String tablePrefix,
+        List<String> table0Fields,
+        List<String> table1Fields,
+        List<String> table2Fields,
+        boolean useInputSchema) {
+      this.tablePrefix = tablePrefix;
+      this.table0Fields = table0Fields;
+      this.table1Fields = table1Fields;
+      this.table2Fields = table2Fields;
+      this.useInputSchema = useInputSchema;
+    }
+
+    @Override
+    public Long getDestination(@Nullable ValueInSingleWindow<Long> element) {
+      return element.getValue() % 3;
+    }
+
+    @Override
+    public TableDestination getTable(Long destination) {
+      return new TableDestination(tablePrefix + "-" + destination, null);
+    }
+
+    @Override
+    public @Nullable TableSchema getSchema(Long destination) {
+      if (!useInputSchema) {
+        return null;
+      }
+      List<String> fields;
+      if (destination == 0) {
+        fields = table0Fields;
+      } else if (destination == 1) {
+        fields = table1Fields;
+      } else {
+        fields = table2Fields;
+      }
+      List<TableFieldSchema> tableFields =
+          fields.stream()
+              .map(name -> new TableFieldSchema().setName(name).setType(name).setMode("REQUIRED"))
+              .collect(Collectors.toList());
+      // we attach an ID to each row in addition to the existing schema fields
+      tableFields.add(
+          0, new TableFieldSchema().setName("id").setType("INTEGER").setMode("REQUIRED"));
+      return new TableSchema().setFields(tableFields);
+    }
+  }
+}
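[Editor's note, not part of the patch: distilled from the integration test above, a hedged sketch of the write configuration it exercises. The names rows, tableSpec, and schema are placeholders; with an unbounded input, the triggering frequency bounds how often load jobs run, and withMaxBytesPerPartition can be lowered, as the test does, to force the copy-job path.]

import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Duration;

class StreamingFileLoadsSketch {
  static void write(PCollection<TableRow> rows, String tableSpec, TableSchema schema) {
    rows.apply(
        "StreamingFileLoads",
        BigQueryIO.writeTableRows()
            .to(tableSpec)
            // File loads on an unbounded input: files are batched into load jobs
            // on each trigger rather than written via the streaming inserts API.
            .withMethod(BigQueryIO.Write.Method.FILE_LOADS)
            .withTriggeringFrequency(Duration.standardSeconds(10))
            .withAutoSharding()
            .withSchema(schema)
            .withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
            .withWriteDisposition(WriteDisposition.WRITE_APPEND));
  }
}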
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/names.py b/sdks/python/apache_beam/runners/dataflow/internal/names.py
index 2075c8eee3f1d..8bb39940e4842 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/names.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/names.py
@@ -34,6 +34,6 @@
 # Unreleased sdks use container image tag specified below.
 # Update this tag whenever there is a change that
 # requires changes to SDK harness container or SDK harness launcher.
-BEAM_DEV_SDK_CONTAINER_TAG = 'beam-master-20230717'
+BEAM_DEV_SDK_CONTAINER_TAG = 'beam-master-20230912'
 
 DATAFLOW_CONTAINER_IMAGE_REPOSITORY = 'gcr.io/cloud-dataflow/v1beta3'
diff --git a/sdks/python/apache_beam/runners/worker/data_sampler.py b/sdks/python/apache_beam/runners/worker/data_sampler.py
index a5992b9cebac8..303648738f3d5 100644
--- a/sdks/python/apache_beam/runners/worker/data_sampler.py
+++ b/sdks/python/apache_beam/runners/worker/data_sampler.py
@@ -49,11 +49,19 @@ class SampleTimer:
   """Periodic timer for sampling elements."""
   def __init__(self, timeout_secs: float, sampler: OutputSampler) -> None:
-    self._timeout_secs = timeout_secs
+    self._target_timeout_secs = timeout_secs
+    self._timeout_secs = min(timeout_secs, 0.5) if timeout_secs > 0 else 0.0
     self._timer = Timer(self._timeout_secs, self.sample)
     self._sampler = sampler
+    self._sample_duration_secs = 0.0
 
   def reset(self) -> None:
+    # For the first 30 seconds, sample every 0.5 seconds. After that, sample at
+    # the normal rate.
+    if self._sample_duration_secs >= 30.0:
+      self._timeout_secs = self._target_timeout_secs
+    self._sample_duration_secs += self._timeout_secs
+
     self._timer.cancel()
     self._timer = Timer(self._timeout_secs, self.sample)
     self._timer.start()
diff --git a/sdks/python/apache_beam/transforms/ptransform.py b/sdks/python/apache_beam/transforms/ptransform.py
index c7eaa152ae063..28614c6561c7f 100644
--- a/sdks/python/apache_beam/transforms/ptransform.py
+++ b/sdks/python/apache_beam/transforms/ptransform.py
@@ -38,11 +38,13 @@ class and wrapper class that allows lambda functions to be used as
 import copy
 import itertools
+import json
 import logging
 import operator
 import os
 import sys
 import threading
+import warnings
 from functools import reduce
 from functools import wraps
 from typing import TYPE_CHECKING
@@ -83,6 +85,7 @@ class and wrapper class that allows lambda functions to be used as
 from apache_beam.typehints.trivial_inference import instance_to_type
 from apache_beam.typehints.typehints import validate_composite_type_param
 from apache_beam.utils import proto_utils
+from apache_beam.utils import python_callable
 
 if TYPE_CHECKING:
   from apache_beam import coders
@@ -95,6 +98,7 @@ class and wrapper class that allows lambda functions to be used as
     'PTransform',
     'ptransform_fn',
     'label_from_callable',
+    'annotate_yaml',
 ]
 
 _LOGGER = logging.getLogger(__name__)
@@ -1096,3 +1100,51 @@ def __ror__(self, pvalueish, _unused=None):
 
   def expand(self, pvalue):
     raise RuntimeError("Should never be expanded directly.")
+
+
+# Defined here to avoid circular import issues for Beam library transforms.
+def annotate_yaml(constructor):
+  """Causes instances of this transform to be annotated with their yaml syntax.
+
+  Should only be used for transforms that are fully defined by their constructor
+  arguments.
+  """
+  @wraps(constructor)
+  def wrapper(*args, **kwargs):
+    transform = constructor(*args, **kwargs)
+
+    fully_qualified_name = (
+        f'{constructor.__module__}.{constructor.__qualname__}')
+    try:
+      imported_constructor = (
+          python_callable.PythonCallableWithSource.
+ load_from_fully_qualified_name(fully_qualified_name)) + if imported_constructor != wrapper: + raise ImportError('Different object.') + except ImportError: + warnings.warn(f'Cannot import {constructor} as {fully_qualified_name}.') + return transform + + try: + config = json.dumps({ + 'constructor': fully_qualified_name, + 'args': args, + 'kwargs': kwargs, + }) + except TypeError as exn: + warnings.warn( + f'Cannot serialize arguments for {constructor} as json: {exn}') + return transform + + original_annotations = transform.annotations + transform.annotations = lambda: { + **original_annotations(), + # These override whatever may have been provided earlier. + # The outermost call is expected to be the most specific. + 'yaml_provider': 'python', + 'yaml_type': 'PyTransform', + 'yaml_args': config, + } + return transform + + return wrapper diff --git a/sdks/python/apache_beam/typehints/trivial_inference.py b/sdks/python/apache_beam/typehints/trivial_inference.py index f4b350e8f0522..a880b5c70ea19 100644 --- a/sdks/python/apache_beam/typehints/trivial_inference.py +++ b/sdks/python/apache_beam/typehints/trivial_inference.py @@ -399,7 +399,10 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): jump_multiplier = 1 last_pc = -1 + last_real_opname = opname = None while pc < end: # pylint: disable=too-many-nested-blocks + if opname not in ('PRECALL', 'CACHE'): + last_real_opname = opname start = pc instruction = ofs_table[pc] op = instruction.opcode @@ -534,13 +537,13 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): return_type = Any state.kw_names = None else: - # Handle lambdas always having an arg of 0 for CALL + # Handle comprehensions always having an arg of 0 for CALL # See https://github.com/python/cpython/issues/102403 for context. 
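+          # A minimal sketch of the quirk, assuming CPython 3.11 bytecode:
+          #   >>> import dis; dis.dis('[x for x in it]')
+          #   ... MAKE_FUNCTION ... GET_ITER ... PRECALL 0 ... CALL 0
+          # The iterator is handed to the <listcomp> code object, but the
+          # CALL oparg of 0 does not count it, so one extra stack slot must
+          # be popped; the check below detects exactly that shape.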
-          if pop_count == 1:
-            while pop_count <= len(state.stack):
-              if isinstance(state.stack[-pop_count], Const):
-                break
-              pop_count += 1
+          if (pop_count == 1 and last_real_opname == 'GET_ITER' and
+              len(state.stack) > 1 and isinstance(state.stack[-2], Const) and
+              getattr(state.stack[-2].value, '__name__', None) in (
+                  '<listcomp>', '<dictcomp>', '<setcomp>', '<genexpr>')):
+            pop_count += 1
       if depth <= 0 or pop_count > len(state.stack):
         return_type = Any
       elif isinstance(state.stack[-pop_count], Const):
diff --git a/sdks/python/apache_beam/typehints/trivial_inference_test.py b/sdks/python/apache_beam/typehints/trivial_inference_test.py
index d8cc2ab19a03d..4341d11d36040 100644
--- a/sdks/python/apache_beam/typehints/trivial_inference_test.py
+++ b/sdks/python/apache_beam/typehints/trivial_inference_test.py
@@ -251,11 +251,30 @@ def testCall(self):
     self.assertReturnType(
         typehints.Tuple[int, typehints.Any], lambda: (1, f(x=1.0)))

+  def testCallNullaryMethod(self):
+    class Foo:
+      pass
+
+    self.assertReturnType(
+        typehints.Tuple[Foo, typehints.Any], lambda x: (x, x.unknown()), [Foo])
+
+  def testCallNestedLambda(self):
+    class Foo:
+      pass
+
+    self.assertReturnType(
+        typehints.Tuple[Foo, int], lambda x: (x, (lambda: 3)()), [Foo])
+
   def testClosure(self):
     x = 1
     y = 1.0
     self.assertReturnType(typehints.Tuple[int, float], lambda: (x, y))

+  @unittest.skip("https://github.com/apache/beam/issues/28420")
+  def testLocalClosure(self):
+    self.assertReturnType(
+        typehints.Tuple[int, int], lambda x: (x, (lambda: x)()), [int])
+
   def testGlobals(self):
     self.assertReturnType(int, lambda: global_int)
diff --git a/sdks/python/apache_beam/yaml/cache_provider_artifacts.py b/sdks/python/apache_beam/yaml/cache_provider_artifacts.py
new file mode 100644
index 0000000000000..6c96dd3b0fd92
--- /dev/null
+++ b/sdks/python/apache_beam/yaml/cache_provider_artifacts.py
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import logging
+import time
+
+from apache_beam.version import __version__ as beam_version
+from apache_beam.yaml import yaml_provider
+
+
+def cache_provider_artifacts():
+  providers_by_id = {}
+  for providers in yaml_provider.standard_providers().values():
+    for provider in providers:
+      # Dedup for better logging.
+      providers_by_id[id(provider)] = provider
+  for provider in providers_by_id.values():
+    t = time.time()
+    artifacts = provider.cache_artifacts()
+    if artifacts:
+      logging.info(
+          'Cached %s in %0.03f seconds.', ', '.join(artifacts), time.time() - t)
+  if '.dev' not in beam_version:
+    # Also cache a base python venv for fast cloning.
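+    # (Assumed rationale: cloning a pre-built venv via virtualenv-clone is
+    # much cheaper than a from-scratch pip install of the Beam extras, so
+    # warming the base venv here speeds up later provider startup.)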
+ t = time.time() + artifacts = yaml_provider.PypiExpansionService._create_venv_to_clone() + logging.info('Cached %s in %0.03f seconds.', artifacts, time.time() - t) + + +if __name__ == '__main__': + logging.getLogger().setLevel(logging.INFO) + cache_provider_artifacts() diff --git a/sdks/python/apache_beam/yaml/yaml_provider.py b/sdks/python/apache_beam/yaml/yaml_provider.py index d42d7aaffeeea..6e035811d4b9a 100644 --- a/sdks/python/apache_beam/yaml/yaml_provider.py +++ b/sdks/python/apache_beam/yaml/yaml_provider.py @@ -32,6 +32,7 @@ from typing import Dict from typing import Iterable from typing import Mapping +from typing import Optional import yaml from yaml.loader import SafeLoader @@ -57,6 +58,9 @@ def available(self) -> bool: """Returns whether this provider is available to use in this environment.""" raise NotImplementedError(type(self)) + def cache_artifacts(self) -> Optional[Iterable[str]]: + raise NotImplementedError(type(self)) + def provided_transforms(self) -> Iterable[str]: """Returns a list of transform type names this provider can handle.""" raise NotImplementedError(type(self)) @@ -256,17 +260,24 @@ def available(self): self._is_available = False return self._is_available + def cache_artifacts(self): + pass + class ExternalJavaProvider(ExternalProvider): def __init__(self, urns, jar_provider): super().__init__( urns, lambda: external.JavaJarExpansionService(jar_provider())) + self._jar_provider = jar_provider def available(self): # pylint: disable=subprocess-run-check return subprocess.run(['which', 'java'], capture_output=True).returncode == 0 + def cache_artifacts(self): + return [self._jar_provider()] + @ExternalProvider.register_provider_type('python') def python(urns, packages=()): @@ -289,6 +300,9 @@ def __init__(self, urns, packages): def available(self): return True # If we're running this script, we have Python installed. + def cache_artifacts(self): + return [self._service._venv()] + def create_external_transform(self, urn, args): # Python transforms are "registered" by fully qualified name. 
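    # That is, `urn` here is a dotted import path (e.g. the hypothetical
    # "mypkg.transforms.MyTransform") which the expansion service resolves
    # by importing the named constructor.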
return external.ExternalTransform( @@ -351,6 +365,9 @@ def __init__(self, transform_factories): def available(self): return True + def cache_artifacts(self): + pass + def provided_transforms(self): return self._transform_factories.keys() @@ -527,23 +544,60 @@ def __init__(self, packages, base_python=sys.executable): self._packages = packages self._base_python = base_python - def _key(self): - return json.dumps({'binary': self._base_python, 'packages': self._packages}) + @classmethod + def _key(cls, base_python, packages): + return json.dumps({ + 'binary': base_python, 'packages': sorted(packages) + }, + sort_keys=True) - def _venv(self): - venv = os.path.join( - self.VENV_CACHE, - hashlib.sha256(self._key().encode('utf-8')).hexdigest()) + @classmethod + def _path(cls, base_python, packages): + return os.path.join( + cls.VENV_CACHE, + hashlib.sha256(cls._key(base_python, + packages).encode('utf-8')).hexdigest()) + + @classmethod + def _create_venv_from_scratch(cls, base_python, packages): + venv = cls._path(base_python, packages) if not os.path.exists(venv): - python_binary = os.path.join(venv, 'bin', 'python') - subprocess.run([self._base_python, '-m', 'venv', venv], check=True) - subprocess.run([python_binary, '-m', 'ensurepip'], check=True) - subprocess.run([python_binary, '-m', 'pip', 'install'] + self._packages, + subprocess.run([base_python, '-m', 'venv', venv], check=True) + venv_python = os.path.join(venv, 'bin', 'python') + subprocess.run([venv_python, '-m', 'ensurepip'], check=True) + subprocess.run([venv_python, '-m', 'pip', 'install'] + packages, check=True) with open(venv + '-requirements.txt', 'w') as fout: - fout.write('\n'.join(self._packages)) + fout.write('\n'.join(packages)) return venv + @classmethod + def _create_venv_from_clone(cls, base_python, packages): + venv = cls._path(base_python, packages) + if not os.path.exists(venv): + clonable_venv = cls._create_venv_to_clone(base_python) + clonable_python = os.path.join(clonable_venv, 'bin', 'python') + subprocess.run( + [clonable_python, '-m', 'clonevirtualenv', clonable_venv, venv], + check=True) + venv_binary = os.path.join(venv, 'bin', 'python') + subprocess.run([venv_binary, '-m', 'pip', 'install'] + packages, + check=True) + with open(venv + '-requirements.txt', 'w') as fout: + fout.write('\n'.join(packages)) + return venv + + @classmethod + def _create_venv_to_clone(cls, base_python): + return cls._create_venv_from_scratch( + base_python, [ + 'apache_beam[dataframe,gcp,test]==' + beam_version, + 'virtualenv-clone' + ]) + + def _venv(self): + return self._create_venv_from_clone(self._base_python, self._packages) + def __enter__(self): venv = self._venv() self._service_provider = subprocess_server.SubprocessServer( diff --git a/sdks/python/apache_beam/yaml/yaml_transform_test.py b/sdks/python/apache_beam/yaml/yaml_transform_test.py index f969761092e00..26baebec86e49 100644 --- a/sdks/python/apache_beam/yaml/yaml_transform_test.py +++ b/sdks/python/apache_beam/yaml/yaml_transform_test.py @@ -250,6 +250,23 @@ def test_name_is_ambiguous(self): output: AnotherFilter ''') + def test_annotations(self): + t = LinearTransform(5, b=100) + annotations = t.annotations() + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + result = p | YamlTransform( + ''' + type: chain + transforms: + - type: Create + config: + elements: [0, 1, 2, 3] + - type: %r + config: %s + ''' % (annotations['yaml_type'], annotations['yaml_args'])) + assert_that(result, equal_to([100, 105, 110, 
115])) + class CreateTimestamped(beam.PTransform): def __init__(self, elements): @@ -631,6 +648,19 @@ def test_prefers_same_provider_class(self): label='StartWith3') +@beam.transforms.ptransform.annotate_yaml +class LinearTransform(beam.PTransform): + """A transform used for testing annotate_yaml.""" + def __init__(self, a, b): + self._a = a + self._b = b + + def expand(self, pcoll): + a = self._a + b = self._b + return pcoll | beam.Map(lambda x: a * x + b) + + if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) unittest.main() diff --git a/sdks/python/container/base_image_requirements_manual.txt b/sdks/python/container/base_image_requirements_manual.txt index a1d80320d42d8..e952b2126604c 100644 --- a/sdks/python/container/base_image_requirements_manual.txt +++ b/sdks/python/container/base_image_requirements_manual.txt @@ -43,4 +43,3 @@ nose==1.3.7 # For Dataflow internal testing. TODO: remove this. python-snappy;python_version<"3.11" # Optimizes execution of some Beam codepaths. scipy scikit-learn -tensorflow>=2.12.0 diff --git a/sdks/python/container/boot.go b/sdks/python/container/boot.go index 73a2f8324401a..ded10a44204a6 100644 --- a/sdks/python/container/boot.go +++ b/sdks/python/container/boot.go @@ -371,23 +371,18 @@ func setupAcceptableWheelSpecs() error { return fmt.Errorf("cannot get parse Python version from %s", stdoutStderr) } pyVersion := fmt.Sprintf("%s%s", pyVersions[1], pyVersions[2]) - var wheelName string - switch pyVersion { - case "36", "37": - wheelName = fmt.Sprintf("cp%s-cp%sm-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", pyVersion, pyVersion) - default: - wheelName = fmt.Sprintf("cp%s-cp%s-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", pyVersion, pyVersion) - } + wheelName := fmt.Sprintf("cp%s-cp%s-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", pyVersion, pyVersion) acceptableWhlSpecs = append(acceptableWhlSpecs, wheelName) return nil } // installSetupPackages installs Beam SDK and user dependencies. func installSetupPackages(ctx context.Context, logger *tools.Logger, files []string, workDir string, requirementsFiles []string) error { - log.Printf("Installing setup packages ...") + bufLogger := tools.NewBufferedLogger(logger) + bufLogger.Printf(ctx, "Installing setup packages ...") if err := setupAcceptableWheelSpecs(); err != nil { - log.Printf("Failed to setup acceptable wheel specs, leave it as empty: %v", err) + bufLogger.Printf(ctx, "Failed to setup acceptable wheel specs, leave it as empty: %v", err) } pkgName := "apache-beam" diff --git a/sdks/python/container/piputil.go b/sdks/python/container/piputil.go index fec5cf0ab50d0..67488bdc39f78 100644 --- a/sdks/python/container/piputil.go +++ b/sdks/python/container/piputil.go @@ -21,11 +21,11 @@ import ( "context" "errors" "fmt" - "log" "os" "os/exec" "path/filepath" "strings" + "time" "github.com/apache/beam/sdks/v2/go/container/tools" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/xlangx/expansionx" @@ -47,7 +47,7 @@ func pipInstallRequirements(ctx context.Context, logger *tools.Logger, files []s // used without following their dependencies. args := []string{"-m", "pip", "install", "-q", "-r", filepath.Join(dir, name), "--no-cache-dir", "--disable-pip-version-check", "--no-index", "--no-deps", "--find-links", dir} if err := execx.Execute(pythonVersion, args...); err != nil { - fmt.Println("Some packages could not be installed solely from the requirements cache. 
Installing packages from PyPI.") + bufLogger.Printf(ctx, "Some packages could not be installed solely from the requirements cache. Installing packages from PyPI.") } // The second install round opens up the search for packages on PyPI and // also installs dependencies. The key is that if all the packages have @@ -77,13 +77,15 @@ func isPackageInstalled(pkgName string) bool { return true } +const pipLogFlushInterval time.Duration = 15 * time.Second + // pipInstallPackage installs the given package, if present. func pipInstallPackage(ctx context.Context, logger *tools.Logger, files []string, dir, name string, force, optional bool, extras []string) error { pythonVersion, err := expansionx.GetPythonVersion() if err != nil { return err } - bufLogger := tools.NewBufferedLogger(logger) + bufLogger := tools.NewBufferedLoggerWithFlushInterval(ctx, logger, pipLogFlushInterval) for _, file := range files { if file == name { var packageSpec = name @@ -146,6 +148,7 @@ func pipInstallPackage(ctx context.Context, logger *tools.Logger, files []string // installExtraPackages installs all the packages declared in the extra // packages manifest file. func installExtraPackages(ctx context.Context, logger *tools.Logger, files []string, extraPackagesFile, dir string) error { + bufLogger := tools.NewBufferedLogger(logger) // First check that extra packages manifest file is present. for _, file := range files { if file != extraPackagesFile { @@ -163,7 +166,7 @@ func installExtraPackages(ctx context.Context, logger *tools.Logger, files []str for s.Scan() { extraPackage := s.Text() - log.Printf("Installing extra package: %s", extraPackage) + bufLogger.Printf(ctx, "Installing extra package: %s", extraPackage) if err = pipInstallPackage(ctx, logger, files, dir, extraPackage, true, false, nil); err != nil { return fmt.Errorf("failed to install extra package %s: %v", extraPackage, err) } @@ -173,12 +176,13 @@ func installExtraPackages(ctx context.Context, logger *tools.Logger, files []str return nil } -func findBeamSdkWhl(files []string, acceptableWhlSpecs []string) string { +func findBeamSdkWhl(ctx context.Context, logger *tools.Logger, files []string, acceptableWhlSpecs []string) string { + bufLogger := tools.NewBufferedLogger(logger) for _, file := range files { if strings.HasPrefix(file, "apache_beam") { for _, s := range acceptableWhlSpecs { if strings.HasSuffix(file, s) { - log.Printf("Found Apache Beam SDK wheel: %v", file) + bufLogger.Printf(ctx, "Found Apache Beam SDK wheel: %v", file) return file } } @@ -193,8 +197,8 @@ func findBeamSdkWhl(files []string, acceptableWhlSpecs []string) string { // file, and we try to install it. If not successful, we fall back to installing // SDK from source tarball provided in sdkSrcFile. 
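// (As a rule of thumb with pip: a matching wheel installs without a build
// step, whereas the source tarball must be built on the worker, which is
// why the wheel path is preferred when available.)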
func installSdk(ctx context.Context, logger *tools.Logger, files []string, workDir string, sdkSrcFile string, acceptableWhlSpecs []string, required bool) error { - sdkWhlFile := findBeamSdkWhl(files, acceptableWhlSpecs) - + sdkWhlFile := findBeamSdkWhl(ctx, logger, files, acceptableWhlSpecs) + bufLogger := tools.NewBufferedLogger(logger) if sdkWhlFile != "" { // by default, pip rejects to install wheel if same version already installed isDev := strings.Contains(sdkWhlFile, ".dev") @@ -202,7 +206,7 @@ func installSdk(ctx context.Context, logger *tools.Logger, files []string, workD if err == nil { return nil } - log.Printf("Could not install Apache Beam SDK from a wheel: %v, proceeding to install SDK from source tarball.", err) + bufLogger.Printf(ctx, "Could not install Apache Beam SDK from a wheel: %v, proceeding to install SDK from source tarball.", err) } if !required { _, err := os.Stat(filepath.Join(workDir, sdkSrcFile)) diff --git a/sdks/python/container/py310/base_image_requirements.txt b/sdks/python/container/py310/base_image_requirements.txt index 0cae8ca6ef086..58aca4a4aea7c 100644 --- a/sdks/python/container/py310/base_image_requirements.txt +++ b/sdks/python/container/py310/base_image_requirements.txt @@ -21,8 +21,6 @@ # https://s.apache.org/beam-python-dev-wiki # Reach out to a committer if you need help. -absl-py==1.4.0 -astunparse==1.6.3 attrs==23.1.0 beautifulsoup4==4.12.2 bs4==0.0.1 @@ -30,7 +28,7 @@ cachetools==5.3.1 certifi==2023.7.22 cffi==1.15.1 charset-normalizer==3.2.0 -click==8.1.6 +click==8.1.7 cloudpickle==2.2.1 crcmod==1.7 cryptography==41.0.3 @@ -40,74 +38,63 @@ dill==0.3.1.1 dnspython==2.4.2 docker==6.1.3 docopt==0.6.2 -exceptiongroup==1.1.2 +exceptiongroup==1.1.3 execnet==2.0.2 -fastavro==1.8.2 +fastavro==1.8.3 fasteners==0.18 -flatbuffers==23.5.26 freezegun==1.2.2 future==0.18.3 -gast==0.4.0 google-api-core==2.11.1 -google-api-python-client==2.96.0 +google-api-python-client==2.99.0 google-apitools==0.5.31 -google-auth==2.22.0 -google-auth-httplib2==0.1.0 -google-auth-oauthlib==1.0.0 -google-cloud-aiplatform==1.29.0 +google-auth==2.23.0 +google-auth-httplib2==0.1.1 +google-cloud-aiplatform==1.32.0 google-cloud-bigquery==3.11.4 google-cloud-bigquery-storage==2.22.0 google-cloud-bigtable==2.21.0 google-cloud-core==2.3.3 -google-cloud-datastore==2.17.0 +google-cloud-datastore==2.18.0 google-cloud-dlp==3.12.2 -google-cloud-language==2.10.1 -google-cloud-profiler==4.0.0 -google-cloud-pubsub==2.18.2 +google-cloud-language==2.11.0 +google-cloud-profiler==4.1.0 +google-cloud-pubsub==2.18.4 google-cloud-pubsublite==1.8.3 google-cloud-recommendations-ai==0.10.4 google-cloud-resource-manager==1.10.3 -google-cloud-spanner==3.40.0 +google-cloud-spanner==3.40.1 google-cloud-storage==2.10.0 google-cloud-videointelligence==2.11.3 google-cloud-vision==3.4.4 google-crc32c==1.5.0 -google-pasta==0.2.0 -google-resumable-media==2.5.0 +google-resumable-media==2.6.0 googleapis-common-protos==1.60.0 greenlet==2.0.2 grpc-google-iam-v1==0.12.6 -grpcio==1.56.2 -grpcio-status==1.56.2 +grpcio==1.58.0 +grpcio-status==1.58.0 guppy3==3.1.3 -h5py==3.9.0 hdfs==2.7.2 httplib2==0.22.0 -hypothesis==6.82.3 +hypothesis==6.84.3 idna==3.4 iniconfig==2.0.0 joblib==1.3.2 -keras==2.13.1 -libclang==16.0.6 -Markdown==3.4.4 -MarkupSafe==2.1.3 mmh3==4.0.1 mock==5.1.0 nltk==3.8.1 nose==1.3.7 -numpy==1.24.3 +numpy==1.24.4 oauth2client==4.1.3 -oauthlib==3.2.2 objsize==0.6.1 -opt-einsum==3.3.0 -orjson==3.9.2 +orjson==3.9.7 overrides==6.5.0 packaging==23.1 pandas==1.5.3 parameterized==0.9.0 -pluggy==1.2.0 
+pluggy==1.3.0 proto-plus==1.22.3 -protobuf==4.23.4 +protobuf==4.24.3 psycopg2-binary==2.9.7 pyarrow==11.0.0 pyasn1==0.5.0 @@ -115,45 +102,36 @@ pyasn1-modules==0.3.0 pycparser==2.21 pydot==1.4.2 PyHamcrest==2.0.4 -pymongo==4.4.1 +pymongo==4.5.0 PyMySQL==1.1.0 pyparsing==3.1.1 -pytest==7.4.0 +pytest==7.4.2 pytest-timeout==2.1.0 pytest-xdist==3.3.1 python-dateutil==2.8.2 python-snappy==0.6.1 -pytz==2023.3 +pytz==2023.3.post1 PyYAML==6.0.1 regex==2023.8.8 requests==2.31.0 requests-mock==1.11.0 -requests-oauthlib==1.3.1 rsa==4.9 scikit-learn==1.3.0 -scipy==1.11.1 +scipy==1.11.2 Shapely==1.8.5.post1 six==1.16.0 sortedcontainers==2.4.0 -soupsieve==2.4.1 +soupsieve==2.5 SQLAlchemy==1.4.49 sqlparse==0.4.4 -tenacity==8.2.2 -tensorboard==2.13.0 -tensorboard-data-server==0.7.1 -tensorflow==2.13.0 -tensorflow-cpu-aws==2.13.0;platform_machine=="aarch64" -tensorflow-estimator==2.13.0 -tensorflow-io-gcs-filesystem==0.33.0 -termcolor==2.3.0 +tenacity==8.2.3 testcontainers==3.7.1 threadpoolctl==3.2.0 tomli==2.0.1 -tqdm==4.66.0 -typing_extensions==4.5.0 +tqdm==4.66.1 +typing_extensions==4.7.1 uritemplate==4.1.1 urllib3==1.26.16 -websocket-client==1.6.1 -Werkzeug==2.3.6 +websocket-client==1.6.3 wrapt==1.15.0 zstandard==0.21.0 diff --git a/sdks/python/container/py311/base_image_requirements.txt b/sdks/python/container/py311/base_image_requirements.txt index 241a9ad581cb6..5aaeba15c69e2 100644 --- a/sdks/python/container/py311/base_image_requirements.txt +++ b/sdks/python/container/py311/base_image_requirements.txt @@ -21,8 +21,6 @@ # https://s.apache.org/beam-python-dev-wiki # Reach out to a committer if you need help. -absl-py==1.4.0 -astunparse==1.6.3 attrs==23.1.0 beautifulsoup4==4.12.2 bs4==0.0.1 @@ -30,7 +28,7 @@ cachetools==5.3.1 certifi==2023.7.22 cffi==1.15.1 charset-normalizer==3.2.0 -click==8.1.6 +click==8.1.7 cloudpickle==2.2.1 crcmod==1.7 cryptography==41.0.3 @@ -41,70 +39,59 @@ dnspython==2.4.2 docker==6.1.3 docopt==0.6.2 execnet==2.0.2 -fastavro==1.8.2 +fastavro==1.8.3 fasteners==0.18 -flatbuffers==23.5.26 freezegun==1.2.2 future==0.18.3 -gast==0.4.0 google-api-core==2.11.1 google-apitools==0.5.31 -google-auth==2.22.0 -google-auth-httplib2==0.1.0 -google-auth-oauthlib==1.0.0 -google-cloud-aiplatform==1.29.0 +google-auth==2.23.0 +google-auth-httplib2==0.1.1 +google-cloud-aiplatform==1.32.0 google-cloud-bigquery==3.11.4 google-cloud-bigquery-storage==2.22.0 google-cloud-bigtable==2.21.0 google-cloud-core==2.3.3 -google-cloud-datastore==2.17.0 +google-cloud-datastore==2.18.0 google-cloud-dlp==3.12.2 -google-cloud-language==2.10.1 -google-cloud-pubsub==2.18.2 +google-cloud-language==2.11.0 +google-cloud-pubsub==2.18.4 google-cloud-pubsublite==1.8.3 google-cloud-recommendations-ai==0.10.4 google-cloud-resource-manager==1.10.3 -google-cloud-spanner==3.40.0 +google-cloud-spanner==3.40.1 google-cloud-storage==2.10.0 google-cloud-videointelligence==2.11.3 google-cloud-vision==3.4.4 google-crc32c==1.5.0 -google-pasta==0.2.0 -google-resumable-media==2.5.0 +google-resumable-media==2.6.0 googleapis-common-protos==1.60.0 greenlet==2.0.2 grpc-google-iam-v1==0.12.6 -grpcio==1.56.2 -grpcio-status==1.56.2 +grpcio==1.58.0 +grpcio-status==1.58.0 guppy3==3.1.3 -h5py==3.9.0 hdfs==2.7.2 httplib2==0.22.0 -hypothesis==6.82.3 +hypothesis==6.84.3 idna==3.4 iniconfig==2.0.0 joblib==1.3.2 -keras==2.13.1 -libclang==16.0.6 -Markdown==3.4.4 -MarkupSafe==2.1.3 mmh3==4.0.1 mock==5.1.0 nltk==3.8.1 nose==1.3.7 -numpy==1.24.3 +numpy==1.24.4 oauth2client==4.1.3 -oauthlib==3.2.2 objsize==0.6.1 -opt-einsum==3.3.0 -orjson==3.9.2 
+orjson==3.9.7 overrides==6.5.0 packaging==23.1 pandas==1.5.3 parameterized==0.9.0 -pluggy==1.2.0 +pluggy==1.3.0 proto-plus==1.22.3 -protobuf==4.23.4 +protobuf==4.24.3 psycopg2-binary==2.9.7 pyarrow==11.0.0 pyasn1==0.5.0 @@ -112,42 +99,33 @@ pyasn1-modules==0.3.0 pycparser==2.21 pydot==1.4.2 PyHamcrest==2.0.4 -pymongo==4.4.1 +pymongo==4.5.0 PyMySQL==1.1.0 pyparsing==3.1.1 -pytest==7.4.0 +pytest==7.4.2 pytest-timeout==2.1.0 pytest-xdist==3.3.1 python-dateutil==2.8.2 -pytz==2023.3 +pytz==2023.3.post1 PyYAML==6.0.1 regex==2023.8.8 requests==2.31.0 requests-mock==1.11.0 -requests-oauthlib==1.3.1 rsa==4.9 scikit-learn==1.3.0 -scipy==1.11.1 +scipy==1.11.2 Shapely==1.8.5.post1 six==1.16.0 sortedcontainers==2.4.0 -soupsieve==2.4.1 +soupsieve==2.5 SQLAlchemy==1.4.49 sqlparse==0.4.4 -tenacity==8.2.2 -tensorboard==2.13.0 -tensorboard-data-server==0.7.1 -tensorflow==2.13.0 -tensorflow-cpu-aws==2.13.0;platform_machine=="aarch64" -tensorflow-estimator==2.13.0 -tensorflow-io-gcs-filesystem==0.33.0 -termcolor==2.3.0 +tenacity==8.2.3 testcontainers==3.7.1 threadpoolctl==3.2.0 -tqdm==4.66.0 -typing_extensions==4.5.0 +tqdm==4.66.1 +typing_extensions==4.7.1 urllib3==1.26.16 -websocket-client==1.6.1 -Werkzeug==2.3.6 +websocket-client==1.6.3 wrapt==1.15.0 zstandard==0.21.0 diff --git a/sdks/python/container/py38/base_image_requirements.txt b/sdks/python/container/py38/base_image_requirements.txt index 96caec61c09a6..472ee0c0bf8d0 100644 --- a/sdks/python/container/py38/base_image_requirements.txt +++ b/sdks/python/container/py38/base_image_requirements.txt @@ -21,8 +21,6 @@ # https://s.apache.org/beam-python-dev-wiki # Reach out to a committer if you need help. -absl-py==1.4.0 -astunparse==1.6.3 attrs==23.1.0 beautifulsoup4==4.12.2 bs4==0.0.1 @@ -30,7 +28,7 @@ cachetools==5.3.1 certifi==2023.7.22 cffi==1.15.1 charset-normalizer==3.2.0 -click==8.1.6 +click==8.1.7 cloudpickle==2.2.1 crcmod==1.7 cryptography==41.0.3 @@ -40,75 +38,63 @@ dill==0.3.1.1 dnspython==2.4.2 docker==6.1.3 docopt==0.6.2 -exceptiongroup==1.1.2 +exceptiongroup==1.1.3 execnet==2.0.2 -fastavro==1.8.2 +fastavro==1.8.3 fasteners==0.18 -flatbuffers==23.5.26 freezegun==1.2.2 future==0.18.3 -gast==0.4.0 google-api-core==2.11.1 -google-api-python-client==2.96.0 +google-api-python-client==2.99.0 google-apitools==0.5.31 -google-auth==2.22.0 -google-auth-httplib2==0.1.0 -google-auth-oauthlib==1.0.0 -google-cloud-aiplatform==1.29.0 +google-auth==2.23.0 +google-auth-httplib2==0.1.1 +google-cloud-aiplatform==1.32.0 google-cloud-bigquery==3.11.4 google-cloud-bigquery-storage==2.22.0 google-cloud-bigtable==2.21.0 google-cloud-core==2.3.3 -google-cloud-datastore==2.17.0 +google-cloud-datastore==2.18.0 google-cloud-dlp==3.12.2 -google-cloud-language==2.10.1 -google-cloud-profiler==4.0.0 -google-cloud-pubsub==2.18.2 +google-cloud-language==2.11.0 +google-cloud-profiler==4.1.0 +google-cloud-pubsub==2.18.4 google-cloud-pubsublite==1.8.3 google-cloud-recommendations-ai==0.10.4 google-cloud-resource-manager==1.10.3 -google-cloud-spanner==3.40.0 +google-cloud-spanner==3.40.1 google-cloud-storage==2.10.0 google-cloud-videointelligence==2.11.3 google-cloud-vision==3.4.4 google-crc32c==1.5.0 -google-pasta==0.2.0 -google-resumable-media==2.5.0 +google-resumable-media==2.6.0 googleapis-common-protos==1.60.0 greenlet==2.0.2 grpc-google-iam-v1==0.12.6 -grpcio==1.56.2 -grpcio-status==1.56.2 +grpcio==1.58.0 +grpcio-status==1.58.0 guppy3==3.1.3 -h5py==3.9.0 hdfs==2.7.2 httplib2==0.22.0 -hypothesis==6.82.3 +hypothesis==6.84.3 idna==3.4 -importlib-metadata==6.8.0 
iniconfig==2.0.0 joblib==1.3.2 -keras==2.13.1 -libclang==16.0.6 -Markdown==3.4.4 -MarkupSafe==2.1.3 mmh3==4.0.1 mock==5.1.0 nltk==3.8.1 nose==1.3.7 -numpy==1.24.3 +numpy==1.24.4 oauth2client==4.1.3 -oauthlib==3.2.2 objsize==0.6.1 -opt-einsum==3.3.0 -orjson==3.9.2 +orjson==3.9.7 overrides==6.5.0 packaging==23.1 pandas==1.5.3 parameterized==0.9.0 -pluggy==1.2.0 +pluggy==1.3.0 proto-plus==1.22.3 -protobuf==4.23.4 +protobuf==4.24.3 psycopg2-binary==2.9.7 pyarrow==11.0.0 pyasn1==0.5.0 @@ -116,46 +102,36 @@ pyasn1-modules==0.3.0 pycparser==2.21 pydot==1.4.2 PyHamcrest==2.0.4 -pymongo==4.4.1 +pymongo==4.5.0 PyMySQL==1.1.0 pyparsing==3.1.1 -pytest==7.4.0 +pytest==7.4.2 pytest-timeout==2.1.0 pytest-xdist==3.3.1 python-dateutil==2.8.2 python-snappy==0.6.1 -pytz==2023.3 +pytz==2023.3.post1 PyYAML==6.0.1 regex==2023.8.8 requests==2.31.0 requests-mock==1.11.0 -requests-oauthlib==1.3.1 rsa==4.9 scikit-learn==1.3.0 scipy==1.10.1 Shapely==1.8.5.post1 six==1.16.0 sortedcontainers==2.4.0 -soupsieve==2.4.1 +soupsieve==2.5 SQLAlchemy==1.4.49 sqlparse==0.4.4 -tenacity==8.2.2 -tensorboard==2.13.0 -tensorboard-data-server==0.7.1 -tensorflow==2.13.0 -tensorflow-cpu-aws==2.13.0;platform_machine=="aarch64" -tensorflow-estimator==2.13.0 -tensorflow-io-gcs-filesystem==0.33.0 -termcolor==2.3.0 +tenacity==8.2.3 testcontainers==3.7.1 threadpoolctl==3.2.0 tomli==2.0.1 -tqdm==4.66.0 -typing_extensions==4.5.0 +tqdm==4.66.1 +typing_extensions==4.7.1 uritemplate==4.1.1 urllib3==1.26.16 -websocket-client==1.6.1 -Werkzeug==2.3.6 +websocket-client==1.6.3 wrapt==1.15.0 -zipp==3.16.2 zstandard==0.21.0 diff --git a/sdks/python/container/py39/base_image_requirements.txt b/sdks/python/container/py39/base_image_requirements.txt index 417b82fbb29b7..257bcf9869e29 100644 --- a/sdks/python/container/py39/base_image_requirements.txt +++ b/sdks/python/container/py39/base_image_requirements.txt @@ -21,8 +21,6 @@ # https://s.apache.org/beam-python-dev-wiki # Reach out to a committer if you need help. 
-absl-py==1.4.0 -astunparse==1.6.3 attrs==23.1.0 beautifulsoup4==4.12.2 bs4==0.0.1 @@ -30,7 +28,7 @@ cachetools==5.3.1 certifi==2023.7.22 cffi==1.15.1 charset-normalizer==3.2.0 -click==8.1.6 +click==8.1.7 cloudpickle==2.2.1 crcmod==1.7 cryptography==41.0.3 @@ -40,75 +38,63 @@ dill==0.3.1.1 dnspython==2.4.2 docker==6.1.3 docopt==0.6.2 -exceptiongroup==1.1.2 +exceptiongroup==1.1.3 execnet==2.0.2 -fastavro==1.8.2 +fastavro==1.8.3 fasteners==0.18 -flatbuffers==23.5.26 freezegun==1.2.2 future==0.18.3 -gast==0.4.0 google-api-core==2.11.1 -google-api-python-client==2.96.0 +google-api-python-client==2.99.0 google-apitools==0.5.31 -google-auth==2.22.0 -google-auth-httplib2==0.1.0 -google-auth-oauthlib==1.0.0 -google-cloud-aiplatform==1.29.0 +google-auth==2.23.0 +google-auth-httplib2==0.1.1 +google-cloud-aiplatform==1.32.0 google-cloud-bigquery==3.11.4 google-cloud-bigquery-storage==2.22.0 google-cloud-bigtable==2.21.0 google-cloud-core==2.3.3 -google-cloud-datastore==2.17.0 +google-cloud-datastore==2.18.0 google-cloud-dlp==3.12.2 -google-cloud-language==2.10.1 -google-cloud-profiler==4.0.0 -google-cloud-pubsub==2.18.2 +google-cloud-language==2.11.0 +google-cloud-profiler==4.1.0 +google-cloud-pubsub==2.18.4 google-cloud-pubsublite==1.8.3 google-cloud-recommendations-ai==0.10.4 google-cloud-resource-manager==1.10.3 -google-cloud-spanner==3.40.0 +google-cloud-spanner==3.40.1 google-cloud-storage==2.10.0 google-cloud-videointelligence==2.11.3 google-cloud-vision==3.4.4 google-crc32c==1.5.0 -google-pasta==0.2.0 -google-resumable-media==2.5.0 +google-resumable-media==2.6.0 googleapis-common-protos==1.60.0 greenlet==2.0.2 grpc-google-iam-v1==0.12.6 -grpcio==1.56.2 -grpcio-status==1.56.2 +grpcio==1.58.0 +grpcio-status==1.58.0 guppy3==3.1.3 -h5py==3.9.0 hdfs==2.7.2 httplib2==0.22.0 -hypothesis==6.82.3 +hypothesis==6.84.3 idna==3.4 -importlib-metadata==6.8.0 iniconfig==2.0.0 joblib==1.3.2 -keras==2.13.1 -libclang==16.0.6 -Markdown==3.4.4 -MarkupSafe==2.1.3 mmh3==4.0.1 mock==5.1.0 nltk==3.8.1 nose==1.3.7 -numpy==1.24.3 +numpy==1.24.4 oauth2client==4.1.3 -oauthlib==3.2.2 objsize==0.6.1 -opt-einsum==3.3.0 -orjson==3.9.2 +orjson==3.9.7 overrides==6.5.0 packaging==23.1 pandas==1.5.3 parameterized==0.9.0 -pluggy==1.2.0 +pluggy==1.3.0 proto-plus==1.22.3 -protobuf==4.23.4 +protobuf==4.24.3 psycopg2-binary==2.9.7 pyarrow==11.0.0 pyasn1==0.5.0 @@ -116,46 +102,36 @@ pyasn1-modules==0.3.0 pycparser==2.21 pydot==1.4.2 PyHamcrest==2.0.4 -pymongo==4.4.1 +pymongo==4.5.0 PyMySQL==1.1.0 pyparsing==3.1.1 -pytest==7.4.0 +pytest==7.4.2 pytest-timeout==2.1.0 pytest-xdist==3.3.1 python-dateutil==2.8.2 python-snappy==0.6.1 -pytz==2023.3 +pytz==2023.3.post1 PyYAML==6.0.1 regex==2023.8.8 requests==2.31.0 requests-mock==1.11.0 -requests-oauthlib==1.3.1 rsa==4.9 scikit-learn==1.3.0 -scipy==1.11.1 +scipy==1.11.2 Shapely==1.8.5.post1 six==1.16.0 sortedcontainers==2.4.0 -soupsieve==2.4.1 +soupsieve==2.5 SQLAlchemy==1.4.49 sqlparse==0.4.4 -tenacity==8.2.2 -tensorboard==2.13.0 -tensorboard-data-server==0.7.1 -tensorflow==2.13.0 -tensorflow-cpu-aws==2.13.0;platform_machine=="aarch64" -tensorflow-estimator==2.13.0 -tensorflow-io-gcs-filesystem==0.33.0 -termcolor==2.3.0 +tenacity==8.2.3 testcontainers==3.7.1 threadpoolctl==3.2.0 tomli==2.0.1 -tqdm==4.66.0 -typing_extensions==4.5.0 +tqdm==4.66.1 +typing_extensions==4.7.1 uritemplate==4.1.1 urllib3==1.26.16 -websocket-client==1.6.1 -Werkzeug==2.3.6 +websocket-client==1.6.3 wrapt==1.15.0 -zipp==3.16.2 zstandard==0.21.0 diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 
355b75ee90235..d5ca354fcfbe2 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -230,7 +230,7 @@ def get_portability_package_data(): language_level=3), install_requires=[ 'crcmod>=1.7,<2.0', - 'orjson<3.9.3', # https://github.com/ijl/orjson/issues/415 + 'orjson>=3.9.7,<4', # Dill doesn't have forwards-compatibility guarantees within minor # version. Pickles created with a new version of dill may not unpickle # using older version of dill. It is best to use the same version of @@ -254,12 +254,17 @@ def get_portability_package_data(): 'packaging>=22.0', 'pymongo>=3.8.0,<5.0.0', 'proto-plus>=1.7.1,<2', - # use a tighter upper bound in protobuf dependency - # to make sure the minor version at job submission + # 1. Use a tighter upper bound in protobuf dependency to make sure + # the minor version at job submission # does not exceed the minor version at runtime. # To avoid depending on an old dependency, update the minor version on # every Beam release, see: https://github.com/apache/beam/issues/25590 - 'protobuf>=3.20.3,<4.24.0', + + # 2. Allow latest protobuf 3 version as a courtesy to some customers. + # + # 3. Exclude protobuf 4 versions that leak memory, see: + # https://github.com/apache/beam/issues/28246 + 'protobuf>=3.20.3,<4.25.0,!=4.0.*,!=4.21.*,!=4.22.0,!=4.23.*,!=4.24.0,!=4.24.1,!=4.24.2', # pylint: disable=line-too-long 'pydot>=1.2.0,<2', 'python-dateutil>=2.8.0,<3', 'pytz>=2018.3', diff --git a/website/www/site/content/en/blog/beam-2.50.0.md b/website/www/site/content/en/blog/beam-2.50.0.md index 7610459087c53..4cfddd6167a68 100644 --- a/website/www/site/content/en/blog/beam-2.50.0.md +++ b/website/www/site/content/en/blog/beam-2.50.0.md @@ -77,6 +77,7 @@ For more information on changes in 2.50.0, check out the [detailed release notes * Fixed DirectRunner bug in Python SDK where GroupByKey gets empty PCollection and fails when pipeline option `direct_num_workers!=1`.([#27373](https://github.com/apache/beam/pull/27373)) * Fixed BigQuery I/O bug when estimating size on queries that utilize row-level security ([#27474](https://github.com/apache/beam/pull/27474)) +* Beam Python containers rely on a version of Debian/aom that has several security vulnerabilities: [CVE-2021-30474](https://nvd.nist.gov/vuln/detail/CVE-2021-30474), [CVE-2021-30475](https://nvd.nist.gov/vuln/detail/CVE-2021-30475), [CVE-2021-30473](https://nvd.nist.gov/vuln/detail/CVE-2021-30473), [CVE-2020-36133](https://nvd.nist.gov/vuln/detail/CVE-2020-36133), [CVE-2020-36131](https://nvd.nist.gov/vuln/detail/CVE-2020-36131), [CVE-2020-36130](https://nvd.nist.gov/vuln/detail/CVE-2020-36130), and [CVE-2020-36135](https://nvd.nist.gov/vuln/detail/CVE-2020-36135). ## Known Issues diff --git a/website/www/site/content/en/contribute/release-guide.md b/website/www/site/content/en/contribute/release-guide.md index 19e022b65b7f6..964d2bfa7050e 100644 --- a/website/www/site/content/en/contribute/release-guide.md +++ b/website/www/site/content/en/contribute/release-guide.md @@ -1301,15 +1301,15 @@ After new Beam Release is published, Beam Playground can be updated following th 1. Change the value for _SDK_TAG variable (Advanced -> Substitution Variables) to the actual version of Beam SDK (e.g. 2.47.0) 1. Click the Save button. The settings window should close without any errors 1. Click the RUN button next to the trigger name - 1. Set the value for the _CONTAINER_TAG variable in format DD-MM-vXX (DD - day, MM - month, XX - version, e.g., 20-12-v01) + 1. 
In the panel that opened, set the value for the _CONTAINER_TAG variable in format DD-MM-vXX (DD - day, MM - month, XX - version, e.g., 20-12-v01)
   1. Click the Run Trigger button
1. Open the [Trigger History](https://console.cloud.google.com/cloud-build/builds?project=apache-beam-testing) and wait for the job completion. Ensure that the job completed successfully (Status field shows a green tick)
-1. Find the trigger "Playground-CD-stable-manual-stg":
+1. Find the trigger "Playground-CD-stable-manual-stg"; it will be run twice, once with default variables and once with some of them overridden:
   1. Click the RUN button next to the trigger name
-  1. Click the Run Trigger button (with default varaible vaues)
+  1. In the panel that opened, click the Run Trigger button (with default variable values)
   1. Open the [Trigger History](https://console.cloud.google.com/cloud-build/builds?project=apache-beam-testing) and wait for the job completion. Ensure that the job completed successfully (Status field shows a green tick)
   1. Click the RUN button next to the trigger name
-  1. Change values for the variables:
+  1. In the panel that opened, change values for the variables:
      * _ORIGIN = PG_BEAMDOC
      * _SUBDIRS = ./learning/beamdoc
   1. Click the Run Trigger button

From 8f880c0f3b35c9020cf0deb69650192902534cda Mon Sep 17 00:00:00 2001
From: Andrey Devyatkin
Date: Thu, 14 Sep 2023 19:44:09 +0200
Subject: [PATCH 2/3] merge master into beam_PostCommit_Java_ValidatesRunner branch

---
 .github/workflows/README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/README.md b/.github/workflows/README.md
index e219660d753fd..02a87c5fdbbf0 100644
--- a/.github/workflows/README.md
+++ b/.github/workflows/README.md
@@ -191,7 +191,6 @@ Please note that jobs with matrix need to have matrix element in the comment.
Ex | [ PostCommit Java Jpms Direct Java17 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml) | N/A |`Run Jpms Direct Java 17 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml) | | [ PostCommit Java Jpms Flink Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml) | N/A |`Run Jpms Flink Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Flink_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml) | | [ PostCommit Java Jpms Spark Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml) | N/A |`Run Jpms Spark Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Spark_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml) | -<<<<<<< HEAD | [ PostCommit Java Sickbay ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml) | N/A |`Run Java Sickbay`| [![PostCommit Java Sickbay](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml) | | [ PostCommit Java ValidatesRunner Dataflow JavaVersions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java.yml) | ['11','17'] |`Run Dataflow ValidatesRunner Java (matrix_element)`| [![PostCommit Java ValidatesRunner Dataflow JavaVersions](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java.yml) | | [ PostCommit Java ValidatesRunner Dataflow Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml) | N/A |`Run Dataflow Streaming ValidatesRunner`| [![PostCommit Java ValidatesRunner Dataflow Streaming](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml) | From 9a72dc2220fefb027c4a010d5a952a7aad31962c Mon Sep 17 00:00:00 2001 From: Andrey Devyatkin Date: Thu, 14 Sep 2023 20:04:56 +0200 Subject: [PATCH 3/3] updated README file --- .github/workflows/README.md | 72 ++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 02a87c5fdbbf0..bfb49a14bb3e7 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -171,48 +171,48 @@ Please note that jobs with matrix need to have matrix element in the comment. 
Ex ```Run Python PreCommit (3.8)``` | Workflow name | Matrix | Trigger Phrase | Cron Status | |:-------------:|:------:|:--------------:|:-----------:| -| [ PostCommit BeamMetrics Publish ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml) | N/A |`Run Beam Metrics Deployment`| [![.github/workflows/beam_PostCommit_BeamMetrics_Publish](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) -| [ PostCommit TransformService Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) | N/A |`Run TransformService_Direct PostCommit`| [![.github/workflows/beam_PostCommit_TransformService_Direct](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) -| [ PostCommit Go ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml) | N/A |`Run Go PostCommit`| [![.github/workflows/beam_PostCommit_Go](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml) | -| [ PostCommit Go Dataflow ARM](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) | N/A |`Run Go PostCommit Dataflow ARM`| [![.github/workflows/beam_PostCommit_Go_Dataflow_ARM](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) | -| [ PostCommit Go VR Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml) | N/A |`Run Go Flink ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml) | -| [ PostCommit Go VR Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml) | N/A |`Run Go Samza ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml) | -| [ PostCommit Go VR Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml) | N/A |`Run Go Spark ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml) | -| [ PostCommit Java ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml) | N/A |`Run Java PostCommit`| [![PostCommit Java](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml) | -| [ PostCommit Java Avro Versions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml) | N/A |`Run Java Avro Versions PostCommit`| [![PostCommit Java Avro 
Versions](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml) | -| [ PostCommit Java Dataflow V1 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml) | N/A |`Run PostCommit_Java_Dataflow`| [![PostCommit Java Dataflow V1](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml) | -| [ PostCommit Java Dataflow V2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml) | N/A |`Run PostCommit_Java_DataflowV2`| [![PostCommit Java Dataflow V2](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml) | -| [ PostCommit Java Examples Dataflow ARM ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml) | N/A |`Run Java_Examples_Dataflow_ARM PostCommit`| [![PostCommit Java Examples Dataflow ARM](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml) | -| [ PostCommit Java Examples Dataflow Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | N/A |`Run Java examples on Dataflow Java 11`| [![PostCommit Java Examples Dataflow Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | -| [ PostCommit Java Examples Dataflow Java17 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml) | N/A |`Run Java examples on Dataflow Java 17`| [![PostCommit Java Examples Dataflow Java17](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml) | +| [ PostCommit BeamMetrics Publish ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml) | N/A |`Run Beam Metrics Deployment`| [![.github/workflows/beam_PostCommit_BeamMetrics_Publish.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml) +| [ PostCommit Go ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml) | N/A |`Run Go PostCommit`| [![.github/workflows/beam_PostCommit_Go.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml) | +| [ PostCommit Go Dataflow ARM](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) | N/A |`Run Go PostCommit Dataflow ARM`| 
[![.github/workflows/beam_PostCommit_Go_Dataflow_ARM.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) |
+| [ PostCommit Go VR Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml) | N/A |`Run Go Flink ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml) |
+| [ PostCommit Go VR Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml) | N/A |`Run Go Samza ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Samza.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml) |
+| [ PostCommit Go VR Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml) | N/A |`Run Go Spark ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml) |
+| [ PostCommit Java ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml) | N/A |`Run Java PostCommit`| [![.github/workflows/beam_PostCommit_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml) |
+| [ PostCommit Java Avro Versions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml) | N/A |`Run Java Avro Versions PostCommit`| [![.github/workflows/beam_PostCommit_Java_Avro_Versions.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml) |
+| [ PostCommit Java Dataflow V1 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml) | N/A |`Run PostCommit_Java_Dataflow`| [![.github/workflows/beam_PostCommit_Java_DataflowV1.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml) |
+| [ PostCommit Java Dataflow V2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml) | N/A |`Run PostCommit_Java_DataflowV2`| [![.github/workflows/beam_PostCommit_Java_DataflowV2.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml) |
+| [ PostCommit Java Examples Dataflow ARM ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml) | N/A |`Run Java_Examples_Dataflow_ARM PostCommit`|
[![.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml) |
+| [ PostCommit Java Examples Dataflow Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | N/A |`Run Java examples on Dataflow Java 11`| [![.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) |
+| [ PostCommit Java Examples Dataflow Java17 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml) | N/A |`Run Java examples on Dataflow Java 17`| [![.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml) |
 | [ PostCommit Java Jpms Dataflow Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml) | N/A |`Run Jpms Dataflow Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml) |
 | [ PostCommit Java Jpms Dataflow Java17 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml) | N/A |`Run Jpms Dataflow Java 17 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml) |
 | [ PostCommit Java Jpms Direct Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml) | N/A |`Run Jpms Direct Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml) |
 | [ PostCommit Java Jpms Direct Java17 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml) | N/A |`Run Jpms Direct Java 17 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml) |
 | [ PostCommit Java Jpms Flink Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml) | N/A |`Run Jpms Flink Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Flink_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml) |
 | [ PostCommit Java Jpms Spark Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml) | N/A |`Run Jpms Spark Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Spark_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml) |
-| [ PostCommit Java Sickbay ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml) | N/A |`Run Java Sickbay`| [![PostCommit Java Sickbay](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml) |
-| [ PostCommit Java ValidatesRunner Dataflow JavaVersions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java.yml) | ['11','17'] |`Run Dataflow ValidatesRunner Java (matrix_element)`| [![PostCommit Java ValidatesRunner Dataflow JavaVersions](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java.yml) |
-| [ PostCommit Java ValidatesRunner Dataflow Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml) | N/A |`Run Dataflow Streaming ValidatesRunner`| [![PostCommit Java ValidatesRunner Dataflow Streaming](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml) |
-| [ PostCommit Java ValidatesRunner Dataflow V2 Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml) | N/A |`Run Java Dataflow V2 ValidatesRunner Streaming`| [![PostCommit Java ValidatesRunner Dataflow V2 Streaming](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml) |
-| [ PostCommit Java ValidatesRunner Dataflow V2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml) | N/A |`Run Java Dataflow V2 ValidatesRunner`| [![PostCommit Java ValidatesRunner Dataflow V2](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml) |
-| [ PostCommit Java ValidatesRunner Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml) | N/A |`Run Dataflow ValidatesRunner`| [![PostCommit Java ValidatesRunner Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml) |
-| [ PostCommit Java ValidatesRunner Direct JavaVersions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml) | ['11','17'] |`Run Direct ValidatesRunner Java (matrix_element)`| [![PostCommit Java ValidatesRunner Direct JavaVersions](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml) |
-| [ PostCommit Java ValidatesRunner Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml) | N/A |`Run Direct ValidatesRunner`| [![PostCommit Java ValidatesRunner Direct](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml) |
-| [ PostCommit Java ValidatesRunner Flink Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml) | N/A |`Run Flink ValidatesRunner Java 11`| [![PostCommit Java ValidatesRunner Flink Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml) |
-| [ PostCommit Java ValidatesRunner Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml) | N/A |`Run Flink ValidatesRunner`| [![PostCommit Java ValidatesRunner Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml) |
-| [ PostCommit Java ValidatesRunner Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml) | N/A |`Run Samza ValidatesRunner`| [![PostCommit Java ValidatesRunner Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml) |
-| [ PostCommit Java ValidatesRunner Spark Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml) | N/A |`Run Spark ValidatesRunner Java 11`| [![PostCommit Java ValidatesRunner Spark Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml) |
-| [ PostCommit Java ValidatesRunner Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml) | N/A |`Run Spark ValidatesRunner`| [![PostCommit Java ValidatesRunner Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml) |
-| [ PostCommit Java ValidatesRunner SparkStructuredStreaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml) | N/A |`Run Spark StructuredStreaming ValidatesRunner`| [![PostCommit Java ValidatesRunner SparkStructuredStreaming](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml) |
-| [ PostCommit Java ValidatesRunner Twister2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml) | N/A |`Run Twister2 ValidatesRunner`| [![PostCommit Java ValidatesRunner Twister2](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml) |
-| [ PostCommit Java ValidatesRunner ULR ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml) | N/A |`Run ULR Loopback ValidatesRunner`| [![PostCommit Java ValidatesRunner ULR](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml) |
-| [ PostCommit Python Examples Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml) | N/A |`Run Python Examples_Dataflow`| [![PostCommit Python Examples Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml) |
-| [ PostCommit Python Examples Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python Examples_Direct (matrix_element)`| [![PostCommit Python Examples Direct](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) |
-| [ PostCommit Python Examples Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) | ['3.8','3.11'] |`Run Python Examples_Flink (matrix_element)`| [![PostCommit Python Examples Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) |
-| [ PostCommit Python Examples Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml) | ['3.8','3.11'] |`Run Python Examples_Spark (matrix_element)`| [![PostCommit Python Examples Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml) |
-| [ PostCommit Sickbay Python ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python PostCommit Sickbay tests (matrix_element)`| [![PostCommit Sickbay Python](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml) |
-| [ PostCommit Website Publish ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml) | N/A | N/A | [![PostCommit Website Publish](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml) |
+| [ PostCommit Java Sickbay ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml) | N/A |`Run Java Sickbay`| [![.github/workflows/beam_PostCommit_Java_Sickbay.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml) |
+| [ PostCommit Java ValidatesRunner Dataflow JavaVersions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java.yml) | ['11','17'] |`Run Dataflow ValidatesRunner Java (matrix_element)`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java.yml) |
+| [ PostCommit Java ValidatesRunner Dataflow Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml) | N/A |`Run Dataflow Streaming ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml) |
+| [ PostCommit Java ValidatesRunner Dataflow V2 Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml) | N/A |`Run Java Dataflow V2 ValidatesRunner Streaming`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml) |
+| [ PostCommit Java ValidatesRunner Dataflow V2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml) | N/A |`Run Java Dataflow V2 ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml) |
+| [ PostCommit Java ValidatesRunner Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml) | N/A |`Run Dataflow ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml) |
+| [ PostCommit Java ValidatesRunner Direct JavaVersions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml) | ['11','17'] |`Run Direct ValidatesRunner Java (matrix_element)`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml) |
+| [ PostCommit Java ValidatesRunner Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml) | N/A |`Run Direct ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml) |
+| [ PostCommit Java ValidatesRunner Flink Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml) | N/A |`Run Flink ValidatesRunner Java 11`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml) |
+| [ PostCommit Java ValidatesRunner Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml) | N/A |`Run Flink ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml) |
+| [ PostCommit Java ValidatesRunner Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml) | N/A |`Run Samza ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml) |
+| [ PostCommit Java ValidatesRunner Spark Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml) | N/A |`Run Spark ValidatesRunner Java 11`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml) |
+| [ PostCommit Java ValidatesRunner Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml) | N/A |`Run Spark ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml) |
+| [ PostCommit Java ValidatesRunner SparkStructuredStreaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml) | N/A |`Run Spark StructuredStreaming ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml) |
+| [ PostCommit Java ValidatesRunner Twister2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml) | N/A |`Run Twister2 ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml) |
+| [ PostCommit Java ValidatesRunner ULR ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml) | N/A |`Run ULR Loopback ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml) |
+| [ PostCommit Python Examples Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml) | N/A |`Run Python Examples_Dataflow`| [![.github/workflows/beam_PostCommit_Python_Examples_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml) |
+| [ PostCommit Python Examples Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python Examples_Direct (matrix_element)`| [![.github/workflows/beam_PostCommit_Python_Examples_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) |
+| [ PostCommit Python Examples Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) | ['3.8','3.11'] |`Run Python Examples_Flink (matrix_element)`| [![.github/workflows/beam_PostCommit_Python_Examples_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) |
+| [ PostCommit Python Examples Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml) | ['3.8','3.11'] |`Run Python Examples_Spark (matrix_element)`| [![.github/workflows/beam_PostCommit_Python_Examples_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml) |
+| [ PostCommit Sickbay Python ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python (matrix_element) PostCommit Sickbay`| [![.github/workflows/beam_PostCommit_Sickbay_Python.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml) |
+| [ PostCommit TransformService Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) | N/A |`Run TransformService_Direct PostCommit`| [![.github/workflows/beam_PostCommit_TransformService_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) |
+| [ PostCommit Website Publish ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml) | N/A | N/A | [![.github/workflows/beam_PostCommit_Website_Publish.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml) |
 | [ PostCommit XVR GoUsingJava Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml) | N/A |`Run XVR_GoUsingJava_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml) |
 | [ PreCommit Community Metrics ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_CommunityMetrics.yml) | N/A |`Run CommunityMetrics PreCommit`| [![.github/workflows/beam_PreCommit_CommunityMetrics.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_CommunityMetrics.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_CommunityMetrics.yml) |
 | [ PreCommit Go ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Go.yml) | N/A |`Run Go PreCommit`| [![.github/workflows/beam_PreCommit_Go.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Go.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Go.yml) |