From cec8533ce7e41f8c9f0e8a48aebdf399836a179b Mon Sep 17 00:00:00 2001 From: shreyakhajanchi <92910380+shreyakhajanchi@users.noreply.github.com> Date: Tue, 30 Jul 2024 12:25:52 +0530 Subject: [PATCH] Terraform template updates for custom transformation (#1746) * terraform changes for end-to-end template * terraform updates * formatting fix --- .../samples/mysql-end-to-end/main.tf | 6 ++- .../samples/mysql-end-to-end/terraform.tfvars | 31 ++++++++------ .../mysql-end-to-end/terraform_simple.tfvars | 12 +++--- .../samples/mysql-end-to-end/variables.tf | 4 ++ .../samples/mysql-sharded-end-to-end/main.tf | 6 ++- .../mysql-sharded-end-to-end/terraform.tfvars | 42 ++++++++++--------- .../mysql-sharded-end-to-end/variables.tf | 4 ++ .../pre-configured-conn-profiles/main.tf | 6 ++- .../terraform.tfvars | 26 ++++++------ .../terraform_simple.tfvars | 12 +++--- .../pre-configured-conn-profiles/variables.tf | 4 ++ 11 files changed, 92 insertions(+), 61 deletions(-) diff --git a/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/main.tf b/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/main.tf index 94472ec599..f2664c4b8f 100644 --- a/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/main.tf +++ b/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/main.tf @@ -248,6 +248,10 @@ resource "google_dataflow_flex_template_job" "live_migration_job" { directoryWatchDurationInMinutes = tostring(var.dataflow_params.template_params.directory_watch_duration_in_minutes) spannerPriority = var.dataflow_params.template_params.spanner_priority dlqGcsPubSubSubscription = var.dataflow_params.template_params.dlq_gcs_pub_sub_subscription + transformationJarPath = var.dataflow_params.template_params.transformation_jar_path + transformationClassName = var.dataflow_params.template_params.transformation_class_name + transformationCustomParameters = var.dataflow_params.template_params.transformation_custom_parameters + filteredEventsDirectory = 
var.dataflow_params.template_params.filtered_events_directory } # Additional Job Configurations @@ -265,7 +269,7 @@ resource "google_dataflow_flex_template_job" "live_migration_job" { service_account_email = var.dataflow_params.runner_params.service_account_email skip_wait_on_job_termination = var.dataflow_params.runner_params.skip_wait_on_job_termination staging_location = var.dataflow_params.runner_params.staging_location - subnetwork = var.common_params.host_project != null ? "https://www.googleapis.com/compute/v1/projects/${var.common_params.host_project}/regions/${var.common_params.region}/subnetworks/${var.dataflow_params.runner_params.subnetwork}" : "https://www.googleapis.com/compute/v1/projects/${var.common_params.project}/regions/${var.common_params.region}/subnetworks/${var.dataflow_params.runner_params.subnetwork}" + subnetwork = var.dataflow_params.runner_params.subnetwork != null ? var.common_params.host_project != null ? "https://www.googleapis.com/compute/v1/projects/${var.common_params.host_project}/regions/${var.common_params.region}/subnetworks/${var.dataflow_params.runner_params.subnetwork}" : "https://www.googleapis.com/compute/v1/projects/${var.common_params.project}/regions/${var.common_params.region}/subnetworks/${var.dataflow_params.runner_params.subnetwork}" : null temp_location = var.dataflow_params.runner_params.temp_location on_delete = var.dataflow_params.runner_params.on_delete region = var.common_params.region diff --git a/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/terraform.tfvars b/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/terraform.tfvars index 0aa942f67e..1dcf5ce111 100644 --- a/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/terraform.tfvars +++ b/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/terraform.tfvars @@ -16,13 +16,16 @@ datastream_params = { stream_id = "mysql-stream" # Or provide a custom stream ID max_concurrent_cdc_tasks = 50 # Adjust as needed 
max_concurrent_backfill_tasks = 50 # Adjust as needed - mysql_databases = [ - { - database = "" - tables = [] # List specific tables to replicate (optional) - } - # Add more database objects if needed - ] + mysql_host = "" + # Use the Public IP if using IP allowlisting and Private IP if using + # private connectivity. + mysql_username = "" + mysql_password = "" + mysql_port = 3306 + mysql_database = { + database = "" + tables = [] # List specific tables to replicate (optional) + } private_connectivity_id = "" # Only one of `private_connectivity_id` or `private_connectivity` block # may exist. Use `private_connectivity_id` to specify an existing @@ -52,18 +55,22 @@ dataflow_params = { dlq_retry_minutes = 10 # Adjust as needed dlq_max_retry_count = 3 # Adjust as needed datastream_root_url = "" # Base URL of your Datastream API (optional) - datastream_source_type = "MYSQL" + datastream_source_type = "mysql" round_json_decimals = false - run_mode = "STREAMING" + run_mode = "regular" transformation_context_file_path = "" # Path to your transformation file (optional) directory_watch_duration_in_minutes = "5" # Adjust as needed - spanner_priority = "high" - dlq_gcs_pub_sub_subscription = "" # Optional + spanner_priority = "HIGH" + dlq_gcs_pub_sub_subscription = "" # Optional + transformation_jar_path = "" # Optional + transformation_custom_parameters = "" # Optional + transformation_class_name = "" # Fully Qualified Class Name (Optional) + filtered_events_directory = "" # Optional } runner_params = { additional_experiments = [] # Add any additional experiments or leave empty - autoscaling_algorithm = "THROUGHPUT_BASED" # Or NONE + autoscaling_algorithm = "BASIC" # Or NONE enable_streaming_engine = true # true or false kms_key_name = "" # If you're using customer-managed encryption key labels = {} # Add any labels you want diff --git a/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/terraform_simple.tfvars 
b/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/terraform_simple.tfvars index 4748c61f0d..27e2ef73ec 100644 --- a/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/terraform_simple.tfvars +++ b/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/terraform_simple.tfvars @@ -12,13 +12,11 @@ datastream_params = { mysql_username = "" mysql_password = "" mysql_port = 3306 - mysql_databases = [ - { - database = "" - tables = [] - # Optionally list specific tables, or remove "tables" all together for all tables - } - ] + mysql_database = { + database = "" + tables = [] + # Optionally list specific tables, or remove "tables" altogether for all tables + } private_connectivity_id = "" # Only one of `private_connectivity_id` or `private_connectivity` block # may exist. Use `private_connectivity_id` to specify an existing diff --git a/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/variables.tf b/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/variables.tf index 853763a09e..8c7ec1acdf 100644 --- a/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/variables.tf +++ b/v2/datastream-to-spanner/terraform/samples/mysql-end-to-end/variables.tf @@ -69,6 +69,10 @@ variable "dataflow_params" { directory_watch_duration_in_minutes = optional(string) spanner_priority = optional(string) dlq_gcs_pub_sub_subscription = optional(string) + transformation_jar_path = optional(string) + transformation_custom_parameters = optional(string) + transformation_class_name = optional(string) + filtered_events_directory = optional(string) }) runner_params = object({ additional_experiments = optional(set(string), [ diff --git a/v2/datastream-to-spanner/terraform/samples/mysql-sharded-end-to-end/main.tf b/v2/datastream-to-spanner/terraform/samples/mysql-sharded-end-to-end/main.tf index 12c59a7d87..92d6b094e7 100644 --- a/v2/datastream-to-spanner/terraform/samples/mysql-sharded-end-to-end/main.tf +++ 
b/v2/datastream-to-spanner/terraform/samples/mysql-sharded-end-to-end/main.tf @@ -276,6 +276,10 @@ resource "google_dataflow_flex_template_job" "live_migration_job" { directoryWatchDurationInMinutes = tostring(var.common_params.dataflow_params.template_params.directory_watch_duration_in_minutes) spannerPriority = var.common_params.dataflow_params.template_params.spanner_priority dlqGcsPubSubSubscription = var.shard_list[count.index].dataflow_params.template_params.dlq_gcs_pub_sub_subscription + transformationJarPath = var.common_params.dataflow_params.template_params.transformation_jar_path + transformationClassName = var.common_params.dataflow_params.template_params.transformation_class_name + transformationCustomParameters = var.common_params.dataflow_params.template_params.transformation_custom_parameters + filteredEventsDirectory = var.common_params.dataflow_params.template_params.filtered_events_directory } # Additional Job Configurations @@ -293,7 +297,7 @@ resource "google_dataflow_flex_template_job" "live_migration_job" { service_account_email = var.common_params.dataflow_params.runner_params.service_account_email skip_wait_on_job_termination = var.common_params.dataflow_params.runner_params.skip_wait_on_job_termination staging_location = var.common_params.dataflow_params.runner_params.staging_location - subnetwork = var.common_params.host_project != null ? "https://www.googleapis.com/compute/v1/projects/${var.common_params.host_project}/regions/${var.common_params.region}/subnetworks/${var.common_params.dataflow_params.runner_params.subnetwork}" : "https://www.googleapis.com/compute/v1/projects/${var.common_params.project}/regions/${var.common_params.region}/subnetworks/${var.common_params.dataflow_params.runner_params.subnetwork}" + subnetwork = var.common_params.dataflow_params.runner_params.subnetwork != null ? var.common_params.host_project != null ? 
"https://www.googleapis.com/compute/v1/projects/${var.common_params.host_project}/regions/${var.common_params.region}/subnetworks/${var.common_params.dataflow_params.runner_params.subnetwork}" : "https://www.googleapis.com/compute/v1/projects/${var.common_params.project}/regions/${var.common_params.region}/subnetworks/${var.common_params.dataflow_params.runner_params.subnetwork}" : null temp_location = var.common_params.dataflow_params.runner_params.temp_location on_delete = var.common_params.dataflow_params.runner_params.on_delete region = var.common_params.region diff --git a/v2/datastream-to-spanner/terraform/samples/mysql-sharded-end-to-end/terraform.tfvars b/v2/datastream-to-spanner/terraform/samples/mysql-sharded-end-to-end/terraform.tfvars index 3eb22133f9..3e2476be47 100644 --- a/v2/datastream-to-spanner/terraform/samples/mysql-sharded-end-to-end/terraform.tfvars +++ b/v2/datastream-to-spanner/terraform/samples/mysql-sharded-end-to-end/terraform.tfvars @@ -30,28 +30,32 @@ common_params = { dataflow_params = { template_params = { - shadow_table_prefix = "" # Prefix for shadow tables (e.g., "shadow_") - create_shadow_tables = "" # Whether to create shadow tables in Spanner - rfc_start_date_time = "" # RFC 3339 timestamp for the start of replication (optional) - file_read_concurrency = "" # File read concurrency for Dataflow - spanner_project_id = "" # GCP project ID for Spanner - spanner_instance_id = "" # Spanner instance ID - spanner_database_id = "" # Spanner database ID - spanner_host = "" # Spanner host (typically "spanner.googleapis.com") - dlq_retry_minutes = "" # Retry interval for dead-letter queue messages (in minutes) - dlq_max_retry_count = "" # Maximum retry count for dead-letter queue messages - datastream_root_url = "" # Datastream API root URL (typically "https://datastream.googleapis.com/v1") - datastream_source_type = "" # Datastream source type (e.g., "MYSQL") - round_json_decimals = "" # Whether to round JSON decimal values in Dataflow - 
directory_watch_duration_in_minutes = "" # Directory watch duration (in minutes) for Dataflow - spanner_priority = "" # Spanner priority ("high", "medium", or "low") - local_session_file_path = "" # Path to local session file (optional) + shadow_table_prefix = "" # Prefix for shadow tables (e.g., "shadow_") + create_shadow_tables = "" # Whether to create shadow tables in Spanner + rfc_start_date_time = "" # RFC 3339 timestamp for the start of replication (optional) + file_read_concurrency = "" # File read concurrency for Dataflow + spanner_project_id = "" # GCP project ID for Spanner + spanner_instance_id = "" # Spanner instance ID + spanner_database_id = "" # Spanner database ID + spanner_host = "" # Spanner host (typically "spanner.googleapis.com") + dlq_retry_minutes = "" # Retry interval for dead-letter queue messages (in minutes) + dlq_max_retry_count = "" # Maximum retry count for dead-letter queue messages + datastream_root_url = "" # Datastream API root URL (typically "https://datastream.googleapis.com/v1") + datastream_source_type = "" # Datastream source type (e.g., "mysql") + round_json_decimals = "" # Whether to round JSON decimal values in Dataflow + directory_watch_duration_in_minutes = "" # Directory watch duration (in minutes) for Dataflow + spanner_priority = "" # Spanner priority ("HIGH", "MEDIUM", or "LOW") + local_session_file_path = "" # Path to local session file (optional) + transformation_jar_path = "" # GCS path to the custom transformation JAR (Optional) + transformation_custom_parameters = "" # Custom parameters used by the transformation JAR (Optional) + transformation_class_name = "" # Fully Qualified Class Name (Optional) + filtered_events_directory = "" # GCS path to store the filtered events (Optional) } runner_params = { additional_experiments = ["enable_google_cloud_profiler", "enable_stackdriver_agent_metrics", "disable_runner_v2", "enable_google_cloud_heap_sampling"] - autoscaling_algorithm = "" # e.g., "THROUGHPUT_BASED", "NONE" + 
autoscaling_algorithm = "" # e.g., "BASIC", "NONE" enable_streaming_engine = "" # Whether to use Dataflow Streaming Engine kms_key_name = "" # KMS key name for encryption (optional) labels = { env = "" } # Labels for the Dataflow job @@ -65,7 +69,7 @@ common_params = { service_account_email = "" # Service account email for Dataflow skip_wait_on_job_termination = "" # Whether to skip waiting for job termination on deletion staging_location = "gs:///staging" # GCS staging location for Dataflow - subnetwork = "