Add the latest changes in Datascan API (#8541)

GoogleCloudPlatform · Aug 17, 2023 · d23370a
1 parent f1fc0d9
commit d23370a
Show file tree

Hide file tree

Showing 4 changed files with 83 additions and 3 deletions.
diff --git a/mmv1/products/dataplex/Datascan.yaml b/mmv1/products/dataplex/Datascan.yaml
@@ -250,10 +250,27 @@ properties:
         name: 'samplingPercent'
         description: |
           The percentage of the records to be selected from the dataset for DataScan.
+          Value can range between 0.0 and 100.0 with up to 3 significant decimal digits.
+          Sampling is not applied if `sampling_percent` is not specified, 0 or 100.
       - !ruby/object:Api::Type::String
         name: 'rowFilter'
         description: |
           A filter applied to all rows in a single DataScan job. The filter needs to be a valid SQL expression for a WHERE clause in BigQuery standard SQL syntax. Example: col1 >= 0 AND col2 < 10
+      - !ruby/object:Api::Type::NestedObject
+        name: 'postScanActions'
+        description: |
+          Actions to take upon job completion.
+        properties:
+          - !ruby/object:Api::Type::NestedObject
+            name: 'bigqueryExport'
+            description: |
+              If set, results will be exported to the provided BigQuery table.
+            properties:
+              - !ruby/object:Api::Type::String
+                name: 'resultsTable'
+                description: |
+                  The BigQuery table to export DataQualityScan results to.
+                  Format: //bigquery.googleapis.com/projects/PROJECT_ID/datasets/DATASET_ID/tables/TABLE_ID
       - !ruby/object:Api::Type::Array
         name: 'rules'
         min_size: 1
@@ -278,6 +295,19 @@ properties:
               name: 'threshold'
               description: |
                 The minimum ratio of passing_rows / total_rows required to pass this rule, with a range of [0.0, 1.0]. 0 indicates default value (i.e. 1.0).
+            - !ruby/object:Api::Type::String
+              name: 'name'
+              description: |
+                A mutable name for the rule.
+                The name must contain only letters (a-z, A-Z), numbers (0-9), or hyphens (-).
+                The maximum length is 63 characters.
+                Must start with a letter.
+                Must end with a number or a letter.
+            - !ruby/object:Api::Type::String
+              name: 'description'
+              description: |
+                Description of the rule.
+                The maximum length is 1,024 characters.
             - !ruby/object:Api::Type::NestedObject
               name: 'rangeExpectation'
               description: |
@@ -336,7 +366,7 @@ properties:
               allow_empty_object: true
               send_empty_value: true
               description: |
-                ColumnAggregate rule which evaluates whether the column has duplicates.
+                Row-level rule which evaluates whether each column value is unique.
               properties: []
             - !ruby/object:Api::Type::NestedObject
               name: 'statisticRangeExpectation'
@@ -409,10 +439,53 @@ properties:
         name: 'samplingPercent'
         description: |
           The percentage of the records to be selected from the dataset for DataScan.
+          Value can range between 0.0 and 100.0 with up to 3 significant decimal digits.
+          Sampling is not applied if `sampling_percent` is not specified, 0 or 100.
       - !ruby/object:Api::Type::String
         name: 'rowFilter'
         description: |
           A filter applied to all rows in a single DataScan job. The filter needs to be a valid SQL expression for a WHERE clause in BigQuery standard SQL syntax. Example: col1 >= 0 AND col2 < 10
+      - !ruby/object:Api::Type::NestedObject
+        name: 'postScanActions'
+        description: |
+          Actions to take upon job completion.
+        properties:
+          - !ruby/object:Api::Type::NestedObject
+            name: 'bigqueryExport'
+            description: |
+              If set, results will be exported to the provided BigQuery table.
+            properties:
+              - !ruby/object:Api::Type::String
+                name: 'resultsTable'
+                description: |
+                  The BigQuery table to export DataProfileScan results to.
+                  Format: //bigquery.googleapis.com/projects/PROJECT_ID/datasets/DATASET_ID/tables/TABLE_ID
+      - !ruby/object:Api::Type::NestedObject
+        name: 'includeFields'
+        description: |
+          The fields to include in data profile.
+          If not specified, all fields at the time of profile scan job execution are included, except for ones listed in `exclude_fields`.
+        properties:
+          - !ruby/object:Api::Type::Array
+            name: 'fieldNames'
+            description: |
+              Expected input is a list of fully qualified names of fields as in the schema.
+              Only top-level field names for nested fields are supported.
+              For instance, if 'x' is of nested field type, listing 'x' is supported but 'x.y.z' is not supported. Here 'y' and 'y.z' are nested fields of 'x'.
+            item_type: Api::Type::String
+      - !ruby/object:Api::Type::NestedObject
+        name: 'excludeFields'
+        description: |
+          The fields to exclude from data profile.
+          If specified, the fields will be excluded from data profile, regardless of `include_fields` value.
+        properties:
+          - !ruby/object:Api::Type::Array
+            name: 'fieldNames'
+            description: |
+              Expected input is a list of fully qualified names of fields as in the schema.
+              Only top-level field names for nested fields are supported.
+              For instance, if 'x' is of nested field type, listing 'x' is supported but 'x.y.z' is not supported. Here 'y' and 'y.z' are nested fields of 'x'.
+            item_type: Api::Type::String
   - !ruby/object:Api::Type::NestedObject
     name: 'dataQualityResult'
     output: true

diff --git a/mmv1/templates/terraform/examples/dataplex_datascan_basic_profile.tf.erb b/mmv1/templates/terraform/examples/dataplex_datascan_basic_profile.tf.erb
@@ -12,8 +12,7 @@ resource "google_dataplex_datascan" "<%= ctx[:primary_resource_id] %>" {
     }
   }
 
-  data_profile_spec {  
-  }
+  data_profile_spec {}
 
   project = "<%= ctx[:test_env_vars]['project_name'] %>"
 }

diff --git a/mmv1/templates/terraform/examples/dataplex_datascan_basic_quality.tf.erb b/mmv1/templates/terraform/examples/dataplex_datascan_basic_quality.tf.erb
@@ -15,6 +15,8 @@ resource "google_dataplex_datascan" "<%= ctx[:primary_resource_id] %>" {
   data_quality_spec {
     rules {
       dimension = "VALIDITY"
+      name = "rule1"
+      description = "rule 1 for validity dimension"
       table_condition_expectation {
         sql_expression = "COUNT(*) > 0"
       }

diff --git a/mmv1/templates/terraform/examples/dataplex_datascan_full_profile.tf.erb b/mmv1/templates/terraform/examples/dataplex_datascan_full_profile.tf.erb
@@ -22,6 +22,12 @@ resource "google_dataplex_datascan" "<%= ctx[:primary_resource_id] %>" {
   data_profile_spec {
     sampling_percent = 80
     row_filter = "word_count > 10"
+    include_fields {
+      field_names = ["word_count"]
+    }
+    exclude_fields {
+      field_names = ["property_type"]
+    }
   }
 
   project = "<%= ctx[:test_env_vars]['project_name'] %>"