Skip to content

Commit

Permalink
[HUDI-5111] Improve integration test coverage (apache#7092)
Browse files: browse the repository at this point in the history

Co-authored-by: Raymond Xu <2701446+xushiyan@users.noreply.github.com>
  • Loading branch information
2 people authored and Alexey Kudinkin committed Dec 14, 2022
1 parent f7e5065 commit 2c18077
Show file tree
Hide file tree
Showing 19 changed files with 238 additions and 52 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,21 @@ dag_content:
delete_input_data: false
type: ValidateDatasetNode
deps: first_insert
first_presto_query:
config:
execute_itr_count: 5
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 30000
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 5
delete_input_data: true
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 50
dag_rounds: 20
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
Expand Down Expand Up @@ -71,15 +71,15 @@ dag_content:
deps: first_delete
second_validate:
config:
validate_once_every_itr: 5
execute_itr_count: 20
validate_hive: true
delete_input_data: true
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: second_hive_sync
last_validate:
config:
execute_itr_count: 50
execute_itr_count: 20
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 30
dag_rounds: 20
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
Expand Down Expand Up @@ -65,9 +65,21 @@ dag_content:
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 20
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 7600
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 30
execute_itr_count: 20
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 50
dag_rounds: 20
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
Expand Down Expand Up @@ -65,9 +65,21 @@ dag_content:
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 20
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 7600
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 50
execute_itr_count: 20
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
19 changes: 16 additions & 3 deletions docker/demo/config/test-suite/deltastreamer-medium-clustering.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# to be used with test-aggressive-clean-archival.properties

dag_name: deltastreamer-medium-clustering.yaml
dag_rounds: 20
dag_rounds: 15
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
Expand Down Expand Up @@ -62,14 +62,27 @@ dag_content:
deps: first_upsert
second_validate:
config:
validate_once_every_itr: 3
validate_hive: false
delete_input_data: true
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 15
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 3600
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 20
execute_itr_count: 15
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# to be used with test-aggressive-clean-archival.properties

dag_name: deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 20
dag_rounds: 15
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
Expand Down Expand Up @@ -68,9 +68,21 @@ dag_content:
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 15
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 3600
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 20
execute_itr_count: 15
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
14 changes: 13 additions & 1 deletion docker/demo/config/test-suite/deltastreamer-non-partitioned.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,20 @@ dag_content:
delete_input_data: true
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 6
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 11000
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 6
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: deltastreamer-long-running-example.yaml
dag_rounds: 50
dag_rounds: 20
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
Expand Down Expand Up @@ -65,9 +65,21 @@ dag_content:
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 20
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 3600
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 50
execute_itr_count: 20
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
16 changes: 14 additions & 2 deletions docker/demo/config/test-suite/simple-clustering.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: simple-clustering.yaml
dag_rounds: 30
dag_rounds: 15
dag_intermittent_delay_mins: 0
dag_content:
first_insert:
Expand Down Expand Up @@ -54,11 +54,23 @@ dag_content:
deps: first_delete
first_cluster:
config:
execute_itr_count: 25
execute_itr_count: 10
type: ClusteringNode
deps: first_validate
second_validate:
config:
validate_hive: false
type: ValidateDatasetNode
deps: first_cluster
first_presto_query:
config:
validate_once_every_itr: 5
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 8300
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
12 changes: 12 additions & 0 deletions docker/demo/config/test-suite/simple-deltastreamer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,15 @@ dag_content:
delete_input_data: true
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
validate_once_every_itr: 3
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 9600
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
14 changes: 13 additions & 1 deletion docker/demo/config/test-suite/spark-immutable-dataset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,21 @@ dag_content:
delete_input_data: false
type: ValidateDatasetNode
deps: first_insert
first_presto_query:
config:
execute_itr_count: 5
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 48000
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 5
delete_input_data: true
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,18 @@ dag_content:
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 6
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 6000
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 6
Expand Down
16 changes: 14 additions & 2 deletions docker/demo/config/test-suite/spark-long-running.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: cow-spark-deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 30
dag_rounds: 20
dag_intermittent_delay_mins: 0
dag_content:
first_insert:
Expand Down Expand Up @@ -49,9 +49,21 @@ dag_content:
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateDatasetNode
deps: first_delete
first_presto_query:
config:
execute_itr_count: 30
presto_props:
prop1: "SET SESSION hive.parquet_use_column_names = true"
presto_queries:
query1: "select count(*) from testdb.table1"
result1: 189000
query2: "select count(*) from testdb.table1 group by _row_key having count(*) > 1"
result2: 0
type: PrestoQueryNode
deps: second_validate
last_validate:
config:
execute_itr_count: 30
max_wait_time_for_deltastreamer_catch_up_ms: 600000
type: ValidateAsyncOperations
deps: second_validate
deps: first_presto_query
Loading

0 comments on commit 2c18077

Please sign in to comment.