Skip to content

Commit

Permalink
update test results with new type in the binary_hash column
Browse files Browse the repository at this point in the history
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
  • Loading branch information
dolfim-ibm committed Nov 11, 2024
1 parent 4999604 commit 6f5e2cd
Show file tree
Hide file tree
Showing 14 changed files with 47 additions and 35 deletions.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
"job name": "pdf2parquet",
"job type": "pure python",
"job id": "job_id",
"start_time": "2024-10-29 14:17:59",
"end_time": "2024-10-29 14:18:05",
"start_time": "2024-11-11 21:04:30",
"end_time": "2024-11-11 21:04:38",
"status": "success"
},
"code": {
Expand All @@ -15,6 +15,7 @@
"path": "path"
},
"job_input_params": {
"batch_size": -1,
"artifacts_path": null,
"contents_type": "text/markdown",
"do_table_structure": true,
Expand All @@ -28,23 +29,25 @@
"random_samples": -1,
"files_to_use": [
".pdf",
".docx",
".pptx",
".zip"
],
"num_processors": 0
},
"execution_stats": {
"cpus": 16.8,
"cpus": 21.1,
"gpus": 0,
"memory": 31.22,
"memory": 32.09,
"object_store": 0,
"execution time, min": 0.108
"execution time, min": 0.139
},
"job_output_stats": {
"source_files": 2,
"source_size": 605137,
"result_files": 2,
"result_size": 33044,
"processing_time": 6.478,
"result_size": 32939,
"processing_time": 5.596,
"nrows": 3,
"nsuccess": 3,
"nfail": 0,
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
"job name": "pdf2parquet",
"job type": "pure python",
"job id": "job_id",
"start_time": "2024-10-31 13:14:39",
"end_time": "2024-10-31 13:16:41",
"start_time": "2024-11-11 21:06:08",
"end_time": "2024-11-11 21:06:14",
"status": "success"
},
"code": {
Expand Down Expand Up @@ -36,22 +36,22 @@
"num_processors": 0
},
"execution_stats": {
"cpus": 39.0,
"cpus": 21.5,
"gpus": 0,
"memory": 29.87,
"memory": 32.19,
"object_store": 0,
"execution time, min": 2.029
"execution time, min": 0.1
},
"job_output_stats": {
"source_files": 2,
"source_size": 605137,
"result_files": 1,
"processing_time": 3.888,
"processing_time": 3.353,
"nrows": 3,
"nsuccess": 3,
"nfail": 0,
"nskip": 0,
"result_size": 27200
"result_size": 27147
},
"source": {
"name": "/Users/dol/codes/data-prep-kit/transforms/language/pdf2parquet/python/test-data/input",
Expand Down
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
"job name": "pdf2parquet",
"job type": "pure python",
"job id": "job_id",
"start_time": "2024-10-29 14:20:01",
"end_time": "2024-10-29 14:20:07",
"start_time": "2024-11-11 21:05:31",
"end_time": "2024-11-11 21:05:36",
"status": "success"
},
"code": {
Expand All @@ -15,6 +15,7 @@
"path": "path"
},
"job_input_params": {
"batch_size": -1,
"artifacts_path": null,
"contents_type": "application/json",
"do_table_structure": true,
Expand All @@ -28,23 +29,25 @@
"random_samples": -1,
"files_to_use": [
".pdf",
".docx",
".pptx",
".zip"
],
"num_processors": 0
},
"execution_stats": {
"cpus": 18.0,
"cpus": 21.4,
"gpus": 0,
"memory": 30.77,
"memory": 32.33,
"object_store": 0,
"execution time, min": 0.105
"execution time, min": 0.096
},
"job_output_stats": {
"source_files": 2,
"source_size": 605137,
"result_files": 2,
"result_size": 22953,
"processing_time": 6.282,
"result_size": 22850,
"processing_time": 3.229,
"nrows": 3,
"nsuccess": 3,
"nfail": 0,
Expand Down
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
"job name": "pdf2parquet",
"job type": "pure python",
"job id": "job_id",
"start_time": "2024-10-29 14:19:30",
"end_time": "2024-10-29 14:19:33",
"start_time": "2024-11-11 21:05:04",
"end_time": "2024-11-11 21:05:06",
"status": "success"
},
"code": {
Expand All @@ -15,6 +15,7 @@
"path": "path"
},
"job_input_params": {
"batch_size": -1,
"artifacts_path": null,
"contents_type": "text/markdown",
"do_table_structure": false,
Expand All @@ -28,23 +29,25 @@
"random_samples": -1,
"files_to_use": [
".pdf",
".docx",
".pptx",
".zip"
],
"num_processors": 0
},
"execution_stats": {
"cpus": 17.3,
"cpus": 21.6,
"gpus": 0,
"memory": 28.85,
"memory": 29.57,
"object_store": 0,
"execution time, min": 0.043
"execution time, min": 0.041
},
"job_output_stats": {
"source_files": 2,
"source_size": 605137,
"result_files": 2,
"result_size": 29659,
"processing_time": 2.554,
"result_size": 29555,
"processing_time": 1.997,
"nrows": 3,
"nsuccess": 3,
"nfail": 0,
Expand Down
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
"job name": "pdf2parquet",
"job type": "pure python",
"job id": "job_id",
"start_time": "2024-10-29 14:17:59",
"end_time": "2024-10-29 14:18:05",
"start_time": "2024-11-11 21:04:30",
"end_time": "2024-11-11 21:04:38",
"status": "success"
},
"code": {
Expand All @@ -15,6 +15,7 @@
"path": "path"
},
"job_input_params": {
"batch_size": -1,
"artifacts_path": null,
"contents_type": "text/markdown",
"do_table_structure": true,
Expand All @@ -28,23 +29,25 @@
"random_samples": -1,
"files_to_use": [
".pdf",
".docx",
".pptx",
".zip"
],
"num_processors": 0
},
"execution_stats": {
"cpus": 16.8,
"cpus": 21.1,
"gpus": 0,
"memory": 31.22,
"memory": 32.09,
"object_store": 0,
"execution time, min": 0.108
"execution time, min": 0.139
},
"job_output_stats": {
"source_files": 2,
"source_size": 605137,
"result_files": 2,
"result_size": 33044,
"processing_time": 6.478,
"result_size": 32939,
"processing_time": 5.596,
"nrows": 3,
"nsuccess": 3,
"nfail": 0,
Expand Down
Binary file not shown.

0 comments on commit 6f5e2cd

Please sign in to comment.