def import_layer_vector_file_async_with_iconv(schema, table_name, main_filepath, crs_id):
    """Import a vector file into PostgreSQL through an ``ogr2ogr | iconv | ogr2ogr`` pipeline.

    The file is first converted to GeoJSON on stdout, the stream is passed
    through ``iconv -c -t utf8`` (``-c`` drops characters that cannot be
    converted), and the cleaned stream is loaded from stdin into table
    ``table_name`` of schema ``schema``.

    :param schema: target database schema (passed as ``-lco SCHEMA=...``)
    :param table_name: target table name; must be truthy
    :param main_filepath: path of the vector file to import
    :param crs_id: CRS assigned with ``-a_srs`` in both ogr2ogr steps, or
        ``None`` to assign no CRS
    :return: list ``[first_ogr2ogr, iconv, final_ogr2ogr]`` of
        ``subprocess.Popen`` objects; every process has already been waited
        for when the function returns. Only the last process still has a
        readable ``stdout`` (with its stderr merged in).
    :raises AssertionError: if ``table_name`` is empty
    """
    import subprocess
    assert table_name, f'schema={schema}, table_name={table_name}, main_filepath={main_filepath}'
    pg_conn = ' '.join(f"{k}='{v}'" for k, v in PG_CONN.items())

    first_ogr2ogr_args = [
        'ogr2ogr',
        '-nlt', 'GEOMETRY',
        '--config', 'OGR_ENABLE_PARTIAL_REPROJECTION', 'TRUE',
        '-unsetFid',
    ]
    # crs_id may be None (see the final step below); a None item in argv
    # would make Popen raise TypeError, so only pass -a_srs when it is set.
    if crs_id is not None:
        first_ogr2ogr_args.extend([
            '-a_srs', crs_id,
        ])
    first_ogr2ogr_args.extend([
        '-f', 'GeoJSON',
        '/vsistdout/',
        f'{main_filepath}',
    ])

    iconv_args = [
        'iconv',
        '-c',
        '-t', 'utf8',
    ]

    final_ogr2ogr_args = [
        'ogr2ogr',
        '-nln', table_name,
        '-nlt', 'GEOMETRY',
        '--config', 'OGR_ENABLE_PARTIAL_REPROJECTION', 'TRUE',
        '-lco', f'SCHEMA={schema}',
        '-f', 'PostgreSQL',
        '-unsetFid',
        f'PG:{pg_conn}',
    ]
    if crs_id is not None:
        final_ogr2ogr_args.extend([
            '-a_srs', crs_id,
        ])
    if os.path.splitext(main_filepath)[1] == '.shp':
        final_ogr2ogr_args.extend([
            '-lco', 'PRECISION=NO',
        ])
    final_ogr2ogr_args.extend([
        '/vsistdin/',
    ])

    # stdout of the first step carries the GeoJSON data, so its stderr must
    # NOT be merged into it: any warning text would end up inside the stream
    # handed to iconv and the final ogr2ogr. stderr is left inherited.
    first_ogr2ogr_process = subprocess.Popen(first_ogr2ogr_args,
                                             stdout=subprocess.PIPE)
    # Closing the parent's copy of each intermediate pipe once the consumer
    # has been started lets the producer receive SIGPIPE if the consumer exits.
    with first_ogr2ogr_process.stdout:
        iconv_process = subprocess.Popen(iconv_args,
                                         stdin=first_ogr2ogr_process.stdout,
                                         stdout=subprocess.PIPE)
        with iconv_process.stdout:
            final_ogr2ogr_process = subprocess.Popen(final_ogr2ogr_args,
                                                     stdin=iconv_process.stdout,
                                                     stdout=subprocess.PIPE,
                                                     stderr=subprocess.STDOUT)

    processes = [first_ogr2ogr_process, iconv_process, final_ogr2ogr_process]
    # NOTE(review): the final process' stdout pipe is not drained while
    # waiting; if it ever prints more than the OS pipe buffer holds, wait()
    # can block forever. Kept because callers read that pipe afterwards.
    for proc in processes:
        proc.wait()
    return processes
+ proc.terminate() + logger.info(f'terminating {workspace} {layername}') + table.delete_layer(workspace, layername) + raise AbortedException + return_code = process.poll() + output = process.stdout.read() + if return_code != 0 or output: + info = table.get_layer_info(workspace, layername) + if not info: + pg_error = str(output) + logger.error(f"STDOUT: {pg_error}") + err_code = 11 + raise LaymanError(err_code, private_data=pg_error) else: err_code = 11 - raise LaymanError(err_code, private_data=pg_error) + raise LaymanError(err_code, private_data=pg_error) crs = db.get_crs(workspace, table_name) if crs_def.CRSDefinitions[crs].srid: diff --git a/tests/dynamic_data/publications/file_input/__init__.py b/tests/dynamic_data/publications/file_input/__init__.py index 06a1d197e..a43348c02 100644 --- a/tests/dynamic_data/publications/file_input/__init__.py +++ b/tests/dynamic_data/publications/file_input/__init__.py @@ -1,9 +1,13 @@ +import os + import tests.asserts.processing as processing import tests.asserts.final.publication as publication from test_tools import process_client, util from .. import common_publications as publications from .... 
import Action, Publication, dynamic_data as consts +DIRECTORY = os.path.dirname(os.path.abspath(__file__)) + KEY_PUBLICATION_TYPE = 'publ_type' KEY_ACTION_PARAMS = 'action_params' @@ -58,12 +62,14 @@ }), ], }, - 'capslock_extension_json': { - KEY_PUBLICATION_TYPE: process_client.MAP_TYPE, + 'invalid_byte_sequence': { + KEY_PUBLICATION_TYPE: process_client.LAYER_TYPE, KEY_ACTION_PARAMS: { 'file_paths': [ - 'sample/layman.map/smaLL_Map.jSOn', + f'{DIRECTORY}/invalid_byte_sequence.zip', ], + 'crs': 'EPSG:5514', + 'compress': False, }, consts.KEY_FINAL_ASSERTS: [ ], diff --git a/tests/dynamic_data/publications/file_input/invalid_byte_sequence.zip b/tests/dynamic_data/publications/file_input/invalid_byte_sequence.zip new file mode 100644 index 000000000..8b907e4b9 Binary files /dev/null and b/tests/dynamic_data/publications/file_input/invalid_byte_sequence.zip differ