Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

660 invalid byte sequence #704

Merged
merged 7 commits into from
Nov 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
- [#668](https://github.com/LayerManager/layman/issues/668) Fix broken size of raster in EPSG:3034 during normalization.
- [#669](https://github.com/LayerManager/layman/issues/669) Fix slow publication of vector layers metadata to Micka. The reason was slow guessing of [`spatial_resolution.scale_denominator`](doc/metadata.md#spatial_resolution) metadata property.
- [#697](https://github.com/LayerManager/layman/issues/697) Normalized GeoTIFF files are created as BigTIFF
- [#660](https://github.com/LayerManager/layman/issues/660) Vector data files with invalid byte sequence are first converted to GeoJSON, then cleaned with iconv, and finally imported to database.
- After publishing to GeoServer, Layman checks that the layer is available in GetCapabilities

## v1.17.0
Expand Down
1 change: 1 addition & 0 deletions doc/data-storage.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ When user [publishes new layer](rest.md#post-workspace-layers)
Subsequently, asynchronous tasks ensure following steps:
- data file chunks and completed data files are saved to [filesystem](#filesystem) (if sent [asynchronously](async-file-upload.md))
- vector data files are imported to [PostgreSQL](#postgresql)
- files with an invalid byte sequence are first converted to GeoJSON, then cleaned with iconv, and finally imported to the database
- PostgreSQL table with vector data is registered to [GeoServer](#geoserver)
- raster files are normalized and compressed to BigTIFF GeoTIFF with overviews (pyramids)
- normalized GeoTIFF is registered to [GeoServer](#geoserver)
Expand Down
1 change: 1 addition & 0 deletions doc/rest.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ Processing chain consists of few steps:
- save file to workspace directory within Layman data directory
- save basic information (name, title, access_rights) into PostgreSQL
- for vector layers import the vector file to PostgreSQL database as new table into workspace schema
- files with an invalid byte sequence are first converted to GeoJSON, then cleaned with iconv, and finally imported to the database
- for raster layers normalize and compress raster file to GeoTIFF with overviews (pyramids); NoData values are normalized as transparent
- for vector layers publish the vector table as new layer (feature type) within appropriate WFS workspaces of GeoServer
- save bounding box into PostgreSQL
Expand Down
66 changes: 55 additions & 11 deletions src/layman/layer/db/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import math
import os
import logging
import subprocess

from db import util as db_util, PG_CONN
from layman.common import empty_method
from layman.common.language import get_languages_iso639_2
from layman.http import LaymanError
from layman import settings
Expand Down Expand Up @@ -103,13 +103,9 @@ def import_layer_vector_file(workspace, layername, main_filepath, crs_id):
raise LaymanError(11, private_data=pg_error)


def import_layer_vector_file_async(schema, table_name, main_filepath,
crs_id):
# import file to database table
import subprocess
assert table_name, f'schema={schema}, table_name={table_name}, main_filepath={main_filepath}'
def create_ogr2ogr_args(*, schema, table_name, main_filepath, crs_id, output):
pg_conn = ' '.join([f"{k}='{v}'" for k, v in PG_CONN.items()])
bash_args = [
ogr2ogr_args = [
'ogr2ogr',
'-nln', table_name,
'-nlt', 'GEOMETRY',
Expand All @@ -122,16 +118,64 @@ def import_layer_vector_file_async(schema, table_name, main_filepath,
# 'PG:{} active_schema={}'.format(PG_CONN, username),
]
if crs_id is not None:
bash_args.extend([
ogr2ogr_args.extend([
'-a_srs', crs_id,
])
if os.path.splitext(main_filepath)[1] == '.shp':
bash_args.extend([
ogr2ogr_args.extend([
'-lco', 'PRECISION=NO',
])
bash_args.extend([
f'{main_filepath}',
ogr2ogr_args.extend([
output,
])
return ogr2ogr_args


def import_layer_vector_file_async_with_iconv(schema, table_name, main_filepath, crs_id):
    """Start an asynchronous import of a vector file whose bytes may not be valid UTF-8.

    Builds a three-stage shell-free pipeline:
      1. ogr2ogr converts the source file to GeoJSON on stdout,
      2. iconv -c strips any byte sequences that are not valid UTF-8,
      3. a second ogr2ogr reads the cleaned GeoJSON from stdin and imports it
         into the PostgreSQL table (args built by create_ogr2ogr_args).

    :param schema: target PostgreSQL schema name (forwarded to create_ogr2ogr_args)
    :param table_name: target table name; must be non-empty
    :param main_filepath: path to the source vector data file
    :param crs_id: CRS identifier passed to ogr2ogr via -a_srs
        # NOTE(review): unlike import_layer_vector_file_async, crs_id is not
        # guarded by `if crs_id is not None` here, so None would be passed to
        # -a_srs of the first ogr2ogr — presumably callers only reach this
        # path with a known CRS; confirm against the caller in tasks.py.
    :return: list of the three Popen objects, in pipeline order; the caller is
        expected to wait on / terminate them (the last one produces the final
        stdout).
    """
    # Validation of the target table name; the f-string gives context on failure.
    assert table_name, f'schema={schema}, table_name={table_name}, main_filepath={main_filepath}'

    # Stage 1: source file -> GeoJSON on stdout ('/vsistdout/' is GDAL's
    # virtual stdout file). Partial reprojection is enabled so a few
    # untransformable features do not abort the whole conversion; -unsetFid
    # drops source FIDs so the final import can assign its own.
    first_ogr2ogr_args = [
        'ogr2ogr',
        '--config', 'OGR_ENABLE_PARTIAL_REPROJECTION', 'TRUE',
        '-unsetFid',
        '-a_srs', crs_id,
        '-f', 'GeoJSON',
        '/vsistdout/',
        f'{main_filepath}',
    ]
    # Stage 2: iconv -c silently discards characters that cannot be converted
    # to UTF-8 — this is the actual "invalid byte sequence" cleanup.
    iconv_args = [
        'iconv',
        '-c',
        '-t', 'utf8',
    ]
    # Stage 3: same import command as the plain path, but reading the cleaned
    # GeoJSON from stdin ('/vsistdin/' is GDAL's virtual stdin file).
    final_ogr2ogr_args = create_ogr2ogr_args(schema=schema,
                                             table_name=table_name,
                                             main_filepath=main_filepath,
                                             crs_id=crs_id,
                                             output='/vsistdin/')

    first_ogr2ogr_process = subprocess.Popen(first_ogr2ogr_args,
                                             stdout=subprocess.PIPE)
    # Close the parent's copy of each intermediate pipe right after the next
    # stage inherits it; otherwise the upstream process would never receive
    # SIGPIPE if a downstream stage exits early, and the pipeline could hang.
    with first_ogr2ogr_process.stdout:
        iconv_process = subprocess.Popen(iconv_args,
                                         stdin=first_ogr2ogr_process.stdout,
                                         stdout=subprocess.PIPE)
    with iconv_process.stdout:
        final_ogr2ogr_process = subprocess.Popen(final_ogr2ogr_args,
                                                 stdin=iconv_process.stdout,
                                                 stdout=subprocess.PIPE)
    # Returned in pipeline order; callers typically communicate() with the
    # last process and terminate() all of them on abort.
    return [first_ogr2ogr_process, iconv_process, final_ogr2ogr_process]


def import_layer_vector_file_async(schema, table_name, main_filepath,
crs_id):
# import file to database table
assert table_name, f'schema={schema}, table_name={table_name}, main_filepath={main_filepath}'
bash_args = create_ogr2ogr_args(schema=schema,
table_name=table_name,
main_filepath=main_filepath,
crs_id=crs_id,
output=main_filepath)

# print(' '.join(bash_args))
process = subprocess.Popen(bash_args, stdout=subprocess.PIPE,
Expand Down
52 changes: 31 additions & 21 deletions src/layman/layer/db/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,27 +43,37 @@ def refresh_table(
assert len(main_filepaths) == 1
main_filepath = main_filepaths[0]
table_name = db.get_table_name(workspace, layername)
process = db.import_layer_vector_file_async(workspace, table_name, main_filepath, crs_id)
while process.poll() is None and not self.is_aborted():
pass
if self.is_aborted():
logger.info(f'terminating {workspace} {layername}')
process.terminate()
logger.info(f'terminating {workspace} {layername}')
table.delete_layer(workspace, layername)
raise AbortedException
return_code = process.poll()
output = process.stdout.read()
if return_code != 0 or output:
info = table.get_layer_info(workspace, layername)
if not info:
pg_error = str(output)
logger.error(f"STDOUT: {pg_error}")
if "ERROR: zero-length delimited identifier at or near" in pg_error:
err_code = 28
else:
err_code = 11
raise LaymanError(err_code, private_data=pg_error)

for try_num in [1, 2]:
if try_num == 1:
processes = [db.import_layer_vector_file_async(workspace, table_name, main_filepath, crs_id)]
elif try_num == 2:
processes = db.import_layer_vector_file_async_with_iconv(workspace, table_name, main_filepath, crs_id)
process = processes[-1]
stdout, stderr = process.communicate()
return_code = process.poll()
if self.is_aborted():
logger.info(f'terminating {workspace} {layername}')
for proc in processes:
proc.terminate()
logger.info(f'deleting {workspace} {layername}')
table.delete_layer(workspace, layername)
raise AbortedException
if return_code != 0 or stdout or stderr:
info = table.get_layer_info(workspace, layername)
if not info:
str_error = str(stderr)
str_out = str(stdout)
logger.error(f"STDOUT: {str(stdout)}")
logger.error(f"STDERR: {str_error}")
if "ERROR: zero-length delimited identifier at or near" in str_out:
err_code = 28
elif 'ERROR: invalid byte sequence for encoding "UTF8":' in str_out:
continue
else:
err_code = 11
raise LaymanError(err_code, private_data=str_error)
break

crs = db.get_crs(workspace, table_name)
if crs_def.CRSDefinitions[crs].srid:
Expand Down
2 changes: 2 additions & 0 deletions src/layman/layer/rest_workspace_layer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import shutil
import tempfile
import logging
from flask import Blueprint, jsonify, request, current_app as app, g

from layman.common import rest as rest_util
Expand All @@ -12,6 +13,7 @@
from .filesystem import input_file, input_style, input_chunk, util as fs_util

bp = Blueprint('rest_workspace_layer', __name__)
logger = logging.getLogger(__name__)


@bp.before_request
Expand Down
3 changes: 3 additions & 0 deletions test_tools/process_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ def patch_workspace_publication(publication_type,
assert not compress_settings or compress

assert not (time_regex and publication_type == MAP_TYPE)
assert not (publication_type == LAYER_TYPE and crs and not file_paths)

with app.app_context():
r_url = url_for(publication_type_def.patch_workspace_publication_url,
Expand Down Expand Up @@ -235,6 +236,8 @@ def patch_workspace_publication(publication_type,
data['overview_resampling'] = overview_resampling
if time_regex:
data['time_regex'] = time_regex
if publication_type == LAYER_TYPE and crs:
data['crs'] = crs
index-git marked this conversation as resolved.
Show resolved Hide resolved

response = requests.patch(r_url,
files=files,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import os

import tests.asserts.processing as processing
import tests.asserts.final.publication as publication
from test_tools import process_client, util
from . import common_publications as publications
from ... import Action, Publication, dynamic_data as consts
from .. import common_publications as publications
from .... import Action, Publication, dynamic_data as consts

DIRECTORY = os.path.dirname(os.path.abspath(__file__))

KEY_PUBLICATION_TYPE = 'publ_type'
KEY_ACTION_PARAMS = 'action_params'
Expand Down Expand Up @@ -68,6 +72,22 @@
consts.KEY_FINAL_ASSERTS: [
],
},
'invalid_byte_sequence': {
index-git marked this conversation as resolved.
Show resolved Hide resolved
KEY_PUBLICATION_TYPE: process_client.LAYER_TYPE,
KEY_ACTION_PARAMS: {
'file_paths': [
f'{DIRECTORY}/invalid_byte_sequence.zip',
],
'crs': 'EPSG:5514',
'compress': False,
},
consts.KEY_FINAL_ASSERTS: [
index-git marked this conversation as resolved.
Show resolved Hide resolved
*publication.IS_LAYER_COMPLETE_AND_CONSISTENT,
Action(publication.internal.thumbnail_equals, {
'exp_thumbnail': f'{DIRECTORY}/thumbnail_invalid_byte_sequence.png',
}),
],
},
}


Expand Down
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.