Skip to content

Commit

Permalink
Merge pull request #288 from nasa/270_DSWx_S1_input_validation
Browse files Browse the repository at this point in the history
DSWx-S1 Input Validation
  • Loading branch information
collinss-jpl authored May 17, 2023
2 parents 35c7e83 + a70ef88 commit 92175f7
Show file tree
Hide file tree
Showing 8 changed files with 489 additions and 164 deletions.
34 changes: 5 additions & 29 deletions src/opera/pge/dswx_hls/dswx_hls_pge.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from opera.util.img_utils import get_geotiff_spacecraft_name
from opera.util.img_utils import get_hls_filename_fields
from opera.util.img_utils import set_geotiff_metadata
from opera.util.input_validation import validate_dswx_inputs
from opera.util.metadata_utils import get_geographic_boundaries_from_mgrs_tile
from opera.util.metadata_utils import get_sensor_from_spacecraft_name
from opera.util.render_jinja2 import render_jinja2
Expand All @@ -48,37 +49,10 @@ class DSWxHLSPreProcessorMixin(PreProcessorMixin):

_pre_mixin_name = "DSWxHLSPreProcessorMixin"

def _validate_inputs(self):
    """
    Checks each entry of the RunConfig input file list for validity.

    Every entry of ``self.runconfig.input_files`` is converted to an
    absolute path and then classified as one of the following:

    - Path does not exist: reported with ErrorCode.INPUT_NOT_FOUND.
    - Path is a directory: must contain at least one entry matching the
      ``*.tif*`` glob pattern, otherwise reported with
      ErrorCode.INPUT_NOT_FOUND.
    - Path is anything else: must end with ``.tif``, otherwise reported
      with ErrorCode.INVALID_INPUT.

    All problems are reported through ``self.logger.critical``.

    """
    for configured_input in self.runconfig.input_files:
        resolved_path = abspath(configured_input)

        # Nothing further can be checked for a path that is not on disk
        if not exists(resolved_path):
            missing_msg = f"Could not locate specified input file/directory {resolved_path}"

            self.logger.critical(self.name, ErrorCode.INPUT_NOT_FOUND, missing_msg)
            continue

        # Directories only need to hold at least one tif-like entry
        if isdir(resolved_path):
            tif_matches = glob.glob(join(resolved_path, '*.tif*'))

            if not tif_matches:
                empty_dir_msg = f"Input directory {resolved_path} does not contain any tif files"

                self.logger.critical(self.name, ErrorCode.INPUT_NOT_FOUND, empty_dir_msg)

            continue

        # Any other existing path is validated by its extension
        if not resolved_path.endswith(".tif"):
            extension_msg = f"Input file {resolved_path} does not have .tif extension"

            self.logger.critical(self.name, ErrorCode.INVALID_INPUT, extension_msg)

def _validate_ancillary_inputs(self):
"""
Evaluates the list of ancillary inputs from the RunConfig to ensure they
are exist and have an expected file extension.
exist and have an expected file extension.
For the shoreline shapefile, this method also checks to ensure a full
set of expected shapefiles were provided alongside the .shp file configured
Expand Down Expand Up @@ -182,7 +156,9 @@ def run_preprocessor(self, **kwargs):
"""
super().run_preprocessor(**kwargs)

self._validate_inputs()
validate_dswx_inputs(
self.runconfig, self.logger, self.runconfig.pge_name, valid_extensions=(".tif",)
)
self._validate_ancillary_inputs()
self._validate_expected_input_platforms()

Expand Down
54 changes: 52 additions & 2 deletions src/opera/pge/dswx_s1/dswx_s1_pge.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@
from Sentinel-1 A/B (S1) PGE.
"""

from os.path import isfile
from os.path import abspath, exists, isfile, splitext

import yamale

import opera.util.input_validation as input_validation
from opera.pge.base.base_pge import PgeExecutor
from opera.pge.base.base_pge import PostProcessorMixin
from opera.pge.base.base_pge import PreProcessorMixin
from opera.util.error_codes import ErrorCode
from opera.util.input_validation import validate_dswx_inputs


class DSWxS1PreProcessorMixin(PreProcessorMixin):
Expand Down Expand Up @@ -75,12 +77,56 @@ def _validate_algorithm_parameters_config(self):
# Validate the algorithm parameter Runconfig against its schema file
yamale.validate(algorithm_parameters_schema, algorithm_parameters_config_data, strict=True)

def _validate_ancillary_inputs(self):
    """
    Validates the ancillary inputs configured in the SAS section of the
    RunConfig.

    Each recognized key of the ``dynamic_ancillary_file_group`` is handed to
    ``input_validation.check_input`` together with the file extensions
    accepted for that kind of ancillary. For the shoreline shapefile, the
    companion .dbf, .prj and .shx files are additionally required to exist
    next to the configured .shp file; a missing companion is reported via
    ``self.logger.critical`` with ErrorCode.INVALID_INPUT.

    """
    raster_extensions = ('.tif', '.tiff')

    # Accepted extensions for each ancillary key that holds a file path.
    # Keys absent from this table are not validated; this includes the
    # "*_description" fields, which are part of the SAS input group but
    # are not actually file paths.
    extensions_by_key = {
        'dem_file': ('.tif', '.tiff', '.vrt'),
        'reference_water_file': raster_extensions,
        'world_file': raster_extensions,
        'hand_file': raster_extensions,
        'shoreline_shapefile': ('.shp',),
        'algorithm_parameters': ('.yaml', ),
    }

    sas_groups = self.runconfig.sas_config['runconfig']['groups']
    ancillary_group = sas_groups['dynamic_ancillary_file_group']

    for ancillary_name, ancillary_path in ancillary_group.items():
        allowed_extensions = extensions_by_key.get(ancillary_name)

        if allowed_extensions is None:
            continue

        input_validation.check_input(
            ancillary_path, self.logger, self.name, valid_extensions=allowed_extensions
        )

        if ancillary_name != 'shoreline_shapefile':
            continue

        # Only the .shp file is configured in the runconfig, but the other
        # required shapefile components must be co-located with it
        shapefile_stem = splitext(ancillary_path)[0]

        for companion_suffix in ('.dbf', '.prj', '.shx'):
            companion_path = shapefile_stem + companion_suffix

            if exists(abspath(companion_path)):
                continue

            error_msg = f"Additional shapefile {companion_path} could not be located"

            self.logger.critical(self.name, ErrorCode.INVALID_INPUT, error_msg)

def run_preprocessor(self, **kwargs):
"""
Executes the pre-processing steps for DSWx-S1 PGE initialization.
The DSWxS1PreProcessorMixin version of this method performs all actions
of the base PreProcessorMixin class, and adds an input validation step for
the inputs defined within the RunConfig (TODO).
the inputs defined within the RunConfig.
Parameters
----------
Expand All @@ -90,7 +136,11 @@ def run_preprocessor(self, **kwargs):
"""
super().run_preprocessor(**kwargs)

validate_dswx_inputs(
self.runconfig, self.logger, self.runconfig.pge_name, valid_extensions=(".tif", ".h5")
)
self._validate_algorithm_parameters_config()
self._validate_ancillary_inputs()


class DSWxS1PostProcessorMixin(PostProcessorMixin):
Expand Down
2 changes: 1 addition & 1 deletion src/opera/test/data/t64_135524_iw2_20220501_VV.json
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@
"burst_id": "t64_iw2_b204"
},
"dynamic_ancillary_file_group": {
"dem_file": "input_data/dem_4326.tiff"
"dem_file": "input_data/dem.tiff"
},
"product_path_group": {
"product_path": "output_dir",
Expand Down
140 changes: 59 additions & 81 deletions src/opera/test/data/test_dswx_s1_config.yaml
Original file line number Diff line number Diff line change
@@ -1,82 +1,60 @@
RunConfig:
Name: OPERA-DSWX-S1-PGE-TEST-CONFIG

Groups:

PGE:
PGENameGroup:
PGEName: DSWX_S1_PGE

InputFilesGroup:
InputFilePaths:
- dswx_s1_pge_test/input_dir

DynamicAncillaryFilesGroup:
AncillaryFileMap:
dem_file: dswx_s1_pge_test/input_dir/dem.tiff
hand_file: dswx_s1_pge_test/input_data/ancillary_data/hand.tif
reference_water_file: dswx_s1_pge_test/input_data/ancillary_data/reference_water.tif
world_cover: dswx_s1_pge_test/input_data/ancillary_data/worldcover.tif

ProductPathGroup:
OutputProductPath: dswx_s1_pge_test/output_dir
ScratchPath: dswx_s1_pge_test/scratch_dir

PrimaryExecutable:
ProductIdentifier: DSWX_S1
ProgramPath: /bin/echo
ProgramOptions:
- 'hello world > dswx_s1_pge_test/output_dir/dswx_s1_test.txt;'
- '/bin/echo DSWx-S1 invoked with RunConfig'
ErrorCodeBase: 400000
SchemaPath: pge/dswx_s1/schema/dswx_s1_sas_schema.yaml
AlgorithmParametersSchemaPath: pge/dswx_s1/schema/algorithm_parameters_s1_schema.yaml
IsoTemplatePath:

QAExecutable:
Enabled: False
ProgramPath:
ProgramOptions: []

DebugLevelGroup:
DebugSwitch: False
ExecuteViaShell: True # Must be set to True for test to work

SAS:
runconfig:
name: dswx_s1_workflow_default

groups:
pge_name_group:
pge_name: DSWX_S1_PGE

input_file_group:
# Required. List of SAFE files (min=1)
input_file_path:
- dswx_s1_pge_test/input_data

dynamic_ancillary_file_group:
# Digital elevation model
dem_file: dswx_s1_pge_test/input_data/dem_4326.tiff
# Reference water body map (Required)
# https://global-surface-water.appspot.com/download
reference_water_file: dswx_s1_pge_test/input_data/ancillary_data/reference_water.tif
# Height Above Nearest Drainage (optional)
hand_file: dswx_s1_pge_test/input_data/ancillary_data/hand.tif
# Separate runconfig file for algorithm parameters
algorithm_parameters: test/data/test_algorithm_parameters_s1.yaml

primary_executable:
product_type: dswx_s1

product_path_group:
# Directory where PGE will place results
product_path: dswx_s1_pge_test/output_dir
# Directory where SAS writes temporary data
scratch_path: dswx_s1_pge_test/output_dir/scratch_dir
# Intermediate file name. SAS writes the output to this file.
# PGE may rename the product according to file naming convention
sas_output_path: dswx_s1_pge_test/output_dir

log_file: dswx_s1_pge_test/output_dir/test_log.log

Name: OPERA-DSWX-S1-PGE-TEST-CONFIG
Groups:
PGE:
PGENameGroup:
PGEName: DSWX_S1_PGE
InputFilesGroup:
InputFilePaths:
- dswx_s1_pge_test/input_dir
DynamicAncillaryFilesGroup:
AncillaryFileMap:
dem_file: dswx_s1_pge_test/input_dir/dem.tif
hand_file: dswx_s1_pge_test/input_dir/hand.tif
reference_water_file: dswx_s1_pge_test/input_dir/reference_water.tif
world_cover: dswx_s1_pge_test/input_dir/worldcover.tif
shoreline_shapefile: dswx_s1_pge_test/input_dir/shoreline.shp
ProductPathGroup:
OutputProductPath: dswx_s1_pge_test/output_dir
ScratchPath: dswx_s1_pge_test/scratch_dir
PrimaryExecutable:
ProductIdentifier: DSWX_S1
ProgramPath: /bin/echo
ProgramOptions:
- hello world > dswx_s1_pge_test/output_dir/dswx_s1_test.txt;
- /bin/echo DSWx-S1 invoked with RunConfig
ErrorCodeBase: 400000
SchemaPath: pge/dswx_s1/schema/dswx_s1_sas_schema.yaml
AlgorithmParametersSchemaPath: pge/dswx_s1/schema/algorithm_parameters_s1_schema.yaml
IsoTemplatePath: null
QAExecutable:
Enabled: false
ProgramPath: null
ProgramOptions: []
DebugLevelGroup:
DebugSwitch: false
ExecuteViaShell: true
SAS:
runconfig:
name: dswx_s1_workflow_default
groups:
pge_name_group:
pge_name: DSWX_S1_PGE
input_file_group:
input_file_path:
- dswx_s1_pge_test/input_dir
dynamic_ancillary_file_group:
dem_file: dswx_s1_pge_test/input_dir/dem.tif
dem_file_description: DEM
world_file: dswx_s1_pge_test/input_dir/worldcover.tif
reference_water_file: dswx_s1_pge_test/input_dir/reference_water.tif
shoreline_shapefile: dswx_s1_pge_test/input_dir/shoreline.shp
hand_file: dswx_s1_pge_test/input_dir/hand.tif
algorithm_parameters: dswx_s1_pge_test/input_dir/test_algorithm_parameters_s1.yaml
primary_executable:
product_type: dswx_s1
product_path_group:
product_path: dswx_s1_pge_test/output_dir
scratch_path: dswx_s1_pge_test/scratch_dir
sas_output_path: dswx_s1_pge_test/output_dir
log_file: dswx_s1_pge_test/output_dir/test_log.log
21 changes: 13 additions & 8 deletions src/opera/test/pge/base/test_base_pge.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import tempfile
import unittest
from io import StringIO
from os.path import abspath, join
from os.path import abspath, exists, join
from pathlib import Path
from unittest.mock import patch

Expand Down Expand Up @@ -438,15 +438,20 @@ def test_qa_logger(self):

pge = PgeExecutor(pge_name="PgeQATest", runconfig_path=test_runconfig_path)

with self.assertRaises(RuntimeError):
pge.run()
try:
with self.assertRaises(RuntimeError):
pge.run()

expected_log_file = pge.qa_logger.get_file_name()
self.assertTrue(os.path.exists(expected_log_file))
expected_log_file = pge.qa_logger.get_file_name()
self.assertTrue(os.path.exists(expected_log_file))

with open(expected_log_file, 'r', encoding='utf-8') as infile:
log_contents = infile.read()
self.assertIn("Starting SAS QA executable", log_contents)
with open(expected_log_file, 'r', encoding='utf-8') as infile:
log_contents = infile.read()
self.assertIn("Starting SAS QA executable", log_contents)

finally:
if exists(test_runconfig_path):
os.unlink(test_runconfig_path)


if __name__ == "__main__":
Expand Down
5 changes: 3 additions & 2 deletions src/opera/test/pge/dswx_hls/test_dswx_hls_pge.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def test_dswx_hls_pge_input_validation(self):
log_contents = infile.read()

self.assertIn(f"Input directory {abspath('dswx_hls_pge_test/scratch_dir')} "
f"does not contain any tif files", log_contents)
f"does not contain any .tif files", log_contents)

# Lastly, check that a file that exists but is not a tif is caught
input_files_group['InputFilePaths'] = [runconfig_path]
Expand All @@ -227,7 +227,8 @@ def test_dswx_hls_pge_input_validation(self):
log_contents = infile.read()

self.assertIn(f"Input file {abspath(runconfig_path)} does not have "
f".tif extension", log_contents)
f"an expected extension", log_contents)

finally:
if os.path.exists(test_runconfig_path):
os.unlink(test_runconfig_path)
Expand Down
Loading

0 comments on commit 92175f7

Please sign in to comment.