Skip to content

Commit

Permalink
Mesher 2.0 adds MPI compatible backend for large distributed computing
Browse files Browse the repository at this point in the history
  • Loading branch information
Chrismarsh committed Jan 11, 2023
1 parent 643f0e6 commit d2e0b7c
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 24 deletions.
34 changes: 31 additions & 3 deletions docs/source/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ Mesher depends heavily upon GDAL to handle the geospatial data and the GDAL pyth

Required
=========
Configuration files are stored in a python file and passed as an argument to ``mesher.py`` on the command line. For example:
Configuration files are stored in a python file and passed as an argument to ``meshgen.py`` on the command line. For example:
::

python mesher.py example_mesher_config.py
python meshgen.py example_mesher_config.py


Therefore this configuration file must be compliant python code, but as such can contain arbitrary python code.
Expand Down Expand Up @@ -78,7 +78,7 @@ The RMSE produces the best distribution of triangle sizes and does not penalized

:type: string

The ``mesher.py`` script needs to know where the backend mesher executable is located. Optionally use the MESHER_EXE environment variable.
The ``meshgen.py`` script needs to know where the backend mesher executable is located. Optionally use the MESHER_EXE environment variable.

.. confval:: nworkers

Expand Down Expand Up @@ -503,8 +503,36 @@ This is further shown in :ref:`examples:flat_stream`.
Amount to simplify the shapefile edges by. Measured as maximum error between old and new lines. In the units of the shp file.


MPI
======

By default Mesher will use MPI to launch the Mesher backend tasks.

.. confval:: MPI_nworkers

:type: int
:default: Number of cores on machine (e.g., 4, 8, 10, etc)

Set this to limit the number of processors used.

If Mesher is used on a cluster to process a large domain, the use of a job scheduler, such as SLURM, may be optimal.

.. confval:: MPI_exec_str

:type: string
:default: None

Set this to the command used to invoke the MPI job. For example,
``MPI_exec_str='./submit_job.sh job.sh'``
where ``submit_job.sh`` invokes the queue submission, e.g.,
``sbatch "$@"``
and ``job.sh`` contains
``srun --label --unbuffered python "$@"``

The exec string used is ``f"""{MPI_exec_str} {MPI_do_parameterize_path} pickled_param_args_RANK.pickle False {configfile}"""``, where ``MPI_do_parameterize_path``
holds the path to the helper script that is run with python.

If ``MPI_exec_str`` is provided, ``MPI_nworkers`` must also be provided.



Expand Down
29 changes: 16 additions & 13 deletions meshgen.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
import glob

import inspect

from natsort import natsorted
import mesher

gdal.UseExceptions() # Enable exception support
Expand All @@ -54,7 +54,7 @@ def main():
# Check user defined configuration file

if len(sys.argv) == 1:
print('ERROR: mesher.py requires one argument [configuration file] (i.e. mesher.py Bow.py)')
print('ERROR: meshgen.py requires one argument [configuration file] (i.e. meshgen.py Bow.py)')
return

# Get name of configuration file/module
Expand Down Expand Up @@ -698,7 +698,7 @@ def main():
# we don't need neigh for param estimation
# subset_mesh['mesh']['neigh'] = mesh['mesh']['neigh']

# because we have the full vertex set, we don't have to futz around remaping the elem vertex ids
# because we have the full vertex set, we don't have to futz around remapping the elem vertex ids
subset_mesh['mesh']['elem'] = itemgetter(*my_tris[cz])(mesh['mesh']['elem'])

# we need to pass the local number tri count through to the MPI process
Expand All @@ -724,7 +724,8 @@ def main():
comm.Disconnect()


files = sorted(glob.glob('pickled_param_args_rets_*.pickle'))
files = natsorted(glob.glob('pickled_param_args_rets_*.pickle'))
# print(files)

for file in files:
with open(file, 'rb') as f:
Expand Down Expand Up @@ -1061,22 +1062,24 @@ def regularize_inputs(base_dir, exec_str, gdal_prefix, input_files, pixel_height

ret = []
for file in glob.glob('pickled_param_args_rets_*.pickle'):
with open(file,'rb') as f:
with open(file, 'rb') as f:
ret.append(cloudpickle.load(f))
os.remove(file)

for r in ret:
if len(r) == 0:
for w in ret:
if len(w) == 0:
continue

# it comes as a [ {} ]
r = r[0]
key = r['key']
input_files[key]['filename'] = r['filename']
input_files[key]['file'] = None

if not isinstance(input_files[key]['method'], list):
input_files[key]['method'] = [input_files[key]['method']]
# w will be a list of dicts, len(w) is the amount of work that MPI rank did
for r in w:
key = r['key']
input_files[key]['filename'] = r['filename']
input_files[key]['file'] = None

if not isinstance(input_files[key]['method'], list):
input_files[key]['method'] = [input_files[key]['method']]

return total_weights, use_weights

Expand Down
17 changes: 13 additions & 4 deletions pysrc/mesher/MPI_do_parameterize.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def main(pickle_file: str,

gt, is_geographic, mesh, parameter_files, initial_conditions, RasterXSize, RasterYSize, srs_proj4 = param_args

ret_tri = []
ret_tri = [{} for _ in range(mesh['mesh']['nelem'])]

for key, data in parameter_files.items():

Expand All @@ -288,10 +288,18 @@ def main(pickle_file: str,

parameter_files[key]['file'].append(ds)

for elem in range(0, param_args[2]['mesh']['nelem']):
ret_tri.append(do_parameterize(gt, is_geographic, mesh, parameter_files, key,
for elem in range(0, mesh['mesh']['nelem']):
ret = do_parameterize(gt, is_geographic, mesh, parameter_files, key,
initial_conditions, RasterXSize, RasterYSize,
srs_proj4, elem, configfile))
srs_proj4, elem, configfile)
# ret is a single dict; once merged, ret_tri looks like
# [{'id': 10354, 'area': 3485.748251695186, 'landcover': 4.0},
# {'id': 10355, 'area': 3740.141789605841, 'landcover': 4.0},
# {'id': 10356, 'area': 2721.806314367801, 'landcover': 4.0}]
for k, d in ret.items():
ret_tri[ret['id']][k] = d

# ret_tri.append(ret)

parameter_files[key]['file'] = []

Expand All @@ -301,6 +309,7 @@ def main(pickle_file: str,
cloudpickle.dump(ret_tri, f)

os.remove(pickle_file)

# have been run from the MPI.spawn, so disconnect from parent
if disconnect:
comm = MPI.Comm.Get_parent()
Expand Down
2 changes: 1 addition & 1 deletion pysrc/mesher/MPI_regularize_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def main(pickle_file: str,

# there is no way to return the uuid mangled filename + param name so save it to a pickle
with open(f'pickled_param_args_rets_{MPI.COMM_WORLD.rank}.pickle', 'wb') as f:
cloudpickle.dump(r,f)
cloudpickle.dump(r, f)

# have been run from the MPI.spawn, so disconnect from parent
if disconnect:
Expand Down
7 changes: 4 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def get_installed_gdal_version():
# pygdal doesn't always have the most up-to-date version, so we will need to switch to pygdal-chm if it is not available
# 3.5.1 is most recent as of this version
chm=''
if packaging.version.parse(version) > packaging.version.parse("3.3.2"):
if packaging.version.parse(version) > packaging.version.parse("3.5.1"):
chm='-chm'

version = chm + "=="+version+".*"
Expand Down Expand Up @@ -45,7 +45,7 @@ def get_installed_gdal_version():
USE_CONAN = str(USE_CONAN).upper()

setup(name='mesher',
version='2.0.dev14',
version='2.0.0',
description='Landsurface model mesh generation',
long_description="""
Mesher is a novel multi-objective unstructured mesh generation software that allows mesh generation to be generated from an arbitrary number of hydrologically important features while maintaining a variable spatial resolution.
Expand All @@ -69,7 +69,8 @@ def get_installed_gdal_version():
cmake_args=['-DCMAKE_BUILD_TYPE:STRING=Release',
'-DUSE_CONAN:BOOL='+USE_CONAN],
scripts=["meshgen.py","tools/mesh2vtu.py", "tools/meshmerge.py","tools/meshpermutation.py","tools/meshstats.py"],
install_requires=['vtk','pygdal'+get_installed_gdal_version(),'numpy','scipy','matplotlib','cloudpickle','metis','mpi4py'],
install_requires=['vtk','pygdal'+get_installed_gdal_version(), 'numpy', 'scipy', 'matplotlib', 'cloudpickle',
'metis', 'mpi4py', 'natsort'],
setup_requires=setup_requires,
python_requires='>=3.7'
)

0 comments on commit d2e0b7c

Please sign in to comment.