```python
-def load_structure(self, fname):
+def load_structure(self, fname, single_parallel_file=True):
```
diff --git a/doc/docs/Python_User_Interface.md.in b/doc/docs/Python_User_Interface.md.in
index a1c7eec14..4fa2065a9 100644
--- a/doc/docs/Python_User_Interface.md.in
+++ b/doc/docs/Python_User_Interface.md.in
@@ -351,6 +351,8 @@ And this [`DftNear2Far`](#DftNear2Far) method:
These functions dump the raw ε and μ data to disk and load it back for doing multiple simulations with the same materials but different sources etc. The only prerequisite is that the dump/load simulations have the same [chunks](Chunks_and_Symmetry.md) (i.e. the same grid, number of processors, symmetries, and PML). When using `split_chunks_evenly=False`, you must also dump the original chunk layout using `dump_chunk_layout` and load it into the new `Simulation` using the `chunk_layout` parameter. Currently only stores dispersive and non-dispersive $\varepsilon$ and $\mu$ but not nonlinearities. Note that loading data from a file in this way overwrites any `geometry` data passed to the `Simulation` constructor.
+For dump_structure and load_structure, when `single_parallel_file=True` (the default) - all chunks write to the same/single file after computing their respective offsets into this file. When set to 'False', each chunk writes writes its data to a separate file that is appropriately named by its chunk index.
+
@@ Simulation.dump_structure @@
@@ Simulation.load_structure @@
@@ Simulation.dump_chunk_layout @@
diff --git a/python/simulation.py b/python/simulation.py
index 62a8b54a4..aa059198b 100644
--- a/python/simulation.py
+++ b/python/simulation.py
@@ -961,7 +961,6 @@ def __init__(self,
filename_prefix=None,
output_volume=None,
output_single_precision=False,
- load_structure='',
geometry_center=mp.Vector3(),
force_all_components=False,
split_chunks_evenly=True,
@@ -1158,11 +1157,6 @@ def __init__(self,
that are specified by geometric objects. You should list any materials other
than scalar dielectrics that are returned by `material_function` here.
- + **`load_structure` [`string`]** — If not empty, Meep will load the structure
- file specified by this string. The file must have been created by
- `mp.dump_structure`. Defaults to an empty string. See [Load and Dump
- Structure](#load-and-dump-structure) for more information.
-
+ **`chunk_layout` [`string` or `Simulation` instance or `BinaryPartition` class]** —
This will cause the `Simulation` to use the chunk layout described by either
(1) an `.h5` file (created using `Simulation.dump_chunk_layout`), (2) another
@@ -1235,7 +1229,6 @@ def __init__(self,
self.is_cylindrical = False
self.material_function = material_function
self.epsilon_func = epsilon_func
- self.load_structure_file = load_structure
self.dft_objects = []
self._is_initialized = False
self.force_all_components = force_all_components
@@ -1246,6 +1239,9 @@ def __init__(self,
self.fragment_stats = None
self._output_stats = os.environ.get('MEEP_STATS', None)
+ self.load_single_parallel_file = True
+ self.load_structure_file = None
+
self.special_kz = False
if self.cell_size.z == 0 and self.k_point and self.k_point.z != 0:
if kz_2d == "complex":
@@ -1717,7 +1713,8 @@ def _init_structure(self, k=False):
self.num_chunks = self.chunk_layout.numchunks()
if self.load_structure_file:
- self.load_structure(self.load_structure_file)
+ self.load_structure(
+ self.load_structure_file, self.load_single_parallel_file)
def _is_outer_boundary(self, vol, direction, side):
@@ -1861,22 +1858,22 @@ def set_materials(self, geometry=None, default_material=None):
None
)
- def dump_structure(self, fname):
+ def dump_structure(self, fname, single_parallel_file=True):
"""
Dumps the structure to the file `fname`.
"""
if self.structure is None:
raise ValueError("Fields must be initialized before calling dump_structure")
- self.structure.dump(fname)
+ self.structure.dump(fname, single_parallel_file)
- def load_structure(self, fname):
+ def load_structure(self, fname, single_parallel_file=True):
"""
Loads a structure from the file `fname`. A file name to load can also be passed to
the `Simulation` constructor via the `load_structure` keyword argument.
"""
if self.structure is None:
raise ValueError("Fields must be initialized before calling load_structure")
- self.structure.load(fname)
+ self.structure.load(fname, single_parallel_file)
def dump_chunk_layout(self, fname):
"""
@@ -1898,6 +1895,41 @@ def load_chunk_layout(self, br, source):
## source is either filename (string)
self.structure.load_chunk_layout(source, br)
+ def _get_load_dump_dirname(self, dirname, single_parallel_file):
+ """
+ Get the dirname to dump simulation state to.
+ """
+ if single_parallel_file:
+ dump_dirname = dirname
+ else:
+ # When doing a sharded dump (each process to its own file), use
+ # the process rank to get a unique name.
+ dump_dirname = os.path.join(dirname, 'rank%02d' % mp.my_rank())
+ return dump_dirname
+
+ def dump(self, dirname, structure=True, single_parallel_file=True):
+ """
+ Dumps simulation state.
+ """
+ dump_dirname = self._get_load_dump_dirname(dirname, single_parallel_file)
+ os.makedirs(dump_dirname, exist_ok=True)
+
+ if structure:
+ structure_dump_filename = os.path.join(dump_dirname, 'structure.h5')
+ self.dump_structure(structure_dump_filename, single_parallel_file)
+
+ def load(self, dirname, structure=True, single_parallel_file=True):
+ """
+ Loads simulation state.
+
+ This should called right after the Simulation object has been created
+ but before 'init_sim' is called.
+ """
+ dump_dirname = self._get_load_dump_dirname(dirname, single_parallel_file)
+ self.load_single_parallel_file = single_parallel_file
+ if structure:
+ self.load_structure_file = os.path.join(dump_dirname, 'structure.h5')
+
def init_sim(self):
if self._is_initialized:
return
diff --git a/python/tests/test_simulation.py b/python/tests/test_simulation.py
index ed751082e..f85afeb7f 100644
--- a/python/tests/test_simulation.py
+++ b/python/tests/test_simulation.py
@@ -302,7 +302,7 @@ def test_in_box_volumes(self):
sim.field_energy_in_box(tv)
sim.field_energy_in_box(v)
- def _load_dump_structure(self, chunk_file=False, chunk_sim=False):
+ def _load_dump_structure(self, chunk_file=False, chunk_sim=False, single_parallel_file=True):
from meep.materials import Al
resolution = 50
cell = mp.Vector3(5, 5)
@@ -329,12 +329,14 @@ def get_ref_field_point(sim):
ref_field_points.append(p.real)
sim1.run(mp.at_every(5, get_ref_field_point), until=50)
- dump_fn = os.path.join(self.temp_dir, 'test_load_dump_structure.h5')
+
+ dump_dirname = os.path.join(self.temp_dir, 'test_load_dump')
+ sim1.dump(dump_dirname, structure=True, single_parallel_file=single_parallel_file)
+
dump_chunk_fname = None
chunk_layout = None
- sim1.dump_structure(dump_fn)
if chunk_file:
- dump_chunk_fname = os.path.join(self.temp_dir, 'test_load_dump_structure_chunks.h5')
+ dump_chunk_fname = os.path.join(dump_dirname, 'chunk_layout.h5')
sim1.dump_chunk_layout(dump_chunk_fname)
chunk_layout = dump_chunk_fname
if chunk_sim:
@@ -345,9 +347,8 @@ def get_ref_field_point(sim):
boundary_layers=pml_layers,
sources=[sources],
symmetries=symmetries,
- chunk_layout=chunk_layout,
- load_structure=dump_fn)
-
+ chunk_layout=chunk_layout)
+ sim.load(dump_dirname, structure=True, single_parallel_file=single_parallel_file)
field_points = []
def get_field_point(sim):
@@ -362,6 +363,10 @@ def get_field_point(sim):
def test_load_dump_structure(self):
self._load_dump_structure()
+ @unittest.skipIf(not mp.with_mpi(), "MPI specific test")
+ def test_load_dump_structure_sharded(self):
+ self._load_dump_structure(single_parallel_file=False)
+
def test_load_dump_chunk_layout_file(self):
self._load_dump_structure(chunk_file=True)
diff --git a/src/h5file.cpp b/src/h5file.cpp
index 484640942..366e5d63c 100644
--- a/src/h5file.cpp
+++ b/src/h5file.cpp
@@ -150,7 +150,7 @@ void h5file::close_id() {
/* note: if parallel is true, then *all* processes must call this,
and all processes will use I/O. */
-h5file::h5file(const char *filename_, access_mode m, bool parallel_) {
+h5file::h5file(const char *filename_, access_mode m, bool parallel_, bool local_) {
cur_dataname = NULL;
id = (void *)malloc(sizeof(hid_t));
cur_id = (void *)malloc(sizeof(hid_t));
@@ -160,7 +160,12 @@ h5file::h5file(const char *filename_, access_mode m, bool parallel_) {
filename = new char[strlen(filename_) + 1];
strcpy(filename, filename_);
mode = m;
+
+ if (parallel_ && local_) {
+ meep::abort("Can not open h5file (%s) in both parallel and local mode.", filename);
+ }
parallel = parallel_;
+ local = local_;
}
h5file::~h5file() {
@@ -191,7 +196,9 @@ void h5file::remove() {
extending = 0;
IF_EXCLUSIVE(if (parallel) all_wait(), (void)0);
- if (am_master() && std::remove(filename)) meep::abort("error removing file %s", filename);
+ if (am_master() || local) {
+ if (std::remove(filename)) meep::abort("error removing file %s", filename);
+ }
}
h5file::extending_s *h5file::get_extending(const char *dataname) const {
@@ -226,7 +233,7 @@ void h5file::set_cur(const char *dataname, void *data_id) {
void h5file::read_size(const char *dataname, int *rank, size_t *dims, int maxrank) {
#ifdef HAVE_HDF5
- if (parallel || am_master()) {
+ if (parallel || am_master() || local) {
hid_t file_id = HID(get_id()), space_id, data_id;
CHECK(file_id >= 0, "error opening HDF5 input file");
@@ -253,7 +260,7 @@ void h5file::read_size(const char *dataname, int *rank, size_t *dims, int maxran
H5Sclose(space_id);
}
- if (!parallel) {
+ if (!parallel && !local) {
*rank = broadcast(0, *rank);
broadcast(0, dims, *rank);
@@ -291,7 +298,7 @@ void *h5file::read(const char *dataname, int *rank, size_t *dims, int maxrank,
bool single_precision) {
#ifdef HAVE_HDF5
void *data = 0;
- if (parallel || am_master()) {
+ if (parallel || am_master() || local) {
int i, N;
hid_t file_id = HID(get_id()), space_id, data_id;
@@ -345,7 +352,7 @@ void *h5file::read(const char *dataname, int *rank, size_t *dims, int maxrank,
if (close_data_id) H5Dclose(data_id);
}
- if (!parallel) {
+ if (!parallel && !local) {
*rank = broadcast(0, *rank);
broadcast(0, dims, *rank);
size_t N = 1;
@@ -375,7 +382,7 @@ char *h5file::read(const char *dataname) {
#ifdef HAVE_HDF5
char *data = 0;
int len = 0;
- if (parallel || am_master()) {
+ if (parallel || am_master() || local) {
hid_t file_id = HID(get_id()), space_id, data_id, type_id;
CHECK(file_id >= 0, "error opening HDF5 input file");
@@ -404,7 +411,7 @@ char *h5file::read(const char *dataname) {
H5Dclose(data_id);
}
- if (!parallel) {
+ if (!parallel && !local) {
len = broadcast(0, len);
if (!am_master()) data = new char[len];
broadcast(0, data, len);
@@ -442,7 +449,7 @@ void h5file::remove_data(const char *dataname) {
if (dataset_exists(dataname)) {
/* this is hackish ...need to pester HDF5 developers to make
H5Gunlink a collective operation for parallel mode */
- if (!parallel || am_master()) {
+ if (!parallel || am_master() || local) {
H5Gunlink(file_id, dataname); /* delete it */
H5Fflush(file_id, H5F_SCOPE_GLOBAL);
}
@@ -471,7 +478,7 @@ void h5file::create_data(const char *dataname, int rank, const size_t *dims, boo
unset_cur();
remove_data(dataname); // HDF5 gives error if we H5Dcreate existing dataset
- if (IF_EXCLUSIVE(!parallel || am_master(), 1)) {
+ if (local || IF_EXCLUSIVE(!parallel || am_master(), 1)) {
hsize_t *dims_copy = new hsize_t[rank1 + append_data];
hsize_t *maxdims = new hsize_t[rank1 + append_data];
hsize_t N = 1;
@@ -708,12 +715,12 @@ void h5file::done_writing_chunks() {
I'm assuming(?) that non-extensible datasets will use different
files, etcetera, for different timesteps. All of this hackery
goes away if we just use an MPI-compiled version of HDF5. */
- if (parallel && cur_dataname && get_extending(cur_dataname)) prevent_deadlock(); // closes id
+ if (parallel && !local && cur_dataname && get_extending(cur_dataname)) prevent_deadlock(); // closes id
}
void h5file::write(const char *dataname, int rank, const size_t *dims, void *data,
bool single_precision) {
- if (parallel || am_master()) {
+ if (parallel || am_master() || local) {
size_t *start = new size_t[rank + 1];
for (int i = 0; i < rank; i++)
start[i] = 0;
@@ -732,7 +739,7 @@ void h5file::write(const char *dataname, int rank, const size_t *dims, void *dat
void h5file::write(const char *dataname, const char *data) {
#ifdef HAVE_HDF5
- if (IF_EXCLUSIVE(am_master(), parallel || am_master())) {
+ if (local || IF_EXCLUSIVE(am_master(), parallel || am_master())) {
hid_t file_id = HID(get_id()), type_id, data_id, space_id;
CHECK(file_id >= 0, "error opening HDF5 output file");
diff --git a/src/meep.hpp b/src/meep.hpp
index aeb28211e..7f336280f 100644
--- a/src/meep.hpp
+++ b/src/meep.hpp
@@ -384,7 +384,13 @@ class h5file {
public:
typedef enum { READONLY, READWRITE, WRITE } access_mode;
- h5file(const char *filename_, access_mode m = READWRITE, bool parallel_ = true);
+ // If 'parallel_' is true, then we assume that all processes will be doing
+ // I/O, else we assume that *only* the master is doing I/O and all other
+ // processes will send/receive data to/from the master.
+ // If 'local_' is true, then 'parallel_' *must* be false and assumes that
+ // each process is writing to a local non-shared file and the filename is
+ // unique to the process.
+ h5file(const char *filename_, access_mode m = READWRITE, bool parallel_ = true, bool local_ = false);
~h5file(); // closes the files (and any open dataset)
bool ok();
@@ -423,6 +429,7 @@ class h5file {
access_mode mode;
char *filename;
bool parallel;
+ bool local;
bool is_cur(const char *dataname);
void unset_cur();
@@ -849,9 +856,15 @@ class structure {
std::vector get_chunk_owners() const;
// structure_dump.cpp
- void dump(const char *filename);
+ // Dump structure to specified file. If 'single_parallel_file'
+ // is 'true' (the default) - then all processes write to the same/single file
+ // file after computing their respective offsets into this file. When set to
+ // 'false', each process writes data for the chunks it owns to a separate
+ // (process unique) file.
+ void dump(const char *filename, bool single_parallel_file=true);
+ void load(const char *filename, bool single_parallel_file=true);
+
void dump_chunk_layout(const char *filename);
- void load(const char *filename);
void load_chunk_layout(const char *filename, boundary_region &br);
void load_chunk_layout(const std::vector &gvs,
const std::vector &ids,
@@ -889,7 +902,7 @@ class structure {
void changing_chunks();
// Helper methods for dumping and loading susceptibilities
void set_chiP_from_file(h5file *file, const char *dataset, field_type ft);
- void write_susceptibility_params(h5file *file, const char *dname, int EorH);
+ void write_susceptibility_params(h5file *file, bool single_parallel_file, const char *dname, int EorH);
std::unique_ptr bp;
};
diff --git a/src/structure_dump.cpp b/src/structure_dump.cpp
index ac57a7c99..d0fcb91a5 100644
--- a/src/structure_dump.cpp
+++ b/src/structure_dump.cpp
@@ -33,7 +33,9 @@ namespace meep {
// Write the parameters required to reconstruct the susceptibility (id, noise_amp (for noisy),
// omega_0, gamma, no_omega_0_denominator)
-void structure::write_susceptibility_params(h5file *file, const char *dname, int EorH) {
+void structure::write_susceptibility_params(h5file *file,
+ bool single_parallel_file,
+ const char *dname, int EorH) {
// Get number of susceptibility params from first chunk, since all chunks will have
// the same susceptibility list.
size_t params_ntotal = 0;
@@ -47,7 +49,7 @@ void structure::write_susceptibility_params(h5file *file, const char *dname, int
size_t params_start = 0;
file->create_data(dname, 1, ¶ms_ntotal, false /* append_data */,
sizeof(realnum) == sizeof(float) /* single_precision */);
- if (am_master()) {
+ if (am_master() || !single_parallel_file) {
susceptibility *sus = chunks[0]->chiP[EorH];
while (sus) {
sus->dump_params(file, ¶ms_start);
@@ -85,36 +87,60 @@ void structure::dump_chunk_layout(const char *filename) {
delete[] nums;
}
-void structure::dump(const char *filename) {
- if (verbosity > 0) master_printf("creating epsilon output file \"%s\"...\n", filename);
-
- // make/save a num_chunks x NUM_FIELD_COMPONENTS x 5 array counting
- // the number of entries in the chi1inv array for each chunk.
- size_t *num_chi1inv_ = new size_t[num_chunks * NUM_FIELD_COMPONENTS * 5];
- memset(num_chi1inv_, 0, sizeof(size_t) * size_t(num_chunks * NUM_FIELD_COMPONENTS * 5));
+void structure::dump(const char *filename, bool single_parallel_file) {
+ if (verbosity > 0) printf("creating epsilon from file \"%s\" (%d)...\n", filename, single_parallel_file);
+
+ /*
+ * make/save a num_chunks x NUM_FIELD_COMPONENTS x 5 array counting
+ * the number of entries in the chi1inv array for each chunk.
+ *
+ * When 'single_parallel_file' is true, we are creating a single block of data
+ * for ALL chunks (that are merged using MPI). Otherwise, we are just
+ * making a copy of just the chunks that are ours.
+ */
+ int my_num_chunks = 0;
+ for (int i = 0; i < num_chunks; i++) {
+ my_num_chunks += (single_parallel_file || chunks[i]->is_mine());
+ }
+ size_t num_chi1inv_size = my_num_chunks * NUM_FIELD_COMPONENTS * 5;
+ std::vector num_chi1inv_(num_chi1inv_size);
size_t my_ntot = 0;
- for (int i = 0; i < num_chunks; i++)
+ for (int i = 0, chunk_i = 0; i < num_chunks; i++) {
if (chunks[i]->is_mine()) {
size_t ntot = chunks[i]->gv.ntot();
- for (int c = 0; c < NUM_FIELD_COMPONENTS; ++c)
- for (int d = 0; d < 5; ++d)
+ for (int c = 0; c < NUM_FIELD_COMPONENTS; ++c) {
+ for (int d = 0; d < 5; ++d) {
if (chunks[i]->chi1inv[c][d])
- my_ntot += (num_chi1inv_[(i * NUM_FIELD_COMPONENTS + c) * 5 + d] = ntot);
+ my_ntot += (num_chi1inv_[(chunk_i * NUM_FIELD_COMPONENTS + c) * 5 + d] = ntot);
+ }
+ }
}
- size_t *num_chi1inv = new size_t[num_chunks * NUM_FIELD_COMPONENTS * 5];
- sum_to_master(num_chi1inv_, num_chi1inv, num_chunks * NUM_FIELD_COMPONENTS * 5);
- delete[] num_chi1inv_;
+ chunk_i += (chunks[i]->is_mine() || single_parallel_file);
+ }
+
+ std::vector num_chi1inv;
+ if (single_parallel_file) {
+ num_chi1inv.resize(num_chi1inv_size);
+ sum_to_master(num_chi1inv_.data(), num_chi1inv.data(), num_chi1inv_size);
+ } else {
+ num_chi1inv = std::move(num_chi1inv_);
+ }
// determine total dataset size and offset of this process's data
- size_t my_start = partial_sum_to_all(my_ntot) - my_ntot;
- size_t ntotal = sum_to_all(my_ntot);
+ size_t my_start = 0;
+ size_t ntotal = my_ntot;
+ if (single_parallel_file) {
+ my_start = partial_sum_to_all(my_ntot) - my_ntot;
+ ntotal = sum_to_all(my_ntot);
+ }
- h5file file(filename, h5file::WRITE, true);
- size_t dims[3] = {(size_t)num_chunks, NUM_FIELD_COMPONENTS, 5};
+ h5file file(filename, h5file::WRITE, single_parallel_file, !single_parallel_file);
+ size_t dims[3] = {(size_t)my_num_chunks, NUM_FIELD_COMPONENTS, 5};
size_t start[3] = {0, 0, 0};
file.create_data("num_chi1inv", 3, dims);
- if (am_master()) file.write_chunk(3, start, dims, num_chi1inv);
- delete[] num_chi1inv;
+ if (am_master() || !single_parallel_file) {
+ file.write_chunk(3, start, dims, num_chi1inv.data());
+ }
// write the data
file.create_data("chi1inv", 1, &ntotal, false /* append_data */, false /* single_precision */);
@@ -147,7 +173,7 @@ void structure::dump(const char *filename) {
// Write the number of susceptibilites
size_t len = 2;
file.create_data("num_sus", 1, &len);
- if (am_master()) {
+ if (am_master() || !single_parallel_file) {
size_t start = 0;
size_t ntot = 2;
file.write_chunk(1, &start, &ntot, num_sus);
@@ -157,13 +183,11 @@ void structure::dump(const char *filename) {
// Get number of non-null sigma entries for each chiP in each chunk.
// Assumes each susceptibility in the chiP[E_stuff] list has the
// same number of non-null sigma elements. Likewise for chiP[H_stuff]
- size_t *my_num_sigmas[2];
- size_t *num_sigmas[2];
+ std::vector my_num_sigmas[2];
+ std::vector num_sigmas[2];
for (int ft = 0; ft < 2; ++ft) {
- my_num_sigmas[ft] = new size_t[num_chunks];
- num_sigmas[ft] = new size_t[num_chunks];
- memset(my_num_sigmas[ft], 0, sizeof(size_t) * num_chunks);
- memset(num_sigmas[ft], 0, sizeof(size_t) * num_chunks);
+ my_num_sigmas[ft].resize(num_chunks);
+ num_sigmas[ft].resize(num_chunks);
}
for (int i = 0; i < num_chunks; ++i) {
@@ -199,7 +223,7 @@ void structure::dump(const char *filename) {
file.prevent_deadlock();
for (int ft = 0; ft < 2; ++ft) {
- sum_to_all(my_num_sigmas[ft], num_sigmas[ft], num_chunks);
+ sum_to_all(my_num_sigmas[ft].data(), num_sigmas[ft].data(), num_chunks);
}
size_t num_E_sigmas = 0;
@@ -210,17 +234,13 @@ void structure::dump(const char *filename) {
}
// Allocate space for component and direction of non-null sigmas
- size_t *my_sigma_cd[2] = {NULL, NULL};
- my_sigma_cd[E_stuff] = new size_t[num_E_sigmas * 2];
- memset(my_sigma_cd[E_stuff], 0, sizeof(size_t) * num_E_sigmas * 2);
- my_sigma_cd[H_stuff] = new size_t[num_H_sigmas * 2];
- memset(my_sigma_cd[H_stuff], 0, sizeof(size_t) * num_H_sigmas * 2);
-
- size_t *sigma_cd[2] = {NULL, NULL};
- sigma_cd[E_stuff] = new size_t[num_E_sigmas * 2];
- memset(sigma_cd[E_stuff], 0, sizeof(size_t) * num_E_sigmas * 2);
- sigma_cd[H_stuff] = new size_t[num_H_sigmas * 2];
- memset(sigma_cd[H_stuff], 0, sizeof(size_t) * num_H_sigmas * 2);
+ std::vector my_sigma_cd[2];
+ my_sigma_cd[E_stuff].resize(num_E_sigmas * 2);
+ my_sigma_cd[H_stuff].resize(num_H_sigmas * 2);
+
+ std::vector sigma_cd[2];
+ sigma_cd[E_stuff].resize(num_E_sigmas * 2);
+ sigma_cd[H_stuff].resize(num_H_sigmas * 2);
// Find component and direction of non-null sigmas
{
@@ -246,8 +266,8 @@ void structure::dump(const char *filename) {
}
}
}
- bw_or_to_all(my_sigma_cd[E_stuff], sigma_cd[E_stuff], num_E_sigmas * 2);
- bw_or_to_all(my_sigma_cd[H_stuff], sigma_cd[H_stuff], num_H_sigmas * 2);
+ bw_or_to_all(my_sigma_cd[E_stuff].data(), sigma_cd[E_stuff].data(), num_E_sigmas * 2);
+ bw_or_to_all(my_sigma_cd[H_stuff].data(), sigma_cd[H_stuff].data(), num_H_sigmas * 2);
}
// Write location (component and direction) data of non-null sigmas (sigma[c][d])
@@ -256,9 +276,9 @@ void structure::dump(const char *filename) {
file.create_data("sigma_cd", 1, &len);
size_t start = 0;
for (int ft = 0; ft < 2; ++ft) {
- if (am_master()) {
- size_t count = (ft == 0 ? num_E_sigmas : num_H_sigmas) * 2;
- file.write_chunk(1, &start, &count, sigma_cd[ft]);
+ if (am_master() || !single_parallel_file) {
+ size_t count = sigma_cd[ft].size();
+ file.write_chunk(1, &start, &count, sigma_cd[ft].data());
start += count;
}
}
@@ -292,15 +312,8 @@ void structure::dump(const char *filename) {
}
}
- write_susceptibility_params(&file, "E_params", E_stuff);
- write_susceptibility_params(&file, "H_params", H_stuff);
-
- for (int ft = 0; ft < 2; ++ft) {
- delete[] sigma_cd[ft];
- delete[] my_sigma_cd[ft];
- delete[] num_sigmas[ft];
- delete[] my_num_sigmas[ft];
- }
+ write_susceptibility_params(&file, single_parallel_file, "E_params", E_stuff);
+ write_susceptibility_params(&file, single_parallel_file, "H_params", H_stuff);
}
// Reconstruct the chiP lists of susceptibilities from the params hdf5 data
@@ -406,7 +419,7 @@ void structure::set_chiP_from_file(h5file *file, const char *dataset, field_type
size_t dims[3] = {0, 0, 0};
file->read_size(dataset, &rank, dims, 1);
- if (rank != 1) meep::abort("inconsistent data size in structure::load");
+ if (rank != 1) meep::abort("inconsistent data size in structure::set_chiP_from_file");
if (dims[0] != 0) {
for (int i = 0; i < num_chunks; ++i) {
@@ -542,35 +555,49 @@ void structure::load_chunk_layout(const std::vector &gvs,
check_chunks();
}
-void structure::load(const char *filename) {
- h5file file(filename, h5file::READONLY, true);
+void structure::load(const char *filename, bool single_parallel_file) {
+ h5file file(filename, h5file::READONLY, single_parallel_file, !single_parallel_file);
- if (verbosity > 0) master_printf("reading epsilon from file \"%s\"...\n", filename);
+ if (verbosity > 0) printf("reading epsilon from file \"%s\" (%d)...\n", filename, single_parallel_file);
+
+ /*
+ * make/save a num_chunks x NUM_FIELD_COMPONENTS x 5 array counting
+ * the number of entries in the chi1inv array for each chunk.
+ *
+ * When 'single_parallel_file' is true, we are creating a single block of data
+ * for ALL chunks (that are merged using MPI). Otherwise, we are just
+ * making a copy of just the chunks that are ours.
+ */
+ int my_num_chunks = 0;
+ for (int i = 0; i < num_chunks; i++) {
+ my_num_chunks += (single_parallel_file || chunks[i]->is_mine());
+ }
+ size_t num_chi1inv_size = my_num_chunks * NUM_FIELD_COMPONENTS * 5;
+ std::vector num_chi1inv(num_chi1inv_size);
- // make/save a num_chunks x NUM_FIELD_COMPONENTS x 5 array counting
- // the number of entries in the chi1inv array for each chunk.
- size_t *num_chi1inv = new size_t[num_chunks * NUM_FIELD_COMPONENTS * 5];
int rank;
- size_t dims[3], _dims[3] = {(size_t)num_chunks, NUM_FIELD_COMPONENTS, 5};
+ size_t dims[3], _dims[3] = {(size_t)my_num_chunks, NUM_FIELD_COMPONENTS, 5};
size_t start[3] = {0, 0, 0};
file.read_size("num_chi1inv", &rank, dims, 3);
if (rank != 3 || _dims[0] != dims[0] || _dims[1] != dims[1] || _dims[2] != dims[2])
meep::abort("chunk mismatch in structure::load");
- if (am_master()) file.read_chunk(3, start, dims, num_chi1inv);
+ if (am_master() || !single_parallel_file) file.read_chunk(3, start, dims, num_chi1inv.data());
- file.prevent_deadlock();
- broadcast(0, num_chi1inv, dims[0] * dims[1] * dims[2]);
+ if (single_parallel_file) {
+ file.prevent_deadlock();
+ broadcast(0, num_chi1inv.data(), dims[0] * dims[1] * dims[2]);
+ }
changing_chunks();
// allocate data as needed and check sizes
size_t my_ntot = 0;
- for (int i = 0; i < num_chunks; i++)
+ for (int i = 0, chunk_i = 0; i < num_chunks; i++) {
if (chunks[i]->is_mine()) {
size_t ntot = chunks[i]->gv.ntot();
for (int c = 0; c < NUM_FIELD_COMPONENTS; ++c)
for (int d = 0; d < 5; ++d) {
- size_t n = num_chi1inv[(i * NUM_FIELD_COMPONENTS + c) * 5 + d];
+ size_t n = num_chi1inv[(chunk_i * NUM_FIELD_COMPONENTS + c) * 5 + d];
if (n == 0) {
delete[] chunks[i]->chi1inv[c][d];
chunks[i]->chi1inv[c][d] = NULL;
@@ -582,14 +609,25 @@ void structure::load(const char *filename) {
}
}
}
+ chunk_i += (chunks[i]->is_mine() || single_parallel_file);
+ }
// determine total dataset size and offset of this process's data
- size_t my_start = partial_sum_to_all(my_ntot) - my_ntot;
- size_t ntotal = sum_to_all(my_ntot);
+ size_t my_start = 0;
+ size_t ntotal = my_ntot;
+ if (single_parallel_file) {
+ my_start = partial_sum_to_all(my_ntot) - my_ntot;
+ ntotal = sum_to_all(my_ntot);
+ }
// read the data
file.read_size("chi1inv", &rank, dims, 1);
- if (rank != 1 || dims[0] != ntotal) meep::abort("inconsistent data size in structure::load");
+ if (rank != 1 || dims[0] != ntotal) {
+ meep::abort(
+ "inconsistent data size for chi1inv in structure::load (rank, dims[0]): "
+ "(%d, %zu) != (1, %zu)",
+ rank, dims[0], ntotal);
+ }
for (int i = 0; i < num_chunks; i++)
if (chunks[i]->is_mine()) {
size_t ntot = chunks[i]->gv.ntot();
@@ -611,44 +649,44 @@ void structure::load(const char *filename) {
size_t dims[] = {0, 0, 0};
file.read_size("num_sus", &rank, dims, 1);
if (dims[0] > 0) {
- if (am_master()) {
+ if (am_master() || !single_parallel_file) {
size_t start = 0;
size_t count = 2;
file.read_chunk(rank, &start, &count, num_sus);
}
}
- file.prevent_deadlock();
- broadcast(0, num_sus, 2);
+ if (single_parallel_file) {
+ file.prevent_deadlock();
+ broadcast(0, num_sus, 2);
+ }
}
// Allocate and read non-null sigma entry data
- size_t *num_sigmas[2] = {NULL, NULL};
- num_sigmas[E_stuff] = new size_t[num_chunks];
- memset(num_sigmas[E_stuff], 0, sizeof(size_t) * num_chunks);
- num_sigmas[H_stuff] = new size_t[num_chunks];
- memset(num_sigmas[H_stuff], 0, sizeof(size_t) * num_chunks);
+ std::vector num_sigmas[2];
+ num_sigmas[E_stuff].resize(num_chunks);
+ num_sigmas[H_stuff].resize(num_chunks);
// Read num_sigmas data
{
int rank = 0;
size_t dims[] = {0, 0, 0};
file.read_size("num_sigmas", &rank, dims, 1);
- if (dims[0] != (size_t)num_chunks * 2) { meep::abort("inconsistent data size in structure::load"); }
- if (am_master()) {
+ if (dims[0] != (size_t)num_chunks * 2) { meep::abort("inconsistent data size for num_sigmas in structure::load"); }
+ if (am_master() || !single_parallel_file) {
size_t start = 0;
size_t count = num_chunks;
- file.read_chunk(rank, &start, &count, num_sigmas[E_stuff]);
+ file.read_chunk(rank, &start, &count, num_sigmas[E_stuff].data());
start += count;
- file.read_chunk(rank, &start, &count, num_sigmas[H_stuff]);
+ file.read_chunk(rank, &start, &count, num_sigmas[H_stuff].data());
+ }
+ if (single_parallel_file) {
+ file.prevent_deadlock();
+ broadcast(0, num_sigmas[E_stuff].data(), num_chunks);
+ broadcast(0, num_sigmas[H_stuff].data(), num_chunks);
}
- file.prevent_deadlock();
- broadcast(0, num_sigmas[E_stuff], num_chunks);
- broadcast(0, num_sigmas[H_stuff], num_chunks);
}
// Allocate space for component and direction data of the non-null susceptibilities
- size_t *sigma_cd[2] = {NULL, NULL};
-
size_t nsig[2] = {0, 0};
for (int ft = 0; ft < 2; ++ft) {
for (int i = 0; i < num_chunks; ++i) {
@@ -656,10 +694,9 @@ void structure::load(const char *filename) {
}
}
- sigma_cd[E_stuff] = new size_t[nsig[E_stuff] * 2];
- memset(sigma_cd[E_stuff], 0, sizeof(size_t) * nsig[E_stuff] * 2);
- sigma_cd[H_stuff] = new size_t[nsig[H_stuff] * 2];
- memset(sigma_cd[H_stuff], 0, sizeof(size_t) * nsig[H_stuff] * 2);
+ std::vector sigma_cd[2];
+ sigma_cd[E_stuff].resize(nsig[E_stuff] * 2);
+ sigma_cd[H_stuff].resize(nsig[H_stuff] * 2);
// Read the component/direction data
{
@@ -667,20 +704,22 @@ void structure::load(const char *filename) {
size_t dims[] = {0, 0, 0};
file.read_size("sigma_cd", &rank, dims, 1);
if (dims[0] != 2 * (nsig[E_stuff] + nsig[H_stuff])) {
- meep::abort("inconsistent data size in structure::load");
+ meep::abort("inconsistent data size for sigma_cd in structure::load");
}
- if (am_master()) {
+ if (am_master() || !single_parallel_file) {
size_t start = 0;
for (int ft = 0; ft < 2; ++ft) {
- size_t count = nsig[ft] * 2;
- file.read_chunk(rank, &start, &count, sigma_cd[ft]);
+ size_t count = sigma_cd[ft].size();
+ file.read_chunk(rank, &start, &count, sigma_cd[ft].data());
start += count;
}
}
- file.prevent_deadlock();
- broadcast(0, sigma_cd[E_stuff], nsig[E_stuff] * 2);
- broadcast(0, sigma_cd[H_stuff], nsig[H_stuff] * 2);
+ if (single_parallel_file) {
+ file.prevent_deadlock();
+ broadcast(0, sigma_cd[E_stuff].data(), nsig[E_stuff] * 2);
+ broadcast(0, sigma_cd[H_stuff].data(), nsig[H_stuff] * 2);
+ }
}
for (int ft = 0; ft < 2; ++ft) {
@@ -730,10 +769,5 @@ void structure::load(const char *filename) {
}
}
}
-
- for (int ft = 0; ft < 2; ++ft) {
- delete[] num_sigmas[ft];
- delete[] sigma_cd[ft];
- }
}
} // namespace meep