diff --git a/doc/docs/Python_User_Interface.md b/doc/docs/Python_User_Interface.md index c539ea90b..f4696fdaf 100644 --- a/doc/docs/Python_User_Interface.md +++ b/doc/docs/Python_User_Interface.md @@ -114,7 +114,6 @@ def __init__(self, filename_prefix=None, output_volume=None, output_single_precision=False, - load_structure='', geometry_center=Vector3<0.0, 0.0, 0.0>, force_all_components=False, split_chunks_evenly=True, @@ -314,11 +313,6 @@ Python. `Vector3` is a `meep` class. that are specified by geometric objects. You should list any materials other than scalar dielectrics that are returned by `material_function` here. -+ **`load_structure` [`string`]** — If not empty, Meep will load the structure - file specified by this string. The file must have been created by - `mp.dump_structure`. Defaults to an empty string. See [Load and Dump - Structure](#load-and-dump-structure) for more information. - + **`chunk_layout` [`string` or `Simulation` instance or `BinaryPartition` class]** — This will cause the `Simulation` to use the chunk layout described by either (1) an `.h5` file (created using `Simulation.dump_chunk_layout`), (2) another @@ -2305,13 +2299,15 @@ is a 1d array of `nfreq` dimensions. These functions dump the raw ε and μ data to disk and load it back for doing multiple simulations with the same materials but different sources etc. The only prerequisite is that the dump/load simulations have the same [chunks](Chunks_and_Symmetry.md) (i.e. the same grid, number of processors, symmetries, and PML). When using `split_chunks_evenly=False`, you must also dump the original chunk layout using `dump_chunk_layout` and load it into the new `Simulation` using the `chunk_layout` parameter. Currently only stores dispersive and non-dispersive $\varepsilon$ and $\mu$ but not nonlinearities. Note that loading data from a file in this way overwrites any `geometry` data passed to the `Simulation` constructor. +For dump_structure and load_structure, when `single_parallel_file=True` (the default) - all chunks write to the same/single file after computing their respective offsets into this file. When set to 'False', each chunk writes writes its data to a separate file that is appropriately named by its chunk index. +
```python -def dump_structure(self, fname): +def dump_structure(self, fname, single_parallel_file=True): ```
@@ -2327,7 +2323,7 @@ Dumps the structure to the file `fname`.
```python -def load_structure(self, fname): +def load_structure(self, fname, single_parallel_file=True): ```
diff --git a/doc/docs/Python_User_Interface.md.in b/doc/docs/Python_User_Interface.md.in index a1c7eec14..4fa2065a9 100644 --- a/doc/docs/Python_User_Interface.md.in +++ b/doc/docs/Python_User_Interface.md.in @@ -351,6 +351,8 @@ And this [`DftNear2Far`](#DftNear2Far) method: These functions dump the raw ε and μ data to disk and load it back for doing multiple simulations with the same materials but different sources etc. The only prerequisite is that the dump/load simulations have the same [chunks](Chunks_and_Symmetry.md) (i.e. the same grid, number of processors, symmetries, and PML). When using `split_chunks_evenly=False`, you must also dump the original chunk layout using `dump_chunk_layout` and load it into the new `Simulation` using the `chunk_layout` parameter. Currently only stores dispersive and non-dispersive $\varepsilon$ and $\mu$ but not nonlinearities. Note that loading data from a file in this way overwrites any `geometry` data passed to the `Simulation` constructor. +For dump_structure and load_structure, when `single_parallel_file=True` (the default) - all chunks write to the same/single file after computing their respective offsets into this file. When set to 'False', each chunk writes writes its data to a separate file that is appropriately named by its chunk index. + @@ Simulation.dump_structure @@ @@ Simulation.load_structure @@ @@ Simulation.dump_chunk_layout @@ diff --git a/python/simulation.py b/python/simulation.py index 62a8b54a4..aa059198b 100644 --- a/python/simulation.py +++ b/python/simulation.py @@ -961,7 +961,6 @@ def __init__(self, filename_prefix=None, output_volume=None, output_single_precision=False, - load_structure='', geometry_center=mp.Vector3(), force_all_components=False, split_chunks_evenly=True, @@ -1158,11 +1157,6 @@ def __init__(self, that are specified by geometric objects. You should list any materials other than scalar dielectrics that are returned by `material_function` here. - + **`load_structure` [`string`]** — If not empty, Meep will load the structure - file specified by this string. The file must have been created by - `mp.dump_structure`. Defaults to an empty string. See [Load and Dump - Structure](#load-and-dump-structure) for more information. - + **`chunk_layout` [`string` or `Simulation` instance or `BinaryPartition` class]** — This will cause the `Simulation` to use the chunk layout described by either (1) an `.h5` file (created using `Simulation.dump_chunk_layout`), (2) another @@ -1235,7 +1229,6 @@ def __init__(self, self.is_cylindrical = False self.material_function = material_function self.epsilon_func = epsilon_func - self.load_structure_file = load_structure self.dft_objects = [] self._is_initialized = False self.force_all_components = force_all_components @@ -1246,6 +1239,9 @@ def __init__(self, self.fragment_stats = None self._output_stats = os.environ.get('MEEP_STATS', None) + self.load_single_parallel_file = True + self.load_structure_file = None + self.special_kz = False if self.cell_size.z == 0 and self.k_point and self.k_point.z != 0: if kz_2d == "complex": @@ -1717,7 +1713,8 @@ def _init_structure(self, k=False): self.num_chunks = self.chunk_layout.numchunks() if self.load_structure_file: - self.load_structure(self.load_structure_file) + self.load_structure( + self.load_structure_file, self.load_single_parallel_file) def _is_outer_boundary(self, vol, direction, side): @@ -1861,22 +1858,22 @@ def set_materials(self, geometry=None, default_material=None): None ) - def dump_structure(self, fname): + def dump_structure(self, fname, single_parallel_file=True): """ Dumps the structure to the file `fname`. """ if self.structure is None: raise ValueError("Fields must be initialized before calling dump_structure") - self.structure.dump(fname) + self.structure.dump(fname, single_parallel_file) - def load_structure(self, fname): + def load_structure(self, fname, single_parallel_file=True): """ Loads a structure from the file `fname`. A file name to load can also be passed to the `Simulation` constructor via the `load_structure` keyword argument. """ if self.structure is None: raise ValueError("Fields must be initialized before calling load_structure") - self.structure.load(fname) + self.structure.load(fname, single_parallel_file) def dump_chunk_layout(self, fname): """ @@ -1898,6 +1895,41 @@ def load_chunk_layout(self, br, source): ## source is either filename (string) self.structure.load_chunk_layout(source, br) + def _get_load_dump_dirname(self, dirname, single_parallel_file): + """ + Get the dirname to dump simulation state to. + """ + if single_parallel_file: + dump_dirname = dirname + else: + # When doing a sharded dump (each process to its own file), use + # the process rank to get a unique name. + dump_dirname = os.path.join(dirname, 'rank%02d' % mp.my_rank()) + return dump_dirname + + def dump(self, dirname, structure=True, single_parallel_file=True): + """ + Dumps simulation state. + """ + dump_dirname = self._get_load_dump_dirname(dirname, single_parallel_file) + os.makedirs(dump_dirname, exist_ok=True) + + if structure: + structure_dump_filename = os.path.join(dump_dirname, 'structure.h5') + self.dump_structure(structure_dump_filename, single_parallel_file) + + def load(self, dirname, structure=True, single_parallel_file=True): + """ + Loads simulation state. + + This should called right after the Simulation object has been created + but before 'init_sim' is called. + """ + dump_dirname = self._get_load_dump_dirname(dirname, single_parallel_file) + self.load_single_parallel_file = single_parallel_file + if structure: + self.load_structure_file = os.path.join(dump_dirname, 'structure.h5') + def init_sim(self): if self._is_initialized: return diff --git a/python/tests/test_simulation.py b/python/tests/test_simulation.py index ed751082e..f85afeb7f 100644 --- a/python/tests/test_simulation.py +++ b/python/tests/test_simulation.py @@ -302,7 +302,7 @@ def test_in_box_volumes(self): sim.field_energy_in_box(tv) sim.field_energy_in_box(v) - def _load_dump_structure(self, chunk_file=False, chunk_sim=False): + def _load_dump_structure(self, chunk_file=False, chunk_sim=False, single_parallel_file=True): from meep.materials import Al resolution = 50 cell = mp.Vector3(5, 5) @@ -329,12 +329,14 @@ def get_ref_field_point(sim): ref_field_points.append(p.real) sim1.run(mp.at_every(5, get_ref_field_point), until=50) - dump_fn = os.path.join(self.temp_dir, 'test_load_dump_structure.h5') + + dump_dirname = os.path.join(self.temp_dir, 'test_load_dump') + sim1.dump(dump_dirname, structure=True, single_parallel_file=single_parallel_file) + dump_chunk_fname = None chunk_layout = None - sim1.dump_structure(dump_fn) if chunk_file: - dump_chunk_fname = os.path.join(self.temp_dir, 'test_load_dump_structure_chunks.h5') + dump_chunk_fname = os.path.join(dump_dirname, 'chunk_layout.h5') sim1.dump_chunk_layout(dump_chunk_fname) chunk_layout = dump_chunk_fname if chunk_sim: @@ -345,9 +347,8 @@ def get_ref_field_point(sim): boundary_layers=pml_layers, sources=[sources], symmetries=symmetries, - chunk_layout=chunk_layout, - load_structure=dump_fn) - + chunk_layout=chunk_layout) + sim.load(dump_dirname, structure=True, single_parallel_file=single_parallel_file) field_points = [] def get_field_point(sim): @@ -362,6 +363,10 @@ def get_field_point(sim): def test_load_dump_structure(self): self._load_dump_structure() + @unittest.skipIf(not mp.with_mpi(), "MPI specific test") + def test_load_dump_structure_sharded(self): + self._load_dump_structure(single_parallel_file=False) + def test_load_dump_chunk_layout_file(self): self._load_dump_structure(chunk_file=True) diff --git a/src/h5file.cpp b/src/h5file.cpp index 484640942..366e5d63c 100644 --- a/src/h5file.cpp +++ b/src/h5file.cpp @@ -150,7 +150,7 @@ void h5file::close_id() { /* note: if parallel is true, then *all* processes must call this, and all processes will use I/O. */ -h5file::h5file(const char *filename_, access_mode m, bool parallel_) { +h5file::h5file(const char *filename_, access_mode m, bool parallel_, bool local_) { cur_dataname = NULL; id = (void *)malloc(sizeof(hid_t)); cur_id = (void *)malloc(sizeof(hid_t)); @@ -160,7 +160,12 @@ h5file::h5file(const char *filename_, access_mode m, bool parallel_) { filename = new char[strlen(filename_) + 1]; strcpy(filename, filename_); mode = m; + + if (parallel_ && local_) { + meep::abort("Can not open h5file (%s) in both parallel and local mode.", filename); + } parallel = parallel_; + local = local_; } h5file::~h5file() { @@ -191,7 +196,9 @@ void h5file::remove() { extending = 0; IF_EXCLUSIVE(if (parallel) all_wait(), (void)0); - if (am_master() && std::remove(filename)) meep::abort("error removing file %s", filename); + if (am_master() || local) { + if (std::remove(filename)) meep::abort("error removing file %s", filename); + } } h5file::extending_s *h5file::get_extending(const char *dataname) const { @@ -226,7 +233,7 @@ void h5file::set_cur(const char *dataname, void *data_id) { void h5file::read_size(const char *dataname, int *rank, size_t *dims, int maxrank) { #ifdef HAVE_HDF5 - if (parallel || am_master()) { + if (parallel || am_master() || local) { hid_t file_id = HID(get_id()), space_id, data_id; CHECK(file_id >= 0, "error opening HDF5 input file"); @@ -253,7 +260,7 @@ void h5file::read_size(const char *dataname, int *rank, size_t *dims, int maxran H5Sclose(space_id); } - if (!parallel) { + if (!parallel && !local) { *rank = broadcast(0, *rank); broadcast(0, dims, *rank); @@ -291,7 +298,7 @@ void *h5file::read(const char *dataname, int *rank, size_t *dims, int maxrank, bool single_precision) { #ifdef HAVE_HDF5 void *data = 0; - if (parallel || am_master()) { + if (parallel || am_master() || local) { int i, N; hid_t file_id = HID(get_id()), space_id, data_id; @@ -345,7 +352,7 @@ void *h5file::read(const char *dataname, int *rank, size_t *dims, int maxrank, if (close_data_id) H5Dclose(data_id); } - if (!parallel) { + if (!parallel && !local) { *rank = broadcast(0, *rank); broadcast(0, dims, *rank); size_t N = 1; @@ -375,7 +382,7 @@ char *h5file::read(const char *dataname) { #ifdef HAVE_HDF5 char *data = 0; int len = 0; - if (parallel || am_master()) { + if (parallel || am_master() || local) { hid_t file_id = HID(get_id()), space_id, data_id, type_id; CHECK(file_id >= 0, "error opening HDF5 input file"); @@ -404,7 +411,7 @@ char *h5file::read(const char *dataname) { H5Dclose(data_id); } - if (!parallel) { + if (!parallel && !local) { len = broadcast(0, len); if (!am_master()) data = new char[len]; broadcast(0, data, len); @@ -442,7 +449,7 @@ void h5file::remove_data(const char *dataname) { if (dataset_exists(dataname)) { /* this is hackish ...need to pester HDF5 developers to make H5Gunlink a collective operation for parallel mode */ - if (!parallel || am_master()) { + if (!parallel || am_master() || local) { H5Gunlink(file_id, dataname); /* delete it */ H5Fflush(file_id, H5F_SCOPE_GLOBAL); } @@ -471,7 +478,7 @@ void h5file::create_data(const char *dataname, int rank, const size_t *dims, boo unset_cur(); remove_data(dataname); // HDF5 gives error if we H5Dcreate existing dataset - if (IF_EXCLUSIVE(!parallel || am_master(), 1)) { + if (local || IF_EXCLUSIVE(!parallel || am_master(), 1)) { hsize_t *dims_copy = new hsize_t[rank1 + append_data]; hsize_t *maxdims = new hsize_t[rank1 + append_data]; hsize_t N = 1; @@ -708,12 +715,12 @@ void h5file::done_writing_chunks() { I'm assuming(?) that non-extensible datasets will use different files, etcetera, for different timesteps. All of this hackery goes away if we just use an MPI-compiled version of HDF5. */ - if (parallel && cur_dataname && get_extending(cur_dataname)) prevent_deadlock(); // closes id + if (parallel && !local && cur_dataname && get_extending(cur_dataname)) prevent_deadlock(); // closes id } void h5file::write(const char *dataname, int rank, const size_t *dims, void *data, bool single_precision) { - if (parallel || am_master()) { + if (parallel || am_master() || local) { size_t *start = new size_t[rank + 1]; for (int i = 0; i < rank; i++) start[i] = 0; @@ -732,7 +739,7 @@ void h5file::write(const char *dataname, int rank, const size_t *dims, void *dat void h5file::write(const char *dataname, const char *data) { #ifdef HAVE_HDF5 - if (IF_EXCLUSIVE(am_master(), parallel || am_master())) { + if (local || IF_EXCLUSIVE(am_master(), parallel || am_master())) { hid_t file_id = HID(get_id()), type_id, data_id, space_id; CHECK(file_id >= 0, "error opening HDF5 output file"); diff --git a/src/meep.hpp b/src/meep.hpp index aeb28211e..7f336280f 100644 --- a/src/meep.hpp +++ b/src/meep.hpp @@ -384,7 +384,13 @@ class h5file { public: typedef enum { READONLY, READWRITE, WRITE } access_mode; - h5file(const char *filename_, access_mode m = READWRITE, bool parallel_ = true); + // If 'parallel_' is true, then we assume that all processes will be doing + // I/O, else we assume that *only* the master is doing I/O and all other + // processes will send/receive data to/from the master. + // If 'local_' is true, then 'parallel_' *must* be false and assumes that + // each process is writing to a local non-shared file and the filename is + // unique to the process. + h5file(const char *filename_, access_mode m = READWRITE, bool parallel_ = true, bool local_ = false); ~h5file(); // closes the files (and any open dataset) bool ok(); @@ -423,6 +429,7 @@ class h5file { access_mode mode; char *filename; bool parallel; + bool local; bool is_cur(const char *dataname); void unset_cur(); @@ -849,9 +856,15 @@ class structure { std::vector get_chunk_owners() const; // structure_dump.cpp - void dump(const char *filename); + // Dump structure to specified file. If 'single_parallel_file' + // is 'true' (the default) - then all processes write to the same/single file + // file after computing their respective offsets into this file. When set to + // 'false', each process writes data for the chunks it owns to a separate + // (process unique) file. + void dump(const char *filename, bool single_parallel_file=true); + void load(const char *filename, bool single_parallel_file=true); + void dump_chunk_layout(const char *filename); - void load(const char *filename); void load_chunk_layout(const char *filename, boundary_region &br); void load_chunk_layout(const std::vector &gvs, const std::vector &ids, @@ -889,7 +902,7 @@ class structure { void changing_chunks(); // Helper methods for dumping and loading susceptibilities void set_chiP_from_file(h5file *file, const char *dataset, field_type ft); - void write_susceptibility_params(h5file *file, const char *dname, int EorH); + void write_susceptibility_params(h5file *file, bool single_parallel_file, const char *dname, int EorH); std::unique_ptr bp; }; diff --git a/src/structure_dump.cpp b/src/structure_dump.cpp index ac57a7c99..d0fcb91a5 100644 --- a/src/structure_dump.cpp +++ b/src/structure_dump.cpp @@ -33,7 +33,9 @@ namespace meep { // Write the parameters required to reconstruct the susceptibility (id, noise_amp (for noisy), // omega_0, gamma, no_omega_0_denominator) -void structure::write_susceptibility_params(h5file *file, const char *dname, int EorH) { +void structure::write_susceptibility_params(h5file *file, + bool single_parallel_file, + const char *dname, int EorH) { // Get number of susceptibility params from first chunk, since all chunks will have // the same susceptibility list. size_t params_ntotal = 0; @@ -47,7 +49,7 @@ void structure::write_susceptibility_params(h5file *file, const char *dname, int size_t params_start = 0; file->create_data(dname, 1, ¶ms_ntotal, false /* append_data */, sizeof(realnum) == sizeof(float) /* single_precision */); - if (am_master()) { + if (am_master() || !single_parallel_file) { susceptibility *sus = chunks[0]->chiP[EorH]; while (sus) { sus->dump_params(file, ¶ms_start); @@ -85,36 +87,60 @@ void structure::dump_chunk_layout(const char *filename) { delete[] nums; } -void structure::dump(const char *filename) { - if (verbosity > 0) master_printf("creating epsilon output file \"%s\"...\n", filename); - - // make/save a num_chunks x NUM_FIELD_COMPONENTS x 5 array counting - // the number of entries in the chi1inv array for each chunk. - size_t *num_chi1inv_ = new size_t[num_chunks * NUM_FIELD_COMPONENTS * 5]; - memset(num_chi1inv_, 0, sizeof(size_t) * size_t(num_chunks * NUM_FIELD_COMPONENTS * 5)); +void structure::dump(const char *filename, bool single_parallel_file) { + if (verbosity > 0) printf("creating epsilon from file \"%s\" (%d)...\n", filename, single_parallel_file); + + /* + * make/save a num_chunks x NUM_FIELD_COMPONENTS x 5 array counting + * the number of entries in the chi1inv array for each chunk. + * + * When 'single_parallel_file' is true, we are creating a single block of data + * for ALL chunks (that are merged using MPI). Otherwise, we are just + * making a copy of just the chunks that are ours. + */ + int my_num_chunks = 0; + for (int i = 0; i < num_chunks; i++) { + my_num_chunks += (single_parallel_file || chunks[i]->is_mine()); + } + size_t num_chi1inv_size = my_num_chunks * NUM_FIELD_COMPONENTS * 5; + std::vector num_chi1inv_(num_chi1inv_size); size_t my_ntot = 0; - for (int i = 0; i < num_chunks; i++) + for (int i = 0, chunk_i = 0; i < num_chunks; i++) { if (chunks[i]->is_mine()) { size_t ntot = chunks[i]->gv.ntot(); - for (int c = 0; c < NUM_FIELD_COMPONENTS; ++c) - for (int d = 0; d < 5; ++d) + for (int c = 0; c < NUM_FIELD_COMPONENTS; ++c) { + for (int d = 0; d < 5; ++d) { if (chunks[i]->chi1inv[c][d]) - my_ntot += (num_chi1inv_[(i * NUM_FIELD_COMPONENTS + c) * 5 + d] = ntot); + my_ntot += (num_chi1inv_[(chunk_i * NUM_FIELD_COMPONENTS + c) * 5 + d] = ntot); + } + } } - size_t *num_chi1inv = new size_t[num_chunks * NUM_FIELD_COMPONENTS * 5]; - sum_to_master(num_chi1inv_, num_chi1inv, num_chunks * NUM_FIELD_COMPONENTS * 5); - delete[] num_chi1inv_; + chunk_i += (chunks[i]->is_mine() || single_parallel_file); + } + + std::vector num_chi1inv; + if (single_parallel_file) { + num_chi1inv.resize(num_chi1inv_size); + sum_to_master(num_chi1inv_.data(), num_chi1inv.data(), num_chi1inv_size); + } else { + num_chi1inv = std::move(num_chi1inv_); + } // determine total dataset size and offset of this process's data - size_t my_start = partial_sum_to_all(my_ntot) - my_ntot; - size_t ntotal = sum_to_all(my_ntot); + size_t my_start = 0; + size_t ntotal = my_ntot; + if (single_parallel_file) { + my_start = partial_sum_to_all(my_ntot) - my_ntot; + ntotal = sum_to_all(my_ntot); + } - h5file file(filename, h5file::WRITE, true); - size_t dims[3] = {(size_t)num_chunks, NUM_FIELD_COMPONENTS, 5}; + h5file file(filename, h5file::WRITE, single_parallel_file, !single_parallel_file); + size_t dims[3] = {(size_t)my_num_chunks, NUM_FIELD_COMPONENTS, 5}; size_t start[3] = {0, 0, 0}; file.create_data("num_chi1inv", 3, dims); - if (am_master()) file.write_chunk(3, start, dims, num_chi1inv); - delete[] num_chi1inv; + if (am_master() || !single_parallel_file) { + file.write_chunk(3, start, dims, num_chi1inv.data()); + } // write the data file.create_data("chi1inv", 1, &ntotal, false /* append_data */, false /* single_precision */); @@ -147,7 +173,7 @@ void structure::dump(const char *filename) { // Write the number of susceptibilites size_t len = 2; file.create_data("num_sus", 1, &len); - if (am_master()) { + if (am_master() || !single_parallel_file) { size_t start = 0; size_t ntot = 2; file.write_chunk(1, &start, &ntot, num_sus); @@ -157,13 +183,11 @@ void structure::dump(const char *filename) { // Get number of non-null sigma entries for each chiP in each chunk. // Assumes each susceptibility in the chiP[E_stuff] list has the // same number of non-null sigma elements. Likewise for chiP[H_stuff] - size_t *my_num_sigmas[2]; - size_t *num_sigmas[2]; + std::vector my_num_sigmas[2]; + std::vector num_sigmas[2]; for (int ft = 0; ft < 2; ++ft) { - my_num_sigmas[ft] = new size_t[num_chunks]; - num_sigmas[ft] = new size_t[num_chunks]; - memset(my_num_sigmas[ft], 0, sizeof(size_t) * num_chunks); - memset(num_sigmas[ft], 0, sizeof(size_t) * num_chunks); + my_num_sigmas[ft].resize(num_chunks); + num_sigmas[ft].resize(num_chunks); } for (int i = 0; i < num_chunks; ++i) { @@ -199,7 +223,7 @@ void structure::dump(const char *filename) { file.prevent_deadlock(); for (int ft = 0; ft < 2; ++ft) { - sum_to_all(my_num_sigmas[ft], num_sigmas[ft], num_chunks); + sum_to_all(my_num_sigmas[ft].data(), num_sigmas[ft].data(), num_chunks); } size_t num_E_sigmas = 0; @@ -210,17 +234,13 @@ void structure::dump(const char *filename) { } // Allocate space for component and direction of non-null sigmas - size_t *my_sigma_cd[2] = {NULL, NULL}; - my_sigma_cd[E_stuff] = new size_t[num_E_sigmas * 2]; - memset(my_sigma_cd[E_stuff], 0, sizeof(size_t) * num_E_sigmas * 2); - my_sigma_cd[H_stuff] = new size_t[num_H_sigmas * 2]; - memset(my_sigma_cd[H_stuff], 0, sizeof(size_t) * num_H_sigmas * 2); - - size_t *sigma_cd[2] = {NULL, NULL}; - sigma_cd[E_stuff] = new size_t[num_E_sigmas * 2]; - memset(sigma_cd[E_stuff], 0, sizeof(size_t) * num_E_sigmas * 2); - sigma_cd[H_stuff] = new size_t[num_H_sigmas * 2]; - memset(sigma_cd[H_stuff], 0, sizeof(size_t) * num_H_sigmas * 2); + std::vector my_sigma_cd[2]; + my_sigma_cd[E_stuff].resize(num_E_sigmas * 2); + my_sigma_cd[H_stuff].resize(num_H_sigmas * 2); + + std::vector sigma_cd[2]; + sigma_cd[E_stuff].resize(num_E_sigmas * 2); + sigma_cd[H_stuff].resize(num_H_sigmas * 2); // Find component and direction of non-null sigmas { @@ -246,8 +266,8 @@ void structure::dump(const char *filename) { } } } - bw_or_to_all(my_sigma_cd[E_stuff], sigma_cd[E_stuff], num_E_sigmas * 2); - bw_or_to_all(my_sigma_cd[H_stuff], sigma_cd[H_stuff], num_H_sigmas * 2); + bw_or_to_all(my_sigma_cd[E_stuff].data(), sigma_cd[E_stuff].data(), num_E_sigmas * 2); + bw_or_to_all(my_sigma_cd[H_stuff].data(), sigma_cd[H_stuff].data(), num_H_sigmas * 2); } // Write location (component and direction) data of non-null sigmas (sigma[c][d]) @@ -256,9 +276,9 @@ void structure::dump(const char *filename) { file.create_data("sigma_cd", 1, &len); size_t start = 0; for (int ft = 0; ft < 2; ++ft) { - if (am_master()) { - size_t count = (ft == 0 ? num_E_sigmas : num_H_sigmas) * 2; - file.write_chunk(1, &start, &count, sigma_cd[ft]); + if (am_master() || !single_parallel_file) { + size_t count = sigma_cd[ft].size(); + file.write_chunk(1, &start, &count, sigma_cd[ft].data()); start += count; } } @@ -292,15 +312,8 @@ void structure::dump(const char *filename) { } } - write_susceptibility_params(&file, "E_params", E_stuff); - write_susceptibility_params(&file, "H_params", H_stuff); - - for (int ft = 0; ft < 2; ++ft) { - delete[] sigma_cd[ft]; - delete[] my_sigma_cd[ft]; - delete[] num_sigmas[ft]; - delete[] my_num_sigmas[ft]; - } + write_susceptibility_params(&file, single_parallel_file, "E_params", E_stuff); + write_susceptibility_params(&file, single_parallel_file, "H_params", H_stuff); } // Reconstruct the chiP lists of susceptibilities from the params hdf5 data @@ -406,7 +419,7 @@ void structure::set_chiP_from_file(h5file *file, const char *dataset, field_type size_t dims[3] = {0, 0, 0}; file->read_size(dataset, &rank, dims, 1); - if (rank != 1) meep::abort("inconsistent data size in structure::load"); + if (rank != 1) meep::abort("inconsistent data size in structure::set_chiP_from_file"); if (dims[0] != 0) { for (int i = 0; i < num_chunks; ++i) { @@ -542,35 +555,49 @@ void structure::load_chunk_layout(const std::vector &gvs, check_chunks(); } -void structure::load(const char *filename) { - h5file file(filename, h5file::READONLY, true); +void structure::load(const char *filename, bool single_parallel_file) { + h5file file(filename, h5file::READONLY, single_parallel_file, !single_parallel_file); - if (verbosity > 0) master_printf("reading epsilon from file \"%s\"...\n", filename); + if (verbosity > 0) printf("reading epsilon from file \"%s\" (%d)...\n", filename, single_parallel_file); + + /* + * make/save a num_chunks x NUM_FIELD_COMPONENTS x 5 array counting + * the number of entries in the chi1inv array for each chunk. + * + * When 'single_parallel_file' is true, we are creating a single block of data + * for ALL chunks (that are merged using MPI). Otherwise, we are just + * making a copy of just the chunks that are ours. + */ + int my_num_chunks = 0; + for (int i = 0; i < num_chunks; i++) { + my_num_chunks += (single_parallel_file || chunks[i]->is_mine()); + } + size_t num_chi1inv_size = my_num_chunks * NUM_FIELD_COMPONENTS * 5; + std::vector num_chi1inv(num_chi1inv_size); - // make/save a num_chunks x NUM_FIELD_COMPONENTS x 5 array counting - // the number of entries in the chi1inv array for each chunk. - size_t *num_chi1inv = new size_t[num_chunks * NUM_FIELD_COMPONENTS * 5]; int rank; - size_t dims[3], _dims[3] = {(size_t)num_chunks, NUM_FIELD_COMPONENTS, 5}; + size_t dims[3], _dims[3] = {(size_t)my_num_chunks, NUM_FIELD_COMPONENTS, 5}; size_t start[3] = {0, 0, 0}; file.read_size("num_chi1inv", &rank, dims, 3); if (rank != 3 || _dims[0] != dims[0] || _dims[1] != dims[1] || _dims[2] != dims[2]) meep::abort("chunk mismatch in structure::load"); - if (am_master()) file.read_chunk(3, start, dims, num_chi1inv); + if (am_master() || !single_parallel_file) file.read_chunk(3, start, dims, num_chi1inv.data()); - file.prevent_deadlock(); - broadcast(0, num_chi1inv, dims[0] * dims[1] * dims[2]); + if (single_parallel_file) { + file.prevent_deadlock(); + broadcast(0, num_chi1inv.data(), dims[0] * dims[1] * dims[2]); + } changing_chunks(); // allocate data as needed and check sizes size_t my_ntot = 0; - for (int i = 0; i < num_chunks; i++) + for (int i = 0, chunk_i = 0; i < num_chunks; i++) { if (chunks[i]->is_mine()) { size_t ntot = chunks[i]->gv.ntot(); for (int c = 0; c < NUM_FIELD_COMPONENTS; ++c) for (int d = 0; d < 5; ++d) { - size_t n = num_chi1inv[(i * NUM_FIELD_COMPONENTS + c) * 5 + d]; + size_t n = num_chi1inv[(chunk_i * NUM_FIELD_COMPONENTS + c) * 5 + d]; if (n == 0) { delete[] chunks[i]->chi1inv[c][d]; chunks[i]->chi1inv[c][d] = NULL; @@ -582,14 +609,25 @@ void structure::load(const char *filename) { } } } + chunk_i += (chunks[i]->is_mine() || single_parallel_file); + } // determine total dataset size and offset of this process's data - size_t my_start = partial_sum_to_all(my_ntot) - my_ntot; - size_t ntotal = sum_to_all(my_ntot); + size_t my_start = 0; + size_t ntotal = my_ntot; + if (single_parallel_file) { + my_start = partial_sum_to_all(my_ntot) - my_ntot; + ntotal = sum_to_all(my_ntot); + } // read the data file.read_size("chi1inv", &rank, dims, 1); - if (rank != 1 || dims[0] != ntotal) meep::abort("inconsistent data size in structure::load"); + if (rank != 1 || dims[0] != ntotal) { + meep::abort( + "inconsistent data size for chi1inv in structure::load (rank, dims[0]): " + "(%d, %zu) != (1, %zu)", + rank, dims[0], ntotal); + } for (int i = 0; i < num_chunks; i++) if (chunks[i]->is_mine()) { size_t ntot = chunks[i]->gv.ntot(); @@ -611,44 +649,44 @@ void structure::load(const char *filename) { size_t dims[] = {0, 0, 0}; file.read_size("num_sus", &rank, dims, 1); if (dims[0] > 0) { - if (am_master()) { + if (am_master() || !single_parallel_file) { size_t start = 0; size_t count = 2; file.read_chunk(rank, &start, &count, num_sus); } } - file.prevent_deadlock(); - broadcast(0, num_sus, 2); + if (single_parallel_file) { + file.prevent_deadlock(); + broadcast(0, num_sus, 2); + } } // Allocate and read non-null sigma entry data - size_t *num_sigmas[2] = {NULL, NULL}; - num_sigmas[E_stuff] = new size_t[num_chunks]; - memset(num_sigmas[E_stuff], 0, sizeof(size_t) * num_chunks); - num_sigmas[H_stuff] = new size_t[num_chunks]; - memset(num_sigmas[H_stuff], 0, sizeof(size_t) * num_chunks); + std::vector num_sigmas[2]; + num_sigmas[E_stuff].resize(num_chunks); + num_sigmas[H_stuff].resize(num_chunks); // Read num_sigmas data { int rank = 0; size_t dims[] = {0, 0, 0}; file.read_size("num_sigmas", &rank, dims, 1); - if (dims[0] != (size_t)num_chunks * 2) { meep::abort("inconsistent data size in structure::load"); } - if (am_master()) { + if (dims[0] != (size_t)num_chunks * 2) { meep::abort("inconsistent data size for num_sigmas in structure::load"); } + if (am_master() || !single_parallel_file) { size_t start = 0; size_t count = num_chunks; - file.read_chunk(rank, &start, &count, num_sigmas[E_stuff]); + file.read_chunk(rank, &start, &count, num_sigmas[E_stuff].data()); start += count; - file.read_chunk(rank, &start, &count, num_sigmas[H_stuff]); + file.read_chunk(rank, &start, &count, num_sigmas[H_stuff].data()); + } + if (single_parallel_file) { + file.prevent_deadlock(); + broadcast(0, num_sigmas[E_stuff].data(), num_chunks); + broadcast(0, num_sigmas[H_stuff].data(), num_chunks); } - file.prevent_deadlock(); - broadcast(0, num_sigmas[E_stuff], num_chunks); - broadcast(0, num_sigmas[H_stuff], num_chunks); } // Allocate space for component and direction data of the non-null susceptibilities - size_t *sigma_cd[2] = {NULL, NULL}; - size_t nsig[2] = {0, 0}; for (int ft = 0; ft < 2; ++ft) { for (int i = 0; i < num_chunks; ++i) { @@ -656,10 +694,9 @@ void structure::load(const char *filename) { } } - sigma_cd[E_stuff] = new size_t[nsig[E_stuff] * 2]; - memset(sigma_cd[E_stuff], 0, sizeof(size_t) * nsig[E_stuff] * 2); - sigma_cd[H_stuff] = new size_t[nsig[H_stuff] * 2]; - memset(sigma_cd[H_stuff], 0, sizeof(size_t) * nsig[H_stuff] * 2); + std::vector sigma_cd[2]; + sigma_cd[E_stuff].resize(nsig[E_stuff] * 2); + sigma_cd[H_stuff].resize(nsig[H_stuff] * 2); // Read the component/direction data { @@ -667,20 +704,22 @@ void structure::load(const char *filename) { size_t dims[] = {0, 0, 0}; file.read_size("sigma_cd", &rank, dims, 1); if (dims[0] != 2 * (nsig[E_stuff] + nsig[H_stuff])) { - meep::abort("inconsistent data size in structure::load"); + meep::abort("inconsistent data size for sigma_cd in structure::load"); } - if (am_master()) { + if (am_master() || !single_parallel_file) { size_t start = 0; for (int ft = 0; ft < 2; ++ft) { - size_t count = nsig[ft] * 2; - file.read_chunk(rank, &start, &count, sigma_cd[ft]); + size_t count = sigma_cd[ft].size(); + file.read_chunk(rank, &start, &count, sigma_cd[ft].data()); start += count; } } - file.prevent_deadlock(); - broadcast(0, sigma_cd[E_stuff], nsig[E_stuff] * 2); - broadcast(0, sigma_cd[H_stuff], nsig[H_stuff] * 2); + if (single_parallel_file) { + file.prevent_deadlock(); + broadcast(0, sigma_cd[E_stuff].data(), nsig[E_stuff] * 2); + broadcast(0, sigma_cd[H_stuff].data(), nsig[H_stuff] * 2); + } } for (int ft = 0; ft < 2; ++ft) { @@ -730,10 +769,5 @@ void structure::load(const char *filename) { } } } - - for (int ft = 0; ft < 2; ++ft) { - delete[] num_sigmas[ft]; - delete[] sigma_cd[ft]; - } } } // namespace meep