diff --git a/python/meep.i b/python/meep.i index 207d6a002..585423ece 100644 --- a/python/meep.i +++ b/python/meep.i @@ -322,17 +322,14 @@ PyObject *_get_farfield(meep::dft_near2far *f, const meep::vec & v) { } // Wrapper around meep::dft_near2far::get_farfields_array - PyObject *_get_farfields_array(meep::dft_near2far *n2f, const meep::volume &where, - double resolution) { +PyObject *_get_farfields_array(meep::dft_near2far *n2f, const meep::volume &where, + double resolution) { // Return value: New reference size_t dims[4] = {1, 1, 1, 1}; int rank = 0; size_t N = 1; - // TODO: Support single precision? - if (sizeof(realnum) == sizeof(float)) abort("Single precision not supported for get_farfields"); - - meep::realnum *EH = n2f->get_farfields_array(where, rank, dims, N, resolution); + double *EH = n2f->get_farfields_array(where, rank, dims, N, resolution); if (!EH) return PyArray_SimpleNew(0, 0, NPY_CDOUBLE); @@ -348,7 +345,7 @@ PyObject *_get_farfield(meep::dft_near2far *f, const meep::vec & v) { } PyObject *py_arr = PyArray_SimpleNew(rank, arr_dims, NPY_DOUBLE); - memcpy(PyArray_DATA((PyArrayObject*)py_arr), EH, sizeof(meep::realnum) * 2 * N * 6 * n2f->freq.size()); + memcpy(PyArray_DATA((PyArrayObject*)py_arr), EH, sizeof(double) * 2 * N * 6 * n2f->freq.size()); delete[] arr_dims; delete[] EH; @@ -455,7 +452,7 @@ size_t _get_dft_data_size(meep::dft_chunk *dc) { return meep::dft_chunks_Ntotal(dc, &istart) / 2; } -void _get_dft_data(meep::dft_chunk *dc, std::complex *cdata, int size) { +void _get_dft_data(meep::dft_chunk *dc, std::complex *cdata, int size) { size_t istart; size_t n = meep::dft_chunks_Ntotal(dc, &istart) / 2; istart /= 2; @@ -473,7 +470,7 @@ void _get_dft_data(meep::dft_chunk *dc, std::complex *cdata, int } } -void _load_dft_data(meep::dft_chunk *dc, std::complex *cdata, int size) { +void _load_dft_data(meep::dft_chunk *dc, std::complex *cdata, int size) { size_t istart; size_t n = meep::dft_chunks_Ntotal(dc, &istart) / 2; istart /= 2; @@ -597,10 +594,10 @@ void _get_eigenmode(meep::fields *f, double frequency, meep::direction d, const #endif %} -%numpy_typemaps(std::complex, NPY_CDOUBLE, int); +%numpy_typemaps(std::complex, NPY_CDOUBLE, int); %numpy_typemaps(std::complex, NPY_CDOUBLE, size_t); -%apply (std::complex *INPLACE_ARRAY1, int DIM1) {(std::complex *cdata, int size)}; +%apply (std::complex *INPLACE_ARRAY1, int DIM1) {(std::complex *cdata, int size)}; // add_volume_source %apply (std::complex *INPLACE_ARRAY3, size_t DIM1, size_t DIM2, size_t DIM3) { @@ -630,8 +627,8 @@ PyObject *_dft_ldos_J(meep::dft_ldos *f); template PyObject *_get_dft_array(meep::fields *f, dft_type dft, meep::component c, int num_freq); size_t _get_dft_data_size(meep::dft_chunk *dc); -void _get_dft_data(meep::dft_chunk *dc, std::complex *cdata, int size); -void _load_dft_data(meep::dft_chunk *dc, std::complex *cdata, int size); +void _get_dft_data(meep::dft_chunk *dc, std::complex *cdata, int size); +void _load_dft_data(meep::dft_chunk *dc, std::complex *cdata, int size); meep::volume_list *make_volume_list(const meep::volume &v, int c, std::complex weight, meep::volume_list *next); @@ -831,7 +828,7 @@ void _get_gradient(PyObject *grad, PyObject *fields_a, PyObject *fields_f, PyObj if (!PyArray_Check(pao_grad)) meep::abort("grad parameter must be numpy array."); if (!PyArray_ISCARRAY(pao_grad)) meep::abort("Numpy grad array must be C-style contiguous."); if (PyArray_NDIM(pao_grad) !=2) {meep::abort("Numpy grad array must have 2 dimensions.");} - meep::realnum * grad_c = (meep::realnum *)PyArray_DATA(pao_grad); + double *grad_c = (double *)PyArray_DATA(pao_grad); int ng = PyArray_DIMS(pao_grad)[1]; // number of design parameters // clean the adjoint fields array @@ -839,14 +836,14 @@ void _get_gradient(PyObject *grad, PyObject *fields_a, PyObject *fields_f, PyObj if (!PyArray_Check(pao_fields_a)) meep::abort("adjoint fields parameter must be numpy array."); if (!PyArray_ISCARRAY(pao_fields_a)) meep::abort("Numpy adjoint fields array must be C-style contiguous."); if (PyArray_NDIM(pao_fields_a) !=1) {meep::abort("Numpy adjoint fields array must have 1 dimension.");} - std::complex * fields_a_c = (std::complex *)PyArray_DATA(pao_fields_a); + std::complex *fields_a_c = (std::complex *)PyArray_DATA(pao_fields_a); // clean the forward fields array PyArrayObject *pao_fields_f = (PyArrayObject *)fields_f; if (!PyArray_Check(pao_fields_f)) meep::abort("forward fields parameter must be numpy array."); if (!PyArray_ISCARRAY(pao_fields_f)) meep::abort("Numpy forward fields array must be C-style contiguous."); if (PyArray_NDIM(pao_fields_f) !=1) {meep::abort("Numpy forward fields array must have 1 dimension.");} - std::complex * fields_f_c = (std::complex *)PyArray_DATA(pao_fields_f); + std::complex *fields_f_c = (std::complex *)PyArray_DATA(pao_fields_f); // scalegrad not currently used double scalegrad = 1.0; @@ -862,12 +859,12 @@ void _get_gradient(PyObject *grad, PyObject *fields_a, PyObject *fields_f, PyObj PyArrayObject *pao_freqs = (PyArrayObject *)frequencies; if (!PyArray_Check(pao_freqs)) meep::abort("frequencies parameter must be numpy array."); if (!PyArray_ISCARRAY(pao_freqs)) meep::abort("Numpy fields array must be C-style contiguous."); - meep::realnum* frequencies_c = (meep::realnum *)PyArray_DATA(pao_freqs); + double *frequencies_c = (double *)PyArray_DATA(pao_freqs); int nf = PyArray_DIMS(pao_freqs)[0]; if (PyArray_DIMS(pao_grad)[0] != nf) meep::abort("Numpy grad array is allocated for %d frequencies; it should be allocated for %d.",PyArray_DIMS(pao_grad)[0],nf); // prepare a geometry_tree - //TODO eventually it would be nice to store the geom tree within the structure object so we don't have to recreate it here + // TODO eventually it would be nice to store the geom tree within the structure object so we don't have to recreate it here geometric_object_list *l; l = new geometric_object_list(); if (!py_list_to_gobj_list(py_geom_list,l)) meep::abort("Unable to convert geometry tree."); diff --git a/python/typemap_utils.cpp b/python/typemap_utils.cpp index 6c6435082..9b00c7738 100644 --- a/python/typemap_utils.cpp +++ b/python/typemap_utils.cpp @@ -488,8 +488,8 @@ static int pymaterial_grid_to_material_grid(PyObject *po, material_data *md) { if (!PyArray_ISCARRAY(pao)) { meep::abort("Numpy array weights must be C-style contiguous."); } - md->weights = new realnum[PyArray_SIZE(pao)]; - memcpy(md->weights, (realnum *)PyArray_DATA(pao), PyArray_SIZE(pao) * sizeof(realnum)); + md->weights = new double[PyArray_SIZE(pao)]; + memcpy(md->weights, (double *)PyArray_DATA(pao), PyArray_SIZE(pao) * sizeof(double)); // if needed, combine sus structs to main object PyObject *py_e_sus_m1 = PyObject_GetAttrString(po_medium1, "E_susceptibilities"); @@ -567,8 +567,8 @@ static int pymaterial_to_material(PyObject *po, material_type *mt) { md = new material_data(); md->which_subclass = material_data::MATERIAL_FILE; md->epsilon_dims[0] = md->epsilon_dims[1] = md->epsilon_dims[2] = 1; - md->epsilon_data = new realnum[PyArray_SIZE(pao)]; - memcpy(md->epsilon_data, (realnum *)PyArray_DATA(pao), PyArray_SIZE(pao) * sizeof(realnum)); + md->epsilon_data = new double[PyArray_SIZE(pao)]; + memcpy(md->epsilon_data, (double *)PyArray_DATA(pao), PyArray_SIZE(pao) * sizeof(double)); for (int i = 0; i < PyArray_NDIM(pao); ++i) { md->epsilon_dims[i] = (size_t)PyArray_DIMS(pao)[i]; diff --git a/scheme/structure.cpp b/scheme/structure.cpp index 7946ddb4a..cd713c047 100644 --- a/scheme/structure.cpp +++ b/scheme/structure.cpp @@ -160,7 +160,7 @@ static geom_box gv2box(const meep::volume &v) { /***********************************************************************/ -static meep::realnum *epsilon_data = NULL; +static double *epsilon_data = NULL; static size_t epsilon_dims[3] = {0, 0, 0}; static void read_epsilon_file(const char *eps_input_file) { @@ -175,7 +175,7 @@ static void read_epsilon_file(const char *eps_input_file) { if (dataname) *(dataname++) = 0; meep::h5file eps_file(fname, meep::h5file::READONLY, false); int rank; // ignored since rank < 3 is equivalent to singleton dims - epsilon_data = eps_file.read(dataname, &rank, epsilon_dims, 3); + epsilon_data = (double *)eps_file.read(dataname, &rank, epsilon_dims, 3, false); master_printf("read in %zdx%zdx%zd epsilon-input-file \"%s\"\n", epsilon_dims[0], epsilon_dims[1], epsilon_dims[2], eps_input_file); delete[] fname; diff --git a/src/array_slice.cpp b/src/array_slice.cpp index 34981c2e1..fcd63dc89 100644 --- a/src/array_slice.cpp +++ b/src/array_slice.cpp @@ -28,8 +28,6 @@ using namespace std; -typedef complex cdouble; - namespace meep { /***************************************************************************/ @@ -62,8 +60,8 @@ typedef struct { // temporary internal storage buffers component *cS; - cdouble *ph; - cdouble *fields; + complex *ph; + complex *fields; ptrdiff_t *offsets; double frequency; @@ -83,13 +81,13 @@ typedef struct { #define UNUSED(x) (void)x // silence compiler warnings /* passthrough field function equivalent to component_fun in h5fields.cpp */ -static cdouble default_field_func(const cdouble *fields, const vec &loc, void *data_) { +static complex default_field_func(const complex *fields, const vec &loc, void *data_) { (void)loc; // unused (void)data_; // unused return fields[0]; } -static double default_field_rfunc(const cdouble *fields, const vec &loc, void *data_) { +static double default_field_rfunc(const complex *fields, const vec &loc, void *data_) { (void)loc; // unused (void)data_; // unused return real(fields[0]); @@ -138,7 +136,7 @@ static void get_array_slice_dimensions_chunkloop(fields_chunk *fc, int ichnk, co typedef struct { component source_component; ivec slice_imin, slice_imax; - cdouble *slice; + complex *slice; } source_slice_data; bool in_range(int imin, int i, int imax) { return (imin <= i && i <= imax); } @@ -186,7 +184,7 @@ static void get_source_slice_chunkloop(fields_chunk *fc, int ichnk, component cg // local index of the symmetry-child grid point within this // slice (that is, if it even lies within the slice) for (size_t npt = 0; npt < s->npts; npt++) { - cdouble amp = s->A[npt]; + complex amp = s->A[npt]; ptrdiff_t chunk_index = s->index[npt]; ivec iloc_parent = fc->gv.iloc(Dielectric, chunk_index); ivec iloc_child = S.transform(iloc_parent, sn) + shift; @@ -275,10 +273,10 @@ static void get_array_slice_chunkloop(fields_chunk *fc, int ichnk, component cgr // tabulated on the slice, exactly as in fields::integrate. // //-----------------------------------------------------------------------// double *slice = 0; - cdouble *zslice = 0; + complex *zslice = 0; bool complex_data = (data->rfun == 0); if (complex_data) - zslice = (cdouble *)data->vslice; + zslice = (complex *)data->vslice; else slice = (double *)data->vslice; @@ -410,8 +408,8 @@ double *array_to_all(double *array, size_t array_size) { return array; } -cdouble *array_to_all(cdouble *array, size_t array_size) { - return (cdouble *)array_to_all((double *)array, 2 * array_size); +complex *array_to_all(complex *array, size_t array_size) { + return (complex *)array_to_all((double *)array, 2 * array_size); } /***************************************************************/ @@ -569,9 +567,9 @@ double *collapse_array(double *array, int *rank, size_t dims[3], direction dirs[ return reduced_array; } -cdouble *collapse_array(cdouble *array, int *rank, size_t dims[3], direction dirs[3], - volume where) { - return (cdouble *)collapse_array((double *)array, rank, dims, dirs, where, 2); +complex *collapse_array(complex *array, int *rank, size_t dims[3], direction dirs[3], + volume where) { + return (complex *)collapse_array((double *)array, rank, dims, dirs, where, 2); } /**********************************************************************/ @@ -606,8 +604,8 @@ void *fields::do_get_array_slice(const volume &where, std::vector com data.frequency = frequency; int num_components = components.size(); data.cS = new component[num_components]; - data.ph = new cdouble[num_components]; - data.fields = new cdouble[num_components]; + data.ph = new complex[num_components]; + data.fields = new complex[num_components]; data.offsets = new ptrdiff_t[2 * num_components]; memset(data.offsets, 0, 2 * num_components * sizeof(ptrdiff_t)); data.empty_dim[0] = data.empty_dim[1] = data.empty_dim[2] = data.empty_dim[3] = @@ -680,11 +678,11 @@ double *fields::get_array_slice(const volume &where, std::vector comp frequency, snap); } -cdouble *fields::get_complex_array_slice(const volume &where, std::vector components, - field_function fun, void *fun_data, cdouble *slice, - double frequency, bool snap) { - return (cdouble *)do_get_array_slice(where, components, fun, 0, fun_data, (void *)slice, - frequency, snap); +complex *fields::get_complex_array_slice(const volume &where, std::vector components, + field_function fun, void *fun_data, complex *slice, + double frequency, bool snap) { + return (complex *)do_get_array_slice(where, components, fun, 0, fun_data, (void *)slice, + frequency, snap); } double *fields::get_array_slice(const volume &where, component c, double *slice, double frequency, @@ -705,16 +703,16 @@ double *fields::get_array_slice(const volume &where, derived_component c, double frequency, snap); } -cdouble *fields::get_complex_array_slice(const volume &where, component c, cdouble *slice, - double frequency, bool snap) { +complex *fields::get_complex_array_slice(const volume &where, component c, complex *slice, + double frequency, bool snap) { std::vector components(1); components[0] = c; - return (cdouble *)do_get_array_slice(where, components, default_field_func, 0, 0, (void *)slice, + return (complex *)do_get_array_slice(where, components, default_field_func, 0, 0, (void *)slice, frequency, snap); } -cdouble *fields::get_source_slice(const volume &where, component source_slice_component, - cdouble *slice) { +complex *fields::get_source_slice(const volume &where, component source_slice_component, + complex *slice) { size_t dims[3]; direction dirs[3]; vec min_max_loc[2]; @@ -725,18 +723,18 @@ cdouble *fields::get_source_slice(const volume &where, component source_slice_co data.source_component = source_slice_component; data.slice_imin = gv.round_vec(min_max_loc[0]); data.slice_imax = gv.round_vec(min_max_loc[1]); - data.slice = new cdouble[slice_size]; + data.slice = new complex[slice_size]; if (!data.slice) abort("%s:%i: out of memory (%zu)", __FILE__, __LINE__, slice_size); loop_in_chunks(get_source_slice_chunkloop, (void *)&data, where, Centered, true, false); - cdouble *slice_collapsed = collapse_array(data.slice, &rank, dims, dirs, where); + complex *slice_collapsed = collapse_array(data.slice, &rank, dims, dirs, where); rank = get_array_slice_dimensions(where, dims, dirs, true, false); slice_size = dims[0] * (rank >= 2 ? dims[1] : 1) * (rank == 3 ? dims[2] : 1); if (slice) { memcpy(slice, slice_collapsed, 2 * slice_size * sizeof(double)); - delete[] (cdouble*) slice_collapsed; + delete[] (complex*) slice_collapsed; } else slice = slice_collapsed; diff --git a/src/dft.cpp b/src/dft.cpp index c48beb99d..3cf95871a 100644 --- a/src/dft.cpp +++ b/src/dft.cpp @@ -26,8 +26,6 @@ using namespace std; -typedef complex cdouble; - namespace meep { std::vector linspace(double freq_min, double freq_max, size_t Nfreq) { @@ -56,7 +54,7 @@ struct dft_chunk_data { // for passing to field::loop_in_chunks as void* dft_chunk::dft_chunk(fields_chunk *fc_, ivec is_, ivec ie_, vec s0_, vec s1_, vec e0_, vec e1_, double dV0_, double dV1_, component c_, bool use_centered_grid, - cdouble phase_factor, ivec shift_, const symmetry &S_, int sn_, + complex phase_factor, ivec shift_, const symmetry &S_, int sn_, const void *data_) { dft_chunk_data *data = (dft_chunk_data *)data_; if (!fc_->f[c_][0]) abort("invalid fields_chunk/component combination in dft_chunk"); @@ -97,11 +95,11 @@ dft_chunk::dft_chunk(fields_chunk *fc_, ivec is_, ivec ie_, vec s0_, vec s1_, ve const int Nomega = data->omega.size(); omega = data->omega; - dft_phase = new complex[Nomega]; + dft_phase = new complex[Nomega]; N = 1; LOOP_OVER_DIRECTIONS(is.dim, d) { N *= (ie.in_direction(d) - is.in_direction(d)) / 2 + 1; } - dft = new complex[N * Nomega]; + dft = new complex[N * Nomega]; for (size_t i = 0; i < N * Nomega; ++i) dft[i] = 0.0; for (int i = 0; i < 5; ++i) @@ -195,7 +193,7 @@ dft_chunk *fields::add_dft(const volume_list *where, const std::vector f dft_chunk *chunks = 0; while (where) { if (is_derived(where->c)) abort("derived_component invalid for dft"); - cdouble stored_weight = where->weight; + complex stored_weight = where->weight; chunks = add_dft(component(where->c), where->v, freq, include_dV_and_interp_weights, stored_weight, chunks); where = where->next; @@ -248,12 +246,12 @@ void dft_chunk::update_dft(double time) { f[cmp] = w * fc->f[c][cmp][idx]; if (numcmp == 2) { - complex fc(f[0], f[1]); + complex fc(f[0], f[1]); for (int i = 0; i < Nomega; ++i) dft[Nomega * idx_dft + i] += dft_phase[i] * fc; } else { - realnum fr = f[0]; + double fr = f[0]; for (int i = 0; i < Nomega; ++i) dft[Nomega * idx_dft + i] += dft_phase[i] * fr; } @@ -302,7 +300,7 @@ void save_dft_hdf5(dft_chunk *dft_chunks, const char *name, h5file *file, const for (dft_chunk *cur = dft_chunks; cur; cur = cur->next_in_dft) { size_t Nchunk = cur->N * cur->omega.size() * 2; - file->write_chunk(1, &istart, &Nchunk, (realnum *)cur->dft); + file->write_chunk(1, &istart, &Nchunk, (double *)cur->dft); istart += Nchunk; } file->done_writing_chunks(); @@ -330,7 +328,7 @@ void load_dft_hdf5(dft_chunk *dft_chunks, const char *name, h5file *file, const for (dft_chunk *cur = dft_chunks; cur; cur = cur->next_in_dft) { size_t Nchunk = cur->N * cur->omega.size() * 2; - file->read_chunk(1, &istart, &Nchunk, (realnum *)cur->dft); + file->read_chunk(1, &istart, &Nchunk, (double *)cur->dft); istart += Nchunk; } } @@ -712,7 +710,7 @@ dft_fields::dft_fields(dft_chunk *chunks_, const double *freq_, size_t Nfreq, co freq[i] = freq_[i]; } -void dft_fields::scale_dfts(cdouble scale) { chunks->scale_dft(scale); } +void dft_fields::scale_dfts(complex scale) { chunks->scale_dft(scale); } void dft_fields::remove() { while (chunks) { @@ -727,7 +725,7 @@ dft_fields fields::add_dft_fields(component *components, int num_components, con bool include_dV_and_interp_weights = false; bool sqrt_dV_and_interp_weights = false; // default option from meep.hpp (expose to user?) std::complex extra_weight = 1.0; // default option from meep.hpp (expose to user?) - cdouble stored_weight = 1.0; + complex stored_weight = 1.0; dft_chunk *chunks = 0; for (int nc = 0; nc < num_components; nc++) chunks = @@ -740,11 +738,11 @@ dft_fields fields::add_dft_fields(component *components, int num_components, con /***************************************************************/ /* chunk-level processing for fields::process_dft_component. */ /***************************************************************/ -cdouble dft_chunk::process_dft_component(int rank, direction *ds, ivec min_corner, ivec max_corner, - int num_freq, h5file *file, double *buffer, int reim, - cdouble *field_array, void *mode1_data, void *mode2_data, - int ic_conjugate, bool retain_interp_weights, - fields *parent) { +complex dft_chunk::process_dft_component(int rank, direction *ds, ivec min_corner, ivec max_corner, + int num_freq, h5file *file, double *buffer, int reim, + complex *field_array, void *mode1_data, void *mode2_data, + int ic_conjugate, bool retain_interp_weights, + fields *parent) { if ((num_freq < 0) || (num_freq > omega.size()-1)) abort("process_dft_component: frequency index %d is outside the range of the frequency array of size %lu",num_freq,omega.size()); @@ -804,7 +802,7 @@ cdouble dft_chunk::process_dft_component(int rank, direction *ds, ivec min_corne /***************************************************************/ vec rshift(shift * (0.5 * fc->gv.inva)); int chunk_idx = 0; - cdouble integral = 0.0; + complex integral = 0.0; component c_conjugate = (component)(ic_conjugate >= 0 ? ic_conjugate : -ic_conjugate); LOOP_OVER_IVECS(fc->gv, is, ie, idx) { IVEC_LOOP_LOC(fc->gv, loc); @@ -812,7 +810,7 @@ cdouble dft_chunk::process_dft_component(int rank, direction *ds, ivec min_corne double w = IVEC_LOOP_WEIGHT(s0, s1, e0, e1, dV0 + dV1 * loop_i2); double interp_w = retain_interp_weights ? IVEC_LOOP_WEIGHT(s0i, s1i, e0i, e1i, 1.0) : 1.0; - cdouble dft_val = + complex dft_val = (c_conjugate == NO_COMPONENT ? w : c_conjugate == Dielectric @@ -822,7 +820,7 @@ cdouble dft_chunk::process_dft_component(int rank, direction *ds, ivec min_corne : dft[omega.size() * (chunk_idx++) + num_freq] / stored_weight); if (include_dV_and_interp_weights) dft_val /= (sqrt_dV_and_interp_weights ? sqrt(w) : w); - cdouble mode1val = 0.0, mode2val = 0.0; + complex mode1val = 0.0, mode2val = 0.0; if (mode1_data) mode1val = eigenmode_amplitude(mode1_data, loc, S.transform(c_conjugate, sn)); if (mode2_data) mode2val = eigenmode_amplitude(mode2_data, loc, S.transform(c, sn)); @@ -833,7 +831,7 @@ cdouble dft_chunk::process_dft_component(int rank, direction *ds, ivec min_corne dft_val *= interp_w; - cdouble val = (mode1_data ? mode1val : dft_val); + complex val = (mode1_data ? mode1val : dft_val); buffer[idx2] = reim ? imag(val) : real(val); } else if (field_array) { @@ -906,11 +904,11 @@ cdouble dft_chunk::process_dft_component(int rank, direction *ds, ivec min_corne /* if where is non-null, only field components inside *where */ /* are processed. */ /***************************************************************/ -cdouble fields::process_dft_component(dft_chunk **chunklists, int num_chunklists, int num_freq, - component c, const char *HDF5FileName, cdouble **pfield_array, - int *array_rank, size_t *array_dims, direction *array_dirs, - void *mode1_data, void *mode2_data, component c_conjugate, - bool *first_component, bool retain_interp_weights) { +complex fields::process_dft_component(dft_chunk **chunklists, int num_chunklists, int num_freq, + component c, const char *HDF5FileName, complex **pfield_array, + int *array_rank, size_t *array_dims, direction *array_dirs, + void *mode1_data, void *mode2_data, component c_conjugate, + bool *first_component, bool retain_interp_weights) { /***************************************************************/ /***************************************************************/ @@ -980,19 +978,17 @@ cdouble fields::process_dft_component(dft_chunk **chunklists, int num_chunklists /* buffer for process-local contributions to HDF5 output files,*/ /* like h5_output_data::buf in h5fields.cpp */ /***************************************************************/ - realnum *buffer = 0; - cdouble *field_array = 0; + double *buffer = 0; + complex *field_array = 0; int reim_max = 0; if (HDF5FileName) { - buffer = new realnum[bufsz]; + buffer = new double[bufsz]; reim_max = 1; } else if (pfield_array) - *pfield_array = field_array = (array_size ? new cdouble[array_size] : 0); + *pfield_array = field_array = (array_size ? new complex[array_size] : 0); - bool append_data = false; - bool single_precision = false; - cdouble overlap = 0.0; + complex overlap = 0.0; for (int reim = 0; reim <= reim_max; reim++) { h5file *file = 0; if (HDF5FileName) { @@ -1000,7 +996,8 @@ cdouble fields::process_dft_component(dft_chunk **chunklists, int num_chunklists *first_component = false; char dataname[100]; snprintf(dataname, 100, "%s_%i.%c", component_name(c), num_freq, reim ? 'i' : 'r'); - file->create_or_extend_data(dataname, rank, dims, append_data, single_precision); + file->create_or_extend_data(dataname, rank, dims, false /* append_data */, + false /* single_precision */); } for (int ncl = 0; ncl < num_chunklists; ncl++) @@ -1021,7 +1018,7 @@ cdouble fields::process_dft_component(dft_chunk **chunklists, int num_chunklists /* on all cores */ /***************************************************************/ #define BUFSIZE 1 << 16 // use 64k buffer - cdouble *buf = new cdouble[BUFSIZE]; + complex *buf = new complex[BUFSIZE]; ptrdiff_t offset = 0; size_t remaining = array_size; while (remaining != 0) { @@ -1029,7 +1026,7 @@ cdouble fields::process_dft_component(dft_chunk **chunklists, int num_chunklists am_now_working_on(MpiAllTime); sum_to_all(field_array + offset, buf, size); finished_working(); - memcpy(field_array + offset, buf, size * sizeof(cdouble)); + memcpy(field_array + offset, buf, size * sizeof(complex)); remaining -= size; offset += size; } @@ -1051,46 +1048,46 @@ cdouble fields::process_dft_component(dft_chunk **chunklists, int num_chunklists /***************************************************************/ /* routines for fetching arrays of dft fields */ /***************************************************************/ -cdouble *collapse_array(cdouble *array, int *rank, size_t dims[3], direction dirs[3], volume where); +complex *collapse_array(complex *array, int *rank, size_t dims[3], direction dirs[3], volume where); -cdouble *fields::get_dft_array(dft_flux flux, component c, int num_freq, int *rank, - size_t dims[3]) { +complex *fields::get_dft_array(dft_flux flux, component c, int num_freq, int *rank, + size_t dims[3]) { dft_chunk *chunklists[2]; chunklists[0] = flux.E; chunklists[1] = flux.H; - cdouble *array; + complex *array; direction dirs[3]; process_dft_component(chunklists, 2, num_freq, c, 0, &array, rank, dims, dirs); return collapse_array(array, rank, dims, dirs, flux.where); } -cdouble *fields::get_dft_array(dft_force force, component c, int num_freq, int *rank, - size_t dims[3]) { +complex *fields::get_dft_array(dft_force force, component c, int num_freq, int *rank, + size_t dims[3]) { dft_chunk *chunklists[3]; chunklists[0] = force.offdiag1; chunklists[1] = force.offdiag2; chunklists[2] = force.diag; - cdouble *array; + complex *array; direction dirs[3]; process_dft_component(chunklists, 3, num_freq, c, 0, &array, rank, dims, dirs); return collapse_array(array, rank, dims, dirs, force.where); } -cdouble *fields::get_dft_array(dft_near2far n2f, component c, int num_freq, int *rank, - size_t dims[3]) { +complex *fields::get_dft_array(dft_near2far n2f, component c, int num_freq, int *rank, + size_t dims[3]) { dft_chunk *chunklists[1]; chunklists[0] = n2f.F; - cdouble *array; + complex *array; direction dirs[3]; process_dft_component(chunklists, 1, num_freq, c, 0, &array, rank, dims, dirs); return collapse_array(array, rank, dims, dirs, n2f.where); } -cdouble *fields::get_dft_array(dft_fields fdft, component c, int num_freq, int *rank, - size_t dims[3]) { +complex *fields::get_dft_array(dft_fields fdft, component c, int num_freq, int *rank, + size_t dims[3]) { dft_chunk *chunklists[1]; chunklists[0] = fdft.chunks; - cdouble *array; + complex *array; direction dirs[3]; process_dft_component(chunklists, 1, num_freq, c, 0, &array, rank, dims, dirs); return collapse_array(array, rank, dims, dirs, fdft.where); @@ -1137,7 +1134,7 @@ void fields::output_dft_components(dft_chunk **chunklists, int num_chunklists, v 0, Ex, &first_component); } else { - cdouble *array = 0; + complex *array = 0; int rank; size_t dims[3]; direction dirs[3]; @@ -1155,8 +1152,7 @@ void fields::output_dft_components(dft_chunk **chunklists, int num_chunklists, v char dataname[100], filename[100]; snprintf(dataname, 100, "%s_%i.%c", component_name(c), num_freq, reim ? 'i' : 'r'); snprintf(filename, 100, "%s%s", HDF5FileName, strstr(".h5", HDF5FileName) ? "" : ".h5"); - bool single_precision = false; - file->write(dataname, rank, dims, real_array, single_precision); + file->write(dataname, rank, dims, real_array, false /* single_precision */); } delete[] real_array; } @@ -1197,7 +1193,7 @@ void fields::output_dft(dft_fields fdft, const char *HDF5FileName) { /***************************************************************/ /***************************************************************/ void fields::get_overlap(void *mode1_data, void *mode2_data, dft_flux flux, int num_freq, - cdouble overlaps[2]) { + complex overlaps[2]) { component cE[2], cH[2]; switch (flux.normal_direction) { case X: @@ -1236,14 +1232,14 @@ void fields::get_overlap(void *mode1_data, void *mode2_data, dft_flux flux, int dft_chunk *chunklists[2]; chunklists[0] = flux.E; chunklists[1] = flux.H; - cdouble ExHy = process_dft_component(chunklists, 2, num_freq, cE[0], 0, 0, 0, 0, 0, mode1_data, - mode2_data, cH[0]); - cdouble EyHx = process_dft_component(chunklists, 2, num_freq, cE[1], 0, 0, 0, 0, 0, mode1_data, - mode2_data, cH[1]); - cdouble HyEx = process_dft_component(chunklists, 2, num_freq, cH[0], 0, 0, 0, 0, 0, mode1_data, - mode2_data, cE[0]); - cdouble HxEy = process_dft_component(chunklists, 2, num_freq, cH[1], 0, 0, 0, 0, 0, mode1_data, - mode2_data, cE[1]); + complex ExHy = process_dft_component(chunklists, 2, num_freq, cE[0], 0, 0, 0, 0, 0, mode1_data, + mode2_data, cH[0]); + complex EyHx = process_dft_component(chunklists, 2, num_freq, cE[1], 0, 0, 0, 0, 0, mode1_data, + mode2_data, cH[1]); + complex HyEx = process_dft_component(chunklists, 2, num_freq, cH[0], 0, 0, 0, 0, 0, mode1_data, + mode2_data, cE[0]); + complex HxEy = process_dft_component(chunklists, 2, num_freq, cH[1], 0, 0, 0, 0, 0, mode1_data, + mode2_data, cE[1]); overlaps[0] = ExHy - EyHx; overlaps[1] = HyEx - HxEy; } diff --git a/src/dft_ldos.cpp b/src/dft_ldos.cpp index d6d46c720..a5348f4e7 100644 --- a/src/dft_ldos.cpp +++ b/src/dft_ldos.cpp @@ -24,8 +24,8 @@ namespace meep { dft_ldos::dft_ldos(double freq_min, double freq_max, int Nfreq) { freq = meep::linspace(freq_min, freq_max, Nfreq); - Fdft = new complex[Nfreq]; - Jdft = new complex[Nfreq]; + Fdft = new complex[Nfreq]; + Jdft = new complex[Nfreq]; for (int i = 0; i < Nfreq; ++i) Fdft[i] = Jdft[i] = 0.0; Jsum = 1.0; @@ -34,8 +34,8 @@ dft_ldos::dft_ldos(double freq_min, double freq_max, int Nfreq) { dft_ldos::dft_ldos(const std::vector freq_) { const size_t Nfreq = freq_.size(); freq = freq_; - Fdft = new complex[Nfreq]; - Jdft = new complex[Nfreq]; + Fdft = new complex[Nfreq]; + Jdft = new complex[Nfreq]; for (size_t i = 0; i < Nfreq; ++i) Fdft[i] = Jdft[i] = 0.0; Jsum = 1.0; @@ -44,8 +44,8 @@ dft_ldos::dft_ldos(const std::vector freq_) { dft_ldos::dft_ldos(const double *freq_, size_t Nfreq) : freq(Nfreq) { for (size_t i = 0; i < Nfreq; ++i) freq[i] = freq_[i]; - Fdft = new complex[Nfreq]; - Jdft = new complex[Nfreq]; + Fdft = new complex[Nfreq]; + Jdft = new complex[Nfreq]; for (size_t i = 0; i < Nfreq; ++i) Fdft[i] = Jdft[i] = 0.0; Jsum = 1.0; diff --git a/src/fields.cpp b/src/fields.cpp index 06a934a05..aee3e7c6e 100644 --- a/src/fields.cpp +++ b/src/fields.cpp @@ -690,11 +690,11 @@ static int mirrorindex(int i, int n) { return i >= n ? 2 * n - 1 - i : (i < 0 ? /* Linearly interpolate a given point in a 3d grid of data. The point coordinates should be in the range [0,1], or at the very least [-1,2] ... anything outside [0,1] is *mirror* reflected into [0,1] */ -realnum linear_interpolate(realnum rx, realnum ry, realnum rz, realnum *data, int nx, int ny, - int nz, int stride) { +double linear_interpolate(double rx, double ry, double rz, double *data, + int nx, int ny, int nz, int stride) { int x, y, z, x2, y2, z2; - realnum dx, dy, dz; + double dx, dy, dz; /* mirror boundary conditions for r just beyond the boundary */ rx = rx < 0.0 ? -rx : (rx > 1.0 ? 1.0 - rx : rx); diff --git a/src/h5fields.cpp b/src/h5fields.cpp index 81fed84c0..041e80810 100644 --- a/src/h5fields.cpp +++ b/src/h5fields.cpp @@ -36,7 +36,7 @@ typedef struct { h5file *file; ivec min_corner, max_corner; int num_chunks; - realnum *buf; + double *buf; size_t bufsz; int rank; direction ds[3]; @@ -261,7 +261,7 @@ void fields::output_hdf5(h5file *file, const char *dataname, int num_fields, file->create_or_extend_data(dataname, rank, dims, append_data, single_precision); - data.buf = new realnum[data.bufsz]; + data.buf = new double[data.bufsz]; data.num_fields = num_fields; data.components = components; diff --git a/src/h5file.cpp b/src/h5file.cpp index f525bd775..d34958557 100644 --- a/src/h5file.cpp +++ b/src/h5file.cpp @@ -282,16 +282,15 @@ static herr_t find_dataset(hid_t group_id, const char *name, void *d) { #endif #ifdef HAVE_HDF5 -#define REALNUM_H5T (sizeof(realnum) == sizeof(double) ? H5T_NATIVE_DOUBLE : H5T_NATIVE_FLOAT) #define SIZE_T_H5T (sizeof(size_t) == 4 ? H5T_NATIVE_UINT32 : H5T_NATIVE_UINT64) #else -#define REALNUM_H5T 0 #define SIZE_T_H5T 0 #endif -realnum *h5file::read(const char *dataname, int *rank, size_t *dims, int maxrank) { +void *h5file::read(const char *dataname, int *rank, size_t *dims, int maxrank, + bool single_precision) { #ifdef HAVE_HDF5 - realnum *data = 0; + void *data = 0; if (parallel || am_master()) { int i, N; hid_t file_id = HID(get_id()), space_id, data_id; @@ -336,8 +335,12 @@ realnum *h5file::read(const char *dataname, int *rank, size_t *dims, int maxrank delete[] dims_copy; H5Sclose(space_id); - data = new realnum[N]; - H5Dread(data_id, REALNUM_H5T, H5S_ALL, H5S_ALL, H5P_DEFAULT, (void *)data); + if (single_precision) + data = new float[N]; + else + data = new double[N]; + H5Dread(data_id, single_precision ? H5T_NATIVE_FLOAT : H5T_NATIVE_DOUBLE, + H5S_ALL, H5S_ALL, H5P_DEFAULT, (void *)data); if (close_data_id) H5Dclose(data_id); } @@ -348,8 +351,16 @@ realnum *h5file::read(const char *dataname, int *rank, size_t *dims, int maxrank size_t N = 1; for (int i = 0; i < *rank; ++i) N *= dims[i]; - if (!am_master()) data = new realnum[N]; - broadcast(0, data, N); + if (!am_master()) { + if (single_precision) + data = new float[N]; + else + data = new double[N]; + } + if (single_precision) + broadcast(0, (float *)data, N); + else + broadcast(0, (double *)data, N); } if (*rank == 1 && dims[0] == 1) *rank = 0; @@ -487,7 +498,7 @@ void h5file::create_data(const char *dataname, int rank, const size_t *dims, boo delete[] dims_copy; - hid_t type_id = single_precision ? H5T_NATIVE_FLOAT : REALNUM_H5T; + hid_t type_id = single_precision ? H5T_NATIVE_FLOAT : H5T_NATIVE_DOUBLE; data_id = H5Dcreate(file_id, dataname, type_id, space_id, prop_id); if (data_id < 0) abort("Error creating dataset"); @@ -601,8 +612,8 @@ void h5file::create_or_extend_data(const char *dataname, int rank, const size_t that have no data can be skipped). */ static void _write_chunk(hid_t data_id, h5file::extending_s *cur, int rank, - const size_t *chunk_start, const size_t *chunk_dims, hid_t datatype, - void *data) { + const size_t *chunk_start, const size_t *chunk_dims, + hid_t datatype, void *data) { #ifdef HAVE_HDF5 int i; bool do_write = true; @@ -672,9 +683,15 @@ static void _write_chunk(hid_t data_id, h5file::extending_s *cur, int rank, } void h5file::write_chunk(int rank, const size_t *chunk_start, const size_t *chunk_dims, - realnum *data) { - _write_chunk(HID(cur_id), get_extending(cur_dataname), rank, chunk_start, chunk_dims, REALNUM_H5T, - (void *)data); + float *data) { + _write_chunk(HID(cur_id), get_extending(cur_dataname), rank, chunk_start, chunk_dims, + H5T_NATIVE_FLOAT, data); +} + +void h5file::write_chunk(int rank, const size_t *chunk_start, const size_t *chunk_dims, + double *data) { + _write_chunk(HID(cur_id), get_extending(cur_dataname), rank, chunk_start, chunk_dims, + H5T_NATIVE_DOUBLE, data); } void h5file::write_chunk(int rank, const size_t *chunk_start, const size_t *chunk_dims, @@ -694,14 +711,19 @@ void h5file::done_writing_chunks() { if (parallel && cur_dataname && get_extending(cur_dataname)) prevent_deadlock(); // closes id } -void h5file::write(const char *dataname, int rank, const size_t *dims, realnum *data, +void h5file::write(const char *dataname, int rank, const size_t *dims, void *data, bool single_precision) { if (parallel || am_master()) { size_t *start = new size_t[rank + 1]; for (int i = 0; i < rank; i++) start[i] = 0; create_data(dataname, rank, dims, false, single_precision); - if (am_master()) write_chunk(rank, start, dims, data); + if (am_master()) { + if (single_precision) + write_chunk(rank, start, dims, (float *)data); + else + write_chunk(rank, start, dims, (double *)data); + } done_writing_chunks(); unset_cur(); delete[] start; @@ -718,7 +740,6 @@ void h5file::write(const char *dataname, const char *data) { remove_data(dataname); // HDF5 gives error if we H5Dcreate existing dataset type_id = H5Tcopy(H5T_C_S1); - ; H5Tset_size(type_id, strlen(data) + 1); space_id = H5Screate(H5S_SCALAR); @@ -803,8 +824,15 @@ static void _read_chunk(hid_t data_id, int rank, const size_t *chunk_start, } void h5file::read_chunk(int rank, const size_t *chunk_start, const size_t *chunk_dims, - realnum *data) { - _read_chunk(HID(cur_id), rank, chunk_start, chunk_dims, REALNUM_H5T, (void *)data); + float *data) { + _read_chunk(HID(cur_id), rank, chunk_start, chunk_dims, + H5T_NATIVE_FLOAT, data); +} + +void h5file::read_chunk(int rank, const size_t *chunk_start, const size_t *chunk_dims, + double *data) { + _read_chunk(HID(cur_id), rank, chunk_start, chunk_dims, + H5T_NATIVE_DOUBLE, data); } void h5file::read_chunk(int rank, const size_t *chunk_start, const size_t *chunk_dims, diff --git a/src/loop_in_chunks.cpp b/src/loop_in_chunks.cpp index ad1e850c3..8a143c9dc 100644 --- a/src/loop_in_chunks.cpp +++ b/src/loop_in_chunks.cpp @@ -213,7 +213,7 @@ void chunkloop_field_components::update_values(ptrdiff_t idx) { ptrdiff_t ofs1 = offsets[2 * nc], ofs2 = offsets[2 * nc + 1]; double favg[2] = {0.0, 0.0}; // real, imag parts for (int reim = 0; reim < 2; reim++) { - const double *fgrid = fc->f[cparent][reim]; + const realnum *fgrid = fc->f[cparent][reim]; if (!fgrid) continue; favg[reim] = 0.25 * (fgrid[idx] + fgrid[idx + ofs1] + fgrid[idx + ofs2] + fgrid[idx + ofs1 + ofs2]); diff --git a/src/material_data.hpp b/src/material_data.hpp index 104246b42..2cc3690fe 100644 --- a/src/material_data.hpp +++ b/src/material_data.hpp @@ -182,16 +182,16 @@ struct material_data { bool do_averaging; // these fields used only if which_subclass==MATERIAL_FILE - meep::realnum *epsilon_data; + double *epsilon_data; size_t epsilon_dims[3]; // these fields used only if which_subclass==MATERIAL_GRID vector3 grid_size; - meep::realnum *weights; + double *weights; medium_struct medium_1; medium_struct medium_2; - meep::realnum beta; - meep::realnum eta; + double beta; + double eta; /* There are several possible scenarios when material grids overlap -- these different scenarios enable different applications. diff --git a/src/meep.hpp b/src/meep.hpp index 079405496..d24f85a06 100644 --- a/src/meep.hpp +++ b/src/meep.hpp @@ -28,13 +28,12 @@ namespace meep { -/* We use the type realnum for large arrays, e.g. the fields. - For local variables and small arrays, we use double precision, - but for things like the fields we can often get away with - single precision (since the errors are not dominated by roundoff). - However, we will default to using double-precision for large - arrays, as the factor of two in memory and the moderate increase - in speed currently don't seem worth the loss of precision. */ +/* The (time-domain) fields arrays of the fields_chunk class as well + as the material arrays of the structure_chunk class chi1inv, + chi3, sigma, etc. can be stored using single-precision floating + point rather than double precision (the default). The reduced + precision can provide for up to a factor of 2X improvement in the + time-stepping rate with generally negligible loss in accuracy. */ #define MEEP_SINGLE 0 // 1 for single precision, 0 for double #if MEEP_SINGLE typedef float realnum; @@ -99,12 +98,12 @@ class susceptibility { bool operator==(const susceptibility &s) const { return id == s.id; }; // Returns the 1st order nonlinear susceptibility (generic) - virtual std::complex chi1(double freq, double sigma = 1); + virtual std::complex chi1(realnum freq, realnum sigma = 1); // update all of the internal polarization state given the W field // at the current time step, possibly the previous field W_prev, etc. virtual void update_P(realnum *W[NUM_FIELD_COMPONENTS][2], - realnum *W_prev[NUM_FIELD_COMPONENTS][2], double dt, const grid_volume &gv, + realnum *W_prev[NUM_FIELD_COMPONENTS][2], realnum dt, const grid_volume &gv, void *P_internal_data) const { (void)P; (void)W; @@ -147,7 +146,7 @@ class susceptibility { return 0; } virtual void delete_internal_data(void *data) const; - virtual void init_internal_data(realnum *W[NUM_FIELD_COMPONENTS][2], double dt, + virtual void init_internal_data(realnum *W[NUM_FIELD_COMPONENTS][2], realnum dt, const grid_volume &gv, void *data) const { (void)W; (void)dt; @@ -236,23 +235,23 @@ class susceptibility { denominator to obtain a Drude model. */ class lorentzian_susceptibility : public susceptibility { public: - lorentzian_susceptibility(double omega_0, double gamma, bool no_omega_0_denominator = false) + lorentzian_susceptibility(realnum omega_0, realnum gamma, bool no_omega_0_denominator = false) : omega_0(omega_0), gamma(gamma), no_omega_0_denominator(no_omega_0_denominator) {} virtual susceptibility *clone() const { return new lorentzian_susceptibility(*this); } virtual ~lorentzian_susceptibility() {} // Returns the 1st order nonlinear susceptibility - virtual std::complex chi1(double freq, double sigma = 1); + virtual std::complex chi1(realnum freq, realnum sigma = 1); virtual void update_P(realnum *W[NUM_FIELD_COMPONENTS][2], - realnum *W_prev[NUM_FIELD_COMPONENTS][2], double dt, const grid_volume &gv, + realnum *W_prev[NUM_FIELD_COMPONENTS][2], realnum dt, const grid_volume &gv, void *P_internal_data) const; virtual void subtract_P(field_type ft, realnum *f_minus_p[NUM_FIELD_COMPONENTS][2], void *P_internal_data) const; virtual void *new_internal_data(realnum *W[NUM_FIELD_COMPONENTS][2], const grid_volume &gv) const; - virtual void init_internal_data(realnum *W[NUM_FIELD_COMPONENTS][2], double dt, + virtual void init_internal_data(realnum *W[NUM_FIELD_COMPONENTS][2], realnum dt, const grid_volume &gv, void *data) const; virtual void *copy_internal_data(void *data) const; @@ -264,7 +263,7 @@ class lorentzian_susceptibility : public susceptibility { virtual int get_num_params() { return 4; } protected: - double omega_0, gamma; + realnum omega_0, gamma; bool no_omega_0_denominator; }; @@ -272,21 +271,21 @@ class lorentzian_susceptibility : public susceptibility { includes white noise with a specified amplitude */ class noisy_lorentzian_susceptibility : public lorentzian_susceptibility { public: - noisy_lorentzian_susceptibility(double noise_amp, double omega_0, double gamma, + noisy_lorentzian_susceptibility(realnum noise_amp, realnum omega_0, realnum gamma, bool no_omega_0_denominator = false) : lorentzian_susceptibility(omega_0, gamma, no_omega_0_denominator), noise_amp(noise_amp) {} virtual susceptibility *clone() const { return new noisy_lorentzian_susceptibility(*this); } virtual void update_P(realnum *W[NUM_FIELD_COMPONENTS][2], - realnum *W_prev[NUM_FIELD_COMPONENTS][2], double dt, const grid_volume &gv, + realnum *W_prev[NUM_FIELD_COMPONENTS][2], realnum dt, const grid_volume &gv, void *P_internal_data) const; virtual void dump_params(h5file *h5f, size_t *start); virtual int get_num_params() { return 5; } protected: - double noise_amp; + realnum noise_amp; }; typedef enum { GYROTROPIC_LORENTZIAN, GYROTROPIC_DRUDE, GYROTROPIC_SATURATED } gyrotropy_model; @@ -294,18 +293,18 @@ typedef enum { GYROTROPIC_LORENTZIAN, GYROTROPIC_DRUDE, GYROTROPIC_SATURATED } g /* gyrotropic susceptibility */ class gyrotropic_susceptibility : public susceptibility { public: - gyrotropic_susceptibility(const vec &bias, double omega_0, double gamma, double alpha = 0.0, + gyrotropic_susceptibility(const vec &bias, realnum omega_0, realnum gamma, realnum alpha = 0.0, gyrotropy_model model = GYROTROPIC_LORENTZIAN); virtual susceptibility *clone() const { return new gyrotropic_susceptibility(*this); } virtual void *new_internal_data(realnum *W[NUM_FIELD_COMPONENTS][2], const grid_volume &gv) const; - virtual void init_internal_data(realnum *W[NUM_FIELD_COMPONENTS][2], double dt, + virtual void init_internal_data(realnum *W[NUM_FIELD_COMPONENTS][2], realnum dt, const grid_volume &gv, void *data) const; virtual void *copy_internal_data(void *data) const; virtual bool needs_P(component c, int cmp, realnum *W[NUM_FIELD_COMPONENTS][2]) const; virtual void update_P(realnum *W[NUM_FIELD_COMPONENTS][2], - realnum *W_prev[NUM_FIELD_COMPONENTS][2], double dt, const grid_volume &gv, + realnum *W_prev[NUM_FIELD_COMPONENTS][2], realnum dt, const grid_volume &gv, void *P_internal_data) const; virtual void subtract_P(field_type ft, realnum *f_minus_p[NUM_FIELD_COMPONENTS][2], void *P_internal_data) const; @@ -323,8 +322,8 @@ class gyrotropic_susceptibility : public susceptibility { } protected: - double gyro_tensor[3][3]; - double omega_0, gamma, alpha; + realnum gyro_tensor[3][3]; + realnum omega_0, gamma, alpha; gyrotropy_model model; }; @@ -339,14 +338,14 @@ class multilevel_susceptibility : public susceptibility { virtual ~multilevel_susceptibility(); virtual void update_P(realnum *W[NUM_FIELD_COMPONENTS][2], - realnum *W_prev[NUM_FIELD_COMPONENTS][2], double dt, const grid_volume &gv, + realnum *W_prev[NUM_FIELD_COMPONENTS][2], realnum dt, const grid_volume &gv, void *P_internal_data) const; virtual void subtract_P(field_type ft, realnum *f_minus_p[NUM_FIELD_COMPONENTS][2], void *P_internal_data) const; virtual void *new_internal_data(realnum *W[NUM_FIELD_COMPONENTS][2], const grid_volume &gv) const; - virtual void init_internal_data(realnum *W[NUM_FIELD_COMPONENTS][2], double dt, + virtual void init_internal_data(realnum *W[NUM_FIELD_COMPONENTS][2], realnum dt, const grid_volume &gv, void *data) const; virtual void *copy_internal_data(void *data) const; virtual void delete_internal_data(void *data) const; @@ -386,10 +385,10 @@ class h5file { bool ok(); - realnum *read(const char *dataname, int *rank, size_t *dims, int maxrank); - void write(const char *dataname, int rank, const size_t *dims, realnum *data, + void *read(const char *dataname, int *rank, size_t *dims, int maxrank, + bool single_precision = true); + void write(const char *dataname, int rank, const size_t *dims, void *data, bool single_precision = true); - char *read(const char *dataname); void write(const char *dataname, const char *data); @@ -397,13 +396,15 @@ class h5file { bool single_precision = true); void extend_data(const char *dataname, int rank, const size_t *dims); void create_or_extend_data(const char *dataname, int rank, const size_t *dims, bool append_data, - bool single_precision); - void write_chunk(int rank, const size_t *chunk_start, const size_t *chunk_dims, realnum *data); + bool single_precision = true); + void write_chunk(int rank, const size_t *chunk_start, const size_t *chunk_dims, float *data); + void write_chunk(int rank, const size_t *chunk_start, const size_t *chunk_dims, double *data); void write_chunk(int rank, const size_t *chunk_start, const size_t *chunk_dims, size_t *data); void done_writing_chunks(); void read_size(const char *dataname, int *rank, size_t *dims, int maxrank); - void read_chunk(int rank, const size_t *chunk_start, const size_t *chunk_dims, realnum *data); + void read_chunk(int rank, const size_t *chunk_start, const size_t *chunk_dims, float *data); + void read_chunk(int rank, const size_t *chunk_start, const size_t *chunk_dims, double *data); void read_chunk(int rank, const size_t *chunk_start, const size_t *chunk_dims, size_t *data); void remove(); @@ -566,14 +567,14 @@ class structure; class structure_chunk { public: - double a, Courant, dt; // res. a, Courant num., and timestep dt=Courant/a + double a, Courant, dt; // resolution a, Courant number, and timestep dt=Courant/a realnum *chi3[NUM_FIELD_COMPONENTS], *chi2[NUM_FIELD_COMPONENTS]; realnum *chi1inv[NUM_FIELD_COMPONENTS][5]; bool trivial_chi1inv[NUM_FIELD_COMPONENTS][5]; realnum *conductivity[NUM_FIELD_COMPONENTS][5]; realnum *condinv[NUM_FIELD_COMPONENTS][5]; // cache of 1/(1+conduct*dt/2) bool condinv_stale; // true if condinv needs to be recomputed - double *sig[6], *kap[6], *siginv[6]; // conductivity array for uPML + realnum *sig[6], *kap[6], *siginv[6]; // conductivity array for uPML int sigsize[6]; // conductivity array size grid_volume gv; // integer grid_volume that could be bigger than non-overlapping v below volume v; @@ -715,7 +716,7 @@ class structure { int num_chunks; bool shared_chunks; // whether modifications to chunks will be visible to fields objects grid_volume gv, user_volume; - double a, Courant, dt; // res. a, Courant num., and timestep dt=Courant/a + double a, Courant, dt; // resolution a, Courant number, and timestep dt=Courant/a volume v; symmetry S; const char *outdir; @@ -1030,7 +1031,7 @@ class dft_chunk { component c; // component to DFT (possibly transformed by symmetry) size_t N; // number of spatial points (on epsilon grid) - std::complex *dft; // N x Nomega array of DFT values. + std::complex *dft; // N x Nomega array of DFT values. class dft_chunk *next_in_chunk; // per-fields_chunk list of DFT chunks class dft_chunk *next_in_dft; // next for this particular DFT vol./component @@ -1081,7 +1082,7 @@ class dft_chunk { int sn; // cache of exp(iwt) * scale, of length Nomega - std::complex *dft_phase; + std::complex *dft_phase; ptrdiff_t avg1, avg2; // index offsets for average to get epsilon grid @@ -1200,7 +1201,7 @@ class dft_force { }; -struct sourcedata{ +struct sourcedata { component near_fd_comp; std::vector idx_arr; int fc_idx; @@ -1234,8 +1235,8 @@ class dft_near2far { void farfield_lowlevel(std::complex *F, const vec &x); /* Return a newly allocated array with all far fields */ - realnum *get_farfields_array(const volume &where, int &rank, size_t *dims, size_t &N, - double resolution); + double *get_farfields_array(const volume &where, int &rank, size_t *dims, size_t &N, + double resolution); /* output far fields on a grid to an HDF5 file */ void save_farfields(const char *fname, const char *prefix, const volume &where, @@ -1290,8 +1291,8 @@ class dft_ldos { std::vector freq; private: - std::complex *Fdft; // Nomega array of field * J*(x) DFT values - std::complex *Jdft; // Nomega array of J(t) DFT values + std::complex *Fdft; // Nomega array of field * J*(x) DFT values + std::complex *Jdft; // Nomega array of J(t) DFT values double Jsum; // sum of |J| over all points }; @@ -1357,7 +1358,7 @@ class fields_chunk { int npol[NUM_FIELD_TYPES]; // only E_stuff and H_stuff are used polarization_state *pol[NUM_FIELD_TYPES]; // array of npol[i] polarization_state structures - double a, Courant, dt; // res. a, Courant num., and timestep dt=Courant/a + double a, Courant, dt; // resolution a, Courant number, and timestep dt=Courant/a grid_volume gv; volume v; double m; // angular dependence in cyl. coords @@ -2122,11 +2123,6 @@ make_casimir_gfunc(double T, double dt, double sigma, field_type ft, std::complex *make_casimir_gfunc_kz(double T, double dt, double sigma, field_type ft); -#if MEEP_SINGLE -// in mympi.cpp ... must be here in order to use realnum type -void broadcast(int from, realnum *data, int size); -#endif - // random number generation: random.cpp void set_random_seed(unsigned long seed); void restore_random_seed(); @@ -2152,8 +2148,8 @@ std::complex eigenmode_amplitude(void *vedata, const vec &p, component c double get_group_velocity(void *vedata); vec get_k(void *vedata); -realnum linear_interpolate(realnum rx, realnum ry, realnum rz, realnum *data, int nx, int ny, - int nz, int stride); +double linear_interpolate(double rx, double ry, double rz, double *data, + int nx, int ny, int nz, int stride); // binary tree class for importing layout of chunk partition class binary_partition { diff --git a/src/meep/mympi.hpp b/src/meep/mympi.hpp index 6abdbce38..80a4e704d 100644 --- a/src/meep/mympi.hpp +++ b/src/meep/mympi.hpp @@ -55,6 +55,7 @@ inline int am_master() { return my_rank() == 0; } bool with_mpi(); void send(int from, int to, double *data, int size = 1); +void broadcast(int from, float *data, int size); void broadcast(int from, double *data, int size); void broadcast(int from, char *data, int size); void broadcast(int from, int *data, int size); diff --git a/src/meep_internals.hpp b/src/meep_internals.hpp index e6e70b659..4f78b7569 100644 --- a/src/meep_internals.hpp +++ b/src/meep_internals.hpp @@ -79,41 +79,41 @@ symmetry r_to_minus_r_symmetry(int m); // functions in step_generic.cpp: -void step_curl(realnum *f, component c, const realnum *g1, const realnum *g2, ptrdiff_t s1, - ptrdiff_t s2, // strides for g1/g2 shift - const grid_volume &gv, double dtdx, direction dsig, const double *sig, - const double *kap, const double *siginv, realnum *fu, direction dsigu, - const double *sigu, const double *kapu, const double *siginvu, double dt, +void step_curl(realnum *f, component c, const realnum *g1, const realnum *g2, + ptrdiff_t s1, ptrdiff_t s2, // strides for g1/g2 shift + const grid_volume &gv, realnum dtdx, direction dsig, const realnum *sig, + const realnum *kap, const realnum *siginv, realnum *fu, direction dsigu, + const realnum *sigu, const realnum *kapu, const realnum *siginvu, realnum dt, const realnum *cnd, const realnum *cndinv, realnum *fcnd); void step_update_EDHB(realnum *f, component fc, const grid_volume &gv, const realnum *g, const realnum *g1, const realnum *g2, const realnum *u, const realnum *u1, const realnum *u2, ptrdiff_t s, ptrdiff_t s1, ptrdiff_t s2, const realnum *chi2, const realnum *chi3, realnum *fw, direction dsigw, - const double *sigw, const double *kapw); + const realnum *sigw, const realnum *kapw); -void step_beta(realnum *f, component c, const realnum *g, const grid_volume &gv, double betadt, - direction dsig, const double *siginv, realnum *fu, direction dsigu, - const double *siginvu, const realnum *cndinv, realnum *fcnd); +void step_beta(realnum *f, component c, const realnum *g, const grid_volume &gv, realnum betadt, + direction dsig, const realnum *siginv, realnum *fu, direction dsigu, + const realnum *siginvu, const realnum *cndinv, realnum *fcnd); // functions in step_generic_stride1.cpp, generated from step_generic.cpp: -void step_curl_stride1(realnum *f, component c, const realnum *g1, const realnum *g2, ptrdiff_t s1, - ptrdiff_t s2, // strides for g1/g2 shift - const grid_volume &gv, double dtdx, direction dsig, const double *sig, - const double *kap, const double *siginv, realnum *fu, direction dsigu, - const double *sigu, const double *kapu, const double *siginvu, double dt, +void step_curl_stride1(realnum *f, component c, const realnum *g1, const realnum *g2, + ptrdiff_t s1, ptrdiff_t s2, // strides for g1/g2 shift + const grid_volume &gv, realnum dtdx, direction dsig, const realnum *sig, + const realnum *kap, const realnum *siginv, realnum *fu, direction dsigu, + const realnum *sigu, const realnum *kapu, const realnum *siginvu, realnum dt, const realnum *cnd, const realnum *cndinv, realnum *fcnd); void step_update_EDHB_stride1(realnum *f, component fc, const grid_volume &gv, const realnum *g, const realnum *g1, const realnum *g2, const realnum *u, const realnum *u1, const realnum *u2, ptrdiff_t s, ptrdiff_t s1, ptrdiff_t s2, const realnum *chi2, const realnum *chi3, realnum *fw, - direction dsigw, const double *sigw, const double *kapw); + direction dsigw, const realnum *sigw, const realnum *kapw); void step_beta_stride1(realnum *f, component c, const realnum *g, const grid_volume &gv, - double betadt, direction dsig, const double *siginv, realnum *fu, - direction dsigu, const double *siginvu, const realnum *cndinv, + realnum betadt, direction dsig, const realnum *siginv, realnum *fu, + direction dsigu, const realnum *siginvu, const realnum *cndinv, realnum *fcnd); /* macro wrappers around time-stepping functions: for performance reasons, diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp index a8d93f182..db120aeee 100644 --- a/src/meepgeom.cpp +++ b/src/meepgeom.cpp @@ -307,18 +307,17 @@ bool is_metal(meep::field_type ft, const material_type *material) { } } -meep::realnum material_grid_val(vector3 p, material_data *md) { +double material_grid_val(vector3 p, material_data *md) { // given the relative location, p, interpolate the material grid point. if (!is_material_grid(md)) { meep::abort("Invalid material grid detected.\n"); } - meep::realnum u; - u = meep::linear_interpolate(p.x, p.y, p.z, md->weights, md->grid_size.x, - md->grid_size.y, md->grid_size.z, 1); - return u; + return meep::linear_interpolate(p.x, p.y, p.z, md->weights, md->grid_size.x, + md->grid_size.y, md->grid_size.z, 1); + } -meep::realnum matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md) { - meep::realnum uprod = 1.0, umin = 1.0, usum = 0.0, udefault = 0.0, u; +double matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md) { + double uprod = 1.0, umin = 1.0, usum = 0.0, udefault = 0.0, u; int matgrid_val_count = 0; // iterate through object tree at current point @@ -350,15 +349,15 @@ meep::realnum matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md ++matgrid_val_count; } - meep::realnum u_interp = (md->material_grid_kinds == material_data::U_MIN - ? umin - : (md->material_grid_kinds == material_data::U_PROD - ? uprod - : (md->material_grid_kinds == material_data::U_MEAN ? usum / matgrid_val_count - : udefault))); + double u_interp = (md->material_grid_kinds == material_data::U_MIN + ? umin + : (md->material_grid_kinds == material_data::U_PROD + ? uprod + : (md->material_grid_kinds == material_data::U_MEAN ? usum / matgrid_val_count + : udefault))); // project interpolated grid point - meep::realnum u_proj; + double u_proj; if (md->beta != 0) { double tanh_beta_eta = tanh(md->beta*md->eta); u_proj = (tanh_beta_eta + tanh(md->beta*(u_interp-md->eta))) / @@ -394,7 +393,7 @@ static void interp_tensors(vector3 diag_in_1, vector3 offdiag_in_1, vector3 diag offdiag_out->z = offdiag_in_1.z + u * (offdiag_in_2.z - offdiag_in_1.z); } // return material of the point p from the material grid -void epsilon_material_grid(material_data *md, meep::realnum u) { +void epsilon_material_grid(material_data *md, double u) { // NOTE: assume p lies on normalized grid within (0,1) if (!(md->weights)) meep::abort("material params were not initialized!"); @@ -701,7 +700,7 @@ void geom_epsilon::get_material_pt(material_type &material, const meep::vec &r) switch (md->which_subclass) { // material grid: interpolate onto user specified material grid to get properties at r case material_data::MATERIAL_GRID: - meep::realnum u; + double u; int oi; geom_box_tree tp; @@ -1409,7 +1408,7 @@ void geom_epsilon::sigma_row(meep::component c, double sigrow[3], const meep::ve } if (mat->which_subclass == material_data::MATERIAL_GRID) { - meep::realnum u; + double u; int oi; geom_box_tree tp; @@ -1796,7 +1795,7 @@ material_type make_file_material(const char *eps_input_file) { if (dataname) *(dataname++) = 0; meep::h5file eps_file(fname, meep::h5file::READONLY, false); int rank; // ignored since rank < 3 is equivalent to singleton dims - md->epsilon_data = eps_file.read(dataname, &rank, md->epsilon_dims, 3); + md->epsilon_data = (double *)eps_file.read(dataname, &rank, md->epsilon_dims, 3, false); if (meep::verbosity > 0) master_printf("read in %zdx%zdx%zd epsilon-input-file \"%s\"\n", md->epsilon_dims[0], md->epsilon_dims[1], md->epsilon_dims[2], eps_input_file); @@ -1820,7 +1819,7 @@ material_type make_material_grid(bool do_averaging, double beta, double eta) { void update_weights(material_type matgrid, double *weights) { int N = matgrid->grid_size.x * matgrid->grid_size.y * matgrid->grid_size.z; - memcpy(matgrid->weights, weights, N * sizeof(meep::realnum)); + memcpy(matgrid->weights, weights, N * sizeof(double)); } /******************************************************************************/ @@ -2284,7 +2283,7 @@ double vec_to_value(vector3 diag, vector3 offdiag, int idx) { return val; } -void get_material_tensor(const medium_struct *mm, meep::realnum freq, +void get_material_tensor(const medium_struct *mm, double freq, std::complex *tensor) { /* Note that the current implementation assumes that any dispersion @@ -2315,14 +2314,14 @@ void get_material_tensor(const medium_struct *mm, meep::realnum freq, } } -meep::realnum get_material_gradient( - meep::realnum u, // material parameter at current point +double get_material_gradient( + double u, // material parameter at current point std::complex fields_a, // adjoint field at current point std::complex fields_f, // forward field at current point - meep::realnum freq, // frequency + double freq, // frequency material_data *md, // material meep::component field_dir, // current field component - meep::realnum du // step size + double du // step size ) { /* Note that the current implementation assumes that no materials have off-diag components and that if a material @@ -2381,11 +2380,11 @@ static int mirrorindex(int i, int n) { return i >= n ? 2 * n - 1 - i : (i < 0 ? linear_interpolate is changed!! ...also multiply by scaleby etc. for different gradient types */ -void add_interpolate_weights(meep::realnum rx, meep::realnum ry, meep::realnum rz, - meep::realnum *data, int nx, int ny, int nz, int stride, - double scaleby, const meep::realnum *udata, int ukind, double uval) { +void add_interpolate_weights(double rx, double ry, double rz, + double *data, int nx, int ny, int nz, int stride, + double scaleby, const double *udata, int ukind, double uval) { int x, y, z, x2, y2, z2; - meep::realnum dx, dy, dz, u; + double dx, dy, dz, u; /* mirror boundary conditions for r just beyond the boundary */ rx = rx < 0.0 ? -rx : (rx > 1.0 ? 1.0 - rx : rx); @@ -2440,14 +2439,14 @@ in row-major order (the order used by HDF5): */ #undef U } -void material_grids_addgradient_point(meep::realnum *v, std::complex fields_a, +void material_grids_addgradient_point(double *v, std::complex fields_a, std::complex fields_f, meep::component field_dir, - vector3 p, meep::realnum scalegrad, meep::realnum freq, + vector3 p, double scalegrad, double freq, geom_box_tree geometry_tree) { geom_box_tree tp; int oi, ois; material_data *mg, *mg_sum; - meep::realnum uval; + double uval; int kind; tp = geom_tree_search(p, geometry_tree, &oi); @@ -2487,7 +2486,7 @@ void material_grids_addgradient_point(meep::realnum *v, std::complex fie fashion. Since we aren't checking if each design grid is unique, however, it's up to the user to only have one unique design grid in this volume.*/ vector3 sz = mg->grid_size; - meep::realnum *vcur = v, *ucur; + double *vcur = v, *ucur; ucur = mg->weights; uval = matgrid_val(p, tp, oi, mg); do { @@ -2504,7 +2503,7 @@ void material_grids_addgradient_point(meep::realnum *v, std::complex fie if (!tp && is_material_grid(&default_material)) { vector3 pb = to_geom_box_coords(p, &tp->objects[oi]); vector3 sz = mg->grid_size; - meep::realnum *vcur = v, *ucur; + double *vcur = v, *ucur; ucur = mg->weights; uval = matgrid_val(p, tp, oi, mg); add_interpolate_weights(pb.x, pb.y, pb.z, vcur, sz.x, sz.y, sz.z, 1, @@ -2515,12 +2514,12 @@ void material_grids_addgradient_point(meep::realnum *v, std::complex fie } } -void material_grids_addgradient(meep::realnum *v, size_t ng, std::complex *fields_a, - std::complex *fields_f, meep::realnum *frequencies, - size_t nf, meep::realnum scalegrad, const meep::volume &where, +void material_grids_addgradient(double *v, size_t ng, std::complex *fields_a, + std::complex *fields_f, double *frequencies, + size_t nf, double scalegrad, const meep::volume &where, geom_box_tree geometry_tree, meep::fields *f) { int n2, n3, n4; - meep::realnum s[3][3], cen[3][3], c1, c2, c3, s1, s2, s3; + double s[3][3], cen[3][3], c1, c2, c3, s1, s2, s3; vector3 p; // calculate cell dimensions @@ -2534,9 +2533,9 @@ void material_grids_addgradient(meep::realnum *v, size_t ng, std::complex cdouble; -#endif - // constants from meep-ctl-const.hpp #define CYLINDRICAL -2 @@ -176,7 +172,7 @@ material_type make_material_grid(bool do_averaging, double beta, double eta); vector3 vec_to_vector3(const meep::vec &pt); meep::vec vector3_to_vec(const vector3 v3); -void epsilon_material_grid(material_data *md, meep::realnum u); +void epsilon_material_grid(material_data *md, double u); void epsilon_file_material(material_data *md, vector3 p); bool susceptibility_equal(const susceptibility &s1, const susceptibility &s2); bool susceptibility_list_equal(const susceptibility_list &s1, const susceptibility_list &s2); @@ -213,24 +209,24 @@ void init_libctl(material_type default_mat, bool ensure_per, // material grid functions /***************************************************************/ void update_weights(material_type matgrid, double *weights); -meep::realnum matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md); -meep::realnum material_grid_val(vector3 p, material_data *md); +double matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md); +double material_grid_val(vector3 p, material_data *md); geom_box_tree calculate_tree(const meep::volume &v, geometric_object_list g); -void get_material_tensor(const medium_struct *mm, meep::realnum freq, std::complex *tensor); -meep::realnum get_material_gradient(meep::realnum u, std::complex fields_a, - std::complex fields_f, meep::realnum freq, - material_data *md, meep::component field_dir, - meep::realnum du = 1.0e-3); -void add_interpolate_weights(meep::realnum rx, meep::realnum ry, meep::realnum rz, - meep::realnum *data, int nx, int ny, int nz, int stride, - double scaleby, const meep::realnum *udata, int ukind, double uval); -void material_grids_addgradient_point(meep::realnum *v, std::complex fields_a, +void get_material_tensor(const medium_struct *mm, double freq, std::complex *tensor); +double get_material_gradient(double u, std::complex fields_a, + std::complex fields_f, double freq, + material_data *md, meep::component field_dir, + double du = 1.0e-3); +void add_interpolate_weights(double rx, double ry, double rz, + double *data, int nx, int ny, int nz, int stride, + double scaleby, const double *udata, int ukind, double uval); +void material_grids_addgradient_point(double *v, std::complex fields_a, std::complex fields_f, meep::component field_dir, - vector3 p, meep::realnum scalegrad, meep::realnum freq, + vector3 p, double scalegrad, double freq, geom_box_tree geometry_tree); -void material_grids_addgradient(meep::realnum *v, size_t ng, std::complex *fields_a, - std::complex *fields_f, meep::realnum *frequencies, - size_t nf, meep::realnum scalegrad, const meep::volume &where, +void material_grids_addgradient(double *v, size_t ng, std::complex *fields_a, + std::complex *fields_f, double *frequencies, + size_t nf, double scalegrad, const meep::volume &where, geom_box_tree geometry_tree, meep::fields *f); /***************************************************************/ diff --git a/src/mpb.cpp b/src/mpb.cpp index 7f58ba56a..c41e3c2a4 100644 --- a/src/mpb.cpp +++ b/src/mpb.cpp @@ -32,8 +32,6 @@ using namespace std; -typedef complex cdouble; - namespace meep { #ifdef HAVE_MPB @@ -722,7 +720,7 @@ void *fields::get_eigenmode(double frequency, direction d, const volume where, c // so we need to divide the E-field amplitudes by -frequency; we also take this // opportunity to rescale the overall E and H amplitudes to yield unit power flux. double scale = -1.0 / frequency, factor = 2.0 / sqrt(fabs(vgrp)); - cdouble *efield = (cdouble *)fft_data_E, *hfield = (cdouble *)(mdata->fft_data); + complex *efield = (complex *)fft_data_E, *hfield = (complex *)(mdata->fft_data); for (int n = 0; n < NFFT; ++n) { efield[n] *= factor * scale; hfield[n] *= factor; @@ -938,15 +936,15 @@ void fields::get_eigenmode_coefficients(dft_flux flux, const volume &eig_vol, in /*--------------------------------------------------------------*/ /*--------------------------------------------------------------*/ /*--------------------------------------------------------------*/ - cdouble mode_flux[2], mode_mode[2]; + complex mode_flux[2], mode_mode[2]; get_mode_flux_overlap(mode_data, flux, nf, mode_flux); get_mode_mode_overlap(mode_data, mode_data, flux, mode_mode); - cdouble cplus = 0.5 * (mode_flux[0] + mode_flux[1]); - cdouble cminus = 0.5 * (mode_flux[0] - mode_flux[1]); + complex cplus = 0.5 * (mode_flux[0] + mode_flux[1]); + complex cminus = 0.5 * (mode_flux[0] - mode_flux[1]); /* MPB modes are normalized to unit power above, but we need to re-normalize here to have unit power as integrated on Meep's Yee grid and not on MPB's grid. Thus, normfac differs from a constant factor only because of discretization effects. */ - cdouble normfac = 0.5 * (mode_mode[0] + mode_mode[1]); + complex normfac = 0.5 * (mode_mode[0] + mode_mode[1]); if (normfac == 0.0) normfac = 1.0; double csc = sqrt((flux.use_symmetry ? S.multiplicity() : 1.0) / abs(normfac)); if (cscale) diff --git a/src/multilevel-atom.cpp b/src/multilevel-atom.cpp index 8f6608f2d..02b49ac1a 100644 --- a/src/multilevel-atom.cpp +++ b/src/multilevel-atom.cpp @@ -141,7 +141,7 @@ void *multilevel_susceptibility::new_internal_data(realnum *W[NUM_FIELD_COMPONEN return (void *)d; } -void multilevel_susceptibility::init_internal_data(realnum *W[NUM_FIELD_COMPONENTS][2], double dt, +void multilevel_susceptibility::init_internal_data(realnum *W[NUM_FIELD_COMPONENTS][2], realnum dt, const grid_volume &gv, void *data) const { multilevel_data *d = (multilevel_data *)data; size_t sz_data = d->sz_data; @@ -237,10 +237,10 @@ realnum *multilevel_susceptibility::cinternal_notowned_ptr(int inotowned, compon } void multilevel_susceptibility::update_P(realnum *W[NUM_FIELD_COMPONENTS][2], - realnum *W_prev[NUM_FIELD_COMPONENTS][2], double dt, + realnum *W_prev[NUM_FIELD_COMPONENTS][2], realnum dt, const grid_volume &gv, void *P_internal_data) const { multilevel_data *d = (multilevel_data *)P_internal_data; - double dt2 = 0.5 * dt; + realnum dt2 = 0.5 * dt; // field directions and offsets for E * dP dot product. component cdot[3] = {Dielectric, Dielectric, Dielectric}; @@ -269,7 +269,7 @@ void multilevel_susceptibility::update_P(realnum *W[NUM_FIELD_COMPONENTS][2], } // compute E*8 at point i - double E8[3][2] = {{0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}}; + realnum E8[3][2] = {{0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}}; for (idot = 0; idot < 3 && cdot[idot] != Dielectric; ++idot) { realnum *w = W[cdot[idot]][0], *wp = W_prev[cdot[idot]][0]; E8[idot][0] = w[i] + w[i + o1[idot]] + w[i + o2[idot]] + w[i + o1[idot] + o2[idot]] + wp[i] + @@ -287,9 +287,9 @@ void multilevel_susceptibility::update_P(realnum *W[NUM_FIELD_COMPONENTS][2], // Ntmp = Ntmp + alpha * E * dP for (int t = 0; t < T; ++t) { // compute 32 * E * dP and 64 * E * P at point i - double EdP32 = 0; - double EPave64 = 0; - double gperpdt = gamma[t] * pi * dt; + realnum EdP32 = 0; + realnum EPave64 = 0; + realnum gperpdt = gamma[t] * pi * dt; for (idot = 0; idot < 3 && cdot[idot] != Dielectric; ++idot) { realnum *p = d->P[cdot[idot]][0][t], *pp = d->P_prev[cdot[idot]][0][t]; realnum dP = p[i] + p[i + o1[idot]] + p[i + o2[idot]] + p[i + o1[idot] + o2[idot]] - @@ -325,10 +325,10 @@ void multilevel_susceptibility::update_P(realnum *W[NUM_FIELD_COMPONENTS][2], // each P is updated as a damped harmonic oscillator for (int t = 0; t < T; ++t) { - const double omega2pi = 2 * pi * omega[t], g2pi = gamma[t] * 2 * pi, gperp = gamma[t] * pi; - const double omega0dtsqrCorrected = omega2pi * omega2pi * dt * dt + gperp * gperp * dt * dt; - const double gamma1inv = 1 / (1 + g2pi * dt2), gamma1 = (1 - g2pi * dt2); - const double dtsqr = dt * dt; + const realnum omega2pi = 2 * pi * omega[t], g2pi = gamma[t] * 2 * pi, gperp = gamma[t] * pi; + const realnum omega0dtsqrCorrected = omega2pi * omega2pi * dt * dt + gperp * gperp * dt * dt; + const realnum gamma1inv = 1 / (1 + g2pi * dt2), gamma1 = (1 - g2pi * dt2); + const realnum dtsqr = dt * dt; // note that gamma[t]*2*pi = 2*gamma_perp as one would usually write it in SALT. -- AWC // figure out which levels this transition couples @@ -342,7 +342,7 @@ void multilevel_susceptibility::update_P(realnum *W[NUM_FIELD_COMPONENTS][2], FOR_COMPONENTS(c) DOCMP2 { if (d->P[c][cmp]) { const realnum *w = W[c][cmp], *s = sigma[c][component_direction(c)]; - const double st = sigmat[5 * t + component_direction(c)]; + const realnum st = sigmat[5 * t + component_direction(c)]; if (w && s) { realnum *p = d->P[c][cmp][t], *pp = d->P_prev[c][cmp][t]; @@ -369,8 +369,8 @@ void multilevel_susceptibility::update_P(realnum *W[NUM_FIELD_COMPONENTS][2], realnum pcur = p[i]; const realnum *Ni = N + i * L; // dNi is population inversion for this transition - double dNi = 0.25 * (Ni[lp] + Ni[lp + o1] + Ni[lp + o2] + Ni[lp + o1 + o2] - Ni[lm] - - Ni[lm + o1] - Ni[lm + o2] - Ni[lm + o1 + o2]); + realnum dNi = 0.25 * (Ni[lp] + Ni[lp + o1] + Ni[lp + o2] + Ni[lp + o1 + o2] - Ni[lm] - + Ni[lm + o1] - Ni[lm + o2] - Ni[lm + o1 + o2]); p[i] = gamma1inv * (pcur * (2 - omega0dtsqrCorrected) - gamma1 * pp[i] - dtsqr * (st * s[i] * w[i]) * dNi); pp[i] = pcur; diff --git a/src/mympi.cpp b/src/mympi.cpp index a09492df0..845b325e7 100644 --- a/src/mympi.cpp +++ b/src/mympi.cpp @@ -152,8 +152,7 @@ void send(int from, int to, double *data, int size) { #endif } -#if MEEP_SINGLE -void broadcast(int from, realnum *data, int size) { +void broadcast(int from, float *data, int size) { #ifdef HAVE_MPI if (size == 0) return; MPI_Bcast(data, size, MPI_FLOAT, from, mycomm); @@ -163,7 +162,6 @@ void broadcast(int from, realnum *data, int size) { UNUSED(size); #endif } -#endif void broadcast(int from, double *data, int size) { #ifdef HAVE_MPI diff --git a/src/near2far.cpp b/src/near2far.cpp index faeece4a9..6cb069804 100644 --- a/src/near2far.cpp +++ b/src/near2far.cpp @@ -122,7 +122,7 @@ void dft_near2far::scale_dfts(complex scale) { } typedef void (*greenfunc)(std::complex *EH, const vec &x, double freq, double eps, - double mu, const vec &x0, component c0, std::complex); + double mu, const vec &x0, component c0, std::complex f0); /* Given the field f0 correponding to current-source component c0 at x0, compute the E/H fields EH[6] (6 components) at x for a frequency @@ -151,7 +151,8 @@ void green3d(std::complex *EH, const vec &x, double freq, double eps, do vec p = zero_vec(rhat.dim); p.set_direction(component_direction(c0), 1); double pdotrhat = p & rhat; - vec rhatcrossp = vec(rhat.y() * p.z() - rhat.z() * p.y(), rhat.z() * p.x() - rhat.x() * p.z(), + vec rhatcrossp = vec(rhat.y() * p.z() - rhat.z() * p.y(), + rhat.z() * p.x() - rhat.x() * p.z(), rhat.x() * p.y() - rhat.y() * p.x()); /* compute the various scalar quantities in the point source formulae */ @@ -397,8 +398,8 @@ std::complex *dft_near2far::farfield(const vec &x) { return EH; } -realnum *dft_near2far::get_farfields_array(const volume &where, int &rank, size_t *dims, size_t &N, - double resolution) { +double *dft_near2far::get_farfields_array(const volume &where, int &rank, size_t *dims, size_t &N, + double resolution) { /* compute output grid size etc. */ double dx[3] = {0, 0, 0}; direction dirs[3] = {X, Y, Z}; @@ -421,8 +422,8 @@ realnum *dft_near2far::get_farfields_array(const volume &where, int &rank, size_ if (N * Nfreq < 1) return NULL; /* nothing to output */ /* 6 x 2 x N x Nfreq array of fields in row-major order */ - realnum *EH = new realnum[6 * 2 * N * Nfreq]; - realnum *EH_ = new realnum[6 * 2 * N * Nfreq]; // temp array for sum_to_all + double *EH = new double[6 * 2 * N * Nfreq]; + double *EH_ = new double[6 * 2 * N * Nfreq]; // temp array for sum_to_all /* fields for farfield_lowlevel for a single output point x */ std::complex *EH1 = new std::complex[6 * Nfreq]; @@ -477,7 +478,7 @@ void dft_near2far::save_farfields(const char *fname, const char *prefix, const v int rank = 0; size_t N = 1; - realnum *EH = get_farfields_array(where, rank, dims, N, resolution); + double *EH = get_farfields_array(where, rank, dims, N, resolution); if (!EH) return; /* nothing to output */ const size_t Nfreq = freq.size(); @@ -511,12 +512,12 @@ double *dft_near2far::flux(direction df, const volume &where, double resolution) int rank = 0; size_t N = 1; - realnum *EH = get_farfields_array(where, rank, dims, N, resolution); + double *EH = get_farfields_array(where, rank, dims, N, resolution); const size_t Nfreq = freq.size(); double *F = new double[Nfreq]; - std::complex ff_EH[6]; - std::complex cE[2], cH[2]; + std::complex ff_EH[6]; + std::complex cE[2], cH[2]; for (size_t i = 0; i < Nfreq; ++i) F[i] = 0; @@ -524,8 +525,8 @@ double *dft_near2far::flux(direction df, const volume &where, double resolution) for (size_t idx = 0; idx < N; ++idx) { for (size_t i = 0; i < Nfreq; ++i) { for (int k = 0; k < 6; ++k) - ff_EH[k] = std::complex(*(EH + ((k * 2 + 0) * N + idx) * Nfreq + i), - *(EH + ((k * 2 + 1) * N + idx) * Nfreq + i)); + ff_EH[k] = std::complex(*(EH + ((k * 2 + 0) * N + idx) * Nfreq + i), + *(EH + ((k * 2 + 1) * N + idx) * Nfreq + i)); switch (df) { case X: cE[0] = ff_EH[1], cE[1] = ff_EH[2], cH[0] = ff_EH[5], cH[1] = ff_EH[4]; break; case Y: cE[0] = ff_EH[2], cE[1] = ff_EH[0], cH[0] = ff_EH[3], cH[1] = ff_EH[5]; break; diff --git a/src/sources.cpp b/src/sources.cpp index 229018a9b..b1f8afeb8 100644 --- a/src/sources.cpp +++ b/src/sources.cpp @@ -325,8 +325,8 @@ void fields::add_srcdata(struct sourcedata cur_data, src_time *src, size_t n, st // after all add_srcdata calls are complete. } -static realnum *amp_func_data_re = NULL; -static realnum *amp_func_data_im = NULL; +static double *amp_func_data_re = NULL; +static double *amp_func_data_im = NULL; static const volume *amp_func_vol = NULL; static size_t amp_file_dims[3]; @@ -398,13 +398,13 @@ void fields::add_volume_source(component c, const src_time &src, const volume &w std::string dataset_im = std::string(dataset) + ".im"; size_t re_dims[] = {1, 1, 1}; - double *real_data = eps_file.read(dataset_re.c_str(), &rank, re_dims, 3); + realnum *real_data = (realnum *)eps_file.read(dataset_re.c_str(), &rank, re_dims, 3, sizeof(realnum) == sizeof(float)); if (verbosity > 0) master_printf("read in %zdx%zdx%zd amplitude function file \"%s:%s\"\n", re_dims[0], re_dims[1], re_dims[2], filename, dataset_re.c_str()); size_t im_dims[] = {1, 1, 1}; - double *imag_data = eps_file.read(dataset_im.c_str(), &rank, im_dims, 3); + realnum *imag_data = (realnum *)eps_file.read(dataset_im.c_str(), &rank, im_dims, 3, sizeof(realnum) == sizeof(float)); if (verbosity > 0) master_printf("read in %zdx%zdx%zd amplitude function file \"%s:%s\"\n", im_dims[0], im_dims[1], im_dims[2], filename, dataset_im.c_str()); diff --git a/src/step_db.cpp b/src/step_db.cpp index ead13a7ea..b9f4250b5 100644 --- a/src/step_db.cpp +++ b/src/step_db.cpp @@ -92,12 +92,12 @@ bool fields_chunk::step_db(field_type ft) { the derivative and integral are replaced by differences and sums, but you get the idea). */ if (!f_rderiv_int) f_rderiv_int = new realnum[gv.ntot()]; - double ir0 = gv.origin_r() * gv.a + 0.5 * gv.iyee_shift(c_p).in_direction(R); + realnum ir0 = gv.origin_r() * gv.a + 0.5 * gv.iyee_shift(c_p).in_direction(R); for (int iz = 0; iz <= gv.nz(); ++iz) f_rderiv_int[iz] = 0; int sr = gv.nz() + 1; for (int ir = 1; ir <= gv.nr(); ++ir) { - double rinv = 1.0 / ((ir + ir0) - 0.5); + realnum rinv = 1.0 / ((ir + ir0) - 0.5); for (int iz = 0; iz <= gv.nz(); ++iz) { ptrdiff_t idx = ir * sr + iz; f_rderiv_int[idx] = @@ -140,7 +140,7 @@ bool fields_chunk::step_db(field_type ft) { const direction dsig = s->sigsize[dsig0] > 1 ? dsig0 : NO_DIRECTION; const direction dsigu0 = cycle_direction(gv.dim, d_c, 2); const direction dsigu = s->sigsize[dsigu0] > 1 ? dsigu0 : NO_DIRECTION; - const double betadt = 2 * pi * beta * dt * (d_c == X ? +1 : -1) * + const realnum betadt = 2 * pi * beta * dt * (d_c == X ? +1 : -1) * (f[c_g][1 - cmp] ? (ft == D_stuff ? -1 : +1) * (2 * cmp - 1) : 1); STEP_BETA(the_f, cc, g, gv, betadt, dsig, s->siginv[dsig], f_u[cc][cmp], dsigu, s->siginv[dsigu], s->condinv[cc][d_c], f_cond[cc][cmp]); @@ -157,14 +157,14 @@ bool fields_chunk::step_db(field_type ft) { realnum *fcnd = f_cond[cc][cmp]; realnum *fu = f_u[cc][cmp]; const direction dsig = cycle_direction(gv.dim, d_c, 1); - const double *siginv = s->sigsize[dsig] > 1 ? s->siginv[dsig] : 0; + const realnum *siginv = s->sigsize[dsig] > 1 ? s->siginv[dsig] : 0; const int dk = gv.iyee_shift(cc).in_direction(dsig); const direction dsigu = cycle_direction(gv.dim, d_c, 2); - const double *siginvu = s->sigsize[dsigu] > 1 ? s->siginv[dsigu] : 0; + const realnum *siginvu = s->sigsize[dsigu] > 1 ? s->siginv[dsigu] : 0; const int dku = gv.iyee_shift(cc).in_direction(dsigu); - const double the_m = + const realnum the_m = m * (1 - 2 * cmp) * (1 - 2 * (ft == B_stuff)) * (1 - 2 * (d_c == R)) * Courant; - const double ir0 = gv.origin_r() * gv.a + 0.5 * gv.iyee_shift(cc).in_direction(R); + const realnum ir0 = gv.origin_r() * gv.a + 0.5 * gv.iyee_shift(cc).in_direction(R); int sr = gv.nz() + 1; // 8 special cases of the same loop (sigh): @@ -173,12 +173,12 @@ bool fields_chunk::step_db(field_type ft) { if (cndinv) // PML + fu + conductivity //////////////////// MOST GENERAL CASE ////////////////////// for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { - double rinv = the_m / (ir + ir0); + realnum rinv = the_m / (ir + ir0); for (int iz = 0; iz <= gv.nz(); ++iz) { ptrdiff_t idx = ir * sr + iz; int k = dk + 2 * (dsig == Z ? iz : ir); int ku = dku + 2 * (dsigu == Z ? iz : ir); - double df, dfcnd = rinv * g[idx] * cndinv[idx]; + realnum df, dfcnd = rinv * g[idx] * cndinv[idx]; fcnd[idx] += dfcnd; fu[idx] += (df = dfcnd * siginv[k]); the_f[idx] += siginvu[ku] * df; @@ -187,12 +187,12 @@ bool fields_chunk::step_db(field_type ft) { ///////////////////////////////////////////////////////////// else // PML + fu - conductivity for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { - double rinv = the_m / (ir + ir0); + realnum rinv = the_m / (ir + ir0); for (int iz = 0; iz <= gv.nz(); ++iz) { ptrdiff_t idx = ir * sr + iz; int k = dk + 2 * (dsig == Z ? iz : ir); int ku = dku + 2 * (dsigu == Z ? iz : ir); - double df, dfcnd = rinv * g[idx]; + realnum df, dfcnd = rinv * g[idx]; fu[idx] += (df = dfcnd * siginv[k]); the_f[idx] += siginvu[ku] * df; } @@ -201,22 +201,22 @@ bool fields_chunk::step_db(field_type ft) { else { // PML - fu if (cndinv) // PML - fu + conductivity for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { - double rinv = the_m / (ir + ir0); + realnum rinv = the_m / (ir + ir0); for (int iz = 0; iz <= gv.nz(); ++iz) { ptrdiff_t idx = ir * sr + iz; int k = dk + 2 * (dsig == Z ? iz : ir); - double dfcnd = rinv * g[idx] * cndinv[idx]; + realnum dfcnd = rinv * g[idx] * cndinv[idx]; fcnd[idx] += dfcnd; the_f[idx] += dfcnd * siginv[k]; } } else // PML - fu - conductivity for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { - double rinv = the_m / (ir + ir0); + realnum rinv = the_m / (ir + ir0); for (int iz = 0; iz <= gv.nz(); ++iz) { ptrdiff_t idx = ir * sr + iz; int k = dk + 2 * (dsig == Z ? iz : ir); - double dfcnd = rinv * g[idx]; + realnum dfcnd = rinv * g[idx]; the_f[idx] += dfcnd * siginv[k]; } } @@ -226,22 +226,22 @@ bool fields_chunk::step_db(field_type ft) { if (siginvu) { // no PML + fu if (cndinv) // no PML + fu + conductivity for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { - double rinv = the_m / (ir + ir0); + realnum rinv = the_m / (ir + ir0); for (int iz = 0; iz <= gv.nz(); ++iz) { ptrdiff_t idx = ir * sr + iz; int ku = dku + 2 * (dsigu == Z ? iz : ir); - double df = rinv * g[idx] * cndinv[idx]; + realnum df = rinv * g[idx] * cndinv[idx]; fu[idx] += df; the_f[idx] += siginvu[ku] * df; } } else // no PML + fu - conductivity for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { - double rinv = the_m / (ir + ir0); + realnum rinv = the_m / (ir + ir0); for (int iz = 0; iz <= gv.nz(); ++iz) { ptrdiff_t idx = ir * sr + iz; int ku = dku + 2 * (dsigu == Z ? iz : ir); - double df = rinv * g[idx]; + realnum df = rinv * g[idx]; fu[idx] += df; the_f[idx] += siginvu[ku] * df; } @@ -250,7 +250,7 @@ bool fields_chunk::step_db(field_type ft) { else { // no PML - fu if (cndinv) // no PML - fu + conductivity for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { - double rinv = the_m / (ir + ir0); + realnum rinv = the_m / (ir + ir0); for (int iz = 0; iz <= gv.nz(); ++iz) { ptrdiff_t idx = ir * sr + iz; the_f[idx] += rinv * g[idx] * cndinv[idx]; @@ -258,7 +258,7 @@ bool fields_chunk::step_db(field_type ft) { } else // no PML - fu - conductivity for (int ir = ir0 == 0; ir <= gv.nr(); ++ir) { - double rinv = the_m / (ir + ir0); + realnum rinv = the_m / (ir + ir0); for (int iz = 0; iz <= gv.nz(); ++iz) { ptrdiff_t idx = ir * sr + iz; the_f[idx] += rinv * g[idx]; @@ -280,16 +280,16 @@ bool fields_chunk::step_db(field_type ft) { const realnum *cndinv = s->condinv[Dz][Z]; realnum *fcnd = f_cond[Dz][cmp]; const direction dsig = cycle_direction(gv.dim, Z, 1); - const double *siginv = s->sigsize[dsig] > 1 ? s->siginv[dsig] : 0; + const realnum *siginv = s->sigsize[dsig] > 1 ? s->siginv[dsig] : 0; const int dk = gv.iyee_shift(Dz).in_direction(dsig); const direction dsigu = cycle_direction(gv.dim, Z, 2); - const double *siginvu = s->sigsize[dsigu] > 1 ? s->siginv[dsigu] : 0; + const realnum *siginvu = s->sigsize[dsigu] > 1 ? s->siginv[dsigu] : 0; const int dku = gv.iyee_shift(Dz).in_direction(dsigu); realnum *fu = siginvu && f_u[Dz][cmp] ? f[Dz][cmp] : 0; realnum *the_f = fu ? f_u[Dz][cmp] : f[Dz][cmp]; for (int iz = 0; iz < nz; ++iz) { // Note: old code (prior to Meep 0.2) was missing factor of 4?? - double df, dfcnd = g[iz] * (Courant * 4) * (cndinv ? cndinv[iz] : 1); + realnum df, dfcnd = g[iz] * (Courant * 4) * (cndinv ? cndinv[iz] : 1); if (fcnd) fcnd[iz] += dfcnd; the_f[iz] += (df = dfcnd * (siginv ? siginv[dk + 2 * (dsig == Z) * iz] : 1)); if (fu) fu[iz] += siginvu[dku + 2 * (dsigu == Z) * iz] * df; @@ -314,19 +314,19 @@ bool fields_chunk::step_db(field_type ft) { const realnum *cndinv = s->condinv[cc][d_c]; realnum *fcnd = f_cond[cc][cmp]; const direction dsig = cycle_direction(gv.dim, d_c, 1); - const double *siginv = s->sigsize[dsig] > 1 ? s->siginv[dsig] : 0; + const realnum *siginv = s->sigsize[dsig] > 1 ? s->siginv[dsig] : 0; const int dk = gv.iyee_shift(cc).in_direction(dsig); const direction dsigu = cycle_direction(gv.dim, d_c, 2); - const double *siginvu = s->sigsize[dsigu] > 1 ? s->siginv[dsigu] : 0; + const realnum *siginvu = s->sigsize[dsigu] > 1 ? s->siginv[dsigu] : 0; const int dku = gv.iyee_shift(cc).in_direction(dsigu); realnum *fu = siginvu && f_u[cc][cmp] ? f[cc][cmp] : 0; realnum *the_f = fu ? f_u[cc][cmp] : f[cc][cmp]; int sd = ft == D_stuff ? +1 : -1; - double f_m_mult = ft == D_stuff ? 2 : (1 - 2 * cmp); + realnum f_m_mult = ft == D_stuff ? 2 : (1 - 2 * cmp); for (int iz = (ft == D_stuff); iz < nz + (ft == D_stuff); ++iz) { - double df; - double dfcnd = (sd * Courant) * (f_p[iz] - f_p[iz - sd] - f_m_mult * f_m[iz]) * + realnum df; + realnum dfcnd = (sd * Courant) * (f_p[iz] - f_p[iz - sd] - f_m_mult * f_m[iz]) * (cndinv ? cndinv[iz] : 1); if (fcnd) fcnd[iz] += dfcnd; the_f[iz] += (df = dfcnd * (siginv ? siginv[dk + 2 * (dsig == Z) * iz] : 1)); diff --git a/src/step_generic.cpp b/src/step_generic.cpp index 63a7d081e..4b103d6c4 100644 --- a/src/step_generic.cpp +++ b/src/step_generic.cpp @@ -2,7 +2,6 @@ #include "meep_internals.hpp" #include "config.h" -#define DPR double *restrict #define RPR realnum *restrict /* These macros get into the guts of the LOOP_OVER_VOL loops to @@ -64,11 +63,11 @@ namespace meep { df/dt = dfu/dt - sigma_u * f and fu replaces f in the equations above (fu += dt curl g etcetera). */ -void step_curl(RPR f, component c, const RPR g1, const RPR g2, ptrdiff_t s1, - ptrdiff_t s2, // strides for g1/g2 shift - const grid_volume &gv, double dtdx, direction dsig, const DPR sig, const DPR kap, - const DPR siginv, RPR fu, direction dsigu, const DPR sigu, const DPR kapu, - const DPR siginvu, double dt, const RPR cnd, const RPR cndinv, RPR fcnd) { +void step_curl(RPR f, component c, const RPR g1, const RPR g2, + ptrdiff_t s1, ptrdiff_t s2, // strides for g1/g2 shift + const grid_volume &gv, realnum dtdx, direction dsig, const RPR sig, const RPR kap, + const RPR siginv, RPR fu, direction dsigu, const RPR sigu, const RPR kapu, + const RPR siginvu, realnum dt, const RPR cnd, const RPR cndinv, RPR fcnd) { if (!g1) { // swap g1 and g2 SWAP(const RPR, g1, g2); SWAP(ptrdiff_t, s1, s2); @@ -85,7 +84,7 @@ void step_curl(RPR f, component c, const RPR g1, const RPR g2, ptrdiff_t s1, if (dsig == NO_DIRECTION) { // no PML in f update if (dsigu == NO_DIRECTION) { // no fu update if (cnd) { - double dt2 = dt * 0.5; + realnum dt2 = dt * 0.5; if (g2) { LOOP_OVER_VOL_OWNED0(gv, c, i) { f[i] = ((1 - dt2 * cnd[i]) * f[i] - dtdx * (g1[i + s1] - g1[i] + g2[i] - g2[i + s2])) * @@ -112,11 +111,11 @@ void step_curl(RPR f, component c, const RPR g1, const RPR g2, ptrdiff_t s1, else { // fu update, no PML in f update KSTRIDE_DEF(dsigu, ku, gv.little_owned_corner0(c)); if (cnd) { - double dt2 = dt * 0.5; + realnum dt2 = dt * 0.5; if (g2) { LOOP_OVER_VOL_OWNED0(gv, c, i) { DEF_ku; - double fprev = fu[i]; + realnum fprev = fu[i]; fu[i] = ((1 - dt2 * cnd[i]) * fprev - dtdx * (g1[i + s1] - g1[i] + g2[i] - g2[i + s2])) * cndinv[i]; @@ -126,7 +125,7 @@ void step_curl(RPR f, component c, const RPR g1, const RPR g2, ptrdiff_t s1, else { LOOP_OVER_VOL_OWNED0(gv, c, i) { DEF_ku; - double fprev = fu[i]; + realnum fprev = fu[i]; fu[i] = ((1 - dt2 * cnd[i]) * fprev - dtdx * (g1[i + s1] - g1[i])) * cndinv[i]; f[i] = siginvu[ku] * ((kapu[ku] - sigu[ku]) * f[i] + fu[i] - fprev); } @@ -136,7 +135,7 @@ void step_curl(RPR f, component c, const RPR g1, const RPR g2, ptrdiff_t s1, if (g2) { LOOP_OVER_VOL_OWNED0(gv, c, i) { DEF_ku; - double fprev = fu[i]; + realnum fprev = fu[i]; fu[i] -= dtdx * (g1[i + s1] - g1[i] + g2[i] - g2[i + s2]); f[i] = siginvu[ku] * ((kapu[ku] - sigu[ku]) * f[i] + fu[i] - fprev); } @@ -144,7 +143,7 @@ void step_curl(RPR f, component c, const RPR g1, const RPR g2, ptrdiff_t s1, else { LOOP_OVER_VOL_OWNED0(gv, c, i) { DEF_ku; - double fprev = fu[i]; + realnum fprev = fu[i]; fu[i] -= dtdx * (g1[i + s1] - g1[i]); f[i] = siginvu[ku] * ((kapu[ku] - sigu[ku]) * f[i] + fu[i] - fprev); } @@ -156,7 +155,7 @@ void step_curl(RPR f, component c, const RPR g1, const RPR g2, ptrdiff_t s1, KSTRIDE_DEF(dsig, k, gv.little_owned_corner0(c)); if (dsigu == NO_DIRECTION) { // no fu update if (cnd) { - double dt2 = dt * 0.5; + realnum dt2 = dt * 0.5; if (g2) { LOOP_OVER_VOL_OWNED0(gv, c, i) { DEF_k; @@ -195,13 +194,13 @@ void step_curl(RPR f, component c, const RPR g1, const RPR g2, ptrdiff_t s1, else { // fu update + PML in f update KSTRIDE_DEF(dsigu, ku, gv.little_owned_corner0(c)); if (cnd) { - double dt2 = dt * 0.5; + realnum dt2 = dt * 0.5; if (g2) { //////////////////// MOST GENERAL CASE ////////////////////// LOOP_OVER_VOL_OWNED0(gv, c, i) { DEF_k; DEF_ku; - double fprev = fu[i]; + realnum fprev = fu[i]; realnum fcnd_prev = fcnd[i]; fcnd[i] = ((1 - dt2 * cnd[i]) * fcnd[i] - dtdx * (g1[i + s1] - g1[i] + g2[i] - g2[i + s2])) * @@ -215,7 +214,7 @@ void step_curl(RPR f, component c, const RPR g1, const RPR g2, ptrdiff_t s1, LOOP_OVER_VOL_OWNED0(gv, c, i) { DEF_k; DEF_ku; - double fprev = fu[i]; + realnum fprev = fu[i]; realnum fcnd_prev = fcnd[i]; fcnd[i] = ((1 - dt2 * cnd[i]) * fcnd[i] - dtdx * (g1[i + s1] - g1[i])) * cndinv[i]; fu[i] = ((kap[k] - sig[k]) * fu[i] + (fcnd[i] - fcnd_prev)) * siginv[k]; @@ -228,7 +227,7 @@ void step_curl(RPR f, component c, const RPR g1, const RPR g2, ptrdiff_t s1, LOOP_OVER_VOL_OWNED0(gv, c, i) { DEF_k; DEF_ku; - double fprev = fu[i]; + realnum fprev = fu[i]; fu[i] = ((kap[k] - sig[k]) * fu[i] - dtdx * (g1[i + s1] - g1[i] + g2[i] - g2[i + s2])) * siginv[k]; f[i] = siginvu[ku] * ((kapu[ku] - sigu[ku]) * f[i] + fu[i] - fprev); @@ -238,7 +237,7 @@ void step_curl(RPR f, component c, const RPR g1, const RPR g2, ptrdiff_t s1, LOOP_OVER_VOL_OWNED0(gv, c, i) { DEF_k; DEF_ku; - double fprev = fu[i]; + realnum fprev = fu[i]; fu[i] = ((kap[k] - sig[k]) * fu[i] - dtdx * (g1[i + s1] - g1[i])) * siginv[k]; f[i] = siginvu[ku] * ((kapu[ku] - sigu[ku]) * f[i] + fu[i] - fprev); } @@ -252,8 +251,8 @@ void step_curl(RPR f, component c, const RPR g1, const RPR g2, ptrdiff_t s1, and/or PML). This is used in 2d calculations to add an exp(i beta z) time dependence, which gives an additional i \beta \hat{z} \times cross-product in the curl equations. */ -void step_beta(RPR f, component c, const RPR g, const grid_volume &gv, double betadt, - direction dsig, const DPR siginv, RPR fu, direction dsigu, const DPR siginvu, +void step_beta(RPR f, component c, const RPR g, const grid_volume &gv, realnum betadt, + direction dsig, const RPR siginv, RPR fu, direction dsigu, const RPR siginvu, const RPR cndinv, RPR fcnd) { if (!g) return; if (dsig != NO_DIRECTION) { // PML in f update @@ -265,8 +264,8 @@ void step_beta(RPR f, component c, const RPR g, const grid_volume &gv, double be LOOP_OVER_VOL_OWNED0(gv, c, i) { DEF_k; DEF_ku; - double df; - double dfcnd = betadt * g[i] * cndinv[i]; + realnum df; + realnum dfcnd = betadt * g[i] * cndinv[i]; fcnd[i] += dfcnd; fu[i] += (df = dfcnd * siginv[k]); f[i] += siginvu[ku] * df; @@ -277,7 +276,7 @@ void step_beta(RPR f, component c, const RPR g, const grid_volume &gv, double be LOOP_OVER_VOL_OWNED0(gv, c, i) { DEF_k; DEF_ku; - double df; + realnum df; fu[i] += (df = betadt * g[i] * siginv[k]); f[i] += siginvu[ku] * df; } @@ -287,7 +286,7 @@ void step_beta(RPR f, component c, const RPR g, const grid_volume &gv, double be if (cndinv) { // conductivity + PML LOOP_OVER_VOL_OWNED0(gv, c, i) { DEF_k; - double dfcnd = betadt * g[i] * cndinv[i]; + realnum dfcnd = betadt * g[i] * cndinv[i]; fcnd[i] += dfcnd; f[i] += dfcnd * siginv[k]; } @@ -306,7 +305,7 @@ void step_beta(RPR f, component c, const RPR g, const grid_volume &gv, double be if (cndinv) { // conductivity, no PML LOOP_OVER_VOL_OWNED0(gv, c, i) { DEF_ku; - double df; + realnum df; fu[i] += (df = betadt * g[i] * cndinv[i]); f[i] += siginvu[ku] * df; } @@ -314,7 +313,7 @@ void step_beta(RPR f, component c, const RPR g, const grid_volume &gv, double be else { // no conductivity or PML LOOP_OVER_VOL_OWNED0(gv, c, i) { DEF_ku; - double df; + realnum df; fu[i] += (df = betadt * g[i]); f[i] += siginvu[ku] * df; } @@ -338,10 +337,10 @@ void step_beta(RPR f, component c, const RPR g, const grid_volume &gv, double be index change is large, of course, but in that case the chi2/chi3 power-series expansion isn't accurate anyway, so the cubic isn't physical there either. */ -inline double calc_nonlinear_u(const double Dsqr, const double Di, const double chi1inv, - const double chi2, const double chi3) { - double c2 = Di * chi2 * (chi1inv * chi1inv); - double c3 = Dsqr * chi3 * (chi1inv * chi1inv * chi1inv); +inline realnum calc_nonlinear_u(const realnum Dsqr, const realnum Di, const realnum chi1inv, + const realnum chi2, const realnum chi3) { + realnum c2 = Di * chi2 * (chi1inv * chi1inv); + realnum c3 = Dsqr * chi3 * (chi1inv * chi1inv * chi1inv); return (1 + c2 + 2 * c3) / (1 + 2 * c2 + 3 * c3); } @@ -365,7 +364,7 @@ inline double calc_nonlinear_u(const double Dsqr, const double Di, const double void step_update_EDHB(RPR f, component fc, const grid_volume &gv, const RPR g, const RPR g1, const RPR g2, const RPR u, const RPR u1, const RPR u2, ptrdiff_t s, ptrdiff_t s1, ptrdiff_t s2, const RPR chi2, const RPR chi3, RPR fw, - direction dsigw, const DPR sigw, const DPR kapw) { + direction dsigw, const RPR sigw, const RPR kapw) { if (!f) return; if ((!g1 && g2) || (g1 && g2 && !u1 && u2)) { /* swap g1 and g2 */ @@ -374,7 +373,7 @@ void step_update_EDHB(RPR f, component fc, const grid_volume &gv, const RPR g, c SWAP(ptrdiff_t, s1, s2); } - // stable averaging of offdiagonal components +// stable averaging of offdiagonal components #define OFFDIAG(u, g, sx) \ (0.25 * ((g[i] + g[i - sx]) * u[i] + (g[i + s] + g[(i + s) - sx]) * u[i + s])) @@ -387,12 +386,12 @@ void step_update_EDHB(RPR f, component fc, const grid_volume &gv, const RPR g, c if (chi3) { //////////////////// MOST GENERAL CASE ////////////////////// LOOP_OVER_VOL_OWNED(gv, fc, i) { - double g1s = g1[i] + g1[i + s] + g1[i - s1] + g1[i + (s - s1)]; - double g2s = g2[i] + g2[i + s] + g2[i - s2] + g2[i + (s - s2)]; - double gs = g[i]; - double us = u[i]; + realnum g1s = g1[i] + g1[i + s] + g1[i - s1] + g1[i + (s - s1)]; + realnum g2s = g2[i] + g2[i + s] + g2[i - s2] + g2[i + (s - s2)]; + realnum gs = g[i]; + realnum us = u[i]; DEF_kw; - double fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; + realnum fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; fw[i] = (gs * us + OFFDIAG(u1, g1, s1) + OFFDIAG(u2, g2, s2)) * calc_nonlinear_u(gs * gs + 0.0625 * (g1s * g1s + g2s * g2s), gs, us, chi2[i], chi3[i]); @@ -402,10 +401,10 @@ void step_update_EDHB(RPR f, component fc, const grid_volume &gv, const RPR g, c } else { LOOP_OVER_VOL_OWNED(gv, fc, i) { - double gs = g[i]; - double us = u[i]; + realnum gs = g[i]; + realnum us = u[i]; DEF_kw; - double fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; + realnum fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; fw[i] = (gs * us + OFFDIAG(u1, g1, s1) + OFFDIAG(u2, g2, s2)); f[i] += (kapwkw + sigwkw) * fw[i] - (kapwkw - sigwkw) * fwprev; } @@ -414,11 +413,11 @@ void step_update_EDHB(RPR f, component fc, const grid_volume &gv, const RPR g, c else if (u1) { // 2x2 off-diagonal u if (chi3) { LOOP_OVER_VOL_OWNED(gv, fc, i) { - double g1s = g1[i] + g1[i + s] + g1[i - s1] + g1[i + (s - s1)]; - double gs = g[i]; - double us = u[i]; + realnum g1s = g1[i] + g1[i + s] + g1[i - s1] + g1[i + (s - s1)]; + realnum gs = g[i]; + realnum us = u[i]; DEF_kw; - double fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; + realnum fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; fw[i] = (gs * us + OFFDIAG(u1, g1, s1)) * calc_nonlinear_u(gs * gs + 0.0625 * (g1s * g1s), gs, us, chi2[i], chi3[i]); f[i] += (kapwkw + sigwkw) * fw[i] - (kapwkw - sigwkw) * fwprev; @@ -426,10 +425,10 @@ void step_update_EDHB(RPR f, component fc, const grid_volume &gv, const RPR g, c } else { LOOP_OVER_VOL_OWNED(gv, fc, i) { - double gs = g[i]; - double us = u[i]; + realnum gs = g[i]; + realnum us = u[i]; DEF_kw; - double fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; + realnum fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; fw[i] = (gs * us + OFFDIAG(u1, g1, s1)); f[i] += (kapwkw + sigwkw) * fw[i] - (kapwkw - sigwkw) * fwprev; } @@ -442,12 +441,12 @@ void step_update_EDHB(RPR f, component fc, const grid_volume &gv, const RPR g, c if (chi3) { if (g1 && g2) { LOOP_OVER_VOL_OWNED(gv, fc, i) { - double g1s = g1[i] + g1[i + s] + g1[i - s1] + g1[i + (s - s1)]; - double g2s = g2[i] + g2[i + s] + g2[i - s2] + g2[i + (s - s2)]; - double gs = g[i]; - double us = u[i]; + realnum g1s = g1[i] + g1[i + s] + g1[i - s1] + g1[i + (s - s1)]; + realnum g2s = g2[i] + g2[i + s] + g2[i - s2] + g2[i + (s - s2)]; + realnum gs = g[i]; + realnum us = u[i]; DEF_kw; - double fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; + realnum fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; fw[i] = (gs * us) * calc_nonlinear_u(gs * gs + 0.0625 * (g1s * g1s + g2s * g2s), gs, us, chi2[i], chi3[i]); f[i] += (kapwkw + sigwkw) * fw[i] - (kapwkw - sigwkw) * fwprev; @@ -455,11 +454,11 @@ void step_update_EDHB(RPR f, component fc, const grid_volume &gv, const RPR g, c } else if (g1) { LOOP_OVER_VOL_OWNED(gv, fc, i) { - double g1s = g1[i] + g1[i + s] + g1[i - s1] + g1[i + (s - s1)]; - double gs = g[i]; - double us = u[i]; + realnum g1s = g1[i] + g1[i + s] + g1[i - s1] + g1[i + (s - s1)]; + realnum gs = g[i]; + realnum us = u[i]; DEF_kw; - double fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; + realnum fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; fw[i] = (gs * us) * calc_nonlinear_u(gs * gs + 0.0625 * (g1s * g1s), gs, us, chi2[i], chi3[i]); f[i] += (kapwkw + sigwkw) * fw[i] - (kapwkw - sigwkw) * fwprev; @@ -470,10 +469,10 @@ void step_update_EDHB(RPR f, component fc, const grid_volume &gv, const RPR g, c } else { LOOP_OVER_VOL_OWNED(gv, fc, i) { - double gs = g[i]; - double us = u[i]; + realnum gs = g[i]; + realnum us = u[i]; DEF_kw; - double fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; + realnum fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; fw[i] = (gs * us) * calc_nonlinear_u(gs * gs, gs, us, chi2[i], chi3[i]); f[i] += (kapwkw + sigwkw) * fw[i] - (kapwkw - sigwkw) * fwprev; } @@ -481,10 +480,10 @@ void step_update_EDHB(RPR f, component fc, const grid_volume &gv, const RPR g, c } else if (u) { LOOP_OVER_VOL_OWNED(gv, fc, i) { - double gs = g[i]; - double us = u[i]; + realnum gs = g[i]; + realnum us = u[i]; DEF_kw; - double fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; + realnum fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; fw[i] = (gs * us); f[i] += (kapwkw + sigwkw) * fw[i] - (kapwkw - sigwkw) * fwprev; } @@ -492,7 +491,7 @@ void step_update_EDHB(RPR f, component fc, const grid_volume &gv, const RPR g, c else { LOOP_OVER_VOL_OWNED(gv, fc, i) { DEF_kw; - double fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; + realnum fwprev = fw[i], kapwkw = kapw[kw], sigwkw = sigw[kw]; fw[i] = g[i]; f[i] += (kapwkw + sigwkw) * fw[i] - (kapwkw - sigwkw) * fwprev; } @@ -503,10 +502,10 @@ void step_update_EDHB(RPR f, component fc, const grid_volume &gv, const RPR g, c if (u1 && u2) { // 3x3 off-diagonal u if (chi3) { LOOP_OVER_VOL_OWNED(gv, fc, i) { - double g1s = g1[i] + g1[i + s] + g1[i - s1] + g1[i + (s - s1)]; - double g2s = g2[i] + g2[i + s] + g2[i - s2] + g2[i + (s - s2)]; - double gs = g[i]; - double us = u[i]; + realnum g1s = g1[i] + g1[i + s] + g1[i - s1] + g1[i + (s - s1)]; + realnum g2s = g2[i] + g2[i + s] + g2[i - s2] + g2[i + (s - s2)]; + realnum gs = g[i]; + realnum us = u[i]; f[i] = (gs * us + OFFDIAG(u1, g1, s1) + OFFDIAG(u2, g2, s2)) * calc_nonlinear_u(gs * gs + 0.0625 * (g1s * g1s + g2s * g2s), gs, us, chi2[i], chi3[i]); @@ -514,8 +513,8 @@ void step_update_EDHB(RPR f, component fc, const grid_volume &gv, const RPR g, c } else { LOOP_OVER_VOL_OWNED(gv, fc, i) { - double gs = g[i]; - double us = u[i]; + realnum gs = g[i]; + realnum us = u[i]; f[i] = (gs * us + OFFDIAG(u1, g1, s1) + OFFDIAG(u2, g2, s2)); } } @@ -523,17 +522,17 @@ void step_update_EDHB(RPR f, component fc, const grid_volume &gv, const RPR g, c else if (u1) { // 2x2 off-diagonal u if (chi3) { LOOP_OVER_VOL_OWNED(gv, fc, i) { - double g1s = g1[i] + g1[i + s] + g1[i - s1] + g1[i + (s - s1)]; - double gs = g[i]; - double us = u[i]; + realnum g1s = g1[i] + g1[i + s] + g1[i - s1] + g1[i + (s - s1)]; + realnum gs = g[i]; + realnum us = u[i]; f[i] = (gs * us + OFFDIAG(u1, g1, s1)) * calc_nonlinear_u(gs * gs + 0.0625 * (g1s * g1s), gs, us, chi2[i], chi3[i]); } } else { LOOP_OVER_VOL_OWNED(gv, fc, i) { - double gs = g[i]; - double us = u[i]; + realnum gs = g[i]; + realnum us = u[i]; f[i] = (gs * us + OFFDIAG(u1, g1, s1)); } } @@ -545,19 +544,19 @@ void step_update_EDHB(RPR f, component fc, const grid_volume &gv, const RPR g, c if (chi3) { if (g1 && g2) { LOOP_OVER_VOL_OWNED(gv, fc, i) { - double g1s = g1[i] + g1[i + s] + g1[i - s1] + g1[i + (s - s1)]; - double g2s = g2[i] + g2[i + s] + g2[i - s2] + g2[i + (s - s2)]; - double gs = g[i]; - double us = u[i]; + realnum g1s = g1[i] + g1[i + s] + g1[i - s1] + g1[i + (s - s1)]; + realnum g2s = g2[i] + g2[i + s] + g2[i - s2] + g2[i + (s - s2)]; + realnum gs = g[i]; + realnum us = u[i]; f[i] = (gs * us) * calc_nonlinear_u(gs * gs + 0.0625 * (g1s * g1s + g2s * g2s), gs, us, chi2[i], chi3[i]); } } else if (g1) { LOOP_OVER_VOL_OWNED(gv, fc, i) { - double g1s = g1[i] + g1[i + s] + g1[i - s1] + g1[i + (s - s1)]; - double gs = g[i]; - double us = u[i]; + realnum g1s = g1[i] + g1[i + s] + g1[i - s1] + g1[i + (s - s1)]; + realnum gs = g[i]; + realnum us = u[i]; f[i] = (gs * us) * calc_nonlinear_u(gs * gs + 0.0625 * (g1s * g1s), gs, us, chi2[i], chi3[i]); } @@ -567,16 +566,16 @@ void step_update_EDHB(RPR f, component fc, const grid_volume &gv, const RPR g, c } else { LOOP_OVER_VOL_OWNED(gv, fc, i) { - double gs = g[i]; - double us = u[i]; + realnum gs = g[i]; + realnum us = u[i]; f[i] = (gs * us) * calc_nonlinear_u(gs * gs, gs, us, chi2[i], chi3[i]); } } } else if (u) { LOOP_OVER_VOL_OWNED(gv, fc, i) { - double gs = g[i]; - double us = u[i]; + realnum gs = g[i]; + realnum us = u[i]; f[i] = (gs * us); } } diff --git a/src/stress.cpp b/src/stress.cpp index d1832d318..99d2ca06a 100644 --- a/src/stress.cpp +++ b/src/stress.cpp @@ -90,7 +90,7 @@ void dft_force::operator-=(const dft_force &st) { static void stress_sum(size_t Nfreq, double *F, const dft_chunk *F1, const dft_chunk *F2) { for (const dft_chunk *curF1 = F1, *curF2 = F2; curF1 && curF2; curF1 = curF1->next_in_dft, curF2 = curF2->next_in_dft) { - complex extra_weight(real(curF1->extra_weight), imag(curF1->extra_weight)); + complex extra_weight(real(curF1->extra_weight), imag(curF1->extra_weight)); for (size_t k = 0; k < curF1->N; ++k) for (size_t i = 0; i < Nfreq; ++i) F[i] += real(extra_weight * curF1->dft[k * Nfreq + i] * conj(curF2->dft[k * Nfreq + i])); diff --git a/src/structure.cpp b/src/structure.cpp index 23d68c5b1..dfbbb16e3 100644 --- a/src/structure.cpp +++ b/src/structure.cpp @@ -685,9 +685,9 @@ void structure_chunk::use_pml(direction d, double dx, double bloc, double Rasymp if (!sig[dd]) { int spml = (dd == d) ? (2 * gv.num_direction(d) + 2) : 1; sigsize[dd] = spml; - sig[dd] = new double[spml]; - kap[dd] = new double[spml]; - siginv[dd] = new double[spml]; + sig[dd] = new realnum[spml]; + kap[dd] = new realnum[spml]; + siginv[dd] = new realnum[spml]; for (int i = 0; i < spml; ++i) { sig[dd][i] = 0.0; kap[dd][i] = 1.0; @@ -806,9 +806,9 @@ structure_chunk::structure_chunk(const structure_chunk *o) : v(o->v) { // Copy over the PML conductivity arrays: if (is_mine()) FOR_DIRECTIONS(d) { if (o->sig[d]) { - sig[d] = new double[2 * gv.num_direction(d) + 1]; - kap[d] = new double[2 * gv.num_direction(d) + 1]; - siginv[d] = new double[2 * gv.num_direction(d) + 1]; + sig[d] = new realnum[2 * gv.num_direction(d) + 1]; + kap[d] = new realnum[2 * gv.num_direction(d) + 1]; + siginv[d] = new realnum[2 * gv.num_direction(d) + 1]; sigsize[d] = o->sigsize[d]; for (int i = 0; i < 2 * gv.num_direction(d) + 1; i++) { sig[d][i] = o->sig[d][i]; diff --git a/src/structure_dump.cpp b/src/structure_dump.cpp index 40d959545..f5002a5e2 100644 --- a/src/structure_dump.cpp +++ b/src/structure_dump.cpp @@ -44,7 +44,8 @@ void structure::write_susceptibility_params(h5file *file, const char *dname, int // Write params size_t params_start = 0; - file->create_data(dname, 1, ¶ms_ntotal); + file->create_data(dname, 1, ¶ms_ntotal, false /* append_data */, + sizeof(realnum) == sizeof(float) /* single_precision */); if (am_master()) { susceptibility *sus = chunks[0]->chiP[EorH]; while (sus) { @@ -57,10 +58,10 @@ void structure::write_susceptibility_params(h5file *file, const char *dname, int void structure::dump_chunk_layout(const char *filename) { // Write grid_volume info for each chunk so we can reconstruct chunk division from split_by_cost size_t sz = num_chunks * 3; - realnum *origins = new realnum[sz]; + double *origins = new double[sz]; size_t *nums = new size_t[sz]; memset(nums, 0, sizeof(size_t) * sz); - memset(origins, 0, sizeof(realnum) * sz); + memset(origins, 0, sizeof(double) * sz); for (int i = 0; i < num_chunks; ++i) { int idx = i * 3; LOOP_OVER_DIRECTIONS(gv.dim, d) { @@ -69,7 +70,7 @@ void structure::dump_chunk_layout(const char *filename) { } } h5file file(filename, h5file::WRITE, true); - file.create_data("gv_origins", 1, &sz); + file.create_data("gv_origins", 1, &sz, false /* append_data */, false /* single_precision */); if (am_master()) { size_t gv_origins_start = 0; file.write_chunk(1, &gv_origins_start, &sz, origins); @@ -115,7 +116,7 @@ void structure::dump(const char *filename) { delete[] num_chi1inv; // write the data - file.create_data("chi1inv", 1, &ntotal); + file.create_data("chi1inv", 1, &ntotal, false /* append_data */, false /* single_precision */); for (int i = 0; i < num_chunks; i++) if (chunks[i]->is_mine()) { size_t ntot = chunks[i]->gv.ntot(); @@ -312,7 +313,6 @@ susceptibility *make_sus_list_from_params(h5file *file, int rank, size_t dims[3] realnum num_params; file->read_chunk(rank, &start, num_params_dims, &num_params); start += num_params_dims[0]; - if (num_params == 4) { // This is a lorentzian_susceptibility and the next 4 values in the dataset // are id, omega_0, gamma, and no_omega_0_denominator. @@ -467,8 +467,8 @@ void structure::load_chunk_layout(const char *filename, boundary_region &br) { // Load chunk grid_volumes from a file h5file file(filename, h5file::READONLY, true); size_t sz = num_chunks * 3; - realnum *origins = new realnum[sz]; - memset(origins, 0, sizeof(realnum) * sz); + double *origins = new double[sz]; + memset(origins, 0, sizeof(double) * sz); size_t *nums = new size_t[sz]; memset(nums, 0, sizeof(size_t) * sz); diff --git a/src/susceptibility.cpp b/src/susceptibility.cpp index 941cc36e8..112ac1712 100644 --- a/src/susceptibility.cpp +++ b/src/susceptibility.cpp @@ -55,10 +55,10 @@ susceptibility *susceptibility::clone() const { } // generic base class definition. -std::complex susceptibility::chi1(double freq, double sigma) { +std::complex susceptibility::chi1(realnum freq, realnum sigma) { (void)freq; (void)sigma; - return std::complex(0, 0); + return std::complex(0, 0); } void susceptibility::delete_internal_data(void *data) const { free(data); } @@ -118,7 +118,7 @@ void *lorentzian_susceptibility::new_internal_data(realnum *W[NUM_FIELD_COMPONEN return (void *)d; } -void lorentzian_susceptibility::init_internal_data(realnum *W[NUM_FIELD_COMPONENTS][2], double dt, +void lorentzian_susceptibility::init_internal_data(realnum *W[NUM_FIELD_COMPONENTS][2], realnum dt, const grid_volume &gv, void *data) const { (void)dt; // unused lorentzian_data *d = (lorentzian_data *)data; @@ -166,10 +166,10 @@ void *lorentzian_susceptibility::copy_internal_data(void *data) const { algebra from this to get the condition for a root with |z| > 1. FIXME: this test seems to be too conservative (issue #12) */ -static bool lorentzian_unstable(double omega_0, double gamma, double dt) { - double w = 2 * pi * omega_0, g = 2 * pi * gamma; - double g2 = g * dt / 2, w2 = (w * dt) * (w * dt); - double b = (1 - w2 / 2) / (1 + g2), c = (1 - g2) / (1 + g2); +static bool lorentzian_unstable(realnum omega_0, realnum gamma, realnum dt) { + realnum w = 2 * pi * omega_0, g = 2 * pi * gamma; + realnum g2 = g * dt / 2, w2 = (w * dt) * (w * dt); + realnum b = (1 - w2 / 2) / (1 + g2), c = (1 - g2) / (1 + g2); return b * b > c && 2 * b * b - c + 2 * fabs(b) * sqrt(b * b - c) > 1; } #endif @@ -186,13 +186,13 @@ static bool lorentzian_unstable(double omega_0, double gamma, double dt) { (0.25 * ((g[i] + g[i - sx]) * u[i] + (g[i + s] + g[(i + s) - sx]) * u[i + s])) void lorentzian_susceptibility::update_P(realnum *W[NUM_FIELD_COMPONENTS][2], - realnum *W_prev[NUM_FIELD_COMPONENTS][2], double dt, + realnum *W_prev[NUM_FIELD_COMPONENTS][2], realnum dt, const grid_volume &gv, void *P_internal_data) const { lorentzian_data *d = (lorentzian_data *)P_internal_data; - const double omega2pi = 2 * pi * omega_0, g2pi = gamma * 2 * pi; - const double omega0dtsqr = omega2pi * omega2pi * dt * dt; - const double gamma1inv = 1 / (1 + g2pi * dt / 2), gamma1 = (1 - g2pi * dt / 2); - const double omega0dtsqr_denom = no_omega_0_denominator ? 0 : omega0dtsqr; + const realnum omega2pi = 2 * pi * omega_0, g2pi = gamma * 2 * pi; + const realnum omega0dtsqr = omega2pi * omega2pi * dt * dt; + const realnum gamma1inv = 1 / (1 + g2pi * dt / 2), gamma1 = (1 - g2pi * dt / 2); + const realnum omega0dtsqr_denom = no_omega_0_denominator ? 0 : omega0dtsqr; (void)W_prev; // unused; // TODO: add back lorentzian_unstable(omega_0, gamma, dt) if we can improve the stability test @@ -294,35 +294,35 @@ realnum *lorentzian_susceptibility::cinternal_notowned_ptr(int inotowned, compon return d->P[c][cmp] + n; } -std::complex lorentzian_susceptibility::chi1(double freq, double sigma) { +std::complex lorentzian_susceptibility::chi1(realnum freq, realnum sigma) { if (no_omega_0_denominator) { // Drude model - return sigma * omega_0 * omega_0 / std::complex(-freq * freq, -gamma * freq); + return sigma * omega_0 * omega_0 / std::complex(-freq * freq, -gamma * freq); } else { // Standard Lorentzian model return sigma * omega_0 * omega_0 / - std::complex(omega_0 * omega_0 - freq * freq, -gamma * freq); + std::complex(omega_0 * omega_0 - freq * freq, -gamma * freq); } } void lorentzian_susceptibility::dump_params(h5file *h5f, size_t *start) { size_t num_params = 5; size_t params_dims[1] = {num_params}; - double params_data[] = {4, (double)get_id(), omega_0, gamma, (double)no_omega_0_denominator}; + realnum params_data[] = {4, (realnum)get_id(), omega_0, gamma, (realnum)no_omega_0_denominator}; h5f->write_chunk(1, start, params_dims, params_data); *start += num_params; } void noisy_lorentzian_susceptibility::update_P(realnum *W[NUM_FIELD_COMPONENTS][2], - realnum *W_prev[NUM_FIELD_COMPONENTS][2], double dt, + realnum *W_prev[NUM_FIELD_COMPONENTS][2], realnum dt, const grid_volume &gv, void *P_internal_data) const { lorentzian_susceptibility::update_P(W, W_prev, dt, gv, P_internal_data); lorentzian_data *d = (lorentzian_data *)P_internal_data; - const double g2pi = gamma * 2 * pi; - const double w2pi = omega_0 * 2 * pi; - const double amp = w2pi * noise_amp * sqrt(g2pi) * dt * dt / (1 + g2pi * dt / 2); + const realnum g2pi = gamma * 2 * pi; + const realnum w2pi = omega_0 * 2 * pi; + const realnum amp = w2pi * noise_amp * sqrt(g2pi) * dt * dt / (1 + g2pi * dt / 2); /* for uniform random numbers in [-amp,amp] below, multiply amp by sqrt(3) */ FOR_COMPONENTS(c) DOCMP2 { @@ -341,19 +341,19 @@ void noisy_lorentzian_susceptibility::update_P(realnum *W[NUM_FIELD_COMPONENTS][ void noisy_lorentzian_susceptibility::dump_params(h5file *h5f, size_t *start) { size_t num_params = 6; size_t params_dims[1] = {num_params}; - double params_data[] = { - 5, (double)get_id(), noise_amp, omega_0, gamma, (double)no_omega_0_denominator}; + realnum params_data[] = { + 5, (realnum)get_id(), noise_amp, omega_0, gamma, (realnum)no_omega_0_denominator}; h5f->write_chunk(1, start, params_dims, params_data); *start += num_params; } -gyrotropic_susceptibility::gyrotropic_susceptibility(const vec &bias, double omega_0, double gamma, - double alpha, gyrotropy_model model) +gyrotropic_susceptibility::gyrotropic_susceptibility(const vec &bias, realnum omega_0, realnum gamma, + realnum alpha, gyrotropy_model model) : omega_0(omega_0), gamma(gamma), alpha(alpha), model(model) { // Precalculate g_{ij} = sum_k epsilon_{ijk} b_k, used in update_P. // Ignore |b| for Landau-Lifshitz-Gilbert gyrotropy model. const vec b = (model == GYROTROPIC_SATURATED) ? bias / abs(bias) : bias; - memset(gyro_tensor, 0, 9 * sizeof(double)); + memset(gyro_tensor, 0, 9 * sizeof(realnum)); gyro_tensor[X][Y] = b.z(); gyro_tensor[Y][X] = -b.z(); gyro_tensor[Y][Z] = b.x(); @@ -365,7 +365,7 @@ gyrotropic_susceptibility::gyrotropic_susceptibility(const vec &bias, double ome /* To implement gyrotropic susceptibilities, we track three polarization components (e.g. Px, Py, Pz) on EACH of the Yee cell's three driving field positions (e.g., Ex, Ey, and Ez), i.e. 9 - numbers per cell. This takes 3x the memory and runtime compared to + numbers per cell. This takes 3X the memory and runtime compared to Lorentzian susceptibility. The advantage is that during update_P, we can directly access the value of P at each update point without averaging. */ @@ -391,7 +391,7 @@ void *gyrotropic_susceptibility::new_internal_data(realnum *W[NUM_FIELD_COMPONEN return (void *)d; } -void gyrotropic_susceptibility::init_internal_data(realnum *W[NUM_FIELD_COMPONENTS][2], double dt, +void gyrotropic_susceptibility::init_internal_data(realnum *W[NUM_FIELD_COMPONENTS][2], realnum dt, const grid_volume &gv, void *data) const { (void)dt; // unused gyrotropy_data *d = (gyrotropy_data *)data; @@ -442,33 +442,33 @@ bool gyrotropic_susceptibility::needs_P(component c, int cmp, #define OFFDIAGW(g, sx, s) (0.25 * (g[i] + g[i - sx] + g[i + s] + g[i + s - sx])) void gyrotropic_susceptibility::update_P(realnum *W[NUM_FIELD_COMPONENTS][2], - realnum *W_prev[NUM_FIELD_COMPONENTS][2], double dt, + realnum *W_prev[NUM_FIELD_COMPONENTS][2], realnum dt, const grid_volume &gv, void *P_internal_data) const { gyrotropy_data *d = (gyrotropy_data *)P_internal_data; - const double omega2pidt = 2 * pi * omega_0 * dt; - const double g2pidt = 2 * pi * gamma * dt; + const realnum omega2pidt = 2 * pi * omega_0 * dt; + const realnum g2pidt = 2 * pi * gamma * dt; (void)W_prev; // unused; switch (model) { case GYROTROPIC_LORENTZIAN: case GYROTROPIC_DRUDE: { - const double omega0dtsqr = omega2pidt * omega2pidt; - const double gamma1 = (1 - g2pidt / 2); - const double diag = 2 - (model == GYROTROPIC_DRUDE ? 0 : omega0dtsqr); - const double pt = pi * dt; + const realnum omega0dtsqr = omega2pidt * omega2pidt; + const realnum gamma1 = (1 - g2pidt / 2); + const realnum diag = 2 - (model == GYROTROPIC_DRUDE ? 0 : omega0dtsqr); + const realnum pt = pi * dt; // Precalculate 3x3 matrix inverse, exploiting skew symmetry - const double gd = (1 + g2pidt / 2); - const double gx = pt * gyro_tensor[Y][Z]; - const double gy = pt * gyro_tensor[Z][X]; - const double gz = pt * gyro_tensor[X][Y]; - const double invdet = 1.0 / gd / (gd * gd + gx * gx + gy * gy + gz * gz); - const double inv[3][3] = {{invdet * (gd * gd + gx * gx), invdet * (gx * gy + gd * gz), - invdet * (gx * gz - gd * gy)}, - {invdet * (gy * gx - gd * gz), invdet * (gd * gd + gy * gy), - invdet * (gy * gz + gd * gx)}, - {invdet * (gz * gx + gd * gy), invdet * (gz * gy - gd * gx), - invdet * (gd * gd + gz * gz)}}; + const realnum gd = (1 + g2pidt / 2); + const realnum gx = pt * gyro_tensor[Y][Z]; + const realnum gy = pt * gyro_tensor[Z][X]; + const realnum gz = pt * gyro_tensor[X][Y]; + const realnum invdet = 1.0 / gd / (gd * gd + gx * gx + gy * gy + gz * gz); + const realnum inv[3][3] = {{invdet * (gd * gd + gx * gx), invdet * (gx * gy + gd * gz), + invdet * (gx * gz - gd * gy)}, + {invdet * (gy * gx - gd * gz), invdet * (gd * gd + gy * gy), + invdet * (gy * gz + gd * gx)}, + {invdet * (gz * gx + gd * gy), invdet * (gz * gy - gd * gx), + invdet * (gd * gd + gz * gz)}}; FOR_COMPONENTS(c) DOCMP2 { if (d->P[c][cmp][0]) { @@ -516,20 +516,20 @@ void gyrotropic_susceptibility::update_P(realnum *W[NUM_FIELD_COMPONENTS][2], } break; case GYROTROPIC_SATURATED: { - const double dt2pi = 2 * pi * dt; + const realnum dt2pi = 2 * pi * dt; // Precalculate 3x3 matrix inverse, exploiting skew symmetry - const double gd = 0.5; - const double gx = -0.5 * alpha * gyro_tensor[Y][Z]; - const double gy = -0.5 * alpha * gyro_tensor[Z][X]; - const double gz = -0.5 * alpha * gyro_tensor[X][Y]; - const double invdet = 1.0 / gd / (gd * gd + gx * gx + gy * gy + gz * gz); - const double inv[3][3] = {{invdet * (gd * gd + gx * gx), invdet * (gx * gy + gd * gz), - invdet * (gx * gz - gd * gy)}, - {invdet * (gy * gx - gd * gz), invdet * (gd * gd + gy * gy), - invdet * (gy * gz + gd * gx)}, - {invdet * (gz * gx + gd * gy), invdet * (gz * gy - gd * gx), - invdet * (gd * gd + gz * gz)}}; + const realnum gd = 0.5; + const realnum gx = -0.5 * alpha * gyro_tensor[Y][Z]; + const realnum gy = -0.5 * alpha * gyro_tensor[Z][X]; + const realnum gz = -0.5 * alpha * gyro_tensor[X][Y]; + const realnum invdet = 1.0 / gd / (gd * gd + gx * gx + gy * gy + gz * gz); + const realnum inv[3][3] = {{invdet * (gd * gd + gx * gx), invdet * (gx * gy + gd * gz), + invdet * (gx * gz - gd * gy)}, + {invdet * (gy * gx - gd * gz), invdet * (gd * gd + gy * gy), + invdet * (gy * gz + gd * gx)}, + {invdet * (gz * gx + gd * gy), invdet * (gz * gy - gd * gx), + invdet * (gd * gd + gz * gz)}}; FOR_COMPONENTS(c) DOCMP2 { if (d->P[c][cmp][0]) { @@ -618,9 +618,9 @@ realnum *gyrotropic_susceptibility::cinternal_notowned_ptr(int inotowned, compon void gyrotropic_susceptibility::dump_params(h5file *h5f, size_t *start) { size_t num_params = 9; size_t params_dims[1] = {num_params}; - double bias[] = {gyro_tensor[Y][Z], gyro_tensor[Z][X], gyro_tensor[X][Y]}; - double params_data[] = {8, (double)get_id(), bias[X], bias[Y], bias[Z], omega_0, gamma, - alpha, (double)model}; + realnum bias[] = {gyro_tensor[Y][Z], gyro_tensor[Z][X], gyro_tensor[X][Y]}; + realnum params_data[] = {8, (realnum)get_id(), bias[X], bias[Y], bias[Z], omega_0, gamma, + alpha, (realnum)model}; h5f->write_chunk(1, start, params_dims, params_data); *start += num_params; } diff --git a/tests/array-slice-ll.cpp b/tests/array-slice-ll.cpp index 88a247ebd..a590473d5 100644 --- a/tests/array-slice-ll.cpp +++ b/tests/array-slice-ll.cpp @@ -19,8 +19,6 @@ using namespace meep; -typedef std::complex cdouble; - vector3 v3(double x, double y = 0.0, double z = 0.0) { vector3 v; v.x = x; @@ -30,7 +28,7 @@ vector3 v3(double x, double y = 0.0, double z = 0.0) { } // passthrough field function -cdouble default_field_function(const cdouble *fields, const vec &loc, void *data_) { +std::complex default_field_function(const std::complex *fields, const vec &loc, void *data_) { (void)loc; // unused (void)data_; // unused return fields[0]; @@ -40,7 +38,7 @@ cdouble default_field_function(const cdouble *fields, const vec &loc, void *data /***************************************************************/ /***************************************************************/ #define RELTOL 1.0e-6 -double Compare(double *d1, double *d2, int N, const char *Name) { +double Compare(realnum *d1, realnum *d2, int N, const char *Name) { double Norm1 = 0.0, Norm2 = 0.0, NormDelta = 0.0; for (int n = 0; n < N; n++) { Norm1 += d1[n] * d1[n]; @@ -55,7 +53,7 @@ double Compare(double *d1, double *d2, int N, const char *Name) { return RelErr; } -double Compare(cdouble *d1, cdouble *d2, int N, const char *Name) { +double Compare(std::complex *d1, std::complex *d2, int N, const char *Name) { double Norm1 = 0.0, Norm2 = 0.0, NormDelta = 0.0; for (int n = 0; n < N; n++) { Norm1 += norm(d1[n]); @@ -182,8 +180,8 @@ int main(int argc, char *argv[]) { int rank; size_t dims1D[1], dims2D[2]; direction dirs1D[1], dirs2D[2]; - cdouble *file_slice1d = 0; - double *file_slice2d = 0; + std::complex *file_slice1d = 0; + realnum *file_slice2d = 0; #define H5FILENAME DATADIR "array-slice-ll-ref" #define NX 126 @@ -201,19 +199,19 @@ int main(int argc, char *argv[]) { // read 1D and 2D array-slice data from HDF5 file // h5file *file = f.open_h5file(H5FILENAME, h5file::READONLY); - double *rdata = file->read("hz.r", &rank, dims1D, 1); + realnum *rdata = (realnum *)file->read("hz.r", &rank, dims1D, 1, sizeof(realnum) == sizeof(float)); if (rank != 1 || dims1D[0] != NX) abort("failed to read 1D data(hz.r) from file %s.h5", H5FILENAME); - double *idata = file->read("hz.i", &rank, dims1D, 1); + realnum *idata = (realnum *)file->read("hz.i", &rank, dims1D, 1, sizeof(realnum) == sizeof(float)); if (rank != 1 || dims1D[0] != NX) abort("failed to read 1D data(hz.i) from file %s.h5", H5FILENAME); - file_slice1d = new cdouble[dims1D[0]]; + file_slice1d = new std::complex[dims1D[0]]; for (size_t n = 0; n < dims1D[0]; n++) - file_slice1d[n] = cdouble(rdata[n], idata[n]); + file_slice1d[n] = std::complex(rdata[n], idata[n]); delete[] rdata; delete[] idata; - file_slice2d = file->read("sy", &rank, dims2D, 2); + file_slice2d = (realnum *)file->read("sy", &rank, dims2D, 2, sizeof(realnum) == sizeof(float)); if (rank != 2 || dims2D[0] != NX || dims2D[1] != NY) abort("failed to read 2D reference data from file %s.h5", H5FILENAME); delete file; @@ -224,14 +222,20 @@ int main(int argc, char *argv[]) { // rank = f.get_array_slice_dimensions(v1d, dims1D, dirs1D, true, false); if (rank != 1 || dims1D[0] != NX) abort("incorrect dimensions for 1D slice"); - cdouble *slice1d = f.get_complex_array_slice(v1d, Hz, 0, 0, true); - double RelErr1D = Compare(slice1d, file_slice1d, NX, "Hz_1d"); + std::complex *slice1d = f.get_complex_array_slice(v1d, Hz, 0, 0, true); + std::complex *slice1d_realnum = new std::complex[NX]; + for (int i = 0; i < NX; ++i) + slice1d_realnum[i] = std::complex(slice1d[i]); + double RelErr1D = Compare(slice1d_realnum, file_slice1d, NX, "Hz_1d"); master_printf("1D: rel error %e\n", RelErr1D); rank = f.get_array_slice_dimensions(v2d, dims2D, dirs2D, true, false); if (rank != 2 || dims2D[0] != NX || dims2D[1] != NY) abort("incorrect dimensions for 2D slice"); double *slice2d = f.get_array_slice(v2d, Sy, 0, 0, true); - double RelErr2D = Compare(slice2d, file_slice2d, NX * NY, "Sy_2d"); + realnum *slice2d_realnum = new realnum[NX*NY]; + for (int i = 0; i < NX*NY; ++i) + slice2d_realnum[i] = realnum(slice2d[i]); + double RelErr2D = Compare(slice2d_realnum, file_slice2d, NX * NY, "Sy_2d"); master_printf("2D: rel error %e\n", RelErr2D); }; // if (write_files) ... else ... diff --git a/tests/cyl-ellipsoid-ll.cpp b/tests/cyl-ellipsoid-ll.cpp index 996af3193..359499fb1 100644 --- a/tests/cyl-ellipsoid-ll.cpp +++ b/tests/cyl-ellipsoid-ll.cpp @@ -30,13 +30,13 @@ bool compare_hdf5_datasets(const char *file1, const char *name1, const char *fil h5file f1(file1, h5file::READONLY, false); int rank1; size_t *dims1 = new size_t[expected_rank]; - double *data1 = f1.read(name1, &rank1, dims1, expected_rank); + realnum *data1 = (realnum *)f1.read(name1, &rank1, dims1, expected_rank, sizeof(realnum) == sizeof(float)); if (!data1) return false; h5file f2(file2, h5file::READONLY, false); int rank2; size_t *dims2 = new size_t[expected_rank]; - double *data2 = f2.read(name2, &rank2, dims2, expected_rank); + realnum *data2 = (realnum *)f2.read(name2, &rank2, dims2, expected_rank, sizeof(realnum) == sizeof(float)); if (!data2) return false; if (rank1 != expected_rank || rank2 != expected_rank) return false; diff --git a/tests/dft-fields.cpp b/tests/dft-fields.cpp index 3d298cd47..23f6b832e 100644 --- a/tests/dft-fields.cpp +++ b/tests/dft-fields.cpp @@ -111,9 +111,9 @@ double compare_array_to_dataset(cdouble *field_array, int array_rank, size_t *ar h5file f(file, h5file::READONLY, false); char dataname[100]; snprintf(dataname, 100, "%s.r", name); - double *rdata = f.read(dataname, &file_rank, file_dims, 2); + double *rdata = (double *)f.read(dataname, &file_rank, file_dims, 2, false /* single_precision */); snprintf(dataname, 100, "%s.i", name); - double *idata = f.read(dataname, &file_rank, file_dims, 2); + double *idata = (double *)f.read(dataname, &file_rank, file_dims, 2, false /* single_precision */); if (!rdata || !idata) return -1.0; if (file_rank != array_rank) return -1.0; for (int n = 0; n < file_rank; n++) @@ -150,9 +150,9 @@ double compare_complex_hdf5_datasets(const char *file1, const char *name1, const int rank1; size_t *dims1 = new size_t[expected_rank]; snprintf(dataname, 100, "%s.r", name1); - double *rdata1 = f1.read(dataname, &rank1, dims1, expected_rank); + double *rdata1 = (double *)f1.read(dataname, &rank1, dims1, expected_rank, false /* single_precision */); snprintf(dataname, 100, "%s.i", name1); - double *idata1 = f1.read(dataname, &rank1, dims1, expected_rank); + double *idata1 = (double *)f1.read(dataname, &rank1, dims1, expected_rank, false /* single_precision */); if (!rdata1 || !idata1) return -1.0; // read dataset 2 @@ -160,9 +160,9 @@ double compare_complex_hdf5_datasets(const char *file1, const char *name1, const int rank2; size_t *dims2 = new size_t[expected_rank]; snprintf(dataname, 100, "%s.r", name2); - double *rdata2 = f2.read(dataname, &rank2, dims2, expected_rank); + double *rdata2 = (double *)f2.read(dataname, &rank2, dims2, expected_rank, false /* single_precision */); snprintf(dataname, 100, "%s.i", name2); - double *idata2 = f2.read(dataname, &rank2, dims2, expected_rank); + double *idata2 = (double *)f2.read(dataname, &rank2, dims2, expected_rank, false /* single_precision */); if (!rdata2 || !idata2) return -1.0; // check same size diff --git a/tests/h5test.cpp b/tests/h5test.cpp index 6895f413f..5ca24b02a 100644 --- a/tests/h5test.cpp +++ b/tests/h5test.cpp @@ -114,7 +114,7 @@ bool check_2d(double eps(const vec &), double a, int splitting, symfunc Sf, doub snprintf(dataname, 256, "%s%s", component_name(file_c), reim ? ".i" : (real_fields ? "" : ".r")); - realnum *h5data = file->read(dataname, &rank, dims, 2); + realnum *h5data = (realnum *)file->read(dataname, &rank, dims, 2, sizeof(realnum) == sizeof(float)); file->prevent_deadlock(); // hackery if (!h5data) abort("failed to read dataset %s:%s\n", name, dataname); if (rank != expected_rank) @@ -223,7 +223,7 @@ bool check_3d(double eps(const vec &), double a, int splitting, symfunc Sf, comp snprintf(dataname, 256, "%s%s", component_name(file_c), reim ? ".i" : (real_fields ? "" : ".r")); - realnum *h5data = file->read(dataname, &rank, dims, 3); + realnum *h5data = (realnum *)file->read(dataname, &rank, dims, 3, sizeof(realnum) == sizeof(float)); file->prevent_deadlock(); // hackery if (!h5data) abort("failed to read dataset %s:%s\n", name, dataname); if (rank != expected_rank) @@ -330,7 +330,7 @@ bool check_2d_monitor(double eps(const vec &), double a, int splitting, symfunc snprintf(dataname, 256, "%s%s", component_name(file_c), reim ? ".i" : (real_fields ? "" : ".r")); - realnum *h5data = file->read(dataname, &rank, dims, 2); + realnum *h5data = (realnum *)file->read(dataname, &rank, dims, 2, sizeof(realnum) == sizeof(float)); file->prevent_deadlock(); // hackery if (!h5data) abort("failed to read dataset %s:%s\n", file->file_name(), dataname); if (rank != 1) abort("monitor-point data is not one-dimensional"); diff --git a/tests/user-defined-material.cpp b/tests/user-defined-material.cpp index 430bfd59c..8024a316e 100644 --- a/tests/user-defined-material.cpp +++ b/tests/user-defined-material.cpp @@ -54,13 +54,13 @@ bool compare_hdf5_datasets(const char *file1, const char *name1, const char *fil h5file f1(file1, h5file::READONLY, false); int rank1; size_t *dims1 = new size_t[expected_rank]; - double *data1 = f1.read(name1, &rank1, dims1, expected_rank); + realnum *data1 = (realnum *)f1.read(name1, &rank1, dims1, expected_rank, sizeof(realnum) == sizeof(float)); if (!data1) return false; h5file f2(file2, h5file::READONLY, false); int rank2; size_t *dims2 = new size_t[expected_rank]; - double *data2 = f2.read(name2, &rank2, dims2, expected_rank); + realnum *data2 = (realnum *)f2.read(name2, &rank2, dims2, expected_rank, sizeof(realnum) == sizeof(float)); if (!data2) return false; if (rank1 != expected_rank || rank2 != expected_rank) return false;