From 871b4ca06d22d54dd7e9fe7717c37e7fe6687fd7 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Tue, 7 May 2019 13:27:56 -0400 Subject: [PATCH 1/6] try openmp for near2far loop --- src/near2far.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/near2far.cpp b/src/near2far.cpp index f1937f5b9..09b9bead3 100644 --- a/src/near2far.cpp +++ b/src/near2far.cpp @@ -250,12 +250,13 @@ void dft_near2far::farfield_lowlevel(std::complex<double> *EH, const vec &x) { component c0 = component(f->vc); /* equivalent source component */ vec rshift(f->shift * (0.5 * f->fc->gv.inva)); - size_t idx_dft = 0; - LOOP_OVER_IVECS(f->fc->gv, f->is, f->ie, idx) { - IVEC_LOOP_LOC(f->fc->gv, x0); - x0 = f->S.transform(x0, f->sn) + rshift; - for (int i = 0; i < Nfreq; ++i) { - double freq = freq_min + i * dfreq; +#pragma omp parallel for + for (int i = 0; i < Nfreq; ++i) { + double freq = freq_min + i * dfreq; + size_t idx_dft = 0; + LOOP_OVER_IVECS(f->fc->gv, f->is, f->ie, idx) { + IVEC_LOOP_LOC(f->fc->gv, x0); + x0 = f->S.transform(x0, f->sn) + rshift; vec xs(x0); for (int i0 = -periodic_n[0]; i0 <= periodic_n[0]; ++i0) { if (periodic_d[0] != NO_DIRECTION) @@ -271,8 +272,8 @@ void dft_near2far::farfield_lowlevel(std::complex<double> *EH, const vec &x) { EH[i * 6 + j] += EH6[j] * cphase; } } + idx_dft++; } - idx_dft++; } } } From be6c00074983d1510f75b24cf72e88610a7e487c Mon Sep 17 00:00:00 2001 From: "Steven G. 
Johnson" Date: Thu, 9 May 2019 14:24:38 -0400 Subject: [PATCH 2/6] add --with-openmp configure flag --- configure.ac | 6 +++ m4/ax_openmp.m4 | 123 +++++++++++++++++++++++++++++++++++++++++++++++ src/near2far.cpp | 4 +- 3 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 m4/ax_openmp.m4 diff --git a/configure.ac b/configure.ac index 291e9594a..c7ab24f5b 100644 --- a/configure.ac +++ b/configure.ac @@ -59,6 +59,12 @@ if test "x$with_mpi" = "xyes"; then fi fi +AC_ARG_ENABLE(openmp, [AC_HELP_STRING([--enable-openmp],[use OpenMP directives for parallelism])], enable_openmp=$enableval, enable_openmp=no) +if test x"$enable_openmp" = "xyes"; then + AC_DEFINE(HAVE_OPENMP,1,[Define to enable OpenMP]) + AX_OPENMP([], [AC_MSG_ERROR([don't know how to enable OpenMP])]) +fi + ############################################################################## # Compiler flags diff --git a/m4/ax_openmp.m4 b/m4/ax_openmp.m4 new file mode 100644 index 000000000..866e1d664 --- /dev/null +++ b/m4/ax_openmp.m4 @@ -0,0 +1,123 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_openmp.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_OPENMP([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) +# +# DESCRIPTION +# +# This macro tries to find out how to compile programs that use OpenMP a +# standard API and set of compiler directives for parallel programming +# (see http://www-unix.mcs/) +# +# On success, it sets the OPENMP_CFLAGS/OPENMP_CXXFLAGS/OPENMP_F77FLAGS +# output variable to the flag (e.g. -omp) used both to compile *and* link +# OpenMP programs in the current language. +# +# NOTE: You are assumed to not only compile your program with these flags, +# but also link it with them as well. 
+# +# If you want to compile everything with OpenMP, you should set: +# +# CFLAGS="$CFLAGS $OPENMP_CFLAGS" +# #OR# CXXFLAGS="$CXXFLAGS $OPENMP_CXXFLAGS" +# #OR# FFLAGS="$FFLAGS $OPENMP_FFLAGS" +# +# (depending on the selected language). +# +# The user can override the default choice by setting the corresponding +# environment variable (e.g. OPENMP_CFLAGS). +# +# ACTION-IF-FOUND is a list of shell commands to run if an OpenMP flag is +# found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it is +# not found. If ACTION-IF-FOUND is not specified, the default action will +# define HAVE_OPENMP. +# +# LICENSE +# +# Copyright (c) 2008 Steven G. Johnson +# Copyright (c) 2015 John W. Peterson +# Copyright (c) 2016 Nick R. Papior +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <https://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. 
When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 13 + +AC_DEFUN([AX_OPENMP], [ +AC_PREREQ([2.69]) dnl for _AC_LANG_PREFIX + +AC_CACHE_CHECK([for OpenMP flag of _AC_LANG compiler], ax_cv_[]_AC_LANG_ABBREV[]_openmp, [save[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS +ax_cv_[]_AC_LANG_ABBREV[]_openmp=unknown +# Flags to try: -fopenmp (gcc), -mp (SGI & PGI), +# -qopenmp (icc>=15), -openmp (icc), +# -xopenmp (Sun), -omp (Tru64), +# -qsmp=omp (AIX), +# none +ax_openmp_flags="-fopenmp -openmp -qopenmp -mp -xopenmp -omp -qsmp=omp none" +if test "x$OPENMP_[]_AC_LANG_PREFIX[]FLAGS" != x; then + ax_openmp_flags="$OPENMP_[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flags" +fi +for ax_openmp_flag in $ax_openmp_flags; do + case $ax_openmp_flag in + none) []_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[] ;; + *) []_AC_LANG_PREFIX[]FLAGS="$save[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flag" ;; + esac + AC_LINK_IFELSE([AC_LANG_SOURCE([[ +@%:@include <omp.h> + +static void +parallel_fill(int * data, int n) +{ + int i; +@%:@pragma omp parallel for + for (i = 0; i < n; ++i) + data[i] = i; +} + +int +main() +{ + int arr[100000]; + omp_set_num_threads(2); + parallel_fill(arr, 100000); + return 0; +} +]])],[ax_cv_[]_AC_LANG_ABBREV[]_openmp=$ax_openmp_flag; break],[]) +done +[]_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[]FLAGS +]) +if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" = "xunknown"; then + m4_default([$2],:) +else + if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" != "xnone"; then + OPENMP_[]_AC_LANG_PREFIX[]FLAGS=$ax_cv_[]_AC_LANG_ABBREV[]_openmp + fi + m4_default([$1], [AC_DEFINE(HAVE_OPENMP,1,[Define if OpenMP is enabled])]) +fi +])dnl AX_OPENMP diff --git a/src/near2far.cpp b/src/near2far.cpp index 09b9bead3..d2dabec15 100644 --- a/src/near2far.cpp +++ b/src/near2far.cpp @@ -250,7 +250,9 @@ void dft_near2far::farfield_lowlevel(std::complex<double> *EH, const vec &x) { 
component c0 = component(f->vc); /* equivalent source component */ vec rshift(f->shift * (0.5 * f->fc->gv.inva)); -#pragma omp parallel for +#ifdef HAVE_OPENMP +# pragma omp parallel for +#endif for (int i = 0; i < Nfreq; ++i) { double freq = freq_min + i * dfreq; size_t idx_dft = 0; From 25f16991eab82f9394ac54b503ce87a52b00f14d Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Thu, 9 May 2019 14:30:17 -0400 Subject: [PATCH 3/6] use --with and not --enable --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index c7ab24f5b..0c81c073e 100644 --- a/configure.ac +++ b/configure.ac @@ -59,8 +59,8 @@ if test "x$with_mpi" = "xyes"; then fi fi -AC_ARG_ENABLE(openmp, [AC_HELP_STRING([--enable-openmp],[use OpenMP directives for parallelism])], enable_openmp=$enableval, enable_openmp=no) -if test x"$enable_openmp" = "xyes"; then +AC_ARG_WITH(openmp, [AC_HELP_STRING([--with-openmp],[use OpenMP directives for parallelism])], enable_openmp=$enableval, with_openmp=no) +if test x"$with_openmp" = "xyes"; then AC_DEFINE(HAVE_OPENMP,1,[Define to enable OpenMP]) AX_OPENMP([], [AC_MSG_ERROR([don't know how to enable OpenMP])]) fi From b7ee07743d8d4c95c8464e3071d2db4dd4cee02e Mon Sep 17 00:00:00 2001 From: "Steven G. 
Johnson" Date: Thu, 9 May 2019 21:53:36 -0400 Subject: [PATCH 4/6] whoops need to add OPENMP_CXXFLAGS to CXXFLAGS --- configure.ac | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 0c81c073e..bdf1d09da 100644 --- a/configure.ac +++ b/configure.ac @@ -59,12 +59,6 @@ if test "x$with_mpi" = "xyes"; then fi fi -AC_ARG_WITH(openmp, [AC_HELP_STRING([--with-openmp],[use OpenMP directives for parallelism])], enable_openmp=$enableval, with_openmp=no) -if test x"$with_openmp" = "xyes"; then - AC_DEFINE(HAVE_OPENMP,1,[Define to enable OpenMP]) - AX_OPENMP([], [AC_MSG_ERROR([don't know how to enable OpenMP])]) -fi - ############################################################################## # Compiler flags @@ -99,6 +93,12 @@ for flg in $CXXFLAGS; do done AC_SUBST(ARCHFLAG) +AC_ARG_WITH(openmp, [AC_HELP_STRING([--with-openmp],[use OpenMP directives for parallelism])], enable_openmp=$enableval, with_openmp=no) +if test x"$with_openmp" = "xyes"; then + AC_DEFINE(HAVE_OPENMP,1,[Define to enable OpenMP]) + AX_OPENMP([CXXFLAGS="$CXXFLAGS $OPENMP_CXXFLAGS"], [AC_MSG_ERROR([don't know how to enable OpenMP])]) +fi + ############################################################################## # More checks From 3f52d58f602bd483db2ce50f3e0c3ed444215176 Mon Sep 17 00:00:00 2001 From: "Steven G. 
Johnson" Date: Tue, 14 May 2019 15:01:04 -0400 Subject: [PATCH 5/6] docs --- doc/docs/Build_From_Source.md | 6 ++++++ doc/docs/Python_User_Interface.md | 6 +++++- doc/docs/Scheme_User_Interface.md | 4 ++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/doc/docs/Build_From_Source.md b/doc/docs/Build_From_Source.md index 875d765b0..28134ddc7 100644 --- a/doc/docs/Build_From_Source.md +++ b/doc/docs/Build_From_Source.md @@ -244,6 +244,12 @@ By default, Meep's `configure` script picks compiler flags to optimize Meep as m — By default, Meep's configure script tries to guess the gcc `-march` flag for the system you are compiling on using `-mtune` instead when `--enable-portable-binary` is specified. If it guesses wrong, or if you want to specify a different architecture, you can pass it here. If you want to omit `-march`/`-mtune` flags entirely, pass `--without-gcc-arch`. +**`--with-openmp`** +— +This flag enables some experimental support for [OpenMP](https://en.wikipedia.org/wiki/OpenMP) multithreading parallelism on multi-core machines (*instead* of MPI, or in addition to MPI if you have multiple processor cores per MPI process). Currently, only multi-frequency `near2far` calculations are sped up this way, but in the future we [hope to add](https://github.com/NanoComp/meep/issues/228) additional OpenMP parallelism. (When you run Meep, you can first set the `OMP_NUM_THREADS` environment variable to +the number of threads you want OpenMP to use.) + + ### Building From Source The following instructions are for building parallel PyMeep with all optional features from source on Ubuntu 16.04. The parallel version can still be run serially by running a script with just `python` instead of `mpirun -np 4 python`. If you really don't want to install MPI and parallel HDF5, just replace `libhdf5-openmpi-dev` with `libhdf5-dev`, and remove the `--with-mpi`, `CC=mpicc`, and `CPP=mpicxx` flags. 
The paths to HDF5 will also need to be adjusted to `/usr/lib/x86_64-linux-gnu/hdf5/serial` and `/usr/include/hdf5/serial`. Note that this script builds with Python 3 by default. If you want to use Python 2, just point the `PYTHON` variable to the appropriate interpreter when calling `autogen.sh` for building Meep, and use `pip` instead of `pip3`. diff --git a/doc/docs/Python_User_Interface.md b/doc/docs/Python_User_Interface.md index 88b0050e6..bf7ab8460 100644 --- a/doc/docs/Python_User_Interface.md +++ b/doc/docs/Python_User_Interface.md @@ -1399,6 +1399,10 @@ Like `output_farfields` but returns a dictionary of numpy arrays instead of writ Note that far fields have the same units and scaling as the *Fourier transforms* of the fields, and hence cannot be directly compared to time-domain fields. In practice, it is easiest to use the far fields in computations where overall scaling (units) cancel out or are irrelevant, e.g. to compute the fraction of the far fields in one region vs. another region. +(Multi-frequency `get_farfields` and `output_farfields` can be accelerated by +[compiling Meep](Build_From_Source.md#meep) with `--with-openmp` and using the +`OMP_NUM_THREADS` environment variable to specify multiple threads.) + For a scattered-field computation, you often want to separate the scattered and incident fields. Just as is described in [Tutorial/Basics](Python_Tutorials/Basics.md) for flux computations, you can do this by saving the Fourier-transformed incident from a "normalization" run and then load them into another run to be subtracted. This can be done via: **`save_near2far(filename, near2far)`** @@ -1494,7 +1498,7 @@ This feature is only available if Meep is built with [libGDSII](Build_From_Sourc Returns a list of integer-valued layer indices for the layers present in the specified GDSII file. 
-```python +```python mp.GDSII_layers('python/examples/coupler.gds') Out[2]: [0, 1, 2, 3, 4, 5, 31, 32] ``` diff --git a/doc/docs/Scheme_User_Interface.md b/doc/docs/Scheme_User_Interface.md index c422c2945..37f2fb5ba 100644 --- a/doc/docs/Scheme_User_Interface.md +++ b/doc/docs/Scheme_User_Interface.md @@ -1167,6 +1167,10 @@ Given an HDF5 file name `fname` (does *not* include the `.h5` suffix), a `volume Note that far fields have the same units and scaling as the *Fourier transforms* of the fields, and hence cannot be directly compared to time-domain fields. In practice, it is easiest to use the far fields in computations where overall scaling (units) cancel out or are irrelevant, e.g. to compute the fraction of the far fields in one region vs. another region. +(Multi-frequency `output-farfields` can be accelerated by +[compiling Meep](Build_From_Source.md#meep) with `--with-openmp` and using the +`OMP_NUM_THREADS` environment variable to specify multiple threads.) + For a scattered-field computation, you often want to separate the scattered and incident fields. Just as is described in [Tutorial/Basics](Scheme_Tutorials/Basics.md) for flux computations, you can do this by saving the Fourier-transformed incident from a "normalization" run and then load them into another run to be subtracted. This can be done via: **`(save-near2far filename near2far)`** From 622028932dbbbb306db44e9e605ddbd8226e3658 Mon Sep 17 00:00:00 2001 From: "Steven G. 
Johnson" Date: Tue, 14 May 2019 15:02:49 -0400 Subject: [PATCH 6/6] typo --- doc/docs/Build_From_Source.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/docs/Build_From_Source.md b/doc/docs/Build_From_Source.md index 28134ddc7..6e5a60cd1 100644 --- a/doc/docs/Build_From_Source.md +++ b/doc/docs/Build_From_Source.md @@ -246,8 +246,7 @@ By default, Meep's configure script tries to guess the gcc `-march` flag for the **`--with-openmp`** — -This flag enables some experimental support for [OpenMP](https://en.wikipedia.org/wiki/OpenMP) multithreading parallelism on multi-core machines (*instead* of MPI, or in addition to MPI if you have multiple processor cores per MPI process). Currently, only multi-frequency `near2far` calculations are sped up this way, but in the future we [hope to add](https://github.com/NanoComp/meep/issues/228) additional OpenMP parallelism. (When you run Meep, you can first set the `OMP_NUM_THREADS` environment variable to -the number of threads you want OpenMP to use.) +This flag enables some experimental support for [OpenMP](https://en.wikipedia.org/wiki/OpenMP) multithreading parallelism on multi-core machines (*instead* of MPI, or in addition to MPI if you have multiple processor cores per MPI process). Currently, only multi-frequency `near2far` calculations are sped up this way, but in the future we [hope to add](https://github.com/NanoComp/meep/issues/228) additional OpenMP parallelism. (When you run Meep, you can first set the `OMP_NUM_THREADS` environment variable to the number of threads you want OpenMP to use.) ### Building From Source