From 039f6e344cd6dd64d88a704a98e0b29d796b2767 Mon Sep 17 00:00:00 2001 From: fritzgoebel Date: Mon, 8 Aug 2022 05:31:58 +0200 Subject: [PATCH] Amd direct solver (#521) * Working Ginkgo direct solver on AMD * fix build failure without ma57 * minor corrections * Update Ascent CI script to use ginkgo@ea106a945a390a1580baee4648c19ca2b665acdf * Add ginkgo_exec option to choose the hardware architecture the Ginkgo solver is run on. * Add tests for CUDA and HIP backends * Fix typo * Fix PNNL CI (#526) * Update marianas variables and add spack.yaml. * Add debugging lines for failing spack build. * Fix syntax error. * Update Newell variables and re-enable CI. * Fix bugs in newell variables. Fixup. * Disable ginkgo+cuda test on Marianas. * Bugfix test config on marianas. * Final attempt at disabling specific tests. Co-authored-by: Nicholson Koukpaizan Co-authored-by: Cameron Rutherford --- .github/workflows/spack_build.yml | 13 +- .gitlab-ci.yml | 27 ++-- scripts/ascentVariables.sh | 2 +- scripts/marianasVariables.sh | 121 +++++++++--------- scripts/newellVariables.sh | 109 ++++++++++------ scripts/platforms/marianas/spack.yaml | 82 ++++++++++++ scripts/platforms/newell/spack.yaml | 65 ++++++++++ src/Drivers/Sparse/CMakeLists.txt | 14 +- src/Drivers/Sparse/NlpSparseEx1Driver.cpp | 33 ++++- src/Drivers/Sparse/NlpSparseEx2Driver.cpp | 41 +++++- src/LinAlg/hiopLinSolverSparseGinkgo.cpp | 60 ++++++--- .../hiopKKTLinSysSparseNormalEqn.cpp | 2 + src/Utils/hiopOptions.cpp | 37 ++++++ 13 files changed, 472 insertions(+), 134 deletions(-) create mode 100644 scripts/platforms/marianas/spack.yaml create mode 100644 scripts/platforms/newell/spack.yaml diff --git a/.github/workflows/spack_build.yml b/.github/workflows/spack_build.yml index 12e45fb1e..ed869ed02 100644 --- a/.github/workflows/spack_build.yml +++ b/.github/workflows/spack_build.yml @@ -31,10 +31,11 @@ jobs: # for x86 systems. 
This allows us to use far more prebuilt packages, # which should speed up the builds by quite a bit. run: | - ls && pwd && \ - . /opt/spack/share/spack/setup-env.sh && \ - spack mirror add binary_mirror https://binaries.spack.io/develop && \ - spack buildcache keys --install --trust && \ - spack spec $SPACK_SPEC target=x86_64 && \ - spack install --fail-fast $SPACK_SPEC target=x86_64 + ls && pwd + . /opt/spack/share/spack/setup-env.sh + spack -d debug report + spack -d mirror add binary_mirror https://binaries.spack.io/develop + spack -d buildcache keys --install --trust + spack -d spec $SPACK_SPEC target=x86_64 + spack -d --stacktrace install --fail-fast $SPACK_SPEC target=x86_64 diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 68382b89c..b30c2c872 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -115,6 +115,9 @@ export SLURM_Q=`perl $WORKDIR/scripts/findIdleDLNodes.pl` fi + # Extra args for ctest + export CTEST_CMD=$CTEST_CMD + sbatch -A EXASGD --exclusive -N 1 -n 8 -p $SLURM_Q -t $TIMELIMIT $SLURM_ARGS -o output -e output $WORKDIR/BUILD.sh $BUILD_SCRIPT_ARGS res=1 set +xv @@ -158,22 +161,24 @@ build_on_marianas: MY_CLUSTER: "marianas" TIMELIMIT: '1:30:00' SLURM_ARGS: --gres=gpu:1 --exclusive + # Through the steps, the argument to -E is automatically surrounded by quotes + CTEST_CMD: 'ctest -VV -E NlpSparse1_6|NlpSparse2_5' <<: *pnnl_tags_definition <<: *pnnl_script_definition rules: - if: '$CI_PROJECT_ROOT_NAMESPACE == "exasgd"' -# build_on_newell: -# stage: default_build -# variables: -# SLURM_Q: "newell_shared" -# MY_CLUSTER: "newell" -# TIMELIMIT: '1:00:00' -# SLURM_ARGS: --gres=gpu:1 --exclusive -# <<: *pnnl_tags_definition -# <<: *pnnl_script_definition -# rules: -# - if: '$CI_PROJECT_ROOT_NAMESPACE == "exasgd"' +build_on_newell: + stage: default_build + variables: + SLURM_Q: "newell_shared" + MY_CLUSTER: "newell" + TIMELIMIT: '1:00:00' + SLURM_ARGS: --gres=gpu:1 --exclusive + <<: *pnnl_tags_definition + <<: *pnnl_script_definition + rules: + - if: 
'$CI_PROJECT_ROOT_NAMESPACE == "exasgd"' build_on_incline: allow_failure: true diff --git a/scripts/ascentVariables.sh b/scripts/ascentVariables.sh index 832251a5f..352d13a37 100644 --- a/scripts/ascentVariables.sh +++ b/scripts/ascentVariables.sh @@ -22,7 +22,7 @@ module load exasgd-cub/1.16.0/gcc-9.1.0-o5zdbep # cuda@11.4.2%gcc@9.1.0~allow-unsupported-compilers~dev arch=linux-rhel8-power9le module load exasgd-cuda/11.4.2/gcc-9.1.0-4676kh5 # ginkgo@glu%gcc@9.1.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_type=Release cuda_arch=70 dev_path=/gpfs/wolf/proj-shared/csc359/src/ginkgo arch=linux-rhel8-power9le -module load exasgd-ginkgo/glu/cuda-11.4.2/gcc-9.1.0-4ole5wn +module load exasgd-ginkgo/glu/cuda-11.4.2/gcc-9.1.0-fpuykyc # gmp@6.2.1%gcc@9.1.0 libs=shared,static arch=linux-rhel8-power9le module load exasgd-gmp/6.2.1/gcc-9.1.0-umqilrg # gnuconfig@2021-08-14%gcc@9.1.0 arch=linux-rhel8-power9le diff --git a/scripts/marianasVariables.sh b/scripts/marianasVariables.sh index 0b7767e2e..44764ee04 100644 --- a/scripts/marianasVariables.sh +++ b/scripts/marianasVariables.sh @@ -1,68 +1,71 @@ # NOTE: The following is required when running from Gitlab CI via slurm job source /etc/profile.d/modules.sh -module use -a /qfs/projects/exasgd/src/cameron-spack/share/spack/modules/linux-centos7-x86_64_v3 - +module use -a /qfs/projects/exasgd/src/cameron/spack/share/spack/modules/linux-centos7-zen2 # Load spack-built modules -# autoconf@2.69%gcc@7.3.0 patches=35c4492,7793209,a49dd5b arch=linux-centos7-x86_64_v3 -module load autoconf-2.69-gcc-7.3.0-gvh7nxv -# autoconf-archive@2022.02.11%gcc@7.3.0 patches=130cd48 arch=linux-centos7-x86_64_v3 -module load autoconf-archive-2022.02.11-gcc-7.3.0-lrajcp3 -# automake@1.16.5%gcc@7.3.0 arch=linux-centos7-x86_64_v3 -module load automake-1.16.5-gcc-7.3.0-la5kvuy -# blt@0.4.1%gcc@7.3.0 arch=linux-centos7-x86_64_v3 -module load blt-0.4.1-gcc-7.3.0-qeolwyb -# ca-certificates-mozilla@2022-03-29%gcc@7.3.0 
arch=linux-centos7-x86_64_v3 -module load ca-certificates-mozilla-2022-03-29-gcc-7.3.0-fjb4zc5 -# camp@0.2.2%gcc@7.3.0+cuda~ipo~rocm~tests build_type=RelWithDebInfo cuda_arch=60 arch=linux-centos7-x86_64_v3 -module load camp-0.2.2-gcc-7.3.0-ifdwyok -# cmake@3.23.2%gcc@7.3.0~doc+ncurses+ownlibs~qt build_type=Release arch=linux-centos7-x86_64_v3 -module load cmake-3.23.2-gcc-7.3.0-riu7fla -# coinhsl@2015.06.23%gcc@7.3.0+blas arch=linux-centos7-x86_64_v3 -module load coinhsl-2015.06.23-gcc-7.3.0-r42slsl -# cub@1.16.0%gcc@7.3.0 arch=linux-centos7-x86_64_v3 -module load cub-1.16.0-gcc-7.3.0-4zaltzb -# ginkgo@glu%gcc@7.3.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_type=Release cuda_arch=60 arch=linux-centos7-x86_64_v3 -module load ginkgo-glu-gcc-7.3.0-63ouzce -# gmp@6.2.1%gcc@7.3.0 libs=shared,static arch=linux-centos7-x86_64_v3 -module load gmp-6.2.1-gcc-7.3.0-if7iflm -# libsigsegv@2.13%gcc@7.3.0 arch=linux-centos7-x86_64_v3 -module load libsigsegv-2.13-gcc-7.3.0-n653jc7 -# libtool@2.4.7%gcc@7.3.0 arch=linux-centos7-x86_64_v3 -module load libtool-2.4.7-gcc-7.3.0-atzgxc2 -# m4@1.4.19%gcc@7.3.0+sigsegv patches=9dc5fbd,bfdffa7 arch=linux-centos7-x86_64_v3 -module load m4-1.4.19-gcc-7.3.0-lcthdqt -# magma@2.6.2%gcc@7.3.0+cuda+fortran~ipo~rocm+shared build_type=RelWithDebInfo cuda_arch=60 arch=linux-centos7-x86_64_v3 -module load magma-2.6.2-gcc-7.3.0-kqvdxay -# metis@5.1.0%gcc@7.3.0~gdb~int64~real64+shared build_type=Release patches=4991da9,b1225da arch=linux-centos7-x86_64_v3 -module load metis-5.1.0-gcc-7.3.0-xfajh3x -# mpfr@4.1.0%gcc@7.3.0 libs=shared,static arch=linux-centos7-x86_64_v3 -module load mpfr-4.1.0-gcc-7.3.0-zcatq2v -# ncurses@6.2%gcc@7.3.0~symlinks+termlib abi=none arch=linux-centos7-x86_64_v3 -module load ncurses-6.2-gcc-7.3.0-sqnhgdg -# openblas@0.3.20%gcc@4.8.5~bignuma~consistent_fpcsr~ilp64+locking+pic+shared symbol_suffix=none threads=none arch=linux-centos7-x86_64_v3 -module load openblas-0.3.20-gcc-4.8.5-rpolrqa -# 
openmpi@3.1.3%gcc@7.3.0~atomics~cuda~cxx~cxx_exceptions~gpfs~internal-hwloc~java~legacylaunchers~lustre~memchecker~pmi+romio+rsh~singularity+static+vt+wrapper-rpath fabrics=none schedulers=none arch=linux-centos7-x86_64_v3 -module load openmpi-3.1.3-gcc-7.3.0-2ocdjy4 -# openssl@1.1.1p%gcc@7.3.0~docs~shared certs=mozilla arch=linux-centos7-x86_64_v3 -module load openssl-1.1.1p-gcc-7.3.0-tz3ln5w -# perl@5.26.0%gcc@7.3.0+cpanm+shared+threads patches=0eac10e,8cf4302 arch=linux-centos7-x86_64_v3 -module load perl-5.26.0-gcc-7.3.0-f7w3oxq -# pkgconf@1.8.0%gcc@7.3.0 arch=linux-centos7-x86_64_v3 -module load pkgconf-1.8.0-gcc-7.3.0-gxowfdy -# raja@0.14.0%gcc@7.3.0+cuda~examples~exercises~ipo+openmp~rocm+shared~tests build_type=RelWithDebInfo cuda_arch=60 arch=linux-centos7-x86_64_v3 -module load raja-0.14.0-gcc-7.3.0-vtbmo6k -# suite-sparse@5.10.1%gcc@7.3.0~cuda~graphblas~openmp+pic~tbb arch=linux-centos7-x86_64_v3 -module load suite-sparse-5.10.1-gcc-7.3.0-thhoxwy -# texinfo@6.5%gcc@7.3.0 patches=12f6edb,1732115 arch=linux-centos7-x86_64_v3 -module load texinfo-6.5-gcc-7.3.0-crm3bgr -# umpire@6.0.0%gcc@7.3.0+c+cuda~device_alloc~deviceconst~examples~fortran~ipo~numa~openmp~rocm~shared build_type=RelWithDebInfo cuda_arch=60 tests=none arch=linux-centos7-x86_64_v3 -module load umpire-6.0.0-gcc-7.3.0-z22n3zy -# zlib@1.2.12%gcc@7.3.0+optimize+pic+shared patches=0d38234 arch=linux-centos7-x86_64_v3 -module load zlib-1.2.12-gcc-7.3.0-hq7ha7b +# autoconf@2.69%gcc@10.2.0 patches=35c4492,7793209,a49dd5b arch=linux-centos7-zen2 +module load autoconf-2.69-gcc-10.2.0-r677m42 +# autoconf-archive@2022.02.11%gcc@10.2.0 patches=130cd48 arch=linux-centos7-zen2 +module load autoconf-archive-2022.02.11-gcc-10.2.0-pbrbzut +# automake@1.16.5%gcc@10.2.0 arch=linux-centos7-zen2 +module load automake-1.16.5-gcc-10.2.0-j4bwm4o +# blt@0.4.1%gcc@10.2.0 arch=linux-centos7-zen2 +module load blt-0.4.1-gcc-10.2.0-tanugdw +# ca-certificates-mozilla@2022-07-19%gcc@10.2.0 arch=linux-centos7-zen2 +module 
load ca-certificates-mozilla-2022-07-19-gcc-10.2.0-h2opehw +# camp@0.2.3%gcc@10.2.0+cuda~ipo~rocm~tests build_type=RelWithDebInfo cuda_arch=60 arch=linux-centos7-zen2 +module load camp-0.2.3-gcc-10.2.0-vpkkybx +# cmake@3.23.2%gcc@10.2.0~doc+ncurses+ownlibs~qt build_type=Release arch=linux-centos7-zen2 +module load cmake-3.23.2-gcc-10.2.0-i24avzq +# coinhsl@2019.05.21%gcc@10.2.0+blas arch=linux-centos7-zen2 +module load coinhsl-2019.05.21-gcc-10.2.0-j7hsujd +# cub@1.16.0%gcc@10.2.0 arch=linux-centos7-zen2 +module load cub-1.16.0-gcc-10.2.0-ovgrtom +# diffutils@3.8%gcc@10.2.0 arch=linux-centos7-zen2 +module load diffutils-3.8-gcc-10.2.0-mjfwces +# ginkgo@glu_experimental%gcc@10.2.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_type=Release cuda_arch=60 arch=linux-centos7-zen2 +module load ginkgo-glu_experimental-gcc-10.2.0-dbmokiq +# gmp@6.2.1%gcc@10.2.0 libs=shared,static arch=linux-centos7-zen2 +module load gmp-6.2.1-gcc-10.2.0-ac4z3oa +# libiconv@1.16%gcc@10.2.0 libs=shared,static arch=linux-centos7-zen2 +module load libiconv-1.16-gcc-10.2.0-gbg7l5p +# libsigsegv@2.13%gcc@10.2.0 arch=linux-centos7-zen2 +module load libsigsegv-2.13-gcc-10.2.0-aj5goyi +# libtool@2.4.7%gcc@10.2.0 arch=linux-centos7-zen2 +module load libtool-2.4.7-gcc-10.2.0-mzc2mvw +# m4@1.4.19%gcc@10.2.0+sigsegv patches=9dc5fbd,bfdffa7 arch=linux-centos7-zen2 +module load m4-1.4.19-gcc-10.2.0-k5kkyx6 +# magma@2.6.2%gcc@10.2.0+cuda+fortran~ipo~rocm+shared build_type=RelWithDebInfo cuda_arch=60 arch=linux-centos7-zen2 +module load magma-2.6.2-gcc-10.2.0-o7gg2nj +# metis@5.1.0%gcc@10.2.0~gdb~int64~real64+shared build_type=Release patches=4991da9,b1225da arch=linux-centos7-zen2 +module load metis-5.1.0-gcc-10.2.0-h2r63pj +# mpfr@4.1.0%gcc@10.2.0 libs=shared,static arch=linux-centos7-zen2 +module load mpfr-4.1.0-gcc-10.2.0-ixeo4lu +# ncurses@6.2%gcc@10.2.0~symlinks+termlib abi=none arch=linux-centos7-zen2 +module load ncurses-6.2-gcc-10.2.0-3b2uqgl +# 
openblas@0.3.20%gcc@10.2.0~bignuma~consistent_fpcsr~ilp64+locking+pic+shared symbol_suffix=none threads=none arch=linux-centos7-zen2 +module load openblas-0.3.20-gcc-10.2.0-qhcutll +# openmpi@4.1.0%gcc@10.2.0~atomics~cuda~cxx~cxx_exceptions~gpfs~internal-hwloc~java~legacylaunchers~lustre~memchecker+romio+rsh~singularity+static+vt+wrapper-rpath fabrics=none patches=60ce20b schedulers=none arch=linux-centos7-zen2 +module load openmpi-4.1.0-gcc-10.2.0-wnndpcg +# openssl@1.1.1q%gcc@10.2.0~docs~shared certs=mozilla patches=3fdcf2d arch=linux-centos7-zen2 +module load openssl-1.1.1q-gcc-10.2.0-t5hsb3s +# perl@5.26.0%gcc@10.2.0+cpanm+shared+threads patches=0eac10e,8cf4302 arch=linux-centos7-zen2 +module load perl-5.26.0-gcc-10.2.0-l2yiybo +# pkgconf@1.8.0%gcc@10.2.0 arch=linux-centos7-zen2 +module load pkgconf-1.8.0-gcc-10.2.0-fuflwbl +# raja@0.14.0%gcc@10.2.0+cuda~examples~exercises~ipo+openmp~rocm+shared~tests build_type=RelWithDebInfo cuda_arch=60 arch=linux-centos7-zen2 +module load raja-0.14.0-gcc-10.2.0-pc2ckhw +# suite-sparse@5.10.1%gcc@10.2.0~cuda~graphblas~openmp+pic~tbb arch=linux-centos7-zen2 +module load suite-sparse-5.10.1-gcc-10.2.0-jkighdn +# texinfo@6.5%gcc@10.2.0 patches=12f6edb,1732115 arch=linux-centos7-zen2 +module load texinfo-6.5-gcc-10.2.0-mzqgqla +# umpire@6.0.0%gcc@10.2.0+c+cuda~device_alloc~deviceconst~examples~fortran~ipo~numa~openmp~rocm~shared build_type=RelWithDebInfo cuda_arch=60 tests=none arch=linux-centos7-zen2 +module load umpire-6.0.0-gcc-10.2.0-eunwzka +# zlib@1.2.12%gcc@10.2.0+optimize+pic+shared patches=0d38234 arch=linux-centos7-zen2 +module load zlib-1.2.12-gcc-10.2.0-gnkqokp # Load system modules -module load gcc/7.3.0 +module load gcc/10.2.0 module load cuda/11.4 [ -f $PWD/nvblas.conf ] && rm $PWD/nvblas.conf diff --git a/scripts/newellVariables.sh b/scripts/newellVariables.sh index 0ae2cb9ea..d22004703 100644 --- a/scripts/newellVariables.sh +++ b/scripts/newellVariables.sh @@ -1,50 +1,87 @@ # NOTE: The following is required 
when running from Gitlab CI via slurm job source /etc/profile.d/modules.sh +module purge module use -a /usr/share/Modules/modulefiles module use -a /share/apps/modules/tools module use -a /share/apps/modules/compilers module use -a /share/apps/modules/mpi module use -a /etc/modulefiles -module use -a /qfs/projects/exasgd/src/cameron-spack/share/spack/modules/linux-rhel7-power9le +module use -a /qfs/projects/exasgd/src/cameron/spack/share/spack/modules/linux-centos8-power9le/ # Load spack-built modules -# blt@0.4.1%gcc@7.4.0 arch=linux-rhel7-power9le -module load blt-0.4.1-gcc-7.4.0-2th7jgq -# camp@0.2.2%gcc@7.4.0+cuda~ipo~rocm~tests build_type=RelWithDebInfo cuda_arch=70 arch=linux-rhel7-power9le -module load camp-0.2.2-gcc-7.4.0-vsu2jwh -# cmake@3.23.1%gcc@7.4.0~doc+ncurses+ownlibs~qt build_type=Release arch=linux-rhel7-power9le -module load cmake-3.23.1-gcc-7.4.0-ckfugtf -# coinhsl@2015.06.23%gcc@7.4.0+blas arch=linux-rhel7-power9le -module load coinhsl-2015.06.23-gcc-7.4.0-ts5vjfq -# cub@1.12.0%gcc@7.4.0 arch=linux-rhel7-power9le -module load cub-1.12.0-gcc-7.4.0-4qyvoqn -# ginkgo@glu%gcc@7.4.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_type=Release cuda_arch=70 arch=linux-rhel7-power9le -module load ginkgo-glu-gcc-7.4.0-r5wjmju -# gmp@6.2.1%gcc@7.4.0 libs=shared,static arch=linux-rhel7-power9le -module load gmp-6.2.1-gcc-7.4.0-oea2aet -# magma@2.6.2%gcc@7.4.0+cuda+fortran~ipo~rocm+shared build_type=RelWithDebInfo cuda_arch=70 arch=linux-rhel7-power9le -module load magma-2.6.2-gcc-7.4.0-6yuqfpm -# metis@5.1.0%gcc@7.4.0~gdb~int64~real64+shared build_type=Release patches=4991da9,b1225da arch=linux-rhel7-power9le -module load metis-5.1.0-gcc-7.4.0-shhhyku -# mpfr@4.1.0%gcc@7.4.0 libs=shared,static arch=linux-rhel7-power9le -module load mpfr-4.1.0-gcc-7.4.0-tz5esun -# ncurses@6.2%gcc@7.4.0~symlinks+termlib abi=none arch=linux-rhel7-power9le -module load ncurses-6.2-gcc-7.4.0-kqhmmpv -# 
openblas@0.3.20%gcc@7.4.0~bignuma~consistent_fpcsr~ilp64+locking+pic+shared symbol_suffix=none threads=none arch=linux-rhel7-power9le -module load openblas-0.3.20-gcc-7.4.0-3zdqw2i -# raja@0.14.0%gcc@7.4.0+cuda~examples~exercises~ipo+openmp~rocm+shared~tests build_type=RelWithDebInfo cuda_arch=70 arch=linux-rhel7-power9le -module load raja-0.14.0-gcc-7.4.0-sew5thv -# suite-sparse@5.10.1%gcc@7.4.0~cuda~graphblas~openmp+pic~tbb arch=linux-rhel7-power9le -module load suite-sparse-5.10.1-gcc-7.4.0-e5qockg -# umpire@6.0.0%gcc@7.4.0+c+cuda~deviceconst~examples~fortran~ipo~numa~openmp~rocm~shared build_type=RelWithDebInfo cuda_arch=70 tests=none arch=linux-rhel7-power9le -module load umpire-6.0.0-gcc-7.4.0-rpwrj4p -# zlib@1.2.12%gcc@7.4.0+optimize+pic+shared patches=0d38234 arch=linux-rhel7-power9le -module load zlib-1.2.12-gcc-7.4.0-d6xlzc6 +# autoconf@2.69%gcc@8.5.0 patches=35c4492,7793209,a49dd5b arch=linux-centos8-power9le +module load autoconf-2.69-gcc-8.5.0-2mzbyqj +# autoconf-archive@2022.02.11%gcc@8.5.0 patches=130cd48 arch=linux-centos8-power9le +module load autoconf-archive-2022.02.11-gcc-8.5.0-nolgalj +# automake@1.16.5%gcc@8.5.0 arch=linux-centos8-power9le +module load automake-1.16.5-gcc-8.5.0-pnnvoal +# berkeley-db@18.1.40%gcc@8.5.0+cxx~docs+stl patches=b231fcc arch=linux-centos8-power9le +module load berkeley-db-18.1.40-gcc-8.5.0-cuzn6qn +# blt@0.4.1%gcc@8.5.0 arch=linux-centos8-power9le +module load blt-0.4.1-gcc-8.5.0-4drqwl4 +# bzip2@1.0.8%gcc@8.5.0~debug~pic+shared arch=linux-centos8-power9le +module load bzip2-1.0.8-gcc-8.5.0-tsweuon +# ca-certificates-mozilla@2022-07-19%gcc@8.5.0 arch=linux-centos8-power9le +module load ca-certificates-mozilla-2022-07-19-gcc-8.5.0-db3wqwx +# camp@0.2.3%gcc@8.5.0+cuda~ipo~rocm~tests build_type=RelWithDebInfo cuda_arch=70 arch=linux-centos8-power9le +module load camp-0.2.3-gcc-8.5.0-mtks7g5 +# cmake@3.23.2%gcc@8.5.0~doc+ncurses+ownlibs~qt build_type=Release arch=linux-centos8-power9le +module load 
cmake-3.23.2-gcc-8.5.0-tpplkft +# coinhsl@2019.05.21%gcc@8.5.0+blas arch=linux-centos8-power9le +module load coinhsl-2019.05.21-gcc-8.5.0-vng3am5 +# cub@1.16.0%gcc@8.5.0 arch=linux-centos8-power9le +module load cub-1.16.0-gcc-8.5.0-p3cnthb +# diffutils@3.8%gcc@8.5.0 arch=linux-centos8-power9le +module load diffutils-3.8-gcc-8.5.0-ppyuisg +# gdbm@1.19%gcc@8.5.0 arch=linux-centos8-power9le +module load gdbm-1.19-gcc-8.5.0-unfo3x4 +# ginkgo@glu_experimental%gcc@8.5.0+cuda~develtools~full_optimizations~hwloc~ipo~oneapi+openmp~rocm+shared build_type=Release cuda_arch=70 arch=linux-centos8-power9le +module load ginkgo-glu_experimental-gcc-8.5.0-m3p5yj4 +# gmp@6.2.1%gcc@8.5.0 libs=shared,static arch=linux-centos8-power9le +module load gmp-6.2.1-gcc-8.5.0-xlcuuht +# gnuconfig@2021-08-14%gcc@8.5.0 arch=linux-centos8-power9le +module load gnuconfig-2021-08-14-gcc-8.5.0-qjyg7ls +# hiop@develop%gcc@8.5.0+cuda+cusolver+deepchecking+ginkgo~ipo~jsrun+kron+mpi+raja~rocm~shared+sparse build_type=RelWithDebInfo cuda_arch=70 dev_path=/qfs/projects/exasgd/src/cameron/hiop-git arch=linux-centos8-power9le +module load hiop-develop-gcc-8.5.0-p2l3auf +# libiconv@1.16%gcc@8.5.0 libs=shared,static arch=linux-centos8-power9le +module load libiconv-1.16-gcc-8.5.0-qqwmnok +# libsigsegv@2.13%gcc@8.5.0 arch=linux-centos8-power9le +module load libsigsegv-2.13-gcc-8.5.0-pa77xit +# libtool@2.4.7%gcc@8.5.0 arch=linux-centos8-power9le +module load libtool-2.4.7-gcc-8.5.0-kxdso3c +# m4@1.4.19%gcc@8.5.0+sigsegv patches=9dc5fbd,bfdffa7 arch=linux-centos8-power9le +module load m4-1.4.19-gcc-8.5.0-untfsqf +# magma@2.6.2%gcc@8.5.0+cuda+fortran~ipo~rocm+shared build_type=RelWithDebInfo cuda_arch=70 arch=linux-centos8-power9le +module load magma-2.6.2-gcc-8.5.0-4oanlpm +# metis@5.1.0%gcc@8.5.0~gdb~int64~real64+shared build_type=Release patches=4991da9,b1225da arch=linux-centos8-power9le +module load metis-5.1.0-gcc-8.5.0-hcv2jnr +# mpfr@4.1.0%gcc@8.5.0 libs=shared,static arch=linux-centos8-power9le +module 
load mpfr-4.1.0-gcc-8.5.0-esdxmf2 +# ncurses@6.2%gcc@8.5.0~symlinks+termlib abi=none arch=linux-centos8-power9le +module load ncurses-6.2-gcc-8.5.0-v24hmxo +# openblas@0.3.20%gcc@8.5.0~bignuma~consistent_fpcsr~ilp64+locking+pic+shared symbol_suffix=none threads=none arch=linux-centos8-power9le +module load openblas-0.3.20-gcc-8.5.0-rwstn2s +# perl@5.34.1%gcc@8.5.0+cpanm+shared+threads arch=linux-centos8-power9le +module load perl-5.34.1-gcc-8.5.0-fn534xj +# pkgconf@1.8.0%gcc@8.5.0 arch=linux-centos8-power9le +module load pkgconf-1.8.0-gcc-8.5.0-imrnro2 +# raja@0.14.0%gcc@8.5.0+cuda+examples+exercises~ipo+openmp~rocm+shared~tests build_type=RelWithDebInfo cuda_arch=70 arch=linux-centos8-power9le +module load raja-0.14.0-gcc-8.5.0-yd3im6p +# readline@8.1.2%gcc@8.5.0 arch=linux-centos8-power9le +module load readline-8.1.2-gcc-8.5.0-l4hzlyf +# suite-sparse@5.10.1%gcc@8.5.0~cuda~graphblas~openmp+pic~tbb arch=linux-centos8-power9le +module load suite-sparse-5.10.1-gcc-8.5.0-6ra3sp4 +# texinfo@6.5%gcc@8.5.0 patches=12f6edb,1732115 arch=linux-centos8-power9le +module load texinfo-6.5-gcc-8.5.0-fvxyl2q +# umpire@6.0.0%gcc@8.5.0+c+cuda~device_alloc~deviceconst+examples~fortran~ipo~numa~openmp~rocm~shared build_type=RelWithDebInfo cuda_arch=70 tests=none arch=linux-centos8-power9le +module load umpire-6.0.0-gcc-8.5.0-ogbxb44 +# zlib@1.2.12%gcc@8.5.0+optimize+pic+shared patches=0d38234 arch=linux-centos8-power9le +module load zlib-1.2.12-gcc-8.5.0-spb5k73 # Load system modules -module load gcc/7.4.0 -module load cuda/10.2 -module load openmpi/3.1.5 +module load gcc/8.5.0 +module load cuda/11.4 +module load openmpi/4.1.4 [ -f $PWD/nvblas.conf ] && rm $PWD/nvblas.conf cat > $PWD/nvblas.conf <<-EOD diff --git a/scripts/platforms/marianas/spack.yaml b/scripts/platforms/marianas/spack.yaml new file mode 100644 index 000000000..37dcac3c5 --- /dev/null +++ b/scripts/platforms/marianas/spack.yaml @@ -0,0 +1,82 @@ +spack: + specs: + - 
hiop%gcc@10.2.0@develop+cuda+deepchecking+sparse+kron+cusolver+ginkgo+raja cuda_arch=60 + - raja@0.14.0 + - umpire@6.0.0 + - coinhsl@2019.05.21 + view: false + concretizer: + unify: true + packages: + ipopt: + version: + - 3.12.10 + variants: + - +debug + - +coinhsl + - ~mumps + raja: + variants: + - ~examples + - ~exercises + umpire: + variants: + - ~openmp + - ~examples + python: + externals: + - spec: python@3.7.0 + prefix: /share/apps/python/3.7.0 + modules: + - python/3.7.0 + all: + providers: + mpi: + - openmpi + openmpi: + externals: + - spec: openmpi@4.1.0%gcc@10.2.0 + modules: + - openmpi/4.1.0 + prefix: /share/apps/openmpi/4.1.0/gcc/10.2.0 + buildable: false + cuda: + externals: + - spec: cuda@11.4 + modules: + - cuda/11.4 + buildable: false + perl: + externals: + - spec: perl@5.26.0 + modules: + - perl/5.26.0 + compilers: + - compiler: + spec: gcc@4.8.5 + paths: + cc: /usr/bin/gcc + cxx: /usr/bin/g++ + f77: /usr/bin/gfortran + fc: /usr/bin/gfortran + operating_system: centos7 + target: x86_64 + modules: + - gcc/4.8.5 + - compiler: + spec: gcc@10.2.0 + paths: + cc: /share/apps/gcc/10.2.0/bin/gcc + cxx: /share/apps/gcc/10.2.0/bin/g++ + f77: /share/apps/gcc/10.2.0/bin/gfortran + fc: /share/apps/gcc/10.2.0/bin/gfortran + flags: {} + operating_system: centos7 + target: x86_64 + modules: [] + environment: {} + extra_rpaths: [] + develop: + hiop: + path: /qfs/projects/exasgd/src/cameron/hiop-git + spec: hiop@develop diff --git a/scripts/platforms/newell/spack.yaml b/scripts/platforms/newell/spack.yaml new file mode 100644 index 000000000..05aef3004 --- /dev/null +++ b/scripts/platforms/newell/spack.yaml @@ -0,0 +1,65 @@ +spack: + specs: + - hiop@develop+kron+mpi+raja+sparse+cuda+ginkgo+deepchecking+cusolver cuda_arch=70 + - raja@0.14.0 + - umpire@6.0.0 + - coinhsl@2019.05.21 + view: false + concretizer: + unify: true + reuse: false + packages: + coinhsl: + variants: +blas + openmpi: + externals: + - spec: openmpi@4.1.4%gcc@8.5.0 + prefix: 
/share/apps/openmpi/4.1.4/gcc/8.5.0 + modules: + - openmpi/4.1.4 + buildable: false + variants: +cuda + python: + externals: + - spec: python@3.8.5 + modules: [python/miniconda3.8] + - spec: python@2.7.5-system + prefix: /usr + buildable: false + cuda: + externals: + - spec: cuda@11.4 + modules: + - cuda/11.4 + buildable: false + compilers: + - compiler: + spec: gcc@11.2.0 + paths: + cc: /share/apps/gcc/11.2.0/bin/gcc + cxx: /share/apps/gcc/11.2.0/bin/g++ + f77: /share/apps/gcc/11.2.0/bin/gfortran + fc: /share/apps/gcc/11.2.0/bin/gfortran + flags: {} + operating_system: centos8 + target: ppc64le + modules: [] + environment: {} + extra_rpaths: [] + - compiler: + spec: gcc@8.5.0 + paths: + cc: /usr/bin/gcc + cxx: /usr/bin/g++ + f77: /usr/bin/gfortran + fc: /usr/bin/gfortran + flags: {} + operating_system: centos8 + target: ppc64le + modules: [] + environment: {} + extra_rpaths: [] + develop: + hiop: + path: /qfs/projects/exasgd/src/cameron/hiop-git + spec: hiop@develop diff --git a/src/Drivers/Sparse/CMakeLists.txt b/src/Drivers/Sparse/CMakeLists.txt index 5144e4393..e81de4862 100644 --- a/src/Drivers/Sparse/CMakeLists.txt +++ b/src/Drivers/Sparse/CMakeLists.txt @@ -29,6 +29,12 @@ if(HIOP_USE_PARDISO) endif(HIOP_USE_PARDISO) if(HIOP_USE_GINKGO) add_test(NAME NlpSparse1_5 COMMAND ${RUNCMD} "$" "500" "-ginkgo" "-selfcheck") + if(HIOP_USE_CUDA) + add_test(NAME NlpSparse1_6 COMMAND ${RUNCMD} "$" "500" "-ginkgo_cuda" "-selfcheck") + endif(HIOP_USE_CUDA) + if(HIOP_USE_HIP) + add_test(NAME NlpSparse1_7 COMMAND ${RUNCMD} "$" "500" "-ginkgo_hip" "-selfcheck") + endif(HIOP_USE_HIP) endif(HIOP_USE_GINKGO) add_test(NAME NlpSparse2_1 COMMAND ${RUNCMD} "$" "500" "-selfcheck") add_test(NAME NlpSparse2_2 COMMAND ${RUNCMD} "$" "500" "-inertiafree" "-selfcheck") @@ -37,9 +43,15 @@ if(HIOP_USE_CUDA) endif(HIOP_USE_CUDA) if(HIOP_USE_GINKGO) add_test(NAME NlpSparse2_4 COMMAND ${RUNCMD} "$" "500" "-ginkgo" "-inertiafree" "-selfcheck") + if(HIOP_USE_CUDA) + add_test(NAME NlpSparse2_5 
COMMAND ${RUNCMD} "$" "500" "-ginkgo_cuda" "-inertiafree" "-selfcheck") + endif(HIOP_USE_CUDA) + if(HIOP_USE_HIP) + add_test(NAME NlpSparse2_6 COMMAND ${RUNCMD} "$" "500" "-ginkgo_hip" "-inertiafree" "-selfcheck") + endif(HIOP_USE_HIP) endif(HIOP_USE_GINKGO) add_test(NAME NlpSparse3_1 COMMAND ${RUNCMD} "$" "500" "-selfcheck") if(HIOP_BUILD_SHARED AND NOT HIOP_USE_GPU) add_test(NAME NlpSparseCinterface COMMAND ${RUNCMD} "$") add_test(NAME NlpSparseFinterface COMMAND ${RUNCMD} "$") -endif() \ No newline at end of file +endif() diff --git a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp index f5112bd4b..0590b7ce9 100644 --- a/src/Drivers/Sparse/NlpSparseEx1Driver.cpp +++ b/src/Drivers/Sparse/NlpSparseEx1Driver.cpp @@ -17,11 +17,15 @@ static bool parse_arguments(int argc, bool& use_pardiso, bool& use_cusolver, bool& use_ginkgo, + bool& use_ginkgo_cuda, + bool& use_ginkgo_hip, bool& force_fr) { self_check = false; use_pardiso = false; use_ginkgo = false; + use_ginkgo_cuda = false; + use_ginkgo_cuda = false; force_fr = false; n = 3; scal = 1.0; @@ -48,6 +52,12 @@ static bool parse_arguments(int argc, use_cusolver = true; } else if(std::string(argv[4]) == "-ginkgo"){ use_ginkgo = true; + } else if(std::string(argv[4]) == "-ginkgo_cuda"){ + use_ginkgo = true; + use_ginkgo_cuda = true; + } else if(std::string(argv[4]) == "-ginkgo_hip"){ + use_ginkgo = true; + use_ginkgo_hip = true; } } case 4: //3 arguments @@ -62,6 +72,12 @@ static bool parse_arguments(int argc, use_cusolver = true; } else if(std::string(argv[3]) == "-ginkgo"){ use_ginkgo = true; + } else if(std::string(argv[3]) == "-ginkgo_cuda"){ + use_ginkgo = true; + use_ginkgo_cuda = true; + } else if(std::string(argv[3]) == "-ginkgo_hip"){ + use_ginkgo = true; + use_ginkgo_hip = true; } } case 3: //2 arguments @@ -76,6 +92,12 @@ static bool parse_arguments(int argc, use_cusolver = true; } else if(std::string(argv[2]) == "-ginkgo"){ use_ginkgo = true; + } else 
if(std::string(argv[2]) == "-ginkgo_cuda"){ + use_ginkgo = true; + use_ginkgo_cuda = true; + } else if(std::string(argv[2]) == "-ginkgo_hip"){ + use_ginkgo = true; + use_ginkgo_hip = true; } else { scal = std::atof(argv[2]); } @@ -165,11 +187,13 @@ int main(int argc, char **argv) bool use_pardiso = false; bool use_cusolver = false; bool use_ginkgo = false; + bool use_ginkgo_cuda = false; + bool use_ginkgo_hip = false; bool force_fr = false; size_type n; double scal; - if(!parse_arguments(argc, argv, n, scal, selfCheck, use_pardiso, use_cusolver, use_ginkgo, force_fr)) { + if(!parse_arguments(argc, argv, n, scal, selfCheck, use_pardiso, use_cusolver, use_ginkgo, use_ginkgo_cuda, use_ginkgo_hip, force_fr)) { usage(argv[0]); #ifdef HIOP_USE_MPI MPI_Finalize(); @@ -212,6 +236,13 @@ int main(int argc, char **argv) nlp.options->SetStringValue("linsol_mode", "speculative"); nlp.options->SetStringValue("linear_solver_sparse", "ginkgo"); nlp.options->SetStringValue("fact_acceptor", "inertia_free"); + if (use_ginkgo_cuda) { + nlp.options->SetStringValue("ginkgo_exec", "cuda"); + } else if (use_ginkgo_hip) { + nlp.options->SetStringValue("ginkgo_exec", "hip"); + } else { + nlp.options->SetStringValue("ginkgo_exec", "reference"); + } } if(force_fr) { nlp.options->SetStringValue("force_resto", "yes"); diff --git a/src/Drivers/Sparse/NlpSparseEx2Driver.cpp b/src/Drivers/Sparse/NlpSparseEx2Driver.cpp index 213edbfe7..a1285e9e2 100644 --- a/src/Drivers/Sparse/NlpSparseEx2Driver.cpp +++ b/src/Drivers/Sparse/NlpSparseEx2Driver.cpp @@ -16,7 +16,9 @@ static bool parse_arguments(int argc, bool& inertia_free, bool& use_cusolver, bool& use_cusolver_lu, - bool& use_ginkgo) + bool& use_ginkgo, + bool& use_ginkgo_cuda, + bool& use_ginkgo_hip) { self_check = false; n = 3; @@ -24,6 +26,8 @@ static bool parse_arguments(int argc, use_cusolver = false; use_cusolver_lu = false; use_ginkgo = false; + use_ginkgo_cuda = false; + use_ginkgo_cuda = false; switch(argc) { case 1: //no arguments @@ -39,6 
+43,12 @@ static bool parse_arguments(int argc, use_cusolver = true; } else if(std::string(argv[4]) == "-ginkgo"){ use_ginkgo = true; + } else if(std::string(argv[4]) == "-ginkgo_cuda"){ + use_ginkgo = true; + use_ginkgo_cuda = true; + } else if(std::string(argv[4]) == "-ginkgo_hip"){ + use_ginkgo = true; + use_ginkgo_hip = true; } else { n = std::atoi(argv[4]); if(n<=0) { @@ -56,6 +66,12 @@ static bool parse_arguments(int argc, use_cusolver = true; } else if(std::string(argv[3]) == "-ginkgo"){ use_ginkgo = true; + } else if(std::string(argv[3]) == "-ginkgo_cuda"){ + use_ginkgo = true; + use_ginkgo_cuda = true; + } else if(std::string(argv[3]) == "-ginkgo_hip"){ + use_ginkgo = true; + use_ginkgo_hip = true; } else { n = std::atoi(argv[3]); if(n<=0) { @@ -73,6 +89,12 @@ static bool parse_arguments(int argc, use_cusolver = true; } else if(std::string(argv[2]) == "-ginkgo"){ use_ginkgo = true; + } else if(std::string(argv[2]) == "-ginkgo_cuda"){ + use_ginkgo = true; + use_ginkgo_cuda = true; + } else if(std::string(argv[2]) == "-ginkgo_hip"){ + use_ginkgo = true; + use_ginkgo_hip = true; } else { n = std::atoi(argv[2]); if(n<=0) { @@ -90,6 +112,12 @@ static bool parse_arguments(int argc, use_cusolver = true; } else if(std::string(argv[1]) == "-ginkgo"){ use_ginkgo = true; + } else if(std::string(argv[1]) == "-ginkgo_cuda"){ + use_ginkgo = true; + use_ginkgo_cuda = true; + } else if(std::string(argv[1]) == "-ginkgo_hip"){ + use_ginkgo = true; + use_ginkgo_hip = true; } else { n = std::atoi(argv[1]); if(n<=0) { @@ -178,7 +206,9 @@ int main(int argc, char **argv) bool use_cusolver = false; bool use_cusolver_lu = false; bool use_ginkgo = false; - if(!parse_arguments(argc, argv, n, selfCheck, inertia_free, use_cusolver, use_cusolver_lu, use_ginkgo)) { + bool use_ginkgo_cuda = false; + bool use_ginkgo_hip = false; + if(!parse_arguments(argc, argv, n, selfCheck, inertia_free, use_cusolver, use_cusolver_lu, use_ginkgo, use_ginkgo_cuda, use_ginkgo_hip)) { usage(argv[0]); 
#ifdef HIOP_USE_MPI MPI_Finalize(); @@ -212,6 +242,13 @@ int main(int argc, char **argv) if(use_ginkgo) { nlp.options->SetStringValue("linsol_mode", "speculative"); nlp.options->SetStringValue("linear_solver_sparse", "ginkgo"); + if (use_ginkgo_cuda) { + nlp.options->SetStringValue("ginkgo_exec", "cuda"); + } else if (use_ginkgo_hip) { + nlp.options->SetStringValue("ginkgo_exec", "hip"); + } else { + nlp.options->SetStringValue("ginkgo_exec", "reference"); + } } hiopAlgFilterIPMNewton solver(&nlp); hiopSolveStatus status = solver.run(); diff --git a/src/LinAlg/hiopLinSolverSparseGinkgo.cpp b/src/LinAlg/hiopLinSolverSparseGinkgo.cpp index 81a1014a9..d1f55cab2 100644 --- a/src/LinAlg/hiopLinSolverSparseGinkgo.cpp +++ b/src/LinAlg/hiopLinSolverSparseGinkgo.cpp @@ -56,7 +56,6 @@ #include "hiop_blasdefs.hpp" - namespace hiop { @@ -201,17 +200,42 @@ void update_matrix(hiopMatrixSparse* M_, } +std::shared_ptr create_exec(std::string executor_string) +{ + // The omp and dpcpp currently do not support LU factorization. 
+ std::map()>> + exec_map{ + {"omp", [] { return gko::OmpExecutor::create(); }}, + {"cuda", + [] { + return gko::CudaExecutor::create(0, gko::ReferenceExecutor::create(), + true); + }}, + {"hip", + [] { + return gko::HipExecutor::create(0, gko::ReferenceExecutor::create(), + true); + }}, + {"dpcpp", + [] { + return gko::DpcppExecutor::create(0, + gko::ReferenceExecutor::create()); + }}, + {"reference", [] { return gko::ReferenceExecutor::create(); }}}; + + return exec_map.at(executor_string)(); +} + + std::shared_ptr setup_solver_factory(std::shared_ptr exec, std::shared_ptr> mtx) { - auto preprocessing_fact = gko::share(gko::reorder::Mc64::build().on(exec)); auto preprocessing = gko::share(preprocessing_fact->generate(mtx)); - - auto lu_fact = gko::share(gko::factorization::Glu::build_reusable() + auto lu_fact = gko::share(gko::experimental::factorization::Glu::build_reusable() .on(exec, mtx.get(), preprocessing.get())); - auto inner_solver_fact = gko::share(gko::preconditioner::Ilu<>::build() - .with_factorization_factory(lu_fact) + auto inner_solver_fact = gko::share(gko::experimental::solver::Direct::build() + .with_factorization(lu_fact) .on(exec)); auto solver_fact = gko::share(gko::solver::Gmres<>::build() .with_criteria( @@ -259,7 +283,7 @@ std::shared_ptr setup_solver_factory(std::shared_ptrn() && M_->n()==M_->m()); assert(n_>0); - exec_ = gko::ReferenceExecutor::create(); //gko::CudaExecutor::create(0, gko::OmpExecutor::create()); + exec_ = create_exec(nlp_->options->GetString("ginkgo_exec"));//gko::HipExecutor::create(0, gko::ReferenceExecutor::create()); mtx_ = transferTripletToCSR(exec_, n_, M_, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_); nnz_ = mtx_->get_num_stored_elements(); @@ -284,10 +308,10 @@ std::shared_ptr setup_solver_factory(std::shared_ptr>(gko::as>(gko_solver_)->get_solver()); - auto precond = gko::as>(sol->get_preconditioner()); - auto status = precond->get_status(); + auto precond = gko::as>(sol->get_preconditioner()); + 
auto status = precond->get_factorization_status(); - return status; + return status == gko::experimental::factorization::status::success ? 0 : -1; } bool hiopLinSolverSymSparseGinkgo::solve ( hiopVector& x_ ) @@ -303,15 +327,17 @@ std::shared_ptr setup_solver_factory(std::shared_ptr(x->new_copy()); double* dx = x->local_data(); double* drhs = rhs->local_data(); - auto x_array = gko::Array::view(exec_, n_, dx); + auto x_array = gko::Array::view(exec_->get_master(), n_, dx); auto b_array = gko::Array::view(exec_, n_, drhs); - auto dense_x = gko::matrix::Dense::create(exec_, gko::dim<2>{n_, 1}, gko::Array::view(exec_, n_, dx), 1); + auto dense_x_host = gko::matrix::Dense::create(exec_->get_master(), gko::dim<2>{n_, 1}, gko::Array::view(exec_->get_master(), n_, dx), 1); + auto dense_x= gko::matrix::Dense::create(exec_, gko::dim<2>{n_, 1}); + dense_x->copy_from(dense_x_host.get()); auto dense_b = gko::matrix::Dense::create(exec_, gko::dim<2>{n_, 1}, b_array, 1); gko_solver_->apply(dense_b.get(), dense_x.get()); - nlp_->runStats.linsolv.tmTriuSolves.stop(); + dense_x_host->copy_from(dense_x.get()); delete rhs; rhs=nullptr; return 1; } @@ -343,7 +369,7 @@ std::shared_ptr setup_solver_factory(std::shared_ptrn() && M_->n()==M_->m()); assert(n_>0); - exec_= gko::ReferenceExecutor::create(); //gko::CudaExecutor::create(0, gko::OmpExecutor::create()); + exec_ = create_exec(nlp_->options->GetString("ginkgo_exec"));//gko::HipExecutor::create(0, gko::ReferenceExecutor::create()); mtx_ = transferTripletToCSR(exec_, n_, M_, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_); nnz_ = mtx_->get_num_stored_elements(); @@ -368,10 +394,10 @@ std::shared_ptr setup_solver_factory(std::shared_ptr>(gko::as>(gko_solver_)->get_solver()); - auto precond = gko::as>(sol->get_preconditioner()); - auto status = precond->get_status(); + auto precond = gko::as>(sol->get_preconditioner()); + auto status = precond->get_factorization_status(); - return status; + return status == 
gko::experimental::factorization::status::success ? 0 : -1; } bool hiopLinSolverNonSymSparseGinkgo::solve(hiopVector& x_) diff --git a/src/Optimization/hiopKKTLinSysSparseNormalEqn.cpp b/src/Optimization/hiopKKTLinSysSparseNormalEqn.cpp index 1b8290108..f3009b088 100644 --- a/src/Optimization/hiopKKTLinSysSparseNormalEqn.cpp +++ b/src/Optimization/hiopKKTLinSysSparseNormalEqn.cpp @@ -279,6 +279,7 @@ bool hiopKKTLinSysSparseNormalEqn::build_kkt_matrix(const double& delta_wx_in, } { +#ifdef HIOP_USE_COINHSL hiopLinSolverSymSparseMA57* linSolver_ma57 = dynamic_cast(linSys_); if(linSolver_ma57) { auto* linSys = dynamic_cast (linSys_); @@ -300,6 +301,7 @@ bool hiopKKTLinSysSparseNormalEqn::build_kkt_matrix(const double& delta_wx_in, } assert(itnz == Msys->numberOfNonzeros()); } +#endif } t.stop(); diff --git a/src/Utils/hiopOptions.cpp b/src/Utils/hiopOptions.cpp index 39e903651..cbcc69ae9 100644 --- a/src/Utils/hiopOptions.cpp +++ b/src/Utils/hiopOptions.cpp @@ -858,6 +858,17 @@ void hiopOptionsNLP::register_options() "Selects among MA57, PARDISO, cuSOLVER, STRUMPACK, and GINKGO for the sparse linear solves."); } + // choose hardware backend for the Ginkgo solver to run on. + // - Default is 'reference' which uses sequential CPU implementations + // - 'cuda' uses NVIDIA, 'hip' uses AMD GPUs (if available) + { + vector range {"cuda", "hip", "reference"}; + + register_str_option("ginkgo_exec", + "reference", + range, + "Selects the hardware architecture to run the Ginkgo linear solver on."); + } // choose sparsity permutation (to reduce nz in the factors). 
This option is available only when using // Cholesky linear solvers @@ -1191,6 +1202,32 @@ void hiopOptionsNLP::ensure_consistence() } #endif // HIOP_USE_CUDA +#ifdef HIOP_USE_GINKGO + auto exec_string = GetString("ginkgo_exec"); +#ifndef HIOP_USE_CUDA + if(sol_sp == "ginkgo" && exec_string == "cuda") { + if(is_user_defined("ginkgo_exec")) { + log_printf(hovWarning, + "The option 'ginkgo_exec=%s' is not valid without CUDA support enabled." + " Will use 'ginkgo_exec=reference'.\n", + GetString("ginkgo_exec").c_str()); + } + set_val("ginkgo_exec", "reference"); + } +#endif // HIOP_USE_CUDA +#ifndef HIOP_USE_HIP + if(sol_sp == "ginkgo" && exec_string == "hip") { + if(is_user_defined("ginkgo_exec")) { + log_printf(hovWarning, + "The option 'ginkgo_exec=%s' is not valid without HIP support enabled." + " Will use 'ginkgo_exec=reference'.\n", + GetString("ginkgo_exec").c_str()); + } + set_val("ginkgo_exec", "reference"); + } +#endif // HIOP_USE_HIP +#endif // HIOP_USE_GINKGO + //linear_solver_sparse_ordering checks and warnings #ifndef HIOP_USE_CUDA