From 77efa0486166268658f614466a4d052cb444b7e7 Mon Sep 17 00:00:00 2001 From: tomoleary Date: Fri, 8 Oct 2021 18:02:03 -0500 Subject: [PATCH] Some minor refactoring of the general structure, and a new memory efficient active subspace algorithm (#11) * try / excepting krylov solution failure for nonlinear problems * checking in on tom_dev * updating drivers and so on to be compatible with hessianlearn * rearranging things to sort better by paper * updating markdown readmes * small readme edit * more efficient saving procedure * not sure what changed here * updating * updating data saving procedure * updating some work on improving active subspace memory footprint * working on memory efficient active subspace slowly * a lot more refactoring of the E[JTJ] operator and so on for memory issues * updating * updating and working towards continuous integration * typo * updating unit tests * missing python instruction * updating travis yml * updating yaml for travis * updating travis yaml again trying to fix build issues related to cloning hippylib * issue importing hippyflow in the test * more testing stuff * more things being resolved * more ci issues * updating readme and playing around more with unit tests * updating readme and unit tests * updating * updating * updating * struggling to get the unit tests to work properly * ci issues * updating again * updating again * updating again * updating please work thanks * one last try * continuing issues with travis build * library linking error is driving me crazy * library linking error is driving me crazy * library linking error is driving me crazy * library linking error is driving me crazy * exploring all these different ways travis doesnt make any sense * exploring all these different ways travis doesnt make any sense * exploring all these different ways travis doesnt make any sense * exploring all these different ways travis doesnt make any sense * exploring all these different ways travis doesnt make any sense * finally got ci working * adding citation --- .travis.yml | 23 ++ README.md | 114 ++++++- .../confusion/confusion_linear_observable.py | 2 +- .../confusion/confusion_problem_setup.py | 2 +- applications/confusion/confusion_utilities.py | 37 ++- .../dipnet_paper/confusion_multirun.py | 2 +- .../dipnet_paper/confusion_training.py | 2 +- .../confusion/dipnet_paper/neuralNetworks.py | 2 +- applications/confusion/generate_confusion.py | 2 +- applications/helmholtz_2d/HelmholtzProblem.py | 2 +- .../dipnet_paper/helmholtz_multirun.py | 19 +- .../dipnet_paper/helmholtz_training.py | 19 +- .../dipnet_paper/neuralNetworks.py | 19 +- .../helmholtz_2d/generate_helmholtz.py | 2 +- .../helmholtz_linear_observable.py | 2 +- .../helmholtz_2d/helmholtz_problem_setup.py | 2 +- .../helmholtz_2d/helmholtz_utilities.py | 36 ++- citation.cff | 13 + hippyflow/__init__.py | 2 +- hippyflow/collectives/__init__.py | 2 +- hippyflow/collectives/collective.py | 33 +- hippyflow/collectives/collectiveOperator.py | 58 +++- hippyflow/collectives/comm_utils.py | 13 +- hippyflow/modeling/PODProjector.py | 130 +++++--- hippyflow/modeling/activeSubspaceProjector.py | 287 +++++++++++++++--- hippyflow/modeling/multiPDEProblem.py | 16 +- hippyflow/test/__init__.py | 13 + hippyflow/test/test_derivativeSubspace.py | 120 ++++++++ hippyflow/utilities/__init__.py | 2 +- hippyflow/version.py | 4 +- 30 files changed, 785 insertions(+), 195 deletions(-) create mode 100644 .travis.yml create mode 100644 citation.cff create mode 100644 hippyflow/test/__init__.py create mode 100644 hippyflow/test/test_derivativeSubspace.py diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..4ae1570 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,23 @@ +language: python +python: + - "3.7" +install: + - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; + - bash miniconda.sh -b -p $HOME/miniconda + - source "$HOME/miniconda/etc/profile.d/conda.sh" + - hash -r + - conda config --set always_yes yes --set changeps1 no + - conda update -q conda + # Useful for debugging any issues with conda + # - conda info -a + - conda create -q -n fenics19 -c conda-forge python=$TRAVIS_PYTHON_VERSION fenics==2019.1.0 + - conda activate fenics19 + - git clone --depth 1 --branch matmvmult https://github.com/hippylib/hippylib.git + - cd hippylib + - python setup.py install + - cd ../ + - export HIPPYLIB_PATH=$(pwd)/hippylib/ + - export HIPPYFLOW_PATH=$(pwd) + +script: + - python hippyflow/test/test_derivativeSubspace.py \ No newline at end of file diff --git a/README.md b/README.md index 53ecd26..157bc81 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ +[![Build Status](https://travis-ci.com/hippylib/hippyflow.svg?branch=master)](https://travis-ci.com/github/hippylib/hippyflow) [![DOI](https://zenodo.org/badge/301823282.svg)](https://zenodo.org/badge/latestdoi/301823282) [![License](https://img.shields.io/github/license/hippylib/hippyflow)](./LICENSE.md) [![Top language](https://img.shields.io/github/languages/top/hippylib/hippyflow)](https://www.python.org) @@ -44,13 +45,106 @@ * [`hessianlearn`](https://github.com/tomoleary/hessianlearn) is used for second order optimization of keras neural network models. -## Model Based Projectors: - -* `hIPPYflow` implements software infrastructure for input and output dimension reduction strategies for parametric mappings governed by PDEs. Given a parametric PDE Variational Problem implemented in `hIPPYlib` (using `FEniCS` for finite element representation), and a PDE observable, this code automates the construction of dominant subspaces of the input and output for these mappings. `hIPPYflow` implements both active subspace (AS) and Karhunen Loeve expansion (KLE) for input dimension reduction. `hIPPYflow` implements proper orthogonal decomposition (POD) for output dimension reduction. - -* These constructs also implement the generation of training data to be used in surrogate construction, as well as projection error tests that exemplify how good the different model projectors are at capturing key information, and help to detect the "intrinsic dimensionality" of the mappings from inputs to outputs. - -## Dimension Reduced Neural Network Strategies +# Model Based Projectors: + +`hIPPYflow` implements software infrastructure for input and output dimension reduction strategies for parametric mappings governed by PDEs. Given a parametric PDE Variational Problem implemented in `hIPPYlib` (using `FEniCS` for finite element representation), and a PDE observable, this code automates the construction of dominant subspaces of the input and output for these mappings. + +

+ +

+ + +`hIPPYflow` implements both active subspace (AS) and Karhunen Loeve expansion (KLE) for input dimension reduction. `hIPPYflow` implements proper orthogonal decomposition (POD) for output dimension reduction. + +AS computes the dominant eigenvalues of the following operator: +

+ +

+KLE computes the dominant eigenvectors of the covariance of the parameter distribution +

+ +

+POD computes the dominant eigenvalues of the following operator: +

+ +

+ +These constructs also implement the generation of training data to be used in surrogate construction, as well as projection error tests that exemplify how good the different model projectors are at capturing key information, and help to detect the "intrinsic dimensionality" of the mappings from inputs to outputs. + +## Example Usage (reduced basis construction) + +* Install [`hIPPYlib`](https://github.com/hippylib/hippylib), set `HIPPYLIB_PATH`, `HIPPYFLOW_PATH` environmental variables. + +```python +import dolfin as dl +import ufl +import numpy as np +import sys, os +sys.path.append(os.environ.get('HIPPYLIB_PATH')) +from hippylib import * + +sys.path.append(os.environ.get('HIPPYFLOW_PATH')) +from hippyflow import * + +# Set up PDE Variational Problem and observable using a function +def build_observable(mesh, **kwargs): + # Set up the PDE problem in hIPPYlib + rank = dl.MPI.rank(mesh.mpi_comm()) + Vh2 = dl.FunctionSpace(mesh, 'Lagrange', 2) + Vh1 = dl.FunctionSpace(mesh, 'Lagrange', 1) + Vh = [Vh2, Vh1, Vh2] + # Initialize Expressions + f = dl.Constant(0.0) + + u_bdr = dl.Expression("x[1]", degree=1) + u_bdr0 = dl.Constant(0.0) + bc = dl.DirichletBC(Vh[STATE], u_bdr, u_boundary) + bc0 = dl.DirichletBC(Vh[STATE], u_bdr0, u_boundary) + + def pde_varf(u,m,p): + return ufl.exp(m)*ufl.inner(ufl.grad(u), ufl.grad(p))*ufl.dx - f*p*ufl.dx + + pde = PDEVariationalProblem(Vh, pde_varf, bc, bc0, is_fwd_linear=True) + + # Instance observable operator (in this case pointwise observation of state) + x_targets = np.linspace(0.1,0.9,10) + y_targets = np.linspace(0.1,0.9,10) + targets = [] + for xi in x_targets: + for yi in y_targets: + targets.append((xi,yi)) + targets = np.array(targets) + + B = assemblePointwiseObservation(Vh[STATE], targets) + return LinearStateObservable(pde,B) + +# Set up mesh +ndim = 2 +nx = 10 +ny = 10 +mesh = dl.UnitSquareMesh(nx, ny) +# Instance observable +observable_kwargs = {} # No kwargs given in this example +observable = build_observable(mesh,**observable_kwargs) + +# Instance probability distribution for the parameter +prior = BiLaplacian2D(Vh[PARAMETER],gamma = 0.1, delta = 1.0) + +# Instance Active Subspace Operator +AS = ActiveSubspaceProjector(observable,prior) +# Compute and save input reduced basis to file: +AS.construct_input_subspace() + +# Instance POD Operator to compute POD basis and training data +POD = PODProjector(observable,prior) +POD.construct_subspace() +output_directory = 'location/for/training/data/' +POD.generate_training_data(output_directory) + +``` + + +# Dimension Reduced Neural Network Strategies * Given information about dominant subspaces of the input and output spaces for the parametric mappings, `hIPPYflow` implements dimension reduced neural network surrogates. These surrogates allow for parsimonious representations of input-output mappings that can achieve good accuracy for very few training data. Few data is a key feature of many high dimensional PDE based inference problems. @@ -63,13 +157,13 @@ These publications use the hippyflow library - \[1\] O'Leary-Roseberry, T., Villa, U., Chen P., Ghattas O., [**Derivative-Informed Projected Neural Networks for High-Dimensional Parametric Maps Governed by PDEs**](https://arxiv.org/abs/2011.15110). -arXiv:2011.15110. +Computer Methods in Applied Mechanics and Engineering (accepted). ([Download](https://arxiv.org/pdf/2011.15110.pdf))
BibTeX
 @article{o2020derivative,
   title={Derivative-Informed Projected Neural Networks for High-Dimensional Parametric Maps Governed by PDEs},
   author={O'Leary-Roseberry, Thomas and Villa, Umberto and Chen, Peng and Ghattas, Omar},
-  journal={arXiv preprint arXiv:2011.15110},
-  year={2020}
+  journal={Computer Methods in Applied Mechanics and Engineering},
+  year={2021}
 }
 }
diff --git a/applications/confusion/confusion_linear_observable.py b/applications/confusion/confusion_linear_observable.py index f8dfc36..6aa6a5e 100644 --- a/applications/confusion/confusion_linear_observable.py +++ b/applications/confusion/confusion_linear_observable.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, The University of Texas at Austin +# Copyright (c) 2020-2021, The University of Texas at Austin # & Washington University in St. Louis. # # All Rights reserved. diff --git a/applications/confusion/confusion_problem_setup.py b/applications/confusion/confusion_problem_setup.py index 63f83f5..c4424bd 100644 --- a/applications/confusion/confusion_problem_setup.py +++ b/applications/confusion/confusion_problem_setup.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, The University of Texas at Austin +# Copyright (c) 2020-2021, The University of Texas at Austin # & Washington University in St. Louis. # # All Rights reserved. diff --git a/applications/confusion/confusion_utilities.py b/applications/confusion/confusion_utilities.py index 8e2157a..636e036 100644 --- a/applications/confusion/confusion_utilities.py +++ b/applications/confusion/confusion_utilities.py @@ -1,4 +1,5 @@ -# Copyright (c) 2020, The University of Texas at Austin + +# Copyright (c) 2020-2021, The University of Texas at Austin # & Washington University in St. Louis. # # All Rights reserved. @@ -21,27 +22,41 @@ def load_confusion_data(data_dir,rescale = False,derivatives = False,n_data = np m_files = [] q_files = [] + mq_files = [] for file in data_files: if 'ms_on_rank_' in file: m_files.append(file) if 'qs_on_rank_' in file: q_files.append(file) + if 'mq_on_rank_' in file: + mq_files.append(file) - ranks = [int(file.split(data_dir+'ms_on_rank_')[-1].split('.npy')[0]) for file in m_files] + if len(mq_files) == 0: + ranks = [int(file.split(data_dir+'ms_on_rank_')[-1].split('.npy')[0]) for file in m_files] + else: + ranks = [int(file.split(data_dir+'mq_on_rank_')[-1].split('.npz')[0]) for file in m_files] max_rank = max(ranks) # Serially concatenate data - m_data = np.load(data_dir+'ms_on_rank_0.npy') - q_data = np.load(data_dir+'qs_on_rank_0.npy') - for i in range(1,max_rank+1): - appendage_m = np.load(data_dir+'ms_on_rank_'+str(i)+'.npy') + if len(mq_files) == 0: + m_data = np.load(data_dir+'ms_on_rank_0.npy') + q_data = np.load(data_dir+'qs_on_rank_0.npy') + for i in range(1,max_rank+1): + appendage_m = np.load(data_dir+'ms_on_rank_'+str(i)+'.npy') + m_data = np.concatenate((m_data,appendage_m)) + appendage_q = np.load(data_dir+'qs_on_rank_'+str(i)+'.npy') + q_data = np.concatenate((q_data,appendage_q)) + else: + npz_data = np.load(data_dir+'mq_on_rank_0.npz') + m_data = npz_data['m_data'] + q_data = npz_data['q_data'] + for i in range(1,max_rank+1): + npz_data = np.load(data_dir+'mq_on_rank_'+str(i)+'.npz') + appendage_m = npz_data['m_data'] + appendage_q = npz_data['q_data'] m_data = np.concatenate((m_data,appendage_m)) - appendage_q = np.load(data_dir+'qs_on_rank_'+str(i)+'.npy') q_data = np.concatenate((q_data,appendage_q)) - - # print('m_data.shape = ',m_data.shape) - # print('q_data.shape = ',q_data.shape) - + if n_data < np.inf: assert type(n_data) is int m_data = m_data[:n_data] diff --git a/applications/confusion/dipnet_paper/confusion_multirun.py b/applications/confusion/dipnet_paper/confusion_multirun.py index e59f708..1927bc0 100644 --- a/applications/confusion/dipnet_paper/confusion_multirun.py +++ b/applications/confusion/dipnet_paper/confusion_multirun.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, The University of Texas at Austin +# Copyright (c) 2020-2021, The University of Texas at Austin # & Washington University in St. Louis. # # All Rights reserved. diff --git a/applications/confusion/dipnet_paper/confusion_training.py b/applications/confusion/dipnet_paper/confusion_training.py index 20f8782..69edc54 100644 --- a/applications/confusion/dipnet_paper/confusion_training.py +++ b/applications/confusion/dipnet_paper/confusion_training.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, The University of Texas at Austin +# Copyright (c) 2020-2021, The University of Texas at Austin # & Washington University in St. Louis. # # All Rights reserved. diff --git a/applications/confusion/dipnet_paper/neuralNetworks.py b/applications/confusion/dipnet_paper/neuralNetworks.py index e57edf3..00e706e 100644 --- a/applications/confusion/dipnet_paper/neuralNetworks.py +++ b/applications/confusion/dipnet_paper/neuralNetworks.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, The University of Texas at Austin +# Copyright (c) 2020-2021, The University of Texas at Austin # & Washington University in St. Louis. # # All Rights reserved. diff --git a/applications/confusion/generate_confusion.py b/applications/confusion/generate_confusion.py index c1101ec..1b46b2e 100644 --- a/applications/confusion/generate_confusion.py +++ b/applications/confusion/generate_confusion.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, The University of Texas at Austin +# Copyright (c) 2020-2021, The University of Texas at Austin # & Washington University in St. Louis. # # All Rights reserved. diff --git a/applications/helmholtz_2d/HelmholtzProblem.py b/applications/helmholtz_2d/HelmholtzProblem.py index 9d05697..22c4a15 100644 --- a/applications/helmholtz_2d/HelmholtzProblem.py +++ b/applications/helmholtz_2d/HelmholtzProblem.py @@ -1,7 +1,7 @@ # Copyright (c) 2016-2018, The University of Texas at Austin # & University of California--Merced. -# Copyright (c) 2019-2020, The University of Texas at Austin +# Copyright (c) 2019-2021, The University of Texas at Austin # University of California--Merced, Washington University in St. Louis. # # All Rights reserved. diff --git a/applications/helmholtz_2d/dipnet_paper/helmholtz_multirun.py b/applications/helmholtz_2d/dipnet_paper/helmholtz_multirun.py index e13876b..a958244 100644 --- a/applications/helmholtz_2d/dipnet_paper/helmholtz_multirun.py +++ b/applications/helmholtz_2d/dipnet_paper/helmholtz_multirun.py @@ -1,16 +1,15 @@ -# This file is part of the hIPPYflow package +# Copyright (c) 2020-2021, The University of Texas at Austin +# & Washington University in St. Louis. # -# hIPPYflow is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or any later version. +# All Rights reserved. +# See file COPYRIGHT for details. # -# hIPPYflow is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. +# This file is part of the hIPPYflow package. For more information see +# https://github.com/hippylib/hippyflow/ # -# You should have received a copy of the GNU General Public License -# If not, see . +# hIPPYflow is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License (as published by the Free +# Software Foundation) version 2.0 dated June 1991. # # Author: Tom O'Leary-Roseberry # Contact: tom.olearyroseberry@utexas.edu diff --git a/applications/helmholtz_2d/dipnet_paper/helmholtz_training.py b/applications/helmholtz_2d/dipnet_paper/helmholtz_training.py index a4f242c..925cd4b 100644 --- a/applications/helmholtz_2d/dipnet_paper/helmholtz_training.py +++ b/applications/helmholtz_2d/dipnet_paper/helmholtz_training.py @@ -1,16 +1,15 @@ -# This file is part of the hIPPYflow package +# Copyright (c) 2020-2021, The University of Texas at Austin +# & Washington University in St. Louis. # -# hIPPYflow is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or any later version. +# All Rights reserved. +# See file COPYRIGHT for details. # -# hIPPYflow is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. +# This file is part of the hIPPYflow package. For more information see +# https://github.com/hippylib/hippyflow/ # -# You should have received a copy of the GNU General Public License -# If not, see . +# hIPPYflow is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License (as published by the Free +# Software Foundation) version 2.0 dated June 1991. # # Author: Tom O'Leary-Roseberry # Contact: tom.olearyroseberry@utexas.edu diff --git a/applications/helmholtz_2d/dipnet_paper/neuralNetworks.py b/applications/helmholtz_2d/dipnet_paper/neuralNetworks.py index f3cb020..73fe693 100644 --- a/applications/helmholtz_2d/dipnet_paper/neuralNetworks.py +++ b/applications/helmholtz_2d/dipnet_paper/neuralNetworks.py @@ -1,16 +1,15 @@ -# This file is part of the hIPPYflow package +# Copyright (c) 2020-2021, The University of Texas at Austin +# & Washington University in St. Louis. # -# hIPPYflow is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or any later version. +# All Rights reserved. +# See file COPYRIGHT for details. # -# hIPPYflow is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. +# This file is part of the hIPPYflow package. For more information see +# https://github.com/hippylib/hippyflow/ # -# You should have received a copy of the GNU General Public License -# If not, see . +# hIPPYflow is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License (as published by the Free +# Software Foundation) version 2.0 dated June 1991. # # Author: Tom O'Leary-Roseberry # Contact: tom.olearyroseberry@utexas.edu diff --git a/applications/helmholtz_2d/generate_helmholtz.py b/applications/helmholtz_2d/generate_helmholtz.py index a98c634..7e14999 100644 --- a/applications/helmholtz_2d/generate_helmholtz.py +++ b/applications/helmholtz_2d/generate_helmholtz.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, The University of Texas at Austin +# Copyright (c) 2020-2021, The University of Texas at Austin # & Washington University in St. Louis. # # All Rights reserved. diff --git a/applications/helmholtz_2d/helmholtz_linear_observable.py b/applications/helmholtz_2d/helmholtz_linear_observable.py index ced34aa..f8ed1cc 100644 --- a/applications/helmholtz_2d/helmholtz_linear_observable.py +++ b/applications/helmholtz_2d/helmholtz_linear_observable.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, The University of Texas at Austin +# Copyright (c) 2020-2021, The University of Texas at Austin # & Washington University in St. Louis. # # All Rights reserved. diff --git a/applications/helmholtz_2d/helmholtz_problem_setup.py b/applications/helmholtz_2d/helmholtz_problem_setup.py index f73dfd6..995f822 100644 --- a/applications/helmholtz_2d/helmholtz_problem_setup.py +++ b/applications/helmholtz_2d/helmholtz_problem_setup.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, The University of Texas at Austin +# Copyright (c) 2020-2021, The University of Texas at Austin # & Washington University in St. Louis. # # All Rights reserved. diff --git a/applications/helmholtz_2d/helmholtz_utilities.py b/applications/helmholtz_2d/helmholtz_utilities.py index fccbda4..f441a3b 100644 --- a/applications/helmholtz_2d/helmholtz_utilities.py +++ b/applications/helmholtz_2d/helmholtz_utilities.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, The University of Texas at Austin +# Copyright (c) 2020-2021, The University of Texas at Austin # & Washington University in St. Louis. # # All Rights reserved. @@ -21,28 +21,42 @@ def load_helmholtz_data(data_dir,rescale = False,derivatives = False,n_data = np m_files = [] q_files = [] + + mq_files = [] for file in data_files: if 'ms_on_rank_' in file: m_files.append(file) if 'qs_on_rank_' in file: q_files.append(file) + if 'mq_on_rank_' in file: + mq_files.append(file) - ranks = [int(file.split(data_dir+'ms_on_rank_')[-1].split('.npy')[0]) for file in m_files] - + if len(mq_files) == 0: + ranks = [int(file.split(data_dir+'ms_on_rank_')[-1].split('.npy')[0]) for file in m_files] + else: + ranks = [int(file.split(data_dir+'mq_on_rank_')[-1].split('.npz')[0]) for file in m_files] max_rank = max(ranks) # Serially concatenate data - m_data = np.load(data_dir+'ms_on_rank_0.npy') - q_data = np.load(data_dir+'qs_on_rank_0.npy') - for i in range(1,max_rank+1): - appendage_m = np.load(data_dir+'ms_on_rank_'+str(i)+'.npy') + if len(mq_files) == 0: + m_data = np.load(data_dir+'ms_on_rank_0.npy') + q_data = np.load(data_dir+'qs_on_rank_0.npy') + for i in range(1,max_rank+1): + appendage_m = np.load(data_dir+'ms_on_rank_'+str(i)+'.npy') + m_data = np.concatenate((m_data,appendage_m)) + appendage_q = np.load(data_dir+'qs_on_rank_'+str(i)+'.npy') + q_data = np.concatenate((q_data,appendage_q)) + else: + npz_data = np.load(data_dir+'mq_on_rank_0.npz') + m_data = npz_data['m_data'] + q_data = npz_data['q_data'] + for i in range(1,max_rank+1): + npz_data = np.load(data_dir+'mq_on_rank_'+str(i)+'.npz') + appendage_m = npz_data['m_data'] + appendage_q = npz_data['q_data'] m_data = np.concatenate((m_data,appendage_m)) - appendage_q = np.load(data_dir+'qs_on_rank_'+str(i)+'.npy') q_data = np.concatenate((q_data,appendage_q)) - # print('m_data.shape = ',m_data.shape) - # print('q_data.shape = ',q_data.shape) - if n_data < np.inf: assert type(n_data) is int m_data = m_data[:n_data] diff --git a/citation.cff b/citation.cff new file mode 100644 index 0000000..fcace7e --- /dev/null +++ b/citation.cff @@ -0,0 +1,13 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: +- family-names: "O'Leary-Roseberry" + given-names: "Thomas" + orcid: "https://orcid.org/0000-0002-8938-7074" + - family-names: "Villa" + given-names: "Umberto" +title: "hippyflow: Dimension reduced surrogate construction for parametric PDE maps in Python" +version: 0.1.0 +doi: 10.5281/zenodo.4608729 +date-released: 2021-03-16 +url: "https://github.com/hippylib/hippyflow" \ No newline at end of file diff --git a/hippyflow/__init__.py b/hippyflow/__init__.py index 9fe1b91..a9a3f1d 100644 --- a/hippyflow/__init__.py +++ b/hippyflow/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, The University of Texas at Austin +# Copyright (c) 2020-2021, The University of Texas at Austin # & Washington University in St. Louis. # # All Rights reserved. diff --git a/hippyflow/collectives/__init__.py b/hippyflow/collectives/__init__.py index b26db2b..5ca9605 100644 --- a/hippyflow/collectives/__init__.py +++ b/hippyflow/collectives/__init__.py @@ -11,7 +11,7 @@ # terms of the GNU General Public License (as published by the Free # Software Foundation) version 2.0 dated June 1991. -from .collectiveOperator import CollectiveOperator +from .collectiveOperator import CollectiveOperator, MatrixMultCollectiveOperator from .collective import NullCollective, MultipleSerialPDEsCollective, MultipleSamePartitioningPDEsCollective diff --git a/hippyflow/collectives/collective.py b/hippyflow/collectives/collective.py index 12f3bc2..6e85023 100644 --- a/hippyflow/collectives/collective.py +++ b/hippyflow/collectives/collective.py @@ -1,18 +1,17 @@ -# Copyright (c) 2016-2018, The University of Texas at Austin -# & University of California, Merced. -# Copyright (c) 2019-2021, The University of Texas at Austin -# University of California--Merced, Washington University in St. Louis. +# Copyright (c) 2020-2021, The University of Texas at Austin +# & Washington University in St. Louis. # # All Rights reserved. # See file COPYRIGHT for details. # -# This file is part of the hIPPYlib library. For more information and source code -# availability see https://hippylib.github.io. +# This file is part of the hIPPYflow package. For more information see +# https://github.com/hippylib/hippyflow/ # -# hIPPYlib is free software; you can redistribute it and/or modify it under the +# hIPPYflow is free software; you can redistribute it and/or modify it under the # terms of the GNU General Public License (as published by the Free # Software Foundation) version 2.0 dated June 1991. + import dolfin as dl import numpy as np from mpi4py import MPI @@ -20,29 +19,27 @@ class NullCollective: """ No-overhead "Parallel" reduction utilities when a serial system of PDEs is solved on 1 process. - """ - def __init__(self): - pass - + """ + def bcast(self, v, root=0): + return v + def size(self): return 1 - + def rank(self): return 0 - + def allReduce(self, v, op): - + if op.lower() not in ["sum", "avg"]: err_msg = "Unknown operation *{0}* in NullCollective.allReduce".format(op) raise NotImplementedError(err_msg) - - return v - - def bcast(self, v, root=0): return v + + class MultipleSamePartitioningPDEsCollective: """ Parallel reduction utilities when several serial systems of PDEs (one per process) are solved concurrently. diff --git a/hippyflow/collectives/collectiveOperator.py b/hippyflow/collectives/collectiveOperator.py index 98c5d6a..57a1dc2 100644 --- a/hippyflow/collectives/collectiveOperator.py +++ b/hippyflow/collectives/collectiveOperator.py @@ -13,15 +13,15 @@ class CollectiveOperator: """ - This class implements an MPI parallel version of linear operators - """ + This class implements an MPI parallel version of linear operators + """ def __init__(self, local_op, collective, mpi_op = 'sum'): """ - Constructor - - :code:`local_op` - - - :code:`collective` - - - :code:`mpi_op` - - """ + Constructor + - :code:`local_op` - + - :code:`collective` - + - :code:`mpi_op` - + """ assert hasattr(local_op,'mult') self.local_op = local_op @@ -54,3 +54,47 @@ def init_vector(self,x,dim): """ self.local_op.init_vector(x,dim) + +class MatrixMultCollectiveOperator: + """ + """ + def __init__(self, local_op, collective, mpi_op = 'sum'): + """ + Constructor + - :code:`local_op` - + - :code:`collective` - + - :code:`mpi_op` - + """ + assert hasattr(local_op,'matMvMult') + self.local_op = local_op + self.collective = collective + self.mpi_op = mpi_op + + def matMvMult(self, x,y): + """ + Implements matrix multiplication function for the collective operator + - :code:`x` - MultiVector to be multiplied + - :code:`y` - storage for multiplication results + """ + self.local_op.matMvMult(x,y) + self.collective.allReduce(y, self.mpi_op) + + def matMvTranspmult(self,x,y): + """ + Implements matrix transpose multiplication function for the collective operator + - :code:`x` - MultiVector to be matrix transpose multiplied + - :code:`y` - storage for matrix transpose multiplication results + """ + assert hasattr(self.local_op, 'MatMvTranspmult') + self.local_op.MatMvTranspmult(x,y) + self.collective.allReduce(y,self.mpi_op) + + def init_vector(self,x,dim): + """ + Implements vector constructor for operator + - :code:`x` - vector to be initialized + """ + self.local_op.init_vector(x,dim) + + + diff --git a/hippyflow/collectives/comm_utils.py b/hippyflow/collectives/comm_utils.py index 3ddf1c6..e59ed91 100644 --- a/hippyflow/collectives/comm_utils.py +++ b/hippyflow/collectives/comm_utils.py @@ -1,18 +1,17 @@ -# Copyright (c) 2016-2018, The University of Texas at Austin -# & University of California, Merced. -# Copyright (c) 2019-2021, The University of Texas at Austin -# University of California--Merced, Washington University in St. Louis. +# Copyright (c) 2020-2021, The University of Texas at Austin +# & Washington University in St. Louis. # # All Rights reserved. # See file COPYRIGHT for details. # -# This file is part of the hIPPYlib library. For more information and source code -# availability see https://hippylib.github.io. +# This file is part of the hIPPYflow package. For more information see +# https://github.com/hippylib/hippyflow/ # -# hIPPYlib is free software; you can redistribute it and/or modify it under the +# hIPPYflow is free software; you can redistribute it and/or modify it under the # terms of the GNU General Public License (as published by the Free # Software Foundation) version 2.0 dated June 1991. + import dolfin as dl import numpy as np from mpi4py import MPI diff --git a/hippyflow/modeling/PODProjector.py b/hippyflow/modeling/PODProjector.py index 3bc8e6c..d9b4c96 100644 --- a/hippyflow/modeling/PODProjector.py +++ b/hippyflow/modeling/PODProjector.py @@ -95,7 +95,8 @@ def solve_at_mean(self): self.observable.problem.solveFwd(self.u_at_mean,[self.u_at_mean,m_mean,None]) - def generate_training_data(self,output_directory = 'data/',check_for_data = True): + def generate_training_data(self,output_directory = 'data/',check_for_data = True,sequential = True,\ + compress_files = True): """ This method generates training data - :code:`output_directory` - a string specifying the path to the directory where data @@ -111,52 +112,101 @@ def generate_training_data(self,output_directory = 'data/',check_for_data = True pass observable_vector = dl.Vector(self.mesh_constructor_comm) self.observable.init_vector(observable_vector,dim = 0) - last_datum_generated = -1 + last_datum_generated = 0 m_shape = self.m.get_local().shape[0] q_shape = observable_vector.get_local().shape[0] print('m_shape = ',m_shape) print('q_shape = ',q_shape) - local_ms = np.zeros((0,m_shape)) - local_qs = np.zeros((0,q_shape)) - if check_for_data: - if os.path.isfile(output_directory+'ms_on_rank_'+str(my_rank)+'.npy') and \ - os.path.isfile(output_directory+'qs_on_rank_'+str(my_rank)+'.npy'): - local_ms = np.load(output_directory+'ms_on_rank_'+str(my_rank)+'.npy') - local_qs = np.load(output_directory+'qs_on_rank_'+str(my_rank)+'.npy') - last_datum_generated = min(local_ms.shape[0],local_qs.shape[0]) + if sequential: + rank_specific_directory = output_directory+'data_on_rank_'+str(my_rank)+'/' + os.makedirs(rank_specific_directory,exist_ok = True) + if check_for_data: + if os.path.isfile(rank_specific_directory+'ms_sample_'+str(my_rank)+'.npy') and \ + os.path.isfile(rank_specific_directory+'qs_sample_'+str(my_rank)+'.npy'): + ms_generated = os.listdir(rank_specific_directory) + qs_generated = os.listdir(rank_specific_directory) + m_indices = [int(m_.split('m_sample_')[-1].split('.npy')[0]) for m_ in ms_generated] + last_m = max(m_indices) + q_indices = [int(m_.split('q_sample_')[-1].split('.npy')[0]) for q_ in qs_generated] + last_q = max(q_indices) + last_datum_generated = min(last_m,last_q) - t0 = time.time() - # I think this is all hard coded for a single serial mesh, check if - # the arrays need to be communicated to mesh rank 0 before being saved - for i in range(last_datum_generated,self.parameters['data_per_process']): - print('Generating data number '+str(i)) - parRandom.normal(1,self.noise) - self.prior.sample(self.noise,self.m) - - self.u.zero() - self.u.axpy(1.,self.u_at_mean) - x = [self.u,self.m,None] - self.observable.setLinearizationPoint(x) - solution = self.observable.eval(self.m).get_local() - # If there is an issue with the solve move on - # local_qs = np.concatenate((local_qs,np.expand_dims(solution,0))) - # local_ms = np.concatenate((local_ms,np.expand_dims(self.m.get_local(),0))) - # np.save(output_directory+'ms_on_rank_'+str(my_rank)+'.npy',np.array(local_ms)) - # np.save(output_directory+'qs_on_rank_'+str(my_rank)+'.npy',np.array(local_qs)) - try: + t0 = time.time() + for i in range(last_datum_generated,self.parameters['data_per_process']): + print('Generating data number '+str(i)) + parRandom.normal(1,self.noise) + self.prior.sample(self.noise,self.m) + + self.u.zero() + self.u.axpy(1.,self.u_at_mean) + x = [self.u,self.m,None] + self.observable.setLinearizationPoint(x) + solution = self.observable.eval(self.m).get_local() + try: + solution = self.observable.eval(self.m).get_local() + this_m = self.m.get_local() + this_q = solution + np.save(rank_specific_directory+'m_sample_'+str(i)+'.npy',this_m) + np.save(rank_specific_directory+'q_sample_'+str(i)+'.npy',this_q) + # If there is an issue with the solve move on + except: + print('Issue with the nonlinear solve, moving on') + pass + + if self.parameters['verbose']: + print('On datum generated every ',(time.time() -t0)/(i - last_datum_generated+1),' s, on average.') + self._data_generation_time = time.time() - t0 + if compress_files: + local_ms = np.zeros(self.parameters['data_per_process'],m_shape) + local_qs = np.zeros(self.parameters['data_per_process'],q_shape) + for i in range(0,self.parameters['data_per_process']): + local_ms[i] = np.load(rank_specific_directory+'m_sample_'+str(i)+'.npy') + local_qs[i] = np.load(rank_specific_directory+'q_sample_'+str(i)+'.npy') + np.savez_compressed('mq_on_rank'+str(my_rank)+'.npz',m_data = local_ms,q_data = local_qs) + + else: + + local_ms = np.zeros((0,m_shape)) + local_qs = np.zeros((0,q_shape)) + if check_for_data: + if os.path.isfile(output_directory+'ms_on_rank_'+str(my_rank)+'.npy') and \ + os.path.isfile(output_directory+'qs_on_rank_'+str(my_rank)+'.npy'): + local_ms = np.load(output_directory+'ms_on_rank_'+str(my_rank)+'.npy') + local_qs = np.load(output_directory+'qs_on_rank_'+str(my_rank)+'.npy') + last_datum_generated = min(local_ms.shape[0],local_qs.shape[0]) + + t0 = time.time() + # I think this is all hard coded for a single serial mesh, check if + # the arrays need to be communicated to mesh rank 0 before being saved + for i in range(last_datum_generated,self.parameters['data_per_process']): + print('Generating data number '+str(i)) + parRandom.normal(1,self.noise) + self.prior.sample(self.noise,self.m) + + self.u.zero() + self.u.axpy(1.,self.u_at_mean) + x = [self.u,self.m,None] + self.observable.setLinearizationPoint(x) solution = self.observable.eval(self.m).get_local() # If there is an issue with the solve move on - local_qs = np.concatenate((local_qs,np.expand_dims(solution,0))) - local_ms = np.concatenate((local_ms,np.expand_dims(self.m.get_local(),0))) - np.save(output_directory+'ms_on_rank_'+str(my_rank)+'.npy',np.array(local_ms)) - np.save(output_directory+'qs_on_rank_'+str(my_rank)+'.npy',np.array(local_qs)) - except: - print('Issue with the nonlinear solve, moving on') - pass - - if self.parameters['verbose']: - print('On datum generated every ',(time.time() -t0)/(i - last_datum_generated+1),' s, on average.') - self._data_generation_time = time.time() - t0 + # local_qs = np.concatenate((local_qs,np.expand_dims(solution,0))) + # local_ms = np.concatenate((local_ms,np.expand_dims(self.m.get_local(),0))) + # np.save(output_directory+'ms_on_rank_'+str(my_rank)+'.npy',np.array(local_ms)) + # np.save(output_directory+'qs_on_rank_'+str(my_rank)+'.npy',np.array(local_qs)) + try: + solution = self.observable.eval(self.m).get_local() + # If there is an issue with the solve move on + local_qs = np.concatenate((local_qs,np.expand_dims(solution,0))) + local_ms = np.concatenate((local_ms,np.expand_dims(self.m.get_local(),0))) + np.save(output_directory+'ms_on_rank_'+str(my_rank)+'.npy',np.array(local_ms)) + np.save(output_directory+'qs_on_rank_'+str(my_rank)+'.npy',np.array(local_qs)) + except: + print('Issue with the nonlinear solve, moving on') + pass + + if self.parameters['verbose']: + print('On datum generated every ',(time.time() -t0)/(i - last_datum_generated+1),' s, on average.') + self._data_generation_time = time.time() - t0 def construct_subspace(self): diff --git a/hippyflow/modeling/activeSubspaceProjector.py b/hippyflow/modeling/activeSubspaceProjector.py index 78af9e1..2186368 100644 --- a/hippyflow/modeling/activeSubspaceProjector.py +++ b/hippyflow/modeling/activeSubspaceProjector.py @@ -20,7 +20,8 @@ import time -from ..collectives.collectiveOperator import CollectiveOperator +from ..collectives.collectiveOperator import CollectiveOperator, MatrixMultCollectiveOperator +from ..collectives.collective import NullCollective from ..collectives.comm_utils import checkMeshConsistentPartitioning from .jacobian import * from ..utilities.mv_utilities import mv_to_dense @@ -40,12 +41,20 @@ def ActiveSubspaceParameterList(): parameters['double_loop_samples'] = [20, 'Number of samples used in double loop MC approximation'] parameters['verbose'] = [True, 'Boolean for printing'] + + parameters['initialize_samples'] = [False,'Boolean for the initialization of samples when\ + many samples are allocated on one process '] + parameters['serialized_sampling'] = [False, 'Boolean for the serialization of sampling on a process\ + to reduce memory for large problems'] + parameters['observable_constructor'] = [None,'observable constructor function, assumed to take a mesh, and kwargs'] parameters['observable_kwargs'] = [{},'kwargs used when instantiating multiple local instances of observables'] parameters['output_directory'] = [None,'output directory for saving arrays and plots'] parameters['plot_label_suffix'] = ['', 'suffix for plot label'] - + parameters['save_and_plot'] = [True,'Boolean for saving data and plots (only False for unit testing)'] + parameters['store_Omega'] = [False,'Boolean for storing Gaussian random matrix (only True for unit testing)'] + parameters['ms_given'] = [False,'Boolean for passing ms into serialized AS construction (only True for unit testing)'] return ParameterList(parameters) @@ -76,6 +85,117 @@ def mult(self,x,y): else: y.axpy(1.,temp) +class SeriallySampledJacobianOperator: + ''' + Alterantive to SummedListOperator when memory is an issue for active subspace + ''' + def __init__(self,observable,noise,prior,operation = 'JTJ',nsamples = None,ms = None,communicator=None,average=True): + ''' + ''' + assert operation in ['JTJ','JJT'] + assert (nsamples is not None) or (ms is not None) + self.observable = observable + self.noise = noise + self.prior = prior + self.operation = operation + self.nsamples = nsamples + self.average = average + self.ms = ms + + if communicator is None: + self.temp = None + else: + self.temp = dl.Vector(communicator) + + self.u = self.observable.generate_vector(STATE) + self.m = self.observable.generate_vector(PARAMETER) + + def init_vector(self,x): + """ + Reshape the Vector :code:`x` so that it is compatible with the Jacobian + operator. + + Parameters: + + - :code:`x`: the vector to reshape. + """ + if self.operation == 'JJT': + self.observable.init_vector(x,0) + elif self.operation == 'JTJ': + self.observable.init_vector(x,1) + else: + raise + + def matMvMult(self,x,y): + ''' + ''' + if self.temp is None: + temp = dl.Vector(y[0]) + else: + temp = self.temp + assert x.nvec() == y.nvec(), "x and y have non-matching number of vectors" + # Instance Jacobian operator for this input-output pair + if self.operation == 'JTJ': + operator_i = JTJ(ObservableJacobian(self.observable)) + elif self.operation == 'JJT': + operator_i = JJT(ObservableJacobian(self.observable)) + + if self.ms is None: + for i in range(self.nsamples): + # Iterate if there are solver issues + solved = False + while not solved: + try: + # Sample from the prior + self.m.zero() + self.noise.zero() + parRandom.normal(1,self.noise) + self.prior.sample(self.noise,self.m) + # Solve the PDE + linearization_x = [self.u,self.m,None] + print('Attempting to solve') + self.observable.solveFwd(u,linearization_x) + print('Solution succesful') + # set linearization point + self.observable.setLinearizationPoint(linearization_x) + solved = True + except: + print('Issue with the solution, moving on') + pass + # Define action on matrix (as represented by MultiVector) + for j in range(x.nvec()): + temp.zero() + operator_i.mult(x[j],temp) + if self.average: + y[j].axpy(1./self.nsamples, temp) + else: + y[j].axpy(1., temp) + ################################################################################ + # Otherwise m represents points already chosen, + # where we assume the solution will hold, and thus we + # do not handle the sampling of m here + # This section is mostly for the sake of unit testing + else: + # Each m should already not run into solver issues + nsamples = len(self.ms) + for m in self.ms: + # Solve the PDE + print('Attempting to solve') + linearization_x = [self.u,m,None] + self.observable.solveFwd(self.u,linearization_x) + print('Solution succesful') + # set linearization point + self.observable.setLinearizationPoint(linearization_x) + solved = True + # Define action on matrix (as represented by MultiVector) + for j in range(x.nvec()): + temp.zero() + operator_i.mult(x[j],temp) + if self.average: + y[j].axpy(1./nsamples, temp) + else: + y[j].axpy(1., temp) + class ActiveSubspaceProjector: @@ -84,11 +204,11 @@ class ActiveSubspaceProjector: We have a forward mapping: :math:`m -> q(m)` And a forward map Jacobian: :math:`\nabla q(m)` Jacobian SVD: :math:`J = U S V^*` - Output active subspace: :math:`J'J = US^2U^*` - Input active subspace: :math:`JJ' = VS^2V^*` + Output active subspace: :math:`J^*J = US^2U^*` + Input active subspace: :math:`JJ^* = VS^2V^*` """ - def __init__(self,observable, prior, mesh_constructor_comm = None ,collective = None,\ - initialize_samples = False, parameters = ActiveSubspaceParameterList()): + def __init__(self,observable, prior, mesh_constructor_comm = None ,collective = NullCollective(),\ + parameters = ActiveSubspaceParameterList()): """ Constructor - :code:`observable` - object that implements the observable mapping :math:`m -> q(m)` @@ -110,12 +230,7 @@ def __init__(self,observable, prior, mesh_constructor_comm = None ,collective = else: self.mesh_constructor_comm = self.observable.mpi_comm() - if collective is not None: - self.collective = collective - else: - self.collective = NullCollective() - - + self.collective = collective consistent_partitioning = checkMeshConsistentPartitioning(\ self.observable.problem.Vh[0].mesh(), self.collective) @@ -131,16 +246,9 @@ def __init__(self,observable, prior, mesh_constructor_comm = None ,collective = # and avoid the time consuming sample initialization. - - - - - - - - - - if self.parameters['samples_per_process'] > 1: + # Here we allocate many copies of the observable if serialized_sampling is not True in the + # active subspace parameters + if self.parameters['samples_per_process'] > 1 and not self.parameters['serialized_sampling']: assert self.parameters['observable_constructor'] is not None for i in range(self.parameters['samples_per_process']-1): new_observable = self.parameters['observable_constructor'](self.observable.problem.Vh[0].mesh(),**self.parameters['observable_kwargs']) @@ -149,17 +257,21 @@ def __init__(self,observable, prior, mesh_constructor_comm = None ,collective = self.noise = dl.Vector(self.mesh_constructor_comm) self.prior.init_vector(self.noise,"noise") - - self.us = None - self.ms = None - self.Js = None + if self.parameters['serialized_sampling']: + self.u = None + self.m = None + self.J = None + else: + self.us = None + self.ms = None + self.Js = None # Draw a new sample and set linearization point. - if initialize_samples: - self.initialize_samples() + if self.parameters['initialize_samples']: + if not self.parameters['serialized_sampling']: + self._initialize_batched_samples() - self.d_GN = None self.V_GN = None self.d_GN_noprior = None @@ -169,8 +281,11 @@ def __init__(self,observable, prior, mesh_constructor_comm = None ,collective = self.d_NG = None self.U_NG = None + # For unit testing different methods, want to save Omega + self.Omega_GN = None + self.Omega_NG = None - def initialize_samples(self): + def _initialize_batched_samples(self): """ This method initializes the samples from the prior used in sampling """ @@ -191,6 +306,7 @@ def initialize_samples(self): observable.setLinearizationPoint(x) solved = True except: + self.m.zero() print('Issue with the solution, moving on') pass if self.parameters['verbose']: @@ -204,13 +320,24 @@ def initialize_samples(self): def construct_input_subspace(self,prior_preconditioned = True): + if self.parameters['serialized_sampling']: + print('Construction via serialized AS construction') + self._construct_serialized_jacobian_subspace(prior_preconditioned = prior_preconditioned,operation = 'JTJ') + else: + print('Construction via batched AS construction') + self._construct_input_subspace_batched(prior_preconditioned = prior_preconditioned) + + + + + def _construct_input_subspace_batched(self,prior_preconditioned = True): """ This method implements the input subspace constructor -:code:`prior_preconditioned` - a Boolean to decide whether to include the prior covariance in the decomposition The default parameter is True which is customary in active subspace construction """ if self.Js is None: - self.initialize_samples() + self._initialize_batched_samples() if self.parameters['verbose']: print(80*'#') @@ -230,18 +357,20 @@ def construct_input_subspace(self,prior_preconditioned = True): if self.collective.rank() == 0: parRandom.normal(1.,Omega) + if self.parameters['store_Omega']: + self.Omega_GN = Omega else: Omega.zero() - self.collective.bcast(Omega,root = 0) + if prior_preconditioned: if hasattr(self.prior, "R"): self.d_GN, self.V_GN = doublePassG(Average_GN_Hessian,\ - self.prior.R, self.prior.Rsolver, Omega,self.parameters['rank'],s=1) + self.prior.R, self.prior.Rsolver, Omega,self.parameters['rank'],s=1) else: self.d_GN, self.V_GN = doublePassG(Average_GN_Hessian,\ - self.prior.Hlr, self.prior.Hlr, Omega,self.parameters['rank'],s=1) + self.prior.Hlr, self.prior.Hlr, Omega,self.parameters['rank'],s=1) else: self.d_GN, self.V_GN = doublePass(Average_GN_Hessian,Omega,self.parameters['rank'],s=1) @@ -249,7 +378,7 @@ def construct_input_subspace(self,prior_preconditioned = True): self._input_subspace_construction_time = time.time() - t0 if self.parameters['verbose'] and (self.mesh_constructor_comm.rank == 0): print(('Input subspace construction took '+str(self._input_subspace_construction_time)[:5]+' s').center(80)) - if True and MPI.COMM_WORLD.rank == 0: + if self.parameters['save_and_plot'] and MPI.COMM_WORLD.rank == 0: np.save(self.parameters['output_directory']+'AS_input_projector',mv_to_dense(self.V_GN)) np.save(self.parameters['output_directory']+'AS_d_GN',self.d_GN) @@ -258,13 +387,93 @@ def construct_input_subspace(self,prior_preconditioned = True): axis_label = ['i',r'$\lambda_i$',\ r'Eigenvalues of $\mathbb{E}_{\nu}[C{\nabla} q^T {\nabla} q]$'+self.parameters['plot_label_suffix']], out_name = out_name) + def _construct_serialized_jacobian_subspace(self,prior_preconditioned = True, operation = 'JTJ'): + """ + This method implements the input subspace constructor + -:code:`prior_preconditioned` - a Boolean to decide whether to include the prior covariance in the decomposition + The default parameter is True which is customary in active subspace construction + -:code:`operation` - + """ + t0 = time.time() + # ms_given is a unit testing case + if self.parameters['ms_given']: + assert self.ms is not None + Local_Average_Jacobian_Operator = SeriallySampledJacobianOperator(self.observable,self.noise,self.prior,operation = operation,\ + ms = self.ms) + else: + Local_Average_Jacobian_Operator = SeriallySampledJacobianOperator(self.observable,self.noise,self.prior,operation = operation,\ + nsamples = self.parameters['samples_per_process']) + # This averaging assumes every process has an equal number of samples + # Otherwise it will bias towards a process with the fewest samples + Average_Jacobian_Operator = MatrixMultCollectiveOperator(Local_Average_Jacobian_Operator, self.collective, mpi_op = 'avg') + # Instantiate Gaussian random matrix + x_Omega_construction = dl.Vector(self.mesh_constructor_comm) + Local_Average_Jacobian_Operator.init_vector(x_Omega_construction) + Omega = MultiVector(x_Omega_construction,self.parameters['rank'] + self.parameters['oversampling']) + Omega.zero() + + if self.collective.rank() == 0: + + if (operation == 'JTJ' and self.Omega_GN is None) or (operation == 'JJT' and self.Omega_NG is None): + parRandom.normal(1.,Omega) + else: + if operation == 'JTJ': + Omega = self.Omega_GN + elif operation == 'JJT': + Omega = self.Omega_NG + + self.collective.bcast(Omega,root = 0) + + if operation == 'JTJ': + if prior_preconditioned: + if hasattr(self.prior, "R"): + self.d_GN, self.V_GN = doublePassG(Average_Jacobian_Operator,\ + self.prior.R, self.prior.Rsolver, Omega,self.parameters['rank'],s=1) + else: + self.d_GN, self.V_GN = doublePassG(Average_Jacobian_Operator,\ + self.prior.Hlr, self.prior.Hlr, Omega,self.parameters['rank'],s=1) + else: + self.d_GN, self.V_GN = doublePass(Average_Jacobian_Operator,Omega,self.parameters['rank'],s=1) + self.prior_preconditioned = prior_preconditioned + self._input_subspace_construction_time = time.time() - t0 + if self.parameters['verbose'] and (self.mesh_constructor_comm.rank == 0): + print(('Input subspace construction took '+str(self._input_subspace_construction_time)[:5]+' s').center(80)) + elif operation == 'JJT': + self.d_NG, self.U_NG = doublePass(Average_Jacobian_Operator,Omega,self.parameters['rank'],s=1) + self._output_subspace_construction_time = time.time() - t0 + if self.parameters['verbose'] and (self.mesh_constructor_comm.rank ==0): + print(('Output subspace construction took '+str(self._output_subspace_construction_time)[:5]+' s').center(80)) + + if self.parameters['save_and_plot'] and MPI.COMM_WORLD.rank == 0: + if operation == 'JTJ': + np.save(self.parameters['output_directory']+'AS_input_projector',mv_to_dense(self.V_GN)) + np.save(self.parameters['output_directory']+'AS_d_GN',self.d_GN) + out_name = self.parameters['output_directory']+'AS_input_eigenvalues_'+str(self.parameters['rank'])+'.pdf' + _ = spectrum_plot(self.d_GN,\ + axis_label = ['i',r'$\lambda_i$',\ + r'Eigenvalues of $\mathbb{E}_{\nu}[C{\nabla} q^T {\nabla} q]$'+self.parameters['plot_label_suffix']], out_name = out_name) + if operation == 'JJT': + np.save(self.parameters['output_directory']+'AS_output_projector',mv_to_dense(self.U_NG)) + np.save(self.parameters['output_directory']+'AS_d_NG',self.d_NG) + + out_name = self.parameters['output_directory']+'AS_output_eigenvalues_'+str(self.parameters['rank'])+'.pdf' + _ = spectrum_plot(self.d_NG,\ + axis_label = ['i',r'$\lambda_i$',\ + r'Eigenvalues of $\mathbb{E}_{\nu}[{\nabla} q {\nabla} q^T]$'+self.parameters['plot_label_suffix']], out_name = out_name) + + + def construct_output_subspace(self,prior_preconditioned = True): + if self.parameters['serialized_sampling']: + pass + else: + self._construct_output_subspace_batched(prior_preconditioned = prior_preconditioned) - def construct_output_subspace(self): + def _construct_output_subspace_batched(self): """ This method implements the output subspace constructor """ if self.Js is None: - self.initialize_samples() + self._initialize_batched_samples() if self.parameters['verbose']: print(80*'#') @@ -282,6 +491,8 @@ def construct_output_subspace(self): if self.collective.rank() == 0: parRandom.normal(1.,Omega) + if self.parameters['store_Omega']: + self.Omega_NG = Omega else: Omega.zero() @@ -290,7 +501,7 @@ def construct_output_subspace(self): self._output_subspace_construction_time = time.time() - t0 if self.parameters['verbose'] and (self.mesh_constructor_comm.rank ==0): print(('Output subspace construction took '+str(self._output_subspace_construction_time)[:5]+' s').center(80)) - if True and MPI.COMM_WORLD.rank == 0: + if self.parameters['save_and_plot'] and MPI.COMM_WORLD.rank == 0: np.save(self.parameters['output_directory']+'AS_output_projector',mv_to_dense(self.U_NG)) np.save(self.parameters['output_directory']+'AS_d_NG',self.d_NG) diff --git a/hippyflow/modeling/multiPDEProblem.py b/hippyflow/modeling/multiPDEProblem.py index fb36225..8683308 100644 --- a/hippyflow/modeling/multiPDEProblem.py +++ b/hippyflow/modeling/multiPDEProblem.py @@ -1,15 +1,13 @@ -# Copyright (c) 2016-2018, The University of Texas at Austin -# & University of California--Merced. -# Copyright (c) 2019-2021, The University of Texas at Austin -# University of California--Merced, Washington University in St. Louis. +# Copyright (c) 2020-2021, The University of Texas at Austin +# & Washington University in St. Louis. # # All Rights reserved. # See file COPYRIGHT for details. # -# This file is part of the hIPPYlib library. For more information and source code -# availability see https://hippylib.github.io. +# This file is part of the hIPPYflow package. For more information see +# https://github.com/hippylib/hippyflow/ # -# hIPPYlib is free software; you can redistribute it and/or modify it under the +# hIPPYflow is free software; you can redistribute it and/or modify it under the # terms of the GNU General Public License (as published by the Free # Software Foundation) version 2.0 dated June 1991. @@ -140,4 +138,6 @@ def apply_ij(self,i,j, dir, out): self.problems[k].apply_ij(i,j, dir,out.data[k] ) else: for k in range(self.n_problems): - self.problems[k].apply_ij(i,j, dir.data[k],out.data[k]) \ No newline at end of file + self.problems[k].apply_ij(i,j, dir.data[k],out.data[k]) + + \ No newline at end of file diff --git a/hippyflow/test/__init__.py b/hippyflow/test/__init__.py new file mode 100644 index 0000000..e866d56 --- /dev/null +++ b/hippyflow/test/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2020-2021, The University of Texas at Austin +# & Washington University in St. Louis. +# +# All Rights reserved. +# See file COPYRIGHT for details. +# +# This file is part of the hIPPYflow package. For more information see +# https://github.com/hippylib/hippyflow/ +# +# hIPPYflow is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License (as published by the Free +# Software Foundation) version 2.0 dated June 1991. + diff --git a/hippyflow/test/test_derivativeSubspace.py b/hippyflow/test/test_derivativeSubspace.py new file mode 100644 index 0000000..6146720 --- /dev/null +++ b/hippyflow/test/test_derivativeSubspace.py @@ -0,0 +1,120 @@ +# Copyright (c) 2020-2021, The University of Texas at Austin +# & Washington University in St. Louis. +# +# All Rights reserved. +# See file COPYRIGHT for details. +# +# This file is part of the hIPPYflow package. For more information see +# https://github.com/hippylib/hippyflow/ +# +# hIPPYflow is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License (as published by the Free +# Software Foundation) version 2.0 dated June 1991. + +import unittest +import dolfin as dl +import ufl +import numpy as np + + +import sys, os +sys.path.append(os.environ.get('HIPPYLIB_PATH')) +import hippylib as hl + +sys.path.append(os.environ.get('HIPPYFLOW_PATH')) +import hippyflow as hf + +def u_boundary(x, on_boundary): + return on_boundary and ( x[1] < dl.DOLFIN_EPS or x[1] > 1.0 - dl.DOLFIN_EPS) + +class TestDerivativeSubspace(unittest.TestCase): + def setUp(self): + dl.parameters["ghost_mode"] = "shared_facet" + ndim = 2 + nx = 10 + ny = 10 + self.mesh = dl.UnitSquareMesh(nx, ny) + + self.observable = self.buildObservable(self.mesh) + + self.prior = hf.BiLaplacian2D(self.Vh[hl.PARAMETER],gamma = 0.1, delta = 1.0) + + + + def buildObservable(self,mesh): + self.rank = dl.MPI.rank(mesh.mpi_comm()) + + Vh2 = dl.FunctionSpace(mesh, 'Lagrange', 2) + Vh1 = dl.FunctionSpace(mesh, 'Lagrange', 1) + self.Vh = [Vh2, Vh1, Vh2] + # Initialize Expressions + f = dl.Constant(0.0) + + u_bdr = dl.Expression("x[1]", degree=1) + u_bdr0 = dl.Constant(0.0) + bc = dl.DirichletBC(self.Vh[hl.STATE], u_bdr, u_boundary) + bc0 = dl.DirichletBC(self.Vh[hl.STATE], u_bdr0, u_boundary) + + def pde_varf(u,m,p): + return ufl.exp(m)*ufl.inner(ufl.grad(u), ufl.grad(p))*ufl.dx - f*p*ufl.dx + + self.pde = hl.PDEVariationalProblem(self.Vh, pde_varf, bc, bc0, is_fwd_linear=True) + + + x_targets = np.linspace(0.1,0.9,10) + y_targets = np.linspace(0.1,0.9,10) + targets = [] + for xi in x_targets: + for yi in y_targets: + targets.append((xi,yi)) + targets = np.array(targets) + + B = hl.assemblePointwiseObservation(self.Vh[hl.STATE], targets) + return hf.LinearStateObservable(self.pde,B) + + + + + + def testSerializedBatchMethods(self): + """ + Test the agreement of the two different routines for computing active subspace + """ + AS_parameters = hf.ActiveSubspaceParameterList() + AS_parameters['observable_constructor'] = self.buildObservable + AS_parameters['observable_kwargs'] = {} + AS_parameters['save_and_plot'] = False + AS_parameters['serialized_sampling'] = False + AS_parameters['store_Omega'] = True + AS_parameters['rank'] = 64 + my_collective = hf.NullCollective() + + AS = hf.ActiveSubspaceProjector(self.observable,self.prior,collective = my_collective,parameters = AS_parameters) + # Construct the subspace via batching + AS.construct_input_subspace() + d_batch_in = AS.d_GN + AS.d_GN = None + AS_parameters['serialized_sampling'] = True + AS_parameters['ms_given'] = True + AS.construct_input_subspace() + d_serialized_in = AS.d_GN + input_d_error = np.linalg.norm(d_batch_in - d_serialized_in) + # assert input_d_error < 1e-12 + # Is this too aggressive? + assert input_d_error == 0.0 + + AS_parameters['serialized_sampling'] = False + AS.construct_input_subspace() + d_batch_out = AS.d_NG + AS_parameters['serialized_sampling'] = True + AS_parameters['ms_given'] = True + AS.construct_input_subspace() + d_serialized_out = AS.d_NG + output_d_error = np.linalg.norm(d_batch_in - d_serialized_in) + # Is this too aggressive? + assert output_d_error == 0.0 + + +if __name__ == '__main__': + dl.parameters["ghost_mode"] = "shared_facet" + unittest.main() diff --git a/hippyflow/utilities/__init__.py b/hippyflow/utilities/__init__.py index 4845c11..f30e168 100644 --- a/hippyflow/utilities/__init__.py +++ b/hippyflow/utilities/__init__.py @@ -13,7 +13,7 @@ from .mesh_utils import read_serial_write_parallel_mesh -from .mv_utilities import mv_to_dense +from .mv_utilities import mv_to_dense, dense_to_mv_local from .plotting import * diff --git a/hippyflow/version.py b/hippyflow/version.py index 8945ad1..bcd3912 100644 --- a/hippyflow/version.py +++ b/hippyflow/version.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, The University of Texas at Austin +# Copyright (c) 2020-2021, The University of Texas at Austin # & Washington University in St. Louis. # # All Rights reserved. @@ -12,5 +12,5 @@ # Software Foundation) version 2.0 dated June 1991. -version_info = (0, 1, 0) +version_info = (0, 2, 0) __version__ = '.'.join([str(x) for x in version_info]) \ No newline at end of file