From b5fd75576123bbab04b9c1004fa270fa2d6a9a5a Mon Sep 17 00:00:00 2001
From: Jonathan Vandermause <jonathan_vandermause@g.harvard.edu>
Date: Tue, 26 May 2020 16:28:09 -0400
Subject: [PATCH] remove mgp_en (which was spuriously resurrected from the last
 few merges)

---
 flare/mgp/mgp_en.py | 1184 -------------------------------------------
 1 file changed, 1184 deletions(-)
 delete mode 100644 flare/mgp/mgp_en.py

diff --git a/flare/mgp/mgp_en.py b/flare/mgp/mgp_en.py
deleted file mode 100644
index 6e5b0c3b1..000000000
--- a/flare/mgp/mgp_en.py
+++ /dev/null
@@ -1,1184 +0,0 @@
-import inspect
-import json
-import multiprocessing as mp
-import numpy as np
-import os
-import pickle as pickle
-import subprocess
-import time
-import warnings
-
-from copy import deepcopy
-from math import floor, ceil
-from scipy.linalg import solve_triangular
-from typing import List
-
-from flare.env import AtomicEnvironment
-from flare.gp import GaussianProcess
-from flare.gp_algebra import partition_vector, energy_force_vector_unit, \
-    energy_energy_vector_unit, _global_training_data
-from flare.kernels.utils import from_mask_to_args, str_to_kernel_set, str_to_mapped_kernel
-from flare.kernels.cutoffs import quadratic_cutoff
-from flare.struc import Structure
-from flare.utils.element_coder import Z_to_element, NumpyEncoder
-from flare.utils.mask_helper import HyperParameterMasking as hpm
-
-from flare.mgp.utils import get_bonds, get_triplets, get_triplets_en, \
-    get_2bkernel, get_3bkernel
-from flare.mgp.splines_methods import PCASplines, CubicSpline
-
-
-class MappedGaussianProcess:
-    '''
-    Build Mapped Gaussian Process (MGP)
-    and automatically save coefficients for LAMMPS pair style.
-
-    :param: struc_params : Parameters for a dummy structure which will be
-        internally used to probe/store forces associated with different atomic
-        configurations
-    :param: grid_params : Parameters for the mapping itself, such as
-        grid size of spline fit, etc.
-    :param: mean_only : if True: only build mapping for mean (force)
-    :param: container_only : if True: only build splines container
-        (with no coefficients)
-    :param: GP: None or a GaussianProcess object. If a GP is input,
-        and autorun is true, automatically build a mapping corresponding
-        to the GaussianProcess.
-    :param: lmp_file_name : LAMMPS coefficient file name
-    :param: autorun: Attempt to build map immediately
-    Examples:
-
-    >>> struc_params = {'species': [0, 1],
-                        'cube_lat': cell, # should input the cell matrix
-                        'mass_dict': {'0': 27 * unit, '1': 16 * unit}}
-    >>> grid_params =  {'bounds_2': [[1.2], [3.5]],
-                                    # [[lower_bound], [upper_bound]]
-                                    # These describe the lower and upper
-                                    # bounds used to specify the 2-body spline
-                                    # fits.
-                        'bounds_3': [[1.2, 1.2, 1.2], [3.5, 3.5, 3.5]],
-                                    # [[lower,lower,lower],[upper,upper,upper]]
-                                    # Values describe lower and upper bounds
-                                    # for the bondlength-bondlength-bondlength
-                                    # grid used to construct and fit 3-body
-                                    # kernels; note that for force MGPs
-                                    # bondlength-bondlength-costheta
-                                    # are the bounds used instead.
-                        'bodies':   [2, 3] # use 2+3 body
-                        'grid_num_2': 64,# Fidelity of the grid
-                        'grid_num_3': [16, 16, 16],# Fidelity of the grid
-                        'svd_rank_2': 64, #Fidelity of uncertainty estimation
-                        'svd_rank_3': 16**3,
-                        'update': True, # if True: accelerating grids
-                                        # generating by saving intermediate
-                                        # coeff when generating grids
-                        'load_grid': None  # Used to load from file
-                        }
-    '''
-
-    def __init__(self, grid_params: dict, struc_params: dict,
-                 GP: GaussianProcess = None, mean_only: bool = False,
-                 container_only: bool = True, lmp_file_name: str = 'lmp.mgp',
-                 n_cpus: int = None, n_sample: int = 100,
-                 autorun: bool = True):
-
-        # load all arguments as attributes
-        self.mean_only = mean_only
-        self.lmp_file_name = lmp_file_name
-        self.n_cpus = n_cpus
-        self.n_sample = n_sample
-        self.grid_params = grid_params
-        self.struc_params = struc_params
-        self.hyps_mask = None
-        self.cutoffs = None
-
-        # It would be helpful to define the attributes explicitly.
-        self.__dict__.update(grid_params)
-
-        # if GP exists, the GP setup overrides the grid_params setup
-        if GP is not None:
-
-            self.cutoffs = GP.cutoffs
-            self.hyps_mask = GP.hyps_mask
-
-            self.bodies = []
-            if "two" in GP.kernel_name:
-                self.bodies.append(2)
-                self.kernel2b_info = get_2bkernel(GP)
-            if "three" in GP.kernel_name:
-                self.bodies.append(3)
-                self.kernel3b_info = get_3bkernel(GP)
-
-        self.build_bond_struc(struc_params)
-        self.maps_2 = []
-        self.maps_3 = []
-        self.build_map_container(GP)
-        self.mean_only = mean_only
-
-        if not container_only and (GP is not None) and \
-                (len(GP.training_data) > 0) and autorun:
-            self.build_map(GP)
-
-    def build_map_container(self, GP=None):
-        '''
-        construct an empty spline container without coefficients.
-        '''
-
-        if (GP is not None):
-            self.cutoffs = GP.cutoffs
-            self.hyps_mask = GP.hyps_mask
-
-        if 2 in self.bodies:
-            for b_struc in self.bond_struc[0]:
-                if (GP is not None):
-                    self.bounds_2[1][0] = hpm.get_cutoff(b_struc.coded_species,
-                            self.cutoffs, self.hyps_mask)
-                map_2 = Map2body(self.grid_num_2, self.bounds_2,
-                                 b_struc, self.svd_rank_2,
-                                 self.mean_only, self.n_cpus, self.n_sample)
-                self.maps_2.append(map_2)
-
-        if 3 in self.bodies:
-            for b_struc in self.bond_struc[1]:
-                if (GP is not None):
-                    self.bounds_3[1] = hpm.get_cutoff(b_struc.coded_species,
-                            self.cutoffs, self.hyps_mask)
-                map_3 = Map3body(self.grid_num_3, self.bounds_3,
-                                 b_struc, self.svd_rank_3,
-                                 self.mean_only,
-                                 self.grid_params['load_grid'],
-                                 self.update, self.n_cpus, self.n_sample)
-                self.maps_3.append(map_3)
-
-    def build_map(self, GP):
-        '''
-        generate/load grids and get spline coefficients
-        '''
-
-        # double check the container and the GP is the consistent
-        if not hpm.compare_dict(GP.hyps_mask, self.hyps_mask):
-            self.build_map_container(GP)
-
-        if 2 in self.bodies:
-            self.kernel2b_info = get_2bkernel(GP)
-        if 3 in self.bodies:
-            self.kernel3b_info = get_3bkernel(GP)
-
-        for map_2 in self.maps_2:
-            map_2.build_map(GP)
-        for map_3 in self.maps_3:
-            map_3.build_map(GP)
-
-        # write to lammps pair style coefficient file
-        self.write_lmp_file(self.lmp_file_name)
-
-    def build_bond_struc(self, struc_params):
-        '''
-        build a bond structure, used in grid generating
-        '''
-
-        cutoff = 0.1
-        cell = struc_params['cube_lat']
-        mass_dict = struc_params['mass_dict']
-        species_list = struc_params['species']
-        N_spc = len(species_list)
-
-        # 2 body (2 atoms (1 bond) config)
-        bond_struc_2 = []
-        spc_2 = []
-        spc_2_set = []
-        if 2 in self.bodies:
-            bodies = 2
-            for spc1_ind, spc1 in enumerate(species_list):
-                for spc2 in species_list[spc1_ind:]:
-                    species = [spc1, spc2]
-                    spc_2.append(species)
-                    spc_2_set.append(set(species))
-                    positions = [[(i+1)/(bodies+1)*cutoff, 0, 0]
-                                 for i in range(bodies)]
-                    spc_struc = \
-                        Structure(cell, species, positions, mass_dict)
-                    spc_struc.coded_species = np.array(species)
-                    bond_struc_2.append(spc_struc)
-
-        #  3 body (3 atoms (1 triplet) config)
-        bond_struc_3 = []
-        spc_3 = []
-        if 3 in self.bodies:
-            bodies = 3
-            for spc1_ind in range(N_spc):
-                spc1 = species_list[spc1_ind]
-                for spc2_ind in range(N_spc):  # (spc1_ind, N_spc):
-                    spc2 = species_list[spc2_ind]
-                    for spc3_ind in range(N_spc):  # (spc2_ind, N_spc):
-                        spc3 = species_list[spc3_ind]
-                        species = [spc1, spc2, spc3]
-                        spc_3.append(species)
-                        positions = [[(i+1)/(bodies+1)*cutoff, 0, 0]
-                                     for i in range(bodies)]
-                        spc_struc = Structure(cell, species, positions,
-                                              mass_dict)
-                        spc_struc.coded_species = np.array(species)
-                        bond_struc_3.append(spc_struc)
-#                        if spc1 != spc2:
-#                            species = [spc2, spc3, spc1]
-#                            spc_3.append(species)
-#                            positions = [[(i+1)/(bodies+1)*cutoff, 0, 0] \
-#                                        for i in range(bodies)]
-#                            spc_struc = Structure(cell, species, positions,
-#                                                        mass_dict)
-#                            spc_struc.coded_species = np.array(species)
-#                            bond_struc_3.append(spc_struc)
-#                        if spc2 != spc3:
-#                            species = [spc3, spc1, spc2]
-#                            spc_3.append(species)
-#                            positions = [[(i+1)/(bodies+1)*cutoff, 0, 0] \
-#                                        for i in range(bodies)]
-#                            spc_struc = Structure(cell, species, positions,
-#                                                        mass_dict)
-#                            spc_struc.coded_species = np.array(species)
-#                            bond_struc_3.append(spc_struc)
-
-        self.bond_struc = [bond_struc_2, bond_struc_3]
-        self.spcs = [spc_2, spc_3]
-        self.spcs_set = [spc_2_set, spc_3]
-
-    def predict(self, atom_env: AtomicEnvironment, mean_only: bool = False)\
-            -> (float, 'ndarray', 'ndarray', float):
-        '''
-        predict force, variance, stress and local energy for given
-            atomic environment
-        Args:
-            atom_env: atomic environment (with a center atom and its neighbors)
-            mean_only: if True: only predict force (variance is always 0)
-        Return:
-            force: 3d array of atomic force
-            variance: 3d array of the predictive variance
-            stress: 6d array of the virial stress
-            energy: the local energy (atomic energy)
-        '''
-        if self.mean_only:  # if not build mapping for var
-            mean_only = True
-
-        # ---------------- predict for two body -------------------
-        f2 = vir2 = kern2 = v2 = e2 = 0
-        if 2 in self.bodies:
-
-            f2, vir2, kern2, v2, e2 = \
-                self.predict_multicomponent(2, atom_env, self.kernel2b_info,
-                                            self.spcs_set[0],
-                                            self.maps_2, mean_only)
-
-        # ---------------- predict for three body -------------------
-        f3 = vir3 = kern3 = v3 = e3 = 0
-        if 3 in self.bodies:
-
-            f3, vir3, kern3, v3, e3 = \
-                self.predict_multicomponent(3, atom_env, self.kernel3b_info,
-                                            self.spcs[1], self.maps_3,
-                                            mean_only)
-
-        force = f2 + f3
-        variance = kern2 + kern3 - np.sum((v2 + v3)**2, axis=0)
-        virial = vir2 + vir3
-        energy = e2 + e3
-
-        return force, variance, virial, energy
-
-    def predict_multicomponent(self, body, atom_env, kernel_info,
-                               spcs_list, mappings, mean_only):
-        '''
-        Add up results from `predict_component` to get the total contribution
-        of all species
-        '''
-
-        kernel, en_kernel, en_force_kernel, cutoffs, hyps, hyps_mask = \
-            kernel_info
-
-        args = from_mask_to_args(hyps, hyps_mask, cutoffs)
-
-        kern = np.zeros(3)
-        for d in range(3):
-            kern[d] = \
-                kernel(atom_env, atom_env, d+1, d+1, *args)
-
-        if (body == 2):
-            spcs, comp_r, comp_xyz = \
-                get_bonds(atom_env.ctype, atom_env.etypes,
-                          atom_env.bond_array_2)
-            set_spcs = []
-            for spc in spcs:
-                set_spcs += [set(spc)]
-            spcs = set_spcs
-        elif (body == 3):
-            spcs, comp_r, comp_xyz = \
-                get_triplets_en(
-                    atom_env.ctype, atom_env.etypes, atom_env.bond_array_3,
-                    atom_env.cross_bond_inds, atom_env.cross_bond_dists,
-                    atom_env.triplet_counts)
-
-        # predict for each species
-        f_spcs = 0
-        vir_spcs = 0
-        v_spcs = 0
-        e_spcs = 0
-        for i, spc in enumerate(spcs):
-            lengths = np.array(comp_r[i])
-            xyzs = np.array(comp_xyz[i])
-            map_ind = spcs_list.index(spc)
-            f, vir, v, e = \
-                self.predict_component(lengths, xyzs, mappings[map_ind],
-                                       mean_only)
-            f_spcs += f
-            vir_spcs += vir
-            v_spcs += v
-            e_spcs += e
-
-        return f_spcs, vir_spcs, kern, v_spcs, e_spcs
-
-    def predict_component(self, lengths, xyzs, mapping, mean_only):
-        '''
-        predict force and variance contribution of one component
-        '''
-        lengths = np.array(lengths)
-        xyzs = np.array(xyzs)
-
-        # predict mean
-        e_0, f_0 = mapping.mean(lengths, with_derivatives=True)
-        e = np.sum(e_0)  # energy
-
-        # predict forces and stress
-        vir = np.zeros(6)
-        # match the ASE order
-        vir_order = ((0, 0), (1, 1), (2, 2), (1, 2), (0, 2), (0, 1))
-
-        # two-body
-        if lengths.shape[-1] == 1:
-            f_d = np.diag(f_0[:, 0, 0]) @ xyzs
-            f = 2 * np.sum(f_d, axis=0)  # force: need to check prefactor 2
-
-            for i in range(6):
-                vir_i = f_d[:, vir_order[i][0]]\
-                    * xyzs[:, vir_order[i][1]] * lengths[:, 0]
-                vir[i] = np.sum(vir_i)
-
-        # three-body
-        if lengths.shape[-1] == 3:
-            # factor1 = 1/lengths[:,1] - 1/lengths[:,0] * lengths[:,2]
-            # factor2 = 1/lengths[:,0] - 1/lengths[:,1] * lengths[:,2]
-            # f_d1 = np.diag(f_0[:,0,0]+f_0[:,2,0]*factor1) @ xyzs[:,0,:]
-            # f_d2 = np.diag(f_0[:,1,0]+f_0[:,2,0]*factor2) @ xyzs[:,1,:]
-            f_d1 = np.diag(f_0[:, 0, 0]) @ xyzs[:, 0, :]
-            f_d2 = np.diag(f_0[:, 1, 0]) @ xyzs[:, 1, :]
-            f_d = f_d1 + f_d2
-            f = 3 * np.sum(f_d, axis=0)  # force: need to check prefactor 3
-
-            for i in range(6):
-                vir_i1 = f_d1[:, vir_order[i][0]]\
-                    * xyzs[:, 0, vir_order[i][1]] * lengths[:, 0]
-                vir_i2 = f_d2[:, vir_order[i][0]]\
-                    * xyzs[:, 1, vir_order[i][1]] * lengths[:, 1]
-                vir[i] = np.sum(vir_i1 + vir_i2)
-            vir *= 1.5
-
-        # predict var
-        v = np.zeros(3)
-        if not mean_only:
-            v_0 = mapping.var(lengths)
-            v_d = v_0 @ xyzs
-            v = mapping.var.V @ v_d
-        return f, vir, v, e
-
-    def write_lmp_file(self, lammps_name):
-        '''
-        write the coefficients to a file that can be used by lammps pair style
-        '''
-
-        # write header
-        f = open(lammps_name, 'w')
-
-        header_comment = '''# #2bodyarray #3bodyarray\n# elem1 elem2 a b order
-        '''
-        f.write(header_comment)
-
-        twobodyarray = len(self.spcs[0])
-        threebodyarray = len(self.spcs[1])
-        header = '\n{} {}\n'.format(twobodyarray, threebodyarray)
-        f.write(header)
-
-        # write two body
-        if twobodyarray > 0:
-            for ind, spc in enumerate(self.spcs[0]):
-                self.maps_2[ind].write(f, spc)
-
-        # write three body
-        if threebodyarray > 0:
-            for ind, spc in enumerate(self.spcs[1]):
-                self.maps_3[ind].write(f, spc)
-
-        f.close()
-
-    def as_dict(self) -> dict:
-        """
-        Dictionary representation of the MGP model.
-        """
-
-        out_dict = deepcopy(dict(vars(self)))
-
-        # Uncertainty mappings currently not serializable;
-        if not self.mean_only:
-            warnings.warn("Uncertainty mappings cannot be serialized, "
-                          "and so the MGP dict outputted will not have "
-                          "them.", Warning)
-            out_dict['mean_only'] = True
-
-        # Iterate through the mappings for various bodies
-        for i in self.bodies:
-            kern_info = f'kernel{i}b_info'
-            kernel, ek, efk, cutoffs, hyps, hyps_mask = out_dict[kern_info]
-            out_dict[kern_info] = (kernel.__name__, efk.__name__,
-                                   cutoffs, hyps, hyps_mask)
-
-        # only save the coefficients
-        out_dict['maps_2'] = [map_2.mean.__coeffs__ for map_2 in self.maps_2]
-        out_dict['maps_3'] = [map_3.mean.__coeffs__ for map_3 in self.maps_3]
-
-        # don't need these since they are built in the __init__ function
-        key_list = ['bond_struc', 'spcs_set', ]
-        for key in key_list:
-            if out_dict.get(key) is not None:
-                del out_dict[key]
-
-        return out_dict
-
-    @staticmethod
-    def from_dict(dictionary: dict):
-        """
-        Create MGP object from dictionary representation.
-        """
-        new_mgp = MappedGaussianProcess(
-            grid_params=dictionary['grid_params'],
-            struc_params=dictionary['struc_params'],
-            GP=None, mean_only=dictionary['mean_only'], container_only=True,
-            lmp_file_name=dictionary['lmp_file_name'],
-            n_cpus=dictionary['n_cpus'], n_sample=dictionary['n_sample'],
-            autorun=False)
-
-        # Restore kernel_info
-        for i in dictionary['bodies']:
-            kern_info = f'kernel{i}b_info'
-            hyps_mask = dictionary[kern_info][-1]
-            if (hyps_mask is None):
-                multihyps = False
-            else:
-                multihyps = True
-
-            kernel_info = dictionary[kern_info]
-            kernel_name = kernel_info[0]
-            kernel, _, ek, efk = str_to_kernel_set(kernel_name, multihyps)
-            kernel_info[0] = kernel
-            kernel_info[1] = ek
-            kernel_info[2] = efk
-            setattr(new_mgp, kern_info, kernel_info)
-
-        # Fill up the model with the saved coeffs
-        for m, map_2 in enumerate(new_mgp.maps_2):
-            map_2.mean.__coeffs__ = np.array(dictionary['maps_2'][m])
-        for m, map_3 in enumerate(new_mgp.maps_3):
-            map_3.mean.__coeffs__ = np.array(dictionary['maps_3'][m])
-
-        # Set GP
-        if dictionary.get('GP'):
-            new_mgp.GP = GaussianProcess.from_dict(dictionary.get("GP"))
-
-        return new_mgp
-
-    def write_model(self, name: str, format='json'):
-        """
-        Write everything necessary to re-load and re-use the model
-        :param model_name:
-        :return:
-        """
-        if 'json' in format.lower():
-            with open(f'{name}.json', 'w') as f:
-                json.dump(self.as_dict(), f, cls=NumpyEncoder)
-
-        elif 'pickle' in format.lower() or 'binary' in format.lower():
-            with open(f'{name}.pickle', 'wb') as f:
-                pickle.dump(self, f)
-
-        else:
-            raise ValueError("Requested format not found.")
-
-
-#    @staticmethod
-#    def load_model(elements: List[int], directory:str = './',
-#                   kernel: str = '2+3',
-#                   load_var: bool = False):
-#        """
-#        Loads the relevant files in a directory to an MGP model
-#        :param elements:
-#        :param directory:
-#        :param kernel:
-#        :return:
-#        """
-#
-#        # Check to see if two body or three body kernels will be loaded
-#        # so "2+3" makes b2 and b3 true
-#        b2 = '2' in kernel or 'two' in kernel
-#        b3 = '3' in kernel or 'three' in kernel
-#
-#
-#        raise NotImplementedError
-
-    @staticmethod
-    def from_file(filename: str):
-        if '.json' in filename:
-            with open(filename, 'r') as f:
-                model = \
-                    MappedGaussianProcess.from_dict(json.loads(f.readline()))
-            return model
-
-        elif 'pickle' in filename:
-            with open(filename, 'rb') as f:
-                return pickle.load(f)
-        else:
-            raise NotImplementedError
-
-
-class Map2body:
-    def __init__(self, grid_num: int, bounds, bond_struc: Structure,
-                 svd_rank=0, mean_only: bool = False, n_cpus: int = None,
-                 n_sample: int = 100):
-        '''
-        Build 2-body MGP
-
-        bond_struc: Mock structure used to sample 2-body forces on 2 atoms
-        '''
-
-        self.grid_num = grid_num
-        self.bounds = bounds
-        self.bond_struc = bond_struc
-        self.svd_rank = svd_rank
-        self.mean_only = mean_only
-        self.n_cpus = n_cpus
-        self.n_sample = n_sample
-
-        spc = bond_struc.coded_species
-        self.species_code = Z_to_element(spc[0]) + '_' + Z_to_element(spc[1])
-
-#        arg_dict = inspect.getargvalues(inspect.currentframe())[3]
-#        del arg_dict['self']
-#        self.__dict__.update(arg_dict)
-
-        self.build_map_container()
-
-    def GenGrid(self, GP):
-        '''
-        To use GP to predict value on each grid point, we need to generate the
-        kernel vector kv whose length is the same as the training set size.
-
-        1. We divide the training set into several batches, corresponding to
-           different segments of kv
-        2. Distribute each batch to a processor, i.e. each processor calculate
-           the kv segment of one batch for all grids
-        3. Collect kv segments and form a complete kv vector for each grid,
-           and calculate the grid value by multiplying the complete kv vector
-           with GP.alpha
-        '''
-
-        kernel_info = get_2bkernel(GP)
-
-        if (self.n_cpus is None):
-            processes = mp.cpu_count()
-        else:
-            processes = self.n_cpus
-
-        # ------ construct grids ------
-        nop = self.grid_num
-        bond_lengths = np.linspace(self.bounds[0][0], self.bounds[1][0], nop)
-        bond_means = np.zeros([nop])
-        if not self.mean_only:
-            bond_vars = np.zeros([nop, len(GP.alpha)])
-        else:
-            bond_vars = None
-        env12 = AtomicEnvironment(
-            self.bond_struc, 0, GP.cutoffs, cutoffs_mask=GP.hyps_mask)
-
-        # --------- calculate force kernels ---------------
-        with mp.Pool(processes=processes) as pool:
-            n_envs = len(GP.training_data)
-            n_strucs = len(GP.training_structures)
-            n_kern = n_envs * 3 + n_strucs
-
-            block_id, nbatch = \
-                partition_vector(self.n_sample, n_envs, processes)
-
-            k12_slice = []
-            for ibatch in range(nbatch):
-                s, e = block_id[ibatch]
-                k12_slice.append(pool.apply_async(
-                    self._GenGrid_inner, args=(GP.name, s, e, bond_lengths,
-                                               env12, kernel_info)))
-            k12_matrix = []
-            for ibatch in range(nbatch):
-                k12_matrix += [k12_slice[ibatch].get()]
-            pool.close()
-            pool.join()
-        del k12_slice
-        k12_v_force = np.vstack(k12_matrix)
-        del k12_matrix
-
-        # --------- calculate energy kernels ---------------
-        with mp.Pool(processes=processes) as pool:
-            block_id, nbatch = \
-                partition_vector(self.n_sample, n_strucs, processes)
-
-            k12_slice = []
-            for ibatch in range(nbatch):
-                s, e = block_id[ibatch]
-                k12_slice.append(pool.apply_async(
-                    self._GenGrid_energy,
-                    args=(GP.name, s, e, bond_lengths, env12, kernel_info)))
-            k12_matrix = []
-            for ibatch in range(nbatch):
-                k12_matrix += [k12_slice[ibatch].get()]
-            pool.close()
-            pool.join()
-        del k12_slice
-        k12_v_energy = np.vstack(k12_matrix)
-        del k12_matrix
-
-        k12_v_all = np.vstack([k12_v_force, k12_v_energy])
-        k12_v_all = np.moveaxis(k12_v_all, 0, -1)
-        del k12_v_force
-        del k12_v_energy
-
-        # ------- compute bond means and variances ---------------
-        for b, _ in enumerate(bond_lengths):
-            k12_v = k12_v_all[b, :]
-            bond_means[b] = np.matmul(k12_v, GP.alpha)
-            if not self.mean_only:
-                bond_vars[b, :] = solve_triangular(GP.l_mat, k12_v, lower=True)
-
-        write_species_name = ''
-        for x in self.bond_struc.coded_species:
-            write_species_name += "_" + Z_to_element(x)
-        # ------ save mean and var to file -------
-        np.save('grid2_mean' + write_species_name, bond_means)
-        np.save('grid2_var' + write_species_name, bond_vars)
-
-        return bond_means, bond_vars
-
-    def _GenGrid_inner(self, name, s, e, bond_lengths,
-                       env12, kernel_info):
-        '''
-        Calculate kv segments of the given batch of training data for all grids
-        '''
-
-        kernel, en_kernel, en_force_kernel, cutoffs, hyps, hyps_mask = \
-            kernel_info
-        size = e - s
-        k12_v = np.zeros([len(bond_lengths), size*3])
-        for b, r in enumerate(bond_lengths):
-            env12.bond_array_2 = np.array([[r, 1, 0, 0]])
-            k12_v[b, :] = \
-                energy_force_vector_unit(name, s, e, env12, en_force_kernel,
-                                         hyps, cutoffs, hyps_mask)
-        return np.moveaxis(k12_v, 0, -1)
-
-    def _GenGrid_energy(self, name, s, e, bond_lengths, env12, kernel_info):
-        '''
-        Calculate kv segments of the given batch of training data for all grids
-        '''
-
-        kernel, en_kernel, en_force_kernel, cutoffs, hyps, hyps_mask = \
-            kernel_info
-        size = e - s
-        k12_v = np.zeros([len(bond_lengths), size])
-        for b, r in enumerate(bond_lengths):
-            env12.bond_array_2 = np.array([[r, 1, 0, 0]])
-            k12_v[b, :] = \
-                energy_energy_vector_unit(name, s, e, env12, en_kernel,
-                                          hyps, cutoffs, hyps_mask)
-        return np.moveaxis(k12_v, 0, -1)
-
-    def build_map_container(self):
-        '''
-        build 1-d spline function for mean, 2-d for var
-        '''
-        self.mean = CubicSpline(self.bounds[0], self.bounds[1],
-                                orders=[self.grid_num])
-
-        if not self.mean_only:
-            self.var = PCASplines(self.bounds[0], self.bounds[1],
-                                  orders=[self.grid_num],
-                                  svd_rank=self.svd_rank)
-
-    def build_map(self, GP):
-        y_mean, y_var = self.GenGrid(GP)
-        self.mean.set_values(y_mean)
-        if not self.mean_only:
-            self.var.set_values(y_var)
-
-    def write(self, f, spc):
-        '''
-        Write LAMMPS coefficient file
-        '''
-        a = self.bounds[0][0]
-        b = self.bounds[1][0]
-        order = self.grid_num
-
-        coefs_2 = self.mean.__coeffs__
-
-        elem1 = Z_to_element(spc[0])
-        elem2 = Z_to_element(spc[1])
-        header_2 = '{elem1} {elem2} {a} {b} {order}\n'\
-            .format(elem1=elem1, elem2=elem2, a=a, b=b, order=order)
-        f.write(header_2)
-
-        for c, coef in enumerate(coefs_2):
-            f.write('{:.10e} '.format(coef))
-            if c % 5 == 4 and c != len(coefs_2)-1:
-                f.write('\n')
-
-        f.write('\n')
-
-
-class Map3body:
-
-    def __init__(self, grid_num, bounds, bond_struc: Structure,
-                 svd_rank: int = 0, mean_only: bool = False,
-                 load_grid: str = '', update: bool = True, n_cpus=None,
-                 n_sample: int = 10):
-        '''
-        Build 3-body MGP
-
-        bond_struc: Mock Structure object which contains 3 atoms to get map
-        from
-        '''
-        self.grid_num = grid_num
-        self.bounds = bounds
-        self.bond_struc = bond_struc
-        self.svd_rank = svd_rank
-        self.mean_only = mean_only
-        self.load_grid = load_grid
-        self.update = update
-        self.n_sample = n_sample
-
-        spc = bond_struc.coded_species
-        self.species_code = Z_to_element(spc[0]) + '_' + \
-            Z_to_element(spc[1]) + '_' + Z_to_element(spc[2])
-        self.kv3name = f'kv3_{self.species_code}'
-
-        self.build_map_container()
-        self.n_cpus = n_cpus
-        self.bounds = bounds
-        self.mean_only = mean_only
-
-    def GenGrid(self, GP):
-        '''
-        To use GP to predict value on each grid point, we need to generate the
-        kernel vector kv whose length is the same as the training set size.
-
-        1. We divide the training set into several batches, corresponding to
-           different segments of kv
-        2. Distribute each batch to a processor, i.e. each processor calculate
-           the kv segment of one batch for all grids
-        3. Collect kv segments and form a complete kv vector for each grid,
-           and calculate the grid value by multiplying the complete kv vector
-           with GP.alpha
-        '''
-
-        if self.n_cpus is None:
-            processes = mp.cpu_count()
-        else:
-            processes = self.n_cpus
-
-        # ------ get 3body kernel info ------
-        kernel_info = get_3bkernel(GP)
-
-        # ------ construct grids ------
-        n1, n2, n12 = self.grid_num
-        bonds1 = np.linspace(self.bounds[0][0], self.bounds[1][0], n1)
-        bonds2 = np.linspace(self.bounds[0][1], self.bounds[1][1], n2)
-        bonds12 = np.linspace(self.bounds[0][2], self.bounds[1][2], n12)
-        grid_means = np.zeros([n1, n2, n12])
-
-        if not self.mean_only:
-            grid_vars = np.zeros([n1, n2, n12, len(GP.alpha)])
-        else:
-            grid_vars = None
-
-        env12 = AtomicEnvironment(
-            self.bond_struc, 0, GP.cutoffs, cutoffs_mask=GP.hyps_mask)
-        n_envs = len(GP.training_data)
-        n_strucs = len(GP.training_structures)
-        n_kern = n_envs * 3 + n_strucs
-
-        mapk, mapk_en = str_to_mapped_kernel('3', GP.multihyps)
-        mapped_kernel_info = (kernel_info[0], mapk_en, mapk,
-                              kernel_info[3], kernel_info[4], kernel_info[5])
-
-        if processes == 1:
-            if self.update:
-                raise NotImplementedError("the update function is "
-                                          "not yet implemented")
-            else:
-                if (n_envs > 0):
-                    k12_v_force = \
-                        self._GenGrid_numba(GP.name, 0, n_envs, self.bounds,
-                                            n1, n2, n12, env12, mapped_kernel_info)
-                if (n_strucs > 0):
-                    k12_v_energy = \
-                        self._GenGrid_energy_numba(GP.name, 0, n_strucs, self.bounds,
-                                             n1, n2, n12, env12, mapped_kernel_info)
-        else:
-
-            if (n_envs > 0):
-                # ------------ force kernels -------------
-                if self.update:
-
-                    raise NotImplementedError(
-                        "the update function is "
-                        "not yet implemented")
-
-                else:
-                    block_id, nbatch = \
-                        partition_vector(self.n_sample, n_envs, processes)
-
-                    k12_slice = []
-                    with mp.Pool(processes=processes) as pool:
-                        for ibatch in range(nbatch):
-                            s, e = block_id[ibatch]
-                            k12_slice.append(pool.apply_async(
-                                self._GenGrid_inner,
-                                args=(GP.name, s, e, bonds1, bonds2, bonds12,
-                                      env12, kernel_info)))
-                            # # numba parallel should be off in this case
-                            # # need to think of a way to do this...
-                            # k12_slice.append(pool.apply_async(
-                            #     self._GenGrid_numba,
-                            #     args=(GP.name, s, e, self.bounds,
-                            #           n1, n2, n12, env12, mapped_kernel_info)))
-                        k12_matrix = []
-                        for ibatch in range(nbatch):
-                            k12_matrix += [k12_slice[ibatch].get()]
-                        pool.close()
-                        pool.join()
-
-                    del k12_slice
-                    k12_v_force = np.vstack(k12_matrix)
-                    del k12_matrix
-
-            # ------------ energy kernels -------------
-
-            if (n_strucs > 0):
-                if self.update:
-
-                    raise NotImplementedError(
-                        "the update function is "
-                        "not yet implemented")
-
-                else:
-                    block_id, nbatch = \
-                        partition_vector(self.n_sample, n_strucs, processes)
-
-                    k12_slice = []
-                    with mp.Pool(processes=processes) as pool:
-                        for ibatch in range(nbatch):
-                            s, e = block_id[ibatch]
-                            k12_slice.append(pool.apply_async(
-                                self._GenGrid_energy,
-                                args=(GP.name, s, e, bonds1, bonds2, bonds12,
-                                      env12, kernel_info)))
-                        k12_matrix = []
-                        for ibatch in range(nbatch):
-                            k12_matrix += [k12_slice[ibatch].get()]
-                        pool.close()
-                        pool.join()
-
-                    del k12_slice
-                    k12_v_energy = np.vstack(k12_matrix)
-                    del k12_matrix
-
-        if (n_envs > 0 and n_strucs > 0):
-            k12_v_all = np.vstack([k12_v_force, k12_v_energy])
-            k12_v_all = np.moveaxis(k12_v_all, 0, -1)
-            del k12_v_force
-            del k12_v_energy
-        elif (n_envs > 0):
-            k12_v_all = np.moveaxis(k12_v_force, 0, -1)
-            del k12_v_force
-        elif (n_strucs > 0):
-            k12_v_all = np.moveaxis(k12_v_energy, 0, -1)
-            del k12_v_energy
-        else:
-            return np.zeros(n1, n2, n12), None
-
-        for b12 in range(len(bonds12)):
-            for b1 in range(len(bonds1)):
-                for b2 in range(len(bonds2)):
-                    k12_v = k12_v_all[b1, b2, b12, :]
-                    grid_means[b1, b2, b12] = np.matmul(k12_v, GP.alpha)
-                    if not self.mean_only:
-                        grid_vars[b1, b2, b12, :] = \
-                            solve_triangular(GP.l_mat, k12_v, lower=True)
-
-        # Construct file names according to current mapping
-
-        # ------ save mean and var to file -------
-        np.save('grid3_mean_'+self.species_code, grid_means)
-        np.save('grid3_var_'+self.species_code, grid_vars)
-
-        return grid_means, grid_vars
-
-    def _GenGrid_inner(self, name, s, e, bonds1, bonds2, bonds12, env12,
-                       kernel_info):
-        '''
-        Calculate kv segments of the given batch of training data for all grids
-        '''
-
-        kernel, en_kernel, en_force_kernel, cutoffs, hyps, hyps_mask = \
-            kernel_info
-
-        # open saved k vector file, and write to new file
-        size = (e - s) * 3
-        k12_v = np.zeros([len(bonds1), len(bonds2), len(bonds12), size])
-        for b12, r12 in enumerate(bonds12):
-            for b1, r1 in enumerate(bonds1):
-                for b2, r2 in enumerate(bonds2):
-
-                    env12.bond_array_3 = np.array([[r1, 1, 0, 0],
-                                                   [r2, 0, 0, 0]])
-                    env12.cross_bond_dists = np.array([[0, r12], [r12, 0]])
-                    k12_v[b1, b2, b12, :] = \
-                        energy_force_vector_unit(
-                            name, s, e, env12, en_force_kernel, hyps, cutoffs,
-                            hyps_mask)
-
-        # open saved k vector file, and write to new file
-        if self.update:
-
-            raise NotImplementedError("the update function is not yet"
-                                      "implemented")
-
-            # s, e = block
-            # chunk = e - s
-            # new_kv_file = \
-            #     np.zeros((chunk, self.grid_num[0] * self.grid_num[1] + 1,
-            #               total_size))
-            # new_kv_file[:,0,0] = np.ones(chunk) * total_size
-            # for i in range(s, e):
-            #     kv_filename = f'{self.kv3name}/{i}'
-            #     if kv_filename in os.listdir(self.kv3name):
-            #         old_kv_file = np.load(kv_filename+'.npy')
-            #         last_size = int(old_kv_file[0,0])
-            #         new_kv_file[i, :, :last_size] = old_kv_file
-            #     else:
-            #         last_size = 0
-            # ds = [1, 2, 3]
-            # nop = self.grid_num[0]
-
-            # k12_v = new_kv_file[:, 1:, :]
-            # for i in range(s, e):
-            #     np.save(f'{self.kv3name}/{i}', new_kv_file[i, :, :])
-
-        return np.moveaxis(k12_v, -1, 0)
-
-    def _GenGrid_numba(self, name, s, e, bounds, nb1, nb2, nb12, env12, kernel_info):
-        """
-        Loop over different parts of the training set. from element s to element e
-
-        Args:
-            name: name of the gp instance
-            s: start index of the training data parition
-            e: end index of the training data parition
-            bonds1: list of bond to consider for edge center-1
-            bonds2: list of bond to consider for edge center-2
-            bonds12: list of bond to consider for edge 1-2
-            env12: AtomicEnvironment container of the triplet
-            kernel_info: return value of the get_3b_kernel
-        """
-
-        kernel, en_kernel, en_force_kernel, cutoffs, hyps, hyps_mask = \
-            kernel_info
-
-        training_data = _global_training_data[name]
-
-        ds = [1, 2, 3]
-        size = (e-s) * 3
-
-        bonds1 = np.linspace(bounds[0][0], bounds[1][0], nb1)
-        bonds2 = np.linspace(bounds[0][0], bounds[1][0], nb2)
-        bonds12 = np.linspace(bounds[0][2], bounds[1][2], nb12)
-
-        r1 = np.ones([nb1, nb2, nb12], dtype=np.float64)
-        r2 = np.ones([nb1, nb2, nb12], dtype=np.float64)
-        r12 = np.ones([nb1, nb2, nb12], dtype=np.float64)
-        for b12 in range(nb12):
-            for b1 in range(nb1):
-                for b2 in range(nb2):
-                    r1[b1, b2, b12] = bonds1[b1]
-                    r2[b1, b2, b12] = bonds2[b2]
-                    r12[b1, b2, b12] = bonds12[b12]
-        del bonds1
-        del bonds2
-        del bonds12
-
-        args = from_mask_to_args(hyps, hyps_mask, cutoffs)
-
-        k_v = []
-        for m_index in range(size):
-            x_2 = training_data[int(floor(m_index / 3))+s]
-            d_2 = ds[m_index % 3]
-            k_v += [[en_force_kernel(x_2, r1, r2, r12,
-                                     env12.ctype, env12.etypes,
-                                     d_2, *args)]]
-
-        return np.vstack(k_v)
-
-    def _GenGrid_energy_numba(self, name, s, e, bounds, nb1, nb2, nb12, env12, kernel_info):
-        """
-        Loop over different parts of the training set. from element s to element e
-
-        Args:
-            name: name of the gp instance
-            s: start index of the training data parition
-            e: end index of the training data parition
-            bonds1: list of bond to consider for edge center-1
-            bonds2: list of bond to consider for edge center-2
-            bonds12: list of bond to consider for edge 1-2
-            env12: AtomicEnvironment container of the triplet
-            kernel_info: return value of the get_3b_kernel
-        """
-
-        kernel, en_kernel, en_force_kernel, cutoffs, hyps, hyps_mask = \
-            kernel_info
-
-        training_data = _global_training_data[name]
-
-        ds = [1, 2, 3]
-        size = (e-s) * 3
-
-        bonds1 = np.linspace(bounds[0][0], bounds[1][0], nb1)
-        bonds2 = np.linspace(bounds[0][0], bounds[1][0], nb2)
-        bonds12 = np.linspace(bounds[0][2], bounds[1][2], nb12)
-
-        r1 = np.ones([nb1, nb2, nb12], dtype=np.float64)
-        r2 = np.ones([nb1, nb2, nb12], dtype=np.float64)
-        r12 = np.ones([nb1, nb2, nb12], dtype=np.float64)
-        for b12 in range(nb12):
-            for b1 in range(nb1):
-                for b2 in range(nb2):
-                    r1[b1, b2, b12] = bonds1[b1]
-                    r2[b1, b2, b12] = bonds2[b2]
-                    r12[b1, b2, b12] = bonds12[b12]
-        del bonds1
-        del bonds2
-        del bonds12
-
-        args = from_mask_to_args(hyps, hyps_mask, cutoffs)
-
-        k_v = []
-        for m_index in range(size):
-            structure = training_structures[m_index + s]
-            kern_curr = 0
-            for environment in structure:
-                kern_curr += en_kernel(x, environment, *args)
-            kv += [kern_curr]
-
-        return np.hstack(k_v)
-
-    def _GenGrid_energy(self, name, s, e, bonds1, bonds2, bonds12, env12,
-                        kernel_info):
-        '''
-        Calculate kv segments of the given batch of training data for all grids
-        '''
-
-        kernel, en_kernel, en_force_kernel, cutoffs, hyps, hyps_mask = \
-            kernel_info
-
-        # open saved k vector file, and write to new file
-        size = e - s
-        k12_v = np.zeros([len(bonds1), len(bonds2), len(bonds12), size])
-        for b12, r12 in enumerate(bonds12):
-            for b1, r1 in enumerate(bonds1):
-                for b2, r2 in enumerate(bonds2):
-
-                    env12.bond_array_3 = np.array([[r1, 1, 0, 0],
-                                                   [r2, 0, 0, 0]])
-                    env12.cross_bond_dists = np.array([[0, r12], [r12, 0]])
-                    k12_v[b1, b2, b12, :] = \
-                        energy_energy_vector_unit(
-                            name, s, e, env12, en_kernel, hyps, cutoffs,
-                            hyps_mask)
-
-        # open saved k vector file, and write to new file
-        if self.update:
-            raise NotImplementedError("the update function is not yet"
-                                      "implemented")
-
-        return np.moveaxis(k12_v, -1, 0)
-
-    def build_map_container(self):
-        '''
-        build 3-d spline function for mean,
-        3-d for the low rank approximation of L^{-1}k*
-        '''
-
-        # create spline interpolation class object
-        self.mean = CubicSpline(self.bounds[0], self.bounds[1],
-                                orders=self.grid_num)
-
-        if not self.mean_only:
-            self.var = PCASplines(self.bounds[0], self.bounds[1],
-                                  orders=self.grid_num,
-                                  svd_rank=self.svd_rank)
-
-    def build_map(self, GP):
-        # Load grid or generate grid values
-        # If load grid was not specified, will be none
-        if not self.load_grid:
-            y_mean, y_var = self.GenGrid(GP)
-        # If load grid is blank string '' or pre-fix, load in
-        else:
-            y_mean = np.load(self.load_grid+'grid3_mean_' +
-                             self.species_code+'.npy')
-            y_var = np.load(self.load_grid+'grid3_var_' +
-                            self.species_code+'.npy')
-
-        self.mean.set_values(y_mean)
-        if not self.mean_only:
-            self.var.set_values(y_var)
-
-    def write(self, f, spc):
-        a = self.bounds[0]
-        b = self.bounds[1]
-        order = self.grid_num
-
-        coefs_3 = self.mean.__coeffs__
-
-        elem1 = Z_to_element(spc[0])
-        elem2 = Z_to_element(spc[1])
-        elem3 = Z_to_element(spc[2])
-
-        header_3 = '{elem1} {elem2} {elem3} {a1} {a2} {a3} {b1}'\
-                   ' {b2} {b3:.10e} {order1} {order2} {order3}\n'\
-            .format(elem1=elem1, elem2=elem2, elem3=elem3,
-                    a1=a[0], a2=a[1], a3=a[2],
-                    b1=b[0], b2=b[1], b3=b[2],
-                    order1=order[0], order2=order[1], order3=order[2])
-        f.write(header_3)
-
-        n = 0
-        for i in range(coefs_3.shape[0]):
-            for j in range(coefs_3.shape[1]):
-                for k in range(coefs_3.shape[2]):
-                    coef = coefs_3[i, j, k]
-                    f.write('{:.10e} '.format(coef))
-                    if n % 5 == 4:
-                        f.write('\n')
-                    n += 1
-
-        f.write('\n')