diff --git a/summit/acquisition.py b/summit/acquisition.py
index dd950d36..ba3a23c0 100644
--- a/summit/acquisition.py
+++ b/summit/acquisition.py
@@ -101,7 +101,7 @@ def select_max(self, samples, num_evals=1):
for i in range(num_evals):
masked_samples = samples[mask, :]
- Yfront, _ = pareto_efficient(Ynew)
+ Yfront, _ = pareto_efficient(Ynew, maximize=True)
        if len(Yfront) == 0:
raise ValueError('Pareto front length too short')
@@ -112,7 +112,7 @@ def select_max(self, samples, num_evals=1):
for sample in masked_samples:
sample = sample.reshape(1,n)
A = np.append(Ynew, sample, axis=0)
- Afront, _ = pareto_efficient(A)
+ Afront, _ = pareto_efficient(A, maximize=True)
hv = HvI.hypervolume(-Afront, [0,0])
hv_improvement.append(hv-hvY)
@@ -147,8 +147,6 @@ def hypervolume(pointset, ref):
"""Compute the absolute hypervolume of a *pointset* according to the
reference point *ref*.
"""
- warnings.warn("Falling back to the python version of hypervolume "
- "module. Expect this to be very slow.", RuntimeWarning)
hv = _HyperVolume(ref)
return hv.compute(pointset)
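With the maximize flag passed explicitly, the front-building and scoring steps read as follows. A minimal sketch using this hunk's own convention (build a maximization front, then negate it before the minimization-based hypervolume routine); the sample values are hypothetical, and `pareto_efficient` is the helper added in `summit/utils.py` at the bottom of this diff:

```python
import numpy as np
from summit.acquisition import HvI
from summit.utils import pareto_efficient

# Hypothetical two-objective samples; the first two points form the front.
Y = np.array([[1.0, 2.0], [2.0, 1.0], [0.5, 0.5]])

Yfront, indices = pareto_efficient(Y, maximize=True)  # keeps [1, 2] and [2, 1]
hv = HvI.hypervolume(-Yfront, [0, 0])                 # 3.0 for this front
```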
diff --git a/summit/data/dataset.py b/summit/data/dataset.py
index 1cb9cd3c..200b6b6b 100644
--- a/summit/data/dataset.py
+++ b/summit/data/dataset.py
@@ -14,6 +14,24 @@ class DataSet(pd.core.frame.DataFrame):
----
Based on https://notes.mikejarrett.ca/storing-metadata-in-pandas-dataframes/
"""
+ def __init__(self, data=None, index=None, columns=None, metadata_columns=[], units=None, dtype=None, copy=False):
+ if isinstance(columns, pd.MultiIndex):
+ pass
+ elif columns is not None:
+ column_names = columns
+ if metadata_columns:
+ types = ['METADATA' if x in metadata_columns else 'DATA' for x in column_names]
+ else:
+ types = ['DATA' for _ in range(len(column_names))]
+ arrays = [column_names, types]
+ levels = ['NAME', 'TYPE']
+ if units:
+ arrays.append(units)
+ levels.append('UNITS')
+            tuples = list(zip(*arrays))
+ columns = pd.MultiIndex.from_tuples(tuples, names=levels)
+ pd.core.frame.DataFrame.__init__(self, data=data, index=index, columns=columns, dtype=dtype, copy=copy)
+
@staticmethod
def from_df(df: pd.DataFrame, metadata_columns: List=[],
units: List = []):
@@ -83,7 +101,8 @@ def zero_to_one(self, small_tol=1.0e-5) -> np.ndarray:
scaled[abs(scaled) < small_tol] = 0.0
return scaled
- def standardize(self, small_tol=1.0e-5) -> np.ndarray:
+ def standardize(self, small_tol=1.0e-5,
+ return_mean=False, return_std=False, **kwargs) -> np.ndarray:
"""Standardize data columns by removing the mean and scaling to unit variance
The standard score of each data column is calculated as:
@@ -97,6 +116,16 @@ def standardize(self, small_tol=1.0e-5) -> np.ndarray:
The minimum value of any value in the final scaled array.
This is used to prevent very small values that will cause
            issues in later calculations. Defaults to 1e-5.
+ return_mean: bool, optional
+ Return an array with the mean of each column in the DataSet
+ return_std: bool, optional
+            Return an array with the standard deviation of each column
+ in the DataSet
+ mean: array, optional
+ Pass a precalculated array of means for the columns
+ std: array, optional
+ Pass a precalculated array of standard deviations
+ for the columns
Returns
-------
@@ -110,11 +139,21 @@ def standardize(self, small_tol=1.0e-5) -> np.ndarray:
"""
values = self.data_to_numpy()
values = values.astype(np.float64)
- mean = np.mean(values, axis=0)
- sigma = np.std(values, axis=0)
+
+ mean = kwargs.get('mean',
+ np.mean(values, axis=0))
+ sigma = kwargs.get('std',
+ np.std(values, axis=0))
standard = (values-mean)/sigma
standard[abs(standard) < small_tol] = 0.0
- return standard
+ if return_mean and return_std:
+ return standard, mean, sigma
+ elif return_mean:
+ return standard, mean
+ elif return_std:
+ return standard, sigma
+ else:
+ return standard
@property
def _constructor(self):
@@ -182,6 +221,4 @@ def insert(self, loc, column, value, type='DATA', units=None, allow_duplicates=F
self.columns[loc][0] = column
self.columns[loc][1] = type
self.columns[loc][2] = units
-
-class ResultSet(DataSet):
- data_column_types = ['input', 'output']
+
\ No newline at end of file
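The new constructor and the `standardize` keywords combine into a fit/transform pattern. A minimal sketch with hypothetical column names and values, assuming `data_to_numpy()` skips METADATA columns as elsewhere in this file:

```python
import numpy as np
from summit.data import DataSet

# Metadata columns are tagged METADATA and excluded from numeric work.
ds = DataSet([[1.0, 2.0, 'a'], [3.0, 4.0, 'b'], [5.0, 6.0, 'c']],
             columns=['x1', 'x2', 'label'],
             metadata_columns=['label'])

# Standardize the data columns and keep the statistics for reuse.
X_std, mean, std = ds.standardize(return_mean=True, return_std=True)

# Apply the same scaling to new data, e.g. at prediction time.
new = DataSet([[2.0, 3.0, 'd']], columns=['x1', 'x2', 'label'],
              metadata_columns=['label'])
X_new_std = new.standardize(mean=mean, std=std)
```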
diff --git a/summit/domain.py b/summit/domain.py
index d1c123d8..4848ed70 100644
--- a/summit/domain.py
+++ b/summit/domain.py
@@ -176,6 +176,11 @@ def from_dict(variable_dict):
description=variable_dict['description'],
bounds=variable_dict['bounds'],
is_objective=variable_dict['is_objective'])
+
+    def __add__(self, other):
+        if isinstance(other, self.__class__):
+            return [self, other]
+        return NotImplemented
+
class DiscreteVariable(Variable):
"""Representation of a discrete variable
@@ -335,6 +340,24 @@ def from_dict(variable_dict):
def _html_table_rows(self):
return self._make_html_table_rows(f"{self.num_examples} examples of {self.num_descriptors} descriptors")
+
+class Constraint:
+ def __init__(self, expression):
+ self._expression = expression
+
+ @property
+ def expression(self):
+ return self._expression
+
+ def _html_table_rows(self):
+ columns = []
+ columns.append("") #name column
+ columns.append("constraint") #type column
+ columns.append(self.expression) #description columns
+ columns.append("") #value column
+        return ''.join([f"<td>{column}</td>" for column in columns])
+
+
class Domain:
"""Representation of the optimization domain
@@ -353,8 +376,9 @@ class Domain:
>>> domain += ContinuousVariable('temperature', 'reaction temperature', [1, 100])
"""
- def __init__(self, variables:Optional[List[Type[Variable]]]=[]):
+ def __init__(self, variables=[], constraints=[]):
self._variables = variables
+ self._constraints = constraints
#Check that all the output variables continuous
self.raise_noncontinuous_outputs()
@@ -363,6 +387,20 @@ def variables(self):
"""[List[Type[Variable]]]: List of variables in the domain"""
return self._variables
+ @property
+ def constraints(self):
+ return self._constraints
+
+ # def __getitem__(self, key):
+ # '''For accessing variables like a dictionary'''
+ # for v in self.variables:
+ # if v.name == key:
+ # return v
+ # raise KeyError(f'No variable {key} found in the domain.')
+
+ # def _ipython_key_completions_(self):
+ # return [v.name for v in self.variables]
+
@property
def input_variables(self):
input_variables = []
@@ -482,10 +520,16 @@ def from_dict(domain_dict):
return Domain(variables)
- def __add__(self, var):
- if var.is_objective and var.variable_type != 'continuous':
- DomainError("Output variables must be continuous")
- return Domain(self._variables + [var])
+ def __add__(self, obj):
+        #TODO: make this work with adding lists of variables or constraints
+ if isinstance(obj, Variable):
+ if obj.is_objective and obj.variable_type != 'continuous':
+ raise DomainError("Output variables must be continuous")
+ return Domain(variables=self._variables + [obj], constraints=self.constraints)
+ elif isinstance(obj, Constraint):
+            return Domain(variables=self.variables, constraints=self.constraints + [obj])
+ else:
+ raise RuntimeError('Not a supported domain object.')
def _repr_html_(self):
"""Build html string for table display in jupyter notebooks.
@@ -510,7 +554,9 @@ def _repr_html_(self):
return ''.join(html)
def _html_table_rows(self):
- return ''.join(map(lambda l: l._html_table_rows(), self._variables))
+ variables = ''.join([v._html_table_rows() for v in self.variables])
+ constraints = ''.join([c._html_table_rows() for c in self.constraints])
+ return variables + constraints
class DomainError(Exception):
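With the overloaded `__add__`, variables and constraints now share one construction idiom. A short sketch (the bounds and constraint expression are illustrative; per the NSGAII changes below, a point is feasible when the expression evaluates to <= 0):

```python
from summit.domain import Domain, ContinuousVariable, Constraint

domain = Domain()
domain += ContinuousVariable('temperature', 'reaction temperature', [30, 100])
domain += ContinuousVariable('flowrate', 'total flowrate', [0.1, 1.0])

# Constraint expressions are evaluated with DataFrame.eval against the
# input columns; feasible points satisfy expression <= 0.
domain += Constraint('temperature + 100 * flowrate - 150')

print(len(domain.variables), len(domain.constraints))  # 2 1
```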
diff --git a/summit/models.py b/summit/models.py
index 474b3211..3bd76ba0 100644
--- a/summit/models.py
+++ b/summit/models.py
@@ -1,12 +1,12 @@
-from summit.initial_design.latin_designer import lhs
+from summit.data import DataSet
from GPy.models import GPRegression
from GPy.kern import Matern52
import numpy as np
-from numpy import matlib
import scipy
from abc import ABC, abstractmethod
+from sklearn.base import BaseEstimator, RegressorMixin
class Model(ABC):
@@ -23,7 +23,32 @@ def fit(self, X, Y):
def predict(self, X):
pass
-class GPyModel(Model):
+class ModelGroup:
+ def __init__(self, models: dict):
+ self._models = models
+
+ @property
+ def models(self):
+ return self._models
+
+ def fit(self, X, y, **kwargs):
+ for column_name, model in self.models.items():
+ model.fit(X, y[[column_name]])
+
+ def predict(self, X, **kwargs):
+ """
+ Note
+ -----
+        This makes the assumption that each model's predict method
+        returns an n_samples x 1 array.
+ """
+ result = [model.predict(X)[:, 0] for model in self.models.values()]
+ return np.array(result).T
+
+ def __getitem__(self, key):
+ return self.models[key]
+
+class GPyModel(BaseEstimator, RegressorMixin):
''' A Gaussian Process Regression model from GPy
This is implemented as an alternative to the sklearn
@@ -54,7 +79,8 @@ def __init__(self, kernel=None, input_dim=None,noise_var=1.0, optimizer=None):
else:
if not input_dim:
raise ValueError('input_dim must be specified if no kernel is specified.')
- self._kernel = Matern52(input_dim = input_dim, ARD=True)
+ self.input_dim = input_dim
+ self._kernel = Matern52(input_dim = self.input_dim, ARD=True)
self._noise_var = noise_var
self._optimizer = optimizer
self._model = None
@@ -63,10 +89,10 @@ def fit(self, X, y, num_restarts=10, max_iters=2000, parallel=False):
"""Fit Gaussian process regression model.
Parameters
----------
- X : array-like, shape = (n_samples, n_features)
- Training data
- y : array-like, shape = (n_samples, [n_output_dims])
- Target values
+ X : DataSet
+ The data columns will be used as inputs for fitting the model
+        y : DataSet
+ The data columns will be used as outputs for fitting the model
num_restarts : int, optional (default=10)
The number of random restarts of the optimizer.
max_iters : int, optional (default=2000)
@@ -79,7 +105,25 @@ def fit(self, X, y, num_restarts=10, max_iters=2000, parallel=False):
self : returns an instance of self.
-----
"""
- self._model = GPRegression(X,y, self._kernel, noise_var=self._noise_var)
+ #Standardize inputs and outputs
+ if isinstance(X, DataSet):
+ X_std, self.input_mean, self.input_std = X.standardize(return_mean=True, return_std=True)
+ elif isinstance(X, np.ndarray):
+ self.input_mean = np.mean(X,axis=0)
+ self.input_std = np.std(X, axis=0)
+ X_std = (X-self.input_mean)/self.input_std
+ X_std[abs(X_std) < 1e-5] = 0.0
+
+ if isinstance(y, DataSet):
+ y_std, self.output_mean, self.output_std = y.standardize(return_mean=True, return_std=True)
+ elif isinstance(y, np.ndarray):
+ self.output_mean = np.mean(y,axis=0)
+ self.output_std = np.std(y, axis=0)
+ y_std = (y-self.output_mean)/self.output_std
+ y_std[abs(y_std) < 1e-5] = 0.0
+
+ #Initialize and fit model
+ self._model = GPRegression(X_std,y_std, self._kernel, noise_var=self._noise_var)
if self._optimizer:
self._model.optimize_restarts(num_restarts = num_restarts,
verbose=False,
@@ -120,17 +164,24 @@ def predict(self, X,
raise RuntimeError(
"Not returning standard deviation of predictions when "
"returning full covariance.")
-
- m, v = self._model.predict(X)
-
- if return_cov:
- result = m, v
- elif return_std:
- result = m, self._model.Kdiag(X)
- else:
- result = m
- return result
+ if isinstance(X, np.ndarray):
+ X_std = (X-self.input_mean)/self.input_std
+ X_std[abs(X_std) < 1e-5] = 0.0
+ elif isinstance(X, DataSet):
+ X_std = X.standardize(mean=self.input_mean, std=self.input_std)
+
+ m_std, v_std = self._model.predict(X_std)
+ m = m_std*self.output_std + self.output_mean
+
+ # if return_cov:
+ # result = m, v
+ # elif return_std:
+ # result = m, self._model.Kdiag(X)
+ # else:
+ # result = m
+
+ return m
class AnalyticalModel(Model):
''' An analytical model instead of statistical model
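A sketch of how `ModelGroup` and the standardizing `GPyModel` fit together; the data and column names are placeholders, and GPy must be installed:

```python
import numpy as np
from summit.data import DataSet
from summit.models import GPyModel, ModelGroup

# Hypothetical training data: two inputs and two objectives.
X = DataSet(np.random.rand(20, 2), columns=['x1', 'x2'])
y = DataSet(np.random.rand(20, 2), columns=['yield', 'cost'])

# One single-output GP per objective behind a common fit/predict interface.
models = ModelGroup({'yield': GPyModel(input_dim=2),
                     'cost': GPyModel(input_dim=2)})
models.fit(X, y)

# predict stacks each model's n_samples x 1 output into an n_samples x 2 array.
preds = models.predict(DataSet(np.random.rand(5, 2), columns=['x1', 'x2']))
```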
diff --git a/summit/optimizers.py b/summit/optimizers.py
index 0defc248..de0c3d28 100644
--- a/summit/optimizers.py
+++ b/summit/optimizers.py
@@ -7,7 +7,7 @@
from typing import List
from summit.domain import Domain, DomainError
from summit.initial_design import RandomDesigner
-# from .objective import ObjectiveWrapper
+from summit.data import DataSet
from abc import ABC, abstractmethod
import numpy as np
@@ -62,12 +62,17 @@ def is_multiobjective(self):
class NSGAII(Optimizer):
def __init__(self, domain: Domain):
Optimizer.__init__(self, domain)
+ #Set up platypus problem
self.problem = pp.Problem(nvars=self.domain.num_variables(),
- nobjs=len(self.domain.output_variables))
+ nobjs=len(self.domain.output_variables),
+ nconstrs=len(self.domain.constraints))
+ #Set maximization or minimization for each objective
+ j = 0
for i, v in enumerate(self.domain.variables):
if v.is_objective:
- continue
- if v.variable_type == "continuous":
+ direction = self.problem.MAXIMIZE if v.maximize else self.problem.MINIMIZE
+                self.problem.directions[j] = direction
+                j += 1
+ elif v.variable_type == "continuous":
self.problem.types[i] = pp.Real(v.lower_bound, v.upper_bound)
elif v.variable_type == "discrete":
#Select a subset of one of the available options
@@ -78,23 +83,36 @@ def __init__(self, domain: Domain):
else:
raise DomainError(f'{v.variable_type} is not a valid variable type.')
- def _optimize(self, objective, **kwargs):
+ #Set up constraints
+ self.problem.constraints[:] = "<=0"
+
+
+ def _optimize(self, models, **kwargs):
+ input_columns = [v.name for v in self.domain.variables if not v.is_objective]
+ output_columns = [v.name for v in self.domain.variables if v.is_objective]
def problem_wrapper(X):
- np.atleast_2d(X)
- result = objective(X)
- return result.tolist()
+ X = DataSet(np.atleast_2d(X),
+ columns=input_columns)
+ result = models.predict(X)
+
+ constraint_res = [X.eval(c.expression, resolvers=[X])
+ for c in self.domain.constraints]
+ constraint_res = [c.tolist()[0] for c in constraint_res]
+ return result[0, :].tolist(), constraint_res
+
+ #Run optimization
self.problem.function = problem_wrapper
- self.problem.directions[:] = pp.Problem.MAXIMIZE
algorithm = pp.NSGAII(self.problem)
iterations = kwargs.get('iterations', 10000)
algorithm.run(iterations)
+
x = [[s.variables[i] for i in range(self.domain.num_variables())]
for s in algorithm.result]
- x = np.array(x)
+ x = DataSet(x, columns = input_columns)
y =[[s.objectives[i] for i in range(len(self.domain.output_variables))]
for s in algorithm.result]
- y = np.array(y)
+ y = DataSet(y, columns=output_columns)
return OptimizeResult(x=x, fun=y, success=True)
class MCOptimizer(Optimizer):
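Assuming `ContinuousVariable` accepts `is_objective` and `maximize` keyword arguments (as the direction-setting loop implies) and that `num_variables()` counts only inputs, a constrained `NSGAII` could be set up as below; the names and bounds are illustrative:

```python
from summit.domain import Domain, ContinuousVariable, Constraint
from summit.optimizers import NSGAII

domain = Domain()
domain += ContinuousVariable('x1', 'first input', [0, 1])
domain += ContinuousVariable('x2', 'second input', [0, 1])
domain += ContinuousVariable('y1', 'first objective', [0, 1],
                             is_objective=True, maximize=True)
domain += ContinuousVariable('y2', 'second objective', [0, 1],
                             is_objective=True, maximize=True)
domain += Constraint('x1 + x2 - 1.5')  # feasible when <= 0

optimizer = NSGAII(domain)
# `models` would be a fitted ModelGroup (see summit/models.py above):
# result = optimizer.optimize(models, iterations=1000)
```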
diff --git a/summit/strategies.py b/summit/strategies.py
index 035e7f81..dd112cda 100644
--- a/summit/strategies.py
+++ b/summit/strategies.py
@@ -1,13 +1,13 @@
from summit.data import DataSet
+from summit.models import ModelGroup
from summit.domain import Domain, DomainError
from summit.acquisition import HvI
from summit.optimizers import NSGAII
+from summit.utils import pareto_efficient
import GPy
import numpy as np
-import warnings
-import logging
class Strategy:
def __init__(self, domain:Domain):
@@ -70,10 +70,6 @@ class TSEMO2(Strategy):
maximize: bool, optional
Whether optimization should be treated as a maximization or minimization problem.
Defaults to maximization.
- acquisition: summit.acquistion.Acquisition, optional
- The acquisition function used to select the next set of points from the pareto front
- (see optimizer). Defaults to hypervolume improvement with the reference point set
- as the upper bounds of the outputs in the specified domain and random rate 0.0
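+    reference: array-like, optional
+        The reference point used when calculating hypervolume improvement.
+        Defaults to [0, 0].
+    random_rate: float, optional
+        The fraction of each batch selected at random instead of by maximum
+        hypervolume improvement. Defaults to 0.0.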
optimizer: summit.optimizers.Optimizer, optional
The internal optimizer for estimating the pareto front prior to maximization
of the acquisition function. By default, NSGAII will be used if there is a combination
@@ -105,81 +101,122 @@ class TSEMO2(Strategy):
normalize_inputs=True)
'''
- def __init__(self, domain, models, acquisition=None, optimizer=None):
+ def __init__(self, domain, models, optimizer=None, **kwargs):
Strategy.__init__(self, domain)
- self.models = models
- if acquisition is None:
- reference = [v.upper_bound for v in self.domain.output_variables]
- self.acquisition = HvI(reference, random_rate=0.0)
- else:
- self.acquisition = acquisition
+
+ if isinstance(models, ModelGroup):
+ self.models = models
+ elif isinstance(models, dict):
+ self.models = ModelGroup(models)
+ else:
+ raise TypeError('models must be a ModelGroup or a dictionary of models.')
+
if not optimizer:
self.optimizer = NSGAII(self.domain)
else:
self.optimizer = optimizer
- def generate_experiments(self, previous_results: DataSet, num_experiments,
- normalize_inputs=False, no_repeats=True, maximize=True):
- #Get inputs and outputs + standardize if needed
+ self._reference = kwargs.get('reference', [0,0])
+ self._random_rate = kwargs.get('random_rate', 0.0)
+
+ def generate_experiments(self, previous_results: DataSet, num_experiments):
+ #Get inputs and outputs
inputs, outputs = self.get_inputs_outputs(previous_results)
- if normalize_inputs:
- self.x = inputs.standardize() #TODO: get this to work for discrete variables
+
+ #Fit models to new data
+ self.models.fit(inputs, outputs)
+
+ internal_res = self.optimizer.optimize(self.models)
+
+ hv_imp, indices = self.select_max_hvi(outputs, internal_res.fun, num_experiments)
+ result = internal_res.x.join(internal_res.fun)
+
+ return result.iloc[indices, :]
+
+ def select_max_hvi(self, y, samples, num_evals=1):
+        ''' Returns the point(s) that maximize hypervolume improvement
+
+ Parameters
+ ----------
+ samples: np.ndarray
+        y: DataSet
+            The previous results, used to compute the current hypervolume
+        samples: DataSet
+            The samples on which hypervolume improvement is calculated
+ The number of points to return (with top hypervolume improvement)
+
+ Returns
+ -------
+ hv_imp, index
+ Returns a tuple with lists of the best hypervolume improvement
+ and the indices of the corresponding points in samples
+
+ '''
+ #Get the reference point, r
+ # r = self._reference + 0.01*(np.max(samples, axis=0)-np.min(samples, axis=0))
+ r = self._reference
+
+ #Set up maximization and minimization
+ for v in self.domain.variables:
+ if v.is_objective and v.maximize:
+ y[v.name] = -1.0 * y[v.name]
+ samples[v.name] = -1.0 * samples[v.name]
+
+ Ynew = y.data_to_numpy()
+ samples = samples.data_to_numpy()
+ index = []
+ n = samples.shape[1]
+ mask = np.ones(samples.shape[0], dtype=bool)
+
+ #Set up random selection
+        if not 0.0 <= self._random_rate <= 1.0:
+            raise ValueError('Random rate must be between 0 and 1.')
+
+ if self._random_rate>0:
+ num_random = round(self._random_rate*num_evals)
+ random_selects = np.random.randint(0, num_evals, size=num_random)
else:
- self.x = inputs.data_to_numpy()
-
- self.y = outputs.data_to_numpy()
-
- #Update surrogate models with new data
- for i, model in enumerate(self.models):
- Y = self.y[:, i]
- Y = np.atleast_2d(Y).T
- logging.debug(f'Fitting model {i+1}')
- model.fit(self.x, Y, num_restarts=3, max_iters=100,parallel=True)
-
- logging.debug("Running internal optimization")
-
- #If the domain consists of one descriptors variables, evaluate every candidate
- check_descriptors = [True if v.variable_type =='descriptors' else False
- for v in self.domain.input_variables]
- if all(check_descriptors) and len(check_descriptors)==1:
- descriptor_arr = self.domain.variables[0].ds.data_to_numpy()
- if no_repeats:
- points = self._mask_previous_points(self.x, descriptor_arr)
+ random_selects = np.array([])
+
+ for i in range(num_evals):
+ masked_samples = samples[mask, :]
+ Yfront, _ = pareto_efficient(Ynew, maximize=True)
+ if len(Yfront) == 0:
+ raise ValueError('Pareto front length too short')
+
+ hv_improvement = []
+            hvY = HvI.hypervolume(Yfront, r)
+ #Determine hypervolume improvement by including
+            #each point from samples (masking previously selected points)
+ for sample in masked_samples:
+ sample = sample.reshape(1,n)
+ A = np.append(Ynew, sample, axis=0)
+ Afront, _ = pareto_efficient(A, maximize=True)
+                hv = HvI.hypervolume(Afront, r)
+ hv_improvement.append(hv-hvY)
+
+ hvY0 = hvY if i==0 else hvY0
+
+ if i in random_selects:
+ masked_index = np.random.randint(0, masked_samples.shape[0])
else:
- points = self.x
- predictions = np.zeros([points.shape[0], len(self.models)])
- for i, model in enumerate(self.models):
- predictions[:, i] = model.predict(points)[0][:,0]
+ #Choose the point that maximizes hypervolume improvement
+ masked_index = hv_improvement.index(max(hv_improvement))
- self.acquisition.data = self.y
- self.acq_vals, indices = self.acquisition.select_max(predictions, num_evals=num_experiments)
- indices = [np.where((descriptor_arr == points[ix]).all(axis=1))[0][0]
- for ix in indices]
- result = self.domain.variables[0].ds.iloc[indices, :]
- #Else use modified nsgaII
+ samples_index = np.where((samples == masked_samples[masked_index, :]).all(axis=1))[0][0]
+ new_point = samples[samples_index, :].reshape(1, n)
+ Ynew = np.append(Ynew, new_point, axis=0)
+ mask[samples_index] = False
+ index.append(samples_index)
+
+        if len(hv_improvement) == 0:
+ hv_imp = 0
+ elif len(index) == 0:
+ index = []
+ hv_imp = 0
else:
- def problem(x):
- x = np.array(x)
- x = np.atleast_2d(x)
- y = [model.predict(x)
- for model in self.models]
- y = np.array([yo[0,0] for yo in y])
- return y
- int_result = self.optimizer.optimize(problem)
- self.acquisition.data = self.y
- self.acq_vals, indices = self.acquisition.select_max(int_result.fun,
- num_evals=num_experiments)
- result = int_result.x[indices, :]
- return result
-
- def _mask_previous_points(self, x, descriptor_arr):
- descriptor_mask = np.ones(descriptor_arr.shape[0], dtype=bool)
- for point in x:
- try:
- index = np.where(descriptor_arr==point)[0][0]
- descriptor_mask[index] = False
- except IndexError:
- continue
- return descriptor_arr[descriptor_mask, :]
+ #Total hypervolume improvement
+ #Includes all points added to batch (hvY + last hv_improvement)
+ #Subtracts hypervolume without any points added (hvY0)
+            hv_imp = hv_improvement[masked_index] + hvY - hvY0
+ return hv_imp, index
\ No newline at end of file
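End to end, the reworked `TSEMO2` consumes and returns `DataSet`s. A hedged usage sketch with illustrative names, bounds, and reference point, again assuming `ContinuousVariable` accepts `is_objective` and `maximize` keywords:

```python
import numpy as np
from summit.data import DataSet
from summit.domain import Domain, ContinuousVariable
from summit.models import GPyModel
from summit.strategies import TSEMO2

domain = Domain()
domain += ContinuousVariable('x1', 'input 1', [0, 1])
domain += ContinuousVariable('x2', 'input 2', [0, 1])
domain += ContinuousVariable('y1', 'objective 1', [0, 1],
                             is_objective=True, maximize=True)
domain += ContinuousVariable('y2', 'objective 2', [0, 1],
                             is_objective=True, maximize=True)

strategy = TSEMO2(domain,
                  {'y1': GPyModel(input_dim=2), 'y2': GPyModel(input_dim=2)},
                  reference=[0, 0], random_rate=0.0)

# Previous experiments: inputs and measured objectives in one DataSet.
previous = DataSet(np.random.rand(10, 4), columns=['x1', 'x2', 'y1', 'y2'])
next_experiments = strategy.generate_experiments(previous, num_experiments=4)
```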
diff --git a/summit/utils.py b/summit/utils.py
new file mode 100644
index 00000000..9c31a9d9
--- /dev/null
+++ b/summit/utils.py
@@ -0,0 +1,24 @@
+import numpy as np
+
+def pareto_efficient(costs, maximize=True):
+    """Find the pareto-efficient points
+
+    :param costs: An (n_points, n_costs) array
+    :param maximize: If True (default), larger values are better; else smaller
+    :return: A tuple of the pareto-efficient points and their indices in costs
+    """
+ original_costs = costs
+ is_efficient = np.arange(costs.shape[0])
+ n_points = costs.shape[0]
+ next_point_index = 0 # Next index in the is_efficient array to search for
+    while next_point_index < len(costs):
+        if maximize:
+            nondominated_point_mask = np.any(costs > costs[next_point_index], axis=1)
+        else:
+            nondominated_point_mask = np.any(costs < costs[next_point_index], axis=1)
+        nondominated_point_mask[next_point_index] = True
+        #Remove dominated points
+        is_efficient = is_efficient[nondominated_point_mask]
+        costs = costs[nondominated_point_mask]
+        next_point_index = np.sum(nondominated_point_mask[:next_point_index]) + 1
+    return original_costs[is_efficient], is_efficient
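A quick check of the helper on a toy array (values arbitrary):

```python
import numpy as np
from summit.utils import pareto_efficient

points = np.array([[1.0, 4.0], [2.0, 3.0], [3.0, 1.0], [1.5, 2.0]])

# Maximization: [1.5, 2.0] is dominated by [2.0, 3.0] and is dropped.
front, indices = pareto_efficient(points, maximize=True)
print(front)    # [[1. 4.] [2. 3.] [3. 1.]]
print(indices)  # [0 1 2]
```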