New representativity calculations post processor #2058
base: devel
Changes from all commits
First changed file (the `BasicStatistics` postprocessor):
```diff
@@ -27,13 +27,13 @@
 #External Modules End-----------------------------------------------------------

 #Internal Modules---------------------------------------------------------------
-from .PostProcessorInterface import PostProcessorInterface
+from .PostProcessorReadyInterface import PostProcessorReadyInterface
 from ...utils import utils
 from ...utils import InputData, InputTypes
 from ...utils import mathUtils
 #Internal Modules End-----------------------------------------------------------

-class BasicStatistics(PostProcessorInterface):
+class BasicStatistics(PostProcessorReadyInterface):
   """
     BasicStatistics filter class. It computes all the most popular statistics
   """
```
```diff
@@ -163,104 +163,47 @@ def __init__(self):
     self.sampleSize = None # number of sample size
     self.calculations = {}
     self.validDataType = ['PointSet', 'HistorySet', 'DataSet'] # The list of accepted types of DataObject
+    self.inputDataObjectName = None # name for input data object
+    self.setInputDataType('xrDataset')

-  def inputToInternal(self, currentInp):
+  def inputToInternal(self, inputIn):
     """
-      Method to convert an input object into the internal format that is
-      understandable by this pp.
-      @ In, currentInp, object, an object that needs to be converted
+      Method to select corresponding data from Data Objects and normalize the ProbabilityWeight of corresponding data
+      @ In, inputIn, dict, a dictionary that contains the input Data Object information
       @ Out, (inputDataset, pbWeights), tuple, the dataset of inputs and the corresponding variable probability weight
     """
-    # The BasicStatistics postprocessor only accept DataObjects
-    if self.dynamic is None:
-      self.dynamic = False
-    currentInput = currentInp[-1] if type(currentInp) == list else currentInp
-    if len(currentInput) == 0:
-      self.raiseAnError(IOError, "In post-processor " +self.name+" the input "+currentInput.name+" is empty.")
+    inpVars, outVars, dataSet = inputIn['Data'][0]
     pbWeights = None
-    if type(currentInput).__name__ == 'tuple':
-      # if tuple we check that we already have a dataset
-      # and store the probability weights
-      if len(currentInput) != 2:
-        self.raiseAnError(RuntimeError, "If tuple is sent in, the dataset and the pb weights must be sent in!")
-      if type(currentInput[0]).__name__ != 'Dataset' or (currentInput[1] is not None and type(currentInput[1]).__name__ != 'Dataset'):
-        self.raiseAnError(RuntimeError, "If tuple is sent in, the elements must be Dataset!")
-      if currentInput[1] is not None and 'ProbabilityWeight' in currentInput[1]:
-        self.realizationWeight = xr.Dataset()
-        self.realizationWeight['ProbabilityWeight'] = currentInput[1]['ProbabilityWeight']
-      return currentInput
-    # TODO: convert dict to dataset, I think this will be removed when DataSet is used by other entities that
-    # are currently using this Basic Statisitics PostProcessor.
-    if type(currentInput).__name__ == 'dict':
-      if 'targets' not in currentInput.keys():
-        self.raiseAnError(IOError, 'Did not find targets in the input dictionary')
-      inputDataset = xr.Dataset()
-      for var, val in currentInput['targets'].items():
-        inputDataset[var] = val
-      if 'metadata' in currentInput.keys():
-        metadata = currentInput['metadata']
-        self.pbPresent = True if 'ProbabilityWeight' in metadata else False
-        if self.pbPresent:
-          pbWeights = xr.Dataset()
-          pbWeights['ProbabilityWeight'] = metadata['ProbabilityWeight']/metadata['ProbabilityWeight'].sum()
-          self.realizationWeight = xr.Dataset()
-          self.realizationWeight['ProbabilityWeight'] = pbWeights['ProbabilityWeight']
-          for target in self.parameters['targets']:
-            pbName = 'ProbabilityWeight-' + target
-            if pbName in metadata:
-              pbWeights[target] = metadata[pbName]/metadata[pbName].sum()
-            elif self.pbPresent:
-              pbWeights[target] = self.realizationWeight['ProbabilityWeight']
-        else:
-          self.raiseAWarning('BasicStatistics postprocessor did not detect ProbabilityWeights! Assuming unit weights instead...')
-      else:
-        self.raiseAWarning('BasicStatistics postprocessor did not detect ProbabilityWeights! Assuming unit weights instead...')
-      if 'RAVEN_sample_ID' not in inputDataset.sizes.keys():
-        self.raiseAWarning('BasicStatisitics postprocessor did not detect RAVEN_sample_ID! Assuming the first dimension of given data...')
-        self.sampleTag = utils.first(inputDataset.sizes.keys())
-      return inputDataset, pbWeights
-
-    if currentInput.type not in ['PointSet','HistorySet']:
-      self.raiseAnError(IOError, self, 'BasicStatistics postprocessor accepts PointSet and HistorySet only! Got ' + currentInput.type)
-
-    # extract all required data from input DataObjects, an input dataset is constructed
-    dataSet = currentInput.asDataset()
     try:
       inputDataset = dataSet[self.parameters['targets']]
     except KeyError:
       missing = [var for var in self.parameters['targets'] if var not in dataSet]
-      self.raiseAnError(KeyError, "Variables: '{}' missing from dataset '{}'!".format(", ".join(missing),currentInput.name))
-    self.sampleTag = currentInput.sampleTag
+      self.raiseAnError(KeyError, "Variables: '{}' missing from dataset '{}'!".format(", ".join(missing),self.inputDataObjectName))
+    self.sampleTag = 'RAVEN_sample_ID'

-    if currentInput.type == 'HistorySet':
+    if self.dynamic:
       dims = inputDataset.sizes.keys()
       if self.pivotParameter is None:
-        if len(dims) > 1:
-          self.raiseAnError(IOError, self, 'Time-dependent statistics is requested (HistorySet) but no pivotParameter \
-            got inputted!')
+        self.raiseAnError(IOError, self, 'Time-dependent statistics is requested (HistorySet) but no pivotParameter \
+          got inputted!')
       elif self.pivotParameter not in dims:
         self.raiseAnError(IOError, self, 'Pivot parameter', self.pivotParameter, 'is not the associated index for \
           requested variables', ','.join(self.parameters['targets']))
-      else:
-        self.dynamic = True
-        if not currentInput.checkIndexAlignment(indexesToCheck=self.pivotParameter):
-          self.raiseAnError(IOError, "The data provided by the data objects", currentInput.name, "is not synchronized!")
-        self.pivotValue = inputDataset[self.pivotParameter].values
-        if self.pivotValue.size != len(inputDataset.groupby(self.pivotParameter)):
-          msg = "Duplicated values were identified in pivot parameter, please use the 'HistorySetSync'" + \
-                " PostProcessor to syncronize your data before running 'BasicStatistics' PostProcessor."
-          self.raiseAnError(IOError, msg)
+      self.pivotValue = dataSet[self.pivotParameter].values
+      if self.pivotValue.size != len(dataSet.groupby(self.pivotParameter)):
+        msg = "Duplicated values were identified in pivot parameter, please use the 'HistorySetSync'" + \
+              " PostProcessor to syncronize your data before running 'BasicStatistics' PostProcessor."
+        self.raiseAnError(IOError, msg)
     # extract all required meta data
-    metaVars = currentInput.getVars('meta')
-    self.pbPresent = True if 'ProbabilityWeight' in metaVars else False
+    self.pbPresent = 'ProbabilityWeight' in dataSet
     if self.pbPresent:
       pbWeights = xr.Dataset()
       self.realizationWeight = dataSet[['ProbabilityWeight']]/dataSet[['ProbabilityWeight']].sum()
       pbWeights['ProbabilityWeight'] = self.realizationWeight['ProbabilityWeight']
       for target in self.parameters['targets']:
         pbName = 'ProbabilityWeight-' + target
-        if pbName in metaVars:
+        if pbName in dataSet:
           pbWeights[target] = dataSet[pbName]/dataSet[pbName].sum()
         elif self.pbPresent:
           pbWeights[target] = self.realizationWeight['ProbabilityWeight']
```

> **Review comment** (on the `raiseAnError` message above): "was provided" instead of "got inputted".
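To make the refactor concrete, here is a minimal sketch of the new `'xrDataset'` input contract and the weight normalization it performs; the toy dataset and the `inputIn` literal are illustrative stand-ins for a real RAVEN DataObject, not RAVEN API calls.

```python
import numpy as np
import xarray as xr

rng = np.random.default_rng(42)
n = 100
dataSet = xr.Dataset(
    {
        "x": ("RAVEN_sample_ID", rng.normal(size=n)),
        "ProbabilityWeight": ("RAVEN_sample_ID", rng.uniform(0.5, 1.5, size=n)),
    },
    coords={"RAVEN_sample_ID": np.arange(n)},
)

# The refactored inputToInternal now receives a dict of this shape:
inputIn = {"Data": [(["x"], [], dataSet)]}  # (input vars, output vars, xr.Dataset)

inpVars, outVars, ds = inputIn["Data"][0]
# Normalize the realization weights so they sum to one, as the new code path does:
realizationWeight = ds[["ProbabilityWeight"]] / ds[["ProbabilityWeight"]].sum()
print(float(realizationWeight["ProbabilityWeight"].sum()))  # ~1.0
```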
```diff
@@ -269,6 +212,18 @@ def inputToInternal(self, currentInp):

     return inputDataset, pbWeights

+  def resetProbabilityWeight(self, pbWeights):
+    """
+      Reset probability weight using given pbWeights
+      @ In, pbWeights, xr.Dataset, dataset contains probability weights and
+        variable probability weight
+      @ Out, None
+    """
+    if 'ProbabilityWeight' in pbWeights:
+      self.realizationWeight = xr.Dataset()
+      self.realizationWeight['ProbabilityWeight'] = pbWeights['ProbabilityWeight']
+
   def initialize(self, runInfo, inputs, initDict):
     """
       Method to initialize the BasicStatistic pp. In here the working dir is
```
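A hedged sketch of the intended call pattern for the new `resetProbabilityWeight` hook, using a hypothetical `MiniStats` mock (not the real class) so the semantics are runnable in isolation:

```python
import numpy as np
import xarray as xr

class MiniStats:
    """Hypothetical mock reproducing resetProbabilityWeight's semantics."""
    def __init__(self):
        self.realizationWeight = None

    def resetProbabilityWeight(self, pbWeights):
        # Same logic as the method added in the diff above
        if 'ProbabilityWeight' in pbWeights:
            self.realizationWeight = xr.Dataset()
            self.realizationWeight['ProbabilityWeight'] = pbWeights['ProbabilityWeight']

# Each subdomain cell carries a (dataset, pbWeights) pair; the weights are
# re-pointed before the per-cell statistics run, mirroring
# self.stat.resetProbabilityWeight(data[1]) in SubdomainBasicStatistics.run.
pb = xr.Dataset({'ProbabilityWeight': ('RAVEN_sample_ID', np.full(4, 0.25))})
cellData = (xr.Dataset({'x': ('RAVEN_sample_ID', np.arange(4.0))}), pb)

stat = MiniStats()
stat.resetProbabilityWeight(cellData[1])
print(stat.realizationWeight['ProbabilityWeight'].values)  # [0.25 0.25 0.25 0.25]
```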
```diff
@@ -278,6 +233,9 @@ def initialize(self, runInfo, inputs, initDict):
       @ In, initDict, dict, dictionary with initialization options
       @ Out, None
     """
+    if len(inputs) > 1:
+      self.raiseAnError(IOError, 'Post-Processor', self.name, 'accepts only one DataObject')
+    self.inputDataObjectName = inputs[-1].name
     #construct a list of all the parameters that have requested values into self.allUsedParams
     self.allUsedParams = set()
     for metricName in self.scalarVals + self.vectorVals:
```
```diff
@@ -295,6 +253,8 @@ def initialize(self, runInfo, inputs, initDict):
     inputObj = inputs[-1] if type(inputs) == list else inputs
     if inputObj.type == 'HistorySet':
       self.dynamic = True
+      if not inputObj.checkIndexAlignment(indexesToCheck=self.pivotParameter):
+        self.raiseAnError(IOError, "The data provided by the input data object is not synchronized!")
     inputMetaKeys = []
     outputMetaKeys = []
     for metric, infos in self.toDo.items():
```
```diff
@@ -1558,6 +1518,21 @@ def spearmanCorrelation(self, featVars, targVars, featSamples, targSamples, pbWe
     da = xr.DataArray(spearmanMat, dims=('targets','features'), coords={'targets':targVars,'features':featVars})
     return da

+  def _runLegacy(self, inputIn):
+    """
+      This method executes the postprocessor action with the old data format. In this case, it computes all the requested statistical FOMs
+      @ In, inputIn, object, object contained the data to process. (inputToInternal output)
+      @ Out, outputSet, xarray.Dataset or dictionary, dataset or dictionary containing the results
+    """
+    if type(inputIn).__name__ == 'PointSet':
+      merged = inputIn.asDataset()
+    elif 'metadata' in inputIn:
+      merged = xr.merge([inputIn['metadata'],inputIn['targets']])
+    else:
+      merged = xr.merge([inputIn['targets']])
+    newInputIn = {'Data':[[None,None,merged]]}
+    return self.run(newInputIn)
+
   def run(self, inputIn):
     """
       This method executes the postprocessor action. In this case, it computes all the requested statistical FOMs
```
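For readers unfamiliar with the legacy format, this sketch shows how a legacy `{'targets': ..., 'metadata': ...}` dict maps onto the new `{'Data': [[None, None, merged]]}` wrapper, mirroring the `_runLegacy` shim above; the toy datasets are assumptions, not RAVEN objects.

```python
import numpy as np
import xarray as xr

n = 5
legacy = {
    'targets': xr.Dataset({'y': ('RAVEN_sample_ID', np.linspace(0.0, 1.0, n))}),
    'metadata': xr.Dataset({'ProbabilityWeight': ('RAVEN_sample_ID', np.full(n, 1.0 / n))}),
}
# Merge metadata and targets into one xr.Dataset, then wrap it the way _runLegacy does:
merged = xr.merge([legacy['metadata'], legacy['targets']])
newInputIn = {'Data': [[None, None, merged]]}
print(list(newInputIn['Data'][0][-1].data_vars))  # ['ProbabilityWeight', 'y']
```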
Second changed file (the `SubdomainBasicStatistics` postprocessor):
```diff
@@ -21,13 +21,12 @@
 #External Modules End-----------------------------------------------------------

 #Internal Modules---------------------------------------------------------------
-from .PostProcessorInterface import PostProcessorInterface
+from .PostProcessorReadyInterface import PostProcessorReadyInterface
 from .BasicStatistics import BasicStatistics
 from ...utils import utils
 from ...utils import InputData, InputTypes
 #Internal Modules End-----------------------------------------------------------

-class SubdomainBasicStatistics(PostProcessorInterface):
+class SubdomainBasicStatistics(PostProcessorReadyInterface):
   """
     Subdomain basic statitistics class. It computes all statistics on subdomains
   """
```
```diff
@@ -76,6 +75,9 @@ def __init__(self):
     self.validDataType = ['PointSet', 'HistorySet', 'DataSet']
     self.outputMultipleRealizations = True
     self.printTag = 'PostProcessor SUBDOMAIN STATISTICS'
+    self.inputDataObjectName = None # name for input data object
+    self.setInputDataType('xrDataset')
+    self.sampleTag = 'RAVEN_sample_ID'

   def inputToInternal(self, currentInp):
     """
```

> **Review comment** (on `self.sampleTag = 'RAVEN_sample_ID'`): If this is defined in the base class, then do we need it here too?
```diff
@@ -88,15 +90,12 @@ def inputToInternal(self, currentInp):
     cellIDs = self.gridEntity.returnCellIdsWithCoordinates()
     dimensionNames = self.gridEntity.returnParameter('dimensionNames')
     self.dynamic = False
-    currentInput = currentInp[-1] if type(currentInp) == list else currentInp
-    if len(currentInput) == 0:
-      self.raiseAnError(IOError, "In post-processor " +self.name+" the input "+currentInput.name+" is empty.")
-    if currentInput.type not in ['PointSet','HistorySet']:
-      self.raiseAnError(IOError, self, 'This Postprocessor accepts PointSet and HistorySet only! Got ' + currentInput.type)
-
-    # extract all required data from input DataObjects, an input dataset is constructed
-    dataSet = currentInput.asDataset()
-    processedDataSet, pbWeights = self.stat.inputToInternal(currentInput)
+    inpVars, outVars, dataSet = currentInp['Data'][0]
+    processedDataSet, pbWeights = self.stat.inputToInternal(currentInp)
+    self.sampleSize = dataSet.sizes[self.sampleTag]

     for cellId, verteces in cellIDs.items():
       # create masks
       maskDataset = None
```
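The per-cell masking itself sits outside this hunk, but the general pattern is plain xarray boolean masking. A simplified sketch, assuming per-dimension `(low, high)` bounds in place of the real `gridEntity` vertex bookkeeping:

```python
import numpy as np
import xarray as xr

rng = np.random.default_rng(0)
ds = xr.Dataset({'a': ('RAVEN_sample_ID', rng.uniform(0.0, 10.0, 50))})
bounds = {'a': (2.0, 5.0)}  # hypothetical cell bounds, one (low, high) pair per dimension

# Build a boolean mask selecting the samples that fall inside the cell
mask = xr.ones_like(ds['a'], dtype=bool)
for dim, (lo, hi) in bounds.items():
    mask = mask & (ds[dim] >= lo) & (ds[dim] < hi)
cellDataset = ds.where(mask, drop=True)
print(cellDataset.sizes['RAVEN_sample_ID'])  # number of samples in the cell
```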
```diff
@@ -118,9 +117,9 @@ def inputToInternal(self, currentInp):
       # check if at least sample is available (for scalar quantities) and at least 2 samples for derivative quantities
       setWhat = set(self.stat.what)
       minimumNumberOfSamples = 2 if len(setWhat.intersection(set(self.stat.vectorVals))) > 0 else 1
-      if len(cellDataset[currentInput.sampleTag]) < minimumNumberOfSamples:
+      if self.sampleSize < minimumNumberOfSamples:
         self.raiseAnError(RuntimeError,"Number of samples in cell "
-                          f"{cellId} < {minimumNumberOfSamples}. Found {len(cellDataset[currentInput.sampleTag])}"
+                          f"{cellId} < {minimumNumberOfSamples}. Found {self.sampleSize}"
                           " samples within the cell. Please make the evaluation grid coarser or increase number of samples!")

       # store datasets
```
```diff
@@ -172,7 +171,8 @@ def run(self, inputIn):
     midPoint = self.gridEntity.returnCellsMidPoints(returnDict=True)
     firstPass = True
     for i, (cellId, data) in enumerate(inputData.items()):
-      cellData = self.stat.inputToInternal(data)
+      cellData = data
+      self.stat.resetProbabilityWeight(data[1])
       res = self.stat._runLocal(cellData)
       for k in res:
         if firstPass:
```
```diff
@@ -185,8 +185,9 @@ def run(self, inputIn):
           results[k][i] = np.atleast_1d(midPoint[cellId][k])
       firstPass = False
     outputRealization['data'] = results
+    indexes = inputIn['Data'][0][-1].indexes
     if self.stat.dynamic:
-      dims = dict.fromkeys(results.keys(), inputIn[-1].indexes if type(inputIn) == list else inputIn.indexes)
+      dims = dict.fromkeys(results.keys(), indexes)
       for k in list(midPoint.values())[0]:
         dims[k] = []
       outputRealization['dims'] = dims
```
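For reference, `inputIn['Data'][0][-1].indexes` is the standard xarray `Dataset.indexes` mapping, keyed by dimension; a small sketch with an assumed two-dimensional dataset:

```python
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {'T': (('RAVEN_sample_ID', 'time'), np.zeros((3, 4)))},
    coords={'RAVEN_sample_ID': np.arange(3), 'time': np.linspace(0.0, 1.0, 4)},
)
inputIn = {'Data': [[None, None, ds]]}
indexes = inputIn['Data'][0][-1].indexes  # used above to tag dynamic results
print(list(indexes))  # ['RAVEN_sample_ID', 'time']
```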
> **Review comment:** this reference does not indicate if it is a journal or a conference proceeding.
>
> **Reply:** updated