-
Notifications
You must be signed in to change notification settings - Fork 393
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #756 from cms-analysis/rebase_548_onto_112x
Cherry-pick #548 merge into 112x
- Loading branch information
Showing
5 changed files
with
225 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
imax 1 | ||
jmax 1 | ||
kmax * | ||
-------------------------------------------------------------------------------- | ||
shapes * * simple-shapes-df_input.csv $CHANNEL:$PROCESS:nominal,sum_w:sum_ww $CHANNEL:$PROCESS:$SYSTEMATIC,sum_w:sum_ww | ||
-------------------------------------------------------------------------------- | ||
bin bin1 | ||
observation 85 | ||
-------------------------------------------------------------------------------- | ||
bin bin1 bin1 | ||
process signal background | ||
process 0 1 | ||
rate 10 100 | ||
-------------------------------------------------------------------------------- | ||
lumi lnN 1.10 1.0 | ||
bgnorm lnN 1.00 1.3 | ||
alpha shapeN2 - 1 uncertainty on background shape and normalization | ||
sigma shapeN2 0.5 - uncertainty on signal resolution. Assume the histogram is a 2 sigma shift, | ||
# so divide the unit gaussian by 2 before doing the interpolationkk | ||
-------------------------------------------------------------------------------- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
channel,process,systematic,bin,sum_w,sum_ww | ||
bin1,background,alphaDown,0.0,30.224781036376953,30.224781036376957 | ||
bin1,background,alphaDown,1.0,20.260276794433594,20.26027679443359 | ||
bin1,background,alphaDown,2.0,13.580870628356934,13.580870628356934 | ||
bin1,background,alphaDown,3.0,9.103529930114746,9.103529930114746 | ||
bin1,background,alphaDown,4.0,6.102278709411621,6.102278709411621 | ||
bin1,background,alphaDown,5.0,4.090479373931885,4.090479373931884 | ||
bin1,background,alphaDown,6.0,2.7419304847717285,2.741930484771729 | ||
bin1,background,alphaDown,7.0,1.8379708528518677,1.837970852851868 | ||
bin1,background,alphaDown,8.0,1.2320287227630615,1.2320287227630617 | ||
bin1,background,alphaDown,9.0,0.8258535861968994,0.8258535861968994 | ||
bin1,background,alphaDown,10.0,0.0,0.0 | ||
bin1,background,alphaUp,0.0,24.10872459411621,24.108724594116207 | ||
bin1,background,alphaUp,1.0,19.738554000854492,19.738554000854492 | ||
bin1,background,alphaUp,2.0,16.16056251525879,16.16056251525879 | ||
bin1,background,alphaUp,3.0,13.231148719787598,13.231148719787598 | ||
bin1,background,alphaUp,4.0,10.832748413085938,10.832748413085938 | ||
bin1,background,alphaUp,5.0,8.869104385375977,8.869104385375975 | ||
bin1,background,alphaUp,6.0,7.26140832901001,7.261408329010011 | ||
bin1,background,alphaUp,7.0,5.945138454437256,5.945138454437256 | ||
bin1,background,alphaUp,8.0,4.867467403411865,4.867467403411864 | ||
bin1,background,alphaUp,9.0,3.985145330429077,3.985145330429077 | ||
bin1,background,alphaUp,10.0,0.0,0.0 | ||
bin1,background,nominal,0.0,27.27617835998535,27.276178359985355 | ||
bin1,background,nominal,1.0,20.20669174194336,20.206691741943363 | ||
bin1,background,nominal,2.0,14.96948528289795,14.969485282897947 | ||
bin1,background,nominal,3.0,11.089667320251465,11.089667320251465 | ||
bin1,background,nominal,4.0,8.21542739868164,8.21542739868164 | ||
bin1,background,nominal,5.0,6.0861382484436035,6.086138248443604 | ||
bin1,background,nominal,6.0,4.508721828460693,4.508721828460693 | ||
bin1,background,nominal,7.0,3.3401434421539307,3.3401434421539307 | ||
bin1,background,nominal,8.0,2.4744391441345215,2.474439144134521 | ||
bin1,background,nominal,9.0,1.8331094980239868,1.833109498023987 | ||
bin1,background,nominal,10.0,0.0,0.0 | ||
bin1,data_obs,nominal,0.0,22.0,0.0 | ||
bin1,data_obs,nominal,1.0,14.0,14.0 | ||
bin1,data_obs,nominal,2.0,16.0,16.0 | ||
bin1,data_obs,nominal,3.0,13.0,12.999999999999998 | ||
bin1,data_obs,nominal,4.0,5.0,5.000000000000001 | ||
bin1,data_obs,nominal,5.0,5.0,5.000000000000001 | ||
bin1,data_obs,nominal,6.0,5.0,5.000000000000001 | ||
bin1,data_obs,nominal,7.0,3.0,2.9999999999999996 | ||
bin1,data_obs,nominal,8.0,2.0,2.0000000000000004 | ||
bin1,data_obs,nominal,9.0,0.0,0.0 | ||
bin1,data_obs,nominal,10.0,0.0,0.0 | ||
bin1,data_sig,nominal,0.0,30.0,0.0 | ||
bin1,data_sig,nominal,1.0,13.0,12.999999999999998 | ||
bin1,data_sig,nominal,2.0,13.0,12.999999999999998 | ||
bin1,data_sig,nominal,3.0,10.0,10.000000000000002 | ||
bin1,data_sig,nominal,4.0,11.0,11.0 | ||
bin1,data_sig,nominal,5.0,13.0,12.999999999999998 | ||
bin1,data_sig,nominal,6.0,6.0,5.999999999999999 | ||
bin1,data_sig,nominal,7.0,6.0,5.999999999999999 | ||
bin1,data_sig,nominal,8.0,3.0,2.9999999999999996 | ||
bin1,data_sig,nominal,9.0,2.0,2.0000000000000004 | ||
bin1,data_sig,nominal,10.0,0.0,0.0 | ||
bin1,signal,nominal,0.0,1.0769933851406677e-06,1.0769933851406677e-06 | ||
bin1,signal,nominal,1.0,0.0001598399830982089,0.00015983998309820893 | ||
bin1,signal,nominal,2.0,0.008726967498660088,0.008726967498660088 | ||
bin1,signal,nominal,3.0,0.17528583109378815,0.17528583109378812 | ||
bin1,signal,nominal,4.0,1.295196771621704,1.295196771621704 | ||
bin1,signal,nominal,5.0,3.520709991455078,3.5207099914550777 | ||
bin1,signal,nominal,6.0,3.520709991455078,3.5207099914550777 | ||
bin1,signal,nominal,7.0,1.295196771621704,1.295196771621704 | ||
bin1,signal,nominal,8.0,0.17528583109378815,0.17528583109378812 | ||
bin1,signal,nominal,9.0,0.008726967498660088,0.008726967498660088 | ||
bin1,signal,nominal,10.0,0.0,0.0 | ||
bin1,signal,sigmaDown,0.0,2.240517341968451e-13,2.2405173419684507e-13 | ||
bin1,signal,sigmaDown,1.0,6.052358614283548e-09,6.052358614283548e-09 | ||
bin1,signal,sigmaDown,2.0,2.124152706528548e-05,2.1241527065285485e-05 | ||
bin1,signal,sigmaDown,3.0,0.009685711935162544,0.009685711935162543 | ||
bin1,signal,sigmaDown,4.0,0.5738020539283752,0.5738020539283751 | ||
bin1,signal,sigmaDown,5.0,4.4164910316467285,4.4164910316467285 | ||
bin1,signal,sigmaDown,6.0,4.4164910316467285,4.4164910316467285 | ||
bin1,signal,sigmaDown,7.0,0.5738020539283752,0.5738020539283751 | ||
bin1,signal,sigmaDown,8.0,0.009685711935162544,0.009685711935162543 | ||
bin1,signal,sigmaDown,9.0,2.124152706528548e-05,2.1241527065285485e-05 | ||
bin1,signal,sigmaDown,10.0,0.0,0.0 | ||
bin1,signal,sigmaUp,0.0,0.006812803912907839,0.00681280391290784 | ||
bin1,signal,sigmaUp,1.0,0.048034943640232086,0.048034943640232086 | ||
bin1,signal,sigmaUp,2.0,0.22916190326213837,0.22916190326213834 | ||
bin1,signal,sigmaUp,3.0,0.739743709564209,0.739743709564209 | ||
bin1,signal,sigmaUp,4.0,1.6157487630844116,1.6157487630844116 | ||
bin1,signal,sigmaUp,5.0,2.3879218101501465,2.3879218101501465 | ||
bin1,signal,sigmaUp,6.0,2.3879218101501465,2.3879218101501465 | ||
bin1,signal,sigmaUp,7.0,1.6157487630844116,1.6157487630844116 | ||
bin1,signal,sigmaUp,8.0,0.739743709564209,0.739743709564209 | ||
bin1,signal,sigmaUp,9.0,0.22916190326213837,0.22916190326213834 | ||
bin1,signal,sigmaUp,10.0,0.0,0.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
import numpy as np | ||
import pandas as pd | ||
import ROOT | ||
|
||
class DataFrameWrapper(object): | ||
th1_class = "TH1F" | ||
def __init__(self, path, ext, load=True): | ||
""" | ||
path: file system path to the dataframe on disk | ||
ext: file extension used to interpret the file based on pandas IO | ||
load: skip loading the dataframe if False | ||
""" | ||
self.path = path | ||
self.ext = ext | ||
|
||
self.read_args = [] | ||
self.read_kwargs = {} | ||
if load: | ||
self.df = self.load_dataframe() | ||
|
||
def load_dataframe(self): | ||
""" | ||
Use pandas IO tools to load a dataframe from any of the following: | ||
["csv", "json", "html", "pkl", "xlsx", "h5", "parquet"] | ||
""" | ||
# Index all columns apart from the last 2 (taken as sum_w and sum_ww) | ||
# for csv, json, html and xlsx | ||
if self.ext == ".csv": | ||
df = pd.read_csv(self.path, *self.read_args, **self.read_kwargs) | ||
return df.set_index(df.columns.tolist()[:-2]) | ||
elif self.ext == ".json": | ||
df = pd.read_json(self.path, *self.read_args, **self.read_kwargs) | ||
return df.set_index(df.columns.tolist()[:-2]) | ||
elif self.ext == ".html": | ||
df = pd.read_html(self.path, *self.read_args, **self.read_kwargs) | ||
return df.set_index(df.columns.tolist()[:-2]) | ||
elif self.ext == ".pkl": | ||
return pd.read_pickle(self.path, *self.read_args, **self.read_kwargs) | ||
elif self.ext == ".xlsx": | ||
if ":" in self.path: | ||
filepath, sheetname = self.path.split(":") | ||
else: | ||
filepath, sheetname = self.path, 0 | ||
|
||
# read in columns first | ||
cols = pd.read_excel( | ||
self.path, sheetname, *self.read_args, **self.read_kwargs | ||
).columns.tolist() | ||
return pd.read_excel( | ||
self.path, sheetname, index_col=list(range(len(cols)-2)), | ||
*self.read_args, **self.read_kwargs | ||
) | ||
elif self.ext == ".h5": | ||
filepath, internalpath = self.path.split(":") | ||
return pd.read_hdf(filepath, internalpath, *self.read_args, **self.read_kwargs) | ||
elif self.ext == ".parquet": | ||
return pd.read_parquet(self.path, *self.read_args, **self.read_kwargs) | ||
return None | ||
|
||
def Get(self, object_name): | ||
""" | ||
Mimic ROOT file Get function to return a ROOT.TH1. | ||
object_name: dataframe index/columns selection split by ":" (used in .loc) | ||
e.g. "125:bin1:signal:sigmaUp,event_count:event_variance" to select | ||
the index (125, bin1, signal, sigmaUp) and columns (event_count, | ||
event_variance) | ||
""" | ||
if not hasattr(self, 'df'): | ||
raise AttributeError("Dataframe has not been loaded") | ||
|
||
index_labels, column_labels = object_name.split(",") | ||
|
||
# Try to cast index_labels into self.df.index dtypes. Users can only | ||
# input index_labels as a string, but the dataframe might have other | ||
# dtypes (e.g. int, float, ...) | ||
selection = [ | ||
np.array([index_labels.split(":")[idx]]).astype( | ||
self.df.index.get_level_values(idx).dtype | ||
)[0] for idx in range(len(index_labels.split(":"))) | ||
] | ||
|
||
df_hist = self.df.loc[tuple(selection), column_labels.split(":")] | ||
|
||
# index name used for th1 name | ||
df_hist.index.names = [index_labels.replace(":","_")] | ||
return self.convert_to_th1(df_hist, self.th1_class) | ||
|
||
@staticmethod | ||
def convert_to_th1(df, th1_class): | ||
""" | ||
Receive a dataframe and convert it to a TH1. Index is taken as the | ||
binning for labelling. Last bin is overflow. | ||
""" | ||
name = df.index.names[0] | ||
nbins = df.shape[0]-1 | ||
th1 = getattr(ROOT, th1_class)(name, name, nbins, 0., float(nbins)) | ||
for i in range(nbins+1): | ||
sum_w, sum_ww = df.iloc[i] | ||
th1.SetBinContent(i+1, sum_w) | ||
th1.SetBinError(i+1, np.sqrt(sum_ww)) | ||
return th1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters