-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2035 from NNPDF/Jet_commondata
Polarised Jet commondata implementation
- Loading branch information
Showing
173 changed files
with
27,704 additions
and
88 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
import numpy as np | ||
from numpy.linalg import eig | ||
|
||
|
||
def upper_triangular_to_symmetric(ut, dim):
    """Build a symmetric matrix from its flattened upper triangle.

    Parameters
    ----------
    ut : sequence of float
        The ``dim * (dim + 1) / 2`` entries of the upper triangle
        (including the diagonal), listed row by row.
    dim : int
        Dimension of the (square) output matrix.

    Returns
    -------
    np.ndarray
        The full symmetric ``dim x dim`` matrix.
    """
    corr = np.zeros((dim, dim))
    last = dim
    first = 0
    for i in range(dim):
        # Row i holds dim - i entries starting on the diagonal.
        corr[i, i:] = ut[first:last]
        last += dim - i - 1
        first += dim - i
    # BUGFIX: previously only the upper triangle was filled, so the result
    # was not actually symmetric as documented. Mirror it across the
    # diagonal (subtracting the diagonal once to avoid double counting).
    return corr + corr.T - np.diag(np.diag(corr))
|
||
|
||
def compute_covmat(corrmat: np.ndarray, unc: np.ndarray, ndata: int) -> list:
    """Turn a correlation matrix and per-point uncertainties into
    artificial statistical uncertainties.

    The covariance is ``cov[i, j] = unc[i] * corrmat[i, j] * unc[j]``,
    which is then decomposed by ``covmat_to_artunc``.
    """
    # Scale each row and column of the correlation matrix by the
    # corresponding uncertainty (equivalent to einsum "i,ij,j->ij").
    cov_mat = np.einsum("i,ij,j->ij", unc, corrmat, unc)
    flat_entries = cov_mat.flatten().tolist()
    return covmat_to_artunc(ndata, flat_entries)
|
||
|
||
def covmat_to_artunc(ndata, covmat_list, no_of_norm_mat=0):
    r"""Decompose a covariance matrix into artificial uncertainties.

    NOTE: This function has been taken from validphys.newcommondata_utils.
    If those utils get merged in the future, we can replace this.

    Parameters
    ----------
    ndata : int
        Number of data points.
    covmat_list : list
        Flattened covariance matrix, row by row, as experimental
        datasets typically provide it.
    no_of_norm_mat : int
        Normalized covariance matrices may carry a zero eigenvalue per
        normalized distribution (the last point is not linearly
        independent), so up to this many near-zero eigenvalues are
        tolerated. Use 0 (default) for absolute distributions, 1 for a
        single normalized distribution, 3 for a cross covmat of three
        normalized distributions, etc.

    Returns
    -------
    artunc : list of list
        ``ndata x ndata`` matrix of artificial uncertainties; the i-th
        row holds the artificial uncertainties of the i-th data point.

    Raises
    ------
    ValueError
        If the matrix is not positive-semidefinite within tolerance.
    """
    # Tolerance below zero within which an eigenvalue is treated as a
    # numerical zero rather than a genuine negative eigenvalue.
    epsilon = -1e-10
    covmat = np.zeros((ndata, ndata))
    for pos, entry in enumerate(covmat_list):
        covmat[pos // ndata, pos % ndata] = entry

    eigval, eigvec = eig(covmat)

    psd_check = True
    near_zero_count = 0
    for val in eigval:
        if val < epsilon:
            # Clearly negative: not positive-semidefinite.
            psd_check = False
        elif val > epsilon and val <= 0:
            # Numerically-zero eigenvalue; only no_of_norm_mat of these
            # are expected from normalization constraints.
            near_zero_count += 1
            if near_zero_count == no_of_norm_mat + 1:
                psd_check = False
        elif val > 0:
            continue

    if not psd_check:
        raise ValueError("The covariance matrix is not positive-semidefinite")

    # Column j of the eigenvector matrix scaled by sqrt(eigenvalue j);
    # columns with (tolerated) negative eigenvalues stay zero.
    artunc = np.zeros((ndata, ndata))
    for j, val in enumerate(eigval):
        if val >= 0:
            artunc[:, j] = eigvec[:, j] * np.sqrt(val)
    return artunc.tolist()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
|
||
import numpy as np | ||
|
||
def symmetrize_errors(delta_plus, delta_minus):
    r"""Symmetrize an asymmetric uncertainty pair.

    Parameters
    ----------
    delta_plus : float
        The top/plus uncertainty, with its sign.
    delta_minus : float
        The bottom/minus uncertainty, with its sign.

    Returns
    -------
    se_delta : float
        Shift to be added to the central value.
    se_sigma : float
        Symmetrized uncertainty to be used in the commondata.
    """
    # Half-sum is the shift of the central value; half-difference is the
    # average half-width of the asymmetric interval.
    shift = (delta_plus + delta_minus) / 2
    half_width = (delta_plus - delta_minus) / 2
    sym_unc = np.sqrt(half_width * half_width + 2 * shift * shift)
    return shift, sym_unc
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
7 changes: 7 additions & 0 deletions
7
nnpdf_data/nnpdf_data/new_commondata/PHENIX_1JET_200GEV/data.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
data_central: | ||
- -0.0014 | ||
- -0.0005 | ||
- 0.0058 | ||
- 0.0034 | ||
- 0.0077 | ||
- -0.0181 |
101 changes: 101 additions & 0 deletions
101
nnpdf_data/nnpdf_data/new_commondata/PHENIX_1JET_200GEV/filter.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
import pandas as pd | ||
import yaml | ||
|
||
POL_UNC = 0.094 | ||
|
||
|
||
def read_data():
    """Read the HEPData table into a flat DataFrame.

    Reads ``rawdata/Table4.yaml`` and returns one row per pT bin with
    columns: pT, pTmin, pTmax, eta, eta_min, eta_max, sqrts, ALL, stat,
    pol (the last being the relative polarization uncertainty applied
    to |ALL|).
    """
    with open("rawdata/Table4.yaml", "r") as file:
        data = yaml.safe_load(file)

    pT_bins = data["independent_variables"][0]["values"]
    pT_mids = data["dependent_variables"][0]["values"]
    asyms = data["dependent_variables"][1]["values"]

    # Build all rows first and create the DataFrame once:
    # pd.concat inside a loop is quadratic and deprecated practice.
    rows = []
    for pt_bin, pt_mid, asym in zip(pT_bins, pT_mids, asyms):
        rows.append(
            {
                "pT": pt_mid["value"],
                "pTmin": pt_bin["low"],
                "pTmax": pt_bin["high"],
                # Detector acceptance is |eta| < 0.35, midpoint 0.
                "eta": 0.0,
                "eta_min": -0.35,
                "eta_max": 0.35,
                "sqrts": 200,
                "ALL": asym["value"],
                "stat": asym["errors"][0]["symerror"],
            }
        )
    df = pd.DataFrame(rows)

    df["pol"] = POL_UNC * abs(df["ALL"])
    return df
|
||
|
||
def write_data(df):
    """Dump the commondata YAML files for the dataset.

    Writes three files in the current directory:
    ``data.yaml`` (central values), ``kinematics.yaml`` (per-bin pT,
    sqrts and eta ranges) and ``uncertainties.yaml`` (statistical and
    polarization uncertainties with their definitions).

    Parameters
    ----------
    df : pd.DataFrame
        Table produced by ``read_data``; must contain the columns
        ALL, pT, pTmin, pTmax, sqrts, eta, eta_min, eta_max, stat, pol.
    """
    # Central values: iterate the column directly instead of repeated
    # scalar df.loc[i, ...] lookups.
    data_central = [float(v) for v in df["ALL"]]
    with open("data.yaml", "w") as file:
        yaml.dump({"data_central": data_central}, file, sort_keys=False)

    # Kinematics: one dict of (min, mid, max) ranges per bin.
    kin = []
    for _, row in df.iterrows():
        kin.append(
            {
                "pT": {
                    "min": float(row["pTmin"]),
                    "mid": float(row["pT"]),
                    "max": float(row["pTmax"]),
                },
                "sqrts": {"min": None, "mid": float(row["sqrts"]), "max": None},
                "eta": {
                    "min": float(row["eta_min"]),
                    "mid": float(row["eta"]),
                    "max": float(row["eta_max"]),
                },
            }
        )
    with open("kinematics.yaml", "w") as file:
        yaml.dump({"bins": kin}, file, sort_keys=False)

    # Uncertainties: per-bin values plus the definitions block.
    error = [
        {"stat": float(row["stat"]), "pol": float(row["pol"])}
        for _, row in df.iterrows()
    ]
    error_definition = {
        "stat": {
            "description": "statistical uncertainty",
            "treatment": "ADD",
            "type": "UNCORR",
        },
        "pol": {
            "description": "beam polarization uncertainty",
            "treatment": "MULT",
            "type": "RHIC2005POL",
        },
    }
    with open("uncertainties.yaml", "w") as file:
        yaml.dump(
            {"definitions": error_definition, "bins": error}, file, sort_keys=False
        )
|
||
|
||
if __name__ == "__main__":
    # Script entry point: read the raw HEPData table and emit the
    # commondata YAML files.
    write_data(read_data())
Oops, something went wrong.