Skip to content

Commit

Permalink
move TOPO_DEF to main poldata utils
Browse files Browse the repository at this point in the history
  • Loading branch information
Radonirinaunimi committed Jul 17, 2024
1 parent 6c53d9a commit 7271ab5
Show file tree
Hide file tree
Showing 13 changed files with 47 additions and 105 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,17 @@
import pandas as pd
import yaml

# Definition of the jet topologies used in the polarized (di)jet datasets.
# NOTE: the observable is symmetric under the exchange of jet1 and jet2,
# so the labels 1 and 2 do not imply any ordering in pT.

# The two |eta| windows from which every dijet topology below is assembled.
# The lower edge of the first window is deliberately the integer 0 (not 0.0)
# so that the stored values are exactly those of the explicit literal table.
_ETA_LOW = (0, 0.3)
_ETA_HIGH = (0.3, 0.9)


def _dijet_window(jet1, jet2):
    """Build the flat |eta| cut mapping of a dijet topology.

    Each argument is a ``(min, max)`` pair for the absolute pseudorapidity
    of the corresponding jet. A fresh dictionary is returned on every call,
    so no two topologies share the same object.
    """
    eta1_min, eta1_max = jet1
    eta2_min, eta2_max = jet2
    return {
        "abs_eta1_min": eta1_min,
        "abs_eta1_max": eta1_max,
        "abs_eta2_min": eta2_min,
        "abs_eta2_max": eta2_max,
    }


TOPO_DEF = {
    # "A" and "D" carry identical |eta| windows; presumably they differ by
    # information not encoded here (e.g. relative sign of eta) -- TODO confirm.
    "A": _dijet_window(_ETA_HIGH, _ETA_HIGH),
    "B": _dijet_window(_ETA_LOW, _ETA_HIGH),
    "C": _dijet_window(_ETA_LOW, _ETA_LOW),
    "D": _dijet_window(_ETA_HIGH, _ETA_HIGH),
    # "I" has a single |eta| window (no per-jet keys).
    "I": {"abs_eta_min": 0, "abs_eta_max": 0.9},
}


def read_central_values(path: Path) -> np.ndarray:
"""Read the central values from the theory predictions.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np

from nnpdf_data.filter_utils.eic_utils import (
from nnpdf_data.filter_utils.poldata_utils import (
fluctuate_data,
read_central_values,
read_excel,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np

from nnpdf_data.filter_utils.eic_utils import read_excel, write_data
from nnpdf_data.filter_utils.poldata_utils import read_excel, write_data

np.random.seed(1234567890)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np

from nnpdf_data.filter_utils.eic_utils import (
from nnpdf_data.filter_utils.poldata_utils import (
fluctuate_data,
read_central_values,
read_excel,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np

from nnpdf_data.filter_utils.eic_utils import (
from nnpdf_data.filter_utils.poldata_utils import (
fluctuate_data,
read_central_values,
read_excel,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np

from nnpdf_data.filter_utils.eic_utils import (
from nnpdf_data.filter_utils.poldata_utils import (
fluctuate_data,
read_central_values,
read_excel,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np

from nnpdf_data.filter_utils.eic_utils import (
from nnpdf_data.filter_utils.poldata_utils import (
fluctuate_data,
read_central_values,
read_txt_data,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np

from nnpdf_data.filter_utils.eic_utils import (
from nnpdf_data.filter_utils.poldata_utils import (
fluctuate_data,
read_central_values,
read_txt_data,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np

from nnpdf_data.filter_utils.eic_utils import (
from nnpdf_data.filter_utils.poldata_utils import (
fluctuate_data,
read_central_values,
read_txt_data,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np

from nnpdf_data.filter_utils.eic_utils import (
from nnpdf_data.filter_utils.poldata_utils import (
fluctuate_data,
read_central_values,
read_txt_data,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np

from nnpdf_data.filter_utils.eic_utils import (
from nnpdf_data.filter_utils.poldata_utils import (
fluctuate_data,
read_central_values,
read_txt_data,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
"""This script provides the common filer to the jet and dijet STAR 2012 datasets.
Files need to be parsed all together as there are correlations provided.
Files need to be parsed all together as there are correlations provided.
"""
import pathlib

import math
import pathlib

import numpy as np
import pandas as pd
import yaml

from nnpdf_data.filter_utils.correlations import (
compute_covmat,
upper_triangular_to_symmetric,
)
from nnpdf_data.filter_utils.correlations import compute_covmat, upper_triangular_to_symmetric
from nnpdf_data.filter_utils.poldata_utils import TOPO_DEF

# values from the paper https://arxiv.org/abs/1906.02740
SQRTS = 510
Expand All @@ -22,36 +21,6 @@
HERE = pathlib.Path(__file__).parent
RAWDATA_PATH = HERE / "rawdata/"

# NOTE: the observable is symmetric for jet1 and jet2,
# so 1 and 2 are not ordered in pT.
TOPO_DEF = {
"A": {
"abs_eta1_min": 0.3,
"abs_eta1_max": 0.9,
"abs_eta2_min": 0.3,
"abs_eta2_max": 0.9,
},
"B": {
"abs_eta1_min": 0,
"abs_eta1_max": 0.3,
"abs_eta2_min": 0.3,
"abs_eta2_max": 0.9,
},
"C": {
"abs_eta1_min": 0,
"abs_eta1_max": 0.3,
"abs_eta2_min": 0,
"abs_eta2_max": 0.3,
},
"D": {
"abs_eta1_min": 0.3,
"abs_eta1_max": 0.9,
"abs_eta2_min": 0.3,
"abs_eta2_max": 0.9,
},
"I": {"abs_eta_min": 0, "abs_eta_max": 0.9},
}


def read_1jet_data():
data_table = pathlib.Path(RAWDATA_PATH / "Figure12(Run12).csv")
Expand Down Expand Up @@ -116,9 +85,7 @@ def read_correlations(ndata_dict):
# build the block
try:
with open(RAWDATA_PATH / f"corr{a}Vs{b}.tex", encoding="utf-8") as file:
corr_df = pd.read_csv(
file, sep="&", skiprows=3, skipfooter=3, engine="python"
)
corr_df = pd.read_csv(file, sep="&", skiprows=3, skipfooter=3, engine="python")

# add some parsing
corr_vals = []
Expand Down Expand Up @@ -196,10 +163,7 @@ def write_1jet_data(df, art_sys):
}
# loop on data points
for i, sys_i in enumerate(art_sys):
e = {
"lumi_ue": float(df.loc[i, "lumi_ue"]),
"pol": float(df.loc[i, "pol"]),
}
e = {"lumi_ue": float(df.loc[i, "lumi_ue"]), "pol": float(df.loc[i, "pol"])}
# loop on art sys
for j, val in enumerate(sys_i):
e[f"sys_{j}"] = val
Expand Down Expand Up @@ -251,9 +215,7 @@ def write_2jet_data(df, topology, art_sys):
}
kin.append(kin_value)
kinematics_yaml = {"bins": kin}
with open(
STORE_PATH / f"kinematics_{topology}.yaml", "w", encoding="utf-8"
) as file:
with open(STORE_PATH / f"kinematics_{topology}.yaml", "w", encoding="utf-8") as file:
yaml.dump(kinematics_yaml, file)

# Write unc file
Expand All @@ -272,10 +234,7 @@ def write_2jet_data(df, topology, art_sys):
}
# loop on data points
for i, sys_i in enumerate(art_sys):
e = {
"lumi_ue": float(df.loc[i, "lumi_ue"]),
"pol": float(df.loc[i, "pol"]),
}
e = {"lumi_ue": float(df.loc[i, "lumi_ue"]), "pol": float(df.loc[i, "pol"])}
# loop on art sys
for j, val in enumerate(sys_i):
e[f"sys_{j}"] = val
Expand All @@ -294,9 +253,7 @@ def write_2jet_data(df, topology, art_sys):
)

uncertainties_yaml = {"definitions": error_definition, "bins": error}
with open(
STORE_PATH / f"uncertainties_{topology}.yaml", "w", encoding="utf-8"
) as file:
with open(STORE_PATH / f"uncertainties_{topology}.yaml", "w", encoding="utf-8") as file:
yaml.dump(uncertainties_yaml, file, sort_keys=False)


Expand All @@ -313,9 +270,7 @@ def write_2jet_data(df, topology, art_sys):
# by E.Aschenauer, see https://github.com/NNPDF/nnpdf/pull/2035#issuecomment-2201979662
correlated_unc = []
for a in TOPOPLOGY_LIST:
correlated_unc.extend(
np.sqrt(dfs[a]["syst"] ** 2 + dfs[a]["stat"] ** 2).values.tolist()
)
correlated_unc.extend(np.sqrt(dfs[a]["syst"] ** 2 + dfs[a]["stat"] ** 2).values.tolist())
ndata_points = np.sum((*ndata_dict.values(),))
# decompose uncertainties
art_sys = np.array(compute_covmat(correlation_df, correlated_unc, ndata_points))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
"""This script provides the common filer to the jet and dijet STAR 2013 datasets.
Files need to be parsed all together as there are correlations provided.
Files need to be parsed all together as there are correlations provided.
"""

import pathlib

import numpy as np
import pandas as pd
import yaml

from nnpdf_data.filter_utils.correlations import (
compute_covmat,
upper_triangular_to_symmetric,
)
from nnpdf_data.new_commondata.STAR_2012_1JET_510GEV.filter import TOPO_DEF
from nnpdf_data.filter_utils.correlations import compute_covmat, upper_triangular_to_symmetric
from nnpdf_data.filter_utils.poldata_utils import TOPO_DEF

# values from the paper https://arxiv.org/pdf/2110.11020.pdf
SQRTS = 510
Expand All @@ -29,20 +27,14 @@ def read_1jet_data():
data_table = pathlib.Path(RAWDATA_PATH / "Figure3.csv")

with open(data_table, "r", encoding="utf-8") as file:
parton_jet_data = pd.read_csv(
file, delimiter=",", skiprows=lambda x: (x <= 21 or x >= 38)
)
parton_jet_data = pd.read_csv(file, delimiter=",", skiprows=lambda x: (x <= 21 or x >= 38))
with open(data_table, "r", encoding="utf-8") as file:
all_data = pd.read_csv(file, delimiter=",", skiprows=37)

df = pd.DataFrame()
df["pT"] = parton_jet_data[r"Parton Jet $p_{T}$ (GeV/$c$)"]
df["pT_min"] = (
parton_jet_data[r"Parton Jet $p_{T}$ (GeV/$c$)"] + parton_jet_data["syst -"]
)
df["pT_max"] = (
parton_jet_data[r"Parton Jet $p_{T}$ (GeV/$c$)"] + parton_jet_data["syst +"]
)
df["pT_min"] = parton_jet_data[r"Parton Jet $p_{T}$ (GeV/$c$)"] + parton_jet_data["syst -"]
df["pT_max"] = parton_jet_data[r"Parton Jet $p_{T}$ (GeV/$c$)"] + parton_jet_data["syst +"]
df["eta"] = 0.0
df["eta_min"] = -TOPO_DEF["I"]["abs_eta_max"]
df["eta_max"] = +TOPO_DEF["I"]["abs_eta_max"]
Expand All @@ -60,20 +52,14 @@ def read_1jet_data():
def read_2jet_data(topology):
data_table = RAWDATA_PATH / f"Figure5topology{topology}.csv"
with open(data_table, "r", encoding="utf-8") as file:
mjj_data = pd.read_csv(
file, delimiter=",", skiprows=lambda x: (x <= 5 or x >= 20)
)
mjj_data = pd.read_csv(file, delimiter=",", skiprows=lambda x: (x <= 5 or x >= 20))
with open(data_table, "r", encoding="utf-8") as file:
all_data = pd.read_csv(file, delimiter=",", skiprows=20)

df = pd.DataFrame()
df["mjj"] = mjj_data[r"Parton Dijet $M_{inv}$ (GeV/$c^{2}$)"]
df["mjj_min"] = (
mjj_data[r"Parton Dijet $M_{inv}$ (GeV/$c^{2}$)"] + mjj_data["syst -"]
)
df["mjj_max"] = (
mjj_data[r"Parton Dijet $M_{inv}$ (GeV/$c^{2}$)"] + mjj_data["syst +"]
)
df["mjj_min"] = mjj_data[r"Parton Dijet $M_{inv}$ (GeV/$c^{2}$)"] + mjj_data["syst -"]
df["mjj_max"] = mjj_data[r"Parton Dijet $M_{inv}$ (GeV/$c^{2}$)"] + mjj_data["syst +"]

for p in ["1", "2"]:
df[f"abs_eta{p}_min"] = TOPO_DEF[topology][f"abs_eta{p}_min"]
Expand Down Expand Up @@ -113,8 +99,7 @@ def read_correlations(ndata_dict):
# build the block
try:
with open(
RAWDATA_PATH / f"{label_a}-{label_b}correlation.csv",
encoding="utf-8",
RAWDATA_PATH / f"{label_a}-{label_b}correlation.csv", encoding="utf-8"
) as file:
corr_df = pd.read_csv(file, delimiter=",", skiprows=6)
if a == b:
Expand Down Expand Up @@ -232,9 +217,7 @@ def write_2jet_data(df, topology, art_sys):
}
kin.append(kin_value)
kinematics_yaml = {"bins": kin}
with open(
STORE_PATH / f"kinematics_{topology}.yaml", "w", encoding="utf-8"
) as file:
with open(STORE_PATH / f"kinematics_{topology}.yaml", "w", encoding="utf-8") as file:
yaml.dump(kinematics_yaml, file)

# Write unc file
Expand All @@ -253,10 +236,7 @@ def write_2jet_data(df, topology, art_sys):
}
# loop on data points
for i, sys_i in enumerate(art_sys):
e = {
"pol": float(df.loc[i, "pol"]),
"lumi": float(df.loc[i, "lumi"]),
}
e = {"pol": float(df.loc[i, "pol"]), "lumi": float(df.loc[i, "lumi"])}
# loop on art sys
for j, val in enumerate(sys_i):
e[f"sys_{j}"] = val
Expand All @@ -275,9 +255,7 @@ def write_2jet_data(df, topology, art_sys):
)

uncertainties_yaml = {"definitions": error_definition, "bins": error}
with open(
STORE_PATH / f"uncertainties_{topology}.yaml", "w", encoding="utf-8"
) as file:
with open(STORE_PATH / f"uncertainties_{topology}.yaml", "w", encoding="utf-8") as file:
yaml.dump(uncertainties_yaml, file, sort_keys=False)


Expand All @@ -294,9 +272,7 @@ def write_2jet_data(df, topology, art_sys):
# by E.Aschenauer, see https://github.com/NNPDF/nnpdf/pull/2035#issuecomment-2201979662
correlated_unc = []
for a in TOPOPLOGY_LIST:
correlated_unc.extend(
np.sqrt(dfs[a]["syst"] ** 2 + dfs[a]["stat"] ** 2).values.tolist()
)
correlated_unc.extend(np.sqrt(dfs[a]["syst"] ** 2 + dfs[a]["stat"] ** 2).values.tolist())
ndata_points = np.sum((*ndata_dict.values(),))
# decompose uncertainties
art_sys = np.array(compute_covmat(correlation_df, correlated_unc, ndata_points))
Expand Down

0 comments on commit 7271ab5

Please sign in to comment.