Skip to content

Commit

Permalink
update categorization to include dy control region
Browse files Browse the repository at this point in the history
  • Loading branch information
mafrahm committed Jul 26, 2024
1 parent 93cfe3f commit 289cbdb
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 6 deletions.
44 changes: 40 additions & 4 deletions hbw/config/categories.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

@call_once_on_config()
def add_gen_categories(config: od.Config) -> None:
# NOTE: this should instead be covered by process ids if necessary
gen_0lep = config.add_category( # noqa
name="gen_0lep",
id=100000,
Expand Down Expand Up @@ -99,6 +100,34 @@ def add_abcd_categories(config: od.Config) -> None:
)


@call_once_on_config()
def add_mll_categories(config: od.Config) -> None:
    """
    Registers the mll-based categories on *config*.

    Two top-level categories are added: "sr" (selection "catid_mll_low") and "dy_cr"
    (selection "catid_dy_cr"). The "dy_cr" category additionally receives the two child
    categories "mll_z" and "mll_high".

    NOTE: never use this in combination with the *add_abcd_categories* function
    (presumably because both register a category named "sr" -- to be confirmed).
    """
    # top-level categories
    config.add_category(name="sr", id=1, selection="catid_mll_low")
    dy_control_region = config.add_category(name="dy_cr", id=2, selection="catid_dy_cr")

    # child categories of the dy control region, as (id, name, selection)
    dy_children = (
        (3, "mll_z", "catid_mll_z"),
        (4, "mll_high", "catid_mll_high"),
    )
    for child_id, child_name, child_selection in dy_children:
        dy_control_region.add_category(
            name=child_name,
            id=child_id,
            selection=child_selection,
        )


@call_once_on_config()
def add_lepton_categories(config: od.Config) -> None:
config.x.lepton_channels = {
Expand Down Expand Up @@ -185,10 +214,15 @@ def add_categories_selection(config: od.Config) -> None:
"""

# adds categories based on the existence of gen particles
add_gen_categories(config)
# NOTE: commented out because we did not use it anyways
# add_gen_categories(config)

# adds categories for ABCD background estimation
add_abcd_categories(config)
# TODO: this might be used in SL analysis, so make this configurable at some point
# add_abcd_categories(config)

# adds categories based on mll
add_mll_categories(config)

# adds categories based on number of leptons
add_lepton_categories(config)
Expand Down Expand Up @@ -245,7 +279,8 @@ def add_categories_production(config: od.Config) -> None:
#

category_blocks = OrderedDict({
"lepid": [config.get_category("sr"), config.get_category("fake")],
"mll": [config.get_category("sr")], # NOTE: we could also build the product of all mll categories
# "lepid": [config.get_category("sr"), config.get_category("fake")],
# "met": [config.get_category("highmet"), config.get_category("lowmet")],
"lep": [config.get_category(lep_ch) for lep_ch in config.x.lepton_channels],
"jet": [config.get_category("resolved"), config.get_category("boosted")],
Expand Down Expand Up @@ -315,7 +350,8 @@ def add_categories_ml(config, ml_model_inst):

# NOTE: building this many categories takes forever: has to be improved...
category_blocks = OrderedDict({
"lepid": [config.get_category("sr"), config.get_category("fake")],
"mll": [config.get_category("sr"), config.get_category("dy_cr")],
# "lepid": [config.get_category("sr"), config.get_category("fake")],
# "met": [config.get_category("highmet"), config.get_category("lowmet")],
"lep": [config.get_category(lep_ch) for lep_ch in config.x.lepton_channels],
"jet": [config.get_category("resolved"), config.get_category("boosted")],
Expand Down
9 changes: 7 additions & 2 deletions hbw/config/config_run2.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from columnflow.util import DotDict
from columnflow.config_util import add_shift_aliases
from columnflow.columnar_util import ColumnCollection, skip_column
from hbw.config.styling import stylize_processes
from hbw.config.categories import add_categories_selection
from hbw.config.variables import add_variables
Expand Down Expand Up @@ -628,8 +629,9 @@ def add_external(name, value):
})

cfg.x.keep_columns["cf.ReduceEvents"] = {
# general event information
# general event information, mandatory for reading files with coffea
"run", "luminosityBlock", "event",
ColumnCollection.MANDATORY_COFFEA,
# columns added during selection, required in general
"mc_weight", "PV.npvs", "process_id", "category_ids", "deterministic_seed",
# Gen information (for categorization)
Expand All @@ -648,9 +650,12 @@ def add_external(name, value):
"{FatJet,HbbJet}.{pt,eta,phi,mass,msoftdrop,tau1,tau2,tau3,btagHbb,deepTagMD_HbbvsQCD,particleNet_HbbvsQCD}",
# Leptons
"{Electron,Muon}.{pt,eta,phi,mass,charge,pdgId,jetRelIso,is_tight}",
"Electron.deltaEtaSC",
"Electron.deltaEtaSC", "mll",
# MET
"MET.{pt,phi}",
# all columns added during selection using a ColumnCollection flag, but skip cutflow ones
ColumnCollection.ALL_FROM_SELECTOR,
skip_column("cutflow.*"),
}

# Version of required tasks
Expand Down
1 change: 1 addition & 0 deletions hbw/config/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,7 @@ def configure_hbw_datasets(
if dataset.name.startswith("dy_"):
dataset.add_tag("is_v_jets")
dataset.add_tag("is_z_jets")
dataset.add_tag("is_dy")
if dataset.name.startswith("w_"):
dataset.add_tag("is_v_jets")
dataset.add_tag("is_w_jets")
Expand Down
8 changes: 8 additions & 0 deletions hbw/config/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,14 @@ def add_variables(config: od.Config) -> None:
#
# Simple event properties
#

config.add_variable(
name="mll",
binning=(40, 0., 200.),
unit="GeV",
x_title=r"$m_{ll}$",
)

config.add_variable(
name="n_jet",
expression=lambda events: ak.num(events.Jet.pt, axis=1),
Expand Down
29 changes: 29 additions & 0 deletions hbw/selection/categories.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,35 @@ def catid_lowmet(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Arra
mask = events.MET.pt < 20
return events, mask

#
# Categorizers for mll categories
#


@categorizer(uses={"mll"}, call_force=True)
def catid_mll_low(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]:
    """
    Selects events whose *mll* column is below 81.

    Returns the unchanged *events* together with the per-event selection mask.
    """
    return events, events.mll < 81


@categorizer(uses={"mll"}, call_force=True)
def catid_dy_cr(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]:
    """
    Selects events whose *mll* column is at least 81.

    Returns the unchanged *events* together with the per-event selection mask.
    """
    return events, events.mll >= 81


@categorizer(uses={"mll"}, call_force=True)
def catid_mll_z(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]:
    """
    Selects events whose *mll* column lies in the half-open window [81, 101).

    Returns the unchanged *events* together with the per-event selection mask.
    """
    mll = events.mll
    above_lower_edge = mll >= 81
    below_upper_edge = mll < 101
    return events, above_lower_edge & below_upper_edge


@categorizer(uses={"mll"}, call_force=True)
def catid_mll_high(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]:
    """
    Selects events whose *mll* column is at least 101.

    Returns the unchanged *events* together with the per-event selection mask.
    """
    return events, events.mll >= 101


#
# SL
#
Expand Down

0 comments on commit 289cbdb

Please sign in to comment.