Skip to content

Commit

Permalink
Merge pull request #1237 from cta-observatory/pixel_selector
Browse files Browse the repository at this point in the history
Fixed bug & couple of improvements in lstchain_dvr_pixselector
  • Loading branch information
moralejo authored Feb 27, 2024
2 parents ea23f6e + e6da544 commit a453522
Showing 1 changed file with 129 additions and 77 deletions.
206 changes: 129 additions & 77 deletions lstchain/scripts/lstchain_dvr_pixselector.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,34 @@
given run (for reasons of stability & simplicity of analysis), we cannot
decide the threshold subrun by subrun.
The approach is the following: we run first the script over *all* subruns of a
run, for example:
The approach is the following: we run first the script over a subset of the
subruns of a run, for example:
lstchain_dvr_pixselector -f "/.../dl1_LST-1.Run12469.????.h5"
(the default "action" compute_dvr_settings will be chosen by default, and the
default parameters will be used for "picture_threshold" and "number_of_rings" -
see below)
This creates in the output directory (which is the current by default) a file
DVR_settings_LST-1.Run12469.h5 which contains a table "run_summary" which
includes the DVR algorithm parameters determined for each subrun.
see below). The program will process a maximum of 10 subruns, distributed
uniformly through the run - just for speed reasons: the result would hardly
change if all subruns are used (we want common data volume reduction
settings for the whole run, and a few subruns are enough to determine them).
The program creates in the output directory (which is the current by default)
a file DVR_settings_LST-1.Run12469.h5 which contains a table "run_summary"
which includes the DVR algorithm parameters determined for each processed
subrun.
Then we run again the script over all subruns, and using the option to create
the pixel maks:
the pixel maks (this can also be done subrun by subrun, to parallelize the
creation of the pixel masks files):
lstchain_dvr_pixselector -f "/.../dl1_LST-1.Run12469.????.h5" --action create_pixel_masks
The script will detect that the file DVR_settings_LST-1.Run12469.h5 already
exists, read it, and use, as threshold for DVR, the average for all subruns,
in p.e., rounded to the closest lower integer (we do not want to have too many
different ways of reducing the data, so we "discretize" the threshold in
1-p.e. steps). Then the event-wise pixel maks (selected pixels) for the subrun
will be computed and written out to a file
exists, read it, and use, as threshold for DVR, the average for all the previously processed subruns, in p.e., rounded to the closest lower integer (we do
not want to have too many different ways of reducing the data, so we
"discretize" the threshold in 1-p.e. steps). Then the event-wise pixel maks
(selected pixels) for the subrun will be computed and written out to a file
Pixel_selection_LST-1.Run12469.xxxx.h5 for each subrun xxxx
Note that when the option --action create_pixel_masks is used, the options
Expand All @@ -45,12 +50,14 @@
"""

import argparse
import logging
from pathlib import Path
import tables
import glob
import os
import numpy as np
import time
import sys

from lstchain.paths import parse_dl1_filename
from lstchain.io.io import dl1_params_lstcam_key, dl1_images_lstcam_key
Expand Down Expand Up @@ -80,6 +87,9 @@
type=int, default=1,
help='Number of rings around picture pixels (default: %('
'default)s)')
parser.add_argument('--log', dest='log_file',
type=str, default=None,
help='Log file name')

class PixelMask(Container):
event_id = Field(-1, 'event id')
Expand All @@ -106,33 +116,70 @@ class RunSummary(Container):
number_of_always_saved_pixels = Field(0, 'number_of_always_saved_pixels')
ped_mean = Field(np.float32(np.nan), '') # photo-electrons
ped_stdev = Field(np.float32(np.nan), '') # photo-electrons


log = logging.getLogger(__name__)


def main():
args = parser.parse_args()
summary_info = RunSummary()

log.setLevel(logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))

output_dir = args.output_dir.absolute()
output_dir.mkdir(exist_ok=True, parents=True)

if args.dl1_files == '':
log.error('Please use --dl1-files to provide a valid set of DL1 files!')
sys.exit(1)

all_dl1_files = glob.glob(args.dl1_files)
all_dl1_files.sort()

log_file = args.log_file
if log_file is not None:
handler = logging.FileHandler(log_file, mode='w')
logging.getLogger().addHandler(handler)
elif args.action == 'compute_dvr_settings':
# Only in this case we set an automatic log file name:
run_info = parse_dl1_filename(all_dl1_files[0])
log_file = str(output_dir)
log_file += f'/DVR_settings_LST-1.Run{run_info.run:05d}.log'
handler = logging.FileHandler(log_file, mode='w')
logging.getLogger().addHandler(handler)

# Number of subruns (uniformly distributed through the run) to
# be processed:
max_number_of_processed_subruns = 10

write_pixel_masks = False
if args.action == 'compute_dvr_settings':
print('I will calculate the Data Volume Reduction parameters from '
'the input DL1 files, and write them to the output file')
log.info('I will calculate the Data Volume Reduction parameters from '
'the input DL1 files, and write them to the output file')
elif args.action == 'create_pixel_masks':
write_pixel_masks = True
print('Option to write pixel masks in output file selected. I will '
'read in the Data Volume Reduction parameters from the '
'corresponding DVR_settings_*.h5 file')
print('(the --picture_threshold and --number-of-rings command-line '
'options, if provided, will be ignored!)')
log.info('Option to write pixel masks in output file selected. I will '
'read in the Data Volume Reduction parameters from the '
'corresponding DVR_settings_*.h5 file')
log.info('(the --picture_threshold and --number-of-rings command-line '
'options, if provided, will be ignored!)')
else:
print('Unknown option in --action flag!')
return 1
log.error('Unknown option in --action flag!')
sys.exit(1)

if args.dl1_files == '':
print('Please use --dl1-files provide a valid set of DL1 files!')
return 1
dl1_files = []

dl1_files = glob.glob(args.dl1_files)
dl1_files.sort()
if (write_pixel_masks or
len(all_dl1_files) <= max_number_of_processed_subruns):
# process all input files:
dl1_files = all_dl1_files
else:
step = len(all_dl1_files) / max_number_of_processed_subruns
k = 0
while np.round(k) < len(all_dl1_files):
dl1_files.append(all_dl1_files[int(np.round(k))])
k += step

# The pixel selection will be applied only to "physics triggers"
# (showers), not to other events like interleaved pedestal and flatfield
Expand Down Expand Up @@ -186,17 +233,13 @@ def main():
current_run_number = -1

for dl1_file in dl1_files:
print()
print('Input file:', dl1_file)
log.info('\nInput file: %s', dl1_file)

run_info = parse_dl1_filename(dl1_file)
run_id, subrun_id = run_info.run, run_info.subrun
summary_info.run_id = run_id
summary_info.subrun_id = subrun_id

output_dir = args.output_dir.absolute()
output_dir.mkdir(exist_ok=True, parents=True)

# The file DVR_settings_LST-1.Run*.h5 will be needed in case the
# --action create_pixel_masks option is used:
input_dvr_settings_file = Path(output_dir,
Expand All @@ -214,11 +257,13 @@ def main():
f'{run_id:05d}.{subrun_id:04d}.h5')

if not os.path.isfile(input_dvr_settings_file):
print('ERROR: --action create_pixel_masks selected, but file ' +
input_dvr_settings_file.name + 'does not exist!')
print('You must run first this script over all the subruns of '
'the run in one go, using --action compute_dvr_settings')
exit(1)
log.error('ERROR: --action create_pixel_masks selected, but'
'file %s does not exist!',
input_dvr_settings_file.name)
log.error('You must run first this script over all the '
'subruns of the run in one go, using the option '
'--action compute_dvr_settings')
sys.exit(1)

dvr_settings = read_table(input_dvr_settings_file, '/run_summary')
# We just take the values below from the first table row, because by
Expand All @@ -228,7 +273,7 @@ def main():
summary_info.number_of_rings = dvr_settings['number_of_rings'][0]
summary_info.picture_threshold = dvr_settings['picture_threshold'][0]

print('Output file:', output_file)
log.info('Output file: %s', output_file)

data_parameters = read_table(dl1_file, dl1_params_lstcam_key)

Expand All @@ -251,12 +296,13 @@ def main():
# unknown(255) are those currently in use in LST1

found_event_types = np.unique(event_type_data, return_counts=True)
print('Event types found:', found_event_types[0])
print('Number of events of each type:', found_event_types[1])
log.info('Event types found:')
for et, ec in zip(found_event_types[0], found_event_types[1]):
log.info(' Type: %d: %d', et, ec)

if not np.any(np.isin(found_event_types[0],
event_types_to_be_reduced)):
print('No reducible events were found in file! SKIPPING IT!')
log.warn('No reducible events were found in file! SKIPPING IT!')
continue

cosmic_mask = event_type_data == EventType.SUBARRAY.value # showers
Expand All @@ -269,7 +315,8 @@ def main():
# that more than half of the events are labeled as UNKNOWN, we assume
# they are cosmics:
if unknown_mask.sum() > 0.5 * len(event_type_data):
print('Too many events tagged UNKNOWN! I will assume they are cosmics!')
log.warn('Too many events tagged UNKNOWN! '
'I will assume they are cosmics!')
cosmic_mask |= unknown_mask

data_images = read_table(dl1_file, dl1_images_lstcam_key)
Expand All @@ -287,21 +334,24 @@ def main():
summary_info.number_of_always_saved_pixels = keep_always_mask.sum()

if summary_info.number_of_always_saved_pixels > 0:
print(f'Pixels that will always be saved ('
f'{summary_info.number_of_always_saved_pixels}):')
log.info('Pixels that will always be saved (%d):',
summary_info.number_of_always_saved_pixels)
for pixindex in np.where(keep_always_mask)[0]:
print(pixindex)
log.info(' %d', pixindex)
else:
print('Pixels that will always be saved: None')
log.info('Pixels that will always be saved: None')

charges_data = data_images['image']
# times_data = data_images['peak_time'] # not used now
image_mask = data_images['image_mask']

print("Original standard cleaning, pixel survival probabilities:")
print(' Minimum: ', np.round(np.mean(image_mask, axis=0).min(), 5))
print(' Maximum: ', np.round(np.mean(image_mask, axis=0).max(), 5))
print(' Mean: ', np.round(np.mean(image_mask, axis=0).mean(), 5))
log.info('Original standard cleaning, pixel survival probabilities:')
log.info(' Minimum: %.4f',
np.round(np.mean(image_mask, axis=0).min(), 5))
log.info(' Maximum: %.4f',
np.round(np.mean(image_mask, axis=0).max(), 5))
log.info(' Mean: %.4f',
np.round(np.mean(image_mask, axis=0).mean(), 5))

charges_cosmics = charges_data[cosmic_mask]
if not write_pixel_masks:
Expand Down Expand Up @@ -373,14 +423,14 @@ def main():

cr_masks = selected_pixels_masks[cosmic_mask]
fraction_of_survival = cr_masks.sum() / len(cr_masks.flatten())
print("Fraction in shower events of selected pixels:", np.round(
fraction_of_survival, 3))
log.info('Fraction in shower events of selected pixels: %.4f',
np.round(fraction_of_survival, 3))

num_sel_pixels = np.array(num_sel_pixels)
print('Average number of selected pixels per event (of any type):',
log.info('Average number of selected pixels per event (of any type): %.1f',
np.round(num_sel_pixels.sum() / len(data_parameters), 2))
print(f'Fraction of whole camera: '
f'{num_sel_pixels.sum()/len(data_parameters)/camera_geom.n_pixels:.3f}')
log.info('Fraction of whole camera: %.3f',
num_sel_pixels.sum()/len(data_parameters)/camera_geom.n_pixels)

# Keep track of how many cosmic events were fully saved (whole camera)>
summary_info.fraction_of_full_shower_events = \
Expand Down Expand Up @@ -409,7 +459,7 @@ def main():
if run_id != current_run_number:
current_run_number = run_id
else:
filemode == 'a'
filemode = 'a'
# In the "compute_dvr_settings" mode (not writing pixel
# masks) we will write only one file per run number,
# so for every new subrun we just append one row to the
Expand All @@ -420,7 +470,8 @@ def main():
writer.write("run_summary", summary_info)

# In the pixel selection mode we create a new file per subrun,
# which contains the event-wise pixel masks:
# which contains the run_summary and the event-wise pixel
# masks:
if write_pixel_masks:
data = PixelMask()
with HDF5TableWriter(output_file, filters=writer_conf, mode='a') as writer:
Expand All @@ -436,19 +487,17 @@ def main():
break
except:
if number_of_writing_attempts > 60:
print("I gave up!")
exit(1)
print(time.asctime(time.localtime()),
"Could not write output, attempt",
number_of_writing_attempts,
"... Will try again after 5 minutes")
log.error('I gave up!')
sys.exit(1)
log.warn('%s: could not write output, attempt %d',
'... Will try again after 5 minutes',
time.asctime(time.localtime()),
number_of_writing_attempts)
time.sleep(300)
continue

print()
print('lstchain_dvr_pixselector finished successfully!')
print()
return 0
log.info('lstchain_dvr_pixselector finished successfully!')


def get_selected_pixels(charge_map, min_charge_for_certain_selection,
number_of_rings, geom,
Expand Down Expand Up @@ -532,20 +581,23 @@ def find_DVR_threshold(charges_cosmics, max_pix_survival_fraction,
if fraction_of_survival <= max_pix_survival_fraction:
break

print("Fraction in shower events of pixels with >",
min_charge_for_certain_selection, "pe & first neighbors:",
np.round(fraction_of_survival, 3), "is higher than maximum "
"allowed:",
max_pix_survival_fraction)
log.info('Fraction in shower events of pixels with > %.1f pe '
'& first neighbors: %.4f is higher than maximum '
'allowed: %.2f',
min_charge_for_certain_selection,
np.round(fraction_of_survival, 3),
max_pix_survival_fraction)

# Modify the value of min_charge_for_certain_selection to get a lower
# survival fraction
min_charge_for_certain_selection += 1.

if min_charge_for_certain_selection > picture_threshold:
print("min_charge_for_certain_selection changed to",
min_charge_for_certain_selection)
print("Fraction in shower events of pixels with >",
min_charge_for_certain_selection, "pe & first neighbors:",
np.round(fraction_of_survival, 3)),
log.info('min_charge_for_certain_selection changed to %.1f',
min_charge_for_certain_selection)
log.info('Fraction in shower events of pixels with > %.1f pe '
'& first neighbors: %.4f',
min_charge_for_certain_selection,
np.round(fraction_of_survival, 3))

return min_charge_for_certain_selection

0 comments on commit a453522

Please sign in to comment.