diff --git a/environment.yml b/environment.yml index 1526447131..2c04db6154 100644 --- a/environment.yml +++ b/environment.yml @@ -18,6 +18,7 @@ dependencies: - joblib~=1.2.0 - toml - protobuf=3.20 + - protozfits=2.2 - pyparsing - scipy~=1.11.4 - scikit-learn=1.2 diff --git a/lstchain/image/modifier.py b/lstchain/image/modifier.py index 84f64b8078..1c0f28f853 100644 --- a/lstchain/image/modifier.py +++ b/lstchain/image/modifier.py @@ -255,6 +255,7 @@ def calculate_noise_parameters(simtel_filename, data_dl1_filename, # Identify noisy pixels, likely containing stars - we want to adjust MC to # the average diffuse NSB across the camera + data_median_std_ped_pe = np.nanmedian(data_HG_ped_std_pe[good_pixels]) data_std_std_ped_pe = np.nanstd(data_HG_ped_std_pe[good_pixels]) log.info('\nReal data:') @@ -262,6 +263,7 @@ def calculate_noise_parameters(simtel_filename, data_dl1_filename, log.info(f' Median of FF pixel charge: ' f'{np.nanmedian(data_HG_FF_mean_pe[good_pixels]):.3f} p.e.') log.info(f' Median across camera of good pixels\' pedestal std ' + f'{data_median_std_ped_pe:.3f} p.e.') brightness_limit = data_median_std_ped_pe + 3 * data_std_std_ped_pe too_bright_pixels = (data_HG_ped_std_pe > brightness_limit) diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py new file mode 100644 index 0000000000..6614228e7f --- /dev/null +++ b/lstchain/scripts/lstchain_r0_to_r0g.py @@ -0,0 +1,236 @@ +#!/usr/bin/env python +""" +Script to apply gain selection to the raw fits.fz produced by EvB6 in CTAR1 +format (aka R1v1). + +Gain selection is only applied to shower events (not to interleaved pedestal +and flatfield) + +It uses heuristic identification of the interleaved flatfield (FF) events ( +given the occasional problems we have with the FF event tagging) + +""" +import logging +import protozfits +import argparse +import sys +import re + +from pathlib import Path +from ctapipe.containers import EventType + +import lstchain.paths as paths +import numpy as np +from contextlib import ExitStack + +parser = argparse.ArgumentParser(description="Gain selector program (R0 to " + "R0G)") +parser.add_argument('-f', '--R0-file', dest='input_file', + type=str, help='Input R0 file name (of stream 1)') + +parser.add_argument('-o', '--output-dir', dest='output_dir', + type=str, default='./', + help='Output directory') + +parser.add_argument('--log', dest='log_file', + type=str, default=None, + help='Log file name') + +parser.add_argument('--no-flatfield-heuristic', action='store_false', + dest="use_flatfield_heuristic", + help=("If given, do *not* identify flatfield events" + " heuristically from the raw data. " + "Trust event_type.")) + +# Range of waveform to be checked (for gain selection & heuristic FF +# identification) +SAMPLE_START = 3 +SAMPLE_END = 39 + +# Level of high gain (HG) required for switching to low gain (LG) +THRESHOLD = 3900 + +# Events for which gain selection will be applied: +EVENT_TYPES_TO_REDUCE = [EventType.SUBARRAY, EventType.UNKNOWN] + +def main(): + args = parser.parse_args() + + input_file = args.input_file + output_dir = args.output_dir + use_flatfield_heuristic = args.use_flatfield_heuristic + + log = logging.getLogger(__name__) + log.setLevel(logging.INFO) + + log_file = args.log_file + runinfo = paths.parse_r0_filename(input_file) + + if log_file is None: + log_file = output_dir + log_file += f'/R0_to_R0g_Run{runinfo.run:05d}.{runinfo.subrun:04d}.log' + + formatter = logging.Formatter('%(asctime)s - ' + '%(levelname)s - %(message)s', + '%Y-%m-%d %H:%M:%S') + handler = logging.FileHandler(log_file, mode='w') + handler.setFormatter(formatter) + logging.getLogger().addHandler(handler) + + # Loop over the files (4 streams) to perform the gain selection: + + input_stream_names = [Path(Path(input_file).parent, + re.sub("LST-1...Run", + f"LST-1.{id_stream+1}.Run", + Path(input_file).name)) + for id_stream in range(4)] + + output_stream_names = [Path(output_dir, Path(inputsn).name) + for inputsn in input_stream_names] + + input_streams = [] + for name in input_stream_names: + input_streams.append(protozfits.File(str(name), pure_protobuf=True)) + + try: + camera_config = input_streams[0].CameraConfiguration[0] + except (AttributeError, IndexError): + log.error('CameraConfiguration not found! Is this R1v1 data?') + sys.exit(1) + + num_pixels = camera_config.num_pixels + num_samples = camera_config.num_samples_nominal + + # Counters to keep track of how many FF-like events are + # tagged as FF, and how many are not (and vice-versa): + num_FF_like_with_FF_type = 0 + num_FF_like_with_no_FF_type = 0 + num_FF_unlike_with_FF_type = 0 + num_events = 0 + + with ExitStack() as stack: + for i, name in enumerate(output_stream_names): + header = input_streams[i].Events.header + n_tiles = header["NAXIS2"] + rows_per_tile = header["ZTILELEN"] + + stream = stack.enter_context(protozfits.ProtobufZOFits( + n_tiles=n_tiles, + rows_per_tile=rows_per_tile, + compression_block_size_kb=64*1024, + defaul_compression="lst")) + stream.open(str(name)) + + stream.move_to_new_table("DataStream") + stream.write_message(input_streams[i].DataStream[0]) + + stream.move_to_new_table("CameraConfiguration") + stream.write_message(input_streams[i].CameraConfiguration[0]) + + stream.move_to_new_table("Events") + + wf_offset = input_streams[i].DataStream[0].waveform_offset + + for event in input_streams[i].Events: + # skip corrupted events: + if event.event_id == 0: + continue + num_events += 1 + + num_gains = event.num_channels + if num_gains != 2: + log.error('You are attempting gain selection on ' + 'data with only one gain!') + sys.exit(1) + + wf = protozfits.any_array_to_numpy(event.waveform) + wf = wf.reshape((num_gains, num_pixels, num_samples)) + hg = wf[0] + lg = wf[1] + + evtype = EventType(event.event_type) + evtype_heuristic = get_event_type(hg, wf_offset, evtype) + + if evtype == EventType.FLATFIELD: + if evtype_heuristic == EventType.FLATFIELD: + num_FF_like_with_FF_type += 1 + else: + num_FF_unlike_with_FF_type += 1 + elif evtype_heuristic == EventType.FLATFIELD: + num_FF_like_with_no_FF_type += 1 + + if use_flatfield_heuristic: + evtype = evtype_heuristic + + if evtype in EVENT_TYPES_TO_REDUCE: + # Find pixels with HG above gain switch threshold: + use_lg = (np.max(hg[:, SAMPLE_START:SAMPLE_END], axis=1) + > THRESHOLD) + new_wf = np.where(use_lg[:, None], lg, hg) # gain-selected + event.waveform.data = new_wf.tobytes() + event.num_channels = 1 # Just one gain stored! + + pixel_status = protozfits.any_array_to_numpy(event.pixel_status) + # Set to 0 the status bit of the removed gain: + new_status = np.where(use_lg, + pixel_status & 0b11111011, + pixel_status & 0b11110111) + event.pixel_status.data = new_status.tobytes() + + stream.write_message(event) + + stream.close() + input_streams[i].close() + + + log.info('Number of processed events: %d', num_events) + if num_FF_like_with_FF_type > 0: + log.info('FF-like events tagged as FF: %d', + num_FF_like_with_FF_type) + else: + log.warn('FF-like events tagged as FF: %d !!', + num_FF_like_with_FF_type) + + log.info('FF-like events not tagged as FF: %d', + num_FF_like_with_no_FF_type) + + log.info('FF-unlike events tagged as FF: %d', + num_FF_unlike_with_FF_type) + + + num_FF_like = num_FF_like_with_no_FF_type + num_FF_like_with_FF_type + + # If a relevant fraction of FF-like events were not tagged as FF...: + max_frac = 0.1 + if ((num_FF_like_with_no_FF_type / num_FF_like > max_frac) & + (use_flatfield_heuristic == False)): + log.warn('More than %d percent of FF-like events ' + 'have wrong event_type!', int(100*max_frac)) + log.warn('You should use heuristic identification of FF events!') + else: + log.info('R0 to R0G conversion finished successfully!') + +def get_event_type(wf_hg, offset, evtype): + + # For heuristic flat field identification (values refer to + # baseline-subtracted HG integrated waveforms): + MIN_FLATFIELD_ADC = 3000 + MAX_FLATFIELD_ADC = 12000 + MIN_FLATFIELD_PIXEL_FRACTION = 0.8 + + evtype_heuristic = evtype + + # pixel-wise integral: + wf_hg_sum = np.sum(wf_hg - offset, axis=1) + ff_pix = ((wf_hg_sum > MIN_FLATFIELD_ADC) & + (wf_hg_sum < MAX_FLATFIELD_ADC)) + + # Check fraction of pixels with HG in "FF-like range" + if ff_pix.sum() / len(ff_pix) > MIN_FLATFIELD_PIXEL_FRACTION: + # Looks like a FF event: + evtype_heuristic = EventType.FLATFIELD + elif evtype == EventType.FLATFIELD: + # Does not look like a FF, but was tagged as such: + evtype_heuristic = EventType.UNKNOWN + + return evtype_heuristic diff --git a/lstchain/scripts/tests/test_lstchain_scripts.py b/lstchain/scripts/tests/test_lstchain_scripts.py index 8f46742982..8f6f4b7919 100644 --- a/lstchain/scripts/tests/test_lstchain_scripts.py +++ b/lstchain/scripts/tests/test_lstchain_scripts.py @@ -15,6 +15,9 @@ from astropy.time import Time from ctapipe.instrument import SubarrayDescription from ctapipe.io import read_table +from ctapipe.io import EventSource +from ctapipe.containers import EventType + from lstchain.io.config import get_srcdep_config, get_standard_config from lstchain.io.io import (dl1_images_lstcam_key, dl1_params_lstcam_key, @@ -83,6 +86,28 @@ def merged_simulated_dl1_file(simulated_dl1_file, temp_dir_simulated_files): def test_lstchain_mc_r0_to_dl1(simulated_dl1_file): assert simulated_dl1_file.is_file() +@pytest.mark.private_data +def test_lstchain_r0_to_r0g(tmp_path, temp_dir_observed_files): + test_data = Path(os.getenv('LSTCHAIN_TEST_DATA', 'test_data')) + input_file = test_data / "real/R0/20231214/LST-1.1.Run16102.0000_first50" \ + ".fits.fz" + output_dir = temp_dir_observed_files / "R0G" + output_dir.mkdir() + run_program("lstchain_r0_to_r0g", "-f", input_file, "-o", output_dir) + output_file = output_dir / input_file.name + assert output_file.is_file() + + src = EventSource(input_url=output_file) + src.pointing_information = False + src.trigger_information = False + src.apply_drs4_corrections = False + # Check number of gains for first event of each type: + for evtype, ngains in zip([EventType.FLATFIELD, EventType.SKY_PEDESTAL, + EventType.SUBARRAY], [2, 2, 1]): + for event in src: + if event.trigger.event_type == evtype: + break + assert(event.r0.tel[1].waveform.shape[0] == ngains) @pytest.mark.private_data def test_lstchain_data_r0_to_dl1(observed_dl1_files): diff --git a/setup.py b/setup.py index 77ba7375c7..5a99073c6a 100644 --- a/setup.py +++ b/setup.py @@ -60,6 +60,7 @@ def find_scripts(script_dir, prefix): 'scikit-learn~=1.2', 'tables', 'toml', + 'protozfits>=2.2,<3', 'pymongo', 'pyparsing', 'setuptools_scm',