From d78320bc1025d7547ad7cac0f07df619dee1716f Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Tue, 19 Dec 2023 22:25:14 +0100
Subject: [PATCH 01/35] Script for gain selection (R0 to R0G), for data taken
 in ADHF format (i.e. from July 2023)

---
 environment.yml                        |   1 +
 lstchain/scripts/lstchain_r0_to_r0g.py | 203 +++++++++++++++++++++++++
 2 files changed, 204 insertions(+)
 create mode 100644 lstchain/scripts/lstchain_r0_to_r0g.py

diff --git a/environment.yml b/environment.yml
index d055b62103..c4dfbd4b54 100644
--- a/environment.yml
+++ b/environment.yml
@@ -18,6 +18,7 @@ dependencies:
   - joblib~=1.2.0
   - toml
   - protobuf=3.20
+  - protozfits=2.2
   - pyparsing
   - scikit-learn=1.2
   - sphinx=4
diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
new file mode 100644
index 0000000000..d276209c72
--- /dev/null
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -0,0 +1,203 @@
+#!/usr/bin/env python
+
+import logging
+import protozfits
+
+import argparse
+
+from protozfits.CTA_R1_pb2 import CameraConfiguration
+from protozfits.Debug_R1_pb2 import DebugEvent, DebugCameraConfiguration
+
+from ctapipe.io import EventSource
+from ctapipe.containers import EventType
+from lstchain.io import standard_config
+from traitlets.config import Config
+
+from astropy.time import Time
+import numpy as np
+import matplotlib.pyplot as plt
+from contextlib import ExitStack
+
+parser = argparse.ArgumentParser(description="Gain selector program (R0 to "
+                                             "R0G)")
+parser.add_argument('-f', '--R0-file', dest='input_file',
+                    type=str, help='Input R0 file name (of stream 1)')
+
+parser.add_argument('-o', '--output-dir', dest='output_dir',
+                    type=str, default='./',
+                    help='Output directory')
+parser.add_argument('-yyyymmdd', dest='yyyymmdd', type=str,
+                    help='date (YYYYMMDD)')
+
+def main():
+    args = parser.parse_args()
+
+    # Range of waveform to be checked (for gain selection & heuristic FF
+    # identification)
+    SAMPLE_START = 3
+    SAMPLE_END = 39
+
+    # Baseline offset:
+    OFFSET = 400
+    # Level of high gain (HG) required for switching to low gain (LG)
+    THRESHOLD = 3500 + OFFSET
+
+    # For heuristic flat field identification (values refer to
+    # baseline-subtracted HG integrated waveforms):
+
+    MIN_FLATFIELD_ADC = 3000
+    MAX_FLATFIELD_ADC = 12000
+    MIN_FLATFIELD_PIXEL_FRACTION = 0.8
+
+    input_file = args.input_file
+    output_dir = args.output_dir
+
+    # Look for the auxiliary files in their standard locations:
+    drive_file = f'/fefs/onsite/monitoring/driveLST1/DrivePositioning' \
+                 f'/DrivePosition_log_{args.yyyymmdd}.txt'
+    run_summary = f'/fefs/aswg/data/real/monitoring/RunSummary' \
+                  f'/RunSummary_{args.yyyymmdd}.ecsv'
+
+    standard_config['source_config']['LSTEventSource'][
+        'EventTimeCalculator']['run_summary_path'] = run_summary
+    standard_config['source_config']['LSTEventSource']['PointingSource'][
+        'drive_report_path'] = drive_file
+    standard_config['source_config']['LSTEventSource'][
+        'apply_drs4_corrections'] = False
+    #
+    # Loop just to identify properly interleaved pedestals (in case there are
+    # ucts jumps) and FF events (heuristically)
+    #
+    event_type = []
+    event_id = []
+
+    with EventSource(input_url=input_file,
+                     config=Config(standard_config['source_config'])) as source:
+        source.pointing_information = False
+        source.trigger_information = True
+        source.log.setLevel(logging.WARNING)
+        try:
+            for ievent, event in enumerate(source):
+                if event.r0.tel[1].waveform is None:
+                    logging.error('The data seem to contain R0 waveforms. Is '
+                                  'this already gain-selected data?')
+                    exit(1)
+
+                # Check if this may be a FF event
+                # Subtract baseline offset (convert to signed integer to
+                # allow for negative fluctuations!)
+                wf_hg = event.r0.tel[1].waveform[0][:,
+                        SAMPLE_START:SAMPLE_END].astype('int16') - OFFSET
+                # pixel-wise integral:
+                wf_hg_sum = np.sum(wf_hg, axis=1)
+                ff_pix = ((wf_hg_sum > MIN_FLATFIELD_ADC) &
+                          (wf_hg_sum < MAX_FLATFIELD_ADC))
+                event_type.append(event.trigger.event_type)
+
+                # Check fraction of pixels with HG in "FF-like range"
+                if ff_pix.sum() / event.r0.tel[1].waveform.shape[1] > \
+                        MIN_FLATFIELD_PIXEL_FRACTION:
+                    # Looks like a FF event:
+                    event_type[-1] = EventType.FLATFIELD
+                elif event_type[-1] == EventType.FLATFIELD:
+                    # If originally tagged as FF, but not looking like FF:
+                    event_type[-1] = EventType.UNKNOWN
+
+                event_id.append(event.index.event_id)
+            print('Finished first loop over input files - all ok!')
+
+        except Exception as err:
+            print(err)
+            print('Something went wrong!')
+            exit(1)
+
+    event_id = np.array(event_id)
+
+    # Numerical values of event types:
+    event_type_val = np.array([x.value for x in event_type])
+
+    logging.info('Identified event types and number of events:')
+    for j in np.unique(event_type_val):
+        logging.info(f'{EventType(j)}:, {np.sum(event_type_val == j)}\n')
+
+    # Now loop over the files (4 streams) again to perform the actual gain
+    # selection:
+    k = input_file.find('/LST-1.')
+    input_stream_names = [input_file[:k+7]+str(id_stream+1)+input_file[k+8:]
+                          for id_stream in range(4)]
+    output_stream_names = [output_dir + name[k:] for name in input_stream_names]
+
+    input_streams = []
+    for name in input_stream_names:
+        input_streams.append(protozfits.File(name, pure_protobuf=True))
+
+    num_pixels = input_streams[0].CameraConfig[0].num_pixels
+    num_samples = input_streams[0].CameraConfig[0].num_samples
+    num_bytes = 2
+
+    with ExitStack() as stack:
+        for i, name in enumerate(output_stream_names):
+
+            n_tiles = input_streams[i].Events.protobuf_i_fits.header[
+                'NAXIS2'].value
+            rows_per_tile = input_streams[i].Events.protobuf_i_fits.header[
+                'ZTILELEN'].value
+
+            stream = stack.enter_context(protozfits.ProtobufZOFits(
+                    n_tiles=n_tiles,
+                    rows_per_tile=rows_per_tile,
+                    compression_block_size_kb=64*1024,
+                    defaul_compression="lst"))
+            stream.open(name)
+            stream.move_to_new_table("CameraConfig")
+            stream.write_message(input_streams[i].CameraConfig[0])
+
+            stream.move_to_new_table("Events")
+
+            count = 0
+            for event in input_streams[i].Events:
+
+                # Get the event type, from the first loop over the files:
+                evtype = event_type_val[event_id==event.event_id][0]
+
+                if ((evtype == EventType.SUBARRAY.value) |
+                        (evtype == EventType.UNKNOWN.value)):
+                    # Find pixels with HG above gain switch threshold:
+                    wf = protozfits.any_array_to_numpy(event.waveform)
+                    num_gains = int(wf.size / num_pixels / num_samples)
+
+                    if num_gains != 2:
+                        logging.error('You are attempting gain selection on '
+                                      'data with only one gain!')
+                        exit(1)
+
+                    wf = wf.reshape((num_gains, num_pixels, num_samples))
+                    hg = wf[0]
+                    lg = wf[1]
+                    use_lg = (np.max(hg[:, SAMPLE_START:SAMPLE_END], axis=1)
+                              > THRESHOLD)
+                    new_wf = np.where(use_lg[:, None], lg, hg) # gain-selected
+
+                    event.waveform.data = new_wf.tobytes()
+
+                    pixel_status = protozfits.any_array_to_numpy(event.pixel_status)
+                    # Set to 0 the status bit of the removed gain:
+                    new_status = np.where(use_lg, pixel_status & 0b1011,
+                                          pixel_status & 0b0111)
+                    event.pixel_status.data = new_status.tobytes()
+
+                count += 1
+                # if count>1000:
+                #     break
+
+                stream.write_message(event)
+
+            stream.close()
+            input_streams[i].close()
+
+
+    logging.info('R0 to R0G conversion finished successfully!')
+    return(0)
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file

From 58d0935e2e73ea5e78c953fd49c8f90a8ac965d8 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Tue, 19 Dec 2023 22:30:39 +0100
Subject: [PATCH 02/35] Removed unused stuff

---
 lstchain/scripts/lstchain_r0_to_r0g.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index d276209c72..77e61ed05e 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -2,20 +2,14 @@
 
 import logging
 import protozfits
-
 import argparse
 
-from protozfits.CTA_R1_pb2 import CameraConfiguration
-from protozfits.Debug_R1_pb2 import DebugEvent, DebugCameraConfiguration
-
 from ctapipe.io import EventSource
 from ctapipe.containers import EventType
 from lstchain.io import standard_config
 from traitlets.config import Config
 
-from astropy.time import Time
 import numpy as np
-import matplotlib.pyplot as plt
 from contextlib import ExitStack
 
 parser = argparse.ArgumentParser(description="Gain selector program (R0 to "
@@ -133,7 +127,6 @@ def main():
 
     num_pixels = input_streams[0].CameraConfig[0].num_pixels
     num_samples = input_streams[0].CameraConfig[0].num_samples
-    num_bytes = 2
 
     with ExitStack() as stack:
         for i, name in enumerate(output_stream_names):

From ec4ab2c545518d94af274a6eda13bf84d5c74031 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Wed, 20 Dec 2023 21:18:56 +0100
Subject: [PATCH 03/35] Separate first loop in a function

---
 lstchain/scripts/lstchain_r0_to_r0g.py | 160 +++++++++++++------------
 1 file changed, 81 insertions(+), 79 deletions(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index 77e61ed05e..d9397948af 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -23,91 +23,27 @@
 parser.add_argument('-yyyymmdd', dest='yyyymmdd', type=str,
                     help='date (YYYYMMDD)')
 
+# Range of waveform to be checked (for gain selection & heuristic FF
+# identification)
+SAMPLE_START = 3
+SAMPLE_END = 39
+# Baseline offset:
+OFFSET = 400
+
 def main():
     args = parser.parse_args()
 
-    # Range of waveform to be checked (for gain selection & heuristic FF
-    # identification)
-    SAMPLE_START = 3
-    SAMPLE_END = 39
-
-    # Baseline offset:
-    OFFSET = 400
     # Level of high gain (HG) required for switching to low gain (LG)
     THRESHOLD = 3500 + OFFSET
 
-    # For heuristic flat field identification (values refer to
-    # baseline-subtracted HG integrated waveforms):
-
-    MIN_FLATFIELD_ADC = 3000
-    MAX_FLATFIELD_ADC = 12000
-    MIN_FLATFIELD_PIXEL_FRACTION = 0.8
-
     input_file = args.input_file
     output_dir = args.output_dir
 
-    # Look for the auxiliary files in their standard locations:
-    drive_file = f'/fefs/onsite/monitoring/driveLST1/DrivePositioning' \
-                 f'/DrivePosition_log_{args.yyyymmdd}.txt'
-    run_summary = f'/fefs/aswg/data/real/monitoring/RunSummary' \
-                  f'/RunSummary_{args.yyyymmdd}.ecsv'
-
-    standard_config['source_config']['LSTEventSource'][
-        'EventTimeCalculator']['run_summary_path'] = run_summary
-    standard_config['source_config']['LSTEventSource']['PointingSource'][
-        'drive_report_path'] = drive_file
-    standard_config['source_config']['LSTEventSource'][
-        'apply_drs4_corrections'] = False
-    #
-    # Loop just to identify properly interleaved pedestals (in case there are
-    # ucts jumps) and FF events (heuristically)
-    #
-    event_type = []
-    event_id = []
-
-    with EventSource(input_url=input_file,
-                     config=Config(standard_config['source_config'])) as source:
-        source.pointing_information = False
-        source.trigger_information = True
-        source.log.setLevel(logging.WARNING)
-        try:
-            for ievent, event in enumerate(source):
-                if event.r0.tel[1].waveform is None:
-                    logging.error('The data seem to contain R0 waveforms. Is '
-                                  'this already gain-selected data?')
-                    exit(1)
-
-                # Check if this may be a FF event
-                # Subtract baseline offset (convert to signed integer to
-                # allow for negative fluctuations!)
-                wf_hg = event.r0.tel[1].waveform[0][:,
-                        SAMPLE_START:SAMPLE_END].astype('int16') - OFFSET
-                # pixel-wise integral:
-                wf_hg_sum = np.sum(wf_hg, axis=1)
-                ff_pix = ((wf_hg_sum > MIN_FLATFIELD_ADC) &
-                          (wf_hg_sum < MAX_FLATFIELD_ADC))
-                event_type.append(event.trigger.event_type)
-
-                # Check fraction of pixels with HG in "FF-like range"
-                if ff_pix.sum() / event.r0.tel[1].waveform.shape[1] > \
-                        MIN_FLATFIELD_PIXEL_FRACTION:
-                    # Looks like a FF event:
-                    event_type[-1] = EventType.FLATFIELD
-                elif event_type[-1] == EventType.FLATFIELD:
-                    # If originally tagged as FF, but not looking like FF:
-                    event_type[-1] = EventType.UNKNOWN
-
-                event_id.append(event.index.event_id)
-            print('Finished first loop over input files - all ok!')
-
-        except Exception as err:
-            print(err)
-            print('Something went wrong!')
-            exit(1)
+    # First identify properly interleaved pedestals (also in case there are
+    # ucts jumps) and FF events (heuristically):
+    event_id, event_type = get_event_types(input_file, args.yyyymmdd)
 
     event_id = np.array(event_id)
-
-    # Numerical values of event types:
     event_type_val = np.array([x.value for x in event_type])
 
     logging.info('Identified event types and number of events:')
@@ -154,7 +90,7 @@ def main():
                 evtype = event_type_val[event_id==event.event_id][0]
 
                 if ((evtype == EventType.SUBARRAY.value) |
-                        (evtype == EventType.UNKNOWN.value)):
+                    (evtype == EventType.UNKNOWN.value)):
                     # Find pixels with HG above gain switch threshold:
                     wf = protozfits.any_array_to_numpy(event.waveform)
                     num_gains = int(wf.size / num_pixels / num_samples)
@@ -175,22 +111,88 @@ def main():
 
                     pixel_status = protozfits.any_array_to_numpy(event.pixel_status)
                     # Set to 0 the status bit of the removed gain:
-                    new_status = np.where(use_lg, pixel_status & 0b1011,
+                    new_status = np.where(use_lg,
+                                          pixel_status & 0b1011,
                                           pixel_status & 0b0111)
                     event.pixel_status.data = new_status.tobytes()
 
                 count += 1
-                # if count>1000:
-                #     break
 
                 stream.write_message(event)
 
             stream.close()
             input_streams[i].close()
 
-
     logging.info('R0 to R0G conversion finished successfully!')
     return(0)
 
+
+def get_event_types(input_file, yyyymmdd):
+
+    # For heuristic flat field identification (values refer to
+    # baseline-subtracted HG integrated waveforms):
+    MIN_FLATFIELD_ADC = 3000
+    MAX_FLATFIELD_ADC = 12000
+    MIN_FLATFIELD_PIXEL_FRACTION = 0.8
+
+    # Look for the auxiliary files in their standard locations:
+    drive_file = f'/fefs/onsite/monitoring/driveLST1/DrivePositioning' \
+                 f'/DrivePosition_log_{yyyymmdd}.txt'
+    run_summary = f'/fefs/aswg/data/real/monitoring/RunSummary' \
+                  f'/RunSummary_{yyyymmdd}.ecsv'
+
+    standard_config['source_config']['LSTEventSource'][
+        'EventTimeCalculator']['run_summary_path'] = run_summary
+    standard_config['source_config']['LSTEventSource']['PointingSource'][
+        'drive_report_path'] = drive_file
+    standard_config['source_config']['LSTEventSource'][
+        'apply_drs4_corrections'] = False
+
+    event_type = []
+    event_id = []
+    with EventSource(input_url=input_file,
+                     config=Config(standard_config['source_config'])) as source:
+        source.pointing_information = False
+        source.trigger_information = True
+        source.log.setLevel(logging.WARNING)
+        try:
+            for ievent, event in enumerate(source):
+                if event.r0.tel[1].waveform is None:
+                    logging.error('The data seem to contain no R0 waveforms. '
+                                  'Is this already gain-selected data?')
+                    exit(1)
+
+                # Check if this may be a FF event
+                # Subtract baseline offset (convert to signed integer to
+                # allow for negative fluctuations!)
+                wf_hg = event.r0.tel[1].waveform[0][:,
+                        SAMPLE_START:SAMPLE_END].astype('int16') - OFFSET
+                # pixel-wise integral:
+                wf_hg_sum = np.sum(wf_hg, axis=1)
+                ff_pix = ((wf_hg_sum > MIN_FLATFIELD_ADC) &
+                          (wf_hg_sum < MAX_FLATFIELD_ADC))
+                event_type.append(event.trigger.event_type)
+
+                # Check fraction of pixels with HG in "FF-like range"
+                if ff_pix.sum() / event.r0.tel[1].waveform.shape[1] > \
+                        MIN_FLATFIELD_PIXEL_FRACTION:
+                    # Looks like a FF event:
+                    event_type[-1] = EventType.FLATFIELD
+                elif event_type[-1] == EventType.FLATFIELD:
+                    # If originally tagged as FF, but not looking like FF:
+                    event_type[-1] = EventType.UNKNOWN
+
+                event_id.append(event.index.event_id)
+            print('Finished first loop over input files - all ok!')
+
+        except Exception as err:
+            print(err)
+            print('Something went wrong!')
+            exit(1)
+
+    return event_id, event_type
+
+
+
 if __name__ == '__main__':
     main()
\ No newline at end of file

From df8483cb6648e0ed7017717e8c647bf15e1c22b3 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Thu, 21 Dec 2023 16:15:28 +0100
Subject: [PATCH 04/35] Changes to allow processing EvB6 data

---
 lstchain/scripts/lstchain_r0_to_r0g.py | 32 ++++++++++++++++++--------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index d9397948af..2e5d02fbc5 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -61,8 +61,15 @@ def main():
     for name in input_stream_names:
         input_streams.append(protozfits.File(name, pure_protobuf=True))
 
-    num_pixels = input_streams[0].CameraConfig[0].num_pixels
-    num_samples = input_streams[0].CameraConfig[0].num_samples
+    is_evb6_data = False
+    try:
+        num_pixels = input_streams[0].CameraConfig[0].num_pixels
+        num_samples = input_streams[0].CameraConfig[0].num_samples
+    except:
+        is_evb6_data = True
+        num_pixels = input_streams[0].CameraConfiguration[0].num_pixels
+        num_samples = input_streams[0].CameraConfiguration[
+            0].num_samples_nominal
 
     with ExitStack() as stack:
         for i, name in enumerate(output_stream_names):
@@ -78,14 +85,21 @@ def main():
                     compression_block_size_kb=64*1024,
                     defaul_compression="lst"))
             stream.open(name)
-            stream.move_to_new_table("CameraConfig")
-            stream.write_message(input_streams[i].CameraConfig[0])
+            if is_evb6_data:
+                stream.move_to_new_table("CameraConfiguration")
+                stream.write_message(input_streams[i].CameraConfiguration[0])
+                stream.move_to_new_table("DataStream")
+                stream.write_message(input_streams[i].DataStream[0])
+            else:
+                stream.move_to_new_table("CameraConfig")
+                stream.write_message(input_streams[i].CameraConfig[0])
 
             stream.move_to_new_table("Events")
 
-            count = 0
             for event in input_streams[i].Events:
-
+                # skip corrupted events:
+                if event.event_id == 0:
+                    continue
                 # Get the event type, from the first loop over the files:
                 evtype = event_type_val[event_id==event.event_id][0]
 
@@ -106,9 +120,11 @@ def main():
                     use_lg = (np.max(hg[:, SAMPLE_START:SAMPLE_END], axis=1)
                               > THRESHOLD)
                     new_wf = np.where(use_lg[:, None], lg, hg) # gain-selected
-
                     event.waveform.data = new_wf.tobytes()
 
+                    if is_evb6_data:
+                        event.num_channels = 1  # Just one gain stored!
+
                     pixel_status = protozfits.any_array_to_numpy(event.pixel_status)
                     # Set to 0 the status bit of the removed gain:
                     new_status = np.where(use_lg,
@@ -116,8 +132,6 @@ def main():
                                           pixel_status & 0b0111)
                     event.pixel_status.data = new_status.tobytes()
 
-                count += 1
-
                 stream.write_message(event)
 
             stream.close()

From b80a4044041fc0a0935b568a8851834326178e2a Mon Sep 17 00:00:00 2001
From: Gabriel Emery <gabriel-emery@hotmail.fr>
Date: Mon, 22 Jan 2024 15:39:48 +0100
Subject: [PATCH 05/35] Limit scipy to version before 1.12 which introduces a
 breaking change

---
 environment.yml | 2 +-
 setup.py        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/environment.yml b/environment.yml
index c4dfbd4b54..001ca8a301 100644
--- a/environment.yml
+++ b/environment.yml
@@ -20,6 +20,7 @@ dependencies:
   - protobuf=3.20
   - protozfits=2.2
   - pyparsing
+  - scipy~=1.11.4
   - scikit-learn=1.2
   - sphinx=4
   - sphinx-automodapi
@@ -35,4 +36,3 @@ dependencies:
   - ctapipe_io_lst=0.22
   - pytest
   - pyirf~=0.10.0
-
diff --git a/setup.py b/setup.py
index 78d47e14b3..676f8ab8d1 100644
--- a/setup.py
+++ b/setup.py
@@ -55,7 +55,7 @@ def find_scripts(script_dir, prefix):
         'pandas',
         'protobuf~=3.20.0',
         'pyirf~=0.10.0',
-        'scipy>=1.8',
+        'scipy>=1.8,<1.12',
         'seaborn',
         'scikit-learn~=1.2',
         'tables',

From 180c98733c326ccb57917fee54e00d2ae3123096 Mon Sep 17 00:00:00 2001
From: Daniel Morcuende <dmorcuende@iaa.es>
Date: Mon, 22 Jan 2024 16:23:06 +0100
Subject: [PATCH 06/35] unfreeze sphinx version

---
 environment.yml | 2 +-
 setup.py        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/environment.yml b/environment.yml
index 001ca8a301..2c04db6154 100644
--- a/environment.yml
+++ b/environment.yml
@@ -22,7 +22,7 @@ dependencies:
   - pyparsing
   - scipy~=1.11.4
   - scikit-learn=1.2
-  - sphinx=4
+  - sphinx
   - sphinx-automodapi
   - sphinx_rtd_theme
   - sphinx-argparse
diff --git a/setup.py b/setup.py
index 676f8ab8d1..77ba7375c7 100644
--- a/setup.py
+++ b/setup.py
@@ -25,7 +25,7 @@ def find_scripts(script_dir, prefix):
 
 tests_require = ["pytest"]
 docs_require = [
-    "sphinx~=4.2",
+    "sphinx",
     "sphinx-automodapi",
     "sphinx_argparse",
     "sphinx_rtd_theme",

From 30f0a2dfa01d50162f4be144786b87b5c52dd627 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Mon, 22 Jan 2024 16:41:32 +0100
Subject: [PATCH 07/35] Deal with missing modules in modifier.py

Just unse nanmedian, nanstd to skip non-existent pixels
---
 lstchain/image/modifier.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lstchain/image/modifier.py b/lstchain/image/modifier.py
index 29a28e8348..59b0622933 100644
--- a/lstchain/image/modifier.py
+++ b/lstchain/image/modifier.py
@@ -230,8 +230,8 @@ def calculate_noise_parameters(simtel_filename, data_dl1_filename,
 
     # Identify noisy pixels, likely containing stars - we want to adjust MC to
     # the average diffuse NSB across the camera
-    data_median_std_ped_pe = np.median(data_HG_ped_std_pe)
-    data_std_std_ped_pe = np.std(data_HG_ped_std_pe)
+    data_median_std_ped_pe = np.nanmedian(data_HG_ped_std_pe)
+    data_std_std_ped_pe = np.nanstd(data_HG_ped_std_pe)
     log.info(f'Real data: median across camera of good pixels\' pedestal std '
              f'{data_median_std_ped_pe:.3f} p.e.')
     brightness_limit = data_median_std_ped_pe + 3 * data_std_std_ped_pe
@@ -247,7 +247,7 @@ def calculate_noise_parameters(simtel_filename, data_dl1_filename,
     # Exclude too bright pixels, besides those with unusable calibration:
     good_pixels &= ~too_bright_pixels
     # recalculate the median of the pixels' std dev, with good_pixels:
-    data_median_std_ped_pe = np.median(data_HG_ped_std_pe[good_pixels])
+    data_median_std_ped_pe = np.nanmedian(data_HG_ped_std_pe[good_pixels])
 
     log.info(f'Good and not too bright pixels: {good_pixels.sum()}')
 

From 335b23f1e94bc4ad114b2c0c101f282d0d8afbf9 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Tue, 23 Jan 2024 12:39:31 +0100
Subject: [PATCH 08/35] Added protozfits

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 77ba7375c7..497d7b64b1 100644
--- a/setup.py
+++ b/setup.py
@@ -60,6 +60,7 @@ def find_scripts(script_dir, prefix):
         'scikit-learn~=1.2',
         'tables',
         'toml',
+        'protozfits~=2.2',
         'pymongo',
         'pyparsing',
         'setuptools_scm',

From 7a37ef2ae6b1c3e092d17462ab29180a6fc9d196 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Tue, 23 Jan 2024 14:29:43 +0100
Subject: [PATCH 09/35] Address various suggestions

---
 lstchain/scripts/lstchain_r0_to_r0g.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index 2e5d02fbc5..315e279523 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -1,5 +1,15 @@
 #!/usr/bin/env python
+"""
+Script to apply gain selection to the raw fits.fz produced by EvB6 in CTAR1
+format (aka R1v1). It also works for data created with EVBv5 + ADH-ZFW.
 
+Gain selection is only applied to shower events (not to interleaved pedestal
+and flatfield)
+
+It uses heuristic identification of the interleaved flatfield (FF) events (
+given the occasional problems we have with the FF event tagging)
+
+"""
 import logging
 import protozfits
 import argparse
@@ -170,7 +180,7 @@ def get_event_types(input_file, yyyymmdd):
         source.trigger_information = True
         source.log.setLevel(logging.WARNING)
         try:
-            for ievent, event in enumerate(source):
+            for event in source:
                 if event.r0.tel[1].waveform is None:
                     logging.error('The data seem to contain no R0 waveforms. '
                                   'Is this already gain-selected data?')
@@ -197,11 +207,12 @@ def get_event_types(input_file, yyyymmdd):
                     event_type[-1] = EventType.UNKNOWN
 
                 event_id.append(event.index.event_id)
-            print('Finished first loop over input files - all ok!')
+
+            logging.info('Finished first loop over input files - all ok!')
 
         except Exception as err:
-            print(err)
-            print('Something went wrong!')
+            logging.error(err)
+            logging.error('Something went wrong!')
             exit(1)
 
     return event_id, event_type

From 20316cd0dc97853c1277e52cacd173a0f4f1f8c5 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Thu, 25 Jan 2024 13:02:17 +0100
Subject: [PATCH 10/35] Update test_lstchain_scripts.py

WIP!  I need to include in the script arguments to explicitly point to the needed Drive and RunSummary files (paths are hardcoded as of now!)
---
 lstchain/scripts/tests/test_lstchain_scripts.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/lstchain/scripts/tests/test_lstchain_scripts.py b/lstchain/scripts/tests/test_lstchain_scripts.py
index 8f46742982..943e2a4368 100644
--- a/lstchain/scripts/tests/test_lstchain_scripts.py
+++ b/lstchain/scripts/tests/test_lstchain_scripts.py
@@ -83,6 +83,15 @@ def merged_simulated_dl1_file(simulated_dl1_file, temp_dir_simulated_files):
 def test_lstchain_mc_r0_to_dl1(simulated_dl1_file):
     assert simulated_dl1_file.is_file()
 
+@pytest.mark.private_data
+def test_lstchain_r0_to_r0g(tmp_path):
+    test_data = Path(os.getenv('LSTCHAIN_TEST_DATA', 'test_data'))
+    input_file = test_data / "real/R0/20231218/LST-1.1.Run16231.0000_first50.fits.fz"
+    output_dir = temp_dir_observed_files / "R0G"
+    output_dir.mkdir()
+    run_program("lstchain_r0_to_r0g", "-f", input_file, "-o", output_dir)
+    output_file = output_dir / input_file.name
+    assert output_file.is_file()
 
 @pytest.mark.private_data
 def test_lstchain_data_r0_to_dl1(observed_dl1_files):

From d48e3cef27a63d4f6b5e5c2d7c1b97a0bced8154 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Thu, 25 Jan 2024 14:17:05 +0100
Subject: [PATCH 11/35] Path to needed files

Set explicitly the location of the Drive log file and the Run Summary file (needed by the event source)
---
 lstchain/scripts/lstchain_r0_to_r0g.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index 315e279523..bfee236b22 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -30,8 +30,12 @@
 parser.add_argument('-o', '--output-dir', dest='output_dir',
                     type=str, default='./',
                     help='Output directory')
-parser.add_argument('-yyyymmdd', dest='yyyymmdd', type=str,
-                    help='date (YYYYMMDD)')
+
+parser.add_argument('--drive-file', dest='drive_file', type=str,
+                    help='Drive log file')
+
+parser.add_argument('--run-summary', dest='run_summary', type=str,
+                    help='Run Summary file')
 
 # Range of waveform to be checked (for gain selection & heuristic FF
 # identification)
@@ -51,7 +55,8 @@ def main():
 
     # First identify properly interleaved pedestals (also in case there are
     # ucts jumps) and FF events (heuristically):
-    event_id, event_type = get_event_types(input_file, args.yyyymmdd)
+    event_id, event_type = get_event_types(input_file, 
+                                           args.drive_file, args.run_summary)
 
     event_id = np.array(event_id)
     event_type_val = np.array([x.value for x in event_type])
@@ -151,7 +156,7 @@ def main():
     return(0)
 
 
-def get_event_types(input_file, yyyymmdd):
+def get_event_types(input_file, drive_file, run_summary):
 
     # For heuristic flat field identification (values refer to
     # baseline-subtracted HG integrated waveforms):
@@ -159,12 +164,6 @@ def get_event_types(input_file, yyyymmdd):
     MAX_FLATFIELD_ADC = 12000
     MIN_FLATFIELD_PIXEL_FRACTION = 0.8
 
-    # Look for the auxiliary files in their standard locations:
-    drive_file = f'/fefs/onsite/monitoring/driveLST1/DrivePositioning' \
-                 f'/DrivePosition_log_{yyyymmdd}.txt'
-    run_summary = f'/fefs/aswg/data/real/monitoring/RunSummary' \
-                  f'/RunSummary_{yyyymmdd}.ecsv'
-
     standard_config['source_config']['LSTEventSource'][
         'EventTimeCalculator']['run_summary_path'] = run_summary
     standard_config['source_config']['LSTEventSource']['PointingSource'][
@@ -220,4 +219,4 @@ def get_event_types(input_file, yyyymmdd):
 
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()

From dbff077729f9ed45c7291ee8c8e54bd45ce1180a Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Thu, 25 Jan 2024 14:20:34 +0100
Subject: [PATCH 12/35] Update test_lstchain_scripts.py

---
 lstchain/scripts/tests/test_lstchain_scripts.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lstchain/scripts/tests/test_lstchain_scripts.py b/lstchain/scripts/tests/test_lstchain_scripts.py
index 943e2a4368..fdf51ef981 100644
--- a/lstchain/scripts/tests/test_lstchain_scripts.py
+++ b/lstchain/scripts/tests/test_lstchain_scripts.py
@@ -87,9 +87,12 @@ def test_lstchain_mc_r0_to_dl1(simulated_dl1_file):
 def test_lstchain_r0_to_r0g(tmp_path):
     test_data = Path(os.getenv('LSTCHAIN_TEST_DATA', 'test_data'))
     input_file = test_data / "real/R0/20231218/LST-1.1.Run16231.0000_first50.fits.fz"
+    drive_file = test_data / "real/R0/20231218/DrivePosition_log_20231218.txt"
+    run_summary = test_data / "RunSummary_20231218.ecsv"
     output_dir = temp_dir_observed_files / "R0G"
     output_dir.mkdir()
-    run_program("lstchain_r0_to_r0g", "-f", input_file, "-o", output_dir)
+    run_program("lstchain_r0_to_r0g", "-f", input_file, "-o", output_dir,
+                "--drive-file", drive_file, "--run-summary", run_summary)
     output_file = output_dir / input_file.name
     assert output_file.is_file()
 

From cf3c2015859ec75fbfcdbb4f42b0757eaf818a8e Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Thu, 25 Jan 2024 14:26:44 +0100
Subject: [PATCH 13/35] Update test_lstchain_scripts.py

---
 lstchain/scripts/tests/test_lstchain_scripts.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lstchain/scripts/tests/test_lstchain_scripts.py b/lstchain/scripts/tests/test_lstchain_scripts.py
index fdf51ef981..072c30b6e3 100644
--- a/lstchain/scripts/tests/test_lstchain_scripts.py
+++ b/lstchain/scripts/tests/test_lstchain_scripts.py
@@ -84,7 +84,7 @@ def test_lstchain_mc_r0_to_dl1(simulated_dl1_file):
     assert simulated_dl1_file.is_file()
 
 @pytest.mark.private_data
-def test_lstchain_r0_to_r0g(tmp_path):
+def test_lstchain_r0_to_r0g(tmp_path, temp_dir_observed_files):
     test_data = Path(os.getenv('LSTCHAIN_TEST_DATA', 'test_data'))
     input_file = test_data / "real/R0/20231218/LST-1.1.Run16231.0000_first50.fits.fz"
     drive_file = test_data / "real/R0/20231218/DrivePosition_log_20231218.txt"

From b054751a5984cb6dbe658cb786482a5ab31b1ba2 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Fri, 26 Jan 2024 09:16:51 +0100
Subject: [PATCH 14/35] Now the script works only for EvB6 data (CTAR1). No
 need for Drive log or run summary

---
 lstchain/scripts/lstchain_r0_to_r0g.py        | 44 ++++++-------------
 .../scripts/tests/test_lstchain_scripts.py    |  3 +-
 2 files changed, 15 insertions(+), 32 deletions(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index bfee236b22..3e890403ef 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 """
 Script to apply gain selection to the raw fits.fz produced by EvB6 in CTAR1
-format (aka R1v1). It also works for data created with EVBv5 + ADH-ZFW.
+format (aka R1v1).
 
 Gain selection is only applied to shower events (not to interleaved pedestal
 and flatfield)
@@ -31,12 +31,6 @@
                     type=str, default='./',
                     help='Output directory')
 
-parser.add_argument('--drive-file', dest='drive_file', type=str,
-                    help='Drive log file')
-
-parser.add_argument('--run-summary', dest='run_summary', type=str,
-                    help='Run Summary file')
-
 # Range of waveform to be checked (for gain selection & heuristic FF
 # identification)
 SAMPLE_START = 3
@@ -55,8 +49,7 @@ def main():
 
     # First identify properly interleaved pedestals (also in case there are
     # ucts jumps) and FF events (heuristically):
-    event_id, event_type = get_event_types(input_file, 
-                                           args.drive_file, args.run_summary)
+    event_id, event_type = get_event_types(input_file)
 
     event_id = np.array(event_id)
     event_type_val = np.array([x.value for x in event_type])
@@ -76,15 +69,13 @@ def main():
     for name in input_stream_names:
         input_streams.append(protozfits.File(name, pure_protobuf=True))
 
-    is_evb6_data = False
     try:
-        num_pixels = input_streams[0].CameraConfig[0].num_pixels
-        num_samples = input_streams[0].CameraConfig[0].num_samples
-    except:
-        is_evb6_data = True
         num_pixels = input_streams[0].CameraConfiguration[0].num_pixels
         num_samples = input_streams[0].CameraConfiguration[
             0].num_samples_nominal
+    except:
+        logging.error('CameraConfiguration not found! Is this CTAR1 data?')
+        exit(1)
 
     with ExitStack() as stack:
         for i, name in enumerate(output_stream_names):
@@ -100,14 +91,11 @@ def main():
                     compression_block_size_kb=64*1024,
                     defaul_compression="lst"))
             stream.open(name)
-            if is_evb6_data:
-                stream.move_to_new_table("CameraConfiguration")
-                stream.write_message(input_streams[i].CameraConfiguration[0])
-                stream.move_to_new_table("DataStream")
-                stream.write_message(input_streams[i].DataStream[0])
-            else:
-                stream.move_to_new_table("CameraConfig")
-                stream.write_message(input_streams[i].CameraConfig[0])
+
+            stream.move_to_new_table("CameraConfiguration")
+            stream.write_message(input_streams[i].CameraConfiguration[0])
+            stream.move_to_new_table("DataStream")
+            stream.write_message(input_streams[i].DataStream[0])
 
             stream.move_to_new_table("Events")
 
@@ -136,9 +124,7 @@ def main():
                               > THRESHOLD)
                     new_wf = np.where(use_lg[:, None], lg, hg) # gain-selected
                     event.waveform.data = new_wf.tobytes()
-
-                    if is_evb6_data:
-                        event.num_channels = 1  # Just one gain stored!
+                    event.num_channels = 1  # Just one gain stored!
 
                     pixel_status = protozfits.any_array_to_numpy(event.pixel_status)
                     # Set to 0 the status bit of the removed gain:
@@ -156,7 +142,7 @@ def main():
     return(0)
 
 
-def get_event_types(input_file, drive_file, run_summary):
+def get_event_types(input_file):
 
     # For heuristic flat field identification (values refer to
     # baseline-subtracted HG integrated waveforms):
@@ -164,12 +150,10 @@ def get_event_types(input_file, drive_file, run_summary):
     MAX_FLATFIELD_ADC = 12000
     MIN_FLATFIELD_PIXEL_FRACTION = 0.8
 
-    standard_config['source_config']['LSTEventSource'][
-        'EventTimeCalculator']['run_summary_path'] = run_summary
-    standard_config['source_config']['LSTEventSource']['PointingSource'][
-        'drive_report_path'] = drive_file
     standard_config['source_config']['LSTEventSource'][
         'apply_drs4_corrections'] = False
+    standard_config['source_config']['LSTEventSource'][
+        'pointing_information'] = False
 
     event_type = []
     event_id = []
diff --git a/lstchain/scripts/tests/test_lstchain_scripts.py b/lstchain/scripts/tests/test_lstchain_scripts.py
index 072c30b6e3..157b63c48d 100644
--- a/lstchain/scripts/tests/test_lstchain_scripts.py
+++ b/lstchain/scripts/tests/test_lstchain_scripts.py
@@ -91,8 +91,7 @@ def test_lstchain_r0_to_r0g(tmp_path, temp_dir_observed_files):
     run_summary = test_data / "RunSummary_20231218.ecsv"
     output_dir = temp_dir_observed_files / "R0G"
     output_dir.mkdir()
-    run_program("lstchain_r0_to_r0g", "-f", input_file, "-o", output_dir,
-                "--drive-file", drive_file, "--run-summary", run_summary)
+    run_program("lstchain_r0_to_r0g", "-f", input_file, "-o", output_dir)
     output_file = output_dir / input_file.name
     assert output_file.is_file()
 

From 015c000121041128b9b0fc3e481d2bb47f3d6524 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Fri, 26 Jan 2024 09:21:55 +0100
Subject: [PATCH 15/35] Removed unused variables

---
 lstchain/scripts/tests/test_lstchain_scripts.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/lstchain/scripts/tests/test_lstchain_scripts.py b/lstchain/scripts/tests/test_lstchain_scripts.py
index 157b63c48d..bbd04badaf 100644
--- a/lstchain/scripts/tests/test_lstchain_scripts.py
+++ b/lstchain/scripts/tests/test_lstchain_scripts.py
@@ -87,8 +87,6 @@ def test_lstchain_mc_r0_to_dl1(simulated_dl1_file):
 def test_lstchain_r0_to_r0g(tmp_path, temp_dir_observed_files):
     test_data = Path(os.getenv('LSTCHAIN_TEST_DATA', 'test_data'))
     input_file = test_data / "real/R0/20231218/LST-1.1.Run16231.0000_first50.fits.fz"
-    drive_file = test_data / "real/R0/20231218/DrivePosition_log_20231218.txt"
-    run_summary = test_data / "RunSummary_20231218.ecsv"
     output_dir = temp_dir_observed_files / "R0G"
     output_dir.mkdir()
     run_program("lstchain_r0_to_r0g", "-f", input_file, "-o", output_dir)

From a1957d994611ad0c843868f9d40ec7030de99cc3 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Fri, 26 Jan 2024 11:03:35 +0100
Subject: [PATCH 16/35] Proper naming of the data format

---
 lstchain/scripts/lstchain_r0_to_r0g.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index 3e890403ef..f878dce103 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -74,7 +74,7 @@ def main():
         num_samples = input_streams[0].CameraConfiguration[
             0].num_samples_nominal
     except:
-        logging.error('CameraConfiguration not found! Is this CTAR1 data?')
+        logging.error('CameraConfiguration not found! Is this R1v1 data?')
         exit(1)
 
     with ExitStack() as stack:

From 552c83db4e240cc39358765e74194954dc48d5be Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo Olaizola <moralejo@ifae.es>
Date: Fri, 26 Jan 2024 12:00:29 +0100
Subject: [PATCH 17/35] Changed name of test file

---
 lstchain/scripts/tests/test_lstchain_scripts.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lstchain/scripts/tests/test_lstchain_scripts.py b/lstchain/scripts/tests/test_lstchain_scripts.py
index bbd04badaf..08b236a5ff 100644
--- a/lstchain/scripts/tests/test_lstchain_scripts.py
+++ b/lstchain/scripts/tests/test_lstchain_scripts.py
@@ -86,7 +86,8 @@ def test_lstchain_mc_r0_to_dl1(simulated_dl1_file):
 @pytest.mark.private_data
 def test_lstchain_r0_to_r0g(tmp_path, temp_dir_observed_files):
     test_data = Path(os.getenv('LSTCHAIN_TEST_DATA', 'test_data'))
-    input_file = test_data / "real/R0/20231218/LST-1.1.Run16231.0000_first50.fits.fz"
+    input_file = test_data / "real/R0/20231214/LST-1.1.Run16102.0010_first50" \
+                             ".fits.fz"
     output_dir = temp_dir_observed_files / "R0G"
     output_dir.mkdir()
     run_program("lstchain_r0_to_r0g", "-f", input_file, "-o", output_dir)

From d3211b956eb8ae7bb29d2ce8a8c2e626f8374852 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Tue, 20 Feb 2024 08:34:35 +0100
Subject: [PATCH 18/35] Update test_lstchain_scripts.py

---
 lstchain/scripts/tests/test_lstchain_scripts.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lstchain/scripts/tests/test_lstchain_scripts.py b/lstchain/scripts/tests/test_lstchain_scripts.py
index 08b236a5ff..11d9846940 100644
--- a/lstchain/scripts/tests/test_lstchain_scripts.py
+++ b/lstchain/scripts/tests/test_lstchain_scripts.py
@@ -86,7 +86,7 @@ def test_lstchain_mc_r0_to_dl1(simulated_dl1_file):
 @pytest.mark.private_data
 def test_lstchain_r0_to_r0g(tmp_path, temp_dir_observed_files):
     test_data = Path(os.getenv('LSTCHAIN_TEST_DATA', 'test_data'))
-    input_file = test_data / "real/R0/20231214/LST-1.1.Run16102.0010_first50" \
+    input_file = test_data / "real/R0/20231214/LST-1.1.Run16102.0000_first50" \
                              ".fits.fz"
     output_dir = temp_dir_observed_files / "R0G"
     output_dir.mkdir()

From 1888038383809cd7d28539693743c9deedf7cc56 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Tue, 20 Feb 2024 11:34:38 +0100
Subject: [PATCH 19/35] Baseline offset read from file

---
 lstchain/scripts/lstchain_r0_to_r0g.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index f878dce103..f96692e9f0 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -35,14 +35,12 @@
 # identification)
 SAMPLE_START = 3
 SAMPLE_END = 39
-# Baseline offset:
-OFFSET = 400
 
 def main():
     args = parser.parse_args()
 
     # Level of high gain (HG) required for switching to low gain (LG)
-    THRESHOLD = 3500 + OFFSET
+    THRESHOLD = 3900
 
     input_file = args.input_file
     output_dir = args.output_dir
@@ -162,6 +160,8 @@ def get_event_types(input_file):
         source.pointing_information = False
         source.trigger_information = True
         source.log.setLevel(logging.WARNING)
+
+        offset = source.data_stream.waveform_offset 
         try:
             for event in source:
                 if event.r0.tel[1].waveform is None:
@@ -173,7 +173,7 @@ def get_event_types(input_file):
                 # Subtract baseline offset (convert to signed integer to
                 # allow for negative fluctuations!)
                 wf_hg = event.r0.tel[1].waveform[0][:,
-                        SAMPLE_START:SAMPLE_END].astype('int16') - OFFSET
+                        SAMPLE_START:SAMPLE_END].astype('int16') - offset
                 # pixel-wise integral:
                 wf_hg_sum = np.sum(wf_hg, axis=1)
                 ff_pix = ((wf_hg_sum > MIN_FLATFIELD_ADC) &

From d72d37cbea41b8b7a2dbfaf9cb7c81d4b0e13ed9 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Tue, 20 Feb 2024 12:12:50 +0100
Subject: [PATCH 20/35] Update test_lstchain_scripts.py

Test number of gains in the output
---
 lstchain/scripts/tests/test_lstchain_scripts.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/lstchain/scripts/tests/test_lstchain_scripts.py b/lstchain/scripts/tests/test_lstchain_scripts.py
index 11d9846940..8f6f4b7919 100644
--- a/lstchain/scripts/tests/test_lstchain_scripts.py
+++ b/lstchain/scripts/tests/test_lstchain_scripts.py
@@ -15,6 +15,9 @@
 from astropy.time import Time
 from ctapipe.instrument import SubarrayDescription
 from ctapipe.io import read_table
+from ctapipe.io import EventSource
+from ctapipe.containers import EventType
+
 
 from lstchain.io.config import get_srcdep_config, get_standard_config
 from lstchain.io.io import (dl1_images_lstcam_key, dl1_params_lstcam_key,
@@ -94,6 +97,18 @@ def test_lstchain_r0_to_r0g(tmp_path, temp_dir_observed_files):
     output_file = output_dir / input_file.name
     assert output_file.is_file()
 
+    src = EventSource(input_url=output_file)
+    src.pointing_information = False
+    src.trigger_information = False
+    src.apply_drs4_corrections = False
+    # Check number of gains for first event of each type:
+    for evtype, ngains in zip([EventType.FLATFIELD, EventType.SKY_PEDESTAL, 
+                               EventType.SUBARRAY], [2, 2, 1]):
+        for event in src:
+            if event.trigger.event_type == evtype:
+                break
+        assert(event.r0.tel[1].waveform.shape[0] == ngains)  
+
 @pytest.mark.private_data
 def test_lstchain_data_r0_to_dl1(observed_dl1_files):
     assert observed_dl1_files["dl1_file1"].is_file()

From 6bc41e05b07e3f9239010bd59b20c9c08a9e728c Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Tue, 20 Feb 2024 18:34:42 +0100
Subject: [PATCH 21/35] Update lstchain_r0_to_r0g.py

Addressed reviewer's comments
---
 lstchain/scripts/lstchain_r0_to_r0g.py | 122 ++++++++++++++-----------
 1 file changed, 67 insertions(+), 55 deletions(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index f96692e9f0..54e5bb22e1 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -13,10 +13,12 @@
 import logging
 import protozfits
 import argparse
+import sys
 
 from ctapipe.io import EventSource
 from ctapipe.containers import EventType
 from lstchain.io import standard_config
+import lstchain.paths as paths
 from traitlets.config import Config
 
 import numpy as np
@@ -36,52 +38,62 @@
 SAMPLE_START = 3
 SAMPLE_END = 39
 
-def main():
-    args = parser.parse_args()
+# Level of high gain (HG) required for switching to low gain (LG)
+THRESHOLD = 3900
 
-    # Level of high gain (HG) required for switching to low gain (LG)
-    THRESHOLD = 3900
+# Events for which gain selection will be applied:
+EVENT_TYPES_TO_REDUCE = [EventType.SUBARRAY, EventType.UNKNOWN]
 
+def main():
+    args = parser.parse_args()
+  
     input_file = args.input_file
     output_dir = args.output_dir
-
+  
+    log = logging.getLogger("lstchain_r0_to_r0g")
+  
     # First identify properly interleaved pedestals (also in case there are
     # ucts jumps) and FF events (heuristically):
-    event_id, event_type = get_event_types(input_file)
+    event_type = get_event_types(input_file)
+    event_type_val = np.array([x.value for x in event_type.values()])
 
-    event_id = np.array(event_id)
-    event_type_val = np.array([x.value for x in event_type])
-
-    logging.info('Identified event types and number of events:')
-    for j in np.unique(event_type_val):
-        logging.info(f'{EventType(j)}:, {np.sum(event_type_val == j)}\n')
+    log.info('Identified event types and number of events:')
+    evtype, counts = np.unique(event_type_val, return_counts=True)
+    for j, n in zip(evtype, counts):
+        log.info('%s: %d', EventType(j), n)
 
     # Now loop over the files (4 streams) again to perform the actual gain
     # selection:
-    k = input_file.find('/LST-1.')
-    input_stream_names = [input_file[:k+7]+str(id_stream+1)+input_file[k+8:]
+    
+    run_info = pth.parse_r0_filename(input_file)
+    input_stream_names = [pth.Path(pth.Path(input_file).parent,
+                                   pth.run_to_r0_filename(run_info.tel_id, 
+                                                          run_info.run,
+                                                          run_info.subrun, 
+                                                          id_stream))
                           for id_stream in range(4)]
-    output_stream_names = [output_dir + name[k:] for name in input_stream_names]
+    output_stream_names = [pth.Path(output_dir, pth.Path(inputsn).name) 
+                           for inputsn in input_stream_names]
 
     input_streams = []
     for name in input_stream_names:
         input_streams.append(protozfits.File(name, pure_protobuf=True))
 
     try:
-        num_pixels = input_streams[0].CameraConfiguration[0].num_pixels
-        num_samples = input_streams[0].CameraConfiguration[
-            0].num_samples_nominal
-    except:
-        logging.error('CameraConfiguration not found! Is this R1v1 data?')
-        exit(1)
+      camera_config = input_streams[0].CameraConfiguration[0]
+    except (AttributeError, IndexError):
+      log.error("CameraConfiguration not found! Is this R1v1 data?')
+      sys.exit(1)
+
+    num_pixels = camera_config.num_pixels
+    num_samples = camera_config.num_samples_nominal
+
 
     with ExitStack() as stack:
         for i, name in enumerate(output_stream_names):
-
-            n_tiles = input_streams[i].Events.protobuf_i_fits.header[
-                'NAXIS2'].value
-            rows_per_tile = input_streams[i].Events.protobuf_i_fits.header[
-                'ZTILELEN'].value
+            header = input_streams[i].Events.header
+            n_tiles = header["NAXIS2"]
+            rows_per_tile = header["ZTILELEN"]
 
             stream = stack.enter_context(protozfits.ProtobufZOFits(
                     n_tiles=n_tiles,
@@ -90,10 +102,11 @@ def main():
                     defaul_compression="lst"))
             stream.open(name)
 
-            stream.move_to_new_table("CameraConfiguration")
-            stream.write_message(input_streams[i].CameraConfiguration[0])
             stream.move_to_new_table("DataStream")
             stream.write_message(input_streams[i].DataStream[0])
+          
+            stream.move_to_new_table("CameraConfiguration")
+            stream.write_message(input_streams[i].CameraConfiguration[0])
 
             stream.move_to_new_table("Events")
 
@@ -102,19 +115,16 @@ def main():
                 if event.event_id == 0:
                     continue
                 # Get the event type, from the first loop over the files:
-                evtype = event_type_val[event_id==event.event_id][0]
+                evtype = event_type[event.event_id]
+              
+                if evtype in EVENT_TYPES_TO_REDUCE:
+                    if event.num_channels != 2:
+                        log.error('You are attempting gain selection on '
+                                      'data with only one gain!')
+                        sys.exit(1)
 
-                if ((evtype == EventType.SUBARRAY.value) |
-                    (evtype == EventType.UNKNOWN.value)):
                     # Find pixels with HG above gain switch threshold:
                     wf = protozfits.any_array_to_numpy(event.waveform)
-                    num_gains = int(wf.size / num_pixels / num_samples)
-
-                    if num_gains != 2:
-                        logging.error('You are attempting gain selection on '
-                                      'data with only one gain!')
-                        exit(1)
-
                     wf = wf.reshape((num_gains, num_pixels, num_samples))
                     hg = wf[0]
                     lg = wf[1]
@@ -136,12 +146,14 @@ def main():
             stream.close()
             input_streams[i].close()
 
-    logging.info('R0 to R0G conversion finished successfully!')
-    return(0)
+    log.info('R0 to R0G conversion finished successfully!')
+    
 
 
 def get_event_types(input_file):
 
+    log = logging.getLogger("lstchain_r0_to_r0g")
+
     # For heuristic flat field identification (values refer to
     # baseline-subtracted HG integrated waveforms):
     MIN_FLATFIELD_ADC = 3000
@@ -153,21 +165,21 @@ def get_event_types(input_file):
     standard_config['source_config']['LSTEventSource'][
         'pointing_information'] = False
 
-    event_type = []
-    event_id = []
+    event_type = dict()
+  
     with EventSource(input_url=input_file,
                      config=Config(standard_config['source_config'])) as source:
         source.pointing_information = False
         source.trigger_information = True
-        source.log.setLevel(logging.WARNING)
+        source.log.setLevel(log.WARNING)
 
         offset = source.data_stream.waveform_offset 
         try:
             for event in source:
                 if event.r0.tel[1].waveform is None:
-                    logging.error('The data seem to contain no R0 waveforms. '
+                    log.error('The data seem to contain no R0 waveforms. '
                                   'Is this already gain-selected data?')
-                    exit(1)
+                    sys.exit(1)
 
                 # Check if this may be a FF event
                 # Subtract baseline offset (convert to signed integer to
@@ -178,27 +190,27 @@ def get_event_types(input_file):
                 wf_hg_sum = np.sum(wf_hg, axis=1)
                 ff_pix = ((wf_hg_sum > MIN_FLATFIELD_ADC) &
                           (wf_hg_sum < MAX_FLATFIELD_ADC))
-                event_type.append(event.trigger.event_type)
 
+                evtype = event.trigger.event_type
                 # Check fraction of pixels with HG in "FF-like range"
                 if ff_pix.sum() / event.r0.tel[1].waveform.shape[1] > \
                         MIN_FLATFIELD_PIXEL_FRACTION:
                     # Looks like a FF event:
-                    event_type[-1] = EventType.FLATFIELD
-                elif event_type[-1] == EventType.FLATFIELD:
+                    evtype = EventType.FLATFIELD
+                elif evtype == EventType.FLATFIELD:
                     # If originally tagged as FF, but not looking like FF:
-                    event_type[-1] = EventType.UNKNOWN
+                    evtype = EventType.UNKNOWN
 
-                event_id.append(event.index.event_id)
+                event_type[event.index.event_id] = evtype
 
-            logging.info('Finished first loop over input files - all ok!')
+            log.info('Finished first loop over input files - all ok!')
 
         except Exception as err:
-            logging.error(err)
-            logging.error('Something went wrong!')
-            exit(1)
+            log.error(err)
+            log.error('Something went wrong!')
+            sys.exit(1)
 
-    return event_id, event_type
+    return event_type
 
 
 

From 1235d472aa8a3499c9cae875ca2065013002ad0a Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Tue, 20 Feb 2024 18:37:04 +0100
Subject: [PATCH 22/35] Update lstchain_r0_to_r0g.py

Wrong quote
---
 lstchain/scripts/lstchain_r0_to_r0g.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index 54e5bb22e1..c6f1c7d363 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -82,7 +82,7 @@ def main():
     try:
       camera_config = input_streams[0].CameraConfiguration[0]
     except (AttributeError, IndexError):
-      log.error("CameraConfiguration not found! Is this R1v1 data?')
+      log.error('CameraConfiguration not found! Is this R1v1 data?')
       sys.exit(1)
 
     num_pixels = camera_config.num_pixels

From 68eb0ee45d9403012570c2eff57f87857bb72733 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Tue, 20 Feb 2024 18:38:46 +0100
Subject: [PATCH 23/35] Update lstchain_r0_to_r0g.py

missing variable
---
 lstchain/scripts/lstchain_r0_to_r0g.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index c6f1c7d363..df08deab9f 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -118,7 +118,8 @@ def main():
                 evtype = event_type[event.event_id]
               
                 if evtype in EVENT_TYPES_TO_REDUCE:
-                    if event.num_channels != 2:
+                    num_gains = event.num_channels
+                    if num_gains != 2:
                         log.error('You are attempting gain selection on '
                                       'data with only one gain!')
                         sys.exit(1)

From 10f94028fcb01bcdd91ab5a38442b02a9e73c581 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Tue, 20 Feb 2024 18:40:36 +0100
Subject: [PATCH 24/35] Update lstchain_r0_to_r0g.py

pth => paths
---
 lstchain/scripts/lstchain_r0_to_r0g.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index df08deab9f..bdd9b9ca33 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -65,14 +65,14 @@ def main():
     # Now loop over the files (4 streams) again to perform the actual gain
     # selection:
     
-    run_info = pth.parse_r0_filename(input_file)
-    input_stream_names = [pth.Path(pth.Path(input_file).parent,
-                                   pth.run_to_r0_filename(run_info.tel_id, 
-                                                          run_info.run,
-                                                          run_info.subrun, 
-                                                          id_stream))
+    run_info = paths.parse_r0_filename(input_file)
+    input_stream_names = [paths.Path(paths.Path(input_file).parent,
+                                     paths.run_to_r0_filename(run_info.tel_id, 
+                                                              run_info.run,
+                                                              run_info.subrun, 
+                                                              id_stream))
                           for id_stream in range(4)]
-    output_stream_names = [pth.Path(output_dir, pth.Path(inputsn).name) 
+    output_stream_names = [paths.Path(output_dir, paths.Path(inputsn).name) 
                            for inputsn in input_stream_names]
 
     input_streams = []

From 008fbf12dfbcf248278c82a8dd511cddeda4e43c Mon Sep 17 00:00:00 2001
From: moralejo <moralejo@ifae.es>
Date: Tue, 20 Feb 2024 18:12:11 +0000
Subject: [PATCH 25/35] Use str instead of Path in some calls.

---
 lstchain/scripts/lstchain_r0_to_r0g.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index bdd9b9ca33..6d82085132 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -70,14 +70,14 @@ def main():
                                      paths.run_to_r0_filename(run_info.tel_id, 
                                                               run_info.run,
                                                               run_info.subrun, 
-                                                              id_stream))
+                                                              id_stream+1))
                           for id_stream in range(4)]
     output_stream_names = [paths.Path(output_dir, paths.Path(inputsn).name) 
                            for inputsn in input_stream_names]
 
     input_streams = []
     for name in input_stream_names:
-        input_streams.append(protozfits.File(name, pure_protobuf=True))
+        input_streams.append(protozfits.File(str(name), pure_protobuf=True))
 
     try:
       camera_config = input_streams[0].CameraConfiguration[0]
@@ -100,7 +100,7 @@ def main():
                     rows_per_tile=rows_per_tile,
                     compression_block_size_kb=64*1024,
                     defaul_compression="lst"))
-            stream.open(name)
+            stream.open(str(name))
 
             stream.move_to_new_table("DataStream")
             stream.write_message(input_streams[i].DataStream[0])
@@ -172,7 +172,7 @@ def get_event_types(input_file):
                      config=Config(standard_config['source_config'])) as source:
         source.pointing_information = False
         source.trigger_information = True
-        source.log.setLevel(log.WARNING)
+        source.log.setLevel(logging.WARNING)
 
         offset = source.data_stream.waveform_offset 
         try:

From e1b4fb0e435c21b555d2530f1f2af25ec30ea8ee Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Wed, 21 Feb 2024 10:16:44 +0100
Subject: [PATCH 26/35] Fix issue with setting input filenames for the
 different streams

---
 lstchain/scripts/lstchain_r0_to_r0g.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index 6d82085132..0607fa395e 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -14,11 +14,12 @@
 import protozfits
 import argparse
 import sys
+import re
 
+from pathlib import Path
 from ctapipe.io import EventSource
 from ctapipe.containers import EventType
 from lstchain.io import standard_config
-import lstchain.paths as paths
 from traitlets.config import Config
 
 import numpy as np
@@ -65,14 +66,13 @@ def main():
     # Now loop over the files (4 streams) again to perform the actual gain
     # selection:
     
-    run_info = paths.parse_r0_filename(input_file)
-    input_stream_names = [paths.Path(paths.Path(input_file).parent,
-                                     paths.run_to_r0_filename(run_info.tel_id, 
-                                                              run_info.run,
-                                                              run_info.subrun, 
-                                                              id_stream+1))
+    input_stream_names = [Path(Path(input_file).parent,
+                               re.sub("LST-1...Run", 
+                                      f"LST-1.{id_stream+1}.Run",
+                                      Path(input_file).name))
                           for id_stream in range(4)]
-    output_stream_names = [paths.Path(output_dir, paths.Path(inputsn).name) 
+  
+    output_stream_names = [Path(output_dir, Path(inputsn).name) 
                            for inputsn in input_stream_names]
 
     input_streams = []

From 6687283086cb45a59c904ba6c97d62a764ae7ac3 Mon Sep 17 00:00:00 2001
From: moralejo <moralejo@ifae.es>
Date: Wed, 21 Feb 2024 15:45:32 +0000
Subject: [PATCH 27/35] WIP: removed unnecessary first loop over files. Count
 events of various types.

---
 lstchain/scripts/lstchain_r0_to_r0g.py | 153 +++++++++++--------------
 1 file changed, 66 insertions(+), 87 deletions(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index 0607fa395e..ef11f07f49 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -17,10 +17,7 @@
 import re
 
 from pathlib import Path
-from ctapipe.io import EventSource
 from ctapipe.containers import EventType
-from lstchain.io import standard_config
-from traitlets.config import Config
 
 import numpy as np
 from contextlib import ExitStack
@@ -51,20 +48,13 @@ def main():
     input_file = args.input_file
     output_dir = args.output_dir
   
-    log = logging.getLogger("lstchain_r0_to_r0g")
-  
-    # First identify properly interleaved pedestals (also in case there are
-    # ucts jumps) and FF events (heuristically):
-    event_type = get_event_types(input_file)
-    event_type_val = np.array([x.value for x in event_type.values()])
-
-    log.info('Identified event types and number of events:')
-    evtype, counts = np.unique(event_type_val, return_counts=True)
-    for j, n in zip(evtype, counts):
-        log.info('%s: %d', EventType(j), n)
-
-    # Now loop over the files (4 streams) again to perform the actual gain
-    # selection:
+    log = logging.getLogger("__name__")
+    handler = logging.FileHandler("out.log", mode='w')
+    log.addHandler(handler)
+
+    use_heuristic = True
+
+    # Loop over the files (4 streams) to perform the gain selection:
     
     input_stream_names = [Path(Path(input_file).parent,
                                re.sub("LST-1...Run", 
@@ -88,6 +78,12 @@ def main():
     num_pixels = camera_config.num_pixels
     num_samples = camera_config.num_samples_nominal
 
+    # Counters to keep track of how many FF-like events are
+    # tagged as FF, and how many are not (and vice-versa):
+    num_FF_like_with_FF_type = 0
+    num_FF_like_with_no_FF_type = 0
+    num_FF_unlike_with_FF_type  = 0
+    num_events = 0
 
     with ExitStack() as stack:
         for i, name in enumerate(output_stream_names):
@@ -110,25 +106,42 @@ def main():
 
             stream.move_to_new_table("Events")
 
+            wf_offset = input_streams[i].DataStream[0].waveform_offset
+
             for event in input_streams[i].Events:
                 # skip corrupted events:
                 if event.event_id == 0:
                     continue
-                # Get the event type, from the first loop over the files:
-                evtype = event_type[event.event_id]
-              
-                if evtype in EVENT_TYPES_TO_REDUCE:
-                    num_gains = event.num_channels
-                    if num_gains != 2:
-                        log.error('You are attempting gain selection on '
-                                      'data with only one gain!')
-                        sys.exit(1)
+                num_events += 1
+
+                num_gains = event.num_channels
+                if num_gains != 2:
+                    log.error('You are attempting gain selection on '
+                              'data with only one gain!')
+                    sys.exit(1)
 
+                wf = protozfits.any_array_to_numpy(event.waveform)
+                wf = wf.reshape((num_gains, num_pixels, num_samples))
+                hg = wf[0]
+                lg = wf[1]
+
+                evtype = event.event_type
+                evtype_heuristic = get_event_type(hg, wf_offset, evtype)
+
+                if evtype == EventType.FLATFIELD:
+                    if evtype_heuristic == EventType.FLATFIELD:
+                        num_FF_like_with_FF_type += 1
+                    else:
+                        num_FF_unlike_with_FF_type += 1
+                elif evtype_heuristic == EventType.FLATFIELD:
+                    num_FF_like_with_no_FF_type += 1
+
+
+                if use_heuristic:
+                    evtype = evtype_heuristic
+
+                if evtype in EVENT_TYPES_TO_REDUCE:
                     # Find pixels with HG above gain switch threshold:
-                    wf = protozfits.any_array_to_numpy(event.waveform)
-                    wf = wf.reshape((num_gains, num_pixels, num_samples))
-                    hg = wf[0]
-                    lg = wf[1]
                     use_lg = (np.max(hg[:, SAMPLE_START:SAMPLE_END], axis=1)
                               > THRESHOLD)
                     new_wf = np.where(use_lg[:, None], lg, hg) # gain-selected
@@ -147,13 +160,19 @@ def main():
             stream.close()
             input_streams[i].close()
 
-    log.info('R0 to R0G conversion finished successfully!')
-    
 
+    log.info('Number of processed events: %d', num_events)
+    log.info('FF-like events tagged as FF: %d', 
+             num_FF_like_with_FF_type)
+    log.info('FF-like events not tagged as FF: %d', 
+             num_FF_like_with_no_FF_type)
+    log.info('FF-unlike events tagged as FF: %d', 
+             num_FF_unlike_with_FF_type)
+
+    log.info('R0 to R0G conversion finished successfully!')
 
-def get_event_types(input_file):
 
-    log = logging.getLogger("lstchain_r0_to_r0g")
+def get_event_type(wf_hg, offset, evtype):
 
     # For heuristic flat field identification (values refer to
     # baseline-subtracted HG integrated waveforms):
@@ -161,59 +180,19 @@ def get_event_types(input_file):
     MAX_FLATFIELD_ADC = 12000
     MIN_FLATFIELD_PIXEL_FRACTION = 0.8
 
-    standard_config['source_config']['LSTEventSource'][
-        'apply_drs4_corrections'] = False
-    standard_config['source_config']['LSTEventSource'][
-        'pointing_information'] = False
-
-    event_type = dict()
-  
-    with EventSource(input_url=input_file,
-                     config=Config(standard_config['source_config'])) as source:
-        source.pointing_information = False
-        source.trigger_information = True
-        source.log.setLevel(logging.WARNING)
-
-        offset = source.data_stream.waveform_offset 
-        try:
-            for event in source:
-                if event.r0.tel[1].waveform is None:
-                    log.error('The data seem to contain no R0 waveforms. '
-                                  'Is this already gain-selected data?')
-                    sys.exit(1)
-
-                # Check if this may be a FF event
-                # Subtract baseline offset (convert to signed integer to
-                # allow for negative fluctuations!)
-                wf_hg = event.r0.tel[1].waveform[0][:,
-                        SAMPLE_START:SAMPLE_END].astype('int16') - offset
-                # pixel-wise integral:
-                wf_hg_sum = np.sum(wf_hg, axis=1)
-                ff_pix = ((wf_hg_sum > MIN_FLATFIELD_ADC) &
-                          (wf_hg_sum < MAX_FLATFIELD_ADC))
-
-                evtype = event.trigger.event_type
-                # Check fraction of pixels with HG in "FF-like range"
-                if ff_pix.sum() / event.r0.tel[1].waveform.shape[1] > \
-                        MIN_FLATFIELD_PIXEL_FRACTION:
-                    # Looks like a FF event:
-                    evtype = EventType.FLATFIELD
-                elif evtype == EventType.FLATFIELD:
-                    # If originally tagged as FF, but not looking like FF:
-                    evtype = EventType.UNKNOWN
-
-                event_type[event.index.event_id] = evtype
-
-            log.info('Finished first loop over input files - all ok!')
-
-        except Exception as err:
-            log.error(err)
-            log.error('Something went wrong!')
-            sys.exit(1)
-
-    return event_type
+    evtype_heuristic = evtype
 
+    # pixel-wise integral:
+    wf_hg_sum = np.sum(wf_hg - offset, axis=1)
+    ff_pix = ((wf_hg_sum > MIN_FLATFIELD_ADC) &
+              (wf_hg_sum < MAX_FLATFIELD_ADC))
 
+    # Check fraction of pixels with HG in "FF-like range"
+    if ff_pix.sum() / len(ff_pix) > MIN_FLATFIELD_PIXEL_FRACTION:
+        # Looks like a FF event:
+        evtype_heuristic = EventType.FLATFIELD
+    elif evtype == EventType.FLATFIELD:
+        # Does not look like a FF, but was tagged as such:
+        evtype_heuristic = EventType.UNKNOWN
 
-if __name__ == '__main__':
-    main()
+    return evtype_heuristic

From 6efb3e5d72ce4f24b59ee88a6b9f386e2c5e1af2 Mon Sep 17 00:00:00 2001
From: moralejo <moralejo@ifae.es>
Date: Wed, 21 Feb 2024 16:05:02 +0000
Subject: [PATCH 28/35] Solved mess about EventType and its numerical values

---
 lstchain/scripts/lstchain_r0_to_r0g.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index ef11f07f49..a0f8687859 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -125,7 +125,7 @@ def main():
                 hg = wf[0]
                 lg = wf[1]
 
-                evtype = event.event_type
+                evtype = EventType(event.event_type)
                 evtype_heuristic = get_event_type(hg, wf_offset, evtype)
 
                 if evtype == EventType.FLATFIELD:

From 0bd82ee1ec7b0e4ee9712883427dac5bf806f118 Mon Sep 17 00:00:00 2001
From: moralejo <moralejo@ifae.es>
Date: Wed, 21 Feb 2024 16:36:28 +0000
Subject: [PATCH 29/35] changes in logger

---
 lstchain/scripts/lstchain_r0_to_r0g.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index a0f8687859..52b75a4939 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -48,9 +48,10 @@ def main():
     input_file = args.input_file
     output_dir = args.output_dir
   
-    log = logging.getLogger("__name__")
+    log = logging.getLogger(__name__)
+    log.setLevel(logging.INFO)
     handler = logging.FileHandler("out.log", mode='w')
-    log.addHandler(handler)
+    logging.getLogger().addHandler(handler)
 
     use_heuristic = True
 

From b5fce9ed95d155de90f7794e8666f269794a8af6 Mon Sep 17 00:00:00 2001
From: moralejo <moralejo@ifae.es>
Date: Wed, 21 Feb 2024 17:45:30 +0000
Subject: [PATCH 30/35] Various fixes. Better logging.

---
 lstchain/scripts/lstchain_r0_to_r0g.py | 54 +++++++++++++++++++++-----
 1 file changed, 45 insertions(+), 9 deletions(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index 52b75a4939..3722be92a9 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -19,6 +19,7 @@
 from pathlib import Path
 from ctapipe.containers import EventType
 
+import lstchain.paths as paths
 import numpy as np
 from contextlib import ExitStack
 
@@ -31,6 +32,17 @@
                     type=str, default='./',
                     help='Output directory')
 
+parser.add_argument('--log', dest='log_file',
+                    type=str, default=None,
+                    help='Log file name')
+
+parser.add_argument('--no-flatfield-heuristic', action='store_const', 
+                    const=False, 
+                    dest="use_flatfield_heuristic", default=True,
+                    help=("If given, do *not* identify flatfield events"
+                          " heuristically from the raw data. "
+                          "Trust event_type."))
+
 # Range of waveform to be checked (for gain selection & heuristic FF
 # identification)
 SAMPLE_START = 3
@@ -47,13 +59,22 @@ def main():
   
     input_file = args.input_file
     output_dir = args.output_dir
-  
+    use_flatfield_heuristic =  args.use_flatfield_heuristic
+
     log = logging.getLogger(__name__)
     log.setLevel(logging.INFO)
-    handler = logging.FileHandler("out.log", mode='w')
-    logging.getLogger().addHandler(handler)
 
-    use_heuristic = True
+    log_file = args.log_file
+    runinfo = paths.parse_r0_filename(input_file)
+    if log_file is None:
+        log_file = f'R0_to_R0g_Run{runinfo.run:05d}.{runinfo.subrun:04d}.log'
+
+    formatter = logging.Formatter('%(asctime)s - '
+                                  '%(levelname)s - %(message)s',
+                                  '%Y-%m-%d %H:%M:%S')
+    handler = logging.FileHandler(log_file, mode='w')
+    handler.setFormatter(formatter)
+    logging.getLogger().addHandler(handler)
 
     # Loop over the files (4 streams) to perform the gain selection:
     
@@ -137,8 +158,7 @@ def main():
                 elif evtype_heuristic == EventType.FLATFIELD:
                     num_FF_like_with_no_FF_type += 1
 
-
-                if use_heuristic:
+                if use_flatfield_heuristic:
                     evtype = evtype_heuristic
 
                 if evtype in EVENT_TYPES_TO_REDUCE:
@@ -163,15 +183,31 @@ def main():
 
 
     log.info('Number of processed events: %d', num_events)
-    log.info('FF-like events tagged as FF: %d', 
-             num_FF_like_with_FF_type)
+    if num_FF_like_with_FF_type > 0:
+        log.info('FF-like events tagged as FF: %d', 
+                 num_FF_like_with_FF_type)
+    else:
+        log.warn('FF-like events tagged as FF: %d !!', 
+                 num_FF_like_with_FF_type)
+
     log.info('FF-like events not tagged as FF: %d', 
              num_FF_like_with_no_FF_type)
+
     log.info('FF-unlike events tagged as FF: %d', 
              num_FF_unlike_with_FF_type)
 
-    log.info('R0 to R0G conversion finished successfully!')
 
+    num_FF_like = num_FF_like_with_no_FF_type + num_FF_like_with_FF_type
+
+    # If a relevant fraction of FF-like events were not tagged as FF...:
+    max_frac = 0.1
+    if ((num_FF_like_with_no_FF_type / num_FF_like > max_frac) &
+        (use_flatfield_heuristic == False)):
+            log.warn('More than %d percent of FF-like events '
+                     'have wrong event_type!', int(100*max_frac))
+            log.warn('You should use heuristic identification of FF events!')
+    else:
+        log.info('R0 to R0G conversion finished successfully!')
 
 def get_event_type(wf_hg, offset, evtype):
 

From 2824d2c410fd4cb08af91e500503dd325d3211f5 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Thu, 22 Feb 2024 15:08:42 +0100
Subject: [PATCH 31/35] Better use of parser

---
 lstchain/scripts/lstchain_r0_to_r0g.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index 3722be92a9..833422f0ec 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -36,9 +36,8 @@
                     type=str, default=None,
                     help='Log file name')
 
-parser.add_argument('--no-flatfield-heuristic', action='store_const', 
-                    const=False, 
-                    dest="use_flatfield_heuristic", default=True,
+parser.add_argument('--no-flatfield-heuristic', action='store_false', 
+                    dest="use_flatfield_heuristic",
                     help=("If given, do *not* identify flatfield events"
                           " heuristically from the raw data. "
                           "Trust event_type."))

From 2796700ac26708252cfb908708544cd9c25dd7b3 Mon Sep 17 00:00:00 2001
From: moralejo <moralejo@ifae.es>
Date: Thu, 22 Feb 2024 15:09:49 +0000
Subject: [PATCH 32/35] Log file to output directory

---
 lstchain/scripts/lstchain_r0_to_r0g.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index 3722be92a9..011af9f63d 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -66,8 +66,10 @@ def main():
 
     log_file = args.log_file
     runinfo = paths.parse_r0_filename(input_file)
+
     if log_file is None:
-        log_file = f'R0_to_R0g_Run{runinfo.run:05d}.{runinfo.subrun:04d}.log'
+        log_file = output_dir
+        log_file += f'/R0_to_R0g_Run{runinfo.run:05d}.{runinfo.subrun:04d}.log'
 
     formatter = logging.Formatter('%(asctime)s - '
                                   '%(levelname)s - %(message)s',

From a1326869c74d28273ce8a9b28fa5f3006db5bed0 Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Thu, 22 Feb 2024 16:19:59 +0100
Subject: [PATCH 33/35] Fixed indents

---
 lstchain/scripts/lstchain_r0_to_r0g.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index 95c5a9ca9c..a2e654b5c7 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -93,10 +93,10 @@ def main():
         input_streams.append(protozfits.File(str(name), pure_protobuf=True))
 
     try:
-      camera_config = input_streams[0].CameraConfiguration[0]
+        camera_config = input_streams[0].CameraConfiguration[0]
     except (AttributeError, IndexError):
-      log.error('CameraConfiguration not found! Is this R1v1 data?')
-      sys.exit(1)
+        log.error('CameraConfiguration not found! Is this R1v1 data?')
+        sys.exit(1)
 
     num_pixels = camera_config.num_pixels
     num_samples = camera_config.num_samples_nominal

From 3b14eb71d8a5870fb1f7fa4d0f63dc0f7ceeb6a3 Mon Sep 17 00:00:00 2001
From: moralejo <moralejo@ifae.es>
Date: Thu, 22 Feb 2024 15:51:24 +0000
Subject: [PATCH 34/35] Preserve higher bits of pixel status in case we ever
 use them

---
 lstchain/scripts/lstchain_r0_to_r0g.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lstchain/scripts/lstchain_r0_to_r0g.py b/lstchain/scripts/lstchain_r0_to_r0g.py
index 95c5a9ca9c..6f5dd96069 100644
--- a/lstchain/scripts/lstchain_r0_to_r0g.py
+++ b/lstchain/scripts/lstchain_r0_to_r0g.py
@@ -173,8 +173,8 @@ def main():
                     pixel_status = protozfits.any_array_to_numpy(event.pixel_status)
                     # Set to 0 the status bit of the removed gain:
                     new_status = np.where(use_lg,
-                                          pixel_status & 0b1011,
-                                          pixel_status & 0b0111)
+                                          pixel_status & 0b11111011,
+                                          pixel_status & 0b11110111)
                     event.pixel_status.data = new_status.tobytes()
 
                 stream.write_message(event)

From 39a502741bdf5e013432a149aa68076749a40b1f Mon Sep 17 00:00:00 2001
From: Abelardo Moralejo <moralejo@ifae.es>
Date: Thu, 22 Feb 2024 19:01:08 +0100
Subject: [PATCH 35/35] Update protozfits version specification in setup.py

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 497d7b64b1..5a99073c6a 100644
--- a/setup.py
+++ b/setup.py
@@ -60,7 +60,7 @@ def find_scripts(script_dir, prefix):
         'scikit-learn~=1.2',
         'tables',
         'toml',
-        'protozfits~=2.2',
+        'protozfits>=2.2,<3',
         'pymongo',
         'pyparsing',
         'setuptools_scm',