Skip to content

Commit

Permalink
Merge pull request #418 from cta-observatory/fix_merging
Browse files: browse the repository at this point in the history
Add pattern argument to merge hdf5, add progress bar
  • Loading branch information
maxnoe authored May 27, 2020
2 parents 6f98711 + ada45e7 commit 69bcae0
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 5 deletions.
4 changes: 4 additions & 0 deletions lstchain/io/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from eventio import Histograms
from eventio.search_utils import yield_toplevel_of_type
from .lstcontainers import ThrownEventsHistogram, ExtraMCInfo, MetaData
from tqdm import tqdm


__all__ = ['read_simu_info_hdf5',
Expand Down Expand Up @@ -183,6 +184,7 @@ def auto_merge_h5files(file_list, output_filename='merged.h5', nodes_keys=None,
else:
keys = set(nodes_keys)

bar = tqdm(total=len(file_list))
with open_file(output_filename, 'w') as merge_file:
with open_file(file_list[0]) as f1:
for k in keys:
Expand All @@ -202,6 +204,7 @@ def auto_merge_h5files(file_list, output_filename='merged.h5', nodes_keys=None,
os.path.basename(k),
createparents=True,
obj=f1.root[k].read())
bar.update(1)
for filename in file_list[1:]:
common_keys = keys.intersection(get_dataset_keys(filename))
with open_file(filename) as file:
Expand All @@ -214,6 +217,7 @@ def auto_merge_h5files(file_list, output_filename='merged.h5', nodes_keys=None,
merge_file.root[k].append(file.root[k].read())
except:
print("Can't append node {} from file {}".format(k, filename))
bar.update(1)


def merging_check(file_list):
Expand Down
19 changes: 14 additions & 5 deletions lstchain/scripts/lstchain_merge_hdf5_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
# import tables
from lstchain.io import get_dataset_keys
from lstchain.io import smart_merge_h5files, auto_merge_h5files
from glob import glob

parser = argparse.ArgumentParser(description='Merge HDF5 files')

Expand All @@ -47,23 +48,31 @@
help='Boolean. True to remove the images',
default=False)

parser.add_argument('--run-number', '-r', action='store', type=str,
parser.add_argument('--run-number', '-r', action='store', type=int,
dest='run_number',
help='Merge files run-wise if a run number is passed, \
otherwise merge all files in the directory',
default=None)

parser.add_argument(
'--pattern', '-p',
help='Glob pattern to match files',
default='*.h5',
)

args = parser.parse_args()


def main():

if args.run_number:
file_list = sorted([os.path.join(args.srcdir, f) for f in os.listdir(args.srcdir)
if (f.endswith('.h5') and args.run_number in f)])
run = f'Run{args.run_number:05d}'
file_list = sorted(filter(
lambda f: run in f,
glob(os.path.join(args.srcdir, args.pattern))
))
else:
file_list = sorted([os.path.join(args.srcdir, f) for f in os.listdir(args.srcdir)
if f.endswith('.h5')])
file_list = sorted(glob(os.path.join(args.srcdir, args.pattern)))

if args.noimage:
keys = get_dataset_keys(file_list[0])
Expand Down

0 comments on commit 69bcae0

Please sign in to comment.