Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pattern argument to merge hdf5, add progress bar #418

Merged
Merged 3 commits on May 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions lstchain/io/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from eventio import Histograms
from eventio.search_utils import yield_toplevel_of_type
from .lstcontainers import ThrownEventsHistogram, ExtraMCInfo, MetaData
from tqdm import tqdm


__all__ = ['read_simu_info_hdf5',
Expand Down Expand Up @@ -183,6 +184,7 @@ def auto_merge_h5files(file_list, output_filename='merged.h5', nodes_keys=None,
else:
keys = set(nodes_keys)

bar = tqdm(total=len(file_list))
with open_file(output_filename, 'w') as merge_file:
with open_file(file_list[0]) as f1:
for k in keys:
Expand All @@ -202,6 +204,7 @@ def auto_merge_h5files(file_list, output_filename='merged.h5', nodes_keys=None,
os.path.basename(k),
createparents=True,
obj=f1.root[k].read())
bar.update(1)
for filename in file_list[1:]:
common_keys = keys.intersection(get_dataset_keys(filename))
with open_file(filename) as file:
Expand All @@ -214,6 +217,7 @@ def auto_merge_h5files(file_list, output_filename='merged.h5', nodes_keys=None,
merge_file.root[k].append(file.root[k].read())
except:
print("Can't append node {} from file {}".format(k, filename))
bar.update(1)


def merging_check(file_list):
Expand Down
19 changes: 14 additions & 5 deletions lstchain/scripts/lstchain_merge_hdf5_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
# import tables
from lstchain.io import get_dataset_keys
from lstchain.io import smart_merge_h5files, auto_merge_h5files
from glob import glob

parser = argparse.ArgumentParser(description='Merge HDF5 files')

Expand All @@ -47,23 +48,31 @@
help='Boolean. True to remove the images',
default=False)

parser.add_argument('--run-number', '-r', action='store', type=str,
parser.add_argument('--run-number', '-r', action='store', type=int,
dest='run_number',
help='Merge files run-wise if a run number is passed, \
otherwise merge all files in the directory',
default=None)

parser.add_argument(
'--pattern', '-p',
help='Glob pattern to match files',
default='*.h5',
)

args = parser.parse_args()


def main():

if args.run_number:
file_list = sorted([os.path.join(args.srcdir, f) for f in os.listdir(args.srcdir)
if (f.endswith('.h5') and args.run_number in f)])
run = f'Run{args.run_number:05d}'
file_list = sorted(filter(
lambda f: run in f,
glob(os.path.join(args.srcdir, args.pattern))
))
else:
file_list = sorted([os.path.join(args.srcdir, f) for f in os.listdir(args.srcdir)
if f.endswith('.h5')])
file_list = sorted(glob(os.path.join(args.srcdir, args.pattern)))

if args.noimage:
keys = get_dataset_keys(file_list[0])
Expand Down