From 6fd9bdde6ffa641f5b84bf05affaa16686ccbb0a Mon Sep 17 00:00:00 2001 From: Steve Goldhaber Date: Thu, 12 Nov 2020 22:17:47 -0700 Subject: [PATCH 1/2] Added ability to handle directories and wildcards --- scripts/file_utils.py | 244 +++++++++++++++++++++++++++++------------- 1 file changed, 167 insertions(+), 77 deletions(-) diff --git a/scripts/file_utils.py b/scripts/file_utils.py index 8bdd41ab..f2e283ee 100644 --- a/scripts/file_utils.py +++ b/scripts/file_utils.py @@ -8,120 +8,210 @@ from __future__ import absolute_import from __future__ import unicode_literals +import glob import os -import os.path # CCPP framework imports from parse_tools import CCPPError, ParseInternalError ############################################################################### def check_for_existing_file(filename, description, readable=True): ############################################################################### - 'Check for file existence and access, abort on error' + """Check for file existence and access. + Return a list of error strings in case + does not exist or does not have read access and is True""" + errors = list() if os.path.exists(filename): if readable: if not os.access(filename, os.R_OK): errmsg = "No read access to {}, '{}'" - raise CCPPError(errmsg.format(description, filename)) - # End if - # End if (no else needed, checks all done + errors.append(errmsg.format(description, filename)) + # end if (no else, everything is fine) + # end if (no else, everything is fine) else: - raise CCPPError("{}, '{}', must exist".format(description, filename)) - # End if + errors.append("{}, '{}', must exist".format(description, filename)) + # end if + return errors ############################################################################### def check_for_writeable_file(filename, description): ############################################################################### - '''If exists but not writable, raise an error. + """If exists but not writable, raise an error. If does not exist and its directory is not writable, raise - an error. is a description of .''' + an error. is a description of .""" if os.path.exists(filename) and not os.access(filename, os.W_OK): raise CCPPError("Cannot write {}, '{}'".format(description, filename)) elif not os.access(os.path.dirname(filename), os.W_OK): raise CCPPError("Cannot write {}, '{}'".format(description, filename)) - # End if (else just return) + # end if (else just return) ############################################################################### -def read_pathnames_from_file(pathsfile): +def add_unique_files(filepath, pdesc, master_list, logger): ############################################################################### - 'Read path names from and return them as a list' + """Add any new files indicated by to . + Check each file for readability. + Log duplicate files + Return a list of errors found + Wildcards in are expanded""" + errors = list() + for file in glob.glob(filepath): + errs = check_for_existing_file(file, pdesc) + if errs: + errors.extend(errs) + elif file in master_list: + lmsg = "WARNING: Ignoring duplicate file, {}" + logger.warning(lmsg.format(file)) + else: + master_list.append(file) + # end if + # end for + return errors + +############################################################################### +def read_pathnames_from_file(pathsfile, file_type): +############################################################################### + """Read and return path names from . + Convert relative pathnames to use 's directory as root. + Also return a list of any errors encountered + """ # We want to end up with absolute paths, treat as root location root_path = os.path.dirname(os.path.abspath(pathsfile)) - pdesc = 'pathname in {}'.format(pathsfile) - pathnames = list() - with open(pathsfile, 'r') as infile: - for line in infile.readlines(): - path = line.strip() - # Skip blank lines and lines which appear to start with a comment. - if (len(path) > 0) and (path[0] != '#') and (path[0] != '!'): - # Check for an absolute path - if os.path.isabs(path): - check_for_existing_file(path, "pathname") - else: - # Assume relative pathnames are relative to pathsfile - path = os.path.normpath(os.path.join(root_path, path)) - check_for_existing_file(path, pdesc) - # End if - pathnames.append(path) - # End if (else skip blank or comment line) - # End for - # End with open - return pathnames + file_list = list() + pdesc = '{} pathsnames file'.format(file_type) + errors = check_for_existing_file(pathsfile, pdesc) + pdesc = '{} pathname in {}'.format(file_type, pathsfile) + if not errors: + with open(pathsfile, 'r') as infile: + for line in infile.readlines(): + path = line.strip() + # Skip blank lines & lines which appear to start with a comment. + if path and (path[0] not in ['#', '!']): + # Check for an absolute path + if not os.path.isabs(path): + path = os.path.normpath(os.path.join(root_path, path)) + # end if + file_list.append(path) + # end if (else skip blank or comment line) + # end for + # end with open + # end if (no else, we already have the errors) + return file_list, errors ############################################################################### -def create_file_list(files, suffices, file_type, logger, txt_files=None): +def _create_file_list_int(files, suffices, file_type, logger, + txt_files, pathname, root_path, master_list): ############################################################################### - '''Create and return a master list of files from . - is either a comma-separated string of filenames or a list. + """Create and return a master list of files from . + is a list of pathnames which may include wildcards. is a list of allowed file types. Filenames in - with an allowed suffix will be added to the master list. - Filenames with a '.txt' suffix will be parsed to look for allowed - filenames. + with an allowed suffix will be added to the master list. + Filenames with a '.txt' suffix will be parsed to look for allowed + filenames. is a description of the allowed file types. is a logger used to print warnings (unrecognized filename types) - and debug messages. - is an internal argument to prevent infinite recursion. - ''' + and debug messages. + is a list of previously-encountered text files (to prevent + infinite recursion). + is the text file name from which was read (if any). + is the list of files which have already been collected + A list of error strings is also returned + """ + errors = list() + if pathname: + pdesc = '{} pathname file, found in {}'.format(file_type, pathname) + else: + pdesc = '{} pathnames file'.format(file_type) + # end if + if not isinstance(files, list): + raise ParseInternalError("'{}' is not a list".format(files)) + # end if + for filename in files: + # suff is filename's extension + suff = os.path.splitext(filename)[1] + if suff: + suff = suff[1:] + # end if + if not os.path.isabs(filename): + filename = os.path.normpath(os.path.join(root_path, filename)) + # end if + if os.path.isdir(filename): + for suff_type in suffices: + file_type = os.path.join(filename, '*.{}'.format(suff_type)) + errs = add_unique_files(file_type, pdesc, master_list, logger) + errors.extend(errs) + # end for + elif suff in suffices: + errs = add_unique_files(filename, pdesc, master_list, logger) + errors.extend(errs) + elif suff == 'txt': + tfiles = glob.glob(filename) + if tfiles: + for file in tfiles: + if file in txt_files: + lmsg = "WARNING: Ignoring duplicate '.txt' file, {}" + logger.warning(lmsg.format(filename)) + else: + lmsg = 'Reading .{} filenames from {}' + logger.debug(lmsg.format(', .'.join(suffices), + file)) + flist, errs = read_pathnames_from_file(file, file_type) + errors.extend(errs) + txt_files.append(file) + root = os.path.dirname(file) + _, errs = _create_file_list_int(flist, suffices, + file_type, logger, + txt_files, file, + root, master_list) + errors.extend(errs) + # end if + # end for + else: + emsg = "{} pathnames file, '{}', does not exist" + errors.append(emsg.format(file_type, filename)) + # end if + else: + lmsg = 'WARNING: Not reading {}, only reading .{} or .txt files' + logger.warning(lmsg.format(filename, ', .'.join(suffices))) + # end if + # end for + + return master_list, errors + +############################################################################### +def create_file_list(files, suffices, file_type, logger, root_path=None): +############################################################################### + """Create and return a master list of files from . + is either a comma-separated string of pathnames or a list. + If a pathname is a directory, all files with extensions in + are included. + Wildcards in a pathname are expanded. + is a list of allowed file types. Filenames in + with an allowed suffix will be added to the master list. + Filenames with a '.txt' suffix will be parsed to look for allowed + filenames. + is a description of the allowed file types. + is a logger used to print warnings (unrecognized filename types) + and debug messages. + If is not None, it is used to create absolute paths for + , otherwise, the current working directory is used. + """ master_list = list() + txt_files = list() # Already processed txt files + pathname = None if isinstance(files, str): file_list = [x.strip() for x in files.split(',')] elif isinstance(files, (list, tuple)): file_list = files else: raise ParseInternalError("Bad input, = {}".format(files)) - # End if - pdesc = '{} pathnames file'.format(file_type) - for filename in file_list: - suff = os.path.basename(filename).split('.')[-1] - if suff in suffices: - check_for_existing_file(filename, pdesc) - apath = os.path.abspath(filename) - if apath in master_list: - lmsg = 'WARNING: Duplicate {} filename, {}' - logger.warning(lmsg.format(file_type, filename)) - else: - master_list.append(apath) - # End if - elif suff == 'txt': - if txt_files and (filename in txt_files): - lmsg = "WARNING: Ignoring duplicate '.txt' file, {}" - logger.warning(lmsg.format(filename)) - else: - check_for_existing_file(filename, pdesc) - flist = read_pathnames_from_file(filename) - if txt_files: - txt_files.append(filename) - else: - txt_files = [filename] - # End if - master_list.extend(create_file_list(flist, suffices, - file_type, logger, - txt_files=txt_files)) - # End if - lmsg = 'Reading .{} filenames from {}' - logger.debug(lmsg.format(', .'.join(suffices), filename)) - else: - lmsg = 'WARNING: Not reading {}, only reading .{} or .txt files' - logger.warning(lmsg.format(filename, ', .'.join(suffices))) - # End if - # End for + # end if + if root_path is None: + root_path = os.getcwd() + # end if + master_list, errors = _create_file_list_int(file_list, suffices, file_type, + logger, txt_files, pathname, + root_path, master_list) + if errors: + emsg = 'Error processing list of {} files:\n {}' + raise CCPPError(emsg.format(file_type, '\n '.join(errors))) + # end if return master_list From 667dd6206bb050eb641600e074b19abcec1b60cb Mon Sep 17 00:00:00 2001 From: Steve Goldhaber Date: Mon, 23 Nov 2020 22:58:33 -0700 Subject: [PATCH 2/2] Only overwrite modified files --- scripts/ccpp_capgen.py | 54 ++++++++++++++++++++------ scripts/file_utils.py | 87 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 128 insertions(+), 13 deletions(-) diff --git a/scripts/ccpp_capgen.py b/scripts/ccpp_capgen.py index aa99e276..ab38e073 100755 --- a/scripts/ccpp_capgen.py +++ b/scripts/ccpp_capgen.py @@ -13,14 +13,13 @@ import argparse import sys import os -import os.path import logging import re # CCPP framework imports from parse_tools import init_log, set_log_level, context_string from parse_tools import CCPPError, ParseInternalError -from file_utils import check_for_writeable_file -from file_utils import create_file_list +from file_utils import check_for_writeable_file, remove_dir, replace_paths +from file_utils import create_file_list, move_modified_files from fortran_tools import parse_fortran_file, FortranWriter from host_model import HostModel from host_cap import write_host_cap @@ -97,9 +96,8 @@ def parse_command_line(args, description): help='''Name of host model to use in CCPP API If this option is passed, a host model cap is generated''') - parser.add_argument("--clean", action='store_true', - help='Remove files created by this script, then exit', - default=False) + parser.add_argument("--clean", action='store_true', default=False, + help='Remove files created by this script, then exit') parser.add_argument("--kind-phys", type=str, default='REAL64', metavar="kind_phys", @@ -109,6 +107,9 @@ def parse_command_line(args, description): metavar='HTML | Latex | HTML,Latex', type=str, help="Generate LaTeX and/or HTML documentation") + parser.add_argument("--force-overwrite", action='store_true', default=False, + help="""Overwrite all CCPP-generated files, even +if unmodified""") parser.add_argument("--verbose", action='count', default=0, help="Log more activity, repeat for increased output") pargs = parser.parse_args(args) @@ -619,7 +620,8 @@ def clean_capgen(cap_output_file, logger): ############################################################################### def capgen(host_files, scheme_files, suites, datatable_file, preproc_defs, - gen_hostcap, gen_docfiles, output_dir, host_name, kind_phys, logger): + gen_hostcap, gen_docfiles, output_dir, host_name, kind_phys, + force_overwrite, logger): ############################################################################### """Parse indicated host, scheme, and suite files. Generate code to allow host model to run indicated CCPP suites.""" @@ -675,17 +677,45 @@ def capgen(host_files, scheme_files, suites, datatable_file, preproc_defs, logger.debug("{} variables = {}".format(host_model.name, plist)) logger.debug("schemes = {}".format([x.title for x in scheme_headers])) # Finally, we can get on with writing suites + # Make sure to write to temporary location if files exist in + if not os.path.exists(output_dir): + # Try to create output_dir (let it crash if it fails) + os.makedirs(output_dir) + # Nothing here, use it for output + outtemp_dir = output_dir + elif not os.listdir(output_dir): + # Nothing here, use it for output + outtemp_dir = output_dir + else: + # We need to create a temporary staging area, create it here + outtemp_name = "ccpp_temp_scratch_dir" + outtemp_dir = os.path.join(output_dir, outtemp_name) + if os.path.exists(outtemp_dir): + remove_dir(outtemp_dir, force=True) + # end if + os.makedirs(outtemp_dir) + # end if ccpp_api = API(sdfs, host_model, scheme_headers, logger) - cap_filenames = ccpp_api.write(output_dir, logger) + cap_filenames = ccpp_api.write(outtemp_dir, logger) if gen_hostcap: # Create a cap file - host_files = [write_host_cap(host_model, ccpp_api, output_dir, logger)] + host_files = [write_host_cap(host_model, ccpp_api, outtemp_dir, logger)] else: host_files = list() # end if # Create the kinds file - kinds_file = create_kinds_file(kind_phys, output_dir, logger) + kinds_file = create_kinds_file(kind_phys, outtemp_dir, logger) + # Move any changed files to output_dir and remove outtemp_dir + move_modified_files(outtemp_dir, output_dir, + overwrite=force_overwrite, remove_src=True) + # We have to rename the files we created + if outtemp_dir != output_dir: + replace_paths(cap_filenames, outtemp_dir, output_dir) + replace_paths(host_files, outtemp_dir, output_dir) + kinds_file = kinds_file.replace(outtemp_dir, output_dir) + # end if # Finally, create the database of generated files and caps + # This can be directly in output_dir because it will not affect dependencies generate_ccpp_datatable(datatable_file, host_model, ccpp_api, scheme_headers, scheme_tdict, host_files, cap_filenames, kinds_file) @@ -728,7 +758,7 @@ def _main_func(): # Make sure we can create output file lists if not os.path.isabs(datatable_file): datatable_file = os.path.normpath(os.path.join(output_dir, - datatable_file)) + datatable_file)) # end if if args.clean: clean_capgen(datatable_file, _LOGGER) @@ -738,7 +768,7 @@ def _main_func(): capgen(args.host_files, args.scheme_files, args.suites, datatable_file, preproc_defs, generate_host_cap, args.generate_docfiles, output_dir, args.host_name, - args.kind_phys, _LOGGER) + args.kind_phys, args.force_overwrite, _LOGGER) # end if (clean) ############################################################################### diff --git a/scripts/file_utils.py b/scripts/file_utils.py index f2e283ee..3db46ebb 100644 --- a/scripts/file_utils.py +++ b/scripts/file_utils.py @@ -8,6 +8,7 @@ from __future__ import absolute_import from __future__ import unicode_literals +import filecmp import glob import os # CCPP framework imports @@ -40,7 +41,8 @@ def check_for_writeable_file(filename, description): an error. is a description of .""" if os.path.exists(filename) and not os.access(filename, os.W_OK): raise CCPPError("Cannot write {}, '{}'".format(description, filename)) - elif not os.access(os.path.dirname(filename), os.W_OK): + # end if + if not os.access(os.path.dirname(filename), os.W_OK): raise CCPPError("Cannot write {}, '{}'".format(description, filename)) # end if (else just return) @@ -215,3 +217,86 @@ def create_file_list(files, suffices, file_type, logger, root_path=None): raise CCPPError(emsg.format(file_type, '\n '.join(errors))) # end if return master_list + +############################################################################### +def replace_paths(dir_list, src_dir, dest_dir): +############################################################################### + """For every path in , replace instances of with + """ + for index, path in enumerate(dir_list): + dir_list[index] = path.replace(src_dir, dest_dir) + # end for + +############################################################################### +def remove_dir(src_dir, force=False): +############################################################################### + """Remove and its children. This operation can only succeed if + contains no files or if is True.""" + currdir = os.getcwd() + src_parent = os.path.split(src_dir)[0] + src_rel = os.path.relpath(src_dir, src_parent) + os.chdir(src_parent) # Prevent removing the parent of src_dir + if force: + leaf_dirs = set() + for root, dirs, files in os.walk(src_rel): + for file in files: + os.remove(os.path.join(root, file)) + # end for + if not dirs: + leaf_dirs.add(root) + # end if + # end for + for ldir in leaf_dirs: + os.removedirs(ldir) + # end for + # end if (no else, always try to remove top level + try: + os.removedirs(src_rel) + except OSError: + pass # Ignore error, fail silently + # end try + os.chdir(currdir) + +############################################################################### +def move_modified_files(src_dir, dest_dir, overwrite=False, remove_src=False): +############################################################################### + """For each file in , move it to if that file is + different in the two locations. + if is True, move all files to , even if unchanged. + If is True, remove when complete.""" + src_files = {} # All files in + if os.path.normpath(src_dir) != os.path.normpath(dest_dir): + for root, _, files in os.walk(src_dir): + for file in files: + src_path = os.path.join(root, file) + if file in src_files: + # We do not allow two files with the same name + emsg = "Duplicate CCPP file found, '{}', original is '{}'" + raise CCPPError(emsg.format(src_path, src_files[file])) + # end if + src_files[file] = src_path + # end for + # end for + for file in src_files: + src_path = src_files[file] + src_file = os.path.relpath(src_path, start=src_dir) + dest_path = os.path.join(dest_dir, src_file) + if os.path.exists(dest_path): + if overwrite: + fmove = True + else: + fmove = filecmp.cmp(src_path, dest_path, shallow=False) + # end if + else: + fmove = True + # end if + if fmove: + os.replace(src_path, dest_path) + else: + os.remove(src_path) + # end if + # end for + if remove_src: + remove_dir(src_dir, force=True) + # end if + # end if (no else, take no action if the directories are identical)