Skip to content

Commit

Permalink
Merge pull request #3210 from boegel/easyconfigs_index
Browse files Browse the repository at this point in the history
add support for creating an index & using it when searching for easyconfigs
  • Loading branch information
smoors authored Apr 8, 2020
2 parents 30c26d8 + b31bfa8 commit c30bedc
Show file tree
Hide file tree
Showing 7 changed files with 390 additions and 23 deletions.
27 changes: 24 additions & 3 deletions easybuild/framework/easyconfig/easyconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,9 @@
from easybuild.tools.config import GENERIC_EASYBLOCK_PKG, LOCAL_VAR_NAMING_CHECK_ERROR, LOCAL_VAR_NAMING_CHECK_LOG
from easybuild.tools.config import LOCAL_VAR_NAMING_CHECK_WARN
from easybuild.tools.config import Singleton, build_option, get_module_naming_scheme
from easybuild.tools.filetools import copy_file, decode_class_name, encode_class_name
from easybuild.tools.filetools import find_backup_name_candidate, find_easyconfigs, read_file, write_file
from easybuild.tools.filetools import copy_file, create_index, decode_class_name, encode_class_name
from easybuild.tools.filetools import find_backup_name_candidate, find_easyconfigs, load_index
from easybuild.tools.filetools import read_file, write_file
from easybuild.tools.hooks import PARSE, load_hooks, run_hook
from easybuild.tools.module_naming_scheme.mns import DEVEL_MODULE_SUFFIX
from easybuild.tools.module_naming_scheme.utilities import avail_module_naming_schemes, det_full_ec_version
Expand Down Expand Up @@ -104,6 +105,7 @@

_easyconfig_files_cache = {}
_easyconfigs_cache = {}
_path_indexes = {}


def handle_deprecated_or_replaced_easyconfig_parameters(ec_method):
Expand Down Expand Up @@ -1916,10 +1918,29 @@ def robot_find_easyconfig(name, version):

res = None
for path in paths:

if build_option('ignore_index'):
_log.info("Ignoring index for %s...", path)
path_index = []
elif path in _path_indexes:
path_index = _path_indexes[path]
_log.info("Found loaded index for %s", path)
elif os.path.exists(path):
path_index = load_index(path)
if path_index is None:
_log.info("No index found for %s, so creating it...", path)
path_index = create_index(path)
else:
_log.info("Loaded index for %s", path)

_path_indexes[path] = path_index
else:
path_index = []

easyconfigs_paths = create_paths(path, name, version)
for easyconfig_path in easyconfigs_paths:
_log.debug("Checking easyconfig path %s" % easyconfig_path)
if os.path.isfile(easyconfig_path):
if easyconfig_path in path_index or os.path.isfile(easyconfig_path):
_log.debug("Found easyconfig file for name %s, version %s at %s" % (name, version, easyconfig_path))
_easyconfig_files_cache[key] = os.path.abspath(easyconfig_path)
res = _easyconfig_files_cache[key]
Expand Down
10 changes: 9 additions & 1 deletion easybuild/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@
from easybuild.tools.config import find_last_log, get_repository, get_repositorypath, build_option
from easybuild.tools.containers.common import containerize
from easybuild.tools.docs import list_software
from easybuild.tools.filetools import adjust_permissions, cleanup, copy_file, copy_files, read_file, write_file
from easybuild.tools.filetools import adjust_permissions, cleanup, copy_file, copy_files, dump_index, load_index
from easybuild.tools.filetools import read_file, write_file
from easybuild.tools.github import check_github, close_pr, new_branch_github, find_easybuild_easyconfig
from easybuild.tools.github import install_github_token, list_prs, new_pr, new_pr_from_branch, merge_pr
from easybuild.tools.github import sync_branch_with_develop, sync_pr_with_develop, update_branch, update_pr
Expand Down Expand Up @@ -255,9 +256,16 @@ def main(args=None, logfile=None, do_build=None, testing=False, modtool=None):
elif options.list_software:
print(list_software(output_format=options.output_format, detailed=options.list_software == 'detailed'))

elif options.create_index:
print_msg("Creating index for %s..." % options.create_index, prefix=False)
index_fp = dump_index(options.create_index, max_age_sec=options.index_max_age)
index = load_index(options.create_index)
print_msg("Index created at %s (%d files)" % (index_fp, len(index)), prefix=False)

# non-verbose cleanup after handling GitHub integration stuff or printing terse info
early_stop_options = [
options.check_github,
options.create_index,
options.install_github_token,
options.list_installed_software,
options.list_software,
Expand Down
5 changes: 5 additions & 0 deletions easybuild/tools/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
DEFAULT_CONT_TYPE = CONT_TYPE_SINGULARITY

DEFAULT_BRANCH = 'develop'
DEFAULT_INDEX_MAX_AGE = 7 * 24 * 60 * 60 # 1 week (in seconds)
DEFAULT_JOB_BACKEND = 'GC3Pie'
DEFAULT_LOGFILE_FORMAT = ("easybuild", "easybuild-%(name)s-%(version)s-%(date)s.%(time)s.log")
DEFAULT_MAX_FAIL_RATIO_PERMS = 0.5
Expand Down Expand Up @@ -229,6 +230,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
'group_writable_installdir',
'hidden',
'ignore_checksums',
'ignore_index',
'ignore_locks',
'install_latest_eb_release',
'lib64_fallback_sanity_check',
Expand Down Expand Up @@ -279,6 +281,9 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
DEFAULT_BRANCH: [
'pr_target_branch',
],
DEFAULT_INDEX_MAX_AGE: [
'index_max_age',
],
DEFAULT_MAX_FAIL_RATIO_PERMS: [
'max_fail_ratio_adjust_permissions',
],
Expand Down
153 changes: 136 additions & 17 deletions easybuild/tools/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
from easybuild.base import fancylogger
from easybuild.tools import run
# import build_log must stay, to ensure use of EasyBuildLog
from easybuild.tools.build_log import EasyBuildError, dry_run_msg, print_msg
from easybuild.tools.build_log import EasyBuildError, dry_run_msg, print_msg, print_warning
from easybuild.tools.config import GENERIC_EASYBLOCK_PKG, build_option
from easybuild.tools.py2vs3 import std_urllib, string_type
from easybuild.tools.utilities import nub, remove_unwanted_chars
Expand Down Expand Up @@ -111,6 +111,7 @@
r'~': "_tilde_",
}

PATH_INDEX_FILENAME = '.eb-path-index'

CHECKSUM_TYPE_MD5 = 'md5'
CHECKSUM_TYPE_SHA256 = 'sha256'
Expand Down Expand Up @@ -614,6 +615,120 @@ def download_file(filename, url, path, forced=False):
return None


def create_index(path, ignore_dirs=None):
    """
    Create index for files in specified path.

    :param path: directory to walk (symlinked directories are followed)
    :param ignore_dirs: list of directory names that should not be descended into (default: none)
    :return: set of file paths, relative to the specified path
    :raises EasyBuildError: if the specified path does not exist, or is not a directory
    """
    if ignore_dirs is None:
        ignore_dirs = []

    index = set()

    if not os.path.exists(path):
        raise EasyBuildError("Specified path does not exist: %s", path)
    elif not os.path.isdir(path):
        raise EasyBuildError("Specified path is not a directory: %s", path)

    for (dirpath, dirnames, filenames) in os.walk(path, topdown=True, followlinks=True):
        # use relative paths in index;
        # the relative directory is the same for all files in it, so only determine it once per directory
        rel_dirpath = os.path.relpath(dirpath, path)
        # avoid that relative paths start with './'
        if rel_dirpath == '.':
            rel_dirpath = ''

        index.update(os.path.join(rel_dirpath, filename) for filename in filenames)

        # do not consider (certain) hidden directories
        # note: we still need to consider e.g., .local !
        # replace list elements using [:], so os.walk doesn't process deleted directories
        # see https://stackoverflow.com/questions/13454164/os-walk-without-hidden-folders
        dirnames[:] = [d for d in dirnames if d not in ignore_dirs]

    return index


def dump_index(path, max_age_sec=None):
    """
    Create index for files in specified path, and dump it to file (alphabetically sorted).

    The index file starts with two header lines ("# created at: ..." and "# valid until: ..."),
    followed by one relative file path per line.

    :param path: directory to create index for; the index file is written into this directory
    :param max_age_sec: number of seconds the index should be considered valid;
                        0 means the index never expires, None means use the 'index_max_age' build option
    :return: path to the index file
    """
    if max_age_sec is None:
        max_age_sec = build_option('index_max_age')

    index_fp = os.path.join(path, PATH_INDEX_FILENAME)
    index_contents = create_index(path)

    curr_ts = datetime.datetime.now()
    if max_age_sec == 0:
        end_ts = datetime.datetime.max
    else:
        end_ts = curr_ts + datetime.timedelta(0, max_age_sec)

    # always include the fractional seconds in the timestamps:
    # str(<datetime>) leaves them out when the microsecond field happens to be 0,
    # which yields a timestamp that can not be parsed using the '%Y-%m-%d %H:%M:%S.%f' format
    ts_format = '%Y-%m-%d %H:%M:%S.%f'

    lines = [
        "# created at: %s" % curr_ts.strftime(ts_format),
        "# valid until: %s" % end_ts.strftime(ts_format),
    ]
    lines.extend(sorted(index_contents))

    # NOTE(review): always_overwrite=False presumably prevents silently replacing an existing index file;
    # confirm against write_file
    write_file(index_fp, '\n'.join(lines), always_overwrite=False)

    return index_fp


def load_index(path, ignore_dirs=None):
    """
    Load index for specified path, and return contents (or None if no index exists).

    None is also returned when the 'ignore_index' build option is enabled, when the index is no longer valid
    (current time is past the "valid until" timestamp), or when no entries are left after filtering.

    :param path: directory in which to look for an index file
    :param ignore_dirs: list of directory names; index entries located in such a directory are left out
    :return: set of file paths (relative to specified path), or None
    :raises EasyBuildError: if the "valid until" timestamp in the index file can not be parsed
    """
    if ignore_dirs is None:
        ignore_dirs = []

    index_fp = os.path.join(path, PATH_INDEX_FILENAME)
    index = set()

    if build_option('ignore_index'):
        _log.info("Ignoring index for %s...", path)

    elif os.path.exists(index_fp):
        lines = read_file(index_fp).splitlines()

        valid_ts_regex = re.compile("^# valid until: (.*)", re.M)
        # str(<datetime>) leaves out the fractional seconds when the microsecond field is 0,
        # so also accept timestamps without fractional part rather than failing on them
        ts_formats = ('%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S')
        valid_ts = None

        for line in lines:

            # extract "valid until" timestamp, so we can check whether index is still valid
            # (only the first matching header line is taken into account)
            if valid_ts is None:
                res = valid_ts_regex.match(line)
            else:
                res = None

            if res:
                valid_ts_str = res.group(1)
                parse_err = None
                for ts_format in ts_formats:
                    try:
                        valid_ts = datetime.datetime.strptime(valid_ts_str, ts_format)
                        break
                    except ValueError as err:
                        # keep a reference, since 'err' is cleared when leaving the except block in Python 3
                        parse_err = err

                if valid_ts is None:
                    raise EasyBuildError("Failed to parse timestamp '%s' for index at %s: %s",
                                         valid_ts_str, path, parse_err)

            elif line.startswith('#'):
                _log.info("Ignoring unknown header line '%s' in index for %s", line, path)

            else:
                # filter out files that are in an ignored directory
                path_dirs = line.split(os.path.sep)[:-1]
                if not any(d in path_dirs for d in ignore_dirs):
                    index.add(line)

        # check whether index is still valid
        if valid_ts:
            curr_ts = datetime.datetime.now()
            if curr_ts > valid_ts:
                print_warning("Index for %s is no longer valid (too old), so ignoring it...", path)
                index = None
            else:
                print_msg("found valid index for %s, so using it...", path)

    # an empty index is reported as "no index"
    return index or None


def find_easyconfigs(path, ignore_dirs=None):
"""
Find .eb easyconfig files in path
Expand Down Expand Up @@ -679,22 +794,26 @@ def search_file(paths, query, short=False, ignore_dirs=None, silent=False, filen
if not terse:
print_msg("Searching (case-insensitive) for '%s' in %s " % (query.pattern, path), log=_log, silent=silent)

for (dirpath, dirnames, filenames) in os.walk(path, topdown=True):
for filename in filenames:
if query.search(filename):
if not path_hits:
var = "CFGS%d" % var_index
var_index += 1
if filename_only:
path_hits.append(filename)
else:
path_hits.append(os.path.join(dirpath, filename))

# do not consider (certain) hidden directories
# note: we still need to consider e.g., .local !
# replace list elements using [:], so os.walk doesn't process deleted directories
# see http://stackoverflow.com/questions/13454164/os-walk-without-hidden-folders
dirnames[:] = [d for d in dirnames if d not in ignore_dirs]
path_index = load_index(path, ignore_dirs=ignore_dirs)
if path_index is None or build_option('ignore_index'):
if os.path.exists(path):
_log.info("No index found for %s, creating one...", path)
path_index = create_index(path, ignore_dirs=ignore_dirs)
else:
path_index = []
else:
_log.info("Index found for %s, so using it...", path)

for filepath in path_index:
filename = os.path.basename(filepath)
if query.search(filename):
if not path_hits:
var = "CFGS%d" % var_index
var_index += 1
if filename_only:
path_hits.append(filename)
else:
path_hits.append(os.path.join(path, filepath))

path_hits = sorted(path_hits)

Expand Down
8 changes: 6 additions & 2 deletions easybuild/tools/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@
from easybuild.tools import build_log, run # build_log should always stay there, to ensure EasyBuildLog
from easybuild.tools.build_log import DEVEL_LOG_LEVEL, EasyBuildError
from easybuild.tools.build_log import init_logging, log_start, print_warning, raise_easybuilderror
from easybuild.tools.config import CONT_IMAGE_FORMATS, CONT_TYPES, DEFAULT_CONT_TYPE
from easybuild.tools.config import DEFAULT_ALLOW_LOADED_MODULES, DEFAULT_BRANCH, DEFAULT_FORCE_DOWNLOAD
from easybuild.tools.config import CONT_IMAGE_FORMATS, CONT_TYPES, DEFAULT_CONT_TYPE, DEFAULT_ALLOW_LOADED_MODULES
from easybuild.tools.config import DEFAULT_BRANCH, DEFAULT_FORCE_DOWNLOAD, DEFAULT_INDEX_MAX_AGE
from easybuild.tools.config import DEFAULT_JOB_BACKEND, DEFAULT_LOGFILE_FORMAT, DEFAULT_MAX_FAIL_RATIO_PERMS
from easybuild.tools.config import DEFAULT_MNS, DEFAULT_MODULE_SYNTAX, DEFAULT_MODULES_TOOL, DEFAULT_MODULECLASSES
from easybuild.tools.config import DEFAULT_PATH_SUBDIRS, DEFAULT_PKG_RELEASE, DEFAULT_PKG_TOOL, DEFAULT_PKG_TYPE
Expand Down Expand Up @@ -693,8 +693,12 @@ def easyconfig_options(self):
descr = ("Options for Easyconfigs", "Options that affect all specified easyconfig files.")

opts = OrderedDict({
'create-index': ("Create index for files in specified directory", None, 'store', None),
'fix-deprecated-easyconfigs': ("Fix use of deprecated functionality in specified easyconfig files.",
None, 'store_true', False),
'ignore-index': ("Ignore index when searching for files", None, 'store_true', False),
'index-max-age': ("Maximum age for index before it is considered stale (in seconds)",
int, 'store', DEFAULT_INDEX_MAX_AGE),
'inject-checksums': ("Inject checksums of specified type for sources/patches into easyconfig file(s)",
'choice', 'store_or_None', CHECKSUM_TYPE_SHA256, CHECKSUM_TYPES),
'local-var-naming-check': ("Mode to use when checking whether local variables follow the recommended "
Expand Down
Loading

0 comments on commit c30bedc

Please sign in to comment.