Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for creating an index & using it when searching for easyconfigs #3210

Merged
merged 21 commits into from
Apr 8, 2020
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
e51e4e4
implement support creating/dumping/loading index of files in path + l…
boegel Feb 10, 2020
ce29a38
use path index in robot_find_easyconfig, if available (and cache it)
boegel Feb 10, 2020
a4f3d67
make create_index check whether specified path is an existing directo…
boegel Feb 12, 2020
5d3b263
create index for path if no index is available in robot_find_easyconf…
boegel Feb 12, 2020
078f099
add support for --create-index
boegel Feb 12, 2020
7cd4e9d
add configuration option to specify maximum age of index file
boegel Feb 21, 2020
ecd10dd
add support to dump_index for specifying maximum age of index + make …
boegel Feb 21, 2020
b103a9e
take into account non-existing paths in robot_search_easyconfig while…
boegel Feb 21, 2020
88e676b
extend test for --create-index
boegel Feb 21, 2020
0efe3e3
print message when valid index is being used
boegel Feb 21, 2020
91a279b
take into account non-existing paths in search_file
boegel Feb 21, 2020
327b93f
Merge branch 'develop' into easyconfigs_index
boegel Feb 29, 2020
78b6497
appease the Hound
boegel Feb 29, 2020
6c15075
add support for ignoring search index via --ignore-index
boegel Feb 29, 2020
6bb2131
fix determining relative paths in create_index
boegel Mar 8, 2020
43163ba
Merge branch 'develop' into easyconfigs_index
boegel Mar 8, 2020
4dc4554
avoid that relative paths start with './' in create_index
boegel Mar 8, 2020
c8c3688
Merge branch 'develop' into easyconfigs_index
boegel Apr 8, 2020
72c6b3c
fix minor typo in test_create_index
boegel Apr 8, 2020
f64ae2c
fix typo in import + appease the Hound
boegel Apr 8, 2020
b31bfa8
take into account --ignore-index in load_index + check for it in tests
boegel Apr 8, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions easybuild/framework/easyconfig/easyconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@
from easybuild.tools.config import LOCAL_VAR_NAMING_CHECK_ERROR, LOCAL_VAR_NAMING_CHECK_LOG, LOCAL_VAR_NAMING_CHECK_WARN
from easybuild.tools.config import Singleton, build_option, get_module_naming_scheme
from easybuild.tools.filetools import EASYBLOCK_CLASS_PREFIX, copy_file, decode_class_name, encode_class_name
from easybuild.tools.filetools import find_backup_name_candidate, find_easyconfigs, read_file, write_file
from easybuild.tools.filetools import create_index, find_backup_name_candidate, find_easyconfigs, load_index
from easybuild.tools.filetools import read_file, write_file
from easybuild.tools.hooks import PARSE, load_hooks, run_hook
from easybuild.tools.module_naming_scheme.mns import DEVEL_MODULE_SUFFIX
from easybuild.tools.module_naming_scheme.utilities import avail_module_naming_schemes, det_full_ec_version
Expand Down Expand Up @@ -102,6 +103,7 @@

_easyconfig_files_cache = {}
_easyconfigs_cache = {}
_path_indexes = {}


def handle_deprecated_or_replaced_easyconfig_parameters(ec_method):
Expand Down Expand Up @@ -1890,10 +1892,25 @@ def robot_find_easyconfig(name, version):

res = None
for path in paths:
if path in _path_indexes:
path_index = _path_indexes[path]
_log.info("Found loaded index for %s", path)
elif os.path.exists(path):
path_index = load_index(path)
if path_index is None:
_log.info("No index found for %s, so creating it...", path)
path_index = create_index(path)
else:
_log.info("Loaded index for %s", path)

_path_indexes[path] = path_index
else:
path_index = []

easyconfigs_paths = create_paths(path, name, version)
for easyconfig_path in easyconfigs_paths:
_log.debug("Checking easyconfig path %s" % easyconfig_path)
if os.path.isfile(easyconfig_path):
if easyconfig_path in path_index or os.path.isfile(easyconfig_path):
_log.debug("Found easyconfig file for name %s, version %s at %s" % (name, version, easyconfig_path))
_easyconfig_files_cache[key] = os.path.abspath(easyconfig_path)
res = _easyconfig_files_cache[key]
Expand Down
10 changes: 9 additions & 1 deletion easybuild/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@
from easybuild.tools.config import find_last_log, get_repository, get_repositorypath, build_option
from easybuild.tools.containers.common import containerize
from easybuild.tools.docs import list_software
from easybuild.tools.filetools import adjust_permissions, cleanup, copy_file, copy_files, read_file, write_file
from easybuild.tools.filetools import adjust_permissions, cleanup, copy_file, copy_files, dump_index, load_index
from easybuild.tools.filetools import read_file, write_file
from easybuild.tools.github import check_github, close_pr, new_branch_github, find_easybuild_easyconfig
from easybuild.tools.github import install_github_token, list_prs, new_pr, new_pr_from_branch, merge_pr
from easybuild.tools.github import sync_branch_with_develop, sync_pr_with_develop, update_branch, update_pr
Expand Down Expand Up @@ -255,9 +256,16 @@ def main(args=None, logfile=None, do_build=None, testing=False, modtool=None):
elif options.list_software:
print(list_software(output_format=options.output_format, detailed=options.list_software == 'detailed'))

elif options.create_index:
print_msg("Creating index for %s..." % options.create_index, prefix=False)
index_fp = dump_index(options.create_index, max_age_sec=options.index_max_age)
index = load_index(options.create_index)
print_msg("Index created at %s (%d files)" % (index_fp, len(index)), prefix=False)

# non-verbose cleanup after handling GitHub integration stuff or printing terse info
early_stop_options = [
options.check_github,
options.create_index,
options.install_github_token,
options.list_installed_software,
options.list_software,
Expand Down
4 changes: 4 additions & 0 deletions easybuild/tools/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
CONT_TYPES = [CONT_TYPE_DOCKER, CONT_TYPE_SINGULARITY]
DEFAULT_CONT_TYPE = CONT_TYPE_SINGULARITY

DEFAULT_INDEX_MAX_AGE = 7 * 24 * 60 * 60 # 1 week (in seconds)
DEFAULT_JOB_BACKEND = 'GC3Pie'
DEFAULT_LOGFILE_FORMAT = ("easybuild", "easybuild-%(name)s-%(version)s-%(date)s.%(time)s.log")
DEFAULT_MAX_FAIL_RATIO_PERMS = 0.5
Expand Down Expand Up @@ -270,6 +271,9 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
DEFAULT_CONT_TYPE: [
'container_type',
],
DEFAULT_INDEX_MAX_AGE: [
'index_max_age',
],
DEFAULT_MAX_FAIL_RATIO_PERMS: [
'max_fail_ratio_adjust_permissions',
],
Expand Down
146 changes: 129 additions & 17 deletions easybuild/tools/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
from easybuild.base import fancylogger
from easybuild.tools import run
# import build_log must stay, to make use of EasyBuildLog
from easybuild.tools.build_log import EasyBuildError, dry_run_msg, print_msg
from easybuild.tools.build_log import EasyBuildError, dry_run_msg, print_msg, print_warning
from easybuild.tools.config import build_option
from easybuild.tools.py2vs3 import std_urllib, string_type
from easybuild.tools.utilities import nub
Expand Down Expand Up @@ -109,6 +109,7 @@
r'~': "_tilde_",
}

PATH_INDEX_FILENAME = '.eb-path-index'

CHECKSUM_TYPE_MD5 = 'md5'
CHECKSUM_TYPE_SHA256 = 'sha256'
Expand Down Expand Up @@ -589,6 +590,113 @@ def download_file(filename, url, path, forced=False):
return None


def create_index(path, ignore_dirs=None):
    """
    Create index for files in specified path.

    :param path: top-level directory for which to create an index
    :param ignore_dirs: names of subdirectories that should not be descended into (default: none)
    :return: set of paths to files found in specified path, relative to that path
    :raises EasyBuildError: if specified path does not exist, or is not a directory
    """
    if ignore_dirs is None:
        ignore_dirs = []

    index = set()

    if not os.path.exists(path):
        raise EasyBuildError("Specified path does not exist: %s", path)
    elif not os.path.isdir(path):
        raise EasyBuildError("Specified path is not a directory: %s", path)

    for (dirpath, dirnames, filenames) in os.walk(path, topdown=True):
        # use relative paths in index;
        # the relative path is determined via os.path.relpath rather than by stripping off
        # the first len(path)+1 characters, since that eats the first character of
        # the subdirectory name when the specified path ends with a path separator
        rel_dirpath = os.path.relpath(dirpath, path)

        # avoid that relative paths for files in the top-level directory start with './'
        if rel_dirpath == os.path.curdir:
            rel_dirpath = ''

        for filename in filenames:
            index.add(os.path.join(rel_dirpath, filename))

        # do not consider (certain) hidden directories
        # note: we still need to consider e.g., .local !
        # replace list elements using [:], so os.walk doesn't process deleted directories
        # see https://stackoverflow.com/questions/13454164/os-walk-without-hidden-folders
        dirnames[:] = [d for d in dirnames if d not in ignore_dirs]

    return index


def dump_index(path, max_age_sec=None):
    """
    Create index for files in specified path, and dump it to file (alphabetically sorted).

    The index file starts with two header lines ("# created at: ..." and "# valid until: ..."),
    followed by one (relative) file path per line.

    :param path: directory to create index for; the index file is written into this directory
    :param max_age_sec: maximum age of the index (in seconds), 0 means the index never expires;
                        if None, the value of the 'index_max_age' build option is used
    :return: path to the index file
    """
    if max_age_sec is None:
        max_age_sec = build_option('index_max_age')

    index_fp = os.path.join(path, PATH_INDEX_FILENAME)
    index_contents = create_index(path)

    curr_ts = datetime.datetime.now()
    if max_age_sec == 0:
        # index never expires
        end_ts = datetime.datetime.max
    else:
        end_ts = curr_ts + datetime.timedelta(0, max_age_sec)

    # use an explicit timestamp format rather than str(<datetime>):
    # str() leaves out the fractional part when the microseconds value happens to be 0,
    # which results in a timestamp that can not be parsed using a format that includes '.%f'
    ts_format = '%Y-%m-%d %H:%M:%S.%f'

    lines = [
        "# created at: %s" % curr_ts.strftime(ts_format),
        "# valid until: %s" % end_ts.strftime(ts_format),
    ]
    lines.extend(sorted(index_contents))

    write_file(index_fp, '\n'.join(lines), always_overwrite=False)

    return index_fp


def load_index(path, ignore_dirs=None):
    """
    Load index for specified path, and return contents (or None if no index exists).

    :param path: directory in which to look for an index file
    :param ignore_dirs: names of directories to ignore; index entries that are located in
                        a directory with one of these names are filtered out (default: none)
    :return: set of (relative) file paths listed in the index;
             None if there is no index file, if the index is no longer valid (too old),
             or if no entries are left after filtering
    :raises EasyBuildError: if the "valid until" timestamp in the index file can not be parsed
    """
    if ignore_dirs is None:
        ignore_dirs = []

    index_fp = os.path.join(path, PATH_INDEX_FILENAME)
    index = set()

    if os.path.exists(index_fp):
        lines = read_file(index_fp).splitlines()

        valid_ts_regex = re.compile("^# valid until: (.*)", re.M)
        valid_ts = None

        for line in lines:

            # extract "valid until" timestamp, so we can check whether index is still valid
            # (only the first matching header line is taken into account)
            if valid_ts is None:
                res = valid_ts_regex.match(line)
            else:
                res = None

            if res:
                valid_ts = res.group(1)
                try:
                    valid_ts = datetime.datetime.strptime(valid_ts, '%Y-%m-%d %H:%M:%S.%f')
                except ValueError:
                    # str(<datetime>) leaves out the fractional part when the microseconds value is 0,
                    # so also accept a timestamp without it before giving up
                    try:
                        valid_ts = datetime.datetime.strptime(valid_ts, '%Y-%m-%d %H:%M:%S')
                    except ValueError as err:
                        raise EasyBuildError("Failed to parse timestamp '%s' for index at %s: %s", valid_ts, path, err)

            elif line.startswith('#'):
                _log.info("Ignoring unknown header line '%s' in index for %s", line, path)

            elif not line.strip():
                # skip blank lines, they don't correspond to a file
                continue

            else:
                # filter out files that are in an ignored directory
                path_dirs = line.split(os.path.sep)[:-1]
                if not any(d in path_dirs for d in ignore_dirs):
                    index.add(line)

        # check whether index is still valid
        if valid_ts is not None:
            curr_ts = datetime.datetime.now()
            if curr_ts > valid_ts:
                print_warning("Index for %s is no longer valid (too old), so ignoring it...", path)
                index = None
            else:
                print_msg("found valid index for %s, so using it...", path)

    # an empty index is treated the same as no index being available
    return index or None


def find_easyconfigs(path, ignore_dirs=None):
"""
Find .eb easyconfig files in path
Expand Down Expand Up @@ -654,22 +762,26 @@ def search_file(paths, query, short=False, ignore_dirs=None, silent=False, filen
if not terse:
print_msg("Searching (case-insensitive) for '%s' in %s " % (query.pattern, path), log=_log, silent=silent)

for (dirpath, dirnames, filenames) in os.walk(path, topdown=True):
for filename in filenames:
if query.search(filename):
if not path_hits:
var = "CFGS%d" % var_index
var_index += 1
if filename_only:
path_hits.append(filename)
else:
path_hits.append(os.path.join(dirpath, filename))

# do not consider (certain) hidden directories
# note: we still need to consider e.g., .local !
# replace list elements using [:], so os.walk doesn't process deleted directories
# see http://stackoverflow.com/questions/13454164/os-walk-without-hidden-folders
dirnames[:] = [d for d in dirnames if d not in ignore_dirs]
path_index = load_index(path, ignore_dirs=ignore_dirs)
if path_index is None:
if os.path.exists(path):
_log.info("No index found for %s, creating one...", path)
path_index = create_index(path, ignore_dirs=ignore_dirs)
else:
path_index = []
else:
_log.info("Index found for %s, so using it...", path)

for filepath in path_index:
filename = os.path.basename(filepath)
if query.search(filename):
if not path_hits:
var = "CFGS%d" % var_index
var_index += 1
if filename_only:
path_hits.append(filename)
else:
path_hits.append(os.path.join(path, filepath))

path_hits = sorted(path_hits)

Expand Down
9 changes: 6 additions & 3 deletions easybuild/tools/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@
from easybuild.tools.build_log import DEVEL_LOG_LEVEL, EasyBuildError
from easybuild.tools.build_log import init_logging, log_start, print_warning, raise_easybuilderror
from easybuild.tools.config import CONT_IMAGE_FORMATS, CONT_TYPES, DEFAULT_CONT_TYPE
from easybuild.tools.config import DEFAULT_ALLOW_LOADED_MODULES, DEFAULT_FORCE_DOWNLOAD, DEFAULT_JOB_BACKEND
from easybuild.tools.config import DEFAULT_LOGFILE_FORMAT, DEFAULT_MAX_FAIL_RATIO_PERMS, DEFAULT_MNS
from easybuild.tools.config import DEFAULT_MODULE_SYNTAX, DEFAULT_MODULES_TOOL, DEFAULT_MODULECLASSES
from easybuild.tools.config import DEFAULT_ALLOW_LOADED_MODULES, DEFAULT_FORCE_DOWNLOAD, DEFAULT_INDEX_MAX_AGE
from easybuild.tools.config import DEFAULT_JOB_BACKEND, DEFAULT_LOGFILE_FORMAT, DEFAULT_MAX_FAIL_RATIO_PERMS
from easybuild.tools.config import DEFAULT_MNS, DEFAULT_MODULE_SYNTAX, DEFAULT_MODULES_TOOL, DEFAULT_MODULECLASSES
from easybuild.tools.config import DEFAULT_PATH_SUBDIRS, DEFAULT_PKG_RELEASE, DEFAULT_PKG_TOOL, DEFAULT_PKG_TYPE
from easybuild.tools.config import DEFAULT_PNS, DEFAULT_PREFIX, DEFAULT_REPOSITORY, EBROOT_ENV_VAR_ACTIONS, ERROR
from easybuild.tools.config import FORCE_DOWNLOAD_CHOICES, GENERAL_CLASS, IGNORE, JOB_DEPS_TYPE_ABORT_ON_ERROR
Expand Down Expand Up @@ -681,8 +681,11 @@ def easyconfig_options(self):
descr = ("Options for Easyconfigs", "Options that affect all specified easyconfig files.")

opts = OrderedDict({
'create-index': ("Create index for files in specified directory", None, 'store', None),
'fix-deprecated-easyconfigs': ("Fix use of deprecated functionality in specified easyconfig files.",
None, 'store_true', False),
'index-max-age': ("Maximum age for index before it is considered stale (in seconds)",
int, 'store', DEFAULT_INDEX_MAX_AGE),
'inject-checksums': ("Inject checksums of specified type for sources/patches into easyconfig file(s)",
'choice', 'store_or_None', CHECKSUM_TYPE_SHA256, CHECKSUM_TYPES),
'local-var-naming-check': ("Mode to use when checking whether local variables follow the recommended "
Expand Down
Loading