diff --git a/doc/cachew_disable.md b/doc/cachew_disable.md new file mode 100644 index 0000000..6986610 --- /dev/null +++ b/doc/cachew_disable.md @@ -0,0 +1,39 @@ +Can put this in the README.md once it's been tested a bit + +### Disable through Environment Variables + +To disable a `cachew` function in some module, you can use the `CACHEW_DISABLE` environment variable. This is a colon-delimited (like a `$PATH`) list of modules to disable. It disables modules given some name recursively, and supports [unix-style globs](https://docs.python.org/3/library/fnmatch.html). + +For example, say you were using [HPI](https://github.com/karlicoss/HPI) which internally uses a snippet like `mcachew` above. You may want to enable `cachew` for _most_ modules, but disable them for specific ones. For example, take: + +``` +my/browser +├── active_browser.py +├── all.py +├── common.py +└── export.py +my/reddit +├── __init__.py +├── all.py +├── common.py +├── pushshift.py +└── rexport.py +``` + +To disable `cachew` in all of these files: `export CACHEW_DISABLE=my.browser:my.reddit` (disables for all submodules) + +To disable just for a particular module: `export CACHEW_DISABLE='my.browser.export'` + +Similarly to `$PATH` manipulations, you can do this in your shell configuration incrementally: + +``` +CACHEW_DISABLE='my.reddit.rexport' +if some condition...; then + CACHEW_DISABLE="my.browser.export:$CACHEW_DISABLE" +fi +export CACHEW_DISABLE +``` + +You can also use globs, e.g. 
# Tail of doc/cachew_disable.md carried by this span (closing quote restored):
#
#   `CACHEW_DISABLE='my.*.gdpr'`
#
#   To disable `cachew` everywhere, you could set `export CACHEW_DISABLE='*'`
#
# Everything below is the CACHEW_DISABLE support added to src/cachew/__init__.py.
import logging
import os
from typing import Callable, List, Optional


def callable_module_name(func: Callable) -> Optional[str]:
    """Return ``func.__module__``, or ``None`` if the callable has no such attribute."""
    return getattr(func, '__module__', None)


# could cache this, but might be worth not to, so the user can change it on the fly?
def _parse_disabled_modules(logger: Optional[logging.Logger] = None) -> List[str]:
    """Parse the ``CACHEW_DISABLE`` environment variable into a list of patterns.

    The variable is a ``$PATH``-like, colon-separated list,
    e.g. ``CACHEW_DISABLE=my.browser:my.reddit``.

    :param logger: if given, receives a warning when the value contains a comma
        (which suggests the wrong separator was used).
    :return: the non-empty patterns with surrounding whitespace removed;
        an empty list when the variable is unset or blank.
    """
    # single lookup: no window between a membership check and the read
    disabled = os.environ.get('CACHEW_DISABLE', '')
    if disabled.strip() == '':
        return []
    if ',' in disabled and logger is not None:
        logger.warning(
            'CACHEW_DISABLE contains a comma, but this expects a $PATH-like, colon-separated list; '
            'try something like CACHEW_DISABLE=%s',
            disabled.replace(',', ':'),
        )
    # Drop empty entries in case of something like CACHEW_DISABLE=my.module:$CACHEW_DISABLE,
    # and strip each entry so 'my.a : my.b' yields usable patterns.
    stripped = (part.strip() for part in disabled.split(':'))
    return [part for part in stripped if part]


def _matches_disabled_module(module_name: str, pattern: str) -> bool:
    '''
    Check whether ``pattern`` disables ``module_name``.

    The pattern is compared to the module name one dotted component at a time,
    each component being a unix-style glob. A pattern with fewer components than
    the module name matches as a prefix, so it also covers submodules.
    Matching is case-sensitive.

    >>> _matches_disabled_module('my.browser', 'my.browser')
    True
    >>> _matches_disabled_module('my.browser', 'my.*')
    True
    >>> _matches_disabled_module('my.browser', 'my')
    True
    >>> _matches_disabled_module('my.browser', 'my.browse*')
    True
    >>> _matches_disabled_module('my.browser.export', 'my.browser')
    True
    >>> _matches_disabled_module('mysomething.else', '*')  # CACHEW_DISABLE='*' disables everything
    True
    >>> _matches_disabled_module('my.browser', 'my.br?????')  # fnmatch supports unix-like patterns
    True
    >>> _matches_disabled_module('my.browser', 'my.browse')
    False
    >>> _matches_disabled_module('mysomething.else', 'my')  # since not at '.' boundary, doesn't match
    False
    >>> _matches_disabled_module('mysomething.else', '')
    False
    >>> _matches_disabled_module('my.browser', 'my.browser.export')
    False
    >>> _matches_disabled_module('my.Browser', 'my.browser')  # module names are case-sensitive
    False
    '''
    # fnmatchcase rather than fnmatch: fnmatch normalises case via os.path.normcase,
    # which would make matching case-insensitive on Windows.
    from fnmatch import fnmatchcase

    if module_name == pattern:
        return True

    module_parts = module_name.split('.')
    pattern_parts = pattern.split('.')

    # e.g. if pattern is 'module.submod.inner_module' and module is just 'module.submod'
    # there is no possible way for it to match
    if len(pattern_parts) > len(module_parts):
        return False

    # zip stops at the shorter (pattern) side, which gives the prefix semantics
    return all(fnmatchcase(mp, pp) for mp, pp in zip(module_parts, pattern_parts))


def _module_is_disabled(module_name: str, logger: logging.Logger) -> bool:
    """Return True if any ``CACHEW_DISABLE`` pattern matches ``module_name``.

    The environment variable is re-parsed on every call. On the first matching
    pattern a debug message is logged and True is returned.
    """
    for pat in _parse_disabled_modules(logger):
        if _matches_disabled_module(module_name, pat):
            logger.debug(
                "caching disabled for %s (matched '%s' from 'CACHEW_DISABLE=%s')",
                module_name,
                pat,
                # .get: the variable may have been removed since it was parsed
                os.environ.get('CACHEW_DISABLE', ''),
            )
            return True
    return False


# Hook added inside cachew_wrapper by the same patch (unchanged; the enclosing
# function is not fully visible in this chunk, so it is recorded here for context):
#
#     mod_name = callable_module_name(func)
#     if mod_name is not None and _module_is_disabled(mod_name, logger):
#         yield from func(*args, **kwargs)
#         return