Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nicer logging; proper support for single return values #51

Merged
merged 2 commits into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 90 additions & 45 deletions src/cachew/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,11 @@ def infer_return_type(func) -> Union[Failure, Inferred]:
>>> infer_return_type(person_provider)
('multiple', <class 'cachew.Person'>)

>>> def single_str() -> str:
... return 'hello'
>>> infer_return_type(single_str)
('single', <class 'str'>)

>>> def single_person() -> Person:
... return Person(name="what", age=-1)
>>> infer_return_type(single_person)
Expand Down Expand Up @@ -217,16 +222,7 @@ def bail(reason: str) -> str:

    # first we wanna check if the top level type is some sort of iterable that makes sense to cache
# e.g. List/Sequence/Iterator etc
origin = get_origin(rtype)
return_multiple = False
if origin is not None and origin is not tuple:
# TODO need to check it handles namedtuple correctly..
try:
return_multiple = issubclass(origin, Iterable)
except TypeError:
# that would happen if origin is not a 'proper' type, e.g. is a Union or something
# seems like exception is the easiest way to check
pass
return_multiple = _returns_multiple(rtype)

if return_multiple:
# then the actual type to cache will be the argument of the top level one
Expand All @@ -249,6 +245,21 @@ def bail(reason: str) -> str:
return ('multiple' if return_multiple else 'single', cached_type)


def _returns_multiple(rtype) -> bool:
origin = get_origin(rtype)
if origin is None:
return False
if origin is tuple:
# usually tuples are more like single values rather than a sequence? (+ this works for namedtuple)
return False
try:
return issubclass(origin, Iterable)
except TypeError:
# that would happen if origin is not a 'proper' type, e.g. is a Union or something
# seems like exception is the easiest way to check
return False


# https://stackoverflow.com/questions/653368/how-to-create-a-python-decorator-that-can-be-used-either-with-or-without-paramet
def doublewrap(f):
@functools.wraps(f)
Expand All @@ -263,14 +274,12 @@ def new_dec(*args, **kwargs):
return new_dec


def cachew_error(e: Exception) -> None:
def cachew_error(e: Exception, *, logger: logging.Logger) -> None:
if settings.THROW_ON_ERROR:
# TODO would be nice to throw from the original code line -- maybe mess with the stack here?
raise e
else:
logger = get_logger()
# todo add func name?
logger.error("cachew: error while setting up cache, falling back to non-cached version")
logger.error("error while setting up cache, falling back to non-cached version")
logger.exception(e)


Expand All @@ -283,7 +292,7 @@ def cachew_impl(
func=None,
cache_path: Optional[PathProvider[P]] = use_default_path,
force_file: bool = False,
cls: Optional[Type] = None,
cls: Optional[Union[Type, Tuple[Kind, Type]]] = None,
depends_on: HashFunction[P] = default_hash,
logger: Optional[logging.Logger] = None,
chunk_by: int = 100,
Expand Down Expand Up @@ -338,59 +347,89 @@ def cachew_impl(
"""
if logger is None:
module_name = getattr(func, '__module__', None)
if module_name is None:
if module_name is not None and module_name in logging.Logger.manager.loggerDict:
# if logger for the function's module already exists, reuse it
logger = logging.getLogger(module_name)
else:
# rely on default cachew logger
logger = get_logger()
else:
# if logger for the function's module already exists, reuse it
if module_name in logging.Logger.manager.loggerDict:
logger = logging.getLogger(module_name)
else:
logger = get_logger()

class AddFuncName(logging.LoggerAdapter):
def process(self, msg, kwargs):
extra = self.extra
assert extra is not None
func_name = extra['func_name']
return f'[{func_name}] {msg}', kwargs

func_name = callable_name(func)
adapter = AddFuncName(logger, {'func_name': func_name})
logger = cast(logging.Logger, adapter)

hashf = kwargs.get('hashf', None)
if hashf is not None:
warnings.warn("'hashf' is deprecated. Please use 'depends_on' instead")
depends_on = hashf

cn = callable_name(func)
# todo not very nice that ENABLE check is scattered across two places
if not settings.ENABLE or cache_path is None:
logger.debug('[%s]: cache explicitly disabled (settings.ENABLE is False or cache_path is None)', cn)
logger.debug('cache explicitly disabled (settings.ENABLE is False or cache_path is None)')
return func

if cache_path is use_default_path:
cache_path = settings.DEFAULT_CACHEW_DIR
logger.debug('[%s]: no cache_path specified, using the default %s', cn, cache_path)
logger.debug(f'no cache_path specified, using the default {cache_path}')

use_kind: Optional[Kind] = None
use_cls: Optional[Type] = None
if cls is not None:
# defensive here since typing. objects passed as cls might fail on isinstance
try:
is_tuple = isinstance(cls, tuple)
except:
is_tuple = False
if is_tuple:
use_kind, use_cls = cls # type: ignore[misc]
else:
use_kind = 'multiple'
use_cls = cls # type: ignore[assignment]

# TODO fuzz infer_return_type, should never crash?
inference_res = infer_return_type(func)
if isinstance(inference_res, Failure):
msg = f"failed to infer cache type: {inference_res}. See https://github.com/karlicoss/cachew#features for the list of supported types."
if cls is None:
if use_cls is None:
ex = CachewException(msg)
cachew_error(ex)
cachew_error(ex, logger=logger)
return func
else:
# it's ok, assuming user knows better
logger.debug(msg)
assert use_kind is not None
else:
(kind, inferred) = inference_res
assert kind == 'multiple' # TODO implement later
if cls is None:
logger.debug('[%s] using inferred type %s', cn, inferred)
cls = inferred
(inferred_kind, inferred_cls) = inference_res
if use_cls is None:
logger.debug(f'using inferred type {inferred_kind} {inferred_cls}')
(use_kind, use_cls) = (inferred_kind, inferred_cls)
else:
if cls != inferred:
logger.warning("inferred type %s mismatches specified type %s", inferred, cls)
assert use_kind is not None
if (use_kind, use_cls) != inference_res:
logger.warning(f"inferred type {inference_res} mismatches explicitly specified type {(use_kind, use_cls)}")
# TODO not sure if should be more serious error...

if use_kind == 'single':
# pretend it's an iterable, this is just simpler for cachew_wrapper
def _func(*args, **kwargs):
return [func(*args, **kwargs)]

else:
_func = func

ctx = Context(
# fmt: off
func =func,
func =_func,
cache_path =cache_path,
force_file =force_file,
cls_ =cls,
cls_ =use_cls,
depends_on =depends_on,
logger =logger,
chunk_by =chunk_by,
Expand All @@ -403,7 +442,13 @@ def cachew_impl(
@functools.wraps(func)
def binder(*args, **kwargs):
kwargs['_cachew_context'] = ctx
return cachew_wrapper(*args, **kwargs)
res = cachew_wrapper(*args, **kwargs)

if use_kind == 'single':
lres = list(res)
assert len(lres) == 1, lres # shouldn't happen
return lres[0]
return res

return binder

Expand All @@ -423,7 +468,7 @@ def cachew(
cache_path: Optional[PathProvider[P]] = ...,
*,
force_file: bool = ...,
cls: Optional[Type] = ...,
cls: Optional[Union[Type, Tuple[Kind, Type]]] = ...,
depends_on: HashFunction[P] = ...,
logger: Optional[logging.Logger] = ...,
chunk_by: int = ...,
Expand Down Expand Up @@ -518,7 +563,7 @@ def cachew_wrapper(

func_name = callable_name(func)
if not settings.ENABLE:
logger.debug(f'[{func_name}]: cache explicitly disabled (settings.ENABLE is False)')
logger.debug('cache explicitly disabled (settings.ENABLE is False)')
yield from func(*args, **kwargs)
return

Expand All @@ -527,7 +572,7 @@ def get_db_path() -> Optional[Path]:
if callable(cache_path):
pp = cache_path(*args, **kwargs)
if pp is None:
logger.debug(f'[{func_name}]: cache explicitly disabled (cache_path is None)')
logger.debug('cache explicitly disabled (cache_path is None)')
# early return, in this case we just yield the original items from the function
return None
else:
Expand All @@ -554,7 +599,7 @@ def get_db_path() -> Optional[Path]:
if stat.S_ISDIR(st.st_mode):
db_path = db_path / func_name

logger.debug(f'[{func_name}]: using {used_backend}:{db_path} for cache')
logger.debug(f'using {used_backend}:{db_path} for cache')
return db_path

def try_use_synthetic_key() -> None:
Expand Down Expand Up @@ -654,12 +699,12 @@ def flush() -> None:
flush()

backend.finalize(new_hash)
logger.info(f'{func_name}: wrote {total_objects} objects to cachew ({used_backend}:{db_path})')
logger.info(f'wrote {total_objects} objects to cachew ({used_backend}:{db_path})')

def cached_items():
total_cached = backend.cached_blobs_total()
total_cached_s = '' if total_cached is None else f'{total_cached} '
logger.info(f'{func_name}: loading {total_cached_s}objects from cachew ({used_backend}:{db_path})')
logger.info(f'loading {total_cached_s}objects from cachew ({used_backend}:{db_path})')

for blob in backend.cached_blobs():
j = orjson_loads(blob)
Expand All @@ -679,7 +724,7 @@ def cached_items():

new_hash_d = C.composite_hash(*args, **kwargs)
new_hash: SourceHash = json.dumps(new_hash_d)
logger.debug('new hash: %s', new_hash)
logger.debug(f'new hash: {new_hash}')

marshall = CachewMarshall(Type_=cls)

Expand Down Expand Up @@ -713,7 +758,7 @@ def cached_items():

# todo hmm, kinda annoying that it tries calling the function twice?
# but gonna require some sophisticated cooperation with the cached wrapper otherwise
cachew_error(e)
cachew_error(e, logger=logger)
yield from func(*args, **kwargs)


Expand Down
30 changes: 29 additions & 1 deletion src/cachew/tests/test_cachew.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@ def H(t: AllTypes):
# TODO should be possible to iterate anonymous tuples too? or just sequences of primitive types?


def test_primitive(tmp_path: Path):
def test_primitive(tmp_path: Path) -> None:
@cachew(tmp_path)
def fun() -> Iterator[str]:
yield 'aba'
Expand All @@ -738,6 +738,34 @@ def fun() -> Iterator[str]:
assert list(fun()) == ['aba', 'caba']


def test_single_value(tmp_path: Path) -> None:
    # kind is inferred from the non-iterable return type annotation
    @cachew(tmp_path)
    def fun_int() -> int:
        return 123

    for _ in range(2):  # second call is served from the cache
        assert fun_int() == 123

    # no return annotation at all -- kind and type are supplied explicitly instead
    @cachew(tmp_path, cls=('single', str))
    def fun_str():
        return 'whatever'

    for _ in range(2):
        assert fun_str() == 'whatever'

    # Optional of a namedtuple is also treated as a single cached value
    @cachew(tmp_path)
    def fun_opt_namedtuple(none: bool) -> Optional[UUU]:
        return None if none else UUU(xx=1, yy=2)

    for _ in range(2):
        assert fun_opt_namedtuple(none=False) == UUU(xx=1, yy=2)
    for _ in range(2):
        assert fun_opt_namedtuple(none=True) is None


class O(NamedTuple):
x: int

Expand Down