Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core: improvements for logging #40

Merged
merged 1 commit into from
Jun 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ def main() -> None:
'bandit',

'more-itertools',

'enlighten', # used in logging helper, but not really required
],
'optional': [
'colorlog',
],
},

Expand Down
31 changes: 23 additions & 8 deletions src/cachew/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
except Exception as e:
logging.exception(e)

from .logging_helper import makeLogger


# in case of changes in the way cachew stores data, this should be changed to discard old caches
CACHEW_VERSION: str = importlib.metadata.version(__name__)
Expand All @@ -54,10 +56,8 @@ class settings:
THROW_ON_ERROR: bool = False



def get_logger() -> logging.Logger:
return logging.getLogger('cachew')

return makeLogger(__name__)


class IsoDateTime(sqlalchemy.TypeDecorator):
Expand Down Expand Up @@ -750,7 +750,17 @@ def cachew_impl(
call took 5 seconds
"""
if logger is None:
logger = get_logger()
module_name = getattr(func, '__module__', None)
if module_name is None:
# rely on default cachew logger
logger = get_logger()
else:
# if logger for the function's module already exists, reuse it
if module_name in logging.Logger.manager.loggerDict:
logger = logging.getLogger(module_name)
else:
logger = get_logger()


hashf = kwargs.get('hashf', None)
if hashf is not None:
Expand All @@ -760,12 +770,12 @@ def cachew_impl(
cn = cname(func)
# todo not very nice that ENABLE check is scattered across two places
if not settings.ENABLE or cache_path is None:
logger.info('[%s]: cache explicitly disabled (settings.ENABLE is False or cache_path is None)', cn)
logger.debug('[%s]: cache explicitly disabled (settings.ENABLE is False or cache_path is None)', cn)
return func

if cache_path is use_default_path:
cache_path = settings.DEFAULT_CACHEW_DIR
logger.info('[%s]: no cache_path specified, using the default %s', cn, cache_path)
logger.debug('[%s]: no cache_path specified, using the default %s', cn, cache_path)

# TODO fuzz infer_type, should never crash?
inferred = infer_type(func)
Expand Down Expand Up @@ -905,7 +915,7 @@ def cachew_wrapper(

cn = cname(func)
if not settings.ENABLE:
logger.info('[%s]: cache explicitly disabled (settings.ENABLE is False)', cn)
logger.debug('[%s]: cache explicitly disabled (settings.ENABLE is False)', cn)
yield from func(*args, **kwargs)
return

Expand All @@ -919,7 +929,7 @@ def cachew_wrapper(
if callable(cache_path):
pp = cache_path(*args, **kwargs)
if pp is None:
logger.info('[%s]: cache explicitly disabled (cache_path is None)', cn)
logger.debug('[%s]: cache explicitly disabled (cache_path is None)', cn)
yield from func(*args, **kwargs)
return
else:
Expand Down Expand Up @@ -991,6 +1001,8 @@ def cached_items():

if new_hash == old_hash:
logger.debug('hash matched: loading from cache')
total = list(conn.execute(sqlalchemy.select(sqlalchemy.func.count()).select_from(table_cache)))[0][0]
logger.info(f'{cn}: loading {total} objects from cachew (sqlite {dbp})')
yield from cached_items()
return

Expand Down Expand Up @@ -1098,8 +1110,10 @@ def flush() -> None:
conn.execute(insert_into_table_cache_tmp, chunk_dict)
chunk = []

total_objects = 0
for d in datas:
try:
total_objects += 1
yield d
except GeneratorExit:
early_exit = True
Expand All @@ -1124,6 +1138,7 @@ def flush() -> None:

# pylint: disable=no-value-for-parameter
conn.execute(db.table_hash.insert().values([{'value': new_hash}]))
logger.info(f'{cn}: wrote {total_objects} objects to cachew (sqlite {dbp})')
except Exception as e:
# sigh... see test_early_exit_shutdown...
if early_exit and 'Cannot operate on a closed database' in str(e):
Expand Down
234 changes: 234 additions & 0 deletions src/cachew/logging_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
from __future__ import annotations

from functools import lru_cache
import logging
import os
from typing import Union
import warnings


def test() -> None:
    """Manual demo comparing raw stdlib logging with makeLogger (prints to stderr).

    Not an automated test: it mutates global logging state (calls basicConfig,
    changes logger levels), so it's only meant to be run standalone via __main__.
    """
    import sys
    from typing import Callable

    # commentary helper: goes straight to stderr, interleaved with the actual log output
    M: Callable[[str], None] = lambda s: print(s, file=sys.stderr)

    ## prepare exception for later
    try:
        # deliberately trigger an AttributeError so we have a live exception object
        None.whatever # type: ignore[attr-defined]
    except Exception as e:
        ex = e
    ##

    M(" Logging module's defaults are not great:")
    l = logging.getLogger('default_logger')
    l.error("For example, this should be logged as error. But it's not even formatted properly, doesn't have logger name or level")

    M("\n The reason is that you need to remember to call basicConfig() first. Let's do it now:")
    logging.basicConfig()
    l.error("OK, this is better. But the default format kinda sucks, I prefer having timestamps and the file/line number")

    M("\n Also exception logging is kinda lame, doesn't print traceback by default unless you remember to pass exc_info:")
    l.exception(ex) # type: ignore[possibly-undefined] # pylint: disable=used-before-assignment

    M("\n\n With makeLogger you get a reasonable logging format, colours (via colorlog library) and other neat things:")

    ll = makeLogger('test') # No need for basicConfig!
    ll.info("default level is INFO")
    ll.debug("... so this shouldn't be displayed")
    ll.warning("warnings are easy to spot!")

    M("\n Exceptions print traceback by default now:")
    ll.exception(ex)

    M("\n You can (and should) use it via regular logging.getLogger after that, e.g. let's set logging level to DEBUG now")
    logging.getLogger('test').setLevel(logging.DEBUG)
    ll.debug("... now debug messages are also displayed")


# level applied by setup_logger when the caller doesn't specify one explicitly
DEFAULT_LEVEL = 'INFO'
# {start}/{end} are placeholders for optional colour escape sequences (filled in by setup_logger)
FORMAT = '{start}[%(levelname)-7s %(asctime)s %(name)s %(filename)s:%(lineno)-4d]{end} %(message)s'
FORMAT_NOCOLOR = FORMAT.format(start='', end='')


# aliases for readability: a 'Level' is a numeric logging level;
# 'LevelIsh' additionally allows a level name (e.g. 'DEBUG') or None
Level = int
LevelIsh = Union[Level, str, None]


def mklevel(level: LevelIsh) -> Level:
    """Normalize a level-ish value to a numeric logging level.

    Accepts a numeric level (returned as-is), a level name like 'debug'
    (case-insensitive), or None (mapped to logging.NOTSET).
    """
    if isinstance(level, int):
        return level
    if level is None:
        return logging.NOTSET
    return getattr(logging, level.upper())


def get_collapse_level() -> Level | None:
    """Return the level at/below which log lines should be collapsed, or None to disable.

    Controlled by the LOGGING_COLLAPSE env var (a level name/number); the legacy
    COLLAPSE_DEBUG_LOGS var (any value) collapses DEBUG regardless of its value.
    """
    # TODO not sure if should be specific to logger name?
    configured = os.environ.get('LOGGING_COLLAPSE', None)
    if configured is not None:
        return mklevel(configured)
    # legacy name, maybe deprecate?
    if os.environ.get('COLLAPSE_DEBUG_LOGS', None) is not None:
        return logging.DEBUG
    return None


def get_env_level(name: str) -> Level | None:
    """Look up a per-logger level override from the environment.

    E.g. LOGGING_LEVEL_my_hypothesis=debug. Since shells don't allow dots in
    variable names without escaping, dots in the logger name may be spelled
    as underscores in the variable name.
    """
    PREFIX = 'LOGGING_LEVEL_'
    override = os.environ.get(PREFIX + name, None) or os.environ.get(PREFIX + name.replace('.', '_'), None)
    if override is None:
        return None
    return mklevel(override)


def setup_logger(logger: str | logging.Logger, *, level: LevelIsh = None) -> None:
    """
    Wrapper to simplify logging setup.

    Attaches a stream handler with a sensible format (colored via colorlog if
    available), sets the level (env-var overrides take precedence over *level*),
    and installs a filter so .exception() logs tracebacks by default.
    """
    if isinstance(logger, str):
        logger = logging.getLogger(logger)

    if level is None:
        level = DEFAULT_LEVEL

    # env level always takes precedence
    env_level = get_env_level(logger.name)
    if env_level is not None:
        lvl = env_level
    else:
        lvl = mklevel(level)

    if logger.level == logging.NOTSET:
        # if it's already set, the user requested a different logging level, let's respect that
        logger.setLevel(lvl)

    logger.addFilter(AddExceptionTraceback())

    # construct the handler exactly once: a collapsing handler when requested via env,
    # otherwise a plain stream handler
    # (previously a plain StreamHandler was created unconditionally and then discarded
    #  when collapsing was enabled, leaking a dead handler object)
    collapse_level = get_collapse_level()
    ch = logging.StreamHandler() if collapse_level is None else CollapseLogsHandler(maxlevel=collapse_level)

    # default level for handler is NOTSET, which will make it process all messages
    # we rely on the logger to actually accept/reject log msgs
    logger.addHandler(ch)

    # this attribute is set to True by default, which causes log entries to be passed to root logger (e.g. if you call basicConfig beforehand)
    # even if log entry is handled by this logger ... not sure what's the point of this behaviour??
    logger.propagate = False

    try:
        # try colorlog first, so user gets nice colored logs
        import colorlog
    except ModuleNotFoundError:
        warnings.warn("You might want to 'pip install colorlog' for nice colored logs")
        formatter = logging.Formatter(FORMAT_NOCOLOR)
    else:
        # log_color/reset are specific to colorlog
        FORMAT_COLOR = FORMAT.format(start='%(log_color)s', end='%(reset)s')
        fmt = FORMAT_COLOR if ch.stream.isatty() else FORMAT_NOCOLOR
        # colorlog should detect tty in principle, but doesn't handle everything for some reason
        # see https://github.com/borntyping/python-colorlog/issues/71
        formatter = colorlog.ColoredFormatter(fmt)

    ch.setFormatter(formatter)


# by default, logging.exception isn't logging traceback unless called inside of the exception handler
# which is a bit annoying since we have to pass exc_info explicitly
# also see https://stackoverflow.com/questions/75121925/why-doesnt-python-logging-exception-method-log-traceback-by-default
# todo also amend by post about defensive error handling?
class AddExceptionTraceback(logging.Filter):
    """Filter that injects exc_info when an exception object is logged as the message.

    This makes ``logger.error(exc)`` / ``logger.exception(exc)`` emit the full
    traceback even outside of an ``except`` block, where the logging module
    wouldn't attach it by itself.
    """

    def filter(self, record: logging.LogRecord) -> bool:
        keep = super().filter(record)
        if not keep:
            return False
        if record.levelname != 'ERROR':
            return keep
        exc = record.msg
        if not isinstance(exc, BaseException):
            return keep
        # only fill in exc_info if the caller didn't already provide one
        if record.exc_info is None or record.exc_info == (None, None, None):
            record.exc_info = (type(exc), exc, exc.__traceback__)
        return keep


# todo also save full log in a file?
class CollapseLogsHandler(logging.StreamHandler):
    '''
    Collapses subsequent debug log lines and redraws on the same line.
    Hopefully this gives both a sense of progress and doesn't clutter the terminal as much?
    '''

    # whether the previously emitted record was collapsed (i.e. the cursor is still on its line)
    last: bool = False

    maxlevel: Level = logging.DEBUG  # everything with less or equal level will be collapsed

    def __init__(self, *args, maxlevel: Level, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.maxlevel = maxlevel

    def emit(self, record: logging.LogRecord) -> None:
        try:
            msg = self.format(record)
            # multi-line messages can't be redrawn in place, so never collapse those
            cur = record.levelno <= self.maxlevel and '\n' not in msg
            if cur:
                if self.last:
                    self.stream.write('\033[K' + '\r')  # clear line + return carriage
            else:
                if self.last:
                    self.stream.write('\n')  # clean up after the last line
            self.last = cur
            # NOTE: queries fd 0 (stdin), not self.stream -- raises OSError when not a tty,
            # which falls through to handleError below
            columns, _ = os.get_terminal_size(0)
            # ugh. the columns thing is meh. dunno I guess ultimately need curses for that
            # TODO also would be cool to have a terminal post-processor? kinda like tail but aware of logging keywords (INFO/DEBUG/etc)
            self.stream.write(msg + ' ' * max(0, columns - len(msg)) + ('' if cur else '\n'))
            self.flush()
        except Exception:
            # was a bare 'except:' before, which also swallowed KeyboardInterrupt/SystemExit;
            # follow logging.Handler.emit convention and delegate failures to handleError
            self.handleError(record)


@lru_cache(None) # cache so it's only initialized once
def makeLogger(name: str, *, level: LevelIsh = None) -> logging.Logger:
    """Return the named logger, configured via setup_logger on first use.

    Cached per (name, level) argument pair -- NOTE(review): calling again with
    the same name but a different *level* would run setup_logger a second time
    and attach a second handler to the same underlying logger.
    """
    logger = logging.getLogger(name)
    setup_logger(logger, level=level)
    return logger


# ughh. hacky way to have a single enlighten instance per interpreter, so it can be shared between modules
# not sure about this. I guess this should definitely be behind some flag
# OK, when stdout is not a tty, enlighten doesn't log anything, good
def get_enlighten():
    """Return a process-wide shared enlighten progress-bar manager.

    Falls back to a Mock stub (which silently absorbs any attribute/method use)
    when the feature flag is off or enlighten isn't installed, so clients never
    need to special-case its absence.
    """
    # TODO could add env variable to disable enlighten for a module?
    from unittest.mock import Mock

    # for now hidden behind the flag since it's a little experimental
    if os.environ.get('ENLIGHTEN_ENABLE', None) is None:
        return Mock()

    try:
        import enlighten  # type: ignore[import]
    except ModuleNotFoundError:
        warnings.warn("You might want to 'pip install enlighten' for a nice progress bar")
        return Mock()

    # dirty, but otherwise a bit unclear how to share enlighten manager between packages that call each other
    shared = getattr(enlighten, 'INSTANCE', None)
    if shared is None:
        shared = enlighten.get_manager()
        setattr(enlighten, 'INSTANCE', shared)
    return shared


if __name__ == '__main__':
    test()


## legacy/deprecated methods for backwards compatibility
LazyLogger = makeLogger
logger = makeLogger
##
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ commands =

[testenv:mypy]
commands =
{envpython} -m pip install -e .[testing]
{envpython} -m pip install -e .[testing,optional]
{envpython} -m mypy --install-types --non-interactive \
-p {[testenv]package_name} \
# txt report is a bit more convenient to view on CI
Expand Down