Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core: improvements for logging #40

Merged
merged 1 commit into from
Jun 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ def main() -> None:
'bandit',

'more-itertools',

'enlighten', # used in logging helper, but not really required
],
'optional': [
'colorlog',
],
},

Expand Down
31 changes: 23 additions & 8 deletions src/cachew/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
except Exception as e:
logging.exception(e)

from .logging_helper import makeLogger


# in case of changes in the way cachew stores data, this should be changed to discard old caches
CACHEW_VERSION: str = importlib.metadata.version(__name__)
Expand All @@ -54,10 +56,8 @@ class settings:
THROW_ON_ERROR: bool = False



def get_logger() -> logging.Logger:
return logging.getLogger('cachew')

return makeLogger(__name__)


class IsoDateTime(sqlalchemy.TypeDecorator):
Expand Down Expand Up @@ -750,7 +750,17 @@ def cachew_impl(
call took 5 seconds
"""
if logger is None:
logger = get_logger()
module_name = getattr(func, '__module__', None)
if module_name is None:
# rely on default cachew logger
logger = get_logger()
else:
# if logger for the function's module already exists, reuse it
if module_name in logging.Logger.manager.loggerDict:
logger = logging.getLogger(module_name)
else:
logger = get_logger()


hashf = kwargs.get('hashf', None)
if hashf is not None:
Expand All @@ -760,12 +770,12 @@ def cachew_impl(
cn = cname(func)
# todo not very nice that ENABLE check is scattered across two places
if not settings.ENABLE or cache_path is None:
logger.info('[%s]: cache explicitly disabled (settings.ENABLE is False or cache_path is None)', cn)
logger.debug('[%s]: cache explicitly disabled (settings.ENABLE is False or cache_path is None)', cn)
return func

if cache_path is use_default_path:
cache_path = settings.DEFAULT_CACHEW_DIR
logger.info('[%s]: no cache_path specified, using the default %s', cn, cache_path)
logger.debug('[%s]: no cache_path specified, using the default %s', cn, cache_path)

# TODO fuzz infer_type, should never crash?
inferred = infer_type(func)
Expand Down Expand Up @@ -905,7 +915,7 @@ def cachew_wrapper(

cn = cname(func)
if not settings.ENABLE:
logger.info('[%s]: cache explicitly disabled (settings.ENABLE is False)', cn)
logger.debug('[%s]: cache explicitly disabled (settings.ENABLE is False)', cn)
yield from func(*args, **kwargs)
return

Expand All @@ -919,7 +929,7 @@ def cachew_wrapper(
if callable(cache_path):
pp = cache_path(*args, **kwargs)
if pp is None:
logger.info('[%s]: cache explicitly disabled (cache_path is None)', cn)
logger.debug('[%s]: cache explicitly disabled (cache_path is None)', cn)
yield from func(*args, **kwargs)
return
else:
Expand Down Expand Up @@ -991,6 +1001,8 @@ def cached_items():

if new_hash == old_hash:
logger.debug('hash matched: loading from cache')
total = list(conn.execute(sqlalchemy.select(sqlalchemy.func.count()).select_from(table_cache)))[0][0]
logger.info(f'{cn}: loading {total} objects from cachew (sqlite {dbp})')
yield from cached_items()
return

Expand Down Expand Up @@ -1098,8 +1110,10 @@ def flush() -> None:
conn.execute(insert_into_table_cache_tmp, chunk_dict)
chunk = []

total_objects = 0
for d in datas:
try:
total_objects += 1
yield d
except GeneratorExit:
early_exit = True
Expand All @@ -1124,6 +1138,7 @@ def flush() -> None:

# pylint: disable=no-value-for-parameter
conn.execute(db.table_hash.insert().values([{'value': new_hash}]))
logger.info(f'{cn}: wrote {total_objects} objects to cachew (sqlite {dbp})')
except Exception as e:
# sigh... see test_early_exit_shutdown...
if early_exit and 'Cannot operate on a closed database' in str(e):
Expand Down
234 changes: 234 additions & 0 deletions src/cachew/logging_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
from __future__ import annotations

from functools import lru_cache
import logging
import os
from typing import Union
import warnings


def test() -> None:
    """Manual demo comparing raw stdlib logging with makeLogger (prints to stderr).

    Not an automated test: it mutates global logging state (calls basicConfig,
    changes logger levels), so it's only meant to be run standalone via __main__.
    """
    import sys
    from typing import Callable

    # commentary helper: goes straight to stderr, interleaved with the actual log output
    M: Callable[[str], None] = lambda s: print(s, file=sys.stderr)

    ## prepare exception for later
    try:
        # deliberately trigger an AttributeError so we have a live exception object
        None.whatever # type: ignore[attr-defined]
    except Exception as e:
        ex = e
    ##

    M(" Logging module's defaults are not great:")
    l = logging.getLogger('default_logger')
    l.error("For example, this should be logged as error. But it's not even formatted properly, doesn't have logger name or level")

    M("\n The reason is that you need to remember to call basicConfig() first. Let's do it now:")
    logging.basicConfig()
    l.error("OK, this is better. But the default format kinda sucks, I prefer having timestamps and the file/line number")

    M("\n Also exception logging is kinda lame, doesn't print traceback by default unless you remember to pass exc_info:")
    l.exception(ex) # type: ignore[possibly-undefined] # pylint: disable=used-before-assignment

    M("\n\n With makeLogger you get a reasonable logging format, colours (via colorlog library) and other neat things:")

    ll = makeLogger('test') # No need for basicConfig!
    ll.info("default level is INFO")
    ll.debug("... so this shouldn't be displayed")
    ll.warning("warnings are easy to spot!")

    M("\n Exceptions print traceback by default now:")
    ll.exception(ex)

    M("\n You can (and should) use it via regular logging.getLogger after that, e.g. let's set logging level to DEBUG now")
    logging.getLogger('test').setLevel(logging.DEBUG)
    ll.debug("... now debug messages are also displayed")


# level applied by setup_logger when the caller doesn't specify one explicitly
DEFAULT_LEVEL = 'INFO'
# {start}/{end} are placeholders for optional colour escape sequences (filled in by setup_logger)
FORMAT = '{start}[%(levelname)-7s %(asctime)s %(name)s %(filename)s:%(lineno)-4d]{end} %(message)s'
FORMAT_NOCOLOR = FORMAT.format(start='', end='')


# aliases for readability: a 'Level' is a numeric logging level;
# 'LevelIsh' additionally allows a level name (e.g. 'DEBUG') or None
Level = int
LevelIsh = Union[Level, str, None]


def mklevel(level: LevelIsh) -> Level:
    """Normalize a level-ish value to a numeric logging level.

    Accepts a numeric level (returned as-is), a level name like 'debug'
    (case-insensitive), or None (mapped to logging.NOTSET).
    """
    if isinstance(level, int):
        return level
    if level is None:
        return logging.NOTSET
    return getattr(logging, level.upper())


def get_collapse_level() -> Level | None:
    """Return the level at/below which log lines should be collapsed, or None to disable.

    Controlled by the LOGGING_COLLAPSE env var (a level name/number); the legacy
    COLLAPSE_DEBUG_LOGS var (any value) collapses DEBUG regardless of its value.
    """
    # TODO not sure if should be specific to logger name?
    configured = os.environ.get('LOGGING_COLLAPSE', None)
    if configured is not None:
        return mklevel(configured)
    # legacy name, maybe deprecate?
    if os.environ.get('COLLAPSE_DEBUG_LOGS', None) is not None:
        return logging.DEBUG
    return None


def get_env_level(name: str) -> Level | None:
    """Look up a per-logger level override from the environment.

    E.g. LOGGING_LEVEL_my_hypothesis=debug. Since shells don't allow dots in
    variable names without escaping, dots in the logger name may be spelled
    as underscores in the variable name.
    """
    PREFIX = 'LOGGING_LEVEL_'
    override = os.environ.get(PREFIX + name, None) or os.environ.get(PREFIX + name.replace('.', '_'), None)
    if override is None:
        return None
    return mklevel(override)


def setup_logger(logger: str | logging.Logger, *, level: LevelIsh = None) -> None:
    """
    Wrapper to simplify logging setup.

    Attaches a stream handler with a sensible format (colored via colorlog if
    available), sets the level (env-var overrides take precedence over *level*),
    and installs a filter so .exception() logs tracebacks by default.
    """
    if isinstance(logger, str):
        logger = logging.getLogger(logger)

    if level is None:
        level = DEFAULT_LEVEL

    # env level always takes precedence
    env_level = get_env_level(logger.name)
    if env_level is not None:
        lvl = env_level
    else:
        lvl = mklevel(level)

    if logger.level == logging.NOTSET:
        # if it's already set, the user requested a different logging level, let's respect that
        logger.setLevel(lvl)

    logger.addFilter(AddExceptionTraceback())

    # construct the handler exactly once: a collapsing handler when requested via env,
    # otherwise a plain stream handler
    # (previously a plain StreamHandler was created unconditionally and then discarded
    #  when collapsing was enabled, leaking a dead handler object)
    collapse_level = get_collapse_level()
    ch = logging.StreamHandler() if collapse_level is None else CollapseLogsHandler(maxlevel=collapse_level)

    # default level for handler is NOTSET, which will make it process all messages
    # we rely on the logger to actually accept/reject log msgs
    logger.addHandler(ch)

    # this attribute is set to True by default, which causes log entries to be passed to root logger (e.g. if you call basicConfig beforehand)
    # even if log entry is handled by this logger ... not sure what's the point of this behaviour??
    logger.propagate = False

    try:
        # try colorlog first, so user gets nice colored logs
        import colorlog
    except ModuleNotFoundError:
        warnings.warn("You might want to 'pip install colorlog' for nice colored logs")
        formatter = logging.Formatter(FORMAT_NOCOLOR)
    else:
        # log_color/reset are specific to colorlog
        FORMAT_COLOR = FORMAT.format(start='%(log_color)s', end='%(reset)s')
        fmt = FORMAT_COLOR if ch.stream.isatty() else FORMAT_NOCOLOR
        # colorlog should detect tty in principle, but doesn't handle everything for some reason
        # see https://github.com/borntyping/python-colorlog/issues/71
        formatter = colorlog.ColoredFormatter(fmt)

    ch.setFormatter(formatter)


# by default, logging.exception isn't logging traceback unless called inside of the exception handler
# which is a bit annoying since we have to pass exc_info explicitly
# also see https://stackoverflow.com/questions/75121925/why-doesnt-python-logging-exception-method-log-traceback-by-default
# todo also amend by post about defensive error handling?
class AddExceptionTraceback(logging.Filter):
    """Filter that injects exc_info when an exception object is logged as the message.

    This makes ``logger.error(exc)`` / ``logger.exception(exc)`` emit the full
    traceback even outside of an ``except`` block, where the logging module
    wouldn't attach it by itself.
    """

    def filter(self, record: logging.LogRecord) -> bool:
        keep = super().filter(record)
        if not keep:
            return False
        if record.levelname != 'ERROR':
            return keep
        exc = record.msg
        if not isinstance(exc, BaseException):
            return keep
        # only fill in exc_info if the caller didn't already provide one
        if record.exc_info is None or record.exc_info == (None, None, None):
            record.exc_info = (type(exc), exc, exc.__traceback__)
        return keep


# todo also save full log in a file?
class CollapseLogsHandler(logging.StreamHandler):
    '''
    Collapses subsequent debug log lines and redraws on the same line.
    Hopefully this gives both a sense of progress and doesn't clutter the terminal as much?
    '''

    # whether the previously emitted record was collapsed (i.e. the cursor is still on its line)
    last: bool = False

    maxlevel: Level = logging.DEBUG  # everything with less or equal level will be collapsed

    def __init__(self, *args, maxlevel: Level, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.maxlevel = maxlevel

    def emit(self, record: logging.LogRecord) -> None:
        try:
            msg = self.format(record)
            # multi-line messages can't be redrawn in place, so never collapse those
            cur = record.levelno <= self.maxlevel and '\n' not in msg
            if cur:
                if self.last:
                    self.stream.write('\033[K' + '\r')  # clear line + return carriage
            else:
                if self.last:
                    self.stream.write('\n')  # clean up after the last line
            self.last = cur
            # NOTE: queries fd 0 (stdin), not self.stream -- raises OSError when not a tty,
            # which falls through to handleError below
            columns, _ = os.get_terminal_size(0)
            # ugh. the columns thing is meh. dunno I guess ultimately need curses for that
            # TODO also would be cool to have a terminal post-processor? kinda like tail but aware of logging keywords (INFO/DEBUG/etc)
            self.stream.write(msg + ' ' * max(0, columns - len(msg)) + ('' if cur else '\n'))
            self.flush()
        except Exception:
            # was a bare 'except:' before, which also swallowed KeyboardInterrupt/SystemExit;
            # follow logging.Handler.emit convention and delegate failures to handleError
            self.handleError(record)


@lru_cache(None) # cache so it's only initialized once
def makeLogger(name: str, *, level: LevelIsh = None) -> logging.Logger:
    """Return the named logger, configured via setup_logger on first use.

    Cached per (name, level) argument pair -- NOTE(review): calling again with
    the same name but a different *level* would run setup_logger a second time
    and attach a second handler to the same underlying logger.
    """
    logger = logging.getLogger(name)
    setup_logger(logger, level=level)
    return logger


# ughh. hacky way to have a single enlighten instance per interpreter, so it can be shared between modules
# not sure about this. I guess this should definitely be behind some flag
# OK, when stdout is not a tty, enlighten doesn't log anything, good
def get_enlighten():
    """Return a process-wide shared enlighten progress-bar manager.

    Falls back to a Mock stub (which silently absorbs any attribute/method use)
    when the feature flag is off or enlighten isn't installed, so clients never
    need to special-case its absence.
    """
    # TODO could add env variable to disable enlighten for a module?
    from unittest.mock import Mock

    # for now hidden behind the flag since it's a little experimental
    if os.environ.get('ENLIGHTEN_ENABLE', None) is None:
        return Mock()

    try:
        import enlighten  # type: ignore[import]
    except ModuleNotFoundError:
        warnings.warn("You might want to 'pip install enlighten' for a nice progress bar")
        return Mock()

    # dirty, but otherwise a bit unclear how to share enlighten manager between packages that call each other
    shared = getattr(enlighten, 'INSTANCE', None)
    if shared is None:
        shared = enlighten.get_manager()
        setattr(enlighten, 'INSTANCE', shared)
    return shared


if __name__ == '__main__':
    test()


## legacy/deprecated methods for backwards compatibility
LazyLogger = makeLogger
logger = makeLogger
##
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ commands =

[testenv:mypy]
commands =
{envpython} -m pip install -e .[testing]
{envpython} -m pip install -e .[testing,optional]
{envpython} -m mypy --install-types --non-interactive \
-p {[testenv]package_name} \
# txt report is a bit more convenient to view on CI
Expand Down