logger + log level updates (#1178)
jlewitt1 authored Aug 28, 2024
1 parent c2f7691 commit 8922b07
Showing 20 changed files with 54 additions and 102 deletions.
1 change: 1 addition & 0 deletions .github/workflows/local_tests.yaml
@@ -7,6 +7,7 @@ on:

env:
API_SERVER_URL: https://api.run.house
RH_LOG_LEVEL: INFO

jobs:
# TODO: THESE ARE ONLY SEPARATE JOBS BECAUSE THERE ARE
6 changes: 5 additions & 1 deletion docs/debugging-logging.rst
@@ -21,12 +21,16 @@ Alternatively, to see logs on your local machine while running a remote function
remote_fn = rh.function(fn)
fn(fn_args, stream_logs=True)
Log Levels
----------
You can control the verbosity of the Runhouse logs by adjusting the log level: set the
environment variable ``RH_LOG_LEVEL`` to your desired level.
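
For illustration only (not part of this diff), a minimal sketch of setting the level from Python; it assumes the variable is read when Runhouse creates its loggers, so it is set before the import:

import os

os.environ["RH_LOG_LEVEL"] = "DEBUG"  # any standard logging level name, e.g. DEBUG or INFO

import runhouse as rh  # loggers created from here on use the DEBUG level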

Debugging
~~~~~~~~~

For general debugging that doesn't occur within remote function calls, you can add
``import pdb; pdb.set_trace()`` whereever you want to set your debugging session.
``breakpoint()`` wherever you want to set your debugging session.
If the code is being run locally at the point of the debugger, you'll be able to access the session from your
local machine. If the code is being run remotely on a cluster, you will need to ssh into the cluster with
``ssh cluster-name``, and then run ``screen -r`` inside the cluster. From there, you will see the RPC logs
2 changes: 1 addition & 1 deletion runhouse/constants.py
@@ -26,7 +26,6 @@
DEFAULT_HTTPS_PORT = 443
DEFAULT_HTTP_PORT = 80
DEFAULT_SSH_PORT = 22
DEFAULT_LOG_LEVEL = "INFO"

DEFAULT_RAY_PORT = 6379

@@ -61,6 +60,7 @@
]

TEST_ORG = "test-org"
TESTING_LOG_LEVEL = "INFO"

EMPTY_DEFAULT_ENV_NAME = "_cluster_default_env"
DEFAULT_DOCKER_CONTAINER_NAME = "sky_container"
18 changes: 4 additions & 14 deletions runhouse/logger.py
@@ -7,19 +7,9 @@ def get_logger(name: str = __name__):

level = os.getenv("RH_LOG_LEVEL")
if level:
# Set the logging level
logger.setLevel(level.upper())

# Apply a custom formatter
formatter = logging.Formatter(
fmt="%(levelname)s | %(asctime)s | %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
)

# Apply the formatter to each handler
for handler in logger.handlers:
handler.setFormatter(formatter)

# Prevent the logger from propagating to the root logger
logger.propagate = False
try:
logger.setLevel(getattr(logging, level.upper()))
except AttributeError as e:
raise e

return logger
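
A usage sketch of the simplified get_logger above (illustration, not part of the commit): getattr(logging, level.upper()) resolves valid level names like "DEBUG", while an unrecognized RH_LOG_LEVEL now surfaces as an AttributeError rather than being silently ignored.

import os

os.environ["RH_LOG_LEVEL"] = "DEBUG"

from runhouse.logger import get_logger

logger = get_logger(__name__)
logger.debug("passes the level check, since the logger level resolved to logging.DEBUG")
# With e.g. RH_LOG_LEVEL="VERBOSE", get_logger would raise AttributeError instead.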
18 changes: 1 addition & 17 deletions runhouse/main.py
@@ -23,7 +23,6 @@
from runhouse import __version__, cluster, Cluster, configs
from runhouse.constants import (
BULLET_UNICODE,
DEFAULT_LOG_LEVEL,
DOUBLE_SPACE_UNICODE,
RAY_KILL_CMD,
RAY_START_CMD,
@@ -530,7 +529,6 @@ def _start_server(
default_env_name=None,
conda_env=None,
from_python=None,
log_level=None,
):
############################################
# Build CLI commands to start the server
@@ -620,12 +618,6 @@ def _start_server(

flags.append(" --from-python" if from_python else "")

flags.append(
f" --log-level {log_level}"
if log_level
else f" --log-level {DEFAULT_LOG_LEVEL}"
)

# Check if screen or nohup are available
screen = screen and _check_if_command_exists("screen")
nohup = not screen and nohup and _check_if_command_exists("nohup")
@@ -825,18 +817,11 @@ def restart(
False,
help="Whether HTTP server started from inside a Python call rather than CLI.",
),
log_level: str = typer.Option(
default=DEFAULT_LOG_LEVEL,
help="Minimum log level for logs to be printed",
callback=lambda value: value.upper(),
),
):
"""Restart the HTTP server on the cluster."""
if name:
c = cluster(name=name)
c.restart_server(
resync_rh=resync_rh, restart_ray=restart_ray, logs_level=log_level
)
c.restart_server(resync_rh=resync_rh, restart_ray=restart_ray)
return

_start_server(
Expand All @@ -859,7 +844,6 @@ def restart(
default_env_name=default_env_name,
conda_env=conda_env,
from_python=from_python,
log_level=log_level,
)


5 changes: 4 additions & 1 deletion runhouse/resources/envs/env.py
@@ -79,6 +79,9 @@ def _set_env_vars(env_vars):
for k, v in env_vars.items():
os.environ[k] = v

def add_env_var(self, key, value):
self.env_vars.update({key: value})

def config(self, condensed=True):
config = super().config(condensed)
self.save_attrs_to_config(
@@ -188,7 +191,7 @@ def _full_command(self, command: str):
def _run_command(self, command: str, **kwargs):
"""Run command locally inside the environment"""
command = self._full_command(command)
logging.info(f"Running command in {self.name}: {command}")
logger.info(f"Running command in {self.name}: {command}")
return run_with_logs(command, **kwargs)

def to(
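
A minimal sketch of the new Env.add_env_var helper added above (the rh.env factory call and its kwargs are assumed here for illustration); it simply records the variable in the env's env_vars mapping, which is how cluster.py below injects RH_LOG_LEVEL into the default env:

import runhouse as rh

env = rh.env(name="my_env", env_vars={"EXISTING_VAR": "1"})  # assumed constructor call for illustration
env.add_env_var("RH_LOG_LEVEL", "DEBUG")  # equivalent to env.env_vars.update({"RH_LOG_LEVEL": "DEBUG"})
print(env.env_vars)  # {'EXISTING_VAR': '1', 'RH_LOG_LEVEL': 'DEBUG'}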
4 changes: 2 additions & 2 deletions runhouse/resources/functions/function.py
@@ -279,14 +279,14 @@ def _handle_nb_fn(fn, fn_pointers, serialize_notebook_fn, name):
return "", "notebook", fn
else:
module_path = Path.cwd() / (f"{name}_fn.py" if name else "sent_fn.py")
logging.info(
logger.info(
f"Because this function is defined in a notebook, writing it out to {str(module_path)} "
f"to make it importable. Please make sure the function does not rely on any local variables, "
f"including imports (which should be moved inside the function body). "
f"This restriction does not apply to functions defined in normal Python files."
)
if not name:
logging.warning(
logger.warning(
"You should name Functions that are created in notebooks to avoid naming collisions "
"between the modules that are created to hold their functions "
'(i.e. "sent_fn.py" errors.'
2 changes: 1 addition & 1 deletion runhouse/resources/future_module.py
@@ -74,7 +74,7 @@ def remote_next(self):
raise RunhouseStopIteration()

def __next__(self):
return self.remote_next(run_name=self.name)
return self.remote_next()

def send(self, __value):
return self._future.send(__value)
19 changes: 8 additions & 11 deletions runhouse/resources/hardware/cluster.py
@@ -2,7 +2,7 @@
import copy
import importlib
import json
import logging
import os
import re
import subprocess
import threading
@@ -34,7 +34,6 @@
CLUSTER_CONFIG_PATH,
DEFAULT_HTTP_PORT,
DEFAULT_HTTPS_PORT,
DEFAULT_LOG_LEVEL,
DEFAULT_RAY_PORT,
DEFAULT_SERVER_PORT,
DEFAULT_STATUS_CHECK_INTERVAL,
@@ -467,7 +466,13 @@ def _sync_default_env_to_cluster(self):
if not self._default_env:
return

logging.info(f"Syncing default env {self._default_env.name} to cluster")
log_level = os.getenv("RH_LOG_LEVEL")
if log_level:
# add log level to the default env to ensure it gets set on the cluster when the server is restarted
self._default_env.add_env_var("RH_LOG_LEVEL", log_level)
logger.info(f"Using log level {log_level} on cluster's default env")

logger.info(f"Syncing default env {self._default_env.name} to cluster")
self._default_env.install(cluster=self)

def _sync_runhouse_to_cluster(
@@ -837,7 +842,6 @@ def restart_server(
resync_rh: Optional[bool] = None,
restart_ray: bool = True,
restart_proxy: bool = False,
logs_level: str = None,
):
"""Restart the RPC server.
@@ -933,7 +937,6 @@ def restart_server(

# Save a limited version of the local ~/.rh config to the cluster with the user's hashed token,
# if such does not exist on the cluster

if rns_client.token:
user_config = yaml.safe_dump(
{
@@ -957,11 +960,6 @@ def restart_server(
self._run_cli_commands_on_cluster_helper([command])
logger.debug("Saved user config to cluster")

allowed_log_levels = logging._nameToLevel.keys()
if not logs_level or logs_level not in allowed_log_levels:
logs_level = DEFAULT_LOG_LEVEL
logger.info(f"Setting cluster log level to: {logs_level}")

restart_cmd = (
CLI_RESTART_CMD
+ (" --restart-ray" if restart_ray else "")
@@ -980,7 +978,6 @@ def restart_server(
else ""
)
+ " --from-python"
+ f" --log-level {logs_level}"
)

status_codes = self._run_cli_commands_on_cluster_helper(commands=[restart_cmd])
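
Taken together, the cluster changes above move log-level handling out of restart_server and into the default env. A hedged sketch of the resulting flow (the cluster name is hypothetical):

import os

import runhouse as rh

os.environ["RH_LOG_LEVEL"] = "DEBUG"  # picked up by _sync_default_env_to_cluster when a default env exists

cluster = rh.cluster(name="my-cluster")  # hypothetical cluster name
# restart_server no longer takes a log level; the env var above is added to the
# cluster's default env and applied on the cluster when the server restarts.
cluster.restart_server(resync_rh=True, restart_ray=True)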
2 changes: 1 addition & 1 deletion runhouse/resources/hardware/sky_ssh_runner.py
@@ -244,7 +244,7 @@ def run(
executable = "/bin/bash"

# RH MODIFIED: Return command instead of running it
logging.debug(f"Running command: {' '.join(command)}")
logger.debug(f"Running command: {' '.join(command)}")
if return_cmd:
return " ".join(command)

3 changes: 1 addition & 2 deletions runhouse/rns/defaults.py
@@ -1,7 +1,6 @@
import contextvars
import copy
import json
import logging
import os
from collections import defaultdict
from pathlib import Path
@@ -135,7 +134,7 @@ def load_defaults_from_file(self, config_path: Optional[str] = None) -> Dict:
if Path(config_path).exists():
with open(config_path, "r") as stream:
config = yaml.safe_load(stream)
logging.info(f"Loaded Runhouse config from {config_path}")
logger.info(f"Loaded Runhouse config from {config_path}")

return config or {}

10 changes: 3 additions & 7 deletions runhouse/servers/cluster_servlet.py
@@ -11,7 +11,6 @@
import runhouse

from runhouse.constants import (
DEFAULT_LOG_LEVEL,
DEFAULT_STATUS_CHECK_INTERVAL,
INCREASED_STATUS_CHECK_INTERVAL,
SERVER_LOGFILE,
@@ -53,9 +52,6 @@ async def __init__(
self._auth_cache: AuthCache = AuthCache(cluster_config)
self.autostop_helper = None

logger.setLevel(kwargs.get("logs_level", DEFAULT_LOG_LEVEL))
self.logger = logger

if cluster_config.get("resource_subtype", None) == "OnDemandCluster":
self.autostop_helper = AutostopHelper()

@@ -316,10 +312,10 @@ async def aperiodic_cluster_checks(self):
)

except Exception:
self.logger.error(
logger.error(
"Cluster checks have failed.\n"
"Please check cluster logs for more info.\n"
"Temporarily increasing the interval between status checks"
"Temporarily increasing the interval between status checks."
)
await asyncio.sleep(INCREASED_STATUS_CHECK_INTERVAL)
finally:
@@ -436,7 +432,7 @@ async def astatus(self):
# Nothing if there was an exception
if "Exception" in env_status.keys():
e = env_status.get("Exception")
self.logger.warning(
logger.warning(
f"Exception {str(e)} in status for env servlet {env_servlet_name}"
)
env_servlet_utilization_data[env_servlet_name] = {}
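
The servlet changes above drop the per-instance self.logger in favor of the module-level logger returned by get_logger, whose level is governed by RH_LOG_LEVEL. A minimal sketch of that pattern (the class is a stand-in, not the real ClusterServlet):

from runhouse.logger import get_logger

logger = get_logger(__name__)  # module-level logger; level set from RH_LOG_LEVEL when present

class DemoServlet:  # illustrative stand-in, not the real ClusterServlet
    def run_checks(self):
        # previously self.logger.error(...); now the shared module-level logger is used
        logger.error("Cluster checks have failed. Please check cluster logs for more info.")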
6 changes: 0 additions & 6 deletions runhouse/servers/env_servlet.py
@@ -3,7 +3,6 @@
from functools import wraps
from typing import Any, Dict, Optional

from runhouse.constants import DEFAULT_LOG_LEVEL
from runhouse.globals import obj_store
from runhouse.logger import get_logger

@@ -70,15 +69,10 @@ class EnvServlet:
async def __init__(self, env_name: str, *args, **kwargs):
self.env_name = env_name

logs_level = kwargs.get("logs_level", DEFAULT_LOG_LEVEL)
logger.setLevel(logs_level)
# self.logger = logger

await obj_store.ainitialize(
self.env_name,
has_local_storage=True,
setup_cluster_servlet=ClusterServletSetupOption.GET_OR_FAIL,
logs_level=logs_level,
)

# Ray defaults to setting OMP_NUM_THREADS to 1, which unexpectedly limit parallelism in user programs.
5 changes: 2 additions & 3 deletions runhouse/servers/http/http_client.py
@@ -1,5 +1,4 @@
import json
import logging
import time
import warnings
from functools import wraps
@@ -490,7 +489,7 @@ def call_module_method(
else:
log_str = f"Time to get {key}: {round(end - start, 2)} seconds"

logging.info(log_str)
logger.info(log_str)
return result

async def acall(
@@ -591,7 +590,7 @@ async def acall_module_method(
)
else:
log_str = f"Time to get {key}: {round(end - start, 2)} seconds"
logging.info(log_str)
logger.info(log_str)
return result

def put_object(self, key: str, value: Any, env=None):