Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove telemetry APIs + otel requirements #986

Merged
merged 1 commit into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ Example output from ``sky status``:
Authenticate with Runhouse Den
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

To enable sharing features and telemetry through Runhouse Den, you can log in to your
To enable sharing features through Runhouse Den, you can log in to your
Runhouse Den account. Start by creating one via our `signup page </signup>`.
You'll have the option to authenticate with your Google or Github account.

Expand Down
6 changes: 0 additions & 6 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
chardet==4.0.0
myst-parser==2.0.0
opentelemetry-api==1.20.0
opentelemetry-exporter-otlp-proto-http==1.20.0
opentelemetry-instrumentation==0.41b0
opentelemetry-instrumentation-fastapi==0.41b0
opentelemetry-instrumentation-requests==0.41b0
opentelemetry-sdk==1.20.0
pint==0.20.1
pyarrow==9.0.0
pydata-sphinx-theme==0.13.3
Expand Down
2 changes: 0 additions & 2 deletions docs/tutorials/quick-start-local.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ and details of the server. For printing cluster's status outside the cluser, its
• server connection type: ssh
• backend config:
• resource subtype: OnDemandCluster
• use local telemetry: False
• domain: None
• server host: 0.0.0.0
• ips: ['52.207.212.159']
Expand Down Expand Up @@ -77,7 +76,6 @@ and details of the server. For printing cluster's status outside the cluser, its
• server connection type: ssh
• backend config:
• resource subtype: OnDemandCluster
• use local telemetry: False
• domain: None
• server host: 0.0.0.0
• ips: ['35.171.157.49']
Expand Down
6 changes: 0 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
python-dotenv
fastapi
fsspec<=2023.5.0
opentelemetry-api
opentelemetry-exporter-otlp-proto-http
opentelemetry-instrumentation
opentelemetry-instrumentation-fastapi
opentelemetry-instrumentation-requests
opentelemetry-sdk
pexpect
pyopenssl>=23.3.0
ray[default] >= 2.2.0, != 2.6.0
Expand Down
16 changes: 0 additions & 16 deletions runhouse/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,6 @@ def _print_cluster_config(cluster_config: Dict):

backend_config = [
"resource_subtype",
"use_local_telemetry",
"domain",
"server_host",
"ips",
Expand Down Expand Up @@ -506,7 +505,6 @@ def _start_server(
use_caddy=False,
domain=None,
certs_address=None,
use_local_telemetry=False,
api_server_url=None,
default_env_name=None,
conda_env=None,
Expand Down Expand Up @@ -580,11 +578,6 @@ def _start_server(
logger.info(f"Server public IP address: {certs_address}.")
flags.append(address_flag)

use_local_telemetry_flag = " --use-local-telemetry" if use_local_telemetry else ""
if use_local_telemetry_flag:
logger.info("Configuring local telemetry on the cluster.")
flags.append(use_local_telemetry_flag)

api_server_url_flag = (
f" --api-server-url {api_server_url}" if api_server_url else ""
)
Expand Down Expand Up @@ -703,9 +696,6 @@ def start(
None,
help="Public IP address of the server. Required for generating self-signed certs and enabling HTTPS",
),
use_local_telemetry: bool = typer.Option(
False, help="Whether to use local telemetry"
),
default_env_name: str = typer.Option(
None, help="Default env to start the server on."
),
Expand All @@ -727,7 +717,6 @@ def start(
use_caddy=use_caddy,
domain=domain,
certs_address=certs_address,
use_local_telemetry=use_local_telemetry,
default_env_name=default_env_name,
conda_env=conda_env,
)
Expand Down Expand Up @@ -782,10 +771,6 @@ def restart(
None,
help="Public IP address of the server. Required for generating self-signed certs and enabling HTTPS",
),
use_local_telemetry: bool = typer.Option(
False,
help="Whether to use local telemetry",
),
api_server_url: str = typer.Option(
default="https://api.run.house",
help="URL of Runhouse Den",
Expand Down Expand Up @@ -830,7 +815,6 @@ def restart(
use_caddy=use_caddy,
domain=domain,
certs_address=certs_address,
use_local_telemetry=use_local_telemetry,
api_server_url=api_server_url,
default_env_name=default_env_name,
conda_env=conda_env,
Expand Down
5 changes: 0 additions & 5 deletions runhouse/resources/hardware/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ def __init__(
ssl_certfile: str = None,
domain: str = None,
den_auth: bool = False,
use_local_telemetry: bool = False,
dryrun=False,
**kwargs, # We have this here to ignore extra arguments when calling from from_config
):
Expand Down Expand Up @@ -104,7 +103,6 @@ def __init__(
self.ssh_port = ssh_port or self.DEFAULT_SSH_PORT
self.server_host = server_host
self.domain = domain
self.use_local_telemetry = use_local_telemetry

self._default_env = _get_env_from(default_env)
if self._default_env and not self._default_env.name:
Expand Down Expand Up @@ -249,7 +247,6 @@ def config(self, condensed=True):
"server_connection_type",
"domain",
"den_auth",
"use_local_telemetry",
"ssh_port",
"client_port",
],
Expand Down Expand Up @@ -818,7 +815,6 @@ def restart_server(
https_flag = self._use_https
caddy_flag = self._use_caddy
domain = self.domain
use_local_telemetry = self.use_local_telemetry

cluster_key_path = None
cluster_cert_path = None
Expand Down Expand Up @@ -895,7 +891,6 @@ def restart_server(
+ (f" --ssl-certfile {cluster_cert_path}" if self._use_custom_certs else "")
+ (f" --ssl-keyfile {cluster_key_path}" if self._use_custom_certs else "")
+ (f" --domain {domain}" if domain else "")
+ (" --use-local-telemetry" if use_local_telemetry else "")
+ f" --port {self.server_port}"
+ f" --api-server-url {rns_client.api_server_url}"
+ f" --default-env-name {self.default_env.name}"
Expand Down
2 changes: 0 additions & 2 deletions runhouse/rns/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,9 @@ class Defaults:
"use_spot": False,
"use_local_configs": True,
"disable_data_collection": False,
"use_local_telemetry": False,
"use_rns": False,
"api_server_url": "https://api.run.house",
"dashboard_url": "https://run.house",
"telemetry_collector_address": "https://api.run.house:14318",
}

def __init__(self):
Expand Down
147 changes: 0 additions & 147 deletions runhouse/servers/http/http_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ async def ainitialize(
cls,
default_env_name=None,
conda_env=None,
enable_local_span_collection=None,
from_test: bool = False,
*args,
**kwargs,
Expand All @@ -125,59 +124,6 @@ async def ainitialize(
logger.info(f"setting logs level to {log_level}")
runtime_env = {"conda": conda_env} if conda_env else None

# If enable_local_span_collection flag is passed, setup the span exporter and related functionality
if enable_local_span_collection:
from opentelemetry import trace
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.instrumentation.requests import RequestsInstrumentor
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
InMemorySpanExporter,
)

trace.set_tracer_provider(
TracerProvider(
resource=Resource.create(
{"service.name": "runhouse-in-memory-service"}
)
)
)
global memory_exporter
memory_exporter = InMemorySpanExporter()
trace.get_tracer_provider().add_span_processor(
SimpleSpanProcessor(memory_exporter)
)
# Instrument the app object
FastAPIInstrumentor.instrument_app(app)

# Instrument the requests library
RequestsInstrumentor().instrument()

@app.get("/spans")
@validate_cluster_access
def get_spans(request: Request):
return {
"spans": [
span.to_json() for span in memory_exporter.get_finished_spans()
]
}

@app.middleware("http")
async def _add_username_to_span(request: Request, call_next):
from opentelemetry import trace

span = trace.get_current_span()

token = get_token_from_request(request)
username = await obj_store.aget_username(token)
if username:
# Set the username as a span attribute
span.set_attribute("username", username)

return await call_next(request)

default_env_name = default_env_name or EMPTY_DEFAULT_ENV_NAME

# Ray and ClusterServlet should already be
Expand All @@ -199,13 +145,6 @@ async def _add_username_to_span(request: Request, call_next):
# except Exception as e:
# logger.error(f"Failed to collect cluster stats: {str(e)}")

if enable_local_span_collection:
try:
# Collect telemetry stats for the cluster
HTTPServer._collect_telemetry_stats()
except Exception as e:
logger.error(f"Failed to collect cluster telemetry stats: {str(e)}")

# We initialize a default env servlet where some things may run.
_ = obj_store.get_env_servlet(
env_name=default_env_name,
Expand All @@ -228,15 +167,13 @@ def initialize(
cls,
default_env_name=None,
conda_env=None,
enable_local_span_collection=None,
from_test: bool = False,
*args,
**kwargs,
):
return sync_function(cls.ainitialize)(
default_env_name,
conda_env,
enable_local_span_collection,
from_test,
*args,
**kwargs,
Expand Down Expand Up @@ -742,64 +679,6 @@ def _collect_cluster_stats():
labels={"username": configs.username, "environment": "prod"},
)

@staticmethod
def _collect_telemetry_stats():
"""Collect telemetry stats and send them to the Runhouse hosted OpenTelemetry collector"""
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
OTLPSpanExporter,
)
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.instrumentation.requests import RequestsInstrumentor
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

telemetry_collector_address = configs.get("telemetry_collector_address")

logger.info(f"Preparing to send telemetry to {telemetry_collector_address}")

# Set the tracer provider and the exporter
import runhouse

service_version = runhouse.__version__
if telemetry_collector_address == "https://api-dev.run.house:14318":
service_name = "runhouse-service-dev"
deployment_env = "dev"
else:
service_name = "runhouse-service-prod"
deployment_env = "prod"
trace.set_tracer_provider(
TracerProvider(
resource=Resource.create(
{
"service.namespace": "Runhouse_OSS",
"service.name": service_name,
"service.version": service_version,
"deployment.environment": deployment_env,
}
)
)
)
otlp_exporter = OTLPSpanExporter(
endpoint=telemetry_collector_address + "/v1/traces",
)

# Add the exporter to the tracer provider
trace.get_tracer_provider().add_span_processor(
BatchSpanProcessor(otlp_exporter)
)

logger.info(
f"Successfully added telemetry exporter {telemetry_collector_address}"
)

# Instrument the app object
FastAPIInstrumentor.instrument_app(app)

# Instrument the requests library
RequestsInstrumentor().instrument()

@staticmethod
def _cluster_status_report():
import ray._private.usage.usage_lib as ray_usage_lib
Expand Down Expand Up @@ -881,12 +760,6 @@ async def main():
parser.add_argument(
"--conda-env", type=str, default=None, help="Conda env to run server in"
)
parser.add_argument(
"--use-local-telemetry",
action="store_true", # if providing --use-local-telemetry will be set to True
default=argparse.SUPPRESS, # If user didn't specify, attribute will not be present (not False)
help="Enable local telemetry",
)
parser.add_argument(
"--use-https",
action="store_true", # if providing --use-https will be set to True
Expand Down Expand Up @@ -1036,23 +909,6 @@ async def main():
if den_auth:
await obj_store.aenable_den_auth()

# Telemetry enabled
if hasattr(
parse_args, "use_local_telemetry"
) and parse_args.use_local_telemetry != cluster_config.get(
"use_local_telemetry", False
):
logger.warning(
f"CLI provided use_local_telemetry: {parse_args.use_local_telemetry} is different from the "
f"use_local_telemetry specified in cluster_config.json: "
f"{cluster_config.get('use_local_telemetry')}. Prioritizing CLI provided use_local_telemetry."
)

use_local_telemetry = getattr(
parse_args, "use_local_telemetry", False
) or cluster_config.get("use_local_telemetry", False)
cluster_config["use_local_telemetry"] = use_local_telemetry

domain = parse_args.domain or cluster_config.get("domain", None)
cluster_config["domain"] = domain

Expand Down Expand Up @@ -1170,8 +1026,6 @@ async def main():
await HTTPServer.ainitialize(
default_env_name=default_env_name,
conda_env=conda_name,
enable_local_span_collection=use_local_telemetry
or configs.data_collection_enabled(),
logs_level=parse_args.log_level,
)

Expand Down Expand Up @@ -1231,7 +1085,6 @@ async def main():

logger.info(
f"Launching Runhouse API server with den_auth={den_auth} and "
+ f"use_local_telemetry={use_local_telemetry} "
+ f"on host={host} and use_https={use_https} and port_arg={daemon_port}"
)

Expand Down
1 change: 0 additions & 1 deletion runhouse/servers/obj_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -1103,7 +1103,6 @@ async def acall_local(

log_ctx = None
if stream_logs:
# When we start collecting logs for telemetry, we'll enter here too
log_ctx = run(
name=run_name,
log_dest="file" if run_name else None,
Expand Down
Loading
Loading