Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove usage_lib.LibUsageRecorder #32806

Merged
merged 1 commit into from
Feb 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 0 additions & 72 deletions python/ray/_private/usage/usage_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,10 @@
To see collected/reported data, see `usage_stats.json` inside a temp
folder (e.g., /tmp/ray/session_[id]/*).
"""
import glob
import json
import logging
import threading
import os
import re
import sys
import time
import uuid
Expand Down Expand Up @@ -175,52 +173,6 @@ class UsageStatsEnabledness(Enum):
_recorded_extra_usage_tags_lock = threading.Lock()


# NOTE: Do not change the write / read protocol. That will cause
# version incompatibility issues.
class LibUsageRecorder:
"""A class to put/get the library usage to the ray tmp folder.
See https://github.com/ray-project/ray/pull/25842 for more details.
"""

def __init__(self, temp_dir_path: str):
self._lib_usage_dir = Path(temp_dir_path)
self._lib_usage_prefix = "_ray_lib_usage-"
self._lib_usage_filename_match = re.compile(
f"{self._lib_usage_prefix}([0-9a-zA-Z_.]+).txt"
)

def put_lib_usage(self, lib_name: str):
"""Put the library usage to the ray tmp folder."""
lib_usage_file = self._lib_usage_dir / self._lib_usage_filename(lib_name)
lib_usage_file.touch(exist_ok=True)

def read_lib_usages(self) -> List[str]:
"""Read a list of library usages from the ray tmp folder."""
# For checking if the file exists, it is okay to have a minor chance of
# having race condition.
lib_usages = []
file_paths = glob.glob(f"{self._lib_usage_dir}/{self._lib_usage_prefix}*")
for file_path in file_paths:
file_path = Path(file_path)
if file_path.exists():
lib_usages.append(self._get_lib_usage_from_filename(file_path.name))
return lib_usages

def delete_lib_usages(self):
"""Delete all usage files. Test only"""
file_paths = glob.glob(f"{self._lib_usage_dir}/{self._lib_usage_prefix}*")
for file_path in file_paths:
file_path = Path(file_path)
file_path.unlink()

def _lib_usage_filename(self, lib_name: str) -> str:
return f"{self._lib_usage_prefix}{lib_name}.txt"

def _get_lib_usage_from_filename(self, filename: str) -> str:
return self._lib_usage_filename_match.match(filename).group(1)


def _put_library_usage(library_usage: str):
assert _internal_kv_initialized()
try:
Expand All @@ -232,16 +184,6 @@ def _put_library_usage(library_usage: str):
except Exception as e:
logger.debug(f"Failed to put library usage, {e}")

# Record the library usage to the temp (e.g., /tmp/ray) folder.
# Note that although we always write this file, it is not
# reported when the usage stats is disabled.
if ray._private.worker.global_worker.mode == ray.SCRIPT_MODE:
try:
lib_usage_recorder = LibUsageRecorder(ray._private.utils.get_ray_temp_dir())
lib_usage_recorder.put_lib_usage(library_usage)
except Exception as e:
logger.debug(f"Failed to write a library usage to the home folder, {e}")


def record_extra_usage_tag(key: TagKey, value: str):
"""Record extra kv usage tag.
Expand Down Expand Up @@ -279,11 +221,6 @@ def _put_extra_usage_tag(key: str, value: str):

def record_library_usage(library_usage: str):
"""Record library usage (e.g. which library is used)"""
if "-" in library_usage:
# - is not permitted since it should be used as a separator
# of the lib usage file name. See LibUsageRecorder.
raise ValueError("The library name contains a char - which is not permitted.")

with _recorded_library_usages_lock:
if library_usage in _recorded_library_usages:
return
Expand Down Expand Up @@ -568,15 +505,6 @@ def get_library_usages_to_report(gcs_client) -> List[str]:
library_usage = library_usage.decode("utf-8")
result.append(library_usage[len(usage_constant.LIBRARY_USAGE_PREFIX) :])

try:
historical_lib_usages = LibUsageRecorder(
ray._private.utils.get_ray_temp_dir()
).read_lib_usages()
for library_usage in historical_lib_usages:
if library_usage not in result:
result.append(library_usage)
except Exception as e:
logger.info(f"Failed to read historical library usage {e}")
return result
except Exception as e:
logger.info(f"Failed to get library usages to report {e}")
Expand Down
107 changes: 0 additions & 107 deletions python/ray/tests/test_usage_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,6 @@ def gcs_storage_type():
@pytest.fixture
def reset_usage_stats():
yield
# Remove the lib usage so that it will be reset for each test.
ray_usage_lib.LibUsageRecorder(
ray._private.utils.get_ray_temp_dir()
).delete_lib_usages()
ray.experimental.internal_kv._internal_kv_reset()
ray_usage_lib._recorded_library_usages.clear()
ray_usage_lib._recorded_extra_usage_tags.clear()
Expand Down Expand Up @@ -455,37 +451,6 @@ def test_set_usage_stats_enabled_via_config(monkeypatch, tmp_path, reset_usage_s
ray_usage_lib.set_usage_stats_enabled_via_config(True)


def test_lib_usage_recorder(tmp_path):
recorder = ray_usage_lib.LibUsageRecorder(tmp_path)
lib_tune = "tune"
lib_rllib = "rllib"

filename = recorder._lib_usage_filename(lib_tune)
assert recorder._get_lib_usage_from_filename(filename) == lib_tune

# Write tune.
assert recorder.read_lib_usages() == []
recorder.put_lib_usage(lib_tune)
assert recorder.read_lib_usages() == [lib_tune]
recorder.put_lib_usage(lib_tune)
assert recorder.read_lib_usages() == [lib_tune]

# Test write is idempotent
for _ in range(5):
recorder.put_lib_usage(lib_tune)
assert recorder.read_lib_usages() == [lib_tune]

# Write rllib.
recorder.put_lib_usage(lib_rllib)
assert set(recorder.read_lib_usages()) == {lib_tune, lib_rllib}

# Test idempotency when there is more than 1 lib.
recorder.put_lib_usage(lib_rllib)
recorder.put_lib_usage(lib_rllib)
recorder.put_lib_usage(lib_tune)
assert set(recorder.read_lib_usages()) == {lib_tune, lib_rllib}


@pytest.fixture
def clear_loggers():
"""Remove handlers from all loggers"""
Expand Down Expand Up @@ -795,10 +760,6 @@ def get_actor_metadata(self):
library_usages = ray_usage_lib.get_library_usages_to_report(
ray.experimental.internal_kv.internal_kv_get_gcs_client()
)
tmp_path = ray._private.utils.get_ray_temp_dir()
lib_usages_from_home_folder = ray_usage_lib.LibUsageRecorder(
tmp_path
).read_lib_usages()
expected = {
"pre_init",
"post_init",
Expand All @@ -816,8 +777,6 @@ def get_actor_metadata(self):
if ray_client:
expected.add("client")
assert set(library_usages) == expected
if not ray_client:
assert set(lib_usages_from_home_folder) == expected


def test_usage_lib_cluster_metadata_generation_usage_disabled(
Expand Down Expand Up @@ -1484,72 +1443,6 @@ def verify():
wait_for_condition(verify)


@pytest.mark.skipif(
os.environ.get("RAY_MINIMAL") == "1",
reason="Test depends on library that's not downloaded from a minimal install.",
)
def test_lib_usage_record_from_init_session(
monkeypatch, ray_start_cluster, reset_usage_stats
):
"""
Make sure we store a lib usage to the /tmp/ray folder and report them
when any instance that has usage stats enabled.
"""

# Start a driver without usage stats enabled. This will record
# lib_usage.txt.
script = """
import ray
import os
from ray import train # noqa: F401
from ray import tune # noqa: F401
from ray.rllib.algorithms.ppo import PPO # noqa: F401
# Start a instance that disables usage stats.
ray.init()
def objective(*args):
pass
tune.run(objective)
"""

run_string_as_driver(script)

# Run the cluster that reports the usage stats. Make sure the lib usage is reported.
with monkeypatch.context() as m:
m.setenv("RAY_USAGE_STATS_ENABLED", "1")
m.setenv("RAY_USAGE_STATS_REPORT_URL", "http://127.0.0.1:8000/usage")
m.setenv("RAY_USAGE_STATS_REPORT_INTERVAL_S", "1")
cluster = ray_start_cluster
cluster.add_node(num_cpus=3)
ray.init(address=cluster.address)

"""
Verify the library usage is recorded to the ray folder.
"""
lib_usages = ray_usage_lib.LibUsageRecorder(
ray._private.utils.get_ray_temp_dir()
).read_lib_usages()
assert set(lib_usages) == {"train", "rllib", "tune"}

"""
Verify the library usage is reported from the current instance.
"""
print("Verifying lib usage report.")
global_node = ray.worker._global_node
temp_dir = pathlib.Path(global_node.get_session_dir_path())

wait_for_condition(lambda: file_exists(temp_dir), timeout=30)

def verify():
lib_usages = read_file(temp_dir, "usage_stats")["library_usages"]
print(lib_usages)
return set(lib_usages) == {"rllib", "train", "tune"}

wait_for_condition(verify)


def test_usage_stats_tags(
monkeypatch, ray_start_cluster, reset_usage_stats, gcs_storage_type
):
Expand Down