Skip to content

Commit

Permalink
Split off cluster tests into their own test run.
Browse files Browse the repository at this point in the history
  • Loading branch information
rohinb2 committed Jun 24, 2024
1 parent a3bf388 commit 168d084
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 2 deletions.
29 changes: 27 additions & 2 deletions .github/workflows/local_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,13 @@ jobs:
token: ${{ secrets.CI_ACCOUNT_TOKEN }}
api_server_url: ${{ env.API_SERVER_URL }}

- name: pytest -v --level local -k "not servertest and not secrettest and not moduletest and not functiontest and not envtest"
- name: pytest -v --level local -k "not servertest and not secrettest and not moduletest and not functiontest and not envtest and not clustertest"
env:
TEST_TOKEN: ${{ secrets.TEST_TOKEN }}
TEST_USERNAME: ${{ secrets.TEST_USERNAME }}
ORG_MEMBER_TOKEN: ${{ secrets.ORG_MEMBER_TOKEN }}
ORG_MEMBER_USERNAME: ${{ secrets.ORG_MEMBER_USERNAME }}
run: pytest -v --level local -k "not servertest and not secrettest and not moduletest and not functiontest and not envtest"
run: pytest -v --level local -k "not servertest and not secrettest and not moduletest and not functiontest and not envtest and not clustertest"
timeout-minutes: 60

# secret-tests-logged-out-level-local:
Expand Down Expand Up @@ -204,3 +204,28 @@ jobs:
ORG_MEMBER_USERNAME: ${{ secrets.ORG_MEMBER_USERNAME }}
run: pytest -v --level local -k "envtest"
timeout-minutes: 60

cluster-tests-logged-in-level-local:
runs-on: ubuntu-latest
steps:
- name: Check out repository code
uses: actions/checkout@v3

- name: Setup Runhouse
uses: ./.github/workflows/setup_runhouse

- name: Setup ~/.rh/config.yaml
uses: ./.github/workflows/setup_rh_config
with:
username: ${{ secrets.CI_ACCOUNT_USERNAME }}
token: ${{ secrets.CI_ACCOUNT_TOKEN }}
api_server_url: ${{ env.API_SERVER_URL }}

- name: pytest -v --level local -k "clustertest"
env:
TEST_TOKEN: ${{ secrets.TEST_TOKEN }}
TEST_USERNAME: ${{ secrets.TEST_USERNAME }}
ORG_MEMBER_TOKEN: ${{ secrets.ORG_MEMBER_TOKEN }}
ORG_MEMBER_USERNAME: ${{ secrets.ORG_MEMBER_USERNAME }}
run: pytest -v --level local -k "clustertest"
timeout-minutes: 60
1 change: 1 addition & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ markers =
secrettest: all tests in tests/test_resources/test_secrets/, for filtering out
moduletest: all tests in TestModule, for filtering out
functiontest: all tests in TestFunction, for filtering out
clustertest: all tests in TestCluster, for filtering out
envtest: all tests in TestEnv, for filtering out
level: mark tests with a given level that will be used when selecting tests to run
26 changes: 26 additions & 0 deletions tests/test_resources/test_clusters/test_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ class TestCluster(tests.test_resources.test_resource.TestResource):
GPU_CLUSTER_NAMES = ["rh-v100", "rh-k80", "rh-a10x", "rh-gpu-multinode"]

@pytest.mark.level("unit")
@pytest.mark.clustertest
def test_cluster_factory_and_properties(self, cluster):
assert isinstance(cluster, rh.Cluster)
args = init_args[id(cluster)]
Expand Down Expand Up @@ -154,6 +155,7 @@ def test_cluster_factory_and_properties(self, cluster):
assert cluster.cert_config.cert_path == args["ssl_certfile"]

@pytest.mark.level("local")
@pytest.mark.clustertest
def test_docker_cluster_fixture_is_logged_out(self, docker_cluster_pk_ssh_no_auth):
save_resource_and_return_config_cluster = rh.function(
save_resource_and_return_config,
Expand All @@ -169,6 +171,7 @@ def test_docker_cluster_fixture_is_logged_out(self, docker_cluster_pk_ssh_no_aut
)

@pytest.mark.level("local")
@pytest.mark.clustertest
def test_cluster_recreate(self, cluster):
# Create the underlying SSH connection if it does not already exist
cluster.run(["echo hello"])
Expand All @@ -182,6 +185,7 @@ def test_cluster_recreate(self, cluster):
assert len(rh.globals.sky_ssh_runner_cache) == num_open_tunnels

@pytest.mark.level("local")
@pytest.mark.clustertest
def test_cluster_endpoint(self, cluster):
if not cluster.address:
assert cluster.endpoint() is None
Expand Down Expand Up @@ -218,6 +222,7 @@ def test_cluster_endpoint(self, cluster):
assert not status_data.get("system_gpu_data")

@pytest.mark.level("local")
@pytest.mark.clustertest
def test_cluster_objects(self, cluster):
k1 = get_random_str()
k2 = get_random_str()
Expand All @@ -238,6 +243,7 @@ def test_cluster_objects(self, cluster):
assert cluster.get(k3) == "v3"

@pytest.mark.level("local")
@pytest.mark.clustertest
def test_cluster_delete_env(self, cluster):
env1 = rh.env(reqs=[], working_dir="./", name="env1").to(cluster)
env2 = rh.env(reqs=[], working_dir="./", name="env2").to(cluster)
Expand All @@ -264,6 +270,7 @@ def test_cluster_delete_env(self, cluster):
assert cluster.get("k1")

@pytest.mark.level("local")
@pytest.mark.clustertest
@pytest.mark.skip(reason="TODO")
def test_rh_here_objects(self, cluster):
save_test_table_remote = rh.function(test_table_to_rh_here, system=cluster)
Expand All @@ -272,6 +279,7 @@ def test_rh_here_objects(self, cluster):
assert isinstance(cluster.get("test_table"), rh.Table)

@pytest.mark.level("local")
@pytest.mark.clustertest
def test_condensed_config_for_cluster(self, cluster):
remote_cluster_config = rh.function(cluster_config).to(cluster)
on_cluster_config = remote_cluster_config()
Expand Down Expand Up @@ -300,6 +308,7 @@ def test_condensed_config_for_cluster(self, cluster):
assert on_cluster_config == local_cluster_config

@pytest.mark.level("local")
@pytest.mark.clustertest
def test_sharing(self, cluster, friend_account_logged_in_docker_cluster_pk_ssh):
# Skip this test for ondemand clusters, because making
# it compatible with ondemand_cluster requires changes
Expand Down Expand Up @@ -349,6 +358,7 @@ def test_sharing(self, cluster, friend_account_logged_in_docker_cluster_pk_ssh):
assert new_config == config

@pytest.mark.level("local")
@pytest.mark.clustertest
def test_access_to_shared_cluster(self, cluster):
# TODO: Remove this by doing some CI-specific logic.
if cluster.__class__.__name__ == "OnDemandCluster":
Expand Down Expand Up @@ -384,6 +394,7 @@ def test_access_to_shared_cluster(self, cluster):
assert shared_cluster.run(["echo hello"])[0][0] == 0

@pytest.mark.level("local")
@pytest.mark.clustertest
def test_changing_name_and_saving_in_between(self, cluster):
remote_summer = rh.function(summer).to(cluster)
assert remote_summer(3, 4) == 7
Expand All @@ -406,6 +417,7 @@ def test_changing_name_and_saving_in_between(self, cluster):
cluster.save(name=old_name)

@pytest.mark.level("local")
@pytest.mark.clustertest
def test_caller_token_propagated(self, cluster):
remote_assume_caller_and_get_token = rh.function(
assume_caller_and_get_token
Expand Down Expand Up @@ -437,6 +449,7 @@ def test_caller_token_propagated(self, cluster):
####################################################################################################

@pytest.mark.level("local")
@pytest.mark.clustertest
def test_rh_status_pythonic(self, cluster):
sleep_remote = rh.function(sleep_fn).to(
cluster, env=rh.env(reqs=["pytest", "pandas"], name="worker_env")
Expand Down Expand Up @@ -526,6 +539,7 @@ def test_rh_status_pythonic(self, cluster):
assert env_servlet_info.get("env_gpu_usage")

@pytest.mark.level("maximal")
@pytest.mark.clustertest
def test_rh_status_pythonic_gpu(self, cluster):
if cluster.name in self.GPU_CLUSTER_NAMES:
from tests.test_tutorials import sd_generate
Expand Down Expand Up @@ -600,6 +614,7 @@ def test_rh_status_cli_in_cluster(self, cluster):
assert status_output_string.count("GPU: ") >= 1

@pytest.mark.level("maximal")
@pytest.mark.clustertest
def test_rh_status_cli_in_gpu_cluster(self, cluster):
if cluster.name in self.GPU_CLUSTER_NAMES:
from tests.test_tutorials import sd_generate
Expand All @@ -624,6 +639,7 @@ def test_rh_status_cli_in_gpu_cluster(self, cluster):

@pytest.mark.skip("Restarting the server mid-test causes some errors, need to fix")
@pytest.mark.level("local")
@pytest.mark.clustertest
# TODO: once fixed, extend this test to GPU clusters as well.
def test_rh_status_cli_not_in_cluster(self, cluster):
default_env_name = cluster.default_env.name
Expand All @@ -645,6 +661,7 @@ def test_rh_status_cli_not_in_cluster(self, cluster):

@pytest.mark.skip("Restarting the server mid-test causes some errors, need to fix")
@pytest.mark.level("local")
@pytest.mark.clustertest
# TODO: once fixed, extend this test to GPU clusters as well.
def test_rh_status_stopped(self, cluster):
try:
Expand All @@ -666,6 +683,7 @@ def test_rh_status_stopped(self, cluster):
cluster.run(["runhouse restart"])

@pytest.mark.level("local")
@pytest.mark.clustertest
def test_send_status_to_db(self, cluster):
import json

Expand Down Expand Up @@ -722,6 +740,7 @@ def test_send_status_to_db(self, cluster):
)

@pytest.mark.level("minimal")
@pytest.mark.clustertest
def test_status_scheduler_basic_flow(self, cluster):
# TODO [SB]: remove the den_auth check once we can get the status of clusters without den_auth as well.
if not cluster.den_auth:
Expand Down Expand Up @@ -764,11 +783,13 @@ def test_status_scheduler_basic_flow(self, cluster):
####################################################################################################

@pytest.mark.level("local")
@pytest.mark.clustertest
def test_default_env_in_status(self, cluster):
res = cluster.status()
assert cluster.default_env.name in res.get("env_resource_mapping")

@pytest.mark.level("local")
@pytest.mark.clustertest
def test_put_in_default_env(self, cluster):
k1 = get_random_str()
cluster.put(k1, "v1")
Expand All @@ -777,6 +798,7 @@ def test_put_in_default_env(self, cluster):
cluster.delete(k1)

@pytest.mark.level("local")
@pytest.mark.clustertest
def test_fn_to_default_env(self, cluster):
remote_summer = rh.function(summer).to(cluster)

Expand All @@ -788,6 +810,7 @@ def test_fn_to_default_env(self, cluster):
assert fn() == "success"

@pytest.mark.level("local")
@pytest.mark.clustertest
def test_run_in_default_env(self, cluster):
for req in cluster.default_env.reqs:
if isinstance(req, str) and "_" in req:
Expand All @@ -796,6 +819,7 @@ def test_run_in_default_env(self, cluster):
assert cluster.run(f"pip freeze | grep {req}")[0][0] == 0

@pytest.mark.level("local")
@pytest.mark.clustertest
def test_default_conda_env_created(self, cluster):
if not isinstance(cluster.default_env, rh.CondaEnv):
pytest.skip("Default env is not a CondaEnv")
Expand All @@ -804,6 +828,7 @@ def test_default_conda_env_created(self, cluster):
assert isinstance(cluster.get(cluster.default_env.name), rh.CondaEnv)

@pytest.mark.level("local")
@pytest.mark.clustertest
def test_default_env_var_run(self, cluster):
env_vars = cluster.default_env.env_vars
if not env_vars:
Expand All @@ -820,6 +845,7 @@ def test_default_env_var_run(self, cluster):
assert get_env_var_cpu(var) == env_vars[var]

@pytest.mark.level("release")
@pytest.mark.clustertest
def test_switch_default_env(self, cluster):
# test setting a new default env, w/o restarting the runhouse server
test_env = cluster.default_env
Expand Down

0 comments on commit 168d084

Please sign in to comment.