diff --git a/ci/cudf_pandas_scripts/pandas-tests/run.sh b/ci/cudf_pandas_scripts/pandas-tests/run.sh index e5cd4436a3a..f71ac83e407 100755 --- a/ci/cudf_pandas_scripts/pandas-tests/run.sh +++ b/ci/cudf_pandas_scripts/pandas-tests/run.sh @@ -33,7 +33,7 @@ bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \ -m "not slow" \ --max-worker-restart=3 \ --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-pandas.xml" \ - --dist worksteal \ + --dist loadfile \ --report-log=${PANDAS_TESTS_BRANCH}.json 2>&1 SUMMARY_FILE_NAME=${PANDAS_TESTS_BRANCH}-${RAPIDS_FULL_VERSION}-results.json diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index d855b3468a0..4dfdea72178 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -7,6 +7,7 @@ import multiprocessing import os import sys +from collections import defaultdict from functools import wraps import pytest @@ -40,21 +41,35 @@ def patch_testing_functions(): # Dictionary to store function call counts manager = multiprocessing.Manager() -function_call_counts = manager.dict() +function_call_counts = defaultdict(int) # type: ignore # The specific function to track FUNCTION_NAME = {"_slow_function_call", "_fast_function_call"} +def find_pytest_file(frame): + new_f = frame + while new_f: + if "pandas-testing/pandas-tests/tests" in new_f.f_globals.get( + "__file__", "" + ): + return os.path.abspath(new_f.f_globals.get("__file__", "")) + new_f = new_f.f_back + return None + + def trace_calls(frame, event, arg): if event != "call": return code = frame.f_code func_name = code.co_name + if func_name in FUNCTION_NAME: - function_call_counts[func_name] = ( - function_call_counts.get(func_name, 0) + 1 - ) + # filename = find_pytest_file(frame) + # if filename not in function_call_counts: + # function_call_counts[filename] = defaultdict(int) + # function_call_counts[filename][func_name] += 1 + function_call_counts[func_name] += 1 def pytest_sessionstart(session): @@ -75,7 +90,7 @@ def pytest_runtest_setup(item): ): # If it's a new file, reset the function call counts global function_call_counts - function_call_counts = manager.dict() + function_call_counts = defaultdict(int) pytest_runtest_setup.current_file = item.nodeid.split("::")[0] @@ -89,6 +104,8 @@ def pytest_runtest_teardown(item, nextitem): # Write the function call counts to a file worker_id = os.getenv("PYTEST_XDIST_WORKER", "master") output_file = f'{item.nodeid.split("::")[0].replace("/", "__")}_{worker_id}_metrics.json' + # if os.path.exists(output_file): + # output_file = f'{item.nodeid.split("::")[0].replace("/", "__")}_{worker_id}_metrics_1.json' with open(output_file, "w") as f: json.dump(dict(function_call_counts), f, indent=4) print(f"Function call counts have been written to {output_file}") @@ -99,7 +116,7 @@ def pytest_configure(config): if hasattr(config, "workerinput"): # Running in xdist worker global function_call_counts - function_call_counts = manager.dict() + function_call_counts = defaultdict(int) @pytest.hookimpl(trylast=True) @@ -110,7 +127,7 @@ def pytest_unconfigure(config): output_file = f"function_call_counts_worker_{worker_id}.json" with open(output_file, "w") as f: json.dump(dict(function_call_counts), f, indent=4) - print(f"Function call counts have been written to {output_file}") + # print(f"Function call counts have been written to {output_file}") sys.path.append(os.path.dirname(__file__)) diff --git a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh index 416264ea04d..781bde87f7d 100755 --- a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh +++ b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh @@ -65,7 +65,7 @@ markers = [ ] EOF # append the contents of patch-confest.py to conftest.py - cat ../python/cudf/cudf/pandas/scripts/conftest-patch.py >> pandas-tests/conftest.py + # cat ../python/cudf/cudf/pandas/scripts/conftest-patch.py >> pandas-tests/conftest.py # Substitute `pandas.tests` with a relative import. # This will depend on the location of the test module relative to @@ -134,7 +134,8 @@ TEST_THAT_CRASH_PYTEST_WORKERS="not test_bitmasks_pyarrow \ and not test_large_string_pyarrow \ and not test_interchange_from_corrected_buffer_dtypes \ and not test_eof_states \ -and not test_array_tz" +and not test_array_tz \ +and not test_groupby_raises_category" # TODO: Remove "not db" once a postgres & mysql container is set up on the CI PANDAS_CI="1" timeout 90m python -m pytest -p cudf.pandas \