From 03ded0bb73b9e44a4b16e78d0871822debf3dfbb Mon Sep 17 00:00:00 2001 From: Logan Adams Date: Fri, 10 Jan 2025 15:18:27 -0800 Subject: [PATCH 1/2] Add debugging for hangs on cpu torch --- .github/workflows/cpu-torch-latest.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/cpu-torch-latest.yml b/.github/workflows/cpu-torch-latest.yml index 78a51905834b..041c17a1ce53 100644 --- a/.github/workflows/cpu-torch-latest.yml +++ b/.github/workflows/cpu-torch-latest.yml @@ -21,6 +21,8 @@ jobs: unit-tests: runs-on: ubuntu-24.04 + + steps: - uses: actions/checkout@v4 @@ -55,7 +57,15 @@ jobs: run: | pip list + - name: System Status + run: | + df -h + free -m + - name: Unit tests + env: + ACTIONS_RUNNER_DEBUG: true + ACTIONS_STEP_DEBUG: true run: | unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch cd tests From bd14e272e1d54d700a84c872eca2185e628b4e5e Mon Sep 17 00:00:00 2001 From: Logan Adams Date: Fri, 10 Jan 2025 15:38:13 -0800 Subject: [PATCH 2/2] Move debug vars up --- .github/workflows/cpu-torch-latest.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cpu-torch-latest.yml b/.github/workflows/cpu-torch-latest.yml index 041c17a1ce53..a278e1552de4 100644 --- a/.github/workflows/cpu-torch-latest.yml +++ b/.github/workflows/cpu-torch-latest.yml @@ -20,6 +20,9 @@ concurrency: jobs: unit-tests: runs-on: ubuntu-24.04 + env: + ACTIONS_RUNNER_DEBUG: true + ACTIONS_STEP_DEBUG: true @@ -63,9 +66,6 @@ jobs: free -m - name: Unit tests - env: - ACTIONS_RUNNER_DEBUG: true - ACTIONS_STEP_DEBUG: true run: | unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch cd tests