Skip to content

Commit

Permalink
initialize the GPUs (run cuda_init.sh) always if there is no nvidia0
Browse files Browse the repository at this point in the history
  • Loading branch information
martbhell committed Jul 20, 2016
1 parent 4af6d76 commit 897e52f
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 0 deletions.
1 change: 1 addition & 0 deletions defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ cuda_restart_node_on_install: True
# Master switch for the GPU-initialisation tasks in tasks/redhat.yml; every
# one of them is guarded by `when: cuda_init`.
cuda_init: true
# NOTE(review): presumably the compute mode applied by cuda_init.sh - confirm
# against the script template.
cuda_init_compute_mode: 3
# NOTE(review): presumably the persistence mode applied by cuda_init.sh -
# confirm against the script template.
cuda_init_persistence_mode: 1
# Device node that is stat'ed to decide whether cuda_init.sh still has to be
# run on the host.
cuda_gpu_name0: '/dev/nvidia0'
13 changes: 13 additions & 0 deletions tasks/redhat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,19 @@
line="/bin/bash /usr/local/bin/cuda_init.sh"
when: cuda_init

# Probe for the GPU device node named by `cuda_gpu_name0`. The registered
# result is consulted by the following tasks to decide whether cuda_init.sh
# has to be run by hand.
- name: check if cuda_gpu_name0 exists
  stat:
    path: "{{ cuda_gpu_name0 }}"
  register: reg_cuda_gpu_name0
  # Run this read-only probe even under --check so that the registered
  # variable is always populated. `check_mode: false` is the supported
  # spelling of the legacy `always_run: True` keyword.
  check_mode: false
  when: cuda_init

# Print the registered stat result of the GPU device node for troubleshooting.
- debug:
    var: reg_cuda_gpu_name0
  when: cuda_init

# Run cuda_init.sh directly when the stat probe reported that the GPU device
# node does not exist. All list entries under `when` must hold (logical AND).
- name: Initialize the GPUs if there is no /dev/nvidia0
  command: /bin/bash /usr/local/bin/cuda_init.sh
  when:
    - reg_cuda_gpu_name0.stat.exists is defined
    - not reg_cuda_gpu_name0.stat.exists
    - cuda_init

# slurmd does not start unless cuda_init.sh has been run first, so starting
# slurmd later in the same playbook would fail the play. Flush the pending
# handlers at this point to avoid that.
- name: flush the handlers - so that the node is rebooted after CUDA is installed and that the GPUs are initialized before we start slurm
  meta: flush_handlers
Expand Down

0 comments on commit 897e52f

Please sign in to comment.