Skip to content

Commit

Permalink
Fix flags for requester/compute split (#522)
Browse files Browse the repository at this point in the history
  • Loading branch information
hevans66 authored Jul 19, 2023
1 parent 69c0ee0 commit 87590b0
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 56 deletions.
10 changes: 5 additions & 5 deletions infrastructure/ansible/files/compute.service
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ ExecStart=bacalhau serve \
--node-type compute \
--ipfs-connect {{ ipfs_connect }} \
--private-internal-ipfs=false \
--labels owner={{ owner }} \
--peer {{ requester_peer }} \
--limit-job-memory 12gb \
{% if gpu %}
--limit-total-gpu 1 \
--limit-job-gpu 1 \
{% endif %}
--limit-job-memory 12gb \
--job-selection-accept-networked \
--job-selection-data-locality anywhere \
--labels owner={{ owner }} \
--peer {{ requester_peer }}
--job-selection-data-locality anywhere

[Install]
WantedBy=multi-user.target
4 changes: 3 additions & 1 deletion infrastructure/ansible/files/requester.service
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ ExecStart=bacalhau serve \
{% if receptor_url is defined %}
--job-selection-probe-http {{ receptor_url }} \
{% endif %}
--labels owner={{ owner }}
--labels owner={{ owner }} \
--job-selection-accept-networked \
--job-selection-data-locality anywhere

[Install]
WantedBy=multi-user.target
99 changes: 50 additions & 49 deletions infrastructure/ansible/provision_compute_only.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
nvidia_container_toolkit_key_path: /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
ipfs_path: /opt/local/ipfs
requester_peer: /ip4/172.31.90.74/tcp/1235/p2p/QmbETsVtL1sQ97KKV1jPQA5ng8RSyzPWUiDgRBQp7AcjRt
requester_ipfs_peer: /ip4/172.31.90.74/tcp/4001/p2p/12D3KooWAjYbsjXAQWqPRCPTTaMkDkUjhschznkLrDoKUyfQvHAP
gpu: true
environment:
IPFS_PATH: "{{ ipfs_path }}"
Expand Down Expand Up @@ -42,42 +43,6 @@
name: ubuntu
groups: docker

# Nvidia
- name: Get Nvidia drivers apt key
ansible.builtin.get_url:
url: https://developer.download.nvidia.com/compute/cuda/repos/{{ nvidia_distribution }}/x86_64/cuda-keyring_1.0-1_all.deb
dest: /tmp/cuda-keyring.deb
when: gpu

- name: Add Nvidia Keyring
become: yes
ansible.builtin.apt:
deb: /tmp/cuda-keyring.deb
when: gpu

- name: Get Nvidia Container Tookit GPG key
become: yes
ansible.builtin.shell:
cmd: curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --yes --dearmor -o {{ nvidia_container_toolkit_key_path }}
creates: "{{ nvidia_container_toolkit_key_path }}"
when: gpu

- name: Add Nvidia Container Tookit Repository
become: yes
ansible.builtin.apt_repository:
repo: deb [signed-by={{ nvidia_container_toolkit_key_path }}] https://nvidia.github.io/libnvidia-container/stable/ubuntu18.04/$(ARCH) /
state: present
when: gpu

- name: Install required system packages for gpu build
become: yes
ansible.builtin.apt:
pkg:
- cuda-drivers
state: latest
update_cache: true
when: gpu

- name: Install required system packages
become: yes
ansible.builtin.apt:
Expand All @@ -93,20 +58,52 @@
state: latest
update_cache: true

- name: Install Nvidia Container Tookit
become: yes
ansible.builtin.apt:
pkg:
- nvidia-docker2
notify:
- Restart docker
when: gpu

- name: Ensure Nvidia persitence daemon is started
ansible.builtin.systemd:
name: nvidia-persistenced
# Nvidia
# GPU-only provisioning. Every task in this block is skipped unless the
# `gpu` play variable is true (block-level `when` below).
- name: Install Nvidia GPU drivers and packages
  when: gpu
  block:
    # Fetch the CUDA keyring package for the configured distribution.
    - name: Get Nvidia drivers apt key
      ansible.builtin.get_url:
        url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ nvidia_distribution }}/x86_64/cuda-keyring_1.0-1_all.deb"
        dest: /tmp/cuda-keyring.deb

    # Install the keyring .deb downloaded by the previous task.
    - name: Add Nvidia Keyring
      become: true
      ansible.builtin.apt:
        deb: /tmp/cuda-keyring.deb

    # A shell is required because of the pipe; `creates` makes the task a
    # no-op once the dearmored key file exists.
    - name: Get Nvidia Container Toolkit GPG key
      become: true
      ansible.builtin.shell:
        cmd: curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --yes --dearmor -o {{ nvidia_container_toolkit_key_path }}
        creates: "{{ nvidia_container_toolkit_key_path }}"

    # Register the libnvidia-container apt repository, signed by the key
    # written by the previous task.
    - name: Add Nvidia Container Toolkit Repository
      become: true
      ansible.builtin.apt_repository:
        repo: "deb [signed-by={{ nvidia_container_toolkit_key_path }}] https://nvidia.github.io/libnvidia-container/stable/ubuntu18.04/$(ARCH) /"
        state: present

    - name: Install required system packages for gpu build
      become: true
      ansible.builtin.apt:
        pkg:
          - cuda-drivers
        state: latest
        update_cache: true

    # Docker is restarted through the `Restart docker` handler when this
    # task reports a change.
    - name: Install Nvidia Container Toolkit
      become: true
      ansible.builtin.apt:
        pkg:
          - nvidia-docker2
      notify:
        - Restart docker

    # `state: started` is what actually starts the unit; with only `name`
    # the module takes no action on the service. Managing a system unit
    # needs root, hence `become`.
    - name: Ensure Nvidia persistence daemon is started
      become: true
      ansible.builtin.systemd:
        name: nvidia-persistenced
        state: started

- name: Install Golag
become: yes
vars:
Expand All @@ -123,6 +120,10 @@
- name: Install IPFS
ansible.builtin.import_tasks: install_ipfs_tasks.yaml

# Runs `ipfs swarm connect` against the multiaddress held in the
# `requester_ipfs_peer` play variable.
# NOTE(review): presumably requires the local IPFS daemon to be running
# at this point - confirm against install_ipfs_tasks.yaml.
- name: Add the IPFS node to the swarm
  ansible.builtin.command:
    # argv avoids re-splitting the templated address on whitespace.
    argv:
      - ipfs
      - swarm
      - connect
      - "{{ requester_ipfs_peer }}"
  # The command module cannot tell whether anything changed and would
  # report "changed" on every run; opening a peer connection leaves no
  # state for Ansible to track, so report it as unchanged.
  changed_when: false

- name: Install Bacalhau
ansible.builtin.shell:
cmd: curl -sL https://get.bacalhau.org/install.sh | bash
Expand Down
2 changes: 1 addition & 1 deletion infrastructure/terraform/plex.tf
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ resource "aws_instance" "plex_requester" {
}

resource "aws_eip" "plex_prod" {
instance = aws_instance.plex_compute_prod["compute1"].id
instance = aws_instance.plex_requester.id
vpc = true

tags = {
Expand Down

0 comments on commit 87590b0

Please sign in to comment.