Boost AI and LLM application dev on TPU.
🚧 Building in 2024... 🚧
# Run on Cloud Shell Terminal
curl -fsSL bit.ly/new-gcp-vm-instance | sh
## Here, USER=m0nius ZONE=asia-east1-b TEMPLATE=xvm
curl -fsSL bit.ly/new-gcp-vm-instance | sh -s -- m0nius asia-east1-b xvm
# Generate new ssh key
curl -fsSL bit.ly/ssh-vm-gen | sh
# New rootless mamba environment with zsh
curl -fsSL bit.ly/create-mamba-zsh | sh
# Run on Cloud Shell Terminal
curl -fsSL bit.ly/attach-gcp-vm-disk | sh
## Here, DISK=disk-1 ZONE=asia-east1-b VM_NAME=xvm-1
curl -fsSL bit.ly/attach-gcp-vm-disk | sh -s -- disk-1 asia-east1-b xvm-1
# Clean all queued TPU nodes
curl -fsSL bit.ly/clean-tpu-nodes | sh -s -- proj_name asia-east1-b
# Run on Cloud Shell Terminal, TPUv2
curl -fsSL bit.ly/new-tpu-v2-node | sh -s -- -y
# Run on Cloud Shell Terminal, queued TPUv4
curl -fsSL bit.ly/new-tpu-v4-queue | sh -s -- -y
TPU
curl -fsSL bit.ly/tpu-torch-xla | sh
# OR
curl -fsSL bit.ly/tpu-rootless-xla | sh
CUDA
curl -fsSL bit.ly/cuda-torch-xla | sh
# OR
curl -fsSL bit.ly/cuda-rootless-xla | sh
# Run on Cloud Shell Terminal, TPUv2
curl -fsSL bit.ly/new-LLM-TPUv2-train | sh -s -- -y
# Run on Cloud Shell Terminal, queued TPUv4
curl -fsSL bit.ly/new-LLM-TPUv4-train | sh -s -- -y
# Replace OS of the VM to Alpine Linux
curl -fsSL bit.ly/os-LLM-Alpine-acc | sh -s -- 3.19
# Mount remote dataset
curl -fsSL bit.ly/remote-LLM-dataset-mount | sh -s -- dataset
curl -fsSL bit.ly/new-gcp-api | sh -s -- project_name api_num api_target
curl -fsSL bit.ly/new-gcp-dns | sh -s -- cf_token cf_domain cf_zone
curl -fsSL bit.ly/new-gcp-sb | sh -s -- cf_token cf_domain cf_zone
curl -fsSL bit.ly/new-gcp-wg | sh -s -- license
curl -fsSL bit.ly/create-vm-user | sh -s -- username
curl -fsSL bit.ly/create-ssh-tun | sh -s -- username
curl -fsSL bit.ly/vertex-test | sh -s -- project_name model_name
- https://pytorch.org/blog/scaling-pytorch-models-on-cloud-tpus-with-fsdp
- https://huggingface.co/blog/accelerate-large-models
- https://pytorch.org/blog/path-achieve-low-inference-latency
- https://github.com/pytorch-tpu/transformers/blob/llama2-google-next-training/SPMD_USER_GUIDE.md
- https://github.com/pytorch/xla/blob/master/docs/spmd.md#spmd-debugging-tool