Requirements: Add CUDA 12.1 and update one click
Add a separate CUDA 12.1 requirements file and update the one-click
installer to prompt the user with the relevant information about
CUDA 12.1 and Flash Attention 2 (FA2) on Windows.

CUDA 11.8 remains the default for broader GPU compatibility,
but 12.1 is now an option for those who want it.
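A quick way to confirm which CUDA build of PyTorch the installer ended up with (a minimal sketch for reference, not part of this commit):

# Minimal sketch (not part of this commit): confirm which CUDA build of
# PyTorch the one-click installer produced.
import torch

print(torch.__version__)          # e.g. "2.1.0+cu121" or "2.1.0+cu118"
print(torch.version.cuda)         # e.g. "12.1" or "11.8"
print(torch.cuda.is_available())  # True if the GPU is usable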

Signed-off-by: kingbri <bdashore3@proton.me>
bdashore3 committed Oct 14, 2023
1 parent 8cce1f1 commit 722fac5
Showing 2 changed files with 68 additions and 5 deletions.
23 changes: 18 additions & 5 deletions one_click.py
@@ -170,9 +170,17 @@ def install_webui():
     # Find the proper Pytorch installation command
     install_git = "conda install -y -k ninja git"
     install_pytorch = "python -m pip install torch torchvision torchaudio"
+    use_cuda121 = "N"
 
     if any((is_windows(), is_linux())) and choice == "A":
-        install_pytorch = "python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118"
+        # Ask for the CUDA version if using an Nvidia GPU
+        print("Would you like to use CUDA 12.1? This is required for Flash Attention 2 on Windows. CUDA 12.1 is not supported on Kepler GPUs.")
+        use_cuda121 = input("Input (Y/N)> ").upper()
+        while use_cuda121 not in ("Y", "N"):
+            print("Invalid choice. Please try again.")
+            use_cuda121 = input("Input (Y/N)> ").upper()
+
+        install_pytorch = f"python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/{'cu121' if use_cuda121 == 'Y' else 'cu118'}"
     elif not is_macos() and choice == "B":
         if is_linux():
             install_pytorch = "python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.6"
@@ -189,7 +197,7 @@ def install_webui():
 
     # Install CUDA libraries (this wasn't necessary for Pytorch before...)
     if choice == "A":
-        run_cmd("conda install -y -c \"nvidia/label/cuda-11.8.0\" cuda-runtime", assert_success=True, environment=True)
+        run_cmd(f"conda install -y -c \"nvidia/label/{'cuda-12.1.0' if use_cuda121 == 'Y' else 'cuda-11.8.0'}\" cuda-runtime", assert_success=True, environment=True)
 
     # Install the webui requirements
     update_requirements(initial_installation=True)
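Similarly, the conda line now resolves to one of the following commands, again keyed off the prompt answer:

conda install -y -c "nvidia/label/cuda-12.1.0" cuda-runtime
conda install -y -c "nvidia/label/cuda-11.8.0" cuda-runtime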
@@ -208,7 +216,7 @@ def update_requirements(initial_installation=False):
     ]
 
     before_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check}
-    run_cmd("git pull --autostash", assert_success=True, environment=True)
+    # run_cmd("git pull --autostash", assert_success=True, environment=True)
     after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check}
 
     # Check for differences in installation file hashes
@@ -238,13 +246,18 @@ def update_requirements(initial_installation=False):
 
     # Detect the PyTorch version
     torver = torch_version()
-    is_cuda = '+cu' in torver # 2.0.1+cu118
+    print(f"TORCH: {torver}")
+    is_cuda = '+cu118' in torver or '+cu121' in torver # 2.1.0+cu118 or 2.1.0+cu121
+    is_cuda121 = '+cu121' in torver # 2.1.0+cu121
+    is_cuda118 = '+cu118' in torver # 2.1.0+cu118
     is_cuda117 = '+cu117' in torver # 2.0.1+cu117
     is_rocm = '+rocm' in torver # 2.0.1+rocm5.4.2
     is_intel = '+cxx11' in torver # 2.0.1a0+cxx11.abi
     is_cpu = '+cpu' in torver # 2.0.1+cpu
 
-    if is_rocm:
+    if is_cuda121:
+        requirements_file = "requirements_cu121.txt"
+    elif is_rocm:
         if cpu_has_avx2():
             requirements_file = "requirements_amd.txt"
         else:
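torch_version() is a helper defined elsewhere in one_click.py; a minimal sketch of the idea it relies on (an assumed implementation, the real helper may read the version differently):

# Minimal sketch (assumption; not the actual one_click.py helper):
# read the installed PyTorch version string, including its build suffix.
from importlib.metadata import version

def torch_version() -> str:
    return version("torch")  # e.g. "2.1.0+cu121"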
50 changes: 50 additions & 0 deletions requirements_cu121.txt
@@ -0,0 +1,50 @@
accelerate==0.23.*
colorama
datasets
einops
exllamav2==0.0.6; platform_system != "Darwin" and platform_machine != "x86_64"
gradio==3.47.*
markdown
numpy==1.24
optimum==1.13.1
pandas
peft==0.5.*
Pillow>=9.5.0
pyyaml
requests
safetensors==0.4.0
scipy
sentencepiece
tensorboard
transformers==4.34.*
tqdm
wandb

git+https://github.com/oobabooga/torch-grammar.git

# bitsandbytes
bitsandbytes==0.41.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"

# llama-cpp-python (CPU only, AVX2)
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-win_amd64.whl; platform_system == "Windows"

# CUDA wheels
https://github.com/jllllll/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows"
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp/exllamav2/releases/download/v0.0.6/exllamav2-0.0.6+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows"
https://github.com/turboderp/exllamav2/releases/download/v0.0.6/exllamav2-0.0.6+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
# https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu122torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.27+cu121-py3-none-any.whl
autoawq==0.1.4

# Flash Attention. The CUDA 12.2 wheel is backwards compatible with 12.1.
flash_attn; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/bdashore3/flash-attention/releases/download/2.3.2-2/flash_attn-2.3.2+cu122-cp310-cp310-win_amd64.whl; platform_system == "Windows"
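The `; platform_system == ...` suffixes above are PEP 508 environment markers; pip evaluates them against the current machine, so each wheel is only installed where it applies. A minimal sketch of how such a marker evaluates, using the packaging library (which pip vendors):

# Minimal sketch: evaluating a PEP 508 environment marker like the ones above.
from packaging.markers import Marker

marker = Marker('platform_system == "Linux" and platform_machine == "x86_64"')
print(marker.evaluate())  # True only on x86_64 Linux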
