Add hipBLAS feature and fix build script
Attempts to copy the CMake build steps as much as possible. The GPU is detected at runtime, but on my setup it appears to segfault when loading the model. I'm at a bit of a loss on the segfault, but this is definitely better than before.
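For reference, a downstream crate would opt into the new backend through cargo features, roughly like this. This is a minimal sketch of a hypothetical consumer Cargo.toml, not part of this commit; the version number is assumed to track the llama-cpp-sys-2 version pinned elsewhere in this diff, and the build script falls back to /opt/rocm/ when ROCM_PATH is unset.

# Hypothetical consumer manifest enabling the hipBLAS backend added here.
[dependencies]
llama-cpp-2 = { version = "0.1.48", features = ["hipblas"] }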
AdamNiederer committed Jun 15, 2024
1 parent fb8e8e2 commit cd98222
Showing 6 changed files with 39 additions and 25 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default.

3 changes: 2 additions & 1 deletion llama-cpp-2/Cargo.toml
@@ -16,9 +16,10 @@ tracing = { workspace = true }
[features]
cuda = ["llama-cpp-sys-2/cuda"]
metal = ["llama-cpp-sys-2/metal"]
hipblas = ["llama-cpp-sys-2/hipblas"]
sampler = []

[target.'cfg(all(target_os = "macos", any(target_arch = "aarch64", target_arch = "arm64")))'.dependencies]
llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features=["metal"], version = "0.1.48" }

[lints]
1 change: 1 addition & 0 deletions llama-cpp-2/src/lib.rs
@@ -12,6 +12,7 @@
//! # Feature Flags
//!
//! - `cuda` enables CUDA gpu support.
//! - `hipblas` enables hipBLAS (ROCm) gpu support (experimental).
//! - `sampler` adds the [`context::sample::sampler`] struct for a more rusty way of sampling.
use std::ffi::NulError;
use std::fmt::Debug;
3 changes: 2 additions & 1 deletion llama-cpp-sys-2/Cargo.toml
@@ -51,8 +51,9 @@ include = [
bindgen = { workspace = true }
cc = { workspace = true, features = ["parallel"] }
once_cell = "1.19.0"
glob = "0.3.1"

[features]
cuda = []
metal = []

hipblas = []
48 changes: 29 additions & 19 deletions llama-cpp-sys-2/build.rs
@@ -1,11 +1,13 @@
use std::env;
use std::env::{self, VarError};
use std::fs::{read_dir, File};
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::str::FromStr;

use cc::Build;
use once_cell::sync::Lazy;
use glob::glob;

// This build file is based on:
// https://github.com/mdrokz/rust-llama.cpp/blob/master/build.rs
@@ -365,23 +367,16 @@ fn compile_blis(cx: &mut Build) {
}

fn compile_hipblas(cx: &mut Build, cxx: &mut Build, mut hip: Build) -> &'static str {
    const DEFAULT_ROCM_PATH_STR: &str = "/opt/rocm/";
    let rocm_path_str = env::var("ROCM_PATH").or(Ok::<String, VarError>(String::from_str("/opt/rocm/").unwrap())).unwrap();

    let rocm_path_str = env::var("ROCM_PATH")
        .map_err(|_| DEFAULT_ROCM_PATH_STR.to_string())
        .unwrap();
    println!("Compiling HIPBLAS GGML. Using ROCm from {rocm_path_str}");
    println!("Compiling hipBLAS GGML. Using ROCm from {rocm_path_str}");

    let rocm_path = PathBuf::from(rocm_path_str);
    let rocm_include = rocm_path.join("include");
    let rocm_lib = rocm_path.join("lib");
    let rocm_hip_bin = rocm_path.join("bin/hipcc");

    let cuda_lib = "ggml-cuda";
    let cuda_file = cuda_lib.to_string() + ".cu";
    let cuda_header = cuda_lib.to_string() + ".h";

    let defines = ["GGML_USE_HIPBLAS", "GGML_USE_CUBLAS"];
    let defines = ["GGML_USE_HIPBLAS", "GGML_USE_CUDA"];
    for def in defines {
        cx.define(def, None);
        cxx.define(def, None);
@@ -390,24 +385,39 @@ fn compile_hipblas(cx: &mut Build, cxx: &mut Build, mut hip: Build) -> &'static
    cx.include(&rocm_include);
    cxx.include(&rocm_include);

    let ggml_cuda = glob(LLAMA_PATH.join("ggml-cuda").join("*.cu").to_str().unwrap())
        .unwrap().filter_map(Result::ok).collect::<Vec<_>>();
    let ggml_template_fattn = glob(LLAMA_PATH.join("ggml-cuda").join("template-instances").join("fattn-vec*.cu").to_str().unwrap())
        .unwrap().filter_map(Result::ok).collect::<Vec<_>>();
    let ggml_template_wmma = glob(LLAMA_PATH.join("ggml-cuda").join("template-instances").join("fattn-wmma*.cu").to_str().unwrap())
        .unwrap().filter_map(Result::ok).collect::<Vec<_>>();
    let ggml_template_mmq = glob(LLAMA_PATH.join("ggml-cuda").join("template-instances").join("mmq*.cu").to_str().unwrap())
        .unwrap().filter_map(Result::ok).collect::<Vec<_>>();

    hip.compiler(rocm_hip_bin)
        .std("c++11")
        .file(LLAMA_PATH.join(cuda_file))
        .include(LLAMA_PATH.join(cuda_header))
        .define("LLAMA_CUDA_DMMV_X", Some("32"))
        .define("LLAMA_CUDA_MMV_Y", Some("1"))
        .define("LLAMA_CUDA_KQUANTS_ITER", Some("2"))
        .file(LLAMA_PATH.join("ggml-cuda.cu"))
        .files(ggml_cuda)
        .files(ggml_template_fattn)
        .files(ggml_template_wmma)
        .files(ggml_template_mmq)
        .include(LLAMA_PATH.join(""))
        .include(LLAMA_PATH.join("ggml-cuda"))
        .define("GGML_USE_HIPBLAS", None)
        .compile(cuda_lib);
        .define("GGML_USE_CUDA", None)
        .compile("ggml-cuda");

    println!(
        "cargo:rustc-link-search=native={}",
        rocm_lib.to_string_lossy()
    );
    println!("cargo:rustc-link-search=native={}", rocm_lib.to_string_lossy());

    let rocm_libs = ["hipblas", "rocblas", "amdhip64"];
    for lib in rocm_libs {
        println!("cargo:rustc-link-lib={lib}");
    }

    cuda_lib
    "ggml-cuda"
}

fn compile_cuda(cx: &mut Build, cxx: &mut Build, featless_cxx: Build) -> &'static str {
8 changes: 4 additions & 4 deletions simple/src/main.rs
@@ -44,7 +44,7 @@ struct Args {
    #[arg(short = 'o', value_parser = parse_key_val)]
    key_value_overrides: Vec<(String, ParamOverrideValue)>,
    /// Disable offloading layers to the gpu
    #[cfg(feature = "cuda")]
    #[cfg(any(feature = "cuda", feature = "hipblas"))]
    #[clap(long)]
    disable_gpu: bool,
    #[arg(short = 's', long, help = "RNG seed (default: 1234)")]
@@ -124,7 +124,7 @@ fn main() -> Result<()> {
        model,
        prompt,
        file,
        #[cfg(feature = "cuda")]
        #[cfg(any(feature = "cuda", feature = "hipblas"))]
        disable_gpu,
        key_value_overrides,
        seed,
@@ -138,13 +138,13 @@ fn main() -> Result<()> {

    // offload all layers to the gpu
    let model_params = {
        #[cfg(feature = "cuda")]
        #[cfg(any(feature = "cuda", feature = "hipblas"))]
        if !disable_gpu {
            LlamaModelParams::default().with_n_gpu_layers(1000)
        } else {
            LlamaModelParams::default()
        }
        #[cfg(not(feature = "cuda"))]
        #[cfg(not(any(feature = "cuda", feature = "hipblas")))]
        LlamaModelParams::default()
    };

