Skip to content

Commit

Permalink
Allow overriding CC_TURING
Browse files: browse the repository at this point in the history
  • Loading branch information
SlyEcho authored and YellowRoseCx committed Aug 12, 2023
1 parent e77a4c3 commit 8913bc6
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 4 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ if (LLAMA_HIPBLAS)
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
target_compile_definitions(ggml-rocm PRIVATE K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
target_compile_definitions(ggml-rocm PRIVATE CC_TURING=1000000000)
set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
target_link_libraries(ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas)

Expand Down
9 changes: 6 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -209,17 +209,20 @@ ggml-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
-DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \
-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
-DGGML_CUDA_FORCE_DMMV \
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) \
-DCC_TURING=1000000000
ggml_v2-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
-DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \
-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
-DGGML_CUDA_FORCE_DMMV \
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) \
-DCC_TURING=1000000000
ggml_v2-cuda-legacy.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
-DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) \
-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
-DGGML_CUDA_FORCE_DMMV \
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) # DGGML_CUDA_DMMV_F16 does not currently work with AMD.
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) \
-DCC_TURING=1000000000 # DGGML_CUDA_DMMV_F16 does not currently work with AMD.
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
$(CXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
Expand Down
4 changes: 3 additions & 1 deletion ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,9 @@
#include "ggml.h"

#define MIN_CC_DP4A 610 // minimum compute capability for __dp4a, an intrinsic for byte-wise dot products
#define CC_TURING 1000000000
#ifndef CC_TURING
#define CC_TURING 700
#endif

#if defined(GGML_USE_HIPBLAS)
#define __CUDA_ARCH__ 1300
Expand Down

0 comments on commit 8913bc6

Please sign in to comment.