Skip to content

Commit

Permalink
sync : ggml (VMM, sync-ggml-am, dotprod ARM fixes, CUDA fixes) (#1691)
Browse files Browse the repository at this point in the history
* scripts : add sync-ggml-am.sh

* sync : ggml (VMM, ARM dot prod fix, etc.)

* build : fix CUDA build

* ggml : fix some mul mat cases + add tests for src1 F16

ggerganov/ggml@dbd0295
  • Loading branch information
ggerganov authored Dec 29, 2023
1 parent a5cc3dc commit e77b27c
Show file tree
Hide file tree
Showing 9 changed files with 721 additions and 767 deletions.
8 changes: 7 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -218,11 +218,17 @@ if (WHISPER_CUBLAS)
add_compile_definitions(GGML_USE_CUBLAS)

if (WHISPER_STATIC)
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
if (WIN32)
# As of 12.3.1 CUDA Tookit for Windows does not offer a static cublas library
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
else ()
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
endif()
else()
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
endif()

set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cuda_driver)
else()
message(FATAL_ERROR "cuBLAS not found")
endif()
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ ifdef WHISPER_CUBLAS

CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib
LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib
WHISPER_OBJ += ggml-cuda.o
NVCC = nvcc
NVCCFLAGS = --forward-unknown-to-host-compiler -arch=$(CUDA_ARCH_FLAG)
Expand Down
138 changes: 138 additions & 0 deletions extra/sync-ggml-am.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#!/bin/bash
#
# Synchronize ggml changes to whisper.cpp
#
# Usage:
#
# $ cd /path/to/whisper.cpp
# $ ./extra/sync-ggml-am.sh
#

set -e

sd=$(dirname $0)
cd $sd/../

SRC_WHISPER=$(pwd)
SRC_GGML=$(cd ../ggml; pwd)

if [ ! -d $SRC_GGML ]; then
echo "ggml not found at $SRC_GGML"
exit 1
fi

lc=$(cat $SRC_WHISPER/extra/sync-ggml.last)
echo "Syncing ggml changes since commit $lc"

cd $SRC_GGML

git log --oneline $lc..HEAD

git format-patch $lc --stdout -- \
include/ggml/ggml*.h \
src/ggml*.h \
src/ggml*.c \
src/ggml*.cpp \
src/ggml*.m \
src/ggml*.metal \
src/ggml*.cu \
tests/test-opt.cpp \
tests/test-grad0.cpp \
tests/test-quantize-fns.cpp \
tests/test-quantize-perf.cpp \
tests/test-backend-ops.cpp \
> $SRC_WHISPER/ggml-src.patch

# delete files if empty
if [ ! -s $SRC_WHISPER/ggml-src.patch ]; then
rm -v $SRC_WHISPER/ggml-src.patch
fi

cd $SRC_WHISPER

if [ -f $SRC_WHISPER/ggml-src.patch ]; then
# replace PR numbers
#
# Subject: some text (#1234)
# Subject: some text (ggml/1234)
cat ggml-src.patch | sed -e 's/^Subject: \(.*\) (#\([0-9]*\))/Subject: \1 (ggml\/\2)/' > ggml-src.patch.tmp
mv ggml-src.patch.tmp ggml-src.patch

cat ggml-src.patch | sed -e 's/^\(.*\) (#\([0-9]*\))$/\1 (ggml\/\2)/' > ggml-src.patch.tmp
mv ggml-src.patch.tmp ggml-src.patch

# replace filenames:
#
# src/ggml.c -> ggml.c
# src/ggml-alloc.c -> ggml-alloc.c
# src/ggml-backend-impl.h -> ggml-backend-impl.h
# src/ggml-backend.c -> ggml-backend.c
# src/ggml-cuda.cu -> ggml-cuda.cu
# src/ggml-cuda.h -> ggml-cuda.h
# src/ggml-impl.h -> ggml-impl.h
# src/ggml-metal.h -> ggml-metal.h
# src/ggml-metal.m -> ggml-metal.m
# src/ggml-metal.metal -> ggml-metal.metal
# src/ggml-mpi.h -> ggml-mpi.h
# src/ggml-mpi.c -> ggml-mpi.c
# src/ggml-opencl.cpp -> ggml-opencl.cpp
# src/ggml-opencl.h -> ggml-opencl.h
# src/ggml-quants.c -> ggml-quants.c
# src/ggml-quants.h -> ggml-quants.h
# include/ggml/ggml.h -> ggml.h
# include/ggml/ggml-alloc.h -> ggml-alloc.h
# include/ggml/ggml-backend.h -> ggml-backend.h
#
# examples/common.h -> examples/common.h
# examples/common.cpp -> examples/common.cpp
# examples/common-ggml.h -> examples/common-ggml.h
# examples/common-ggml.cpp -> examples/common-ggml.cpp
#
# examples/whisper/whisper.h -> whisper.h
# examples/whisper/whisper.cpp -> whisper.cpp
# examples/whisper/main.cpp -> examples/main/main.cpp
# examples/whisper/quantize.cpp -> examples/quantize/quantize.cpp

cat ggml-src.patch | sed \
-e 's/src\/ggml\.c/ggml.c/g' \
-e 's/src\/ggml-alloc\.c/ggml-alloc.c/g' \
-e 's/src\/ggml-backend-impl\.h/ggml-backend-impl.h/g' \
-e 's/src\/ggml-backend\.c/ggml-backend.c/g' \
-e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \
-e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \
-e 's/src\/ggml-impl\.h/ggml-impl.h/g' \
-e 's/src\/ggml-metal\.h/ggml-metal.h/g' \
-e 's/src\/ggml-metal\.m/ggml-metal.m/g' \
-e 's/src\/ggml-metal\.metal/ggml-metal.metal/g' \
-e 's/src\/ggml-mpi\.h/ggml-mpi.h/g' \
-e 's/src\/ggml-mpi\.c/ggml-mpi.c/g' \
-e 's/src\/ggml-opencl\.cpp/ggml-opencl.cpp/g' \
-e 's/src\/ggml-opencl\.h/ggml-opencl.h/g' \
-e 's/src\/ggml-quants\.c/ggml-quants.c/g' \
-e 's/src\/ggml-quants\.h/ggml-quants.h/g' \
-e 's/include\/ggml\/ggml\.h/ggml.h/g' \
-e 's/include\/ggml\/ggml-alloc\.h/ggml-alloc.h/g' \
-e 's/include\/ggml\/ggml-backend\.h/ggml-backend.h/g' \
-e 's/examples\/common\.h/examples\/common.h/g' \
-e 's/examples\/common\.cpp/examples\/common.cpp/g' \
-e 's/examples\/common-ggml\.h/examples\/common-ggml.h/g' \
-e 's/examples\/common-ggml\.cpp/examples\/common-ggml.cpp/g' \
-e 's/examples\/whisper\/whisper\.h/whisper.h/g' \
-e 's/examples\/whisper\/whisper\.cpp/whisper.cpp/g' \
-e 's/examples\/whisper\/main\.cpp/examples\/main\/main.cpp/g' \
-e 's/examples\/whisper\/quantize\.cpp/examples\/quantize\/quantize.cpp/g' \
> ggml-src.patch.tmp
mv ggml-src.patch.tmp ggml-src.patch

git am ggml-src.patch

rm -v $SRC_WHISPER/ggml-src.patch
fi

# update last commit
cd $SRC_GGML
git log -1 --format=%H > $SRC_WHISPER/extra/sync-ggml.last

echo "Done"

exit 0
1 change: 1 addition & 0 deletions extra/sync-ggml.last
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1467a4eb71bdb5ac316d248a7f3f26cdadc56b68
24 changes: 12 additions & 12 deletions ggml-backend.c
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ static void ggml_backend_registry_init(void) {
void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
GGML_ASSERT(ggml_backend_registry_count < GGML_MAX_BACKENDS_REG);

int id = ggml_backend_registry_count;
size_t id = ggml_backend_registry_count;

ggml_backend_registry[id] = (struct ggml_backend_reg) {
/* .name = */ {0},
Expand Down Expand Up @@ -330,6 +330,8 @@ size_t ggml_backend_reg_find_by_name(const char * name) {
return i;
}
}

// not found
return SIZE_MAX;
}

Expand All @@ -340,15 +342,15 @@ ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str)
const char * params = strchr(backend_str, ':');
char backend_name[128];
if (params == NULL) {
strcpy(backend_name, backend_str);
snprintf(backend_name, sizeof(backend_name), "%s", backend_str);
params = "";
} else {
strncpy(backend_name, backend_str, params - backend_str);
backend_name[params - backend_str] = '\0';
snprintf(backend_name, sizeof(backend_name), "%.*s", (int)(params - backend_str), backend_str);
params++;
}

size_t backend_i = ggml_backend_reg_find_by_name(backend_name);

if (backend_i == SIZE_MAX) {
fprintf(stderr, "%s: backend %s not found\n", __func__, backend_name);
return NULL;
Expand Down Expand Up @@ -396,18 +398,12 @@ static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
}

static void ggml_backend_cpu_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");

memcpy((char *)tensor->data + offset, data, size);

GGML_UNUSED(buffer);
}

static void ggml_backend_cpu_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");

memcpy(data, (const char *)tensor->data + offset, size);

GGML_UNUSED(buffer);
Expand Down Expand Up @@ -618,10 +614,14 @@ static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_c
}

static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
return true;
switch (op->op) {
case GGML_OP_MUL_MAT:
return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_internal_get_type_traits(op->src[0]->type).vec_dot_type;
default:
return true;
}

GGML_UNUSED(backend);
GGML_UNUSED(op);
}

static struct ggml_backend_i cpu_backend_i = {
Expand Down
Loading

0 comments on commit e77b27c

Please sign in to comment.