-
Notifications
You must be signed in to change notification settings - Fork 10.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 26c0846
Showing
9 changed files
with
13,094 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
*.o | ||
*.a | ||
.cache/ | ||
.vs/ | ||
.vscode/ | ||
.DS_Store | ||
|
||
build/ | ||
build-em/ | ||
build-debug/ | ||
build-release/ | ||
build-static/ | ||
build-no-accel/ | ||
build-sanitize-addr/ | ||
build-sanitize-thread/ | ||
|
||
/main | ||
/quantize | ||
|
||
arm_neon.h | ||
compile_commands.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,203 @@ | ||
# Platform detection. Each value can be overridden from the command line or
# the environment (e.g. `make UNAME_M=arm64`), which is useful for cross builds.
ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif

ifndef UNAME_P
UNAME_P := $(shell uname -p)
endif

ifndef UNAME_M
UNAME_M := $(shell uname -m)
endif

# First line of each compiler's version banner, shown in the build info.
CCV := $(shell $(CC) --version | head -n 1)
CXXV := $(shell $(CXX) --version | head -n 1)

# Mac OS + Arm can report x86_64
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
ifeq ($(UNAME_S),Darwin)
    ifneq ($(UNAME_P),arm)
        # The hw.optional.arm64 key may not exist on every Mac; discard the
        # sysctl error so the build output stays clean and SYSCTL_M is
        # simply empty in that case.
        SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
        ifeq ($(SYSCTL_M),1)
            # The detected values are deliberately left untouched; only warn.
            # UNAME_P := arm
            # UNAME_M := arm64
            warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
        endif
    endif
endif
|
||
# | ||
# Compile flags | ||
# | ||
|
||
# Baseline flags for every platform; the conditionals further down append to them.
CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC
CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
LDFLAGS =

# OS specific
# TODO: support Windows
# Linux, Darwin, FreeBSD and Haiku all get -pthread for both C and C++.
ifneq ($(filter Linux Darwin FreeBSD Haiku,$(UNAME_S)),)
    CFLAGS += -pthread
    CXXFLAGS += -pthread
endif
|
||
# Architecture specific | ||
# TODO: probably these flags need to be tweaked on some architectures | ||
# feel free to update the Makefile for your architecture and send a pull request or issue | ||
# x86 (x86_64 / i686): enable SIMD extensions according to what the host CPU
# reports. Detection is per-OS; unknown OSes get the full set unconditionally.
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
    ifeq ($(UNAME_S),Darwin)
        # F16C is enabled unconditionally; the rest comes from sysctl.
        CFLAGS += -mf16c
        AVX1_M := $(shell sysctl machdep.cpu.features)
        ifneq (,$(findstring FMA,$(AVX1_M)))
            CFLAGS += -mfma
        endif
        ifneq (,$(findstring AVX1.0,$(AVX1_M)))
            CFLAGS += -mavx
        endif
        AVX2_M := $(shell sysctl machdep.cpu.leaf7_features)
        ifneq (,$(findstring AVX2,$(AVX2_M)))
            CFLAGS += -mavx2
        endif
    else ifeq ($(UNAME_S),Linux)
        # Feature names in /proc/cpuinfo are lowercase.
        AVX1_M := $(shell grep "avx " /proc/cpuinfo)
        ifneq (,$(findstring avx,$(AVX1_M)))
            CFLAGS += -mavx
        endif
        AVX2_M := $(shell grep "avx2 " /proc/cpuinfo)
        ifneq (,$(findstring avx2,$(AVX2_M)))
            CFLAGS += -mavx2
        endif
        FMA_M := $(shell grep "fma " /proc/cpuinfo)
        ifneq (,$(findstring fma,$(FMA_M)))
            CFLAGS += -mfma
        endif
        F16C_M := $(shell grep "f16c " /proc/cpuinfo)
        ifneq (,$(findstring f16c,$(F16C_M)))
            CFLAGS += -mf16c
        endif
        SSE3_M := $(shell grep "sse3 " /proc/cpuinfo)
        ifneq (,$(findstring sse3,$(SSE3_M)))
            CFLAGS += -msse3
        endif
    else ifeq ($(UNAME_S),Haiku)
        # The grep patterns are uppercase and $(findstring) is case-sensitive,
        # so the needle must use the same case as the grep pattern or the
        # flag is never added.
        AVX1_M := $(shell sysinfo -cpu | grep "AVX ")
        ifneq (,$(findstring AVX,$(AVX1_M)))
            CFLAGS += -mavx
        endif
        AVX2_M := $(shell sysinfo -cpu | grep "AVX2 ")
        ifneq (,$(findstring AVX2,$(AVX2_M)))
            CFLAGS += -mavx2
        endif
        FMA_M := $(shell sysinfo -cpu | grep "FMA ")
        ifneq (,$(findstring FMA,$(FMA_M)))
            CFLAGS += -mfma
        endif
        F16C_M := $(shell sysinfo -cpu | grep "F16C ")
        ifneq (,$(findstring F16C,$(F16C_M)))
            CFLAGS += -mf16c
        endif
    else
        # No detection available for this OS: enable everything.
        CFLAGS += -mfma -mf16c -mavx -mavx2
    endif
endif
# Machines reporting "amd64": all SIMD flags are enabled without detection.
ifeq (amd64,$(UNAME_M))
    CFLAGS += -mavx -mavx2 -mfma -mf16c
endif

# POWER (any machine name starting with ppc64)
ifneq (,$(filter ppc64%,$(UNAME_M)))
    POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
    ifneq ($(findstring POWER9,$(POWER9_M)),)
        CFLAGS += -mpower9-vector
    endif
    # Require c++23's std::byteswap for big-endian support.
    ifeq (ppc64,$(UNAME_M))
        CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
    endif
endif

# Accelerate is used on Darwin unless WHISPER_NO_ACCELERATE is defined.
ifndef WHISPER_NO_ACCELERATE
    # Mac M1 - include Accelerate framework
    ifeq (Darwin,$(UNAME_S))
        CFLAGS += -DGGML_USE_ACCELERATE
        LDFLAGS += -framework Accelerate
    endif
endif

# Opt-in OpenBLAS backend (headers are looked up under /usr/local/include/openblas).
ifdef WHISPER_OPENBLAS
    CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
    LDFLAGS += -lopenblas
endif

# Opt-in gprof instrumentation for both C and C++.
ifdef WHISPER_GPROF
    CFLAGS += -pg
    CXXFLAGS += -pg
endif

# 64-bit ARM: tune for the build host.
ifneq (,$(filter aarch64%,$(UNAME_M)))
    CFLAGS += -mcpu=native
    CXXFLAGS += -mcpu=native
endif

# 32-bit ARM variants; the board names are the ones given by the original
# author (NOTE(review): not verified against actual hardware).
ifneq (,$(filter armv6%,$(UNAME_M)))
    # Raspberry Pi 1, 2, 3
    CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
endif
ifneq (,$(filter armv7%,$(UNAME_M)))
    # Raspberry Pi 4
    CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif
ifneq (,$(filter armv8%,$(UNAME_M)))
    # Raspberry Pi 4
    CFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif
|
||
# | ||
# Print build information | ||
# | ||
|
||
# Banner printed on every make invocation, showing the detected platform and
# the final flag set.
$(info I llama.cpp build info: )
$(info I UNAME_S: $(UNAME_S))
$(info I UNAME_P: $(UNAME_P))
$(info I UNAME_M: $(UNAME_M))
$(info I CFLAGS: $(CFLAGS))
$(info I CXXFLAGS: $(CXXFLAGS))
$(info I LDFLAGS: $(LDFLAGS))
$(info I CC: $(CCV))
$(info I CXX: $(CXXV))
$(info )

# First rule in the file, hence the default goal: build both executables.
default: main quantize

# These targets do not produce a file of the same name; declaring them phony
# keeps them working even if a file called "default", "clean" or "tests"
# exists in the source directory.
.PHONY: default clean tests

#
# Build library
#

ggml.o: ggml.c ggml.h
	$(CC) $(CFLAGS) -c $< -o $@

utils.o: utils.cpp utils.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

clean:
	rm -f *.o main quantize

# Links the main example and then runs it with -h to print its usage.
main: main.cpp ggml.o utils.o
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
	./main -h

quantize: quantize.cpp ggml.o utils.o
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)

#
# Tests
#

tests:
	bash ./tests/run-tests.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
# Convert a LLaMA model checkpoint to a ggml compatible file | ||
# | ||
# Load the model using Torch | ||
# Iterate over all variables and write them to a binary file. | ||
# | ||
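# For each variable, write the following: | ||
# - Number of dimensions (int) | ||
# - Name length (int) | ||
# - Data type (int): 0 = float32, 1 = float16 | ||
# - Dimensions (int[n_dims]), in reverse order of the array shape | ||
# - Name (char[name_length]) | ||
# - Data (float32 or float16 values, matching the data type field) | ||
# | ||
# With ftype 1, tensors with more than one dimension are kept as 16-bit floats | ||
# and 1-D tensors are converted to float32. Pass ftype 0 as the second CLI | ||
# argument to convert everything to float32. | ||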
# | ||
# At the start of the ggml file we write the model parameters | ||
# and vocabulary. | ||
# | ||
|
||
import sys | ||
import json | ||
import struct | ||
import numpy as np | ||
import torch | ||
|
||
from sentencepiece import SentencePieceProcessor | ||
|
||
# Command line: <dir-model> <ftype>. Both arguments are required.
if len(sys.argv) < 3:
    print("Usage: convert-ckpt-to-ggml.py dir-model ftype\n")
    print(" ftype == 0 -> float32")
    print(" ftype == 1 -> float16")
    sys.exit(1)

dir_model = sys.argv[1]

# input files: hyper-parameters and weights live in the model directory,
# the tokenizer one level above it
fname_hparams = dir_model + "/params.json"
fname_model = dir_model + "/consolidated.00.pth"
fname_tokenizer = dir_model + "/../tokenizer.model"

# possible data types
# ftype == 0 -> float32
# ftype == 1 -> float16
#
# map from ftype to string
ftype_str = ["f32", "f16"]

# Reject a non-numeric ftype with the same message as an out-of-range one
# instead of letting int() abort the script with a traceback.
try:
    ftype = int(sys.argv[2])
except ValueError:
    print("Invalid ftype: " + sys.argv[2])
    sys.exit(1)

if ftype < 0 or ftype > 1:
    print("Invalid ftype: " + str(ftype))
    sys.exit(1)

# output in the same directory as the model, tagged with the data type
fname_out = dir_model + "/ggml-model-" + ftype_str[ftype] + ".bin"
|
||
# Hyper-parameters are read from the JSON file at fname_hparams.
with open(fname_hparams, "r") as hparams_file:
    hparams = json.load(hparams_file)

tokenizer = SentencePieceProcessor(fname_tokenizer)

# The vocabulary size is taken from the tokenizer and set (or overwritten)
# in the hyper-parameters.
hparams["vocab_size"] = tokenizer.vocab_size()

print(hparams)

# NOTE(review): torch.load unpickles the checkpoint - only run this script on
# checkpoint files from a trusted source.
model = torch.load(fname_model, map_location="cpu")
|
||
fout = open(fname_out, "wb")

# File header: the magic number followed by the hyper-parameters, each written
# as a native int in this exact order.
header_fields = (
    0x67676d6c,  # magic: ggml in hex
    hparams["vocab_size"],
    hparams["dim"],
    hparams["multiple_of"],
    hparams["n_heads"],
    hparams["n_layers"],
    64,  # rot
    ftype,
)
for field in header_fields:
    fout.write(struct.pack("i", field))

# Vocabulary: one length-prefixed UTF-8 entry per token id. The count comes
# from the vocab_size written in the header above, so the two always agree
# (it was previously hard-coded to 32000).
# Is this correct??
for token_id in range(hparams["vocab_size"]):
    # TODO: this is probably wrong - not sure how this tokenizer works
    text = tokenizer.decode([29889, token_id]).encode('utf-8')
    # remove the first byte (it's always '.')
    text = text[1:]
    fout.write(struct.pack("i", len(text)))
    fout.write(text)
|
||
# Write every tensor of the checkpoint as: header (n_dims, name length, data
# type), dimensions in reverse order, name bytes, raw data.
for name, tensor in model.items():
    # skip layers.X.attention.inner_attention.rope.freqs
    if name.endswith("freqs"):
        continue

    print("Processing variable: " + name + " with shape: ", tensor.shape, " and type: ", tensor.dtype)

    # Size-1 axes are dropped before writing, so the stored shape can have
    # fewer dimensions than the shape printed above.
    data = tensor.numpy().squeeze()
    dshape = data.shape
    n_dims = len(dshape)

    # default type is fp16; 1-D tensors, and everything when ftype == 0,
    # are stored as float32
    if ftype == 0 or n_dims == 1:
        print(" Converting to float32")
        data = data.astype(np.float32)
        ftype_cur = 0
    else:
        # Make sure the bytes written really are float16, as declared in the
        # tensor header; no copy is made when the data already is float16.
        data = data.astype(np.float16, copy=False)
        ftype_cur = 1

    # header (name_bytes avoids rebinding the builtin `str`)
    name_bytes = name.encode('utf-8')
    fout.write(struct.pack("iii", n_dims, len(name_bytes), ftype_cur))
    for dim in reversed(dshape):
        fout.write(struct.pack("i", dim))
    fout.write(name_bytes)

    # data
    data.tofile(fout)

fout.close()

print("Done. Output file: " + fname_out)
print("")
Oops, something went wrong.