mmap()-backed istream implementation #150

Closed · wants to merge 3 commits
main.cpp — 128 changes: 125 additions & 3 deletions
@@ -14,6 +14,9 @@
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
#include <signal.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#endif

#define ANSI_COLOR_RED "\x1b[31m"
@@ -83,14 +86,131 @@ struct llama_model {
std::map<std::string, struct ggml_tensor *> tensors;
};

#ifndef USE_MMAP
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
#define USE_MMAP 1
#else
#define USE_MMAP 0
#endif
#endif

#if USE_MMAP
// std::istrstream is deprecated, so reimplement the equivalent: a
// non-owning streambuf over a fixed in-memory range.
struct membuf : std::streambuf {
membuf(char const* base, size_t size) {
char* gptr(const_cast<char*>(base));
this->setg(gptr, gptr, gptr + size);
}
};
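// For reference, a minimal sketch of the deprecated facility that membuf
// stands in for (illustrative, not part of this change):
//
//     #include <strstream>
//     std::istrstream in(base, size);   // non-owning istream over raw memory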
// A std::istream whose buffer is the mmap()ed file itself. The membuf base
// is constructed first, so passing `this` to the std::istream constructor
// as its streambuf is well-defined.
struct llama_istream : virtual membuf, std::istream {
    size_t mapped_size; // length passed to mmap(), rounded up to a page multiple

    // `mode` is accepted for signature parity with std::ifstream and is
    // otherwise ignored: the mapping is always read-only binary.
    llama_istream(const std::string & fname, std::ios::openmode mode = std::ios::binary) :
        llama_istream(mmap_file(fname)) {}

    // The tuple is (base pointer, logical file size, page-rounded mapped size).
    llama_istream(std::tuple<char const*, size_t, size_t> t) :
        llama_istream(std::get<0>(t), std::get<1>(t), std::get<2>(t)) {}

llama_istream(char const* base, size_t size, size_t mapped_size) :
membuf(base, size),
std::istream(static_cast<std::streambuf*>(this)),
mapped_size(mapped_size) {
if (base == errcontent)
setstate(std::ios::failbit);
}

    std::char_traits<char>::pos_type seekoff(
            std::char_traits<char>::off_type off,
            std::ios_base::seekdir dir,
            std::ios_base::openmode which = std::ios_base::in) override {
        // Recompute the get pointer with setg() in all three cases;
        // streambuf::gbump() takes an int and would truncate offsets
        // beyond 2 GiB, which model files can easily exceed.
        if (dir == std::ios_base::cur)
            setg(eback(), gptr() + off, egptr());
        else if (dir == std::ios_base::end)
            setg(eback(), egptr() + off, egptr());
        else if (dir == std::ios_base::beg)
            setg(eback(), eback() + off, egptr());
        return gptr() - eback();
    }

    std::char_traits<char>::pos_type seekpos(
            std::char_traits<char>::pos_type sp,
            std::ios_base::openmode which = std::ios_base::in) override {
        // An absolute seek is a relative seek from the beginning.
        return seekoff(std::char_traits<char>::off_type(sp), std::ios_base::beg, which);
    }

    void close() {
        // The mapping begins at eback(); gptr() advances as the stream is
        // read, so it is not a valid argument to munmap().
        char* base = this->eback();
        if (base == errcontent) {
            fprintf(stderr, "Closing an invalid llama_istream.\n");
            return;
        }
        munmap(base, mapped_size);
    }
private:
    // Sentinel base pointer marking a failed mapping; checked in the
    // constructor (to set failbit) and in close() (to skip munmap()).
    constexpr static char const* errcontent = "";

    static std::tuple<char const*, size_t, size_t> mmap_file(const std::string & fname) {
        // Query the page size once and cache it; sysconf() returns -1 on failure.
        static long pagesize;
        if (!pagesize)
            pagesize = sysconf(_SC_PAGESIZE);
        if (pagesize <= 0) {
            fprintf(stderr, "%s: could not get the OS page size.\n", __func__);
            return {errcontent, 1, 0};
        }

int fd = open(fname.c_str(), O_RDONLY);
if (fd == -1) {
fprintf(stderr, "%s: failed to open() '%s'\n", __func__, fname.c_str());
return {errcontent, 1, 0};
}

        struct stat st;
        if (fstat(fd, &st) == -1) {
            fprintf(stderr, "%s: failed to stat '%s'\n", __func__, fname.c_str());
            ::close(fd); // don't leak the descriptor on the error path
            return {errcontent, 1, 0};
        }

        size_t file_size = st.st_size;
        // Round the mapping length up to a whole number of pages; pagesize
        // is a power of two, so -pagesize acts as the mask ~(pagesize - 1).
        size_t map_size = (file_size + pagesize - 1) & -pagesize;
        int prot = PROT_READ;
        int flags = MAP_SHARED;
        char* file_contents = (char*)mmap(NULL, map_size, prot, flags, fd, 0);
        if (file_contents == MAP_FAILED) { // mmap() reports failure only via MAP_FAILED
            fprintf(stderr, "%s: failed to mmap '%s'\n", __func__, fname.c_str());
            ::close(fd);
            return {errcontent, 1, 0};
        }

        // madvise() advice values are enumerators, not bit flags: OR-ing
        // MADV_SEQUENTIAL with MADV_WILLNEED (or MADV_HUGEPAGE) produces a
        // different advice value entirely, so issue each hint separately.
        if (madvise(file_contents, map_size, MADV_SEQUENTIAL) == -1 ||
            madvise(file_contents, map_size, MADV_WILLNEED) == -1) {
            fprintf(stderr, "%s: failed to madvise '%s'\n", __func__, fname.c_str());
            munmap(file_contents, map_size);
            ::close(fd);
            return {errcontent, 1, 0};
        }
#if defined(MADV_HUGEPAGE)
        madvise(file_contents, map_size, MADV_HUGEPAGE); // best-effort hint
#endif

        ::close(fd); // the mapping stays valid after the descriptor is closed

return std::make_tuple(file_contents, file_size, map_size);
}

};
#else
using llama_istream = std::ifstream;
#endif
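// Either branch yields a type the loader below can drive identically; an
// illustrative sketch (names as in this file):
//
//     llama_istream fin{fname, std::ios::binary};  // mmap-backed, or std::ifstream
//     fin.read(buf, n);                            // identical call sites either way
//     fin.seekg(off);                              // mmap build: handled by seekoff()
//
// Only the pubsetbuf() tuning needs to stay behind #if !USE_MMAP.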


// load the model's weights from a file
bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) {
fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());

    llama_istream fin{fname, std::ios::binary};
#if !USE_MMAP
    std::vector<char> f_buf(1024*1024);
    fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size());
#endif
if (!fin) {
fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
return false;
@@ -327,8 +447,10 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab

fprintf(stderr, "%s: loading model part %d/%d from '%s'\n", __func__, i+1, n_parts, fname_part.c_str());

        llama_istream fin{fname_part, std::ios::binary};
#if !USE_MMAP
        fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size());
#endif
fin.seekg(file_offset);

// load weights
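For anyone evaluating the approach outside this diff, here is a minimal, self-contained sketch of the same pattern (POSIX-only; the file name is hypothetical and error handling is trimmed):

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <unistd.h>
    #include <cstddef>
    #include <cstdio>
    #include <istream>
    #include <streambuf>

    // A read-only std::istream over an mmap()ed file: the idea llama_istream
    // packages up, reduced to its essentials.
    struct mapped_buf : std::streambuf {
        mapped_buf(char* base, size_t size) { setg(base, base, base + size); }
    };

    int main() {
        int fd = open("model.bin", O_RDONLY);   // hypothetical input file
        if (fd == -1) { perror("open"); return 1; }
        struct stat st;
        fstat(fd, &st);
        char* base = (char*)mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
        close(fd);                              // the mapping outlives the fd
        if (base == MAP_FAILED) { perror("mmap"); return 1; }

        mapped_buf buf(base, st.st_size);
        std::istream in(&buf);
        char magic[4];
        in.read(magic, sizeof(magic));          // served straight from the page cache
        printf("read %ld bytes\n", (long)in.gcount());

        munmap(base, st.st_size);
        return 0;
    }

The PR's version layers page-size rounding, madvise() hints, and ifstream-compatible seek and close semantics on top of this core.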