Skip to content

Commit

Permalink
ggml-alloc : use virtual memory for measurement
Browse files Browse the repository at this point in the history
  • Loading branch information
slaren committed Sep 2, 2023
1 parent 3358c38 commit 96f3662
Showing 1 changed file with 83 additions and 23 deletions.
106 changes: 83 additions & 23 deletions ggml-alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,27 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef __has_include
#if __has_include(<unistd.h>)
#include <unistd.h>
#if defined(_POSIX_MAPPED_FILES)
#include <sys/mman.h>
#endif
#if defined(_POSIX_MEMLOCK_RANGE)
#include <sys/resource.h>
#endif
#endif
#endif

#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#include <memoryapi.h>
#endif


#define UNUSED(x) (void)(x)
#define MAX(a, b) ((a) > (b) ? (a) : (b))
Expand Down Expand Up @@ -100,18 +121,24 @@ static void remove_allocated_tensor(struct ggml_allocr * alloc, struct ggml_tens
#endif


static size_t ggml_allocator_get_alloc_size(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
static size_t ggml_allocr_get_alloc_size(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
return ggml_nbytes(tensor);

UNUSED(alloc);
}

// check if a tensor is allocated by this buffer
static bool ggml_allocr_is_own(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
void * ptr = tensor->data;
return ptr >= alloc->data && (char *)ptr < (char *)alloc->data + alloc->max_size;
}

void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
#ifdef GGML_ALLOCATOR_DEBUG
GGML_ASSERT(ggml_is_view(tensor) == false); // views generally get data pointer from one of their sources
GGML_ASSERT(tensor->data == NULL); // avoid allocating tensor which already has memory allocated
#endif
size_t size = ggml_allocator_get_alloc_size(alloc, tensor);
size_t size = ggml_allocr_get_alloc_size(alloc, tensor);
size = aligned_offset(NULL, size, alloc->alignment);

AT_PRINTF("%s: allocating %s (%zu bytes) - ", __func__, tensor->name, size);
Expand Down Expand Up @@ -177,17 +204,17 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
}

// this is a very naive implementation, but for our case the number of free blocks should be very small
static void ggml_allocator_free_tensor(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
static void ggml_allocr_free_tensor(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
void * ptr = tensor->data;

if (ptr < alloc->data || (char*)ptr >= (char*)alloc->data + alloc->max_size) {
if (ggml_allocr_is_own(alloc, tensor) == false) {
// the tensor was not allocated in this buffer
// this can happen because the graph allocator will try to free weights and other tensors from different buffers
// the easiest way to deal with this is just to ignore it
return;
}

size_t size = ggml_allocator_get_alloc_size(alloc, tensor);
size_t size = ggml_allocr_get_alloc_size(alloc, tensor);
size = aligned_offset(NULL, size, alloc->alignment);
AT_PRINTF("%s: freeing %s (%zu bytes) - n_free_blocks = %d\n", __func__, tensor->name, size, alloc->n_free_blocks);

Expand Down Expand Up @@ -281,24 +308,55 @@ struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment)
return alloc;
}

// address and size of the buffer when measuring
// it needs to be large enough to fit all the tensors, but it cannot overlap with other existing buffers
static void * const MEASURE_BASE_ADDR = (void *) 0x1000;
#if defined(__ARM_NEON) && !defined(__aarch64__)
// 32-bit
// TODO: Use for 32-bit x86 as well
static const size_t MEASURE_MAX_SIZE = (1ULL<<32) - 1; // 4 GB
// os specific functions to allocate and free uncommitted virtual memory
static void * alloc_vmem(size_t size) {
#ifdef _WIN32
return VirtualAlloc(NULL, size, MEM_RESERVE, PAGE_NOACCESS);
#else
return mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
#endif
}

static void free_vmem(void * base_addr, size_t size) {
#ifdef _WIN32
VirtualFree(base_addr, 0, MEM_RELEASE);
#else
// 64-bit
static const size_t MEASURE_MAX_SIZE = 1ULL<<40; // 1 TB
munmap(base_addr, size);
#endif
}

// allocate uncommitted virtual memory to measure the size of the graph
static void alloc_measure_vmem(void ** base_addr, size_t * size) {
// 1TB for 64-bit, 1GB for 32-bit
*size = sizeof(void *) == 4 ? 1ULL<<30 : 1ULL<<40;
do {
*base_addr = alloc_vmem(*size);
if (*base_addr != NULL) {
AT_PRINTF("allocated %.2f GB of virtual memory for measure buffer\n", *size / 1024.0 / 1024.0 / 1024.0);
return;
}
// try again with half the size
*size /= 2;
} while (*size > 0);

GGML_ASSERT(!"failed to allocate virtual memory for measure buffer");
}

static void free_measure_vmem(void * base_addr, size_t size) {
free_vmem(base_addr, size);
}

struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) {
struct ggml_allocr * alloc = (struct ggml_allocr *)malloc(sizeof(struct ggml_allocr) /* + n_free_blocks * sizeof(struct free_block) */);

void * base_addr;
size_t size;

alloc_measure_vmem(&base_addr, &size);

*alloc = (struct ggml_allocr){
/*.data = */ MEASURE_BASE_ADDR,
/*.size = */ MEASURE_MAX_SIZE,
/*.data = */ base_addr,
/*.size = */ size,
/*.alignment = */ alignment,
/*.n_free_blocks = */ 0,
/*.free_blocks = */ {{0}},
Expand All @@ -318,6 +376,9 @@ struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) {
}

void ggml_allocr_free(struct ggml_allocr * alloc) {
if (alloc->measure) {
free_measure_vmem(alloc->data, alloc->size);
}
free(alloc);
}

Expand Down Expand Up @@ -387,8 +448,7 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
}

// if the node's data is external, then we cannot re-use it
if ((char *) parent->data < (char *) alloc->data ||
(char *) parent->data >= ((char *) alloc->data + alloc->size)) {
if (ggml_allocr_is_own(alloc, parent) == false) {
AT_PRINTF("not reusing parent %s for %s as %p is external\n", parent->name, node->name, parent->data);
continue;
}
Expand Down Expand Up @@ -422,7 +482,7 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
}
}

static size_t ggml_allocator_alloc_graph_tensors_n(
static size_t ggml_allocr_alloc_graph_tensors_n(
struct ggml_allocr * alloc,
struct ggml_cgraph ** graphs, int n_graphs,
struct ggml_tensor *** inputs, struct ggml_tensor *** outputs) {
Expand Down Expand Up @@ -528,12 +588,12 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
view_src_hn->n_views -= 1;
AT_PRINTF("view_src %s: %d children, %d views\n", view_src->name, view_src_hn->n_children, view_src_hn->n_views);
if (view_src_hn->n_views == 0 && view_src_hn->n_children == 0 && view_src->data != node->data) {
ggml_allocator_free_tensor(alloc, view_src);
ggml_allocr_free_tensor(alloc, view_src);
}
}
else {
if (parent->data != node->data) {
ggml_allocator_free_tensor(alloc, parent);
ggml_allocr_free_tensor(alloc, parent);
}
}
}
Expand All @@ -550,7 +610,7 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
for (int i = 0; outputs[g][i] != NULL; i++) {
struct ggml_tensor * output = outputs[g][i];
AT_PRINTF("output: %s\n", output->name);
ggml_allocator_free_tensor(alloc, output);
ggml_allocr_free_tensor(alloc, output);
}
}
}
Expand All @@ -559,5 +619,5 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
}

size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph) {
return ggml_allocator_alloc_graph_tensors_n(alloc, &graph, 1, NULL, NULL);
return ggml_allocr_alloc_graph_tensors_n(alloc, &graph, 1, NULL, NULL);
}

0 comments on commit 96f3662

Please sign in to comment.