Skip to content
This repository has been archived by the owner on Feb 26, 2020. It is now read-only.

Commit

Permalink
Refactor generic memory allocation interfaces
Browse files Browse the repository at this point in the history
This patch achieves the following goals:

1. It replaces the preprocessor kmem flag to gfp flag mapping with
   proper translation logic. This eliminates the potential for
   surprises that were previously possible where kmem flags were
   mapped to gfp flags.

2. It maps vmem_alloc() allocations to kmem_alloc() for allocations
   sized less than or equal to spl_kmem_alloc_max.  This ensures that
   small allocations will not contend on a single global lock, large
   allocations can still be handled, and potentially limited virtual
   address space will not be squandered.  This behavior is entirely
   different than under Illumos due to different memory management
   strategies employed by the respective kernels.  However, this
   functionally provides the semantics we require.

3. The --disable-debug-kmem, --enable-debug-kmem (default), and
   --enable-debug-kmem-tracking allocators have been unified into
   a single spl_kmem_alloc_impl() allocation function.  This was
   done to simplify the code and make it more maintainable.

4. Improve portability by exposing an implementation of the memory
   allocation functions that can be safely used in the same way
   they are used on Illumos.   Specifically, callers may safely
   use KM_SLEEP in contexts which perform filesystem IO.  This
   allows us to eliminate an entire class of Linux specific changes
   which were previously required to avoid deadlocking the system.

This change will be largely transparent to existing callers but there
are a few caveats:

1. Because the headers were refactored and extraneous includes removed,
   callers may find they need to explicitly add additional #includes.
   In particular, kmem_cache.h must now be explicitly included to
   access the SPL's kmem cache implementation.  This behavior is
   different from Illumos but it was done to avoid always masking
   the Linux slab functions when kmem.h is included.

2. Callers, like Lustre, which made assumptions about the definitions
   of KM_SLEEP, KM_NOSLEEP, and KM_PUSHPAGE will need to be updated.
   Other callers such as ZFS which did not will not require changes.

3. KM_PUSHPAGE is no longer overloaded to imply GFP_NOIO.  It retains
   its original meaning of allowing allocations to access reserved
   memory.  KM_PUSHPAGE callers can be converted back to KM_SLEEP.

4. The KM_NODEBUG flag has been retired and the default warning
   threshold increased to 32k.

5. The kmem_virt() function has been removed.  For callers which
   need to distinguish between a physical and virtual address use
   is_vmalloc_addr().
  • Loading branch information
behlendorf committed Dec 17, 2014
1 parent 75356ba commit b3db9cb
Show file tree
Hide file tree
Showing 10 changed files with 463 additions and 775 deletions.
159 changes: 46 additions & 113 deletions include/sys/kmem.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#define _SPL_KMEM_H

#include <linux/slab.h>
#include <linux/sched.h>

extern int kmem_debugging(void);
extern char *kmem_vasprintf(const char *fmt, va_list ap);
Expand All @@ -36,68 +37,41 @@ extern void strfree(char *str);
/*
* Memory allocation interfaces
*/
#define KM_SLEEP GFP_KERNEL /* Can sleep, never fails */
#define KM_NOSLEEP GFP_ATOMIC /* Can not sleep, may fail */
#define KM_PUSHPAGE (GFP_NOIO | __GFP_HIGH) /* Use reserved memory */
#define KM_NODEBUG __GFP_NOWARN /* Suppress warnings */
#define KM_FLAGS __GFP_BITS_MASK
#define KM_VMFLAGS GFP_LEVEL_MASK
#define KM_SLEEP 0x0000 /* can block for memory; success guaranteed */
#define KM_NOSLEEP 0x0001 /* cannot block for memory; may fail */
#define KM_PUSHPAGE 0x0004 /* can block for memory; may use reserve */
#define KM_ZERO 0x1000 /* zero the allocation */
#define KM_VMEM 0x2000 /* caller is vmem_* wrapper */

/*
* Used internally, the kernel does not need to support this flag
*/
#ifndef __GFP_ZERO
#define __GFP_ZERO 0x8000
#endif
#define KM_PUBLIC_MASK (KM_SLEEP | KM_NOSLEEP | KM_PUSHPAGE)

/*
* __GFP_NOFAIL looks like it will be removed from the kernel perhaps as
* early as 2.6.32. To avoid this issue when it occurs in upstream kernels
* we retry the allocation here as long as it is not __GFP_WAIT (GFP_ATOMIC).
* I would prefer the caller handle the failure case cleanly but we are
* trying to emulate Solaris and those are not the Solaris semantics.
* Convert a KM_* flags mask to its Linux GFP_* counterpart. The conversion
* function is context aware which means that KM_SLEEP allocations can be
* safely used in syncing contexts which have set PF_FSTRANS.
*/
/*
 * Allocate size bytes with kmalloc().  A failed attempt is repeated for
 * as long as flags contains __GFP_WAIT; without __GFP_WAIT only a single
 * attempt is made and the result, possibly NULL, is returned as is.
 */
static inline void *
kmalloc_nofail(size_t size, gfp_t flags)
{
	for (;;) {
		void *buf = kmalloc(size, flags);

		/* Done on success, or when the caller did not ask to wait. */
		if (buf != NULL || !(flags & __GFP_WAIT))
			return (buf);
	}
}

static inline void *
kzalloc_nofail(size_t size, gfp_t flags)
static inline gfp_t
kmem_flags_convert(int flags)
{
void *ptr;

do {
ptr = kzalloc(size, flags);
} while (ptr == NULL && (flags & __GFP_WAIT));
gfp_t lflags = __GFP_NOWARN | __GFP_COMP;

return (ptr);
}
if (flags & KM_NOSLEEP) {
lflags |= GFP_ATOMIC | __GFP_NORETRY;
} else {
lflags |= GFP_KERNEL;
if ((current->flags & PF_FSTRANS))
lflags &= ~(__GFP_IO|__GFP_FS);
}

static inline void *
kmalloc_node_nofail(size_t size, gfp_t flags, int node)
{
void *ptr;
if (flags & KM_PUSHPAGE)
lflags |= __GFP_HIGH;

do {
ptr = kmalloc_node(size, flags, node);
} while (ptr == NULL && (flags & __GFP_WAIT));
if (flags & KM_ZERO)
lflags |= __GFP_ZERO;

return (ptr);
return (lflags);
}

#ifdef DEBUG_KMEM

/*
* Memory accounting functions to be used only when DEBUG_KMEM is set.
*/
#ifdef HAVE_ATOMIC64_T
#define kmem_alloc_used_add(size) atomic64_add(size, &kmem_alloc_used)
#define kmem_alloc_used_sub(size) atomic64_sub(size, &kmem_alloc_used)
Expand All @@ -114,70 +88,29 @@ extern atomic_t kmem_alloc_used;
extern unsigned long long kmem_alloc_max;
#endif /* HAVE_ATOMIC64_T */

#ifdef DEBUG_KMEM_TRACKING
/*
* DEBUG_KMEM && DEBUG_KMEM_TRACKING
*
* The maximum level of memory debugging. All memory will be accounted
* for and each allocation will be explicitly tracked. Any allocation
* which is leaked will be reported on module unload and the exact location
* where that memory was allocated will be reported. This level of memory
* tracking will have a significant impact on performance and should only
* be enabled for debugging. This feature may be enabled by passing
* --enable-debug-kmem-tracking to configure.
*/
#define kmem_alloc(sz, fl) kmem_alloc_track((sz), (fl), \
__FUNCTION__, __LINE__, 0, 0)
#define kmem_zalloc(sz, fl) kmem_alloc_track((sz), (fl)|__GFP_ZERO,\
__FUNCTION__, __LINE__, 0, 0)
#define kmem_alloc_node(sz, fl, nd) kmem_alloc_track((sz), (fl), \
__FUNCTION__, __LINE__, 1, nd)
#define kmem_free(ptr, sz) kmem_free_track((ptr), (sz))

extern void *kmem_alloc_track(size_t, int, const char *, int, int, int);
extern void kmem_free_track(const void *, size_t);

#else /* DEBUG_KMEM_TRACKING */
/*
* DEBUG_KMEM && !DEBUG_KMEM_TRACKING
*
* The default build will set DEBUG_KMEM. This provides basic memory
* accounting with little to no impact on performance. When the module
* is unloaded if any memory was leaked the total number of leaked bytes
* will be reported on the console. To disable this basic accounting
* pass the --disable-debug-kmem option to configure.
*/
#define kmem_alloc(sz, fl) kmem_alloc_debug((sz), (fl), \
__FUNCTION__, __LINE__, 0, 0)
#define kmem_zalloc(sz, fl) kmem_alloc_debug((sz), (fl)|__GFP_ZERO,\
__FUNCTION__, __LINE__, 0, 0)
#define kmem_alloc_node(sz, fl, nd) kmem_alloc_debug((sz), (fl), \
__FUNCTION__, __LINE__, 1, nd)
#define kmem_free(ptr, sz) kmem_free_debug((ptr), (sz))

extern void *kmem_alloc_debug(size_t, int, const char *, int, int, int);
extern void kmem_free_debug(const void *, size_t);

#endif /* DEBUG_KMEM_TRACKING */
#else /* DEBUG_KMEM */
/*
* !DEBUG_KMEM && !DEBUG_KMEM_TRACKING
*
* All debugging is disabled. There will be no overhead even for
* minimal memory accounting. To enable basic accounting pass the
* --enable-debug-kmem option to configure.
*/
#define kmem_alloc(sz, fl) kmalloc_nofail((sz), (fl))
#define kmem_zalloc(sz, fl) kzalloc_nofail((sz), (fl))
#define kmem_alloc_node(sz, fl, nd) kmalloc_node_nofail((sz), (fl), (nd))
#define kmem_free(ptr, sz) ((void)(sz), kfree(ptr))
extern unsigned int spl_kmem_alloc_warn;
extern unsigned int spl_kmem_alloc_max;

#endif /* DEBUG_KMEM */
#define kmem_alloc(sz, fl) spl_kmem_alloc((sz), (fl), __func__, __LINE__)
#define kmem_zalloc(sz, fl) spl_kmem_zalloc((sz), (fl), __func__, __LINE__)
#define kmem_free(ptr, sz) spl_kmem_free((ptr), (sz))

int spl_kmem_init(void);
void spl_kmem_fini(void);
extern void *spl_kmem_alloc(size_t sz, int fl, const char *func, int line);
extern void *spl_kmem_zalloc(size_t sz, int fl, const char *func, int line);
extern void spl_kmem_free(const void *ptr, size_t sz);

#define kmem_virt(ptr) (((ptr) >= (void *)VMALLOC_START) && \
((ptr) < (void *)VMALLOC_END))
/*
* The following functions are only available for internal use.
*/
extern void *spl_kmem_alloc_impl(size_t size, int flags, int node);
extern void *spl_kmem_alloc_debug(size_t size, int flags, int node);
extern void *spl_kmem_alloc_track(size_t size, int flags,
const char *func, int line, int node);
extern void spl_kmem_free_impl(const void *buf, size_t size);
extern void spl_kmem_free_debug(const void *buf, size_t size);
extern void spl_kmem_free_track(const void *buf, size_t size);

extern int spl_kmem_init(void);
extern void spl_kmem_fini(void);

#endif /* _SPL_KMEM_H */
24 changes: 1 addition & 23 deletions include/sys/kmem_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ extern void spl_kmem_cache_set_move(spl_kmem_cache_t *,
extern void spl_kmem_cache_destroy(spl_kmem_cache_t *skc);
extern void *spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags);
extern void spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj);
extern void spl_kmem_cache_set_allocflags(spl_kmem_cache_t *skc, gfp_t flags);
extern void spl_kmem_cache_reap_now(spl_kmem_cache_t *skc, int count);
extern void spl_kmem_reap(void);

Expand All @@ -214,29 +215,6 @@ extern void spl_kmem_reap(void);
#define kmem_cache_reap_now(skc) \
spl_kmem_cache_reap_now(skc, skc->skc_reap)
#define kmem_reap() spl_kmem_reap()
#define kmem_virt(ptr) \
(((ptr) >= (void *)VMALLOC_START) && \
((ptr) < (void *)VMALLOC_END))

/*
 * OR extra slab allocation flags into the Linux cache backing a KMC_SLAB
 * based cache.  Caches without a backing Linux cache (skc_linux_cache is
 * NULL) are left untouched.  A typical use is adding __GFP_COMP to the
 * default allocation mask so that higher order allocations are properly
 * refcounted; Linux 3.11 and later already include that flag in the
 * default ->allocflags.
 */
static inline void
kmem_cache_set_allocflags(spl_kmem_cache_t *skc, gfp_t flags)
{
	if (skc->skc_linux_cache != NULL) {
		/* The mask field name depends on what configure detected. */
#if defined(HAVE_KMEM_CACHE_ALLOCFLAGS)
		skc->skc_linux_cache->allocflags |= flags;
#elif defined(HAVE_KMEM_CACHE_GFPFLAGS)
		skc->skc_linux_cache->gfpflags |= flags;
#endif
	}
}

/*
* The following functions are only available for internal use.
Expand Down
Loading

0 comments on commit b3db9cb

Please sign in to comment.