Skip to content

Commit

Permalink
Gang ABD Type
Browse files Browse the repository at this point in the history
Adding the gang ABD type, which allows for linear and scatter ABDs to
be chained together into a single ABD.

This can be used to avoid doing memory copies to/from ABDs. An example
of this can be found in vdev_queue.c in the vdev_queue_aggregate()
function.

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Co-authored-by: Brian <bwa@clemson.edu>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Signed-off-by: Brian Atkinson <batkinson@lanl.gov>
Closes #10069
  • Loading branch information
bwatkinson authored May 21, 2020
1 parent 501a151 commit fb82226
Show file tree
Hide file tree
Showing 7 changed files with 629 additions and 106 deletions.
10 changes: 7 additions & 3 deletions include/sys/abd.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,24 @@ typedef int abd_iter_func_t(void *buf, size_t len, void *private);
typedef int abd_iter_func2_t(void *bufa, void *bufb, size_t len, void *private);

extern int zfs_abd_scatter_enabled;
extern abd_t *abd_zero_scatter;

/*
* Allocations and deallocations
*/

abd_t *abd_alloc(size_t, boolean_t);
abd_t *abd_alloc_linear(size_t, boolean_t);
abd_t *abd_alloc_gang_abd(void);
abd_t *abd_alloc_for_io(size_t, boolean_t);
abd_t *abd_alloc_sametype(abd_t *, size_t);
void abd_gang_add(abd_t *, abd_t *, boolean_t);
void abd_free(abd_t *);
void abd_put(abd_t *);
abd_t *abd_get_offset(abd_t *, size_t);
abd_t *abd_get_offset_size(abd_t *, size_t, size_t);
abd_t *abd_get_zeros(size_t);
abd_t *abd_get_from_buf(void *, size_t);
void abd_put(abd_t *);

/*
* Conversion to and from a normal buffer
Expand Down Expand Up @@ -132,6 +136,7 @@ abd_zero(abd_t *abd, size_t size)
* ABD type check functions
*/
boolean_t abd_is_linear(abd_t *);
boolean_t abd_is_gang(abd_t *);
boolean_t abd_is_linear_page(abd_t *);

/*
Expand All @@ -146,8 +151,7 @@ void abd_fini(void);
* Linux ABD bio functions
*/
#if defined(__linux__) && defined(_KERNEL)
unsigned int abd_scatter_bio_map_off(struct bio *, abd_t *, unsigned int,
size_t);
unsigned int abd_bio_map_off(struct bio *, abd_t *, unsigned int, size_t);
unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t);
#endif

Expand Down
11 changes: 11 additions & 0 deletions include/sys/abd_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ typedef enum abd_flags {
ABD_FLAG_MULTI_ZONE = 1 << 3, /* pages split over memory zones */
ABD_FLAG_MULTI_CHUNK = 1 << 4, /* pages split over multiple chunks */
ABD_FLAG_LINEAR_PAGE = 1 << 5, /* linear but allocd from page */
ABD_FLAG_GANG = 1 << 6, /* mult ABDs chained together */
ABD_FLAG_GANG_FREE = 1 << 7, /* gang ABD is responsible for mem */
ABD_FLAG_ZEROS = 1 << 8, /* ABD for zero-filled buffer */
} abd_flags_t;

typedef enum abd_stats_op {
Expand All @@ -49,8 +52,10 @@ typedef enum abd_stats_op {
struct abd {
abd_flags_t abd_flags;
uint_t abd_size; /* excludes scattered abd_offset */
list_node_t abd_gang_link;
struct abd *abd_parent;
zfs_refcount_t abd_children;
kmutex_t abd_mtx;
union {
struct abd_scatter {
uint_t abd_offset;
Expand All @@ -66,6 +71,9 @@ struct abd {
void *abd_buf;
struct scatterlist *abd_sgl; /* for LINEAR_PAGE */
} abd_linear;
struct abd_gang {
list_t abd_gang_chain;
} abd_gang;
} abd_u;
};

Expand All @@ -84,6 +92,8 @@ struct abd_iter {
struct scatterlist *iter_sg; /* current sg */
};

abd_t *abd_gang_get_offset(abd_t *, size_t *);

/*
* OS specific functions
*/
Expand Down Expand Up @@ -116,6 +126,7 @@ void abd_iter_unmap(struct abd_iter *);

#define ABD_SCATTER(abd) (abd->abd_u.abd_scatter)
#define ABD_LINEAR_BUF(abd) (abd->abd_u.abd_linear.abd_buf)
#define ABD_GANG(abd) (abd->abd_u.abd_gang)

#if defined(_KERNEL)
#if defined(__FreeBSD__)
Expand Down
62 changes: 61 additions & 1 deletion module/os/freebsd/zfs/abd_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,15 @@ SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_chunk_size, CTLFLAG_RDTUN,
kmem_cache_t *abd_chunk_cache;
static kstat_t *abd_ksp;

/*
* We use a scattered SPA_MAXBLOCKSIZE sized ABD whose chunks are
* just a single zero'd sized zfs_abd_chunk_size buffer. This
* allows us to conserve memory by only using a single zero buffer
* for the scatter chunks.
*/
abd_t *abd_zero_scatter = NULL;
static char *abd_zero_buf = NULL;

static void
abd_free_chunk(void *c)
{
Expand Down Expand Up @@ -193,6 +202,8 @@ abd_alloc_struct(size_t size)
abd_u.abd_scatter.abd_chunks[chunkcnt]);
abd_t *abd = kmem_alloc(abd_size, KM_PUSHPAGE);
ASSERT3P(abd, !=, NULL);
list_link_init(&abd->abd_gang_link);
mutex_init(&abd->abd_mtx, NULL, MUTEX_DEFAULT, NULL);
ABDSTAT_INCR(abdstat_struct_size, abd_size);

return (abd);
Expand All @@ -203,10 +214,53 @@ abd_free_struct(abd_t *abd)
{
size_t chunkcnt = abd_is_linear(abd) ? 0 : abd_scatter_chunkcnt(abd);
int size = offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]);
mutex_destroy(&abd->abd_mtx);
ASSERT(!list_link_active(&abd->abd_gang_link));
kmem_free(abd, size);
ABDSTAT_INCR(abdstat_struct_size, -size);
}

/*
* Allocate scatter ABD of size SPA_MAXBLOCKSIZE, where
* each chunk in the scatterlist will be set to abd_zero_buf.
*/
static void
abd_alloc_zero_scatter(void)
{
size_t n = abd_chunkcnt_for_bytes(SPA_MAXBLOCKSIZE);
abd_zero_buf = kmem_zalloc(zfs_abd_chunk_size, KM_SLEEP);
abd_zero_scatter = abd_alloc_struct(SPA_MAXBLOCKSIZE);

abd_zero_scatter->abd_flags = ABD_FLAG_OWNER | ABD_FLAG_ZEROS;
abd_zero_scatter->abd_size = SPA_MAXBLOCKSIZE;
abd_zero_scatter->abd_parent = NULL;
zfs_refcount_create(&abd_zero_scatter->abd_children);

ABD_SCATTER(abd_zero_scatter).abd_offset = 0;
ABD_SCATTER(abd_zero_scatter).abd_chunk_size =
zfs_abd_chunk_size;

for (int i = 0; i < n; i++) {
ABD_SCATTER(abd_zero_scatter).abd_chunks[i] =
abd_zero_buf;
}

ABDSTAT_BUMP(abdstat_scatter_cnt);
ABDSTAT_INCR(abdstat_scatter_data_size, zfs_abd_chunk_size);
}

static void
abd_free_zero_scatter(void)
{
zfs_refcount_destroy(&abd_zero_scatter->abd_children);
ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
ABDSTAT_INCR(abdstat_scatter_data_size, -(int)zfs_abd_chunk_size);

abd_free_struct(abd_zero_scatter);
abd_zero_scatter = NULL;
kmem_free(abd_zero_buf, zfs_abd_chunk_size);
}

void
abd_init(void)
{
Expand All @@ -219,11 +273,15 @@ abd_init(void)
abd_ksp->ks_data = &abd_stats;
kstat_install(abd_ksp);
}

abd_alloc_zero_scatter();
}

void
abd_fini(void)
{
abd_free_zero_scatter();

if (abd_ksp != NULL) {
kstat_delete(abd_ksp);
abd_ksp = NULL;
Expand Down Expand Up @@ -271,12 +329,13 @@ abd_alloc_scatter_offset_chunkcnt(size_t chunkcnt)
abd_u.abd_scatter.abd_chunks[chunkcnt]);
abd_t *abd = kmem_alloc(abd_size, KM_PUSHPAGE);
ASSERT3P(abd, !=, NULL);
list_link_init(&abd->abd_gang_link);
mutex_init(&abd->abd_mtx, NULL, MUTEX_DEFAULT, NULL);
ABDSTAT_INCR(abdstat_struct_size, abd_size);

return (abd);
}


abd_t *
abd_get_offset_scatter(abd_t *sabd, size_t off)
{
Expand Down Expand Up @@ -332,6 +391,7 @@ abd_iter_scatter_chunk_index(struct abd_iter *aiter)
void
abd_iter_init(struct abd_iter *aiter, abd_t *abd)
{
ASSERT(!abd_is_gang(abd));
abd_verify(abd);
aiter->iter_abd = abd;
aiter->iter_pos = 0;
Expand Down
Loading

1 comment on commit fb82226

@dioni21
Copy link
Contributor

@dioni21 dioni21 commented on fb82226 Jun 4, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please take a look at #10401

Please sign in to comment.