Skip to content

Commit

Permalink
Speculative prefetch for reordered requests
Browse files Browse the repository at this point in the history
Before this change, the speculative prefetcher was able to detect a stream
only if all of its accesses were perfectly sequential.  That was easy to
implement and is perfectly fine for single-threaded applications.
Unfortunately, multi-threaded network servers, such as iSCSI, SMB or
NFS, usually have plenty of threads and may often reorder requests,
preventing successful speculation and prefetch.

This change allows the speculative prefetcher to detect streams even if
requests are reordered, by introducing a list of 9 non-contiguous
ranges up to 16MB ahead of the current stream position and filling the
gaps as more requests arrive.  It also allows a stream to proceed
even with holes up to a certain configurable threshold (25%).

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by:	Alexander Motin <mav@FreeBSD.org>
Sponsored by:	iXsystems, Inc.
Closes openzfs#16022
  • Loading branch information
amotin authored and lundman committed Sep 2, 2024
1 parent fc7b73d commit ebdbf93
Show file tree
Hide file tree
Showing 5 changed files with 272 additions and 63 deletions.
11 changes: 10 additions & 1 deletion cmd/arc_summary
Original file line number Diff line number Diff line change
Expand Up @@ -793,18 +793,27 @@ def section_dmu(kstats_dict):

zfetch_stats = isolate_section('zfetchstats', kstats_dict)

zfetch_access_total = int(zfetch_stats['hits'])+int(zfetch_stats['misses'])
zfetch_access_total = int(zfetch_stats['hits']) +\
int(zfetch_stats['future']) + int(zfetch_stats['stride']) +\
int(zfetch_stats['past']) + int(zfetch_stats['misses'])

prt_1('DMU predictive prefetcher calls:', f_hits(zfetch_access_total))
prt_i2('Stream hits:',
f_perc(zfetch_stats['hits'], zfetch_access_total),
f_hits(zfetch_stats['hits']))
future = int(zfetch_stats['future']) + int(zfetch_stats['stride'])
prt_i2('Hits ahead of stream:', f_perc(future, zfetch_access_total),
f_hits(future))
prt_i2('Hits behind stream:',
f_perc(zfetch_stats['past'], zfetch_access_total),
f_hits(zfetch_stats['past']))
prt_i2('Stream misses:',
f_perc(zfetch_stats['misses'], zfetch_access_total),
f_hits(zfetch_stats['misses']))
prt_i2('Streams limit reached:',
f_perc(zfetch_stats['max_streams'], zfetch_stats['misses']),
f_hits(zfetch_stats['max_streams']))
prt_i1('Stream strides:', f_hits(zfetch_stats['stride']))
prt_i1('Prefetches issued', f_hits(zfetch_stats['io_issued']))
print()

Expand Down
16 changes: 11 additions & 5 deletions include/sys/dmu_zfetch.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,18 +45,24 @@ typedef struct zfetch {
int zf_numstreams; /* number of zstream_t's */
} zfetch_t;

/*
 * One range of accesses seen ahead of a prefetch stream; zstream_t keeps
 * ZFETCH_RANGES of these in zs_ranges[].
 * NOTE(review): the units and base of start/end are not visible in this
 * header (presumably block offsets relative to the stream position) --
 * confirm against dmu_zfetch.c.
 */
typedef struct zsrange {
uint16_t start; /* start of the range */
uint16_t end; /* end of the range; TODO confirm inclusive vs. exclusive */
} zsrange_t;

#define ZFETCH_RANGES 9 /* Fits zstream_t into 128 bytes */

typedef struct zstream {
list_node_t zs_node; /* link for zf_stream */
uint64_t zs_blkid; /* expect next access at this blkid */
uint_t zs_atime; /* time last prefetch issued */
zsrange_t zs_ranges[ZFETCH_RANGES]; /* ranges from future */
unsigned int zs_pf_dist; /* data prefetch distance in bytes */
unsigned int zs_ipf_dist; /* L1 prefetch distance in bytes */
uint64_t zs_pf_start; /* first data block to prefetch */
uint64_t zs_pf_end; /* data block to prefetch up to */
uint64_t zs_ipf_start; /* first data block to prefetch L1 */
uint64_t zs_ipf_end; /* data block to prefetch L1 up to */

list_node_t zs_node; /* link for zf_stream */
hrtime_t zs_atime; /* time last prefetch issued */
zfetch_t *zs_fetch; /* parent fetch */
boolean_t zs_missed; /* stream saw cache misses */
boolean_t zs_more; /* need more distant prefetch */
zfs_refcount_t zs_callers; /* number of pending callers */
Expand All @@ -74,7 +80,7 @@ void dmu_zfetch_init(zfetch_t *, struct dnode *);
void dmu_zfetch_fini(zfetch_t *);
zstream_t *dmu_zfetch_prepare(zfetch_t *, uint64_t, uint64_t, boolean_t,
boolean_t);
void dmu_zfetch_run(zstream_t *, boolean_t, boolean_t);
void dmu_zfetch_run(zfetch_t *, zstream_t *, boolean_t, boolean_t);
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t, boolean_t,
boolean_t);

Expand Down
11 changes: 11 additions & 0 deletions man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,10 @@ However, this is limited by
Maximum micro ZAP size.
A micro ZAP is upgraded to a fat ZAP, once it grows beyond the specified size.
.
.It Sy zfetch_hole_shift Ns = Ns Sy 2 Pq uint
Log2 fraction of holes in speculative prefetch stream allowed for it to
proceed.
.
.It Sy zfetch_min_distance Ns = Ns Sy 4194304 Ns B Po 4 MiB Pc Pq uint
Min bytes to prefetch per stream.
Prefetch distance starts from the demand access size and quickly grows to
Expand All @@ -578,6 +582,13 @@ Max bytes to prefetch per stream.
.It Sy zfetch_max_idistance Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq uint
Max bytes to prefetch indirects for per stream.
.
.It Sy zfetch_max_reorder Ns = Ns Sy 16777216 Ns B Po 16 MiB Pc Pq uint
Requests within this byte distance from the current prefetch stream position
are considered parts of the stream, reordered due to parallel processing.
Such requests do not advance the stream position immediately unless the
.Sy zfetch_hole_shift
fill threshold is reached, but are saved to fill holes in the stream later.
.
.It Sy zfetch_max_streams Ns = Ns Sy 8 Pq uint
Max number of streams per zfetch (prefetch streams per file).
.
Expand Down
8 changes: 5 additions & 3 deletions module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -569,8 +569,10 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
for (i = 0; i < nblks; i++) {
dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag);
if (db == NULL) {
if (zs)
dmu_zfetch_run(zs, missed, B_TRUE);
if (zs) {
dmu_zfetch_run(&dn->dn_zfetch, zs, missed,
B_TRUE);
}
rw_exit(&dn->dn_struct_rwlock);
dmu_buf_rele_array(dbp, nblks, tag);
if (read)
Expand Down Expand Up @@ -606,7 +608,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
zfs_racct_write(length, nblks);

if (zs)
dmu_zfetch_run(zs, missed, B_TRUE);
dmu_zfetch_run(&dn->dn_zfetch, zs, missed, B_TRUE);
rw_exit(&dn->dn_struct_rwlock);

if (read) {
Expand Down
Loading

0 comments on commit ebdbf93

Please sign in to comment.