Skip to content

Commit

Permalink
Change checksum & IO delay ratelimit values
Browse files Browse the repository at this point in the history
Change checksum & IO delay ratelimit thresholds from 5/sec to 20/sec.
This allows zed to actually trigger if a bunch of these events arrive in
a short period of time (zed has a threshold of 10 events in 10 sec).
Previously, if you had, say, 100 checksum errors in 1 sec, it would get
ratelimited to 5/sec which wouldn't trigger zed to fault the drive.

Also, convert the checksum and IO delay thresholds to module params for
easy testing.

Reviewed-by: loli10K <ezomori.nozomu@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes openzfs#7252
  • Loading branch information
tonyhutter committed Mar 13, 2018
1 parent 7adf9d1 commit c5dc00e
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 9 deletions.
2 changes: 0 additions & 2 deletions include/sys/vdev_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,6 @@ struct vdev {
* We rate limit ZIO delay and ZIO checksum events, since they
* can flood ZED with tons of events when a drive is acting up.
*/
#define DELAYS_PER_SECOND 5
#define CHECKSUMS_PER_SECOND 5
zfs_ratelimit_t vdev_delay_rl;
zfs_ratelimit_t vdev_checksum_rl;
};
Expand Down
12 changes: 9 additions & 3 deletions include/sys/zfs_ratelimit.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,19 @@
typedef struct {
hrtime_t start;
unsigned int count;
unsigned int burst; /* Number to allow per interval */
unsigned int interval; /* Interval length in seconds */

/*
* Pointer to number of events per interval. We do this to
* allow the burst to be a (changeable) module parameter.
*/
unsigned int *burst;

unsigned int interval; /* Interval length in seconds */
kmutex_t lock;
} zfs_ratelimit_t;

int zfs_ratelimit(zfs_ratelimit_t *rl);
void zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int burst,
void zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int *burst,
unsigned int interval);
void zfs_ratelimit_fini(zfs_ratelimit_t *rl);

Expand Down
39 changes: 39 additions & 0 deletions man/man5/zfs-module-parameters.5
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,34 @@ Disable pool import at module load by ignoring the cache file (typically \fB/etc
Use \fB1\fR for yes (default) and \fB0\fR for no.
.RE

.sp
.ne 2
.na
\fBzfs_checksums_per_second\fR (int)
.ad
.RS 12n
Rate limit checksum events to this many per second. Note that this should
not be set below the zed thresholds (currently 10 checksums over 10 sec)
or else zed may not trigger any action.
.sp
Default value: \fB20\fR.
.RE

.sp
.ne 2
.na
\fBzfs_commit_timeout_pct\fR (int)
.ad
.RS 12n
This controls the amount of time that a ZIL block (lwb) will remain "open"
when it isn't "full", and it has a thread waiting for it to be committed to
stable storage. The timeout is scaled based on a percentage of the last lwb
latency to avoid significantly impacting the latency of each individual
transaction record (itx).
.sp
Default value: \fB5\fR%.
.RE

.sp
.ne 2
.na
Expand Down Expand Up @@ -866,6 +894,17 @@ Note: \fBzfs_delay_scale\fR * \fBzfs_dirty_data_max\fR must be < 2^64.
Default value: \fB500,000\fR.
.RE

.sp
.ne 2
.na
\fBzfs_delays_per_second\fR (int)
.ad
.RS 12n
Rate limit IO delay events to this many per second.
.sp
Default value: \fB20\fR.
.RE

.sp
.ne 2
.na
Expand Down
4 changes: 2 additions & 2 deletions module/zcommon/zfs_comutil.c
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS] = {
* interval: Interval time in seconds
*/
void
zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int burst,
zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int *burst,
unsigned int interval)
{
rl->count = 0;
Expand Down Expand Up @@ -270,7 +270,7 @@ zfs_ratelimit(zfs_ratelimit_t *rl)
rl->start = now;
rl->count = 0;
} else {
if (rl->count >= rl->burst) {
if (rl->count >= *rl->burst) {
rc = 0; /* We're ratelimiting */
}
}
Expand Down
23 changes: 21 additions & 2 deletions module/zfs/vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,16 @@
*/
int metaslabs_per_vdev = 200;

/*
 * Rate limit delay events to this many IO delays per second.
 *
 * Exposed as the zfs_delays_per_second module parameter.  Each vdev's
 * delay rate limiter is initialized with a pointer to this variable and
 * dereferences it on every check, so a new value is picked up by
 * existing vdevs.
 */
unsigned int zfs_delays_per_second = 20;

/*
 * Rate limit checksum events after this many checksum errors per second.
 *
 * Exposed as the zfs_checksums_per_second module parameter and, like the
 * delay limit, read through a pointer by each vdev's checksum rate
 * limiter.  This should not be set below the zed thresholds (currently
 * 10 checksums over 10 sec), or else zed may not trigger any action.
 */
unsigned int zfs_checksums_per_second = 20;

/*
* Virtual device management.
*/
Expand Down Expand Up @@ -357,8 +367,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
* and checksum events so that we don't overwhelm ZED with thousands
* of events when a disk is acting up.
*/
zfs_ratelimit_init(&vd->vdev_delay_rl, DELAYS_PER_SECOND, 1);
zfs_ratelimit_init(&vd->vdev_checksum_rl, CHECKSUMS_PER_SECOND, 1);
zfs_ratelimit_init(&vd->vdev_delay_rl, &zfs_delays_per_second, 1);
zfs_ratelimit_init(&vd->vdev_checksum_rl, &zfs_checksums_per_second, 1);

list_link_init(&vd->vdev_config_dirty_node);
list_link_init(&vd->vdev_state_dirty_node);
Expand Down Expand Up @@ -3776,5 +3786,14 @@ module_param(metaslabs_per_vdev, int, 0644);
MODULE_PARM_DESC(metaslabs_per_vdev,
"Divide added vdev into approximately (but no more than) this number "
"of metaslabs");

/* Runtime-writable (0644) knob for the per-second IO delay event limit. */
module_param(zfs_delays_per_second, uint, 0644);
MODULE_PARM_DESC(zfs_delays_per_second,
	"Rate limit delay events to this many IO delays per second");

/*
 * Runtime-writable (0644) knob for the per-second checksum event limit.
 * The description is built from adjacent string literals, which the
 * compiler concatenates verbatim, so every fragment that is followed by
 * another word must end with its own space.
 */
module_param(zfs_checksums_per_second, uint, 0644);
MODULE_PARM_DESC(zfs_checksums_per_second, "Rate limit checksum events "
	"to this many checksum errors per second (do not set below zed "
	"threshold).");
/* END CSTYLED */
#endif

0 comments on commit c5dc00e

Please sign in to comment.