Skip to content

Commit 6b06023

Browse files
committed
Add knob to disable slow io notifications
Introduce a new vdev property `VDEV_PROP_SLOW_IO_EVENTS` that allows users to disable notifications for slow devices. This prevents ZED and/or ZFSD from degrading the pool due to slow I/O. Signed-off-by: Mariusz Zaborski <[email protected]>
1 parent b6bd322 commit 6b06023

File tree

10 files changed

+128
-33
lines changed

10 files changed

+128
-33
lines changed

include/sys/fs/zfs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,7 @@ typedef enum {
385385
VDEV_PROP_TRIM_SUPPORT,
386386
VDEV_PROP_TRIM_ERRORS,
387387
VDEV_PROP_SLOW_IOS,
388+
VDEV_PROP_SLOW_IO_EVENTS,
388389
VDEV_NUM_PROPS
389390
} vdev_prop_t;
390391

include/sys/vdev_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,7 @@ struct vdev {
464464
uint64_t vdev_checksum_t;
465465
uint64_t vdev_io_n;
466466
uint64_t vdev_io_t;
467+
boolean_t vdev_slow_io_events;
467468
uint64_t vdev_slow_io_n;
468469
uint64_t vdev_slow_io_t;
469470
};

lib/libzfs/libzfs.abi

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6117,7 +6117,8 @@
61176117
<enumerator name='VDEV_PROP_TRIM_SUPPORT' value='49'/>
61186118
<enumerator name='VDEV_PROP_TRIM_ERRORS' value='50'/>
61196119
<enumerator name='VDEV_PROP_SLOW_IOS' value='51'/>
6120-
<enumerator name='VDEV_NUM_PROPS' value='52'/>
6120+
<enumerator name='VDEV_PROP_SLOW_IO_EVENTS' value='52'/>
6121+
<enumerator name='VDEV_NUM_PROPS' value='53'/>
61216122
</enum-decl>
61226123
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
61236124
<class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>

man/man7/vdevprops.7

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ section, below.
4545
Every vdev has a set of properties that export statistics about the vdev
4646
as well as control various behaviors.
4747
Properties are not inherited from top-level vdevs, with the exception of
48-
checksum_n, checksum_t, io_n, io_t, slow_io_n, and slow_io_t.
48+
checksum_n, checksum_t, io_n, io_t, slow_io_events, slow_io_n, and slow_io_t.
4949
.Pp
5050
The values of numeric properties can be specified using human-readable suffixes
5151
.Po for example,
@@ -126,7 +126,8 @@ Indicates if a leaf device supports trim operations.
126126
.Pp
127127
The following native properties can be used to change the behavior of a vdev.
128128
.Bl -tag -width "allocating"
129-
.It Sy checksum_n , checksum_t , io_n , io_t , slow_io_n , slow_io_t
129+
.It Sy checksum_n , checksum_t , io_n , io_t , slow_io_events , slow_io_n , slow_io_t
130131
Tune the fault management daemon by specifying checksum/io thresholds of <N>
131132
errors in <T> seconds, respectively.
132133
These properties can be set on leaf and top-level vdevs.
@@ -143,6 +144,11 @@ For
143144
.Sy OpenZFS on FreeBSD
144145
defaults see
145146
.Xr zfsd 8 .
147+
The
148+
.Sy slow_io_events
149+
property controls whether slow I/O events are generated.
150+
When enabled, the fault management daemon (or another consumer) can process them.
146152
.It Sy comment
147153
A text comment up to 8192 characters long
148154
.It Sy bootsize

module/zcommon/zpool_prop.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,9 @@ vdev_prop_init(void)
475475
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
476476
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table,
477477
sfeatures);
478+
zprop_register_index(VDEV_PROP_SLOW_IO_EVENTS, "slow_io_events",
479+
B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off",
480+
"SLOW_IO_EVENTS", boolean_table, sfeatures);
478481

479482
/* hidden properties */
480483
zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,

module/zfs/vdev.c

Lines changed: 61 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -449,32 +449,53 @@ vdev_get_nparity(vdev_t *vd)
449449
}
450450

451451
static int
452-
vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
452+
vdev_prop_get_objid(vdev_t *vd, uint64_t *objid)
453453
{
454-
spa_t *spa = vd->vdev_spa;
455-
objset_t *mos = spa->spa_meta_objset;
456-
uint64_t objid;
457-
int err;
458454

459455
if (vd->vdev_root_zap != 0) {
460-
objid = vd->vdev_root_zap;
456+
*objid = vd->vdev_root_zap;
461457
} else if (vd->vdev_top_zap != 0) {
462-
objid = vd->vdev_top_zap;
458+
*objid = vd->vdev_top_zap;
463459
} else if (vd->vdev_leaf_zap != 0) {
464-
objid = vd->vdev_leaf_zap;
460+
*objid = vd->vdev_leaf_zap;
465461
} else {
466462
return (EINVAL);
467463
}
468464

465+
return (0);
466+
}
467+
468+
static int
469+
vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
470+
{
471+
spa_t *spa = vd->vdev_spa;
472+
objset_t *mos = spa->spa_meta_objset;
473+
uint64_t objid;
474+
int err;
475+
476+
if (vdev_prop_get_objid(vd, &objid) != 0)
477+
return (EINVAL);
478+
469479
err = zap_lookup(mos, objid, vdev_prop_to_name(prop),
470480
sizeof (uint64_t), 1, value);
471-
472481
if (err == ENOENT)
473482
*value = vdev_prop_default_numeric(prop);
474483

475484
return (err);
476485
}
477486

487+
static int
488+
vdev_prop_get_bool(vdev_t *vd, vdev_prop_t prop, boolean_t *bvalue)
489+
{
490+
int err;
491+
uint64_t ivalue;
492+
493+
err = vdev_prop_get_int(vd, prop, &ivalue);
494+
*bvalue = ivalue != 0;
495+
496+
return (err);
497+
}
498+
478499
/*
479500
* Get the number of data disks for a top-level vdev.
480501
*/
@@ -737,8 +758,12 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
737758
*/
738759
vd->vdev_checksum_n = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_N);
739760
vd->vdev_checksum_t = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T);
761+
740762
vd->vdev_io_n = vdev_prop_default_numeric(VDEV_PROP_IO_N);
741763
vd->vdev_io_t = vdev_prop_default_numeric(VDEV_PROP_IO_T);
764+
765+
vd->vdev_slow_io_events = vdev_prop_default_numeric(
766+
VDEV_PROP_SLOW_IO_EVENTS);
742767
vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N);
743768
vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T);
744769

@@ -3903,6 +3928,11 @@ vdev_load(vdev_t *vd)
39033928
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
39043929
"failed [error=%d]", (u_longlong_t)zapobj, error);
39053930

3931+
error = vdev_prop_get_bool(vd, VDEV_PROP_SLOW_IO_EVENTS,
3932+
&vd->vdev_slow_io_events);
3933+
if (error && error != ENOENT)
3934+
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
3935+
"failed [error=%d]", (u_longlong_t)zapobj, error);
39063936
error = vdev_prop_get_int(vd, VDEV_PROP_SLOW_IO_N,
39073937
&vd->vdev_slow_io_n);
39083938
if (error && error != ENOENT)
@@ -5950,15 +5980,8 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
59505980
/*
59515981
* Set vdev property values in the vdev props mos object.
59525982
*/
5953-
if (vd->vdev_root_zap != 0) {
5954-
objid = vd->vdev_root_zap;
5955-
} else if (vd->vdev_top_zap != 0) {
5956-
objid = vd->vdev_top_zap;
5957-
} else if (vd->vdev_leaf_zap != 0) {
5958-
objid = vd->vdev_leaf_zap;
5959-
} else {
5983+
if (vdev_prop_get_objid(vd, &objid) != 0)
59605984
panic("unexpected vdev type");
5961-
}
59625985

59635986
mutex_enter(&spa->spa_props_lock);
59645987

@@ -6135,6 +6158,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
61356158
}
61366159
vd->vdev_io_t = intval;
61376160
break;
6161+
case VDEV_PROP_SLOW_IO_EVENTS:
6162+
if (nvpair_value_uint64(elem, &intval) != 0) {
6163+
error = EINVAL;
6164+
break;
6165+
}
6166+
vd->vdev_slow_io_events = intval != 0;
6167+
break;
61386168
case VDEV_PROP_SLOW_IO_N:
61396169
if (nvpair_value_uint64(elem, &intval) != 0) {
61406170
error = EINVAL;
@@ -6176,6 +6206,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
61766206
nvpair_t *elem = NULL;
61776207
nvlist_t *nvprops = NULL;
61786208
uint64_t intval = 0;
6209+
boolean_t boolval = 0;
61796210
char *strval = NULL;
61806211
const char *propname = NULL;
61816212
vdev_prop_t prop;
@@ -6189,15 +6220,8 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
61896220

61906221
nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops);
61916222

6192-
if (vd->vdev_root_zap != 0) {
6193-
objid = vd->vdev_root_zap;
6194-
} else if (vd->vdev_top_zap != 0) {
6195-
objid = vd->vdev_top_zap;
6196-
} else if (vd->vdev_leaf_zap != 0) {
6197-
objid = vd->vdev_leaf_zap;
6198-
} else {
6223+
if (vdev_prop_get_objid(vd, &objid) != 0)
61996224
return (SET_ERROR(EINVAL));
6200-
}
62016225
ASSERT(objid != 0);
62026226

62036227
mutex_enter(&spa->spa_props_lock);
@@ -6506,6 +6530,18 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
65066530
vdev_prop_add_list(outnvl, propname, strval,
65076531
intval, src);
65086532
break;
6533+
case VDEV_PROP_SLOW_IO_EVENTS:
6534+
err = vdev_prop_get_bool(vd, prop, &boolval);
6535+
if (err && err != ENOENT)
6536+
break;
6537+
6538+
src = ZPROP_SRC_LOCAL;
6539+
if (boolval == vdev_prop_default_numeric(prop))
6540+
src = ZPROP_SRC_DEFAULT;
6541+
6542+
vdev_prop_add_list(outnvl, propname, NULL,
6543+
boolval, src);
6544+
break;
65096545
case VDEV_PROP_CHECKSUM_N:
65106546
case VDEV_PROP_CHECKSUM_T:
65116547
case VDEV_PROP_IO_N:

module/zfs/zfs_fm.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,9 @@ vdev_prop_get_inherited(vdev_t *vd, vdev_prop_t prop)
223223
case VDEV_PROP_IO_T:
224224
propval = vd->vdev_io_t;
225225
break;
226+
case VDEV_PROP_SLOW_IO_EVENTS:
227+
propval = vd->vdev_slow_io_events;
228+
break;
226229
case VDEV_PROP_SLOW_IO_N:
227230
propval = vd->vdev_slow_io_n;
228231
break;

module/zfs/zio.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5538,9 +5538,12 @@ zio_done(zio_t *zio)
55385538
zio->io_vd->vdev_stat.vs_slow_ios++;
55395539
mutex_exit(&zio->io_vd->vdev_stat_lock);
55405540

5541-
(void) zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
5542-
zio->io_spa, zio->io_vd, &zio->io_bookmark,
5543-
zio, 0);
5541+
if (zio->io_vd->vdev_slow_io_events) {
5542+
(void) zfs_ereport_post(
5543+
FM_EREPORT_ZFS_DELAY,
5544+
zio->io_spa, zio->io_vd,
5545+
&zio->io_bookmark, zio, 0);
5546+
}
55445547
}
55455548
}
55465549
}

tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ typeset -a properties=(
7171
checksum_t
7272
io_n
7373
io_t
74+
slow_io_events
7475
slow_io_n
7576
slow_io_t
7677
trim_support

tests/zfs-tests/tests/functional/events/zed_slow_io.ksh

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
#
2525
# Copyright (c) 2023, Klara Inc.
26+
# Copyright (c) 2025, Mariusz Zaborski <[email protected]>
2627
#
2728

2829
# DESCRIPTION:
@@ -140,8 +141,8 @@ function slow_io_degrade
140141
{
141142
do_setup
142143

143-
zpool set slow_io_n=5 $TESTPOOL $VDEV
144-
zpool set slow_io_t=60 $TESTPOOL $VDEV
144+
log_must zpool set slow_io_n=5 $TESTPOOL $VDEV
145+
log_must zpool set slow_io_t=60 $TESTPOOL $VDEV
145146

146147
start_slow_io
147148
for i in {1..16}; do
@@ -193,6 +194,44 @@ function slow_io_no_degrade
193194
do_clean
194195
}
195196

197+
# Change slow_io_n, slow_io_t to 5 events in 60 seconds
198+
# fire more than 5 events. Disable slow io events.
199+
# Should not degrade.
200+
function slow_io_degrade_disabled
{
	do_setup

	# Thresholds of 5 events in 60 seconds, with slow I/O event
	# generation switched off for the vdev.
	log_must zpool set slow_io_n=5 $TESTPOOL $VDEV
	log_must zpool set slow_io_t=60 $TESTPOOL $VDEV
	log_must zpool set slow_io_events=off $TESTPOOL $VDEV

	start_slow_io
	for i in {1..16}; do
		dd if=${FILEPATH}$i of=/dev/null count=1 bs=512 2>/dev/null
		sleep 0.5
	done
	stop_slow_io
	zpool sync

	#
	# Poll once per second for 60 seconds; the count sampled on the
	# final pass is the one checked below.
	#
	typeset -i i=0
	typeset -i events=0
	while (( i < 60 )); do
		events=$(zpool events | grep -c "ereport\.fs\.zfs.delay")
		(( i += 1 ))
		sleep 1
	done
	log_note "$events delay events found"

	if [[ $events -ne 0 ]]; then
		log_fail "expecting no delay events, found $events"
	fi

	# With no delay events posted, the vdev must not become DEGRADED.
	log_mustnot wait_vdev_state $TESTPOOL $VDEV "DEGRADED" 45
	do_clean
}
234+
196235
log_assert "Test ZED slow io configurability"
197236
log_onexit cleanup
198237

@@ -202,5 +241,6 @@ log_must zed_start
202241
default_degrade
203242
slow_io_degrade
204243
slow_io_no_degrade
244+
slow_io_degrade_disabled
205245

206246
log_pass "Test ZED slow io configurability"

0 commit comments

Comments
 (0)