diff --git a/cmd/raidz_test/raidz_bench.c b/cmd/raidz_test/raidz_bench.c index 9d8fc74a9ce9..86e43ccfa641 100644 --- a/cmd/raidz_test/raidz_bench.c +++ b/cmd/raidz_test/raidz_bench.c @@ -33,9 +33,6 @@ #include -/*BUGBUG*/ -/*raidz expansion - temporarily disabled */ -#if 0 #include "raidz_test.h" #define GEN_BENCH_MEMORY (((uint64_t)1ULL)<<32) @@ -86,8 +83,17 @@ run_gen_bench_impl(const char *impl) /* create suitable raidz_map */ ncols = rto_opts.rto_dcols + fn + 1; zio_bench.io_size = 1ULL << ds; - rm_bench = vdev_raidz_map_alloc(&zio_bench, - BENCH_ASHIFT, ncols, fn+1); + + if (rto_opts.rto_expand) { + rm_bench = vdev_raidz_map_alloc_expanded( + zio_bench.io_abd, + zio_bench.io_size, zio_bench.io_offset, + rto_opts.rto_ashift, ncols+1, ncols, + fn+1, rto_opts.rto_expand_offset); + } else { + rm_bench = vdev_raidz_map_alloc(&zio_bench, + BENCH_ASHIFT, ncols, fn+1); + } /* estimate iteration count */ iter_cnt = GEN_BENCH_MEMORY; @@ -166,8 +172,16 @@ run_rec_bench_impl(const char *impl) (1ULL << BENCH_ASHIFT)) continue; - rm_bench = vdev_raidz_map_alloc(&zio_bench, - BENCH_ASHIFT, ncols, PARITY_PQR); + if (rto_opts.rto_expand) { + rm_bench = vdev_raidz_map_alloc_expanded( + zio_bench.io_abd, + zio_bench.io_size, zio_bench.io_offset, + BENCH_ASHIFT, ncols+1, ncols, + PARITY_PQR, rto_opts.rto_expand_offset); + } else { + rm_bench = vdev_raidz_map_alloc(&zio_bench, + BENCH_ASHIFT, ncols, PARITY_PQR); + } /* estimate iteration count */ iter_cnt = (REC_BENCH_MEMORY); @@ -228,5 +242,3 @@ run_raidz_benchmark(void) bench_fini_raidz_maps(); } -/*BUGBUG*/ -#endif /* 0 - disabled for now */ diff --git a/cmd/raidz_test/raidz_test.c b/cmd/raidz_test/raidz_test.c index 65ede5817562..f940a706b848 100644 --- a/cmd/raidz_test/raidz_test.c +++ b/cmd/raidz_test/raidz_test.c @@ -32,9 +32,6 @@ #include #include #include -/*BUGBUG*/ -#if 0 /* disabled currently for raidz expansion */ - #include "raidz_test.h" static int *rand_data; @@ -80,12 +77,16 @@ static void 
print_opts(raidz_test_opts_t *opts, boolean_t force) (void) fprintf(stdout, DBLSEP "Running with options:\n" " (-a) zio ashift : %zu\n" " (-o) zio offset : 1 << %zu\n" + " (-e) expanded map : %s\n" + " (-r) reflow offset : %zx\n" " (-d) number of raidz data columns : %zu\n" " (-s) size of DATA : 1 << %zu\n" " (-S) sweep parameters : %s \n" " (-v) verbose : %s \n\n", opts->rto_ashift, /* -a */ ilog2(opts->rto_offset), /* -o */ + opts->rto_expand ? "yes" : "no", /* -e */ + opts->rto_expand_offset, /* -r */ opts->rto_dcols, /* -d */ ilog2(opts->rto_dsize), /* -s */ opts->rto_sweep ? "yes" : "no", /* -S */ @@ -107,6 +108,8 @@ static void usage(boolean_t requested) "\t[-S parameter sweep (default: %s)]\n" "\t[-t timeout for parameter sweep test]\n" "\t[-B benchmark all raidz implementations]\n" + "\t[-e use expanded raidz map (default: %s)]\n" + "\t[-r expanded raidz map reflow offset (default: %zx)]\n" "\t[-v increase verbosity (default: %zu)]\n" "\t[-h (print help)]\n" "\t[-T test the test, see if failure would be detected]\n" @@ -117,6 +120,8 @@ static void usage(boolean_t requested) o->rto_dcols, /* -d */ ilog2(o->rto_dsize), /* -s */ rto_opts.rto_sweep ? "yes" : "no", /* -S */ + rto_opts.rto_expand ? "yes" : "no", /* -e */ + o->rto_expand_offset, /* -r */ o->rto_v); /* -d */ exit(requested ? 
0 : 1); @@ -131,7 +136,7 @@ static void process_options(int argc, char **argv) bcopy(&rto_opts_defaults, o, sizeof (*o)); - while ((opt = getopt(argc, argv, "TDBSvha:o:d:s:t:")) != -1) { + while ((opt = getopt(argc, argv, "TDBSvha:er:o:d:s:t:")) != -1) { value = 0; switch (opt) { @@ -139,6 +144,12 @@ static void process_options(int argc, char **argv) value = strtoull(optarg, NULL, 0); o->rto_ashift = MIN(13, MAX(9, value)); break; + case 'e': + o->rto_expand = 1; + break; + case 'r': + o->rto_expand_offset = strtoull(optarg, NULL, 0); + break; case 'o': value = strtoull(optarg, NULL, 0); o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9; @@ -182,25 +193,34 @@ static void process_options(int argc, char **argv) } } -#define DATA_COL(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_abd) -#define DATA_COL_SIZE(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_size) +#define DATA_COL(rr, i) ((rr)->rr_col[rr->rr_firstdatacol + (i)].rc_abd) +#define DATA_COL_SIZE(rr, i) ((rr)->rr_col[rr->rr_firstdatacol + (i)].rc_size) -#define CODE_COL(rm, i) ((rm)->rm_col[(i)].rc_abd) -#define CODE_COL_SIZE(rm, i) ((rm)->rm_col[(i)].rc_size) +#define CODE_COL(rr, i) ((rr)->rr_col[(i)].rc_abd) +#define CODE_COL_SIZE(rr, i) ((rr)->rr_col[(i)].rc_size) static int cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity) { - int i, ret = 0; + int r, i, ret = 0; VERIFY(parity >= 1 && parity <= 3); - for (i = 0; i < parity; i++) { - if (abd_cmp(CODE_COL(rm, i), CODE_COL(opts->rm_golden, i)) - != 0) { - ret++; - LOG_OPT(D_DEBUG, opts, - "\nParity block [%d] different!\n", i); + for (r = 0; r < rm->rm_nrows; r++) { + raidz_row_t * const rr = rm->rm_row[r]; + raidz_row_t * const rrg = opts->rm_golden->rm_row[r]; + for (i = 0; i < parity; i++) { + if (CODE_COL_SIZE(rrg, i) == 0) { + VERIFY0(CODE_COL_SIZE(rr, i)); + continue; + } + + if (abd_cmp(CODE_COL(rr, i), + CODE_COL(rrg, i)) != 0) { + ret++; + LOG_OPT(D_DEBUG, opts, + "\nParity block [%d] different!\n", i); + } } } return 
(ret); @@ -209,16 +229,26 @@ cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity) static int cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm) { - int i, ret = 0; - int dcols = opts->rm_golden->rm_cols - raidz_parity(opts->rm_golden); + int r, i, dcols, ret = 0; + + for (r = 0; r < rm->rm_nrows; r++) { + raidz_row_t *rr = rm->rm_row[r]; + raidz_row_t *rrg = opts->rm_golden->rm_row[r]; + dcols = opts->rm_golden->rm_row[0]->rr_cols - + raidz_parity(opts->rm_golden); + for (i = 0; i < dcols; i++) { + if (DATA_COL_SIZE(rrg, i) == 0) { + VERIFY0(DATA_COL_SIZE(rr, i)); + continue; + } - for (i = 0; i < dcols; i++) { - if (abd_cmp(DATA_COL(opts->rm_golden, i), DATA_COL(rm, i)) - != 0) { - ret++; + if (abd_cmp(DATA_COL(rrg, i), + DATA_COL(rr, i)) != 0) { + ret++; - LOG_OPT(D_DEBUG, opts, - "\nData block [%d] different!\n", i); + LOG_OPT(D_DEBUG, opts, + "\nData block [%d] different!\n", i); + } } } return (ret); @@ -239,12 +269,13 @@ init_rand(void *data, size_t size, void *private) static void corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt) { - int i; - raidz_col_t *col; - - for (i = 0; i < cnt; i++) { - col = &rm->rm_col[tgts[i]]; - abd_iterate_func(col->rc_abd, 0, col->rc_size, init_rand, NULL); + for (int r = 0; r < rm->rm_nrows; r++) { + raidz_row_t *rr = rm->rm_row[r]; + for (int i = 0; i < cnt; i++) { + raidz_col_t *col = &rr->rr_col[tgts[i]]; + abd_iterate_func(col->rc_abd, 0, col->rc_size, + init_rand, NULL); + } } } @@ -291,10 +322,21 @@ init_raidz_golden_map(raidz_test_opts_t *opts, const int parity) VERIFY0(vdev_raidz_impl_set("original")); - opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden, - opts->rto_ashift, total_ncols, parity); - rm_test = vdev_raidz_map_alloc(zio_test, - opts->rto_ashift, total_ncols, parity); + if (opts->rto_expand) { + opts->rm_golden = vdev_raidz_map_alloc_expanded(opts->zio_golden->io_abd, + opts->zio_golden->io_size, opts->zio_golden->io_offset, + opts->rto_ashift, total_ncols+1, 
total_ncols, + parity, opts->rto_expand_offset); + rm_test = vdev_raidz_map_alloc_expanded(zio_test->io_abd, + zio_test->io_size, zio_test->io_offset, + opts->rto_ashift, total_ncols+1, total_ncols, + parity, opts->rto_expand_offset); + } else { + opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden, + opts->rto_ashift, total_ncols, parity); + rm_test = vdev_raidz_map_alloc(zio_test, + opts->rto_ashift, total_ncols, parity); + } VERIFY(opts->zio_golden); VERIFY(opts->rm_golden); @@ -333,8 +375,15 @@ init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity) (*zio)->io_abd = raidz_alloc(alloc_dsize); init_zio_abd(*zio); - rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift, - total_ncols, parity); + if (opts->rto_expand) { + rm = vdev_raidz_map_alloc_expanded((*zio)->io_abd, + (*zio)->io_size, (*zio)->io_offset, + opts->rto_ashift, total_ncols+1, total_ncols, + parity, opts->rto_expand_offset); + } else { + rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift, + total_ncols, parity); + } VERIFY(rm); /* Make sure code columns are destroyed */ @@ -423,7 +472,7 @@ run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn) if (fn < RAIDZ_REC_PQ) { /* can reconstruct 1 failed data disk */ for (x0 = 0; x0 < opts->rto_dcols; x0++) { - if (x0 >= rm->rm_cols - raidz_parity(rm)) + if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm)) continue; /* Check if should stop */ @@ -448,10 +497,10 @@ run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn) } else if (fn < RAIDZ_REC_PQR) { /* can reconstruct 2 failed data disk */ for (x0 = 0; x0 < opts->rto_dcols; x0++) { - if (x0 >= rm->rm_cols - raidz_parity(rm)) + if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm)) continue; for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) { - if (x1 >= rm->rm_cols - raidz_parity(rm)) + if (x1 >= rm->rm_row[0]->rr_cols - raidz_parity(rm)) continue; /* Check if should stop */ @@ -478,14 +527,14 @@ run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, 
const int fn) } else { /* can reconstruct 3 failed data disk */ for (x0 = 0; x0 < opts->rto_dcols; x0++) { - if (x0 >= rm->rm_cols - raidz_parity(rm)) + if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm)) continue; for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) { - if (x1 >= rm->rm_cols - raidz_parity(rm)) + if (x1 >= rm->rm_row[0]->rr_cols - raidz_parity(rm)) continue; for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) { if (x2 >= - rm->rm_cols - raidz_parity(rm)) + rm->rm_row[0]->rr_cols - raidz_parity(rm)) continue; /* Check if should stop */ @@ -703,6 +752,8 @@ run_sweep(void) opts->rto_dcols = dcols_v[d]; opts->rto_offset = (1 << ashift_v[a]) * rand(); opts->rto_dsize = size_v[s]; + opts->rto_expand = rto_opts.rto_expand; + opts->rto_expand_offset = rto_opts.rto_expand_offset; opts->rto_v = 0; /* be quiet */ VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *) opts, @@ -735,18 +786,10 @@ run_sweep(void) return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0); } -/*BUGBUG*/ -#endif /*0 - raidz expansion, currently disabled */ int main(int argc, char **argv) { -/*BUGBUG*/ -/* currently disabled for raidz expansion*/ - int err = 0; - fprintf(stdout, "\nraidz_test temporarily disabled - raidz expansion\n"); - exit(EXIT_FAILURE); -#if 0 size_t i; struct sigaction action; int err = 0; @@ -789,8 +832,6 @@ main(int argc, char **argv) umem_free(rand_data, SPA_MAXBLOCKSIZE); kernel_fini(); -/*BUGBUG*/ -#endif /*0 - raidz expansion, currently disabled */ return (err); } diff --git a/cmd/raidz_test/raidz_test.h b/cmd/raidz_test/raidz_test.h index 09c825ae43c7..1bb36d553e44 100644 --- a/cmd/raidz_test/raidz_test.h +++ b/cmd/raidz_test/raidz_test.h @@ -51,6 +51,8 @@ typedef struct raidz_test_opts { size_t rto_sweep; size_t rto_sweep_timeout; size_t rto_benchmark; + size_t rto_expand; + size_t rto_expand_offset; size_t rto_sanity; size_t rto_gdb; @@ -69,6 +71,8 @@ static const raidz_test_opts_t rto_opts_defaults = { .rto_v = 0, .rto_sweep = 0, .rto_benchmark = 0, + .rto_expand = 
0, + .rto_expand_offset = -1ULL, .rto_sanity = 0, .rto_gdb = 0, .rto_should_stop = B_FALSE diff --git a/include/sys/vdev_raidz.h b/include/sys/vdev_raidz.h index ac0def984ae9..146dd3c29660 100644 --- a/include/sys/vdev_raidz.h +++ b/include/sys/vdev_raidz.h @@ -33,6 +33,7 @@ extern "C" { #endif struct zio; +struct raidz_row; struct raidz_map; #if !defined(_KERNEL) struct kernel_param {}; @@ -43,9 +44,11 @@ struct kernel_param {}; */ struct raidz_map *vdev_raidz_map_alloc(struct zio *, uint64_t, uint64_t, uint64_t); +struct raidz_map *vdev_raidz_map_alloc_expanded(abd_t *, uint64_t, uint64_t, + uint64_t, uint64_t, uint64_t, uint64_t, uint64_t); void vdev_raidz_map_free(struct raidz_map *); void vdev_raidz_generate_parity(struct raidz_map *); -int vdev_raidz_reconstruct(struct raidz_map *, const int *, int); +void vdev_raidz_reconstruct(struct raidz_map *, const int *, int); /* * vdev_raidz_math interface @@ -53,9 +56,9 @@ int vdev_raidz_reconstruct(struct raidz_map *, const int *, int); void vdev_raidz_math_init(void); void vdev_raidz_math_fini(void); const struct raidz_impl_ops *vdev_raidz_math_get_ops(void); -int vdev_raidz_math_generate(struct raidz_map *); -int vdev_raidz_math_reconstruct(struct raidz_map *, const int *, const int *, - const int); +int vdev_raidz_math_generate(struct raidz_map *, struct raidz_row *); +int vdev_raidz_math_reconstruct(struct raidz_map *, struct raidz_row *, + const int *, const int *, const int); int vdev_raidz_impl_set(const char *); typedef struct vdev_raidz_expand { diff --git a/include/sys/vdev_raidz_impl.h b/include/sys/vdev_raidz_impl.h index d517a49dc95b..5f28befb9a45 100644 --- a/include/sys/vdev_raidz_impl.h +++ b/include/sys/vdev_raidz_impl.h @@ -152,7 +152,7 @@ typedef struct raidz_map { int rm_nrows; int rm_nskip; /* Sectors skipped for padding */ zfs_locked_range_t *rm_lr; - raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */ + const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */ raidz_row_t *rm_row[0]; /* 
flexible array of rows */ } raidz_map_t; @@ -195,7 +195,7 @@ extern const raidz_impl_ops_t vdev_raidz_powerpc_altivec_impl; * raidz_big_size Returns size of big columns * raidz_short_size Returns size of short columns */ -#define raidz_parity(rm) ((rm)->rm_firstdatacol) +#define raidz_parity(rm) ((rm)->rm_row[0]->rr_firstdatacol) #define raidz_ncols(rm) ((rm)->rm_row[0]->rr_cols) #define raidz_nbigcols(rm) ((rm)->rm_bigcols) #define raidz_col_p(rm, c) ((rm)->rm_col + (c)) @@ -211,10 +211,10 @@ extern const raidz_impl_ops_t vdev_raidz_powerpc_altivec_impl; */ #define _RAIDZ_GEN_WRAP(code, impl) \ static void \ -impl ## _gen_ ## code(void *rmp) \ +impl ## _gen_ ## code(void *rrp) \ { \ - raidz_map_t *rm = (raidz_map_t *)rmp; \ - raidz_generate_## code ## _impl(rm); \ + raidz_row_t *rr = (raidz_row_t *)rrp; \ + raidz_generate_## code ## _impl(rr); \ } /* @@ -225,10 +225,10 @@ impl ## _gen_ ## code(void *rmp) \ */ #define _RAIDZ_REC_WRAP(code, impl) \ static int \ -impl ## _rec_ ## code(void *rmp, const int *tgtidx) \ +impl ## _rec_ ## code(void *rrp, const int *tgtidx) \ { \ - raidz_map_t *rm = (raidz_map_t *)rmp; \ - return (raidz_reconstruct_## code ## _impl(rm, tgtidx)); \ + raidz_row_t *rr = (raidz_row_t *)rrp; \ + return (raidz_reconstruct_## code ## _impl(rr, tgtidx)); \ } /* diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index ecc2b1bd3588..afae939db0e5 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -123,6 +123,15 @@ KERNEL_C = \ vdev_missing.c \ vdev_queue.c \ vdev_raidz.c \ + vdev_raidz_math_aarch64_neon.c \ + vdev_raidz_math_aarch64_neonx2.c \ + vdev_raidz_math_avx2.c \ + vdev_raidz_math_avx512bw.c \ + vdev_raidz_math_avx512f.c \ + vdev_raidz_math.c \ + vdev_raidz_math_scalar.c \ + vdev_raidz_math_sse2.c \ + vdev_raidz_math_ssse3.c \ vdev_removal.c \ vdev_root.c \ vdev_trim.c \ diff --git a/man/man1/raidz_test.1 b/man/man1/raidz_test.1 index 423177a1b839..15a0d54f7008 100644 --- a/man/man1/raidz_test.1 +++ 
b/man/man1/raidz_test.1 @@ -61,6 +61,11 @@ during testing. .IP Size of data for raidz block. Size is 1 << (zio_size_shift). .HP +.BI "\-r" " reflow_offset" " (default: uint max)" +.IP +Set raidz expansion offset. The expanded raidz map allocation function will +produce different map configurations depending on this value. +.HP .BI "\-S(weep)" .IP Sweep parameter space while verifying the raidz implementations. This option @@ -77,6 +82,10 @@ This options starts the benchmark mode. All implementations are benchmarked using increasing per disk data size. Results are given as throughput per disk, measured in MiB/s. .HP +.BI "\-e(xpansion)" +.IP +Use expanded raidz map allocation function. +.HP .BI "\-v(erbose)" .IP Increase verbosity. diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in index 929498d23ec1..7ea976d129dd 100644 --- a/module/zfs/Makefile.in +++ b/module/zfs/Makefile.in @@ -92,6 +92,8 @@ $(MODULE)-objs += vdev_mirror.o $(MODULE)-objs += vdev_missing.o $(MODULE)-objs += vdev_queue.o $(MODULE)-objs += vdev_raidz.o +$(MODULE)-objs += vdev_raidz_math.o +$(MODULE)-objs += vdev_raidz_math_scalar.o $(MODULE)-objs += vdev_removal.o $(MODULE)-objs += vdev_root.o $(MODULE)-objs += vdev_trim.o @@ -128,25 +130,23 @@ $(MODULE)-objs += zvol.o # Suppress incorrect warnings from versions of objtool which are not # aware of x86 EVEX prefix instructions used for AVX512.
-# BUGBUG raidz expansion not building raidz math just yet -#OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y -#OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512f.o := y +OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y +OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512f.o := y -#$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_sse2.o -#$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_ssse3.o -#$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_avx2.o -#$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_avx512f.o -#$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_avx512bw.o +$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_sse2.o +$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_ssse3.o +$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_avx2.o +$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_avx512f.o +$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_avx512bw.o -#$(MODULE)-$(CONFIG_ARM64) += vdev_raidz_math_aarch64_neon.o -#$(MODULE)-$(CONFIG_ARM64) += vdev_raidz_math_aarch64_neonx2.o +$(MODULE)-$(CONFIG_ARM64) += vdev_raidz_math_aarch64_neon.o +$(MODULE)-$(CONFIG_ARM64) += vdev_raidz_math_aarch64_neonx2.o -#$(MODULE)-$(CONFIG_PPC) += vdev_raidz_math_powerpc_altivec.o -#$(MODULE)-$(CONFIG_PPC64) += vdev_raidz_math_powerpc_altivec.o +$(MODULE)-$(CONFIG_PPC) += vdev_raidz_math_powerpc_altivec.o +$(MODULE)-$(CONFIG_PPC64) += vdev_raidz_math_powerpc_altivec.o -#ifeq ($(CONFIG_ALTIVEC),y) -#$(obj)/vdev_raidz_math_powerpc_altivec.o: c_flags += -maltivec -#endif -# BUGBUG raidz expansion end +ifeq ($(CONFIG_ALTIVEC),y) +$(obj)/vdev_raidz_math_powerpc_altivec.o: c_flags += -maltivec +endif include $(mfdir)/../os/linux/zfs/Makefile diff --git a/module/zfs/spa.c b/module/zfs/spa.c index eaa34170a2fd..48c1e6c68716 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -6668,8 +6668,10 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); if (raidz) { - oldvdpath = kmem_asprintf("raidz%u-%u", + char *tmp = kmem_asprintf("raidz%u-%u", 
oldvd->vdev_nparity, oldvd->vdev_id); + oldvdpath = spa_strdup(tmp); + kmem_strfree(tmp); } else { oldvdpath = spa_strdup(oldvd->vdev_path); } diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index ac7c39694898..6a43071119eb 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -2354,7 +2354,7 @@ spa_init(spa_mode_t mode) zil_init(); vdev_cache_stat_init(); vdev_mirror_stat_init(); - /* vdev_raidz_math_init(); */ + vdev_raidz_math_init(); vdev_file_init(); zfs_prop_init(); zpool_prop_init(); @@ -2376,7 +2376,7 @@ spa_fini(void) vdev_file_fini(); vdev_cache_stat_fini(); vdev_mirror_stat_fini(); - /* vdev_raidz_math_fini(); */ + vdev_raidz_math_fini(); zil_fini(); dmu_fini(); zio_fini(); diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index b64e12a5a357..f01e995776a5 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -139,80 +139,7 @@ VDEV_RAIDZ_64MUL_2((x), mask); \ } -/* Powers of 2 in the RAID-Z Galois field. */ -const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256))) = { - 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, - 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, - 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, - 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, - 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, - 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, - 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0, - 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, - 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, - 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0, - 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, - 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, - 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, - 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, - 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, - 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, - 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, - 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, - 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 
0xaa, - 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, - 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, - 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, - 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, - 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, - 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e, - 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, - 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, - 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09, - 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, - 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16, - 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, - 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01 -}; - -/* Logs of 2 in the RAID-Z Galois field. */ -const uint8_t vdev_raidz_log2[256] __attribute__((aligned(256))) = { - 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6, - 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b, - 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81, - 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71, - 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21, - 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45, - 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9, - 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6, - 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd, - 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88, - 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd, - 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40, - 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e, - 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d, - 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b, - 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57, - 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d, - 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18, - 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c, - 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e, - 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd, - 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61, - 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e, - 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2, - 0x1f, 0x2d, 0x43, 
0xd8, 0xb7, 0x7b, 0xa4, 0x76, - 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6, - 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa, - 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a, - 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51, - 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7, - 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8, - 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf, -}; - uint64_t zfs_raidz_expand_max_offset_pause = UINT64_MAX; - uint64_t zfs_raidz_expand_max_copy_bytes = 10 * SPA_MAXBLOCKSIZE; static void @@ -581,7 +508,8 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols, } /* init RAIDZ parity ops */ - // rm->rm_ops = vdev_raidz_math_get_ops(); + rm->rm_ops = vdev_raidz_math_get_ops(); + return (rm); } @@ -592,7 +520,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols, * old location. Therefore, rows that straddle the reflow_offset will * come from the old location. */ -static raidz_map_t * +noinline raidz_map_t * vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset, uint64_t ashift, uint64_t physical_cols, uint64_t logical_cols, uint64_t nparity, uint64_t reflow_offset) @@ -765,7 +693,7 @@ vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset, ASSERT3U(asize, ==, tot << ashift); /* init RAIDZ parity ops */ - // rm->rm_ops = vdev_raidz_math_get_ops(); + rm->rm_ops = vdev_raidz_math_get_ops(); return (rm); } @@ -949,7 +877,7 @@ vdev_raidz_generate_parity_pqr(raidz_row_t *rr) * parity columns available. 
*/ static void -vdev_raidz_generate_parity_row(raidz_row_t *rr) +vdev_raidz_generate_parity_row(raidz_map_t *rm, raidz_row_t *rr) { if (rr->rr_cols == 0) { /* @@ -961,11 +889,9 @@ vdev_raidz_generate_parity_row(raidz_row_t *rr) return; } -#if 0 /* Generate using the new math implementation */ - if (vdev_raidz_math_generate(rm) != RAIDZ_ORIGINAL_IMPL) + if (vdev_raidz_math_generate(rm, rr) != RAIDZ_ORIGINAL_IMPL) return; -#endif switch (rr->rr_firstdatacol) { case 1: @@ -987,7 +913,7 @@ vdev_raidz_generate_parity(raidz_map_t *rm) { for (int i = 0; i < rm->rm_nrows; i++) { raidz_row_t *rr = rm->rm_row[i]; - vdev_raidz_generate_parity_row(rr); + vdev_raidz_generate_parity_row(rm, rr); } } @@ -1786,11 +1712,12 @@ vdev_raidz_reconstruct_general(raidz_row_t *rr, int *tgts, int ntgts) } int -vdev_raidz_reconstruct_row(raidz_row_t *rr, const int *t, int nt) +vdev_raidz_reconstruct_row(raidz_map_t *rm, raidz_row_t *rr, + const int *t, int nt) { int tgts[VDEV_RAIDZ_MAXPARITY], *dt; int ntgts; - int i, c; + int i, c, ret; int code; int nbadparity, nbaddata; int parity_valid[VDEV_RAIDZ_MAXPARITY]; @@ -1841,12 +1768,10 @@ vdev_raidz_reconstruct_row(raidz_row_t *rr, const int *t, int nt) dt = &tgts[nbadparity]; -#if 0 /* Reconstruct using the new math implementation */ - int ret = vdev_raidz_math_reconstruct(rm, parity_valid, dt, nbaddata); + ret = vdev_raidz_math_reconstruct(rm, rr, parity_valid, dt, nbaddata); if (ret != RAIDZ_ORIGINAL_IMPL) return (ret); -#endif /* * See if we can use any of our optimized reconstruction routines. 
@@ -2001,8 +1926,9 @@ static void vdev_raidz_io_start_write(zio_t *zio, raidz_row_t *rr) { vdev_t *vd = zio->io_vd; + raidz_map_t *rm = zio->io_vsd; - vdev_raidz_generate_parity_row(rr); + vdev_raidz_generate_parity_row(rm, rr); for (int c = 0; c < rr->rr_cols; c++) { raidz_col_t *rc = &rr->rr_col[c]; @@ -2213,6 +2139,7 @@ raidz_parity_verify(zio_t *zio, raidz_row_t *rr) { abd_t *orig[VDEV_RAIDZ_MAXPARITY]; int c, ret = 0; + raidz_map_t *rm = zio->io_vsd; raidz_col_t *rc; blkptr_t *bp = zio->io_bp; @@ -2236,7 +2163,7 @@ raidz_parity_verify(zio_t *zio, raidz_row_t *rr) * This could cause a side effect of fixing stuff we didn't realize * was necessary (i.e. even if we return 0) */ - vdev_raidz_generate_parity_row(rr); + vdev_raidz_generate_parity_row(rm, rr); for (c = 0; c < rr->rr_firstdatacol; c++) { rc = &rr->rr_col[c]; @@ -2499,7 +2426,7 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts) /* XXX is rr_code used anywhere? */ rr->rr_code = 0; if (dead_data > 0) - rr->rr_code = vdev_raidz_reconstruct_row(rr, + rr->rr_code = vdev_raidz_reconstruct_row(rm, rr, my_tgts, t); } @@ -2628,6 +2555,15 @@ vdev_raidz_combrec(zio_t *zio) return (ECKSUM); } +void +vdev_raidz_reconstruct(raidz_map_t *rm, const int *t, int nt) +{ + for (uint64_t row = 0; row < rm->rm_nrows; row++) { + raidz_row_t *rr = rm->rm_row[row]; + vdev_raidz_reconstruct_row(rm, rr, t, nt); + } +} + /* * Complete a write IO operation on a RAIDZ VDev * @@ -2678,7 +2614,8 @@ vdev_raidz_io_done_write_impl(zio_t *zio, raidz_row_t *rr) * vdev_raidz_reconstruct(). 
*/ static int -vdev_raidz_io_done_reconstruct_known_missing(zio_t *zio, raidz_row_t *rr) +vdev_raidz_io_done_reconstruct_known_missing(zio_t *zio, raidz_map_t *rm , + raidz_row_t *rr) { int parity_errors = 0; int parity_untried = 0; @@ -2739,7 +2676,7 @@ vdev_raidz_io_done_reconstruct_known_missing(zio_t *zio, raidz_row_t *rr) ASSERT(rr->rr_firstdatacol >= n); - code = vdev_raidz_reconstruct_row(rr, tgts, n); + code = vdev_raidz_reconstruct_row(rm, rr, tgts, n); } return (code); @@ -2787,7 +2724,7 @@ vdev_raidz_io_done(zio_t *zio) raidz_row_t *rr = rm->rm_row[i]; rr->rr_code = vdev_raidz_io_done_reconstruct_known_missing(zio, - rr); + rm, rr); } if (raidz_checksum_verify(zio) == 0) { diff --git a/module/zfs/vdev_raidz_math.c b/module/zfs/vdev_raidz_math.c index 14120e110693..f44019fc9e86 100644 --- a/module/zfs/vdev_raidz_math.c +++ b/module/zfs/vdev_raidz_math.c @@ -151,7 +151,7 @@ vdev_raidz_math_get_ops(void) * Select parity generation method for raidz_map */ int -vdev_raidz_math_generate(raidz_map_t *rm) +vdev_raidz_math_generate(raidz_map_t *rm, raidz_row_t *rr) { raidz_gen_f gen_parity = NULL; @@ -176,7 +176,7 @@ vdev_raidz_math_generate(raidz_map_t *rm) if (gen_parity == NULL) return (RAIDZ_ORIGINAL_IMPL); - gen_parity(rm); + gen_parity(rr); return (0); } @@ -243,8 +243,8 @@ reconstruct_fun_pqr_sel(raidz_map_t *rm, const int *parity_valid, * @nbaddata - Number of failed data columns */ int -vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid, - const int *dt, const int nbaddata) +vdev_raidz_math_reconstruct(raidz_map_t *rm, raidz_row_t *rr, + const int *parity_valid, const int *dt, const int nbaddata) { raidz_rec_f rec_fn = NULL; @@ -267,7 +267,7 @@ vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid, if (rec_fn == NULL) return (RAIDZ_ORIGINAL_IMPL); else - return (rec_fn(rm, dt)); + return (rec_fn(rr, dt)); } const char *raidz_gen_name[] = { diff --git a/module/zfs/vdev_raidz_math_impl.h b/module/zfs/vdev_raidz_math_impl.h 
index e871d20d27c8..9c1babfeea32 100644 --- a/module/zfs/vdev_raidz_math_impl.h +++ b/module/zfs/vdev_raidz_math_impl.h @@ -37,33 +37,33 @@ * Functions calculate multiplication constants for data reconstruction. * Coefficients depend on RAIDZ geometry, indexes of failed child vdevs, and * used parity columns for reconstruction. - * @rm RAIDZ map + * @rr RAIDZ row * @tgtidx array of missing data indexes * @coeff output array of coefficients. Array must be provided by * user and must hold minimum MUL_CNT values. */ static noinline void -raidz_rec_q_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff) +raidz_rec_q_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff) { - const unsigned ncols = raidz_ncols(rm); + const unsigned ncols = rr->rr_cols; const unsigned x = tgtidx[TARGET_X]; coeff[MUL_Q_X] = gf_exp2(255 - (ncols - x - 1)); } static noinline void -raidz_rec_r_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff) +raidz_rec_r_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff) { - const unsigned ncols = raidz_ncols(rm); + const unsigned ncols = rr->rr_cols; const unsigned x = tgtidx[TARGET_X]; coeff[MUL_R_X] = gf_exp4(255 - (ncols - x - 1)); } static noinline void -raidz_rec_pq_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff) +raidz_rec_pq_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff) { - const unsigned ncols = raidz_ncols(rm); + const unsigned ncols = rr->rr_cols; const unsigned x = tgtidx[TARGET_X]; const unsigned y = tgtidx[TARGET_Y]; gf_t a, b, e; @@ -77,9 +77,9 @@ raidz_rec_pq_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff) } static noinline void -raidz_rec_pr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff) +raidz_rec_pr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff) { - const unsigned ncols = raidz_ncols(rm); + const unsigned ncols = rr->rr_cols; const unsigned x = tgtidx[TARGET_X]; const unsigned y = tgtidx[TARGET_Y]; @@ 
-94,9 +94,9 @@ raidz_rec_pr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff) } static noinline void -raidz_rec_qr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff) +raidz_rec_qr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff) { - const unsigned ncols = raidz_ncols(rm); + const unsigned ncols = rr->rr_cols; const unsigned x = tgtidx[TARGET_X]; const unsigned y = tgtidx[TARGET_Y]; @@ -115,9 +115,9 @@ raidz_rec_qr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff) } static noinline void -raidz_rec_pqr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff) +raidz_rec_pqr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff) { - const unsigned ncols = raidz_ncols(rm); + const unsigned ncols = rr->rr_cols; const unsigned x = tgtidx[TARGET_X]; const unsigned y = tgtidx[TARGET_Y]; const unsigned z = tgtidx[TARGET_Z]; @@ -348,26 +348,26 @@ raidz_mul_abd_cb(void *dc, size_t size, void *private) /* * Generate P parity (RAIDZ1) * - * @rm RAIDZ map + * @rr RAIDZ row */ static raidz_inline void -raidz_generate_p_impl(raidz_map_t * const rm) +raidz_generate_p_impl(raidz_row_t * const rr) { size_t c; - const size_t ncols = raidz_ncols(rm); - const size_t psize = rm->rm_col[CODE_P].rc_size; - abd_t *pabd = rm->rm_col[CODE_P].rc_abd; + const size_t ncols = rr->rr_cols; + const size_t psize = rr->rr_col[CODE_P].rc_size; + abd_t *pabd = rr->rr_col[CODE_P].rc_abd; size_t size; abd_t *dabd; raidz_math_begin(); /* start with first data column */ - raidz_copy(pabd, rm->rm_col[1].rc_abd, psize); + raidz_copy(pabd, rr->rr_col[1].rc_abd, psize); for (c = 2; c < ncols; c++) { - dabd = rm->rm_col[c].rc_abd; - size = rm->rm_col[c].rc_size; + dabd = rr->rr_col[c].rc_abd; + size = rr->rr_col[c].rc_size; /* add data column */ raidz_add(pabd, dabd, size); @@ -415,29 +415,29 @@ raidz_gen_pq_add(void **c, const void *dc, const size_t csize, /* * Generate PQ parity (RAIDZ2) * - * @rm RAIDZ map + * @rr RAIDZ row */ 
static raidz_inline void -raidz_generate_pq_impl(raidz_map_t * const rm) +raidz_generate_pq_impl(raidz_row_t * const rr) { size_t c; - const size_t ncols = raidz_ncols(rm); - const size_t csize = rm->rm_col[CODE_P].rc_size; + const size_t ncols = rr->rr_cols; + const size_t csize = rr->rr_col[CODE_P].rc_size; size_t dsize; abd_t *dabd; abd_t *cabds[] = { - rm->rm_col[CODE_P].rc_abd, - rm->rm_col[CODE_Q].rc_abd + rr->rr_col[CODE_P].rc_abd, + rr->rr_col[CODE_Q].rc_abd }; raidz_math_begin(); - raidz_copy(cabds[CODE_P], rm->rm_col[2].rc_abd, csize); - raidz_copy(cabds[CODE_Q], rm->rm_col[2].rc_abd, csize); + raidz_copy(cabds[CODE_P], rr->rr_col[2].rc_abd, csize); + raidz_copy(cabds[CODE_Q], rr->rr_col[2].rc_abd, csize); for (c = 3; c < ncols; c++) { - dabd = rm->rm_col[c].rc_abd; - dsize = rm->rm_col[c].rc_size; + dabd = rr->rr_col[c].rc_abd; + dsize = rr->rr_col[c].rc_size; abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 2, raidz_gen_pq_add); @@ -488,31 +488,31 @@ raidz_gen_pqr_add(void **c, const void *dc, const size_t csize, /* * Generate PQR parity (RAIDZ2) * - * @rm RAIDZ map + * @rr RAIDZ row */ static raidz_inline void -raidz_generate_pqr_impl(raidz_map_t * const rm) +raidz_generate_pqr_impl(raidz_row_t * const rr) { size_t c; - const size_t ncols = raidz_ncols(rm); - const size_t csize = rm->rm_col[CODE_P].rc_size; + const size_t ncols = rr->rr_cols; + const size_t csize = rr->rr_col[CODE_P].rc_size; size_t dsize; abd_t *dabd; abd_t *cabds[] = { - rm->rm_col[CODE_P].rc_abd, - rm->rm_col[CODE_Q].rc_abd, - rm->rm_col[CODE_R].rc_abd + rr->rr_col[CODE_P].rc_abd, + rr->rr_col[CODE_Q].rc_abd, + rr->rr_col[CODE_R].rc_abd }; raidz_math_begin(); - raidz_copy(cabds[CODE_P], rm->rm_col[3].rc_abd, csize); - raidz_copy(cabds[CODE_Q], rm->rm_col[3].rc_abd, csize); - raidz_copy(cabds[CODE_R], rm->rm_col[3].rc_abd, csize); + raidz_copy(cabds[CODE_P], rr->rr_col[3].rc_abd, csize); + raidz_copy(cabds[CODE_Q], rr->rr_col[3].rc_abd, csize); + raidz_copy(cabds[CODE_R], 
rr->rr_col[3].rc_abd, csize); for (c = 4; c < ncols; c++) { - dabd = rm->rm_col[c].rc_abd; - dsize = rm->rm_col[c].rc_size; + dabd = rr->rr_col[c].rc_abd; + dsize = rr->rr_col[c].rc_size; abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 3, raidz_gen_pqr_add); @@ -580,33 +580,36 @@ raidz_generate_pqr_impl(raidz_map_t * const rm) * @syn_method raidz_add_abd() * @rec_method not applicable * - * @rm RAIDZ map + * @rr RAIDZ row * @tgtidx array of missing data indexes */ static raidz_inline int -raidz_reconstruct_p_impl(raidz_map_t *rm, const int *tgtidx) +raidz_reconstruct_p_impl(raidz_row_t *rr, const int *tgtidx) { size_t c; - const size_t firstdc = raidz_parity(rm); - const size_t ncols = raidz_ncols(rm); + const size_t firstdc = rr->rr_firstdatacol; + const size_t ncols = rr->rr_cols; const size_t x = tgtidx[TARGET_X]; - const size_t xsize = rm->rm_col[x].rc_size; - abd_t *xabd = rm->rm_col[x].rc_abd; + const size_t xsize = rr->rr_col[x].rc_size; + abd_t *xabd = rr->rr_col[x].rc_abd; size_t size; abd_t *dabd; + if (xabd == NULL) + return (1 << CODE_P); + raidz_math_begin(); /* copy P into target */ - raidz_copy(xabd, rm->rm_col[CODE_P].rc_abd, xsize); + raidz_copy(xabd, rr->rr_col[CODE_P].rc_abd, xsize); /* generate p_syndrome */ for (c = firstdc; c < ncols; c++) { if (c == x) continue; - dabd = rm->rm_col[c].rc_abd; - size = MIN(rm->rm_col[c].rc_size, xsize); + dabd = rr->rr_col[c].rc_abd; + size = MIN(rr->rr_col[c].rc_size, xsize); raidz_add(xabd, dabd, size); } @@ -654,30 +657,33 @@ raidz_syn_q_abd(void **xc, const void *dc, const size_t xsize, * @syn_method raidz_add_abd() * @rec_method raidz_mul_abd_cb() * - * @rm RAIDZ map + * @rr RAIDZ row * @tgtidx array of missing data indexes */ static raidz_inline int -raidz_reconstruct_q_impl(raidz_map_t *rm, const int *tgtidx) +raidz_reconstruct_q_impl(raidz_row_t *rr, const int *tgtidx) { size_t c; size_t dsize; abd_t *dabd; - const size_t firstdc = raidz_parity(rm); - const size_t ncols = raidz_ncols(rm); + const 
size_t firstdc = rr->rr_firstdatacol; + const size_t ncols = rr->rr_cols; const size_t x = tgtidx[TARGET_X]; - abd_t *xabd = rm->rm_col[x].rc_abd; - const size_t xsize = rm->rm_col[x].rc_size; + abd_t *xabd = rr->rr_col[x].rc_abd; + const size_t xsize = rr->rr_col[x].rc_size; abd_t *tabds[] = { xabd }; + if (xabd == NULL) + return (1 << CODE_Q); + unsigned coeff[MUL_CNT]; - raidz_rec_q_coeff(rm, tgtidx, coeff); + raidz_rec_q_coeff(rr, tgtidx, coeff); raidz_math_begin(); /* Start with first data column if present */ if (firstdc != x) { - raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize); + raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize); } else { raidz_zero(xabd, xsize); } @@ -688,8 +694,8 @@ raidz_reconstruct_q_impl(raidz_map_t *rm, const int *tgtidx) dabd = NULL; dsize = 0; } else { - dabd = rm->rm_col[c].rc_abd; - dsize = rm->rm_col[c].rc_size; + dabd = rr->rr_col[c].rc_abd; + dsize = rr->rr_col[c].rc_size; } abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1, @@ -697,7 +703,7 @@ raidz_reconstruct_q_impl(raidz_map_t *rm, const int *tgtidx) } /* add Q to the syndrome */ - raidz_add(xabd, rm->rm_col[CODE_Q].rc_abd, xsize); + raidz_add(xabd, rr->rr_col[CODE_Q].rc_abd, xsize); /* transform the syndrome */ abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void*) coeff); @@ -745,30 +751,33 @@ raidz_syn_r_abd(void **xc, const void *dc, const size_t tsize, * @syn_method raidz_add_abd() * @rec_method raidz_mul_abd_cb() * - * @rm RAIDZ map + * @rr RAIDZ row * @tgtidx array of missing data indexes */ static raidz_inline int -raidz_reconstruct_r_impl(raidz_map_t *rm, const int *tgtidx) +raidz_reconstruct_r_impl(raidz_row_t *rr, const int *tgtidx) { size_t c; size_t dsize; abd_t *dabd; - const size_t firstdc = raidz_parity(rm); - const size_t ncols = raidz_ncols(rm); + const size_t firstdc = rr->rr_firstdatacol; + const size_t ncols = rr->rr_cols; const size_t x = tgtidx[TARGET_X]; - const size_t xsize = rm->rm_col[x].rc_size; - abd_t *xabd = rm->rm_col[x].rc_abd; +
const size_t xsize = rr->rr_col[x].rc_size; + abd_t *xabd = rr->rr_col[x].rc_abd; abd_t *tabds[] = { xabd }; + if (xabd == NULL) + return (1 << CODE_R); + unsigned coeff[MUL_CNT]; - raidz_rec_r_coeff(rm, tgtidx, coeff); + raidz_rec_r_coeff(rr, tgtidx, coeff); raidz_math_begin(); /* Start with first data column if present */ if (firstdc != x) { - raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize); + raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize); } else { raidz_zero(xabd, xsize); } @@ -780,8 +789,8 @@ raidz_reconstruct_r_impl(raidz_map_t *rm, const int *tgtidx) dabd = NULL; dsize = 0; } else { - dabd = rm->rm_col[c].rc_abd; - dsize = rm->rm_col[c].rc_size; + dabd = rr->rr_col[c].rc_abd; + dsize = rr->rr_col[c].rc_size; } abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1, @@ -789,7 +798,7 @@ raidz_reconstruct_r_impl(raidz_map_t *rm, const int *tgtidx) } /* add R to the syndrome */ - raidz_add(xabd, rm->rm_col[CODE_R].rc_abd, xsize); + raidz_add(xabd, rr->rr_col[CODE_R].rc_abd, xsize); /* transform the syndrome */ abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void *)coeff); @@ -882,31 +891,34 @@ raidz_rec_pq_abd(void **tc, const size_t tsize, void **c, * @syn_method raidz_syn_pq_abd() * @rec_method raidz_rec_pq_abd() * - * @rm RAIDZ map + * @rr RAIDZ row * @tgtidx array of missing data indexes */ static raidz_inline int -raidz_reconstruct_pq_impl(raidz_map_t *rm, const int *tgtidx) +raidz_reconstruct_pq_impl(raidz_row_t *rr, const int *tgtidx) { size_t c; size_t dsize; abd_t *dabd; - const size_t firstdc = raidz_parity(rm); - const size_t ncols = raidz_ncols(rm); + const size_t firstdc = rr->rr_firstdatacol; + const size_t ncols = rr->rr_cols; const size_t x = tgtidx[TARGET_X]; const size_t y = tgtidx[TARGET_Y]; - const size_t xsize = rm->rm_col[x].rc_size; - const size_t ysize = rm->rm_col[y].rc_size; - abd_t *xabd = rm->rm_col[x].rc_abd; - abd_t *yabd = rm->rm_col[y].rc_abd; + const size_t xsize = rr->rr_col[x].rc_size; + const size_t ysize = 
rr->rr_col[y].rc_size; + abd_t *xabd = rr->rr_col[x].rc_abd; + abd_t *yabd = rr->rr_col[y].rc_abd; abd_t *tabds[2] = { xabd, yabd }; abd_t *cabds[] = { - rm->rm_col[CODE_P].rc_abd, - rm->rm_col[CODE_Q].rc_abd + rr->rr_col[CODE_P].rc_abd, + rr->rr_col[CODE_Q].rc_abd }; + if (xabd == NULL) + return ((1 << CODE_P) | (1 << CODE_Q)); + unsigned coeff[MUL_CNT]; - raidz_rec_pq_coeff(rm, tgtidx, coeff); + raidz_rec_pq_coeff(rr, tgtidx, coeff); /* * Check if some of targets is shorter then others @@ -922,8 +934,8 @@ raidz_reconstruct_pq_impl(raidz_map_t *rm, const int *tgtidx) /* Start with first data column if present */ if (firstdc != x) { - raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize); - raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize); + raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize); + raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize); } else { raidz_zero(xabd, xsize); raidz_zero(yabd, xsize); @@ -935,8 +947,8 @@ raidz_reconstruct_pq_impl(raidz_map_t *rm, const int *tgtidx) dabd = NULL; dsize = 0; } else { - dabd = rm->rm_col[c].rc_abd; - dsize = rm->rm_col[c].rc_size; + dabd = rr->rr_col[c].rc_abd; + dsize = rr->rr_col[c].rc_size; } abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2, @@ -947,7 +959,7 @@ raidz_reconstruct_pq_impl(raidz_map_t *rm, const int *tgtidx) /* Copy shorter targets back to the original abd buffer */ if (ysize < xsize) - raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize); + raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize); raidz_math_end(); @@ -1039,30 +1051,34 @@ raidz_rec_pr_abd(void **t, const size_t tsize, void **c, * @syn_method raidz_syn_pr_abd() * @rec_method raidz_rec_pr_abd() * - * @rm RAIDZ map + * @rr RAIDZ row * @tgtidx array of missing data indexes */ static raidz_inline int -raidz_reconstruct_pr_impl(raidz_map_t *rm, const int *tgtidx) +raidz_reconstruct_pr_impl(raidz_row_t *rr, const int *tgtidx) { size_t c; size_t dsize; abd_t *dabd; - const size_t firstdc = raidz_parity(rm); - const size_t ncols = raidz_ncols(rm); + 
const size_t firstdc = rr->rr_firstdatacol; + const size_t ncols = rr->rr_cols; const size_t x = tgtidx[0]; const size_t y = tgtidx[1]; - const size_t xsize = rm->rm_col[x].rc_size; - const size_t ysize = rm->rm_col[y].rc_size; - abd_t *xabd = rm->rm_col[x].rc_abd; - abd_t *yabd = rm->rm_col[y].rc_abd; + const size_t xsize = rr->rr_col[x].rc_size; + const size_t ysize = rr->rr_col[y].rc_size; + abd_t *xabd = rr->rr_col[x].rc_abd; + abd_t *yabd = rr->rr_col[y].rc_abd; abd_t *tabds[2] = { xabd, yabd }; abd_t *cabds[] = { - rm->rm_col[CODE_P].rc_abd, - rm->rm_col[CODE_R].rc_abd + rr->rr_col[CODE_P].rc_abd, + rr->rr_col[CODE_R].rc_abd }; + + if (xabd == NULL) + return ((1 << CODE_P) | (1 << CODE_R)); + unsigned coeff[MUL_CNT]; - raidz_rec_pr_coeff(rm, tgtidx, coeff); + raidz_rec_pr_coeff(rr, tgtidx, coeff); /* * Check if some of targets are shorter then others. @@ -1078,8 +1094,8 @@ raidz_reconstruct_pr_impl(raidz_map_t *rm, const int *tgtidx) /* Start with first data column if present */ if (firstdc != x) { - raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize); - raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize); + raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize); + raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize); } else { raidz_zero(xabd, xsize); raidz_zero(yabd, xsize); @@ -1091,8 +1107,8 @@ raidz_reconstruct_pr_impl(raidz_map_t *rm, const int *tgtidx) dabd = NULL; dsize = 0; } else { - dabd = rm->rm_col[c].rc_abd; - dsize = rm->rm_col[c].rc_size; + dabd = rr->rr_col[c].rc_abd; + dsize = rr->rr_col[c].rc_size; } abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2, @@ -1105,14 +1121,14 @@ raidz_reconstruct_pr_impl(raidz_map_t *rm, const int *tgtidx) * Copy shorter targets back to the original abd buffer */ if (ysize < xsize) - raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize); + raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize); raidz_math_end(); if (ysize < xsize) abd_free(yabd); - return ((1 << CODE_P) | (1 << CODE_Q)); + return ((1 << CODE_P) | (1 << CODE_R)); } 
@@ -1202,30 +1218,34 @@ raidz_rec_qr_abd(void **t, const size_t tsize, void **c, * @syn_method raidz_syn_qr_abd() * @rec_method raidz_rec_qr_abd() * - * @rm RAIDZ map + * @rr RAIDZ row * @tgtidx array of missing data indexes */ static raidz_inline int -raidz_reconstruct_qr_impl(raidz_map_t *rm, const int *tgtidx) +raidz_reconstruct_qr_impl(raidz_row_t *rr, const int *tgtidx) { size_t c; size_t dsize; abd_t *dabd; - const size_t firstdc = raidz_parity(rm); - const size_t ncols = raidz_ncols(rm); + const size_t firstdc = rr->rr_firstdatacol; + const size_t ncols = rr->rr_cols; const size_t x = tgtidx[TARGET_X]; const size_t y = tgtidx[TARGET_Y]; - const size_t xsize = rm->rm_col[x].rc_size; - const size_t ysize = rm->rm_col[y].rc_size; - abd_t *xabd = rm->rm_col[x].rc_abd; - abd_t *yabd = rm->rm_col[y].rc_abd; + const size_t xsize = rr->rr_col[x].rc_size; + const size_t ysize = rr->rr_col[y].rc_size; + abd_t *xabd = rr->rr_col[x].rc_abd; + abd_t *yabd = rr->rr_col[y].rc_abd; abd_t *tabds[2] = { xabd, yabd }; abd_t *cabds[] = { - rm->rm_col[CODE_Q].rc_abd, - rm->rm_col[CODE_R].rc_abd + rr->rr_col[CODE_Q].rc_abd, + rr->rr_col[CODE_R].rc_abd }; + + if (xabd == NULL) + return ((1 << CODE_Q) | (1 << CODE_R)); + unsigned coeff[MUL_CNT]; - raidz_rec_qr_coeff(rm, tgtidx, coeff); + raidz_rec_qr_coeff(rr, tgtidx, coeff); /* * Check if some of targets is shorter then others @@ -1241,8 +1261,8 @@ raidz_reconstruct_qr_impl(raidz_map_t *rm, const int *tgtidx) /* Start with first data column if present */ if (firstdc != x) { - raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize); - raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize); + raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize); + raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize); } else { raidz_zero(xabd, xsize); raidz_zero(yabd, xsize); @@ -1254,8 +1274,8 @@ raidz_reconstruct_qr_impl(raidz_map_t *rm, const int *tgtidx) dabd = NULL; dsize = 0; } else { - dabd = rm->rm_col[c].rc_abd; - dsize = rm->rm_col[c].rc_size; + dabd
= rr->rr_col[c].rc_abd; + dsize = rr->rr_col[c].rc_size; } abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2, @@ -1268,7 +1288,7 @@ raidz_reconstruct_qr_impl(raidz_map_t *rm, const int *tgtidx) * Copy shorter targets back to the original abd buffer */ if (ysize < xsize) - raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize); + raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize); raidz_math_end(); @@ -1385,34 +1405,38 @@ raidz_rec_pqr_abd(void **t, const size_t tsize, void **c, * @syn_method raidz_syn_pqr_abd() * @rec_method raidz_rec_pqr_abd() * - * @rm RAIDZ map + * @rr RAIDZ row * @tgtidx array of missing data indexes */ static raidz_inline int -raidz_reconstruct_pqr_impl(raidz_map_t *rm, const int *tgtidx) +raidz_reconstruct_pqr_impl(raidz_row_t *rr, const int *tgtidx) { size_t c; size_t dsize; abd_t *dabd; - const size_t firstdc = raidz_parity(rm); - const size_t ncols = raidz_ncols(rm); + const size_t firstdc = rr->rr_firstdatacol; + const size_t ncols = rr->rr_cols; const size_t x = tgtidx[TARGET_X]; const size_t y = tgtidx[TARGET_Y]; const size_t z = tgtidx[TARGET_Z]; - const size_t xsize = rm->rm_col[x].rc_size; - const size_t ysize = rm->rm_col[y].rc_size; - const size_t zsize = rm->rm_col[z].rc_size; - abd_t *xabd = rm->rm_col[x].rc_abd; - abd_t *yabd = rm->rm_col[y].rc_abd; - abd_t *zabd = rm->rm_col[z].rc_abd; + const size_t xsize = rr->rr_col[x].rc_size; + const size_t ysize = rr->rr_col[y].rc_size; + const size_t zsize = rr->rr_col[z].rc_size; + abd_t *xabd = rr->rr_col[x].rc_abd; + abd_t *yabd = rr->rr_col[y].rc_abd; + abd_t *zabd = rr->rr_col[z].rc_abd; abd_t *tabds[] = { xabd, yabd, zabd }; abd_t *cabds[] = { - rm->rm_col[CODE_P].rc_abd, - rm->rm_col[CODE_Q].rc_abd, - rm->rm_col[CODE_R].rc_abd + rr->rr_col[CODE_P].rc_abd, + rr->rr_col[CODE_Q].rc_abd, + rr->rr_col[CODE_R].rc_abd }; + + if (xabd == NULL) + return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R)); + unsigned coeff[MUL_CNT]; - raidz_rec_pqr_coeff(rm, tgtidx, coeff); + raidz_rec_pqr_coeff(rr, tgtidx, 
coeff); /* * Check if some of targets is shorter then others @@ -1432,9 +1456,9 @@ raidz_reconstruct_pqr_impl(raidz_map_t *rm, const int *tgtidx) /* Start with first data column if present */ if (firstdc != x) { - raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize); - raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize); - raidz_copy(zabd, rm->rm_col[firstdc].rc_abd, xsize); + raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize); + raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize); + raidz_copy(zabd, rr->rr_col[firstdc].rc_abd, xsize); } else { raidz_zero(xabd, xsize); raidz_zero(yabd, xsize); @@ -1447,8 +1471,8 @@ raidz_reconstruct_pqr_impl(raidz_map_t *rm, const int *tgtidx) dabd = NULL; dsize = 0; } else { - dabd = rm->rm_col[c].rc_abd; - dsize = rm->rm_col[c].rc_size; + dabd = rr->rr_col[c].rc_abd; + dsize = rr->rr_col[c].rc_size; } abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 3, @@ -1461,9 +1485,9 @@ raidz_reconstruct_pqr_impl(raidz_map_t *rm, const int *tgtidx) * Copy shorter targets back to the original abd buffer */ if (ysize < xsize) - raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize); + raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize); if (zsize < xsize) - raidz_copy(rm->rm_col[z].rc_abd, zabd, zsize); + raidz_copy(rr->rr_col[z].rc_abd, zabd, zsize); raidz_math_end(); diff --git a/scripts/raidz_expand_test.sh b/scripts/raidz_expand_test.sh deleted file mode 100755 index 76ac9ce4a0a5..000000000000 --- a/scripts/raidz_expand_test.sh +++ /dev/null @@ -1,139 +0,0 @@ -#!/bin/bash -x - -combrec=1 -BASE_DIR=$(dirname "$0")/.. 
- -echo 1 >/sys/module/zfs/parameters/zfs_prefetch_disable - -zpool destroy test -zpool create filepool sdb - -zfs destroy -R filepool/files -zfs create -o compression=on filepool/files - -dir=/filepool/files - -for (( i=0; i<7; i=i+1 )); do - truncate -s 512M $dir/$i -done - -function wait_completion -{ - while zpool status test | grep "in progress"; do - sleep 5 - done -} - -function dotest -{ - nparity=$1 - - zpool create -o cachefile=none test raidz$nparity $dir/[0-5] - zfs set primarycache=metadata test - - zfs create test/fs - dd if=/dev/urandom of=/test/fs/file bs=1024k count=1 - - zfs create -o compress=on test/fs2 - cp -r $BASE_DIR /test/fs2/ - #truncate -s 100m /test/fs2/file - #/net/pharos/export/home/mahrens/randwritecomp-linux /test/fs2/file 10000 - - zfs create -o compress=on -o recordsize=8k test/fs3 - cp -r $BASE_DIR /test/fs3/ - #truncate -s 100m /test/fs3/file - #/net/pharos/export/home/mahrens/randwritecomp-linux /test/fs3/file 10000 - - zfs snapshot filepool/files@pre-attach - - sum /test/fs/file - sum /test/fs2/file - sum /test/fs3/file - - zfs list test - zpool list -v test - - sleep 2 - - zpool attach test raidz$nparity-0 $dir/6 - - wait_completion - - zfs list test - zpool list -v test - # should indicate new device is present, pool is larger size - - zfs snapshot filepool/files@post-attach - - zpool export test - zpool import -o cachefile=none -d $dir test - - zfs snapshot filepool/files@post-import - - sum /test/fs/file - sum /test/fs2/file - sum /test/fs3/file - zfs list -r test - zpool list -v test - zpool status -v test - zpool scrub test - wait_completion - zpool status -v test - - zpool export test - zpool import -o cachefile=none -d $dir test - - for (( i=0; i<$nparity; i=i+1 )); do - if [[ ! 
$combrec ]]; then - zpool offline test $dir/$i - fi - dd conv=notrunc if=/dev/zero of=$dir/$i bs=1024k seek=4 count=500 - done - sum /test/fs/file - zpool status -v test - - if [[ $combrec ]]; then - zpool scrub test - else - for (( i=0; i<$nparity; i=i+1 )); do - zpool replace -f test $dir/$i - done - fi - wait_completion - zpool status -v test - zpool clear test - - for (( i=$nparity; i<$nparity*2; i=i+1 )); do - if [[ ! $combrec ]]; then - zpool offline test $dir/$i - fi - dd conv=notrunc if=/dev/zero of=$dir/$i bs=1024k seek=4 count=500 - done - # XXX sometimes, scrub was already started - # XXX some READ (not CKSUM) errors reported - zpool status -v test - if [[ $combrec ]]; then - # XXX if scrub already started above, this scrub doesn't seem to repair everything, some - # repairs happen in final scrub - zpool scrub test - else - for (( i=0; i<$nparity; i=i+1 )); do - zpool replace -f test $dir/$i - done - fi - wait_completion - zpool status -v test - zpool clear test - - sum /test/fs3/file - - zpool scrub test - wait_completion - zpool status -v test - - zpool destroy test -} - -dotest 2 -dotest 3 -dotest 1 diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index d8c109eb7ddc..1750fa9b99de 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -706,7 +706,8 @@ tests = ['redacted_compressed', 'redacted_contents', 'redacted_deleted', tags = ['functional', 'redacted_send'] [tests/functional/raidz] -tests = ['raidz_001_neg', 'raidz_002_pos'] +tests = ['raidz_001_neg', 'raidz_002_pos', 'raidz_003_pos', 'raidz_004_pos', + 'raidz_expand'] tags = ['functional', 'raidz'] [tests/functional/redundancy] diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg index c450764db4fc..2d9f6845c9cc 100644 --- a/tests/zfs-tests/include/tunables.cfg +++ b/tests/zfs-tests/include/tunables.cfg @@ -57,6 +57,7 @@ MULTIHOST_HISTORY UNSUPPORTED zfs_multihost_history MULTIHOST_IMPORT_INTERVALS
multihost.import_intervals zfs_multihost_import_intervals MULTIHOST_INTERVAL UNSUPPORTED zfs_multihost_interval OVERRIDE_ESTIMATE_RECORDSIZE send.override_estimate_recordsize zfs_override_estimate_recordsize +PREFETCH_DISABLE prefetch_disable zfs_prefetch_disable REMOVAL_SUSPEND_PROGRESS removal_suspend_progress zfs_removal_suspend_progress REMOVE_MAX_SEGMENT remove_max_segment zfs_remove_max_segment RESILVER_MIN_TIME_MS resilver_min_time_ms zfs_resilver_min_time_ms diff --git a/tests/zfs-tests/tests/functional/raidz/raidz_003_pos.ksh b/tests/zfs-tests/tests/functional/raidz/raidz_003_pos.ksh new file mode 100755 index 000000000000..020593695c5d --- /dev/null +++ b/tests/zfs-tests/tests/functional/raidz/raidz_003_pos.ksh @@ -0,0 +1,41 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2020 by vStack. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Call the raidz_test tool with -S and -e to test all supported raidz +# implementations with expanded map and default reflow offset. +# This options will test several raidz block geometries and several zio +# parameters that affect raidz block layout. 
Data reconstruction performs +# all combinations of failed disks. Wall time is set to 5min, but actual +# runtime might be longer. +# + +log_must raidz_test -S -e -t 300 + +log_pass "raidz_test parameter sweep test with expanded map succeeded." diff --git a/tests/zfs-tests/tests/functional/raidz/raidz_004_pos.ksh b/tests/zfs-tests/tests/functional/raidz/raidz_004_pos.ksh new file mode 100755 index 000000000000..5a7087139650 --- /dev/null +++ b/tests/zfs-tests/tests/functional/raidz/raidz_004_pos.ksh @@ -0,0 +1,41 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2020 by vStack. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Call the raidz_test tool with -S and -e to test all supported raidz +# implementations with expanded map and zero reflow offset. +# This options will test several raidz block geometries and several zio +# parameters that affect raidz block layout. Data reconstruction performs +# all combinations of failed disks. Wall time is set to 5min, but actual +# runtime might be longer. +# + +log_must raidz_test -S -e -r 0 -t 300 + +log_pass "raidz_test parameter sweep test with expanded map succeeded." 
diff --git a/tests/zfs-tests/tests/functional/raidz/raidz_expand.ksh b/tests/zfs-tests/tests/functional/raidz/raidz_expand.ksh new file mode 100755 index 000000000000..b79e4d6beffe --- /dev/null +++ b/tests/zfs-tests/tests/functional/raidz/raidz_expand.ksh @@ -0,0 +1,232 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2020 by vStack. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# 'zpool attach poolname raidz ...' should attach new device to the pool. +# +# STRATEGY: +# 1. Create block device files for the test raidz pool +# 2.
For each parity value [1..3] +# - create raidz pool +# - fill it with some directories/files +# - attach device to the raidz pool +# - verify that device attached and the raidz pool size increase +# - verify resilver by replacing parity devices +# - verify resilver by replacing data devices +# - verify scrub by zeroing parity devices +# - verify scrub by zeroing data devices +# - verify the raidz pool +# - destroy the raidz pool + +typeset -r devs=6 +typeset -r dev_size_mb=512 + +typeset -a disks + +prefetch_disable=$(get_tunable PREFETCH_DISABLE) + +function cleanup +{ + poolexists "$TESTPOOL" && log_must_busy zpool destroy "$TESTPOOL" + + for i in {0..$devs}; do + log_must rm -f "$TEST_BASE_DIR/dev-$i" + done + + log_must set_tunable32 PREFETCH_DISABLE $prefetch_disable +} + +function wait_expand_completion +{ + while zpool status $TESTPOOL | grep 'raidz expand:' | \ + grep 'in progress'; do + sleep 1 + done +} + +function test_resilver # +{ + typeset pool=$1 + typeset nparity=$2 + typeset dir=$3 + + for (( i=0; i<$nparity; i=i+1 )); do + log_must zpool offline $pool $dir/dev-$i + done + + log_must zpool export $pool + + for (( i=0; i<$nparity; i=i+1 )); do + log_must zpool labelclear -f $dir/dev-$i + done + + log_must zpool import -o cachefile=none -d $dir $pool + + for (( i=0; i<$nparity; i=i+1 )); do + log_must zpool replace -f $pool $dir/dev-$i + done + + while ! is_pool_resilvered $pool; do + sleep 1 + done + + log_must check_pool_status $pool "errors" "No known data errors" + + log_must zpool clear $pool + + for (( i=$nparity; i<$nparity*2; i=i+1 )); do + log_must zpool offline $pool $dir/dev-$i + done + + log_must zpool export $pool + + for (( i=$nparity; i<$nparity*2; i=i+1 )); do + log_must zpool labelclear -f $dir/dev-$i + done + + log_must zpool import -o cachefile=none -d $dir $pool + + for (( i=$nparity; i<$nparity*2; i=i+1 )); do + log_must zpool replace -f $pool $dir/dev-$i + done + + while ! 
is_pool_resilvered $pool; do + sleep 1 + done + + log_must check_pool_status $pool "errors" "No known data errors" + + log_must zpool clear $pool +} + +function test_scrub # +{ + typeset pool=$1 + typeset nparity=$2 + typeset dir=$3 + typeset combrec=$4 + + log_must zpool export $pool + + for (( i=0; i<$nparity; i=i+1 )); do + dd conv=notrunc if=/dev/zero of=$dir/dev-$i \ + bs=1M seek=4 count=$(($dev_size_mb-4)) + done + + log_must zpool import -o cachefile=none -d $dir $pool + + log_must zpool scrub $pool + + while ! is_pool_scrubbed $pool; do + sleep 1 + done + + log_must zpool clear $pool + + log_must zpool export $pool + + for (( i=$nparity; i<$nparity*2; i=i+1 )); do + dd conv=notrunc if=/dev/zero of=$dir/dev-$i \ + bs=1M seek=4 count=$(($dev_size_mb-4)) + done + + log_must zpool import -o cachefile=none -d $dir $pool + + log_must zpool scrub $pool + + while ! is_pool_scrubbed $pool; do + sleep 1 + done + + log_must check_pool_status $pool "errors" "No known data errors" + + log_must zpool clear $pool +} + +log_onexit cleanup + +log_must set_tunable32 PREFETCH_DISABLE 1 + +# Disk files which will be used by pool +for i in {0..$(($devs - 1))}; do + device=$TEST_BASE_DIR/dev-$i + log_must truncate -s ${dev_size_mb}M $device + disks[${#disks[*]}+1]=$device +done + +# Disk file which will be attached +log_must truncate -s 512M $TEST_BASE_DIR/dev-$devs + +for nparity in 1 2 3; do + raid=raidz$nparity + dir=$TEST_BASE_DIR + + log_must zpool create -f -o cachefile=none $TESTPOOL $raid ${disks[@]} + log_must zfs set primarycache=metadata $TESTPOOL + + log_must zfs create $TESTPOOL/fs + log_must fill_fs /$TESTPOOL/fs 1 512 100 1024 R + + log_must zfs create -o compress=on $TESTPOOL/fs2 + log_must fill_fs /$TESTPOOL/fs2 1 512 100 1024 R + + log_must zfs create -o compress=on -o recordsize=8k $TESTPOOL/fs3 + log_must fill_fs /$TESTPOOL/fs3 1 512 100 1024 R + + typeset pool_size=$(get_pool_prop size $TESTPOOL) + + log_must zpool attach $TESTPOOL ${raid}-0 $dir/dev-$devs + 
+ wait_expand_completion + + log_must zpool export $TESTPOOL + log_must zpool import -o cachefile=none -d $dir $TESTPOOL + + typeset disk_attached=$(get_disklist $TESTPOOL | grep dev-$devs) + if [[ -z $disk_attached ]]; then + log_fail "pool $TESTPOOL attached disk not found" + fi + + typeset expand_size=$(get_pool_prop size $TESTPOOL) + if [[ "$expand_size" -le "$pool_size" ]]; then + log_fail "pool $TESTPOOL not expanded" + fi + + log_must zpool export $TESTPOOL + log_must zpool import -o cachefile=none -d $dir $TESTPOOL + + log_must check_pool_status $TESTPOOL "errors" "No known data errors" + + test_resilver $TESTPOOL $nparity $dir + test_scrub $TESTPOOL $nparity $dir + + zpool destroy "$TESTPOOL" +done + +log_pass "raidz expansion test succeeded."