Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

linux: overlayfs support #14070

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ CONTRIBUTORS:
Alec Salazar <alec.j.salazar@gmail.com>
Alejandro R. Sedeño <asedeno@mit.edu>
Alek Pinchuk <alek@nexenta.com>
Aleksa Sarai <cyphar@cyphar.com>
Alex Braunegg <alex.braunegg@gmail.com>
Alex McWhirter <alexmcwhirter@triadic.us>
Alex Reece <alex@delphix.com>
Expand Down Expand Up @@ -236,6 +237,7 @@ CONTRIBUTORS:
Paul Dagnelie <pcd@delphix.com>
Paul Zuchowski <pzuchowski@datto.com>
Pavel Boldin <boldin.pavel@gmail.com>
Pavel Snajdr <snajpa@snajpa.net>
Pavel Zakharov <pavel.zakharov@delphix.com>
Pawel Jakub Dawidek <pjd@FreeBSD.org>
Pedro Giffuni <pfg@freebsd.org>
Expand Down
10 changes: 10 additions & 0 deletions cmd/zdb/zdb_il.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,14 @@ zil_prt_rec_rename(zilog_t *zilog, int txtype, const void *arg)
(void) printf("%ssdoid %llu, tdoid %llu\n", tab_prefix,
(u_longlong_t)lr->lr_sdoid, (u_longlong_t)lr->lr_tdoid);
(void) printf("%ssrc %s tgt %s\n", tab_prefix, snm, tnm);
switch (txtype) {
case TX_RENAME_EXCHANGE:
(void) printf("%sflags RENAME_EXCHANGE\n", tab_prefix);
break;
case TX_RENAME_WHITEOUT:
(void) printf("%sflags RENAME_WHITEOUT\n", tab_prefix);
break;
}
}

static int
Expand Down Expand Up @@ -330,6 +338,8 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = {
{.zri_print = zil_prt_rec_write, .zri_name = "TX_WRITE2 "},
{.zri_print = zil_prt_rec_setsaxattr,
.zri_name = "TX_SETSAXATTR "},
{.zri_print = zil_prt_rec_rename, .zri_name = "TX_RENAME_EXCHANGE "},
{.zri_print = zil_prt_rec_rename, .zri_name = "TX_RENAME_WHITEOUT "},
};

static int
Expand Down
2 changes: 2 additions & 0 deletions cmd/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -2368,6 +2368,8 @@ static zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
NULL, /* TX_MKDIR_ACL_ATTR */
NULL, /* TX_WRITE2 */
NULL, /* TX_SETSAXATTR */
NULL, /* TX_RENAME_EXCHANGE */
NULL, /* TX_RENAME_WHITEOUT */
};

/*
Expand Down
30 changes: 30 additions & 0 deletions config/kernel-dentry-alias.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
dnl #
dnl # 3.18 API change
dnl # Dentry aliases are in the d_u member of struct dentry.
dnl #
dnl # The test program below walks inode->i_dentry with
dnl # hlist_for_each_entry() using d_u.d_alias as the list member, so it
dnl # only compiles when that member exists.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U], [
ZFS_LINUX_TEST_SRC([dentry_alias_d_u], [
#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/list.h>
], [
struct inode *inode __attribute__ ((unused)) = NULL;
struct dentry *dentry __attribute__ ((unused)) = NULL;
hlist_for_each_entry(dentry, &inode->i_dentry,
d_u.d_alias) {
d_drop(dentry);
}
])
])

dnl #
dnl # Report the outcome of the dentry_alias_d_u test and define
dnl # HAVE_DENTRY_D_U_ALIASES when it succeeded.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_DENTRY_ALIAS_D_U], [
AC_MSG_CHECKING([whether dentry aliases are in d_u member])
ZFS_LINUX_TEST_RESULT([dentry_alias_d_u], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_DENTRY_D_U_ALIASES, 1,
[dentry aliases are in d_u member])
],[
AC_MSG_RESULT(no)
])
])

71 changes: 63 additions & 8 deletions config/kernel-rename.m4
Original file line number Diff line number Diff line change
@@ -1,8 +1,28 @@
AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [
dnl #
dnl # 3.15 (to 4.9) API change,
dnl #
dnl # A new version of iops->rename() was added (rename2) that takes a flag
dnl # argument (to support renameat2). However this separate function was
dnl # merged back into iops->rename() in Linux 4.9.
dnl #
ZFS_LINUX_TEST_SRC([inode_operations_rename2], [
#include <linux/fs.h>
int rename2_fn(struct inode *sip, struct dentry *sdp,
struct inode *tip, struct dentry *tdp,
unsigned int flags) { return 0; }

static const struct inode_operations
iops __attribute__ ((unused)) = {
.rename2 = rename2_fn,
};
],[])

dnl #
dnl # 4.9 API change,
dnl # iops->rename2() merged into iops->rename(), and iops->rename() now wants
dnl # flags.
dnl #
dnl # iops->rename2() merged into iops->rename(), and iops->rename() now
dnl # wants flags.
dnl #
ZFS_LINUX_TEST_SRC([inode_operations_rename_flags], [
#include <linux/fs.h>
Expand All @@ -16,11 +36,29 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [
};
],[])

dnl #
dnl # EL7 compatibility
dnl #
dnl # EL7 has backported renameat2 support, but it's done by defining a
dnl # separate iops wrapper structure that takes the .rename2 function.
dnl #
ZFS_LINUX_TEST_SRC([dir_inode_operations_wrapper_rename2], [
#include <linux/fs.h>
int rename2_fn(struct inode *sip, struct dentry *sdp,
struct inode *tip, struct dentry *tdp,
unsigned int flags) { return 0; }

static const struct inode_operations_wrapper
iops __attribute__ ((unused)) = {
.rename2 = rename2_fn,
};
],[])

dnl #
dnl # 5.12 API change,
dnl #
dnl # Linux 5.12 introduced passing struct user_namespace* as the first argument
dnl # of the rename() and other inode_operations members.
dnl # Linux 5.12 introduced passing struct user_namespace* as the first
dnl # argument of the rename() and other inode_operations members.
dnl #
ZFS_LINUX_TEST_SRC([inode_operations_rename_userns], [
#include <linux/fs.h>
Expand All @@ -44,13 +82,30 @@ AC_DEFUN([ZFS_AC_KERNEL_RENAME], [
],[
AC_MSG_RESULT(no)

AC_MSG_CHECKING([whether iop->rename() wants flags])
ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [
AC_MSG_CHECKING([whether iops->rename2() exists])
ZFS_LINUX_TEST_RESULT([inode_operations_rename2], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1,
[iops->rename() wants flags])
AC_DEFINE(HAVE_RENAME2, 1, [iops->rename2() exists])
],[
AC_MSG_RESULT(no)

AC_MSG_CHECKING([whether iops->rename() wants flags])
ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1,
[iops->rename() wants flags])
],[
AC_MSG_RESULT(no)

AC_MSG_CHECKING([whether struct inode_operations_wrapper takes .rename2()])
ZFS_LINUX_TEST_RESULT([dir_inode_operations_wrapper_rename2], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_RENAME2_OPERATIONS_WRAPPER, 1,
[struct inode_operations_wrapper takes .rename2()])
],[
AC_MSG_RESULT(no)
])
])
])
])
])
2 changes: 2 additions & 0 deletions config/kernel.m4
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_SETATTR_PREPARE
ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED
ZFS_AC_KERNEL_SRC_DENTRY
ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U
ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE
ZFS_AC_KERNEL_SRC_SECURITY_INODE
ZFS_AC_KERNEL_SRC_FST_MOUNT
Expand Down Expand Up @@ -217,6 +218,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_SETATTR_PREPARE
ZFS_AC_KERNEL_INSERT_INODE_LOCKED
ZFS_AC_KERNEL_DENTRY
ZFS_AC_KERNEL_DENTRY_ALIAS_D_U
ZFS_AC_KERNEL_TRUNCATE_SETSIZE
ZFS_AC_KERNEL_SECURITY_INODE
ZFS_AC_KERNEL_FST_MOUNT
Expand Down
3 changes: 2 additions & 1 deletion include/os/freebsd/zfs/sys/zfs_vnops_os.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ extern int zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd,
extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr,
zuserns_t *mnt_ns);
extern int zfs_rename(znode_t *sdzp, const char *snm, znode_t *tdzp,
const char *tnm, cred_t *cr, int flags, zuserns_t *mnt_ns);
const char *tnm, cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap,
zuserns_t *mnt_ns);
extern int zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
const char *link, znode_t **zpp, cred_t *cr, int flags, zuserns_t *mnt_ns);
extern int zfs_link(znode_t *tdzp, znode_t *sp,
Expand Down
21 changes: 21 additions & 0 deletions include/os/linux/kernel/linux/dcache_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,25 @@ d_clear_d_op(struct dentry *dentry)
DCACHE_OP_REVALIDATE | DCACHE_OP_DELETE);
}

/*
* Walk and invalidate all dentry aliases of an inode
* unless it's a mountpoint
*/
static inline void
zpl_d_drop_aliases(struct inode *inode)
{
struct dentry *dentry;
spin_lock(&inode->i_lock);
#ifdef HAVE_DENTRY_D_U_ALIASES
hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
#else
hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
#endif
if (!IS_ROOT(dentry) && !d_mountpoint(dentry) &&
(dentry->d_inode == inode)) {
d_drop(dentry);
}
}
spin_unlock(&inode->i_lock);
}
#endif /* _ZFS_DCACHE_H */
13 changes: 13 additions & 0 deletions include/os/linux/kernel/linux/vfs_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,19 @@ static inline void zfs_gid_write(struct inode *ip, gid_t gid)
ip->i_gid = make_kgid(kcred->user_ns, gid);
}

/*
 * 3.15 API change
 *
 * Fallback definitions of the rename flags.  Each flag is only defined
 * here when the headers included so far have not already defined it.
 */
#ifndef RENAME_NOREPLACE
#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */
#endif
#ifndef RENAME_EXCHANGE
#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */
#endif
#ifndef RENAME_WHITEOUT
#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */
#endif

/*
* 4.9 API change
*/
Expand Down
20 changes: 12 additions & 8 deletions include/os/linux/spl/sys/debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,16 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
(long long) (_verify3_right)); \
} while (0)

/*
 * VERIFY_IMPLY(A, B): check that whenever A is true, B is true as well.
 * B is only evaluated when A is true.  When the check fails, spl_assert()
 * is called with the stringified condition and the current file,
 * function and line.  The whole macro is a void expression.
 */
#define VERIFY_IMPLY(A, B) \
((void)(likely((!(A)) || (B)) || \
spl_assert("(" #A ") implies (" #B ")", \
__FILE__, __FUNCTION__, __LINE__)))

/*
 * VERIFY_EQUIV(A, B): check that A and B have the same truth value; both
 * are normalized with !! before being compared, and both are evaluated.
 * When the check fails, spl_assert() is called with the stringified
 * condition and the current file, function and line.  The whole macro is
 * a void expression.
 */
#define VERIFY_EQUIV(A, B) \
((void)(likely(!!(A) == !!(B)) || \
spl_assert("(" #A ") is equivalent to (" #B ")", \
__FILE__, __FUNCTION__, __LINE__)))

/*
* Debugging disabled (--disable-debug)
*/
Expand Down Expand Up @@ -171,14 +181,8 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
#define ASSERT3P VERIFY3P
#define ASSERT0 VERIFY0
#define ASSERT VERIFY
#define IMPLY(A, B) \
((void)(likely((!(A)) || (B)) || \
spl_assert("(" #A ") implies (" #B ")", \
__FILE__, __FUNCTION__, __LINE__)))
#define EQUIV(A, B) \
((void)(likely(!!(A) == !!(B)) || \
spl_assert("(" #A ") is equivalent to (" #B ")", \
__FILE__, __FUNCTION__, __LINE__)))
#define IMPLY VERIFY_IMPLY
#define EQUIV VERIFY_EQUIV

#endif /* NDEBUG */

Expand Down
10 changes: 10 additions & 0 deletions include/os/linux/spl/sys/sysmacros.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,16 @@ extern uint32_t zone_get_hostid(void *zone);
extern void spl_setup(void);
extern void spl_cleanup(void);

/*
 * Build a dev_t from a major/minor pair.  The kernel module has no libc,
 * so this is defined inline here.  Only the low 12 bits of the major
 * (the first 4096 majors) and the low 8 bits of the minor (the first 256
 * minors) are kept; any higher bits are discarded.
 */
static inline dev_t
makedev(unsigned int major, unsigned int minor)
{
	const unsigned int major_bits = major & 0xFFF;
	const unsigned int minor_bits = minor & 0xFF;

	return ((major_bits << 8) | minor_bits);
}

#define highbit(x) __fls(x)
#define lowbit(x) __ffs(x)

Expand Down
9 changes: 3 additions & 6 deletions include/os/linux/zfs/sys/trace_acl.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
__field(boolean_t, z_is_sa)
__field(boolean_t, z_is_mapped)
__field(boolean_t, z_is_ctldir)
__field(boolean_t, z_is_stale)

__field(uint32_t, i_uid)
__field(uint32_t, i_gid)
Expand Down Expand Up @@ -99,7 +98,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
__entry->z_is_sa = zn->z_is_sa;
__entry->z_is_mapped = zn->z_is_mapped;
__entry->z_is_ctldir = zn->z_is_ctldir;
__entry->z_is_stale = zn->z_is_stale;

__entry->i_uid = KUID_TO_SUID(ZTOI(zn)->i_uid);
__entry->i_gid = KGID_TO_SGID(ZTOI(zn)->i_gid);
Expand All @@ -121,9 +119,8 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
"zn_prefetch %u blksz %u seq %u "
"mapcnt %llu size %llu pflags %llu "
"sync_cnt %u sync_writes_cnt %u async_writes_cnt %u "
"mode 0x%x is_sa %d is_mapped %d "
"is_ctldir %d is_stale %d inode { "
"uid %u gid %u ino %lu nlink %u size %lli "
"mode 0x%x is_sa %d is_mapped %d is_ctldir %d "
"inode { uid %u gid %u ino %lu nlink %u size %lli "
"blkbits %u bytes %u mode 0x%x generation %x } } "
"ace { type %u flags %u access_mask %u } mask_matched %u",
__entry->z_id, __entry->z_unlinked, __entry->z_atime_dirty,
Expand All @@ -132,7 +129,7 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
__entry->z_pflags, __entry->z_sync_cnt,
__entry->z_sync_writes_cnt, __entry->z_async_writes_cnt,
__entry->z_mode, __entry->z_is_sa, __entry->z_is_mapped,
__entry->z_is_ctldir, __entry->z_is_stale, __entry->i_uid,
__entry->z_is_ctldir, __entry->i_uid,
__entry->i_gid, __entry->i_ino, __entry->i_nlink,
__entry->i_size, __entry->i_blkbits,
__entry->i_bytes, __entry->i_mode, __entry->i_generation,
Expand Down
1 change: 1 addition & 0 deletions include/os/linux/zfs/sys/zfs_dir.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ extern "C" {
extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **,
int, int *, pathname_t *);
extern void zfs_dirent_unlock(zfs_dirlock_t *);
extern int zfs_drop_nlink(znode_t *, dmu_tx_t *, boolean_t *);
extern int zfs_link_create(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int);
extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int,
boolean_t *);
Expand Down
3 changes: 2 additions & 1 deletion include/os/linux/zfs/sys/zfs_vnops_os.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ extern int zfs_getattr_fast(struct user_namespace *, struct inode *ip,
extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr,
zuserns_t *mnt_ns);
extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp,
char *tnm, cred_t *cr, int flags, zuserns_t *mnt_ns);
char *tnm, cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap,
zuserns_t *mnt_ns);
extern int zfs_symlink(znode_t *dzp, char *name, vattr_t *vap,
char *link, znode_t **zpp, cred_t *cr, int flags, zuserns_t *mnt_ns);
extern int zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr);
Expand Down
7 changes: 6 additions & 1 deletion include/os/linux/zfs/sys/zpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,15 @@ extern void zpl_vap_init(vattr_t *vap, struct inode *dir,
umode_t mode, cred_t *cr, zuserns_t *mnt_ns);

extern const struct inode_operations zpl_inode_operations;
#ifdef HAVE_RENAME2_OPERATIONS_WRAPPER
extern const struct inode_operations_wrapper zpl_dir_inode_operations;
#else
extern const struct inode_operations zpl_dir_inode_operations;
#endif
extern const struct inode_operations zpl_symlink_inode_operations;
extern const struct inode_operations zpl_special_inode_operations;
extern dentry_operations_t zpl_dentry_operations;

/* zpl_file.c */
extern const struct address_space_operations zpl_address_space_operations;
extern const struct file_operations zpl_file_operations;
extern const struct file_operations zpl_dir_file_operations;
Expand Down
7 changes: 6 additions & 1 deletion include/sys/zfs_znode.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,6 @@ typedef struct znode {
boolean_t z_is_sa; /* are we native sa? */
boolean_t z_is_mapped; /* are we mmap'ed */
boolean_t z_is_ctldir; /* are we .zfs entry */
boolean_t z_is_stale; /* are we stale due to rollback? */
boolean_t z_suspended; /* extra ref from a suspend? */
uint_t z_blksz; /* block size in bytes */
uint_t z_seq; /* modification sequence number */
Expand Down Expand Up @@ -300,6 +299,12 @@ extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname,
znode_t *szp);
extern void zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx,
uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp,
const char *dname, znode_t *szp);
extern void zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx,
uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp,
const char *dname, znode_t *szp, znode_t *wzp);
extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, offset_t off, ssize_t len, int ioflag,
zil_callback_t callback, void *callback_data);
Expand Down
Loading