Skip to content

Commit

Permalink
Merge tag 'ceph-for-5.17-rc1' of git://github.com/ceph/ceph-client
Browse files Browse the repository at this point in the history
Pull ceph updates from Ilya Dryomov:
 "The highlight is the new mount "device" string syntax implemented by
  Venky Shankar. It solves some long-standing issues with using
  different auth entities and/or mounting different CephFS filesystems
  from the same cluster, remounting and also misleading /proc/mounts
  contents. The existing syntax of course continues to be supported.

  On top of that, there are a couple of fixes for edge cases in quota and
  a new mount option for turning on unbuffered I/O mode globally instead
  of on a per-file basis with ioctl(CEPH_IOC_SYNCIO)"

* tag 'ceph-for-5.17-rc1' of git://github.com/ceph/ceph-client:
  ceph: move CEPH_SUPER_MAGIC definition to magic.h
  ceph: remove redundant Lsx caps check
  ceph: add new "nopagecache" option
  ceph: don't check for quotas on MDS stray dirs
  ceph: drop send metrics debug message
  rbd: make const pointer spaces a static const array
  ceph: Fix incorrect statfs report for small quota
  ceph: mount syntax module parameter
  doc: document new CephFS mount device syntax
  ceph: record updated mon_addr on remount
  ceph: new device mount syntax
  libceph: rename parse_fsid() to ceph_parse_fsid() and export
  libceph: generalize addr/ip parsing based on delimiter
  • Loading branch information
torvalds committed Jan 20, 2022
2 parents 67ed868 + a0b3a15 commit 64f29d8
Show file tree
Hide file tree
Showing 13 changed files with 255 additions and 57 deletions.
25 changes: 22 additions & 3 deletions Documentation/filesystems/ceph.rst
Original file line number Diff line number Diff line change
Expand Up @@ -82,24 +82,43 @@ Mount Syntax

The basic mount syntax is::

# mount -t ceph monip[:port][,monip2[:port]...]:/[subdir] mnt
# mount -t ceph user@fsid.fs_name=/[subdir] mnt -o mon_addr=monip1[:port][/monip2[:port]]

You only need to specify a single monitor, as the client will get the
full list when it connects. (However, if the monitor you specify
happens to be down, the mount won't succeed.) The port can be left
off if the monitor is using the default. So if the monitor is at
1.2.3.4::

# mount -t ceph 1.2.3.4:/ /mnt/ceph
# mount -t ceph cephuser@07fe3187-00d9-42a3-814b-72a4d5e7d5be.cephfs=/ /mnt/ceph -o mon_addr=1.2.3.4

is sufficient. If /sbin/mount.ceph is installed, a hostname can be
used instead of an IP address.
used instead of an IP address and the cluster FSID can be left out
(as the mount helper will fill it in by reading the ceph configuration
file)::

# mount -t ceph cephuser@cephfs=/ /mnt/ceph -o mon_addr=mon-addr

Multiple monitor addresses can be passed by separating each address with a slash (`/`)::

# mount -t ceph cephuser@cephfs=/ /mnt/ceph -o mon_addr=192.168.1.100/192.168.1.101

When using the mount helper, the monitor address can be read from the ceph
configuration file if available. Note that the cluster FSID (passed as part
of the device string) is validated by checking it against the FSID reported
by the monitor.

Mount Options
=============

mon_addr=ip_address[:port][/ip_address[:port]]
Monitor address of the cluster. This is used to bootstrap the
connection to the cluster. Once the connection is established, the
monitor addresses in the monitor map are followed.

fsid=cluster-id
FSID of the cluster (from `ceph fsid` command).

ip=A.B.C.D[:N]
Specify the IP and/or port the client should bind to locally.
There is normally not much reason to do this. If the IP is not
Expand Down
5 changes: 3 additions & 2 deletions drivers/block/rbd.c
Original file line number Diff line number Diff line change
Expand Up @@ -6189,7 +6189,7 @@ static inline size_t next_token(const char **buf)
* These are the characters that produce nonzero for
* isspace() in the "C" and "POSIX" locales.
*/
const char *spaces = " \f\n\r\t\v";
static const char spaces[] = " \f\n\r\t\v";

*buf += strspn(*buf, spaces); /* Find start of token */

Expand Down Expand Up @@ -6495,7 +6495,8 @@ static int rbd_add_parse_args(const char *buf,
pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
pctx.opts->trim = RBD_TRIM_DEFAULT;

ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL);
ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL,
',');
if (ret)
goto out_err;

Expand Down
3 changes: 1 addition & 2 deletions fs/ceph/caps.c
Original file line number Diff line number Diff line change
Expand Up @@ -3376,8 +3376,7 @@ static void handle_cap_grant(struct inode *inode,
if ((newcaps & CEPH_CAP_LINK_SHARED) &&
(extra_info->issued & CEPH_CAP_LINK_EXCL) == 0) {
set_nlink(inode, le32_to_cpu(grant->nlink));
if (inode->i_nlink == 0 &&
(newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
if (inode->i_nlink == 0)
deleted_inode = true;
}

Expand Down
24 changes: 15 additions & 9 deletions fs/ceph/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,8 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
int fmode, bool isdir)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mount_options *opt =
ceph_inode_to_client(&ci->vfs_inode)->mount_options;
struct ceph_file_info *fi;

dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
Expand All @@ -225,6 +227,9 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
if (!fi)
return -ENOMEM;

if (opt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
fi->flags |= CEPH_F_SYNC;

file->private_data = fi;
}

Expand Down Expand Up @@ -1541,7 +1546,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct ceph_inode_info *ci = ceph_inode(inode);
bool direct_lock = iocb->ki_flags & IOCB_DIRECT;
ssize_t ret;
int want, got = 0;
int want = 0, got = 0;
int retry_op = 0, read = 0;

again:
Expand All @@ -1556,13 +1561,14 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
else
ceph_start_io_read(inode);

if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
want |= CEPH_CAP_FILE_CACHE;
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_CACHE;
want |= CEPH_CAP_FILE_LAZYIO;

ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1, &got);
if (ret < 0) {
if (iocb->ki_flags & IOCB_DIRECT)
if (direct_lock)
ceph_end_io_direct(inode);
else
ceph_end_io_read(inode);
Expand Down Expand Up @@ -1696,7 +1702,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_cap_flush *prealloc_cf;
ssize_t count, written = 0;
int err, want, got;
int err, want = 0, got;
bool direct_lock = false;
u32 map_flags;
u64 pool_flags;
Expand Down Expand Up @@ -1771,10 +1777,10 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)

dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
inode, ceph_vinop(inode), pos, count, i_size_read(inode));
if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
want |= CEPH_CAP_FILE_BUFFER;
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_BUFFER;
want |= CEPH_CAP_FILE_LAZYIO;
got = 0;
err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count, &got);
if (err < 0)
Expand Down
2 changes: 0 additions & 2 deletions fs/ceph/metric.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,6 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
msg->hdr.version = cpu_to_le16(1);
msg->hdr.compat_version = cpu_to_le16(1);
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
dout("client%llu send metrics to mds%d\n",
ceph_client_gid(mdsc->fsc->client), s->s_mds);
ceph_con_send(&s->s_con, msg);

return true;
Expand Down
17 changes: 17 additions & 0 deletions fs/ceph/quota.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ static inline bool ceph_has_realms_with_quotas(struct inode *inode)
/* if root is the real CephFS root, we don't have quota realms */
if (root && ceph_ino(root) == CEPH_INO_ROOT)
return false;
/* MDS stray dirs have no quota realms */
if (ceph_vino_is_reserved(ceph_inode(inode)->i_vino))
return false;
/* otherwise, we can't know for sure */
return true;
}
Expand Down Expand Up @@ -494,10 +497,24 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
if (ci->i_max_bytes) {
total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
/* For quota size less than 4MB, use 4KB block size */
if (!total) {
total = ci->i_max_bytes >> CEPH_4K_BLOCK_SHIFT;
used = ci->i_rbytes >> CEPH_4K_BLOCK_SHIFT;
buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT;
}
/* It is possible for a quota to be exceeded.
* Report 'zero' in that case
*/
free = total > used ? total - used : 0;
/* For quota size less than 4KB, report the
* total=used=4KB,free=0 when quota is full
* and total=free=4KB, used=0 otherwise */
if (!total) {
total = 1;
free = ci->i_max_bytes > ci->i_rbytes ? 1 : 0;
buf->f_frsize = 1 << CEPH_4K_BLOCK_SHIFT;
}
}
spin_unlock(&ci->i_ceph_lock);
if (total) {
Expand Down
Loading

0 comments on commit 64f29d8

Please sign in to comment.