1
0
Fork 0

The big ticket item here is support for msgr2 on-wire protocol, which

adds the option of full in-transit encryption using AES-GCM algorithm
 (myself).  On top of that we have a series to avoid intermittent
 errors during recovery with recover_session=clean and some MDS request
 encoding work from Jeff, a cap handling fix and assorted observability
 improvements from Luis and Xiubo and a good number of cleanups.  Luis
 also ran into a corner case with quotas which sadly means that we are
 back to denying cross-quota-realm renames.
 -----BEGIN PGP SIGNATURE-----
 
 iQFHBAABCAAxFiEEydHwtzie9C7TfviiSn/eOAIR84sFAl/beWITHGlkcnlvbW92
 QGdtYWlsLmNvbQAKCRBKf944AhHzi4i0CACnvd87l2n7dndig7p5d5lVsmo8tAFs
 wHYHaIVisWKMcqKoT+YJajSgzaonxjzvYiyCzwLxV7s7vI7cswAwjEfYT7tTDRp2
 pnO1+4N/1ftznnTk/1QdqwOQLUg5UtdgWvFCaXQF+Vr/YroZomKJPaK8fXK882pC
 9FBjoLNy1HWySsoXPCxJktmDzpEEyYRNJg0vquxm7mxwTgQErupWlwEFjNg5LBkm
 gC0UoKhCE3DeUrXnoq21Ga62RIajxHofTooNx7dg+JiSVgluW+nORaWDYJXNzwLC
 j5puSe4pWIah+gmcwIFuyNz4ddkvVL4URvsYPGkVFYXlEefQjErc10Jh
 =6b9f
 -----END PGP SIGNATURE-----

Merge tag 'ceph-for-5.11-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The big ticket item here is support for msgr2 on-wire protocol, which
  adds the option of full in-transit encryption using AES-GCM algorithm
  (myself).

  On top of that we have a series to avoid intermittent errors during
  recovery with recover_session=clean and some MDS request encoding work
  from Jeff, a cap handling fix and assorted observability improvements
  from Luis and Xiubo and a good number of cleanups.

  Luis also ran into a corner case with quotas which sadly means that we
  are back to denying cross-quota-realm renames"

* tag 'ceph-for-5.11-rc1' of git://github.com/ceph/ceph-client: (59 commits)
  libceph: drop ceph_auth_{create,update}_authorizer()
  libceph, ceph: make use of __ceph_auth_get_authorizer() in msgr1
  libceph, ceph: implement msgr2.1 protocol (crc and secure modes)
  libceph: introduce connection modes and ms_mode option
  libceph, rbd: ignore addr->type while comparing in some cases
  libceph, ceph: get and handle cluster maps with addrvecs
  libceph: factor out finish_auth()
  libceph: drop ac->ops->name field
  libceph: amend cephx init_protocol() and build_request()
  libceph, ceph: incorporate nautilus cephx changes
  libceph: safer en/decoding of cephx requests and replies
  libceph: more insight into ticket expiry and invalidation
  libceph: move msgr1 protocol specific fields to its own struct
  libceph: move msgr1 protocol implementation to its own file
  libceph: separate msgr1 protocol implementation
  libceph: export remaining protocol independent infrastructure
  libceph: export zero_page
  libceph: rename and export con->flags bits
  libceph: rename and export con->state states
  libceph: make con->state an int
  ...
zero-sugar-mainline-defconfig
Linus Torvalds 2020-12-17 11:53:52 -08:00
commit be695ee29e
41 changed files with 7225 additions and 2193 deletions

View File

@ -3925,8 +3925,12 @@ static int find_watcher(struct rbd_device *rbd_dev,
sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie);
for (i = 0; i < num_watchers; i++) {
if (!memcmp(&watchers[i].addr, &locker->info.addr,
sizeof(locker->info.addr)) &&
/*
* Ignore addr->type while comparing. This mimics
* entity_addr_t::get_legacy_str() + strcmp().
*/
if (ceph_addr_equal_no_type(&watchers[i].addr,
&locker->info.addr) &&
watchers[i].cookie == cookie) {
struct rbd_client_id cid = {
.gid = le64_to_cpu(watchers[i].name.num),

View File

@ -840,7 +840,7 @@ static int ceph_writepages_start(struct address_space *mapping,
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
if (ci->i_wrbuffer_ref > 0) {
pr_warn_ratelimited(
"writepage_start %p %lld forced umount\n",
@ -1264,7 +1264,7 @@ ceph_find_incompatible(struct page *page)
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
dout(" page %p forced umount\n", page);
return ERR_PTR(-EIO);
}
@ -1321,7 +1321,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len);
for (;;) {
page = grab_cache_page_write_begin(mapping, index, 0);
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
r = -ENOMEM;
break;

View File

@ -1140,16 +1140,24 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
{
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
struct ceph_mds_client *mdsc =
ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
struct ceph_mds_client *mdsc;
int removed = 0;
/* 'ci' being NULL means the remove have already occurred */
if (!ci) {
dout("%s: cap inode is NULL\n", __func__);
return;
}
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc;
/* remove from inode's cap rbtree, and clear auth cap */
rb_erase(&cap->ci_node, &ci->i_caps);
if (ci->i_auth_cap == cap) {
WARN_ON_ONCE(!list_empty(&ci->i_dirty_item));
WARN_ON_ONCE(!list_empty(&ci->i_dirty_item) &&
!mdsc->fsc->blocklisted);
ci->i_auth_cap = NULL;
}
@ -2746,7 +2754,7 @@ again:
goto out_unlock;
}
if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
dout("get_cap_refs %p forced umount\n", inode);
ret = -EIO;
goto out_unlock;
@ -4027,15 +4035,13 @@ void ceph_handle_caps(struct ceph_mds_session *session,
}
if (msg_version >= 8) {
u64 flush_tid;
u32 caller_uid, caller_gid;
u32 pool_ns_len;
/* version >= 6 */
ceph_decode_64_safe(&p, end, flush_tid, bad);
ceph_decode_skip_64(&p, end, bad); // flush_tid
/* version >= 7 */
ceph_decode_32_safe(&p, end, caller_uid, bad);
ceph_decode_32_safe(&p, end, caller_gid, bad);
ceph_decode_skip_32(&p, end, bad); // caller_uid
ceph_decode_skip_32(&p, end, bad); // caller_gid
/* version >= 8 */
ceph_decode_32_safe(&p, end, pool_ns_len, bad);
if (pool_ns_len > 0) {
@ -4058,9 +4064,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
}
if (msg_version >= 11) {
u32 flags;
/* version >= 10 */
ceph_decode_32_safe(&p, end, flags, bad);
ceph_decode_skip_32(&p, end, bad); // flags
/* version >= 11 */
extra_info.dirstat_valid = true;
ceph_decode_64_safe(&p, end, extra_info.nfiles, bad);

View File

@ -304,11 +304,25 @@ static int mds_sessions_show(struct seq_file *s, void *ptr)
return 0;
}
static int status_show(struct seq_file *s, void *p)
{
struct ceph_fs_client *fsc = s->private;
struct ceph_entity_inst *inst = &fsc->client->msgr.inst;
struct ceph_entity_addr *client_addr = ceph_client_addr(fsc->client);
seq_printf(s, "instance: %s.%lld %s/%u\n", ENTITY_NAME(inst->name),
ceph_pr_addr(client_addr), le32_to_cpu(client_addr->nonce));
seq_printf(s, "blocklisted: %s\n", fsc->blocklisted ? "true" : "false");
return 0;
}
DEFINE_SHOW_ATTRIBUTE(mdsmap);
DEFINE_SHOW_ATTRIBUTE(mdsc);
DEFINE_SHOW_ATTRIBUTE(caps);
DEFINE_SHOW_ATTRIBUTE(mds_sessions);
DEFINE_SHOW_ATTRIBUTE(metric);
DEFINE_SHOW_ATTRIBUTE(status);
/*
@ -394,6 +408,12 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
fsc->client->debugfs_dir,
fsc,
&caps_fops);
fsc->debugfs_status = debugfs_create_file("status",
0400,
fsc->client->debugfs_dir,
fsc,
&status_fops);
}

View File

@ -1202,12 +1202,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
op = CEPH_MDS_OP_RENAMESNAP;
else
return -EROFS;
} else if (old_dir != new_dir) {
err = ceph_quota_check_rename(mdsc, d_inode(old_dentry),
new_dir);
if (err)
return err;
}
/* don't allow cross-quota renames */
if ((old_dir != new_dir) &&
(!ceph_quota_is_same_realm(old_dir, new_dir)))
return -EXDEV;
dout("rename dir %p dentry %p to dir %p dentry %p\n",
old_dir, old_dentry, new_dir, new_dentry);

View File

@ -1315,15 +1315,10 @@ retry_lookup:
}
if (rinfo->head->is_target) {
tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
in = ceph_get_inode(sb, tvino);
if (IS_ERR(in)) {
err = PTR_ERR(in);
goto done;
}
/* Should be filled in by handle_reply */
BUG_ON(!req->r_target_inode);
in = req->r_target_inode;
err = ceph_fill_inode(in, req->r_locked_page, &rinfo->targeti,
NULL, session,
(!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
@ -1333,11 +1328,13 @@ retry_lookup:
if (err < 0) {
pr_err("ceph_fill_inode badness %p %llx.%llx\n",
in, ceph_vinop(in));
req->r_target_inode = NULL;
if (in->i_state & I_NEW)
discard_new_inode(in);
else
iput(in);
goto done;
}
req->r_target_inode = in;
if (in->i_state & I_NEW)
unlock_new_inode(in);
}
@ -1597,8 +1594,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
struct dentry *dn;
struct inode *in;
int err = 0, skipped = 0, ret, i;
struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
u32 frag = le32_to_cpu(rhead->args.readdir.frag);
u32 frag = le32_to_cpu(req->r_args.readdir.frag);
u32 last_hash = 0;
u32 fpos_offset;
struct ceph_readdir_cache_control cache_ctl = {};
@ -1615,7 +1611,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
} else if (rinfo->offset_hash) {
/* mds understands offset_hash */
WARN_ON_ONCE(req->r_readdir_offset != 2);
last_hash = le32_to_cpu(rhead->args.readdir.offset_hash);
last_hash = le32_to_cpu(req->r_args.readdir.offset_hash);
}
}
@ -1888,7 +1884,7 @@ static void ceph_do_invalidate_pages(struct inode *inode)
mutex_lock(&ci->i_truncate_mutex);
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n",
inode, ceph_ino(inode));
mapping_set_error(inode->i_mapping, -EIO);
@ -2340,15 +2336,23 @@ int ceph_permission(struct inode *inode, int mask)
}
/* Craft a mask of needed caps given a set of requested statx attrs. */
static int statx_to_caps(u32 want)
static int statx_to_caps(u32 want, umode_t mode)
{
int mask = 0;
if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME))
mask |= CEPH_CAP_AUTH_SHARED;
if (want & (STATX_NLINK|STATX_CTIME))
mask |= CEPH_CAP_LINK_SHARED;
if (want & (STATX_NLINK|STATX_CTIME)) {
/*
* The link count for directories depends on inode->i_subdirs,
* and that is only updated when Fs caps are held.
*/
if (S_ISDIR(mode))
mask |= CEPH_CAP_FILE_SHARED;
else
mask |= CEPH_CAP_LINK_SHARED;
}
if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE|
STATX_BLOCKS))
@ -2374,8 +2378,9 @@ int ceph_getattr(const struct path *path, struct kstat *stat,
/* Skip the getattr altogether if we're asked not to sync */
if (!(flags & AT_STATX_DONT_SYNC)) {
err = ceph_do_getattr(inode, statx_to_caps(request_mask),
flags & AT_STATX_FORCE_SYNC);
err = ceph_do_getattr(inode,
statx_to_caps(request_mask, inode->i_mode),
flags & AT_STATX_FORCE_SYNC);
if (err)
return err;
}

View File

@ -57,7 +57,7 @@ static const struct file_lock_operations ceph_fl_lock_ops = {
.fl_release_private = ceph_fl_release_lock,
};
/**
/*
* Implement fcntl and flock locking functions.
*/
static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
@ -225,7 +225,7 @@ static int try_unlock_file(struct file *file, struct file_lock *fl)
return 1;
}
/**
/*
* Attempt to set an fcntl lock.
* For now, this just goes away to the server. Later it may be more awesome.
*/
@ -408,7 +408,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock,
return err;
}
/**
/*
* Encode the flock and fcntl locks for the given inode into the ceph_filelock
* array. Must be called with inode->i_lock already held.
* If we encounter more of a specific lock type than expected, return -ENOSPC.
@ -458,7 +458,7 @@ fail:
return err;
}
/**
/*
* Copy the encoded flock and fcntl locks into the pagelist.
* Format is: #fcntl locks, sequential fcntl locks, #flock locks,
* sequential flock locks.

View File

@ -516,13 +516,9 @@ static int parse_reply_info_create(void **p, void *end,
/* Malformed reply? */
info->has_create_ino = false;
} else if (test_bit(CEPHFS_FEATURE_DELEG_INO, &s->s_features)) {
u8 struct_v, struct_compat;
u32 len;
info->has_create_ino = true;
ceph_decode_8_safe(p, end, struct_v, bad);
ceph_decode_8_safe(p, end, struct_compat, bad);
ceph_decode_32_safe(p, end, len, bad);
/* struct_v, struct_compat, and len */
ceph_decode_skip_n(p, end, 2 + sizeof(u32), bad);
ceph_decode_64_safe(p, end, info->ino, bad);
ret = ceph_parse_deleg_inos(p, end, s);
if (ret)
@ -837,6 +833,7 @@ void ceph_mdsc_release_request(struct kref *kref)
}
kfree(req->r_path1);
kfree(req->r_path2);
put_cred(req->r_cred);
if (req->r_pagelist)
ceph_pagelist_release(req->r_pagelist);
put_request_session(req);
@ -892,8 +889,7 @@ static void __register_request(struct ceph_mds_client *mdsc,
ceph_mdsc_get_request(req);
insert_request(&mdsc->request_tree, req);
req->r_uid = current_fsuid();
req->r_gid = current_fsgid();
req->r_cred = get_current_cred();
if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK)
mdsc->oldest_tid = req->r_tid;
@ -1243,7 +1239,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
{
struct ceph_msg *msg;
struct ceph_mds_session_head *h;
int i = -1;
int i;
int extra_bytes = 0;
int metadata_key_count = 0;
struct ceph_options *opt = mdsc->fsc->client->options;
@ -1595,7 +1591,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_cap_flush *cf;
struct ceph_mds_client *mdsc = fsc->mdsc;
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
if (inode->i_data.nrpages > 0)
invalidate = true;
if (ci->i_wrbuffer_ref > 0)
@ -2482,21 +2478,24 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
/*
* called under mdsc->mutex
*/
static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
struct ceph_mds_request *req,
int mds, bool drop_cap_releases)
bool drop_cap_releases)
{
int mds = session->s_mds;
struct ceph_mds_client *mdsc = session->s_mdsc;
struct ceph_msg *msg;
struct ceph_mds_request_head *head;
struct ceph_mds_request_head_old *head;
const char *path1 = NULL;
const char *path2 = NULL;
u64 ino1 = 0, ino2 = 0;
int pathlen1 = 0, pathlen2 = 0;
bool freepath1 = false, freepath2 = false;
int len;
int len, i;
u16 releases;
void *p, *end;
int ret;
bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME);
ret = set_request_path_attr(req->r_inode, req->r_dentry,
req->r_parent, req->r_path1, req->r_ino1.ino,
@ -2518,14 +2517,23 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
goto out_free1;
}
len = sizeof(*head) +
pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
if (legacy) {
/* Old style */
len = sizeof(*head);
} else {
/* New style: add gid_list and any later fields */
len = sizeof(struct ceph_mds_request_head) + sizeof(u32) +
(sizeof(u64) * req->r_cred->group_info->ngroups);
}
len += pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
sizeof(struct ceph_timespec);
/* calculate (max) length for cap releases */
len += sizeof(struct ceph_mds_request_release) *
(!!req->r_inode_drop + !!req->r_dentry_drop +
!!req->r_old_inode_drop + !!req->r_old_dentry_drop);
if (req->r_dentry_drop)
len += pathlen1;
if (req->r_old_dentry_drop)
@ -2537,17 +2545,33 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
goto out_free2;
}
msg->hdr.version = cpu_to_le16(2);
msg->hdr.tid = cpu_to_le64(req->r_tid);
head = msg->front.iov_base;
p = msg->front.iov_base + sizeof(*head);
/*
* The old ceph_mds_request_header didn't contain a version field, and
* one was added when we moved the message version from 3->4.
*/
if (legacy) {
msg->hdr.version = cpu_to_le16(3);
head = msg->front.iov_base;
p = msg->front.iov_base + sizeof(*head);
} else {
struct ceph_mds_request_head *new_head = msg->front.iov_base;
msg->hdr.version = cpu_to_le16(4);
new_head->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION);
head = (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
p = msg->front.iov_base + sizeof(*new_head);
}
end = msg->front.iov_base + msg->front.iov_len;
head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
head->op = cpu_to_le32(req->r_op);
head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, req->r_uid));
head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, req->r_gid));
head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns,
req->r_cred->fsuid));
head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns,
req->r_cred->fsgid));
head->ino = cpu_to_le64(req->r_deleg_ino);
head->args = req->r_args;
@ -2592,6 +2616,14 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
ceph_encode_copy(&p, &ts, sizeof(ts));
}
/* gid list */
if (!legacy) {
ceph_encode_32(&p, req->r_cred->group_info->ngroups);
for (i = 0; i < req->r_cred->group_info->ngroups; i++)
ceph_encode_64(&p, from_kgid(&init_user_ns,
req->r_cred->group_info->gid[i]));
}
if (WARN_ON_ONCE(p > end)) {
ceph_msg_put(msg);
msg = ERR_PTR(-ERANGE);
@ -2635,14 +2667,28 @@ static void complete_request(struct ceph_mds_client *mdsc,
complete_all(&req->r_completion);
}
static struct ceph_mds_request_head_old *
find_old_request_head(void *p, u64 features)
{
bool legacy = !(features & CEPH_FEATURE_FS_BTIME);
struct ceph_mds_request_head *new_head;
if (legacy)
return (struct ceph_mds_request_head_old *)p;
new_head = (struct ceph_mds_request_head *)p;
return (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
}
/*
* called under mdsc->mutex
*/
static int __prepare_send_request(struct ceph_mds_client *mdsc,
static int __prepare_send_request(struct ceph_mds_session *session,
struct ceph_mds_request *req,
int mds, bool drop_cap_releases)
bool drop_cap_releases)
{
struct ceph_mds_request_head *rhead;
int mds = session->s_mds;
struct ceph_mds_client *mdsc = session->s_mdsc;
struct ceph_mds_request_head_old *rhead;
struct ceph_msg *msg;
int flags = 0;
@ -2661,6 +2707,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
void *p;
/*
* Replay. Do not regenerate message (and rebuild
* paths, etc.); just use the original message.
@ -2668,7 +2715,8 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
* d_move mangles the src name.
*/
msg = req->r_request;
rhead = msg->front.iov_base;
rhead = find_old_request_head(msg->front.iov_base,
session->s_con.peer_features);
flags = le32_to_cpu(rhead->flags);
flags |= CEPH_MDS_FLAG_REPLAY;
@ -2699,14 +2747,15 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
ceph_msg_put(req->r_request);
req->r_request = NULL;
}
msg = create_request_message(mdsc, req, mds, drop_cap_releases);
msg = create_request_message(session, req, drop_cap_releases);
if (IS_ERR(msg)) {
req->r_err = PTR_ERR(msg);
return PTR_ERR(msg);
}
req->r_request = msg;
rhead = msg->front.iov_base;
rhead = find_old_request_head(msg->front.iov_base,
session->s_con.peer_features);
rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
flags |= CEPH_MDS_FLAG_REPLAY;
@ -2725,15 +2774,13 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
/*
* called under mdsc->mutex
*/
static int __send_request(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session,
static int __send_request(struct ceph_mds_session *session,
struct ceph_mds_request *req,
bool drop_cap_releases)
{
int err;
err = __prepare_send_request(mdsc, req, session->s_mds,
drop_cap_releases);
err = __prepare_send_request(session, req, drop_cap_releases);
if (!err) {
ceph_msg_get(req->r_request);
ceph_con_send(&session->s_con, req->r_request);
@ -2818,10 +2865,6 @@ static void __do_request(struct ceph_mds_client *mdsc,
ceph_session_state_name(session->s_state));
if (session->s_state != CEPH_MDS_SESSION_OPEN &&
session->s_state != CEPH_MDS_SESSION_HUNG) {
if (session->s_state == CEPH_MDS_SESSION_REJECTED) {
err = -EACCES;
goto out_session;
}
/*
* We cannot queue async requests since the caps and delegated
* inodes are bound to the session. Just return -EJUKEBOX and
@ -2831,6 +2874,20 @@ static void __do_request(struct ceph_mds_client *mdsc,
err = -EJUKEBOX;
goto out_session;
}
/*
* If the session has been REJECTED, then return a hard error,
* unless it's a CLEANRECOVER mount, in which case we'll queue
* it to the mdsc queue.
*/
if (session->s_state == CEPH_MDS_SESSION_REJECTED) {
if (ceph_test_mount_opt(mdsc->fsc, CLEANRECOVER))
list_add(&req->r_wait, &mdsc->waiting_for_map);
else
err = -EACCES;
goto out_session;
}
if (session->s_state == CEPH_MDS_SESSION_NEW ||
session->s_state == CEPH_MDS_SESSION_CLOSING) {
err = __open_session(mdsc, session);
@ -2850,7 +2907,7 @@ static void __do_request(struct ceph_mds_client *mdsc,
if (req->r_request_started == 0) /* note request start time */
req->r_request_started = jiffies;
err = __send_request(mdsc, session, req, false);
err = __send_request(session, req, false);
out_session:
ceph_put_mds_session(session);
@ -3173,6 +3230,23 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
err = parse_reply_info(session, msg, rinfo, session->s_con.peer_features);
mutex_unlock(&mdsc->mutex);
/* Must find target inode outside of mutexes to avoid deadlocks */
if ((err >= 0) && rinfo->head->is_target) {
struct inode *in;
struct ceph_vino tvino = {
.ino = le64_to_cpu(rinfo->targeti.in->ino),
.snap = le64_to_cpu(rinfo->targeti.in->snapid)
};
in = ceph_get_inode(mdsc->fsc->sb, tvino);
if (IS_ERR(in)) {
err = PTR_ERR(in);
mutex_lock(&session->s_mutex);
goto out_err;
}
req->r_target_inode = in;
}
mutex_lock(&session->s_mutex);
if (err < 0) {
pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
@ -3514,7 +3588,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
mutex_lock(&mdsc->mutex);
list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item)
__send_request(mdsc, session, req, true);
__send_request(session, req, true);
/*
* also re-send old requests when MDS enters reconnect stage. So that MDS
@ -3535,7 +3609,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
ceph_mdsc_release_dir_caps_no_check(req);
__send_request(mdsc, session, req, true);
__send_request(session, req, true);
}
mutex_unlock(&mdsc->mutex);
}
@ -4374,12 +4448,7 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
if (!READ_ONCE(fsc->blocklisted))
return;
if (fsc->last_auto_reconnect &&
time_before(jiffies, fsc->last_auto_reconnect + HZ * 60 * 30))
return;
pr_info("auto reconnect after blocklisted\n");
fsc->last_auto_reconnect = jiffies;
ceph_force_reconnect(fsc->sb);
}
@ -4678,7 +4747,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
{
u64 want_tid, want_flush;
if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN)
return;
dout("sync\n");
@ -4855,10 +4924,8 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
void *p = msg->front.iov_base;
void *end = p + msg->front.iov_len;
u32 epoch;
u32 map_len;
u32 num_fs;
u32 mount_fscid = (u32)-1;
u8 struct_v, struct_cv;
int err = -EINVAL;
ceph_decode_need(&p, end, sizeof(u32), bad);
@ -4866,24 +4933,17 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
dout("handle_fsmap epoch %u\n", epoch);
ceph_decode_need(&p, end, 2 + sizeof(u32), bad);
struct_v = ceph_decode_8(&p);
struct_cv = ceph_decode_8(&p);
map_len = ceph_decode_32(&p);
/* struct_v, struct_cv, map_len, epoch, legacy_client_fscid */
ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 3, bad);
ceph_decode_need(&p, end, sizeof(u32) * 3, bad);
p += sizeof(u32) * 2; /* skip epoch and legacy_client_fscid */
num_fs = ceph_decode_32(&p);
ceph_decode_32_safe(&p, end, num_fs, bad);
while (num_fs-- > 0) {
void *info_p, *info_end;
u32 info_len;
u8 info_v, info_cv;
u32 fscid, namelen;
ceph_decode_need(&p, end, 2 + sizeof(u32), bad);
info_v = ceph_decode_8(&p);
info_cv = ceph_decode_8(&p);
p += 2; // info_v, info_cv
info_len = ceph_decode_32(&p);
ceph_decode_need(&p, end, info_len, bad);
info_p = p;
@ -4954,7 +5014,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
return;
}
newmap = ceph_mdsmap_decode(&p, end);
newmap = ceph_mdsmap_decode(&p, end, ceph_msgr2(mdsc->fsc->client));
if (IS_ERR(newmap)) {
err = PTR_ERR(newmap);
goto bad_unlock;
@ -5081,23 +5141,12 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
struct ceph_auth_handshake *auth = &s->s_auth;
int ret;
if (force_new && auth->authorizer) {
ceph_auth_destroy_authorizer(auth->authorizer);
auth->authorizer = NULL;
}
if (!auth->authorizer) {
int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
auth);
if (ret)
return ERR_PTR(ret);
} else {
int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
auth);
if (ret)
return ERR_PTR(ret);
}
*proto = ac->protocol;
ret = __ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_MDS,
force_new, proto, NULL, NULL);
if (ret)
return ERR_PTR(ret);
return auth;
}
@ -5118,8 +5167,11 @@ static int verify_authorizer_reply(struct ceph_connection *con)
struct ceph_mds_session *s = con->private;
struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
struct ceph_auth_handshake *auth = &s->s_auth;
return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer);
return ceph_auth_verify_authorizer_reply(ac, auth->authorizer,
auth->authorizer_reply_buf, auth->authorizer_reply_buf_len,
NULL, NULL, NULL, NULL);
}
static int invalidate_authorizer(struct ceph_connection *con)
@ -5133,6 +5185,80 @@ static int invalidate_authorizer(struct ceph_connection *con)
return ceph_monc_validate_auth(&mdsc->fsc->client->monc);
}
static int mds_get_auth_request(struct ceph_connection *con,
void *buf, int *buf_len,
void **authorizer, int *authorizer_len)
{
struct ceph_mds_session *s = con->private;
struct ceph_auth_client *ac = s->s_mdsc->fsc->client->monc.auth;
struct ceph_auth_handshake *auth = &s->s_auth;
int ret;
ret = ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_MDS,
buf, buf_len);
if (ret)
return ret;
*authorizer = auth->authorizer_buf;
*authorizer_len = auth->authorizer_buf_len;
return 0;
}
static int mds_handle_auth_reply_more(struct ceph_connection *con,
void *reply, int reply_len,
void *buf, int *buf_len,
void **authorizer, int *authorizer_len)
{
struct ceph_mds_session *s = con->private;
struct ceph_auth_client *ac = s->s_mdsc->fsc->client->monc.auth;
struct ceph_auth_handshake *auth = &s->s_auth;
int ret;
ret = ceph_auth_handle_svc_reply_more(ac, auth, reply, reply_len,
buf, buf_len);
if (ret)
return ret;
*authorizer = auth->authorizer_buf;
*authorizer_len = auth->authorizer_buf_len;
return 0;
}
static int mds_handle_auth_done(struct ceph_connection *con,
u64 global_id, void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len)
{
struct ceph_mds_session *s = con->private;
struct ceph_auth_client *ac = s->s_mdsc->fsc->client->monc.auth;
struct ceph_auth_handshake *auth = &s->s_auth;
return ceph_auth_handle_svc_reply_done(ac, auth, reply, reply_len,
session_key, session_key_len,
con_secret, con_secret_len);
}
static int mds_handle_auth_bad_method(struct ceph_connection *con,
int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt)
{
struct ceph_mds_session *s = con->private;
struct ceph_mon_client *monc = &s->s_mdsc->fsc->client->monc;
int ret;
if (ceph_auth_handle_bad_authorizer(monc->auth, CEPH_ENTITY_TYPE_MDS,
used_proto, result,
allowed_protos, proto_cnt,
allowed_modes, mode_cnt)) {
ret = ceph_monc_validate_auth(monc);
if (ret)
return ret;
}
return -EACCES;
}
static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con,
struct ceph_msg_header *hdr, int *skip)
{
@ -5182,6 +5308,10 @@ static const struct ceph_connection_operations mds_con_ops = {
.alloc_msg = mds_alloc_msg,
.sign_message = mds_sign_message,
.check_message_signature = mds_check_message_signature,
.get_auth_request = mds_get_auth_request,
.handle_auth_reply_more = mds_handle_auth_reply_more,
.handle_auth_done = mds_handle_auth_done,
.handle_auth_bad_method = mds_handle_auth_bad_method,
};
/* eof */

View File

@ -275,8 +275,7 @@ struct ceph_mds_request {
union ceph_mds_request_args r_args;
int r_fmode; /* file mode, if expecting cap */
kuid_t r_uid;
kgid_t r_gid;
const struct cred *r_cred;
int r_request_release_offset;
struct timespec64 r_stamp;

View File

@ -114,7 +114,7 @@ bad:
* Ignore any fields we don't care about (there are quite a few of
* them).
*/
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
{
struct ceph_mdsmap *m;
const void *start = *p;
@ -201,18 +201,19 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
namelen = ceph_decode_32(p); /* skip mds name */
*p += namelen;
ceph_decode_need(p, end,
4*sizeof(u32) + sizeof(u64) +
sizeof(addr) + sizeof(struct ceph_timespec),
bad);
mds = ceph_decode_32(p);
inc = ceph_decode_32(p);
state = ceph_decode_32(p);
ceph_decode_32_safe(p, end, mds, bad);
ceph_decode_32_safe(p, end, inc, bad);
ceph_decode_32_safe(p, end, state, bad);
*p += sizeof(u64); /* state_seq */
err = ceph_decode_entity_addr(p, end, &addr);
if (info_v >= 8)
err = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
else
err = ceph_decode_entity_addr(p, end, &addr);
if (err)
goto corrupt;
ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
ceph_decode_copy_safe(p, end, &laggy_since, sizeof(laggy_since),
bad);
laggy = laggy_since.tv_sec != 0 || laggy_since.tv_nsec != 0;
*p += sizeof(u32);
ceph_decode_32_safe(p, end, namelen, bad);
@ -243,8 +244,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
}
if (state <= 0) {
pr_warn("mdsmap_decode got incorrect state(%s)\n",
ceph_mds_state_name(state));
dout("mdsmap_decode got incorrect state(%s)\n",
ceph_mds_state_name(state));
continue;
}

View File

@ -16,6 +16,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
struct ceph_metric_read_latency *read;
struct ceph_metric_write_latency *write;
struct ceph_metric_metadata_latency *meta;
struct ceph_metric_dlease *dlease;
struct ceph_client_metric *m = &mdsc->metric;
u64 nr_caps = atomic64_read(&m->total_caps);
struct ceph_msg *msg;
@ -25,7 +26,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
s32 len;
len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write)
+ sizeof(*meta);
+ sizeof(*meta) + sizeof(*dlease);
msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true);
if (!msg) {
@ -42,8 +43,8 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
cap->ver = 1;
cap->compat = 1;
cap->data_len = cpu_to_le32(sizeof(*cap) - 10);
cap->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_hit));
cap->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_mis));
cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
cap->total = cpu_to_le64(nr_caps);
items++;
@ -83,6 +84,17 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
meta->nsec = cpu_to_le32(ts.tv_nsec);
items++;
/* encode the dentry lease metric */
dlease = (struct ceph_metric_dlease *)(meta + 1);
dlease->type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
dlease->ver = 1;
dlease->compat = 1;
dlease->data_len = cpu_to_le32(sizeof(*dlease) - 10);
dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
items++;
put_unaligned_le32(items, &head->num);
msg->front.iov_len = len;
msg->hdr.version = cpu_to_le16(1);

View File

@ -27,6 +27,7 @@ enum ceph_metric_type {
CLIENT_METRIC_TYPE_READ_LATENCY, \
CLIENT_METRIC_TYPE_WRITE_LATENCY, \
CLIENT_METRIC_TYPE_METADATA_LATENCY, \
CLIENT_METRIC_TYPE_DENTRY_LEASE, \
\
CLIENT_METRIC_TYPE_MAX, \
}
@ -80,6 +81,19 @@ struct ceph_metric_metadata_latency {
__le32 nsec;
} __packed;
/* metric dentry lease header */
struct ceph_metric_dlease {
__le32 type; /* ceph metric type */
__u8 ver;
__u8 compat;
__le32 data_len; /* length of sizeof(hit + mis + total) */
__le64 hit;
__le64 mis;
__le64 total;
} __packed;
struct ceph_metric_head {
__le32 num; /* the number of metrics that will be sent */
} __packed;

View File

@ -264,7 +264,7 @@ restart:
return NULL;
}
static bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
{
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb);
struct ceph_snap_realm *old_realm, *new_realm;
@ -516,59 +516,3 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
return is_updated;
}
/*
* ceph_quota_check_rename - check if a rename can be executed
* @mdsc: MDS client instance
* @old: inode to be copied
* @new: destination inode (directory)
*
* This function verifies if a rename (e.g. moving a file or directory) can be
* executed. It forces an rstat update in the @new target directory (and in the
* source @old as well, if it's a directory). The actual check is done both for
* max_files and max_bytes.
*
* This function returns 0 if it's OK to do the rename, or, if quotas are
* exceeded, -EXDEV (if @old is a directory) or -EDQUOT.
*/
int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
struct inode *old, struct inode *new)
{
struct ceph_inode_info *ci_old = ceph_inode(old);
int ret = 0;
if (ceph_quota_is_same_realm(old, new))
return 0;
/*
* Get the latest rstat for target directory (and for source, if a
* directory)
*/
ret = ceph_do_getattr(new, CEPH_STAT_RSTAT, false);
if (ret)
return ret;
if (S_ISDIR(old->i_mode)) {
ret = ceph_do_getattr(old, CEPH_STAT_RSTAT, false);
if (ret)
return ret;
ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
ci_old->i_rbytes);
if (!ret)
ret = check_quota_exceeded(new,
QUOTA_CHECK_MAX_FILES_OP,
ci_old->i_rfiles +
ci_old->i_rsubdirs);
if (ret)
ret = -EXDEV;
} else {
ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
i_size_read(old));
if (!ret)
ret = check_quota_exceeded(new,
QUOTA_CHECK_MAX_FILES_OP, 1);
if (ret)
ret = -EDQUOT;
}
return ret;
}

View File

@ -831,6 +831,13 @@ static void destroy_caches(void)
ceph_fscache_unregister();
}
static void __ceph_umount_begin(struct ceph_fs_client *fsc)
{
ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
ceph_mdsc_force_umount(fsc->mdsc);
fsc->filp_gen++; // invalidate open files
}
/*
* ceph_umount_begin - initiate forced umount. Tear down the
* mount, skipping steps that may hang while waiting for server(s).
@ -843,9 +850,7 @@ static void ceph_umount_begin(struct super_block *sb)
if (!fsc)
return;
fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
ceph_mdsc_force_umount(fsc->mdsc);
fsc->filp_gen++; // invalidate open files
__ceph_umount_begin(fsc);
}
static const struct super_operations ceph_super_ops = {
@ -1234,7 +1239,8 @@ int ceph_force_reconnect(struct super_block *sb)
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
int err = 0;
ceph_umount_begin(sb);
fsc->mount_state = CEPH_MOUNT_RECOVER;
__ceph_umount_begin(fsc);
/* Make sure all page caches get invalidated.
* see remove_session_caps_cb() */

View File

@ -106,9 +106,8 @@ struct ceph_fs_client {
struct ceph_mount_options *mount_options;
struct ceph_client *client;
unsigned long mount_state;
int mount_state;
unsigned long last_auto_reconnect;
bool blocklisted;
bool have_copy_from2;
@ -129,6 +128,7 @@ struct ceph_fs_client {
struct dentry *debugfs_bdi;
struct dentry *debugfs_mdsc, *debugfs_mdsmap;
struct dentry *debugfs_metric;
struct dentry *debugfs_status;
struct dentry *debugfs_mds_sessions;
#endif
@ -1222,14 +1222,13 @@ extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session,
struct ceph_msg *msg);
extern bool ceph_quota_is_max_files_exceeded(struct inode *inode);
extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new);
extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode,
loff_t newlen);
extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode,
loff_t newlen);
extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
struct kstatfs *buf);
extern int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
struct inode *old, struct inode *new);
extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
#endif /* _FS_CEPH_SUPER_H */

View File

@ -42,6 +42,7 @@ struct ceph_vxattr {
#define VXATTR_FLAG_READONLY (1<<0)
#define VXATTR_FLAG_HIDDEN (1<<1)
#define VXATTR_FLAG_RSTAT (1<<2)
#define VXATTR_FLAG_DIRSTAT (1<<3)
/* layouts */
@ -303,6 +304,36 @@ static ssize_t ceph_vxattrcb_snap_btime(struct ceph_inode_info *ci, char *val,
ci->i_snap_btime.tv_nsec);
}
static ssize_t ceph_vxattrcb_cluster_fsid(struct ceph_inode_info *ci,
char *val, size_t size)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
return ceph_fmt_xattr(val, size, "%pU", &fsc->client->fsid);
}
static ssize_t ceph_vxattrcb_client_id(struct ceph_inode_info *ci,
char *val, size_t size)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
return ceph_fmt_xattr(val, size, "client%lld",
ceph_client_gid(fsc->client));
}
static ssize_t ceph_vxattrcb_caps(struct ceph_inode_info *ci, char *val,
size_t size)
{
int issued;
spin_lock(&ci->i_ceph_lock);
issued = __ceph_caps_issued(ci, NULL);
spin_unlock(&ci->i_ceph_lock);
return ceph_fmt_xattr(val, size, "%s/0x%x",
ceph_cap_string(issued), issued);
}
#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
#define CEPH_XATTR_NAME2(_type, _name, _name2) \
XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
@ -347,9 +378,9 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
XATTR_LAYOUT_FIELD(dir, layout, object_size),
XATTR_LAYOUT_FIELD(dir, layout, pool),
XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
XATTR_NAME_CEPH(dir, entries, 0),
XATTR_NAME_CEPH(dir, files, 0),
XATTR_NAME_CEPH(dir, subdirs, 0),
XATTR_NAME_CEPH(dir, entries, VXATTR_FLAG_DIRSTAT),
XATTR_NAME_CEPH(dir, files, VXATTR_FLAG_DIRSTAT),
XATTR_NAME_CEPH(dir, subdirs, VXATTR_FLAG_DIRSTAT),
XATTR_RSTAT_FIELD(dir, rentries),
XATTR_RSTAT_FIELD(dir, rfiles),
XATTR_RSTAT_FIELD(dir, rsubdirs),
@ -378,6 +409,13 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
.exists_cb = ceph_vxattrcb_snap_btime_exists,
.flags = VXATTR_FLAG_READONLY,
},
{
.name = "ceph.caps",
.name_size = sizeof("ceph.caps"),
.getxattr_cb = ceph_vxattrcb_caps,
.exists_cb = NULL,
.flags = VXATTR_FLAG_HIDDEN,
},
{ .name = NULL, 0 } /* Required table terminator */
};
@ -403,6 +441,31 @@ static struct ceph_vxattr ceph_file_vxattrs[] = {
.exists_cb = ceph_vxattrcb_snap_btime_exists,
.flags = VXATTR_FLAG_READONLY,
},
{
.name = "ceph.caps",
.name_size = sizeof("ceph.caps"),
.getxattr_cb = ceph_vxattrcb_caps,
.exists_cb = NULL,
.flags = VXATTR_FLAG_HIDDEN,
},
{ .name = NULL, 0 } /* Required table terminator */
};
static struct ceph_vxattr ceph_common_vxattrs[] = {
{
.name = "ceph.cluster_fsid",
.name_size = sizeof("ceph.cluster_fsid"),
.getxattr_cb = ceph_vxattrcb_cluster_fsid,
.exists_cb = NULL,
.flags = VXATTR_FLAG_READONLY,
},
{
.name = "ceph.client_id",
.name_size = sizeof("ceph.client_id"),
.getxattr_cb = ceph_vxattrcb_client_id,
.exists_cb = NULL,
.flags = VXATTR_FLAG_READONLY,
},
{ .name = NULL, 0 } /* Required table terminator */
};
@ -428,6 +491,13 @@ static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
}
}
vxattr = ceph_common_vxattrs;
while (vxattr->name) {
if (!strcmp(vxattr->name, name))
return vxattr;
vxattr++;
}
return NULL;
}
@ -837,6 +907,8 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
int mask = 0;
if (vxattr->flags & VXATTR_FLAG_RSTAT)
mask |= CEPH_STAT_RSTAT;
if (vxattr->flags & VXATTR_FLAG_DIRSTAT)
mask |= CEPH_CAP_FILE_SHARED;
err = ceph_do_getattr(inode, mask, true);
if (err)
return err;
@ -950,6 +1022,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_pagelist *pagelist = NULL;
int op = CEPH_MDS_OP_SETXATTR;
int err;
@ -988,6 +1061,8 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
if (op == CEPH_MDS_OP_SETXATTR) {
req->r_args.setxattr.flags = cpu_to_le32(flags);
req->r_args.setxattr.osdmap_epoch =
cpu_to_le32(osdc->osdmap->epoch);
req->r_pagelist = pagelist;
pagelist = NULL;
}

View File

@ -32,8 +32,6 @@ struct ceph_auth_handshake {
};
struct ceph_auth_client_ops {
const char *name;
/*
* true if we are authenticated and can connect to
* services.
@ -53,7 +51,9 @@ struct ceph_auth_client_ops {
*/
int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end);
int (*handle_reply)(struct ceph_auth_client *ac, int result,
void *buf, void *end);
void *buf, void *end, u8 *session_key,
int *session_key_len, u8 *con_secret,
int *con_secret_len);
/*
* Create authorizer for connecting to a service, and verify
@ -69,7 +69,10 @@ struct ceph_auth_client_ops {
void *challenge_buf,
int challenge_buf_len);
int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
struct ceph_authorizer *a);
struct ceph_authorizer *a,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
void (*invalidate_authorizer)(struct ceph_auth_client *ac,
int peer_type);
@ -95,11 +98,15 @@ struct ceph_auth_client {
const struct ceph_crypto_key *key; /* our secret key */
unsigned want_keys; /* which services we want */
int preferred_mode; /* CEPH_CON_MODE_* */
int fallback_mode; /* ditto */
struct mutex mutex;
};
extern struct ceph_auth_client *ceph_auth_init(const char *name,
const struct ceph_crypto_key *key);
struct ceph_auth_client *ceph_auth_init(const char *name,
const struct ceph_crypto_key *key,
const int *con_modes);
extern void ceph_auth_destroy(struct ceph_auth_client *ac);
extern void ceph_auth_reset(struct ceph_auth_client *ac);
@ -113,21 +120,22 @@ int ceph_auth_entity_name_encode(const char *name, void **p, void *end);
extern int ceph_build_auth(struct ceph_auth_client *ac,
void *msg_buf, size_t msg_len);
extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac);
extern int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
int peer_type,
struct ceph_auth_handshake *auth);
int __ceph_auth_get_authorizer(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
int peer_type, bool force_new,
int *proto, int *pref_mode, int *fallb_mode);
void ceph_auth_destroy_authorizer(struct ceph_authorizer *a);
extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
int peer_type,
struct ceph_auth_handshake *a);
int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
struct ceph_authorizer *a,
void *challenge_buf,
int challenge_buf_len);
extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a);
int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
int peer_type);
@ -147,4 +155,34 @@ int ceph_auth_check_message_signature(struct ceph_auth_handshake *auth,
return auth->check_message_signature(auth, msg);
return 0;
}
int ceph_auth_get_request(struct ceph_auth_client *ac, void *buf, int buf_len);
int ceph_auth_handle_reply_more(struct ceph_auth_client *ac, void *reply,
int reply_len, void *buf, int buf_len);
int ceph_auth_handle_reply_done(struct ceph_auth_client *ac,
u64 global_id, void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
bool ceph_auth_handle_bad_method(struct ceph_auth_client *ac,
int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt);
int ceph_auth_get_authorizer(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
int peer_type, void *buf, int *buf_len);
int ceph_auth_handle_svc_reply_more(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
void *reply, int reply_len,
void *buf, int *buf_len);
int ceph_auth_handle_svc_reply_done(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
bool ceph_auth_handle_bad_authorizer(struct ceph_auth_client *ac,
int peer_type, int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt);
#endif

View File

@ -8,7 +8,8 @@
* feature. Base case is 1 (first use).
*/
#define CEPH_FEATURE_INCARNATION_1 (0ull)
#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL
#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // SERVER_JEWEL
#define CEPH_FEATURE_INCARNATION_3 ((1ull<<57)|(1ull<<28)) // SERVER_MIMIC
#define DEFINE_CEPH_FEATURE(bit, incarnation, name) \
static const uint64_t __maybe_unused CEPH_FEATURE_##name = (1ULL<<bit); \
@ -75,7 +76,7 @@
DEFINE_CEPH_FEATURE( 0, 1, UID)
DEFINE_CEPH_FEATURE( 1, 1, NOSRCADDR)
DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS)
DEFINE_CEPH_FEATURE( 2, 3, SERVER_NAUTILUS)
DEFINE_CEPH_FEATURE( 3, 1, FLOCK)
DEFINE_CEPH_FEATURE( 4, 1, SUBSCRIBE2)
DEFINE_CEPH_FEATURE( 5, 1, MONNAMES)
@ -114,7 +115,7 @@ DEFINE_CEPH_FEATURE(25, 1, CRUSH_TUNABLES2)
DEFINE_CEPH_FEATURE(26, 1, CREATEPOOLID)
DEFINE_CEPH_FEATURE(27, 1, REPLY_CREATE_INODE)
DEFINE_CEPH_FEATURE_RETIRED(28, 1, OSD_HBMSGS, HAMMER, JEWEL)
DEFINE_CEPH_FEATURE(28, 2, SERVER_M)
DEFINE_CEPH_FEATURE(28, 2, SERVER_MIMIC)
DEFINE_CEPH_FEATURE(29, 1, MDSENC)
DEFINE_CEPH_FEATURE(30, 1, OSDHASHPSPOOL)
DEFINE_CEPH_FEATURE(31, 1, MON_SINGLE_PAXOS) // deprecate me
@ -177,13 +178,16 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
*/
#define CEPH_FEATURES_SUPPORTED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \
CEPH_FEATURE_SERVER_NAUTILUS | \
CEPH_FEATURE_FLOCK | \
CEPH_FEATURE_SUBSCRIBE2 | \
CEPH_FEATURE_MONNAMES | \
CEPH_FEATURE_RECONNECT_SEQ | \
CEPH_FEATURE_DIRLAYOUTHASH | \
CEPH_FEATURE_PGID64 | \
CEPH_FEATURE_PGPOOL3 | \
CEPH_FEATURE_OSDENC | \
CEPH_FEATURE_MONENC | \
CEPH_FEATURE_CRUSH_TUNABLES | \
CEPH_FEATURE_SERVER_LUMINOUS | \
CEPH_FEATURE_RESEND_ON_SPLIT | \
@ -193,6 +197,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
CEPH_FEATURE_MSG_AUTH | \
CEPH_FEATURE_CRUSH_TUNABLES2 | \
CEPH_FEATURE_REPLY_CREATE_INODE | \
CEPH_FEATURE_SERVER_MIMIC | \
CEPH_FEATURE_MDSENC | \
CEPH_FEATURE_OSDHASHPSPOOL | \
CEPH_FEATURE_OSD_CACHEPOOL | \

View File

@ -93,8 +93,19 @@ struct ceph_dir_layout {
#define CEPH_AUTH_NONE 0x1
#define CEPH_AUTH_CEPHX 0x2
#define CEPH_AUTH_MODE_NONE 0
#define CEPH_AUTH_MODE_AUTHORIZER 1
#define CEPH_AUTH_MODE_MON 10
/* msgr2 protocol modes */
#define CEPH_CON_MODE_UNKNOWN 0x0
#define CEPH_CON_MODE_CRC 0x1
#define CEPH_CON_MODE_SECURE 0x2
#define CEPH_AUTH_UID_DEFAULT ((__u64) -1)
const char *ceph_auth_proto_name(int proto);
const char *ceph_con_mode_name(int mode);
/*********************************************
* message layer
@ -424,6 +435,7 @@ union ceph_mds_request_args {
} __attribute__ ((packed)) open;
struct {
__le32 flags;
__le32 osdmap_epoch; /* used for setting file/dir layouts */
} __attribute__ ((packed)) setxattr;
struct {
struct ceph_file_layout_legacy layout;
@ -445,11 +457,25 @@ union ceph_mds_request_args {
} __attribute__ ((packed)) lookupino;
} __attribute__ ((packed));
union ceph_mds_request_args_ext {
union ceph_mds_request_args old;
struct {
__le32 mode;
__le32 uid;
__le32 gid;
struct ceph_timespec mtime;
struct ceph_timespec atime;
__le64 size, old_size; /* old_size needed by truncate */
__le32 mask; /* CEPH_SETATTR_* */
struct ceph_timespec btime;
} __attribute__ ((packed)) setattr_ext;
};
#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */
#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */
#define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */
struct ceph_mds_request_head {
struct ceph_mds_request_head_old {
__le64 oldest_client_tid;
__le32 mdsmap_epoch; /* on client */
__le32 flags; /* CEPH_MDS_FLAG_* */
@ -462,6 +488,22 @@ struct ceph_mds_request_head {
union ceph_mds_request_args args;
} __attribute__ ((packed));
#define CEPH_MDS_REQUEST_HEAD_VERSION 1
struct ceph_mds_request_head {
__le16 version; /* struct version */
__le64 oldest_client_tid;
__le32 mdsmap_epoch; /* on client */
__le32 flags; /* CEPH_MDS_FLAG_* */
__u8 num_retry, num_fwd; /* count retry, fwd attempts */
__le16 num_releases; /* # include cap/lease release records */
__le32 op; /* mds op code */
__le32 caller_uid, caller_gid;
__le64 ino; /* use this ino for openc, mkdir, mknod,
etc. (if replaying) */
union ceph_mds_request_args_ext args;
} __attribute__ ((packed));
/* cap/lease release record */
struct ceph_mds_request_release {
__le64 ino, cap_id; /* ino and unique cap id */

View File

@ -220,6 +220,8 @@ static inline void ceph_encode_timespec64(struct ceph_timespec *tv,
*/
#define CEPH_ENTITY_ADDR_TYPE_NONE 0
#define CEPH_ENTITY_ADDR_TYPE_LEGACY __cpu_to_le32(1)
#define CEPH_ENTITY_ADDR_TYPE_MSGR2 __cpu_to_le32(2)
#define CEPH_ENTITY_ADDR_TYPE_ANY __cpu_to_le32(3)
static inline void ceph_encode_banner_addr(struct ceph_entity_addr *a)
{
@ -239,6 +241,12 @@ static inline void ceph_decode_banner_addr(struct ceph_entity_addr *a)
extern int ceph_decode_entity_addr(void **p, void *end,
struct ceph_entity_addr *addr);
int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2,
struct ceph_entity_addr *addr);
int ceph_entity_addr_encoding_len(const struct ceph_entity_addr *addr);
void ceph_encode_entity_addr(void **p, const struct ceph_entity_addr *addr);
/*
* encoders
*/

View File

@ -31,10 +31,10 @@
#define CEPH_OPT_FSID (1<<0)
#define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */
#define CEPH_OPT_MYIP (1<<2) /* specified my ip */
#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */
#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes (msgr1) */
#define CEPH_OPT_NOMSGAUTH (1<<4) /* don't require msg signing feat */
#define CEPH_OPT_TCP_NODELAY (1<<5) /* TCP_NODELAY on TCP sockets */
#define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs */
#define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs (msgr1) */
#define CEPH_OPT_ABORT_ON_FULL (1<<7) /* abort w/ ENOSPC when full */
#define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY)
@ -53,6 +53,7 @@ struct ceph_options {
unsigned long osd_keepalive_timeout; /* jiffies */
unsigned long osd_request_timeout; /* jiffies */
u32 read_from_replica; /* CEPH_OSD_FLAG_BALANCE/LOCALIZE_READS */
int con_modes[2]; /* CEPH_CON_MODE_* */
/*
* any type that can't be simply compared or doesn't need
@ -83,6 +84,7 @@ struct ceph_options {
#define CEPH_MONC_HUNT_BACKOFF 2
#define CEPH_MONC_HUNT_MAX_MULT 10
#define CEPH_MSG_MAX_CONTROL_LEN (16*1024*1024)
#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
#define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024)
@ -104,6 +106,7 @@ enum {
CEPH_MOUNT_UNMOUNTING,
CEPH_MOUNT_UNMOUNTED,
CEPH_MOUNT_SHUTDOWN,
CEPH_MOUNT_RECOVER,
};
static inline unsigned long ceph_timeout_jiffies(unsigned long timeout)
@ -150,6 +153,10 @@ struct ceph_client {
#define from_msgr(ms) container_of(ms, struct ceph_client, msgr)
static inline bool ceph_msgr2(struct ceph_client *client)
{
return client->options->con_modes[0] != CEPH_CON_MODE_UNKNOWN;
}
/*
* snapshots

View File

@ -64,7 +64,7 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
}
extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2);
extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m);

View File

@ -3,6 +3,7 @@
#define __FS_CEPH_MESSENGER_H
#include <linux/bvec.h>
#include <linux/crypto.h>
#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/net.h>
@ -52,6 +53,23 @@ struct ceph_connection_operations {
int (*sign_message) (struct ceph_msg *msg);
int (*check_message_signature) (struct ceph_msg *msg);
/* msgr2 authentication exchange */
int (*get_auth_request)(struct ceph_connection *con,
void *buf, int *buf_len,
void **authorizer, int *authorizer_len);
int (*handle_auth_reply_more)(struct ceph_connection *con,
void *reply, int reply_len,
void *buf, int *buf_len,
void **authorizer, int *authorizer_len);
int (*handle_auth_done)(struct ceph_connection *con,
u64 global_id, void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
int (*handle_auth_bad_method)(struct ceph_connection *con,
int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt);
};
/* use format string %s%lld */
@ -235,14 +253,171 @@ struct ceph_msg {
bool more_to_follow;
bool needs_out_seq;
int front_alloc_len;
unsigned long ack_stamp; /* tx: when we were acked */
struct ceph_msgpool *pool;
};
/*
* connection states
*/
#define CEPH_CON_S_CLOSED 1
#define CEPH_CON_S_PREOPEN 2
#define CEPH_CON_S_V1_BANNER 3
#define CEPH_CON_S_V1_CONNECT_MSG 4
#define CEPH_CON_S_V2_BANNER_PREFIX 5
#define CEPH_CON_S_V2_BANNER_PAYLOAD 6
#define CEPH_CON_S_V2_HELLO 7
#define CEPH_CON_S_V2_AUTH 8
#define CEPH_CON_S_V2_AUTH_SIGNATURE 9
#define CEPH_CON_S_V2_SESSION_CONNECT 10
#define CEPH_CON_S_V2_SESSION_RECONNECT 11
#define CEPH_CON_S_OPEN 12
#define CEPH_CON_S_STANDBY 13
/*
* ceph_connection flag bits
*/
#define CEPH_CON_F_LOSSYTX 0 /* we can close channel or drop
messages on errors */
#define CEPH_CON_F_KEEPALIVE_PENDING 1 /* we need to send a keepalive */
#define CEPH_CON_F_WRITE_PENDING 2 /* we have data ready to send */
#define CEPH_CON_F_SOCK_CLOSED 3 /* socket state changed to closed */
#define CEPH_CON_F_BACKOFF 4 /* need to retry queuing delayed
work */
/* ceph connection fault delay defaults, for exponential backoff */
#define BASE_DELAY_INTERVAL (HZ/2)
#define MAX_DELAY_INTERVAL (5 * 60 * HZ)
#define BASE_DELAY_INTERVAL (HZ / 4)
#define MAX_DELAY_INTERVAL (15 * HZ)
struct ceph_connection_v1_info {
struct kvec out_kvec[8], /* sending header/footer data */
*out_kvec_cur;
int out_kvec_left; /* kvec's left in out_kvec */
int out_skip; /* skip this many bytes */
int out_kvec_bytes; /* total bytes left */
bool out_more; /* there is more data after the kvecs */
bool out_msg_done;
struct ceph_auth_handshake *auth;
int auth_retry; /* true if we need a newer authorizer */
/* connection negotiation temps */
u8 in_banner[CEPH_BANNER_MAX_LEN];
struct ceph_entity_addr actual_peer_addr;
struct ceph_entity_addr peer_addr_for_me;
struct ceph_msg_connect out_connect;
struct ceph_msg_connect_reply in_reply;
int in_base_pos; /* bytes read */
/* message in temps */
u8 in_tag; /* protocol control byte */
struct ceph_msg_header in_hdr;
__le64 in_temp_ack; /* for reading an ack */
/* message out temps */
struct ceph_msg_header out_hdr;
__le64 out_temp_ack; /* for writing an ack */
struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2
stamp */
u32 connect_seq; /* identify the most recent connection
attempt for this session */
u32 peer_global_seq; /* peer's global seq for this connection */
};
#define CEPH_CRC_LEN 4
#define CEPH_GCM_KEY_LEN 16
#define CEPH_GCM_IV_LEN sizeof(struct ceph_gcm_nonce)
#define CEPH_GCM_BLOCK_LEN 16
#define CEPH_GCM_TAG_LEN 16
#define CEPH_PREAMBLE_LEN 32
#define CEPH_PREAMBLE_INLINE_LEN 48
#define CEPH_PREAMBLE_PLAIN_LEN CEPH_PREAMBLE_LEN
#define CEPH_PREAMBLE_SECURE_LEN (CEPH_PREAMBLE_LEN + \
CEPH_PREAMBLE_INLINE_LEN + \
CEPH_GCM_TAG_LEN)
#define CEPH_EPILOGUE_PLAIN_LEN (1 + 3 * CEPH_CRC_LEN)
#define CEPH_EPILOGUE_SECURE_LEN (CEPH_GCM_BLOCK_LEN + CEPH_GCM_TAG_LEN)
#define CEPH_FRAME_MAX_SEGMENT_COUNT 4
struct ceph_frame_desc {
int fd_tag; /* FRAME_TAG_* */
int fd_seg_cnt;
int fd_lens[CEPH_FRAME_MAX_SEGMENT_COUNT]; /* logical */
int fd_aligns[CEPH_FRAME_MAX_SEGMENT_COUNT];
};
struct ceph_gcm_nonce {
__le32 fixed;
__le64 counter __packed;
};
struct ceph_connection_v2_info {
struct iov_iter in_iter;
struct kvec in_kvecs[5]; /* recvmsg */
struct bio_vec in_bvec; /* recvmsg (in_cursor) */
int in_kvec_cnt;
int in_state; /* IN_S_* */
struct iov_iter out_iter;
struct kvec out_kvecs[8]; /* sendmsg */
struct bio_vec out_bvec; /* sendpage (out_cursor, out_zero),
sendmsg (out_enc_pages) */
int out_kvec_cnt;
int out_state; /* OUT_S_* */
int out_zero; /* # of zero bytes to send */
bool out_iter_sendpage; /* use sendpage if possible */
struct ceph_frame_desc in_desc;
struct ceph_msg_data_cursor in_cursor;
struct ceph_msg_data_cursor out_cursor;
struct crypto_shash *hmac_tfm; /* post-auth signature */
struct crypto_aead *gcm_tfm; /* on-wire encryption */
struct aead_request *gcm_req;
struct crypto_wait gcm_wait;
struct ceph_gcm_nonce in_gcm_nonce;
struct ceph_gcm_nonce out_gcm_nonce;
struct page **out_enc_pages;
int out_enc_page_cnt;
int out_enc_resid;
int out_enc_i;
int con_mode; /* CEPH_CON_MODE_* */
void *conn_bufs[16];
int conn_buf_cnt;
struct kvec in_sign_kvecs[8];
struct kvec out_sign_kvecs[8];
int in_sign_kvec_cnt;
int out_sign_kvec_cnt;
u64 client_cookie;
u64 server_cookie;
u64 global_seq;
u64 connect_seq;
u64 peer_global_seq;
u8 in_buf[CEPH_PREAMBLE_SECURE_LEN];
u8 out_buf[CEPH_PREAMBLE_SECURE_LEN];
struct {
u8 late_status; /* FRAME_LATE_STATUS_* */
union {
struct {
u32 front_crc;
u32 middle_crc;
u32 data_crc;
} __packed;
u8 pad[CEPH_GCM_BLOCK_LEN - 1];
};
} out_epil;
};
/*
* A single connection with another host.
@ -258,24 +433,16 @@ struct ceph_connection {
struct ceph_messenger *msgr;
int state; /* CEPH_CON_S_* */
atomic_t sock_state;
struct socket *sock;
struct ceph_entity_addr peer_addr; /* peer address */
struct ceph_entity_addr peer_addr_for_me;
unsigned long flags;
unsigned long state;
unsigned long flags; /* CEPH_CON_F_* */
const char *error_msg; /* error message, if any */
struct ceph_entity_name peer_name; /* peer name */
struct ceph_entity_addr peer_addr; /* peer address */
u64 peer_features;
u32 connect_seq; /* identify the most recent connection
attempt for this connection, client */
u32 peer_global_seq; /* peer's global seq for this connection */
struct ceph_auth_handshake *auth;
int auth_retry; /* true if we need a newer authorizer */
struct mutex mutex;
@ -286,43 +453,80 @@ struct ceph_connection {
u64 in_seq, in_seq_acked; /* last message received, acked */
/* connection negotiation temps */
char in_banner[CEPH_BANNER_MAX_LEN];
struct ceph_msg_connect out_connect;
struct ceph_msg_connect_reply in_reply;
struct ceph_entity_addr actual_peer_addr;
/* message out temps */
struct ceph_msg_header out_hdr;
struct ceph_msg *in_msg;
struct ceph_msg *out_msg; /* sending message (== tail of
out_sent) */
bool out_msg_done;
struct kvec out_kvec[8], /* sending header/footer data */
*out_kvec_cur;
int out_kvec_left; /* kvec's left in out_kvec */
int out_skip; /* skip this many bytes */
int out_kvec_bytes; /* total bytes left */
int out_more; /* there is more data after the kvecs */
__le64 out_temp_ack; /* for writing an ack */
struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2
stamp */
/* message in temps */
struct ceph_msg_header in_hdr;
struct ceph_msg *in_msg;
u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */
char in_tag; /* protocol control byte */
int in_base_pos; /* bytes read */
__le64 in_temp_ack; /* for reading an ack */
struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */
struct delayed_work work; /* send|recv work */
unsigned long delay; /* current delay interval */
union {
struct ceph_connection_v1_info v1;
struct ceph_connection_v2_info v2;
};
};
extern struct page *ceph_zero_page;
void ceph_con_flag_clear(struct ceph_connection *con, unsigned long con_flag);
void ceph_con_flag_set(struct ceph_connection *con, unsigned long con_flag);
bool ceph_con_flag_test(struct ceph_connection *con, unsigned long con_flag);
bool ceph_con_flag_test_and_clear(struct ceph_connection *con,
unsigned long con_flag);
bool ceph_con_flag_test_and_set(struct ceph_connection *con,
unsigned long con_flag);
void ceph_encode_my_addr(struct ceph_messenger *msgr);
int ceph_tcp_connect(struct ceph_connection *con);
int ceph_con_close_socket(struct ceph_connection *con);
void ceph_con_reset_session(struct ceph_connection *con);
u32 ceph_get_global_seq(struct ceph_messenger *msgr, u32 gt);
void ceph_con_discard_sent(struct ceph_connection *con, u64 ack_seq);
void ceph_con_discard_requeued(struct ceph_connection *con, u64 reconnect_seq);
void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor,
struct ceph_msg *msg, size_t length);
struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
size_t *page_offset, size_t *length,
bool *last_piece);
void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes);
u32 ceph_crc32c_page(u32 crc, struct page *page, unsigned int page_offset,
unsigned int length);
bool ceph_addr_is_blank(const struct ceph_entity_addr *addr);
int ceph_addr_port(const struct ceph_entity_addr *addr);
void ceph_addr_set_port(struct ceph_entity_addr *addr, int p);
void ceph_con_process_message(struct ceph_connection *con);
int ceph_con_in_msg_alloc(struct ceph_connection *con,
struct ceph_msg_header *hdr, int *skip);
void ceph_con_get_out_msg(struct ceph_connection *con);
/* messenger_v1.c */
int ceph_con_v1_try_read(struct ceph_connection *con);
int ceph_con_v1_try_write(struct ceph_connection *con);
void ceph_con_v1_revoke(struct ceph_connection *con);
void ceph_con_v1_revoke_incoming(struct ceph_connection *con);
bool ceph_con_v1_opened(struct ceph_connection *con);
void ceph_con_v1_reset_session(struct ceph_connection *con);
void ceph_con_v1_reset_protocol(struct ceph_connection *con);
/* messenger_v2.c */
int ceph_con_v2_try_read(struct ceph_connection *con);
int ceph_con_v2_try_write(struct ceph_connection *con);
void ceph_con_v2_revoke(struct ceph_connection *con);
void ceph_con_v2_revoke_incoming(struct ceph_connection *con);
bool ceph_con_v2_opened(struct ceph_connection *con);
void ceph_con_v2_reset_session(struct ceph_connection *con);
void ceph_con_v2_reset_protocol(struct ceph_connection *con);
extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr);
@ -330,7 +534,6 @@ extern int ceph_parse_ips(const char *c, const char *end,
struct ceph_entity_addr *addr,
int max_count, int *count);
extern int ceph_msgr_init(void);
extern void ceph_msgr_exit(void);
extern void ceph_msgr_flush(void);

View File

@ -8,24 +8,45 @@
#define CEPH_MON_PORT 6789 /* default monitor port */
/*
* client-side processes will try to bind to ports in this
* range, simply for the benefit of tools like nmap or wireshark
* that would like to identify the protocol.
*/
#define CEPH_PORT_FIRST 6789
#define CEPH_PORT_START 6800 /* non-monitors start here */
#define CEPH_PORT_LAST 6900
/*
* tcp connection banner. include a protocol version. and adjust
* whenever the wire protocol changes. try to keep this string length
* constant.
*/
#define CEPH_BANNER "ceph v027"
#define CEPH_BANNER_LEN 9
#define CEPH_BANNER_MAX_LEN 30
/*
* messenger V2 connection banner prefix.
* The full banner string should have the form: "ceph v2\n<le16>"
* the 2 bytes are the length of the remaining banner.
*/
#define CEPH_BANNER_V2 "ceph v2\n"
#define CEPH_BANNER_V2_LEN 8
#define CEPH_BANNER_V2_PREFIX_LEN (CEPH_BANNER_V2_LEN + sizeof(__le16))
/*
* messenger V2 features
*/
#define CEPH_MSGR2_INCARNATION_1 (0ull)
#define DEFINE_MSGR2_FEATURE(bit, incarnation, name) \
static const uint64_t CEPH_MSGR2_FEATURE_##name = (1ULL << bit); \
static const uint64_t CEPH_MSGR2_FEATUREMASK_##name = \
(1ULL << bit | CEPH_MSGR2_INCARNATION_##incarnation);
#define HAVE_MSGR2_FEATURE(x, name) \
(((x) & (CEPH_MSGR2_FEATUREMASK_##name)) == (CEPH_MSGR2_FEATUREMASK_##name))
DEFINE_MSGR2_FEATURE( 0, 1, REVISION_1) // msgr2.1
#define CEPH_MSGR2_SUPPORTED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1)
#define CEPH_MSGR2_REQUIRED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1)
/*
* Rollover-safe type and comparator for 32-bit sequence numbers.
* Comparator returns -1, 0, or 1.
@ -61,11 +82,18 @@ extern const char *ceph_entity_type_name(int type);
* entity_addr -- network address
*/
struct ceph_entity_addr {
__le32 type;
__le32 type; /* CEPH_ENTITY_ADDR_TYPE_* */
__le32 nonce; /* unique id for process (e.g. pid) */
struct sockaddr_storage in_addr;
} __attribute__ ((packed));
static inline bool ceph_addr_equal_no_type(const struct ceph_entity_addr *lhs,
const struct ceph_entity_addr *rhs)
{
return !memcmp(&lhs->in_addr, &rhs->in_addr, sizeof(lhs->in_addr)) &&
lhs->nonce == rhs->nonce;
}
struct ceph_entity_inst {
struct ceph_entity_name name;
struct ceph_entity_addr addr;
@ -160,6 +188,24 @@ struct ceph_msg_header {
__le32 crc; /* header crc32c */
} __attribute__ ((packed));
struct ceph_msg_header2 {
__le64 seq; /* message seq# for this session */
__le64 tid; /* transaction id */
__le16 type; /* message type */
__le16 priority; /* priority. higher value == higher priority */
__le16 version; /* version of message encoding */
__le32 data_pre_padding_len;
__le16 data_off; /* sender: include full offset;
receiver: mask against ~PAGE_MASK */
__le64 ack_seq;
__u8 flags;
/* oldest code we think can decode this. unknown if zero. */
__le16 compat_version;
__le16 reserved;
} __attribute__ ((packed));
#define CEPH_MSG_PRIO_LOW 64
#define CEPH_MSG_PRIO_DEFAULT 127
#define CEPH_MSG_PRIO_HIGH 196

View File

@ -251,8 +251,8 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
}
struct ceph_osdmap *ceph_osdmap_alloc(void);
extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end);
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end, bool msgr2);
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2,
struct ceph_osdmap *map);
extern void ceph_osdmap_destroy(struct ceph_osdmap *map);

View File

@ -5,6 +5,9 @@ config CEPH_LIB
select LIBCRC32C
select CRYPTO_AES
select CRYPTO_CBC
select CRYPTO_GCM
select CRYPTO_HMAC
select CRYPTO_SHA256
select CRYPTO
select KEYS
default n

View File

@ -14,4 +14,5 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
crypto.o armor.o \
auth_x.o \
ceph_strings.o ceph_hash.o \
pagevec.o snapshot.o string_table.o
pagevec.o snapshot.o string_table.o \
messenger_v1.o messenger_v2.o

View File

@ -21,28 +21,31 @@ static u32 supported_protocols[] = {
CEPH_AUTH_CEPHX
};
static int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol)
static int init_protocol(struct ceph_auth_client *ac, int proto)
{
switch (protocol) {
dout("%s proto %d\n", __func__, proto);
switch (proto) {
case CEPH_AUTH_NONE:
return ceph_auth_none_init(ac);
case CEPH_AUTH_CEPHX:
return ceph_x_init(ac);
default:
return -ENOENT;
pr_err("bad auth protocol %d\n", proto);
return -EINVAL;
}
}
/*
* setup, teardown.
*/
struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_crypto_key *key)
struct ceph_auth_client *ceph_auth_init(const char *name,
const struct ceph_crypto_key *key,
const int *con_modes)
{
struct ceph_auth_client *ac;
int ret;
dout("auth_init name '%s'\n", name);
ret = -ENOMEM;
ac = kzalloc(sizeof(*ac), GFP_NOFS);
if (!ac)
@ -54,8 +57,12 @@ struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_cryp
ac->name = name;
else
ac->name = CEPH_AUTH_NAME_DEFAULT;
dout("auth_init name %s\n", ac->name);
ac->key = key;
ac->preferred_mode = con_modes[0];
ac->fallback_mode = con_modes[1];
dout("%s name '%s' preferred_mode %d fallback_mode %d\n", __func__,
ac->name, ac->preferred_mode, ac->fallback_mode);
return ac;
out:
@ -145,31 +152,35 @@ bad:
goto out;
}
static int ceph_build_auth_request(struct ceph_auth_client *ac,
void *msg_buf, size_t msg_len)
static int build_request(struct ceph_auth_client *ac, bool add_header,
void *buf, int buf_len)
{
struct ceph_mon_request_header *monhdr = msg_buf;
void *p = monhdr + 1;
void *end = msg_buf + msg_len;
void *end = buf + buf_len;
void *p;
int ret;
monhdr->have_version = 0;
monhdr->session_mon = cpu_to_le16(-1);
monhdr->session_mon_tid = 0;
ceph_encode_32(&p, ac->protocol);
p = buf;
if (add_header) {
/* struct ceph_mon_request_header + protocol */
ceph_encode_64_safe(&p, end, 0, e_range);
ceph_encode_16_safe(&p, end, -1, e_range);
ceph_encode_64_safe(&p, end, 0, e_range);
ceph_encode_32_safe(&p, end, ac->protocol, e_range);
}
ceph_encode_need(&p, end, sizeof(u32), e_range);
ret = ac->ops->build_request(ac, p + sizeof(u32), end);
if (ret < 0) {
pr_err("error %d building auth method %s request\n", ret,
ac->ops->name);
goto out;
pr_err("auth protocol '%s' building request failed: %d\n",
ceph_auth_proto_name(ac->protocol), ret);
return ret;
}
dout(" built request %d bytes\n", ret);
ceph_encode_32(&p, ret);
ret = p + ret - msg_buf;
out:
return ret;
return p + ret - buf;
e_range:
return -ERANGE;
}
/*
@ -229,10 +240,10 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
ac->ops = NULL;
}
if (ac->protocol != protocol) {
ret = ceph_auth_init_protocol(ac, protocol);
ret = init_protocol(ac, protocol);
if (ret) {
pr_err("error %d on auth protocol %d init\n",
ret, protocol);
pr_err("auth protocol '%s' init failed: %d\n",
ceph_auth_proto_name(protocol), ret);
goto out;
}
}
@ -240,12 +251,13 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
ac->negotiating = false;
}
ret = ac->ops->handle_reply(ac, result, payload, payload_end);
if (ret == -EAGAIN) {
ret = ceph_build_auth_request(ac, reply_buf, reply_len);
} else if (ret) {
pr_err("auth method '%s' error %d\n", ac->ops->name, ret);
}
ret = ac->ops->handle_reply(ac, result, payload, payload_end,
NULL, NULL, NULL, NULL);
if (ret == -EAGAIN)
ret = build_request(ac, true, reply_buf, reply_len);
else if (ret)
pr_err("auth protocol '%s' mauth authentication failed: %d\n",
ceph_auth_proto_name(ac->protocol), result);
out:
mutex_unlock(&ac->mutex);
@ -264,7 +276,7 @@ int ceph_build_auth(struct ceph_auth_client *ac,
mutex_lock(&ac->mutex);
if (ac->ops->should_authenticate(ac))
ret = ceph_build_auth_request(ac, msg_buf, msg_len);
ret = build_request(ac, true, msg_buf, msg_len);
mutex_unlock(&ac->mutex);
return ret;
}
@ -281,19 +293,38 @@ int ceph_auth_is_authenticated(struct ceph_auth_client *ac)
}
EXPORT_SYMBOL(ceph_auth_is_authenticated);
int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
int peer_type,
struct ceph_auth_handshake *auth)
int __ceph_auth_get_authorizer(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
int peer_type, bool force_new,
int *proto, int *pref_mode, int *fallb_mode)
{
int ret = 0;
int ret;
mutex_lock(&ac->mutex);
if (ac->ops && ac->ops->create_authorizer)
if (force_new && auth->authorizer) {
ceph_auth_destroy_authorizer(auth->authorizer);
auth->authorizer = NULL;
}
if (!auth->authorizer)
ret = ac->ops->create_authorizer(ac, peer_type, auth);
else if (ac->ops->update_authorizer)
ret = ac->ops->update_authorizer(ac, peer_type, auth);
else
ret = 0;
if (ret)
goto out;
*proto = ac->protocol;
if (pref_mode && fallb_mode) {
*pref_mode = ac->preferred_mode;
*fallb_mode = ac->fallback_mode;
}
out:
mutex_unlock(&ac->mutex);
return ret;
}
EXPORT_SYMBOL(ceph_auth_create_authorizer);
EXPORT_SYMBOL(__ceph_auth_get_authorizer);
void ceph_auth_destroy_authorizer(struct ceph_authorizer *a)
{
@ -301,20 +332,6 @@ void ceph_auth_destroy_authorizer(struct ceph_authorizer *a)
}
EXPORT_SYMBOL(ceph_auth_destroy_authorizer);
int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
int peer_type,
struct ceph_auth_handshake *a)
{
int ret = 0;
mutex_lock(&ac->mutex);
if (ac->ops && ac->ops->update_authorizer)
ret = ac->ops->update_authorizer(ac, peer_type, a);
mutex_unlock(&ac->mutex);
return ret;
}
EXPORT_SYMBOL(ceph_auth_update_authorizer);
int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
struct ceph_authorizer *a,
void *challenge_buf,
@ -332,13 +349,18 @@ int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge);
int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a)
struct ceph_authorizer *a,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len)
{
int ret = 0;
mutex_lock(&ac->mutex);
if (ac->ops && ac->ops->verify_authorizer_reply)
ret = ac->ops->verify_authorizer_reply(ac, a);
ret = ac->ops->verify_authorizer_reply(ac, a,
reply, reply_len, session_key, session_key_len,
con_secret, con_secret_len);
mutex_unlock(&ac->mutex);
return ret;
}
@ -352,3 +374,279 @@ void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, int peer_type)
mutex_unlock(&ac->mutex);
}
EXPORT_SYMBOL(ceph_auth_invalidate_authorizer);
/*
* msgr2 authentication
*/
static bool contains(const int *arr, int cnt, int val)
{
int i;
for (i = 0; i < cnt; i++) {
if (arr[i] == val)
return true;
}
return false;
}
static int encode_con_modes(void **p, void *end, int pref_mode, int fallb_mode)
{
WARN_ON(pref_mode == CEPH_CON_MODE_UNKNOWN);
if (fallb_mode != CEPH_CON_MODE_UNKNOWN) {
ceph_encode_32_safe(p, end, 2, e_range);
ceph_encode_32_safe(p, end, pref_mode, e_range);
ceph_encode_32_safe(p, end, fallb_mode, e_range);
} else {
ceph_encode_32_safe(p, end, 1, e_range);
ceph_encode_32_safe(p, end, pref_mode, e_range);
}
return 0;
e_range:
return -ERANGE;
}
/*
* Similar to ceph_auth_build_hello().
*/
int ceph_auth_get_request(struct ceph_auth_client *ac, void *buf, int buf_len)
{
int proto = ac->key ? CEPH_AUTH_CEPHX : CEPH_AUTH_NONE;
void *end = buf + buf_len;
void *lenp;
void *p;
int ret;
mutex_lock(&ac->mutex);
if (ac->protocol == CEPH_AUTH_UNKNOWN) {
ret = init_protocol(ac, proto);
if (ret) {
pr_err("auth protocol '%s' init failed: %d\n",
ceph_auth_proto_name(proto), ret);
goto out;
}
} else {
WARN_ON(ac->protocol != proto);
ac->ops->reset(ac);
}
p = buf;
ceph_encode_32_safe(&p, end, ac->protocol, e_range);
ret = encode_con_modes(&p, end, ac->preferred_mode, ac->fallback_mode);
if (ret)
goto out;
lenp = p;
p += 4; /* space for len */
ceph_encode_8_safe(&p, end, CEPH_AUTH_MODE_MON, e_range);
ret = ceph_auth_entity_name_encode(ac->name, &p, end);
if (ret)
goto out;
ceph_encode_64_safe(&p, end, ac->global_id, e_range);
ceph_encode_32(&lenp, p - lenp - 4);
ret = p - buf;
out:
mutex_unlock(&ac->mutex);
return ret;
e_range:
ret = -ERANGE;
goto out;
}
int ceph_auth_handle_reply_more(struct ceph_auth_client *ac, void *reply,
int reply_len, void *buf, int buf_len)
{
int ret;
mutex_lock(&ac->mutex);
ret = ac->ops->handle_reply(ac, 0, reply, reply + reply_len,
NULL, NULL, NULL, NULL);
if (ret == -EAGAIN)
ret = build_request(ac, false, buf, buf_len);
else
WARN_ON(ret >= 0);
mutex_unlock(&ac->mutex);
return ret;
}
int ceph_auth_handle_reply_done(struct ceph_auth_client *ac,
u64 global_id, void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len)
{
int ret;
mutex_lock(&ac->mutex);
if (global_id && ac->global_id != global_id) {
dout("%s global_id %llu -> %llu\n", __func__, ac->global_id,
global_id);
ac->global_id = global_id;
}
ret = ac->ops->handle_reply(ac, 0, reply, reply + reply_len,
session_key, session_key_len,
con_secret, con_secret_len);
mutex_unlock(&ac->mutex);
return ret;
}
bool ceph_auth_handle_bad_method(struct ceph_auth_client *ac,
int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt)
{
mutex_lock(&ac->mutex);
WARN_ON(used_proto != ac->protocol);
if (result == -EOPNOTSUPP) {
if (!contains(allowed_protos, proto_cnt, ac->protocol)) {
pr_err("auth protocol '%s' not allowed\n",
ceph_auth_proto_name(ac->protocol));
goto not_allowed;
}
if (!contains(allowed_modes, mode_cnt, ac->preferred_mode) &&
(ac->fallback_mode == CEPH_CON_MODE_UNKNOWN ||
!contains(allowed_modes, mode_cnt, ac->fallback_mode))) {
pr_err("preferred mode '%s' not allowed\n",
ceph_con_mode_name(ac->preferred_mode));
if (ac->fallback_mode == CEPH_CON_MODE_UNKNOWN)
pr_err("no fallback mode\n");
else
pr_err("fallback mode '%s' not allowed\n",
ceph_con_mode_name(ac->fallback_mode));
goto not_allowed;
}
}
WARN_ON(result == -EOPNOTSUPP || result >= 0);
pr_err("auth protocol '%s' msgr authentication failed: %d\n",
ceph_auth_proto_name(ac->protocol), result);
mutex_unlock(&ac->mutex);
return true;
not_allowed:
mutex_unlock(&ac->mutex);
return false;
}
int ceph_auth_get_authorizer(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
int peer_type, void *buf, int *buf_len)
{
void *end = buf + *buf_len;
int pref_mode, fallb_mode;
int proto;
void *p;
int ret;
ret = __ceph_auth_get_authorizer(ac, auth, peer_type, true, &proto,
&pref_mode, &fallb_mode);
if (ret)
return ret;
p = buf;
ceph_encode_32_safe(&p, end, proto, e_range);
ret = encode_con_modes(&p, end, pref_mode, fallb_mode);
if (ret)
return ret;
ceph_encode_32_safe(&p, end, auth->authorizer_buf_len, e_range);
*buf_len = p - buf;
return 0;
e_range:
return -ERANGE;
}
EXPORT_SYMBOL(ceph_auth_get_authorizer);
int ceph_auth_handle_svc_reply_more(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
void *reply, int reply_len,
void *buf, int *buf_len)
{
void *end = buf + *buf_len;
void *p;
int ret;
ret = ceph_auth_add_authorizer_challenge(ac, auth->authorizer,
reply, reply_len);
if (ret)
return ret;
p = buf;
ceph_encode_32_safe(&p, end, auth->authorizer_buf_len, e_range);
*buf_len = p - buf;
return 0;
e_range:
return -ERANGE;
}
EXPORT_SYMBOL(ceph_auth_handle_svc_reply_more);
int ceph_auth_handle_svc_reply_done(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len)
{
return ceph_auth_verify_authorizer_reply(ac, auth->authorizer,
reply, reply_len, session_key, session_key_len,
con_secret, con_secret_len);
}
EXPORT_SYMBOL(ceph_auth_handle_svc_reply_done);
bool ceph_auth_handle_bad_authorizer(struct ceph_auth_client *ac,
int peer_type, int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt)
{
mutex_lock(&ac->mutex);
WARN_ON(used_proto != ac->protocol);
if (result == -EOPNOTSUPP) {
if (!contains(allowed_protos, proto_cnt, ac->protocol)) {
pr_err("auth protocol '%s' not allowed by %s\n",
ceph_auth_proto_name(ac->protocol),
ceph_entity_type_name(peer_type));
goto not_allowed;
}
if (!contains(allowed_modes, mode_cnt, ac->preferred_mode) &&
(ac->fallback_mode == CEPH_CON_MODE_UNKNOWN ||
!contains(allowed_modes, mode_cnt, ac->fallback_mode))) {
pr_err("preferred mode '%s' not allowed by %s\n",
ceph_con_mode_name(ac->preferred_mode),
ceph_entity_type_name(peer_type));
if (ac->fallback_mode == CEPH_CON_MODE_UNKNOWN)
pr_err("no fallback mode\n");
else
pr_err("fallback mode '%s' not allowed by %s\n",
ceph_con_mode_name(ac->fallback_mode),
ceph_entity_type_name(peer_type));
goto not_allowed;
}
}
WARN_ON(result == -EOPNOTSUPP || result >= 0);
pr_err("auth protocol '%s' authorization to %s failed: %d\n",
ceph_auth_proto_name(ac->protocol),
ceph_entity_type_name(peer_type), result);
if (ac->ops->invalidate_authorizer)
ac->ops->invalidate_authorizer(ac, peer_type);
mutex_unlock(&ac->mutex);
return true;
not_allowed:
mutex_unlock(&ac->mutex);
return false;
}
EXPORT_SYMBOL(ceph_auth_handle_bad_authorizer);

View File

@ -70,7 +70,9 @@ static int build_request(struct ceph_auth_client *ac, void *buf, void *end)
* authenticate state, so nothing happens here.
*/
static int handle_reply(struct ceph_auth_client *ac, int result,
void *buf, void *end)
void *buf, void *end, u8 *session_key,
int *session_key_len, u8 *con_secret,
int *con_secret_len)
{
struct ceph_auth_none_info *xi = ac->private;
@ -116,7 +118,6 @@ static int ceph_auth_none_create_authorizer(
}
static const struct ceph_auth_client_ops ceph_auth_none_ops = {
.name = "none",
.reset = reset,
.destroy = destroy,
.is_authenticated = is_authenticated,

View File

@ -22,12 +22,15 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
{
struct ceph_x_info *xi = ac->private;
int need;
int missing;
int need; /* missing + need renewal */
ceph_x_validate_tickets(ac, &need);
dout("ceph_x_is_authenticated want=%d need=%d have=%d\n",
ac->want_keys, need, xi->have_keys);
return (ac->want_keys & xi->have_keys) == ac->want_keys;
missing = ac->want_keys & ~xi->have_keys;
WARN_ON((need & missing) != missing);
dout("%s want 0x%x have 0x%x missing 0x%x -> %d\n", __func__,
ac->want_keys, xi->have_keys, missing, !missing);
return !missing;
}
static int ceph_x_should_authenticate(struct ceph_auth_client *ac)
@ -36,9 +39,9 @@ static int ceph_x_should_authenticate(struct ceph_auth_client *ac)
int need;
ceph_x_validate_tickets(ac, &need);
dout("ceph_x_should_authenticate want=%d need=%d have=%d\n",
ac->want_keys, need, xi->have_keys);
return need != 0;
dout("%s want 0x%x have 0x%x need 0x%x -> %d\n", __func__,
ac->want_keys, xi->have_keys, need, !!need);
return !!need;
}
static int ceph_x_encrypt_offset(void)
@ -197,7 +200,7 @@ static int process_one_ticket(struct ceph_auth_client *ac,
dout(" decrypted %d bytes\n", ret);
dend = dp + ret;
tkt_struct_v = ceph_decode_8(&dp);
ceph_decode_8_safe(&dp, dend, tkt_struct_v, bad);
if (tkt_struct_v != 1)
goto bad;
@ -205,6 +208,7 @@ static int process_one_ticket(struct ceph_auth_client *ac,
if (ret)
goto out;
ceph_decode_need(&dp, dend, sizeof(struct ceph_timespec), bad);
ceph_decode_timespec64(&validity, dp);
dp += sizeof(struct ceph_timespec);
new_expires = ktime_get_real_seconds() + validity.tv_sec;
@ -265,22 +269,21 @@ out:
static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
struct ceph_crypto_key *secret,
void *buf, void *end)
void **p, void *end)
{
void *p = buf;
u8 reply_struct_v;
u32 num;
int ret;
ceph_decode_8_safe(&p, end, reply_struct_v, bad);
ceph_decode_8_safe(p, end, reply_struct_v, bad);
if (reply_struct_v != 1)
return -EINVAL;
ceph_decode_32_safe(&p, end, num, bad);
ceph_decode_32_safe(p, end, num, bad);
dout("%d tickets\n", num);
while (num--) {
ret = process_one_ticket(ac, secret, &p, end);
ret = process_one_ticket(ac, secret, p, end);
if (ret)
return ret;
}
@ -379,6 +382,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
}
}
au->service = th->service;
WARN_ON(!th->secret_id);
au->secret_id = th->secret_id;
msg_a = au->buf->vec.iov_base;
@ -442,9 +446,10 @@ static bool need_key(struct ceph_x_ticket_handler *th)
static bool have_key(struct ceph_x_ticket_handler *th)
{
if (th->have_key) {
if (ktime_get_real_seconds() >= th->expires)
th->have_key = false;
if (th->have_key && ktime_get_real_seconds() >= th->expires) {
dout("ticket %d (%s) secret_id %llu expired\n", th->service,
ceph_entity_type_name(th->service), th->secret_id);
th->have_key = false;
}
return th->have_key;
@ -486,6 +491,7 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
struct ceph_x_info *xi = ac->private;
int need;
struct ceph_x_request_header *head = buf;
void *p;
int ret;
struct ceph_x_ticket_handler *th =
get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
@ -494,18 +500,17 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
return PTR_ERR(th);
ceph_x_validate_tickets(ac, &need);
dout("build_request want %x have %x need %x\n",
ac->want_keys, xi->have_keys, need);
dout("%s want 0x%x have 0x%x need 0x%x\n", __func__, ac->want_keys,
xi->have_keys, need);
if (need & CEPH_ENTITY_TYPE_AUTH) {
struct ceph_x_authenticate *auth = (void *)(head + 1);
void *p = auth + 1;
void *enc_buf = xi->auth_authorizer.enc_buf;
struct ceph_x_challenge_blob *blob = enc_buf +
ceph_x_encrypt_offset();
u64 *u;
p = auth + 1;
if (p > end)
return -ERANGE;
@ -521,7 +526,7 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
if (ret < 0)
return ret;
auth->struct_v = 1;
auth->struct_v = 2; /* nautilus+ */
auth->key = 0;
for (u = (u64 *)enc_buf; u + 1 <= (u64 *)(enc_buf + ret); u++)
auth->key ^= *(__le64 *)u;
@ -534,39 +539,117 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
if (ret < 0)
return ret;
/* nautilus+: request service tickets at the same time */
need = ac->want_keys & ~CEPH_ENTITY_TYPE_AUTH;
WARN_ON(!need);
ceph_encode_32_safe(&p, end, need, e_range);
return p - buf;
}
if (need) {
void *p = head + 1;
struct ceph_x_service_ticket_request *req;
if (p > end)
return -ERANGE;
head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY);
dout(" get_principal_session_key\n");
ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer);
if (ret)
return ret;
ceph_encode_copy(&p, xi->auth_authorizer.buf->vec.iov_base,
xi->auth_authorizer.buf->vec.iov_len);
req = p;
req->keys = cpu_to_le32(need);
p += sizeof(*req);
p = buf;
ceph_encode_16_safe(&p, end, CEPHX_GET_PRINCIPAL_SESSION_KEY,
e_range);
ceph_encode_copy_safe(&p, end,
xi->auth_authorizer.buf->vec.iov_base,
xi->auth_authorizer.buf->vec.iov_len, e_range);
ceph_encode_8_safe(&p, end, 1, e_range);
ceph_encode_32_safe(&p, end, need, e_range);
return p - buf;
}
return 0;
e_range:
return -ERANGE;
}
static int handle_auth_session_key(struct ceph_auth_client *ac,
void **p, void *end,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len)
{
struct ceph_x_info *xi = ac->private;
struct ceph_x_ticket_handler *th;
void *dp, *dend;
int len;
int ret;
/* AUTH ticket */
ret = ceph_x_proc_ticket_reply(ac, &xi->secret, p, end);
if (ret)
return ret;
if (*p == end) {
/* pre-nautilus (or didn't request service tickets!) */
WARN_ON(session_key || con_secret);
return 0;
}
th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
if (IS_ERR(th))
return PTR_ERR(th);
if (session_key) {
memcpy(session_key, th->session_key.key, th->session_key.len);
*session_key_len = th->session_key.len;
}
/* connection secret */
ceph_decode_32_safe(p, end, len, e_inval);
dout("%s connection secret blob len %d\n", __func__, len);
if (len > 0) {
dp = *p + ceph_x_encrypt_offset();
ret = ceph_x_decrypt(&th->session_key, p, *p + len);
if (ret < 0)
return ret;
dout("%s decrypted %d bytes\n", __func__, ret);
dend = dp + ret;
ceph_decode_32_safe(&dp, dend, len, e_inval);
if (len > CEPH_MAX_CON_SECRET_LEN) {
pr_err("connection secret too big %d\n", len);
return -EINVAL;
}
dout("%s connection secret len %d\n", __func__, len);
if (con_secret) {
memcpy(con_secret, dp, len);
*con_secret_len = len;
}
}
/* service tickets */
ceph_decode_32_safe(p, end, len, e_inval);
dout("%s service tickets blob len %d\n", __func__, len);
if (len > 0) {
ret = ceph_x_proc_ticket_reply(ac, &th->session_key,
p, *p + len);
if (ret)
return ret;
}
return 0;
e_inval:
return -EINVAL;
}
static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
void *buf, void *end)
void *buf, void *end,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len)
{
struct ceph_x_info *xi = ac->private;
struct ceph_x_reply_header *head = buf;
struct ceph_x_ticket_handler *th;
int len = end - buf;
void *p;
int op;
int ret;
@ -587,22 +670,25 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
return -EAGAIN;
}
op = le16_to_cpu(head->op);
result = le32_to_cpu(head->result);
p = buf;
ceph_decode_16_safe(&p, end, op, e_inval);
ceph_decode_32_safe(&p, end, result, e_inval);
dout("handle_reply op %d result %d\n", op, result);
switch (op) {
case CEPHX_GET_AUTH_SESSION_KEY:
/* verify auth key */
ret = ceph_x_proc_ticket_reply(ac, &xi->secret,
buf + sizeof(*head), end);
/* AUTH ticket + [connection secret] + service tickets */
ret = handle_auth_session_key(ac, &p, end, session_key,
session_key_len, con_secret,
con_secret_len);
break;
case CEPHX_GET_PRINCIPAL_SESSION_KEY:
th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
if (IS_ERR(th))
return PTR_ERR(th);
ret = ceph_x_proc_ticket_reply(ac, &th->session_key,
buf + sizeof(*head), end);
/* service tickets */
ret = ceph_x_proc_ticket_reply(ac, &th->session_key, &p, end);
break;
default:
@ -613,6 +699,9 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
if (ac->want_keys == xi->have_keys)
return 0;
return -EAGAIN;
e_inval:
return -EINVAL;
}
static void ceph_x_destroy_authorizer(struct ceph_authorizer *a)
@ -678,40 +767,44 @@ static int ceph_x_update_authorizer(
return 0;
}
static int decrypt_authorize_challenge(struct ceph_x_authorizer *au,
void *challenge_buf,
int challenge_buf_len,
u64 *server_challenge)
/*
* CephXAuthorizeChallenge
*/
static int decrypt_authorizer_challenge(struct ceph_crypto_key *secret,
void *challenge, int challenge_len,
u64 *server_challenge)
{
struct ceph_x_authorize_challenge *ch =
challenge_buf + sizeof(struct ceph_x_encrypt_header);
void *dp, *dend;
int ret;
/* no leading len */
ret = __ceph_x_decrypt(&au->session_key, challenge_buf,
challenge_buf_len);
ret = __ceph_x_decrypt(secret, challenge, challenge_len);
if (ret < 0)
return ret;
if (ret < sizeof(*ch)) {
pr_err("bad size %d for ceph_x_authorize_challenge\n", ret);
return -EINVAL;
}
*server_challenge = le64_to_cpu(ch->server_challenge);
dout("%s decrypted %d bytes\n", __func__, ret);
dp = challenge + sizeof(struct ceph_x_encrypt_header);
dend = dp + ret;
ceph_decode_skip_8(&dp, dend, e_inval); /* struct_v */
ceph_decode_64_safe(&dp, dend, *server_challenge, e_inval);
dout("%s server_challenge %llu\n", __func__, *server_challenge);
return 0;
e_inval:
return -EINVAL;
}
static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac,
struct ceph_authorizer *a,
void *challenge_buf,
int challenge_buf_len)
void *challenge, int challenge_len)
{
struct ceph_x_authorizer *au = (void *)a;
u64 server_challenge;
int ret;
ret = decrypt_authorize_challenge(au, challenge_buf, challenge_buf_len,
&server_challenge);
ret = decrypt_authorizer_challenge(&au->session_key, challenge,
challenge_len, &server_challenge);
if (ret) {
pr_err("failed to decrypt authorize challenge: %d", ret);
return ret;
@ -726,29 +819,76 @@ static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac,
return 0;
}
static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a)
/*
* CephXAuthorizeReply
*/
static int decrypt_authorizer_reply(struct ceph_crypto_key *secret,
void **p, void *end, u64 *nonce_plus_one,
u8 *con_secret, int *con_secret_len)
{
struct ceph_x_authorizer *au = (void *)a;
void *p = au->enc_buf;
struct ceph_x_authorize_reply *reply = p + ceph_x_encrypt_offset();
void *dp, *dend;
u8 struct_v;
int len;
int ret;
ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN);
dp = *p + ceph_x_encrypt_offset();
ret = ceph_x_decrypt(secret, p, end);
if (ret < 0)
return ret;
if (ret < sizeof(*reply)) {
pr_err("bad size %d for ceph_x_authorize_reply\n", ret);
return -EINVAL;
dout("%s decrypted %d bytes\n", __func__, ret);
dend = dp + ret;
ceph_decode_8_safe(&dp, dend, struct_v, e_inval);
ceph_decode_64_safe(&dp, dend, *nonce_plus_one, e_inval);
dout("%s nonce_plus_one %llu\n", __func__, *nonce_plus_one);
if (struct_v >= 2) {
ceph_decode_32_safe(&dp, dend, len, e_inval);
if (len > CEPH_MAX_CON_SECRET_LEN) {
pr_err("connection secret too big %d\n", len);
return -EINVAL;
}
dout("%s connection secret len %d\n", __func__, len);
if (con_secret) {
memcpy(con_secret, dp, len);
*con_secret_len = len;
}
}
if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one))
ret = -EPERM;
else
ret = 0;
dout("verify_authorizer_reply nonce %llx got %llx ret %d\n",
au->nonce, le64_to_cpu(reply->nonce_plus_one), ret);
return ret;
return 0;
e_inval:
return -EINVAL;
}
static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len)
{
struct ceph_x_authorizer *au = (void *)a;
u64 nonce_plus_one;
int ret;
if (session_key) {
memcpy(session_key, au->session_key.key, au->session_key.len);
*session_key_len = au->session_key.len;
}
ret = decrypt_authorizer_reply(&au->session_key, &reply,
reply + reply_len, &nonce_plus_one,
con_secret, con_secret_len);
if (ret)
return ret;
if (nonce_plus_one != au->nonce + 1) {
pr_err("failed to authenticate server\n");
return -EPERM;
}
return 0;
}
static void ceph_x_reset(struct ceph_auth_client *ac)
@ -785,8 +925,15 @@ static void invalidate_ticket(struct ceph_auth_client *ac, int peer_type)
struct ceph_x_ticket_handler *th;
th = get_ticket_handler(ac, peer_type);
if (!IS_ERR(th))
if (IS_ERR(th))
return;
if (th->have_key) {
dout("ticket %d (%s) secret_id %llu invalidated\n",
th->service, ceph_entity_type_name(th->service),
th->secret_id);
th->have_key = false;
}
}
static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
@ -911,7 +1058,6 @@ static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth,
}
static const struct ceph_auth_client_ops ceph_x_ops = {
.name = "x",
.is_authenticated = ceph_x_is_authenticated,
.should_authenticate = ceph_x_should_authenticate,
.build_request = ceph_x_build_request,

View File

@ -38,7 +38,8 @@ struct ceph_x_authenticate {
__u8 struct_v;
__le64 client_challenge;
__le64 key;
/* ticket blob */
/* old_ticket blob */
/* nautilus+: other_keys */
} __attribute__ ((packed));
struct ceph_x_service_ticket_request {

View File

@ -265,6 +265,7 @@ enum {
Opt_ip,
Opt_crush_location,
Opt_read_from_replica,
Opt_ms_mode,
/* string args above */
Opt_share,
Opt_crc,
@ -287,6 +288,23 @@ static const struct constant_table ceph_param_read_from_replica[] = {
{}
};
enum ceph_ms_mode {
Opt_ms_mode_legacy,
Opt_ms_mode_crc,
Opt_ms_mode_secure,
Opt_ms_mode_prefer_crc,
Opt_ms_mode_prefer_secure
};
static const struct constant_table ceph_param_ms_mode[] = {
{"legacy", Opt_ms_mode_legacy},
{"crc", Opt_ms_mode_crc},
{"secure", Opt_ms_mode_secure},
{"prefer-crc", Opt_ms_mode_prefer_crc},
{"prefer-secure", Opt_ms_mode_prefer_secure},
{}
};
static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_flag ("abort_on_full", Opt_abort_on_full),
fsparam_flag_no ("cephx_require_signatures", Opt_cephx_require_signatures),
@ -305,6 +323,8 @@ static const struct fs_parameter_spec ceph_parameters[] = {
fs_param_deprecated, NULL),
fsparam_enum ("read_from_replica", Opt_read_from_replica,
ceph_param_read_from_replica),
fsparam_enum ("ms_mode", Opt_ms_mode,
ceph_param_ms_mode),
fsparam_string ("secret", Opt_secret),
fsparam_flag_no ("share", Opt_share),
fsparam_flag_no ("tcp_nodelay", Opt_tcp_nodelay),
@ -333,6 +353,8 @@ struct ceph_options *ceph_alloc_options(void)
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
opt->read_from_replica = CEPH_READ_FROM_REPLICA_DEFAULT;
opt->con_modes[0] = CEPH_CON_MODE_UNKNOWN;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
return opt;
}
EXPORT_SYMBOL(ceph_alloc_options);
@ -503,6 +525,32 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
BUG();
}
break;
case Opt_ms_mode:
switch (result.uint_32) {
case Opt_ms_mode_legacy:
opt->con_modes[0] = CEPH_CON_MODE_UNKNOWN;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
break;
case Opt_ms_mode_crc:
opt->con_modes[0] = CEPH_CON_MODE_CRC;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
break;
case Opt_ms_mode_secure:
opt->con_modes[0] = CEPH_CON_MODE_SECURE;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
break;
case Opt_ms_mode_prefer_crc:
opt->con_modes[0] = CEPH_CON_MODE_CRC;
opt->con_modes[1] = CEPH_CON_MODE_SECURE;
break;
case Opt_ms_mode_prefer_secure:
opt->con_modes[0] = CEPH_CON_MODE_SECURE;
opt->con_modes[1] = CEPH_CON_MODE_CRC;
break;
default:
BUG();
}
break;
case Opt_osdtimeout:
warn_plog(&log, "Ignoring osdtimeout");
@ -616,6 +664,21 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
} else if (opt->read_from_replica == CEPH_OSD_FLAG_LOCALIZE_READS) {
seq_puts(m, "read_from_replica=localize,");
}
if (opt->con_modes[0] != CEPH_CON_MODE_UNKNOWN) {
if (opt->con_modes[0] == CEPH_CON_MODE_CRC &&
opt->con_modes[1] == CEPH_CON_MODE_UNKNOWN) {
seq_puts(m, "ms_mode=crc,");
} else if (opt->con_modes[0] == CEPH_CON_MODE_SECURE &&
opt->con_modes[1] == CEPH_CON_MODE_UNKNOWN) {
seq_puts(m, "ms_mode=secure,");
} else if (opt->con_modes[0] == CEPH_CON_MODE_CRC &&
opt->con_modes[1] == CEPH_CON_MODE_SECURE) {
seq_puts(m, "ms_mode=prefer-crc,");
} else if (opt->con_modes[0] == CEPH_CON_MODE_SECURE &&
opt->con_modes[1] == CEPH_CON_MODE_CRC) {
seq_puts(m, "ms_mode=prefer-secure,");
}
}
if (opt->flags & CEPH_OPT_FSID)
seq_printf(m, "fsid=%pU,", &opt->fsid);

View File

@ -18,6 +18,34 @@ const char *ceph_entity_type_name(int type)
}
EXPORT_SYMBOL(ceph_entity_type_name);
const char *ceph_auth_proto_name(int proto)
{
switch (proto) {
case CEPH_AUTH_UNKNOWN:
return "unknown";
case CEPH_AUTH_NONE:
return "none";
case CEPH_AUTH_CEPHX:
return "cephx";
default:
return "???";
}
}
const char *ceph_con_mode_name(int mode)
{
switch (mode) {
case CEPH_CON_MODE_UNKNOWN:
return "unknown";
case CEPH_CON_MODE_CRC:
return "crc";
case CEPH_CON_MODE_SECURE:
return "secure";
default:
return "???";
}
}
const char *ceph_osd_op_name(int op)
{
switch (op) {

View File

@ -5,6 +5,9 @@
#include <linux/ceph/types.h>
#include <linux/ceph/buffer.h>
#define CEPH_KEY_LEN 16
#define CEPH_MAX_CON_SECRET_LEN 64
/*
* cryptographic secret
*/

View File

@ -1,4 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/inet.h>
#include <linux/ceph/decode.h>
@ -82,3 +85,101 @@ bad:
}
EXPORT_SYMBOL(ceph_decode_entity_addr);
/*
* Return addr of desired type (MSGR2 or LEGACY) or error.
* Make sure there is only one match.
*
* Assume encoding with MSG_ADDR2.
*/
int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2,
struct ceph_entity_addr *addr)
{
__le32 my_type = msgr2 ? CEPH_ENTITY_ADDR_TYPE_MSGR2 :
CEPH_ENTITY_ADDR_TYPE_LEGACY;
struct ceph_entity_addr tmp_addr;
int addr_cnt;
bool found;
u8 marker;
int ret;
int i;
ceph_decode_8_safe(p, end, marker, e_inval);
if (marker != 2) {
pr_err("bad addrvec marker %d\n", marker);
return -EINVAL;
}
ceph_decode_32_safe(p, end, addr_cnt, e_inval);
found = false;
for (i = 0; i < addr_cnt; i++) {
ret = ceph_decode_entity_addr(p, end, &tmp_addr);
if (ret)
return ret;
if (tmp_addr.type == my_type) {
if (found) {
pr_err("another match of type %d in addrvec\n",
le32_to_cpu(my_type));
return -EINVAL;
}
memcpy(addr, &tmp_addr, sizeof(*addr));
found = true;
}
}
if (!found && addr_cnt != 0) {
pr_err("no match of type %d in addrvec\n",
le32_to_cpu(my_type));
return -ENOENT;
}
return 0;
e_inval:
return -EINVAL;
}
EXPORT_SYMBOL(ceph_decode_entity_addrvec);
static int get_sockaddr_encoding_len(sa_family_t family)
{
union {
struct sockaddr sa;
struct sockaddr_in sin;
struct sockaddr_in6 sin6;
} u;
switch (family) {
case AF_INET:
return sizeof(u.sin);
case AF_INET6:
return sizeof(u.sin6);
default:
return sizeof(u);
}
}
int ceph_entity_addr_encoding_len(const struct ceph_entity_addr *addr)
{
sa_family_t family = get_unaligned(&addr->in_addr.ss_family);
int addr_len = get_sockaddr_encoding_len(family);
return 1 + CEPH_ENCODING_START_BLK_LEN + 4 + 4 + 4 + addr_len;
}
void ceph_encode_entity_addr(void **p, const struct ceph_entity_addr *addr)
{
sa_family_t family = get_unaligned(&addr->in_addr.ss_family);
int addr_len = get_sockaddr_encoding_len(family);
ceph_encode_8(p, 1); /* marker */
ceph_start_encoding(p, 1, 1, sizeof(addr->type) +
sizeof(addr->nonce) +
sizeof(u32) + addr_len);
ceph_encode_copy(p, &addr->type, sizeof(addr->type));
ceph_encode_copy(p, &addr->nonce, sizeof(addr->nonce));
ceph_encode_32(p, addr_len);
ceph_encode_16(p, family);
ceph_encode_copy(p, addr->in_addr.__data, addr_len - sizeof(family));
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -36,57 +36,122 @@ static const struct ceph_connection_operations mon_con_ops;
static int __validate_auth(struct ceph_mon_client *monc);
static int decode_mon_info(void **p, void *end, bool msgr2,
struct ceph_entity_addr *addr)
{
void *mon_info_end;
u32 struct_len;
u8 struct_v;
int ret;
ret = ceph_start_decoding(p, end, 1, "mon_info_t", &struct_v,
&struct_len);
if (ret)
return ret;
mon_info_end = *p + struct_len;
ceph_decode_skip_string(p, end, e_inval); /* skip mon name */
ret = ceph_decode_entity_addrvec(p, end, msgr2, addr);
if (ret)
return ret;
*p = mon_info_end;
return 0;
e_inval:
return -EINVAL;
}
/*
* Decode a monmap blob (e.g., during mount).
*
* Assume MonMap v3 (i.e. encoding with MONNAMES and MONENC).
*/
static struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
static struct ceph_monmap *ceph_monmap_decode(void **p, void *end, bool msgr2)
{
struct ceph_monmap *m = NULL;
int i, err = -EINVAL;
struct ceph_monmap *monmap = NULL;
struct ceph_fsid fsid;
u32 epoch, num_mon;
u32 len;
u32 struct_len;
int blob_len;
int num_mon;
u8 struct_v;
u32 epoch;
int ret;
int i;
ceph_decode_32_safe(&p, end, len, bad);
ceph_decode_need(&p, end, len, bad);
ceph_decode_32_safe(p, end, blob_len, e_inval);
ceph_decode_need(p, end, blob_len, e_inval);
dout("monmap_decode %p %p len %d (%d)\n", p, end, len, (int)(end-p));
p += sizeof(u16); /* skip version */
ret = ceph_start_decoding(p, end, 6, "monmap", &struct_v, &struct_len);
if (ret)
goto fail;
ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad);
ceph_decode_copy(&p, &fsid, sizeof(fsid));
epoch = ceph_decode_32(&p);
dout("%s struct_v %d\n", __func__, struct_v);
ceph_decode_copy_safe(p, end, &fsid, sizeof(fsid), e_inval);
ceph_decode_32_safe(p, end, epoch, e_inval);
if (struct_v >= 6) {
u32 feat_struct_len;
u8 feat_struct_v;
num_mon = ceph_decode_32(&p);
*p += sizeof(struct ceph_timespec); /* skip last_changed */
*p += sizeof(struct ceph_timespec); /* skip created */
if (num_mon > CEPH_MAX_MON)
goto bad;
m = kmalloc(struct_size(m, mon_inst, num_mon), GFP_NOFS);
if (m == NULL)
return ERR_PTR(-ENOMEM);
m->fsid = fsid;
m->epoch = epoch;
m->num_mon = num_mon;
for (i = 0; i < num_mon; ++i) {
struct ceph_entity_inst *inst = &m->mon_inst[i];
ret = ceph_start_decoding(p, end, 1, "mon_feature_t",
&feat_struct_v, &feat_struct_len);
if (ret)
goto fail;
/* copy name portion */
ceph_decode_copy_safe(&p, end, &inst->name,
sizeof(inst->name), bad);
err = ceph_decode_entity_addr(&p, end, &inst->addr);
if (err)
goto bad;
*p += feat_struct_len; /* skip persistent_features */
ret = ceph_start_decoding(p, end, 1, "mon_feature_t",
&feat_struct_v, &feat_struct_len);
if (ret)
goto fail;
*p += feat_struct_len; /* skip optional_features */
}
dout("monmap_decode epoch %d, num_mon %d\n", m->epoch,
m->num_mon);
for (i = 0; i < m->num_mon; i++)
dout("monmap_decode mon%d is %s\n", i,
ceph_pr_addr(&m->mon_inst[i].addr));
return m;
bad:
dout("monmap_decode failed with %d\n", err);
kfree(m);
return ERR_PTR(err);
ceph_decode_32_safe(p, end, num_mon, e_inval);
dout("%s fsid %pU epoch %u num_mon %d\n", __func__, &fsid, epoch,
num_mon);
if (num_mon > CEPH_MAX_MON)
goto e_inval;
monmap = kmalloc(struct_size(monmap, mon_inst, num_mon), GFP_NOIO);
if (!monmap) {
ret = -ENOMEM;
goto fail;
}
monmap->fsid = fsid;
monmap->epoch = epoch;
monmap->num_mon = num_mon;
/* legacy_mon_addr map or mon_info map */
for (i = 0; i < num_mon; i++) {
struct ceph_entity_inst *inst = &monmap->mon_inst[i];
ceph_decode_skip_string(p, end, e_inval); /* skip mon name */
inst->name.type = CEPH_ENTITY_TYPE_MON;
inst->name.num = cpu_to_le64(i);
if (struct_v >= 6)
ret = decode_mon_info(p, end, msgr2, &inst->addr);
else
ret = ceph_decode_entity_addr(p, end, &inst->addr);
if (ret)
goto fail;
dout("%s mon%d addr %s\n", __func__, i,
ceph_pr_addr(&inst->addr));
}
return monmap;
e_inval:
ret = -EINVAL;
fail:
kfree(monmap);
return ERR_PTR(ret);
}
/*
@ -96,9 +161,11 @@ int ceph_monmap_contains(struct ceph_monmap *m, struct ceph_entity_addr *addr)
{
int i;
for (i = 0; i < m->num_mon; i++)
if (memcmp(addr, &m->mon_inst[i].addr, sizeof(*addr)) == 0)
for (i = 0; i < m->num_mon; i++) {
if (ceph_addr_equal_no_type(addr, &m->mon_inst[i].addr))
return 1;
}
return 0;
}
@ -190,10 +257,16 @@ static void __open_session(struct ceph_mon_client *monc)
&monc->monmap->mon_inst[monc->cur_mon].addr);
/*
* send an initial keepalive to ensure our timestamp is valid
* by the time we are in an OPENED state
* Queue a keepalive to ensure that in case of an early fault
* the messenger doesn't put us into STANDBY state and instead
* retries. This also ensures that our timestamp is valid by
* the time we finish hunting and delayed_work() checks it.
*/
ceph_con_keepalive(&monc->con);
if (ceph_msgr2(monc->client)) {
monc->pending_auth = 1;
return;
}
/* initiate authentication handshake */
ret = ceph_auth_build_hello(monc->auth,
@ -476,7 +549,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
p = msg->front.iov_base;
end = p + msg->front.iov_len;
monmap = ceph_monmap_decode(p, end);
monmap = ceph_monmap_decode(&p, end, ceph_msgr2(client));
if (IS_ERR(monmap)) {
pr_err("problem decoding monmap, %d\n",
(int)PTR_ERR(monmap));
@ -1052,8 +1125,9 @@ static void delayed_work(struct work_struct *work)
*/
static int build_initial_monmap(struct ceph_mon_client *monc)
{
__le32 my_type = ceph_msgr2(monc->client) ?
CEPH_ENTITY_ADDR_TYPE_MSGR2 : CEPH_ENTITY_ADDR_TYPE_LEGACY;
struct ceph_options *opt = monc->client->options;
struct ceph_entity_addr *mon_addr = opt->mon_addr;
int num_mon = opt->num_mon;
int i;
@ -1062,12 +1136,16 @@ static int build_initial_monmap(struct ceph_mon_client *monc)
GFP_KERNEL);
if (!monc->monmap)
return -ENOMEM;
for (i = 0; i < num_mon; i++) {
monc->monmap->mon_inst[i].addr = mon_addr[i];
monc->monmap->mon_inst[i].addr.nonce = 0;
monc->monmap->mon_inst[i].name.type =
CEPH_ENTITY_TYPE_MON;
monc->monmap->mon_inst[i].name.num = cpu_to_le64(i);
struct ceph_entity_inst *inst = &monc->monmap->mon_inst[i];
memcpy(&inst->addr.in_addr, &opt->mon_addr[i].in_addr,
sizeof(inst->addr.in_addr));
inst->addr.type = my_type;
inst->addr.nonce = 0;
inst->name.type = CEPH_ENTITY_TYPE_MON;
inst->name.num = cpu_to_le64(i);
}
monc->monmap->num_mon = num_mon;
return 0;
@ -1089,8 +1167,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
/* connection */
/* authentication */
monc->auth = ceph_auth_init(cl->options->name,
cl->options->key);
monc->auth = ceph_auth_init(cl->options->name, cl->options->key,
cl->options->con_modes);
if (IS_ERR(monc->auth)) {
err = PTR_ERR(monc->auth);
goto out_monmap;
@ -1194,30 +1272,22 @@ static void finish_hunting(struct ceph_mon_client *monc)
}
}
static void handle_auth_reply(struct ceph_mon_client *monc,
struct ceph_msg *msg)
static void finish_auth(struct ceph_mon_client *monc, int auth_err,
bool was_authed)
{
int ret;
int was_auth = 0;
dout("%s auth_err %d was_authed %d\n", __func__, auth_err, was_authed);
WARN_ON(auth_err > 0);
mutex_lock(&monc->mutex);
was_auth = ceph_auth_is_authenticated(monc->auth);
monc->pending_auth = 0;
ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
msg->front.iov_len,
monc->m_auth->front.iov_base,
monc->m_auth->front_alloc_len);
if (ret > 0) {
__send_prepared_auth_request(monc, ret);
goto out;
if (auth_err) {
monc->client->auth_err = auth_err;
wake_up_all(&monc->client->auth_wq);
return;
}
finish_hunting(monc);
if (ret < 0) {
monc->client->auth_err = ret;
} else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
dout("authenticated, starting session\n");
if (!was_authed && ceph_auth_is_authenticated(monc->auth)) {
dout("%s authenticated, starting session global_id %llu\n",
__func__, monc->auth->global_id);
monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
monc->client->msgr.inst.name.num =
@ -1229,11 +1299,27 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
pr_info("mon%d %s session established\n", monc->cur_mon,
ceph_pr_addr(&monc->con.peer_addr));
}
}
out:
static void handle_auth_reply(struct ceph_mon_client *monc,
struct ceph_msg *msg)
{
bool was_authed;
int ret;
mutex_lock(&monc->mutex);
was_authed = ceph_auth_is_authenticated(monc->auth);
ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
msg->front.iov_len,
monc->m_auth->front.iov_base,
monc->m_auth->front_alloc_len);
if (ret > 0) {
__send_prepared_auth_request(monc, ret);
} else {
finish_auth(monc, ret, was_authed);
finish_hunting(monc);
}
mutex_unlock(&monc->mutex);
if (monc->client->auth_err < 0)
wake_up_all(&monc->client->auth_wq);
}
static int __validate_auth(struct ceph_mon_client *monc)
@ -1262,6 +1348,88 @@ int ceph_monc_validate_auth(struct ceph_mon_client *monc)
}
EXPORT_SYMBOL(ceph_monc_validate_auth);
static int mon_get_auth_request(struct ceph_connection *con,
void *buf, int *buf_len,
void **authorizer, int *authorizer_len)
{
struct ceph_mon_client *monc = con->private;
int ret;
mutex_lock(&monc->mutex);
ret = ceph_auth_get_request(monc->auth, buf, *buf_len);
mutex_unlock(&monc->mutex);
if (ret < 0)
return ret;
*buf_len = ret;
*authorizer = NULL;
*authorizer_len = 0;
return 0;
}
static int mon_handle_auth_reply_more(struct ceph_connection *con,
void *reply, int reply_len,
void *buf, int *buf_len,
void **authorizer, int *authorizer_len)
{
struct ceph_mon_client *monc = con->private;
int ret;
mutex_lock(&monc->mutex);
ret = ceph_auth_handle_reply_more(monc->auth, reply, reply_len,
buf, *buf_len);
mutex_unlock(&monc->mutex);
if (ret < 0)
return ret;
*buf_len = ret;
*authorizer = NULL;
*authorizer_len = 0;
return 0;
}
static int mon_handle_auth_done(struct ceph_connection *con,
u64 global_id, void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len)
{
struct ceph_mon_client *monc = con->private;
bool was_authed;
int ret;
mutex_lock(&monc->mutex);
WARN_ON(!monc->hunting);
was_authed = ceph_auth_is_authenticated(monc->auth);
ret = ceph_auth_handle_reply_done(monc->auth, global_id,
reply, reply_len,
session_key, session_key_len,
con_secret, con_secret_len);
finish_auth(monc, ret, was_authed);
if (!ret)
finish_hunting(monc);
mutex_unlock(&monc->mutex);
return 0;
}
static int mon_handle_auth_bad_method(struct ceph_connection *con,
int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt)
{
struct ceph_mon_client *monc = con->private;
bool was_authed;
mutex_lock(&monc->mutex);
WARN_ON(!monc->hunting);
was_authed = ceph_auth_is_authenticated(monc->auth);
ceph_auth_handle_bad_method(monc->auth, used_proto, result,
allowed_protos, proto_cnt,
allowed_modes, mode_cnt);
finish_auth(monc, -EACCES, was_authed);
mutex_unlock(&monc->mutex);
return 0;
}
/*
* handle incoming message
*/
@ -1412,4 +1580,8 @@ static const struct ceph_connection_operations mon_con_ops = {
.dispatch = dispatch,
.fault = mon_fault,
.alloc_msg = mon_alloc_msg,
.get_auth_request = mon_get_auth_request,
.handle_auth_reply_more = mon_handle_auth_reply_more,
.handle_auth_done = mon_handle_auth_done,
.handle_auth_bad_method = mon_handle_auth_bad_method,
};

View File

@ -3918,9 +3918,11 @@ static int handle_one_map(struct ceph_osd_client *osdc,
set_pool_was_full(osdc);
if (incremental)
newmap = osdmap_apply_incremental(&p, end, osdc->osdmap);
newmap = osdmap_apply_incremental(&p, end,
ceph_msgr2(osdc->client),
osdc->osdmap);
else
newmap = ceph_osdmap_decode(&p, end);
newmap = ceph_osdmap_decode(&p, end, ceph_msgr2(osdc->client));
if (IS_ERR(newmap))
return PTR_ERR(newmap);
@ -5575,6 +5577,7 @@ static void put_osd_con(struct ceph_connection *con)
/*
* authentication
*/
/*
* Note: returned pointer is the address of a structure that's
* managed separately. Caller must *not* attempt to free it.
@ -5586,23 +5589,12 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth;
struct ceph_auth_handshake *auth = &o->o_auth;
int ret;
if (force_new && auth->authorizer) {
ceph_auth_destroy_authorizer(auth->authorizer);
auth->authorizer = NULL;
}
if (!auth->authorizer) {
int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
auth);
if (ret)
return ERR_PTR(ret);
} else {
int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
auth);
if (ret)
return ERR_PTR(ret);
}
*proto = ac->protocol;
ret = __ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_OSD,
force_new, proto, NULL, NULL);
if (ret)
return ERR_PTR(ret);
return auth;
}
@ -5623,8 +5615,11 @@ static int verify_authorizer_reply(struct ceph_connection *con)
struct ceph_osd *o = con->private;
struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth;
struct ceph_auth_handshake *auth = &o->o_auth;
return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer);
return ceph_auth_verify_authorizer_reply(ac, auth->authorizer,
auth->authorizer_reply_buf, auth->authorizer_reply_buf_len,
NULL, NULL, NULL, NULL);
}
static int invalidate_authorizer(struct ceph_connection *con)
@ -5637,6 +5632,80 @@ static int invalidate_authorizer(struct ceph_connection *con)
return ceph_monc_validate_auth(&osdc->client->monc);
}
static int osd_get_auth_request(struct ceph_connection *con,
void *buf, int *buf_len,
void **authorizer, int *authorizer_len)
{
struct ceph_osd *o = con->private;
struct ceph_auth_client *ac = o->o_osdc->client->monc.auth;
struct ceph_auth_handshake *auth = &o->o_auth;
int ret;
ret = ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_OSD,
buf, buf_len);
if (ret)
return ret;
*authorizer = auth->authorizer_buf;
*authorizer_len = auth->authorizer_buf_len;
return 0;
}
static int osd_handle_auth_reply_more(struct ceph_connection *con,
void *reply, int reply_len,
void *buf, int *buf_len,
void **authorizer, int *authorizer_len)
{
struct ceph_osd *o = con->private;
struct ceph_auth_client *ac = o->o_osdc->client->monc.auth;
struct ceph_auth_handshake *auth = &o->o_auth;
int ret;
ret = ceph_auth_handle_svc_reply_more(ac, auth, reply, reply_len,
buf, buf_len);
if (ret)
return ret;
*authorizer = auth->authorizer_buf;
*authorizer_len = auth->authorizer_buf_len;
return 0;
}
static int osd_handle_auth_done(struct ceph_connection *con,
u64 global_id, void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len)
{
struct ceph_osd *o = con->private;
struct ceph_auth_client *ac = o->o_osdc->client->monc.auth;
struct ceph_auth_handshake *auth = &o->o_auth;
return ceph_auth_handle_svc_reply_done(ac, auth, reply, reply_len,
session_key, session_key_len,
con_secret, con_secret_len);
}
static int osd_handle_auth_bad_method(struct ceph_connection *con,
int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt)
{
struct ceph_osd *o = con->private;
struct ceph_mon_client *monc = &o->o_osdc->client->monc;
int ret;
if (ceph_auth_handle_bad_authorizer(monc->auth, CEPH_ENTITY_TYPE_OSD,
used_proto, result,
allowed_protos, proto_cnt,
allowed_modes, mode_cnt)) {
ret = ceph_monc_validate_auth(monc);
if (ret)
return ret;
}
return -EACCES;
}
static void osd_reencode_message(struct ceph_msg *msg)
{
int type = le16_to_cpu(msg->hdr.type);
@ -5674,4 +5743,8 @@ static const struct ceph_connection_operations osd_con_ops = {
.sign_message = osd_sign_message,
.check_message_signature = osd_check_message_signature,
.fault = osd_fault,
.get_auth_request = osd_get_auth_request,
.handle_auth_reply_more = osd_handle_auth_reply_more,
.handle_auth_done = osd_handle_auth_done,
.handle_auth_bad_method = osd_handle_auth_bad_method,
};

View File

@ -1647,7 +1647,8 @@ static int decode_old_pg_upmap_items(void **p, void *end,
/*
* decode a full map.
*/
static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
static int osdmap_decode(void **p, void *end, bool msgr2,
struct ceph_osdmap *map)
{
u8 struct_v;
u32 epoch = 0;
@ -1718,9 +1719,16 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
goto e_inval;
for (i = 0; i < map->max_osd; i++) {
err = ceph_decode_entity_addr(p, end, &map->osd_addr[i]);
struct ceph_entity_addr *addr = &map->osd_addr[i];
if (struct_v >= 8)
err = ceph_decode_entity_addrvec(p, end, msgr2, addr);
else
err = ceph_decode_entity_addr(p, end, addr);
if (err)
goto bad;
dout("%s osd%d addr %s\n", __func__, i, ceph_pr_addr(addr));
}
/* pg_temp */
@ -1790,7 +1798,7 @@ bad:
/*
* Allocate and decode a full map.
*/
struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end, bool msgr2)
{
struct ceph_osdmap *map;
int ret;
@ -1799,7 +1807,7 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
if (!map)
return ERR_PTR(-ENOMEM);
ret = osdmap_decode(p, end, map);
ret = osdmap_decode(p, end, msgr2, map);
if (ret) {
ceph_osdmap_destroy(map);
return ERR_PTR(ret);
@ -1817,12 +1825,13 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
* new_state: { osd=6, xorstate=EXISTS } # clear osd_state
*/
static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
struct ceph_osdmap *map)
bool msgr2, struct ceph_osdmap *map)
{
void *new_up_client;
void *new_state;
void *new_weight_end;
u32 len;
int ret;
int i;
new_up_client = *p;
@ -1831,8 +1840,12 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
struct ceph_entity_addr addr;
ceph_decode_skip_32(p, end, e_inval);
if (ceph_decode_entity_addr(p, end, &addr))
goto e_inval;
if (struct_v >= 7)
ret = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
else
ret = ceph_decode_entity_addr(p, end, &addr);
if (ret)
return ret;
}
new_state = *p;
@ -1874,7 +1887,6 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
while (len--) {
s32 osd;
u32 xorstate;
int ret;
osd = ceph_decode_32(p);
if (struct_v >= 5)
@ -1910,8 +1922,15 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
osd = ceph_decode_32(p);
BUG_ON(osd >= map->max_osd);
if (ceph_decode_entity_addr(p, end, &addr))
goto e_inval;
if (struct_v >= 7)
ret = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
else
ret = ceph_decode_entity_addr(p, end, &addr);
if (ret)
return ret;
dout("%s osd%d addr %s\n", __func__, osd, ceph_pr_addr(&addr));
pr_info("osd%d up\n", osd);
map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
map->osd_addr[osd] = addr;
@ -1927,7 +1946,7 @@ e_inval:
/*
* decode and apply an incremental map update.
*/
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2,
struct ceph_osdmap *map)
{
struct ceph_fsid fsid;
@ -1962,7 +1981,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
if (len > 0) {
dout("apply_incremental full map len %d, %p to %p\n",
len, *p, end);
return ceph_osdmap_decode(p, min(*p+len, end));
return ceph_osdmap_decode(p, min(*p+len, end), msgr2);
}
/* new crush? */
@ -2014,7 +2033,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
}
/* new_up_client, new_state, new_weight */
err = decode_new_up_state_weight(p, end, struct_v, map);
err = decode_new_up_state_weight(p, end, struct_v, msgr2, map);
if (err)
goto bad;