6c1ea260f8
Starting the kernel client with cephx disabled and then enabling cephx and restarting userspace daemons can result in a crash: [262671.478162] BUG: unable to handle kernel paging request at ffffebe000000000 [262671.531460] IP: [<ffffffff811cd04a>] kfree+0x5a/0x130 [262671.584334] PGD 0 [262671.635847] Oops: 0000 [#1] SMP [262672.055841] CPU: 22 PID: 2961272 Comm: kworker/22:2 Not tainted 4.2.0-34-generic #39~14.04.1-Ubuntu [262672.162338] Hardware name: Dell Inc. PowerEdge R720/068CDY, BIOS 2.4.3 07/09/2014 [262672.268937] Workqueue: ceph-msgr con_work [libceph] [262672.322290] task: ffff88081c2d0dc0 ti: ffff880149ae8000 task.ti: ffff880149ae8000 [262672.428330] RIP: 0010:[<ffffffff811cd04a>] [<ffffffff811cd04a>] kfree+0x5a/0x130 [262672.535880] RSP: 0018:ffff880149aeba58 EFLAGS: 00010286 [262672.589486] RAX: 000001e000000000 RBX: 0000000000000012 RCX: ffff8807e7461018 [262672.695980] RDX: 000077ff80000000 RSI: ffff88081af2be04 RDI: 0000000000000012 [262672.803668] RBP: ffff880149aeba78 R08: 0000000000000000 R09: 0000000000000000 [262672.912299] R10: ffffebe000000000 R11: ffff880819a60e78 R12: ffff8800aec8df40 [262673.021769] R13: ffffffffc035f70f R14: ffff8807e5b138e0 R15: ffff880da9785840 [262673.131722] FS: 0000000000000000(0000) GS:ffff88081fac0000(0000) knlGS:0000000000000000 [262673.245377] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [262673.303281] CR2: ffffebe000000000 CR3: 0000000001c0d000 CR4: 00000000001406e0 [262673.417556] Stack: [262673.472943] ffff880149aeba88 ffff88081af2be04 ffff8800aec8df40 ffff88081af2be04 [262673.583767] ffff880149aeba98 ffffffffc035f70f ffff880149aebac8 ffff8800aec8df00 [262673.694546] ffff880149aebac8 ffffffffc035c89e ffff8807e5b138e0 ffff8805b047f800 [262673.805230] Call Trace: [262673.859116] [<ffffffffc035f70f>] ceph_x_destroy_authorizer+0x1f/0x50 [libceph] [262673.968705] [<ffffffffc035c89e>] ceph_auth_destroy_authorizer+0x3e/0x60 [libceph] [262674.078852] [<ffffffffc0352805>] put_osd+0x45/0x80 [libceph] 
[262674.134249] [<ffffffffc035290e>] remove_osd+0xae/0x140 [libceph] [262674.189124] [<ffffffffc0352aa3>] __reset_osd+0x103/0x150 [libceph] [262674.243749] [<ffffffffc0354703>] kick_requests+0x223/0x460 [libceph] [262674.297485] [<ffffffffc03559e2>] ceph_osdc_handle_map+0x282/0x5e0 [libceph] [262674.350813] [<ffffffffc035022e>] dispatch+0x4e/0x720 [libceph] [262674.403312] [<ffffffffc034bd91>] try_read+0x3d1/0x1090 [libceph] [262674.454712] [<ffffffff810ab7c2>] ? dequeue_entity+0x152/0x690 [262674.505096] [<ffffffffc034cb1b>] con_work+0xcb/0x1300 [libceph] [262674.555104] [<ffffffff8108fb3e>] process_one_work+0x14e/0x3d0 [262674.604072] [<ffffffff810901ea>] worker_thread+0x11a/0x470 [262674.652187] [<ffffffff810900d0>] ? rescuer_thread+0x310/0x310 [262674.699022] [<ffffffff810957a2>] kthread+0xd2/0xf0 [262674.744494] [<ffffffff810956d0>] ? kthread_create_on_node+0x1c0/0x1c0 [262674.789543] [<ffffffff817bd81f>] ret_from_fork+0x3f/0x70 [262674.834094] [<ffffffff810956d0>] ? kthread_create_on_node+0x1c0/0x1c0 What happens is the following: (1) new MON session is established (2) old "none" ac is destroyed (3) new "cephx" ac is constructed ... (4) old OSD session (w/ "none" authorizer) is put ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer) osd->o_auth.authorizer in the "none" case is just a bare pointer into ac, which contains a single static copy for all services. By the time we get to (4), "none" ac, freed in (2), is long gone. On top of that, a new vtable installed in (3) points us at ceph_x_destroy_authorizer(), so we end up trying to destroy a "none" authorizer with a "cephx" destructor operating on invalid memory! To fix this, decouple authorizer destruction from ac and do away with a single static "none" authorizer by making a copy for each OSD or MDS session. Authorizers themselves are independent of ac and so there is no reason for destroy_authorizer() to be an ac op. Make it an op on the authorizer itself by turning ceph_authorizer into a real struct. 
Fixes: http://tracker.ceph.com/issues/15447 Reported-by: Alan Zhang <alan.zhang@linux.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com> Reviewed-by: Sage Weil <sage@redhat.com>
382 lines
11 KiB
C
382 lines
11 KiB
C
#ifndef _FS_CEPH_OSD_CLIENT_H
|
|
#define _FS_CEPH_OSD_CLIENT_H
|
|
|
|
#include <linux/completion.h>
|
|
#include <linux/kref.h>
|
|
#include <linux/mempool.h>
|
|
#include <linux/rbtree.h>
|
|
|
|
#include <linux/ceph/types.h>
|
|
#include <linux/ceph/osdmap.h>
|
|
#include <linux/ceph/messenger.h>
|
|
#include <linux/ceph/auth.h>
|
|
#include <linux/ceph/pagelist.h>
|
|
|
|
/*
 * Forward declarations so these types can be referred to by pointer
 * before (or without) their full definitions being visible.
 */
struct ceph_msg;
struct ceph_snap_context;
struct ceph_osd_request;
struct ceph_osd_client;

/*
 * completion callback for async writepages
 */
typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *,
				     struct ceph_msg *);

/*
 * Second callback type stored in ceph_osd_request::r_unsafe_callback.
 * NOTE(review): the meaning of the bool argument is not visible in this
 * header -- confirm against the callers before relying on it.
 */
typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool);
|
|
|
|
/* a given osd we're communicating with */
|
|
struct ceph_osd {
|
|
atomic_t o_ref;
|
|
struct ceph_osd_client *o_osdc;
|
|
int o_osd;
|
|
int o_incarnation;
|
|
struct rb_node o_node;
|
|
struct ceph_connection o_con;
|
|
struct list_head o_requests;
|
|
struct list_head o_linger_requests;
|
|
struct list_head o_osd_lru;
|
|
struct ceph_auth_handshake o_auth;
|
|
unsigned long lru_ttl;
|
|
int o_marked_for_keepalive;
|
|
struct list_head o_keepalive_item;
|
|
};
|
|
|
|
|
|
/*
 * Op-count constants.
 * NOTE(review): presumably CEPH_OSD_SLAB_OPS is the op count served from a
 * dedicated slab and CEPH_OSD_MAX_OPS the upper bound on ops per request;
 * neither use is visible in this header -- confirm against osd_client.c.
 */
#define CEPH_OSD_SLAB_OPS	2
#define CEPH_OSD_MAX_OPS	16
|
|
|
|
/*
 * Kind of payload described by a struct ceph_osd_data.
 * CEPH_OSD_DATA_TYPE_NONE is explicitly 0; the BIO variant exists only
 * when CONFIG_BLOCK is defined.
 */
enum ceph_osd_data_type {
	CEPH_OSD_DATA_TYPE_NONE = 0,
	CEPH_OSD_DATA_TYPE_PAGES,
	CEPH_OSD_DATA_TYPE_PAGELIST,
#ifdef CONFIG_BLOCK
	CEPH_OSD_DATA_TYPE_BIO,
#endif /* CONFIG_BLOCK */
};
|
|
|
|
struct ceph_osd_data {
|
|
enum ceph_osd_data_type type;
|
|
union {
|
|
struct {
|
|
struct page **pages;
|
|
u64 length;
|
|
u32 alignment;
|
|
bool pages_from_pool;
|
|
bool own_pages;
|
|
};
|
|
struct ceph_pagelist *pagelist;
|
|
#ifdef CONFIG_BLOCK
|
|
struct {
|
|
struct bio *bio; /* list of bios */
|
|
size_t bio_length; /* total in list */
|
|
};
|
|
#endif /* CONFIG_BLOCK */
|
|
};
|
|
};
|
|
|
|
struct ceph_osd_req_op {
|
|
u16 op; /* CEPH_OSD_OP_* */
|
|
u32 flags; /* CEPH_OSD_OP_FLAG_* */
|
|
u32 indata_len; /* request */
|
|
u32 outdata_len; /* reply */
|
|
s32 rval;
|
|
|
|
union {
|
|
struct ceph_osd_data raw_data_in;
|
|
struct {
|
|
u64 offset, length;
|
|
u64 truncate_size;
|
|
u32 truncate_seq;
|
|
struct ceph_osd_data osd_data;
|
|
} extent;
|
|
struct {
|
|
u32 name_len;
|
|
u32 value_len;
|
|
__u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */
|
|
__u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */
|
|
struct ceph_osd_data osd_data;
|
|
} xattr;
|
|
struct {
|
|
const char *class_name;
|
|
const char *method_name;
|
|
struct ceph_osd_data request_info;
|
|
struct ceph_osd_data request_data;
|
|
struct ceph_osd_data response_data;
|
|
__u8 class_len;
|
|
__u8 method_len;
|
|
__u8 argc;
|
|
} cls;
|
|
struct {
|
|
u64 cookie;
|
|
u64 ver;
|
|
u32 prot_ver;
|
|
u32 timeout;
|
|
__u8 flag;
|
|
} watch;
|
|
struct {
|
|
u64 expected_object_size;
|
|
u64 expected_write_size;
|
|
} alloc_hint;
|
|
};
|
|
};
|
|
|
|
/* an in-flight request */
|
|
struct ceph_osd_request {
|
|
u64 r_tid; /* unique for this client */
|
|
struct rb_node r_node;
|
|
struct list_head r_req_lru_item;
|
|
struct list_head r_osd_item;
|
|
struct list_head r_linger_item;
|
|
struct list_head r_linger_osd_item;
|
|
struct ceph_osd *r_osd;
|
|
struct ceph_pg r_pgid;
|
|
int r_pg_osds[CEPH_PG_MAX_SIZE];
|
|
int r_num_pg_osds;
|
|
|
|
struct ceph_msg *r_request, *r_reply;
|
|
int r_flags; /* any additional flags for the osd */
|
|
u32 r_sent; /* >0 if r_request is sending/sent */
|
|
|
|
/* request osd ops array */
|
|
unsigned int r_num_ops;
|
|
|
|
/* these are updated on each send */
|
|
__le32 *r_request_osdmap_epoch;
|
|
__le32 *r_request_flags;
|
|
__le64 *r_request_pool;
|
|
void *r_request_pgid;
|
|
__le32 *r_request_attempts;
|
|
bool r_paused;
|
|
struct ceph_eversion *r_request_reassert_version;
|
|
|
|
int r_result;
|
|
int r_got_reply;
|
|
int r_linger;
|
|
|
|
struct ceph_osd_client *r_osdc;
|
|
struct kref r_kref;
|
|
bool r_mempool;
|
|
struct completion r_completion, r_safe_completion;
|
|
ceph_osdc_callback_t r_callback;
|
|
ceph_osdc_unsafe_callback_t r_unsafe_callback;
|
|
struct ceph_eversion r_reassert_version;
|
|
struct list_head r_unsafe_item;
|
|
|
|
struct inode *r_inode; /* for use by callbacks */
|
|
void *r_priv; /* ditto */
|
|
|
|
struct ceph_object_locator r_base_oloc;
|
|
struct ceph_object_id r_base_oid;
|
|
struct ceph_object_locator r_target_oloc;
|
|
struct ceph_object_id r_target_oid;
|
|
|
|
u64 r_snapid;
|
|
unsigned long r_stamp; /* send OR check time */
|
|
|
|
struct ceph_snap_context *r_snapc; /* snap context for writes */
|
|
|
|
struct ceph_osd_req_op r_ops[];
|
|
};
|
|
|
|
struct ceph_request_redirect {
|
|
struct ceph_object_locator oloc;
|
|
};
|
|
|
|
struct ceph_osd_event {
|
|
u64 cookie;
|
|
int one_shot;
|
|
struct ceph_osd_client *osdc;
|
|
void (*cb)(u64, u64, u8, void *);
|
|
void *data;
|
|
struct rb_node node;
|
|
struct list_head osd_node;
|
|
struct kref kref;
|
|
};
|
|
|
|
struct ceph_osd_event_work {
|
|
struct work_struct work;
|
|
struct ceph_osd_event *event;
|
|
u64 ver;
|
|
u64 notify_id;
|
|
u8 opcode;
|
|
};
|
|
|
|
struct ceph_osd_client {
|
|
struct ceph_client *client;
|
|
|
|
struct ceph_osdmap *osdmap; /* current map */
|
|
struct rw_semaphore map_sem;
|
|
struct completion map_waiters;
|
|
u64 last_requested_map;
|
|
|
|
struct mutex request_mutex;
|
|
struct rb_root osds; /* osds */
|
|
struct list_head osd_lru; /* idle osds */
|
|
u64 timeout_tid; /* tid of timeout triggering rq */
|
|
u64 last_tid; /* tid of last request */
|
|
struct rb_root requests; /* pending requests */
|
|
struct list_head req_lru; /* in-flight lru */
|
|
struct list_head req_unsent; /* unsent/need-resend queue */
|
|
struct list_head req_notarget; /* map to no osd */
|
|
struct list_head req_linger; /* lingering requests */
|
|
int num_requests;
|
|
struct delayed_work timeout_work;
|
|
struct delayed_work osds_timeout_work;
|
|
#ifdef CONFIG_DEBUG_FS
|
|
struct dentry *debugfs_file;
|
|
#endif
|
|
|
|
mempool_t *req_mempool;
|
|
|
|
struct ceph_msgpool msgpool_op;
|
|
struct ceph_msgpool msgpool_op_reply;
|
|
|
|
spinlock_t event_lock;
|
|
struct rb_root event_tree;
|
|
u64 event_count;
|
|
|
|
struct workqueue_struct *notify_wq;
|
|
};
|
|
|
|
/*
 * Setup/teardown entry points (declarations only).
 * NOTE(review): presumably setup/cleanup are global while init/stop act
 * on one struct ceph_osd_client -- confirm. The int-returning functions
 * presumably return 0 or a negative errno.
 */
extern int ceph_osdc_setup(void);
extern void ceph_osdc_cleanup(void);

extern int ceph_osdc_init(struct ceph_osd_client *osdc,
			  struct ceph_client *client);
extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
|
|
|
|
/*
 * Incoming-message handlers: each takes the osd client and the received
 * message. NOTE(review): message ownership is not visible here -- confirm.
 */
extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
				   struct ceph_msg *msg);
extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
				 struct ceph_msg *msg);
|
|
|
|
extern void osd_req_op_init(struct ceph_osd_request *osd_req,
|
|
unsigned int which, u16 opcode, u32 flags);
|
|
|
|
extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *,
|
|
unsigned int which,
|
|
struct page **pages, u64 length,
|
|
u32 alignment, bool pages_from_pool,
|
|
bool own_pages);
|
|
|
|
/*
 * Helpers for extent ops; the parameters mirror the members of
 * ceph_osd_req_op::extent (offset, length, truncate_size, truncate_seq).
 */
extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
					unsigned int which, u16 opcode,
					u64 offset, u64 length,
					u64 truncate_size, u32 truncate_seq);
extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
					unsigned int which, u64 length);
extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
				       unsigned int which, u64 offset_inc);
|
|
|
|
/*
 * Accessors returning a pointer to a struct ceph_osd_data for a request op.
 * NOTE(review): presumably these point at extent.osd_data and
 * cls.response_data of op `which` -- confirm.
 */
extern struct ceph_osd_data *osd_req_op_extent_osd_data(
					struct ceph_osd_request *osd_req,
					unsigned int which);
extern struct ceph_osd_data *osd_req_op_cls_response_data(
					struct ceph_osd_request *osd_req,
					unsigned int which);
|
|
|
|
extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *,
|
|
unsigned int which,
|
|
struct page **pages, u64 length,
|
|
u32 alignment, bool pages_from_pool,
|
|
bool own_pages);
|
|
extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
|
|
unsigned int which,
|
|
struct ceph_pagelist *pagelist);
|
|
#ifdef CONFIG_BLOCK
|
|
extern void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *,
|
|
unsigned int which,
|
|
struct bio *bio, size_t bio_length);
|
|
#endif /* CONFIG_BLOCK */
|
|
|
|
extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
|
|
unsigned int which,
|
|
struct ceph_pagelist *pagelist);
|
|
extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
|
|
unsigned int which,
|
|
struct page **pages, u64 length,
|
|
u32 alignment, bool pages_from_pool,
|
|
bool own_pages);
|
|
extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
|
|
unsigned int which,
|
|
struct page **pages, u64 length,
|
|
u32 alignment, bool pages_from_pool,
|
|
bool own_pages);
|
|
|
|
extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req,
|
|
unsigned int which, u16 opcode,
|
|
const char *class, const char *method);
|
|
extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
|
|
u16 opcode, const char *name, const void *value,
|
|
size_t size, u8 cmp_op, u8 cmp_mode);
|
|
extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
|
|
unsigned int which, u16 opcode,
|
|
u64 cookie, u64 version, int flag);
|
|
extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
|
|
unsigned int which,
|
|
u64 expected_object_size,
|
|
u64 expected_write_size);
|
|
|
|
extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
|
|
struct ceph_snap_context *snapc,
|
|
unsigned int num_ops,
|
|
bool use_mempool,
|
|
gfp_t gfp_flags);
|
|
|
|
extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off,
|
|
struct ceph_snap_context *snapc,
|
|
u64 snap_id,
|
|
struct timespec *mtime);
|
|
|
|
extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
|
|
struct ceph_file_layout *layout,
|
|
struct ceph_vino vino,
|
|
u64 offset, u64 *len,
|
|
unsigned int which, int num_ops,
|
|
int opcode, int flags,
|
|
struct ceph_snap_context *snapc,
|
|
u32 truncate_seq, u64 truncate_size,
|
|
bool use_mempool);
|
|
|
|
/*
 * Request lifecycle: linger marking, reference get/put, submission,
 * cancellation, waiting and syncing.
 * NOTE(review): get/put presumably operate on ceph_osd_request::r_kref;
 * the int-returning functions presumably return 0 or a negative errno.
 */
extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
					 struct ceph_osd_request *req);

extern void ceph_osdc_get_request(struct ceph_osd_request *req);
extern void ceph_osdc_put_request(struct ceph_osd_request *req);

extern int ceph_osdc_start_request(struct ceph_osd_client *osdc,
				   struct ceph_osd_request *req,
				   bool nofail);
extern void ceph_osdc_cancel_request(struct ceph_osd_request *req);
extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
				  struct ceph_osd_request *req);
extern void ceph_osdc_sync(struct ceph_osd_client *osdc);

extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc);
|
|
|
|
extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
|
|
struct ceph_vino vino,
|
|
struct ceph_file_layout *layout,
|
|
u64 off, u64 *plen,
|
|
u32 truncate_seq, u64 truncate_size,
|
|
struct page **pages, int nr_pages,
|
|
int page_align);
|
|
|
|
extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
|
|
struct ceph_vino vino,
|
|
struct ceph_file_layout *layout,
|
|
struct ceph_snap_context *sc,
|
|
u64 off, u64 len,
|
|
u32 truncate_seq, u64 truncate_size,
|
|
struct timespec *mtime,
|
|
struct page **pages, int nr_pages);
|
|
|
|
/* watch/notify events */
|
|
extern int ceph_osdc_create_event(struct ceph_osd_client *osdc,
|
|
void (*event_cb)(u64, u64, u8, void *),
|
|
void *data, struct ceph_osd_event **pevent);
|
|
extern void ceph_osdc_cancel_event(struct ceph_osd_event *event);
|
|
extern void ceph_osdc_put_event(struct ceph_osd_event *event);
|
|
#endif
|
|
|