Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph fixes from Sage Weil:
 "We have a few wire protocol compatibility fixes, ports of a few recent
  CRUSH mapping changes, and a couple error path fixes"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  - libceph: MOSDOpReply v7 encoding
  - libceph: advertise support for TUNABLES5
  - crush: decode and initialize chooseleaf_stable
  - crush: add chooseleaf_stable tunable
  - crush: ensure take bucket value is valid
  - crush: ensure bucket id is valid before indexing buckets array
  - ceph: fix snap context leak in error path
  - ceph: checking for IS_ERR instead of NULL

hifive-unleashed-5.1
commit
5d6a6a75e0
|
@ -698,8 +698,8 @@ static void ceph_aio_retry_work(struct work_struct *work)
|
||||||
|
|
||||||
req = ceph_osdc_alloc_request(orig_req->r_osdc, snapc, 2,
|
req = ceph_osdc_alloc_request(orig_req->r_osdc, snapc, 2,
|
||||||
false, GFP_NOFS);
|
false, GFP_NOFS);
|
||||||
if (IS_ERR(req)) {
|
if (!req) {
|
||||||
ret = PTR_ERR(req);
|
ret = -ENOMEM;
|
||||||
req = orig_req;
|
req = orig_req;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
@ -716,7 +716,6 @@ static void ceph_aio_retry_work(struct work_struct *work)
|
||||||
ceph_osdc_build_request(req, req->r_ops[0].extent.offset,
|
ceph_osdc_build_request(req, req->r_ops[0].extent.offset,
|
||||||
snapc, CEPH_NOSNAP, &aio_req->mtime);
|
snapc, CEPH_NOSNAP, &aio_req->mtime);
|
||||||
|
|
||||||
ceph_put_snap_context(snapc);
|
|
||||||
ceph_osdc_put_request(orig_req);
|
ceph_osdc_put_request(orig_req);
|
||||||
|
|
||||||
req->r_callback = ceph_aio_complete_req;
|
req->r_callback = ceph_aio_complete_req;
|
||||||
|
@ -731,6 +730,7 @@ out:
|
||||||
ceph_aio_complete_req(req, NULL);
|
ceph_aio_complete_req(req, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ceph_put_snap_context(snapc);
|
||||||
kfree(aio_work);
|
kfree(aio_work);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -63,6 +63,18 @@
|
||||||
#define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49)
|
#define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49)
|
||||||
// duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY
|
// duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY
|
||||||
#define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49) /* overlap w/ above */
|
#define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49) /* overlap w/ above */
|
||||||
|
#define CEPH_FEATURE_MON_METADATA (1ULL<<50)
|
||||||
|
#define CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT (1ULL<<51) /* can sort objs bitwise */
|
||||||
|
#define CEPH_FEATURE_OSD_PROXY_WRITE_FEATURES (1ULL<<52)
|
||||||
|
#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V3 (1ULL<<53)
|
||||||
|
#define CEPH_FEATURE_OSD_HITSET_GMT (1ULL<<54)
|
||||||
|
#define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55)
|
||||||
|
#define CEPH_FEATURE_NEW_OSDOP_ENCODING (1ULL<<56) /* New, v7 encoding */
|
||||||
|
#define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */
|
||||||
|
#define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */
|
||||||
|
#define CEPH_FEATURE_CRUSH_TUNABLES5 (1ULL<<58) /* chooseleaf stable mode */
|
||||||
|
// duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5
|
||||||
|
#define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING (1ULL<<58) /* New, v7 encoding */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
|
* The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
|
||||||
|
@ -108,7 +120,9 @@ static inline u64 ceph_sanitize_features(u64 features)
|
||||||
CEPH_FEATURE_CRUSH_TUNABLES3 | \
|
CEPH_FEATURE_CRUSH_TUNABLES3 | \
|
||||||
CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \
|
CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \
|
||||||
CEPH_FEATURE_MSGR_KEEPALIVE2 | \
|
CEPH_FEATURE_MSGR_KEEPALIVE2 | \
|
||||||
CEPH_FEATURE_CRUSH_V4)
|
CEPH_FEATURE_CRUSH_V4 | \
|
||||||
|
CEPH_FEATURE_CRUSH_TUNABLES5 | \
|
||||||
|
CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
|
||||||
|
|
||||||
#define CEPH_FEATURES_REQUIRED_DEFAULT \
|
#define CEPH_FEATURES_REQUIRED_DEFAULT \
|
||||||
(CEPH_FEATURE_NOSRCADDR | \
|
(CEPH_FEATURE_NOSRCADDR | \
|
||||||
|
|
|
@ -59,7 +59,8 @@ enum {
|
||||||
CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */
|
CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */
|
||||||
CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
|
CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
|
||||||
CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
|
CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
|
||||||
CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12
|
CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12,
|
||||||
|
CRUSH_RULE_SET_CHOOSELEAF_STABLE = 13
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -205,6 +206,11 @@ struct crush_map {
|
||||||
* mappings line up a bit better with previous mappings. */
|
* mappings line up a bit better with previous mappings. */
|
||||||
__u8 chooseleaf_vary_r;
|
__u8 chooseleaf_vary_r;
|
||||||
|
|
||||||
|
/* if true, it makes chooseleaf firstn to return stable results (if
|
||||||
|
* no local retry) so that data migrations would be optimal when some
|
||||||
|
* device fails. */
|
||||||
|
__u8 chooseleaf_stable;
|
||||||
|
|
||||||
#ifndef __KERNEL__
|
#ifndef __KERNEL__
|
||||||
/*
|
/*
|
||||||
* version 0 (original) of straw_calc has various flaws. version 1
|
* version 0 (original) of straw_calc has various flaws. version 1
|
||||||
|
|
|
@ -403,6 +403,7 @@ static int is_out(const struct crush_map *map,
|
||||||
* @local_retries: localized retries
|
* @local_retries: localized retries
|
||||||
* @local_fallback_retries: localized fallback retries
|
* @local_fallback_retries: localized fallback retries
|
||||||
* @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
|
* @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
|
||||||
|
* @stable: stable mode starts rep=0 in the recursive call for all replicas
|
||||||
* @vary_r: pass r to recursive calls
|
* @vary_r: pass r to recursive calls
|
||||||
* @out2: second output vector for leaf items (if @recurse_to_leaf)
|
* @out2: second output vector for leaf items (if @recurse_to_leaf)
|
||||||
* @parent_r: r value passed from the parent
|
* @parent_r: r value passed from the parent
|
||||||
|
@ -419,6 +420,7 @@ static int crush_choose_firstn(const struct crush_map *map,
|
||||||
unsigned int local_fallback_retries,
|
unsigned int local_fallback_retries,
|
||||||
int recurse_to_leaf,
|
int recurse_to_leaf,
|
||||||
unsigned int vary_r,
|
unsigned int vary_r,
|
||||||
|
unsigned int stable,
|
||||||
int *out2,
|
int *out2,
|
||||||
int parent_r)
|
int parent_r)
|
||||||
{
|
{
|
||||||
|
@ -433,13 +435,13 @@ static int crush_choose_firstn(const struct crush_map *map,
|
||||||
int collide, reject;
|
int collide, reject;
|
||||||
int count = out_size;
|
int count = out_size;
|
||||||
|
|
||||||
dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
|
dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d stable %d\n",
|
||||||
recurse_to_leaf ? "_LEAF" : "",
|
recurse_to_leaf ? "_LEAF" : "",
|
||||||
bucket->id, x, outpos, numrep,
|
bucket->id, x, outpos, numrep,
|
||||||
tries, recurse_tries, local_retries, local_fallback_retries,
|
tries, recurse_tries, local_retries, local_fallback_retries,
|
||||||
parent_r);
|
parent_r, stable);
|
||||||
|
|
||||||
for (rep = outpos; rep < numrep && count > 0 ; rep++) {
|
for (rep = stable ? 0 : outpos; rep < numrep && count > 0 ; rep++) {
|
||||||
/* keep trying until we get a non-out, non-colliding item */
|
/* keep trying until we get a non-out, non-colliding item */
|
||||||
ftotal = 0;
|
ftotal = 0;
|
||||||
skip_rep = 0;
|
skip_rep = 0;
|
||||||
|
@ -512,13 +514,14 @@ static int crush_choose_firstn(const struct crush_map *map,
|
||||||
if (crush_choose_firstn(map,
|
if (crush_choose_firstn(map,
|
||||||
map->buckets[-1-item],
|
map->buckets[-1-item],
|
||||||
weight, weight_max,
|
weight, weight_max,
|
||||||
x, outpos+1, 0,
|
x, stable ? 1 : outpos+1, 0,
|
||||||
out2, outpos, count,
|
out2, outpos, count,
|
||||||
recurse_tries, 0,
|
recurse_tries, 0,
|
||||||
local_retries,
|
local_retries,
|
||||||
local_fallback_retries,
|
local_fallback_retries,
|
||||||
0,
|
0,
|
||||||
vary_r,
|
vary_r,
|
||||||
|
stable,
|
||||||
NULL,
|
NULL,
|
||||||
sub_r) <= outpos)
|
sub_r) <= outpos)
|
||||||
/* didn't get leaf */
|
/* didn't get leaf */
|
||||||
|
@ -816,6 +819,7 @@ int crush_do_rule(const struct crush_map *map,
|
||||||
int choose_local_fallback_retries = map->choose_local_fallback_tries;
|
int choose_local_fallback_retries = map->choose_local_fallback_tries;
|
||||||
|
|
||||||
int vary_r = map->chooseleaf_vary_r;
|
int vary_r = map->chooseleaf_vary_r;
|
||||||
|
int stable = map->chooseleaf_stable;
|
||||||
|
|
||||||
if ((__u32)ruleno >= map->max_rules) {
|
if ((__u32)ruleno >= map->max_rules) {
|
||||||
dprintk(" bad ruleno %d\n", ruleno);
|
dprintk(" bad ruleno %d\n", ruleno);
|
||||||
|
@ -835,7 +839,8 @@ int crush_do_rule(const struct crush_map *map,
|
||||||
case CRUSH_RULE_TAKE:
|
case CRUSH_RULE_TAKE:
|
||||||
if ((curstep->arg1 >= 0 &&
|
if ((curstep->arg1 >= 0 &&
|
||||||
curstep->arg1 < map->max_devices) ||
|
curstep->arg1 < map->max_devices) ||
|
||||||
(-1-curstep->arg1 < map->max_buckets &&
|
(-1-curstep->arg1 >= 0 &&
|
||||||
|
-1-curstep->arg1 < map->max_buckets &&
|
||||||
map->buckets[-1-curstep->arg1])) {
|
map->buckets[-1-curstep->arg1])) {
|
||||||
w[0] = curstep->arg1;
|
w[0] = curstep->arg1;
|
||||||
wsize = 1;
|
wsize = 1;
|
||||||
|
@ -869,6 +874,11 @@ int crush_do_rule(const struct crush_map *map,
|
||||||
vary_r = curstep->arg1;
|
vary_r = curstep->arg1;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case CRUSH_RULE_SET_CHOOSELEAF_STABLE:
|
||||||
|
if (curstep->arg1 >= 0)
|
||||||
|
stable = curstep->arg1;
|
||||||
|
break;
|
||||||
|
|
||||||
case CRUSH_RULE_CHOOSELEAF_FIRSTN:
|
case CRUSH_RULE_CHOOSELEAF_FIRSTN:
|
||||||
case CRUSH_RULE_CHOOSE_FIRSTN:
|
case CRUSH_RULE_CHOOSE_FIRSTN:
|
||||||
firstn = 1;
|
firstn = 1;
|
||||||
|
@ -888,6 +898,7 @@ int crush_do_rule(const struct crush_map *map,
|
||||||
osize = 0;
|
osize = 0;
|
||||||
|
|
||||||
for (i = 0; i < wsize; i++) {
|
for (i = 0; i < wsize; i++) {
|
||||||
|
int bno;
|
||||||
/*
|
/*
|
||||||
* see CRUSH_N, CRUSH_N_MINUS macros.
|
* see CRUSH_N, CRUSH_N_MINUS macros.
|
||||||
* basically, numrep <= 0 means relative to
|
* basically, numrep <= 0 means relative to
|
||||||
|
@ -900,6 +911,13 @@ int crush_do_rule(const struct crush_map *map,
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
j = 0;
|
j = 0;
|
||||||
|
/* make sure bucket id is valid */
|
||||||
|
bno = -1 - w[i];
|
||||||
|
if (bno < 0 || bno >= map->max_buckets) {
|
||||||
|
/* w[i] is probably CRUSH_ITEM_NONE */
|
||||||
|
dprintk(" bad w[i] %d\n", w[i]);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (firstn) {
|
if (firstn) {
|
||||||
int recurse_tries;
|
int recurse_tries;
|
||||||
if (choose_leaf_tries)
|
if (choose_leaf_tries)
|
||||||
|
@ -911,7 +929,7 @@ int crush_do_rule(const struct crush_map *map,
|
||||||
recurse_tries = choose_tries;
|
recurse_tries = choose_tries;
|
||||||
osize += crush_choose_firstn(
|
osize += crush_choose_firstn(
|
||||||
map,
|
map,
|
||||||
map->buckets[-1-w[i]],
|
map->buckets[bno],
|
||||||
weight, weight_max,
|
weight, weight_max,
|
||||||
x, numrep,
|
x, numrep,
|
||||||
curstep->arg2,
|
curstep->arg2,
|
||||||
|
@ -923,6 +941,7 @@ int crush_do_rule(const struct crush_map *map,
|
||||||
choose_local_fallback_retries,
|
choose_local_fallback_retries,
|
||||||
recurse_to_leaf,
|
recurse_to_leaf,
|
||||||
vary_r,
|
vary_r,
|
||||||
|
stable,
|
||||||
c+osize,
|
c+osize,
|
||||||
0);
|
0);
|
||||||
} else {
|
} else {
|
||||||
|
@ -930,7 +949,7 @@ int crush_do_rule(const struct crush_map *map,
|
||||||
numrep : (result_max-osize));
|
numrep : (result_max-osize));
|
||||||
crush_choose_indep(
|
crush_choose_indep(
|
||||||
map,
|
map,
|
||||||
map->buckets[-1-w[i]],
|
map->buckets[bno],
|
||||||
weight, weight_max,
|
weight, weight_max,
|
||||||
x, out_size, numrep,
|
x, out_size, numrep,
|
||||||
curstep->arg2,
|
curstep->arg2,
|
||||||
|
|
|
@ -1770,6 +1770,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
|
||||||
u32 osdmap_epoch;
|
u32 osdmap_epoch;
|
||||||
int already_completed;
|
int already_completed;
|
||||||
u32 bytes;
|
u32 bytes;
|
||||||
|
u8 decode_redir;
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
|
||||||
tid = le64_to_cpu(msg->hdr.tid);
|
tid = le64_to_cpu(msg->hdr.tid);
|
||||||
|
@ -1841,6 +1842,15 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
|
||||||
p += 8 + 4; /* skip replay_version */
|
p += 8 + 4; /* skip replay_version */
|
||||||
p += 8; /* skip user_version */
|
p += 8; /* skip user_version */
|
||||||
|
|
||||||
|
if (le16_to_cpu(msg->hdr.version) >= 7)
|
||||||
|
ceph_decode_8_safe(&p, end, decode_redir, bad_put);
|
||||||
|
else
|
||||||
|
decode_redir = 1;
|
||||||
|
} else {
|
||||||
|
decode_redir = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (decode_redir) {
|
||||||
err = ceph_redirect_decode(&p, end, &redir);
|
err = ceph_redirect_decode(&p, end, &redir);
|
||||||
if (err)
|
if (err)
|
||||||
goto bad_put;
|
goto bad_put;
|
||||||
|
|
|
@ -342,23 +342,32 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
|
||||||
c->choose_local_tries = ceph_decode_32(p);
|
c->choose_local_tries = ceph_decode_32(p);
|
||||||
c->choose_local_fallback_tries = ceph_decode_32(p);
|
c->choose_local_fallback_tries = ceph_decode_32(p);
|
||||||
c->choose_total_tries = ceph_decode_32(p);
|
c->choose_total_tries = ceph_decode_32(p);
|
||||||
dout("crush decode tunable choose_local_tries = %d",
|
dout("crush decode tunable choose_local_tries = %d\n",
|
||||||
c->choose_local_tries);
|
c->choose_local_tries);
|
||||||
dout("crush decode tunable choose_local_fallback_tries = %d",
|
dout("crush decode tunable choose_local_fallback_tries = %d\n",
|
||||||
c->choose_local_fallback_tries);
|
c->choose_local_fallback_tries);
|
||||||
dout("crush decode tunable choose_total_tries = %d",
|
dout("crush decode tunable choose_total_tries = %d\n",
|
||||||
c->choose_total_tries);
|
c->choose_total_tries);
|
||||||
|
|
||||||
ceph_decode_need(p, end, sizeof(u32), done);
|
ceph_decode_need(p, end, sizeof(u32), done);
|
||||||
c->chooseleaf_descend_once = ceph_decode_32(p);
|
c->chooseleaf_descend_once = ceph_decode_32(p);
|
||||||
dout("crush decode tunable chooseleaf_descend_once = %d",
|
dout("crush decode tunable chooseleaf_descend_once = %d\n",
|
||||||
c->chooseleaf_descend_once);
|
c->chooseleaf_descend_once);
|
||||||
|
|
||||||
ceph_decode_need(p, end, sizeof(u8), done);
|
ceph_decode_need(p, end, sizeof(u8), done);
|
||||||
c->chooseleaf_vary_r = ceph_decode_8(p);
|
c->chooseleaf_vary_r = ceph_decode_8(p);
|
||||||
dout("crush decode tunable chooseleaf_vary_r = %d",
|
dout("crush decode tunable chooseleaf_vary_r = %d\n",
|
||||||
c->chooseleaf_vary_r);
|
c->chooseleaf_vary_r);
|
||||||
|
|
||||||
|
/* skip straw_calc_version, allowed_bucket_algs */
|
||||||
|
ceph_decode_need(p, end, sizeof(u8) + sizeof(u32), done);
|
||||||
|
*p += sizeof(u8) + sizeof(u32);
|
||||||
|
|
||||||
|
ceph_decode_need(p, end, sizeof(u8), done);
|
||||||
|
c->chooseleaf_stable = ceph_decode_8(p);
|
||||||
|
dout("crush decode tunable chooseleaf_stable = %d\n",
|
||||||
|
c->chooseleaf_stable);
|
||||||
|
|
||||||
done:
|
done:
|
||||||
dout("crush_decode success\n");
|
dout("crush_decode success\n");
|
||||||
return c;
|
return c;
|
||||||
|
|
Loading…
Reference in New Issue