diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index de174ad6f8bd..70b58b4bcf89 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -38,21 +38,36 @@ * near_copies (stored in low byte of layout) * far_copies (stored in second byte of layout) * far_offset (stored in bit 16 of layout ) + * use_far_sets (stored in bit 17 of layout ) * - * The data to be stored is divided into chunks using chunksize. - * Each device is divided into far_copies sections. - * In each section, chunks are laid out in a style similar to raid0, but - * near_copies copies of each chunk is stored (each on a different drive). - * The starting device for each section is offset near_copies from the starting - * device of the previous section. - * Thus they are (near_copies*far_copies) of each chunk, and each is on a different - * drive. - * near_copies and far_copies must be at least one, and their product is at most - * raid_disks. + * The data to be stored is divided into chunks using chunksize. Each device + * is divided into far_copies sections. In each section, chunks are laid out + * in a style similar to raid0, but near_copies copies of each chunk is stored + * (each on a different drive). The starting device for each section is offset + * near_copies from the starting device of the previous section. Thus there + * are (near_copies * far_copies) of each chunk, and each is on a different + * drive. near_copies and far_copies must be at least one, and their product + * is at most raid_disks. * * If far_offset is true, then the far_copies are handled a bit differently. - * The copies are still in different stripes, but instead of be very far apart - * on disk, there are adjacent stripes. + * The copies are still in different stripes, but instead of being very far + * apart on disk, there are adjacent stripes. + * + * The far and offset algorithms are handled slightly differently if + * 'use_far_sets' is true. In this case, the array's devices are grouped into + * sets that are (near_copies * far_copies) in size. The far copied stripes + * are still shifted by 'near_copies' devices, but this shifting stays confined + * to the set rather than the entire array. This is done to improve the number + * of device combinations that can fail without causing the array to fail. + * Example 'far' algorithm w/o 'use_far_sets' (each letter represents a chunk + * on a device): + * A B C D A B C D E + * ... ... + * D A B C E A B C D + * Example 'far' algorithm w/ 'use_far_sets' enabled (sets illustrated w/ []'s): + * [A B] [C D] [A B] [C D E] + * |...| |...| |...| | ... | + * [B A] [D C] [B A] [E C D] */ /* @@ -551,14 +566,18 @@ static void __raid10_find_phys(struct geom *geo, struct r10bio *r10bio) /* and calculate all the others */ for (n = 0; n < geo->near_copies; n++) { int d = dev; + int set; sector_t s = sector; r10bio->devs[slot].devnum = d; r10bio->devs[slot].addr = s; slot++; for (f = 1; f < geo->far_copies; f++) { + set = d / geo->far_set_size; d += geo->near_copies; - d %= geo->raid_disks; + d %= geo->far_set_size; + d += geo->far_set_size * set; + s += geo->stride; r10bio->devs[slot].devnum = d; r10bio->devs[slot].addr = s; @@ -594,6 +613,8 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) * or recovery, so reshape isn't happening */ struct geom *geo = &conf->geo; + int far_set_start = (dev / geo->far_set_size) * geo->far_set_size; + int far_set_size = geo->far_set_size; offset = sector & geo->chunk_mask; if (geo->far_offset) { @@ -601,13 +622,13 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) chunk = sector >> geo->chunk_shift; fc = sector_div(chunk, geo->far_copies); dev -= fc * geo->near_copies; - if (dev < 0) - dev += geo->raid_disks; + if (dev < far_set_start) + dev += far_set_size; } else { while (sector >= geo->stride) { sector -= geo->stride; - if (dev < geo->near_copies) - dev += geo->raid_disks - geo->near_copies; + if (dev < (geo->near_copies + far_set_start)) + dev += far_set_size - geo->near_copies; else dev -= geo->near_copies; } @@ -3438,7 +3459,7 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new) disks = mddev->raid_disks + mddev->delta_disks; break; } - if (layout >> 17) + if (layout >> 18) return -1; if (chunk < (PAGE_SIZE >> 9) || !is_power_of_2(chunk)) @@ -3450,6 +3471,7 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new) geo->near_copies = nc; geo->far_copies = fc; geo->far_offset = fo; + geo->far_set_size = (layout & (1<<17)) ? disks / fc : disks; geo->chunk_mask = chunk - 1; geo->chunk_shift = ffz(~chunk); return nc*fc; diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 1054cf602345..157d69e83ff4 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -33,6 +33,11 @@ struct r10conf { * far_offset, in which case it is * 1 stripe. */ + int far_set_size; /* The number of devices in a set, + * where a 'set' are devices that + * contain far/offset copies of + * each other. + */ int chunk_shift; /* shift from chunks to sectors */ sector_t chunk_mask; } prev, geo;