A few bugfixes for md

  - recent regressions in raid5
  - recent regressions in dmraid
  - a few instances of CONFIG_MULTICORE_RAID456 linger
 
 Several tagged for -stable
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.19 (GNU/Linux)
 
 iQIVAwUAUUzCwDnsnt1WYoG5AQJKMhAAsi2XhqLC4Dx19J8MTF6+cjfynWCxF2SC
 3mMcVZm6yxSowixb1Ht72CyssWdJAi4vgaw0aLNH7b3CbPDZfTSfqLP4tSvyPfod
 aDcFDdd/RhHjDpJqZ52Tyc6QzBfyhwu+s9R+a78TSL47ZMjZpz1QpshG8Sm9JYTs
 z72VlIZeglzhWmzO1FInsL/oT/Hwr9IfpmJpuXBQQObDn3BgvZLuzZyCi35upqrM
 711ei7CKaN0s/jKcWclNRtgUrr10XsgQ6PugOZbli09CC8ushHwvXe/VmxoQFg2+
 Sj14YSfYAY+1QpOiuYc+knrWc7CtPGHgUqBzOoYWMxi9Lqpo5xhD1vkRsFhXxMSg
 GVnAnh/RXl7bGzGWaRv8twG4vU+qYOlEPNgO6/079AxCOrrNrstYrgjBxBSWuxrB
 0UIFQGT69zA5G3cLbIRrXUxO8oIVeEx92YV1TOcgLKP5OXlp/0I8ajnA9b8KoPZa
 He04GdPlZMXTLAqq9MaQRdS0XzX8YQDWbUebqe+w5NW46sLbckkmxaNZs7fOYAfG
 CNHfeRsLp5v0oNbhNyCDSjxqH6uYwKCdCqmDxo6A+fmjmDruHQmZoAK8YISUtPtx
 u4M82jW6Z/xOg4pomxMl4SxzCDhy1pM8PYzyx7Mj82C4XBR8CkrQTP8XD+FQL2Ih
 KhId4tJzx6Q=
 =Rycs
 -----END PGP SIGNATURE-----

Merge tag 'md-3.9-fixes' of git://neil.brown.name/md

Pull md fixes from NeilBrown:
 "A few bugfixes for md

   - recent regressions in raid5
   - recent regressions in dmraid
   - a few instances of CONFIG_MULTICORE_RAID456 linger

  Several tagged for -stable"

* tag 'md-3.9-fixes' of git://neil.brown.name/md:
  md: remove CONFIG_MULTICORE_RAID456 entirely
  md/raid5: ensure sync and DISCARD don't happen at the same time.
  MD: Prevent sysfs operations on uninitialized kobjects
  MD RAID5: Avoid accessing gendisk or queue structs when not available
  md/raid5: schedule_reconstruction should abort if nothing to do.
This commit is contained in:
Linus Torvalds 2013-03-23 15:49:49 -07:00
commit 22c3f2fff6
6 changed files with 86 additions and 47 deletions

View file

@ -330,7 +330,6 @@ CONFIG_MD_RAID0=m
CONFIG_MD_RAID1=m CONFIG_MD_RAID1=m
CONFIG_MD_RAID10=m CONFIG_MD_RAID10=m
CONFIG_MD_RAID456=m CONFIG_MD_RAID456=m
CONFIG_MULTICORE_RAID456=y
CONFIG_MD_FAULTY=m CONFIG_MD_FAULTY=m
CONFIG_BLK_DEV_DM=m CONFIG_BLK_DEV_DM=m
CONFIG_DM_DEBUG=y CONFIG_DM_DEBUG=y

View file

@ -324,7 +324,6 @@ CONFIG_MD_RAID0=m
CONFIG_MD_RAID1=m CONFIG_MD_RAID1=m
CONFIG_MD_RAID10=m CONFIG_MD_RAID10=m
CONFIG_MD_RAID456=m CONFIG_MD_RAID456=m
CONFIG_MULTICORE_RAID456=y
CONFIG_MD_FAULTY=m CONFIG_MD_FAULTY=m
CONFIG_BLK_DEV_DM=m CONFIG_BLK_DEV_DM=m
CONFIG_DM_DEBUG=y CONFIG_DM_DEBUG=y

View file

@ -7663,10 +7663,8 @@ static int remove_and_add_spares(struct mddev *mddev)
removed++; removed++;
} }
} }
if (removed) if (removed && mddev->kobj.sd)
sysfs_notify(&mddev->kobj, NULL, sysfs_notify(&mddev->kobj, NULL, "degraded");
"degraded");
rdev_for_each(rdev, mddev) { rdev_for_each(rdev, mddev) {
if (rdev->raid_disk >= 0 && if (rdev->raid_disk >= 0 &&

View file

@ -506,7 +506,7 @@ static inline char * mdname (struct mddev * mddev)
static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev) static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
{ {
char nm[20]; char nm[20];
if (!test_bit(Replacement, &rdev->flags)) { if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) {
sprintf(nm, "rd%d", rdev->raid_disk); sprintf(nm, "rd%d", rdev->raid_disk);
return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
} else } else
@ -516,7 +516,7 @@ static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev) static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev)
{ {
char nm[20]; char nm[20];
if (!test_bit(Replacement, &rdev->flags)) { if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) {
sprintf(nm, "rd%d", rdev->raid_disk); sprintf(nm, "rd%d", rdev->raid_disk);
sysfs_remove_link(&mddev->kobj, nm); sysfs_remove_link(&mddev->kobj, nm);
} }

View file

@ -671,9 +671,11 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
bi->bi_next = NULL; bi->bi_next = NULL;
if (rrdev) if (rrdev)
set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
bi, disk_devt(conf->mddev->gendisk), if (conf->mddev->gendisk)
sh->dev[i].sector); trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
bi, disk_devt(conf->mddev->gendisk),
sh->dev[i].sector);
generic_make_request(bi); generic_make_request(bi);
} }
if (rrdev) { if (rrdev) {
@ -701,9 +703,10 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
rbi->bi_io_vec[0].bv_offset = 0; rbi->bi_io_vec[0].bv_offset = 0;
rbi->bi_size = STRIPE_SIZE; rbi->bi_size = STRIPE_SIZE;
rbi->bi_next = NULL; rbi->bi_next = NULL;
trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), if (conf->mddev->gendisk)
rbi, disk_devt(conf->mddev->gendisk), trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
sh->dev[i].sector); rbi, disk_devt(conf->mddev->gendisk),
sh->dev[i].sector);
generic_make_request(rbi); generic_make_request(rbi);
} }
if (!rdev && !rrdev) { if (!rdev && !rrdev) {
@ -2280,17 +2283,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
int level = conf->level; int level = conf->level;
if (rcw) { if (rcw) {
/* if we are not expanding this is a proper write request, and
* there will be bios with new data to be drained into the
* stripe cache
*/
if (!expand) {
sh->reconstruct_state = reconstruct_state_drain_run;
set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
} else
sh->reconstruct_state = reconstruct_state_run;
set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
for (i = disks; i--; ) { for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i]; struct r5dev *dev = &sh->dev[i];
@ -2303,6 +2295,21 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
s->locked++; s->locked++;
} }
} }
/* if we are not expanding this is a proper write request, and
* there will be bios with new data to be drained into the
* stripe cache
*/
if (!expand) {
if (!s->locked)
/* False alarm, nothing to do */
return;
sh->reconstruct_state = reconstruct_state_drain_run;
set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
} else
sh->reconstruct_state = reconstruct_state_run;
set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
if (s->locked + conf->max_degraded == disks) if (s->locked + conf->max_degraded == disks)
if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
atomic_inc(&conf->pending_full_writes); atomic_inc(&conf->pending_full_writes);
@ -2311,11 +2318,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
sh->reconstruct_state = reconstruct_state_prexor_drain_run;
set_bit(STRIPE_OP_PREXOR, &s->ops_request);
set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
for (i = disks; i--; ) { for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i]; struct r5dev *dev = &sh->dev[i];
if (i == pd_idx) if (i == pd_idx)
@ -2330,6 +2332,13 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
s->locked++; s->locked++;
} }
} }
if (!s->locked)
/* False alarm - nothing to do */
return;
sh->reconstruct_state = reconstruct_state_prexor_drain_run;
set_bit(STRIPE_OP_PREXOR, &s->ops_request);
set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
} }
/* keep the parity disk(s) locked while asynchronous operations /* keep the parity disk(s) locked while asynchronous operations
@ -2564,6 +2573,8 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
int i; int i;
clear_bit(STRIPE_SYNCING, &sh->state); clear_bit(STRIPE_SYNCING, &sh->state);
if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
wake_up(&conf->wait_for_overlap);
s->syncing = 0; s->syncing = 0;
s->replacing = 0; s->replacing = 0;
/* There is nothing more to do for sync/check/repair. /* There is nothing more to do for sync/check/repair.
@ -2737,6 +2748,7 @@ static void handle_stripe_clean_event(struct r5conf *conf,
{ {
int i; int i;
struct r5dev *dev; struct r5dev *dev;
int discard_pending = 0;
for (i = disks; i--; ) for (i = disks; i--; )
if (sh->dev[i].written) { if (sh->dev[i].written) {
@ -2765,9 +2777,23 @@ static void handle_stripe_clean_event(struct r5conf *conf,
STRIPE_SECTORS, STRIPE_SECTORS,
!test_bit(STRIPE_DEGRADED, &sh->state), !test_bit(STRIPE_DEGRADED, &sh->state),
0); 0);
} } else if (test_bit(R5_Discard, &dev->flags))
} else if (test_bit(R5_Discard, &sh->dev[i].flags)) discard_pending = 1;
clear_bit(R5_Discard, &sh->dev[i].flags); }
if (!discard_pending &&
test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
if (sh->qd_idx >= 0) {
clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags);
}
/* now that discard is done we can proceed with any sync */
clear_bit(STRIPE_DISCARD, &sh->state);
if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
set_bit(STRIPE_HANDLE, &sh->state);
}
if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
if (atomic_dec_and_test(&conf->pending_full_writes)) if (atomic_dec_and_test(&conf->pending_full_writes))
@ -2826,8 +2852,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
set_bit(STRIPE_HANDLE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state);
if (rmw < rcw && rmw > 0) { if (rmw < rcw && rmw > 0) {
/* prefer read-modify-write, but need to get some data */ /* prefer read-modify-write, but need to get some data */
blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d", if (conf->mddev->queue)
(unsigned long long)sh->sector, rmw); blk_add_trace_msg(conf->mddev->queue,
"raid5 rmw %llu %d",
(unsigned long long)sh->sector, rmw);
for (i = disks; i--; ) { for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i]; struct r5dev *dev = &sh->dev[i];
if ((dev->towrite || i == sh->pd_idx) && if ((dev->towrite || i == sh->pd_idx) &&
@ -2877,7 +2905,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
} }
} }
} }
if (rcw) if (rcw && conf->mddev->queue)
blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d", blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d",
(unsigned long long)sh->sector, (unsigned long long)sh->sector,
rcw, qread, test_bit(STRIPE_DELAYED, &sh->state)); rcw, qread, test_bit(STRIPE_DELAYED, &sh->state));
@ -3417,9 +3445,15 @@ static void handle_stripe(struct stripe_head *sh)
return; return;
} }
if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
set_bit(STRIPE_SYNCING, &sh->state); spin_lock(&sh->stripe_lock);
clear_bit(STRIPE_INSYNC, &sh->state); /* Cannot process 'sync' concurrently with 'discard' */
if (!test_bit(STRIPE_DISCARD, &sh->state) &&
test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
set_bit(STRIPE_SYNCING, &sh->state);
clear_bit(STRIPE_INSYNC, &sh->state);
}
spin_unlock(&sh->stripe_lock);
} }
clear_bit(STRIPE_DELAYED, &sh->state); clear_bit(STRIPE_DELAYED, &sh->state);
@ -3579,6 +3613,8 @@ static void handle_stripe(struct stripe_head *sh)
test_bit(STRIPE_INSYNC, &sh->state)) { test_bit(STRIPE_INSYNC, &sh->state)) {
md_done_sync(conf->mddev, STRIPE_SECTORS, 1); md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
clear_bit(STRIPE_SYNCING, &sh->state); clear_bit(STRIPE_SYNCING, &sh->state);
if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
wake_up(&conf->wait_for_overlap);
} }
/* If the failed drives are just a ReadError, then we might need /* If the failed drives are just a ReadError, then we might need
@ -3982,9 +4018,10 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
atomic_inc(&conf->active_aligned_reads); atomic_inc(&conf->active_aligned_reads);
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev), if (mddev->gendisk)
align_bi, disk_devt(mddev->gendisk), trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
raid_bio->bi_sector); align_bi, disk_devt(mddev->gendisk),
raid_bio->bi_sector);
generic_make_request(align_bi); generic_make_request(align_bi);
return 1; return 1;
} else { } else {
@ -4078,7 +4115,8 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
} }
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
} }
trace_block_unplug(mddev->queue, cnt, !from_schedule); if (mddev->queue)
trace_block_unplug(mddev->queue, cnt, !from_schedule);
kfree(cb); kfree(cb);
} }
@ -4141,6 +4179,13 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
sh = get_active_stripe(conf, logical_sector, 0, 0, 0); sh = get_active_stripe(conf, logical_sector, 0, 0, 0);
prepare_to_wait(&conf->wait_for_overlap, &w, prepare_to_wait(&conf->wait_for_overlap, &w,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
if (test_bit(STRIPE_SYNCING, &sh->state)) {
release_stripe(sh);
schedule();
goto again;
}
clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
spin_lock_irq(&sh->stripe_lock); spin_lock_irq(&sh->stripe_lock);
for (d = 0; d < conf->raid_disks; d++) { for (d = 0; d < conf->raid_disks; d++) {
if (d == sh->pd_idx || d == sh->qd_idx) if (d == sh->pd_idx || d == sh->qd_idx)
@ -4153,6 +4198,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
goto again; goto again;
} }
} }
set_bit(STRIPE_DISCARD, &sh->state);
finish_wait(&conf->wait_for_overlap, &w); finish_wait(&conf->wait_for_overlap, &w);
for (d = 0; d < conf->raid_disks; d++) { for (d = 0; d < conf->raid_disks; d++) {
if (d == sh->pd_idx || d == sh->qd_idx) if (d == sh->pd_idx || d == sh->qd_idx)

View file

@ -221,10 +221,6 @@ struct stripe_head {
struct stripe_operations { struct stripe_operations {
int target, target2; int target, target2;
enum sum_check_flags zero_sum_result; enum sum_check_flags zero_sum_result;
#ifdef CONFIG_MULTICORE_RAID456
unsigned long request;
wait_queue_head_t wait_for_ops;
#endif
} ops; } ops;
struct r5dev { struct r5dev {
/* rreq and rvec are used for the replacement device when /* rreq and rvec are used for the replacement device when
@ -323,6 +319,7 @@ enum {
STRIPE_COMPUTE_RUN, STRIPE_COMPUTE_RUN,
STRIPE_OPS_REQ_PENDING, STRIPE_OPS_REQ_PENDING,
STRIPE_ON_UNPLUG_LIST, STRIPE_ON_UNPLUG_LIST,
STRIPE_DISCARD,
}; };
/* /*