Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
  md: Fix removal of extra drives when converting RAID6 to RAID5
  md: range check slot number when manually adding a spare.
  md/raid5: handle manually-added spares in start_reshape.
  md: fix sync_completed reporting for very large drives (>2TB)
  md: allow suspend_lo and suspend_hi to decrease as well as increase.
  md: Don't let implementation detail of curr_resync leak out through sysfs.
  md: separate meta and data devs
  md-new-param-to_sync_page_io
  md-new-param-to-calc_dev_sboffset
  md: Be more careful about clearing flags bit in ->recovery
  md: md_stop_writes requires mddev_lock.
  md/raid5: use sysfs_notify_dirent_safe to avoid NULL pointer
  md: Ensure no IO request to get md device before it is properly initialised.
  md: Fix single printks with multiple KERN_<level>s
  md: fix regression resulting in delays in clearing bits in a bitmap
  md: fix regression with re-adding devices to arrays with no metadata
This commit is contained in:
Linus Torvalds 2011-01-13 17:30:20 -08:00
commit 509e4aef44
6 changed files with 172 additions and 116 deletions

View file

@ -210,11 +210,11 @@ static struct page *read_sb_page(mddev_t *mddev, loff_t offset,
|| test_bit(Faulty, &rdev->flags)) || test_bit(Faulty, &rdev->flags))
continue; continue;
target = rdev->sb_start + offset + index * (PAGE_SIZE/512); target = offset + index * (PAGE_SIZE/512);
if (sync_page_io(rdev, target, if (sync_page_io(rdev, target,
roundup(size, bdev_logical_block_size(rdev->bdev)), roundup(size, bdev_logical_block_size(rdev->bdev)),
page, READ)) { page, READ, true)) {
page->index = index; page->index = index;
attach_page_buffers(page, NULL); /* so that free_buffer will attach_page_buffers(page, NULL); /* so that free_buffer will
* quietly no-op */ * quietly no-op */
@ -264,14 +264,18 @@ static mdk_rdev_t *next_active_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
{ {
mdk_rdev_t *rdev = NULL; mdk_rdev_t *rdev = NULL;
struct block_device *bdev;
mddev_t *mddev = bitmap->mddev; mddev_t *mddev = bitmap->mddev;
while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
int size = PAGE_SIZE; int size = PAGE_SIZE;
loff_t offset = mddev->bitmap_info.offset; loff_t offset = mddev->bitmap_info.offset;
bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
if (page->index == bitmap->file_pages-1) if (page->index == bitmap->file_pages-1)
size = roundup(bitmap->last_page_size, size = roundup(bitmap->last_page_size,
bdev_logical_block_size(rdev->bdev)); bdev_logical_block_size(bdev));
/* Just make sure we aren't corrupting data or /* Just make sure we aren't corrupting data or
* metadata * metadata
*/ */
@ -1542,7 +1546,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
wait_event(bitmap->mddev->recovery_wait, wait_event(bitmap->mddev->recovery_wait,
atomic_read(&bitmap->mddev->recovery_active) == 0); atomic_read(&bitmap->mddev->recovery_active) == 0);
bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync; bitmap->mddev->curr_resync_completed = sector;
set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1);
s = 0; s = 0;

View file

@ -288,10 +288,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
int rv; int rv;
int cpu; int cpu;
if (mddev == NULL || mddev->pers == NULL) { if (mddev == NULL || mddev->pers == NULL
|| !mddev->ready) {
bio_io_error(bio); bio_io_error(bio);
return 0; return 0;
} }
smp_rmb(); /* Ensure implications of 'active' are visible */
rcu_read_lock(); rcu_read_lock();
if (mddev->suspended) { if (mddev->suspended) {
DEFINE_WAIT(__wait); DEFINE_WAIT(__wait);
@ -703,9 +705,9 @@ static struct mdk_personality *find_pers(int level, char *clevel)
} }
/* return the offset of the super block in 512byte sectors */ /* return the offset of the super block in 512byte sectors */
static inline sector_t calc_dev_sboffset(struct block_device *bdev) static inline sector_t calc_dev_sboffset(mdk_rdev_t *rdev)
{ {
sector_t num_sectors = i_size_read(bdev->bd_inode) / 512; sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
return MD_NEW_SIZE_SECTORS(num_sectors); return MD_NEW_SIZE_SECTORS(num_sectors);
} }
@ -763,7 +765,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
*/ */
struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev); struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
bio->bi_bdev = rdev->bdev; bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
bio->bi_sector = sector; bio->bi_sector = sector;
bio_add_page(bio, page, size, 0); bio_add_page(bio, page, size, 0);
bio->bi_private = rdev; bio->bi_private = rdev;
@ -793,7 +795,7 @@ static void bi_complete(struct bio *bio, int error)
} }
int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size, int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
struct page *page, int rw) struct page *page, int rw, bool metadata_op)
{ {
struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev); struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
struct completion event; struct completion event;
@ -801,8 +803,12 @@ int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
rw |= REQ_SYNC | REQ_UNPLUG; rw |= REQ_SYNC | REQ_UNPLUG;
bio->bi_bdev = rdev->bdev; bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
bio->bi_sector = sector; rdev->meta_bdev : rdev->bdev;
if (metadata_op)
bio->bi_sector = sector + rdev->sb_start;
else
bio->bi_sector = sector + rdev->data_offset;
bio_add_page(bio, page, size, 0); bio_add_page(bio, page, size, 0);
init_completion(&event); init_completion(&event);
bio->bi_private = &event; bio->bi_private = &event;
@ -827,7 +833,7 @@ static int read_disk_sb(mdk_rdev_t * rdev, int size)
return 0; return 0;
if (!sync_page_io(rdev, rdev->sb_start, size, rdev->sb_page, READ)) if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
goto fail; goto fail;
rdev->sb_loaded = 1; rdev->sb_loaded = 1;
return 0; return 0;
@ -989,7 +995,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
* *
* It also happens to be a multiple of 4Kb. * It also happens to be a multiple of 4Kb.
*/ */
rdev->sb_start = calc_dev_sboffset(rdev->bdev); rdev->sb_start = calc_dev_sboffset(rdev);
ret = read_disk_sb(rdev, MD_SB_BYTES); ret = read_disk_sb(rdev, MD_SB_BYTES);
if (ret) return ret; if (ret) return ret;
@ -1330,7 +1336,7 @@ super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
return 0; /* component must fit device */ return 0; /* component must fit device */
if (rdev->mddev->bitmap_info.offset) if (rdev->mddev->bitmap_info.offset)
return 0; /* can't move bitmap */ return 0; /* can't move bitmap */
rdev->sb_start = calc_dev_sboffset(rdev->bdev); rdev->sb_start = calc_dev_sboffset(rdev);
if (!num_sectors || num_sectors > rdev->sb_start) if (!num_sectors || num_sectors > rdev->sb_start)
num_sectors = rdev->sb_start; num_sectors = rdev->sb_start;
md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
@ -2465,6 +2471,10 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
if (rdev2->raid_disk == slot) if (rdev2->raid_disk == slot)
return -EEXIST; return -EEXIST;
if (slot >= rdev->mddev->raid_disks &&
slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
return -ENOSPC;
rdev->raid_disk = slot; rdev->raid_disk = slot;
if (test_bit(In_sync, &rdev->flags)) if (test_bit(In_sync, &rdev->flags))
rdev->saved_raid_disk = slot; rdev->saved_raid_disk = slot;
@ -2482,7 +2492,8 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
/* failure here is OK */; /* failure here is OK */;
/* don't wakeup anyone, leave that to userspace. */ /* don't wakeup anyone, leave that to userspace. */
} else { } else {
if (slot >= rdev->mddev->raid_disks) if (slot >= rdev->mddev->raid_disks &&
slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
return -ENOSPC; return -ENOSPC;
rdev->raid_disk = slot; rdev->raid_disk = slot;
/* assume it is working */ /* assume it is working */
@ -3107,7 +3118,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
char nm[20]; char nm[20];
if (rdev->raid_disk < 0) if (rdev->raid_disk < 0)
continue; continue;
if (rdev->new_raid_disk > mddev->raid_disks) if (rdev->new_raid_disk >= mddev->raid_disks)
rdev->new_raid_disk = -1; rdev->new_raid_disk = -1;
if (rdev->new_raid_disk == rdev->raid_disk) if (rdev->new_raid_disk == rdev->raid_disk)
continue; continue;
@ -3736,6 +3747,8 @@ action_show(mddev_t *mddev, char *page)
return sprintf(page, "%s\n", type); return sprintf(page, "%s\n", type);
} }
static void reap_sync_thread(mddev_t *mddev);
static ssize_t static ssize_t
action_store(mddev_t *mddev, const char *page, size_t len) action_store(mddev_t *mddev, const char *page, size_t len)
{ {
@ -3750,9 +3763,7 @@ action_store(mddev_t *mddev, const char *page, size_t len)
if (cmd_match(page, "idle") || cmd_match(page, "frozen")) { if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
if (mddev->sync_thread) { if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_unregister_thread(mddev->sync_thread); reap_sync_thread(mddev);
mddev->sync_thread = NULL;
mddev->recovery = 0;
} }
} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
@ -3904,7 +3915,7 @@ static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
static ssize_t static ssize_t
sync_completed_show(mddev_t *mddev, char *page) sync_completed_show(mddev_t *mddev, char *page)
{ {
unsigned long max_sectors, resync; unsigned long long max_sectors, resync;
if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
return sprintf(page, "none\n"); return sprintf(page, "none\n");
@ -3915,7 +3926,7 @@ sync_completed_show(mddev_t *mddev, char *page)
max_sectors = mddev->dev_sectors; max_sectors = mddev->dev_sectors;
resync = mddev->curr_resync_completed; resync = mddev->curr_resync_completed;
return sprintf(page, "%lu / %lu\n", resync, max_sectors); return sprintf(page, "%llu / %llu\n", resync, max_sectors);
} }
static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
@ -4002,19 +4013,24 @@ suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
{ {
char *e; char *e;
unsigned long long new = simple_strtoull(buf, &e, 10); unsigned long long new = simple_strtoull(buf, &e, 10);
unsigned long long old = mddev->suspend_lo;
if (mddev->pers == NULL || if (mddev->pers == NULL ||
mddev->pers->quiesce == NULL) mddev->pers->quiesce == NULL)
return -EINVAL; return -EINVAL;
if (buf == e || (*e && *e != '\n')) if (buf == e || (*e && *e != '\n'))
return -EINVAL; return -EINVAL;
if (new >= mddev->suspend_hi ||
(new > mddev->suspend_lo && new < mddev->suspend_hi)) { mddev->suspend_lo = new;
mddev->suspend_lo = new; if (new >= old)
/* Shrinking suspended region */
mddev->pers->quiesce(mddev, 2); mddev->pers->quiesce(mddev, 2);
return len; else {
} else /* Expanding suspended region - need to wait */
return -EINVAL; mddev->pers->quiesce(mddev, 1);
mddev->pers->quiesce(mddev, 0);
}
return len;
} }
static struct md_sysfs_entry md_suspend_lo = static struct md_sysfs_entry md_suspend_lo =
__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store); __ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
@ -4031,20 +4047,24 @@ suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
{ {
char *e; char *e;
unsigned long long new = simple_strtoull(buf, &e, 10); unsigned long long new = simple_strtoull(buf, &e, 10);
unsigned long long old = mddev->suspend_hi;
if (mddev->pers == NULL || if (mddev->pers == NULL ||
mddev->pers->quiesce == NULL) mddev->pers->quiesce == NULL)
return -EINVAL; return -EINVAL;
if (buf == e || (*e && *e != '\n')) if (buf == e || (*e && *e != '\n'))
return -EINVAL; return -EINVAL;
if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) ||
(new > mddev->suspend_lo && new > mddev->suspend_hi)) { mddev->suspend_hi = new;
mddev->suspend_hi = new; if (new <= old)
/* Shrinking suspended region */
mddev->pers->quiesce(mddev, 2);
else {
/* Expanding suspended region - need to wait */
mddev->pers->quiesce(mddev, 1); mddev->pers->quiesce(mddev, 1);
mddev->pers->quiesce(mddev, 0); mddev->pers->quiesce(mddev, 0);
return len; }
} else return len;
return -EINVAL;
} }
static struct md_sysfs_entry md_suspend_hi = static struct md_sysfs_entry md_suspend_hi =
__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store); __ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
@ -4422,7 +4442,9 @@ int md_run(mddev_t *mddev)
* We don't want the data to overlap the metadata, * We don't want the data to overlap the metadata,
* Internal Bitmap issues have been handled elsewhere. * Internal Bitmap issues have been handled elsewhere.
*/ */
if (rdev->data_offset < rdev->sb_start) { if (rdev->meta_bdev) {
/* Nothing to check */;
} else if (rdev->data_offset < rdev->sb_start) {
if (mddev->dev_sectors && if (mddev->dev_sectors &&
rdev->data_offset + mddev->dev_sectors rdev->data_offset + mddev->dev_sectors
> rdev->sb_start) { > rdev->sb_start) {
@ -4556,7 +4578,8 @@ int md_run(mddev_t *mddev)
mddev->safemode_timer.data = (unsigned long) mddev; mddev->safemode_timer.data = (unsigned long) mddev;
mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
mddev->in_sync = 1; mddev->in_sync = 1;
smp_wmb();
mddev->ready = 1;
list_for_each_entry(rdev, &mddev->disks, same_set) list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0) { if (rdev->raid_disk >= 0) {
char nm[20]; char nm[20];
@ -4693,13 +4716,12 @@ static void md_clean(mddev_t *mddev)
mddev->plug = NULL; mddev->plug = NULL;
} }
void md_stop_writes(mddev_t *mddev) static void __md_stop_writes(mddev_t *mddev)
{ {
if (mddev->sync_thread) { if (mddev->sync_thread) {
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_unregister_thread(mddev->sync_thread); reap_sync_thread(mddev);
mddev->sync_thread = NULL;
} }
del_timer_sync(&mddev->safemode_timer); del_timer_sync(&mddev->safemode_timer);
@ -4713,10 +4735,18 @@ void md_stop_writes(mddev_t *mddev)
md_update_sb(mddev, 1); md_update_sb(mddev, 1);
} }
} }
/*
 * md_stop_writes - exported, self-locking wrapper around __md_stop_writes().
 *
 * Takes mddev_lock() on @mddev, runs __md_stop_writes() and then drops the
 * lock with mddev_unlock().  md_set_readonly() and do_md_stop() call
 * __md_stop_writes() directly instead of going through this wrapper
 * (presumably because they already hold the lock - verify against callers).
 *
 * NOTE(review): the return value of mddev_lock() is ignored here.  If
 * mddev_lock() can fail (e.g. an interruptible lock), __md_stop_writes()
 * would run unlocked and mddev_unlock() would be unbalanced - confirm
 * against the mddev_lock() definition.
 */
void md_stop_writes(mddev_t *mddev)
{
mddev_lock(mddev);
__md_stop_writes(mddev);
mddev_unlock(mddev);
}
EXPORT_SYMBOL_GPL(md_stop_writes); EXPORT_SYMBOL_GPL(md_stop_writes);
void md_stop(mddev_t *mddev) void md_stop(mddev_t *mddev)
{ {
mddev->ready = 0;
mddev->pers->stop(mddev); mddev->pers->stop(mddev);
if (mddev->pers->sync_request && mddev->to_remove == NULL) if (mddev->pers->sync_request && mddev->to_remove == NULL)
mddev->to_remove = &md_redundancy_group; mddev->to_remove = &md_redundancy_group;
@ -4736,7 +4766,7 @@ static int md_set_readonly(mddev_t *mddev, int is_open)
goto out; goto out;
} }
if (mddev->pers) { if (mddev->pers) {
md_stop_writes(mddev); __md_stop_writes(mddev);
err = -ENXIO; err = -ENXIO;
if (mddev->ro==1) if (mddev->ro==1)
@ -4773,7 +4803,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
if (mddev->ro) if (mddev->ro)
set_disk_ro(disk, 0); set_disk_ro(disk, 0);
md_stop_writes(mddev); __md_stop_writes(mddev);
md_stop(mddev); md_stop(mddev);
mddev->queue->merge_bvec_fn = NULL; mddev->queue->merge_bvec_fn = NULL;
mddev->queue->unplug_fn = NULL; mddev->queue->unplug_fn = NULL;
@ -5151,9 +5181,10 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
/* set saved_raid_disk if appropriate */ /* set saved_raid_disk if appropriate */
if (!mddev->persistent) { if (!mddev->persistent) {
if (info->state & (1<<MD_DISK_SYNC) && if (info->state & (1<<MD_DISK_SYNC) &&
info->raid_disk < mddev->raid_disks) info->raid_disk < mddev->raid_disks) {
rdev->raid_disk = info->raid_disk; rdev->raid_disk = info->raid_disk;
else set_bit(In_sync, &rdev->flags);
} else
rdev->raid_disk = -1; rdev->raid_disk = -1;
} else } else
super_types[mddev->major_version]. super_types[mddev->major_version].
@ -5230,7 +5261,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
printk(KERN_INFO "md: nonpersistent superblock ...\n"); printk(KERN_INFO "md: nonpersistent superblock ...\n");
rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512; rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
} else } else
rdev->sb_start = calc_dev_sboffset(rdev->bdev); rdev->sb_start = calc_dev_sboffset(rdev);
rdev->sectors = rdev->sb_start; rdev->sectors = rdev->sb_start;
err = bind_rdev_to_array(rdev, mddev); err = bind_rdev_to_array(rdev, mddev);
@ -5297,7 +5328,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
} }
if (mddev->persistent) if (mddev->persistent)
rdev->sb_start = calc_dev_sboffset(rdev->bdev); rdev->sb_start = calc_dev_sboffset(rdev);
else else
rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512; rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
@ -5510,7 +5541,6 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
* sb_start or, if that is <data_offset, it must fit before the size * sb_start or, if that is <data_offset, it must fit before the size
* of each device. If num_sectors is zero, we find the largest size * of each device. If num_sectors is zero, we find the largest size
* that fits. * that fits.
*/ */
if (mddev->sync_thread) if (mddev->sync_thread)
return -EBUSY; return -EBUSY;
@ -6033,7 +6063,8 @@ static int md_thread(void * arg)
|| kthread_should_stop(), || kthread_should_stop(),
thread->timeout); thread->timeout);
if (test_and_clear_bit(THREAD_WAKEUP, &thread->flags)) clear_bit(THREAD_WAKEUP, &thread->flags);
if (!kthread_should_stop())
thread->run(thread->mddev); thread->run(thread->mddev);
} }
@ -6799,7 +6830,7 @@ void md_do_sync(mddev_t *mddev)
desc, mdname(mddev)); desc, mdname(mddev));
mddev->curr_resync = j; mddev->curr_resync = j;
} }
mddev->curr_resync_completed = mddev->curr_resync; mddev->curr_resync_completed = j;
while (j < max_sectors) { while (j < max_sectors) {
sector_t sectors; sector_t sectors;
@ -6817,8 +6848,7 @@ void md_do_sync(mddev_t *mddev)
md_unplug(mddev); md_unplug(mddev);
wait_event(mddev->recovery_wait, wait_event(mddev->recovery_wait,
atomic_read(&mddev->recovery_active) == 0); atomic_read(&mddev->recovery_active) == 0);
mddev->curr_resync_completed = mddev->curr_resync_completed = j;
mddev->curr_resync;
set_bit(MD_CHANGE_CLEAN, &mddev->flags); set_bit(MD_CHANGE_CLEAN, &mddev->flags);
sysfs_notify(&mddev->kobj, NULL, "sync_completed"); sysfs_notify(&mddev->kobj, NULL, "sync_completed");
} }
@ -7023,6 +7053,45 @@ static int remove_and_add_spares(mddev_t *mddev)
} }
return spares; return spares;
} }
/*
 * reap_sync_thread - tear down mddev->sync_thread and collect its result.
 *
 * Steps, in order:
 *  - unregister the sync thread and clear mddev->sync_thread;
 *  - if neither MD_RECOVERY_INTR nor MD_RECOVERY_REQUESTED is set, ask the
 *    personality to activate spares and notify "degraded" when it reports
 *    a non-zero result;
 *  - if MD_RECOVERY_RESHAPE is set and the personality provides
 *    ->finish_reshape, call it;
 *  - write the superblock via md_update_sb(mddev, 1);
 *  - reset saved_raid_disk on every rdev when the array is not degraded;
 *  - clear the RUNNING/SYNC/RESHAPE/REQUESTED/CHECK bits one by one and set
 *    MD_RECOVERY_NEEDED.
 *
 * Only those five bits are cleared; any other bit in ->recovery (for
 * example MD_RECOVERY_INTR or MD_RECOVERY_FROZEN) is left as it was.
 *
 * The callers visible here (action_store, __md_stop_writes and
 * md_check_recovery) only call this when mddev->sync_thread is non-NULL.
 */
static void reap_sync_thread(mddev_t *mddev)
{
mdk_rdev_t *rdev;
/* resync has finished, collect result */
md_unregister_thread(mddev->sync_thread);
mddev->sync_thread = NULL;
/* Spares are only activated when the sync was neither interrupted
 * (INTR) nor a user-requested pass (REQUESTED).
 */
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
/* success...*/
/* activate any spares */
if (mddev->pers->spare_active(mddev))
sysfs_notify(&mddev->kobj, NULL,
"degraded");
}
/* ->finish_reshape is optional, hence the NULL check before the call */
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
mddev->pers->finish_reshape)
mddev->pers->finish_reshape(mddev);
md_update_sb(mddev, 1);
/* if array is no-longer degraded, then any saved_raid_disk
* information must be scrapped
*/
if (!mddev->degraded)
list_for_each_entry(rdev, &mddev->disks, same_set)
rdev->saved_raid_disk = -1;
/* Clear the per-run state bits individually rather than zeroing the
 * whole ->recovery word, so unrelated bits survive.
 */
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
/* flag recovery needed just to double check */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
sysfs_notify_dirent_safe(mddev->sysfs_action);
md_new_event(mddev);
}
/* /*
* This routine is regularly called by all per-raid-array threads to * This routine is regularly called by all per-raid-array threads to
* deal with generic issues like resync and super-block update. * deal with generic issues like resync and super-block update.
@ -7047,9 +7116,6 @@ static int remove_and_add_spares(mddev_t *mddev)
*/ */
void md_check_recovery(mddev_t *mddev) void md_check_recovery(mddev_t *mddev)
{ {
mdk_rdev_t *rdev;
if (mddev->bitmap) if (mddev->bitmap)
bitmap_daemon_work(mddev); bitmap_daemon_work(mddev);
@ -7117,34 +7183,7 @@ void md_check_recovery(mddev_t *mddev)
goto unlock; goto unlock;
} }
if (mddev->sync_thread) { if (mddev->sync_thread) {
/* resync has finished, collect result */ reap_sync_thread(mddev);
md_unregister_thread(mddev->sync_thread);
mddev->sync_thread = NULL;
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
/* success...*/
/* activate any spares */
if (mddev->pers->spare_active(mddev))
sysfs_notify(&mddev->kobj, NULL,
"degraded");
}
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
mddev->pers->finish_reshape)
mddev->pers->finish_reshape(mddev);
md_update_sb(mddev, 1);
/* if array is no-longer degraded, then any saved_raid_disk
* information must be scrapped
*/
if (!mddev->degraded)
list_for_each_entry(rdev, &mddev->disks, same_set)
rdev->saved_raid_disk = -1;
mddev->recovery = 0;
/* flag recovery needed just to double check */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
sysfs_notify_dirent_safe(mddev->sysfs_action);
md_new_event(mddev);
goto unlock; goto unlock;
} }
/* Set RUNNING before clearing NEEDED to avoid /* Set RUNNING before clearing NEEDED to avoid
@ -7202,7 +7241,11 @@ void md_check_recovery(mddev_t *mddev)
" thread...\n", " thread...\n",
mdname(mddev)); mdname(mddev));
/* leave the spares where they are, it shouldn't hurt */ /* leave the spares where they are, it shouldn't hurt */
mddev->recovery = 0; clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
} else } else
md_wakeup_thread(mddev->sync_thread); md_wakeup_thread(mddev->sync_thread);
sysfs_notify_dirent_safe(mddev->sysfs_action); sysfs_notify_dirent_safe(mddev->sysfs_action);

View file

@ -60,6 +60,12 @@ struct mdk_rdev_s
mddev_t *mddev; /* RAID array if running */ mddev_t *mddev; /* RAID array if running */
int last_events; /* IO event timestamp */ int last_events; /* IO event timestamp */
/*
* If meta_bdev is non-NULL, it means that a separate device is
* being used to store the metadata (superblock/bitmap) which
* would otherwise be contained on the same device as the data (bdev).
*/
struct block_device *meta_bdev;
struct block_device *bdev; /* block device handle */ struct block_device *bdev; /* block device handle */
struct page *sb_page; struct page *sb_page;
@ -148,7 +154,8 @@ struct mddev_s
* are happening, so run/ * are happening, so run/
* takeover/stop are not safe * takeover/stop are not safe
*/ */
int ready; /* See when safe to pass
* IO requests down */
struct gendisk *gendisk; struct gendisk *gendisk;
struct kobject kobj; struct kobject kobj;
@ -497,8 +504,8 @@ extern void md_flush_request(mddev_t *mddev, struct bio *bio);
extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
sector_t sector, int size, struct page *page); sector_t sector, int size, struct page *page);
extern void md_super_wait(mddev_t *mddev); extern void md_super_wait(mddev_t *mddev);
extern int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size, extern int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
struct page *page, int rw); struct page *page, int rw, bool metadata_op);
extern void md_do_sync(mddev_t *mddev); extern void md_do_sync(mddev_t *mddev);
extern void md_new_event(mddev_t *mddev); extern void md_new_event(mddev_t *mddev);
extern int md_allow_write(mddev_t *mddev); extern int md_allow_write(mddev_t *mddev);

View file

@ -1027,8 +1027,9 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
} else } else
set_bit(Faulty, &rdev->flags); set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT "md/raid1:%s: Disk failure on %s, disabling device.\n" printk(KERN_ALERT
KERN_ALERT "md/raid1:%s: Operation continuing on %d devices.\n", "md/raid1:%s: Disk failure on %s, disabling device.\n"
"md/raid1:%s: Operation continuing on %d devices.\n",
mdname(mddev), bdevname(rdev->bdev, b), mdname(mddev), bdevname(rdev->bdev, b),
mdname(mddev), conf->raid_disks - mddev->degraded); mdname(mddev), conf->raid_disks - mddev->degraded);
} }
@ -1364,10 +1365,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
*/ */
rdev = conf->mirrors[d].rdev; rdev = conf->mirrors[d].rdev;
if (sync_page_io(rdev, if (sync_page_io(rdev,
sect + rdev->data_offset, sect,
s<<9, s<<9,
bio->bi_io_vec[idx].bv_page, bio->bi_io_vec[idx].bv_page,
READ)) { READ, false)) {
success = 1; success = 1;
break; break;
} }
@ -1390,10 +1391,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
rdev = conf->mirrors[d].rdev; rdev = conf->mirrors[d].rdev;
atomic_add(s, &rdev->corrected_errors); atomic_add(s, &rdev->corrected_errors);
if (sync_page_io(rdev, if (sync_page_io(rdev,
sect + rdev->data_offset, sect,
s<<9, s<<9,
bio->bi_io_vec[idx].bv_page, bio->bi_io_vec[idx].bv_page,
WRITE) == 0) WRITE, false) == 0)
md_error(mddev, rdev); md_error(mddev, rdev);
} }
d = start; d = start;
@ -1405,10 +1406,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
continue; continue;
rdev = conf->mirrors[d].rdev; rdev = conf->mirrors[d].rdev;
if (sync_page_io(rdev, if (sync_page_io(rdev,
sect + rdev->data_offset, sect,
s<<9, s<<9,
bio->bi_io_vec[idx].bv_page, bio->bi_io_vec[idx].bv_page,
READ) == 0) READ, false) == 0)
md_error(mddev, rdev); md_error(mddev, rdev);
} }
} else { } else {
@ -1488,10 +1489,8 @@ static void fix_read_error(conf_t *conf, int read_disk,
rdev = conf->mirrors[d].rdev; rdev = conf->mirrors[d].rdev;
if (rdev && if (rdev &&
test_bit(In_sync, &rdev->flags) && test_bit(In_sync, &rdev->flags) &&
sync_page_io(rdev, sync_page_io(rdev, sect, s<<9,
sect + rdev->data_offset, conf->tmppage, READ, false))
s<<9,
conf->tmppage, READ))
success = 1; success = 1;
else { else {
d++; d++;
@ -1514,9 +1513,8 @@ static void fix_read_error(conf_t *conf, int read_disk,
rdev = conf->mirrors[d].rdev; rdev = conf->mirrors[d].rdev;
if (rdev && if (rdev &&
test_bit(In_sync, &rdev->flags)) { test_bit(In_sync, &rdev->flags)) {
if (sync_page_io(rdev, if (sync_page_io(rdev, sect, s<<9,
sect + rdev->data_offset, conf->tmppage, WRITE, false)
s<<9, conf->tmppage, WRITE)
== 0) == 0)
/* Well, this device is dead */ /* Well, this device is dead */
md_error(mddev, rdev); md_error(mddev, rdev);
@ -1531,9 +1529,8 @@ static void fix_read_error(conf_t *conf, int read_disk,
rdev = conf->mirrors[d].rdev; rdev = conf->mirrors[d].rdev;
if (rdev && if (rdev &&
test_bit(In_sync, &rdev->flags)) { test_bit(In_sync, &rdev->flags)) {
if (sync_page_io(rdev, if (sync_page_io(rdev, sect, s<<9,
sect + rdev->data_offset, conf->tmppage, READ, false)
s<<9, conf->tmppage, READ)
== 0) == 0)
/* Well, this device is dead */ /* Well, this device is dead */
md_error(mddev, rdev); md_error(mddev, rdev);

View file

@ -1051,8 +1051,9 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
} }
set_bit(Faulty, &rdev->flags); set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT "md/raid10:%s: Disk failure on %s, disabling device.\n" printk(KERN_ALERT
KERN_ALERT "md/raid10:%s: Operation continuing on %d devices.\n", "md/raid10:%s: Disk failure on %s, disabling device.\n"
"md/raid10:%s: Operation continuing on %d devices.\n",
mdname(mddev), bdevname(rdev->bdev, b), mdname(mddev), bdevname(rdev->bdev, b),
mdname(mddev), conf->raid_disks - mddev->degraded); mdname(mddev), conf->raid_disks - mddev->degraded);
} }
@ -1559,9 +1560,9 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
rcu_read_unlock(); rcu_read_unlock();
success = sync_page_io(rdev, success = sync_page_io(rdev,
r10_bio->devs[sl].addr + r10_bio->devs[sl].addr +
sect + rdev->data_offset, sect,
s<<9, s<<9,
conf->tmppage, READ); conf->tmppage, READ, false);
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
rcu_read_lock(); rcu_read_lock();
if (success) if (success)
@ -1598,8 +1599,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
atomic_add(s, &rdev->corrected_errors); atomic_add(s, &rdev->corrected_errors);
if (sync_page_io(rdev, if (sync_page_io(rdev,
r10_bio->devs[sl].addr + r10_bio->devs[sl].addr +
sect + rdev->data_offset, sect,
s<<9, conf->tmppage, WRITE) s<<9, conf->tmppage, WRITE, false)
== 0) { == 0) {
/* Well, this device is dead */ /* Well, this device is dead */
printk(KERN_NOTICE printk(KERN_NOTICE
@ -1635,9 +1636,9 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
rcu_read_unlock(); rcu_read_unlock();
if (sync_page_io(rdev, if (sync_page_io(rdev,
r10_bio->devs[sl].addr + r10_bio->devs[sl].addr +
sect + rdev->data_offset, sect,
s<<9, conf->tmppage, s<<9, conf->tmppage,
READ) == 0) { READ, false) == 0) {
/* Well, this device is dead */ /* Well, this device is dead */
printk(KERN_NOTICE printk(KERN_NOTICE
"md/raid10:%s: unable to read back " "md/raid10:%s: unable to read back "

View file

@ -1721,7 +1721,6 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
set_bit(Faulty, &rdev->flags); set_bit(Faulty, &rdev->flags);
printk(KERN_ALERT printk(KERN_ALERT
"md/raid:%s: Disk failure on %s, disabling device.\n" "md/raid:%s: Disk failure on %s, disabling device.\n"
KERN_ALERT
"md/raid:%s: Operation continuing on %d devices.\n", "md/raid:%s: Operation continuing on %d devices.\n",
mdname(mddev), mdname(mddev),
bdevname(rdev->bdev, b), bdevname(rdev->bdev, b),
@ -4237,7 +4236,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
wait_event(conf->wait_for_overlap, wait_event(conf->wait_for_overlap,
atomic_read(&conf->reshape_stripes)==0); atomic_read(&conf->reshape_stripes)==0);
mddev->reshape_position = conf->reshape_progress; mddev->reshape_position = conf->reshape_progress;
mddev->curr_resync_completed = mddev->curr_resync; mddev->curr_resync_completed = sector_nr;
conf->reshape_checkpoint = jiffies; conf->reshape_checkpoint = jiffies;
set_bit(MD_CHANGE_DEVS, &mddev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
@ -4338,7 +4337,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
wait_event(conf->wait_for_overlap, wait_event(conf->wait_for_overlap,
atomic_read(&conf->reshape_stripes) == 0); atomic_read(&conf->reshape_stripes) == 0);
mddev->reshape_position = conf->reshape_progress; mddev->reshape_position = conf->reshape_progress;
mddev->curr_resync_completed = mddev->curr_resync + reshape_sectors; mddev->curr_resync_completed = sector_nr;
conf->reshape_checkpoint = jiffies; conf->reshape_checkpoint = jiffies;
set_bit(MD_CHANGE_DEVS, &mddev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
@ -5339,7 +5338,7 @@ static int raid5_spare_active(mddev_t *mddev)
&& !test_bit(Faulty, &tmp->rdev->flags) && !test_bit(Faulty, &tmp->rdev->flags)
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) { && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
count++; count++;
sysfs_notify_dirent(tmp->rdev->sysfs_state); sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
} }
} }
spin_lock_irqsave(&conf->device_lock, flags); spin_lock_irqsave(&conf->device_lock, flags);
@ -5528,8 +5527,8 @@ static int raid5_start_reshape(mddev_t *mddev)
return -ENOSPC; return -ENOSPC;
list_for_each_entry(rdev, &mddev->disks, same_set) list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk < 0 && if ((rdev->raid_disk < 0 || rdev->raid_disk >= conf->raid_disks)
!test_bit(Faulty, &rdev->flags)) && !test_bit(Faulty, &rdev->flags))
spares++; spares++;
if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded) if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded)
@ -5589,6 +5588,11 @@ static int raid5_start_reshape(mddev_t *mddev)
/* Failure here is OK */; /* Failure here is OK */;
} else } else
break; break;
} else if (rdev->raid_disk >= conf->previous_raid_disks
&& !test_bit(Faulty, &rdev->flags)) {
/* This is a spare that was manually added */
set_bit(In_sync, &rdev->flags);
added_devices++;
} }
/* When a reshape changes the number of devices, ->degraded /* When a reshape changes the number of devices, ->degraded