diff --git a/Documentation/admin-guide/iostats.rst b/Documentation/admin-guide/iostats.rst index df5b8345c41d..9b14b0c2c9c4 100644 --- a/Documentation/admin-guide/iostats.rst +++ b/Documentation/admin-guide/iostats.rst @@ -100,7 +100,7 @@ Field 10 -- # of milliseconds spent doing I/Os (unsigned int) Since 5.0 this field counts jiffies when at least one request was started or completed. If request runs more than 2 jiffies then some - I/O time will not be accounted unless there are other requests. + I/O time might be not accounted in case of concurrent requests. Field 11 -- weighted # of milliseconds spent doing I/Os (unsigned int) This field is incremented at each I/O start, I/O completion, I/O @@ -143,6 +143,9 @@ are summed (possibly overflowing the unsigned long variable they are summed to) and the result given to the user. There is no convenient user interface for accessing the per-CPU counters themselves. +Since 4.19 request times are measured with nanoseconds precision and +truncated to milliseconds before showing in this interface. + Disks vs Partitions ------------------- diff --git a/Documentation/block/capability.rst b/Documentation/block/capability.rst index 2cf258d64bbe..160a5148b915 100644 --- a/Documentation/block/capability.rst +++ b/Documentation/block/capability.rst @@ -2,17 +2,9 @@ Generic Block Device Capability =============================== -This file documents the sysfs file block//capability +This file documents the sysfs file ``block//capability``. -capability is a hex word indicating which capabilities a specific disk -supports. For more information on bits not listed here, see -include/linux/genhd.h +``capability`` is a bitfield, printed in hexadecimal, indicating which +capabilities a specific block device supports: -GENHD_FL_MEDIA_CHANGE_NOTIFY ----------------------------- - -Value: 4 - -When this bit is set, the disk supports Asynchronous Notification -of media change events. These events will be broadcast to user -space via kernel uevent. +.. kernel-doc:: include/linux/genhd.h diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt index 2a4be1c3e6db..537f04728487 100644 --- a/Documentation/scsi/scsi_mid_low_api.txt +++ b/Documentation/scsi/scsi_mid_low_api.txt @@ -299,7 +299,6 @@ Summary: scsi_host_alloc - return a new scsi_host instance whose refcount==1 scsi_host_get - increments Scsi_Host instance's refcount scsi_host_put - decrements Scsi_Host instance's refcount (free if 0) - scsi_partsize - parse partition table into cylinders, heads + sectors scsi_register - create and register a scsi host adapter instance. scsi_remove_device - detach and remove a SCSI device scsi_remove_host - detach and remove all SCSI devices owned by host @@ -472,26 +471,6 @@ void scsi_host_get(struct Scsi_Host *shost) void scsi_host_put(struct Scsi_Host *shost) -/** - * scsi_partsize - parse partition table into cylinders, heads + sectors - * @buf: pointer to partition table - * @capacity: size of (total) disk in 512 byte sectors - * @cyls: outputs number of cylinders calculated via this pointer - * @hds: outputs number of heads calculated via this pointer - * @secs: outputs number of sectors calculated via this pointer - * - * Returns 0 on success, -1 on failure - * - * Might block: no - * - * Notes: Caller owns memory returned (free with kfree() ) - * - * Defined in: drivers/scsi/scsicam.c - **/ -int scsi_partsize(unsigned char *buf, unsigned long capacity, - unsigned int *cyls, unsigned int *hds, unsigned int *secs) - - /** * scsi_register - create and register a scsi host adapter instance. * @sht: pointer to scsi host template diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index 40712e49381b..c3a630440512 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -118,12 +118,11 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize) dev->bsize = bsize; dev->bshift = ffs(bsize) - 10; - dev->queue = blk_alloc_queue(GFP_KERNEL); + dev->queue = blk_alloc_queue(nfhd_make_request, NUMA_NO_NODE); if (dev->queue == NULL) goto free_dev; dev->queue->queuedata = dev; - blk_queue_make_request(dev->queue, nfhd_make_request); blk_queue_logical_block_size(dev->queue, bsize); dev->disk = alloc_disk(16); diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index 833109880165..49322b66cda9 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -267,13 +267,12 @@ static int __init simdisk_setup(struct simdisk *dev, int which, spin_lock_init(&dev->lock); dev->users = 0; - dev->queue = blk_alloc_queue(GFP_KERNEL); + dev->queue = blk_alloc_queue(simdisk_make_request, NUMA_NO_NODE); if (dev->queue == NULL) { pr_err("blk_alloc_queue failed\n"); goto out_alloc_queue; } - blk_queue_make_request(dev->queue, simdisk_make_request); dev->queue->queuedata = dev; dev->gd = alloc_disk(SIMDISK_MINORS); diff --git a/block/Makefile b/block/Makefile index 1a43750f4b01..206b96e9387f 100644 --- a/block/Makefile +++ b/block/Makefile @@ -8,8 +8,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \ - genhd.o partition-generic.o ioprio.o \ - badblocks.o partitions/ blk-rq-qos.o + genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o obj-$(CONFIG_BOUNCE) += bounce.o obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index f0ff6654af28..68882b9b8f11 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -642,6 +642,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_entity *entity = &bfqq->entity; + /* + * Get extra reference to prevent bfqq from being freed in + * next possible expire or deactivate. + */ + bfqq->ref++; + /* If bfqq is empty, then bfq_bfqq_expire also invokes * bfq_del_bfqq_busy, thereby removing bfqq and its entity * from data structures related to current group. Otherwise we @@ -652,12 +658,6 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_bfqq_expire(bfqd, bfqd->in_service_queue, false, BFQQE_PREEMPTED); - /* - * get extra reference to prevent bfqq from being freed in - * next possible deactivate - */ - bfqq->ref++; - if (bfq_bfqq_busy(bfqq)) bfq_deactivate_bfqq(bfqd, bfqq, false, false); else if (entity->on_st_or_in_serv) @@ -677,7 +677,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (!bfqd->in_service_queue && !bfqd->rq_in_driver) bfq_schedule_dispatch(bfqd); - /* release extra ref taken above */ + /* release extra ref taken above, bfqq may happen to be freed now */ bfq_put_queue(bfqq); } @@ -714,10 +714,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, if (entity->sched_data != &bfqg->sched_data) { bic_set_bfqq(bic, NULL, 0); - bfq_log_bfqq(bfqd, async_bfqq, - "bic_change_group: %p %d", - async_bfqq, async_bfqq->ref); - bfq_put_queue(async_bfqq); + bfq_release_process_ref(bfqd, async_bfqq); } } @@ -818,39 +815,53 @@ static void bfq_flush_idle_tree(struct bfq_service_tree *st) /** * bfq_reparent_leaf_entity - move leaf entity to the root_group. * @bfqd: the device data structure with the root group. - * @entity: the entity to move. + * @entity: the entity to move, if entity is a leaf; or the parent entity + * of an active leaf entity to move, if entity is not a leaf. */ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd, - struct bfq_entity *entity) + struct bfq_entity *entity, + int ioprio_class) { - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct bfq_queue *bfqq; + struct bfq_entity *child_entity = entity; + while (child_entity->my_sched_data) { /* leaf not reached yet */ + struct bfq_sched_data *child_sd = child_entity->my_sched_data; + struct bfq_service_tree *child_st = child_sd->service_tree + + ioprio_class; + struct rb_root *child_active = &child_st->active; + + child_entity = bfq_entity_of(rb_first(child_active)); + + if (!child_entity) + child_entity = child_sd->in_service_entity; + } + + bfqq = bfq_entity_to_bfqq(child_entity); bfq_bfqq_move(bfqd, bfqq, bfqd->root_group); } /** - * bfq_reparent_active_entities - move to the root group all active - * entities. + * bfq_reparent_active_queues - move to the root group all active queues. * @bfqd: the device data structure with the root group. * @bfqg: the group to move from. - * @st: the service tree with the entities. + * @st: the service tree to start the search from. */ -static void bfq_reparent_active_entities(struct bfq_data *bfqd, - struct bfq_group *bfqg, - struct bfq_service_tree *st) +static void bfq_reparent_active_queues(struct bfq_data *bfqd, + struct bfq_group *bfqg, + struct bfq_service_tree *st, + int ioprio_class) { struct rb_root *active = &st->active; - struct bfq_entity *entity = NULL; + struct bfq_entity *entity; - if (!RB_EMPTY_ROOT(&st->active)) - entity = bfq_entity_of(rb_first(active)); - - for (; entity ; entity = bfq_entity_of(rb_first(active))) - bfq_reparent_leaf_entity(bfqd, entity); + while ((entity = bfq_entity_of(rb_first(active)))) + bfq_reparent_leaf_entity(bfqd, entity, ioprio_class); if (bfqg->sched_data.in_service_entity) bfq_reparent_leaf_entity(bfqd, - bfqg->sched_data.in_service_entity); + bfqg->sched_data.in_service_entity, + ioprio_class); } /** @@ -882,13 +893,6 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { st = bfqg->sched_data.service_tree + i; - /* - * The idle tree may still contain bfq_queues belonging - * to exited task because they never migrated to a different - * cgroup from the one being destroyed now. - */ - bfq_flush_idle_tree(st); - /* * It may happen that some queues are still active * (busy) upon group destruction (if the corresponding @@ -901,7 +905,20 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) * There is no need to put the sync queues, as the * scheduler has taken no reference. */ - bfq_reparent_active_entities(bfqd, bfqg, st); + bfq_reparent_active_queues(bfqd, bfqg, st, i); + + /* + * The idle tree may still contain bfq_queues + * belonging to exited task because they never + * migrated to a different cgroup from the one being + * destroyed now. In addition, even + * bfq_reparent_active_queues() may happen to add some + * entities to the idle tree. It happens if, in some + * of the calls to bfq_bfqq_move() performed by + * bfq_reparent_active_queues(), the queue to move is + * empty and gets expired. + */ + bfq_flush_idle_tree(st); } __bfq_deactivate_entity(entity, false); diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 8c436abfaf14..78ba57efd16b 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2716,8 +2716,6 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) } } - -static void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq) { /* @@ -6215,20 +6213,28 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) return bfqq; } -static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq) +static void +bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq) { - struct bfq_data *bfqd = bfqq->bfqd; enum bfqq_expiration reason; unsigned long flags; spin_lock_irqsave(&bfqd->lock, flags); - bfq_clear_bfqq_wait_request(bfqq); + /* + * Considering that bfqq may be in race, we should firstly check + * whether bfqq is in service before doing something on it. If + * the bfqq in race is not in service, it has already been expired + * through __bfq_bfqq_expire func and its wait_request flags has + * been cleared in __bfq_bfqd_reset_in_service func. + */ if (bfqq != bfqd->in_service_queue) { spin_unlock_irqrestore(&bfqd->lock, flags); return; } + bfq_clear_bfqq_wait_request(bfqq); + if (bfq_bfqq_budget_timeout(bfqq)) /* * Also here the queue can be safely expired @@ -6273,7 +6279,7 @@ static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer) * early. */ if (bfqq) - bfq_idle_slice_timer_body(bfqq); + bfq_idle_slice_timer_body(bfqd, bfqq); return HRTIMER_NORESTART; } diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index d1233af9c684..cd224aaf9f52 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -955,6 +955,7 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool compensate, enum bfqq_expiration reason); void bfq_put_queue(struct bfq_queue *bfqq); void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); +void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq); void bfq_schedule_dispatch(struct bfq_data *bfqd); void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); diff --git a/block/bio.c b/block/bio.c index 94d697217887..21cbaa6a1c20 100644 --- a/block/bio.c +++ b/block/bio.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "blk.h" @@ -587,6 +588,49 @@ void bio_truncate(struct bio *bio, unsigned new_size) bio->bi_iter.bi_size = new_size; } +/** + * guard_bio_eod - truncate a BIO to fit the block device + * @bio: bio to truncate + * + * This allows us to do IO even on the odd last sectors of a device, even if the + * block size is some multiple of the physical sector size. + * + * We'll just truncate the bio to the size of the device, and clear the end of + * the buffer head manually. Truly out-of-range accesses will turn into actual + * I/O errors, this only handles the "we need to be able to do I/O at the final + * sector" case. + */ +void guard_bio_eod(struct bio *bio) +{ + sector_t maxsector; + struct hd_struct *part; + + rcu_read_lock(); + part = __disk_get_part(bio->bi_disk, bio->bi_partno); + if (part) + maxsector = part_nr_sects_read(part); + else + maxsector = get_capacity(bio->bi_disk); + rcu_read_unlock(); + + if (!maxsector) + return; + + /* + * If the *whole* IO is past the end of the device, + * let it through, and the IO layer will turn it into + * an EIO. + */ + if (unlikely(bio->bi_iter.bi_sector >= maxsector)) + return; + + maxsector -= bio->bi_iter.bi_sector; + if (likely((bio->bi_iter.bi_size >> 9) <= maxsector)) + return; + + bio_truncate(bio, maxsector << 9); +} + /** * bio_put - release a reference to a bio * @bio: bio to release reference to @@ -679,6 +723,12 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs) } EXPORT_SYMBOL(bio_clone_fast); +const char *bio_devname(struct bio *bio, char *buf) +{ + return disk_name(bio->bi_disk, bio->bi_partno, buf); +} +EXPORT_SYMBOL(bio_devname); + static inline bool page_is_mergeable(const struct bio_vec *bv, struct page *page, unsigned int len, unsigned int off, bool *same_page) @@ -730,7 +780,7 @@ static bool bio_try_merge_pc_page(struct request_queue *q, struct bio *bio, * * This should only be used by passthrough bios. */ -static int __bio_add_pc_page(struct request_queue *q, struct bio *bio, +int __bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, bool *same_page) { @@ -1019,12 +1069,21 @@ static void submit_bio_wait_endio(struct bio *bio) int submit_bio_wait(struct bio *bio) { DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_disk->lockdep_map); + unsigned long hang_check; bio->bi_private = &done; bio->bi_end_io = submit_bio_wait_endio; bio->bi_opf |= REQ_SYNC; submit_bio(bio); - wait_for_completion_io(&done); + + /* Prevent hang_check timer from firing at us during very long I/O */ + hang_check = sysctl_hung_task_timeout_secs; + if (hang_check) + while (!wait_for_completion_io_timeout(&done, + hang_check * (HZ/2))) + ; + else + wait_for_completion_io(&done); return blk_status_to_errno(bio->bi_status); } @@ -1135,90 +1194,6 @@ void bio_list_copy_data(struct bio *dst, struct bio *src) } EXPORT_SYMBOL(bio_list_copy_data); -struct bio_map_data { - int is_our_pages; - struct iov_iter iter; - struct iovec iov[]; -}; - -static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data, - gfp_t gfp_mask) -{ - struct bio_map_data *bmd; - if (data->nr_segs > UIO_MAXIOV) - return NULL; - - bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask); - if (!bmd) - return NULL; - memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs); - bmd->iter = *data; - bmd->iter.iov = bmd->iov; - return bmd; -} - -/** - * bio_copy_from_iter - copy all pages from iov_iter to bio - * @bio: The &struct bio which describes the I/O as destination - * @iter: iov_iter as source - * - * Copy all pages from iov_iter to bio. - * Returns 0 on success, or error on failure. - */ -static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter) -{ - struct bio_vec *bvec; - struct bvec_iter_all iter_all; - - bio_for_each_segment_all(bvec, bio, iter_all) { - ssize_t ret; - - ret = copy_page_from_iter(bvec->bv_page, - bvec->bv_offset, - bvec->bv_len, - iter); - - if (!iov_iter_count(iter)) - break; - - if (ret < bvec->bv_len) - return -EFAULT; - } - - return 0; -} - -/** - * bio_copy_to_iter - copy all pages from bio to iov_iter - * @bio: The &struct bio which describes the I/O as source - * @iter: iov_iter as destination - * - * Copy all pages from bio to iov_iter. - * Returns 0 on success, or error on failure. - */ -static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter) -{ - struct bio_vec *bvec; - struct bvec_iter_all iter_all; - - bio_for_each_segment_all(bvec, bio, iter_all) { - ssize_t ret; - - ret = copy_page_to_iter(bvec->bv_page, - bvec->bv_offset, - bvec->bv_len, - &iter); - - if (!iov_iter_count(&iter)) - break; - - if (ret < bvec->bv_len) - return -EFAULT; - } - - return 0; -} - void bio_free_pages(struct bio *bio) { struct bio_vec *bvec; @@ -1229,430 +1204,6 @@ void bio_free_pages(struct bio *bio) } EXPORT_SYMBOL(bio_free_pages); -/** - * bio_uncopy_user - finish previously mapped bio - * @bio: bio being terminated - * - * Free pages allocated from bio_copy_user_iov() and write back data - * to user space in case of a read. - */ -int bio_uncopy_user(struct bio *bio) -{ - struct bio_map_data *bmd = bio->bi_private; - int ret = 0; - - if (!bio_flagged(bio, BIO_NULL_MAPPED)) { - /* - * if we're in a workqueue, the request is orphaned, so - * don't copy into a random user address space, just free - * and return -EINTR so user space doesn't expect any data. - */ - if (!current->mm) - ret = -EINTR; - else if (bio_data_dir(bio) == READ) - ret = bio_copy_to_iter(bio, bmd->iter); - if (bmd->is_our_pages) - bio_free_pages(bio); - } - kfree(bmd); - bio_put(bio); - return ret; -} - -/** - * bio_copy_user_iov - copy user data to bio - * @q: destination block queue - * @map_data: pointer to the rq_map_data holding pages (if necessary) - * @iter: iovec iterator - * @gfp_mask: memory allocation flags - * - * Prepares and returns a bio for indirect user io, bouncing data - * to/from kernel pages as necessary. Must be paired with - * call bio_uncopy_user() on io completion. - */ -struct bio *bio_copy_user_iov(struct request_queue *q, - struct rq_map_data *map_data, - struct iov_iter *iter, - gfp_t gfp_mask) -{ - struct bio_map_data *bmd; - struct page *page; - struct bio *bio; - int i = 0, ret; - int nr_pages; - unsigned int len = iter->count; - unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0; - - bmd = bio_alloc_map_data(iter, gfp_mask); - if (!bmd) - return ERR_PTR(-ENOMEM); - - /* - * We need to do a deep copy of the iov_iter including the iovecs. - * The caller provided iov might point to an on-stack or otherwise - * shortlived one. - */ - bmd->is_our_pages = map_data ? 0 : 1; - - nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); - if (nr_pages > BIO_MAX_PAGES) - nr_pages = BIO_MAX_PAGES; - - ret = -ENOMEM; - bio = bio_kmalloc(gfp_mask, nr_pages); - if (!bio) - goto out_bmd; - - ret = 0; - - if (map_data) { - nr_pages = 1 << map_data->page_order; - i = map_data->offset / PAGE_SIZE; - } - while (len) { - unsigned int bytes = PAGE_SIZE; - - bytes -= offset; - - if (bytes > len) - bytes = len; - - if (map_data) { - if (i == map_data->nr_entries * nr_pages) { - ret = -ENOMEM; - break; - } - - page = map_data->pages[i / nr_pages]; - page += (i % nr_pages); - - i++; - } else { - page = alloc_page(q->bounce_gfp | gfp_mask); - if (!page) { - ret = -ENOMEM; - break; - } - } - - if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) { - if (!map_data) - __free_page(page); - break; - } - - len -= bytes; - offset = 0; - } - - if (ret) - goto cleanup; - - if (map_data) - map_data->offset += bio->bi_iter.bi_size; - - /* - * success - */ - if ((iov_iter_rw(iter) == WRITE && (!map_data || !map_data->null_mapped)) || - (map_data && map_data->from_user)) { - ret = bio_copy_from_iter(bio, iter); - if (ret) - goto cleanup; - } else { - if (bmd->is_our_pages) - zero_fill_bio(bio); - iov_iter_advance(iter, bio->bi_iter.bi_size); - } - - bio->bi_private = bmd; - if (map_data && map_data->null_mapped) - bio_set_flag(bio, BIO_NULL_MAPPED); - return bio; -cleanup: - if (!map_data) - bio_free_pages(bio); - bio_put(bio); -out_bmd: - kfree(bmd); - return ERR_PTR(ret); -} - -/** - * bio_map_user_iov - map user iovec into bio - * @q: the struct request_queue for the bio - * @iter: iovec iterator - * @gfp_mask: memory allocation flags - * - * Map the user space address into a bio suitable for io to a block - * device. Returns an error pointer in case of error. - */ -struct bio *bio_map_user_iov(struct request_queue *q, - struct iov_iter *iter, - gfp_t gfp_mask) -{ - int j; - struct bio *bio; - int ret; - - if (!iov_iter_count(iter)) - return ERR_PTR(-EINVAL); - - bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES)); - if (!bio) - return ERR_PTR(-ENOMEM); - - while (iov_iter_count(iter)) { - struct page **pages; - ssize_t bytes; - size_t offs, added = 0; - int npages; - - bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs); - if (unlikely(bytes <= 0)) { - ret = bytes ? bytes : -EFAULT; - goto out_unmap; - } - - npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE); - - if (unlikely(offs & queue_dma_alignment(q))) { - ret = -EINVAL; - j = 0; - } else { - for (j = 0; j < npages; j++) { - struct page *page = pages[j]; - unsigned int n = PAGE_SIZE - offs; - bool same_page = false; - - if (n > bytes) - n = bytes; - - if (!__bio_add_pc_page(q, bio, page, n, offs, - &same_page)) { - if (same_page) - put_page(page); - break; - } - - added += n; - bytes -= n; - offs = 0; - } - iov_iter_advance(iter, added); - } - /* - * release the pages we didn't map into the bio, if any - */ - while (j < npages) - put_page(pages[j++]); - kvfree(pages); - /* couldn't stuff something into bio? */ - if (bytes) - break; - } - - bio_set_flag(bio, BIO_USER_MAPPED); - - /* - * subtle -- if bio_map_user_iov() ended up bouncing a bio, - * it would normally disappear when its bi_end_io is run. - * however, we need it for the unmap, so grab an extra - * reference to it - */ - bio_get(bio); - return bio; - - out_unmap: - bio_release_pages(bio, false); - bio_put(bio); - return ERR_PTR(ret); -} - -/** - * bio_unmap_user - unmap a bio - * @bio: the bio being unmapped - * - * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from - * process context. - * - * bio_unmap_user() may sleep. - */ -void bio_unmap_user(struct bio *bio) -{ - bio_release_pages(bio, bio_data_dir(bio) == READ); - bio_put(bio); - bio_put(bio); -} - -static void bio_invalidate_vmalloc_pages(struct bio *bio) -{ -#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE - if (bio->bi_private && !op_is_write(bio_op(bio))) { - unsigned long i, len = 0; - - for (i = 0; i < bio->bi_vcnt; i++) - len += bio->bi_io_vec[i].bv_len; - invalidate_kernel_vmap_range(bio->bi_private, len); - } -#endif -} - -static void bio_map_kern_endio(struct bio *bio) -{ - bio_invalidate_vmalloc_pages(bio); - bio_put(bio); -} - -/** - * bio_map_kern - map kernel address into bio - * @q: the struct request_queue for the bio - * @data: pointer to buffer to map - * @len: length in bytes - * @gfp_mask: allocation flags for bio allocation - * - * Map the kernel address into a bio suitable for io to a block - * device. Returns an error pointer in case of error. - */ -struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, - gfp_t gfp_mask) -{ - unsigned long kaddr = (unsigned long)data; - unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = kaddr >> PAGE_SHIFT; - const int nr_pages = end - start; - bool is_vmalloc = is_vmalloc_addr(data); - struct page *page; - int offset, i; - struct bio *bio; - - bio = bio_kmalloc(gfp_mask, nr_pages); - if (!bio) - return ERR_PTR(-ENOMEM); - - if (is_vmalloc) { - flush_kernel_vmap_range(data, len); - bio->bi_private = data; - } - - offset = offset_in_page(kaddr); - for (i = 0; i < nr_pages; i++) { - unsigned int bytes = PAGE_SIZE - offset; - - if (len <= 0) - break; - - if (bytes > len) - bytes = len; - - if (!is_vmalloc) - page = virt_to_page(data); - else - page = vmalloc_to_page(data); - if (bio_add_pc_page(q, bio, page, bytes, - offset) < bytes) { - /* we don't support partial mappings */ - bio_put(bio); - return ERR_PTR(-EINVAL); - } - - data += bytes; - len -= bytes; - offset = 0; - } - - bio->bi_end_io = bio_map_kern_endio; - return bio; -} - -static void bio_copy_kern_endio(struct bio *bio) -{ - bio_free_pages(bio); - bio_put(bio); -} - -static void bio_copy_kern_endio_read(struct bio *bio) -{ - char *p = bio->bi_private; - struct bio_vec *bvec; - struct bvec_iter_all iter_all; - - bio_for_each_segment_all(bvec, bio, iter_all) { - memcpy(p, page_address(bvec->bv_page), bvec->bv_len); - p += bvec->bv_len; - } - - bio_copy_kern_endio(bio); -} - -/** - * bio_copy_kern - copy kernel address into bio - * @q: the struct request_queue for the bio - * @data: pointer to buffer to copy - * @len: length in bytes - * @gfp_mask: allocation flags for bio and page allocation - * @reading: data direction is READ - * - * copy the kernel address into a bio suitable for io to a block - * device. Returns an error pointer in case of error. - */ -struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, - gfp_t gfp_mask, int reading) -{ - unsigned long kaddr = (unsigned long)data; - unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = kaddr >> PAGE_SHIFT; - struct bio *bio; - void *p = data; - int nr_pages = 0; - - /* - * Overflow, abort - */ - if (end < start) - return ERR_PTR(-EINVAL); - - nr_pages = end - start; - bio = bio_kmalloc(gfp_mask, nr_pages); - if (!bio) - return ERR_PTR(-ENOMEM); - - while (len) { - struct page *page; - unsigned int bytes = PAGE_SIZE; - - if (bytes > len) - bytes = len; - - page = alloc_page(q->bounce_gfp | gfp_mask); - if (!page) - goto cleanup; - - if (!reading) - memcpy(page_address(page), p, bytes); - - if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) - break; - - len -= bytes; - p += bytes; - } - - if (reading) { - bio->bi_end_io = bio_copy_kern_endio_read; - bio->bi_private = data; - } else { - bio->bi_end_io = bio_copy_kern_endio; - } - - return bio; - -cleanup: - bio_free_pages(bio); - bio_put(bio); - return ERR_PTR(-ENOMEM); -} - /* * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions * for performing direct-IO in BIOs. @@ -1752,14 +1303,14 @@ defer: schedule_work(&bio_dirty_work); } -void update_io_ticks(struct hd_struct *part, unsigned long now) +void update_io_ticks(struct hd_struct *part, unsigned long now, bool end) { unsigned long stamp; again: stamp = READ_ONCE(part->stamp); if (unlikely(stamp != now)) { if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) { - __part_stat_add(part, io_ticks, 1); + __part_stat_add(part, io_ticks, end ? now - stamp : 1); } } if (part->partno) { @@ -1775,7 +1326,7 @@ void generic_start_io_acct(struct request_queue *q, int op, part_stat_lock(); - update_io_ticks(part, jiffies); + update_io_ticks(part, jiffies, false); part_stat_inc(part, ios[sgrp]); part_stat_add(part, sectors[sgrp], sectors); part_inc_in_flight(q, part, op_is_write(op)); @@ -1793,9 +1344,8 @@ void generic_end_io_acct(struct request_queue *q, int req_op, part_stat_lock(); - update_io_ticks(part, now); + update_io_ticks(part, now, true); part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration)); - part_stat_add(part, time_in_queue, duration); part_dec_in_flight(q, part, op_is_write(req_op)); part_stat_unlock(); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index a229b94d5390..c15a26096038 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1010,7 +1010,7 @@ unlock: * blkcg_init_queue - initialize blkcg part of request queue * @q: request_queue to initialize * - * Called from blk_alloc_queue_node(). Responsible for initializing blkcg + * Called from __blk_alloc_queue(). Responsible for initializing blkcg * part of new request_queue @q. * * RETURNS: diff --git a/block/blk-core.c b/block/blk-core.c index 60dc9552ef8d..7e4a1da0715e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -346,7 +346,6 @@ void blk_cleanup_queue(struct request_queue *q) blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q); blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q); - blk_queue_flag_set(QUEUE_FLAG_DYING, q); /* * Drain all requests queued before DYING marking. Set DEAD flag to @@ -389,12 +388,6 @@ void blk_cleanup_queue(struct request_queue *q) } EXPORT_SYMBOL(blk_cleanup_queue); -struct request_queue *blk_alloc_queue(gfp_t gfp_mask) -{ - return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE); -} -EXPORT_SYMBOL(blk_alloc_queue); - /** * blk_queue_enter() - try to increase q->q_usage_counter * @q: request queue pointer @@ -471,24 +464,19 @@ static void blk_timeout_work(struct work_struct *work) { } -/** - * blk_alloc_queue_node - allocate a request queue - * @gfp_mask: memory allocation flags - * @node_id: NUMA node to allocate memory from - */ -struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) +struct request_queue *__blk_alloc_queue(int node_id) { struct request_queue *q; int ret; q = kmem_cache_alloc_node(blk_requestq_cachep, - gfp_mask | __GFP_ZERO, node_id); + GFP_KERNEL | __GFP_ZERO, node_id); if (!q) return NULL; q->last_merge = NULL; - q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask); + q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL); if (q->id < 0) goto fail_q; @@ -496,7 +484,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) if (ret) goto fail_id; - q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id); + q->backing_dev_info = bdi_alloc_node(GFP_KERNEL, node_id); if (!q->backing_dev_info) goto fail_split; @@ -542,6 +530,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) if (blkcg_init_queue(q)) goto fail_ref; + blk_queue_dma_alignment(q, 511); + blk_set_default_limits(&q->limits); + return q; fail_ref: @@ -558,7 +549,22 @@ fail_q: kmem_cache_free(blk_requestq_cachep, q); return NULL; } -EXPORT_SYMBOL(blk_alloc_queue_node); + +struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id) +{ + struct request_queue *q; + + if (WARN_ON_ONCE(!make_request)) + return NULL; + + q = __blk_alloc_queue(node_id); + if (!q) + return NULL; + q->make_request_fn = make_request; + q->nr_requests = BLKDEV_MAX_RQ; + return q; +} +EXPORT_SYMBOL(blk_alloc_queue); bool blk_get_queue(struct request_queue *q) { @@ -1121,10 +1127,9 @@ blk_qc_t direct_make_request(struct bio *bio) if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) { if (nowait && !blk_queue_dying(q)) - bio->bi_status = BLK_STS_AGAIN; + bio_wouldblock_error(bio); else - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); + bio_io_error(bio); return BLK_QC_T_NONE; } @@ -1203,7 +1208,7 @@ EXPORT_SYMBOL(submit_bio); /** * blk_cloned_rq_check_limits - Helper function to check a cloned request - * for new the queue limits + * for the new queue limits * @q: the queue * @rq: the request being checked * @@ -1339,10 +1344,9 @@ void blk_account_io_done(struct request *req, u64 now) part_stat_lock(); part = req->part; - update_io_ticks(part, jiffies); + update_io_ticks(part, jiffies, true); part_stat_inc(part, ios[sgrp]); part_stat_add(part, nsecs[sgrp], now - req->start_time_ns); - part_stat_add(part, time_in_queue, nsecs_to_jiffies64(now - req->start_time_ns)); part_dec_in_flight(req->q, part, rq_data_dir(req)); hd_struct_put(part); @@ -1381,7 +1385,7 @@ void blk_account_io_start(struct request *rq, bool new_io) rq->part = part; } - update_io_ticks(part, jiffies); + update_io_ticks(part, jiffies, false); part_stat_unlock(); } @@ -1583,23 +1587,6 @@ void blk_rq_unprep_clone(struct request *rq) } EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); -/* - * Copy attributes of the original request to the clone request. - * The actual data parts (e.g. ->cmd, ->sense) are not copied. - */ -static void __blk_rq_prep_clone(struct request *dst, struct request *src) -{ - dst->__sector = blk_rq_pos(src); - dst->__data_len = blk_rq_bytes(src); - if (src->rq_flags & RQF_SPECIAL_PAYLOAD) { - dst->rq_flags |= RQF_SPECIAL_PAYLOAD; - dst->special_vec = src->special_vec; - } - dst->nr_phys_segments = src->nr_phys_segments; - dst->ioprio = src->ioprio; - dst->extra_len = src->extra_len; -} - /** * blk_rq_prep_clone - Helper function to setup clone request * @rq: the request to be setup @@ -1612,8 +1599,6 @@ static void __blk_rq_prep_clone(struct request *dst, struct request *src) * * Description: * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. - * The actual data parts of @rq_src (e.g. ->cmd, ->sense) - * are not copied, and copying such parts is the caller's responsibility. * Also, pages which the original bios are pointing to are not copied * and the cloned bios just point same pages. * So cloned bios must be completed before original bios, which means @@ -1644,7 +1629,16 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, rq->bio = rq->biotail = bio; } - __blk_rq_prep_clone(rq, rq_src); + /* Copy attributes of the original request to the clone request. */ + rq->__sector = blk_rq_pos(rq_src); + rq->__data_len = blk_rq_bytes(rq_src); + if (rq_src->rq_flags & RQF_SPECIAL_PAYLOAD) { + rq->rq_flags |= RQF_SPECIAL_PAYLOAD; + rq->special_vec = rq_src->special_vec; + } + rq->nr_phys_segments = rq_src->nr_phys_segments; + rq->ioprio = rq_src->ioprio; + rq->extra_len = rq_src->extra_len; return 0; diff --git a/block/blk-flush.c b/block/blk-flush.c index 5cc775bdb06a..c7f396e3d5e2 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -160,9 +160,6 @@ static void blk_account_io_flush(struct request *rq) * * CONTEXT: * spin_lock_irq(fq->mq_flush_lock) - * - * RETURNS: - * %true if requests were added to the dispatch queue, %false otherwise. */ static void blk_flush_complete_seq(struct request *rq, struct blk_flush_queue *fq, @@ -457,15 +454,6 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, if (!q) return -ENXIO; - /* - * some block devices may not have their queue correctly set up here - * (e.g. loop device without a backing file) and so issuing a flush - * here will panic. Ensure there is a request function before issuing - * the flush. - */ - if (!q->make_request_fn) - return -ENXIO; - bio = bio_alloc(gfp_mask, 0); bio_set_dev(bio, bdev); bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; @@ -485,8 +473,8 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, } EXPORT_SYMBOL(blkdev_issue_flush); -struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, - int node, int cmd_size, gfp_t flags) +struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, + gfp_t flags) { struct blk_flush_queue *fq; int rq_sz = sizeof(struct request); diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 5ed59ac6ae58..9df50fb507ca 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -84,6 +84,7 @@ static void ioc_destroy_icq(struct io_cq *icq) * making it impossible to determine icq_cache. Record it in @icq. */ icq->__rcu_icq_cache = et->icq_cache; + icq->flags |= ICQ_DESTROYED; call_rcu(&icq->__rcu_head, icq_free_icq_rcu); } @@ -212,15 +213,21 @@ static void __ioc_clear_queue(struct list_head *icq_list) { unsigned long flags; + rcu_read_lock(); while (!list_empty(icq_list)) { struct io_cq *icq = list_entry(icq_list->next, struct io_cq, q_node); struct io_context *ioc = icq->ioc; spin_lock_irqsave(&ioc->lock, flags); + if (icq->flags & ICQ_DESTROYED) { + spin_unlock_irqrestore(&ioc->lock, flags); + continue; + } ioc_destroy_icq(icq); spin_unlock_irqrestore(&ioc->lock, flags); } + rcu_read_unlock(); } /** diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 9a599cc28c29..db35ee682294 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -46,9 +46,6 @@ * If needed, tools/cgroup/iocost_coef_gen.py can be used to generate * device-specific coefficients. * - * If needed, tools/cgroup/iocost_coef_gen.py can be used to generate - * device-specific coefficients. - * * 2. Control Strategy * * The device virtual time (vtime) is used as the primary control metric. diff --git a/block/blk-map.c b/block/blk-map.c index b0790268ed9d..b72c361911a4 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -11,6 +11,514 @@ #include "blk.h" +struct bio_map_data { + int is_our_pages; + struct iov_iter iter; + struct iovec iov[]; +}; + +static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data, + gfp_t gfp_mask) +{ + struct bio_map_data *bmd; + + if (data->nr_segs > UIO_MAXIOV) + return NULL; + + bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask); + if (!bmd) + return NULL; + memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs); + bmd->iter = *data; + bmd->iter.iov = bmd->iov; + return bmd; +} + +/** + * bio_copy_from_iter - copy all pages from iov_iter to bio + * @bio: The &struct bio which describes the I/O as destination + * @iter: iov_iter as source + * + * Copy all pages from iov_iter to bio. + * Returns 0 on success, or error on failure. + */ +static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter) +{ + struct bio_vec *bvec; + struct bvec_iter_all iter_all; + + bio_for_each_segment_all(bvec, bio, iter_all) { + ssize_t ret; + + ret = copy_page_from_iter(bvec->bv_page, + bvec->bv_offset, + bvec->bv_len, + iter); + + if (!iov_iter_count(iter)) + break; + + if (ret < bvec->bv_len) + return -EFAULT; + } + + return 0; +} + +/** + * bio_copy_to_iter - copy all pages from bio to iov_iter + * @bio: The &struct bio which describes the I/O as source + * @iter: iov_iter as destination + * + * Copy all pages from bio to iov_iter. + * Returns 0 on success, or error on failure. + */ +static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter) +{ + struct bio_vec *bvec; + struct bvec_iter_all iter_all; + + bio_for_each_segment_all(bvec, bio, iter_all) { + ssize_t ret; + + ret = copy_page_to_iter(bvec->bv_page, + bvec->bv_offset, + bvec->bv_len, + &iter); + + if (!iov_iter_count(&iter)) + break; + + if (ret < bvec->bv_len) + return -EFAULT; + } + + return 0; +} + +/** + * bio_uncopy_user - finish previously mapped bio + * @bio: bio being terminated + * + * Free pages allocated from bio_copy_user_iov() and write back data + * to user space in case of a read. + */ +static int bio_uncopy_user(struct bio *bio) +{ + struct bio_map_data *bmd = bio->bi_private; + int ret = 0; + + if (!bio_flagged(bio, BIO_NULL_MAPPED)) { + /* + * if we're in a workqueue, the request is orphaned, so + * don't copy into a random user address space, just free + * and return -EINTR so user space doesn't expect any data. + */ + if (!current->mm) + ret = -EINTR; + else if (bio_data_dir(bio) == READ) + ret = bio_copy_to_iter(bio, bmd->iter); + if (bmd->is_our_pages) + bio_free_pages(bio); + } + kfree(bmd); + bio_put(bio); + return ret; +} + +/** + * bio_copy_user_iov - copy user data to bio + * @q: destination block queue + * @map_data: pointer to the rq_map_data holding pages (if necessary) + * @iter: iovec iterator + * @gfp_mask: memory allocation flags + * + * Prepares and returns a bio for indirect user io, bouncing data + * to/from kernel pages as necessary. Must be paired with + * call bio_uncopy_user() on io completion. + */ +static struct bio *bio_copy_user_iov(struct request_queue *q, + struct rq_map_data *map_data, struct iov_iter *iter, + gfp_t gfp_mask) +{ + struct bio_map_data *bmd; + struct page *page; + struct bio *bio; + int i = 0, ret; + int nr_pages; + unsigned int len = iter->count; + unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0; + + bmd = bio_alloc_map_data(iter, gfp_mask); + if (!bmd) + return ERR_PTR(-ENOMEM); + + /* + * We need to do a deep copy of the iov_iter including the iovecs. + * The caller provided iov might point to an on-stack or otherwise + * shortlived one. + */ + bmd->is_our_pages = map_data ? 0 : 1; + + nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); + if (nr_pages > BIO_MAX_PAGES) + nr_pages = BIO_MAX_PAGES; + + ret = -ENOMEM; + bio = bio_kmalloc(gfp_mask, nr_pages); + if (!bio) + goto out_bmd; + + ret = 0; + + if (map_data) { + nr_pages = 1 << map_data->page_order; + i = map_data->offset / PAGE_SIZE; + } + while (len) { + unsigned int bytes = PAGE_SIZE; + + bytes -= offset; + + if (bytes > len) + bytes = len; + + if (map_data) { + if (i == map_data->nr_entries * nr_pages) { + ret = -ENOMEM; + break; + } + + page = map_data->pages[i / nr_pages]; + page += (i % nr_pages); + + i++; + } else { + page = alloc_page(q->bounce_gfp | gfp_mask); + if (!page) { + ret = -ENOMEM; + break; + } + } + + if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) { + if (!map_data) + __free_page(page); + break; + } + + len -= bytes; + offset = 0; + } + + if (ret) + goto cleanup; + + if (map_data) + map_data->offset += bio->bi_iter.bi_size; + + /* + * success + */ + if ((iov_iter_rw(iter) == WRITE && + (!map_data || !map_data->null_mapped)) || + (map_data && map_data->from_user)) { + ret = bio_copy_from_iter(bio, iter); + if (ret) + goto cleanup; + } else { + if (bmd->is_our_pages) + zero_fill_bio(bio); + iov_iter_advance(iter, bio->bi_iter.bi_size); + } + + bio->bi_private = bmd; + if (map_data && map_data->null_mapped) + bio_set_flag(bio, BIO_NULL_MAPPED); + return bio; +cleanup: + if (!map_data) + bio_free_pages(bio); + bio_put(bio); +out_bmd: + kfree(bmd); + return ERR_PTR(ret); +} + +/** + * bio_map_user_iov - map user iovec into bio + * @q: the struct request_queue for the bio + * @iter: iovec iterator + * @gfp_mask: memory allocation flags + * + * Map the user space address into a bio suitable for io to a block + * device. Returns an error pointer in case of error. + */ +static struct bio *bio_map_user_iov(struct request_queue *q, + struct iov_iter *iter, gfp_t gfp_mask) +{ + int j; + struct bio *bio; + int ret; + + if (!iov_iter_count(iter)) + return ERR_PTR(-EINVAL); + + bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES)); + if (!bio) + return ERR_PTR(-ENOMEM); + + while (iov_iter_count(iter)) { + struct page **pages; + ssize_t bytes; + size_t offs, added = 0; + int npages; + + bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs); + if (unlikely(bytes <= 0)) { + ret = bytes ? bytes : -EFAULT; + goto out_unmap; + } + + npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE); + + if (unlikely(offs & queue_dma_alignment(q))) { + ret = -EINVAL; + j = 0; + } else { + for (j = 0; j < npages; j++) { + struct page *page = pages[j]; + unsigned int n = PAGE_SIZE - offs; + bool same_page = false; + + if (n > bytes) + n = bytes; + + if (!__bio_add_pc_page(q, bio, page, n, offs, + &same_page)) { + if (same_page) + put_page(page); + break; + } + + added += n; + bytes -= n; + offs = 0; + } + iov_iter_advance(iter, added); + } + /* + * release the pages we didn't map into the bio, if any + */ + while (j < npages) + put_page(pages[j++]); + kvfree(pages); + /* couldn't stuff something into bio? */ + if (bytes) + break; + } + + bio_set_flag(bio, BIO_USER_MAPPED); + + /* + * subtle -- if bio_map_user_iov() ended up bouncing a bio, + * it would normally disappear when its bi_end_io is run. + * however, we need it for the unmap, so grab an extra + * reference to it + */ + bio_get(bio); + return bio; + + out_unmap: + bio_release_pages(bio, false); + bio_put(bio); + return ERR_PTR(ret); +} + +/** + * bio_unmap_user - unmap a bio + * @bio: the bio being unmapped + * + * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from + * process context. + * + * bio_unmap_user() may sleep. + */ +static void bio_unmap_user(struct bio *bio) +{ + bio_release_pages(bio, bio_data_dir(bio) == READ); + bio_put(bio); + bio_put(bio); +} + +static void bio_invalidate_vmalloc_pages(struct bio *bio) +{ +#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE + if (bio->bi_private && !op_is_write(bio_op(bio))) { + unsigned long i, len = 0; + + for (i = 0; i < bio->bi_vcnt; i++) + len += bio->bi_io_vec[i].bv_len; + invalidate_kernel_vmap_range(bio->bi_private, len); + } +#endif +} + +static void bio_map_kern_endio(struct bio *bio) +{ + bio_invalidate_vmalloc_pages(bio); + bio_put(bio); +} + +/** + * bio_map_kern - map kernel address into bio + * @q: the struct request_queue for the bio + * @data: pointer to buffer to map + * @len: length in bytes + * @gfp_mask: allocation flags for bio allocation + * + * Map the kernel address into a bio suitable for io to a block + * device. Returns an error pointer in case of error. + */ +static struct bio *bio_map_kern(struct request_queue *q, void *data, + unsigned int len, gfp_t gfp_mask) +{ + unsigned long kaddr = (unsigned long)data; + unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long start = kaddr >> PAGE_SHIFT; + const int nr_pages = end - start; + bool is_vmalloc = is_vmalloc_addr(data); + struct page *page; + int offset, i; + struct bio *bio; + + bio = bio_kmalloc(gfp_mask, nr_pages); + if (!bio) + return ERR_PTR(-ENOMEM); + + if (is_vmalloc) { + flush_kernel_vmap_range(data, len); + bio->bi_private = data; + } + + offset = offset_in_page(kaddr); + for (i = 0; i < nr_pages; i++) { + unsigned int bytes = PAGE_SIZE - offset; + + if (len <= 0) + break; + + if (bytes > len) + bytes = len; + + if (!is_vmalloc) + page = virt_to_page(data); + else + page = vmalloc_to_page(data); + if (bio_add_pc_page(q, bio, page, bytes, + offset) < bytes) { + /* we don't support partial mappings */ + bio_put(bio); + return ERR_PTR(-EINVAL); + } + + data += bytes; + len -= bytes; + offset = 0; + } + + bio->bi_end_io = bio_map_kern_endio; + return bio; +} + +static void bio_copy_kern_endio(struct bio *bio) +{ + bio_free_pages(bio); + bio_put(bio); +} + +static void bio_copy_kern_endio_read(struct bio *bio) +{ + char *p = bio->bi_private; + struct bio_vec *bvec; + struct bvec_iter_all iter_all; + + bio_for_each_segment_all(bvec, bio, iter_all) { + memcpy(p, page_address(bvec->bv_page), bvec->bv_len); + p += bvec->bv_len; + } + + bio_copy_kern_endio(bio); +} + +/** + * bio_copy_kern - copy kernel address into bio + * @q: the struct request_queue for the bio + * @data: pointer to buffer to copy + * @len: length in bytes + * @gfp_mask: allocation flags for bio and page allocation + * @reading: data direction is READ + * + * copy the kernel address into a bio suitable for io to a block + * device. Returns an error pointer in case of error. + */ +static struct bio *bio_copy_kern(struct request_queue *q, void *data, + unsigned int len, gfp_t gfp_mask, int reading) +{ + unsigned long kaddr = (unsigned long)data; + unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long start = kaddr >> PAGE_SHIFT; + struct bio *bio; + void *p = data; + int nr_pages = 0; + + /* + * Overflow, abort + */ + if (end < start) + return ERR_PTR(-EINVAL); + + nr_pages = end - start; + bio = bio_kmalloc(gfp_mask, nr_pages); + if (!bio) + return ERR_PTR(-ENOMEM); + + while (len) { + struct page *page; + unsigned int bytes = PAGE_SIZE; + + if (bytes > len) + bytes = len; + + page = alloc_page(q->bounce_gfp | gfp_mask); + if (!page) + goto cleanup; + + if (!reading) + memcpy(page_address(page), p, bytes); + + if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) + break; + + len -= bytes; + p += bytes; + } + + if (reading) { + bio->bi_end_io = bio_copy_kern_endio_read; + bio->bi_private = data; + } else { + bio->bi_end_io = bio_copy_kern_endio; + } + + return bio; + +cleanup: + bio_free_pages(bio); + bio_put(bio); + return ERR_PTR(-ENOMEM); +} + /* * Append a bio to a passthrough request. Only works if the bio can be merged * into the request based on the driver constraints. diff --git a/block/blk-mq.c b/block/blk-mq.c index d92088dec6c3..f6291ceedee4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1178,6 +1178,23 @@ static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy) #define BLK_MQ_RESOURCE_DELAY 3 /* ms units */ +static void blk_mq_handle_dev_resource(struct request *rq, + struct list_head *list) +{ + struct request *next = + list_first_entry_or_null(list, struct request, queuelist); + + /* + * If an I/O scheduler has been configured and we got a driver tag for + * the next request already, free it. + */ + if (next) + blk_mq_put_driver_tag(next); + + list_add(&rq->queuelist, list); + __blk_mq_requeue_request(rq); +} + /* * Returns true if we did some work AND can potentially do more. */ @@ -1245,17 +1262,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, ret = q->mq_ops->queue_rq(hctx, &bd); if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) { - /* - * If an I/O scheduler has been configured and we got a - * driver tag for the next request already, free it - * again. - */ - if (!list_empty(list)) { - nxt = list_first_entry(list, struct request, queuelist); - blk_mq_put_driver_tag(nxt); - } - list_add(&rq->queuelist, list); - __blk_mq_requeue_request(rq); + blk_mq_handle_dev_resource(rq, list); break; } @@ -2409,8 +2416,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set, init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake); INIT_LIST_HEAD(&hctx->dispatch_wait.entry); - hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size, - gfp); + hctx->fq = blk_alloc_flush_queue(hctx->numa_node, set->cmd_size, gfp); if (!hctx->fq) goto free_bitmap; @@ -2718,13 +2724,15 @@ void blk_mq_release(struct request_queue *q) blk_mq_sysfs_deinit(q); } -struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) +struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, + void *queuedata) { struct request_queue *uninit_q, *q; - uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node); + uninit_q = __blk_alloc_queue(set->numa_node); if (!uninit_q) return ERR_PTR(-ENOMEM); + uninit_q->queuedata = queuedata; /* * Initialize the queue without an elevator. device_add_disk() will do @@ -2736,6 +2744,12 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) return q; } +EXPORT_SYMBOL_GPL(blk_mq_init_queue_data); + +struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) +{ + return blk_mq_init_queue_data(set, NULL); +} EXPORT_SYMBOL(blk_mq_init_queue); /* @@ -2824,7 +2838,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, memcpy(new_hctxs, hctxs, q->nr_hw_queues * sizeof(*hctxs)); q->queue_hw_ctx = new_hctxs; - q->nr_hw_queues = set->nr_hw_queues; kfree(hctxs); hctxs = new_hctxs; } @@ -2926,11 +2939,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, INIT_LIST_HEAD(&q->requeue_list); spin_lock_init(&q->requeue_lock); - blk_queue_make_request(q, blk_mq_make_request); - - /* - * Do this after blk_queue_make_request() overrides it... - */ + q->make_request_fn = blk_mq_make_request; q->nr_requests = set->queue_depth; /* @@ -3023,6 +3032,14 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) static int blk_mq_update_queue_map(struct blk_mq_tag_set *set) { + /* + * blk_mq_map_queues() and multiple .map_queues() implementations + * expect that set->map[HCTX_TYPE_DEFAULT].nr_queues is set to the + * number of hardware queues. + */ + if (set->nr_maps == 1) + set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues; + if (set->ops->map_queues && !is_kdump_kernel()) { int i; diff --git a/block/blk-settings.c b/block/blk-settings.c index c8eda2e7b91e..126d216a2db6 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -86,42 +86,6 @@ void blk_set_stacking_limits(struct queue_limits *lim) } EXPORT_SYMBOL(blk_set_stacking_limits); -/** - * blk_queue_make_request - define an alternate make_request function for a device - * @q: the request queue for the device to be affected - * @mfn: the alternate make_request function - * - * Description: - * The normal way for &struct bios to be passed to a device - * driver is for them to be collected into requests on a request - * queue, and then to allow the device driver to select requests - * off that queue when it is ready. This works well for many block - * devices. However some block devices (typically virtual devices - * such as md or lvm) do not benefit from the processing on the - * request queue, and are served best by having the requests passed - * directly to them. This can be achieved by providing a function - * to blk_queue_make_request(). - * - * Caveat: - * The driver that does this *must* be able to deal appropriately - * with buffers in "highmemory". This can be accomplished by either calling - * kmap_atomic() to get a temporary kernel mapping, or by calling - * blk_queue_bounce() to create a buffer in normal memory. - **/ -void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) -{ - /* - * set defaults - */ - q->nr_requests = BLKDEV_MAX_RQ; - - q->make_request_fn = mfn; - blk_queue_dma_alignment(q, 511); - - blk_set_default_limits(&q->limits); -} -EXPORT_SYMBOL(blk_queue_make_request); - /** * blk_queue_bounce_limit - set bounce buffer limit for queue * @q: the request queue for the device diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 05741c6f618b..6b442ae96499 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -173,7 +173,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, if (!op_is_zone_mgmt(op)) return -EOPNOTSUPP; - if (!nr_sectors || end_sector > capacity) + if (end_sector <= sector || end_sector > capacity) /* Out of range */ return -EINVAL; diff --git a/block/blk.h b/block/blk.h index 0b8884353f6b..0a94ec68af32 100644 --- a/block/blk.h +++ b/block/blk.h @@ -4,6 +4,7 @@ #include #include +#include #include #include "blk-mq.h" #include "blk-mq-sched.h" @@ -55,8 +56,8 @@ is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx) return hctx->fq->flush_rq == req; } -struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, - int node, int cmd_size, gfp_t flags); +struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, + gfp_t flags); void blk_free_flush_queue(struct blk_flush_queue *q); void blk_freeze_queue(struct request_queue *q); @@ -149,6 +150,9 @@ static inline bool integrity_req_gap_front_merge(struct request *req, return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], bip_next->bip_vec[0].bv_offset); } + +void blk_integrity_add(struct gendisk *); +void blk_integrity_del(struct gendisk *); #else /* CONFIG_BLK_DEV_INTEGRITY */ static inline bool integrity_req_gap_back_merge(struct request *req, struct bio *next) @@ -171,6 +175,12 @@ static inline bool bio_integrity_endio(struct bio *bio) static inline void bio_integrity_free(struct bio *bio) { } +static inline void blk_integrity_add(struct gendisk *disk) +{ +} +static inline void blk_integrity_del(struct gendisk *disk) +{ +} #endif /* CONFIG_BLK_DEV_INTEGRITY */ unsigned long blk_rq_timeout(unsigned long timeout); @@ -214,6 +224,17 @@ static inline void elevator_exit(struct request_queue *q, struct hd_struct *__disk_get_part(struct gendisk *disk, int partno); +ssize_t part_size_show(struct device *dev, struct device_attribute *attr, + char *buf); +ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, + char *buf); +ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, + char *buf); +ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, + char *buf); +ssize_t part_fail_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count); + #ifdef CONFIG_FAIL_IO_TIMEOUT int blk_should_fake_timeout(struct request_queue *); ssize_t part_timeout_show(struct device *, struct device_attribute *, char *); @@ -354,4 +375,117 @@ void blk_queue_free_zone_bitmaps(struct request_queue *q); static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {} #endif +void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, + int rw); +void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, + int rw); +void update_io_ticks(struct hd_struct *part, unsigned long now, bool end); +struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); + +int blk_alloc_devt(struct hd_struct *part, dev_t *devt); +void blk_free_devt(dev_t devt); +void blk_invalidate_devt(dev_t devt); +char *disk_name(struct gendisk *hd, int partno, char *buf); +#define ADDPART_FLAG_NONE 0 +#define ADDPART_FLAG_RAID 1 +#define ADDPART_FLAG_WHOLEDISK 2 +struct hd_struct *__must_check add_partition(struct gendisk *disk, int partno, + sector_t start, sector_t len, int flags, + struct partition_meta_info *info); +void __delete_partition(struct percpu_ref *ref); +void delete_partition(struct gendisk *disk, int partno); +int disk_expand_part_tbl(struct gendisk *disk, int target); + +static inline int hd_ref_init(struct hd_struct *part) +{ + if (percpu_ref_init(&part->ref, __delete_partition, 0, + GFP_KERNEL)) + return -ENOMEM; + return 0; +} + +static inline void hd_struct_get(struct hd_struct *part) +{ + percpu_ref_get(&part->ref); +} + +static inline int hd_struct_try_get(struct hd_struct *part) +{ + return percpu_ref_tryget_live(&part->ref); +} + +static inline void hd_struct_put(struct hd_struct *part) +{ + percpu_ref_put(&part->ref); +} + +static inline void hd_struct_kill(struct hd_struct *part) +{ + percpu_ref_kill(&part->ref); +} + +static inline void hd_free_part(struct hd_struct *part) +{ + free_part_stats(part); + kfree(part->info); + percpu_ref_exit(&part->ref); +} + +/* + * Any access of part->nr_sects which is not protected by partition + * bd_mutex or gendisk bdev bd_mutex, should be done using this + * accessor function. + * + * Code written along the lines of i_size_read() and i_size_write(). + * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption + * on. + */ +static inline sector_t part_nr_sects_read(struct hd_struct *part) +{ +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + sector_t nr_sects; + unsigned seq; + do { + seq = read_seqcount_begin(&part->nr_sects_seq); + nr_sects = part->nr_sects; + } while (read_seqcount_retry(&part->nr_sects_seq, seq)); + return nr_sects; +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) + sector_t nr_sects; + + preempt_disable(); + nr_sects = part->nr_sects; + preempt_enable(); + return nr_sects; +#else + return part->nr_sects; +#endif +} + +/* + * Should be called with mutex lock held (typically bd_mutex) of partition + * to provide mutual exlusion among writers otherwise seqcount might be + * left in wrong state leaving the readers spinning infinitely. + */ +static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) +{ +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + write_seqcount_begin(&part->nr_sects_seq); + part->nr_sects = size; + write_seqcount_end(&part->nr_sects_seq); +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) + preempt_disable(); + part->nr_sects = size; + preempt_enable(); +#else + part->nr_sects = size; +#endif +} + +struct request_queue *__blk_alloc_queue(int node_id); + +int __bio_add_pc_page(struct request_queue *q, struct bio *bio, + struct page *page, unsigned int len, unsigned int offset, + bool *same_page); + #endif /* BLK_INTERNAL_H */ diff --git a/block/genhd.c b/block/genhd.c index 9c2e13ce0d19..06b642b23a07 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -26,7 +27,7 @@ #include "blk.h" static DEFINE_MUTEX(block_class_lock); -struct kobject *block_depr; +static struct kobject *block_depr; /* for extended dynamic devt allocation, currently only one major is used */ #define NR_EXT_DEVT (1 << MINORBITS) @@ -46,6 +47,78 @@ static void disk_add_events(struct gendisk *disk); static void disk_del_events(struct gendisk *disk); static void disk_release_events(struct gendisk *disk); +/* + * Set disk capacity and notify if the size is not currently + * zero and will not be set to zero + */ +void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, + bool revalidate) +{ + sector_t capacity = get_capacity(disk); + + set_capacity(disk, size); + + if (revalidate) + revalidate_disk(disk); + + if (capacity != size && capacity != 0 && size != 0) { + char *envp[] = { "RESIZE=1", NULL }; + + kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); + } +} + +EXPORT_SYMBOL_GPL(set_capacity_revalidate_and_notify); + +/* + * Format the device name of the indicated disk into the supplied buffer and + * return a pointer to that same buffer for convenience. + */ +char *disk_name(struct gendisk *hd, int partno, char *buf) +{ + if (!partno) + snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); + else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) + snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno); + else + snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno); + + return buf; +} + +const char *bdevname(struct block_device *bdev, char *buf) +{ + return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf); +} +EXPORT_SYMBOL(bdevname); + +#ifdef CONFIG_SMP +static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat) +{ + int cpu; + + memset(stat, 0, sizeof(struct disk_stats)); + for_each_possible_cpu(cpu) { + struct disk_stats *ptr = per_cpu_ptr(part->dkstats, cpu); + int group; + + for (group = 0; group < NR_STAT_GROUPS; group++) { + stat->nsecs[group] += ptr->nsecs[group]; + stat->sectors[group] += ptr->sectors[group]; + stat->ios[group] += ptr->ios[group]; + stat->merges[group] += ptr->merges[group]; + } + + stat->io_ticks += ptr->io_ticks; + } +} +#else /* CONFIG_SMP */ +static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat) +{ + memcpy(stat, &part->dkstats, sizeof(struct disk_stats)); +} +#endif /* CONFIG_SMP */ + void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw) { if (queue_is_mq(q)) @@ -66,7 +139,8 @@ void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, int rw) part_stat_local_dec(&part_to_disk(part)->part0, in_flight[rw]); } -unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part) +static unsigned int part_in_flight(struct request_queue *q, + struct hd_struct *part) { int cpu; unsigned int inflight; @@ -86,8 +160,8 @@ unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part) return inflight; } -void part_in_flight_rw(struct request_queue *q, struct hd_struct *part, - unsigned int inflight[2]) +static void part_in_flight_rw(struct request_queue *q, struct hd_struct *part, + unsigned int inflight[2]) { int cpu; @@ -143,7 +217,6 @@ struct hd_struct *disk_get_part(struct gendisk *disk, int partno) return part; } -EXPORT_SYMBOL_GPL(disk_get_part); /** * disk_part_iter_init - initialize partition iterator @@ -299,7 +372,6 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) } return &disk->part0; } -EXPORT_SYMBOL_GPL(disk_map_sector_rcu); /** * disk_has_partitions @@ -944,7 +1016,6 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) } return disk; } -EXPORT_SYMBOL(get_gendisk); /** * bdget_disk - do bdget() by gendisk and partition number @@ -1190,6 +1261,67 @@ static ssize_t disk_ro_show(struct device *dev, return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0); } +ssize_t part_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + + return sprintf(buf, "%llu\n", + (unsigned long long)part_nr_sects_read(p)); +} + +ssize_t part_stat_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + struct request_queue *q = part_to_disk(p)->queue; + struct disk_stats stat; + unsigned int inflight; + + part_stat_read_all(p, &stat); + inflight = part_in_flight(q, p); + + return sprintf(buf, + "%8lu %8lu %8llu %8u " + "%8lu %8lu %8llu %8u " + "%8u %8u %8u " + "%8lu %8lu %8llu %8u " + "%8lu %8u" + "\n", + stat.ios[STAT_READ], + stat.merges[STAT_READ], + (unsigned long long)stat.sectors[STAT_READ], + (unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC), + stat.ios[STAT_WRITE], + stat.merges[STAT_WRITE], + (unsigned long long)stat.sectors[STAT_WRITE], + (unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC), + inflight, + jiffies_to_msecs(stat.io_ticks), + (unsigned int)div_u64(stat.nsecs[STAT_READ] + + stat.nsecs[STAT_WRITE] + + stat.nsecs[STAT_DISCARD] + + stat.nsecs[STAT_FLUSH], + NSEC_PER_MSEC), + stat.ios[STAT_DISCARD], + stat.merges[STAT_DISCARD], + (unsigned long long)stat.sectors[STAT_DISCARD], + (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC), + stat.ios[STAT_FLUSH], + (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC)); +} + +ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + struct request_queue *q = part_to_disk(p)->queue; + unsigned int inflight[2]; + + part_in_flight_rw(q, p, inflight); + return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]); +} + static ssize_t disk_capability_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1228,10 +1360,33 @@ static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL); static DEVICE_ATTR(stat, 0444, part_stat_show, NULL); static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL); static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store); + #ifdef CONFIG_FAIL_MAKE_REQUEST +ssize_t part_fail_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + + return sprintf(buf, "%d\n", p->make_it_fail); +} + +ssize_t part_fail_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct hd_struct *p = dev_to_part(dev); + int i; + + if (count > 0 && sscanf(buf, "%d", &i) > 0) + p->make_it_fail = (i == 0) ? 0 : 1; + + return count; +} + static struct device_attribute dev_attr_fail = __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store); -#endif +#endif /* CONFIG_FAIL_MAKE_REQUEST */ + #ifdef CONFIG_FAIL_IO_TIMEOUT static struct device_attribute dev_attr_fail_timeout = __ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store); @@ -1378,8 +1533,8 @@ static char *block_devnode(struct device *dev, umode_t *mode, { struct gendisk *disk = dev_to_disk(dev); - if (disk->devnode) - return disk->devnode(disk, mode); + if (disk->fops->devnode) + return disk->fops->devnode(disk, mode); return NULL; } @@ -1405,6 +1560,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) struct hd_struct *hd; char buf[BDEVNAME_SIZE]; unsigned int inflight; + struct disk_stats stat; /* if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next) @@ -1416,7 +1572,9 @@ static int diskstats_show(struct seq_file *seqf, void *v) disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); while ((hd = disk_part_iter_next(&piter))) { + part_stat_read_all(hd, &stat); inflight = part_in_flight(gp->queue, hd); + seq_printf(seqf, "%4d %7d %s " "%lu %lu %lu %u " "%lu %lu %lu %u " @@ -1426,23 +1584,31 @@ static int diskstats_show(struct seq_file *seqf, void *v) "\n", MAJOR(part_devt(hd)), MINOR(part_devt(hd)), disk_name(gp, hd->partno, buf), - part_stat_read(hd, ios[STAT_READ]), - part_stat_read(hd, merges[STAT_READ]), - part_stat_read(hd, sectors[STAT_READ]), - (unsigned int)part_stat_read_msecs(hd, STAT_READ), - part_stat_read(hd, ios[STAT_WRITE]), - part_stat_read(hd, merges[STAT_WRITE]), - part_stat_read(hd, sectors[STAT_WRITE]), - (unsigned int)part_stat_read_msecs(hd, STAT_WRITE), + stat.ios[STAT_READ], + stat.merges[STAT_READ], + stat.sectors[STAT_READ], + (unsigned int)div_u64(stat.nsecs[STAT_READ], + NSEC_PER_MSEC), + stat.ios[STAT_WRITE], + stat.merges[STAT_WRITE], + stat.sectors[STAT_WRITE], + (unsigned int)div_u64(stat.nsecs[STAT_WRITE], + NSEC_PER_MSEC), inflight, - jiffies_to_msecs(part_stat_read(hd, io_ticks)), - jiffies_to_msecs(part_stat_read(hd, time_in_queue)), - part_stat_read(hd, ios[STAT_DISCARD]), - part_stat_read(hd, merges[STAT_DISCARD]), - part_stat_read(hd, sectors[STAT_DISCARD]), - (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD), - part_stat_read(hd, ios[STAT_FLUSH]), - (unsigned int)part_stat_read_msecs(hd, STAT_FLUSH) + jiffies_to_msecs(stat.io_ticks), + (unsigned int)div_u64(stat.nsecs[STAT_READ] + + stat.nsecs[STAT_WRITE] + + stat.nsecs[STAT_DISCARD] + + stat.nsecs[STAT_FLUSH], + NSEC_PER_MSEC), + stat.ios[STAT_DISCARD], + stat.merges[STAT_DISCARD], + stat.sectors[STAT_DISCARD], + (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], + NSEC_PER_MSEC), + stat.ios[STAT_FLUSH], + (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], + NSEC_PER_MSEC) ); } disk_part_iter_exit(&piter); @@ -1499,7 +1665,6 @@ dev_t blk_lookup_devt(const char *name, int partno) class_dev_iter_exit(&iter); return devt; } -EXPORT_SYMBOL(blk_lookup_devt); struct gendisk *__alloc_disk_node(int minors, int node_id) { diff --git a/block/ioctl.c b/block/ioctl.c index 127194b9f9bd..6e827de1a4c4 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -11,6 +11,7 @@ #include #include #include +#include "blk.h" static int blkpg_do_ioctl(struct block_device *bdev, struct blkpg_partition __user *upart, int op) diff --git a/block/opal_proto.h b/block/opal_proto.h index 325cbba2465f..b486b3ec7dc4 100644 --- a/block/opal_proto.h +++ b/block/opal_proto.h @@ -36,6 +36,7 @@ enum opal_response_token { #define DTAERROR_NO_METHOD_STATUS 0x89 #define GENERIC_HOST_SESSION_NUM 0x41 +#define FIRST_TPER_SESSION_NUM 4096 #define TPER_SYNC_SUPPORTED 0x01 #define MBR_ENABLED_MASK 0x10 diff --git a/block/partitions/Makefile b/block/partitions/Makefile index 2f276b677c81..a7f05cdb02a8 100644 --- a/block/partitions/Makefile +++ b/block/partitions/Makefile @@ -3,8 +3,7 @@ # Makefile for the linux kernel. # -obj-$(CONFIG_BLOCK) := check.o - +obj-$(CONFIG_BLOCK) += core.o obj-$(CONFIG_ACORN_PARTITION) += acorn.o obj-$(CONFIG_AMIGA_PARTITION) += amiga.o obj-$(CONFIG_ATARI_PARTITION) += atari.o diff --git a/block/partitions/acorn.c b/block/partitions/acorn.c index 7587700fad4a..c64c57b958bf 100644 --- a/block/partitions/acorn.c +++ b/block/partitions/acorn.c @@ -11,7 +11,6 @@ #include #include "check.h" -#include "acorn.h" /* * Partition types. (Oh for reusability) diff --git a/block/partitions/acorn.h b/block/partitions/acorn.h deleted file mode 100644 index 67b06601ca4c..000000000000 --- a/block/partitions/acorn.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/fs/partitions/acorn.h - * - * Copyright (C) 1996-2001 Russell King. - * - * I _hate_ this partitioning mess - why can't we have one defined - * format, and everyone stick to it? - */ - -int adfspart_check_CUMANA(struct parsed_partitions *state); -int adfspart_check_ADFS(struct parsed_partitions *state); -int adfspart_check_ICS(struct parsed_partitions *state); -int adfspart_check_POWERTEC(struct parsed_partitions *state); -int adfspart_check_EESOX(struct parsed_partitions *state); diff --git a/block/partitions/aix.c b/block/partitions/aix.c index 903f3ed175d0..c7b4fd1a4a97 100644 --- a/block/partitions/aix.c +++ b/block/partitions/aix.c @@ -6,7 +6,6 @@ */ #include "check.h" -#include "aix.h" struct lvm_rec { char lvm_id[4]; /* "_LVM" */ diff --git a/block/partitions/aix.h b/block/partitions/aix.h deleted file mode 100644 index b4449f0b9f2b..000000000000 --- a/block/partitions/aix.h +++ /dev/null @@ -1,2 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -extern int aix_partition(struct parsed_partitions *state); diff --git a/block/partitions/amiga.c b/block/partitions/amiga.c index 560936617d9c..9526491d9aed 100644 --- a/block/partitions/amiga.c +++ b/block/partitions/amiga.c @@ -14,7 +14,6 @@ #include #include "check.h" -#include "amiga.h" static __inline__ u32 checksum_block(__be32 *m, int size) @@ -42,9 +41,8 @@ int amiga_partition(struct parsed_partitions *state) goto rdb_done; data = read_part_sector(state, blk, §); if (!data) { - if (warn_no_part) - pr_err("Dev %s: unable to read RDB block %d\n", - bdevname(state->bdev, b), blk); + pr_err("Dev %s: unable to read RDB block %d\n", + bdevname(state->bdev, b), blk); res = -1; goto rdb_done; } @@ -85,9 +83,8 @@ int amiga_partition(struct parsed_partitions *state) blk *= blksize; /* Read in terms partition table understands */ data = read_part_sector(state, blk, §); if (!data) { - if (warn_no_part) - pr_err("Dev %s: unable to read partition block %d\n", - bdevname(state->bdev, b), blk); + pr_err("Dev %s: unable to read partition block %d\n", + bdevname(state->bdev, b), blk); res = -1; goto rdb_done; } diff --git a/block/partitions/amiga.h b/block/partitions/amiga.h deleted file mode 100644 index 7e63f4d9d969..000000000000 --- a/block/partitions/amiga.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * fs/partitions/amiga.h - */ - -int amiga_partition(struct parsed_partitions *state); - diff --git a/block/partitions/atari.h b/block/partitions/atari.h index 01c2b9457394..678202442fd3 100644 --- a/block/partitions/atari.h +++ b/block/partitions/atari.h @@ -34,4 +34,3 @@ struct rootsector u16 checksum; /* checksum for bootable disks */ } __packed; -int atari_partition(struct parsed_partitions *state); diff --git a/block/partitions/check.c b/block/partitions/check.c deleted file mode 100644 index ffe408fead0c..000000000000 --- a/block/partitions/check.c +++ /dev/null @@ -1,198 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * fs/partitions/check.c - * - * Code extracted from drivers/block/genhd.c - * Copyright (C) 1991-1998 Linus Torvalds - * Re-organised Feb 1998 Russell King - * - * We now have independent partition support from the - * block drivers, which allows all the partition code to - * be grouped in one location, and it to be mostly self - * contained. - * - * Added needed MAJORS for new pairs, {hdi,hdj}, {hdk,hdl} - */ - -#include -#include -#include -#include - -#include "check.h" - -#include "acorn.h" -#include "amiga.h" -#include "atari.h" -#include "ldm.h" -#include "mac.h" -#include "msdos.h" -#include "osf.h" -#include "sgi.h" -#include "sun.h" -#include "ibm.h" -#include "ultrix.h" -#include "efi.h" -#include "karma.h" -#include "sysv68.h" -#include "cmdline.h" - -int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/ - -static int (*check_part[])(struct parsed_partitions *) = { - /* - * Probe partition formats with tables at disk address 0 - * that also have an ADFS boot block at 0xdc0. - */ -#ifdef CONFIG_ACORN_PARTITION_ICS - adfspart_check_ICS, -#endif -#ifdef CONFIG_ACORN_PARTITION_POWERTEC - adfspart_check_POWERTEC, -#endif -#ifdef CONFIG_ACORN_PARTITION_EESOX - adfspart_check_EESOX, -#endif - - /* - * Now move on to formats that only have partition info at - * disk address 0xdc0. Since these may also have stale - * PC/BIOS partition tables, they need to come before - * the msdos entry. - */ -#ifdef CONFIG_ACORN_PARTITION_CUMANA - adfspart_check_CUMANA, -#endif -#ifdef CONFIG_ACORN_PARTITION_ADFS - adfspart_check_ADFS, -#endif - -#ifdef CONFIG_CMDLINE_PARTITION - cmdline_partition, -#endif -#ifdef CONFIG_EFI_PARTITION - efi_partition, /* this must come before msdos */ -#endif -#ifdef CONFIG_SGI_PARTITION - sgi_partition, -#endif -#ifdef CONFIG_LDM_PARTITION - ldm_partition, /* this must come before msdos */ -#endif -#ifdef CONFIG_MSDOS_PARTITION - msdos_partition, -#endif -#ifdef CONFIG_OSF_PARTITION - osf_partition, -#endif -#ifdef CONFIG_SUN_PARTITION - sun_partition, -#endif -#ifdef CONFIG_AMIGA_PARTITION - amiga_partition, -#endif -#ifdef CONFIG_ATARI_PARTITION - atari_partition, -#endif -#ifdef CONFIG_MAC_PARTITION - mac_partition, -#endif -#ifdef CONFIG_ULTRIX_PARTITION - ultrix_partition, -#endif -#ifdef CONFIG_IBM_PARTITION - ibm_partition, -#endif -#ifdef CONFIG_KARMA_PARTITION - karma_partition, -#endif -#ifdef CONFIG_SYSV68_PARTITION - sysv68_partition, -#endif - NULL -}; - -static struct parsed_partitions *allocate_partitions(struct gendisk *hd) -{ - struct parsed_partitions *state; - int nr; - - state = kzalloc(sizeof(*state), GFP_KERNEL); - if (!state) - return NULL; - - nr = disk_max_parts(hd); - state->parts = vzalloc(array_size(nr, sizeof(state->parts[0]))); - if (!state->parts) { - kfree(state); - return NULL; - } - - state->limit = nr; - - return state; -} - -void free_partitions(struct parsed_partitions *state) -{ - vfree(state->parts); - kfree(state); -} - -struct parsed_partitions * -check_partition(struct gendisk *hd, struct block_device *bdev) -{ - struct parsed_partitions *state; - int i, res, err; - - state = allocate_partitions(hd); - if (!state) - return NULL; - state->pp_buf = (char *)__get_free_page(GFP_KERNEL); - if (!state->pp_buf) { - free_partitions(state); - return NULL; - } - state->pp_buf[0] = '\0'; - - state->bdev = bdev; - disk_name(hd, 0, state->name); - snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name); - if (isdigit(state->name[strlen(state->name)-1])) - sprintf(state->name, "p"); - - i = res = err = 0; - while (!res && check_part[i]) { - memset(state->parts, 0, state->limit * sizeof(state->parts[0])); - res = check_part[i++](state); - if (res < 0) { - /* We have hit an I/O error which we don't report now. - * But record it, and let the others do their job. - */ - err = res; - res = 0; - } - - } - if (res > 0) { - printk(KERN_INFO "%s", state->pp_buf); - - free_page((unsigned long)state->pp_buf); - return state; - } - if (state->access_beyond_eod) - err = -ENOSPC; - if (err) - /* The partition is unrecognized. So report I/O errors if there were any */ - res = err; - if (res) { - if (warn_no_part) - strlcat(state->pp_buf, - " unable to read partition table\n", PAGE_SIZE); - printk(KERN_INFO "%s", state->pp_buf); - } - - free_page((unsigned long)state->pp_buf); - free_partitions(state); - return ERR_PTR(res); -} diff --git a/block/partitions/check.h b/block/partitions/check.h index 6042f769471a..c577e9ee67f0 100644 --- a/block/partitions/check.h +++ b/block/partitions/check.h @@ -2,6 +2,7 @@ #include #include #include +#include "../blk.h" /* * add_gd_partition adds a partitions details to the devices partition @@ -23,19 +24,14 @@ struct parsed_partitions { char *pp_buf; }; -void free_partitions(struct parsed_partitions *state); +typedef struct { + struct page *v; +} Sector; -struct parsed_partitions * -check_partition(struct gendisk *, struct block_device *); - -static inline void *read_part_sector(struct parsed_partitions *state, - sector_t n, Sector *p) +void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p); +static inline void put_dev_sector(Sector p) { - if (n >= get_capacity(state->bdev->bd_disk)) { - state->access_beyond_eod = true; - return NULL; - } - return read_dev_sector(state->bdev, n, p); + put_page(p.v); } static inline void @@ -51,5 +47,24 @@ put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) } } -extern int warn_no_part; - +/* detection routines go here in alphabetical order: */ +int adfspart_check_ADFS(struct parsed_partitions *state); +int adfspart_check_CUMANA(struct parsed_partitions *state); +int adfspart_check_EESOX(struct parsed_partitions *state); +int adfspart_check_ICS(struct parsed_partitions *state); +int adfspart_check_POWERTEC(struct parsed_partitions *state); +int aix_partition(struct parsed_partitions *state); +int amiga_partition(struct parsed_partitions *state); +int atari_partition(struct parsed_partitions *state); +int cmdline_partition(struct parsed_partitions *state); +int efi_partition(struct parsed_partitions *state); +int ibm_partition(struct parsed_partitions *); +int karma_partition(struct parsed_partitions *state); +int ldm_partition(struct parsed_partitions *state); +int mac_partition(struct parsed_partitions *state); +int msdos_partition(struct parsed_partitions *state); +int osf_partition(struct parsed_partitions *state); +int sgi_partition(struct parsed_partitions *state); +int sun_partition(struct parsed_partitions *state); +int sysv68_partition(struct parsed_partitions *state); +int ultrix_partition(struct parsed_partitions *state); diff --git a/block/partitions/cmdline.c b/block/partitions/cmdline.c index f1edd5452249..8f545c36cde4 100644 --- a/block/partitions/cmdline.c +++ b/block/partitions/cmdline.c @@ -18,7 +18,6 @@ #include #include "check.h" -#include "cmdline.h" static char *cmdline; static struct cmdline_parts *bdev_parts; diff --git a/block/partitions/cmdline.h b/block/partitions/cmdline.h deleted file mode 100644 index e64a31636a1f..000000000000 --- a/block/partitions/cmdline.h +++ /dev/null @@ -1,3 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -int cmdline_partition(struct parsed_partitions *state); diff --git a/block/partition-generic.c b/block/partitions/core.c similarity index 72% rename from block/partition-generic.c rename to block/partitions/core.c index 564fae77711d..b79c4513629b 100644 --- a/block/partition-generic.c +++ b/block/partitions/core.c @@ -1,75 +1,176 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Code extracted from drivers/block/genhd.c - * Copyright (C) 1991-1998 Linus Torvalds - * Re-organised Feb 1998 Russell King - * - * We now have independent partition support from the - * block drivers, which allows all the partition code to - * be grouped in one location, and it to be mostly self - * contained. + * Copyright (C) 1991-1998 Linus Torvalds + * Re-organised Feb 1998 Russell King */ - -#include -#include #include #include -#include #include #include +#include #include +#include +#include "check.h" -#include "partitions/check.h" - -#ifdef CONFIG_BLK_DEV_MD -extern void md_autodetect_dev(dev_t dev); +static int (*check_part[])(struct parsed_partitions *) = { + /* + * Probe partition formats with tables at disk address 0 + * that also have an ADFS boot block at 0xdc0. + */ +#ifdef CONFIG_ACORN_PARTITION_ICS + adfspart_check_ICS, +#endif +#ifdef CONFIG_ACORN_PARTITION_POWERTEC + adfspart_check_POWERTEC, +#endif +#ifdef CONFIG_ACORN_PARTITION_EESOX + adfspart_check_EESOX, #endif - -/* - * disk_name() is used by partition check code and the genhd driver. - * It formats the devicename of the indicated disk into - * the supplied buffer (of size at least 32), and returns - * a pointer to that same buffer (for convenience). - */ -char *disk_name(struct gendisk *hd, int partno, char *buf) + /* + * Now move on to formats that only have partition info at + * disk address 0xdc0. Since these may also have stale + * PC/BIOS partition tables, they need to come before + * the msdos entry. + */ +#ifdef CONFIG_ACORN_PARTITION_CUMANA + adfspart_check_CUMANA, +#endif +#ifdef CONFIG_ACORN_PARTITION_ADFS + adfspart_check_ADFS, +#endif + +#ifdef CONFIG_CMDLINE_PARTITION + cmdline_partition, +#endif +#ifdef CONFIG_EFI_PARTITION + efi_partition, /* this must come before msdos */ +#endif +#ifdef CONFIG_SGI_PARTITION + sgi_partition, +#endif +#ifdef CONFIG_LDM_PARTITION + ldm_partition, /* this must come before msdos */ +#endif +#ifdef CONFIG_MSDOS_PARTITION + msdos_partition, +#endif +#ifdef CONFIG_OSF_PARTITION + osf_partition, +#endif +#ifdef CONFIG_SUN_PARTITION + sun_partition, +#endif +#ifdef CONFIG_AMIGA_PARTITION + amiga_partition, +#endif +#ifdef CONFIG_ATARI_PARTITION + atari_partition, +#endif +#ifdef CONFIG_MAC_PARTITION + mac_partition, +#endif +#ifdef CONFIG_ULTRIX_PARTITION + ultrix_partition, +#endif +#ifdef CONFIG_IBM_PARTITION + ibm_partition, +#endif +#ifdef CONFIG_KARMA_PARTITION + karma_partition, +#endif +#ifdef CONFIG_SYSV68_PARTITION + sysv68_partition, +#endif + NULL +}; + +static struct parsed_partitions *allocate_partitions(struct gendisk *hd) { - if (!partno) - snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); - else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) - snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno); - else - snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno); + struct parsed_partitions *state; + int nr; - return buf; + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + nr = disk_max_parts(hd); + state->parts = vzalloc(array_size(nr, sizeof(state->parts[0]))); + if (!state->parts) { + kfree(state); + return NULL; + } + + state->limit = nr; + + return state; } -const char *bdevname(struct block_device *bdev, char *buf) +static void free_partitions(struct parsed_partitions *state) { - return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf); + vfree(state->parts); + kfree(state); } -EXPORT_SYMBOL(bdevname); - -const char *bio_devname(struct bio *bio, char *buf) +static struct parsed_partitions *check_partition(struct gendisk *hd, + struct block_device *bdev) { - return disk_name(bio->bi_disk, bio->bi_partno, buf); -} -EXPORT_SYMBOL(bio_devname); + struct parsed_partitions *state; + int i, res, err; -/* - * There's very little reason to use this, you should really - * have a struct block_device just about everywhere and use - * bdevname() instead. - */ -const char *__bdevname(dev_t dev, char *buffer) -{ - scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)", - MAJOR(dev), MINOR(dev)); - return buffer; -} + state = allocate_partitions(hd); + if (!state) + return NULL; + state->pp_buf = (char *)__get_free_page(GFP_KERNEL); + if (!state->pp_buf) { + free_partitions(state); + return NULL; + } + state->pp_buf[0] = '\0'; -EXPORT_SYMBOL(__bdevname); + state->bdev = bdev; + disk_name(hd, 0, state->name); + snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name); + if (isdigit(state->name[strlen(state->name)-1])) + sprintf(state->name, "p"); + + i = res = err = 0; + while (!res && check_part[i]) { + memset(state->parts, 0, state->limit * sizeof(state->parts[0])); + res = check_part[i++](state); + if (res < 0) { + /* + * We have hit an I/O error which we don't report now. + * But record it, and let the others do their job. + */ + err = res; + res = 0; + } + + } + if (res > 0) { + printk(KERN_INFO "%s", state->pp_buf); + + free_page((unsigned long)state->pp_buf); + return state; + } + if (state->access_beyond_eod) + err = -ENOSPC; + /* + * The partition is unrecognized. So report I/O errors if there were any + */ + if (err) + res = err; + if (res) { + strlcat(state->pp_buf, + " unable to read partition table\n", PAGE_SIZE); + printk(KERN_INFO "%s", state->pp_buf); + } + + free_page((unsigned long)state->pp_buf); + free_partitions(state); + return ERR_PTR(res); +} static ssize_t part_partition_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -87,13 +188,6 @@ static ssize_t part_start_show(struct device *dev, return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); } -ssize_t part_size_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p)); -} - static ssize_t part_ro_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -115,74 +209,6 @@ static ssize_t part_discard_alignment_show(struct device *dev, return sprintf(buf, "%u\n", p->discard_alignment); } -ssize_t part_stat_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct hd_struct *p = dev_to_part(dev); - struct request_queue *q = part_to_disk(p)->queue; - unsigned int inflight; - - inflight = part_in_flight(q, p); - return sprintf(buf, - "%8lu %8lu %8llu %8u " - "%8lu %8lu %8llu %8u " - "%8u %8u %8u " - "%8lu %8lu %8llu %8u " - "%8lu %8u" - "\n", - part_stat_read(p, ios[STAT_READ]), - part_stat_read(p, merges[STAT_READ]), - (unsigned long long)part_stat_read(p, sectors[STAT_READ]), - (unsigned int)part_stat_read_msecs(p, STAT_READ), - part_stat_read(p, ios[STAT_WRITE]), - part_stat_read(p, merges[STAT_WRITE]), - (unsigned long long)part_stat_read(p, sectors[STAT_WRITE]), - (unsigned int)part_stat_read_msecs(p, STAT_WRITE), - inflight, - jiffies_to_msecs(part_stat_read(p, io_ticks)), - jiffies_to_msecs(part_stat_read(p, time_in_queue)), - part_stat_read(p, ios[STAT_DISCARD]), - part_stat_read(p, merges[STAT_DISCARD]), - (unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]), - (unsigned int)part_stat_read_msecs(p, STAT_DISCARD), - part_stat_read(p, ios[STAT_FLUSH]), - (unsigned int)part_stat_read_msecs(p, STAT_FLUSH)); -} - -ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct hd_struct *p = dev_to_part(dev); - struct request_queue *q = part_to_disk(p)->queue; - unsigned int inflight[2]; - - part_in_flight_rw(q, p, inflight); - return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]); -} - -#ifdef CONFIG_FAIL_MAKE_REQUEST -ssize_t part_fail_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct hd_struct *p = dev_to_part(dev); - - return sprintf(buf, "%d\n", p->make_it_fail); -} - -ssize_t part_fail_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct hd_struct *p = dev_to_part(dev); - int i; - - if (count > 0 && sscanf(buf, "%d", &i) > 0) - p->make_it_fail = (i == 0) ? 0 : 1; - - return count; -} -#endif - static DEVICE_ATTR(partition, 0444, part_partition_show, NULL); static DEVICE_ATTR(start, 0444, part_start_show, NULL); static DEVICE_ATTR(size, 0444, part_size_show, NULL); @@ -369,7 +395,9 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, p->policy = get_disk_ro(disk); if (info) { - struct partition_meta_info *pinfo = alloc_part_info(disk); + struct partition_meta_info *pinfo; + + pinfo = kzalloc_node(sizeof(*pinfo), GFP_KERNEL, disk->node_id); if (!pinfo) { err = -ENOMEM; goto out_free_stats; @@ -428,7 +456,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, return p; out_free_info: - free_part_info(p); + kfree(p->info); out_free_stats: free_part_stats(p); out_free: @@ -525,10 +553,10 @@ static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev, return true; } -#ifdef CONFIG_BLK_DEV_MD - if (state->parts[p].flags & ADDPART_FLAG_RAID) + if (IS_BUILTIN(CONFIG_BLK_DEV_MD) && + (state->parts[p].flags & ADDPART_FLAG_RAID)) md_autodetect_dev(part_to_dev(part)->devt); -#endif + return true; } @@ -602,22 +630,29 @@ out_free_state: return ret; } -unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) +void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p) { - struct address_space *mapping = bdev->bd_inode->i_mapping; + struct address_space *mapping = state->bdev->bd_inode->i_mapping; struct page *page; - page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)), NULL); - if (!IS_ERR(page)) { - if (PageError(page)) - goto fail; - p->v = page; - return (unsigned char *)page_address(page) + ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << 9); -fail: - put_page(page); + if (n >= get_capacity(state->bdev->bd_disk)) { + state->access_beyond_eod = true; + return NULL; } + + page = read_mapping_page(mapping, + (pgoff_t)(n >> (PAGE_SHIFT - 9)), NULL); + if (IS_ERR(page)) + goto out; + if (PageError(page)) + goto out_put_page; + + p->v = page; + return (unsigned char *)page_address(page) + + ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << SECTOR_SHIFT); +out_put_page: + put_page(page); +out: p->v = NULL; return NULL; } - -EXPORT_SYMBOL(read_dev_sector); diff --git a/block/partitions/efi.h b/block/partitions/efi.h index 3e8576157575..907bac5ce8f7 100644 --- a/block/partitions/efi.h +++ b/block/partitions/efi.h @@ -113,7 +113,4 @@ typedef struct _legacy_mbr { __le16 signature; } __packed legacy_mbr; -/* Functions */ -extern int efi_partition(struct parsed_partitions *state); - #endif diff --git a/block/partitions/ibm.c b/block/partitions/ibm.c index a5d480f807f3..073faa6a69b8 100644 --- a/block/partitions/ibm.c +++ b/block/partitions/ibm.c @@ -15,7 +15,6 @@ #include #include "check.h" -#include "ibm.h" union label_t { diff --git a/block/partitions/ibm.h b/block/partitions/ibm.h deleted file mode 100644 index 8bf13febb2b6..000000000000 --- a/block/partitions/ibm.h +++ /dev/null @@ -1,2 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -int ibm_partition(struct parsed_partitions *); diff --git a/block/partitions/karma.c b/block/partitions/karma.c index 59812d705c3d..4d93512f4bd4 100644 --- a/block/partitions/karma.c +++ b/block/partitions/karma.c @@ -8,9 +8,10 @@ */ #include "check.h" -#include "karma.h" #include +#define KARMA_LABEL_MAGIC 0xAB56 + int karma_partition(struct parsed_partitions *state) { int i; diff --git a/block/partitions/karma.h b/block/partitions/karma.h deleted file mode 100644 index 48e074d417fb..000000000000 --- a/block/partitions/karma.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * fs/partitions/karma.h - */ - -#define KARMA_LABEL_MAGIC 0xAB56 - -int karma_partition(struct parsed_partitions *state); - diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c index a2d97ee1908c..6fdfcb40c537 100644 --- a/block/partitions/ldm.c +++ b/block/partitions/ldm.c @@ -14,10 +14,10 @@ #include #include #include +#include #include "ldm.h" #include "check.h" -#include "msdos.h" /* * ldm_debug/info/error/crit - Output an error message @@ -493,7 +493,7 @@ static bool ldm_validate_partition_table(struct parsed_partitions *state) { Sector sect; u8 *data; - struct partition *p; + struct msdos_partition *p; int i; bool result = false; @@ -508,7 +508,7 @@ static bool ldm_validate_partition_table(struct parsed_partitions *state) if (*(__le16*) (data + 0x01FE) != cpu_to_le16 (MSDOS_LABEL_MAGIC)) goto out; - p = (struct partition*)(data + 0x01BE); + p = (struct msdos_partition *)(data + 0x01BE); for (i = 0; i < 4; i++, p++) if (SYS_IND (p) == LDM_PARTITION) { result = true; diff --git a/block/partitions/ldm.h b/block/partitions/ldm.h index 1ca63e97bccc..841580af7f9b 100644 --- a/block/partitions/ldm.h +++ b/block/partitions/ldm.h @@ -193,7 +193,5 @@ struct ldmdb { /* Cache of the database */ struct list_head v_part; }; -int ldm_partition(struct parsed_partitions *state); - #endif /* _FS_PT_LDM_H_ */ diff --git a/block/partitions/mac.h b/block/partitions/mac.h index 453ed2964804..0e41c9da7532 100644 --- a/block/partitions/mac.h +++ b/block/partitions/mac.h @@ -42,4 +42,3 @@ struct mac_driver_desc { /* ... more stuff */ }; -int mac_partition(struct parsed_partitions *state); diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c index 82c44f7df911..8f2fcc080264 100644 --- a/block/partitions/msdos.c +++ b/block/partitions/msdos.c @@ -18,13 +18,18 @@ * Check partition table on IDE disks for common CHS translations * * Re-organised Feb 1998 Russell King + * + * BSD disklabel support by Yossi Gottlieb + * updated by Marc Espie + * + * Unixware slices support by Andrzej Krzysztofowicz + * and Krzysztof G. Baranowski */ #include +#include #include "check.h" -#include "msdos.h" #include "efi.h" -#include "aix.h" /* * Many architectures don't like unaligned accesses, while @@ -35,17 +40,17 @@ #define SYS_IND(p) get_unaligned(&p->sys_ind) -static inline sector_t nr_sects(struct partition *p) +static inline sector_t nr_sects(struct msdos_partition *p) { return (sector_t)get_unaligned_le32(&p->nr_sects); } -static inline sector_t start_sect(struct partition *p) +static inline sector_t start_sect(struct msdos_partition *p) { return (sector_t)get_unaligned_le32(&p->start_sect); } -static inline int is_extended_partition(struct partition *p) +static inline int is_extended_partition(struct msdos_partition *p) { return (SYS_IND(p) == DOS_EXTENDED_PARTITION || SYS_IND(p) == WIN98_EXTENDED_PARTITION || @@ -68,7 +73,7 @@ msdos_magic_present(unsigned char *p) #define AIX_LABEL_MAGIC4 0xC1 static int aix_magic_present(struct parsed_partitions *state, unsigned char *p) { - struct partition *pt = (struct partition *) (p + 0x1be); + struct msdos_partition *pt = (struct msdos_partition *) (p + 0x1be); Sector sect; unsigned char *d; int slot, ret = 0; @@ -78,13 +83,19 @@ static int aix_magic_present(struct parsed_partitions *state, unsigned char *p) p[2] == AIX_LABEL_MAGIC3 && p[3] == AIX_LABEL_MAGIC4)) return 0; - /* Assume the partition table is valid if Linux partitions exists */ + + /* + * Assume the partition table is valid if Linux partitions exists. + * Note that old Solaris/x86 partitions use the same indicator as + * Linux swap partitions, so we consider that a Linux partition as + * well. + */ for (slot = 1; slot <= 4; slot++, pt++) { - if (pt->sys_ind == LINUX_SWAP_PARTITION || - pt->sys_ind == LINUX_RAID_PARTITION || - pt->sys_ind == LINUX_DATA_PARTITION || - pt->sys_ind == LINUX_LVM_PARTITION || - is_extended_partition(pt)) + if (pt->sys_ind == SOLARIS_X86_PARTITION || + pt->sys_ind == LINUX_RAID_PARTITION || + pt->sys_ind == LINUX_DATA_PARTITION || + pt->sys_ind == LINUX_LVM_PARTITION || + is_extended_partition(pt)) return 0; } d = read_part_sector(state, 7, §); @@ -122,7 +133,7 @@ static void parse_extended(struct parsed_partitions *state, sector_t first_sector, sector_t first_size, u32 disksig) { - struct partition *p; + struct msdos_partition *p; Sector sect; unsigned char *data; sector_t this_sector, this_size; @@ -146,7 +157,7 @@ static void parse_extended(struct parsed_partitions *state, if (!msdos_magic_present(data + 510)) goto done; - p = (struct partition *) (data + 0x1be); + p = (struct msdos_partition *) (data + 0x1be); /* * Usually, the first entry is the real data partition, @@ -210,6 +221,30 @@ done: put_dev_sector(sect); } +#define SOLARIS_X86_NUMSLICE 16 +#define SOLARIS_X86_VTOC_SANE (0x600DDEEEUL) + +struct solaris_x86_slice { + __le16 s_tag; /* ID tag of partition */ + __le16 s_flag; /* permission flags */ + __le32 s_start; /* start sector no of partition */ + __le32 s_size; /* # of blocks in partition */ +}; + +struct solaris_x86_vtoc { + unsigned int v_bootinfo[3]; /* info needed by mboot */ + __le32 v_sanity; /* to verify vtoc sanity */ + __le32 v_version; /* layout version */ + char v_volume[8]; /* volume name */ + __le16 v_sectorsz; /* sector size in bytes */ + __le16 v_nparts; /* number of partitions */ + unsigned int v_reserved[10]; /* free space */ + struct solaris_x86_slice + v_slice[SOLARIS_X86_NUMSLICE]; /* slice headers */ + unsigned int timestamp[SOLARIS_X86_NUMSLICE]; /* timestamp */ + char v_asciilabel[128]; /* for compatibility */ +}; + /* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also indicates linux swap. Be careful before believing this is Solaris. */ @@ -265,6 +300,54 @@ static void parse_solaris_x86(struct parsed_partitions *state, #endif } +/* check against BSD src/sys/sys/disklabel.h for consistency */ +#define BSD_DISKMAGIC (0x82564557UL) /* The disk magic number */ +#define BSD_MAXPARTITIONS 16 +#define OPENBSD_MAXPARTITIONS 16 +#define BSD_FS_UNUSED 0 /* disklabel unused partition entry ID */ +struct bsd_disklabel { + __le32 d_magic; /* the magic number */ + __s16 d_type; /* drive type */ + __s16 d_subtype; /* controller/d_type specific */ + char d_typename[16]; /* type name, e.g. "eagle" */ + char d_packname[16]; /* pack identifier */ + __u32 d_secsize; /* # of bytes per sector */ + __u32 d_nsectors; /* # of data sectors per track */ + __u32 d_ntracks; /* # of tracks per cylinder */ + __u32 d_ncylinders; /* # of data cylinders per unit */ + __u32 d_secpercyl; /* # of data sectors per cylinder */ + __u32 d_secperunit; /* # of data sectors per unit */ + __u16 d_sparespertrack; /* # of spare sectors per track */ + __u16 d_sparespercyl; /* # of spare sectors per cylinder */ + __u32 d_acylinders; /* # of alt. cylinders per unit */ + __u16 d_rpm; /* rotational speed */ + __u16 d_interleave; /* hardware sector interleave */ + __u16 d_trackskew; /* sector 0 skew, per track */ + __u16 d_cylskew; /* sector 0 skew, per cylinder */ + __u32 d_headswitch; /* head switch time, usec */ + __u32 d_trkseek; /* track-to-track seek, usec */ + __u32 d_flags; /* generic flags */ +#define NDDATA 5 + __u32 d_drivedata[NDDATA]; /* drive-type specific information */ +#define NSPARE 5 + __u32 d_spare[NSPARE]; /* reserved for future use */ + __le32 d_magic2; /* the magic number (again) */ + __le16 d_checksum; /* xor of data incl. partitions */ + + /* filesystem and partition information: */ + __le16 d_npartitions; /* number of partitions in following */ + __le32 d_bbsize; /* size of boot area at sn0, bytes */ + __le32 d_sbsize; /* max size of fs superblock, bytes */ + struct bsd_partition { /* the partition table */ + __le32 p_size; /* number of sectors in partition */ + __le32 p_offset; /* starting sector */ + __le32 p_fsize; /* filesystem basic fragment size */ + __u8 p_fstype; /* filesystem type, see below */ + __u8 p_frag; /* filesystem fragments per block */ + __le16 p_cpg; /* filesystem cylinders per group */ + } d_partitions[BSD_MAXPARTITIONS]; /* actually may be more */ +}; + #if defined(CONFIG_BSD_DISKLABEL) /* * Create devices for BSD partitions listed in a disklabel, under a @@ -349,6 +432,51 @@ static void parse_openbsd(struct parsed_partitions *state, #endif } +#define UNIXWARE_DISKMAGIC (0xCA5E600DUL) /* The disk magic number */ +#define UNIXWARE_DISKMAGIC2 (0x600DDEEEUL) /* The slice table magic nr */ +#define UNIXWARE_NUMSLICE 16 +#define UNIXWARE_FS_UNUSED 0 /* Unused slice entry ID */ + +struct unixware_slice { + __le16 s_label; /* label */ + __le16 s_flags; /* permission flags */ + __le32 start_sect; /* starting sector */ + __le32 nr_sects; /* number of sectors in slice */ +}; + +struct unixware_disklabel { + __le32 d_type; /* drive type */ + __le32 d_magic; /* the magic number */ + __le32 d_version; /* version number */ + char d_serial[12]; /* serial number of the device */ + __le32 d_ncylinders; /* # of data cylinders per device */ + __le32 d_ntracks; /* # of tracks per cylinder */ + __le32 d_nsectors; /* # of data sectors per track */ + __le32 d_secsize; /* # of bytes per sector */ + __le32 d_part_start; /* # of first sector of this partition*/ + __le32 d_unknown1[12]; /* ? */ + __le32 d_alt_tbl; /* byte offset of alternate table */ + __le32 d_alt_len; /* byte length of alternate table */ + __le32 d_phys_cyl; /* # of physical cylinders per device */ + __le32 d_phys_trk; /* # of physical tracks per cylinder */ + __le32 d_phys_sec; /* # of physical sectors per track */ + __le32 d_phys_bytes; /* # of physical bytes per sector */ + __le32 d_unknown2; /* ? */ + __le32 d_unknown3; /* ? */ + __le32 d_pad[8]; /* pad */ + + struct unixware_vtoc { + __le32 v_magic; /* the magic number */ + __le32 v_version; /* version number */ + char v_name[8]; /* volume name */ + __le16 v_nslices; /* # of slices */ + __le16 v_unknown1; /* ? */ + __le32 v_reserved[10]; /* reserved */ + struct unixware_slice + v_slice[UNIXWARE_NUMSLICE]; /* slice headers */ + } vtoc; +}; /* 408 */ + /* * Create devices for Unixware partitions listed in a disklabel, under a * dos-like partition. See parse_extended() for more information. @@ -392,6 +520,8 @@ static void parse_unixware(struct parsed_partitions *state, #endif } +#define MINIX_NR_SUBPARTITIONS 4 + /* * Minix 2.0.0/2.0.2 subpartition support. * Anand Krishnamurthy @@ -403,14 +533,14 @@ static void parse_minix(struct parsed_partitions *state, #ifdef CONFIG_MINIX_SUBPARTITION Sector sect; unsigned char *data; - struct partition *p; + struct msdos_partition *p; int i; data = read_part_sector(state, offset, §); if (!data) return; - p = (struct partition *)(data + 0x1be); + p = (struct msdos_partition *)(data + 0x1be); /* The first sector of a Minix partition can have either * a secondary MBR describing its subpartitions, or @@ -454,7 +584,7 @@ int msdos_partition(struct parsed_partitions *state) sector_t sector_size = bdev_logical_block_size(state->bdev) / 512; Sector sect; unsigned char *data; - struct partition *p; + struct msdos_partition *p; struct fat_boot_sector *fb; int slot; u32 disksig; @@ -488,7 +618,7 @@ int msdos_partition(struct parsed_partitions *state) * partition table. Reject this in case the boot indicator * is not 0 or 0x80. */ - p = (struct partition *) (data + 0x1be); + p = (struct msdos_partition *) (data + 0x1be); for (slot = 1; slot <= 4; slot++, p++) { if (p->boot_ind != 0 && p->boot_ind != 0x80) { /* @@ -510,7 +640,7 @@ int msdos_partition(struct parsed_partitions *state) } #ifdef CONFIG_EFI_PARTITION - p = (struct partition *) (data + 0x1be); + p = (struct msdos_partition *) (data + 0x1be); for (slot = 1 ; slot <= 4 ; slot++, p++) { /* If this is an EFI GPT disk, msdos should ignore it. */ if (SYS_IND(p) == EFI_PMBR_OSTYPE_EFI_GPT) { @@ -519,7 +649,7 @@ int msdos_partition(struct parsed_partitions *state) } } #endif - p = (struct partition *) (data + 0x1be); + p = (struct msdos_partition *) (data + 0x1be); disksig = le32_to_cpup((__le32 *)(data + 0x1b8)); @@ -566,7 +696,7 @@ int msdos_partition(struct parsed_partitions *state) strlcat(state->pp_buf, "\n", PAGE_SIZE); /* second pass - output for each on a separate line */ - p = (struct partition *) (0x1be + data); + p = (struct msdos_partition *) (0x1be + data); for (slot = 1 ; slot <= 4 ; slot++, p++) { unsigned char id = SYS_IND(p); int n; diff --git a/block/partitions/msdos.h b/block/partitions/msdos.h deleted file mode 100644 index fcacfc486092..000000000000 --- a/block/partitions/msdos.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * fs/partitions/msdos.h - */ - -#define MSDOS_LABEL_MAGIC 0xAA55 - -int msdos_partition(struct parsed_partitions *state); - diff --git a/block/partitions/osf.c b/block/partitions/osf.c index 4b873973d6c0..84560d0765ed 100644 --- a/block/partitions/osf.c +++ b/block/partitions/osf.c @@ -9,9 +9,9 @@ */ #include "check.h" -#include "osf.h" #define MAX_OSF_PARTITIONS 18 +#define DISKLABELMAGIC (0x82564557UL) int osf_partition(struct parsed_partitions *state) { diff --git a/block/partitions/osf.h b/block/partitions/osf.h deleted file mode 100644 index 4d8088e7ea8c..000000000000 --- a/block/partitions/osf.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * fs/partitions/osf.h - */ - -#define DISKLABELMAGIC (0x82564557UL) - -int osf_partition(struct parsed_partitions *state); diff --git a/block/partitions/sgi.c b/block/partitions/sgi.c index d7b421c6e530..4273f1bb0515 100644 --- a/block/partitions/sgi.c +++ b/block/partitions/sgi.c @@ -6,7 +6,12 @@ */ #include "check.h" -#include "sgi.h" + +#define SGI_LABEL_MAGIC 0x0be5a941 + +enum { + LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */ +}; struct sgi_disklabel { __be32 magic_mushroom; /* Big fat spliff... */ diff --git a/block/partitions/sgi.h b/block/partitions/sgi.h deleted file mode 100644 index a5b77c3987cf..000000000000 --- a/block/partitions/sgi.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * fs/partitions/sgi.h - */ - -extern int sgi_partition(struct parsed_partitions *state); - -#define SGI_LABEL_MAGIC 0x0be5a941 - diff --git a/block/partitions/sun.c b/block/partitions/sun.c index 90f36724e796..47dc53eccf77 100644 --- a/block/partitions/sun.c +++ b/block/partitions/sun.c @@ -9,7 +9,14 @@ */ #include "check.h" -#include "sun.h" + +#define SUN_LABEL_MAGIC 0xDABE +#define SUN_VTOC_SANITY 0x600DDEEE + +enum { + SUN_WHOLE_DISK = 5, + LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */ +}; int sun_partition(struct parsed_partitions *state) { diff --git a/block/partitions/sun.h b/block/partitions/sun.h deleted file mode 100644 index ae1b9eed3fd7..000000000000 --- a/block/partitions/sun.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * fs/partitions/sun.h - */ - -#define SUN_LABEL_MAGIC 0xDABE -#define SUN_VTOC_SANITY 0x600DDEEE - -int sun_partition(struct parsed_partitions *state); diff --git a/block/partitions/sysv68.c b/block/partitions/sysv68.c index 92e810826b01..6f6257fd4eb4 100644 --- a/block/partitions/sysv68.c +++ b/block/partitions/sysv68.c @@ -6,7 +6,6 @@ */ #include "check.h" -#include "sysv68.h" /* * Volume ID structure: on first 256-bytes sector of disk diff --git a/block/partitions/sysv68.h b/block/partitions/sysv68.h deleted file mode 100644 index 4fb6b8ec78ae..000000000000 --- a/block/partitions/sysv68.h +++ /dev/null @@ -1,2 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -extern int sysv68_partition(struct parsed_partitions *state); diff --git a/block/partitions/ultrix.c b/block/partitions/ultrix.c index ecd0d7346c3d..4aaa81043ca0 100644 --- a/block/partitions/ultrix.c +++ b/block/partitions/ultrix.c @@ -8,7 +8,6 @@ */ #include "check.h" -#include "ultrix.h" int ultrix_partition(struct parsed_partitions *state) { diff --git a/block/partitions/ultrix.h b/block/partitions/ultrix.h deleted file mode 100644 index 9f676cead222..000000000000 --- a/block/partitions/ultrix.h +++ /dev/null @@ -1,6 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * fs/partitions/ultrix.h - */ - -int ultrix_partition(struct parsed_partitions *state); diff --git a/block/sed-opal.c b/block/sed-opal.c index 880cc57a5f6b..daafadbb88ca 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -1056,7 +1056,7 @@ static int start_opal_session_cont(struct opal_dev *dev) hsn = response_get_u64(&dev->parsed, 4); tsn = response_get_u64(&dev->parsed, 5); - if (hsn == 0 && tsn == 0) { + if (hsn != GENERIC_HOST_SESSION_NUM || tsn < FIRST_TPER_SESSION_NUM) { pr_debug("Couldn't authenticate session\n"); return -EPERM; } diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 220c5e18aba0..2fb25c348d53 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -381,12 +381,10 @@ static struct brd_device *brd_alloc(int i) spin_lock_init(&brd->brd_lock); INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC); - brd->brd_queue = blk_alloc_queue(GFP_KERNEL); + brd->brd_queue = blk_alloc_queue(brd_make_request, NUMA_NO_NODE); if (!brd->brd_queue) goto out_free_dev; - blk_queue_make_request(brd->brd_queue, brd_make_request); - /* This is so fdisk will align partitions on 4k, because of * direct_access API needing 4k alignment, returning a PFN * (This is only a problem on very small devices <= 4M, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a18155cdce41..72a7c3ea2ce3 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2801,7 +2801,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_init_set_defaults(device); - q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); + q = blk_alloc_queue(drbd_make_request, NUMA_NO_NODE); if (!q) goto out_no_q; device->rq_queue = q; @@ -2828,7 +2828,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig q->backing_dev_info->congested_fn = drbd_congested; q->backing_dev_info->congested_data = device; - blk_queue_make_request(q, drbd_make_request); blk_queue_write_cache(q, true, true); /* Setting the max_hw_sectors to an odd value of 8kibyte here This triggers a max_bio_size message upon first attach or connect */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 79e216446030..c15e7083b13a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "drbd_int.h" #include "drbd_protocol.h" #include "drbd_req.h" diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index b7f605c6e231..0dc019da1f8d 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "drbd_int.h" #include "drbd_protocol.h" diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 133060431dbd..5f13793d35ee 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -23,6 +23,7 @@ #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION static DECLARE_FAULT_ATTR(null_timeout_attr); static DECLARE_FAULT_ATTR(null_requeue_attr); +static DECLARE_FAULT_ATTR(null_init_hctx_attr); #endif static inline u64 mb_per_tick(int mbps) @@ -101,6 +102,9 @@ module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444); static char g_requeue_str[80]; module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444); + +static char g_init_hctx_str[80]; +module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444); #endif static int g_queue_mode = NULL_Q_MQ; @@ -276,7 +280,7 @@ nullb_device_##NAME##_store(struct config_item *item, const char *page, \ { \ int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY;\ struct nullb_device *dev = to_nullb_device(item); \ - TYPE uninitialized_var(new_value); \ + TYPE new_value = 0; \ int ret; \ \ ret = nullb_device_##TYPE##_attr_store(&new_value, page, count);\ @@ -302,6 +306,12 @@ static int nullb_apply_submit_queues(struct nullb_device *dev, if (!nullb) return 0; + /* + * Make sure that null_init_hctx() does not access nullb->queues[] past + * the end of that array. + */ + if (submit_queues > nr_cpu_ids) + return -EINVAL; set = nullb->tag_set; blk_mq_update_nr_hw_queues(set, submit_queues); return set->nr_hw_queues == submit_queues ? 0 : -ENOMEM; @@ -1408,12 +1418,6 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, return null_handle_cmd(cmd, sector, nr_sectors, req_op(bd->rq)); } -static const struct blk_mq_ops null_mq_ops = { - .queue_rq = null_queue_rq, - .complete = null_complete_rq, - .timeout = null_timeout_rq, -}; - static void cleanup_queue(struct nullb_queue *nq) { kfree(nq->tag_map); @@ -1430,9 +1434,56 @@ static void cleanup_queues(struct nullb *nullb) kfree(nullb->queues); } +static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) +{ + struct nullb_queue *nq = hctx->driver_data; + struct nullb *nullb = nq->dev->nullb; + + nullb->nr_queues--; +} + +static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) +{ + init_waitqueue_head(&nq->wait); + nq->queue_depth = nullb->queue_depth; + nq->dev = nullb->dev; +} + +static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, + unsigned int hctx_idx) +{ + struct nullb *nullb = hctx->queue->queuedata; + struct nullb_queue *nq; + +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + if (g_init_hctx_str[0] && should_fail(&null_init_hctx_attr, 1)) + return -EFAULT; +#endif + + nq = &nullb->queues[hctx_idx]; + hctx->driver_data = nq; + null_init_queue(nullb, nq); + nullb->nr_queues++; + + return 0; +} + +static const struct blk_mq_ops null_mq_ops = { + .queue_rq = null_queue_rq, + .complete = null_complete_rq, + .timeout = null_timeout_rq, + .init_hctx = null_init_hctx, + .exit_hctx = null_exit_hctx, +}; + static void null_del_dev(struct nullb *nullb) { - struct nullb_device *dev = nullb->dev; + struct nullb_device *dev; + + if (!nullb) + return; + + dev = nullb->dev; ida_simple_remove(&nullb_indexes, nullb->index); @@ -1473,33 +1524,6 @@ static const struct block_device_operations null_ops = { .report_zones = null_report_zones, }; -static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) -{ - BUG_ON(!nullb); - BUG_ON(!nq); - - init_waitqueue_head(&nq->wait); - nq->queue_depth = nullb->queue_depth; - nq->dev = nullb->dev; -} - -static void null_init_queues(struct nullb *nullb) -{ - struct request_queue *q = nullb->q; - struct blk_mq_hw_ctx *hctx; - struct nullb_queue *nq; - int i; - - queue_for_each_hw_ctx(q, hctx, i) { - if (!hctx->nr_ctx || !hctx->tags) - continue; - nq = &nullb->queues[i]; - hctx->driver_data = nq; - null_init_queue(nullb, nq); - nullb->nr_queues++; - } -} - static int setup_commands(struct nullb_queue *nq) { struct nullb_cmd *cmd; @@ -1526,8 +1550,7 @@ static int setup_commands(struct nullb_queue *nq) static int setup_queues(struct nullb *nullb) { - nullb->queues = kcalloc(nullb->dev->submit_queues, - sizeof(struct nullb_queue), + nullb->queues = kcalloc(nr_cpu_ids, sizeof(struct nullb_queue), GFP_KERNEL); if (!nullb->queues) return -ENOMEM; @@ -1669,6 +1692,8 @@ static bool null_setup_fault(void) return false; if (!__null_setup_fault(&null_requeue_attr, g_requeue_str)) return false; + if (!__null_setup_fault(&null_init_hctx_attr, g_init_hctx_str)) + return false; #endif return true; } @@ -1712,19 +1737,17 @@ static int null_add_dev(struct nullb_device *dev) goto out_cleanup_queues; nullb->tag_set->timeout = 5 * HZ; - nullb->q = blk_mq_init_queue(nullb->tag_set); + nullb->q = blk_mq_init_queue_data(nullb->tag_set, nullb); if (IS_ERR(nullb->q)) { rv = -ENOMEM; goto out_cleanup_tags; } - null_init_queues(nullb); } else if (dev->queue_mode == NULL_Q_BIO) { - nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node); + nullb->q = blk_alloc_queue(null_queue_bio, dev->home_node); if (!nullb->q) { rv = -ENOMEM; goto out_cleanup_queues; } - blk_queue_make_request(nullb->q, null_queue_bio); rv = init_driver_queues(nullb); if (rv) goto out_cleanup_blk_queue; @@ -1788,6 +1811,7 @@ out_cleanup_queues: cleanup_queues(nullb); out_free_nullb: kfree(nullb); + dev->nullb = NULL; out: return rv; } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 5f970a7d32c0..0b944ac96d6b 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2493,7 +2493,6 @@ static void pkt_init_queue(struct pktcdvd_device *pd) { struct request_queue *q = pd->disk->queue; - blk_queue_make_request(q, pkt_make_request); blk_queue_logical_block_size(q, CD_FRAMESIZE); blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS); q->queuedata = pd; @@ -2679,6 +2678,11 @@ static unsigned int pkt_check_events(struct gendisk *disk, return attached_disk->fops->check_events(attached_disk, clearing); } +static char *pkt_devnode(struct gendisk *disk, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "pktcdvd/%s", disk->disk_name); +} + static const struct block_device_operations pktcdvd_ops = { .owner = THIS_MODULE, .open = pkt_open, @@ -2686,13 +2690,9 @@ static const struct block_device_operations pktcdvd_ops = { .ioctl = pkt_ioctl, .compat_ioctl = blkdev_compat_ptr_ioctl, .check_events = pkt_check_events, + .devnode = pkt_devnode, }; -static char *pktcdvd_devnode(struct gendisk *gd, umode_t *mode) -{ - return kasprintf(GFP_KERNEL, "pktcdvd/%s", gd->disk_name); -} - /* * Set up mapping from pktcdvd device to CD-ROM device. */ @@ -2748,9 +2748,8 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) disk->fops = &pktcdvd_ops; disk->flags = GENHD_FL_REMOVABLE; strcpy(disk->disk_name, pd->name); - disk->devnode = pktcdvd_devnode; disk->private_data = pd; - disk->queue = blk_alloc_queue(GFP_KERNEL); + disk->queue = blk_alloc_queue(pkt_make_request, NUMA_NO_NODE); if (!disk->queue) goto out_mem2; diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 4628e1a27a2b..821d4d8b1d76 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -737,7 +737,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) ps3vram_proc_init(dev); - queue = blk_alloc_queue(GFP_KERNEL); + queue = blk_alloc_queue(ps3vram_make_request, NUMA_NO_NODE); if (!queue) { dev_err(&dev->core, "blk_alloc_queue failed\n"); error = -ENOMEM; @@ -746,7 +746,6 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) priv->queue = queue; queue->queuedata = dev; - blk_queue_make_request(queue, ps3vram_make_request); blk_queue_max_segments(queue, BLK_MAX_SEGMENTS); blk_queue_max_segment_size(queue, BLK_MAX_SEGMENT_SIZE); blk_queue_max_hw_sectors(queue, BLK_SAFE_MAX_SECTORS); diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index c47d28b2ce44..8ffa8260dcaf 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -248,7 +248,7 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card) return -ENOMEM; } - card->queue = blk_alloc_queue(GFP_KERNEL); + card->queue = blk_alloc_queue(rsxx_make_request, NUMA_NO_NODE); if (!card->queue) { dev_err(CARD_TO_DEV(card), "Failed queue alloc\n"); unregister_blkdev(card->major, DRIVER_NAME); @@ -269,7 +269,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card) blk_queue_logical_block_size(card->queue, blk_size); } - blk_queue_make_request(card->queue, rsxx_make_request); blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors); blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE); diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 4eaf97d7a170..d84e8a878df2 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -885,11 +885,9 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) card->biotail = &card->bio; spin_lock_init(&card->lock); - card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); + card->queue = blk_alloc_queue(mm_make_request, NUMA_NO_NODE); if (!card->queue) goto failed_alloc; - - blk_queue_make_request(card->queue, mm_make_request); card->queue->queuedata = card; tasklet_init(&card->tasklet, process_page, (unsigned long)card); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 0736248999b0..f9b1e70f1b31 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -388,18 +388,15 @@ static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize) cap_str_10, cap_str_2); - set_capacity(vblk->disk, capacity); + set_capacity_revalidate_and_notify(vblk->disk, capacity, true); } static void virtblk_config_changed_work(struct work_struct *work) { struct virtio_blk *vblk = container_of(work, struct virtio_blk, config_work); - char *envp[] = { "RESIZE=1", NULL }; virtblk_update_capacity(vblk, true); - revalidate_disk(vblk->disk); - kobject_uevent_env(&disk_to_dev(vblk->disk)->kobj, KOBJ_CHANGE, envp); } static void virtblk_config_changed(struct virtio_device *vdev) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 9df516a56bb2..915cf5b6388c 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2338,7 +2338,6 @@ static void blkfront_connect(struct blkfront_info *info) unsigned long sector_size; unsigned int physical_sector_size; unsigned int binfo; - char *envp[] = { "RESIZE=1", NULL }; int err, i; struct blkfront_ring_info *rinfo; @@ -2354,10 +2353,7 @@ static void blkfront_connect(struct blkfront_info *info) return; printk(KERN_INFO "Setting capacity to %Lu\n", sectors); - set_capacity(info->gd, sectors); - revalidate_disk(info->gd); - kobject_uevent_env(&disk_to_dev(info->gd)->kobj, - KOBJ_CHANGE, envp); + set_capacity_revalidate_and_notify(info->gd, sectors, true); return; case BLKIF_STATE_SUSPENDED: diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 1bdb5793842b..ebb234f36909 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "zram_drv.h" @@ -1894,7 +1895,7 @@ static int zram_add(void) #ifdef CONFIG_ZRAM_WRITEBACK spin_lock_init(&zram->wb_limit_lock); #endif - queue = blk_alloc_queue(GFP_KERNEL); + queue = blk_alloc_queue(zram_make_request, NUMA_NO_NODE); if (!queue) { pr_err("Error allocating disk queue for device %d\n", device_id); @@ -1902,8 +1903,6 @@ static int zram_add(void) goto out_free_idr; } - blk_queue_make_request(queue, zram_make_request); - /* gendisk structure */ zram->disk = alloc_disk(1); if (!zram->disk) { diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 7543e395a2c6..db38a68abb6c 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -380,12 +380,11 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) goto err_dev; } - tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); + tqueue = blk_alloc_queue(tt->make_rq, dev->q->node); if (!tqueue) { ret = -ENOMEM; goto err_disk; } - blk_queue_make_request(tqueue, tt->make_rq); strlcpy(tdisk->disk_name, create->tgtname, sizeof(tdisk->disk_name)); tdisk->flags = GENHD_FL_EXT_DEVT; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 820d8402a1dc..71a90fbec314 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1161,8 +1161,7 @@ static void quit_max_writeback_rate(struct cache_set *c, /* Cached devices - read & write stuff */ -static blk_qc_t cached_dev_make_request(struct request_queue *q, - struct bio *bio) +blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio) { struct search *s; struct bcache_device *d = bio->bi_disk->private_data; @@ -1266,7 +1265,6 @@ void bch_cached_dev_request_init(struct cached_dev *dc) { struct gendisk *g = dc->disk.disk; - g->queue->make_request_fn = cached_dev_make_request; g->queue->backing_dev_info->congested_fn = cached_dev_congested; dc->disk.cache_miss = cached_dev_cache_miss; dc->disk.ioctl = cached_dev_ioctl; @@ -1301,8 +1299,7 @@ static void flash_dev_nodata(struct closure *cl) continue_at(cl, search_free, NULL); } -static blk_qc_t flash_dev_make_request(struct request_queue *q, - struct bio *bio) +blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio) { struct search *s; struct closure *cl; diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index c64dbd7a91aa..bb005c93dd72 100644 --- a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h @@ -37,7 +37,10 @@ unsigned int bch_get_congested(const struct cache_set *c); void bch_data_insert(struct closure *cl); void bch_cached_dev_request_init(struct cached_dev *dc); +blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio); + void bch_flash_dev_request_init(struct bcache_device *d); +blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio); extern struct kmem_cache *bch_search_cache; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 0c3c5419c52b..d98354fa28e3 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -816,7 +816,7 @@ static void bcache_device_free(struct bcache_device *d) } static int bcache_device_init(struct bcache_device *d, unsigned int block_size, - sector_t sectors) + sector_t sectors, make_request_fn make_request_fn) { struct request_queue *q; const size_t max_stripes = min_t(size_t, INT_MAX, @@ -866,11 +866,10 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, d->disk->fops = &bcache_ops; d->disk->private_data = d; - q = blk_alloc_queue(GFP_KERNEL); + q = blk_alloc_queue(make_request_fn, NUMA_NO_NODE); if (!q) return -ENOMEM; - blk_queue_make_request(q, NULL); d->disk->queue = q; q->queuedata = d; q->backing_dev_info->congested_data = d; @@ -1339,7 +1338,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) q->limits.raid_partial_stripes_expensive; ret = bcache_device_init(&dc->disk, block_size, - dc->bdev->bd_part->nr_sects - dc->sb.data_offset); + dc->bdev->bd_part->nr_sects - dc->sb.data_offset, + cached_dev_make_request); if (ret) return ret; @@ -1451,7 +1451,8 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) kobject_init(&d->kobj, &bch_flash_dev_ktype); - if (bcache_device_init(d, block_bytes(c), u->sectors)) + if (bcache_device_init(d, block_bytes(c), u->sectors, + flash_dev_make_request)) goto err; bcache_device_attach(d, c, u - c->uuids); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0413018c8305..753302e83910 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -25,6 +25,7 @@ #include #include #include +#include #define DM_MSG_PREFIX "core" @@ -1938,16 +1939,15 @@ static struct mapped_device *alloc_dev(int minor) INIT_LIST_HEAD(&md->table_devices); spin_lock_init(&md->uevent_lock); - md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id); - if (!md->queue) - goto bad; - md->queue->queuedata = md; /* * default to bio-based required ->make_request_fn until DM * table is loaded and md->type established. If request-based * table is loaded: blk-mq will override accordingly. */ - blk_queue_make_request(md->queue, dm_make_request); + md->queue = blk_alloc_queue(dm_make_request, numa_node_id); + if (!md->queue) + goto bad; + md->queue->queuedata = md; md->disk = alloc_disk_node(1, md->numa_node_id); if (!md->disk) diff --git a/drivers/md/md.c b/drivers/md/md.c index 469f551863be..cd1210a0d957 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -58,8 +58,10 @@ #include #include #include +#include #include #include +#include #include #include "md.h" @@ -2491,12 +2493,12 @@ static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared) { int err = 0; struct block_device *bdev; - char b[BDEVNAME_SIZE]; bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, shared ? (struct md_rdev *)lock_rdev : rdev); if (IS_ERR(bdev)) { - pr_warn("md: could not open %s.\n", __bdevname(dev, b)); + pr_warn("md: could not open device unknown-block(%u,%u).\n", + MAJOR(dev), MINOR(dev)); return PTR_ERR(bdev); } rdev->bdev = bdev; @@ -5621,12 +5623,11 @@ static int md_alloc(dev_t dev, char *name) mddev->hold_active = UNTIL_STOP; error = -ENOMEM; - mddev->queue = blk_alloc_queue(GFP_KERNEL); + mddev->queue = blk_alloc_queue(md_make_request, NUMA_NO_NODE); if (!mddev->queue) goto abort; mddev->queue->queuedata = mddev; - blk_queue_make_request(mddev->queue, md_make_request); blk_set_stacking_limits(&mddev->queue->limits); disk = alloc_disk(1 << shift); diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 677d6f45b5c4..43751fab9d36 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -249,13 +249,12 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) internal_nlba = div_u64(nsblk->size, nsblk_internal_lbasize(nsblk)); available_disk_size = internal_nlba * nsblk_sector_size(nsblk); - q = blk_alloc_queue(GFP_KERNEL); + q = blk_alloc_queue(nd_blk_make_request, NUMA_NO_NODE); if (!q) return -ENOMEM; if (devm_add_action_or_reset(dev, nd_blk_release_queue, q)) return -ENOMEM; - blk_queue_make_request(q, nd_blk_make_request); blk_queue_max_hw_sectors(q, UINT_MAX); blk_queue_logical_block_size(q, nsblk_sector_size(nsblk)); blk_queue_flag_set(QUEUE_FLAG_NONROT, q); diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 0d04ea3d9fd7..3b09419218d6 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1521,7 +1521,7 @@ static int btt_blk_init(struct btt *btt) struct nd_namespace_common *ndns = nd_btt->ndns; /* create a new disk and request queue for btt */ - btt->btt_queue = blk_alloc_queue(GFP_KERNEL); + btt->btt_queue = blk_alloc_queue(btt_make_request, NUMA_NO_NODE); if (!btt->btt_queue) return -ENOMEM; @@ -1540,7 +1540,6 @@ static int btt_blk_init(struct btt *btt) btt->btt_disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; - blk_queue_make_request(btt->btt_queue, btt_make_request); blk_queue_logical_block_size(btt->btt_queue, btt->sector_size); blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX); blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_queue); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 4eae441f86c9..4ffc6f7ca131 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -395,7 +395,7 @@ static int pmem_attach_disk(struct device *dev, return -EBUSY; } - q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev)); + q = blk_alloc_queue(pmem_make_request, dev_to_node(dev)); if (!q) return -ENOMEM; @@ -433,7 +433,6 @@ static int pmem_attach_disk(struct device *dev, pmem->virt_addr = addr; blk_queue_write_cache(q, true, fua); - blk_queue_make_request(q, pmem_make_request); blk_queue_physical_block_size(q, PAGE_SIZE); blk_queue_logical_block_size(q, pmem_sector_size(ndns)); blk_queue_max_hw_sectors(q, UINT_MAX); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a4d8c90ee7cc..41ad07f6a564 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1810,7 +1810,7 @@ static void nvme_update_disk_info(struct gendisk *disk, ns->lba_shift > PAGE_SHIFT) capacity = 0; - set_capacity(disk, capacity); + set_capacity_revalidate_and_notify(disk, capacity, false); nvme_config_discard(disk, ns); nvme_config_write_zeroes(disk, ns); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index a11900cf3a36..a38d7f196aba 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -377,11 +377,10 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath) return 0; - q = blk_alloc_queue_node(GFP_KERNEL, ctrl->numa_node); + q = blk_alloc_queue(nvme_ns_head_make_request, ctrl->numa_node); if (!q) goto out; q->queuedata = head; - blk_queue_make_request(q, nvme_ns_head_make_request); blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* set to a default value for 512 until disk is validated */ blk_queue_logical_block_size(q, 512); diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 72a7e41f3018..cca759c918a4 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include +#include #include #include diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 63502ca537eb..80d22290f268 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -636,10 +636,10 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char } dev_info->gd->major = dcssblk_major; dev_info->gd->fops = &dcssblk_devops; - dev_info->dcssblk_queue = blk_alloc_queue(GFP_KERNEL); + dev_info->dcssblk_queue = + blk_alloc_queue(dcssblk_make_request, NUMA_NO_NODE); dev_info->gd->queue = dev_info->dcssblk_queue; dev_info->gd->private_data = dev_info; - blk_queue_make_request(dev_info->dcssblk_queue, dcssblk_make_request); blk_queue_logical_block_size(dev_info->dcssblk_queue, 4096); blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->dcssblk_queue); diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index 3df5d68d09f0..45a04daec89e 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -343,14 +343,14 @@ static int __init xpram_setup_blkdev(void) xpram_disks[i] = alloc_disk(1); if (!xpram_disks[i]) goto out; - xpram_queues[i] = blk_alloc_queue(GFP_KERNEL); + xpram_queues[i] = blk_alloc_queue(xpram_make_request, + NUMA_NO_NODE); if (!xpram_queues[i]) { put_disk(xpram_disks[i]); goto out; } blk_queue_flag_set(QUEUE_FLAG_NONROT, xpram_queues[i]); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, xpram_queues[i]); - blk_queue_make_request(xpram_queues[i], xpram_make_request); blk_queue_logical_block_size(xpram_queues[i], 4096); } diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c index 3170b295a5da..186259417449 100644 --- a/drivers/scsi/BusLogic.c +++ b/drivers/scsi/BusLogic.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -3410,9 +3411,10 @@ static int blogic_diskparam(struct scsi_device *sdev, struct block_device *dev, a partition table entry whose end_head matches one of the standard BusLogic geometry translations (64/32, 128/32, or 255/63). */ - if (*(unsigned short *) (buf + 64) == 0xAA55) { - struct partition *part1_entry = (struct partition *) buf; - struct partition *part_entry = part1_entry; + if (*(unsigned short *) (buf + 64) == MSDOS_LABEL_MAGIC) { + struct msdos_partition *part1_entry = + (struct msdos_partition *)buf; + struct msdos_partition *part_entry = part1_entry; int saved_cyl = diskparam->cylinders, part_no; unsigned char part_end_head = 0, part_end_sector = 0; diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index ee6bc2f9b80a..0443b74390cf 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -328,9 +329,9 @@ static int aac_biosparm(struct scsi_device *sdev, struct block_device *bdev, buf = scsi_bios_ptable(bdev); if (!buf) return 0; - if(*(__le16 *)(buf + 0x40) == cpu_to_le16(0xaa55)) { - struct partition *first = (struct partition * )buf; - struct partition *entry = first; + if (*(__le16 *)(buf + 0x40) == cpu_to_le16(MSDOS_LABEL_MAGIC)) { + struct msdos_partition *first = (struct msdos_partition *)buf; + struct msdos_partition *entry = first; int saved_cylinders = param->cylinders; int num; unsigned char end_head, end_sec; diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c index 57992519384e..dc4fe334efd0 100644 --- a/drivers/scsi/aic7xxx/aic79xx_osm.c +++ b/drivers/scsi/aic7xxx/aic79xx_osm.c @@ -723,24 +723,17 @@ static int ahd_linux_biosparam(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int geom[]) { - uint8_t *bh; int heads; int sectors; int cylinders; - int ret; int extended; struct ahd_softc *ahd; ahd = *((struct ahd_softc **)sdev->host->hostdata); - bh = scsi_bios_ptable(bdev); - if (bh) { - ret = scsi_partsize(bh, capacity, - &geom[2], &geom[0], &geom[1]); - kfree(bh); - if (ret != -1) - return (ret); - } + if (scsi_partsize(bdev, capacity, geom)) + return 0; + heads = 64; sectors = 32; cylinders = aic_sector_div(capacity, heads, sectors); diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c index d5c4a0d23706..2edfa0594f18 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c @@ -695,11 +695,9 @@ static int ahc_linux_biosparam(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int geom[]) { - uint8_t *bh; int heads; int sectors; int cylinders; - int ret; int extended; struct ahc_softc *ahc; u_int channel; @@ -707,14 +705,9 @@ ahc_linux_biosparam(struct scsi_device *sdev, struct block_device *bdev, ahc = *((struct ahc_softc **)sdev->host->hostdata); channel = sdev_channel(sdev); - bh = scsi_bios_ptable(bdev); - if (bh) { - ret = scsi_partsize(bh, capacity, - &geom[2], &geom[0], &geom[1]); - kfree(bh); - if (ret != -1) - return (ret); - } + if (scsi_partsize(bdev, capacity, geom)) + return 0; + heads = 64; sectors = 32; cylinders = aic_sector_div(capacity, heads, sectors); diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c index 40dc8eac0e3a..c2c79a37a9ef 100644 --- a/drivers/scsi/arcmsr/arcmsr_hba.c +++ b/drivers/scsi/arcmsr/arcmsr_hba.c @@ -353,16 +353,11 @@ static irqreturn_t arcmsr_do_interrupt(int irq, void *dev_id) static int arcmsr_bios_param(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int *geom) { - int ret, heads, sectors, cylinders, total_capacity; - unsigned char *buffer;/* return copy of block device's partition table */ + int heads, sectors, cylinders, total_capacity; + + if (scsi_partsize(bdev, capacity, geom)) + return 0; - buffer = scsi_bios_ptable(bdev); - if (buffer) { - ret = scsi_partsize(buffer, capacity, &geom[2], &geom[0], &geom[1]); - kfree(buffer); - if (ret != -1) - return ret; - } total_capacity = capacity; heads = 64; sectors = 32; diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index ff6d4aa92421..f27ffd088c8a 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -2795,11 +2795,9 @@ megaraid_biosparam(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int geom[]) { adapter_t *adapter; - unsigned char *bh; int heads; int sectors; int cylinders; - int rval; /* Get pointer to host config structure */ adapter = (adapter_t *)sdev->host->hostdata; @@ -2826,15 +2824,8 @@ megaraid_biosparam(struct scsi_device *sdev, struct block_device *bdev, geom[2] = cylinders; } else { - bh = scsi_bios_ptable(bdev); - - if( bh ) { - rval = scsi_partsize(bh, capacity, - &geom[2], &geom[0], &geom[1]); - kfree(bh); - if( rval != -1 ) - return rval; - } + if (scsi_partsize(bdev, capacity, geom)) + return 0; dev_info(&adapter->dev->dev, "invalid partition on this disk on channel %d\n", diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 44cb054d5e66..4c6c448dc2df 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -38,6 +38,7 @@ #include #include #include +#include #include @@ -4146,7 +4147,7 @@ static int scsi_debug_host_reset(struct scsi_cmnd *SCpnt) static void __init sdebug_build_parts(unsigned char *ramp, unsigned long store_size) { - struct partition *pp; + struct msdos_partition *pp; int starts[SDEBUG_MAX_PARTS + 2]; int sectors_per_part, num_sectors, k; int heads_by_sects, start_sec, end_sec; @@ -4171,7 +4172,7 @@ static void __init sdebug_build_parts(unsigned char *ramp, ramp[510] = 0x55; /* magic partition markings */ ramp[511] = 0xAA; - pp = (struct partition *)(ramp + 0x1be); + pp = (struct msdos_partition *)(ramp + 0x1be); for (k = 0; starts[k + 1]; ++k, ++pp) { start_sec = starts[k]; end_sec = starts[k + 1] - 1; diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c index e969138051c7..682cf08ab041 100644 --- a/drivers/scsi/scsicam.c +++ b/drivers/scsi/scsicam.c @@ -17,14 +17,11 @@ #include #include #include +#include #include #include - -static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds, - unsigned int *secs); - /** * scsi_bios_ptable - Read PC partition table out of first sector of device. * @dev: from this device @@ -35,105 +32,48 @@ static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds */ unsigned char *scsi_bios_ptable(struct block_device *dev) { - unsigned char *res = kmalloc(66, GFP_KERNEL); - if (res) { - struct block_device *bdev = dev->bd_contains; - Sector sect; - void *data = read_dev_sector(bdev, 0, §); - if (data) { - memcpy(res, data + 0x1be, 66); - put_dev_sector(sect); - } else { - kfree(res); - res = NULL; - } - } + struct address_space *mapping = dev->bd_contains->bd_inode->i_mapping; + unsigned char *res = NULL; + struct page *page; + + page = read_mapping_page(mapping, 0, NULL); + if (IS_ERR(page)) + return NULL; + + if (!PageError(page)) + res = kmemdup(page_address(page) + 0x1be, 66, GFP_KERNEL); + put_page(page); return res; } EXPORT_SYMBOL(scsi_bios_ptable); -/** - * scsicam_bios_param - Determine geometry of a disk in cylinders/heads/sectors. - * @bdev: which device - * @capacity: size of the disk in sectors - * @ip: return value: ip[0]=heads, ip[1]=sectors, ip[2]=cylinders - * - * Description : determine the BIOS mapping/geometry used for a drive in a - * SCSI-CAM system, storing the results in ip as required - * by the HDIO_GETGEO ioctl(). - * - * Returns : -1 on failure, 0 on success. - */ - -int scsicam_bios_param(struct block_device *bdev, sector_t capacity, int *ip) -{ - unsigned char *p; - u64 capacity64 = capacity; /* Suppress gcc warning */ - int ret; - - p = scsi_bios_ptable(bdev); - if (!p) - return -1; - - /* try to infer mapping from partition table */ - ret = scsi_partsize(p, (unsigned long)capacity, (unsigned int *)ip + 2, - (unsigned int *)ip + 0, (unsigned int *)ip + 1); - kfree(p); - - if (ret == -1 && capacity64 < (1ULL << 32)) { - /* pick some standard mapping with at most 1024 cylinders, - and at most 62 sectors per track - this works up to - 7905 MB */ - ret = setsize((unsigned long)capacity, (unsigned int *)ip + 2, - (unsigned int *)ip + 0, (unsigned int *)ip + 1); - } - - /* if something went wrong, then apparently we have to return - a geometry with more than 1024 cylinders */ - if (ret || ip[0] > 255 || ip[1] > 63) { - if ((capacity >> 11) > 65534) { - ip[0] = 255; - ip[1] = 63; - } else { - ip[0] = 64; - ip[1] = 32; - } - - if (capacity > 65535*63*255) - ip[2] = 65535; - else - ip[2] = (unsigned long)capacity / (ip[0] * ip[1]); - } - - return 0; -} -EXPORT_SYMBOL(scsicam_bios_param); - /** * scsi_partsize - Parse cylinders/heads/sectors from PC partition table - * @buf: partition table, see scsi_bios_ptable() + * @bdev: block device to parse * @capacity: size of the disk in sectors - * @cyls: put cylinders here - * @hds: put heads here - * @secs: put sectors here + * @geom: output in form of [hds, cylinders, sectors] * * Determine the BIOS mapping/geometry used to create the partition - * table, storing the results in @cyls, @hds, and @secs + * table, storing the results in @geom. * - * Returns: -1 on failure, 0 on success. + * Returns: %false on failure, %true on success. */ - -int scsi_partsize(unsigned char *buf, unsigned long capacity, - unsigned int *cyls, unsigned int *hds, unsigned int *secs) +bool scsi_partsize(struct block_device *bdev, sector_t capacity, int geom[3]) { - struct partition *p = (struct partition *)buf, *largest = NULL; - int i, largest_cyl; int cyl, ext_cyl, end_head, end_cyl, end_sector; unsigned int logical_end, physical_end, ext_physical_end; + struct msdos_partition *p, *largest = NULL; + void *buf; + int ret = false; + buf = scsi_bios_ptable(bdev); + if (!buf) + return false; if (*(unsigned short *) (buf + 64) == 0xAA55) { - for (largest_cyl = -1, i = 0; i < 4; ++i, ++p) { + int largest_cyl = -1, i; + + for (i = 0, p = buf; i < 4; i++, p++) { if (!p->sys_ind) continue; #ifdef DEBUG @@ -153,7 +93,7 @@ int scsi_partsize(unsigned char *buf, unsigned long capacity, end_sector = largest->end_sector & 0x3f; if (end_head + 1 == 0 || end_sector == 0) - return -1; + goto out_free_buf; #ifdef DEBUG printk("scsicam_bios_param : end at h = %d, c = %d, s = %d\n", @@ -178,19 +118,24 @@ int scsi_partsize(unsigned char *buf, unsigned long capacity, ,logical_end, physical_end, ext_physical_end, ext_cyl); #endif - if ((logical_end == physical_end) || - (end_cyl == 1023 && ext_physical_end == logical_end)) { - *secs = end_sector; - *hds = end_head + 1; - *cyls = capacity / ((end_head + 1) * end_sector); - return 0; + if (logical_end == physical_end || + (end_cyl == 1023 && ext_physical_end == logical_end)) { + geom[0] = end_head + 1; + geom[1] = end_sector; + geom[2] = (unsigned long)capacity / + ((end_head + 1) * end_sector); + ret = true; + goto out_free_buf; } #ifdef DEBUG printk("scsicam_bios_param : logical (%u) != physical (%u)\n", logical_end, physical_end); #endif } - return -1; + +out_free_buf: + kfree(buf); + return ret; } EXPORT_SYMBOL(scsi_partsize); @@ -258,3 +203,56 @@ static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds *hds = (unsigned int) heads; return (rv); } + +/** + * scsicam_bios_param - Determine geometry of a disk in cylinders/heads/sectors. + * @bdev: which device + * @capacity: size of the disk in sectors + * @ip: return value: ip[0]=heads, ip[1]=sectors, ip[2]=cylinders + * + * Description : determine the BIOS mapping/geometry used for a drive in a + * SCSI-CAM system, storing the results in ip as required + * by the HDIO_GETGEO ioctl(). + * + * Returns : -1 on failure, 0 on success. + */ +int scsicam_bios_param(struct block_device *bdev, sector_t capacity, int *ip) +{ + u64 capacity64 = capacity; /* Suppress gcc warning */ + int ret = 0; + + /* try to infer mapping from partition table */ + if (scsi_partsize(bdev, capacity, ip)) + return 0; + + if (capacity64 < (1ULL << 32)) { + /* + * Pick some standard mapping with at most 1024 cylinders, and + * at most 62 sectors per track - this works up to 7905 MB. + */ + ret = setsize((unsigned long)capacity, (unsigned int *)ip + 2, + (unsigned int *)ip + 0, (unsigned int *)ip + 1); + } + + /* + * If something went wrong, then apparently we have to return a geometry + * with more than 1024 cylinders. + */ + if (ret || ip[0] > 255 || ip[1] > 63) { + if ((capacity >> 11) > 65534) { + ip[0] = 255; + ip[1] = 63; + } else { + ip[0] = 64; + ip[1] = 32; + } + + if (capacity > 65535*63*255) + ip[2] = 65535; + else + ip[2] = (unsigned long)capacity / (ip[0] * ip[1]); + } + + return 0; +} +EXPORT_SYMBOL(scsicam_bios_param); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 2710a0e5ae6d..a793cb08d025 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3189,7 +3189,8 @@ static int sd_revalidate_disk(struct gendisk *disk) sdkp->first_scan = 0; - set_capacity(disk, logical_to_sectors(sdp, sdkp->capacity)); + set_capacity_revalidate_and_notify(disk, + logical_to_sectors(sdp, sdkp->capacity), false); sd_config_write_same(sdkp); kfree(buffer); diff --git a/fs/block_dev.c b/fs/block_dev.c index 69bf2fb6f7cd..9501880dff5e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1520,10 +1520,22 @@ rescan: if (ret) return ret; - if (invalidate) - set_capacity(disk, 0); - else if (disk->fops->revalidate_disk) - disk->fops->revalidate_disk(disk); + /* + * Historically we only set the capacity to zero for devices that + * support partitions (independ of actually having partitions created). + * Doing that is rather inconsistent, but changing it broke legacy + * udisks polling for legacy ide-cdrom devices. Use the crude check + * below to get the sane behavior for most device while not breaking + * userspace for this particular setup. + */ + if (invalidate) { + if (disk_part_scan_enabled(disk) || + !(disk->flags & GENHD_FL_REMOVABLE)) + set_capacity(disk, 0); + } else { + if (disk->fops->revalidate_disk) + disk->fops->revalidate_disk(disk); + } check_disk_size_change(disk, bdev, !invalidate); diff --git a/fs/buffer.c b/fs/buffer.c index b8d28370cfd7..3f5758e01e40 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3019,49 +3019,6 @@ static void end_bio_bh_io_sync(struct bio *bio) bio_put(bio); } -/* - * This allows us to do IO even on the odd last sectors - * of a device, even if the block size is some multiple - * of the physical sector size. - * - * We'll just truncate the bio to the size of the device, - * and clear the end of the buffer head manually. - * - * Truly out-of-range accesses will turn into actual IO - * errors, this only handles the "we need to be able to - * do IO at the final sector" case. - */ -void guard_bio_eod(struct bio *bio) -{ - sector_t maxsector; - struct hd_struct *part; - - rcu_read_lock(); - part = __disk_get_part(bio->bi_disk, bio->bi_partno); - if (part) - maxsector = part_nr_sects_read(part); - else - maxsector = get_capacity(bio->bi_disk); - rcu_read_unlock(); - - if (!maxsector) - return; - - /* - * If the *whole* IO is past the end of the device, - * let it through, and the IO layer will turn it into - * an EIO. - */ - if (unlikely(bio->bi_iter.bi_sector >= maxsector)) - return; - - maxsector -= bio->bi_iter.bi_sector; - if (likely((bio->bi_iter.bi_size >> 9) <= maxsector)) - return; - - bio_truncate(bio, maxsector << 9); -} - static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, enum rw_hint write_hint, struct writeback_control *wbc) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0c7c4adb664e..c8dff4c68141 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -43,7 +43,7 @@ #include #include #include - +#include #include #include @@ -927,7 +927,6 @@ void ext4_update_dynamic_rev(struct super_block *sb) static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) { struct block_device *bdev; - char b[BDEVNAME_SIZE]; bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); if (IS_ERR(bdev)) @@ -935,8 +934,9 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) return bdev; fail: - ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld", - __bdevname(dev, b), PTR_ERR(bdev)); + ext4_msg(sb, KERN_ERR, + "failed to open journal device unknown-block(%u,%u) %ld", + MAJOR(dev), MINOR(dev), PTR_ERR(bdev)); return NULL; } diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index d218ebdafa4a..04bfaf63752c 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "ext4.h" #include "ext4_jbd2.h" diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5355be6b6755..088c3e7a1080 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 65a7a432dfee..d398b2d90c6c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" diff --git a/fs/internal.h b/fs/internal.h index f3f280b952a3..4d37912a5587 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -38,7 +38,6 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait) /* * buffer.c */ -extern void guard_bio_eod(struct bio *bio); extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len, get_block_t *get_block, struct iomap *iomap); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 072156c4f895..5c766330e493 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2599,7 +2599,6 @@ static int journal_init_dev(struct super_block *super, int result; dev_t jdev; fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL; - char b[BDEVNAME_SIZE]; result = 0; @@ -2621,8 +2620,8 @@ static int journal_init_dev(struct super_block *super, result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; reiserfs_warning(super, "sh-458", - "cannot init journal device '%s': %i", - __bdevname(jdev, b), result); + "cannot init journal device unknown-block(%u,%u): %i", + MAJOR(jdev), MINOR(jdev), result); return result; } else if (jdev != super->s_dev) set_blocksize(journal->j_dev_bd, super->s_blocksize); diff --git a/include/linux/bio.h b/include/linux/bio.h index 853d92ceee64..c1c0f9ea4e63 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -441,14 +441,6 @@ void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); void bio_release_pages(struct bio *bio, bool mark_dirty); -struct rq_map_data; -extern struct bio *bio_map_user_iov(struct request_queue *, - struct iov_iter *, gfp_t); -extern void bio_unmap_user(struct bio *); -extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, - gfp_t); -extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, - gfp_t, int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); @@ -463,14 +455,9 @@ extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, extern void bio_copy_data(struct bio *dst, struct bio *src); extern void bio_list_copy_data(struct bio *dst, struct bio *src); extern void bio_free_pages(struct bio *bio); - -extern struct bio *bio_copy_user_iov(struct request_queue *, - struct rq_map_data *, - struct iov_iter *, - gfp_t); -extern int bio_uncopy_user(struct bio *); void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter); void bio_truncate(struct bio *bio, unsigned new_size); +void guard_bio_eod(struct bio *bio); static inline void zero_fill_bio(struct bio *bio) { diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 11cfd6470b1a..f389d7c724bd 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -162,7 +162,10 @@ struct blk_mq_hw_ctx { struct dentry *sched_debugfs_dir; #endif - /** @hctx_list: List of all hardware queues. */ + /** + * @hctx_list: if this hctx is not in use, this is an entry in + * q->unused_hctx_list. + */ struct list_head hctx_list; /** @@ -409,6 +412,8 @@ enum { << BLK_MQ_F_ALLOC_POLICY_START_BIT) struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); +struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, + void *queuedata); struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q, bool elevator_init); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f629d40c645c..3f27ff08483e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1063,7 +1063,6 @@ extern void blk_abort_request(struct request *); * Access functions for manipulating queue properties */ extern void blk_cleanup_queue(struct request_queue *); -extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); @@ -1140,8 +1139,7 @@ extern void blk_dump_rq_flags(struct request *, char *); extern long nr_blockdev_pages(void); bool __must_check blk_get_queue(struct request_queue *); -struct request_queue *blk_alloc_queue(gfp_t); -struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id); +struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id); extern void blk_put_queue(struct request_queue *); extern void blk_set_queue_dying(struct request_queue *); @@ -1484,15 +1482,6 @@ static inline unsigned int block_size(struct block_device *bdev) return bdev->bd_block_size; } -typedef struct {struct page *v;} Sector; - -unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); - -static inline void put_dev_sector(Sector p) -{ - put_page(p.v); -} - int kblockd_schedule_work(struct work_struct *work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); @@ -1706,6 +1695,7 @@ struct block_device_operations { void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); + char *(*devnode)(struct gendisk *disk, umode_t *mode); struct module *owner; const struct pr_ops *pr_ops; }; diff --git a/include/linux/fs.h b/include/linux/fs.h index abedbffe2c9e..593e911d1ca0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2700,7 +2700,6 @@ static inline void unregister_chrdev(unsigned int major, const char *name) #ifdef CONFIG_BLOCK #define BLKDEV_MAJOR_MAX 512 -extern const char *__bdevname(dev_t, char *buffer); extern const char *bdevname(struct block_device *bdev, char *buffer); extern struct block_device *lookup_bdev(const char *); extern void blkdev_show(struct seq_file *,off_t); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 07dc91835b98..9b3fffdf4011 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -27,39 +27,8 @@ #define part_to_dev(part) (&((part)->__dev)) extern struct device_type part_type; -extern struct kobject *block_depr; extern struct class block_class; -enum { -/* These three have identical behaviour; use the second one if DOS FDISK gets - confused about extended/logical partitions starting past cylinder 1023. */ - DOS_EXTENDED_PARTITION = 5, - LINUX_EXTENDED_PARTITION = 0x85, - WIN98_EXTENDED_PARTITION = 0x0f, - - SUN_WHOLE_DISK = DOS_EXTENDED_PARTITION, - - LINUX_SWAP_PARTITION = 0x82, - LINUX_DATA_PARTITION = 0x83, - LINUX_LVM_PARTITION = 0x8e, - LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */ - - SOLARIS_X86_PARTITION = LINUX_SWAP_PARTITION, - NEW_SOLARIS_X86_PARTITION = 0xbf, - - DM6_AUX1PARTITION = 0x51, /* no DDO: use xlated geom */ - DM6_AUX3PARTITION = 0x53, /* no DDO: use xlated geom */ - DM6_PARTITION = 0x54, /* has DDO: use xlated geom & offset */ - EZD_PARTITION = 0x55, /* EZ-DRIVE */ - - FREEBSD_PARTITION = 0xa5, /* FreeBSD Partition ID */ - OPENBSD_PARTITION = 0xa6, /* OpenBSD Partition ID */ - NETBSD_PARTITION = 0xa9, /* NetBSD Partition ID */ - BSDI_PARTITION = 0xb7, /* BSDI Partition ID */ - MINIX_PARTITION = 0x81, /* Minix Partition ID */ - UNIXWARE_PARTITION = 0x63, /* Same as GNU_HURD and SCO Unix */ -}; - #define DISK_MAX_PARTS 256 #define DISK_NAME_LEN 32 @@ -70,26 +39,12 @@ enum { #include #include -struct partition { - unsigned char boot_ind; /* 0x80 - active */ - unsigned char head; /* starting head */ - unsigned char sector; /* starting sector */ - unsigned char cyl; /* starting cylinder */ - unsigned char sys_ind; /* What partition type */ - unsigned char end_head; /* end head */ - unsigned char end_sector; /* end sector */ - unsigned char end_cyl; /* end cylinder */ - __le32 start_sect; /* starting sector counting from 0 */ - __le32 nr_sects; /* nr of sectors in partition */ -} __attribute__((packed)); - struct disk_stats { u64 nsecs[NR_STAT_GROUPS]; unsigned long sectors[NR_STAT_GROUPS]; unsigned long ios[NR_STAT_GROUPS]; unsigned long merges[NR_STAT_GROUPS]; unsigned long io_ticks; - unsigned long time_in_queue; local_t in_flight[2]; }; @@ -133,17 +88,64 @@ struct hd_struct { struct rcu_work rcu_work; }; -#define GENHD_FL_REMOVABLE 1 -/* 2 is unused */ -#define GENHD_FL_MEDIA_CHANGE_NOTIFY 4 -#define GENHD_FL_CD 8 -#define GENHD_FL_UP 16 -#define GENHD_FL_SUPPRESS_PARTITION_INFO 32 -#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ -#define GENHD_FL_NATIVE_CAPACITY 128 -#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 256 -#define GENHD_FL_NO_PART_SCAN 512 -#define GENHD_FL_HIDDEN 1024 +/** + * DOC: genhd capability flags + * + * ``GENHD_FL_REMOVABLE`` (0x0001): indicates that the block device + * gives access to removable media. + * When set, the device remains present even when media is not + * inserted. + * Must not be set for devices which are removed entirely when the + * media is removed. + * + * ``GENHD_FL_CD`` (0x0008): the block device is a CD-ROM-style + * device. + * Affects responses to the ``CDROM_GET_CAPABILITY`` ioctl. + * + * ``GENHD_FL_UP`` (0x0010): indicates that the block device is "up", + * with a similar meaning to network interfaces. + * + * ``GENHD_FL_SUPPRESS_PARTITION_INFO`` (0x0020): don't include + * partition information in ``/proc/partitions`` or in the output of + * printk_all_partitions(). + * Used for the null block device and some MMC devices. + * + * ``GENHD_FL_EXT_DEVT`` (0x0040): the driver supports extended + * dynamic ``dev_t``, i.e. it wants extended device numbers + * (``BLOCK_EXT_MAJOR``). + * This affects the maximum number of partitions. + * + * ``GENHD_FL_NATIVE_CAPACITY`` (0x0080): based on information in the + * partition table, the device's capacity has been extended to its + * native capacity; i.e. the device has hidden capacity used by one + * of the partitions (this is a flag used so that native capacity is + * only ever unlocked once). + * + * ``GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE`` (0x0100): event polling is + * blocked whenever a writer holds an exclusive lock. + * + * ``GENHD_FL_NO_PART_SCAN`` (0x0200): partition scanning is disabled. + * Used for loop devices in their default settings and some MMC + * devices. + * + * ``GENHD_FL_HIDDEN`` (0x0400): the block device is hidden; it + * doesn't produce events, doesn't appear in sysfs, and doesn't have + * an associated ``bdev``. + * Implies ``GENHD_FL_SUPPRESS_PARTITION_INFO`` and + * ``GENHD_FL_NO_PART_SCAN``. + * Used for multipath devices. + */ +#define GENHD_FL_REMOVABLE 0x0001 +/* 2 is unused (used to be GENHD_FL_DRIVERFS) */ +/* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */ +#define GENHD_FL_CD 0x0008 +#define GENHD_FL_UP 0x0010 +#define GENHD_FL_SUPPRESS_PARTITION_INFO 0x0020 +#define GENHD_FL_EXT_DEVT 0x0040 +#define GENHD_FL_NATIVE_CAPACITY 0x0080 +#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 0x0100 +#define GENHD_FL_NO_PART_SCAN 0x0200 +#define GENHD_FL_HIDDEN 0x0400 enum { DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ @@ -189,7 +191,6 @@ struct gendisk { * disks that can't be partitioned. */ char disk_name[DISK_NAME_LEN]; /* name of major driver */ - char *(*devnode)(struct gendisk *gd, umode_t *mode); unsigned short events; /* supported events */ unsigned short event_flags; /* flags related to event processing */ @@ -283,144 +284,7 @@ extern void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, unsigned int flags); extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter); extern void disk_part_iter_exit(struct disk_part_iter *piter); - -extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, - sector_t sector); -bool disk_has_partitions(struct gendisk *disk); - -/* - * Macros to operate on percpu disk statistics: - * - * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters - * and should be called between disk_stat_lock() and - * disk_stat_unlock(). - * - * part_stat_read() can be called at any time. - * - * part_stat_{add|set_all}() and {init|free}_part_stats are for - * internal use only. - */ -#ifdef CONFIG_SMP -#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); }) -#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) - -#define part_stat_get_cpu(part, field, cpu) \ - (per_cpu_ptr((part)->dkstats, (cpu))->field) - -#define part_stat_get(part, field) \ - part_stat_get_cpu(part, field, smp_processor_id()) - -#define part_stat_read(part, field) \ -({ \ - typeof((part)->dkstats->field) res = 0; \ - unsigned int _cpu; \ - for_each_possible_cpu(_cpu) \ - res += per_cpu_ptr((part)->dkstats, _cpu)->field; \ - res; \ -}) - -static inline void part_stat_set_all(struct hd_struct *part, int value) -{ - int i; - - for_each_possible_cpu(i) - memset(per_cpu_ptr(part->dkstats, i), value, - sizeof(struct disk_stats)); -} - -static inline int init_part_stats(struct hd_struct *part) -{ - part->dkstats = alloc_percpu(struct disk_stats); - if (!part->dkstats) - return 0; - return 1; -} - -static inline void free_part_stats(struct hd_struct *part) -{ - free_percpu(part->dkstats); -} - -#else /* !CONFIG_SMP */ -#define part_stat_lock() ({ rcu_read_lock(); 0; }) -#define part_stat_unlock() rcu_read_unlock() - -#define part_stat_get(part, field) ((part)->dkstats.field) -#define part_stat_get_cpu(part, field, cpu) part_stat_get(part, field) -#define part_stat_read(part, field) part_stat_get(part, field) - -static inline void part_stat_set_all(struct hd_struct *part, int value) -{ - memset(&part->dkstats, value, sizeof(struct disk_stats)); -} - -static inline int init_part_stats(struct hd_struct *part) -{ - return 1; -} - -static inline void free_part_stats(struct hd_struct *part) -{ -} - -#endif /* CONFIG_SMP */ - -#define part_stat_read_msecs(part, which) \ - div_u64(part_stat_read(part, nsecs[which]), NSEC_PER_MSEC) - -#define part_stat_read_accum(part, field) \ - (part_stat_read(part, field[STAT_READ]) + \ - part_stat_read(part, field[STAT_WRITE]) + \ - part_stat_read(part, field[STAT_DISCARD])) - -#define __part_stat_add(part, field, addnd) \ - (part_stat_get(part, field) += (addnd)) - -#define part_stat_add(part, field, addnd) do { \ - __part_stat_add((part), field, addnd); \ - if ((part)->partno) \ - __part_stat_add(&part_to_disk((part))->part0, \ - field, addnd); \ -} while (0) - -#define part_stat_dec(gendiskp, field) \ - part_stat_add(gendiskp, field, -1) -#define part_stat_inc(gendiskp, field) \ - part_stat_add(gendiskp, field, 1) -#define part_stat_sub(gendiskp, field, subnd) \ - part_stat_add(gendiskp, field, -subnd) - -#define part_stat_local_dec(gendiskp, field) \ - local_dec(&(part_stat_get(gendiskp, field))) -#define part_stat_local_inc(gendiskp, field) \ - local_inc(&(part_stat_get(gendiskp, field))) -#define part_stat_local_read(gendiskp, field) \ - local_read(&(part_stat_get(gendiskp, field))) -#define part_stat_local_read_cpu(gendiskp, field, cpu) \ - local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) - -unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part); -void part_in_flight_rw(struct request_queue *q, struct hd_struct *part, - unsigned int inflight[2]); -void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, - int rw); -void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, - int rw); - -static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk) -{ - if (disk) - return kzalloc_node(sizeof(struct partition_meta_info), - GFP_KERNEL, disk->node_id); - return kzalloc(sizeof(struct partition_meta_info), GFP_KERNEL); -} - -static inline void free_part_info(struct hd_struct *part) -{ - kfree(part->info); -} - -void update_io_ticks(struct hd_struct *part, unsigned long now); +extern bool disk_has_partitions(struct gendisk *disk); /* block/genhd.c */ extern void device_add_disk(struct device *parent, struct gendisk *disk, @@ -450,6 +314,8 @@ static inline int get_disk_ro(struct gendisk *disk) extern void disk_block_events(struct gendisk *disk); extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); +extern void set_capacity_revalidate_and_notify(struct gendisk *disk, + sector_t size, bool revalidate); extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask); /* drivers/char/random.c */ @@ -469,170 +335,11 @@ static inline void set_capacity(struct gendisk *disk, sector_t size) disk->part0.nr_sects = size; } -#ifdef CONFIG_SOLARIS_X86_PARTITION - -#define SOLARIS_X86_NUMSLICE 16 -#define SOLARIS_X86_VTOC_SANE (0x600DDEEEUL) - -struct solaris_x86_slice { - __le16 s_tag; /* ID tag of partition */ - __le16 s_flag; /* permission flags */ - __le32 s_start; /* start sector no of partition */ - __le32 s_size; /* # of blocks in partition */ -}; - -struct solaris_x86_vtoc { - unsigned int v_bootinfo[3]; /* info needed by mboot (unsupported) */ - __le32 v_sanity; /* to verify vtoc sanity */ - __le32 v_version; /* layout version */ - char v_volume[8]; /* volume name */ - __le16 v_sectorsz; /* sector size in bytes */ - __le16 v_nparts; /* number of partitions */ - unsigned int v_reserved[10]; /* free space */ - struct solaris_x86_slice - v_slice[SOLARIS_X86_NUMSLICE]; /* slice headers */ - unsigned int timestamp[SOLARIS_X86_NUMSLICE]; /* timestamp (unsupported) */ - char v_asciilabel[128]; /* for compatibility */ -}; - -#endif /* CONFIG_SOLARIS_X86_PARTITION */ - -#ifdef CONFIG_BSD_DISKLABEL -/* - * BSD disklabel support by Yossi Gottlieb - * updated by Marc Espie - */ - -/* check against BSD src/sys/sys/disklabel.h for consistency */ - -#define BSD_DISKMAGIC (0x82564557UL) /* The disk magic number */ -#define BSD_MAXPARTITIONS 16 -#define OPENBSD_MAXPARTITIONS 16 -#define BSD_FS_UNUSED 0 /* disklabel unused partition entry ID */ -struct bsd_disklabel { - __le32 d_magic; /* the magic number */ - __s16 d_type; /* drive type */ - __s16 d_subtype; /* controller/d_type specific */ - char d_typename[16]; /* type name, e.g. "eagle" */ - char d_packname[16]; /* pack identifier */ - __u32 d_secsize; /* # of bytes per sector */ - __u32 d_nsectors; /* # of data sectors per track */ - __u32 d_ntracks; /* # of tracks per cylinder */ - __u32 d_ncylinders; /* # of data cylinders per unit */ - __u32 d_secpercyl; /* # of data sectors per cylinder */ - __u32 d_secperunit; /* # of data sectors per unit */ - __u16 d_sparespertrack; /* # of spare sectors per track */ - __u16 d_sparespercyl; /* # of spare sectors per cylinder */ - __u32 d_acylinders; /* # of alt. cylinders per unit */ - __u16 d_rpm; /* rotational speed */ - __u16 d_interleave; /* hardware sector interleave */ - __u16 d_trackskew; /* sector 0 skew, per track */ - __u16 d_cylskew; /* sector 0 skew, per cylinder */ - __u32 d_headswitch; /* head switch time, usec */ - __u32 d_trkseek; /* track-to-track seek, usec */ - __u32 d_flags; /* generic flags */ -#define NDDATA 5 - __u32 d_drivedata[NDDATA]; /* drive-type specific information */ -#define NSPARE 5 - __u32 d_spare[NSPARE]; /* reserved for future use */ - __le32 d_magic2; /* the magic number (again) */ - __le16 d_checksum; /* xor of data incl. partitions */ - - /* filesystem and partition information: */ - __le16 d_npartitions; /* number of partitions in following */ - __le32 d_bbsize; /* size of boot area at sn0, bytes */ - __le32 d_sbsize; /* max size of fs superblock, bytes */ - struct bsd_partition { /* the partition table */ - __le32 p_size; /* number of sectors in partition */ - __le32 p_offset; /* starting sector */ - __le32 p_fsize; /* filesystem basic fragment size */ - __u8 p_fstype; /* filesystem type, see below */ - __u8 p_frag; /* filesystem fragments per block */ - __le16 p_cpg; /* filesystem cylinders per group */ - } d_partitions[BSD_MAXPARTITIONS]; /* actually may be more */ -}; - -#endif /* CONFIG_BSD_DISKLABEL */ - -#ifdef CONFIG_UNIXWARE_DISKLABEL -/* - * Unixware slices support by Andrzej Krzysztofowicz - * and Krzysztof G. Baranowski - */ - -#define UNIXWARE_DISKMAGIC (0xCA5E600DUL) /* The disk magic number */ -#define UNIXWARE_DISKMAGIC2 (0x600DDEEEUL) /* The slice table magic nr */ -#define UNIXWARE_NUMSLICE 16 -#define UNIXWARE_FS_UNUSED 0 /* Unused slice entry ID */ - -struct unixware_slice { - __le16 s_label; /* label */ - __le16 s_flags; /* permission flags */ - __le32 start_sect; /* starting sector */ - __le32 nr_sects; /* number of sectors in slice */ -}; - -struct unixware_disklabel { - __le32 d_type; /* drive type */ - __le32 d_magic; /* the magic number */ - __le32 d_version; /* version number */ - char d_serial[12]; /* serial number of the device */ - __le32 d_ncylinders; /* # of data cylinders per device */ - __le32 d_ntracks; /* # of tracks per cylinder */ - __le32 d_nsectors; /* # of data sectors per track */ - __le32 d_secsize; /* # of bytes per sector */ - __le32 d_part_start; /* # of first sector of this partition */ - __le32 d_unknown1[12]; /* ? */ - __le32 d_alt_tbl; /* byte offset of alternate table */ - __le32 d_alt_len; /* byte length of alternate table */ - __le32 d_phys_cyl; /* # of physical cylinders per device */ - __le32 d_phys_trk; /* # of physical tracks per cylinder */ - __le32 d_phys_sec; /* # of physical sectors per track */ - __le32 d_phys_bytes; /* # of physical bytes per sector */ - __le32 d_unknown2; /* ? */ - __le32 d_unknown3; /* ? */ - __le32 d_pad[8]; /* pad */ - - struct unixware_vtoc { - __le32 v_magic; /* the magic number */ - __le32 v_version; /* version number */ - char v_name[8]; /* volume name */ - __le16 v_nslices; /* # of slices */ - __le16 v_unknown1; /* ? */ - __le32 v_reserved[10]; /* reserved */ - struct unixware_slice - v_slice[UNIXWARE_NUMSLICE]; /* slice headers */ - } vtoc; - -}; /* 408 */ - -#endif /* CONFIG_UNIXWARE_DISKLABEL */ - -#ifdef CONFIG_MINIX_SUBPARTITION -# define MINIX_NR_SUBPARTITIONS 4 -#endif /* CONFIG_MINIX_SUBPARTITION */ - -#define ADDPART_FLAG_NONE 0 -#define ADDPART_FLAG_RAID 1 -#define ADDPART_FLAG_WHOLEDISK 2 - -extern int blk_alloc_devt(struct hd_struct *part, dev_t *devt); -extern void blk_free_devt(dev_t devt); -extern void blk_invalidate_devt(dev_t devt); extern dev_t blk_lookup_devt(const char *name, int partno); -extern char *disk_name (struct gendisk *hd, int partno, char *buf); int bdev_disk_changed(struct block_device *bdev, bool invalidate); int blk_add_partitions(struct gendisk *disk, struct block_device *bdev); int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev); -extern int disk_expand_part_tbl(struct gendisk *disk, int target); -extern struct hd_struct * __must_check add_partition(struct gendisk *disk, - int partno, sector_t start, - sector_t len, int flags, - struct partition_meta_info - *info); -extern void __delete_partition(struct percpu_ref *); -extern void delete_partition(struct gendisk *, int); extern void printk_all_partitions(void); extern struct gendisk *__alloc_disk_node(int minors, int node_id); @@ -646,20 +353,6 @@ extern void blk_register_region(dev_t devt, unsigned long range, void *data); extern void blk_unregister_region(dev_t devt, unsigned long range); -extern ssize_t part_size_show(struct device *dev, - struct device_attribute *attr, char *buf); -extern ssize_t part_stat_show(struct device *dev, - struct device_attribute *attr, char *buf); -extern ssize_t part_inflight_show(struct device *dev, - struct device_attribute *attr, char *buf); -#ifdef CONFIG_FAIL_MAKE_REQUEST -extern ssize_t part_fail_show(struct device *dev, - struct device_attribute *attr, char *buf); -extern ssize_t part_fail_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count); -#endif /* CONFIG_FAIL_MAKE_REQUEST */ - #define alloc_disk_node(minors, node_id) \ ({ \ static struct lock_class_key __key; \ @@ -678,100 +371,6 @@ extern ssize_t part_fail_store(struct device *dev, #define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE) -static inline int hd_ref_init(struct hd_struct *part) -{ - if (percpu_ref_init(&part->ref, __delete_partition, 0, - GFP_KERNEL)) - return -ENOMEM; - return 0; -} - -static inline void hd_struct_get(struct hd_struct *part) -{ - percpu_ref_get(&part->ref); -} - -static inline int hd_struct_try_get(struct hd_struct *part) -{ - return percpu_ref_tryget_live(&part->ref); -} - -static inline void hd_struct_put(struct hd_struct *part) -{ - percpu_ref_put(&part->ref); -} - -static inline void hd_struct_kill(struct hd_struct *part) -{ - percpu_ref_kill(&part->ref); -} - -static inline void hd_free_part(struct hd_struct *part) -{ - free_part_stats(part); - free_part_info(part); - percpu_ref_exit(&part->ref); -} - -/* - * Any access of part->nr_sects which is not protected by partition - * bd_mutex or gendisk bdev bd_mutex, should be done using this - * accessor function. - * - * Code written along the lines of i_size_read() and i_size_write(). - * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption - * on. - */ -static inline sector_t part_nr_sects_read(struct hd_struct *part) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - sector_t nr_sects; - unsigned seq; - do { - seq = read_seqcount_begin(&part->nr_sects_seq); - nr_sects = part->nr_sects; - } while (read_seqcount_retry(&part->nr_sects_seq, seq)); - return nr_sects; -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) - sector_t nr_sects; - - preempt_disable(); - nr_sects = part->nr_sects; - preempt_enable(); - return nr_sects; -#else - return part->nr_sects; -#endif -} - -/* - * Should be called with mutex lock held (typically bd_mutex) of partition - * to provide mutual exlusion among writers otherwise seqcount might be - * left in wrong state leaving the readers spinning infinitely. - */ -static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - write_seqcount_begin(&part->nr_sects_seq); - part->nr_sects = size; - write_seqcount_end(&part->nr_sects_seq); -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) - preempt_disable(); - part->nr_sects = size; - preempt_enable(); -#else - part->nr_sects = size; -#endif -} - -#if defined(CONFIG_BLK_DEV_INTEGRITY) -extern void blk_integrity_add(struct gendisk *); -extern void blk_integrity_del(struct gendisk *); -#else /* CONFIG_BLK_DEV_INTEGRITY */ -static inline void blk_integrity_add(struct gendisk *disk) { } -static inline void blk_integrity_del(struct gendisk *disk) { } -#endif /* CONFIG_BLK_DEV_INTEGRITY */ - #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index dba15ca8e60b..1dcd9198beb7 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -8,6 +8,7 @@ enum { ICQ_EXITED = 1 << 2, + ICQ_DESTROYED = 1 << 3, }; /* diff --git a/include/linux/msdos_partition.h b/include/linux/msdos_partition.h new file mode 100644 index 000000000000..2cb82db2a43c --- /dev/null +++ b/include/linux/msdos_partition.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MSDOS_PARTITION_H +#define _LINUX_MSDOS_PARTITION_H + +#define MSDOS_LABEL_MAGIC 0xAA55 + +struct msdos_partition { + u8 boot_ind; /* 0x80 - active */ + u8 head; /* starting head */ + u8 sector; /* starting sector */ + u8 cyl; /* starting cylinder */ + u8 sys_ind; /* What partition type */ + u8 end_head; /* end head */ + u8 end_sector; /* end sector */ + u8 end_cyl; /* end cylinder */ + __le32 start_sect; /* starting sector counting from 0 */ + __le32 nr_sects; /* nr of sectors in partition */ +} __packed; + +enum msdos_sys_ind { + /* + * These three have identical behaviour; use the second one if DOS FDISK + * gets confused about extended/logical partitions starting past + * cylinder 1023. + */ + DOS_EXTENDED_PARTITION = 5, + LINUX_EXTENDED_PARTITION = 0x85, + WIN98_EXTENDED_PARTITION = 0x0f, + + LINUX_DATA_PARTITION = 0x83, + LINUX_LVM_PARTITION = 0x8e, + LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */ + + SOLARIS_X86_PARTITION = 0x82, /* also Linux swap partitions */ + NEW_SOLARIS_X86_PARTITION = 0xbf, + + DM6_AUX1PARTITION = 0x51, /* no DDO: use xlated geom */ + DM6_AUX3PARTITION = 0x53, /* no DDO: use xlated geom */ + DM6_PARTITION = 0x54, /* has DDO: use xlated geom & offset */ + EZD_PARTITION = 0x55, /* EZ-DRIVE */ + + FREEBSD_PARTITION = 0xa5, /* FreeBSD Partition ID */ + OPENBSD_PARTITION = 0xa6, /* OpenBSD Partition ID */ + NETBSD_PARTITION = 0xa9, /* NetBSD Partition ID */ + BSDI_PARTITION = 0xb7, /* BSDI Partition ID */ + MINIX_PARTITION = 0x81, /* Minix Partition ID */ + UNIXWARE_PARTITION = 0x63, /* Same as GNU_HURD and SCO Unix */ +}; + +#endif /* LINUX_MSDOS_PARTITION_H */ diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h new file mode 100644 index 000000000000..ece607607a86 --- /dev/null +++ b/include/linux/part_stat.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PART_STAT_H +#define _LINUX_PART_STAT_H + +#include + +/* + * Macros to operate on percpu disk statistics: + * + * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters + * and should be called between disk_stat_lock() and + * disk_stat_unlock(). + * + * part_stat_read() can be called at any time. + * + * part_stat_{add|set_all}() and {init|free}_part_stats are for + * internal use only. + */ +#ifdef CONFIG_SMP +#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); }) +#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) + +#define part_stat_get_cpu(part, field, cpu) \ + (per_cpu_ptr((part)->dkstats, (cpu))->field) + +#define part_stat_get(part, field) \ + part_stat_get_cpu(part, field, smp_processor_id()) + +#define part_stat_read(part, field) \ +({ \ + typeof((part)->dkstats->field) res = 0; \ + unsigned int _cpu; \ + for_each_possible_cpu(_cpu) \ + res += per_cpu_ptr((part)->dkstats, _cpu)->field; \ + res; \ +}) + +static inline void part_stat_set_all(struct hd_struct *part, int value) +{ + int i; + + for_each_possible_cpu(i) + memset(per_cpu_ptr(part->dkstats, i), value, + sizeof(struct disk_stats)); +} + +static inline int init_part_stats(struct hd_struct *part) +{ + part->dkstats = alloc_percpu(struct disk_stats); + if (!part->dkstats) + return 0; + return 1; +} + +static inline void free_part_stats(struct hd_struct *part) +{ + free_percpu(part->dkstats); +} + +#else /* !CONFIG_SMP */ +#define part_stat_lock() ({ rcu_read_lock(); 0; }) +#define part_stat_unlock() rcu_read_unlock() + +#define part_stat_get(part, field) ((part)->dkstats.field) +#define part_stat_get_cpu(part, field, cpu) part_stat_get(part, field) +#define part_stat_read(part, field) part_stat_get(part, field) + +static inline void part_stat_set_all(struct hd_struct *part, int value) +{ + memset(&part->dkstats, value, sizeof(struct disk_stats)); +} + +static inline int init_part_stats(struct hd_struct *part) +{ + return 1; +} + +static inline void free_part_stats(struct hd_struct *part) +{ +} + +#endif /* CONFIG_SMP */ + +#define part_stat_read_accum(part, field) \ + (part_stat_read(part, field[STAT_READ]) + \ + part_stat_read(part, field[STAT_WRITE]) + \ + part_stat_read(part, field[STAT_DISCARD])) + +#define __part_stat_add(part, field, addnd) \ + (part_stat_get(part, field) += (addnd)) + +#define part_stat_add(part, field, addnd) do { \ + __part_stat_add((part), field, addnd); \ + if ((part)->partno) \ + __part_stat_add(&part_to_disk((part))->part0, \ + field, addnd); \ +} while (0) + +#define part_stat_dec(gendiskp, field) \ + part_stat_add(gendiskp, field, -1) +#define part_stat_inc(gendiskp, field) \ + part_stat_add(gendiskp, field, 1) +#define part_stat_sub(gendiskp, field, subnd) \ + part_stat_add(gendiskp, field, -subnd) + +#define part_stat_local_dec(gendiskp, field) \ + local_dec(&(part_stat_get(gendiskp, field))) +#define part_stat_local_inc(gendiskp, field) \ + local_inc(&(part_stat_get(gendiskp, field))) +#define part_stat_local_read(gendiskp, field) \ + local_read(&(part_stat_get(gendiskp, field))) +#define part_stat_local_read_cpu(gendiskp, field, cpu) \ + local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) + +#endif /* _LINUX_PART_STAT_H */ diff --git a/include/linux/raid/detect.h b/include/linux/raid/detect.h new file mode 100644 index 000000000000..37dd3f40cd31 --- /dev/null +++ b/include/linux/raid/detect.h @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +void md_autodetect_dev(dev_t dev); diff --git a/include/scsi/scsicam.h b/include/scsi/scsicam.h index 57c729254569..08edd603e521 100644 --- a/include/scsi/scsicam.h +++ b/include/scsi/scsicam.h @@ -13,8 +13,7 @@ #ifndef SCSICAM_H #define SCSICAM_H -extern int scsicam_bios_param (struct block_device *bdev, sector_t capacity, int *ip); -extern int scsi_partsize(unsigned char *buf, unsigned long capacity, - unsigned int *cyls, unsigned int *hds, unsigned int *secs); -extern unsigned char *scsi_bios_ptable(struct block_device *bdev); +int scsicam_bios_param(struct block_device *bdev, sector_t capacity, int *ip); +bool scsi_partsize(struct block_device *bdev, sector_t capacity, int geom[3]); +unsigned char *scsi_bios_ptable(struct block_device *bdev); #endif /* def SCSICAM_H */ diff --git a/init/do_mounts.c b/init/do_mounts.c index 0ae9cc22f2ae..29d326b6c29d 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -429,12 +429,10 @@ void __init mount_block_root(char *name, int flags) struct page *page = alloc_page(GFP_KERNEL); char *fs_names = page_address(page); char *p; -#ifdef CONFIG_BLOCK char b[BDEVNAME_SIZE]; -#else - const char *b = name; -#endif + scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)", + MAJOR(ROOT_DEV), MINOR(ROOT_DEV)); get_fs_names(fs_names); retry: for (p = fs_names; *p; p += strlen(p)+1) { @@ -451,9 +449,6 @@ retry: * and bad superblock on root device. * and give them a list of the available devices */ -#ifdef CONFIG_BLOCK - __bdevname(ROOT_DEV, b); -#endif printk("VFS: Cannot open root device \"%s\" or %s: error %d\n", root_device_name, b, err); printk("Please append a correct \"root=\" boot option; here are the available partitions:\n"); @@ -476,9 +471,6 @@ retry: for (p = fs_names; *p; p += strlen(p)+1) printk(" %s", p); printk("\n"); -#ifdef CONFIG_BLOCK - __bdevname(ROOT_DEV, b); -#endif panic("VFS: Unable to mount root fs on %s", b); out: put_page(page);