Merge branch 'for-2.6.37/barrier' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.37/barrier' of git://git.kernel.dk/linux-2.6-block: (46 commits) xen-blkfront: disable barrier/flush write support Added blk-lib.c and blk-barrier.c was renamed to blk-flush.c block: remove BLKDEV_IFL_WAIT aic7xxx_old: removed unused 'req' variable block: remove the BH_Eopnotsupp flag block: remove the BLKDEV_IFL_BARRIER flag block: remove the WRITE_BARRIER flag swap: do not send discards as barriers fat: do not send discards as barriers ext4: do not send discards as barriers jbd2: replace barriers with explicit flush / FUA usage jbd2: Modify ASYNC_COMMIT code to not rely on queue draining on barrier jbd: replace barriers with explicit flush / FUA usage nilfs2: replace barriers with explicit flush / FUA usage reiserfs: replace barriers with explicit flush / FUA usage gfs2: replace barriers with explicit flush / FUA usage btrfs: replace barriers with explicit flush / FUA usage xfs: replace barriers with explicit flush / FUA usage block: pass gfp_mask and flags to sb_issue_discard dm: convey that all flushes are processed as empty ...hifive-unleashed-5.1
commit
a2887097f2
|
@ -257,7 +257,8 @@ X!Earch/x86/kernel/mca_32.c
|
||||||
!Iblock/blk-sysfs.c
|
!Iblock/blk-sysfs.c
|
||||||
!Eblock/blk-settings.c
|
!Eblock/blk-settings.c
|
||||||
!Eblock/blk-exec.c
|
!Eblock/blk-exec.c
|
||||||
!Eblock/blk-barrier.c
|
!Eblock/blk-flush.c
|
||||||
|
!Eblock/blk-lib.c
|
||||||
!Eblock/blk-tag.c
|
!Eblock/blk-tag.c
|
||||||
!Iblock/blk-tag.c
|
!Iblock/blk-tag.c
|
||||||
!Eblock/blk-integrity.c
|
!Eblock/blk-integrity.c
|
||||||
|
|
|
@ -1,7 +1,5 @@
|
||||||
00-INDEX
|
00-INDEX
|
||||||
- This file
|
- This file
|
||||||
barrier.txt
|
|
||||||
- I/O Barriers
|
|
||||||
biodoc.txt
|
biodoc.txt
|
||||||
- Notes on the Generic Block Layer Rewrite in Linux 2.5
|
- Notes on the Generic Block Layer Rewrite in Linux 2.5
|
||||||
capability.txt
|
capability.txt
|
||||||
|
@ -16,3 +14,5 @@ stat.txt
|
||||||
- Block layer statistics in /sys/block/<dev>/stat
|
- Block layer statistics in /sys/block/<dev>/stat
|
||||||
switching-sched.txt
|
switching-sched.txt
|
||||||
- Switching I/O schedulers at runtime
|
- Switching I/O schedulers at runtime
|
||||||
|
writeback_cache_control.txt
|
||||||
|
- Control of volatile write back caches
|
||||||
|
|
|
@ -1,261 +0,0 @@
|
||||||
I/O Barriers
|
|
||||||
============
|
|
||||||
Tejun Heo <htejun@gmail.com>, July 22 2005
|
|
||||||
|
|
||||||
I/O barrier requests are used to guarantee ordering around the barrier
|
|
||||||
requests. Unless you're crazy enough to use disk drives for
|
|
||||||
implementing synchronization constructs (wow, sounds interesting...),
|
|
||||||
the ordering is meaningful only for write requests for things like
|
|
||||||
journal checkpoints. All requests queued before a barrier request
|
|
||||||
must be finished (made it to the physical medium) before the barrier
|
|
||||||
request is started, and all requests queued after the barrier request
|
|
||||||
must be started only after the barrier request is finished (again,
|
|
||||||
made it to the physical medium).
|
|
||||||
|
|
||||||
In other words, I/O barrier requests have the following two properties.
|
|
||||||
|
|
||||||
1. Request ordering
|
|
||||||
|
|
||||||
Requests cannot pass the barrier request. Preceding requests are
|
|
||||||
processed before the barrier and following requests after.
|
|
||||||
|
|
||||||
Depending on what features a drive supports, this can be done in one
|
|
||||||
of the following three ways.
|
|
||||||
|
|
||||||
i. For devices which have queue depth greater than 1 (TCQ devices) and
|
|
||||||
support ordered tags, block layer can just issue the barrier as an
|
|
||||||
ordered request and the lower level driver, controller and drive
|
|
||||||
itself are responsible for making sure that the ordering constraint is
|
|
||||||
met. Most modern SCSI controllers/drives should support this.
|
|
||||||
|
|
||||||
NOTE: SCSI ordered tag isn't currently used due to limitation in the
|
|
||||||
SCSI midlayer, see the following random notes section.
|
|
||||||
|
|
||||||
ii. For devices which have queue depth greater than 1 but don't
|
|
||||||
support ordered tags, block layer ensures that the requests preceding
|
|
||||||
a barrier request finish before issuing the barrier request. Also,
|
|
||||||
it defers requests following the barrier until the barrier request is
|
|
||||||
finished. Older SCSI controllers/drives and SATA drives fall in this
|
|
||||||
category.
|
|
||||||
|
|
||||||
iii. Devices which have queue depth of 1. This is a degenerate case
|
|
||||||
of ii. Just keeping issue order suffices. Ancient SCSI
|
|
||||||
controllers/drives and IDE drives are in this category.
|
|
||||||
|
|
||||||
2. Forced flushing to physical medium
|
|
||||||
|
|
||||||
Again, if you're not gonna do synchronization with disk drives (dang,
|
|
||||||
it sounds even more appealing now!), the reason you use I/O barriers
|
|
||||||
is mainly to protect filesystem integrity when power failure or some
|
|
||||||
other events abruptly stop the drive from operating and possibly make
|
|
||||||
the drive lose data in its cache. So, I/O barriers need to guarantee
|
|
||||||
that requests actually get written to non-volatile medium in order.
|
|
||||||
|
|
||||||
There are four cases,
|
|
||||||
|
|
||||||
i. No write-back cache. Keeping requests ordered is enough.
|
|
||||||
|
|
||||||
ii. Write-back cache but no flush operation. There's no way to
|
|
||||||
guarantee physical-medium commit order. This kind of device can't do
|
|
||||||
I/O barriers.
|
|
||||||
|
|
||||||
iii. Write-back cache and flush operation but no FUA (forced unit
|
|
||||||
access). We need two cache flushes - before and after the barrier
|
|
||||||
request.
|
|
||||||
|
|
||||||
iv. Write-back cache, flush operation and FUA. We still need one
|
|
||||||
flush to make sure requests preceding a barrier are written to medium,
|
|
||||||
but post-barrier flush can be avoided by using FUA write on the
|
|
||||||
barrier itself.
|
|
||||||
|
|
||||||
|
|
||||||
How to support barrier requests in drivers
|
|
||||||
------------------------------------------
|
|
||||||
|
|
||||||
All barrier handling is done inside block layer proper. All low level
|
|
||||||
drivers have to do is implement prepare_flush_fn and use one of
|
|
||||||
the following two functions to indicate what barrier type it supports
|
|
||||||
and how to prepare flush requests. Note that the term 'ordered' is
|
|
||||||
used to indicate the whole sequence of performing barrier requests
|
|
||||||
including draining and flushing.
|
|
||||||
|
|
||||||
typedef void (prepare_flush_fn)(struct request_queue *q, struct request *rq);
|
|
||||||
|
|
||||||
int blk_queue_ordered(struct request_queue *q, unsigned ordered,
|
|
||||||
prepare_flush_fn *prepare_flush_fn);
|
|
||||||
|
|
||||||
@q : the queue in question
|
|
||||||
@ordered : the ordered mode the driver/device supports
|
|
||||||
@prepare_flush_fn : this function should prepare @rq such that it
|
|
||||||
flushes cache to physical medium when executed
|
|
||||||
|
|
||||||
For example, SCSI disk driver's prepare_flush_fn looks like the
|
|
||||||
following.
|
|
||||||
|
|
||||||
static void sd_prepare_flush(struct request_queue *q, struct request *rq)
|
|
||||||
{
|
|
||||||
memset(rq->cmd, 0, sizeof(rq->cmd));
|
|
||||||
rq->cmd_type = REQ_TYPE_BLOCK_PC;
|
|
||||||
rq->timeout = SD_TIMEOUT;
|
|
||||||
rq->cmd[0] = SYNCHRONIZE_CACHE;
|
|
||||||
rq->cmd_len = 10;
|
|
||||||
}
|
|
||||||
|
|
||||||
The following seven ordered modes are supported. The following table
|
|
||||||
shows which mode should be used depending on what features a
|
|
||||||
device/driver supports. In the leftmost column of table,
|
|
||||||
QUEUE_ORDERED_ prefix is omitted from the mode names to save space.
|
|
||||||
|
|
||||||
The table is followed by description of each mode. Note that in the
|
|
||||||
descriptions of QUEUE_ORDERED_DRAIN*, '=>' is used whereas '->' is
|
|
||||||
used for QUEUE_ORDERED_TAG* descriptions. '=>' indicates that the
|
|
||||||
preceding step must be complete before proceeding to the next step.
|
|
||||||
'->' indicates that the next step can start as soon as the previous
|
|
||||||
step is issued.
|
|
||||||
|
|
||||||
write-back cache ordered tag flush FUA
|
|
||||||
-----------------------------------------------------------------------
|
|
||||||
NONE yes/no N/A no N/A
|
|
||||||
DRAIN no no N/A N/A
|
|
||||||
DRAIN_FLUSH yes no yes no
|
|
||||||
DRAIN_FUA yes no yes yes
|
|
||||||
TAG no yes N/A N/A
|
|
||||||
TAG_FLUSH yes yes yes no
|
|
||||||
TAG_FUA yes yes yes yes
|
|
||||||
|
|
||||||
|
|
||||||
QUEUE_ORDERED_NONE
|
|
||||||
I/O barriers are not needed and/or supported.
|
|
||||||
|
|
||||||
Sequence: N/A
|
|
||||||
|
|
||||||
QUEUE_ORDERED_DRAIN
|
|
||||||
Requests are ordered by draining the request queue and cache
|
|
||||||
flushing isn't needed.
|
|
||||||
|
|
||||||
Sequence: drain => barrier
|
|
||||||
|
|
||||||
QUEUE_ORDERED_DRAIN_FLUSH
|
|
||||||
Requests are ordered by draining the request queue and both
|
|
||||||
pre-barrier and post-barrier cache flushings are needed.
|
|
||||||
|
|
||||||
Sequence: drain => preflush => barrier => postflush
|
|
||||||
|
|
||||||
QUEUE_ORDERED_DRAIN_FUA
|
|
||||||
Requests are ordered by draining the request queue and
|
|
||||||
pre-barrier cache flushing is needed. By using FUA on barrier
|
|
||||||
request, post-barrier flushing can be skipped.
|
|
||||||
|
|
||||||
Sequence: drain => preflush => barrier
|
|
||||||
|
|
||||||
QUEUE_ORDERED_TAG
|
|
||||||
Requests are ordered by ordered tag and cache flushing isn't
|
|
||||||
needed.
|
|
||||||
|
|
||||||
Sequence: barrier
|
|
||||||
|
|
||||||
QUEUE_ORDERED_TAG_FLUSH
|
|
||||||
Requests are ordered by ordered tag and both pre-barrier and
|
|
||||||
post-barrier cache flushings are needed.
|
|
||||||
|
|
||||||
Sequence: preflush -> barrier -> postflush
|
|
||||||
|
|
||||||
QUEUE_ORDERED_TAG_FUA
|
|
||||||
Requests are ordered by ordered tag and pre-barrier cache
|
|
||||||
flushing is needed. By using FUA on barrier request,
|
|
||||||
post-barrier flushing can be skipped.
|
|
||||||
|
|
||||||
Sequence: preflush -> barrier
|
|
||||||
|
|
||||||
|
|
||||||
Random notes/caveats
|
|
||||||
--------------------
|
|
||||||
|
|
||||||
* SCSI layer currently can't use TAG ordering even if the drive,
|
|
||||||
controller and driver support it. The problem is that SCSI midlayer
|
|
||||||
request dispatch function is not atomic. It releases queue lock and
|
|
||||||
switches to SCSI host lock during issue and it's possible and likely to
|
|
||||||
happen in time that requests change their relative positions. Once
|
|
||||||
this problem is solved, TAG ordering can be enabled.
|
|
||||||
|
|
||||||
* Currently, no matter which ordered mode is used, there can be only
|
|
||||||
one barrier request in progress. All I/O barriers are held off by
|
|
||||||
block layer until the previous I/O barrier is complete. This doesn't
|
|
||||||
make any difference for DRAIN ordered devices, but, for TAG ordered
|
|
||||||
devices with very high command latency, passing multiple I/O barriers
|
|
||||||
to low level *might* be helpful if they are very frequent. Well, this
|
|
||||||
certainly is a non-issue. I'm writing this just to make clear that no
|
|
||||||
two I/O barriers are ever passed to the low-level driver.
|
|
||||||
|
|
||||||
* Completion order. Requests in ordered sequence are issued in order
|
|
||||||
but not required to finish in order. Barrier implementation can
|
|
||||||
handle out-of-order completion of ordered sequence. IOW, the requests
|
|
||||||
MUST be processed in order but the hardware/software completion paths
|
|
||||||
are allowed to reorder completion notifications - eg. current SCSI
|
|
||||||
midlayer doesn't preserve completion order during error handling.
|
|
||||||
|
|
||||||
* Requeueing order. Low-level drivers are free to requeue any request
|
|
||||||
after they removed it from the request queue with
|
|
||||||
blkdev_dequeue_request(). As barrier sequence should be kept in order
|
|
||||||
when requeued, generic elevator code takes care of putting requests in
|
|
||||||
order around barrier. See blk_ordered_req_seq() and
|
|
||||||
ELEVATOR_INSERT_REQUEUE handling in __elv_add_request() for details.
|
|
||||||
|
|
||||||
Note that block drivers must not requeue preceding requests while
|
|
||||||
completing latter requests in an ordered sequence. Currently, no
|
|
||||||
error checking is done against this.
|
|
||||||
|
|
||||||
* Error handling. Currently, block layer will report error to upper
|
|
||||||
layer if any of requests in an ordered sequence fails. Unfortunately,
|
|
||||||
this doesn't seem to be enough. Look at the following request flow.
|
|
||||||
QUEUE_ORDERED_TAG_FLUSH is in use.
|
|
||||||
|
|
||||||
[0] [1] [2] [3] [pre] [barrier] [post] < [4] [5] [6] ... >
|
|
||||||
still in elevator
|
|
||||||
|
|
||||||
Let's say request [2], [3] are write requests to update file system
|
|
||||||
metadata (journal or whatever) and [barrier] is used to mark that
|
|
||||||
those updates are valid. Consider the following sequence.
|
|
||||||
|
|
||||||
i. Requests [0] ~ [post] leaves the request queue and enters
|
|
||||||
low-level driver.
|
|
||||||
ii. After a while, unfortunately, something goes wrong and the
|
|
||||||
drive fails [2]. Note that any of [0], [1] and [3] could have
|
|
||||||
completed by this time, but [pre] couldn't have been finished
|
|
||||||
as the drive must process it in order and it failed before
|
|
||||||
processing that command.
|
|
||||||
iii. Error handling kicks in and determines that the error is
|
|
||||||
unrecoverable and fails [2], and resumes operation.
|
|
||||||
iv. [pre] [barrier] [post] gets processed.
|
|
||||||
v. *BOOM* power fails
|
|
||||||
|
|
||||||
The problem here is that the barrier request is *supposed* to indicate
|
|
||||||
that filesystem update requests [2] and [3] made it safely to the
|
|
||||||
physical medium and, if the machine crashes after the barrier is
|
|
||||||
written, filesystem recovery code can depend on that. Sadly, that
|
|
||||||
isn't true in this case anymore. IOW, the success of an I/O barrier
|
|
||||||
should also be dependent on success of some of the preceding requests,
|
|
||||||
where only upper layer (filesystem) knows what 'some' is.
|
|
||||||
|
|
||||||
This can be solved by implementing a way to tell the block layer which
|
|
||||||
requests affect the success of the following barrier request and
|
|
||||||
making lower level drivers resume operation on error only after
|
|
||||||
block layer tells it to do so.
|
|
||||||
|
|
||||||
As the probability of this happening is very low and the drive should
|
|
||||||
be faulty, implementing the fix is probably an overkill. But, still,
|
|
||||||
it's there.
|
|
||||||
|
|
||||||
* In previous drafts of barrier implementation, there was fallback
|
|
||||||
mechanism such that, if FUA or ordered TAG fails, less fancy ordered
|
|
||||||
mode can be selected and the failed barrier request is retried
|
|
||||||
automatically. The rationale for this feature was that as FUA is
|
|
||||||
pretty new in ATA world and ordered tag was never used widely, there
|
|
||||||
could be devices which report to support those features but choke when
|
|
||||||
actually given such requests.
|
|
||||||
|
|
||||||
This was removed for two reasons 1. it's an overkill 2. it's
|
|
||||||
impossible to implement properly when TAG ordering is used as low
|
|
||||||
level drivers resume after an error automatically. If it's ever
|
|
||||||
needed adding it back and modifying low level drivers accordingly
|
|
||||||
shouldn't be difficult.
|
|
|
@ -0,0 +1,86 @@
|
||||||
|
|
||||||
|
Explicit volatile write back cache control
|
||||||
|
=====================================
|
||||||
|
|
||||||
|
Introduction
|
||||||
|
------------
|
||||||
|
|
||||||
|
Many storage devices, especially in the consumer market, come with volatile
|
||||||
|
write back caches. That means the devices signal I/O completion to the
|
||||||
|
operating system before data actually has hit the non-volatile storage. This
|
||||||
|
behavior obviously speeds up various workloads, but it means the operating
|
||||||
|
system needs to force data out to the non-volatile storage when it performs
|
||||||
|
a data integrity operation like fsync, sync or an unmount.
|
||||||
|
|
||||||
|
The Linux block layer provides two simple mechanisms that let filesystems
|
||||||
|
control the caching behavior of the storage device. These mechanisms are
|
||||||
|
a forced cache flush, and the Force Unit Access (FUA) flag for requests.
|
||||||
|
|
||||||
|
|
||||||
|
Explicit cache flushes
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
The REQ_FLUSH flag can be ORed into the r/w flags of a bio submitted from
|
||||||
|
the filesystem and will make sure the volatile cache of the storage device
|
||||||
|
has been flushed before the actual I/O operation is started. This explicitly
|
||||||
|
guarantees that previously completed write requests are on non-volatile
|
||||||
|
storage before the flagged bio starts. In addition the REQ_FLUSH flag can be
|
||||||
|
set on an otherwise empty bio structure, which causes only an explicit cache
|
||||||
|
flush without any dependent I/O. It is recommended to use
|
||||||
|
the blkdev_issue_flush() helper for a pure cache flush.
|
||||||
|
|
||||||
|
|
||||||
|
Forced Unit Access
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
The REQ_FUA flag can be ORed into the r/w flags of a bio submitted from the
|
||||||
|
filesystem and will make sure that I/O completion for this request is only
|
||||||
|
signaled after the data has been committed to non-volatile storage.
|
||||||
|
|
||||||
|
|
||||||
|
Implementation details for filesystems
|
||||||
|
--------------------------------------
|
||||||
|
|
||||||
|
Filesystems can simply set the REQ_FLUSH and REQ_FUA bits and do not have to
|
||||||
|
worry if the underlying devices need any explicit cache flushing and how
|
||||||
|
the Forced Unit Access is implemented. The REQ_FLUSH and REQ_FUA flags
|
||||||
|
may both be set on a single bio.
|
||||||
|
|
||||||
|
|
||||||
|
Implementation details for make_request_fn based block drivers
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
These drivers will always see the REQ_FLUSH and REQ_FUA bits as they sit
|
||||||
|
directly below the submit_bio interface. For remapping drivers the REQ_FUA
|
||||||
|
bits need to be propagated to underlying devices, and a global flush needs
|
||||||
|
to be implemented for bios with the REQ_FLUSH bit set. For real device
|
||||||
|
drivers that do not have a volatile cache the REQ_FLUSH and REQ_FUA bits
|
||||||
|
on non-empty bios can simply be ignored, and REQ_FLUSH requests without
|
||||||
|
data can be completed successfully without doing any work. Drivers for
|
||||||
|
devices with volatile caches need to implement the support for these
|
||||||
|
flags themselves without any help from the block layer.
|
||||||
|
|
||||||
|
|
||||||
|
Implementation details for request_fn based block drivers
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
For devices that do not support volatile write caches there is no driver
|
||||||
|
support required, the block layer completes empty REQ_FLUSH requests before
|
||||||
|
entering the driver and strips off the REQ_FLUSH and REQ_FUA bits from
|
||||||
|
requests that have a payload. For devices with volatile write caches the
|
||||||
|
driver needs to tell the block layer that it supports flushing caches by
|
||||||
|
doing:
|
||||||
|
|
||||||
|
blk_queue_flush(sdkp->disk->queue, REQ_FLUSH);
|
||||||
|
|
||||||
|
and handle empty REQ_FLUSH requests in its prep_fn/request_fn. Note that
|
||||||
|
REQ_FLUSH requests with a payload are automatically turned into a sequence
|
||||||
|
of an empty REQ_FLUSH request followed by the actual write by the block
|
||||||
|
layer. For devices that also support the FUA bit the block layer needs
|
||||||
|
to be told to pass through the REQ_FUA bit using:
|
||||||
|
|
||||||
|
blk_queue_flush(sdkp->disk->queue, REQ_FLUSH | REQ_FUA);
|
||||||
|
|
||||||
|
and the driver must handle write requests that have the REQ_FUA bit set
|
||||||
|
in prep_fn/request_fn. If the FUA bit is not natively supported the block
|
||||||
|
layer turns it into an empty REQ_FLUSH request after the actual write.
|
|
@ -1639,15 +1639,6 @@ static void blk_request(struct virtqueue *vq)
|
||||||
*/
|
*/
|
||||||
off = out->sector * 512;
|
off = out->sector * 512;
|
||||||
|
|
||||||
/*
|
|
||||||
* The block device implements "barriers", where the Guest indicates
|
|
||||||
* that it wants all previous writes to occur before this write. We
|
|
||||||
* don't have a way of asking our kernel to do a barrier, so we just
|
|
||||||
* synchronize all the data in the file. Pretty poor, no?
|
|
||||||
*/
|
|
||||||
if (out->type & VIRTIO_BLK_T_BARRIER)
|
|
||||||
fdatasync(vblk->fd);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In general the virtio block driver is allowed to try SCSI commands.
|
* In general the virtio block driver is allowed to try SCSI commands.
|
||||||
* It'd be nice if we supported eject, for example, but we don't.
|
* It'd be nice if we supported eject, for example, but we don't.
|
||||||
|
@ -1680,6 +1671,13 @@ static void blk_request(struct virtqueue *vq)
|
||||||
/* Die, bad Guest, die. */
|
/* Die, bad Guest, die. */
|
||||||
errx(1, "Write past end %llu+%u", off, ret);
|
errx(1, "Write past end %llu+%u", off, ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
wlen = sizeof(*in);
|
||||||
|
*in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);
|
||||||
|
} else if (out->type & VIRTIO_BLK_T_FLUSH) {
|
||||||
|
/* Flush */
|
||||||
|
ret = fdatasync(vblk->fd);
|
||||||
|
verbose("FLUSH fdatasync: %i\n", ret);
|
||||||
wlen = sizeof(*in);
|
wlen = sizeof(*in);
|
||||||
*in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);
|
*in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);
|
||||||
} else {
|
} else {
|
||||||
|
@ -1703,15 +1701,6 @@ static void blk_request(struct virtqueue *vq)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* OK, so we noted that it was pretty poor to use an fdatasync as a
|
|
||||||
* barrier. But Christoph Hellwig points out that we need a sync
|
|
||||||
* *afterwards* as well: "Barriers specify no reordering to the front
|
|
||||||
* or the back." And Jens Axboe confirmed it, so here we are:
|
|
||||||
*/
|
|
||||||
if (out->type & VIRTIO_BLK_T_BARRIER)
|
|
||||||
fdatasync(vblk->fd);
|
|
||||||
|
|
||||||
/* Finished that request. */
|
/* Finished that request. */
|
||||||
add_used(vq, head, wlen);
|
add_used(vq, head, wlen);
|
||||||
}
|
}
|
||||||
|
@ -1736,8 +1725,8 @@ static void setup_block_file(const char *filename)
|
||||||
vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE);
|
vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE);
|
||||||
vblk->len = lseek64(vblk->fd, 0, SEEK_END);
|
vblk->len = lseek64(vblk->fd, 0, SEEK_END);
|
||||||
|
|
||||||
/* We support barriers. */
|
/* We support FLUSH. */
|
||||||
add_feature(dev, VIRTIO_BLK_F_BARRIER);
|
add_feature(dev, VIRTIO_BLK_F_FLUSH);
|
||||||
|
|
||||||
/* Tell Guest how many sectors this device has. */
|
/* Tell Guest how many sectors this device has. */
|
||||||
conf.capacity = cpu_to_le64(vblk->len / 512);
|
conf.capacity = cpu_to_le64(vblk->len / 512);
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
#
|
#
|
||||||
|
|
||||||
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
|
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
|
||||||
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
|
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
|
||||||
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
|
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
|
||||||
blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
|
blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
|
||||||
|
|
||||||
|
|
|
@ -1,350 +0,0 @@
|
||||||
/*
|
|
||||||
* Functions related to barrier IO handling
|
|
||||||
*/
|
|
||||||
#include <linux/kernel.h>
|
|
||||||
#include <linux/module.h>
|
|
||||||
#include <linux/bio.h>
|
|
||||||
#include <linux/blkdev.h>
|
|
||||||
#include <linux/gfp.h>
|
|
||||||
|
|
||||||
#include "blk.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* blk_queue_ordered - does this queue support ordered writes
|
|
||||||
* @q: the request queue
|
|
||||||
* @ordered: one of QUEUE_ORDERED_*
|
|
||||||
*
|
|
||||||
* Description:
|
|
||||||
* For journalled file systems, doing ordered writes on a commit
|
|
||||||
* block instead of explicitly doing wait_on_buffer (which is bad
|
|
||||||
* for performance) can be a big win. Block drivers supporting this
|
|
||||||
* feature should call this function and indicate so.
|
|
||||||
*
|
|
||||||
**/
|
|
||||||
int blk_queue_ordered(struct request_queue *q, unsigned ordered)
|
|
||||||
{
|
|
||||||
if (ordered != QUEUE_ORDERED_NONE &&
|
|
||||||
ordered != QUEUE_ORDERED_DRAIN &&
|
|
||||||
ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
|
|
||||||
ordered != QUEUE_ORDERED_DRAIN_FUA &&
|
|
||||||
ordered != QUEUE_ORDERED_TAG &&
|
|
||||||
ordered != QUEUE_ORDERED_TAG_FLUSH &&
|
|
||||||
ordered != QUEUE_ORDERED_TAG_FUA) {
|
|
||||||
printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
q->ordered = ordered;
|
|
||||||
q->next_ordered = ordered;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(blk_queue_ordered);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Cache flushing for ordered writes handling
|
|
||||||
*/
|
|
||||||
unsigned blk_ordered_cur_seq(struct request_queue *q)
|
|
||||||
{
|
|
||||||
if (!q->ordseq)
|
|
||||||
return 0;
|
|
||||||
return 1 << ffz(q->ordseq);
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned blk_ordered_req_seq(struct request *rq)
|
|
||||||
{
|
|
||||||
struct request_queue *q = rq->q;
|
|
||||||
|
|
||||||
BUG_ON(q->ordseq == 0);
|
|
||||||
|
|
||||||
if (rq == &q->pre_flush_rq)
|
|
||||||
return QUEUE_ORDSEQ_PREFLUSH;
|
|
||||||
if (rq == &q->bar_rq)
|
|
||||||
return QUEUE_ORDSEQ_BAR;
|
|
||||||
if (rq == &q->post_flush_rq)
|
|
||||||
return QUEUE_ORDSEQ_POSTFLUSH;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* !fs requests don't need to follow barrier ordering. Always
|
|
||||||
* put them at the front. This fixes the following deadlock.
|
|
||||||
*
|
|
||||||
* http://thread.gmane.org/gmane.linux.kernel/537473
|
|
||||||
*/
|
|
||||||
if (rq->cmd_type != REQ_TYPE_FS)
|
|
||||||
return QUEUE_ORDSEQ_DRAIN;
|
|
||||||
|
|
||||||
if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
|
|
||||||
(q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
|
|
||||||
return QUEUE_ORDSEQ_DRAIN;
|
|
||||||
else
|
|
||||||
return QUEUE_ORDSEQ_DONE;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
|
|
||||||
{
|
|
||||||
struct request *rq;
|
|
||||||
|
|
||||||
if (error && !q->orderr)
|
|
||||||
q->orderr = error;
|
|
||||||
|
|
||||||
BUG_ON(q->ordseq & seq);
|
|
||||||
q->ordseq |= seq;
|
|
||||||
|
|
||||||
if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Okay, sequence complete.
|
|
||||||
*/
|
|
||||||
q->ordseq = 0;
|
|
||||||
rq = q->orig_bar_rq;
|
|
||||||
__blk_end_request_all(rq, q->orderr);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void pre_flush_end_io(struct request *rq, int error)
|
|
||||||
{
|
|
||||||
elv_completed_request(rq->q, rq);
|
|
||||||
blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void bar_end_io(struct request *rq, int error)
|
|
||||||
{
|
|
||||||
elv_completed_request(rq->q, rq);
|
|
||||||
blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void post_flush_end_io(struct request *rq, int error)
|
|
||||||
{
|
|
||||||
elv_completed_request(rq->q, rq);
|
|
||||||
blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void queue_flush(struct request_queue *q, unsigned which)
|
|
||||||
{
|
|
||||||
struct request *rq;
|
|
||||||
rq_end_io_fn *end_io;
|
|
||||||
|
|
||||||
if (which == QUEUE_ORDERED_DO_PREFLUSH) {
|
|
||||||
rq = &q->pre_flush_rq;
|
|
||||||
end_io = pre_flush_end_io;
|
|
||||||
} else {
|
|
||||||
rq = &q->post_flush_rq;
|
|
||||||
end_io = post_flush_end_io;
|
|
||||||
}
|
|
||||||
|
|
||||||
blk_rq_init(q, rq);
|
|
||||||
rq->cmd_type = REQ_TYPE_FS;
|
|
||||||
rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH;
|
|
||||||
rq->rq_disk = q->orig_bar_rq->rq_disk;
|
|
||||||
rq->end_io = end_io;
|
|
||||||
|
|
||||||
elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool start_ordered(struct request_queue *q, struct request **rqp)
|
|
||||||
{
|
|
||||||
struct request *rq = *rqp;
|
|
||||||
unsigned skip = 0;
|
|
||||||
|
|
||||||
q->orderr = 0;
|
|
||||||
q->ordered = q->next_ordered;
|
|
||||||
q->ordseq |= QUEUE_ORDSEQ_STARTED;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* For an empty barrier, there's no actual BAR request, which
|
|
||||||
* in turn makes POSTFLUSH unnecessary. Mask them off.
|
|
||||||
*/
|
|
||||||
if (!blk_rq_sectors(rq)) {
|
|
||||||
q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
|
|
||||||
QUEUE_ORDERED_DO_POSTFLUSH);
|
|
||||||
/*
|
|
||||||
* Empty barrier on a write-through device w/ ordered
|
|
||||||
* tag has no command to issue and without any command
|
|
||||||
* to issue, ordering by tag can't be used. Drain
|
|
||||||
* instead.
|
|
||||||
*/
|
|
||||||
if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
|
|
||||||
!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
|
|
||||||
q->ordered &= ~QUEUE_ORDERED_BY_TAG;
|
|
||||||
q->ordered |= QUEUE_ORDERED_BY_DRAIN;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* stash away the original request */
|
|
||||||
blk_dequeue_request(rq);
|
|
||||||
q->orig_bar_rq = rq;
|
|
||||||
rq = NULL;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Queue ordered sequence. As we stack them at the head, we
|
|
||||||
* need to queue in reverse order. Note that we rely on that
|
|
||||||
* no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
|
|
||||||
* request gets inbetween ordered sequence.
|
|
||||||
*/
|
|
||||||
if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
|
|
||||||
queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
|
|
||||||
rq = &q->post_flush_rq;
|
|
||||||
} else
|
|
||||||
skip |= QUEUE_ORDSEQ_POSTFLUSH;
|
|
||||||
|
|
||||||
if (q->ordered & QUEUE_ORDERED_DO_BAR) {
|
|
||||||
rq = &q->bar_rq;
|
|
||||||
|
|
||||||
/* initialize proxy request and queue it */
|
|
||||||
blk_rq_init(q, rq);
|
|
||||||
if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
|
|
||||||
rq->cmd_flags |= REQ_WRITE;
|
|
||||||
if (q->ordered & QUEUE_ORDERED_DO_FUA)
|
|
||||||
rq->cmd_flags |= REQ_FUA;
|
|
||||||
init_request_from_bio(rq, q->orig_bar_rq->bio);
|
|
||||||
rq->end_io = bar_end_io;
|
|
||||||
|
|
||||||
elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
|
|
||||||
} else
|
|
||||||
skip |= QUEUE_ORDSEQ_BAR;
|
|
||||||
|
|
||||||
if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
|
|
||||||
queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
|
|
||||||
rq = &q->pre_flush_rq;
|
|
||||||
} else
|
|
||||||
skip |= QUEUE_ORDSEQ_PREFLUSH;
|
|
||||||
|
|
||||||
if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q))
|
|
||||||
rq = NULL;
|
|
||||||
else
|
|
||||||
skip |= QUEUE_ORDSEQ_DRAIN;
|
|
||||||
|
|
||||||
*rqp = rq;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Complete skipped sequences. If whole sequence is complete,
|
|
||||||
* return false to tell elevator that this request is gone.
|
|
||||||
*/
|
|
||||||
return !blk_ordered_complete_seq(q, skip, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool blk_do_ordered(struct request_queue *q, struct request **rqp)
|
|
||||||
{
|
|
||||||
struct request *rq = *rqp;
|
|
||||||
const int is_barrier = rq->cmd_type == REQ_TYPE_FS &&
|
|
||||||
(rq->cmd_flags & REQ_HARDBARRIER);
|
|
||||||
|
|
||||||
if (!q->ordseq) {
|
|
||||||
if (!is_barrier)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
if (q->next_ordered != QUEUE_ORDERED_NONE)
|
|
||||||
return start_ordered(q, rqp);
|
|
||||||
else {
|
|
||||||
/*
|
|
||||||
* Queue ordering not supported. Terminate
|
|
||||||
* with prejudice.
|
|
||||||
*/
|
|
||||||
blk_dequeue_request(rq);
|
|
||||||
__blk_end_request_all(rq, -EOPNOTSUPP);
|
|
||||||
*rqp = NULL;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Ordered sequence in progress
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Special requests are not subject to ordering rules. */
|
|
||||||
if (rq->cmd_type != REQ_TYPE_FS &&
|
|
||||||
rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
if (q->ordered & QUEUE_ORDERED_BY_TAG) {
|
|
||||||
/* Ordered by tag. Blocking the next barrier is enough. */
|
|
||||||
if (is_barrier && rq != &q->bar_rq)
|
|
||||||
*rqp = NULL;
|
|
||||||
} else {
|
|
||||||
/* Ordered by draining. Wait for turn. */
|
|
||||||
WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
|
|
||||||
if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
|
|
||||||
*rqp = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void bio_end_empty_barrier(struct bio *bio, int err)
|
|
||||||
{
|
|
||||||
if (err) {
|
|
||||||
if (err == -EOPNOTSUPP)
|
|
||||||
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
|
|
||||||
clear_bit(BIO_UPTODATE, &bio->bi_flags);
|
|
||||||
}
|
|
||||||
if (bio->bi_private)
|
|
||||||
complete(bio->bi_private);
|
|
||||||
bio_put(bio);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* blkdev_issue_flush - queue a flush
|
|
||||||
* @bdev: blockdev to issue flush for
|
|
||||||
* @gfp_mask: memory allocation flags (for bio_alloc)
|
|
||||||
* @error_sector: error sector
|
|
||||||
* @flags: BLKDEV_IFL_* flags to control behaviour
|
|
||||||
*
|
|
||||||
* Description:
|
|
||||||
* Issue a flush for the block device in question. Caller can supply
|
|
||||||
* room for storing the error offset in case of a flush error, if they
|
|
||||||
* wish to. If WAIT flag is not passed then caller may check only what
|
|
||||||
* request was pushed in some internal queue for later handling.
|
|
||||||
*/
|
|
||||||
int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
|
|
||||||
sector_t *error_sector, unsigned long flags)
|
|
||||||
{
|
|
||||||
DECLARE_COMPLETION_ONSTACK(wait);
|
|
||||||
struct request_queue *q;
|
|
||||||
struct bio *bio;
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
if (bdev->bd_disk == NULL)
|
|
||||||
return -ENXIO;
|
|
||||||
|
|
||||||
q = bdev_get_queue(bdev);
|
|
||||||
if (!q)
|
|
||||||
return -ENXIO;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* some block devices may not have their queue correctly set up here
|
|
||||||
* (e.g. loop device without a backing file) and so issuing a flush
|
|
||||||
* here will panic. Ensure there is a request function before issuing
|
|
||||||
* the barrier.
|
|
||||||
*/
|
|
||||||
if (!q->make_request_fn)
|
|
||||||
return -ENXIO;
|
|
||||||
|
|
||||||
bio = bio_alloc(gfp_mask, 0);
|
|
||||||
bio->bi_end_io = bio_end_empty_barrier;
|
|
||||||
bio->bi_bdev = bdev;
|
|
||||||
if (test_bit(BLKDEV_WAIT, &flags))
|
|
||||||
bio->bi_private = &wait;
|
|
||||||
|
|
||||||
bio_get(bio);
|
|
||||||
submit_bio(WRITE_BARRIER, bio);
|
|
||||||
if (test_bit(BLKDEV_WAIT, &flags)) {
|
|
||||||
wait_for_completion(&wait);
|
|
||||||
/*
|
|
||||||
* The driver must store the error location in ->bi_sector, if
|
|
||||||
* it supports it. For non-stacked drivers, this should be
|
|
||||||
* copied from blk_rq_pos(rq).
|
|
||||||
*/
|
|
||||||
if (error_sector)
|
|
||||||
*error_sector = bio->bi_sector;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bio_flagged(bio, BIO_EOPNOTSUPP))
|
|
||||||
ret = -EOPNOTSUPP;
|
|
||||||
else if (!bio_flagged(bio, BIO_UPTODATE))
|
|
||||||
ret = -EIO;
|
|
||||||
|
|
||||||
bio_put(bio);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(blkdev_issue_flush);
|
|
|
@ -139,7 +139,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
|
||||||
{
|
{
|
||||||
struct request_queue *q = rq->q;
|
struct request_queue *q = rq->q;
|
||||||
|
|
||||||
if (&q->bar_rq != rq) {
|
if (&q->flush_rq != rq) {
|
||||||
if (error)
|
if (error)
|
||||||
clear_bit(BIO_UPTODATE, &bio->bi_flags);
|
clear_bit(BIO_UPTODATE, &bio->bi_flags);
|
||||||
else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
|
else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
|
||||||
|
@ -163,13 +163,12 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
|
||||||
if (bio->bi_size == 0)
|
if (bio->bi_size == 0)
|
||||||
bio_endio(bio, error);
|
bio_endio(bio, error);
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Okay, this is the barrier request in progress, just
|
* Okay, this is the sequenced flush request in
|
||||||
* record the error;
|
* progress, just record the error;
|
||||||
*/
|
*/
|
||||||
if (error && !q->orderr)
|
if (error && !q->flush_err)
|
||||||
q->orderr = error;
|
q->flush_err = error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -531,6 +530,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
|
||||||
init_timer(&q->unplug_timer);
|
init_timer(&q->unplug_timer);
|
||||||
setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
|
setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
|
||||||
INIT_LIST_HEAD(&q->timeout_list);
|
INIT_LIST_HEAD(&q->timeout_list);
|
||||||
|
INIT_LIST_HEAD(&q->pending_flushes);
|
||||||
INIT_WORK(&q->unplug_work, blk_unplug_work);
|
INIT_WORK(&q->unplug_work, blk_unplug_work);
|
||||||
|
|
||||||
kobject_init(&q->kobj, &blk_queue_ktype);
|
kobject_init(&q->kobj, &blk_queue_ktype);
|
||||||
|
@ -1053,22 +1053,6 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(blk_insert_request);
|
EXPORT_SYMBOL(blk_insert_request);
|
||||||
|
|
||||||
/*
|
|
||||||
* add-request adds a request to the linked list.
|
|
||||||
* queue lock is held and interrupts disabled, as we muck with the
|
|
||||||
* request queue list.
|
|
||||||
*/
|
|
||||||
static inline void add_request(struct request_queue *q, struct request *req)
|
|
||||||
{
|
|
||||||
drive_stat_acct(req, 1);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* elevator indicated where it wants this request to be
|
|
||||||
* inserted at elevator_merge time
|
|
||||||
*/
|
|
||||||
__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void part_round_stats_single(int cpu, struct hd_struct *part,
|
static void part_round_stats_single(int cpu, struct hd_struct *part,
|
||||||
unsigned long now)
|
unsigned long now)
|
||||||
{
|
{
|
||||||
|
@ -1217,13 +1201,16 @@ static int __make_request(struct request_queue *q, struct bio *bio)
|
||||||
const bool sync = !!(bio->bi_rw & REQ_SYNC);
|
const bool sync = !!(bio->bi_rw & REQ_SYNC);
|
||||||
const bool unplug = !!(bio->bi_rw & REQ_UNPLUG);
|
const bool unplug = !!(bio->bi_rw & REQ_UNPLUG);
|
||||||
const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK;
|
const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK;
|
||||||
|
int where = ELEVATOR_INSERT_SORT;
|
||||||
int rw_flags;
|
int rw_flags;
|
||||||
|
|
||||||
if ((bio->bi_rw & REQ_HARDBARRIER) &&
|
/* REQ_HARDBARRIER is no more */
|
||||||
(q->next_ordered == QUEUE_ORDERED_NONE)) {
|
if (WARN_ONCE(bio->bi_rw & REQ_HARDBARRIER,
|
||||||
|
"block: HARDBARRIER is deprecated, use FLUSH/FUA instead\n")) {
|
||||||
bio_endio(bio, -EOPNOTSUPP);
|
bio_endio(bio, -EOPNOTSUPP);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* low level driver can indicate that it wants pages above a
|
* low level driver can indicate that it wants pages above a
|
||||||
* certain limit bounced to low memory (ie for highmem, or even
|
* certain limit bounced to low memory (ie for highmem, or even
|
||||||
|
@ -1233,7 +1220,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
|
||||||
|
|
||||||
spin_lock_irq(q->queue_lock);
|
spin_lock_irq(q->queue_lock);
|
||||||
|
|
||||||
if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q))
|
if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
|
||||||
|
where = ELEVATOR_INSERT_FRONT;
|
||||||
|
goto get_rq;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (elv_queue_empty(q))
|
||||||
goto get_rq;
|
goto get_rq;
|
||||||
|
|
||||||
el_ret = elv_merge(q, &req, bio);
|
el_ret = elv_merge(q, &req, bio);
|
||||||
|
@ -1330,7 +1322,10 @@ get_rq:
|
||||||
req->cpu = blk_cpu_to_group(smp_processor_id());
|
req->cpu = blk_cpu_to_group(smp_processor_id());
|
||||||
if (queue_should_plug(q) && elv_queue_empty(q))
|
if (queue_should_plug(q) && elv_queue_empty(q))
|
||||||
blk_plug_device(q);
|
blk_plug_device(q);
|
||||||
add_request(q, req);
|
|
||||||
|
/* insert the request into the elevator */
|
||||||
|
drive_stat_acct(req, 1);
|
||||||
|
__elv_add_request(q, req, where, 0);
|
||||||
out:
|
out:
|
||||||
if (unplug || !queue_should_plug(q))
|
if (unplug || !queue_should_plug(q))
|
||||||
__generic_unplug_device(q);
|
__generic_unplug_device(q);
|
||||||
|
@ -1530,6 +1525,19 @@ static inline void __generic_make_request(struct bio *bio)
|
||||||
if (bio_check_eod(bio, nr_sectors))
|
if (bio_check_eod(bio, nr_sectors))
|
||||||
goto end_io;
|
goto end_io;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Filter flush bio's early so that make_request based
|
||||||
|
* drivers without flush support don't have to worry
|
||||||
|
* about them.
|
||||||
|
*/
|
||||||
|
if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
|
||||||
|
bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
|
||||||
|
if (!nr_sectors) {
|
||||||
|
err = 0;
|
||||||
|
goto end_io;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ((bio->bi_rw & REQ_DISCARD) &&
|
if ((bio->bi_rw & REQ_DISCARD) &&
|
||||||
(!blk_queue_discard(q) ||
|
(!blk_queue_discard(q) ||
|
||||||
((bio->bi_rw & REQ_SECURE) &&
|
((bio->bi_rw & REQ_SECURE) &&
|
||||||
|
@ -1794,11 +1802,11 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
|
||||||
static void blk_account_io_done(struct request *req)
|
static void blk_account_io_done(struct request *req)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Account IO completion. bar_rq isn't accounted as a normal
|
* Account IO completion. flush_rq isn't accounted as a
|
||||||
* IO on queueing nor completion. Accounting the containing
|
* normal IO on queueing nor completion. Accounting the
|
||||||
* request is enough.
|
* containing request is enough.
|
||||||
*/
|
*/
|
||||||
if (blk_do_io_stat(req) && req != &req->q->bar_rq) {
|
if (blk_do_io_stat(req) && req != &req->q->flush_rq) {
|
||||||
unsigned long duration = jiffies - req->start_time;
|
unsigned long duration = jiffies - req->start_time;
|
||||||
const int rw = rq_data_dir(req);
|
const int rw = rq_data_dir(req);
|
||||||
struct hd_struct *part;
|
struct hd_struct *part;
|
||||||
|
@ -2523,9 +2531,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
|
||||||
static void __blk_rq_prep_clone(struct request *dst, struct request *src)
|
static void __blk_rq_prep_clone(struct request *dst, struct request *src)
|
||||||
{
|
{
|
||||||
dst->cpu = src->cpu;
|
dst->cpu = src->cpu;
|
||||||
dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE);
|
dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
|
||||||
if (src->cmd_flags & REQ_DISCARD)
|
|
||||||
dst->cmd_flags |= REQ_DISCARD;
|
|
||||||
dst->cmd_type = src->cmd_type;
|
dst->cmd_type = src->cmd_type;
|
||||||
dst->__sector = blk_rq_pos(src);
|
dst->__sector = blk_rq_pos(src);
|
||||||
dst->__data_len = blk_rq_bytes(src);
|
dst->__data_len = blk_rq_bytes(src);
|
||||||
|
|
|
@ -0,0 +1,262 @@
|
||||||
|
/*
|
||||||
|
* Functions to sequence FLUSH and FUA writes.
|
||||||
|
*/
|
||||||
|
#include <linux/kernel.h>
|
||||||
|
#include <linux/module.h>
|
||||||
|
#include <linux/bio.h>
|
||||||
|
#include <linux/blkdev.h>
|
||||||
|
#include <linux/gfp.h>
|
||||||
|
|
||||||
|
#include "blk.h"
|
||||||
|
|
||||||
|
/* FLUSH/FUA sequences */
|
||||||
|
enum {
|
||||||
|
QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */
|
||||||
|
QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */
|
||||||
|
QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */
|
||||||
|
QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */
|
||||||
|
QUEUE_FSEQ_DONE = (1 << 4),
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct request *queue_next_fseq(struct request_queue *q);
|
||||||
|
|
||||||
|
unsigned blk_flush_cur_seq(struct request_queue *q)
|
||||||
|
{
|
||||||
|
if (!q->flush_seq)
|
||||||
|
return 0;
|
||||||
|
return 1 << ffz(q->flush_seq);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct request *blk_flush_complete_seq(struct request_queue *q,
|
||||||
|
unsigned seq, int error)
|
||||||
|
{
|
||||||
|
struct request *next_rq = NULL;
|
||||||
|
|
||||||
|
if (error && !q->flush_err)
|
||||||
|
q->flush_err = error;
|
||||||
|
|
||||||
|
BUG_ON(q->flush_seq & seq);
|
||||||
|
q->flush_seq |= seq;
|
||||||
|
|
||||||
|
if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) {
|
||||||
|
/* not complete yet, queue the next flush sequence */
|
||||||
|
next_rq = queue_next_fseq(q);
|
||||||
|
} else {
|
||||||
|
/* complete this flush request */
|
||||||
|
__blk_end_request_all(q->orig_flush_rq, q->flush_err);
|
||||||
|
q->orig_flush_rq = NULL;
|
||||||
|
q->flush_seq = 0;
|
||||||
|
|
||||||
|
/* dispatch the next flush if there's one */
|
||||||
|
if (!list_empty(&q->pending_flushes)) {
|
||||||
|
next_rq = list_entry_rq(q->pending_flushes.next);
|
||||||
|
list_move(&next_rq->queuelist, &q->queue_head);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return next_rq;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void blk_flush_complete_seq_end_io(struct request_queue *q,
|
||||||
|
unsigned seq, int error)
|
||||||
|
{
|
||||||
|
bool was_empty = elv_queue_empty(q);
|
||||||
|
struct request *next_rq;
|
||||||
|
|
||||||
|
next_rq = blk_flush_complete_seq(q, seq, error);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Moving a request silently to empty queue_head may stall the
|
||||||
|
* queue. Kick the queue in those cases.
|
||||||
|
*/
|
||||||
|
if (was_empty && next_rq)
|
||||||
|
__blk_run_queue(q);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void pre_flush_end_io(struct request *rq, int error)
|
||||||
|
{
|
||||||
|
elv_completed_request(rq->q, rq);
|
||||||
|
blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void flush_data_end_io(struct request *rq, int error)
|
||||||
|
{
|
||||||
|
elv_completed_request(rq->q, rq);
|
||||||
|
blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void post_flush_end_io(struct request *rq, int error)
|
||||||
|
{
|
||||||
|
elv_completed_request(rq->q, rq);
|
||||||
|
blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void init_flush_request(struct request *rq, struct gendisk *disk)
|
||||||
|
{
|
||||||
|
rq->cmd_type = REQ_TYPE_FS;
|
||||||
|
rq->cmd_flags = WRITE_FLUSH;
|
||||||
|
rq->rq_disk = disk;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct request *queue_next_fseq(struct request_queue *q)
|
||||||
|
{
|
||||||
|
struct request *orig_rq = q->orig_flush_rq;
|
||||||
|
struct request *rq = &q->flush_rq;
|
||||||
|
|
||||||
|
blk_rq_init(q, rq);
|
||||||
|
|
||||||
|
switch (blk_flush_cur_seq(q)) {
|
||||||
|
case QUEUE_FSEQ_PREFLUSH:
|
||||||
|
init_flush_request(rq, orig_rq->rq_disk);
|
||||||
|
rq->end_io = pre_flush_end_io;
|
||||||
|
break;
|
||||||
|
case QUEUE_FSEQ_DATA:
|
||||||
|
init_request_from_bio(rq, orig_rq->bio);
|
||||||
|
/*
|
||||||
|
* orig_rq->rq_disk may be different from
|
||||||
|
* bio->bi_bdev->bd_disk if orig_rq got here through
|
||||||
|
* remapping drivers. Make sure rq->rq_disk points
|
||||||
|
* to the same one as orig_rq.
|
||||||
|
*/
|
||||||
|
rq->rq_disk = orig_rq->rq_disk;
|
||||||
|
rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA);
|
||||||
|
rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA);
|
||||||
|
rq->end_io = flush_data_end_io;
|
||||||
|
break;
|
||||||
|
case QUEUE_FSEQ_POSTFLUSH:
|
||||||
|
init_flush_request(rq, orig_rq->rq_disk);
|
||||||
|
rq->end_io = post_flush_end_io;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
BUG();
|
||||||
|
}
|
||||||
|
|
||||||
|
elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
|
||||||
|
return rq;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct request *blk_do_flush(struct request_queue *q, struct request *rq)
|
||||||
|
{
|
||||||
|
unsigned int fflags = q->flush_flags; /* may change, cache it */
|
||||||
|
bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA;
|
||||||
|
bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH);
|
||||||
|
bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA);
|
||||||
|
unsigned skip = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Special case. If there's data but flush is not necessary,
|
||||||
|
* the request can be issued directly.
|
||||||
|
*
|
||||||
|
* Flush w/o data should be able to be issued directly too but
|
||||||
|
* currently some drivers assume that rq->bio contains
|
||||||
|
* non-zero data if it isn't NULL and empty FLUSH requests
|
||||||
|
* getting here usually have bio's without data.
|
||||||
|
*/
|
||||||
|
if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) {
|
||||||
|
rq->cmd_flags &= ~REQ_FLUSH;
|
||||||
|
if (!has_fua)
|
||||||
|
rq->cmd_flags &= ~REQ_FUA;
|
||||||
|
return rq;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Sequenced flushes can't be processed in parallel. If
|
||||||
|
* another one is already in progress, queue for later
|
||||||
|
* processing.
|
||||||
|
*/
|
||||||
|
if (q->flush_seq) {
|
||||||
|
list_move_tail(&rq->queuelist, &q->pending_flushes);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Start a new flush sequence
|
||||||
|
*/
|
||||||
|
q->flush_err = 0;
|
||||||
|
q->flush_seq |= QUEUE_FSEQ_STARTED;
|
||||||
|
|
||||||
|
/* adjust FLUSH/FUA of the original request and stash it away */
|
||||||
|
rq->cmd_flags &= ~REQ_FLUSH;
|
||||||
|
if (!has_fua)
|
||||||
|
rq->cmd_flags &= ~REQ_FUA;
|
||||||
|
blk_dequeue_request(rq);
|
||||||
|
q->orig_flush_rq = rq;
|
||||||
|
|
||||||
|
/* skip unneeded sequences and return the first one */
|
||||||
|
if (!do_preflush)
|
||||||
|
skip |= QUEUE_FSEQ_PREFLUSH;
|
||||||
|
if (!blk_rq_sectors(rq))
|
||||||
|
skip |= QUEUE_FSEQ_DATA;
|
||||||
|
if (!do_postflush)
|
||||||
|
skip |= QUEUE_FSEQ_POSTFLUSH;
|
||||||
|
return blk_flush_complete_seq(q, skip, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void bio_end_flush(struct bio *bio, int err)
|
||||||
|
{
|
||||||
|
if (err)
|
||||||
|
clear_bit(BIO_UPTODATE, &bio->bi_flags);
|
||||||
|
if (bio->bi_private)
|
||||||
|
complete(bio->bi_private);
|
||||||
|
bio_put(bio);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* blkdev_issue_flush - queue a flush
|
||||||
|
* @bdev: blockdev to issue flush for
|
||||||
|
* @gfp_mask: memory allocation flags (for bio_alloc)
|
||||||
|
* @error_sector: error sector
|
||||||
|
*
|
||||||
|
* Description:
|
||||||
|
* Issue a flush for the block device in question. Caller can supply
|
||||||
|
* room for storing the error offset in case of a flush error, if they
|
||||||
|
* wish to. The flush is always synchronous: this function waits for the
|
||||||
|
* flush bio to complete before returning.
|
||||||
|
*/
|
||||||
|
int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
|
||||||
|
sector_t *error_sector)
|
||||||
|
{
|
||||||
|
DECLARE_COMPLETION_ONSTACK(wait);
|
||||||
|
struct request_queue *q;
|
||||||
|
struct bio *bio;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
if (bdev->bd_disk == NULL)
|
||||||
|
return -ENXIO;
|
||||||
|
|
||||||
|
q = bdev_get_queue(bdev);
|
||||||
|
if (!q)
|
||||||
|
return -ENXIO;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* some block devices may not have their queue correctly set up here
|
||||||
|
* (e.g. loop device without a backing file) and so issuing a flush
|
||||||
|
* here will panic. Ensure there is a request function before issuing
|
||||||
|
* the flush.
|
||||||
|
*/
|
||||||
|
if (!q->make_request_fn)
|
||||||
|
return -ENXIO;
|
||||||
|
|
||||||
|
bio = bio_alloc(gfp_mask, 0);
|
||||||
|
bio->bi_end_io = bio_end_flush;
|
||||||
|
bio->bi_bdev = bdev;
|
||||||
|
bio->bi_private = &wait;
|
||||||
|
|
||||||
|
bio_get(bio);
|
||||||
|
submit_bio(WRITE_FLUSH, bio);
|
||||||
|
wait_for_completion(&wait);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The driver must store the error location in ->bi_sector, if
|
||||||
|
* it supports it. For non-stacked drivers, this should be
|
||||||
|
* copied from blk_rq_pos(rq).
|
||||||
|
*/
|
||||||
|
if (error_sector)
|
||||||
|
*error_sector = bio->bi_sector;
|
||||||
|
|
||||||
|
if (!bio_flagged(bio, BIO_UPTODATE))
|
||||||
|
ret = -EIO;
|
||||||
|
|
||||||
|
bio_put(bio);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(blkdev_issue_flush);
|
|
@ -39,8 +39,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
||||||
{
|
{
|
||||||
DECLARE_COMPLETION_ONSTACK(wait);
|
DECLARE_COMPLETION_ONSTACK(wait);
|
||||||
struct request_queue *q = bdev_get_queue(bdev);
|
struct request_queue *q = bdev_get_queue(bdev);
|
||||||
int type = flags & BLKDEV_IFL_BARRIER ?
|
int type = REQ_WRITE | REQ_DISCARD;
|
||||||
DISCARD_BARRIER : DISCARD_NOBARRIER;
|
|
||||||
unsigned int max_discard_sectors;
|
unsigned int max_discard_sectors;
|
||||||
struct bio *bio;
|
struct bio *bio;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
@ -62,10 +61,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
||||||
max_discard_sectors &= ~(disc_sects - 1);
|
max_discard_sectors &= ~(disc_sects - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & BLKDEV_IFL_SECURE) {
|
if (flags & BLKDEV_DISCARD_SECURE) {
|
||||||
if (!blk_queue_secdiscard(q))
|
if (!blk_queue_secdiscard(q))
|
||||||
return -EOPNOTSUPP;
|
return -EOPNOTSUPP;
|
||||||
type |= DISCARD_SECURE;
|
type |= REQ_SECURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (nr_sects && !ret) {
|
while (nr_sects && !ret) {
|
||||||
|
@ -78,8 +77,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
||||||
bio->bi_sector = sector;
|
bio->bi_sector = sector;
|
||||||
bio->bi_end_io = blkdev_discard_end_io;
|
bio->bi_end_io = blkdev_discard_end_io;
|
||||||
bio->bi_bdev = bdev;
|
bio->bi_bdev = bdev;
|
||||||
if (flags & BLKDEV_IFL_WAIT)
|
bio->bi_private = &wait;
|
||||||
bio->bi_private = &wait;
|
|
||||||
|
|
||||||
if (nr_sects > max_discard_sectors) {
|
if (nr_sects > max_discard_sectors) {
|
||||||
bio->bi_size = max_discard_sectors << 9;
|
bio->bi_size = max_discard_sectors << 9;
|
||||||
|
@ -93,8 +91,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
||||||
bio_get(bio);
|
bio_get(bio);
|
||||||
submit_bio(type, bio);
|
submit_bio(type, bio);
|
||||||
|
|
||||||
if (flags & BLKDEV_IFL_WAIT)
|
wait_for_completion(&wait);
|
||||||
wait_for_completion(&wait);
|
|
||||||
|
|
||||||
if (bio_flagged(bio, BIO_EOPNOTSUPP))
|
if (bio_flagged(bio, BIO_EOPNOTSUPP))
|
||||||
ret = -EOPNOTSUPP;
|
ret = -EOPNOTSUPP;
|
||||||
|
@ -140,7 +137,6 @@ static void bio_batch_end_io(struct bio *bio, int err)
|
||||||
* @sector: start sector
|
* @sector: start sector
|
||||||
* @nr_sects: number of sectors to write
|
* @nr_sects: number of sectors to write
|
||||||
* @gfp_mask: memory allocation flags (for bio_alloc)
|
* @gfp_mask: memory allocation flags (for bio_alloc)
|
||||||
* @flags: BLKDEV_IFL_* flags to control behaviour
|
|
||||||
*
|
*
|
||||||
* Description:
|
* Description:
|
||||||
* Generate and issue a number of bios with zero-filled pages.
|
* Generate and issue a number of bios with zero-filled pages.
|
||||||
|
@ -149,7 +145,7 @@ static void bio_batch_end_io(struct bio *bio, int err)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
|
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
|
||||||
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
|
sector_t nr_sects, gfp_t gfp_mask)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
struct bio *bio;
|
struct bio *bio;
|
||||||
|
@ -162,12 +158,6 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
|
||||||
bb.wait = &wait;
|
bb.wait = &wait;
|
||||||
bb.end_io = NULL;
|
bb.end_io = NULL;
|
||||||
|
|
||||||
if (flags & BLKDEV_IFL_BARRIER) {
|
|
||||||
/* issue async barrier before the data */
|
|
||||||
ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
submit:
|
submit:
|
||||||
ret = 0;
|
ret = 0;
|
||||||
while (nr_sects != 0) {
|
while (nr_sects != 0) {
|
||||||
|
@ -181,8 +171,7 @@ submit:
|
||||||
bio->bi_sector = sector;
|
bio->bi_sector = sector;
|
||||||
bio->bi_bdev = bdev;
|
bio->bi_bdev = bdev;
|
||||||
bio->bi_end_io = bio_batch_end_io;
|
bio->bi_end_io = bio_batch_end_io;
|
||||||
if (flags & BLKDEV_IFL_WAIT)
|
bio->bi_private = &bb;
|
||||||
bio->bi_private = &bb;
|
|
||||||
|
|
||||||
while (nr_sects != 0) {
|
while (nr_sects != 0) {
|
||||||
sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
|
sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
|
||||||
|
@ -199,18 +188,10 @@ submit:
|
||||||
issued++;
|
issued++;
|
||||||
submit_bio(WRITE, bio);
|
submit_bio(WRITE, bio);
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
* When all data bios are in flight. Send final barrier if requeted.
|
|
||||||
*/
|
|
||||||
if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER)
|
|
||||||
ret = blkdev_issue_flush(bdev, gfp_mask, NULL,
|
|
||||||
flags & BLKDEV_IFL_WAIT);
|
|
||||||
|
|
||||||
|
/* Wait for bios in-flight */
|
||||||
if (flags & BLKDEV_IFL_WAIT)
|
while (issued != atomic_read(&bb.done))
|
||||||
/* Wait for bios in-flight */
|
wait_for_completion(&wait);
|
||||||
while ( issued != atomic_read(&bb.done))
|
|
||||||
wait_for_completion(&wait);
|
|
||||||
|
|
||||||
if (!test_bit(BIO_UPTODATE, &bb.flags))
|
if (!test_bit(BIO_UPTODATE, &bb.flags))
|
||||||
/* One of bios in the batch was completed with error.*/
|
/* One of bios in the batch was completed with error.*/
|
||||||
|
|
|
@ -792,6 +792,26 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(blk_queue_update_dma_alignment);
|
EXPORT_SYMBOL(blk_queue_update_dma_alignment);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* blk_queue_flush - configure queue's cache flush capability
|
||||||
|
* @q: the request queue for the device
|
||||||
|
* @flush: 0, REQ_FLUSH or REQ_FLUSH | REQ_FUA
|
||||||
|
*
|
||||||
|
* Tell block layer cache flush capability of @q. If it supports
|
||||||
|
* flushing, REQ_FLUSH should be set. If it supports bypassing
|
||||||
|
* write cache for individual writes, REQ_FUA should be set.
|
||||||
|
*/
|
||||||
|
void blk_queue_flush(struct request_queue *q, unsigned int flush)
|
||||||
|
{
|
||||||
|
WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA));
|
||||||
|
|
||||||
|
if (WARN_ON_ONCE(!(flush & REQ_FLUSH) && (flush & REQ_FUA)))
|
||||||
|
flush &= ~REQ_FUA;
|
||||||
|
|
||||||
|
q->flush_flags = flush & (REQ_FLUSH | REQ_FUA);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(blk_queue_flush);
|
||||||
|
|
||||||
static int __init blk_settings_init(void)
|
static int __init blk_settings_init(void)
|
||||||
{
|
{
|
||||||
blk_max_low_pfn = max_low_pfn - 1;
|
blk_max_low_pfn = max_low_pfn - 1;
|
||||||
|
|
|
@ -51,6 +51,8 @@ static inline void blk_clear_rq_complete(struct request *rq)
|
||||||
*/
|
*/
|
||||||
#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))
|
#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))
|
||||||
|
|
||||||
|
struct request *blk_do_flush(struct request_queue *q, struct request *rq);
|
||||||
|
|
||||||
static inline struct request *__elv_next_request(struct request_queue *q)
|
static inline struct request *__elv_next_request(struct request_queue *q)
|
||||||
{
|
{
|
||||||
struct request *rq;
|
struct request *rq;
|
||||||
|
@ -58,7 +60,11 @@ static inline struct request *__elv_next_request(struct request_queue *q)
|
||||||
while (1) {
|
while (1) {
|
||||||
while (!list_empty(&q->queue_head)) {
|
while (!list_empty(&q->queue_head)) {
|
||||||
rq = list_entry_rq(q->queue_head.next);
|
rq = list_entry_rq(q->queue_head.next);
|
||||||
if (blk_do_ordered(q, &rq))
|
if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) ||
|
||||||
|
rq == &q->flush_rq)
|
||||||
|
return rq;
|
||||||
|
rq = blk_do_flush(q, rq);
|
||||||
|
if (rq)
|
||||||
return rq;
|
return rq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -617,8 +617,6 @@ void elv_quiesce_end(struct request_queue *q)
|
||||||
|
|
||||||
void elv_insert(struct request_queue *q, struct request *rq, int where)
|
void elv_insert(struct request_queue *q, struct request *rq, int where)
|
||||||
{
|
{
|
||||||
struct list_head *pos;
|
|
||||||
unsigned ordseq;
|
|
||||||
int unplug_it = 1;
|
int unplug_it = 1;
|
||||||
|
|
||||||
trace_block_rq_insert(q, rq);
|
trace_block_rq_insert(q, rq);
|
||||||
|
@ -626,9 +624,16 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
|
||||||
rq->q = q;
|
rq->q = q;
|
||||||
|
|
||||||
switch (where) {
|
switch (where) {
|
||||||
|
case ELEVATOR_INSERT_REQUEUE:
|
||||||
|
/*
|
||||||
|
* Most requeues happen because of a busy condition,
|
||||||
|
* don't force unplug of the queue for that case.
|
||||||
|
* Clear unplug_it and fall through.
|
||||||
|
*/
|
||||||
|
unplug_it = 0;
|
||||||
|
|
||||||
case ELEVATOR_INSERT_FRONT:
|
case ELEVATOR_INSERT_FRONT:
|
||||||
rq->cmd_flags |= REQ_SOFTBARRIER;
|
rq->cmd_flags |= REQ_SOFTBARRIER;
|
||||||
|
|
||||||
list_add(&rq->queuelist, &q->queue_head);
|
list_add(&rq->queuelist, &q->queue_head);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -668,36 +673,6 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
|
||||||
q->elevator->ops->elevator_add_req_fn(q, rq);
|
q->elevator->ops->elevator_add_req_fn(q, rq);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ELEVATOR_INSERT_REQUEUE:
|
|
||||||
/*
|
|
||||||
* If ordered flush isn't in progress, we do front
|
|
||||||
* insertion; otherwise, requests should be requeued
|
|
||||||
* in ordseq order.
|
|
||||||
*/
|
|
||||||
rq->cmd_flags |= REQ_SOFTBARRIER;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Most requeues happen because of a busy condition,
|
|
||||||
* don't force unplug of the queue for that case.
|
|
||||||
*/
|
|
||||||
unplug_it = 0;
|
|
||||||
|
|
||||||
if (q->ordseq == 0) {
|
|
||||||
list_add(&rq->queuelist, &q->queue_head);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
ordseq = blk_ordered_req_seq(rq);
|
|
||||||
|
|
||||||
list_for_each(pos, &q->queue_head) {
|
|
||||||
struct request *pos_rq = list_entry_rq(pos);
|
|
||||||
if (ordseq <= blk_ordered_req_seq(pos_rq))
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
list_add_tail(&rq->queuelist, pos);
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
default:
|
||||||
printk(KERN_ERR "%s: bad insertion point %d\n",
|
printk(KERN_ERR "%s: bad insertion point %d\n",
|
||||||
__func__, where);
|
__func__, where);
|
||||||
|
@ -716,26 +691,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
|
||||||
void __elv_add_request(struct request_queue *q, struct request *rq, int where,
|
void __elv_add_request(struct request_queue *q, struct request *rq, int where,
|
||||||
int plug)
|
int plug)
|
||||||
{
|
{
|
||||||
if (q->ordcolor)
|
|
||||||
rq->cmd_flags |= REQ_ORDERED_COLOR;
|
|
||||||
|
|
||||||
if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
|
if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
|
||||||
/*
|
/* barriers are scheduling boundary, update end_sector */
|
||||||
* toggle ordered color
|
|
||||||
*/
|
|
||||||
if (rq->cmd_flags & REQ_HARDBARRIER)
|
|
||||||
q->ordcolor ^= 1;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* barriers implicitly indicate back insertion
|
|
||||||
*/
|
|
||||||
if (where == ELEVATOR_INSERT_SORT)
|
|
||||||
where = ELEVATOR_INSERT_BACK;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* this request is scheduling boundary, update
|
|
||||||
* end_sector
|
|
||||||
*/
|
|
||||||
if (rq->cmd_type == REQ_TYPE_FS ||
|
if (rq->cmd_type == REQ_TYPE_FS ||
|
||||||
(rq->cmd_flags & REQ_DISCARD)) {
|
(rq->cmd_flags & REQ_DISCARD)) {
|
||||||
q->end_sector = rq_end_sector(rq);
|
q->end_sector = rq_end_sector(rq);
|
||||||
|
@ -855,24 +812,6 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
|
||||||
e->ops->elevator_completed_req_fn)
|
e->ops->elevator_completed_req_fn)
|
||||||
e->ops->elevator_completed_req_fn(q, rq);
|
e->ops->elevator_completed_req_fn(q, rq);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Check if the queue is waiting for fs requests to be
|
|
||||||
* drained for flush sequence.
|
|
||||||
*/
|
|
||||||
if (unlikely(q->ordseq)) {
|
|
||||||
struct request *next = NULL;
|
|
||||||
|
|
||||||
if (!list_empty(&q->queue_head))
|
|
||||||
next = list_entry_rq(q->queue_head.next);
|
|
||||||
|
|
||||||
if (!queue_in_flight(q) &&
|
|
||||||
blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
|
|
||||||
(!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) {
|
|
||||||
blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
|
|
||||||
__blk_run_queue(q);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
|
#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
|
||||||
|
|
|
@ -116,7 +116,7 @@ static int blkdev_reread_part(struct block_device *bdev)
|
||||||
static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
|
static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
|
||||||
uint64_t len, int secure)
|
uint64_t len, int secure)
|
||||||
{
|
{
|
||||||
unsigned long flags = BLKDEV_IFL_WAIT;
|
unsigned long flags = 0;
|
||||||
|
|
||||||
if (start & 511)
|
if (start & 511)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
@ -128,7 +128,7 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
|
||||||
if (start + len > (bdev->bd_inode->i_size >> 9))
|
if (start + len > (bdev->bd_inode->i_size >> 9))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
if (secure)
|
if (secure)
|
||||||
flags |= BLKDEV_IFL_SECURE;
|
flags |= BLKDEV_DISCARD_SECURE;
|
||||||
return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags);
|
return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -483,7 +483,6 @@ static struct brd_device *brd_alloc(int i)
|
||||||
if (!brd->brd_queue)
|
if (!brd->brd_queue)
|
||||||
goto out_free_dev;
|
goto out_free_dev;
|
||||||
blk_queue_make_request(brd->brd_queue, brd_make_request);
|
blk_queue_make_request(brd->brd_queue, brd_make_request);
|
||||||
blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_TAG);
|
|
||||||
blk_queue_max_hw_sectors(brd->brd_queue, 1024);
|
blk_queue_max_hw_sectors(brd->brd_queue, 1024);
|
||||||
blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
|
blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
|
||||||
|
|
||||||
|
|
|
@ -2409,8 +2409,7 @@ static inline void drbd_md_flush(struct drbd_conf *mdev)
|
||||||
if (test_bit(MD_NO_BARRIER, &mdev->flags))
|
if (test_bit(MD_NO_BARRIER, &mdev->flags))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL,
|
r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL);
|
||||||
BLKDEV_IFL_WAIT);
|
|
||||||
if (r) {
|
if (r) {
|
||||||
set_bit(MD_NO_BARRIER, &mdev->flags);
|
set_bit(MD_NO_BARRIER, &mdev->flags);
|
||||||
dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r);
|
dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r);
|
||||||
|
|
|
@ -987,7 +987,7 @@ static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d
|
||||||
|
|
||||||
if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
|
if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
|
||||||
rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
|
rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
|
||||||
NULL, BLKDEV_IFL_WAIT);
|
NULL);
|
||||||
if (rv) {
|
if (rv) {
|
||||||
dev_err(DEV, "local disk flush failed with status %d\n", rv);
|
dev_err(DEV, "local disk flush failed with status %d\n", rv);
|
||||||
/* would rather check on EOPNOTSUPP, but that is not reliable.
|
/* would rather check on EOPNOTSUPP, but that is not reliable.
|
||||||
|
|
|
@ -479,17 +479,17 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
|
||||||
pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
|
pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
|
||||||
|
|
||||||
if (bio_rw(bio) == WRITE) {
|
if (bio_rw(bio) == WRITE) {
|
||||||
bool barrier = !!(bio->bi_rw & REQ_HARDBARRIER);
|
|
||||||
struct file *file = lo->lo_backing_file;
|
struct file *file = lo->lo_backing_file;
|
||||||
|
|
||||||
if (barrier) {
|
/* REQ_HARDBARRIER is deprecated */
|
||||||
if (unlikely(!file->f_op->fsync)) {
|
if (bio->bi_rw & REQ_HARDBARRIER) {
|
||||||
ret = -EOPNOTSUPP;
|
ret = -EOPNOTSUPP;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (bio->bi_rw & REQ_FLUSH) {
|
||||||
ret = vfs_fsync(file, 0);
|
ret = vfs_fsync(file, 0);
|
||||||
if (unlikely(ret)) {
|
if (unlikely(ret && ret != -EINVAL)) {
|
||||||
ret = -EIO;
|
ret = -EIO;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
@ -497,9 +497,9 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
|
||||||
|
|
||||||
ret = lo_send(lo, bio, pos);
|
ret = lo_send(lo, bio, pos);
|
||||||
|
|
||||||
if (barrier && !ret) {
|
if ((bio->bi_rw & REQ_FUA) && !ret) {
|
||||||
ret = vfs_fsync(file, 0);
|
ret = vfs_fsync(file, 0);
|
||||||
if (unlikely(ret))
|
if (unlikely(ret && ret != -EINVAL))
|
||||||
ret = -EIO;
|
ret = -EIO;
|
||||||
}
|
}
|
||||||
} else
|
} else
|
||||||
|
@ -931,7 +931,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
|
||||||
lo->lo_queue->unplug_fn = loop_unplug;
|
lo->lo_queue->unplug_fn = loop_unplug;
|
||||||
|
|
||||||
if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
|
if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
|
||||||
blk_queue_ordered(lo->lo_queue, QUEUE_ORDERED_DRAIN);
|
blk_queue_flush(lo->lo_queue, REQ_FLUSH);
|
||||||
|
|
||||||
set_capacity(lo->lo_disk, size);
|
set_capacity(lo->lo_disk, size);
|
||||||
bd_set_size(bdev, size << 9);
|
bd_set_size(bdev, size << 9);
|
||||||
|
|
|
@ -310,8 +310,7 @@ static void osdblk_rq_fn(struct request_queue *q)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* filter out block requests we don't understand */
|
/* filter out block requests we don't understand */
|
||||||
if (rq->cmd_type != REQ_TYPE_FS &&
|
if (rq->cmd_type != REQ_TYPE_FS) {
|
||||||
!(rq->cmd_flags & REQ_HARDBARRIER)) {
|
|
||||||
blk_end_request_all(rq, 0);
|
blk_end_request_all(rq, 0);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -439,7 +438,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev)
|
||||||
blk_queue_stack_limits(q, osd_request_queue(osdev->osd));
|
blk_queue_stack_limits(q, osd_request_queue(osdev->osd));
|
||||||
|
|
||||||
blk_queue_prep_rq(q, blk_queue_start_tag);
|
blk_queue_prep_rq(q, blk_queue_start_tag);
|
||||||
blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH);
|
blk_queue_flush(q, REQ_FLUSH);
|
||||||
|
|
||||||
disk->queue = q;
|
disk->queue = q;
|
||||||
|
|
||||||
|
|
|
@ -753,7 +753,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
|
||||||
|
|
||||||
rq->timeout = 60*HZ;
|
rq->timeout = 60*HZ;
|
||||||
rq->cmd_type = REQ_TYPE_BLOCK_PC;
|
rq->cmd_type = REQ_TYPE_BLOCK_PC;
|
||||||
rq->cmd_flags |= REQ_HARDBARRIER;
|
|
||||||
if (cgc->quiet)
|
if (cgc->quiet)
|
||||||
rq->cmd_flags |= REQ_QUIET;
|
rq->cmd_flags |= REQ_QUIET;
|
||||||
|
|
||||||
|
|
|
@ -468,7 +468,7 @@ static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev)
|
||||||
blk_queue_dma_alignment(queue, dev->blk_size-1);
|
blk_queue_dma_alignment(queue, dev->blk_size-1);
|
||||||
blk_queue_logical_block_size(queue, dev->blk_size);
|
blk_queue_logical_block_size(queue, dev->blk_size);
|
||||||
|
|
||||||
blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH);
|
blk_queue_flush(queue, REQ_FLUSH);
|
||||||
|
|
||||||
blk_queue_max_segments(queue, -1);
|
blk_queue_max_segments(queue, -1);
|
||||||
blk_queue_max_segment_size(queue, dev->bounce_size);
|
blk_queue_max_segment_size(queue, dev->bounce_size);
|
||||||
|
|
|
@ -127,9 +127,6 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vbr->req->cmd_flags & REQ_HARDBARRIER)
|
|
||||||
vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER;
|
|
||||||
|
|
||||||
sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
|
sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -379,31 +376,9 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
|
||||||
vblk->disk->driverfs_dev = &vdev->dev;
|
vblk->disk->driverfs_dev = &vdev->dev;
|
||||||
index++;
|
index++;
|
||||||
|
|
||||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) {
|
/* configure queue flush support */
|
||||||
/*
|
if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
|
||||||
* If the FLUSH feature is supported we do have support for
|
blk_queue_flush(q, REQ_FLUSH);
|
||||||
* flushing a volatile write cache on the host. Use that
|
|
||||||
* to implement write barrier support.
|
|
||||||
*/
|
|
||||||
blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH);
|
|
||||||
} else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER)) {
|
|
||||||
/*
|
|
||||||
* If the BARRIER feature is supported the host expects us
|
|
||||||
* to order requests by tags. This implies there is no
|
|
||||||
* volatile write cache on the host, and that the host
|
|
||||||
* never re-orders outstanding I/O. This feature is not
|
|
||||||
* useful for real-life scenarios and is deprecated.
|
|
||||||
*/
|
|
||||||
blk_queue_ordered(q, QUEUE_ORDERED_TAG);
|
|
||||||
} else {
|
|
||||||
/*
|
|
||||||
* If the FLUSH feature is not supported we must assume that
|
|
||||||
* the host does not perform any kind of volatile write
|
|
||||||
* caching. We still need to drain the queue to provide
|
|
||||||
* proper barrier semantics.
|
|
||||||
*/
|
|
||||||
blk_queue_ordered(q, QUEUE_ORDERED_DRAIN);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If disk is read-only in the host, the guest should obey */
|
/* If disk is read-only in the host, the guest should obey */
|
||||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
|
if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
|
||||||
|
@ -522,9 +497,9 @@ static const struct virtio_device_id id_table[] = {
|
||||||
};
|
};
|
||||||
|
|
||||||
static unsigned int features[] = {
|
static unsigned int features[] = {
|
||||||
VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
|
VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
|
||||||
VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
|
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
|
||||||
VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
|
VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -96,7 +96,7 @@ struct blkfront_info
|
||||||
struct gnttab_free_callback callback;
|
struct gnttab_free_callback callback;
|
||||||
struct blk_shadow shadow[BLK_RING_SIZE];
|
struct blk_shadow shadow[BLK_RING_SIZE];
|
||||||
unsigned long shadow_free;
|
unsigned long shadow_free;
|
||||||
int feature_barrier;
|
unsigned int feature_flush;
|
||||||
int is_ready;
|
int is_ready;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -419,26 +419,12 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static int xlvbd_barrier(struct blkfront_info *info)
|
static void xlvbd_flush(struct blkfront_info *info)
|
||||||
{
|
{
|
||||||
int err;
|
blk_queue_flush(info->rq, info->feature_flush);
|
||||||
const char *barrier;
|
|
||||||
|
|
||||||
switch (info->feature_barrier) {
|
|
||||||
case QUEUE_ORDERED_DRAIN: barrier = "enabled (drain)"; break;
|
|
||||||
case QUEUE_ORDERED_TAG: barrier = "enabled (tag)"; break;
|
|
||||||
case QUEUE_ORDERED_NONE: barrier = "disabled"; break;
|
|
||||||
default: return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
err = blk_queue_ordered(info->rq, info->feature_barrier);
|
|
||||||
|
|
||||||
if (err)
|
|
||||||
return err;
|
|
||||||
|
|
||||||
printk(KERN_INFO "blkfront: %s: barriers %s\n",
|
printk(KERN_INFO "blkfront: %s: barriers %s\n",
|
||||||
info->gd->disk_name, barrier);
|
info->gd->disk_name,
|
||||||
return 0;
|
info->feature_flush ? "enabled" : "disabled");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -517,7 +503,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
|
||||||
info->rq = gd->queue;
|
info->rq = gd->queue;
|
||||||
info->gd = gd;
|
info->gd = gd;
|
||||||
|
|
||||||
xlvbd_barrier(info);
|
xlvbd_flush(info);
|
||||||
|
|
||||||
if (vdisk_info & VDISK_READONLY)
|
if (vdisk_info & VDISK_READONLY)
|
||||||
set_disk_ro(gd, 1);
|
set_disk_ro(gd, 1);
|
||||||
|
@ -663,8 +649,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
|
||||||
printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
|
printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
|
||||||
info->gd->disk_name);
|
info->gd->disk_name);
|
||||||
error = -EOPNOTSUPP;
|
error = -EOPNOTSUPP;
|
||||||
info->feature_barrier = QUEUE_ORDERED_NONE;
|
info->feature_flush = 0;
|
||||||
xlvbd_barrier(info);
|
xlvbd_flush(info);
|
||||||
}
|
}
|
||||||
/* fall through */
|
/* fall through */
|
||||||
case BLKIF_OP_READ:
|
case BLKIF_OP_READ:
|
||||||
|
@ -1077,20 +1063,20 @@ static void blkfront_connect(struct blkfront_info *info)
|
||||||
/*
|
/*
|
||||||
* If there's no "feature-barrier" defined, then it means
|
* If there's no "feature-barrier" defined, then it means
|
||||||
* we're dealing with a very old backend which writes
|
* we're dealing with a very old backend which writes
|
||||||
* synchronously; draining will do what needs to get done.
|
* synchronously; nothing to do.
|
||||||
*
|
*
|
||||||
* If there are barriers, then we can do full queued writes
|
* If there are barriers, then we use flush.
|
||||||
* with tagged barriers.
|
|
||||||
*
|
|
||||||
* If barriers are not supported, then there's not much we can
|
|
||||||
* do, so just set ordering to NONE.
|
|
||||||
*/
|
*/
|
||||||
if (err)
|
info->feature_flush = 0;
|
||||||
info->feature_barrier = QUEUE_ORDERED_DRAIN;
|
|
||||||
else if (barrier)
|
/*
|
||||||
info->feature_barrier = QUEUE_ORDERED_TAG;
|
* The driver doesn't properly handle empty flushes, so
|
||||||
else
|
* let's disable barrier support for now.
|
||||||
info->feature_barrier = QUEUE_ORDERED_NONE;
|
*/
|
||||||
|
#if 0
|
||||||
|
if (!err && barrier)
|
||||||
|
info->feature_flush = REQ_FLUSH;
|
||||||
|
#endif
|
||||||
|
|
||||||
err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
|
err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
|
||||||
if (err) {
|
if (err) {
|
||||||
|
|
|
@ -516,10 +516,10 @@ static int ide_do_setfeature(ide_drive_t *drive, u8 feature, u8 nsect)
|
||||||
return ide_no_data_taskfile(drive, &cmd);
|
return ide_no_data_taskfile(drive, &cmd);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void update_ordered(ide_drive_t *drive)
|
static void update_flush(ide_drive_t *drive)
|
||||||
{
|
{
|
||||||
u16 *id = drive->id;
|
u16 *id = drive->id;
|
||||||
unsigned ordered = QUEUE_ORDERED_NONE;
|
unsigned flush = 0;
|
||||||
|
|
||||||
if (drive->dev_flags & IDE_DFLAG_WCACHE) {
|
if (drive->dev_flags & IDE_DFLAG_WCACHE) {
|
||||||
unsigned long long capacity;
|
unsigned long long capacity;
|
||||||
|
@ -543,13 +543,12 @@ static void update_ordered(ide_drive_t *drive)
|
||||||
drive->name, barrier ? "" : "not ");
|
drive->name, barrier ? "" : "not ");
|
||||||
|
|
||||||
if (barrier) {
|
if (barrier) {
|
||||||
ordered = QUEUE_ORDERED_DRAIN_FLUSH;
|
flush = REQ_FLUSH;
|
||||||
blk_queue_prep_rq(drive->queue, idedisk_prep_fn);
|
blk_queue_prep_rq(drive->queue, idedisk_prep_fn);
|
||||||
}
|
}
|
||||||
} else
|
}
|
||||||
ordered = QUEUE_ORDERED_DRAIN;
|
|
||||||
|
|
||||||
blk_queue_ordered(drive->queue, ordered);
|
blk_queue_flush(drive->queue, flush);
|
||||||
}
|
}
|
||||||
|
|
||||||
ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE);
|
ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE);
|
||||||
|
@ -572,7 +571,7 @@ static int set_wcache(ide_drive_t *drive, int arg)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
update_ordered(drive);
|
update_flush(drive);
|
||||||
|
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
|
@ -441,19 +441,6 @@ void do_ide_request(struct request_queue *q)
|
||||||
struct request *rq = NULL;
|
struct request *rq = NULL;
|
||||||
ide_startstop_t startstop;
|
ide_startstop_t startstop;
|
||||||
|
|
||||||
/*
|
|
||||||
* drive is doing pre-flush, ordered write, post-flush sequence. even
|
|
||||||
* though that is 3 requests, it must be seen as a single transaction.
|
|
||||||
* we must not preempt this drive until that is complete
|
|
||||||
*/
|
|
||||||
if (blk_queue_flushing(q))
|
|
||||||
/*
|
|
||||||
* small race where queue could get replugged during
|
|
||||||
* the 3-request flush cycle, just yank the plug since
|
|
||||||
* we want it to finish asap
|
|
||||||
*/
|
|
||||||
blk_remove_plug(q);
|
|
||||||
|
|
||||||
spin_unlock_irq(q->queue_lock);
|
spin_unlock_irq(q->queue_lock);
|
||||||
|
|
||||||
/* HLD do_request() callback might sleep, make sure it's okay */
|
/* HLD do_request() callback might sleep, make sure it's okay */
|
||||||
|
|
|
@ -1278,7 +1278,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
|
||||||
struct dm_crypt_io *io;
|
struct dm_crypt_io *io;
|
||||||
struct crypt_config *cc;
|
struct crypt_config *cc;
|
||||||
|
|
||||||
if (unlikely(bio_empty_barrier(bio))) {
|
if (bio->bi_rw & REQ_FLUSH) {
|
||||||
cc = ti->private;
|
cc = ti->private;
|
||||||
bio->bi_bdev = cc->dev->bdev;
|
bio->bi_bdev = cc->dev->bdev;
|
||||||
return DM_MAPIO_REMAPPED;
|
return DM_MAPIO_REMAPPED;
|
||||||
|
|
|
@ -31,7 +31,6 @@ struct dm_io_client {
|
||||||
*/
|
*/
|
||||||
struct io {
|
struct io {
|
||||||
unsigned long error_bits;
|
unsigned long error_bits;
|
||||||
unsigned long eopnotsupp_bits;
|
|
||||||
atomic_t count;
|
atomic_t count;
|
||||||
struct task_struct *sleeper;
|
struct task_struct *sleeper;
|
||||||
struct dm_io_client *client;
|
struct dm_io_client *client;
|
||||||
|
@ -130,11 +129,8 @@ static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
|
||||||
*---------------------------------------------------------------*/
|
*---------------------------------------------------------------*/
|
||||||
static void dec_count(struct io *io, unsigned int region, int error)
|
static void dec_count(struct io *io, unsigned int region, int error)
|
||||||
{
|
{
|
||||||
if (error) {
|
if (error)
|
||||||
set_bit(region, &io->error_bits);
|
set_bit(region, &io->error_bits);
|
||||||
if (error == -EOPNOTSUPP)
|
|
||||||
set_bit(region, &io->eopnotsupp_bits);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (atomic_dec_and_test(&io->count)) {
|
if (atomic_dec_and_test(&io->count)) {
|
||||||
if (io->sleeper)
|
if (io->sleeper)
|
||||||
|
@ -310,8 +306,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
|
||||||
sector_t remaining = where->count;
|
sector_t remaining = where->count;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* where->count may be zero if rw holds a write barrier and we
|
* where->count may be zero if rw holds a flush and we need to
|
||||||
* need to send a zero-sized barrier.
|
* send a zero-sized flush.
|
||||||
*/
|
*/
|
||||||
do {
|
do {
|
||||||
/*
|
/*
|
||||||
|
@ -364,7 +360,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < num_regions; i++) {
|
for (i = 0; i < num_regions; i++) {
|
||||||
*dp = old_pages;
|
*dp = old_pages;
|
||||||
if (where[i].count || (rw & REQ_HARDBARRIER))
|
if (where[i].count || (rw & REQ_FLUSH))
|
||||||
do_region(rw, i, where + i, dp, io);
|
do_region(rw, i, where + i, dp, io);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -393,9 +389,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
|
||||||
return -EIO;
|
return -EIO;
|
||||||
}
|
}
|
||||||
|
|
||||||
retry:
|
|
||||||
io->error_bits = 0;
|
io->error_bits = 0;
|
||||||
io->eopnotsupp_bits = 0;
|
|
||||||
atomic_set(&io->count, 1); /* see dispatch_io() */
|
atomic_set(&io->count, 1); /* see dispatch_io() */
|
||||||
io->sleeper = current;
|
io->sleeper = current;
|
||||||
io->client = client;
|
io->client = client;
|
||||||
|
@ -412,11 +406,6 @@ retry:
|
||||||
}
|
}
|
||||||
set_current_state(TASK_RUNNING);
|
set_current_state(TASK_RUNNING);
|
||||||
|
|
||||||
if (io->eopnotsupp_bits && (rw & REQ_HARDBARRIER)) {
|
|
||||||
rw &= ~REQ_HARDBARRIER;
|
|
||||||
goto retry;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (error_bits)
|
if (error_bits)
|
||||||
*error_bits = io->error_bits;
|
*error_bits = io->error_bits;
|
||||||
|
|
||||||
|
@ -437,7 +426,6 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
|
||||||
|
|
||||||
io = mempool_alloc(client->pool, GFP_NOIO);
|
io = mempool_alloc(client->pool, GFP_NOIO);
|
||||||
io->error_bits = 0;
|
io->error_bits = 0;
|
||||||
io->eopnotsupp_bits = 0;
|
|
||||||
atomic_set(&io->count, 1); /* see dispatch_io() */
|
atomic_set(&io->count, 1); /* see dispatch_io() */
|
||||||
io->sleeper = NULL;
|
io->sleeper = NULL;
|
||||||
io->client = client;
|
io->client = client;
|
||||||
|
|
|
@ -300,7 +300,7 @@ static int flush_header(struct log_c *lc)
|
||||||
.count = 0,
|
.count = 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
lc->io_req.bi_rw = WRITE_BARRIER;
|
lc->io_req.bi_rw = WRITE_FLUSH;
|
||||||
|
|
||||||
return dm_io(&lc->io_req, 1, &null_location, NULL);
|
return dm_io(&lc->io_req, 1, &null_location, NULL);
|
||||||
}
|
}
|
||||||
|
|
|
@ -259,7 +259,7 @@ static int mirror_flush(struct dm_target *ti)
|
||||||
struct dm_io_region io[ms->nr_mirrors];
|
struct dm_io_region io[ms->nr_mirrors];
|
||||||
struct mirror *m;
|
struct mirror *m;
|
||||||
struct dm_io_request io_req = {
|
struct dm_io_request io_req = {
|
||||||
.bi_rw = WRITE_BARRIER,
|
.bi_rw = WRITE_FLUSH,
|
||||||
.mem.type = DM_IO_KMEM,
|
.mem.type = DM_IO_KMEM,
|
||||||
.mem.ptr.bvec = NULL,
|
.mem.ptr.bvec = NULL,
|
||||||
.client = ms->io_client,
|
.client = ms->io_client,
|
||||||
|
@ -629,7 +629,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
|
||||||
struct dm_io_region io[ms->nr_mirrors], *dest = io;
|
struct dm_io_region io[ms->nr_mirrors], *dest = io;
|
||||||
struct mirror *m;
|
struct mirror *m;
|
||||||
struct dm_io_request io_req = {
|
struct dm_io_request io_req = {
|
||||||
.bi_rw = WRITE | (bio->bi_rw & WRITE_BARRIER),
|
.bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA),
|
||||||
.mem.type = DM_IO_BVEC,
|
.mem.type = DM_IO_BVEC,
|
||||||
.mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
|
.mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
|
||||||
.notify.fn = write_callback,
|
.notify.fn = write_callback,
|
||||||
|
@ -670,7 +670,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
|
||||||
bio_list_init(&requeue);
|
bio_list_init(&requeue);
|
||||||
|
|
||||||
while ((bio = bio_list_pop(writes))) {
|
while ((bio = bio_list_pop(writes))) {
|
||||||
if (unlikely(bio_empty_barrier(bio))) {
|
if (bio->bi_rw & REQ_FLUSH) {
|
||||||
bio_list_add(&sync, bio);
|
bio_list_add(&sync, bio);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -1203,7 +1203,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
|
||||||
* We need to dec pending if this was a write.
|
* We need to dec pending if this was a write.
|
||||||
*/
|
*/
|
||||||
if (rw == WRITE) {
|
if (rw == WRITE) {
|
||||||
if (likely(!bio_empty_barrier(bio)))
|
if (!(bio->bi_rw & REQ_FLUSH))
|
||||||
dm_rh_dec(ms->rh, map_context->ll);
|
dm_rh_dec(ms->rh, map_context->ll);
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
|
@ -81,9 +81,9 @@ struct dm_region_hash {
|
||||||
struct list_head failed_recovered_regions;
|
struct list_head failed_recovered_regions;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If there was a barrier failure no regions can be marked clean.
|
* If there was a flush failure no regions can be marked clean.
|
||||||
*/
|
*/
|
||||||
int barrier_failure;
|
int flush_failure;
|
||||||
|
|
||||||
void *context;
|
void *context;
|
||||||
sector_t target_begin;
|
sector_t target_begin;
|
||||||
|
@ -217,7 +217,7 @@ struct dm_region_hash *dm_region_hash_create(
|
||||||
INIT_LIST_HEAD(&rh->quiesced_regions);
|
INIT_LIST_HEAD(&rh->quiesced_regions);
|
||||||
INIT_LIST_HEAD(&rh->recovered_regions);
|
INIT_LIST_HEAD(&rh->recovered_regions);
|
||||||
INIT_LIST_HEAD(&rh->failed_recovered_regions);
|
INIT_LIST_HEAD(&rh->failed_recovered_regions);
|
||||||
rh->barrier_failure = 0;
|
rh->flush_failure = 0;
|
||||||
|
|
||||||
rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
|
rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
|
||||||
sizeof(struct dm_region));
|
sizeof(struct dm_region));
|
||||||
|
@ -399,8 +399,8 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
|
||||||
region_t region = dm_rh_bio_to_region(rh, bio);
|
region_t region = dm_rh_bio_to_region(rh, bio);
|
||||||
int recovering = 0;
|
int recovering = 0;
|
||||||
|
|
||||||
if (bio_empty_barrier(bio)) {
|
if (bio->bi_rw & REQ_FLUSH) {
|
||||||
rh->barrier_failure = 1;
|
rh->flush_failure = 1;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -524,7 +524,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
|
||||||
struct bio *bio;
|
struct bio *bio;
|
||||||
|
|
||||||
for (bio = bios->head; bio; bio = bio->bi_next) {
|
for (bio = bios->head; bio; bio = bio->bi_next) {
|
||||||
if (bio_empty_barrier(bio))
|
if (bio->bi_rw & REQ_FLUSH)
|
||||||
continue;
|
continue;
|
||||||
rh_inc(rh, dm_rh_bio_to_region(rh, bio));
|
rh_inc(rh, dm_rh_bio_to_region(rh, bio));
|
||||||
}
|
}
|
||||||
|
@ -555,9 +555,9 @@ void dm_rh_dec(struct dm_region_hash *rh, region_t region)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* do nothing for DM_RH_NOSYNC */
|
/* do nothing for DM_RH_NOSYNC */
|
||||||
if (unlikely(rh->barrier_failure)) {
|
if (unlikely(rh->flush_failure)) {
|
||||||
/*
|
/*
|
||||||
* If a write barrier failed some time ago, we
|
* If a write flush failed some time ago, we
|
||||||
* don't know whether or not this write made it
|
* don't know whether or not this write made it
|
||||||
* to the disk, so we must resync the device.
|
* to the disk, so we must resync the device.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -687,7 +687,7 @@ static void persistent_commit_exception(struct dm_exception_store *store,
|
||||||
/*
|
/*
|
||||||
* Commit exceptions to disk.
|
* Commit exceptions to disk.
|
||||||
*/
|
*/
|
||||||
if (ps->valid && area_io(ps, WRITE_BARRIER))
|
if (ps->valid && area_io(ps, WRITE_FLUSH_FUA))
|
||||||
ps->valid = 0;
|
ps->valid = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -1585,7 +1585,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
|
||||||
chunk_t chunk;
|
chunk_t chunk;
|
||||||
struct dm_snap_pending_exception *pe = NULL;
|
struct dm_snap_pending_exception *pe = NULL;
|
||||||
|
|
||||||
if (unlikely(bio_empty_barrier(bio))) {
|
if (bio->bi_rw & REQ_FLUSH) {
|
||||||
bio->bi_bdev = s->cow->bdev;
|
bio->bi_bdev = s->cow->bdev;
|
||||||
return DM_MAPIO_REMAPPED;
|
return DM_MAPIO_REMAPPED;
|
||||||
}
|
}
|
||||||
|
@ -1689,7 +1689,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
|
||||||
int r = DM_MAPIO_REMAPPED;
|
int r = DM_MAPIO_REMAPPED;
|
||||||
chunk_t chunk;
|
chunk_t chunk;
|
||||||
|
|
||||||
if (unlikely(bio_empty_barrier(bio))) {
|
if (bio->bi_rw & REQ_FLUSH) {
|
||||||
if (!map_context->target_request_nr)
|
if (!map_context->target_request_nr)
|
||||||
bio->bi_bdev = s->origin->bdev;
|
bio->bi_bdev = s->origin->bdev;
|
||||||
else
|
else
|
||||||
|
@ -2133,7 +2133,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio,
|
||||||
struct dm_dev *dev = ti->private;
|
struct dm_dev *dev = ti->private;
|
||||||
bio->bi_bdev = dev->bdev;
|
bio->bi_bdev = dev->bdev;
|
||||||
|
|
||||||
if (unlikely(bio_empty_barrier(bio)))
|
if (bio->bi_rw & REQ_FLUSH)
|
||||||
return DM_MAPIO_REMAPPED;
|
return DM_MAPIO_REMAPPED;
|
||||||
|
|
||||||
/* Only tell snapshots if this is a write */
|
/* Only tell snapshots if this is a write */
|
||||||
|
|
|
@ -271,7 +271,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio,
|
||||||
uint32_t stripe;
|
uint32_t stripe;
|
||||||
unsigned target_request_nr;
|
unsigned target_request_nr;
|
||||||
|
|
||||||
if (unlikely(bio_empty_barrier(bio))) {
|
if (bio->bi_rw & REQ_FLUSH) {
|
||||||
target_request_nr = map_context->target_request_nr;
|
target_request_nr = map_context->target_request_nr;
|
||||||
BUG_ON(target_request_nr >= sc->stripes);
|
BUG_ON(target_request_nr >= sc->stripes);
|
||||||
bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
|
bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
|
||||||
|
|
398
drivers/md/dm.c
398
drivers/md/dm.c
|
@ -110,7 +110,6 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
|
||||||
#define DMF_FREEING 3
|
#define DMF_FREEING 3
|
||||||
#define DMF_DELETING 4
|
#define DMF_DELETING 4
|
||||||
#define DMF_NOFLUSH_SUSPENDING 5
|
#define DMF_NOFLUSH_SUSPENDING 5
|
||||||
#define DMF_QUEUE_IO_TO_THREAD 6
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Work processed by per-device workqueue.
|
* Work processed by per-device workqueue.
|
||||||
|
@ -144,24 +143,9 @@ struct mapped_device {
|
||||||
spinlock_t deferred_lock;
|
spinlock_t deferred_lock;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* An error from the barrier request currently being processed.
|
* Processing queue (flush)
|
||||||
*/
|
|
||||||
int barrier_error;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Protect barrier_error from concurrent endio processing
|
|
||||||
* in request-based dm.
|
|
||||||
*/
|
|
||||||
spinlock_t barrier_error_lock;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Processing queue (flush/barriers)
|
|
||||||
*/
|
*/
|
||||||
struct workqueue_struct *wq;
|
struct workqueue_struct *wq;
|
||||||
struct work_struct barrier_work;
|
|
||||||
|
|
||||||
/* A pointer to the currently processing pre/post flush request */
|
|
||||||
struct request *flush_request;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The current mapping.
|
* The current mapping.
|
||||||
|
@ -200,8 +184,8 @@ struct mapped_device {
|
||||||
/* sysfs handle */
|
/* sysfs handle */
|
||||||
struct kobject kobj;
|
struct kobject kobj;
|
||||||
|
|
||||||
/* zero-length barrier that will be cloned and submitted to targets */
|
/* zero-length flush that will be cloned and submitted to targets */
|
||||||
struct bio barrier_bio;
|
struct bio flush_bio;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -512,7 +496,7 @@ static void end_io_acct(struct dm_io *io)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* After this is decremented the bio must not be touched if it is
|
* After this is decremented the bio must not be touched if it is
|
||||||
* a barrier.
|
* a flush.
|
||||||
*/
|
*/
|
||||||
dm_disk(md)->part0.in_flight[rw] = pending =
|
dm_disk(md)->part0.in_flight[rw] = pending =
|
||||||
atomic_dec_return(&md->pending[rw]);
|
atomic_dec_return(&md->pending[rw]);
|
||||||
|
@ -528,16 +512,12 @@ static void end_io_acct(struct dm_io *io)
|
||||||
*/
|
*/
|
||||||
static void queue_io(struct mapped_device *md, struct bio *bio)
|
static void queue_io(struct mapped_device *md, struct bio *bio)
|
||||||
{
|
{
|
||||||
down_write(&md->io_lock);
|
unsigned long flags;
|
||||||
|
|
||||||
spin_lock_irq(&md->deferred_lock);
|
spin_lock_irqsave(&md->deferred_lock, flags);
|
||||||
bio_list_add(&md->deferred, bio);
|
bio_list_add(&md->deferred, bio);
|
||||||
spin_unlock_irq(&md->deferred_lock);
|
spin_unlock_irqrestore(&md->deferred_lock, flags);
|
||||||
|
queue_work(md->wq, &md->work);
|
||||||
if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags))
|
|
||||||
queue_work(md->wq, &md->work);
|
|
||||||
|
|
||||||
up_write(&md->io_lock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -625,11 +605,9 @@ static void dec_pending(struct dm_io *io, int error)
|
||||||
* Target requested pushing back the I/O.
|
* Target requested pushing back the I/O.
|
||||||
*/
|
*/
|
||||||
spin_lock_irqsave(&md->deferred_lock, flags);
|
spin_lock_irqsave(&md->deferred_lock, flags);
|
||||||
if (__noflush_suspending(md)) {
|
if (__noflush_suspending(md))
|
||||||
if (!(io->bio->bi_rw & REQ_HARDBARRIER))
|
bio_list_add_head(&md->deferred, io->bio);
|
||||||
bio_list_add_head(&md->deferred,
|
else
|
||||||
io->bio);
|
|
||||||
} else
|
|
||||||
/* noflush suspend was interrupted. */
|
/* noflush suspend was interrupted. */
|
||||||
io->error = -EIO;
|
io->error = -EIO;
|
||||||
spin_unlock_irqrestore(&md->deferred_lock, flags);
|
spin_unlock_irqrestore(&md->deferred_lock, flags);
|
||||||
|
@ -637,32 +615,23 @@ static void dec_pending(struct dm_io *io, int error)
|
||||||
|
|
||||||
io_error = io->error;
|
io_error = io->error;
|
||||||
bio = io->bio;
|
bio = io->bio;
|
||||||
|
end_io_acct(io);
|
||||||
|
free_io(md, io);
|
||||||
|
|
||||||
if (bio->bi_rw & REQ_HARDBARRIER) {
|
if (io_error == DM_ENDIO_REQUEUE)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if ((bio->bi_rw & REQ_FLUSH) && bio->bi_size) {
|
||||||
/*
|
/*
|
||||||
* There can be just one barrier request so we use
|
* Preflush done for flush with data, reissue
|
||||||
* a per-device variable for error reporting.
|
* without REQ_FLUSH.
|
||||||
* Note that you can't touch the bio after end_io_acct
|
|
||||||
*
|
|
||||||
* We ignore -EOPNOTSUPP for empty flush reported by
|
|
||||||
* underlying devices. We assume that if the device
|
|
||||||
* doesn't support empty barriers, it doesn't need
|
|
||||||
* cache flushing commands.
|
|
||||||
*/
|
*/
|
||||||
if (!md->barrier_error &&
|
bio->bi_rw &= ~REQ_FLUSH;
|
||||||
!(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP))
|
queue_io(md, bio);
|
||||||
md->barrier_error = io_error;
|
|
||||||
end_io_acct(io);
|
|
||||||
free_io(md, io);
|
|
||||||
} else {
|
} else {
|
||||||
end_io_acct(io);
|
/* done with normal IO or empty flush */
|
||||||
free_io(md, io);
|
trace_block_bio_complete(md->queue, bio);
|
||||||
|
bio_endio(bio, io_error);
|
||||||
if (io_error != DM_ENDIO_REQUEUE) {
|
|
||||||
trace_block_bio_complete(md->queue, bio);
|
|
||||||
|
|
||||||
bio_endio(bio, io_error);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -755,23 +724,6 @@ static void end_clone_bio(struct bio *clone, int error)
|
||||||
blk_update_request(tio->orig, 0, nr_bytes);
|
blk_update_request(tio->orig, 0, nr_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void store_barrier_error(struct mapped_device *md, int error)
|
|
||||||
{
|
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
spin_lock_irqsave(&md->barrier_error_lock, flags);
|
|
||||||
/*
|
|
||||||
* Basically, the first error is taken, but:
|
|
||||||
* -EOPNOTSUPP supersedes any I/O error.
|
|
||||||
* Requeue request supersedes any I/O error but -EOPNOTSUPP.
|
|
||||||
*/
|
|
||||||
if (!md->barrier_error || error == -EOPNOTSUPP ||
|
|
||||||
(md->barrier_error != -EOPNOTSUPP &&
|
|
||||||
error == DM_ENDIO_REQUEUE))
|
|
||||||
md->barrier_error = error;
|
|
||||||
spin_unlock_irqrestore(&md->barrier_error_lock, flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Don't touch any member of the md after calling this function because
|
* Don't touch any member of the md after calling this function because
|
||||||
* the md may be freed in dm_put() at the end of this function.
|
* the md may be freed in dm_put() at the end of this function.
|
||||||
|
@ -809,13 +761,11 @@ static void free_rq_clone(struct request *clone)
|
||||||
static void dm_end_request(struct request *clone, int error)
|
static void dm_end_request(struct request *clone, int error)
|
||||||
{
|
{
|
||||||
int rw = rq_data_dir(clone);
|
int rw = rq_data_dir(clone);
|
||||||
int run_queue = 1;
|
|
||||||
bool is_barrier = clone->cmd_flags & REQ_HARDBARRIER;
|
|
||||||
struct dm_rq_target_io *tio = clone->end_io_data;
|
struct dm_rq_target_io *tio = clone->end_io_data;
|
||||||
struct mapped_device *md = tio->md;
|
struct mapped_device *md = tio->md;
|
||||||
struct request *rq = tio->orig;
|
struct request *rq = tio->orig;
|
||||||
|
|
||||||
if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !is_barrier) {
|
if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
|
||||||
rq->errors = clone->errors;
|
rq->errors = clone->errors;
|
||||||
rq->resid_len = clone->resid_len;
|
rq->resid_len = clone->resid_len;
|
||||||
|
|
||||||
|
@ -829,15 +779,8 @@ static void dm_end_request(struct request *clone, int error)
|
||||||
}
|
}
|
||||||
|
|
||||||
free_rq_clone(clone);
|
free_rq_clone(clone);
|
||||||
|
blk_end_request_all(rq, error);
|
||||||
if (unlikely(is_barrier)) {
|
rq_completed(md, rw, true);
|
||||||
if (unlikely(error))
|
|
||||||
store_barrier_error(md, error);
|
|
||||||
run_queue = 0;
|
|
||||||
} else
|
|
||||||
blk_end_request_all(rq, error);
|
|
||||||
|
|
||||||
rq_completed(md, rw, run_queue);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void dm_unprep_request(struct request *rq)
|
static void dm_unprep_request(struct request *rq)
|
||||||
|
@ -862,16 +805,6 @@ void dm_requeue_unmapped_request(struct request *clone)
|
||||||
struct request_queue *q = rq->q;
|
struct request_queue *q = rq->q;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
|
|
||||||
/*
|
|
||||||
* Barrier clones share an original request.
|
|
||||||
* Leave it to dm_end_request(), which handles this special
|
|
||||||
* case.
|
|
||||||
*/
|
|
||||||
dm_end_request(clone, DM_ENDIO_REQUEUE);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
dm_unprep_request(rq);
|
dm_unprep_request(rq);
|
||||||
|
|
||||||
spin_lock_irqsave(q->queue_lock, flags);
|
spin_lock_irqsave(q->queue_lock, flags);
|
||||||
|
@ -961,19 +894,6 @@ static void dm_complete_request(struct request *clone, int error)
|
||||||
struct dm_rq_target_io *tio = clone->end_io_data;
|
struct dm_rq_target_io *tio = clone->end_io_data;
|
||||||
struct request *rq = tio->orig;
|
struct request *rq = tio->orig;
|
||||||
|
|
||||||
if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
|
|
||||||
/*
|
|
||||||
* Barrier clones share an original request. So can't use
|
|
||||||
* softirq_done with the original.
|
|
||||||
* Pass the clone to dm_done() directly in this special case.
|
|
||||||
* It is safe (even if clone->q->queue_lock is held here)
|
|
||||||
* because there is no I/O dispatching during the completion
|
|
||||||
* of barrier clone.
|
|
||||||
*/
|
|
||||||
dm_done(clone, error, true);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
tio->error = error;
|
tio->error = error;
|
||||||
rq->completion_data = clone;
|
rq->completion_data = clone;
|
||||||
blk_complete_request(rq);
|
blk_complete_request(rq);
|
||||||
|
@ -990,17 +910,6 @@ void dm_kill_unmapped_request(struct request *clone, int error)
|
||||||
struct dm_rq_target_io *tio = clone->end_io_data;
|
struct dm_rq_target_io *tio = clone->end_io_data;
|
||||||
struct request *rq = tio->orig;
|
struct request *rq = tio->orig;
|
||||||
|
|
||||||
if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
|
|
||||||
/*
|
|
||||||
* Barrier clones share an original request.
|
|
||||||
* Leave it to dm_end_request(), which handles this special
|
|
||||||
* case.
|
|
||||||
*/
|
|
||||||
BUG_ON(error > 0);
|
|
||||||
dm_end_request(clone, error);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
rq->cmd_flags |= REQ_FAILED;
|
rq->cmd_flags |= REQ_FAILED;
|
||||||
dm_complete_request(clone, error);
|
dm_complete_request(clone, error);
|
||||||
}
|
}
|
||||||
|
@ -1119,7 +1028,7 @@ static void dm_bio_destructor(struct bio *bio)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Creates a little bio that is just does part of a bvec.
|
* Creates a little bio that just does part of a bvec.
|
||||||
*/
|
*/
|
||||||
static struct bio *split_bvec(struct bio *bio, sector_t sector,
|
static struct bio *split_bvec(struct bio *bio, sector_t sector,
|
||||||
unsigned short idx, unsigned int offset,
|
unsigned short idx, unsigned int offset,
|
||||||
|
@ -1134,7 +1043,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
|
||||||
|
|
||||||
clone->bi_sector = sector;
|
clone->bi_sector = sector;
|
||||||
clone->bi_bdev = bio->bi_bdev;
|
clone->bi_bdev = bio->bi_bdev;
|
||||||
clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER;
|
clone->bi_rw = bio->bi_rw;
|
||||||
clone->bi_vcnt = 1;
|
clone->bi_vcnt = 1;
|
||||||
clone->bi_size = to_bytes(len);
|
clone->bi_size = to_bytes(len);
|
||||||
clone->bi_io_vec->bv_offset = offset;
|
clone->bi_io_vec->bv_offset = offset;
|
||||||
|
@ -1161,7 +1070,6 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
|
||||||
|
|
||||||
clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
|
clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
|
||||||
__bio_clone(clone, bio);
|
__bio_clone(clone, bio);
|
||||||
clone->bi_rw &= ~REQ_HARDBARRIER;
|
|
||||||
clone->bi_destructor = dm_bio_destructor;
|
clone->bi_destructor = dm_bio_destructor;
|
||||||
clone->bi_sector = sector;
|
clone->bi_sector = sector;
|
||||||
clone->bi_idx = idx;
|
clone->bi_idx = idx;
|
||||||
|
@ -1225,16 +1133,15 @@ static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti,
|
||||||
__issue_target_request(ci, ti, request_nr, len);
|
__issue_target_request(ci, ti, request_nr, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __clone_and_map_empty_barrier(struct clone_info *ci)
|
static int __clone_and_map_empty_flush(struct clone_info *ci)
|
||||||
{
|
{
|
||||||
unsigned target_nr = 0;
|
unsigned target_nr = 0;
|
||||||
struct dm_target *ti;
|
struct dm_target *ti;
|
||||||
|
|
||||||
|
BUG_ON(bio_has_data(ci->bio));
|
||||||
while ((ti = dm_table_get_target(ci->map, target_nr++)))
|
while ((ti = dm_table_get_target(ci->map, target_nr++)))
|
||||||
__issue_target_requests(ci, ti, ti->num_flush_requests, 0);
|
__issue_target_requests(ci, ti, ti->num_flush_requests, 0);
|
||||||
|
|
||||||
ci->sector_count = 0;
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1289,9 +1196,6 @@ static int __clone_and_map(struct clone_info *ci)
|
||||||
sector_t len = 0, max;
|
sector_t len = 0, max;
|
||||||
struct dm_target_io *tio;
|
struct dm_target_io *tio;
|
||||||
|
|
||||||
if (unlikely(bio_empty_barrier(bio)))
|
|
||||||
return __clone_and_map_empty_barrier(ci);
|
|
||||||
|
|
||||||
if (unlikely(bio->bi_rw & REQ_DISCARD))
|
if (unlikely(bio->bi_rw & REQ_DISCARD))
|
||||||
return __clone_and_map_discard(ci);
|
return __clone_and_map_discard(ci);
|
||||||
|
|
||||||
|
@ -1383,16 +1287,11 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
|
||||||
|
|
||||||
ci.map = dm_get_live_table(md);
|
ci.map = dm_get_live_table(md);
|
||||||
if (unlikely(!ci.map)) {
|
if (unlikely(!ci.map)) {
|
||||||
if (!(bio->bi_rw & REQ_HARDBARRIER))
|
bio_io_error(bio);
|
||||||
bio_io_error(bio);
|
|
||||||
else
|
|
||||||
if (!md->barrier_error)
|
|
||||||
md->barrier_error = -EIO;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
ci.md = md;
|
ci.md = md;
|
||||||
ci.bio = bio;
|
|
||||||
ci.io = alloc_io(md);
|
ci.io = alloc_io(md);
|
||||||
ci.io->error = 0;
|
ci.io->error = 0;
|
||||||
atomic_set(&ci.io->io_count, 1);
|
atomic_set(&ci.io->io_count, 1);
|
||||||
|
@ -1400,14 +1299,20 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
|
||||||
ci.io->md = md;
|
ci.io->md = md;
|
||||||
spin_lock_init(&ci.io->endio_lock);
|
spin_lock_init(&ci.io->endio_lock);
|
||||||
ci.sector = bio->bi_sector;
|
ci.sector = bio->bi_sector;
|
||||||
ci.sector_count = bio_sectors(bio);
|
|
||||||
if (unlikely(bio_empty_barrier(bio)))
|
|
||||||
ci.sector_count = 1;
|
|
||||||
ci.idx = bio->bi_idx;
|
ci.idx = bio->bi_idx;
|
||||||
|
|
||||||
start_io_acct(ci.io);
|
start_io_acct(ci.io);
|
||||||
while (ci.sector_count && !error)
|
if (bio->bi_rw & REQ_FLUSH) {
|
||||||
error = __clone_and_map(&ci);
|
ci.bio = &ci.md->flush_bio;
|
||||||
|
ci.sector_count = 0;
|
||||||
|
error = __clone_and_map_empty_flush(&ci);
|
||||||
|
/* dec_pending submits any data associated with flush */
|
||||||
|
} else {
|
||||||
|
ci.bio = bio;
|
||||||
|
ci.sector_count = bio_sectors(bio);
|
||||||
|
while (ci.sector_count && !error)
|
||||||
|
error = __clone_and_map(&ci);
|
||||||
|
}
|
||||||
|
|
||||||
/* drop the extra reference count */
|
/* drop the extra reference count */
|
||||||
dec_pending(ci.io, error);
|
dec_pending(ci.io, error);
|
||||||
|
@ -1491,22 +1396,14 @@ static int _dm_request(struct request_queue *q, struct bio *bio)
|
||||||
part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio));
|
part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio));
|
||||||
part_stat_unlock();
|
part_stat_unlock();
|
||||||
|
|
||||||
/*
|
/* if we're suspended, we have to queue this io for later */
|
||||||
* If we're suspended or the thread is processing barriers
|
if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
|
||||||
* we have to queue this io for later.
|
|
||||||
*/
|
|
||||||
if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
|
|
||||||
unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
|
|
||||||
up_read(&md->io_lock);
|
up_read(&md->io_lock);
|
||||||
|
|
||||||
if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
|
if (bio_rw(bio) != READA)
|
||||||
bio_rw(bio) == READA) {
|
queue_io(md, bio);
|
||||||
|
else
|
||||||
bio_io_error(bio);
|
bio_io_error(bio);
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
queue_io(md, bio);
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1537,14 +1434,6 @@ static int dm_request(struct request_queue *q, struct bio *bio)
|
||||||
return _dm_request(q, bio);
|
return _dm_request(q, bio);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool dm_rq_is_flush_request(struct request *rq)
|
|
||||||
{
|
|
||||||
if (rq->cmd_flags & REQ_FLUSH)
|
|
||||||
return true;
|
|
||||||
else
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void dm_dispatch_request(struct request *rq)
|
void dm_dispatch_request(struct request *rq)
|
||||||
{
|
{
|
||||||
int r;
|
int r;
|
||||||
|
@ -1592,22 +1481,15 @@ static int setup_clone(struct request *clone, struct request *rq,
|
||||||
{
|
{
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
if (dm_rq_is_flush_request(rq)) {
|
r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
|
||||||
blk_rq_init(NULL, clone);
|
dm_rq_bio_constructor, tio);
|
||||||
clone->cmd_type = REQ_TYPE_FS;
|
if (r)
|
||||||
clone->cmd_flags |= (REQ_HARDBARRIER | WRITE);
|
return r;
|
||||||
} else {
|
|
||||||
r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
|
|
||||||
dm_rq_bio_constructor, tio);
|
|
||||||
if (r)
|
|
||||||
return r;
|
|
||||||
|
|
||||||
clone->cmd = rq->cmd;
|
|
||||||
clone->cmd_len = rq->cmd_len;
|
|
||||||
clone->sense = rq->sense;
|
|
||||||
clone->buffer = rq->buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
clone->cmd = rq->cmd;
|
||||||
|
clone->cmd_len = rq->cmd_len;
|
||||||
|
clone->sense = rq->sense;
|
||||||
|
clone->buffer = rq->buffer;
|
||||||
clone->end_io = end_clone_request;
|
clone->end_io = end_clone_request;
|
||||||
clone->end_io_data = tio;
|
clone->end_io_data = tio;
|
||||||
|
|
||||||
|
@ -1648,9 +1530,6 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
|
||||||
struct mapped_device *md = q->queuedata;
|
struct mapped_device *md = q->queuedata;
|
||||||
struct request *clone;
|
struct request *clone;
|
||||||
|
|
||||||
if (unlikely(dm_rq_is_flush_request(rq)))
|
|
||||||
return BLKPREP_OK;
|
|
||||||
|
|
||||||
if (unlikely(rq->special)) {
|
if (unlikely(rq->special)) {
|
||||||
DMWARN("Already has something in rq->special.");
|
DMWARN("Already has something in rq->special.");
|
||||||
return BLKPREP_KILL;
|
return BLKPREP_KILL;
|
||||||
|
@ -1727,6 +1606,7 @@ static void dm_request_fn(struct request_queue *q)
|
||||||
struct dm_table *map = dm_get_live_table(md);
|
struct dm_table *map = dm_get_live_table(md);
|
||||||
struct dm_target *ti;
|
struct dm_target *ti;
|
||||||
struct request *rq, *clone;
|
struct request *rq, *clone;
|
||||||
|
sector_t pos;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For suspend, check blk_queue_stopped() and increment
|
* For suspend, check blk_queue_stopped() and increment
|
||||||
|
@ -1739,15 +1619,14 @@ static void dm_request_fn(struct request_queue *q)
|
||||||
if (!rq)
|
if (!rq)
|
||||||
goto plug_and_out;
|
goto plug_and_out;
|
||||||
|
|
||||||
if (unlikely(dm_rq_is_flush_request(rq))) {
|
/* always use block 0 to find the target for flushes for now */
|
||||||
BUG_ON(md->flush_request);
|
pos = 0;
|
||||||
md->flush_request = rq;
|
if (!(rq->cmd_flags & REQ_FLUSH))
|
||||||
blk_start_request(rq);
|
pos = blk_rq_pos(rq);
|
||||||
queue_work(md->wq, &md->barrier_work);
|
|
||||||
goto out;
|
ti = dm_table_find_target(map, pos);
|
||||||
}
|
BUG_ON(!dm_target_is_valid(ti));
|
||||||
|
|
||||||
ti = dm_table_find_target(map, blk_rq_pos(rq));
|
|
||||||
if (ti->type->busy && ti->type->busy(ti))
|
if (ti->type->busy && ti->type->busy(ti))
|
||||||
goto plug_and_out;
|
goto plug_and_out;
|
||||||
|
|
||||||
|
@ -1918,7 +1797,6 @@ out:
|
||||||
static const struct block_device_operations dm_blk_dops;
|
static const struct block_device_operations dm_blk_dops;
|
||||||
|
|
||||||
static void dm_wq_work(struct work_struct *work);
|
static void dm_wq_work(struct work_struct *work);
|
||||||
static void dm_rq_barrier_work(struct work_struct *work);
|
|
||||||
|
|
||||||
static void dm_init_md_queue(struct mapped_device *md)
|
static void dm_init_md_queue(struct mapped_device *md)
|
||||||
{
|
{
|
||||||
|
@ -1940,6 +1818,7 @@ static void dm_init_md_queue(struct mapped_device *md)
|
||||||
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
|
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
|
||||||
md->queue->unplug_fn = dm_unplug_all;
|
md->queue->unplug_fn = dm_unplug_all;
|
||||||
blk_queue_merge_bvec(md->queue, dm_merge_bvec);
|
blk_queue_merge_bvec(md->queue, dm_merge_bvec);
|
||||||
|
blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1972,7 +1851,6 @@ static struct mapped_device *alloc_dev(int minor)
|
||||||
mutex_init(&md->suspend_lock);
|
mutex_init(&md->suspend_lock);
|
||||||
mutex_init(&md->type_lock);
|
mutex_init(&md->type_lock);
|
||||||
spin_lock_init(&md->deferred_lock);
|
spin_lock_init(&md->deferred_lock);
|
||||||
spin_lock_init(&md->barrier_error_lock);
|
|
||||||
rwlock_init(&md->map_lock);
|
rwlock_init(&md->map_lock);
|
||||||
atomic_set(&md->holders, 1);
|
atomic_set(&md->holders, 1);
|
||||||
atomic_set(&md->open_count, 0);
|
atomic_set(&md->open_count, 0);
|
||||||
|
@ -1995,7 +1873,6 @@ static struct mapped_device *alloc_dev(int minor)
|
||||||
atomic_set(&md->pending[1], 0);
|
atomic_set(&md->pending[1], 0);
|
||||||
init_waitqueue_head(&md->wait);
|
init_waitqueue_head(&md->wait);
|
||||||
INIT_WORK(&md->work, dm_wq_work);
|
INIT_WORK(&md->work, dm_wq_work);
|
||||||
INIT_WORK(&md->barrier_work, dm_rq_barrier_work);
|
|
||||||
init_waitqueue_head(&md->eventq);
|
init_waitqueue_head(&md->eventq);
|
||||||
|
|
||||||
md->disk->major = _major;
|
md->disk->major = _major;
|
||||||
|
@ -2015,6 +1892,10 @@ static struct mapped_device *alloc_dev(int minor)
|
||||||
if (!md->bdev)
|
if (!md->bdev)
|
||||||
goto bad_bdev;
|
goto bad_bdev;
|
||||||
|
|
||||||
|
bio_init(&md->flush_bio);
|
||||||
|
md->flush_bio.bi_bdev = md->bdev;
|
||||||
|
md->flush_bio.bi_rw = WRITE_FLUSH;
|
||||||
|
|
||||||
/* Populate the mapping, nobody knows we exist yet */
|
/* Populate the mapping, nobody knows we exist yet */
|
||||||
spin_lock(&_minor_lock);
|
spin_lock(&_minor_lock);
|
||||||
old_md = idr_replace(&_minor_idr, md, minor);
|
old_md = idr_replace(&_minor_idr, md, minor);
|
||||||
|
@ -2245,7 +2126,6 @@ static int dm_init_request_based_queue(struct mapped_device *md)
|
||||||
blk_queue_softirq_done(md->queue, dm_softirq_done);
|
blk_queue_softirq_done(md->queue, dm_softirq_done);
|
||||||
blk_queue_prep_rq(md->queue, dm_prep_fn);
|
blk_queue_prep_rq(md->queue, dm_prep_fn);
|
||||||
blk_queue_lld_busy(md->queue, dm_lld_busy);
|
blk_queue_lld_busy(md->queue, dm_lld_busy);
|
||||||
blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH);
|
|
||||||
|
|
||||||
elv_register_queue(md->queue);
|
elv_register_queue(md->queue);
|
||||||
|
|
||||||
|
@ -2406,43 +2286,6 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void dm_flush(struct mapped_device *md)
|
|
||||||
{
|
|
||||||
dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
|
|
||||||
|
|
||||||
bio_init(&md->barrier_bio);
|
|
||||||
md->barrier_bio.bi_bdev = md->bdev;
|
|
||||||
md->barrier_bio.bi_rw = WRITE_BARRIER;
|
|
||||||
__split_and_process_bio(md, &md->barrier_bio);
|
|
||||||
|
|
||||||
dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void process_barrier(struct mapped_device *md, struct bio *bio)
|
|
||||||
{
|
|
||||||
md->barrier_error = 0;
|
|
||||||
|
|
||||||
dm_flush(md);
|
|
||||||
|
|
||||||
if (!bio_empty_barrier(bio)) {
|
|
||||||
__split_and_process_bio(md, bio);
|
|
||||||
/*
|
|
||||||
* If the request isn't supported, don't waste time with
|
|
||||||
* the second flush.
|
|
||||||
*/
|
|
||||||
if (md->barrier_error != -EOPNOTSUPP)
|
|
||||||
dm_flush(md);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (md->barrier_error != DM_ENDIO_REQUEUE)
|
|
||||||
bio_endio(bio, md->barrier_error);
|
|
||||||
else {
|
|
||||||
spin_lock_irq(&md->deferred_lock);
|
|
||||||
bio_list_add_head(&md->deferred, bio);
|
|
||||||
spin_unlock_irq(&md->deferred_lock);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Process the deferred bios
|
* Process the deferred bios
|
||||||
*/
|
*/
|
||||||
|
@ -2452,33 +2295,27 @@ static void dm_wq_work(struct work_struct *work)
|
||||||
work);
|
work);
|
||||||
struct bio *c;
|
struct bio *c;
|
||||||
|
|
||||||
down_write(&md->io_lock);
|
down_read(&md->io_lock);
|
||||||
|
|
||||||
while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
|
while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
|
||||||
spin_lock_irq(&md->deferred_lock);
|
spin_lock_irq(&md->deferred_lock);
|
||||||
c = bio_list_pop(&md->deferred);
|
c = bio_list_pop(&md->deferred);
|
||||||
spin_unlock_irq(&md->deferred_lock);
|
spin_unlock_irq(&md->deferred_lock);
|
||||||
|
|
||||||
if (!c) {
|
if (!c)
|
||||||
clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
|
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
|
|
||||||
up_write(&md->io_lock);
|
up_read(&md->io_lock);
|
||||||
|
|
||||||
if (dm_request_based(md))
|
if (dm_request_based(md))
|
||||||
generic_make_request(c);
|
generic_make_request(c);
|
||||||
else {
|
else
|
||||||
if (c->bi_rw & REQ_HARDBARRIER)
|
__split_and_process_bio(md, c);
|
||||||
process_barrier(md, c);
|
|
||||||
else
|
|
||||||
__split_and_process_bio(md, c);
|
|
||||||
}
|
|
||||||
|
|
||||||
down_write(&md->io_lock);
|
down_read(&md->io_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
up_write(&md->io_lock);
|
up_read(&md->io_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void dm_queue_flush(struct mapped_device *md)
|
static void dm_queue_flush(struct mapped_device *md)
|
||||||
|
@ -2488,73 +2325,6 @@ static void dm_queue_flush(struct mapped_device *md)
|
||||||
queue_work(md->wq, &md->work);
|
queue_work(md->wq, &md->work);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void dm_rq_set_target_request_nr(struct request *clone, unsigned request_nr)
|
|
||||||
{
|
|
||||||
struct dm_rq_target_io *tio = clone->end_io_data;
|
|
||||||
|
|
||||||
tio->info.target_request_nr = request_nr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Issue barrier requests to targets and wait for their completion. */
|
|
||||||
static int dm_rq_barrier(struct mapped_device *md)
|
|
||||||
{
|
|
||||||
int i, j;
|
|
||||||
struct dm_table *map = dm_get_live_table(md);
|
|
||||||
unsigned num_targets = dm_table_get_num_targets(map);
|
|
||||||
struct dm_target *ti;
|
|
||||||
struct request *clone;
|
|
||||||
|
|
||||||
md->barrier_error = 0;
|
|
||||||
|
|
||||||
for (i = 0; i < num_targets; i++) {
|
|
||||||
ti = dm_table_get_target(map, i);
|
|
||||||
for (j = 0; j < ti->num_flush_requests; j++) {
|
|
||||||
clone = clone_rq(md->flush_request, md, GFP_NOIO);
|
|
||||||
dm_rq_set_target_request_nr(clone, j);
|
|
||||||
atomic_inc(&md->pending[rq_data_dir(clone)]);
|
|
||||||
map_request(ti, clone, md);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
|
|
||||||
dm_table_put(map);
|
|
||||||
|
|
||||||
return md->barrier_error;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void dm_rq_barrier_work(struct work_struct *work)
|
|
||||||
{
|
|
||||||
int error;
|
|
||||||
struct mapped_device *md = container_of(work, struct mapped_device,
|
|
||||||
barrier_work);
|
|
||||||
struct request_queue *q = md->queue;
|
|
||||||
struct request *rq;
|
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Hold the md reference here and leave it at the last part so that
|
|
||||||
* the md can't be deleted by device opener when the barrier request
|
|
||||||
* completes.
|
|
||||||
*/
|
|
||||||
dm_get(md);
|
|
||||||
|
|
||||||
error = dm_rq_barrier(md);
|
|
||||||
|
|
||||||
rq = md->flush_request;
|
|
||||||
md->flush_request = NULL;
|
|
||||||
|
|
||||||
if (error == DM_ENDIO_REQUEUE) {
|
|
||||||
spin_lock_irqsave(q->queue_lock, flags);
|
|
||||||
blk_requeue_request(q, rq);
|
|
||||||
spin_unlock_irqrestore(q->queue_lock, flags);
|
|
||||||
} else
|
|
||||||
blk_end_request_all(rq, error);
|
|
||||||
|
|
||||||
blk_run_queue(q);
|
|
||||||
|
|
||||||
dm_put(md);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Swap in a new table, returning the old one for the caller to destroy.
|
* Swap in a new table, returning the old one for the caller to destroy.
|
||||||
*/
|
*/
|
||||||
|
@ -2677,23 +2447,17 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
|
||||||
*
|
*
|
||||||
* To get all processes out of __split_and_process_bio in dm_request,
|
* To get all processes out of __split_and_process_bio in dm_request,
|
||||||
* we take the write lock. To prevent any process from reentering
|
* we take the write lock. To prevent any process from reentering
|
||||||
* __split_and_process_bio from dm_request, we set
|
* __split_and_process_bio from dm_request and quiesce the thread
|
||||||
* DMF_QUEUE_IO_TO_THREAD.
|
* (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND and call
|
||||||
*
|
* flush_workqueue(md->wq).
|
||||||
* To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND
|
|
||||||
* and call flush_workqueue(md->wq). flush_workqueue will wait until
|
|
||||||
* dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any
|
|
||||||
* further calls to __split_and_process_bio from dm_wq_work.
|
|
||||||
*/
|
*/
|
||||||
down_write(&md->io_lock);
|
down_write(&md->io_lock);
|
||||||
set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
|
set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
|
||||||
set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
|
|
||||||
up_write(&md->io_lock);
|
up_write(&md->io_lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which
|
* Stop md->queue before flushing md->wq in case request-based
|
||||||
* can be kicked until md->queue is stopped. So stop md->queue before
|
* dm defers requests to md->wq from md->queue.
|
||||||
* flushing md->wq.
|
|
||||||
*/
|
*/
|
||||||
if (dm_request_based(md))
|
if (dm_request_based(md))
|
||||||
stop_queue(md->queue);
|
stop_queue(md->queue);
|
||||||
|
|
|
@ -294,8 +294,8 @@ static int linear_make_request (mddev_t *mddev, struct bio *bio)
|
||||||
dev_info_t *tmp_dev;
|
dev_info_t *tmp_dev;
|
||||||
sector_t start_sector;
|
sector_t start_sector;
|
||||||
|
|
||||||
if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
|
if (unlikely(bio->bi_rw & REQ_FLUSH)) {
|
||||||
md_barrier_request(mddev, bio);
|
md_flush_request(mddev, bio);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
117
drivers/md/md.c
117
drivers/md/md.c
|
@ -227,12 +227,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
if (mddev->suspended || mddev->barrier) {
|
if (mddev->suspended) {
|
||||||
DEFINE_WAIT(__wait);
|
DEFINE_WAIT(__wait);
|
||||||
for (;;) {
|
for (;;) {
|
||||||
prepare_to_wait(&mddev->sb_wait, &__wait,
|
prepare_to_wait(&mddev->sb_wait, &__wait,
|
||||||
TASK_UNINTERRUPTIBLE);
|
TASK_UNINTERRUPTIBLE);
|
||||||
if (!mddev->suspended && !mddev->barrier)
|
if (!mddev->suspended)
|
||||||
break;
|
break;
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
schedule();
|
schedule();
|
||||||
|
@ -283,40 +283,29 @@ EXPORT_SYMBOL_GPL(mddev_resume);
|
||||||
|
|
||||||
int mddev_congested(mddev_t *mddev, int bits)
|
int mddev_congested(mddev_t *mddev, int bits)
|
||||||
{
|
{
|
||||||
if (mddev->barrier)
|
|
||||||
return 1;
|
|
||||||
return mddev->suspended;
|
return mddev->suspended;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(mddev_congested);
|
EXPORT_SYMBOL(mddev_congested);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Generic barrier handling for md
|
* Generic flush handling for md
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define POST_REQUEST_BARRIER ((void*)1)
|
static void md_end_flush(struct bio *bio, int err)
|
||||||
|
|
||||||
static void md_end_barrier(struct bio *bio, int err)
|
|
||||||
{
|
{
|
||||||
mdk_rdev_t *rdev = bio->bi_private;
|
mdk_rdev_t *rdev = bio->bi_private;
|
||||||
mddev_t *mddev = rdev->mddev;
|
mddev_t *mddev = rdev->mddev;
|
||||||
if (err == -EOPNOTSUPP && mddev->barrier != POST_REQUEST_BARRIER)
|
|
||||||
set_bit(BIO_EOPNOTSUPP, &mddev->barrier->bi_flags);
|
|
||||||
|
|
||||||
rdev_dec_pending(rdev, mddev);
|
rdev_dec_pending(rdev, mddev);
|
||||||
|
|
||||||
if (atomic_dec_and_test(&mddev->flush_pending)) {
|
if (atomic_dec_and_test(&mddev->flush_pending)) {
|
||||||
if (mddev->barrier == POST_REQUEST_BARRIER) {
|
/* The pre-request flush has finished */
|
||||||
/* This was a post-request barrier */
|
schedule_work(&mddev->flush_work);
|
||||||
mddev->barrier = NULL;
|
|
||||||
wake_up(&mddev->sb_wait);
|
|
||||||
} else
|
|
||||||
/* The pre-request barrier has finished */
|
|
||||||
schedule_work(&mddev->barrier_work);
|
|
||||||
}
|
}
|
||||||
bio_put(bio);
|
bio_put(bio);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void submit_barriers(mddev_t *mddev)
|
static void submit_flushes(mddev_t *mddev)
|
||||||
{
|
{
|
||||||
mdk_rdev_t *rdev;
|
mdk_rdev_t *rdev;
|
||||||
|
|
||||||
|
@ -333,60 +322,56 @@ static void submit_barriers(mddev_t *mddev)
|
||||||
atomic_inc(&rdev->nr_pending);
|
atomic_inc(&rdev->nr_pending);
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
bi = bio_alloc(GFP_KERNEL, 0);
|
bi = bio_alloc(GFP_KERNEL, 0);
|
||||||
bi->bi_end_io = md_end_barrier;
|
bi->bi_end_io = md_end_flush;
|
||||||
bi->bi_private = rdev;
|
bi->bi_private = rdev;
|
||||||
bi->bi_bdev = rdev->bdev;
|
bi->bi_bdev = rdev->bdev;
|
||||||
atomic_inc(&mddev->flush_pending);
|
atomic_inc(&mddev->flush_pending);
|
||||||
submit_bio(WRITE_BARRIER, bi);
|
submit_bio(WRITE_FLUSH, bi);
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
rdev_dec_pending(rdev, mddev);
|
rdev_dec_pending(rdev, mddev);
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void md_submit_barrier(struct work_struct *ws)
|
static void md_submit_flush_data(struct work_struct *ws)
|
||||||
{
|
{
|
||||||
mddev_t *mddev = container_of(ws, mddev_t, barrier_work);
|
mddev_t *mddev = container_of(ws, mddev_t, flush_work);
|
||||||
struct bio *bio = mddev->barrier;
|
struct bio *bio = mddev->flush_bio;
|
||||||
|
|
||||||
atomic_set(&mddev->flush_pending, 1);
|
atomic_set(&mddev->flush_pending, 1);
|
||||||
|
|
||||||
if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
|
if (bio->bi_size == 0)
|
||||||
bio_endio(bio, -EOPNOTSUPP);
|
|
||||||
else if (bio->bi_size == 0)
|
|
||||||
/* an empty barrier - all done */
|
/* an empty barrier - all done */
|
||||||
bio_endio(bio, 0);
|
bio_endio(bio, 0);
|
||||||
else {
|
else {
|
||||||
bio->bi_rw &= ~REQ_HARDBARRIER;
|
bio->bi_rw &= ~REQ_FLUSH;
|
||||||
if (mddev->pers->make_request(mddev, bio))
|
if (mddev->pers->make_request(mddev, bio))
|
||||||
generic_make_request(bio);
|
generic_make_request(bio);
|
||||||
mddev->barrier = POST_REQUEST_BARRIER;
|
|
||||||
submit_barriers(mddev);
|
|
||||||
}
|
}
|
||||||
if (atomic_dec_and_test(&mddev->flush_pending)) {
|
if (atomic_dec_and_test(&mddev->flush_pending)) {
|
||||||
mddev->barrier = NULL;
|
mddev->flush_bio = NULL;
|
||||||
wake_up(&mddev->sb_wait);
|
wake_up(&mddev->sb_wait);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void md_barrier_request(mddev_t *mddev, struct bio *bio)
|
void md_flush_request(mddev_t *mddev, struct bio *bio)
|
||||||
{
|
{
|
||||||
spin_lock_irq(&mddev->write_lock);
|
spin_lock_irq(&mddev->write_lock);
|
||||||
wait_event_lock_irq(mddev->sb_wait,
|
wait_event_lock_irq(mddev->sb_wait,
|
||||||
!mddev->barrier,
|
!mddev->flush_bio,
|
||||||
mddev->write_lock, /*nothing*/);
|
mddev->write_lock, /*nothing*/);
|
||||||
mddev->barrier = bio;
|
mddev->flush_bio = bio;
|
||||||
spin_unlock_irq(&mddev->write_lock);
|
spin_unlock_irq(&mddev->write_lock);
|
||||||
|
|
||||||
atomic_set(&mddev->flush_pending, 1);
|
atomic_set(&mddev->flush_pending, 1);
|
||||||
INIT_WORK(&mddev->barrier_work, md_submit_barrier);
|
INIT_WORK(&mddev->flush_work, md_submit_flush_data);
|
||||||
|
|
||||||
submit_barriers(mddev);
|
submit_flushes(mddev);
|
||||||
|
|
||||||
if (atomic_dec_and_test(&mddev->flush_pending))
|
if (atomic_dec_and_test(&mddev->flush_pending))
|
||||||
schedule_work(&mddev->barrier_work);
|
schedule_work(&mddev->flush_work);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(md_barrier_request);
|
EXPORT_SYMBOL(md_flush_request);
|
||||||
|
|
||||||
/* Support for plugging.
|
/* Support for plugging.
|
||||||
* This mirrors the plugging support in request_queue, but does not
|
* This mirrors the plugging support in request_queue, but does not
|
||||||
|
@ -697,31 +682,6 @@ static void super_written(struct bio *bio, int error)
|
||||||
bio_put(bio);
|
bio_put(bio);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void super_written_barrier(struct bio *bio, int error)
|
|
||||||
{
|
|
||||||
struct bio *bio2 = bio->bi_private;
|
|
||||||
mdk_rdev_t *rdev = bio2->bi_private;
|
|
||||||
mddev_t *mddev = rdev->mddev;
|
|
||||||
|
|
||||||
if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
|
|
||||||
error == -EOPNOTSUPP) {
|
|
||||||
unsigned long flags;
|
|
||||||
/* barriers don't appear to be supported :-( */
|
|
||||||
set_bit(BarriersNotsupp, &rdev->flags);
|
|
||||||
mddev->barriers_work = 0;
|
|
||||||
spin_lock_irqsave(&mddev->write_lock, flags);
|
|
||||||
bio2->bi_next = mddev->biolist;
|
|
||||||
mddev->biolist = bio2;
|
|
||||||
spin_unlock_irqrestore(&mddev->write_lock, flags);
|
|
||||||
wake_up(&mddev->sb_wait);
|
|
||||||
bio_put(bio);
|
|
||||||
} else {
|
|
||||||
bio_put(bio2);
|
|
||||||
bio->bi_private = rdev;
|
|
||||||
super_written(bio, error);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
|
void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
|
||||||
sector_t sector, int size, struct page *page)
|
sector_t sector, int size, struct page *page)
|
||||||
{
|
{
|
||||||
|
@ -730,51 +690,28 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
|
||||||
* and decrement it on completion, waking up sb_wait
|
* and decrement it on completion, waking up sb_wait
|
||||||
* if zero is reached.
|
* if zero is reached.
|
||||||
* If an error occurred, call md_error
|
* If an error occurred, call md_error
|
||||||
*
|
|
||||||
* As we might need to resubmit the request if REQ_HARDBARRIER
|
|
||||||
* causes ENOTSUPP, we allocate a spare bio...
|
|
||||||
*/
|
*/
|
||||||
struct bio *bio = bio_alloc(GFP_NOIO, 1);
|
struct bio *bio = bio_alloc(GFP_NOIO, 1);
|
||||||
int rw = REQ_WRITE | REQ_SYNC | REQ_UNPLUG;
|
|
||||||
|
|
||||||
bio->bi_bdev = rdev->bdev;
|
bio->bi_bdev = rdev->bdev;
|
||||||
bio->bi_sector = sector;
|
bio->bi_sector = sector;
|
||||||
bio_add_page(bio, page, size, 0);
|
bio_add_page(bio, page, size, 0);
|
||||||
bio->bi_private = rdev;
|
bio->bi_private = rdev;
|
||||||
bio->bi_end_io = super_written;
|
bio->bi_end_io = super_written;
|
||||||
bio->bi_rw = rw;
|
|
||||||
|
|
||||||
atomic_inc(&mddev->pending_writes);
|
atomic_inc(&mddev->pending_writes);
|
||||||
if (!test_bit(BarriersNotsupp, &rdev->flags)) {
|
submit_bio(REQ_WRITE | REQ_SYNC | REQ_UNPLUG | REQ_FLUSH | REQ_FUA,
|
||||||
struct bio *rbio;
|
bio);
|
||||||
rw |= REQ_HARDBARRIER;
|
|
||||||
rbio = bio_clone(bio, GFP_NOIO);
|
|
||||||
rbio->bi_private = bio;
|
|
||||||
rbio->bi_end_io = super_written_barrier;
|
|
||||||
submit_bio(rw, rbio);
|
|
||||||
} else
|
|
||||||
submit_bio(rw, bio);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void md_super_wait(mddev_t *mddev)
|
void md_super_wait(mddev_t *mddev)
|
||||||
{
|
{
|
||||||
/* wait for all superblock writes that were scheduled to complete.
|
/* wait for all superblock writes that were scheduled to complete */
|
||||||
* if any had to be retried (due to BARRIER problems), retry them
|
|
||||||
*/
|
|
||||||
DEFINE_WAIT(wq);
|
DEFINE_WAIT(wq);
|
||||||
for(;;) {
|
for(;;) {
|
||||||
prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
|
prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
|
||||||
if (atomic_read(&mddev->pending_writes)==0)
|
if (atomic_read(&mddev->pending_writes)==0)
|
||||||
break;
|
break;
|
||||||
while (mddev->biolist) {
|
|
||||||
struct bio *bio;
|
|
||||||
spin_lock_irq(&mddev->write_lock);
|
|
||||||
bio = mddev->biolist;
|
|
||||||
mddev->biolist = bio->bi_next ;
|
|
||||||
bio->bi_next = NULL;
|
|
||||||
spin_unlock_irq(&mddev->write_lock);
|
|
||||||
submit_bio(bio->bi_rw, bio);
|
|
||||||
}
|
|
||||||
schedule();
|
schedule();
|
||||||
}
|
}
|
||||||
finish_wait(&mddev->sb_wait, &wq);
|
finish_wait(&mddev->sb_wait, &wq);
|
||||||
|
@ -1071,7 +1008,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||||
clear_bit(Faulty, &rdev->flags);
|
clear_bit(Faulty, &rdev->flags);
|
||||||
clear_bit(In_sync, &rdev->flags);
|
clear_bit(In_sync, &rdev->flags);
|
||||||
clear_bit(WriteMostly, &rdev->flags);
|
clear_bit(WriteMostly, &rdev->flags);
|
||||||
clear_bit(BarriersNotsupp, &rdev->flags);
|
|
||||||
|
|
||||||
if (mddev->raid_disks == 0) {
|
if (mddev->raid_disks == 0) {
|
||||||
mddev->major_version = 0;
|
mddev->major_version = 0;
|
||||||
|
@ -1486,7 +1422,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||||
clear_bit(Faulty, &rdev->flags);
|
clear_bit(Faulty, &rdev->flags);
|
||||||
clear_bit(In_sync, &rdev->flags);
|
clear_bit(In_sync, &rdev->flags);
|
||||||
clear_bit(WriteMostly, &rdev->flags);
|
clear_bit(WriteMostly, &rdev->flags);
|
||||||
clear_bit(BarriersNotsupp, &rdev->flags);
|
|
||||||
|
|
||||||
if (mddev->raid_disks == 0) {
|
if (mddev->raid_disks == 0) {
|
||||||
mddev->major_version = 1;
|
mddev->major_version = 1;
|
||||||
|
@ -4505,7 +4440,6 @@ int md_run(mddev_t *mddev)
|
||||||
/* may be over-ridden by personality */
|
/* may be over-ridden by personality */
|
||||||
mddev->resync_max_sectors = mddev->dev_sectors;
|
mddev->resync_max_sectors = mddev->dev_sectors;
|
||||||
|
|
||||||
mddev->barriers_work = 1;
|
|
||||||
mddev->ok_start_degraded = start_dirty_degraded;
|
mddev->ok_start_degraded = start_dirty_degraded;
|
||||||
|
|
||||||
if (start_readonly && mddev->ro == 0)
|
if (start_readonly && mddev->ro == 0)
|
||||||
|
@ -4684,7 +4618,6 @@ static void md_clean(mddev_t *mddev)
|
||||||
mddev->recovery = 0;
|
mddev->recovery = 0;
|
||||||
mddev->in_sync = 0;
|
mddev->in_sync = 0;
|
||||||
mddev->degraded = 0;
|
mddev->degraded = 0;
|
||||||
mddev->barriers_work = 0;
|
|
||||||
mddev->safemode = 0;
|
mddev->safemode = 0;
|
||||||
mddev->bitmap_info.offset = 0;
|
mddev->bitmap_info.offset = 0;
|
||||||
mddev->bitmap_info.default_offset = 0;
|
mddev->bitmap_info.default_offset = 0;
|
||||||
|
|
|
@ -87,7 +87,6 @@ struct mdk_rdev_s
|
||||||
#define Faulty 1 /* device is known to have a fault */
|
#define Faulty 1 /* device is known to have a fault */
|
||||||
#define In_sync 2 /* device is in_sync with rest of array */
|
#define In_sync 2 /* device is in_sync with rest of array */
|
||||||
#define WriteMostly 4 /* Avoid reading if at all possible */
|
#define WriteMostly 4 /* Avoid reading if at all possible */
|
||||||
#define BarriersNotsupp 5 /* REQ_HARDBARRIER is not supported */
|
|
||||||
#define AllReserved 6 /* If whole device is reserved for
|
#define AllReserved 6 /* If whole device is reserved for
|
||||||
* one array */
|
* one array */
|
||||||
#define AutoDetected 7 /* added by auto-detect */
|
#define AutoDetected 7 /* added by auto-detect */
|
||||||
|
@ -273,13 +272,6 @@ struct mddev_s
|
||||||
int degraded; /* whether md should consider
|
int degraded; /* whether md should consider
|
||||||
* adding a spare
|
* adding a spare
|
||||||
*/
|
*/
|
||||||
int barriers_work; /* initialised to true, cleared as soon
|
|
||||||
* as a barrier request to slave
|
|
||||||
* fails. Only supported
|
|
||||||
*/
|
|
||||||
struct bio *biolist; /* bios that need to be retried
|
|
||||||
* because REQ_HARDBARRIER is not supported
|
|
||||||
*/
|
|
||||||
|
|
||||||
atomic_t recovery_active; /* blocks scheduled, but not written */
|
atomic_t recovery_active; /* blocks scheduled, but not written */
|
||||||
wait_queue_head_t recovery_wait;
|
wait_queue_head_t recovery_wait;
|
||||||
|
@ -339,16 +331,13 @@ struct mddev_s
|
||||||
struct attribute_group *to_remove;
|
struct attribute_group *to_remove;
|
||||||
struct plug_handle *plug; /* if used by personality */
|
struct plug_handle *plug; /* if used by personality */
|
||||||
|
|
||||||
/* Generic barrier handling.
|
/* Generic flush handling.
|
||||||
* If there is a pending barrier request, all other
|
* The last to finish preflush schedules a worker to submit
|
||||||
* writes are blocked while the devices are flushed.
|
* the rest of the request (without the REQ_FLUSH flag).
|
||||||
* The last to finish a flush schedules a worker to
|
|
||||||
* submit the barrier request (without the barrier flag),
|
|
||||||
* then submit more flush requests.
|
|
||||||
*/
|
*/
|
||||||
struct bio *barrier;
|
struct bio *flush_bio;
|
||||||
atomic_t flush_pending;
|
atomic_t flush_pending;
|
||||||
struct work_struct barrier_work;
|
struct work_struct flush_work;
|
||||||
struct work_struct event_work; /* used by dm to report failure event */
|
struct work_struct event_work; /* used by dm to report failure event */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -502,7 +491,7 @@ extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
|
||||||
extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
|
extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
|
||||||
|
|
||||||
extern int mddev_congested(mddev_t *mddev, int bits);
|
extern int mddev_congested(mddev_t *mddev, int bits);
|
||||||
extern void md_barrier_request(mddev_t *mddev, struct bio *bio);
|
extern void md_flush_request(mddev_t *mddev, struct bio *bio);
|
||||||
extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
|
extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
|
||||||
sector_t sector, int size, struct page *page);
|
sector_t sector, int size, struct page *page);
|
||||||
extern void md_super_wait(mddev_t *mddev);
|
extern void md_super_wait(mddev_t *mddev);
|
||||||
|
|
|
@ -142,8 +142,8 @@ static int multipath_make_request(mddev_t *mddev, struct bio * bio)
|
||||||
struct multipath_bh * mp_bh;
|
struct multipath_bh * mp_bh;
|
||||||
struct multipath_info *multipath;
|
struct multipath_info *multipath;
|
||||||
|
|
||||||
if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
|
if (unlikely(bio->bi_rw & REQ_FLUSH)) {
|
||||||
md_barrier_request(mddev, bio);
|
md_flush_request(mddev, bio);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -483,8 +483,8 @@ static int raid0_make_request(mddev_t *mddev, struct bio *bio)
|
||||||
struct strip_zone *zone;
|
struct strip_zone *zone;
|
||||||
mdk_rdev_t *tmp_dev;
|
mdk_rdev_t *tmp_dev;
|
||||||
|
|
||||||
if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
|
if (unlikely(bio->bi_rw & REQ_FLUSH)) {
|
||||||
md_barrier_request(mddev, bio);
|
md_flush_request(mddev, bio);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -319,83 +319,74 @@ static void raid1_end_write_request(struct bio *bio, int error)
|
||||||
if (r1_bio->bios[mirror] == bio)
|
if (r1_bio->bios[mirror] == bio)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (error == -EOPNOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) {
|
/*
|
||||||
set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags);
|
* 'one mirror IO has finished' event handler:
|
||||||
set_bit(R1BIO_BarrierRetry, &r1_bio->state);
|
*/
|
||||||
r1_bio->mddev->barriers_work = 0;
|
r1_bio->bios[mirror] = NULL;
|
||||||
/* Don't rdev_dec_pending in this branch - keep it for the retry */
|
to_put = bio;
|
||||||
} else {
|
if (!uptodate) {
|
||||||
|
md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
|
||||||
|
/* an I/O failed, we can't clear the bitmap */
|
||||||
|
set_bit(R1BIO_Degraded, &r1_bio->state);
|
||||||
|
} else
|
||||||
/*
|
/*
|
||||||
* this branch is our 'one mirror IO has finished' event handler:
|
* Set R1BIO_Uptodate in our master bio, so that we
|
||||||
|
* will return a good error code for to the higher
|
||||||
|
* levels even if IO on some other mirrored buffer
|
||||||
|
* fails.
|
||||||
|
*
|
||||||
|
* The 'master' represents the composite IO operation
|
||||||
|
* to user-side. So if something waits for IO, then it
|
||||||
|
* will wait for the 'master' bio.
|
||||||
*/
|
*/
|
||||||
r1_bio->bios[mirror] = NULL;
|
set_bit(R1BIO_Uptodate, &r1_bio->state);
|
||||||
to_put = bio;
|
|
||||||
if (!uptodate) {
|
|
||||||
md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
|
|
||||||
/* an I/O failed, we can't clear the bitmap */
|
|
||||||
set_bit(R1BIO_Degraded, &r1_bio->state);
|
|
||||||
} else
|
|
||||||
/*
|
|
||||||
* Set R1BIO_Uptodate in our master bio, so that
|
|
||||||
* we will return a good error code for to the higher
|
|
||||||
* levels even if IO on some other mirrored buffer fails.
|
|
||||||
*
|
|
||||||
* The 'master' represents the composite IO operation to
|
|
||||||
* user-side. So if something waits for IO, then it will
|
|
||||||
* wait for the 'master' bio.
|
|
||||||
*/
|
|
||||||
set_bit(R1BIO_Uptodate, &r1_bio->state);
|
|
||||||
|
|
||||||
update_head_pos(mirror, r1_bio);
|
update_head_pos(mirror, r1_bio);
|
||||||
|
|
||||||
if (behind) {
|
if (behind) {
|
||||||
if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
|
if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
|
||||||
atomic_dec(&r1_bio->behind_remaining);
|
atomic_dec(&r1_bio->behind_remaining);
|
||||||
|
|
||||||
/* In behind mode, we ACK the master bio once the I/O has safely
|
/*
|
||||||
* reached all non-writemostly disks. Setting the Returned bit
|
* In behind mode, we ACK the master bio once the I/O
|
||||||
* ensures that this gets done only once -- we don't ever want to
|
* has safely reached all non-writemostly
|
||||||
* return -EIO here, instead we'll wait */
|
* disks. Setting the Returned bit ensures that this
|
||||||
|
* gets done only once -- we don't ever want to return
|
||||||
if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) &&
|
* -EIO here, instead we'll wait
|
||||||
test_bit(R1BIO_Uptodate, &r1_bio->state)) {
|
*/
|
||||||
/* Maybe we can return now */
|
if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) &&
|
||||||
if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
|
test_bit(R1BIO_Uptodate, &r1_bio->state)) {
|
||||||
struct bio *mbio = r1_bio->master_bio;
|
/* Maybe we can return now */
|
||||||
PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n",
|
if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
|
||||||
(unsigned long long) mbio->bi_sector,
|
struct bio *mbio = r1_bio->master_bio;
|
||||||
(unsigned long long) mbio->bi_sector +
|
PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n",
|
||||||
(mbio->bi_size >> 9) - 1);
|
(unsigned long long) mbio->bi_sector,
|
||||||
bio_endio(mbio, 0);
|
(unsigned long long) mbio->bi_sector +
|
||||||
}
|
(mbio->bi_size >> 9) - 1);
|
||||||
|
bio_endio(mbio, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
|
|
||||||
}
|
}
|
||||||
|
rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
*
|
|
||||||
* Let's see if all mirrored write operations have finished
|
* Let's see if all mirrored write operations have finished
|
||||||
* already.
|
* already.
|
||||||
*/
|
*/
|
||||||
if (atomic_dec_and_test(&r1_bio->remaining)) {
|
if (atomic_dec_and_test(&r1_bio->remaining)) {
|
||||||
if (test_bit(R1BIO_BarrierRetry, &r1_bio->state))
|
if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
|
||||||
reschedule_retry(r1_bio);
|
/* free extra copy of the data pages */
|
||||||
else {
|
int i = bio->bi_vcnt;
|
||||||
/* it really is the end of this request */
|
while (i--)
|
||||||
if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
|
safe_put_page(bio->bi_io_vec[i].bv_page);
|
||||||
/* free extra copy of the data pages */
|
|
||||||
int i = bio->bi_vcnt;
|
|
||||||
while (i--)
|
|
||||||
safe_put_page(bio->bi_io_vec[i].bv_page);
|
|
||||||
}
|
|
||||||
/* clear the bitmap if all writes complete successfully */
|
|
||||||
bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
|
|
||||||
r1_bio->sectors,
|
|
||||||
!test_bit(R1BIO_Degraded, &r1_bio->state),
|
|
||||||
behind);
|
|
||||||
md_write_end(r1_bio->mddev);
|
|
||||||
raid_end_bio_io(r1_bio);
|
|
||||||
}
|
}
|
||||||
|
/* clear the bitmap if all writes complete successfully */
|
||||||
|
bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
|
||||||
|
r1_bio->sectors,
|
||||||
|
!test_bit(R1BIO_Degraded, &r1_bio->state),
|
||||||
|
behind);
|
||||||
|
md_write_end(r1_bio->mddev);
|
||||||
|
raid_end_bio_io(r1_bio);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (to_put)
|
if (to_put)
|
||||||
|
@ -788,16 +779,13 @@ static int make_request(mddev_t *mddev, struct bio * bio)
|
||||||
struct page **behind_pages = NULL;
|
struct page **behind_pages = NULL;
|
||||||
const int rw = bio_data_dir(bio);
|
const int rw = bio_data_dir(bio);
|
||||||
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
|
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
|
||||||
unsigned long do_barriers;
|
const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
|
||||||
mdk_rdev_t *blocked_rdev;
|
mdk_rdev_t *blocked_rdev;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Register the new request and wait if the reconstruction
|
* Register the new request and wait if the reconstruction
|
||||||
* thread has put up a bar for new requests.
|
* thread has put up a bar for new requests.
|
||||||
* Continue immediately if no resync is active currently.
|
* Continue immediately if no resync is active currently.
|
||||||
* We test barriers_work *after* md_write_start as md_write_start
|
|
||||||
* may cause the first superblock write, and that will check out
|
|
||||||
* if barriers work.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
md_write_start(mddev, bio); /* wait on superblock update early */
|
md_write_start(mddev, bio); /* wait on superblock update early */
|
||||||
|
@ -821,13 +809,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
|
||||||
}
|
}
|
||||||
finish_wait(&conf->wait_barrier, &w);
|
finish_wait(&conf->wait_barrier, &w);
|
||||||
}
|
}
|
||||||
if (unlikely(!mddev->barriers_work &&
|
|
||||||
(bio->bi_rw & REQ_HARDBARRIER))) {
|
|
||||||
if (rw == WRITE)
|
|
||||||
md_write_end(mddev);
|
|
||||||
bio_endio(bio, -EOPNOTSUPP);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
wait_barrier(conf);
|
wait_barrier(conf);
|
||||||
|
|
||||||
|
@ -959,10 +940,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
|
||||||
atomic_set(&r1_bio->remaining, 0);
|
atomic_set(&r1_bio->remaining, 0);
|
||||||
atomic_set(&r1_bio->behind_remaining, 0);
|
atomic_set(&r1_bio->behind_remaining, 0);
|
||||||
|
|
||||||
do_barriers = bio->bi_rw & REQ_HARDBARRIER;
|
|
||||||
if (do_barriers)
|
|
||||||
set_bit(R1BIO_Barrier, &r1_bio->state);
|
|
||||||
|
|
||||||
bio_list_init(&bl);
|
bio_list_init(&bl);
|
||||||
for (i = 0; i < disks; i++) {
|
for (i = 0; i < disks; i++) {
|
||||||
struct bio *mbio;
|
struct bio *mbio;
|
||||||
|
@ -975,7 +952,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
|
||||||
mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
|
mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
|
||||||
mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
|
mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
|
||||||
mbio->bi_end_io = raid1_end_write_request;
|
mbio->bi_end_io = raid1_end_write_request;
|
||||||
mbio->bi_rw = WRITE | do_barriers | do_sync;
|
mbio->bi_rw = WRITE | do_flush_fua | do_sync;
|
||||||
mbio->bi_private = r1_bio;
|
mbio->bi_private = r1_bio;
|
||||||
|
|
||||||
if (behind_pages) {
|
if (behind_pages) {
|
||||||
|
@ -1634,41 +1611,6 @@ static void raid1d(mddev_t *mddev)
|
||||||
if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
|
if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
|
||||||
sync_request_write(mddev, r1_bio);
|
sync_request_write(mddev, r1_bio);
|
||||||
unplug = 1;
|
unplug = 1;
|
||||||
} else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
|
|
||||||
/* some requests in the r1bio were REQ_HARDBARRIER
|
|
||||||
* requests which failed with -EOPNOTSUPP. Hohumm..
|
|
||||||
* Better resubmit without the barrier.
|
|
||||||
* We know which devices to resubmit for, because
|
|
||||||
* all others have had their bios[] entry cleared.
|
|
||||||
* We already have a nr_pending reference on these rdevs.
|
|
||||||
*/
|
|
||||||
int i;
|
|
||||||
const unsigned long do_sync = (r1_bio->master_bio->bi_rw & REQ_SYNC);
|
|
||||||
clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
|
|
||||||
clear_bit(R1BIO_Barrier, &r1_bio->state);
|
|
||||||
for (i=0; i < conf->raid_disks; i++)
|
|
||||||
if (r1_bio->bios[i])
|
|
||||||
atomic_inc(&r1_bio->remaining);
|
|
||||||
for (i=0; i < conf->raid_disks; i++)
|
|
||||||
if (r1_bio->bios[i]) {
|
|
||||||
struct bio_vec *bvec;
|
|
||||||
int j;
|
|
||||||
|
|
||||||
bio = bio_clone(r1_bio->master_bio, GFP_NOIO);
|
|
||||||
/* copy pages from the failed bio, as
|
|
||||||
* this might be a write-behind device */
|
|
||||||
__bio_for_each_segment(bvec, bio, j, 0)
|
|
||||||
bvec->bv_page = bio_iovec_idx(r1_bio->bios[i], j)->bv_page;
|
|
||||||
bio_put(r1_bio->bios[i]);
|
|
||||||
bio->bi_sector = r1_bio->sector +
|
|
||||||
conf->mirrors[i].rdev->data_offset;
|
|
||||||
bio->bi_bdev = conf->mirrors[i].rdev->bdev;
|
|
||||||
bio->bi_end_io = raid1_end_write_request;
|
|
||||||
bio->bi_rw = WRITE | do_sync;
|
|
||||||
bio->bi_private = r1_bio;
|
|
||||||
r1_bio->bios[i] = bio;
|
|
||||||
generic_make_request(bio);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
int disk;
|
int disk;
|
||||||
|
|
||||||
|
|
|
@ -117,8 +117,6 @@ struct r1bio_s {
|
||||||
#define R1BIO_IsSync 1
|
#define R1BIO_IsSync 1
|
||||||
#define R1BIO_Degraded 2
|
#define R1BIO_Degraded 2
|
||||||
#define R1BIO_BehindIO 3
|
#define R1BIO_BehindIO 3
|
||||||
#define R1BIO_Barrier 4
|
|
||||||
#define R1BIO_BarrierRetry 5
|
|
||||||
/* For write-behind requests, we call bi_end_io when
|
/* For write-behind requests, we call bi_end_io when
|
||||||
* the last non-write-behind device completes, providing
|
* the last non-write-behind device completes, providing
|
||||||
* any write was successful. Otherwise we call when
|
* any write was successful. Otherwise we call when
|
||||||
|
|
|
@ -800,12 +800,13 @@ static int make_request(mddev_t *mddev, struct bio * bio)
|
||||||
int chunk_sects = conf->chunk_mask + 1;
|
int chunk_sects = conf->chunk_mask + 1;
|
||||||
const int rw = bio_data_dir(bio);
|
const int rw = bio_data_dir(bio);
|
||||||
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
|
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
|
||||||
|
const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
|
||||||
struct bio_list bl;
|
struct bio_list bl;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
mdk_rdev_t *blocked_rdev;
|
mdk_rdev_t *blocked_rdev;
|
||||||
|
|
||||||
if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
|
if (unlikely(bio->bi_rw & REQ_FLUSH)) {
|
||||||
md_barrier_request(mddev, bio);
|
md_flush_request(mddev, bio);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -965,7 +966,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
|
||||||
conf->mirrors[d].rdev->data_offset;
|
conf->mirrors[d].rdev->data_offset;
|
||||||
mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
|
mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
|
||||||
mbio->bi_end_io = raid10_end_write_request;
|
mbio->bi_end_io = raid10_end_write_request;
|
||||||
mbio->bi_rw = WRITE | do_sync;
|
mbio->bi_rw = WRITE | do_sync | do_fua;
|
||||||
mbio->bi_private = r10_bio;
|
mbio->bi_private = r10_bio;
|
||||||
|
|
||||||
atomic_inc(&r10_bio->remaining);
|
atomic_inc(&r10_bio->remaining);
|
||||||
|
|
|
@ -506,9 +506,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
|
||||||
int rw;
|
int rw;
|
||||||
struct bio *bi;
|
struct bio *bi;
|
||||||
mdk_rdev_t *rdev;
|
mdk_rdev_t *rdev;
|
||||||
if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags))
|
if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) {
|
||||||
rw = WRITE;
|
if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags))
|
||||||
else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
|
rw = WRITE_FUA;
|
||||||
|
else
|
||||||
|
rw = WRITE;
|
||||||
|
} else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
|
||||||
rw = READ;
|
rw = READ;
|
||||||
else
|
else
|
||||||
continue;
|
continue;
|
||||||
|
@ -1031,6 +1034,8 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
|
||||||
|
|
||||||
while (wbi && wbi->bi_sector <
|
while (wbi && wbi->bi_sector <
|
||||||
dev->sector + STRIPE_SECTORS) {
|
dev->sector + STRIPE_SECTORS) {
|
||||||
|
if (wbi->bi_rw & REQ_FUA)
|
||||||
|
set_bit(R5_WantFUA, &dev->flags);
|
||||||
tx = async_copy_data(1, wbi, dev->page,
|
tx = async_copy_data(1, wbi, dev->page,
|
||||||
dev->sector, tx);
|
dev->sector, tx);
|
||||||
wbi = r5_next_bio(wbi, dev->sector);
|
wbi = r5_next_bio(wbi, dev->sector);
|
||||||
|
@ -1048,15 +1053,22 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
|
||||||
int pd_idx = sh->pd_idx;
|
int pd_idx = sh->pd_idx;
|
||||||
int qd_idx = sh->qd_idx;
|
int qd_idx = sh->qd_idx;
|
||||||
int i;
|
int i;
|
||||||
|
bool fua = false;
|
||||||
|
|
||||||
pr_debug("%s: stripe %llu\n", __func__,
|
pr_debug("%s: stripe %llu\n", __func__,
|
||||||
(unsigned long long)sh->sector);
|
(unsigned long long)sh->sector);
|
||||||
|
|
||||||
|
for (i = disks; i--; )
|
||||||
|
fua |= test_bit(R5_WantFUA, &sh->dev[i].flags);
|
||||||
|
|
||||||
for (i = disks; i--; ) {
|
for (i = disks; i--; ) {
|
||||||
struct r5dev *dev = &sh->dev[i];
|
struct r5dev *dev = &sh->dev[i];
|
||||||
|
|
||||||
if (dev->written || i == pd_idx || i == qd_idx)
|
if (dev->written || i == pd_idx || i == qd_idx) {
|
||||||
set_bit(R5_UPTODATE, &dev->flags);
|
set_bit(R5_UPTODATE, &dev->flags);
|
||||||
|
if (fua)
|
||||||
|
set_bit(R5_WantFUA, &dev->flags);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sh->reconstruct_state == reconstruct_state_drain_run)
|
if (sh->reconstruct_state == reconstruct_state_drain_run)
|
||||||
|
@ -3281,7 +3293,7 @@ static void handle_stripe5(struct stripe_head *sh)
|
||||||
|
|
||||||
if (dec_preread_active) {
|
if (dec_preread_active) {
|
||||||
/* We delay this until after ops_run_io so that if make_request
|
/* We delay this until after ops_run_io so that if make_request
|
||||||
* is waiting on a barrier, it won't continue until the writes
|
* is waiting on a flush, it won't continue until the writes
|
||||||
* have actually been submitted.
|
* have actually been submitted.
|
||||||
*/
|
*/
|
||||||
atomic_dec(&conf->preread_active_stripes);
|
atomic_dec(&conf->preread_active_stripes);
|
||||||
|
@ -3583,7 +3595,7 @@ static void handle_stripe6(struct stripe_head *sh)
|
||||||
|
|
||||||
if (dec_preread_active) {
|
if (dec_preread_active) {
|
||||||
/* We delay this until after ops_run_io so that if make_request
|
/* We delay this until after ops_run_io so that if make_request
|
||||||
* is waiting on a barrier, it won't continue until the writes
|
* is waiting on a flush, it won't continue until the writes
|
||||||
* have actually been submitted.
|
* have actually been submitted.
|
||||||
*/
|
*/
|
||||||
atomic_dec(&conf->preread_active_stripes);
|
atomic_dec(&conf->preread_active_stripes);
|
||||||
|
@ -3978,14 +3990,8 @@ static int make_request(mddev_t *mddev, struct bio * bi)
|
||||||
const int rw = bio_data_dir(bi);
|
const int rw = bio_data_dir(bi);
|
||||||
int remaining;
|
int remaining;
|
||||||
|
|
||||||
if (unlikely(bi->bi_rw & REQ_HARDBARRIER)) {
|
if (unlikely(bi->bi_rw & REQ_FLUSH)) {
|
||||||
/* Drain all pending writes. We only really need
|
md_flush_request(mddev, bi);
|
||||||
* to ensure they have been submitted, but this is
|
|
||||||
* easier.
|
|
||||||
*/
|
|
||||||
mddev->pers->quiesce(mddev, 1);
|
|
||||||
mddev->pers->quiesce(mddev, 0);
|
|
||||||
md_barrier_request(mddev, bi);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4103,7 +4109,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
|
||||||
finish_wait(&conf->wait_for_overlap, &w);
|
finish_wait(&conf->wait_for_overlap, &w);
|
||||||
set_bit(STRIPE_HANDLE, &sh->state);
|
set_bit(STRIPE_HANDLE, &sh->state);
|
||||||
clear_bit(STRIPE_DELAYED, &sh->state);
|
clear_bit(STRIPE_DELAYED, &sh->state);
|
||||||
if (mddev->barrier &&
|
if ((bi->bi_rw & REQ_SYNC) &&
|
||||||
!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
|
!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
|
||||||
atomic_inc(&conf->preread_active_stripes);
|
atomic_inc(&conf->preread_active_stripes);
|
||||||
release_stripe(sh);
|
release_stripe(sh);
|
||||||
|
@ -4126,13 +4132,6 @@ static int make_request(mddev_t *mddev, struct bio * bi)
|
||||||
bio_endio(bi, 0);
|
bio_endio(bi, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mddev->barrier) {
|
|
||||||
/* We need to wait for the stripes to all be handled.
|
|
||||||
* So: wait for preread_active_stripes to drop to 0.
|
|
||||||
*/
|
|
||||||
wait_event(mddev->thread->wqueue,
|
|
||||||
atomic_read(&conf->preread_active_stripes) == 0);
|
|
||||||
}
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -275,6 +275,7 @@ struct r6_state {
|
||||||
* filling
|
* filling
|
||||||
*/
|
*/
|
||||||
#define R5_Wantdrain 13 /* dev->towrite needs to be drained */
|
#define R5_Wantdrain 13 /* dev->towrite needs to be drained */
|
||||||
|
#define R5_WantFUA 14 /* Write should be FUA */
|
||||||
/*
|
/*
|
||||||
* Write method
|
* Write method
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -128,7 +128,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
|
||||||
mq->req = NULL;
|
mq->req = NULL;
|
||||||
|
|
||||||
blk_queue_prep_rq(mq->queue, mmc_prep_request);
|
blk_queue_prep_rq(mq->queue, mmc_prep_request);
|
||||||
blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN);
|
|
||||||
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
|
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
|
||||||
if (mmc_can_erase(card)) {
|
if (mmc_can_erase(card)) {
|
||||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue);
|
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue);
|
||||||
|
|
|
@ -2196,7 +2196,6 @@ static void dasd_setup_queue(struct dasd_block *block)
|
||||||
*/
|
*/
|
||||||
blk_queue_max_segment_size(block->request_queue, PAGE_SIZE);
|
blk_queue_max_segment_size(block->request_queue, PAGE_SIZE);
|
||||||
blk_queue_segment_boundary(block->request_queue, PAGE_SIZE - 1);
|
blk_queue_segment_boundary(block->request_queue, PAGE_SIZE - 1);
|
||||||
blk_queue_ordered(block->request_queue, QUEUE_ORDERED_DRAIN);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -2850,12 +2850,6 @@ aic7xxx_done(struct aic7xxx_host *p, struct aic7xxx_scb *scb)
|
||||||
aic_dev->r_total++;
|
aic_dev->r_total++;
|
||||||
ptr = aic_dev->r_bins;
|
ptr = aic_dev->r_bins;
|
||||||
}
|
}
|
||||||
if(cmd->device->simple_tags && cmd->request->cmd_flags & REQ_HARDBARRIER)
|
|
||||||
{
|
|
||||||
aic_dev->barrier_total++;
|
|
||||||
if(scb->tag_action == MSG_ORDERED_Q_TAG)
|
|
||||||
aic_dev->ordered_total++;
|
|
||||||
}
|
|
||||||
x = scb->sg_length;
|
x = scb->sg_length;
|
||||||
x >>= 10;
|
x >>= 10;
|
||||||
for(i=0; i<6; i++)
|
for(i=0; i<6; i++)
|
||||||
|
@ -10125,7 +10119,6 @@ static void aic7xxx_buildscb(struct aic7xxx_host *p, struct scsi_cmnd *cmd,
|
||||||
struct aic_dev_data *aic_dev = cmd->device->hostdata;
|
struct aic_dev_data *aic_dev = cmd->device->hostdata;
|
||||||
struct scsi_device *sdptr = cmd->device;
|
struct scsi_device *sdptr = cmd->device;
|
||||||
unsigned char tindex = TARGET_INDEX(cmd);
|
unsigned char tindex = TARGET_INDEX(cmd);
|
||||||
struct request *req = cmd->request;
|
|
||||||
int use_sg;
|
int use_sg;
|
||||||
|
|
||||||
mask = (0x01 << tindex);
|
mask = (0x01 << tindex);
|
||||||
|
@ -10144,19 +10137,8 @@ static void aic7xxx_buildscb(struct aic7xxx_host *p, struct scsi_cmnd *cmd,
|
||||||
/* We always force TEST_UNIT_READY to untagged */
|
/* We always force TEST_UNIT_READY to untagged */
|
||||||
if (cmd->cmnd[0] != TEST_UNIT_READY && sdptr->simple_tags)
|
if (cmd->cmnd[0] != TEST_UNIT_READY && sdptr->simple_tags)
|
||||||
{
|
{
|
||||||
if (req->cmd_flags & REQ_HARDBARRIER)
|
hscb->control |= MSG_SIMPLE_Q_TAG;
|
||||||
{
|
scb->tag_action = MSG_SIMPLE_Q_TAG;
|
||||||
if(sdptr->ordered_tags)
|
|
||||||
{
|
|
||||||
hscb->control |= MSG_ORDERED_Q_TAG;
|
|
||||||
scb->tag_action = MSG_ORDERED_Q_TAG;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
hscb->control |= MSG_SIMPLE_Q_TAG;
|
|
||||||
scb->tag_action = MSG_SIMPLE_Q_TAG;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ( !(aic_dev->dtr_pending) &&
|
if ( !(aic_dev->dtr_pending) &&
|
||||||
|
|
|
@ -130,17 +130,6 @@ static void sas_scsi_task_done(struct sas_task *task)
|
||||||
sc->scsi_done(sc);
|
sc->scsi_done(sc);
|
||||||
}
|
}
|
||||||
|
|
||||||
static enum task_attribute sas_scsi_get_task_attr(struct scsi_cmnd *cmd)
|
|
||||||
{
|
|
||||||
enum task_attribute ta = TASK_ATTR_SIMPLE;
|
|
||||||
if (cmd->request && blk_rq_tagged(cmd->request)) {
|
|
||||||
if (cmd->device->ordered_tags &&
|
|
||||||
(cmd->request->cmd_flags & REQ_HARDBARRIER))
|
|
||||||
ta = TASK_ATTR_ORDERED;
|
|
||||||
}
|
|
||||||
return ta;
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct sas_task *sas_create_task(struct scsi_cmnd *cmd,
|
static struct sas_task *sas_create_task(struct scsi_cmnd *cmd,
|
||||||
struct domain_device *dev,
|
struct domain_device *dev,
|
||||||
gfp_t gfp_flags)
|
gfp_t gfp_flags)
|
||||||
|
@ -160,7 +149,7 @@ static struct sas_task *sas_create_task(struct scsi_cmnd *cmd,
|
||||||
task->ssp_task.retry_count = 1;
|
task->ssp_task.retry_count = 1;
|
||||||
int_to_scsilun(cmd->device->lun, &lun);
|
int_to_scsilun(cmd->device->lun, &lun);
|
||||||
memcpy(task->ssp_task.LUN, &lun.scsi_lun, 8);
|
memcpy(task->ssp_task.LUN, &lun.scsi_lun, 8);
|
||||||
task->ssp_task.task_attr = sas_scsi_get_task_attr(cmd);
|
task->ssp_task.task_attr = TASK_ATTR_SIMPLE;
|
||||||
memcpy(task->ssp_task.cdb, cmd->cmnd, 16);
|
memcpy(task->ssp_task.cdb, cmd->cmnd, 16);
|
||||||
|
|
||||||
task->scatter = scsi_sglist(cmd);
|
task->scatter = scsi_sglist(cmd);
|
||||||
|
|
|
@ -2109,7 +2109,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
|
||||||
struct scsi_disk *sdkp = scsi_disk(disk);
|
struct scsi_disk *sdkp = scsi_disk(disk);
|
||||||
struct scsi_device *sdp = sdkp->device;
|
struct scsi_device *sdp = sdkp->device;
|
||||||
unsigned char *buffer;
|
unsigned char *buffer;
|
||||||
unsigned ordered;
|
unsigned flush = 0;
|
||||||
|
|
||||||
SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp,
|
SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp,
|
||||||
"sd_revalidate_disk\n"));
|
"sd_revalidate_disk\n"));
|
||||||
|
@ -2151,17 +2151,15 @@ static int sd_revalidate_disk(struct gendisk *disk)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We now have all cache related info, determine how we deal
|
* We now have all cache related info, determine how we deal
|
||||||
* with ordered requests. Note that as the current SCSI
|
* with flush requests.
|
||||||
* dispatch function can alter request order, we cannot use
|
|
||||||
* QUEUE_ORDERED_TAG_* even when ordered tag is supported.
|
|
||||||
*/
|
*/
|
||||||
if (sdkp->WCE)
|
if (sdkp->WCE) {
|
||||||
ordered = sdkp->DPOFUA
|
flush |= REQ_FLUSH;
|
||||||
? QUEUE_ORDERED_DRAIN_FUA : QUEUE_ORDERED_DRAIN_FLUSH;
|
if (sdkp->DPOFUA)
|
||||||
else
|
flush |= REQ_FUA;
|
||||||
ordered = QUEUE_ORDERED_DRAIN;
|
}
|
||||||
|
|
||||||
blk_queue_ordered(sdkp->disk->queue, ordered);
|
blk_queue_flush(sdkp->disk->queue, flush);
|
||||||
|
|
||||||
set_capacity(disk, sdkp->capacity);
|
set_capacity(disk, sdkp->capacity);
|
||||||
kfree(buffer);
|
kfree(buffer);
|
||||||
|
|
|
@ -370,7 +370,7 @@ int blkdev_fsync(struct file *filp, int datasync)
|
||||||
*/
|
*/
|
||||||
mutex_unlock(&bd_inode->i_mutex);
|
mutex_unlock(&bd_inode->i_mutex);
|
||||||
|
|
||||||
error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT);
|
error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
|
||||||
if (error == -EOPNOTSUPP)
|
if (error == -EOPNOTSUPP)
|
||||||
error = 0;
|
error = 0;
|
||||||
|
|
||||||
|
|
|
@ -2063,7 +2063,7 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
|
||||||
if (uptodate) {
|
if (uptodate) {
|
||||||
set_buffer_uptodate(bh);
|
set_buffer_uptodate(bh);
|
||||||
} else {
|
} else {
|
||||||
if (!buffer_eopnotsupp(bh) && printk_ratelimit()) {
|
if (printk_ratelimit()) {
|
||||||
printk(KERN_WARNING "lost page write due to "
|
printk(KERN_WARNING "lost page write due to "
|
||||||
"I/O error on %s\n",
|
"I/O error on %s\n",
|
||||||
bdevname(bh->b_bdev, b));
|
bdevname(bh->b_bdev, b));
|
||||||
|
@ -2200,21 +2200,10 @@ static int write_dev_supers(struct btrfs_device *device,
|
||||||
bh->b_end_io = btrfs_end_buffer_write_sync;
|
bh->b_end_io = btrfs_end_buffer_write_sync;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i == last_barrier && do_barriers && device->barriers) {
|
if (i == last_barrier && do_barriers)
|
||||||
ret = submit_bh(WRITE_BARRIER, bh);
|
ret = submit_bh(WRITE_FLUSH_FUA, bh);
|
||||||
if (ret == -EOPNOTSUPP) {
|
else
|
||||||
printk("btrfs: disabling barriers on dev %s\n",
|
|
||||||
device->name);
|
|
||||||
set_buffer_uptodate(bh);
|
|
||||||
device->barriers = 0;
|
|
||||||
/* one reference for submit_bh */
|
|
||||||
get_bh(bh);
|
|
||||||
lock_buffer(bh);
|
|
||||||
ret = submit_bh(WRITE_SYNC, bh);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
ret = submit_bh(WRITE_SYNC, bh);
|
ret = submit_bh(WRITE_SYNC, bh);
|
||||||
}
|
|
||||||
|
|
||||||
if (ret)
|
if (ret)
|
||||||
errors++;
|
errors++;
|
||||||
|
|
|
@ -1695,8 +1695,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
|
||||||
static void btrfs_issue_discard(struct block_device *bdev,
|
static void btrfs_issue_discard(struct block_device *bdev,
|
||||||
u64 start, u64 len)
|
u64 start, u64 len)
|
||||||
{
|
{
|
||||||
blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
|
blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0);
|
||||||
BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
|
static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
|
||||||
|
|
|
@ -398,7 +398,6 @@ static noinline int device_list_add(const char *path,
|
||||||
device->work.func = pending_bios_fn;
|
device->work.func = pending_bios_fn;
|
||||||
memcpy(device->uuid, disk_super->dev_item.uuid,
|
memcpy(device->uuid, disk_super->dev_item.uuid,
|
||||||
BTRFS_UUID_SIZE);
|
BTRFS_UUID_SIZE);
|
||||||
device->barriers = 1;
|
|
||||||
spin_lock_init(&device->io_lock);
|
spin_lock_init(&device->io_lock);
|
||||||
device->name = kstrdup(path, GFP_NOFS);
|
device->name = kstrdup(path, GFP_NOFS);
|
||||||
if (!device->name) {
|
if (!device->name) {
|
||||||
|
@ -462,7 +461,6 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
|
||||||
device->devid = orig_dev->devid;
|
device->devid = orig_dev->devid;
|
||||||
device->work.func = pending_bios_fn;
|
device->work.func = pending_bios_fn;
|
||||||
memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid));
|
memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid));
|
||||||
device->barriers = 1;
|
|
||||||
spin_lock_init(&device->io_lock);
|
spin_lock_init(&device->io_lock);
|
||||||
INIT_LIST_HEAD(&device->dev_list);
|
INIT_LIST_HEAD(&device->dev_list);
|
||||||
INIT_LIST_HEAD(&device->dev_alloc_list);
|
INIT_LIST_HEAD(&device->dev_alloc_list);
|
||||||
|
@ -1489,7 +1487,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
|
||||||
trans = btrfs_start_transaction(root, 0);
|
trans = btrfs_start_transaction(root, 0);
|
||||||
lock_chunks(root);
|
lock_chunks(root);
|
||||||
|
|
||||||
device->barriers = 1;
|
|
||||||
device->writeable = 1;
|
device->writeable = 1;
|
||||||
device->work.func = pending_bios_fn;
|
device->work.func = pending_bios_fn;
|
||||||
generate_random_uuid(device->uuid);
|
generate_random_uuid(device->uuid);
|
||||||
|
@ -3084,7 +3081,6 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
|
||||||
return NULL;
|
return NULL;
|
||||||
list_add(&device->dev_list,
|
list_add(&device->dev_list,
|
||||||
&fs_devices->devices);
|
&fs_devices->devices);
|
||||||
device->barriers = 1;
|
|
||||||
device->dev_root = root->fs_info->dev_root;
|
device->dev_root = root->fs_info->dev_root;
|
||||||
device->devid = devid;
|
device->devid = devid;
|
||||||
device->work.func = pending_bios_fn;
|
device->work.func = pending_bios_fn;
|
||||||
|
|
|
@ -42,7 +42,6 @@ struct btrfs_device {
|
||||||
int running_pending;
|
int running_pending;
|
||||||
u64 generation;
|
u64 generation;
|
||||||
|
|
||||||
int barriers;
|
|
||||||
int writeable;
|
int writeable;
|
||||||
int in_fs_metadata;
|
int in_fs_metadata;
|
||||||
|
|
||||||
|
|
|
@ -156,7 +156,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
|
||||||
if (uptodate) {
|
if (uptodate) {
|
||||||
set_buffer_uptodate(bh);
|
set_buffer_uptodate(bh);
|
||||||
} else {
|
} else {
|
||||||
if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) {
|
if (!quiet_error(bh)) {
|
||||||
buffer_io_error(bh);
|
buffer_io_error(bh);
|
||||||
printk(KERN_WARNING "lost page write due to "
|
printk(KERN_WARNING "lost page write due to "
|
||||||
"I/O error on %s\n",
|
"I/O error on %s\n",
|
||||||
|
@ -2891,7 +2891,6 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
|
||||||
|
|
||||||
if (err == -EOPNOTSUPP) {
|
if (err == -EOPNOTSUPP) {
|
||||||
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
|
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
|
||||||
set_bit(BH_Eopnotsupp, &bh->b_state);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
|
if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
|
||||||
|
@ -3031,10 +3030,6 @@ int __sync_dirty_buffer(struct buffer_head *bh, int rw)
|
||||||
bh->b_end_io = end_buffer_write_sync;
|
bh->b_end_io = end_buffer_write_sync;
|
||||||
ret = submit_bh(rw, bh);
|
ret = submit_bh(rw, bh);
|
||||||
wait_on_buffer(bh);
|
wait_on_buffer(bh);
|
||||||
if (buffer_eopnotsupp(bh)) {
|
|
||||||
clear_buffer_eopnotsupp(bh);
|
|
||||||
ret = -EOPNOTSUPP;
|
|
||||||
}
|
|
||||||
if (!ret && !buffer_uptodate(bh))
|
if (!ret && !buffer_uptodate(bh))
|
||||||
ret = -EIO;
|
ret = -EIO;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -90,7 +90,6 @@ int ext3_sync_file(struct file *file, int datasync)
|
||||||
* storage
|
* storage
|
||||||
*/
|
*/
|
||||||
if (needs_barrier)
|
if (needs_barrier)
|
||||||
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
|
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
|
||||||
BLKDEV_IFL_WAIT);
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
@ -128,10 +128,9 @@ int ext4_sync_file(struct file *file, int datasync)
|
||||||
(journal->j_fs_dev != journal->j_dev) &&
|
(journal->j_fs_dev != journal->j_dev) &&
|
||||||
(journal->j_flags & JBD2_BARRIER))
|
(journal->j_flags & JBD2_BARRIER))
|
||||||
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
|
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
|
||||||
NULL, BLKDEV_IFL_WAIT);
|
NULL);
|
||||||
ret = jbd2_log_wait_commit(journal, commit_tid);
|
ret = jbd2_log_wait_commit(journal, commit_tid);
|
||||||
} else if (journal->j_flags & JBD2_BARRIER)
|
} else if (journal->j_flags & JBD2_BARRIER)
|
||||||
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
|
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
|
||||||
BLKDEV_IFL_WAIT);
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
@ -2566,7 +2566,7 @@ static inline void ext4_issue_discard(struct super_block *sb,
|
||||||
discard_block = block + ext4_group_first_block_no(sb, block_group);
|
discard_block = block + ext4_group_first_block_no(sb, block_group);
|
||||||
trace_ext4_discard_blocks(sb,
|
trace_ext4_discard_blocks(sb,
|
||||||
(unsigned long long) discard_block, count);
|
(unsigned long long) discard_block, count);
|
||||||
ret = sb_issue_discard(sb, discard_block, count);
|
ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
|
||||||
if (ret == EOPNOTSUPP) {
|
if (ret == EOPNOTSUPP) {
|
||||||
ext4_warning(sb, "discard not supported, disabling");
|
ext4_warning(sb, "discard not supported, disabling");
|
||||||
clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
|
clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
|
||||||
|
|
|
@ -577,7 +577,8 @@ int fat_free_clusters(struct inode *inode, int cluster)
|
||||||
|
|
||||||
sb_issue_discard(sb,
|
sb_issue_discard(sb,
|
||||||
fat_clus_to_blknr(sbi, first_cl),
|
fat_clus_to_blknr(sbi, first_cl),
|
||||||
nr_clus * sbi->sec_per_clus);
|
nr_clus * sbi->sec_per_clus,
|
||||||
|
GFP_NOFS, 0);
|
||||||
|
|
||||||
first_cl = cluster;
|
first_cl = cluster;
|
||||||
}
|
}
|
||||||
|
|
|
@ -255,10 +255,7 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
|
||||||
|
|
||||||
for (i = 0; i < nr_bhs; i++) {
|
for (i = 0; i < nr_bhs; i++) {
|
||||||
wait_on_buffer(bhs[i]);
|
wait_on_buffer(bhs[i]);
|
||||||
if (buffer_eopnotsupp(bhs[i])) {
|
if (!err && !buffer_uptodate(bhs[i]))
|
||||||
clear_buffer_eopnotsupp(bhs[i]);
|
|
||||||
err = -EOPNOTSUPP;
|
|
||||||
} else if (!err && !buffer_uptodate(bhs[i]))
|
|
||||||
err = -EIO;
|
err = -EIO;
|
||||||
}
|
}
|
||||||
return err;
|
return err;
|
||||||
|
|
|
@ -592,22 +592,13 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
|
||||||
lh->lh_hash = cpu_to_be32(hash);
|
lh->lh_hash = cpu_to_be32(hash);
|
||||||
|
|
||||||
bh->b_end_io = end_buffer_write_sync;
|
bh->b_end_io = end_buffer_write_sync;
|
||||||
if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
|
|
||||||
goto skip_barrier;
|
|
||||||
get_bh(bh);
|
get_bh(bh);
|
||||||
submit_bh(WRITE_BARRIER | REQ_META, bh);
|
if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
|
||||||
wait_on_buffer(bh);
|
|
||||||
if (buffer_eopnotsupp(bh)) {
|
|
||||||
clear_buffer_eopnotsupp(bh);
|
|
||||||
set_buffer_uptodate(bh);
|
|
||||||
fs_info(sdp, "barrier sync failed - disabling barriers\n");
|
|
||||||
set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
|
|
||||||
lock_buffer(bh);
|
|
||||||
skip_barrier:
|
|
||||||
get_bh(bh);
|
|
||||||
submit_bh(WRITE_SYNC | REQ_META, bh);
|
submit_bh(WRITE_SYNC | REQ_META, bh);
|
||||||
wait_on_buffer(bh);
|
else
|
||||||
}
|
submit_bh(WRITE_FLUSH_FUA | REQ_META, bh);
|
||||||
|
wait_on_buffer(bh);
|
||||||
|
|
||||||
if (!buffer_uptodate(bh))
|
if (!buffer_uptodate(bh))
|
||||||
gfs2_io_error_bh(sdp, bh);
|
gfs2_io_error_bh(sdp, bh);
|
||||||
brelse(bh);
|
brelse(bh);
|
||||||
|
|
|
@ -866,8 +866,7 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
|
||||||
if ((start + nr_sects) != blk) {
|
if ((start + nr_sects) != blk) {
|
||||||
rv = blkdev_issue_discard(bdev, start,
|
rv = blkdev_issue_discard(bdev, start,
|
||||||
nr_sects, GFP_NOFS,
|
nr_sects, GFP_NOFS,
|
||||||
BLKDEV_IFL_WAIT |
|
0);
|
||||||
BLKDEV_IFL_BARRIER);
|
|
||||||
if (rv)
|
if (rv)
|
||||||
goto fail;
|
goto fail;
|
||||||
nr_sects = 0;
|
nr_sects = 0;
|
||||||
|
@ -881,8 +880,7 @@ start_new_extent:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (nr_sects) {
|
if (nr_sects) {
|
||||||
rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS,
|
rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0);
|
||||||
BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
|
|
||||||
if (rv)
|
if (rv)
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
|
@ -137,34 +137,10 @@ static int journal_write_commit_record(journal_t *journal,
|
||||||
JBUFFER_TRACE(descriptor, "write commit block");
|
JBUFFER_TRACE(descriptor, "write commit block");
|
||||||
set_buffer_dirty(bh);
|
set_buffer_dirty(bh);
|
||||||
|
|
||||||
if (journal->j_flags & JFS_BARRIER) {
|
if (journal->j_flags & JFS_BARRIER)
|
||||||
ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER);
|
ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_FLUSH_FUA);
|
||||||
|
else
|
||||||
/*
|
|
||||||
* Is it possible for another commit to fail at roughly
|
|
||||||
* the same time as this one? If so, we don't want to
|
|
||||||
* trust the barrier flag in the super, but instead want
|
|
||||||
* to remember if we sent a barrier request
|
|
||||||
*/
|
|
||||||
if (ret == -EOPNOTSUPP) {
|
|
||||||
char b[BDEVNAME_SIZE];
|
|
||||||
|
|
||||||
printk(KERN_WARNING
|
|
||||||
"JBD: barrier-based sync failed on %s - "
|
|
||||||
"disabling barriers\n",
|
|
||||||
bdevname(journal->j_dev, b));
|
|
||||||
spin_lock(&journal->j_state_lock);
|
|
||||||
journal->j_flags &= ~JFS_BARRIER;
|
|
||||||
spin_unlock(&journal->j_state_lock);
|
|
||||||
|
|
||||||
/* And try again, without the barrier */
|
|
||||||
set_buffer_uptodate(bh);
|
|
||||||
set_buffer_dirty(bh);
|
|
||||||
ret = sync_dirty_buffer(bh);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
ret = sync_dirty_buffer(bh);
|
ret = sync_dirty_buffer(bh);
|
||||||
}
|
|
||||||
|
|
||||||
put_bh(bh); /* One for getblk() */
|
put_bh(bh); /* One for getblk() */
|
||||||
journal_put_journal_head(descriptor);
|
journal_put_journal_head(descriptor);
|
||||||
|
|
|
@ -532,8 +532,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
|
||||||
*/
|
*/
|
||||||
if ((journal->j_fs_dev != journal->j_dev) &&
|
if ((journal->j_fs_dev != journal->j_dev) &&
|
||||||
(journal->j_flags & JBD2_BARRIER))
|
(journal->j_flags & JBD2_BARRIER))
|
||||||
blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL,
|
blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
|
||||||
BLKDEV_IFL_WAIT);
|
|
||||||
if (!(journal->j_flags & JBD2_ABORT))
|
if (!(journal->j_flags & JBD2_ABORT))
|
||||||
jbd2_journal_update_superblock(journal, 1);
|
jbd2_journal_update_superblock(journal, 1);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -134,25 +134,11 @@ static int journal_submit_commit_record(journal_t *journal,
|
||||||
|
|
||||||
if (journal->j_flags & JBD2_BARRIER &&
|
if (journal->j_flags & JBD2_BARRIER &&
|
||||||
!JBD2_HAS_INCOMPAT_FEATURE(journal,
|
!JBD2_HAS_INCOMPAT_FEATURE(journal,
|
||||||
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
|
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
|
||||||
ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh);
|
ret = submit_bh(WRITE_SYNC_PLUG | WRITE_FLUSH_FUA, bh);
|
||||||
if (ret == -EOPNOTSUPP) {
|
else
|
||||||
printk(KERN_WARNING
|
|
||||||
"JBD2: Disabling barriers on %s, "
|
|
||||||
"not supported by device\n", journal->j_devname);
|
|
||||||
write_lock(&journal->j_state_lock);
|
|
||||||
journal->j_flags &= ~JBD2_BARRIER;
|
|
||||||
write_unlock(&journal->j_state_lock);
|
|
||||||
|
|
||||||
/* And try again, without the barrier */
|
|
||||||
lock_buffer(bh);
|
|
||||||
set_buffer_uptodate(bh);
|
|
||||||
clear_buffer_dirty(bh);
|
|
||||||
ret = submit_bh(WRITE_SYNC_PLUG, bh);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
ret = submit_bh(WRITE_SYNC_PLUG, bh);
|
ret = submit_bh(WRITE_SYNC_PLUG, bh);
|
||||||
}
|
|
||||||
*cbh = bh;
|
*cbh = bh;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -166,29 +152,8 @@ static int journal_wait_on_commit_record(journal_t *journal,
|
||||||
{
|
{
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
retry:
|
|
||||||
clear_buffer_dirty(bh);
|
clear_buffer_dirty(bh);
|
||||||
wait_on_buffer(bh);
|
wait_on_buffer(bh);
|
||||||
if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) {
|
|
||||||
printk(KERN_WARNING
|
|
||||||
"JBD2: %s: disabling barries on %s - not supported "
|
|
||||||
"by device\n", __func__, journal->j_devname);
|
|
||||||
write_lock(&journal->j_state_lock);
|
|
||||||
journal->j_flags &= ~JBD2_BARRIER;
|
|
||||||
write_unlock(&journal->j_state_lock);
|
|
||||||
|
|
||||||
lock_buffer(bh);
|
|
||||||
clear_buffer_dirty(bh);
|
|
||||||
set_buffer_uptodate(bh);
|
|
||||||
bh->b_end_io = journal_end_buffer_io_sync;
|
|
||||||
|
|
||||||
ret = submit_bh(WRITE_SYNC_PLUG, bh);
|
|
||||||
if (ret) {
|
|
||||||
unlock_buffer(bh);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
goto retry;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (unlikely(!buffer_uptodate(bh)))
|
if (unlikely(!buffer_uptodate(bh)))
|
||||||
ret = -EIO;
|
ret = -EIO;
|
||||||
|
@ -701,29 +666,6 @@ start_journal_io:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* If the journal is not located on the file system device,
|
|
||||||
* then we must flush the file system device before we issue
|
|
||||||
* the commit record
|
|
||||||
*/
|
|
||||||
if (commit_transaction->t_flushed_data_blocks &&
|
|
||||||
(journal->j_fs_dev != journal->j_dev) &&
|
|
||||||
(journal->j_flags & JBD2_BARRIER))
|
|
||||||
blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL,
|
|
||||||
BLKDEV_IFL_WAIT);
|
|
||||||
|
|
||||||
/* Done it all: now write the commit record asynchronously. */
|
|
||||||
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
|
|
||||||
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
|
|
||||||
err = journal_submit_commit_record(journal, commit_transaction,
|
|
||||||
&cbh, crc32_sum);
|
|
||||||
if (err)
|
|
||||||
__jbd2_journal_abort_hard(journal);
|
|
||||||
if (journal->j_flags & JBD2_BARRIER)
|
|
||||||
blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL,
|
|
||||||
BLKDEV_IFL_WAIT);
|
|
||||||
}
|
|
||||||
|
|
||||||
err = journal_finish_inode_data_buffers(journal, commit_transaction);
|
err = journal_finish_inode_data_buffers(journal, commit_transaction);
|
||||||
if (err) {
|
if (err) {
|
||||||
printk(KERN_WARNING
|
printk(KERN_WARNING
|
||||||
|
@ -734,6 +676,25 @@ start_journal_io:
|
||||||
err = 0;
|
err = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the journal is not located on the file system device,
|
||||||
|
* then we must flush the file system device before we issue
|
||||||
|
* the commit record
|
||||||
|
*/
|
||||||
|
if (commit_transaction->t_flushed_data_blocks &&
|
||||||
|
(journal->j_fs_dev != journal->j_dev) &&
|
||||||
|
(journal->j_flags & JBD2_BARRIER))
|
||||||
|
blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
|
||||||
|
|
||||||
|
/* Done it all: now write the commit record asynchronously. */
|
||||||
|
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
|
||||||
|
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
|
||||||
|
err = journal_submit_commit_record(journal, commit_transaction,
|
||||||
|
&cbh, crc32_sum);
|
||||||
|
if (err)
|
||||||
|
__jbd2_journal_abort_hard(journal);
|
||||||
|
}
|
||||||
|
|
||||||
/* Lo and behold: we have just managed to send a transaction to
|
/* Lo and behold: we have just managed to send a transaction to
|
||||||
the log. Before we can commit it, wait for the IO so far to
|
the log. Before we can commit it, wait for the IO so far to
|
||||||
complete. Control buffers being written are on the
|
complete. Control buffers being written are on the
|
||||||
|
@ -845,6 +806,11 @@ wait_for_iobuf:
|
||||||
}
|
}
|
||||||
if (!err && !is_journal_aborted(journal))
|
if (!err && !is_journal_aborted(journal))
|
||||||
err = journal_wait_on_commit_record(journal, cbh);
|
err = journal_wait_on_commit_record(journal, cbh);
|
||||||
|
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
|
||||||
|
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&
|
||||||
|
journal->j_flags & JBD2_BARRIER) {
|
||||||
|
blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
if (err)
|
if (err)
|
||||||
jbd2_journal_abort(journal, err);
|
jbd2_journal_abort(journal, err);
|
||||||
|
|
|
@ -177,17 +177,9 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
|
||||||
|
|
||||||
retry:
|
retry:
|
||||||
set_buffer_dirty(nilfs->ns_sbh[0]);
|
set_buffer_dirty(nilfs->ns_sbh[0]);
|
||||||
|
|
||||||
if (nilfs_test_opt(sbi, BARRIER)) {
|
if (nilfs_test_opt(sbi, BARRIER)) {
|
||||||
err = __sync_dirty_buffer(nilfs->ns_sbh[0],
|
err = __sync_dirty_buffer(nilfs->ns_sbh[0],
|
||||||
WRITE_SYNC | WRITE_BARRIER);
|
WRITE_SYNC | WRITE_FLUSH_FUA);
|
||||||
if (err == -EOPNOTSUPP) {
|
|
||||||
nilfs_warning(sbi->s_super, __func__,
|
|
||||||
"barrier-based sync failed. "
|
|
||||||
"disabling barriers\n");
|
|
||||||
nilfs_clear_opt(sbi, BARRIER);
|
|
||||||
goto retry;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
err = sync_dirty_buffer(nilfs->ns_sbh[0]);
|
err = sync_dirty_buffer(nilfs->ns_sbh[0]);
|
||||||
}
|
}
|
||||||
|
|
|
@ -775,9 +775,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
|
||||||
ret = blkdev_issue_discard(nilfs->ns_bdev,
|
ret = blkdev_issue_discard(nilfs->ns_bdev,
|
||||||
start * sects_per_block,
|
start * sects_per_block,
|
||||||
nblocks * sects_per_block,
|
nblocks * sects_per_block,
|
||||||
GFP_NOFS,
|
GFP_NOFS, 0);
|
||||||
BLKDEV_IFL_WAIT |
|
|
||||||
BLKDEV_IFL_BARRIER);
|
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
return ret;
|
return ret;
|
||||||
nblocks = 0;
|
nblocks = 0;
|
||||||
|
@ -787,8 +785,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
|
||||||
ret = blkdev_issue_discard(nilfs->ns_bdev,
|
ret = blkdev_issue_discard(nilfs->ns_bdev,
|
||||||
start * sects_per_block,
|
start * sects_per_block,
|
||||||
nblocks * sects_per_block,
|
nblocks * sects_per_block,
|
||||||
GFP_NOFS,
|
GFP_NOFS, 0);
|
||||||
BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -152,8 +152,7 @@ static int reiserfs_sync_file(struct file *filp, int datasync)
|
||||||
barrier_done = reiserfs_commit_for_inode(inode);
|
barrier_done = reiserfs_commit_for_inode(inode);
|
||||||
reiserfs_write_unlock(inode->i_sb);
|
reiserfs_write_unlock(inode->i_sb);
|
||||||
if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
|
if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
|
||||||
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
|
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
|
||||||
BLKDEV_IFL_WAIT);
|
|
||||||
if (barrier_done < 0)
|
if (barrier_done < 0)
|
||||||
return barrier_done;
|
return barrier_done;
|
||||||
return (err < 0) ? -EIO : 0;
|
return (err < 0) ? -EIO : 0;
|
||||||
|
|
|
@ -138,13 +138,6 @@ static int reiserfs_clean_and_file_buffer(struct buffer_head *bh)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void disable_barrier(struct super_block *s)
|
|
||||||
{
|
|
||||||
REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH);
|
|
||||||
printk("reiserfs: disabling flush barriers on %s\n",
|
|
||||||
reiserfs_bdevname(s));
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
|
static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
|
||||||
*sb)
|
*sb)
|
||||||
{
|
{
|
||||||
|
@ -677,30 +670,6 @@ static void submit_ordered_buffer(struct buffer_head *bh)
|
||||||
submit_bh(WRITE, bh);
|
submit_bh(WRITE, bh);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int submit_barrier_buffer(struct buffer_head *bh)
|
|
||||||
{
|
|
||||||
get_bh(bh);
|
|
||||||
bh->b_end_io = reiserfs_end_ordered_io;
|
|
||||||
clear_buffer_dirty(bh);
|
|
||||||
if (!buffer_uptodate(bh))
|
|
||||||
BUG();
|
|
||||||
return submit_bh(WRITE_BARRIER, bh);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void check_barrier_completion(struct super_block *s,
|
|
||||||
struct buffer_head *bh)
|
|
||||||
{
|
|
||||||
if (buffer_eopnotsupp(bh)) {
|
|
||||||
clear_buffer_eopnotsupp(bh);
|
|
||||||
disable_barrier(s);
|
|
||||||
set_buffer_uptodate(bh);
|
|
||||||
set_buffer_dirty(bh);
|
|
||||||
reiserfs_write_unlock(s);
|
|
||||||
sync_dirty_buffer(bh);
|
|
||||||
reiserfs_write_lock(s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#define CHUNK_SIZE 32
|
#define CHUNK_SIZE 32
|
||||||
struct buffer_chunk {
|
struct buffer_chunk {
|
||||||
struct buffer_head *bh[CHUNK_SIZE];
|
struct buffer_head *bh[CHUNK_SIZE];
|
||||||
|
@ -1009,7 +978,6 @@ static int flush_commit_list(struct super_block *s,
|
||||||
struct buffer_head *tbh = NULL;
|
struct buffer_head *tbh = NULL;
|
||||||
unsigned int trans_id = jl->j_trans_id;
|
unsigned int trans_id = jl->j_trans_id;
|
||||||
struct reiserfs_journal *journal = SB_JOURNAL(s);
|
struct reiserfs_journal *journal = SB_JOURNAL(s);
|
||||||
int barrier = 0;
|
|
||||||
int retval = 0;
|
int retval = 0;
|
||||||
int write_len;
|
int write_len;
|
||||||
|
|
||||||
|
@ -1094,24 +1062,6 @@ static int flush_commit_list(struct super_block *s,
|
||||||
}
|
}
|
||||||
atomic_dec(&journal->j_async_throttle);
|
atomic_dec(&journal->j_async_throttle);
|
||||||
|
|
||||||
/* We're skipping the commit if there's an error */
|
|
||||||
if (retval || reiserfs_is_journal_aborted(journal))
|
|
||||||
barrier = 0;
|
|
||||||
|
|
||||||
/* wait on everything written so far before writing the commit
|
|
||||||
* if we are in barrier mode, send the commit down now
|
|
||||||
*/
|
|
||||||
barrier = reiserfs_barrier_flush(s);
|
|
||||||
if (barrier) {
|
|
||||||
int ret;
|
|
||||||
lock_buffer(jl->j_commit_bh);
|
|
||||||
ret = submit_barrier_buffer(jl->j_commit_bh);
|
|
||||||
if (ret == -EOPNOTSUPP) {
|
|
||||||
set_buffer_uptodate(jl->j_commit_bh);
|
|
||||||
disable_barrier(s);
|
|
||||||
barrier = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (i = 0; i < (jl->j_len + 1); i++) {
|
for (i = 0; i < (jl->j_len + 1); i++) {
|
||||||
bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
|
bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
|
||||||
(jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
|
(jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
|
||||||
|
@ -1143,27 +1093,22 @@ static int flush_commit_list(struct super_block *s,
|
||||||
|
|
||||||
BUG_ON(atomic_read(&(jl->j_commit_left)) != 1);
|
BUG_ON(atomic_read(&(jl->j_commit_left)) != 1);
|
||||||
|
|
||||||
if (!barrier) {
|
/* If there was a write error in the journal - we can't commit
|
||||||
/* If there was a write error in the journal - we can't commit
|
* this transaction - it will be invalid and, if successful,
|
||||||
* this transaction - it will be invalid and, if successful,
|
* will just end up propagating the write error out to
|
||||||
* will just end up propagating the write error out to
|
* the file system. */
|
||||||
* the file system. */
|
if (likely(!retval && !reiserfs_is_journal_aborted (journal))) {
|
||||||
if (likely(!retval && !reiserfs_is_journal_aborted (journal))) {
|
if (buffer_dirty(jl->j_commit_bh))
|
||||||
if (buffer_dirty(jl->j_commit_bh))
|
BUG();
|
||||||
BUG();
|
mark_buffer_dirty(jl->j_commit_bh) ;
|
||||||
mark_buffer_dirty(jl->j_commit_bh) ;
|
|
||||||
reiserfs_write_unlock(s);
|
|
||||||
sync_dirty_buffer(jl->j_commit_bh) ;
|
|
||||||
reiserfs_write_lock(s);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
reiserfs_write_unlock(s);
|
reiserfs_write_unlock(s);
|
||||||
wait_on_buffer(jl->j_commit_bh);
|
if (reiserfs_barrier_flush(s))
|
||||||
|
__sync_dirty_buffer(jl->j_commit_bh, WRITE_FLUSH_FUA);
|
||||||
|
else
|
||||||
|
sync_dirty_buffer(jl->j_commit_bh);
|
||||||
reiserfs_write_lock(s);
|
reiserfs_write_lock(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
check_barrier_completion(s, jl->j_commit_bh);
|
|
||||||
|
|
||||||
/* If there was a write error in the journal - we can't commit this
|
/* If there was a write error in the journal - we can't commit this
|
||||||
* transaction - it will be invalid and, if successful, will just end
|
* transaction - it will be invalid and, if successful, will just end
|
||||||
* up propagating the write error out to the filesystem. */
|
* up propagating the write error out to the filesystem. */
|
||||||
|
@ -1319,26 +1264,15 @@ static int _update_journal_header_block(struct super_block *sb,
|
||||||
jh->j_first_unflushed_offset = cpu_to_le32(offset);
|
jh->j_first_unflushed_offset = cpu_to_le32(offset);
|
||||||
jh->j_mount_id = cpu_to_le32(journal->j_mount_id);
|
jh->j_mount_id = cpu_to_le32(journal->j_mount_id);
|
||||||
|
|
||||||
if (reiserfs_barrier_flush(sb)) {
|
set_buffer_dirty(journal->j_header_bh);
|
||||||
int ret;
|
reiserfs_write_unlock(sb);
|
||||||
lock_buffer(journal->j_header_bh);
|
|
||||||
ret = submit_barrier_buffer(journal->j_header_bh);
|
if (reiserfs_barrier_flush(sb))
|
||||||
if (ret == -EOPNOTSUPP) {
|
__sync_dirty_buffer(journal->j_header_bh, WRITE_FLUSH_FUA);
|
||||||
set_buffer_uptodate(journal->j_header_bh);
|
else
|
||||||
disable_barrier(sb);
|
|
||||||
goto sync;
|
|
||||||
}
|
|
||||||
reiserfs_write_unlock(sb);
|
|
||||||
wait_on_buffer(journal->j_header_bh);
|
|
||||||
reiserfs_write_lock(sb);
|
|
||||||
check_barrier_completion(sb, journal->j_header_bh);
|
|
||||||
} else {
|
|
||||||
sync:
|
|
||||||
set_buffer_dirty(journal->j_header_bh);
|
|
||||||
reiserfs_write_unlock(sb);
|
|
||||||
sync_dirty_buffer(journal->j_header_bh);
|
sync_dirty_buffer(journal->j_header_bh);
|
||||||
reiserfs_write_lock(sb);
|
|
||||||
}
|
reiserfs_write_lock(sb);
|
||||||
if (!buffer_uptodate(journal->j_header_bh)) {
|
if (!buffer_uptodate(journal->j_header_bh)) {
|
||||||
reiserfs_warning(sb, "journal-837",
|
reiserfs_warning(sb, "journal-837",
|
||||||
"IO error during journal replay");
|
"IO error during journal replay");
|
||||||
|
|
|
@ -924,19 +924,7 @@ xfs_buf_iodone_work(
|
||||||
xfs_buf_t *bp =
|
xfs_buf_t *bp =
|
||||||
container_of(work, xfs_buf_t, b_iodone_work);
|
container_of(work, xfs_buf_t, b_iodone_work);
|
||||||
|
|
||||||
/*
|
if (bp->b_iodone)
|
||||||
* We can get an EOPNOTSUPP to ordered writes. Here we clear the
|
|
||||||
* ordered flag and reissue them. Because we can't tell the higher
|
|
||||||
* layers directly that they should not issue ordered I/O anymore, they
|
|
||||||
* need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion.
|
|
||||||
*/
|
|
||||||
if ((bp->b_error == EOPNOTSUPP) &&
|
|
||||||
(bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
|
|
||||||
trace_xfs_buf_ordered_retry(bp, _RET_IP_);
|
|
||||||
bp->b_flags &= ~XBF_ORDERED;
|
|
||||||
bp->b_flags |= _XFS_BARRIER_FAILED;
|
|
||||||
xfs_buf_iorequest(bp);
|
|
||||||
} else if (bp->b_iodone)
|
|
||||||
(*(bp->b_iodone))(bp);
|
(*(bp->b_iodone))(bp);
|
||||||
else if (bp->b_flags & XBF_ASYNC)
|
else if (bp->b_flags & XBF_ASYNC)
|
||||||
xfs_buf_relse(bp);
|
xfs_buf_relse(bp);
|
||||||
|
@ -1195,7 +1183,7 @@ _xfs_buf_ioapply(
|
||||||
|
|
||||||
if (bp->b_flags & XBF_ORDERED) {
|
if (bp->b_flags & XBF_ORDERED) {
|
||||||
ASSERT(!(bp->b_flags & XBF_READ));
|
ASSERT(!(bp->b_flags & XBF_READ));
|
||||||
rw = WRITE_BARRIER;
|
rw = WRITE_FLUSH_FUA;
|
||||||
} else if (bp->b_flags & XBF_LOG_BUFFER) {
|
} else if (bp->b_flags & XBF_LOG_BUFFER) {
|
||||||
ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
|
ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
|
||||||
bp->b_flags &= ~_XBF_RUN_QUEUES;
|
bp->b_flags &= ~_XBF_RUN_QUEUES;
|
||||||
|
|
|
@ -86,14 +86,6 @@ typedef enum {
|
||||||
*/
|
*/
|
||||||
#define _XBF_PAGE_LOCKED (1 << 22)
|
#define _XBF_PAGE_LOCKED (1 << 22)
|
||||||
|
|
||||||
/*
|
|
||||||
* If we try a barrier write, but it fails we have to communicate
|
|
||||||
* this to the upper layers. Unfortunately b_error gets overwritten
|
|
||||||
* when the buffer is re-issued so we have to add another flag to
|
|
||||||
* keep this information.
|
|
||||||
*/
|
|
||||||
#define _XFS_BARRIER_FAILED (1 << 23)
|
|
||||||
|
|
||||||
typedef unsigned int xfs_buf_flags_t;
|
typedef unsigned int xfs_buf_flags_t;
|
||||||
|
|
||||||
#define XFS_BUF_FLAGS \
|
#define XFS_BUF_FLAGS \
|
||||||
|
@ -114,8 +106,7 @@ typedef unsigned int xfs_buf_flags_t;
|
||||||
{ _XBF_PAGES, "PAGES" }, \
|
{ _XBF_PAGES, "PAGES" }, \
|
||||||
{ _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
|
{ _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
|
||||||
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \
|
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \
|
||||||
{ _XBF_PAGE_LOCKED, "PAGE_LOCKED" }, \
|
{ _XBF_PAGE_LOCKED, "PAGE_LOCKED" }
|
||||||
{ _XFS_BARRIER_FAILED, "BARRIER_FAILED" }
|
|
||||||
|
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
|
|
|
@ -693,8 +693,7 @@ void
|
||||||
xfs_blkdev_issue_flush(
|
xfs_blkdev_issue_flush(
|
||||||
xfs_buftarg_t *buftarg)
|
xfs_buftarg_t *buftarg)
|
||||||
{
|
{
|
||||||
blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL,
|
blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
|
||||||
BLKDEV_IFL_WAIT);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
STATIC void
|
STATIC void
|
||||||
|
|
|
@ -325,7 +325,6 @@ DEFINE_BUF_EVENT(xfs_buf_lock);
|
||||||
DEFINE_BUF_EVENT(xfs_buf_lock_done);
|
DEFINE_BUF_EVENT(xfs_buf_lock_done);
|
||||||
DEFINE_BUF_EVENT(xfs_buf_cond_lock);
|
DEFINE_BUF_EVENT(xfs_buf_cond_lock);
|
||||||
DEFINE_BUF_EVENT(xfs_buf_unlock);
|
DEFINE_BUF_EVENT(xfs_buf_unlock);
|
||||||
DEFINE_BUF_EVENT(xfs_buf_ordered_retry);
|
|
||||||
DEFINE_BUF_EVENT(xfs_buf_iowait);
|
DEFINE_BUF_EVENT(xfs_buf_iowait);
|
||||||
DEFINE_BUF_EVENT(xfs_buf_iowait_done);
|
DEFINE_BUF_EVENT(xfs_buf_iowait_done);
|
||||||
DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
|
DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
|
||||||
|
|
|
@ -916,19 +916,6 @@ xlog_iodone(xfs_buf_t *bp)
|
||||||
aborted = 0;
|
aborted = 0;
|
||||||
l = iclog->ic_log;
|
l = iclog->ic_log;
|
||||||
|
|
||||||
/*
|
|
||||||
* If the _XFS_BARRIER_FAILED flag was set by a lower
|
|
||||||
* layer, it means the underlying device no longer supports
|
|
||||||
* barrier I/O. Warn loudly and turn off barriers.
|
|
||||||
*/
|
|
||||||
if (bp->b_flags & _XFS_BARRIER_FAILED) {
|
|
||||||
bp->b_flags &= ~_XFS_BARRIER_FAILED;
|
|
||||||
l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
|
|
||||||
xfs_fs_cmn_err(CE_WARN, l->l_mp,
|
|
||||||
"xlog_iodone: Barriers are no longer supported"
|
|
||||||
" by device. Disabling barriers\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Race to shutdown the filesystem if we see an error.
|
* Race to shutdown the filesystem if we see an error.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -146,7 +146,6 @@ enum rq_flag_bits {
|
||||||
__REQ_FAILED, /* set if the request failed */
|
__REQ_FAILED, /* set if the request failed */
|
||||||
__REQ_QUIET, /* don't worry about errors */
|
__REQ_QUIET, /* don't worry about errors */
|
||||||
__REQ_PREEMPT, /* set for "ide_preempt" requests */
|
__REQ_PREEMPT, /* set for "ide_preempt" requests */
|
||||||
__REQ_ORDERED_COLOR, /* is before or after barrier */
|
|
||||||
__REQ_ALLOCED, /* request came from our alloc pool */
|
__REQ_ALLOCED, /* request came from our alloc pool */
|
||||||
__REQ_COPY_USER, /* contains copies of user pages */
|
__REQ_COPY_USER, /* contains copies of user pages */
|
||||||
__REQ_FLUSH, /* request for cache flush */
|
__REQ_FLUSH, /* request for cache flush */
|
||||||
|
@ -170,7 +169,8 @@ enum rq_flag_bits {
|
||||||
(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
|
(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
|
||||||
#define REQ_COMMON_MASK \
|
#define REQ_COMMON_MASK \
|
||||||
(REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \
|
(REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \
|
||||||
REQ_META| REQ_DISCARD | REQ_NOIDLE)
|
REQ_META | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
|
||||||
|
#define REQ_CLONE_MASK REQ_COMMON_MASK
|
||||||
|
|
||||||
#define REQ_UNPLUG (1 << __REQ_UNPLUG)
|
#define REQ_UNPLUG (1 << __REQ_UNPLUG)
|
||||||
#define REQ_RAHEAD (1 << __REQ_RAHEAD)
|
#define REQ_RAHEAD (1 << __REQ_RAHEAD)
|
||||||
|
@ -187,7 +187,6 @@ enum rq_flag_bits {
|
||||||
#define REQ_FAILED (1 << __REQ_FAILED)
|
#define REQ_FAILED (1 << __REQ_FAILED)
|
||||||
#define REQ_QUIET (1 << __REQ_QUIET)
|
#define REQ_QUIET (1 << __REQ_QUIET)
|
||||||
#define REQ_PREEMPT (1 << __REQ_PREEMPT)
|
#define REQ_PREEMPT (1 << __REQ_PREEMPT)
|
||||||
#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR)
|
|
||||||
#define REQ_ALLOCED (1 << __REQ_ALLOCED)
|
#define REQ_ALLOCED (1 << __REQ_ALLOCED)
|
||||||
#define REQ_COPY_USER (1 << __REQ_COPY_USER)
|
#define REQ_COPY_USER (1 << __REQ_COPY_USER)
|
||||||
#define REQ_FLUSH (1 << __REQ_FLUSH)
|
#define REQ_FLUSH (1 << __REQ_FLUSH)
|
||||||
|
|
|
@ -360,12 +360,14 @@ struct request_queue
|
||||||
struct blk_trace *blk_trace;
|
struct blk_trace *blk_trace;
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* reserved for flush operations
|
* for flush operations
|
||||||
*/
|
*/
|
||||||
unsigned int ordered, next_ordered, ordseq;
|
unsigned int flush_flags;
|
||||||
int orderr, ordcolor;
|
unsigned int flush_seq;
|
||||||
struct request pre_flush_rq, bar_rq, post_flush_rq;
|
int flush_err;
|
||||||
struct request *orig_bar_rq;
|
struct request flush_rq;
|
||||||
|
struct request *orig_flush_rq;
|
||||||
|
struct list_head pending_flushes;
|
||||||
|
|
||||||
struct mutex sysfs_lock;
|
struct mutex sysfs_lock;
|
||||||
|
|
||||||
|
@ -472,56 +474,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
|
||||||
__clear_bit(flag, &q->queue_flags);
|
__clear_bit(flag, &q->queue_flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
enum {
|
|
||||||
/*
|
|
||||||
* Hardbarrier is supported with one of the following methods.
|
|
||||||
*
|
|
||||||
* NONE : hardbarrier unsupported
|
|
||||||
* DRAIN : ordering by draining is enough
|
|
||||||
* DRAIN_FLUSH : ordering by draining w/ pre and post flushes
|
|
||||||
* DRAIN_FUA : ordering by draining w/ pre flush and FUA write
|
|
||||||
* TAG : ordering by tag is enough
|
|
||||||
* TAG_FLUSH : ordering by tag w/ pre and post flushes
|
|
||||||
* TAG_FUA : ordering by tag w/ pre flush and FUA write
|
|
||||||
*/
|
|
||||||
QUEUE_ORDERED_BY_DRAIN = 0x01,
|
|
||||||
QUEUE_ORDERED_BY_TAG = 0x02,
|
|
||||||
QUEUE_ORDERED_DO_PREFLUSH = 0x10,
|
|
||||||
QUEUE_ORDERED_DO_BAR = 0x20,
|
|
||||||
QUEUE_ORDERED_DO_POSTFLUSH = 0x40,
|
|
||||||
QUEUE_ORDERED_DO_FUA = 0x80,
|
|
||||||
|
|
||||||
QUEUE_ORDERED_NONE = 0x00,
|
|
||||||
|
|
||||||
QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN |
|
|
||||||
QUEUE_ORDERED_DO_BAR,
|
|
||||||
QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN |
|
|
||||||
QUEUE_ORDERED_DO_PREFLUSH |
|
|
||||||
QUEUE_ORDERED_DO_POSTFLUSH,
|
|
||||||
QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN |
|
|
||||||
QUEUE_ORDERED_DO_PREFLUSH |
|
|
||||||
QUEUE_ORDERED_DO_FUA,
|
|
||||||
|
|
||||||
QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG |
|
|
||||||
QUEUE_ORDERED_DO_BAR,
|
|
||||||
QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG |
|
|
||||||
QUEUE_ORDERED_DO_PREFLUSH |
|
|
||||||
QUEUE_ORDERED_DO_POSTFLUSH,
|
|
||||||
QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG |
|
|
||||||
QUEUE_ORDERED_DO_PREFLUSH |
|
|
||||||
QUEUE_ORDERED_DO_FUA,
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Ordered operation sequence
|
|
||||||
*/
|
|
||||||
QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */
|
|
||||||
QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */
|
|
||||||
QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */
|
|
||||||
QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */
|
|
||||||
QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */
|
|
||||||
QUEUE_ORDSEQ_DONE = 0x20,
|
|
||||||
};
|
|
||||||
|
|
||||||
#define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
|
#define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
|
||||||
#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
|
#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
|
||||||
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
|
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
|
||||||
|
@ -531,7 +483,6 @@ enum {
|
||||||
#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
|
#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
|
||||||
#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
|
#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
|
||||||
#define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
|
#define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
|
||||||
#define blk_queue_flushing(q) ((q)->ordseq)
|
|
||||||
#define blk_queue_stackable(q) \
|
#define blk_queue_stackable(q) \
|
||||||
test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
|
test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
|
||||||
#define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
|
#define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
|
||||||
|
@ -602,7 +553,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync)
|
||||||
* it already be started by driver.
|
* it already be started by driver.
|
||||||
*/
|
*/
|
||||||
#define RQ_NOMERGE_FLAGS \
|
#define RQ_NOMERGE_FLAGS \
|
||||||
(REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
|
(REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER | \
|
||||||
|
REQ_FLUSH | REQ_FUA)
|
||||||
#define rq_mergeable(rq) \
|
#define rq_mergeable(rq) \
|
||||||
(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
|
(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
|
||||||
(((rq)->cmd_flags & REQ_DISCARD) || \
|
(((rq)->cmd_flags & REQ_DISCARD) || \
|
||||||
|
@ -891,12 +843,8 @@ extern void blk_queue_update_dma_alignment(struct request_queue *, int);
|
||||||
extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
|
extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
|
||||||
extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
|
extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
|
||||||
extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
|
extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
|
||||||
|
extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
|
||||||
extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
|
extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
|
||||||
extern int blk_queue_ordered(struct request_queue *, unsigned);
|
|
||||||
extern bool blk_do_ordered(struct request_queue *, struct request **);
|
|
||||||
extern unsigned blk_ordered_cur_seq(struct request_queue *);
|
|
||||||
extern unsigned blk_ordered_req_seq(struct request *);
|
|
||||||
extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int);
|
|
||||||
|
|
||||||
extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
|
extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
|
||||||
extern void blk_dump_rq_flags(struct request *, char *);
|
extern void blk_dump_rq_flags(struct request *, char *);
|
||||||
|
@ -929,27 +877,20 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
|
||||||
return NULL;
|
return NULL;
|
||||||
return bqt->tag_index[tag];
|
return bqt->tag_index[tag];
|
||||||
}
|
}
|
||||||
enum{
|
|
||||||
BLKDEV_WAIT, /* wait for completion */
|
#define BLKDEV_DISCARD_SECURE 0x01 /* secure discard */
|
||||||
BLKDEV_BARRIER, /* issue request with barrier */
|
|
||||||
BLKDEV_SECURE, /* secure discard */
|
extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
|
||||||
};
|
|
||||||
#define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT)
|
|
||||||
#define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER)
|
|
||||||
#define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE)
|
|
||||||
extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *,
|
|
||||||
unsigned long);
|
|
||||||
extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
||||||
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
|
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
|
||||||
extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
|
extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
|
||||||
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
|
sector_t nr_sects, gfp_t gfp_mask);
|
||||||
static inline int sb_issue_discard(struct super_block *sb,
|
static inline int sb_issue_discard(struct super_block *sb, sector_t block,
|
||||||
sector_t block, sector_t nr_blocks)
|
sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
|
||||||
{
|
{
|
||||||
block <<= (sb->s_blocksize_bits - 9);
|
return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9),
|
||||||
nr_blocks <<= (sb->s_blocksize_bits - 9);
|
nr_blocks << (sb->s_blocksize_bits - 9),
|
||||||
return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS,
|
gfp_mask, flags);
|
||||||
BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
|
extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
|
||||||
|
|
|
@ -32,7 +32,6 @@ enum bh_state_bits {
|
||||||
BH_Delay, /* Buffer is not yet allocated on disk */
|
BH_Delay, /* Buffer is not yet allocated on disk */
|
||||||
BH_Boundary, /* Block is followed by a discontiguity */
|
BH_Boundary, /* Block is followed by a discontiguity */
|
||||||
BH_Write_EIO, /* I/O error on write */
|
BH_Write_EIO, /* I/O error on write */
|
||||||
BH_Eopnotsupp, /* operation not supported (barrier) */
|
|
||||||
BH_Unwritten, /* Buffer is allocated on disk but not written */
|
BH_Unwritten, /* Buffer is allocated on disk but not written */
|
||||||
BH_Quiet, /* Buffer Error Prinks to be quiet */
|
BH_Quiet, /* Buffer Error Prinks to be quiet */
|
||||||
|
|
||||||
|
@ -124,7 +123,6 @@ BUFFER_FNS(Async_Write, async_write)
|
||||||
BUFFER_FNS(Delay, delay)
|
BUFFER_FNS(Delay, delay)
|
||||||
BUFFER_FNS(Boundary, boundary)
|
BUFFER_FNS(Boundary, boundary)
|
||||||
BUFFER_FNS(Write_EIO, write_io_error)
|
BUFFER_FNS(Write_EIO, write_io_error)
|
||||||
BUFFER_FNS(Eopnotsupp, eopnotsupp)
|
|
||||||
BUFFER_FNS(Unwritten, unwritten)
|
BUFFER_FNS(Unwritten, unwritten)
|
||||||
|
|
||||||
#define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK)
|
#define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK)
|
||||||
|
|
|
@ -135,12 +135,12 @@ struct inodes_stat_t {
|
||||||
* immediately after submission. The write equivalent
|
* immediately after submission. The write equivalent
|
||||||
* of READ_SYNC.
|
* of READ_SYNC.
|
||||||
* WRITE_ODIRECT_PLUG Special case write for O_DIRECT only.
|
* WRITE_ODIRECT_PLUG Special case write for O_DIRECT only.
|
||||||
* WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all
|
* WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush.
|
||||||
* previously submitted writes must be safely on storage
|
* WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on
|
||||||
* before this one is started. Also guarantees that when
|
* non-volatile media on completion.
|
||||||
* this write is complete, it itself is also safely on
|
* WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded
|
||||||
* storage. Prevents reordering of writes on both sides
|
* by a cache flush and data is guaranteed to be on
|
||||||
* of this IO.
|
* non-volatile media on completion.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
#define RW_MASK REQ_WRITE
|
#define RW_MASK REQ_WRITE
|
||||||
|
@ -156,16 +156,12 @@ struct inodes_stat_t {
|
||||||
#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG)
|
#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG)
|
||||||
#define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC)
|
#define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC)
|
||||||
#define WRITE_META (WRITE | REQ_META)
|
#define WRITE_META (WRITE | REQ_META)
|
||||||
#define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
|
#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
|
||||||
REQ_HARDBARRIER)
|
REQ_FLUSH)
|
||||||
|
#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
|
||||||
/*
|
REQ_FUA)
|
||||||
* These aren't really reads or writes, they pass down information about
|
#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
|
||||||
* parts of device that are now unused by the file system.
|
REQ_FLUSH | REQ_FUA)
|
||||||
*/
|
|
||||||
#define DISCARD_NOBARRIER (WRITE | REQ_DISCARD)
|
|
||||||
#define DISCARD_BARRIER (WRITE | REQ_DISCARD | REQ_HARDBARRIER)
|
|
||||||
#define DISCARD_SECURE (DISCARD_NOBARRIER | REQ_SECURE)
|
|
||||||
|
|
||||||
#define SEL_IN 1
|
#define SEL_IN 1
|
||||||
#define SEL_OUT 2
|
#define SEL_OUT 2
|
||||||
|
|
|
@ -97,13 +97,9 @@ static inline void scsi_deactivate_tcq(struct scsi_device *sdev, int depth)
|
||||||
static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg)
|
static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg)
|
||||||
{
|
{
|
||||||
struct request *req = cmd->request;
|
struct request *req = cmd->request;
|
||||||
struct scsi_device *sdev = cmd->device;
|
|
||||||
|
|
||||||
if (blk_rq_tagged(req)) {
|
if (blk_rq_tagged(req)) {
|
||||||
if (sdev->ordered_tags && req->cmd_flags & REQ_HARDBARRIER)
|
*msg++ = MSG_SIMPLE_TAG;
|
||||||
*msg++ = MSG_ORDERED_TAG;
|
|
||||||
else
|
|
||||||
*msg++ = MSG_SIMPLE_TAG;
|
|
||||||
*msg++ = req->tag;
|
*msg++ = req->tag;
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
|
|
@ -139,7 +139,7 @@ static int discard_swap(struct swap_info_struct *si)
|
||||||
nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
|
nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
|
||||||
if (nr_blocks) {
|
if (nr_blocks) {
|
||||||
err = blkdev_issue_discard(si->bdev, start_block,
|
err = blkdev_issue_discard(si->bdev, start_block,
|
||||||
nr_blocks, GFP_KERNEL, BLKDEV_IFL_WAIT);
|
nr_blocks, GFP_KERNEL, 0);
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
cond_resched();
|
cond_resched();
|
||||||
|
@ -150,7 +150,7 @@ static int discard_swap(struct swap_info_struct *si)
|
||||||
nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
|
nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
|
||||||
|
|
||||||
err = blkdev_issue_discard(si->bdev, start_block,
|
err = blkdev_issue_discard(si->bdev, start_block,
|
||||||
nr_blocks, GFP_KERNEL, BLKDEV_IFL_WAIT);
|
nr_blocks, GFP_KERNEL, 0);
|
||||||
if (err)
|
if (err)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -189,7 +189,7 @@ static void discard_swap_cluster(struct swap_info_struct *si,
|
||||||
start_block <<= PAGE_SHIFT - 9;
|
start_block <<= PAGE_SHIFT - 9;
|
||||||
nr_blocks <<= PAGE_SHIFT - 9;
|
nr_blocks <<= PAGE_SHIFT - 9;
|
||||||
if (blkdev_issue_discard(si->bdev, start_block,
|
if (blkdev_issue_discard(si->bdev, start_block,
|
||||||
nr_blocks, GFP_NOIO, BLKDEV_IFL_WAIT))
|
nr_blocks, GFP_NOIO, 0))
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue