remarkable-linux/block/blk.h
Tejun Heo ae1b153962 block: reimplement FLUSH/FUA to support merge
The current FLUSH/FUA support has evolved from the implementation
which had to perform queue draining.  As such, sequencing is done
queue-wide one flush request after another.  However, with the
draining requirement gone, there's no reason to keep the queue-wide
sequential approach.

This patch reimplements FLUSH/FUA support such that each FLUSH/FUA
request is sequenced individually.  The actual FLUSH execution is
double buffered and whenever a request wants to execute one for either
PRE or POSTFLUSH, it queues on the pending queue.  Once certain
conditions are met, a flush request is issued and on its completion
all pending requests proceed to the next sequence.

This allows arbitrary merging of different type of flushes.  How they
are merged can be primarily controlled and tuned by adjusting the
above said 'conditions' used to determine when to issue the next
flush.

This is inspired by Darrick's patches to merge multiple zero-data
flushes which helps workloads with highly concurrent fsync requests.

* As flush requests are never put on the IO scheduler, request fields
  used for flush share space with rq->rb_node.  rq->completion_data is
  moved out of the union.  This increases the request size by one
  pointer.

  As rq->elevator_private* are used only by the iosched too, it is
  possible to reduce the request size further.  However, to do that,
  we need to modify request allocation path such that iosched data is
  not allocated for flush requests.

* FLUSH/FUA processing happens on insertion now instead of dispatch.

- Comments updated as per Vivek and Mike.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: "Darrick J. Wong" <djwong@us.ibm.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
2011-01-25 12:43:54 +01:00

169 lines
4.5 KiB
C

#ifndef BLK_INTERNAL_H
#define BLK_INTERNAL_H
/* Amount of time in which a process may batch requests */
#define BLK_BATCH_TIME (HZ/50UL)
/* Number of requests a "batching" process may submit */
#define BLK_BATCH_REQ 32
extern struct kmem_cache *blk_requestq_cachep;
extern struct kobj_type blk_queue_ktype;
void init_request_from_bio(struct request *req, struct bio *bio);
void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
struct bio *bio);
int blk_rq_append_bio(struct request_queue *q, struct request *rq,
struct bio *bio);
void blk_dequeue_request(struct request *rq);
void __blk_queue_free_tags(struct request_queue *q);
void blk_unplug_work(struct work_struct *work);
void blk_unplug_timeout(unsigned long data);
void blk_rq_timed_out_timer(unsigned long data);
void blk_delete_timer(struct request *);
void blk_add_timer(struct request *);
void __generic_unplug_device(struct request_queue *);
/*
* Internal atomic flags for request handling
*/
enum rq_atomic_flags {
REQ_ATOM_COMPLETE = 0,
};
/*
* EH timer and IO completion will both attempt to 'grab' the request, make
* sure that only one of them suceeds
*/
static inline int blk_mark_rq_complete(struct request *rq)
{
return test_and_set_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
}
static inline void blk_clear_rq_complete(struct request *rq)
{
clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
}
/*
* Internal elevator interface
*/
#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))
void blk_insert_flush(struct request *rq);
void blk_abort_flushes(struct request_queue *q);
static inline struct request *__elv_next_request(struct request_queue *q)
{
struct request *rq;
while (1) {
if (!list_empty(&q->queue_head)) {
rq = list_entry_rq(q->queue_head.next);
return rq;
}
if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
return NULL;
}
}
static inline void elv_activate_rq(struct request_queue *q, struct request *rq)
{
struct elevator_queue *e = q->elevator;
if (e->ops->elevator_activate_req_fn)
e->ops->elevator_activate_req_fn(q, rq);
}
static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq)
{
struct elevator_queue *e = q->elevator;
if (e->ops->elevator_deactivate_req_fn)
e->ops->elevator_deactivate_req_fn(q, rq);
}
#ifdef CONFIG_FAIL_IO_TIMEOUT
int blk_should_fake_timeout(struct request_queue *);
ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
ssize_t part_timeout_store(struct device *, struct device_attribute *,
const char *, size_t);
#else
static inline int blk_should_fake_timeout(struct request_queue *q)
{
return 0;
}
#endif
struct io_context *current_io_context(gfp_t gfp_flags, int node);
int ll_back_merge_fn(struct request_queue *q, struct request *req,
struct bio *bio);
int ll_front_merge_fn(struct request_queue *q, struct request *req,
struct bio *bio);
int attempt_back_merge(struct request_queue *q, struct request *rq);
int attempt_front_merge(struct request_queue *q, struct request *rq);
void blk_recalc_rq_segments(struct request *rq);
void blk_rq_set_mixed_merge(struct request *rq);
void blk_queue_congestion_threshold(struct request_queue *q);
int blk_dev_init(void);
void elv_quiesce_start(struct request_queue *q);
void elv_quiesce_end(struct request_queue *q);
/*
* Return the threshold (number of used requests) at which the queue is
* considered to be congested. It include a little hysteresis to keep the
* context switch rate down.
*/
static inline int queue_congestion_on_threshold(struct request_queue *q)
{
return q->nr_congestion_on;
}
/*
* The threshold at which a queue is considered to be uncongested
*/
static inline int queue_congestion_off_threshold(struct request_queue *q)
{
return q->nr_congestion_off;
}
static inline int blk_cpu_to_group(int cpu)
{
int group = NR_CPUS;
#ifdef CONFIG_SCHED_MC
const struct cpumask *mask = cpu_coregroup_mask(cpu);
group = cpumask_first(mask);
#elif defined(CONFIG_SCHED_SMT)
group = cpumask_first(topology_thread_cpumask(cpu));
#else
return cpu;
#endif
if (likely(group < NR_CPUS))
return group;
return cpu;
}
/*
* Contribute to IO statistics IFF:
*
* a) it's attached to a gendisk, and
* b) the queue had IO stats enabled when this request was started, and
* c) it's a file system request or a discard request
*/
static inline int blk_do_io_stat(struct request *rq)
{
return rq->rq_disk &&
(rq->cmd_flags & REQ_IO_STAT) &&
(rq->cmd_type == REQ_TYPE_FS ||
(rq->cmd_flags & REQ_DISCARD));
}
#endif