
Merge branch 'for-3.16/drivers' of git://git.kernel.dk/linux-block into next

Pull block driver changes from Jens Axboe:
 "Now that the core bits are in, here's the pull request for the driver
  related changes for 3.16.  Nothing out of the ordinary here, mostly
  business as usual.  There are a few pulls of for-3.16/core into this
  branch, which were done when the blk-mq was modified after the
  mtip32xx conversion was put in.

  The pull request contains:

   - skd and cciss converted to use pci_enable_msix_exact().  From
     Alexander Gordeev.

   - A few mtip32xx fixes from Asai @ Micron.

   - The conversion of mtip32xx from make_request_fn to blk-mq, and a
     later small fix for that conversion on quiescing for non-queued IO.
     From me.

   - A fix for bsg to use an exported function to check whether this
     driver is request based or not.  Needed updating for blk-mq, which
     is request based, but does not have a request_fn hook.  From me.

   - Small floppy bug fix from Jiri.

   - A series of cleanups for the cdrom uniform layer from Joe Perches.
     Gets rid of various old ugly macros, making the code conform more
     to the modern coding style.

   - A series of patches for drbd from the drbd crew (Lars Ellenberg and
     Philipp Reisner).

   - A use-after-free fix for null_blk from Ming Lei.

   - Also from Ming Lei is a performance patch for virtio-blk, which can
     net us a 3x win on kvm platforms where world notification is
     expensive.

   - Ming Lei also fixed a stall issue in virtio-blk, due to a race
     between queue start/stop and resource limits.

   - A small batch of fixes for xen-blk{back,front} from Olaf Hering and
     Valentin Priescu"

* 'for-3.16/drivers' of git://git.kernel.dk/linux-block: (54 commits)
  block: virtio_blk: don't hold spin lock during world switch
  xen-blkback: defer freeing blkif to avoid blocking xenwatch
  xen blkif.h: fix comment typo in discard-alignment
  xen/blkback: disable discard feature if requested by toolstack
  xen-blkfront: remove type check from blkfront_setup_discard
  floppy: do not corrupt bio.bi_flags when reading block 0
  mtip32xx: move error handling to service thread
  virtio_blk: fix race between start and stop queue
  mtip32xx: stop block hardware queues before quiescing IO
  mtip32xx: blk_mq_init_queue() returns an ERR_PTR
  mtip32xx: convert to use blk-mq
  cdrom: Remove unnecessary prototype for cdrom_get_disc_info
  cdrom: Remove unnecessary prototype for cdrom_mrw_exit
  cdrom: Remove cdrom_count_tracks prototype
  cdrom: Remove cdrom_get_next_writeable prototype
  cdrom: Remove cdrom_get_last_written prototype
  cdrom: Move mmc_ioctls above cdrom_ioctl to remove unnecessary prototype
  cdrom: Remove unnecessary sanitize_format prototype
  cdrom: Remove unnecessary check_for_audio_disc prototype
  cdrom: Remove prototype for open_for_data
  ...
Linus Torvalds 2014-06-02 13:57:01 -07:00
commit 80081ec309
27 changed files with 2177 additions and 2099 deletions
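The virtio_blk item in the list above ("don't hold spin lock during world switch") is about moving the expensive host notification out of the virtqueue lock. Below is a minimal, illustrative sketch of that general pattern only; the function and lock names (example_submit, vq_lock) are assumptions for the sketch and are not copied from the driver.

#include <linux/spinlock.h>
#include <linux/virtio.h>

static void example_submit(struct virtqueue *vq, spinlock_t *vq_lock,
			   struct scatterlist *sg, unsigned int num, void *data)
{
	unsigned long flags;
	bool notify = false;

	spin_lock_irqsave(vq_lock, flags);
	if (virtqueue_add_outbuf(vq, sg, num, data, GFP_ATOMIC) == 0)
		notify = virtqueue_kick_prepare(vq);	/* cheap check, still under the lock */
	spin_unlock_irqrestore(vq_lock, flags);

	if (notify)
		virtqueue_notify(vq);	/* the costly guest exit happens without the lock held */
}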

View File

@@ -1008,7 +1008,7 @@ int bsg_register_queue(struct request_queue *q, struct device *parent,
 	/*
 	 * we need a proper transport to send commands, not a stacked device
 	 */
-	if (!q->request_fn)
+	if (!queue_is_rq_based(q))
 		return 0;

 	bcd = &q->bsg_dev;
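For context, the hunk above replaces the bare request_fn test with the exported queue_is_rq_based() helper, because a blk-mq queue is request based but has no request_fn. A sketch of roughly how that helper is shaped (the real definition lives in include/linux/blkdev.h; treat the exact body here as an assumption):

#include <linux/blkdev.h>

/* A queue is "request based" if it has either legacy request_fn or blk-mq ops. */
static inline bool example_queue_is_rq_based(struct request_queue *q)
{
	return q->mq_ops || q->request_fn;
}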

View File

@@ -4080,7 +4080,7 @@ static void cciss_interrupt_mode(ctlr_info_t *h)
 		goto default_int_mode;
 	if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) {
-		err = pci_enable_msix(h->pdev, cciss_msix_entries, 4);
+		err = pci_enable_msix_exact(h->pdev, cciss_msix_entries, 4);
 		if (!err) {
 			h->intr[0] = cciss_msix_entries[0].vector;
 			h->intr[1] = cciss_msix_entries[1].vector;
@@ -4088,10 +4088,6 @@ static void cciss_interrupt_mode(ctlr_info_t *h)
 			h->intr[3] = cciss_msix_entries[3].vector;
 			h->msix_vector = 1;
 			return;
-		}
-		if (err > 0) {
-			dev_warn(&h->pdev->dev,
-				 "only %d MSI-X vectors available\n", err);
 		} else {
 			dev_warn(&h->pdev->dev,
 				 "MSI-X init failed %d\n", err);

View File

@@ -29,7 +29,6 @@
 #include <linux/drbd_limits.h>
 #include <linux/dynamic_debug.h>
 #include "drbd_int.h"
-#include "drbd_wrappers.h"

 enum al_transaction_types {
@@ -204,7 +203,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev,
 	BUG_ON(!bdev->md_bdev);

-	drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
+	dynamic_drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
	     current->comm, current->pid, __func__,
	     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ",
	     (void*)_RET_IP_ );
@@ -276,7 +275,6 @@ bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i)
 	return _al_get(device, first, true);
 }

-static
 bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i)
 {
 	/* for bios crossing activity log extent boundaries,
@@ -846,7 +844,7 @@ void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size,
 	int wake_up = 0;
 	unsigned long flags;

-	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
+	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
 		drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
 				(unsigned long long)sector, size);
 		return;
@@ -920,7 +918,7 @@ int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size
 	if (size == 0)
 		return 0;

-	if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
+	if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
 		drbd_err(device, "sector: %llus, size: %d\n",
 			(unsigned long long)sector, size);
 		return 0;
@@ -1023,8 +1021,7 @@ int drbd_rs_begin_io(struct drbd_device *device, sector_t sector)
 	unsigned int enr = BM_SECT_TO_EXT(sector);
 	struct bm_extent *bm_ext;
 	int i, sig;
-	int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait.
-			 200 times -> 20 seconds. */
+	bool sa;

 retry:
 	sig = wait_event_interruptible(device->al_wait,
@@ -1035,12 +1032,15 @@ retry:
 		if (test_bit(BME_LOCKED, &bm_ext->flags))
 			return 0;

+		/* step aside only while we are above c-min-rate; unless disabled. */
+		sa = drbd_rs_c_min_rate_throttle(device);
+
 		for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
 			sig = wait_event_interruptible(device->al_wait,
 					!_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) ||
-					test_bit(BME_PRIORITY, &bm_ext->flags));
+					(sa && test_bit(BME_PRIORITY, &bm_ext->flags)));

-			if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) {
+			if (sig || (sa && test_bit(BME_PRIORITY, &bm_ext->flags))) {
 				spin_lock_irq(&device->al_lock);
 				if (lc_put(device->resync, &bm_ext->lce) == 0) {
 					bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
@@ -1052,9 +1052,6 @@ retry:
 				return -EINTR;
 			if (schedule_timeout_interruptible(HZ/10))
 				return -EINTR;
-			if (sa && --sa == 0)
-				drbd_warn(device, "drbd_rs_begin_io() stepped aside for 20sec."
-					  "Resync stalled?\n");
 			goto retry;
 		}
 	}
@@ -1288,7 +1285,7 @@ void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size)
 	sector_t esector, nr_sectors;
 	int wake_up = 0;

-	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
+	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
 		drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
 				(unsigned long long)sector, size);
 		return;

View File

@@ -382,6 +382,12 @@ enum {
 	__EE_CALL_AL_COMPLETE_IO,
 	__EE_MAY_SET_IN_SYNC,

+	/* is this a TRIM aka REQ_DISCARD? */
+	__EE_IS_TRIM,
+	/* our lower level cannot handle trim,
+	 * and we want to fall back to zeroout instead */
+	__EE_IS_TRIM_USE_ZEROOUT,
+
 	/* In case a barrier failed,
 	 * we need to resubmit without the barrier flag. */
 	__EE_RESUBMITTED,
@@ -405,7 +411,9 @@
 };
 #define EE_CALL_AL_COMPLETE_IO	(1<<__EE_CALL_AL_COMPLETE_IO)
 #define EE_MAY_SET_IN_SYNC	(1<<__EE_MAY_SET_IN_SYNC)
-#define EE_RESUBMITTED		(1<<__EE_RESUBMITTED)
+#define EE_IS_TRIM		(1<<__EE_IS_TRIM)
+#define EE_IS_TRIM_USE_ZEROOUT	(1<<__EE_IS_TRIM_USE_ZEROOUT)
+#define EE_RESUBMITTED		(1<<__EE_RESUBMITTED)
 #define EE_WAS_ERROR		(1<<__EE_WAS_ERROR)
 #define EE_HAS_DIGEST		(1<<__EE_HAS_DIGEST)
 #define EE_RESTART_REQUESTS	(1<<__EE_RESTART_REQUESTS)
@@ -579,6 +587,7 @@ struct drbd_resource {
 	struct list_head resources;
 	struct res_opts res_opts;
 	struct mutex conf_update;	/* mutex for ready-copy-update of net_conf and disk_conf */
+	struct mutex adm_mutex;		/* mutex to serialize administrative requests */
 	spinlock_t req_lock;

 	unsigned susp:1;		/* IO suspended by user */
@@ -609,6 +618,7 @@ struct drbd_connection {
 	struct drbd_socket data;	/* data/barrier/cstate/parameter packets */
 	struct drbd_socket meta;	/* ping/ack (metadata) packets */
 	int agreed_pro_version;		/* actually used protocol version */
+	u32 agreed_features;
 	unsigned long last_received;	/* in jiffies, either socket */
 	unsigned int ko_count;
@@ -814,6 +824,28 @@ struct drbd_device {
 	struct submit_worker submit;
 };

+struct drbd_config_context {
+	/* assigned from drbd_genlmsghdr */
+	unsigned int minor;
+	/* assigned from request attributes, if present */
+	unsigned int volume;
+#define VOLUME_UNSPECIFIED	(-1U)
+	/* pointer into the request skb,
+	 * limited lifetime! */
+	char *resource_name;
+	struct nlattr *my_addr;
+	struct nlattr *peer_addr;
+
+	/* reply buffer */
+	struct sk_buff *reply_skb;
+	/* pointer into reply buffer */
+	struct drbd_genlmsghdr *reply_dh;
+	/* resolved from attributes, if possible */
+	struct drbd_device *device;
+	struct drbd_resource *resource;
+	struct drbd_connection *connection;
+};
+
 static inline struct drbd_device *minor_to_device(unsigned int minor)
 {
 	return (struct drbd_device *)idr_find(&drbd_devices, minor);
@@ -821,7 +853,7 @@ static inline struct drbd_device *minor_to_device(unsigned int minor)
 static inline struct drbd_peer_device *first_peer_device(struct drbd_device *device)
 {
-	return list_first_entry(&device->peer_devices, struct drbd_peer_device, peer_devices);
+	return list_first_entry_or_null(&device->peer_devices, struct drbd_peer_device, peer_devices);
 }

 #define for_each_resource(resource, _resources) \
@@ -1139,6 +1171,12 @@ struct bm_extent {
 #define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* Header 80 only allows packets up to 32KiB data */
 #define DRBD_MAX_BIO_SIZE_P95    (1U << 17) /* Protocol 95 to 99 allows bios up to 128KiB */

+/* For now, don't allow more than one activity log extent worth of data
+ * to be discarded in one go. We may need to rework drbd_al_begin_io()
+ * to allow for even larger discard ranges */
+#define DRBD_MAX_DISCARD_SIZE	AL_EXTENT_SIZE
+#define DRBD_MAX_DISCARD_SECTORS (DRBD_MAX_DISCARD_SIZE >> 9)
+
 extern int  drbd_bm_init(struct drbd_device *device);
 extern int  drbd_bm_resize(struct drbd_device *device, sector_t sectors, int set_new_bits);
 extern void drbd_bm_cleanup(struct drbd_device *device);
@@ -1229,9 +1267,9 @@ extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
 extern rwlock_t global_state_lock;

 extern int conn_lowest_minor(struct drbd_connection *connection);
-enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned int minor, int vnr);
+extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor);
 extern void drbd_destroy_device(struct kref *kref);
-extern void drbd_delete_device(struct drbd_device *mdev);
+extern void drbd_delete_device(struct drbd_device *device);

 extern struct drbd_resource *drbd_create_resource(const char *name);
 extern void drbd_free_resource(struct drbd_resource *resource);
@@ -1257,7 +1295,7 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t);
 /* drbd_nl.c */

-extern int drbd_msg_put_info(const char *info);
+extern int drbd_msg_put_info(struct sk_buff *skb, const char *info);
 extern void drbd_suspend_io(struct drbd_device *device);
 extern void drbd_resume_io(struct drbd_device *device);
 extern char *ppsize(char *buf, unsigned long long size);
@@ -1283,6 +1321,10 @@ extern void conn_try_outdate_peer_async(struct drbd_connection *connection);
 extern int drbd_khelper(struct drbd_device *device, char *cmd);

 /* drbd_worker.c */
+/* bi_end_io handlers */
+extern void drbd_md_io_complete(struct bio *bio, int error);
+extern void drbd_peer_request_endio(struct bio *bio, int error);
+extern void drbd_request_endio(struct bio *bio, int error);
 extern int drbd_worker(struct drbd_thread *thi);
 enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor);
 void drbd_resync_after_changed(struct drbd_device *device);
@@ -1332,16 +1374,20 @@ extern int w_start_resync(struct drbd_work *, int);
 extern void resync_timer_fn(unsigned long data);
 extern void start_resync_timer_fn(unsigned long data);

+extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
+
 /* drbd_receiver.c */
 extern int drbd_receiver(struct drbd_thread *thi);
 extern int drbd_asender(struct drbd_thread *thi);
-extern int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector);
+extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
+extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector);
 extern int drbd_submit_peer_request(struct drbd_device *,
				    struct drbd_peer_request *, const unsigned,
				    const int);
 extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *);
 extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64,
						     sector_t, unsigned int,
+						     bool,
						     gfp_t) __must_hold(local);
 extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *,
				 int);
@@ -1401,6 +1447,37 @@ static inline void drbd_tcp_quickack(struct socket *sock)
		       (char*)&val, sizeof(val));
 }

+/* sets the number of 512 byte sectors of our virtual device */
+static inline void drbd_set_my_capacity(struct drbd_device *device,
+					sector_t size)
+{
+	/* set_capacity(device->this_bdev->bd_disk, size); */
+	set_capacity(device->vdisk, size);
+	device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
+}
+
+/*
+ * used to submit our private bio
+ */
+static inline void drbd_generic_make_request(struct drbd_device *device,
+					     int fault_type, struct bio *bio)
+{
+	__release(local);
+	if (!bio->bi_bdev) {
+		printk(KERN_ERR "drbd%d: drbd_generic_make_request: "
+				"bio->bi_bdev == NULL\n",
+		       device_to_minor(device));
+		dump_stack();
+		bio_endio(bio, -ENODEV);
+		return;
+	}
+
+	if (drbd_insert_fault(device, fault_type))
+		bio_endio(bio, -EIO);
+	else
+		generic_make_request(bio);
+}
+
 void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo);

 /* drbd_proc.c */
@@ -1410,6 +1487,7 @@ extern const char *drbd_conn_str(enum drbd_conns s);
 extern const char *drbd_role_str(enum drbd_role s);

 /* drbd_actlog.c */
+extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i);
 extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i);
 extern void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate);
 extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i);
@@ -2144,7 +2222,7 @@ static inline void drbd_md_flush(struct drbd_device *device)
 static inline struct drbd_connection *first_connection(struct drbd_resource *resource)
 {
-	return list_first_entry(&resource->connections,
+	return list_first_entry_or_null(&resource->connections,
				struct drbd_connection, connections);
 }

View File

@@ -1607,8 +1607,8 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection, unsigned long bi_rw)
		return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
 }

-/* Used to send write requests
- * R_PRIMARY -> Peer	(P_DATA)
+/* Used to send write or TRIM aka REQ_DISCARD requests
+ * R_PRIMARY -> Peer	(P_DATA, P_TRIM)
  */
 int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *req)
 {
@@ -1640,6 +1640,16 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *req)
			dp_flags |= DP_SEND_WRITE_ACK;
	}
	p->dp_flags = cpu_to_be32(dp_flags);
+	if (dp_flags & DP_DISCARD) {
+		struct p_trim *t = (struct p_trim*)p;
+		t->size = cpu_to_be32(req->i.size);
+		err = __send_command(peer_device->connection, device->vnr, sock, P_TRIM, sizeof(*t), NULL, 0);
+		goto out;
+	}
+
+	/* our digest is still only over the payload.
+	 * TRIM does not carry any payload. */
	if (dgs)
		drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, p + 1);
	err = __send_command(peer_device->connection, device->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size);
@@ -1675,6 +1685,7 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *req)
			... Be noisy about digest too large ...
		} */
	}
+out:
	mutex_unlock(&sock->mutex);  /* locked by drbd_prepare_command() */

	return err;
@@ -2570,6 +2581,7 @@ struct drbd_resource *drbd_create_resource(const char *name)
	INIT_LIST_HEAD(&resource->connections);
	list_add_tail_rcu(&resource->resources, &drbd_resources);
	mutex_init(&resource->conf_update);
+	mutex_init(&resource->adm_mutex);
	spin_lock_init(&resource->req_lock);
	return resource;
@@ -2687,14 +2699,16 @@ static int init_submitter(struct drbd_device *device)
	return 0;
 }

-enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned int minor, int vnr)
+enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor)
 {
+	struct drbd_resource *resource = adm_ctx->resource;
	struct drbd_connection *connection;
	struct drbd_device *device;
	struct drbd_peer_device *peer_device, *tmp_peer_device;
	struct gendisk *disk;
	struct request_queue *q;
	int id;
+	int vnr = adm_ctx->volume;
	enum drbd_ret_code err = ERR_NOMEM;

	device = minor_to_device(minor);
@@ -2763,7 +2777,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned int minor, int vnr)
	if (id < 0) {
		if (id == -ENOSPC) {
			err = ERR_MINOR_EXISTS;
-			drbd_msg_put_info("requested minor exists already");
+			drbd_msg_put_info(adm_ctx->reply_skb, "requested minor exists already");
		}
		goto out_no_minor_idr;
	}
@@ -2773,7 +2787,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned int minor, int vnr)
	if (id < 0) {
		if (id == -ENOSPC) {
			err = ERR_MINOR_EXISTS;
-			drbd_msg_put_info("requested minor exists already");
+			drbd_msg_put_info(adm_ctx->reply_skb, "requested minor exists already");
		}
		goto out_idr_remove_minor;
	}
@@ -2794,7 +2808,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned int minor, int vnr)
	if (id < 0) {
		if (id == -ENOSPC) {
			err = ERR_INVALID_REQUEST;
-			drbd_msg_put_info("requested volume exists already");
+			drbd_msg_put_info(adm_ctx->reply_skb, "requested volume exists already");
		}
		goto out_idr_remove_from_resource;
	}
@@ -2803,7 +2817,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned int minor, int vnr)
	if (init_submitter(device)) {
		err = ERR_NOMEM;
-		drbd_msg_put_info("unable to create submit workqueue");
+		drbd_msg_put_info(adm_ctx->reply_skb, "unable to create submit workqueue");
		goto out_idr_remove_vol;
	}

File diff suppressed because it is too large.

View File

@@ -1,4 +1,3 @@
-#include "drbd_wrappers.h"
 #include <linux/kernel.h>
 #include <net/netlink.h>
 #include <linux/drbd_genl_api.h>

View File

@@ -116,7 +116,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq,
	/* ------------------------ ~18s average ------------------------ */
	i = (device->rs_last_mark + 2) % DRBD_SYNC_MARKS;
	dt = (jiffies - device->rs_mark_time[i]) / HZ;
-	if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS))
+	if (dt > 180)
		stalled = 1;

	if (!dt)

View File

@@ -54,6 +54,11 @@ enum drbd_packet {
	P_CONN_ST_CHG_REPLY   = 0x2b, /* meta sock: Connection side state req reply */
	P_RETRY_WRITE	      = 0x2c, /* Protocol C: retry conflicting write request */
	P_PROTOCOL_UPDATE     = 0x2d, /* data sock: is used in established connections */
+	/* 0x2e to 0x30 reserved, used in drbd 9 */
+
+	/* REQ_DISCARD. We used "discard" in different contexts before,
+	 * which is why I chose TRIM here, to disambiguate. */
+	P_TRIM                = 0x31,

	P_MAY_IGNORE	      = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
	P_MAX_OPT_CMD	      = 0x101,
@@ -119,6 +124,11 @@ struct p_data {
	u32	    dp_flags;
 } __packed;

+struct p_trim {
+	struct p_data p_data;
+	u32	    size;	/* == bio->bi_size */
+} __packed;
+
 /*
  * commands which share a struct:
  *  p_block_ack:
@@ -150,6 +160,8 @@ struct p_block_req {
  *   ReportParams
  */

+#define FF_TRIM      1
+
 struct p_connection_features {
	u32 protocol_min;
	u32 feature_flags;
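The P_TRIM packet and FF_TRIM feature bit added above only make sense once both peers have advertised support; the negotiation itself (agreed_features = PRO_FEATURES & peer flags) appears in the drbd_do_features() hunk further down. A small illustrative helper, with the function name peer_supports_trim invented for the sketch:

/* Sketch: gate an outgoing discard on the negotiated feature bit. */
static bool peer_supports_trim(struct drbd_connection *connection)
{
	return connection->agreed_features & FF_TRIM;
}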

View File

@@ -46,9 +46,10 @@
 #include "drbd_int.h"
 #include "drbd_protocol.h"
 #include "drbd_req.h"
-
 #include "drbd_vli.h"

+#define PRO_FEATURES (FF_TRIM)
+
 struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
@@ -65,7 +66,7 @@ enum finish_epoch {
 static int drbd_do_features(struct drbd_connection *connection);
 static int drbd_do_auth(struct drbd_connection *connection);
 static int drbd_disconnected(struct drbd_peer_device *);
-
+static void conn_wait_active_ee_empty(struct drbd_connection *connection);
 static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
 static int e_end_block(struct drbd_work *, int);
@@ -234,9 +235,17 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
  * @retry:	whether to retry, if not enough pages are available right now
  *
  * Tries to allocate number pages, first from our own page pool, then from
- * the kernel, unless this allocation would exceed the max_buffers setting.
+ * the kernel.
  * Possibly retry until DRBD frees sufficient pages somewhere else.
  *
+ * If this allocation would exceed the max_buffers setting, we throttle
+ * allocation (schedule_timeout) to give the system some room to breathe.
+ *
+ * We do not use max-buffers as hard limit, because it could lead to
+ * congestion and further to a distributed deadlock during online-verify or
+ * (checksum based) resync, if the max-buffers, socket buffer sizes and
+ * resync-rate settings are mis-configured.
+ *
  * Returns a page chain linked via page->private.
  */
 struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
@@ -246,10 +255,8 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
-	int mxb;
+	unsigned int mxb;

-	/* Yes, we may run up to @number over max_buffers. If we
-	 * follow it strictly, the admin will get it wrong anyways. */
	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
@@ -277,7 +284,8 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			break;
		}

-		schedule();
+		if (schedule_timeout(HZ/10) == 0)
+			mxb = UINT_MAX;
	}

	finish_wait(&drbd_pp_wait, &wait);
@@ -331,7 +339,7 @@ You must not have the req_lock:
 struct drbd_peer_request *
 drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
-		    unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
+		    unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
 {
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
@@ -348,7 +356,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		return NULL;
	}

-	if (data_size) {
+	if (has_payload && data_size) {
		page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
@@ -1026,24 +1034,27 @@ randomize:
	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

+	/* Prevent a race between resync-handshake and
+	 * being promoted to Primary.
+	 *
+	 * Grab and release the state mutex, so we know that any current
+	 * drbd_set_role() is finished, and any incoming drbd_set_role
+	 * will see the STATE_SENT flag, and wait for it to be cleared.
+	 */
+	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
+		mutex_lock(peer_device->device->state_mutex);
+
	set_bit(STATE_SENT, &connection->flags);

+	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
+		mutex_unlock(peer_device->device->state_mutex);
+
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

-		/* Prevent a race between resync-handshake and
-		 * being promoted to Primary.
-		 *
-		 * Grab and release the state mutex, so we know that any current
-		 * drbd_set_role() is finished, and any incoming drbd_set_role
-		 * will see the STATE_SENT flag, and wait for it to be cleared.
-		 */
-		mutex_lock(device->state_mutex);
-		mutex_unlock(device->state_mutex);
-
		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
@@ -1315,6 +1326,20 @@ int drbd_submit_peer_request(struct drbd_device *device,
	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

+	if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
+		/* wait for all pending IO completions, before we start
+		 * zeroing things out. */
+		conn_wait_active_ee_empty(first_peer_device(device)->connection);
+		if (blkdev_issue_zeroout(device->ldev->backing_bdev,
+			sector, ds >> 9, GFP_NOIO))
+			peer_req->flags |= EE_WAS_ERROR;
+		drbd_endio_write_sec_final(peer_req);
+		return 0;
+	}
+
+	if (peer_req->flags & EE_IS_TRIM)
+		nr_pages = 0; /* discards don't have any payload. */
+
	/* In most cases, we will only need one bio.  But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
@@ -1326,7 +1351,7 @@
 next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
-		drbd_err(device, "submit_ee: Allocation of a bio failed\n");
+		drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
@@ -1340,6 +1365,11 @@ next_bio:
	bios = bio;
	++n_bios;

+	if (rw & REQ_DISCARD) {
+		bio->bi_iter.bi_size = ds;
+		goto submit;
+	}
+
	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
@@ -1360,8 +1390,9 @@ next_bio:
		sector += len >> 9;
		--nr_pages;
	}
-	D_ASSERT(device, page == NULL);
	D_ASSERT(device, ds == 0);
+submit:
+	D_ASSERT(device, page == NULL);

	atomic_set(&peer_req->pending_bios, n_bios);
	do {
@@ -1490,19 +1521,21 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
  * and from receive_Data */
 static struct drbd_peer_request *
 read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
-	      int data_size) __must_hold(local)
+	      struct packet_info *pi) __must_hold(local)
 {
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, err;
+	int data_size = pi->size;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
+	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;

	dgs = 0;
-	if (peer_device->connection->peer_integrity_tfm) {
+	if (!trim && peer_device->connection->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
@@ -1514,9 +1547,15 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		data_size -= dgs;
	}

+	if (trim) {
+		D_ASSERT(peer_device, data_size == 0);
+		data_size = be32_to_cpu(trim->size);
+	}
+
	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
-	if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
+	/* prepare for larger trim requests. */
+	if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust out peer,
@@ -1532,11 +1571,11 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
-	peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, GFP_NOIO);
+	peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
	if (!peer_req)
		return NULL;

-	if (!data_size)
+	if (trim)
		return peer_req;

	ds = data_size;
@@ -1676,12 +1715,12 @@ static int e_end_resync_block(struct drbd_work *w, int unused)
 }

 static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
-			    int data_size) __releases(local)
+			    struct packet_info *pi) __releases(local)
 {
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

-	peer_req = read_in_block(peer_device, ID_SYNCER, sector, data_size);
+	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
	if (!peer_req)
		goto fail;
@@ -1697,7 +1736,7 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
	list_add(&peer_req->w.list, &device->sync_ee);
	spin_unlock_irq(&device->resource->req_lock);

-	atomic_add(data_size >> 9, &device->rs_sect_ev);
+	atomic_add(pi->size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return 0;
@@ -1785,7 +1824,7 @@ static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
-		err = recv_resync_read(peer_device, sector, pi->size);
+		err = recv_resync_read(peer_device, sector, pi);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not write resync data to local disk.\n");
@@ -2196,7 +2235,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
	 */

	sector = be64_to_cpu(p->sector);
-	peer_req = read_in_block(peer_device, p->block_id, sector, pi->size);
+	peer_req = read_in_block(peer_device, p->block_id, sector, pi);
	if (!peer_req) {
		put_ldev(device);
		return -EIO;
@@ -2206,7 +2245,15 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
	dp_flags = be32_to_cpu(p->dp_flags);
	rw |= wire_flags_to_bio(dp_flags);
-	if (peer_req->pages == NULL) {
+	if (pi->cmd == P_TRIM) {
+		struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
+		peer_req->flags |= EE_IS_TRIM;
+		if (!blk_queue_discard(q))
+			peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
+		D_ASSERT(peer_device, peer_req->i.size > 0);
+		D_ASSERT(peer_device, rw & REQ_DISCARD);
+		D_ASSERT(peer_device, peer_req->pages == NULL);
+	} else if (peer_req->pages == NULL) {
		D_ASSERT(device, peer_req->i.size == 0);
		D_ASSERT(device, dp_flags & DP_FLUSH);
	}
@@ -2242,7 +2289,12 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
		update_peer_seq(peer_device, peer_seq);
		spin_lock_irq(&device->resource->req_lock);
	}
-	list_add(&peer_req->w.list, &device->active_ee);
+	/* if we use the zeroout fallback code, we process synchronously
+	 * and we wait for all pending requests, respectively wait for
+	 * active_ee to become empty in drbd_submit_peer_request();
+	 * better not add ourselves here. */
+	if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
+		list_add(&peer_req->w.list, &device->active_ee);
	spin_unlock_irq(&device->resource->req_lock);

	if (device->state.conn == C_SYNC_TARGET)
@@ -2313,14 +2365,33 @@ out_interrupted:
 * The current sync rate used here uses only the most recent two step marks,
 * to have a short time average so we can react faster.
 */
-int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
+bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
+{
+	struct lc_element *tmp;
+	bool throttle = true;
+
+	if (!drbd_rs_c_min_rate_throttle(device))
+		return false;
+
+	spin_lock_irq(&device->al_lock);
+	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
+	if (tmp) {
+		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
+		if (test_bit(BME_PRIORITY, &bm_ext->flags))
+			throttle = false;
+		/* Do not slow down if app IO is already waiting for this extent */
+	}
+	spin_unlock_irq(&device->al_lock);
+
+	return throttle;
+}
+
+bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
 {
	struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
	unsigned long db, dt, dbdt;
-	struct lc_element *tmp;
-	int curr_events;
-	int throttle = 0;
	unsigned int c_min_rate;
+	int curr_events;

	rcu_read_lock();
	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
@@ -2328,24 +2399,11 @@ int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
	/* feature disabled? */
	if (c_min_rate == 0)
-		return 0;
-
-	spin_lock_irq(&device->al_lock);
-	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
-	if (tmp) {
-		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
-		if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
-			spin_unlock_irq(&device->al_lock);
-			return 0;
-		}
-		/* Do not slow down if app IO is already waiting for this extent */
-	}
-	spin_unlock_irq(&device->al_lock);
+		return false;

	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
		      (int)part_stat_read(&disk->part0, sectors[1]) -
			atomic_read(&device->rs_sect_ev);
+
	if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
		unsigned long rs_left;
		int i;
@@ -2368,12 +2426,11 @@ int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
		dbdt = Bit2KB(db/dt);
		if (dbdt > c_min_rate)
-			throttle = 1;
+			return true;
	}
-
-	return throttle;
+	return false;
 }

 static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
 {
	struct drbd_peer_device *peer_device;
@@ -2436,7 +2493,8 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
-	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, GFP_NOIO);
+	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
+			true /* has real payload */, GFP_NOIO);
	if (!peer_req) {
		put_ldev(device);
		return -ENOMEM;
@@ -3648,6 +3706,13 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
		put_ldev(device);
	}

+	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
+	drbd_reconsider_max_bio_size(device);
+	/* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
+	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
+	   drbd_reconsider_max_bio_size(), we can be sure that after
+	   drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
+
	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(device)) {
		dd = drbd_determine_dev_size(device, ddsf, NULL);
@@ -3660,9 +3725,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
		drbd_set_my_capacity(device, p_size);
	}

-	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
-	drbd_reconsider_max_bio_size(device);
-
	if (get_ldev(device)) {
		if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
			device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
@@ -4423,6 +4485,7 @@ static struct data_cmd drbd_cmd_handler[] = {
	[P_OUT_OF_SYNC]	    = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
+	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
 };

 static void drbdd(struct drbd_connection *connection)
@@ -4630,6 +4693,7 @@ static int drbd_send_features(struct drbd_connection *connection)
	memset(p, 0, sizeof(*p));
	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
+	p->feature_flags = cpu_to_be32(PRO_FEATURES);
	return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
 }
@@ -4683,10 +4747,14 @@ static int drbd_do_features(struct drbd_connection *connection)
		goto incompat;

	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
+	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);

	drbd_info(connection, "Handshake successful: "
	     "Agreed network protocol version %d\n", connection->agreed_pro_version);

+	drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
+		  connection->agreed_features & FF_TRIM ? " " : " not ");
+
	return 1;

 incompat:
@@ -4778,6 +4846,12 @@ static int drbd_do_auth(struct drbd_connection *connection)
		goto fail;
	}

+	if (pi.size < CHALLENGE_LEN) {
+		drbd_err(connection, "AuthChallenge payload too small.\n");
+		rv = -1;
+		goto fail;
+	}
+
	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		drbd_err(connection, "kmalloc of peers_ch failed\n");
@@ -4791,6 +4865,12 @@ static int drbd_do_auth(struct drbd_connection *connection)
		goto fail;
	}

+	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
+		drbd_err(connection, "Peer presented the same challenge!\n");
+		rv = -1;
+		goto fail;
+	}
+
	resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
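The core of the TRIM support above is the fallback in receive_Data()/drbd_submit_peer_request(): if the local backing queue cannot discard, the range is zeroed with blkdev_issue_zeroout() instead. The sketch below condenses that decision into one stand-alone helper for illustration only; it is not the driver's actual submission path (which builds a REQ_DISCARD bio as shown in the hunks), and it assumes the 3.16-era signatures of the blkdev_issue_* helpers.

#include <linux/blkdev.h>

/* Hypothetical helper: apply a peer's discard of "size" bytes at "sector". */
static int example_apply_peer_discard(struct block_device *bdev,
				      sector_t sector, unsigned int size)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (blk_queue_discard(q))
		return blkdev_issue_discard(bdev, sector, size >> 9, GFP_NOIO, 0);

	/* EE_IS_TRIM_USE_ZEROOUT case: emulate the discard by writing zeroes */
	return blkdev_issue_zeroout(bdev, sector, size >> 9, GFP_NOIO);
}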

View File

@ -522,6 +522,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
break; break;
case DISCARD_COMPLETED_NOTSUPP:
case DISCARD_COMPLETED_WITH_ERROR:
/* I'd rather not detach from local disk just because it
* failed a REQ_DISCARD. */
mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
break;
case QUEUE_FOR_NET_READ: case QUEUE_FOR_NET_READ:
/* READ or READA, and /* READ or READA, and
* no local disk, * no local disk,
@ -1235,6 +1242,7 @@ void do_submit(struct work_struct *ws)
if (list_empty(&incoming)) if (list_empty(&incoming))
break; break;
skip_fast_path:
wait_event(device->al_wait, prepare_al_transaction_nonblock(device, &incoming, &pending)); wait_event(device->al_wait, prepare_al_transaction_nonblock(device, &incoming, &pending));
/* Maybe more was queued, while we prepared the transaction? /* Maybe more was queued, while we prepared the transaction?
* Try to stuff them into this transaction as well. * Try to stuff them into this transaction as well.
@ -1273,6 +1281,25 @@ void do_submit(struct work_struct *ws)
list_del_init(&req->tl_requests); list_del_init(&req->tl_requests);
drbd_send_and_submit(device, req); drbd_send_and_submit(device, req);
} }
/* If all currently hot activity log extents are kept busy by
* incoming requests, we still must not totally starve new
* requests to cold extents. In that case, prepare one request
* in blocking mode. */
list_for_each_entry_safe(req, tmp, &incoming, tl_requests) {
list_del_init(&req->tl_requests);
req->rq_state |= RQ_IN_ACT_LOG;
if (!drbd_al_begin_io_prepare(device, &req->i)) {
/* Corresponding extent was hot after all? */
drbd_send_and_submit(device, req);
} else {
/* Found a request to a cold extent.
* Put on "pending" list,
* and try to cumulate with more. */
list_add(&req->tl_requests, &pending);
goto skip_fast_path;
}
}
} }
} }
@ -1326,23 +1353,35 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
return limit; return limit;
} }
static struct drbd_request *find_oldest_request(struct drbd_connection *connection) static void find_oldest_requests(
struct drbd_connection *connection,
struct drbd_device *device,
struct drbd_request **oldest_req_waiting_for_peer,
struct drbd_request **oldest_req_waiting_for_disk)
{ {
/* Walk the transfer log,
* and find the oldest not yet completed request */
struct drbd_request *r; struct drbd_request *r;
*oldest_req_waiting_for_peer = NULL;
*oldest_req_waiting_for_disk = NULL;
 	list_for_each_entry(r, &connection->transfer_log, tl_requests) {
-		if (atomic_read(&r->completion_ref))
-			return r;
+		const unsigned s = r->rq_state;
+		if (!*oldest_req_waiting_for_peer
+		    && ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE)))
+			*oldest_req_waiting_for_peer = r;
+		if (!*oldest_req_waiting_for_disk
+		    && (s & RQ_LOCAL_PENDING) && r->device == device)
+			*oldest_req_waiting_for_disk = r;
+		if (*oldest_req_waiting_for_peer && *oldest_req_waiting_for_disk)
+			break;
 	}
-	return NULL;
 }
 
 void request_timer_fn(unsigned long data)
 {
 	struct drbd_device *device = (struct drbd_device *) data;
 	struct drbd_connection *connection = first_peer_device(device)->connection;
-	struct drbd_request *req; /* oldest request */
+	struct drbd_request *req_disk, *req_peer; /* oldest request */
 	struct net_conf *nc;
 	unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
 	unsigned long now;
@@ -1366,8 +1405,8 @@ void request_timer_fn(unsigned long data)
 	now = jiffies;
 	spin_lock_irq(&device->resource->req_lock);
-	req = find_oldest_request(connection);
-	if (!req) {
+	find_oldest_requests(connection, device, &req_peer, &req_disk);
+	if (req_peer == NULL && req_disk == NULL) {
 		spin_unlock_irq(&device->resource->req_lock);
 		mod_timer(&device->request_timer, now + et);
 		return;
@@ -1389,19 +1428,26 @@ void request_timer_fn(unsigned long data)
 	 * ~198 days with 250 HZ, we have a window where the timeout would need
 	 * to expire twice (worst case) to become effective. Good enough.
 	 */
-	if (ent && req->rq_state & RQ_NET_PENDING &&
-		 time_after(now, req->start_time + ent) &&
+	if (ent && req_peer &&
+		 time_after(now, req_peer->start_time + ent) &&
 		!time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) {
 		drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n");
 		_drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
 	}
-	if (dt && req->rq_state & RQ_LOCAL_PENDING && req->device == device &&
-		 time_after(now, req->start_time + dt) &&
+	if (dt && req_disk &&
+		 time_after(now, req_disk->start_time + dt) &&
 		!time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
 		drbd_warn(device, "Local backing device failed to meet the disk-timeout\n");
 		__drbd_chk_io_error(device, DRBD_FORCE_DETACH);
 	}
-	nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
+
+	/* Reschedule timer for the nearest not already expired timeout.
+	 * Fallback to now + min(effective network timeout, disk timeout). */
+	ent = (ent && req_peer && time_before(now, req_peer->start_time + ent))
+		? req_peer->start_time + ent : now + et;
+	dt = (dt && req_disk && time_before(now, req_disk->start_time + dt))
+		? req_disk->start_time + dt : now + et;
+	nt = time_before(ent, dt) ? ent : dt;
 	spin_unlock_irq(&connection->resource->req_lock);
 	mod_timer(&device->request_timer, nt);
 }
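Note on the hunk above: the timer is re-armed for the nearest deadline that has not yet expired, falling back to now + timeout so it always fires again. A minimal sketch of that pattern with illustrative names only (none of this is drbd API):

#include <linux/jiffies.h>
#include <linux/types.h>

/* Sketch only: pick the nearest not-yet-expired deadline among two
 * optional ones; fall back to "now + timeout" so the caller can always
 * re-arm its timer with the returned value. */
static unsigned long nearest_deadline(unsigned long now, unsigned long timeout,
				      bool a_armed, unsigned long a_deadline,
				      bool b_armed, unsigned long b_deadline)
{
	unsigned long a = (a_armed && time_before(now, a_deadline))
		? a_deadline : now + timeout;
	unsigned long b = (b_armed && time_before(now, b_deadline))
		? b_deadline : now + timeout;

	return time_before(a, b) ? a : b;
}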


@@ -30,7 +30,6 @@
 #include <linux/slab.h>
 #include <linux/drbd.h>
 #include "drbd_int.h"
-#include "drbd_wrappers.h"
 
 /* The request callbacks will be called in irq context by the IDE drivers,
    and in Softirqs/Tasklets/BH context by the SCSI drivers,
@@ -111,11 +110,14 @@ enum drbd_req_event {
 	BARRIER_ACKED, /* in protocol A and B */
 	DATA_RECEIVED, /* (remote read) */
 
+	COMPLETED_OK,
 	READ_COMPLETED_WITH_ERROR,
 	READ_AHEAD_COMPLETED_WITH_ERROR,
 	WRITE_COMPLETED_WITH_ERROR,
+	DISCARD_COMPLETED_NOTSUPP,
+	DISCARD_COMPLETED_WITH_ERROR,
 	ABORT_DISK_IO,
-	COMPLETED_OK,
+
 	RESEND,
 	FAIL_FROZEN_DISK_IO,
 	RESTART_FROZEN_DISK_IO,


@@ -54,8 +54,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
 static enum drbd_state_rv is_valid_state(struct drbd_device *, union drbd_state);
 static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_connection *);
 static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
-static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state ns,
-				       enum sanitize_state_warnings *warn);
+static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state os,
+				       union drbd_state ns, enum sanitize_state_warnings *warn);
 
 static inline bool is_susp(union drbd_state s)
 {
@@ -287,7 +287,7 @@ _req_st_cond(struct drbd_device *device, union drbd_state mask,
 	spin_lock_irqsave(&device->resource->req_lock, flags);
 	os = drbd_read_state(device);
-	ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL);
+	ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
 	rv = is_valid_transition(os, ns);
 	if (rv >= SS_SUCCESS)
 		rv = SS_UNKNOWN_ERROR;  /* cont waiting, otherwise fail. */
@@ -333,7 +333,7 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask,
 	spin_lock_irqsave(&device->resource->req_lock, flags);
 	os = drbd_read_state(device);
-	ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL);
+	ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
 	rv = is_valid_transition(os, ns);
 	if (rv < SS_SUCCESS) {
 		spin_unlock_irqrestore(&device->resource->req_lock, flags);
@@ -740,8 +740,8 @@ static void print_sanitize_warnings(struct drbd_device *device, enum sanitize_st
  * When we loose connection, we have to set the state of the peers disk (pdsk)
  * to D_UNKNOWN. This rule and many more along those lines are in this function.
  */
-static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state ns,
-				       enum sanitize_state_warnings *warn)
+static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state os,
+				       union drbd_state ns, enum sanitize_state_warnings *warn)
 {
 	enum drbd_fencing_p fp;
 	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
@@ -882,11 +882,13 @@ static union drbd_state sanitize_state(struct drbd_device *device, union drbd_st
 	}
 
 	if (fp == FP_STONITH &&
-	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED))
+	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
+	    !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
 		ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
 
 	if (device->resource->res_opts.on_no_data == OND_SUSPEND_IO &&
-	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
+	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
+	    !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
 		ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */
 
 	if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
@@ -958,7 +960,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
 	os = drbd_read_state(device);
-	ns = sanitize_state(device, ns, &ssw);
+	ns = sanitize_state(device, os, ns, &ssw);
 	if (ns.i == os.i)
 		return SS_NOTHING_TO_DO;
@@ -1656,7 +1658,7 @@ conn_is_valid_transition(struct drbd_connection *connection, union drbd_state ma
 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
 		struct drbd_device *device = peer_device->device;
 		os = drbd_read_state(device);
-		ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL);
+		ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
 
 		if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
 			ns.disk = os.disk;
@@ -1718,7 +1720,7 @@ conn_set_state(struct drbd_connection *connection, union drbd_state mask, union
 		number_of_volumes++;
 		os = drbd_read_state(device);
 		ns = apply_mask_val(os, mask, val);
-		ns = sanitize_state(device, ns, NULL);
+		ns = sanitize_state(device, os, ns, NULL);
 
 		if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
 			ns.disk = os.disk;
@@ -1763,19 +1765,19 @@ conn_set_state(struct drbd_connection *connection, union drbd_state mask, union
 static enum drbd_state_rv
 _conn_rq_cond(struct drbd_connection *connection, union drbd_state mask, union drbd_state val)
 {
-	enum drbd_state_rv rv;
+	enum drbd_state_rv err, rv = SS_UNKNOWN_ERROR; /* continue waiting */;
 
 	if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &connection->flags))
-		return SS_CW_SUCCESS;
+		rv = SS_CW_SUCCESS;
 
 	if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &connection->flags))
-		return SS_CW_FAILED_BY_PEER;
+		rv = SS_CW_FAILED_BY_PEER;
 
-	rv = conn_is_valid_transition(connection, mask, val, 0);
-	if (rv == SS_SUCCESS && connection->cstate == C_WF_REPORT_PARAMS)
-		rv = SS_UNKNOWN_ERROR; /* continue waiting */
+	err = conn_is_valid_transition(connection, mask, val, 0);
+	if (err == SS_SUCCESS && connection->cstate == C_WF_REPORT_PARAMS)
+		return rv;
 
-	return rv;
+	return err;
 }
 
 enum drbd_state_rv


@@ -118,7 +118,7 @@ static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __rele
 /* writes on behalf of the partner, or resync writes,
  * "submitted" by the receiver, final stage. */
-static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
+void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
 {
 	unsigned long flags = 0;
 	struct drbd_peer_device *peer_device = peer_req->peer_device;
@@ -150,7 +150,9 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel
 	do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);
 
-	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
+	/* FIXME do we want to detach for failed REQ_DISCARD?
+	 * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
+	if (peer_req->flags & EE_WAS_ERROR)
 		__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
 	spin_unlock_irqrestore(&device->resource->req_lock, flags);
@@ -176,10 +178,12 @@ void drbd_peer_request_endio(struct bio *bio, int error)
 	struct drbd_device *device = peer_req->peer_device->device;
 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
 	int is_write = bio_data_dir(bio) == WRITE;
+	int is_discard = !!(bio->bi_rw & REQ_DISCARD);
 
 	if (error && __ratelimit(&drbd_ratelimit_state))
 		drbd_warn(device, "%s: error=%d s=%llus\n",
-				is_write ? "write" : "read", error,
+				is_write ? (is_discard ? "discard" : "write")
+					 : "read", error,
 				(unsigned long long)peer_req->i.sector);
 	if (!error && !uptodate) {
 		if (__ratelimit(&drbd_ratelimit_state))
@@ -263,7 +267,12 @@ void drbd_request_endio(struct bio *bio, int error)
 	/* to avoid recursion in __req_mod */
 	if (unlikely(error)) {
-		what = (bio_data_dir(bio) == WRITE)
+		if (bio->bi_rw & REQ_DISCARD)
+			what = (error == -EOPNOTSUPP)
+				? DISCARD_COMPLETED_NOTSUPP
+				: DISCARD_COMPLETED_WITH_ERROR;
+		else
+			what = (bio_data_dir(bio) == WRITE)
 			? WRITE_COMPLETED_WITH_ERROR
 			: (bio_rw(bio) == READ)
 			? READ_COMPLETED_WITH_ERROR
@@ -395,7 +404,7 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
 	/* GFP_TRY, because if there is no memory available right now, this may
 	 * be rescheduled for later. It is "only" background resync, after all. */
 	peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
-				       size, GFP_TRY);
+				       size, true /* has real payload */, GFP_TRY);
 	if (!peer_req)
 		goto defer;
@@ -492,10 +501,9 @@ struct fifo_buffer *fifo_alloc(int fifo_size)
 	return fb;
 }
 
-static int drbd_rs_controller(struct drbd_device *device)
+static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
 {
 	struct disk_conf *dc;
-	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
 	unsigned int want;     /* The number of sectors we want in the proxy */
 	int req_sect; /* Number of sectors to request in this turn */
 	int correction; /* Number of sectors more we need in the proxy*/
@@ -505,9 +513,6 @@ static int drbd_rs_controller(struct drbd_device *device)
 	int max_sect;
 	struct fifo_buffer *plan;
 
-	sect_in = atomic_xchg(&device->rs_sect_in, 0); /* Number of sectors that came in */
-	device->rs_in_flight -= sect_in;
-
 	dc = rcu_dereference(device->ldev->disk_conf);
 	plan = rcu_dereference(device->rs_plan_s);
@@ -550,11 +555,16 @@ static int drbd_rs_controller(struct drbd_device *device)
 static int drbd_rs_number_requests(struct drbd_device *device)
 {
-	int number;
+	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
+	int number, mxb;
+
+	sect_in = atomic_xchg(&device->rs_sect_in, 0);
+	device->rs_in_flight -= sect_in;
 
 	rcu_read_lock();
+	mxb = drbd_get_max_buffers(device) / 2;
 	if (rcu_dereference(device->rs_plan_s)->size) {
-		number = drbd_rs_controller(device) >> (BM_BLOCK_SHIFT - 9);
+		number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
 		device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
 	} else {
 		device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
@@ -562,8 +572,14 @@ static int drbd_rs_number_requests(struct drbd_device *device)
 	}
 	rcu_read_unlock();
 
-	/* ignore the amount of pending requests, the resync controller should
-	 * throttle down to incoming reply rate soon enough anyways. */
+	/* Don't have more than "max-buffers"/2 in-flight.
+	 * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
+	 * potentially causing a distributed deadlock on congestion during
+	 * online-verify or (checksum-based) resync, if max-buffers,
+	 * socket buffer sizes and resync rate settings are mis-configured. */
+	if (mxb - device->rs_in_flight < number)
+		number = mxb - device->rs_in_flight;
+
 	return number;
 }
@@ -597,7 +613,7 @@ static int make_resync_request(struct drbd_device *device, int cancel)
 	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
 	number = drbd_rs_number_requests(device);
-	if (number == 0)
+	if (number <= 0)
 		goto requeue;
 
 	for (i = 0; i < number; i++) {
@@ -647,7 +663,7 @@ next_sector:
 		 */
 		align = 1;
 		rollback_i = i;
-		for (;;) {
+		while (i < number) {
 			if (size + BM_BLOCK_SIZE > max_bio_size)
 				break;
@@ -1670,11 +1686,15 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 	}
 	clear_bit(B_RS_H_DONE, &device->flags);
 
-	write_lock_irq(&global_state_lock);
+	/* req_lock: serialize with drbd_send_and_submit() and others
+	 * global_state_lock: for stable sync-after dependencies */
+	spin_lock_irq(&device->resource->req_lock);
+	write_lock(&global_state_lock);
 	/* Did some connection breakage or IO error race with us? */
 	if (device->state.conn < C_CONNECTED
 	|| !get_ldev_if_state(device, D_NEGOTIATING)) {
-		write_unlock_irq(&global_state_lock);
+		write_unlock(&global_state_lock);
+		spin_unlock_irq(&device->resource->req_lock);
 		mutex_unlock(device->state_mutex);
 		return;
 	}
@@ -1714,7 +1734,8 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 		}
 		_drbd_pause_after(device);
 	}
-	write_unlock_irq(&global_state_lock);
+	write_unlock(&global_state_lock);
+	spin_unlock_irq(&device->resource->req_lock);
 
 	if (r == SS_SUCCESS) {
 		/* reset rs_last_bcast when a resync or verify is started,
@@ -1778,34 +1799,6 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 	mutex_unlock(device->state_mutex);
 }
 
-/* If the resource already closed the current epoch, but we did not
- * (because we have not yet seen new requests), we should send the
- * corresponding barrier now.  Must be checked within the same spinlock
- * that is used to check for new requests. */
-static bool need_to_send_barrier(struct drbd_connection *connection)
-{
-	if (!connection->send.seen_any_write_yet)
-		return false;
-
-	/* Skip barriers that do not contain any writes.
-	 * This may happen during AHEAD mode. */
-	if (!connection->send.current_epoch_writes)
-		return false;
-
-	/* ->req_lock is held when requests are queued on
-	 * connection->sender_work, and put into ->transfer_log.
-	 * It is also held when ->current_tle_nr is increased.
-	 * So either there are already new requests queued,
-	 * and corresponding barriers will be send there.
-	 * Or nothing new is queued yet, so the difference will be 1.
-	 */
-	if (atomic_read(&connection->current_tle_nr) !=
-	    connection->send.current_epoch_nr + 1)
-		return false;
-
-	return true;
-}
-
 static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
 {
 	spin_lock_irq(&queue->q_lock);
@@ -1864,12 +1857,22 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
 			spin_unlock_irq(&connection->resource->req_lock);
 			break;
 		}
-		send_barrier = need_to_send_barrier(connection);
+
+		/* We found nothing new to do, no to-be-communicated request,
+		 * no other work item.  We may still need to close the last
+		 * epoch.  Next incoming request epoch will be connection ->
+		 * current transfer log epoch number.  If that is different
+		 * from the epoch of the last request we communicated, it is
+		 * safe to send the epoch separating barrier now.
+		 */
+		send_barrier =
+			atomic_read(&connection->current_tle_nr) !=
+			connection->send.current_epoch_nr;
 		spin_unlock_irq(&connection->resource->req_lock);
-		if (send_barrier) {
-			drbd_send_barrier(connection);
-			connection->send.current_epoch_nr++;
-		}
+
+		if (send_barrier)
+			maybe_send_barrier(connection,
+					connection->send.current_epoch_nr + 1);
 		schedule();
 		/* may be woken up for other things but new work, too,
 		 * e.g. if the current epoch got closed.


@@ -1,54 +0,0 @@
-#ifndef _DRBD_WRAPPERS_H
-#define _DRBD_WRAPPERS_H
-
-#include <linux/ctype.h>
-#include <linux/mm.h>
-#include "drbd_int.h"
-
-/* see get_sb_bdev and bd_claim */
-extern char *drbd_sec_holder;
-
-/* sets the number of 512 byte sectors of our virtual device */
-static inline void drbd_set_my_capacity(struct drbd_device *device,
-					sector_t size)
-{
-	/* set_capacity(device->this_bdev->bd_disk, size); */
-	set_capacity(device->vdisk, size);
-	device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
-}
-
-#define drbd_bio_uptodate(bio) bio_flagged(bio, BIO_UPTODATE)
-
-/* bi_end_io handlers */
-extern void drbd_md_io_complete(struct bio *bio, int error);
-extern void drbd_peer_request_endio(struct bio *bio, int error);
-extern void drbd_request_endio(struct bio *bio, int error);
-
-/*
- * used to submit our private bio
- */
-static inline void drbd_generic_make_request(struct drbd_device *device,
-					     int fault_type, struct bio *bio)
-{
-	__release(local);
-	if (!bio->bi_bdev) {
-		printk(KERN_ERR "drbd%d: drbd_generic_make_request: "
-				"bio->bi_bdev == NULL\n",
-		       device_to_minor(device));
-		dump_stack();
-		bio_endio(bio, -ENODEV);
-		return;
-	}
-
-	if (drbd_insert_fault(device, fault_type))
-		bio_endio(bio, -EIO);
-	else
-		generic_make_request(bio);
-}
-
-#ifndef __CHECKER__
-# undef __cond_lock
-# define __cond_lock(x,c) (c)
-#endif
-
-#endif


@@ -3812,7 +3812,7 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive)
 	bio.bi_iter.bi_size = size;
 	bio.bi_bdev = bdev;
 	bio.bi_iter.bi_sector = 0;
-	bio.bi_flags = (1 << BIO_QUIET);
+	bio.bi_flags |= (1 << BIO_QUIET);
 	bio.bi_private = &cbdata;
 	bio.bi_end_io = floppy_rb0_cb;

File diff suppressed because it is too large.


@@ -40,9 +40,11 @@
 #define MTIP_MAX_RETRIES	2
 
 /* Various timeout values in ms */
-#define MTIP_NCQ_COMMAND_TIMEOUT_MS       5000
-#define MTIP_IOCTL_COMMAND_TIMEOUT_MS     5000
-#define MTIP_INTERNAL_COMMAND_TIMEOUT_MS  5000
+#define MTIP_NCQ_CMD_TIMEOUT_MS      15000
+#define MTIP_IOCTL_CMD_TIMEOUT_MS    5000
+#define MTIP_INT_CMD_TIMEOUT_MS      5000
+#define MTIP_QUIESCE_IO_TIMEOUT_MS   (MTIP_NCQ_CMD_TIMEOUT_MS * \
+				     (MTIP_MAX_RETRIES + 1))
 
 /* check for timeouts every 500ms */
 #define MTIP_TIMEOUT_CHECK_PERIOD	500
@@ -331,12 +333,8 @@ struct mtip_cmd {
 	 */
 	void (*comp_func)(struct mtip_port *port,
				int tag,
-				void *data,
+				struct mtip_cmd *cmd,
				int status);
-	/* Additional callback function that may be called by comp_func() */
-	void (*async_callback)(void *data, int status);
-	void *async_data; /* Addl. data passed to async_callback() */
 
 	int scatter_ents; /* Number of scatter list entries used */
@@ -347,10 +345,6 @@ struct mtip_cmd {
 	int retries; /* The number of retries left for this command. */
 
 	int direction; /* Data transfer direction */
-
-	unsigned long comp_time; /* command completion time, in jiffies */
-
-	atomic_t active; /* declares if this command sent to the drive. */
 };
 
 /* Structure used to describe a port. */
@@ -436,12 +430,6 @@ struct mtip_port {
 	 * or error handling is active
 	 */
 	unsigned long cmds_to_issue[SLOTBITS_IN_LONGS];
-	/*
-	 * Array of command slots. Structure includes pointers to the
-	 * command header and command table, and completion function and data
-	 * pointers.
-	 */
-	struct mtip_cmd commands[MTIP_MAX_COMMAND_SLOTS];
 	/* Used by mtip_service_thread to wait for an event */
 	wait_queue_head_t svc_wait;
 	/*
@@ -452,13 +440,7 @@ struct mtip_port {
 	/*
 	 * Timer used to complete commands that have been active for too long.
 	 */
-	struct timer_list cmd_timer;
 	unsigned long ic_pause_timer;
-	/*
-	 * Semaphore used to block threads if there are no
-	 * command slots available.
-	 */
-	struct semaphore cmd_slot;
 
 	/* Semaphore to control queue depth of unaligned IOs */
 	struct semaphore cmd_slot_unal;
@@ -485,6 +467,8 @@ struct driver_data {
 	struct request_queue *queue; /* Our request queue. */
 
+	struct blk_mq_tag_set tags; /* blk_mq tags */
+
 	struct mtip_port *port; /* Pointer to the port data structure. */
 
 	unsigned product_type; /* magic value declaring the product type */


@@ -203,8 +203,8 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
 	entry = llist_reverse_order(entry);
 	do {
 		cmd = container_of(entry, struct nullb_cmd, ll_list);
-		end_cmd(cmd);
 		entry = entry->next;
+		end_cmd(cmd);
 	} while (entry);
 }
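The null_blk hunk above is a use-after-free fix: end_cmd() releases the storage that also holds the ll_list node, so entry->next has to be read before the node is handed to end_cmd(). The same ordering applies to any list walk that frees its nodes; a plain user-space sketch with a hypothetical struct, not the driver's types:

#include <stdlib.h>

struct node {
	struct node *next;
	/* payload ... */
};

static void free_all(struct node *entry)
{
	while (entry) {
		struct node *next = entry->next;	/* read before freeing */
		free(entry);				/* node memory is gone now */
		entry = next;
	}
}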


@@ -3944,15 +3944,14 @@ static int skd_acquire_msix(struct skd_device *skdev)
 	for (i = 0; i < SKD_MAX_MSIX_COUNT; i++)
 		entries[i].entry = i;
 
-	rc = pci_enable_msix_range(pdev, entries,
-				   SKD_MIN_MSIX_COUNT, SKD_MAX_MSIX_COUNT);
-	if (rc < 0) {
+	rc = pci_enable_msix_exact(pdev, entries, SKD_MAX_MSIX_COUNT);
+	if (rc) {
 		pr_err("(%s): failed to enable MSI-X %d\n",
 		       skd_name(skdev), rc);
 		goto msix_out;
 	}
 
-	skdev->msix_count = rc;
+	skdev->msix_count = SKD_MAX_MSIX_COUNT;
 	skdev->msix_entries = kzalloc(sizeof(struct skd_msix_entry) *
 				      skdev->msix_count, GFP_KERNEL);
 	if (!skdev->msix_entries) {
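For context on the skd hunk: pci_enable_msix_range() returns the number of vectors actually allocated (anywhere between the requested minimum and maximum) or a negative errno, while pci_enable_msix_exact() is all-or-nothing and returns 0 on success. That is why the error check becomes a plain "if (rc)" and msix_count is set to the constant. A hedged sketch contrasting the two calling conventions; every identifier here is a placeholder, not skd code:

#include <linux/pci.h>

/* Illustrative only: contrast the two MSI-X enable helpers. */
static int enable_exact_vectors(struct pci_dev *pdev,
				struct msix_entry *entries, int nvec)
{
	int rc;

	/* Range variant: a positive return is the count actually allocated. */
	rc = pci_enable_msix_range(pdev, entries, 1, nvec);
	if (rc < 0)
		return rc;		/* not even one vector available */
	pci_disable_msix(pdev);		/* undo; only done here for contrast */

	/* Exact variant: 0 on success, negative errno otherwise. */
	rc = pci_enable_msix_exact(pdev, entries, nvec);
	if (rc)
		return rc;		/* nothing was allocated */

	return nvec;
}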


@@ -162,6 +162,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
 	unsigned int num;
 	const bool last = (req->cmd_flags & REQ_END) != 0;
 	int err;
+	bool notify = false;
 
 	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
@@ -214,10 +215,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
 		return BLK_MQ_RQ_QUEUE_ERROR;
 	}
 
-	if (last)
-		virtqueue_kick(vblk->vq);
+	if (last && virtqueue_kick_prepare(vblk->vq))
+		notify = true;
 	spin_unlock_irqrestore(&vblk->vq_lock, flags);
+
+	if (notify)
+		virtqueue_notify(vblk->vq);
 	return BLK_MQ_RQ_QUEUE_OK;
 }
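The virtio-blk hunk above splits virtqueue_kick() into its two halves so the expensive host notification (the "world switch") happens after the queue lock is dropped: virtqueue_kick_prepare() is cheap and still runs under the lock that serializes additions to the virtqueue, while virtqueue_notify() does the actual notification and needs no lock. A minimal sketch of the pattern, assuming a driver-private lock and ignoring virtqueue_add_outbuf() errors:

#include <linux/gfp.h>
#include <linux/scatterlist.h>
#include <linux/spinlock.h>
#include <linux/virtio.h>

/* Sketch only: 'lock' and 'vq' stand in for a driver's own queue lock
 * and virtqueue. */
static void queue_buf_and_kick(spinlock_t *lock, struct virtqueue *vq,
			       struct scatterlist *sg, void *data)
{
	unsigned long flags;
	bool notify;

	spin_lock_irqsave(lock, flags);
	virtqueue_add_outbuf(vq, sg, 1, data, GFP_ATOMIC);
	notify = virtqueue_kick_prepare(vq);	/* cheap, under the lock */
	spin_unlock_irqrestore(lock, flags);

	if (notify)
		virtqueue_notify(vq);		/* expensive, lock dropped */
}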


@@ -314,7 +314,7 @@ struct xen_blkif {
 	unsigned long long			st_rd_sect;
 	unsigned long long			st_wr_sect;
 
-	wait_queue_head_t	waiting_to_free;
+	struct work_struct	free_work;
 	/* Thread shutdown wait queue. */
 	wait_queue_head_t	shutdown_wq;
 };
@@ -361,7 +361,7 @@ struct pending_req {
 #define xen_blkif_put(_b)				\
 	do {						\
 		if (atomic_dec_and_test(&(_b)->refcnt))	\
-			wake_up(&(_b)->waiting_to_free);\
+			schedule_work(&(_b)->free_work);\
 	} while (0)
 
 struct phys_req {


@@ -35,12 +35,26 @@ static void connect(struct backend_info *);
 static int connect_ring(struct backend_info *);
 static void backend_changed(struct xenbus_watch *, const char **,
 			    unsigned int);
+static void xen_blkif_free(struct xen_blkif *blkif);
+static void xen_vbd_free(struct xen_vbd *vbd);
 
 struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
 {
 	return be->dev;
 }
 
+/*
+ * The last request could free the device from softirq context and
+ * xen_blkif_free() can sleep.
+ */
+static void xen_blkif_deferred_free(struct work_struct *work)
+{
+	struct xen_blkif *blkif;
+
+	blkif = container_of(work, struct xen_blkif, free_work);
+	xen_blkif_free(blkif);
+}
+
 static int blkback_name(struct xen_blkif *blkif, char *buf)
 {
 	char *devpath, *devname;
@@ -121,7 +135,6 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	init_completion(&blkif->drain_complete);
 	atomic_set(&blkif->drain, 0);
 	blkif->st_print = jiffies;
-	init_waitqueue_head(&blkif->waiting_to_free);
 	blkif->persistent_gnts.rb_node = NULL;
 	spin_lock_init(&blkif->free_pages_lock);
 	INIT_LIST_HEAD(&blkif->free_pages);
@@ -132,6 +145,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
 
 	INIT_LIST_HEAD(&blkif->pending_free);
+	INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
 
 	for (i = 0; i < XEN_BLKIF_REQS; i++) {
 		req = kzalloc(sizeof(*req), GFP_KERNEL);
@@ -231,7 +245,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
 	return 0;
 }
 
-static void xen_blkif_disconnect(struct xen_blkif *blkif)
+static int xen_blkif_disconnect(struct xen_blkif *blkif)
 {
 	if (blkif->xenblkd) {
 		kthread_stop(blkif->xenblkd);
@@ -239,9 +253,12 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
 		blkif->xenblkd = NULL;
 	}
 
-	atomic_dec(&blkif->refcnt);
-	wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
-	atomic_inc(&blkif->refcnt);
+	/* The above kthread_stop() guarantees that at this point we
+	 * don't have any discard_io or other_io requests. So, checking
+	 * for inflight IO is enough.
+	 */
+	if (atomic_read(&blkif->inflight) > 0)
+		return -EBUSY;
 
 	if (blkif->irq) {
 		unbind_from_irqhandler(blkif->irq, blkif);
@@ -252,6 +269,8 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
 		xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
 		blkif->blk_rings.common.sring = NULL;
 	}
+
+	return 0;
 }
 
 static void xen_blkif_free(struct xen_blkif *blkif)
@@ -259,8 +278,8 @@ static void xen_blkif_free(struct xen_blkif *blkif)
 	struct pending_req *req, *n;
 	int i = 0, j;
 
-	if (!atomic_dec_and_test(&blkif->refcnt))
-		BUG();
+	xen_blkif_disconnect(blkif);
+	xen_vbd_free(&blkif->vbd);
 
 	/* Remove all persistent grants and the cache of ballooned pages. */
 	xen_blkbk_free_caches(blkif);
@@ -449,16 +468,15 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
 		be->backend_watch.node = NULL;
 	}
 
+	dev_set_drvdata(&dev->dev, NULL);
+
 	if (be->blkif) {
 		xen_blkif_disconnect(be->blkif);
-		xen_vbd_free(&be->blkif->vbd);
-		xen_blkif_free(be->blkif);
-		be->blkif = NULL;
+		xen_blkif_put(be->blkif);
 	}
 
 	kfree(be->mode);
 	kfree(be);
-	dev_set_drvdata(&dev->dev, NULL);
 	return 0;
 }
 
@@ -481,10 +499,15 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info
 	struct xenbus_device *dev = be->dev;
 	struct xen_blkif *blkif = be->blkif;
 	int err;
-	int state = 0;
+	int state = 0, discard_enable;
 	struct block_device *bdev = be->blkif->vbd.bdev;
 	struct request_queue *q = bdev_get_queue(bdev);
 
+	err = xenbus_scanf(XBT_NIL, dev->nodename, "discard-enable", "%d",
+			   &discard_enable);
+	if (err == 1 && !discard_enable)
+		return;
+
 	if (blk_queue_discard(q)) {
 		err = xenbus_printf(xbt, dev->nodename,
 			"discard-granularity", "%u",
@@ -700,7 +723,11 @@ static void frontend_changed(struct xenbus_device *dev,
 		 * Enforce precondition before potential leak point.
 		 * xen_blkif_disconnect() is idempotent.
 		 */
-		xen_blkif_disconnect(be->blkif);
+		err = xen_blkif_disconnect(be->blkif);
+		if (err) {
+			xenbus_dev_fatal(dev, err, "pending I/O");
+			break;
+		}
 
 		err = connect_ring(be);
 		if (err)
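The xen-blkback changes above replace "block in disconnect until the refcount drains" with "drop the last reference wherever it happens and let a workqueue run the teardown", because the final xen_blkif_put() can come from softirq context while xen_blkif_free() may sleep. The general shape of that pattern, with hypothetical names (INIT_WORK is assumed to have been done at allocation time):

#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

/* Sketch of deferring a sleeping destructor out of atomic context;
 * 'struct foo' and its fields are illustrative. */
struct foo {
	atomic_t refcnt;
	struct work_struct free_work;	/* INIT_WORK(..., foo_free_workfn) at alloc */
	/* ... resources whose teardown may sleep ... */
};

static void foo_free_workfn(struct work_struct *work)
{
	struct foo *f = container_of(work, struct foo, free_work);

	/* runs from the workqueue, so sleeping is fine here */
	kfree(f);
}

static void foo_put(struct foo *f)
{
	/* safe to call from softirq context */
	if (atomic_dec_and_test(&f->refcnt))
		schedule_work(&f->free_work);
}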


@@ -1635,36 +1635,24 @@ blkfront_closing(struct blkfront_info *info)
 static void blkfront_setup_discard(struct blkfront_info *info)
 {
 	int err;
-	char *type;
 	unsigned int discard_granularity;
 	unsigned int discard_alignment;
 	unsigned int discard_secure;
 
-	type = xenbus_read(XBT_NIL, info->xbdev->otherend, "type", NULL);
-	if (IS_ERR(type))
-		return;
-
-	info->feature_secdiscard = 0;
-	if (strncmp(type, "phy", 3) == 0) {
-		err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-			"discard-granularity", "%u", &discard_granularity,
-			"discard-alignment", "%u", &discard_alignment,
-			NULL);
-		if (!err) {
-			info->feature_discard = 1;
-			info->discard_granularity = discard_granularity;
-			info->discard_alignment = discard_alignment;
-		}
-		err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-			    "discard-secure", "%d", &discard_secure,
-			    NULL);
-		if (!err)
-			info->feature_secdiscard = discard_secure;
-
-	} else if (strncmp(type, "file", 4) == 0)
-		info->feature_discard = 1;
-
-	kfree(type);
+	info->feature_discard = 1;
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+		"discard-granularity", "%u", &discard_granularity,
+		"discard-alignment", "%u", &discard_alignment,
+		NULL);
+	if (!err) {
+		info->discard_granularity = discard_granularity;
+		info->discard_alignment = discard_alignment;
+	}
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+		    "discard-secure", "%d", &discard_secure,
+		    NULL);
+	if (!err)
+		info->feature_secdiscard = !!discard_secure;
 }
 
 static int blkfront_setup_indirect(struct blkfront_info *info)

File diff suppressed because it is too large.


@@ -620,6 +620,15 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 #define rq_data_dir(rq)		(((rq)->cmd_flags & 1) != 0)
 
+/*
+ * Driver can handle struct request, if it either has an old style
+ * request_fn defined, or is blk-mq based.
+ */
+static inline bool queue_is_rq_based(struct request_queue *q)
+{
+	return q->request_fn || q->mq_ops;
+}
+
 static inline unsigned int blk_queue_cluster(struct request_queue *q)
 {
 	return q->limits.cluster;
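The new helper above lets request-based consumers accept both legacy request_fn queues and blk-mq queues (which have mq_ops but no request_fn), while still rejecting bio-based stacked devices. A hedged usage example; the surrounding function is hypothetical, only queue_is_rq_based() comes from the hunk:

#include <linux/blkdev.h>
#include <linux/errno.h>

/* Illustrative only: refuse to attach to bio-based (stacked) queues,
 * accept both legacy request_fn and blk-mq queues. */
static int attach_to_queue(struct request_queue *q)
{
	if (!queue_is_rq_based(q))
		return -EINVAL;		/* no struct request to work with */

	/* ... proceed; requests will be delivered either way ... */
	return 0;
}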


@@ -86,7 +86,7 @@ typedef uint64_t blkif_sector_t;
  *     Interface%20manuals/100293068c.pdf
  * The backend can optionally provide three extra XenBus attributes to
  * further optimize the discard functionality:
- * 'discard-aligment' - Devices that support discard functionality may
+ * 'discard-alignment' - Devices that support discard functionality may
  * internally allocate space in units that are bigger than the exported
  * logical block size. The discard-alignment parameter indicates how many bytes
  * the beginning of the partition is offset from the internal allocation unit's