diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 4b97b86da926..765fc7348b66 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -375,7 +375,7 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
 
 	pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean);
 
-	INIT_LIST_HEAD(&blkif->persistent_purge_list);
+	BUG_ON(!list_empty(&blkif->persistent_purge_list));
 	root = &blkif->persistent_gnts;
 purge_list:
 	foreach_grant_safe(persistent_gnt, n, root, node) {
@@ -625,16 +625,8 @@ purge_gnt_list:
 			print_stats(blkif);
 	}
 
-	/* Since we are shutting down remove all pages from the buffer */
-	shrink_free_pagepool(blkif, 0 /* All */);
-
-	/* Free all persistent grant pages */
-	if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
-		free_persistent_gnts(blkif, &blkif->persistent_gnts,
-			blkif->persistent_gnt_c);
-
-	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
-	blkif->persistent_gnt_c = 0;
+	/* Drain pending purge work */
+	flush_work(&blkif->persistent_purge_work);
 
 	if (log_stats)
 		print_stats(blkif);
@@ -645,6 +637,23 @@ purge_gnt_list:
 	return 0;
 }
 
+/*
+ * Remove persistent grants and empty the pool of free pages
+ */
+void xen_blkbk_free_caches(struct xen_blkif *blkif)
+{
+	/* Free all persistent grant pages */
+	if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
+		free_persistent_gnts(blkif, &blkif->persistent_gnts,
+			blkif->persistent_gnt_c);
+
+	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
+	blkif->persistent_gnt_c = 0;
+
+	/* Since we are shutting down remove all pages from the buffer */
+	shrink_free_pagepool(blkif, 0 /* All */);
+}
+
 /*
  * Unmap the grant references, and also remove the M2P over-rides
  * used in the 'pending_req'.
@@ -838,7 +847,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
 	struct grant_page **pages = pending_req->indirect_pages;
 	struct xen_blkif *blkif = pending_req->blkif;
 	int indirect_grefs, rc, n, nseg, i;
-	struct blkif_request_segment_aligned *segments = NULL;
+	struct blkif_request_segment *segments = NULL;
 
 	nseg = pending_req->nr_pages;
 	indirect_grefs = INDIRECT_PAGES(nseg);
@@ -934,9 +943,7 @@ static void xen_blk_drain_io(struct xen_blkif *blkif)
 {
 	atomic_set(&blkif->drain, 1);
 	do {
-		/* The initial value is one, and one refcnt taken at the
-		 * start of the xen_blkif_schedule thread. */
-		if (atomic_read(&blkif->refcnt) <= 2)
+		if (atomic_read(&blkif->inflight) == 0)
 			break;
 		wait_for_completion_interruptible_timeout(
 				&blkif->drain_complete, HZ);
@@ -976,17 +983,30 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
 	 * the proper response on the ring.
 	 */
 	if (atomic_dec_and_test(&pending_req->pendcnt)) {
-		xen_blkbk_unmap(pending_req->blkif,
+		struct xen_blkif *blkif = pending_req->blkif;
+
+		xen_blkbk_unmap(blkif,
 		                pending_req->segments,
 		                pending_req->nr_pages);
-		make_response(pending_req->blkif, pending_req->id,
+		make_response(blkif, pending_req->id,
 			      pending_req->operation, pending_req->status);
-		xen_blkif_put(pending_req->blkif);
-		if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
-			if (atomic_read(&pending_req->blkif->drain))
-				complete(&pending_req->blkif->drain_complete);
+		free_req(blkif, pending_req);
+		/*
+		 * Make sure the request is freed before releasing blkif,
+		 * or there could be a race between free_req and the
+		 * cleanup done in xen_blkif_free during shutdown.
+		 *
+		 * NB: The fact that we might try to wake up pending_free_wq
+		 * before drain_complete (in case there's a drain going on)
+		 * is not a problem with our current implementation,
+		 * because we can ensure there's no thread waiting on
+		 * pending_free_wq if there's a drain going on, but it has
+		 * to be taken into account if the current model is changed.
+		 */
+		if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) {
+			complete(&blkif->drain_complete);
 		}
-		free_req(pending_req->blkif, pending_req);
+		xen_blkif_put(blkif);
 	}
 }
 
@@ -1240,6 +1260,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	 * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
 	 */
 	xen_blkif_get(blkif);
+	atomic_inc(&blkif->inflight);
 
 	for (i = 0; i < nseg; i++) {
 		while ((bio == NULL) ||
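The blkback changes above stop inferring quiescence from refcnt values and instead count in-flight requests explicitly. What follows is a minimal userspace sketch of that drain protocol, not kernel code: C11 atomics stand in for atomic_t, a mutex/condvar pair stands in for struct completion, and the worker/main harness is invented purely for illustration.

/*
 * Sketch of the inflight/drain handshake: submit_io(), end_io() and
 * drain_io() loosely mirror dispatch_rw_block_io(), __end_block_io_op()
 * and xen_blk_drain_io() in the patch. Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int inflight;	/* mirrors blkif->inflight */
static atomic_int drain;	/* mirrors blkif->drain */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t drain_complete = PTHREAD_COND_INITIALIZER;

static void submit_io(void)
{
	atomic_fetch_add(&inflight, 1);	/* cf. atomic_inc(&blkif->inflight) */
}

static void end_io(void)
{
	/* Last in-flight request wakes a waiting drainer, if any. */
	if (atomic_fetch_sub(&inflight, 1) == 1 && atomic_load(&drain)) {
		pthread_mutex_lock(&lock);
		pthread_cond_signal(&drain_complete);
		pthread_mutex_unlock(&lock);
	}
}

static void drain_io(void)
{
	atomic_store(&drain, 1);
	pthread_mutex_lock(&lock);
	while (atomic_load(&inflight) != 0)	/* cf. inflight == 0 check */
		pthread_cond_wait(&drain_complete, &lock);
	pthread_mutex_unlock(&lock);
	atomic_store(&drain, 0);
}

static void *worker(void *arg)
{
	(void)arg;
	end_io();	/* complete the one outstanding request */
	return NULL;
}

int main(void)
{
	pthread_t t;

	submit_io();
	pthread_create(&t, NULL, worker, NULL);
	drain_io();	/* returns once inflight hits zero */
	pthread_join(t, NULL);
	printf("drained, inflight=%d\n", atomic_load(&inflight));
	return 0;
}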
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 8d8807563d99..9eb34e24b4fe 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -57,7 +57,7 @@
 #define MAX_INDIRECT_SEGMENTS 256
 
 #define SEGS_PER_INDIRECT_FRAME \
-	(PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
+	(PAGE_SIZE/sizeof(struct blkif_request_segment))
 #define MAX_INDIRECT_PAGES \
 	((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
 #define INDIRECT_PAGES(_segs) \
@@ -278,6 +278,7 @@ struct xen_blkif {
 	/* for barrier (drain) requests */
 	struct completion	drain_complete;
 	atomic_t		drain;
+	atomic_t		inflight;
 	/* One thread per one blkif. */
 	struct task_struct	*xenblkd;
 	unsigned int		waiting_reqs;
@@ -376,6 +377,7 @@ int xen_blkif_xenbus_init(void);
 irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
 int xen_blkif_schedule(void *arg);
 int xen_blkif_purge_persistent(void *arg);
+void xen_blkbk_free_caches(struct xen_blkif *blkif);
 
 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
 			      struct backend_info *be, int state);
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index c2014a0aa206..84973c6a856a 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -125,8 +125,10 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	blkif->persistent_gnts.rb_node = NULL;
 	spin_lock_init(&blkif->free_pages_lock);
 	INIT_LIST_HEAD(&blkif->free_pages);
+	INIT_LIST_HEAD(&blkif->persistent_purge_list);
 	blkif->free_pages_num = 0;
 	atomic_set(&blkif->persistent_gnt_in_use, 0);
+	atomic_set(&blkif->inflight, 0);
 
 	INIT_LIST_HEAD(&blkif->pending_free);
 
@@ -259,6 +261,17 @@ static void xen_blkif_free(struct xen_blkif *blkif)
 	if (!atomic_dec_and_test(&blkif->refcnt))
 		BUG();
 
+	/* Remove all persistent grants and the cache of ballooned pages. */
+	xen_blkbk_free_caches(blkif);
+
+	/* Make sure everything is drained before shutting down */
+	BUG_ON(blkif->persistent_gnt_c != 0);
+	BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
+	BUG_ON(blkif->free_pages_num != 0);
+	BUG_ON(!list_empty(&blkif->persistent_purge_list));
+	BUG_ON(!list_empty(&blkif->free_pages));
+	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
+
 	/* Check that there is no request in use */
 	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
 		list_del(&req->free_list);
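For reference, the arithmetic behind SEGS_PER_INDIRECT_FRAME and INDIRECT_PAGES works out as follows, assuming 4 KiB pages and a 4-byte grant_ref_t (typical for x86 Xen): each segment entry occupies 8 bytes, so one indirect frame holds 512 entries and MAX_INDIRECT_SEGMENTS (256) fits in a single page, ceil(256 / 512) == 1. A standalone userspace check of that claim:

/*
 * Worked example of the indirect-page math in common.h. The struct has
 * 4 (gref) + 1 + 1 (sectors) bytes plus 2 bytes of tail padding from
 * the natural 4-byte alignment -- the same 8 bytes the old explicitly
 * padded *_aligned layout had.
 */
#include <assert.h>
#include <stdint.h>

typedef uint32_t grant_ref_t;

struct blkif_request_segment {
	grant_ref_t gref;
	uint8_t first_sect, last_sect;
};

#define PAGE_SIZE 4096
#define MAX_INDIRECT_SEGMENTS 256
#define SEGS_PER_INDIRECT_FRAME \
	(PAGE_SIZE / sizeof(struct blkif_request_segment))
#define INDIRECT_PAGES(_segs) \
	(((_segs) + SEGS_PER_INDIRECT_FRAME - 1) / SEGS_PER_INDIRECT_FRAME)

int main(void)
{
	assert(sizeof(struct blkif_request_segment) == 8);
	assert(SEGS_PER_INDIRECT_FRAME == 512);			/* 4096 / 8 */
	assert(INDIRECT_PAGES(MAX_INDIRECT_SEGMENTS) == 1);	/* ceil(256/512) */
	return 0;
}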
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 8dcfb54f1603..efe1b4761735 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -162,7 +162,7 @@ static DEFINE_SPINLOCK(minor_lock);
 #define DEV_NAME	"xvd"	/* name in /dev */
 
 #define SEGS_PER_INDIRECT_FRAME \
-	(PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
+	(PAGE_SIZE/sizeof(struct blkif_request_segment))
 #define INDIRECT_GREFS(_segs) \
 	((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
 
@@ -393,7 +393,7 @@ static int blkif_queue_request(struct request *req)
 	unsigned long id;
 	unsigned int fsect, lsect;
 	int i, ref, n;
-	struct blkif_request_segment_aligned *segments = NULL;
+	struct blkif_request_segment *segments = NULL;
 
 	/*
 	 * Used to store if we are able to queue the request by just using
@@ -550,7 +550,7 @@ static int blkif_queue_request(struct request *req)
 			} else {
 				n = i % SEGS_PER_INDIRECT_FRAME;
 				segments[n] =
-					(struct blkif_request_segment_aligned) {
+					(struct blkif_request_segment) {
 							.gref       = ref,
 							.first_sect = fsect,
 							.last_sect  = lsect };
@@ -1904,13 +1904,16 @@ static void blkback_changed(struct xenbus_device *dev,
 	case XenbusStateReconfiguring:
 	case XenbusStateReconfigured:
 	case XenbusStateUnknown:
-	case XenbusStateClosed:
 		break;
 
 	case XenbusStateConnected:
 		blkfront_connect(info);
 		break;
 
+	case XenbusStateClosed:
+		if (dev->state == XenbusStateClosed)
+			break;
+		/* Missed the backend's Closing state -- fallthrough */
 	case XenbusStateClosing:
 		blkfront_closing(info);
 		break;
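The segments[n] assignment in blkif_queue_request() indexes into whichever indirect frame is currently mapped, starting a new frame every SEGS_PER_INDIRECT_FRAME entries. The standalone sketch below models that scatter; the frames array and the sample gref/sector values are invented stand-ins for granted pages, kmap_atomic() and real grant references.

/*
 * Userspace model of scattering nseg segments across indirect frames.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t grant_ref_t;

struct blkif_request_segment {
	grant_ref_t gref;
	uint8_t first_sect, last_sect;
};

#define SEGS_PER_INDIRECT_FRAME 512	/* PAGE_SIZE / sizeof(segment) */
#define INDIRECT_GREFS(_segs) \
	(((_segs) + SEGS_PER_INDIRECT_FRAME - 1) / SEGS_PER_INDIRECT_FRAME)

int main(void)
{
	int nseg = 1000;	/* hypothetical request size */
	struct blkif_request_segment frames[INDIRECT_GREFS(1000)][SEGS_PER_INDIRECT_FRAME];

	for (int i = 0; i < nseg; i++) {
		/* Switch to the next indirect frame every 512 segments. */
		struct blkif_request_segment *segments = frames[i / SEGS_PER_INDIRECT_FRAME];
		int n = i % SEGS_PER_INDIRECT_FRAME;

		segments[n] = (struct blkif_request_segment) {
			.gref       = (grant_ref_t)i,	/* would be a real grant ref */
			.first_sect = 0,
			.last_sect  = 7,	/* whole 4 KiB frame: sectors 0-7 */
		};
	}
	printf("filled %d segments in %d indirect frames\n",
	       nseg, (int)INDIRECT_GREFS(nseg));
	return 0;
}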
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index ae665ac59c36..32ec05a6572f 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -113,13 +113,13 @@ typedef uint64_t blkif_sector_t;
 * it's less than the number provided by the backend. The indirect_grefs field
 * in blkif_request_indirect should be filled by the frontend with the
 * grant references of the pages that are holding the indirect segments.
- * This pages are filled with an array of blkif_request_segment_aligned
- * that hold the information about the segments. The number of indirect
- * pages to use is determined by the maximum number of segments
- * a indirect request contains. Every indirect page can contain a maximum
- * of 512 segments (PAGE_SIZE/sizeof(blkif_request_segment_aligned)),
- * so to calculate the number of indirect pages to use we have to do
- * ceil(indirect_segments/512).
+ * These pages are filled with an array of blkif_request_segment that holds
+ * the information about the segments. The number of indirect pages to use is
+ * determined by the number of segments an indirect request contains. Every
+ * indirect page can contain a maximum of
+ * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
+ * calculate the number of indirect pages to use we have to do
+ * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
 *
 * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
 * create the "feature-max-indirect-segments" node!
@@ -135,13 +135,12 @@ typedef uint64_t blkif_sector_t;
 
 #define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
 
-struct blkif_request_segment_aligned {
-	grant_ref_t gref;        /* reference to I/O buffer frame        */
-	/* @first_sect: first sector in frame to transfer (inclusive).   */
-	/* @last_sect: last sector in frame to transfer (inclusive).     */
-	uint8_t     first_sect, last_sect;
-	uint16_t    _pad; /* padding to make it 8 bytes, so it's cache-aligned */
-} __attribute__((__packed__));
+struct blkif_request_segment {
+	grant_ref_t gref;        /* reference to I/O buffer frame        */
+	/* @first_sect: first sector in frame to transfer (inclusive).   */
+	/* @last_sect: last sector in frame to transfer (inclusive).     */
+	uint8_t     first_sect, last_sect;
+};
 
 struct blkif_request_rw {
 	uint8_t        nr_segments;  /* number of segments                   */
@@ -151,12 +150,7 @@ struct blkif_request_rw {
 #endif
 	uint64_t       id;           /* private guest value, echoed in resp  */
 	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
-	struct blkif_request_segment {
-		grant_ref_t gref;        /* reference to I/O buffer frame        */
-		/* @first_sect: first sector in frame to transfer (inclusive).   */
-		/* @last_sect: last sector in frame to transfer (inclusive).     */
-		uint8_t     first_sect, last_sect;
-	} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 } __attribute__((__packed__));
 
 struct blkif_request_discard {
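Dropping the _pad member and the packed attribute is wire-compatible because the natural alignment of the 4-byte gref already rounds the struct up to the same 8 bytes the old layout reserved explicitly. Below is a standalone compile-time check of that equivalence, assuming a 4-byte grant_ref_t as on existing Xen ABIs; old_segment and new_segment are illustrative names, not identifiers from the header.

/*
 * ABI sanity check: the removed *_aligned layout and the unified layout
 * must have the same size and field offsets on the shared ring.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

typedef uint32_t grant_ref_t;

struct old_segment {			/* the removed *_aligned layout */
	grant_ref_t gref;
	uint8_t first_sect, last_sect;
	uint16_t _pad;
} __attribute__((__packed__));

struct new_segment {			/* the unified layout */
	grant_ref_t gref;
	uint8_t first_sect, last_sect;
};

static_assert(sizeof(struct old_segment) == sizeof(struct new_segment),
	      "ring layout must not change");
static_assert(offsetof(struct old_segment, first_sect) ==
	      offsetof(struct new_segment, first_sect),
	      "field offsets must match");

int main(void) { return 0; }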