From 2ed22e3c3bec5b92b9aba4afdef0cc5e6d859a11 Mon Sep 17 00:00:00 2001 From: Matt Rushton Date: Tue, 4 Feb 2014 11:26:12 +0100 Subject: [PATCH 1/5] xen-blkback: fix memory leak when persistent grants are used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently shrink_free_pagepool() is called before the pages used for persistent grants are released via free_persistent_gnts(). This results in a memory leak when a VBD that uses persistent grants is torn down. Cc: Konrad Rzeszutek Wilk Cc: "Roger Pau Monné" Cc: Ian Campbell Reviewed-by: David Vrabel Cc: linux-kernel@vger.kernel.org Cc: xen-devel@lists.xen.org Cc: Anthony Liguori Signed-off-by: Matt Rushton Signed-off-by: Matt Wilson Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 6620b73d0490..30ef7b390df5 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -625,9 +625,6 @@ purge_gnt_list: print_stats(blkif); } - /* Since we are shutting down remove all pages from the buffer */ - shrink_free_pagepool(blkif, 0 /* All */); - /* Free all persistent grant pages */ if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) free_persistent_gnts(blkif, &blkif->persistent_gnts, @@ -636,6 +633,9 @@ purge_gnt_list: BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); blkif->persistent_gnt_c = 0; + /* Since we are shutting down remove all pages from the buffer */ + shrink_free_pagepool(blkif, 0 /* All */); + if (log_stats) print_stats(blkif); From ef753411339eae46b9a3151906901f8bfd12b0f1 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 4 Feb 2014 11:26:13 +0100 Subject: [PATCH 2/5] xen-blkback: fix memory leaks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I've at least identified two possible memory leaks in blkback, both related to the shutdown path of a VBD: - blkback doesn't wait for any pending purge work to finish before cleaning the list of free_pages. The purge work will call put_free_pages and thus we might end up with pages being added to the free_pages list after we have emptied it. Fix this by making sure there's no pending purge work before exiting xen_blkif_schedule, and moving the free_page cleanup code to xen_blkif_free. - blkback doesn't wait for pending requests to end before cleaning persistent grants and the list of free_pages. Again this can add pages to the free_pages list or persistent grants to the persistent_gnts red-black tree. Fixed by moving the persistent grants and free_pages cleanup code to xen_blkif_free. Also, add some checks in xen_blkif_free to make sure we are cleaning everything. Signed-off-by: Roger Pau Monné Cc: Konrad Rzeszutek Wilk Reviewed-by: David Vrabel Cc: Boris Ostrovsky Tested-by: Matt Rushton Reviewed-by: Matt Rushton Cc: Matt Wilson Cc: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 27 ++++++++++++++++++--------- drivers/block/xen-blkback/common.h | 1 + drivers/block/xen-blkback/xenbus.c | 12 ++++++++++++ 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 30ef7b390df5..dcfe49fd3cb4 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -375,7 +375,7 @@ static void purge_persistent_gnt(struct xen_blkif *blkif) pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean); - INIT_LIST_HEAD(&blkif->persistent_purge_list); + BUG_ON(!list_empty(&blkif->persistent_purge_list)); root = &blkif->persistent_gnts; purge_list: foreach_grant_safe(persistent_gnt, n, root, node) { @@ -625,6 +625,23 @@ purge_gnt_list: print_stats(blkif); } + /* Drain pending purge work */ + flush_work(&blkif->persistent_purge_work); + + if (log_stats) + print_stats(blkif); + + blkif->xenblkd = NULL; + xen_blkif_put(blkif); + + return 0; +} + +/* + * Remove persistent grants and empty the pool of free pages + */ +void xen_blkbk_free_caches(struct xen_blkif *blkif) +{ /* Free all persistent grant pages */ if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) free_persistent_gnts(blkif, &blkif->persistent_gnts, @@ -635,14 +652,6 @@ purge_gnt_list: /* Since we are shutting down remove all pages from the buffer */ shrink_free_pagepool(blkif, 0 /* All */); - - if (log_stats) - print_stats(blkif); - - blkif->xenblkd = NULL; - xen_blkif_put(blkif); - - return 0; } /* diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 8d8807563d99..f733d7627120 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -376,6 +376,7 @@ int xen_blkif_xenbus_init(void); irqreturn_t xen_blkif_be_int(int irq, void *dev_id); int xen_blkif_schedule(void *arg); int xen_blkif_purge_persistent(void *arg); +void xen_blkbk_free_caches(struct xen_blkif *blkif); int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, struct backend_info *be, int state); diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index c2014a0aa206..8afef67fecdd 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -125,6 +125,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) blkif->persistent_gnts.rb_node = NULL; spin_lock_init(&blkif->free_pages_lock); INIT_LIST_HEAD(&blkif->free_pages); + INIT_LIST_HEAD(&blkif->persistent_purge_list); blkif->free_pages_num = 0; atomic_set(&blkif->persistent_gnt_in_use, 0); @@ -259,6 +260,17 @@ static void xen_blkif_free(struct xen_blkif *blkif) if (!atomic_dec_and_test(&blkif->refcnt)) BUG(); + /* Remove all persistent grants and the cache of ballooned pages. */ + xen_blkbk_free_caches(blkif); + + /* Make sure everything is drained before shutting down */ + BUG_ON(blkif->persistent_gnt_c != 0); + BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0); + BUG_ON(blkif->free_pages_num != 0); + BUG_ON(!list_empty(&blkif->persistent_purge_list)); + BUG_ON(!list_empty(&blkif->free_pages)); + BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); + /* Check that there is no request in use */ list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) { list_del(&req->free_list); From c05f3e3c85df1d89673e00cee7ece5ae4eb4c6ec Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 4 Feb 2014 11:26:14 +0100 Subject: [PATCH 3/5] xen-blkback: fix shutdown race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a new variable to keep track of the number of in-flight requests. We need to make sure that when xen_blkif_put is called the request has already been freed and we can safely free xen_blkif, which was not the case before. Signed-off-by: Roger Pau Monné Cc: Konrad Rzeszutek Wilk Cc: David Vrabel Reviewed-by: Boris Ostrovsky Tested-by: Matt Rushton Reviewed-by: Matt Rushton Cc: Matt Wilson Cc: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 32 ++++++++++++++++++++--------- drivers/block/xen-blkback/common.h | 1 + drivers/block/xen-blkback/xenbus.c | 1 + 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index dcfe49fd3cb4..394fa2eabf87 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -943,9 +943,7 @@ static void xen_blk_drain_io(struct xen_blkif *blkif) { atomic_set(&blkif->drain, 1); do { - /* The initial value is one, and one refcnt taken at the - * start of the xen_blkif_schedule thread. */ - if (atomic_read(&blkif->refcnt) <= 2) + if (atomic_read(&blkif->inflight) == 0) break; wait_for_completion_interruptible_timeout( &blkif->drain_complete, HZ); @@ -985,17 +983,30 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) * the proper response on the ring. */ if (atomic_dec_and_test(&pending_req->pendcnt)) { - xen_blkbk_unmap(pending_req->blkif, + struct xen_blkif *blkif = pending_req->blkif; + + xen_blkbk_unmap(blkif, pending_req->segments, pending_req->nr_pages); - make_response(pending_req->blkif, pending_req->id, + make_response(blkif, pending_req->id, pending_req->operation, pending_req->status); - xen_blkif_put(pending_req->blkif); - if (atomic_read(&pending_req->blkif->refcnt) <= 2) { - if (atomic_read(&pending_req->blkif->drain)) - complete(&pending_req->blkif->drain_complete); + free_req(blkif, pending_req); + /* + * Make sure the request is freed before releasing blkif, + * or there could be a race between free_req and the + * cleanup done in xen_blkif_free during shutdown. + * + * NB: The fact that we might try to wake up pending_free_wq + * before drain_complete (in case there's a drain going on) + * it's not a problem with our current implementation + * because we can assure there's no thread waiting on + * pending_free_wq if there's a drain going on, but it has + * to be taken into account if the current model is changed. + */ + if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) { + complete(&blkif->drain_complete); } - free_req(pending_req->blkif, pending_req); + xen_blkif_put(blkif); } } @@ -1249,6 +1260,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * below (in "!bio") if we are handling a BLKIF_OP_DISCARD. */ xen_blkif_get(blkif); + atomic_inc(&blkif->inflight); for (i = 0; i < nseg; i++) { while ((bio == NULL) || diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index f733d7627120..e40326a7f707 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -278,6 +278,7 @@ struct xen_blkif { /* for barrier (drain) requests */ struct completion drain_complete; atomic_t drain; + atomic_t inflight; /* One thread per one blkif. */ struct task_struct *xenblkd; unsigned int waiting_reqs; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 8afef67fecdd..84973c6a856a 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -128,6 +128,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) INIT_LIST_HEAD(&blkif->persistent_purge_list); blkif->free_pages_num = 0; atomic_set(&blkif->persistent_gnt_in_use, 0); + atomic_set(&blkif->inflight, 0); INIT_LIST_HEAD(&blkif->pending_free); From 80bfa2f6e2e81049fc6cd3bfaeedcb64db3a9ba6 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 4 Feb 2014 11:26:15 +0100 Subject: [PATCH 4/5] xen-blkif: drop struct blkif_request_segment_aligned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was wrongly introduced in commit 402b27f9, the only difference between blkif_request_segment_aligned and blkif_request_segment is that the former has a named padding, while both share the same memory layout. Also correct a few minor glitches in the description, including for it to no longer assume PAGE_SIZE == 4096. Signed-off-by: Roger Pau Monné [Description fix by Jan Beulich] Signed-off-by: Jan Beulich Reported-by: Jan Beulich Cc: Konrad Rzeszutek Wilk Cc: David Vrabel Cc: Boris Ostrovsky Tested-by: Matt Rushton Cc: Matt Wilson Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 2 +- drivers/block/xen-blkback/common.h | 2 +- drivers/block/xen-blkfront.c | 6 ++--- include/xen/interface/io/blkif.h | 34 ++++++++++++----------------- 4 files changed, 19 insertions(+), 25 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 394fa2eabf87..e612627ae981 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -847,7 +847,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req, struct grant_page **pages = pending_req->indirect_pages; struct xen_blkif *blkif = pending_req->blkif; int indirect_grefs, rc, n, nseg, i; - struct blkif_request_segment_aligned *segments = NULL; + struct blkif_request_segment *segments = NULL; nseg = pending_req->nr_pages; indirect_grefs = INDIRECT_PAGES(nseg); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index e40326a7f707..9eb34e24b4fe 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -57,7 +57,7 @@ #define MAX_INDIRECT_SEGMENTS 256 #define SEGS_PER_INDIRECT_FRAME \ - (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned)) + (PAGE_SIZE/sizeof(struct blkif_request_segment)) #define MAX_INDIRECT_PAGES \ ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) #define INDIRECT_PAGES(_segs) \ diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index c4a4c9006288..7d09dfc9735c 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -162,7 +162,7 @@ static DEFINE_SPINLOCK(minor_lock); #define DEV_NAME "xvd" /* name in /dev */ #define SEGS_PER_INDIRECT_FRAME \ - (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned)) + (PAGE_SIZE/sizeof(struct blkif_request_segment)) #define INDIRECT_GREFS(_segs) \ ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) @@ -393,7 +393,7 @@ static int blkif_queue_request(struct request *req) unsigned long id; unsigned int fsect, lsect; int i, ref, n; - struct blkif_request_segment_aligned *segments = NULL; + struct blkif_request_segment *segments = NULL; /* * Used to store if we are able to queue the request by just using @@ -550,7 +550,7 @@ static int blkif_queue_request(struct request *req) } else { n = i % SEGS_PER_INDIRECT_FRAME; segments[n] = - (struct blkif_request_segment_aligned) { + (struct blkif_request_segment) { .gref = ref, .first_sect = fsect, .last_sect = lsect }; diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index ae665ac59c36..32ec05a6572f 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h @@ -113,13 +113,13 @@ typedef uint64_t blkif_sector_t; * it's less than the number provided by the backend. The indirect_grefs field * in blkif_request_indirect should be filled by the frontend with the * grant references of the pages that are holding the indirect segments. - * This pages are filled with an array of blkif_request_segment_aligned - * that hold the information about the segments. The number of indirect - * pages to use is determined by the maximum number of segments - * a indirect request contains. Every indirect page can contain a maximum - * of 512 segments (PAGE_SIZE/sizeof(blkif_request_segment_aligned)), - * so to calculate the number of indirect pages to use we have to do - * ceil(indirect_segments/512). + * These pages are filled with an array of blkif_request_segment that hold the + * information about the segments. The number of indirect pages to use is + * determined by the number of segments an indirect request contains. Every + * indirect page can contain a maximum of + * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to + * calculate the number of indirect pages to use we have to do + * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))). * * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not* * create the "feature-max-indirect-segments" node! @@ -135,13 +135,12 @@ typedef uint64_t blkif_sector_t; #define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 -struct blkif_request_segment_aligned { - grant_ref_t gref; /* reference to I/O buffer frame */ - /* @first_sect: first sector in frame to transfer (inclusive). */ - /* @last_sect: last sector in frame to transfer (inclusive). */ - uint8_t first_sect, last_sect; - uint16_t _pad; /* padding to make it 8 bytes, so it's cache-aligned */ -} __attribute__((__packed__)); +struct blkif_request_segment { + grant_ref_t gref; /* reference to I/O buffer frame */ + /* @first_sect: first sector in frame to transfer (inclusive). */ + /* @last_sect: last sector in frame to transfer (inclusive). */ + uint8_t first_sect, last_sect; +}; struct blkif_request_rw { uint8_t nr_segments; /* number of segments */ @@ -151,12 +150,7 @@ struct blkif_request_rw { #endif uint64_t id; /* private guest value, echoed in resp */ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ - struct blkif_request_segment { - grant_ref_t gref; /* reference to I/O buffer frame */ - /* @first_sect: first sector in frame to transfer (inclusive). */ - /* @last_sect: last sector in frame to transfer (inclusive). */ - uint8_t first_sect, last_sect; - } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; } __attribute__((__packed__)); struct blkif_request_discard { From 3661371701e714f0cea4120f6a365340858fb4e4 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 4 Feb 2014 18:53:56 +0000 Subject: [PATCH 5/5] xen-blkfront: handle backend CLOSED without CLOSING Backend drivers shouldn't transistion to CLOSED unless the frontend is CLOSED. If a backend does transition to CLOSED too soon then the frontend may not see the CLOSING state and will not properly shutdown. So, treat an unexpected backend CLOSED state the same as CLOSING. Signed-off-by: David Vrabel Acked-by: Konrad Rzeszutek Wilk Cc: stable@vger.kernel.org Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 7d09dfc9735c..76e5c0b265b1 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1904,13 +1904,16 @@ static void blkback_changed(struct xenbus_device *dev, case XenbusStateReconfiguring: case XenbusStateReconfigured: case XenbusStateUnknown: - case XenbusStateClosed: break; case XenbusStateConnected: blkfront_connect(info); break; + case XenbusStateClosed: + if (dev->state == XenbusStateClosed) + break; + /* Missed the backend's Closing state -- fallthrough */ case XenbusStateClosing: blkfront_closing(info); break;