mm: extended batches for generic mmu_gather

Instead of using a single batch (the small on-stack, or an allocated
page), try and extend the batch every time it runs out and only flush once
either the extend fails or we're done.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Requested-by: Nick Piggin <npiggin@kernel.dk>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Miller <davem@davemloft.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-05-24 17:12:01 -07:00 · 2011-05-24 17:12:01 -07:00 · e303297e6c
parent 2672391169
commit e303297e6c
2 changed files with 85 additions and 48 deletions
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -19,16 +19,6 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 /*
 * For UP we don't need to worry about TLB flush
 * and page free order so much..
 */
 #ifdef CONFIG_SMP
  #define tlb_fast_mode(tlb) ((tlb)->nr == ~0U)
 #else
  #define tlb_fast_mode(tlb) 1
 #endif
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 /*
 * Semi RCU freeing of the page directories.
@@ -78,6 +68,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
 */
 #define MMU_GATHER_BUNDLE	8
 struct mmu_gather_batch {
 	struct mmu_gather_batch	*next;
 	unsigned int		nr;
 	unsigned int		max;
 	struct page		*pages[0];
 };
 #define MAX_GATHER_BATCH	\
 	((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *))
 /* struct mmu_gather is an opaque type used by the mm code for passing around
 * any data needed by arch specific code for tlb_remove_page.
 */
@@ -86,22 +86,48 @@ struct mmu_gather {
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	struct mmu_table_batch	*batch;
 #endif
-	unsigned int		nr;	/* set to ~0U means fast mode */
+	unsigned int		need_flush : 1,	/* Did free PTEs */
-	unsigned int		max;	/* nr < max */
+				fast_mode  : 1; /* No batching   */
-	unsigned int		need_flush;/* Really unmapped some ptes? */
+
-	unsigned int		fullmm; /* non-zero means full mm flush */
+	unsigned int		fullmm;
-	struct page		**pages;
+
-	struct page		*local[MMU_GATHER_BUNDLE];
+	struct mmu_gather_batch *active;
 	struct mmu_gather_batch	local;
 	struct page		*__pages[MMU_GATHER_BUNDLE];
 };
-static inline void __tlb_alloc_page(struct mmu_gather *tlb)
+/*
-{
+ * For UP we don't need to worry about TLB flush
-	unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
+ * and page free order so much..
 */
 #ifdef CONFIG_SMP
  #define tlb_fast_mode(tlb) (tlb->fast_mode)
 #else
  #define tlb_fast_mode(tlb) 1
 #endif
-	if (addr) {
+static inline int tlb_next_batch(struct mmu_gather *tlb)
-		tlb->pages = (void *)addr;
+{
-		tlb->max = PAGE_SIZE / sizeof(struct page *);
+	struct mmu_gather_batch *batch;
 	batch = tlb->active;
 	if (batch->next) {
 		tlb->active = batch->next;
 		return 1;
 	}
 	batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
 	if (!batch)
 		return 0;
 	batch->next = NULL;
 	batch->nr   = 0;
 	batch->max  = MAX_GATHER_BATCH;
 	tlb->active->next = batch;
 	tlb->active = batch;
 	return 1;
 }
 /* tlb_gather_mmu
@@ -114,16 +140,13 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
 {
 	tlb->mm = mm;
-	tlb->max = ARRAY_SIZE(tlb->local);
+	tlb->fullmm     = fullmm;
-	tlb->pages = tlb->local;
+	tlb->need_flush = 0;
-
+	tlb->fast_mode  = (num_possible_cpus() == 1);
-	if (num_online_cpus() > 1) {
+	tlb->local.next = NULL;
-		tlb->nr = 0;
+	tlb->local.nr   = 0;
-		__tlb_alloc_page(tlb);
+	tlb->local.max  = ARRAY_SIZE(tlb->__pages);
-	} else /* Use fast mode if only one CPU is online */
+	tlb->active     = &tlb->local;
 		tlb->nr = ~0U;
 	tlb->fullmm = fullmm;
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	tlb->batch = NULL;
@@ -133,6 +156,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
 static inline void
 tlb_flush_mmu(struct mmu_gather *tlb)
 {
 	struct mmu_gather_batch *batch;
 	if (!tlb->need_flush)
 		return;
 	tlb->need_flush = 0;
@@ -140,17 +165,15 @@ tlb_flush_mmu(struct mmu_gather *tlb)
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	tlb_table_flush(tlb);
 #endif
-	if (!tlb_fast_mode(tlb)) {
+
-		free_pages_and_swap_cache(tlb->pages, tlb->nr);
+	if (tlb_fast_mode(tlb))
-		tlb->nr = 0;
+		return;
-		/*
+
-		 * If we are using the local on-stack array of pages for MMU
+	for (batch = &tlb->local; batch; batch = batch->next) {
-		 * gather, try allocating an off-stack array again as we have
+		free_pages_and_swap_cache(batch->pages, batch->nr);
-		 * recently freed pages.
+		batch->nr = 0;
 		 */
 		if (tlb->pages == tlb->local)
 			__tlb_alloc_page(tlb);
 	}
 	tlb->active = &tlb->local;
 }
 /* tlb_finish_mmu
@@ -160,13 +183,18 @@ tlb_flush_mmu(struct mmu_gather *tlb)
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
 	struct mmu_gather_batch *batch, *next;
 	tlb_flush_mmu(tlb);
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
-	if (tlb->pages != tlb->local)
+	for (batch = tlb->local.next; batch; batch = next) {
-		free_pages((unsigned long)tlb->pages, 0);
+		next = batch->next;
 		free_pages((unsigned long)batch, 0);
 	}
 	tlb->local.next = NULL;
 }
 /* __tlb_remove_page
@@ -177,15 +205,24 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 */
 static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	struct mmu_gather_batch *batch;
 	tlb->need_flush = 1;
 	if (tlb_fast_mode(tlb)) {
 		free_page_and_swap_cache(page);
 		return 1; /* avoid calling tlb_flush_mmu() */
 	}
 	tlb->pages[tlb->nr++] = page;
 	VM_BUG_ON(tlb->nr > tlb->max);
-	return tlb->max - tlb->nr;
+	batch = tlb->active;
 	batch->pages[batch->nr++] = page;
 	VM_BUG_ON(batch->nr > batch->max);
 	if (batch->nr == batch->max) {
 		if (!tlb_next_batch(tlb))
 			return 0;
 	}
 	return batch->max - batch->nr;
 }
 /* tlb_remove_page
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -994,8 +994,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	spinlock_t *ptl;
 	int rss[NR_MM_COUNTERS];
 	init_rss_vec(rss);
 again:
 	init_rss_vec(rss);
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
 	do {