diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 6c6fb116e925..680d3395fc83 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -317,6 +317,9 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root, int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **memcgp, bool compound); +int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask, struct mem_cgroup **memcgp, + bool compound); void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, bool lrucare, bool compound); void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg, @@ -789,6 +792,16 @@ static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, return 0; } +static inline int mem_cgroup_try_charge_delay(struct page *page, + struct mm_struct *mm, + gfp_t gfp_mask, + struct mem_cgroup **memcgp, + bool compound) +{ + *memcgp = NULL; + return 0; +} + static inline void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, bool lrucare, bool compound) diff --git a/include/linux/swap.h b/include/linux/swap.h index c063443d8638..1a8bd05a335e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -629,7 +629,6 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg) return memcg->swappiness; } - #else static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) { @@ -637,6 +636,16 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) } #endif +#if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) +extern void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node, + gfp_t gfp_mask); +#else +static inline void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, + int node, gfp_t gfp_mask) +{ +} +#endif + #ifdef CONFIG_MEMCG_SWAP extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry); extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 1cd7c1a57a14..b87d5b151db2 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -552,7 +552,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, VM_BUG_ON_PAGE(!PageCompound(page), page); - if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) { + if (mem_cgroup_try_charge_delay(page, vma->vm_mm, gfp, &memcg, true)) { put_page(page); count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; @@ -1142,7 +1142,7 @@ static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd, pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE, vma, vmf->address, page_to_nid(page)); if (unlikely(!pages[i] || - mem_cgroup_try_charge(pages[i], vma->vm_mm, + mem_cgroup_try_charge_delay(pages[i], vma->vm_mm, GFP_KERNEL, &memcg, false))) { if (pages[i]) put_page(pages[i]); @@ -1312,7 +1312,7 @@ alloc: goto out; } - if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm, + if (unlikely(mem_cgroup_try_charge_delay(new_page, vma->vm_mm, huge_gfp, &memcg, true))) { put_page(new_page); split_huge_pmd(vma, vmf->pmd, vmf->address); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e6f0d5ef320a..64bd28d35388 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5593,6 +5593,19 @@ out: return ret; } +int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask, struct mem_cgroup **memcgp, + bool compound) +{ + struct mem_cgroup *memcg; + int ret; + + ret = mem_cgroup_try_charge(page, mm, gfp_mask, memcgp, compound); + memcg = *memcgp; + mem_cgroup_throttle_swaprate(memcg, page_to_nid(page), gfp_mask); + return ret; +} + /** * mem_cgroup_commit_charge - commit a page charge * @page: page to charge diff --git a/mm/memory.c b/mm/memory.c index 7206a634270b..dfe80c574282 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2503,7 +2503,7 @@ static int wp_page_copy(struct vm_fault *vmf) cow_user_page(new_page, old_page, vmf->address, vma); } - if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false)) + if (mem_cgroup_try_charge_delay(new_page, mm, GFP_KERNEL, &memcg, false)) goto oom_free_new; __SetPageUptodate(new_page); @@ -3003,8 +3003,8 @@ int do_swap_page(struct vm_fault *vmf) goto out_page; } - if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, - &memcg, false)) { + if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, + &memcg, false)) { ret = VM_FAULT_OOM; goto out_page; } @@ -3165,7 +3165,8 @@ static int do_anonymous_page(struct vm_fault *vmf) if (!page) goto oom; - if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false)) + if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, &memcg, + false)) goto oom_free_page; /* @@ -3661,7 +3662,7 @@ static int do_cow_fault(struct vm_fault *vmf) if (!vmf->cow_page) return VM_FAULT_OOM; - if (mem_cgroup_try_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL, + if (mem_cgroup_try_charge_delay(vmf->cow_page, vma->vm_mm, GFP_KERNEL, &vmf->memcg, false)) { put_page(vmf->cow_page); return VM_FAULT_OOM; diff --git a/mm/shmem.c b/mm/shmem.c index 2cab84403055..6206ca3510cf 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1239,8 +1239,8 @@ int shmem_unuse(swp_entry_t swap, struct page *page) * the shmem_swaplist_mutex which might hold up shmem_writepage(). * Charged back to the user (not to caller) when swap account is used. */ - error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg, - false); + error = mem_cgroup_try_charge_delay(page, current->mm, GFP_KERNEL, + &memcg, false); if (error) goto out; /* No radix_tree_preload: swap entry keeps a place for page in tree */ @@ -1712,7 +1712,7 @@ repeat: goto failed; } - error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg, + error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg, false); if (!error) { error = shmem_add_to_page_cache(page, mapping, index, @@ -1818,7 +1818,7 @@ alloc_nohuge: page = shmem_alloc_and_acct_page(gfp, inode, if (sgp == SGP_WRITE) __SetPageReferenced(page); - error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg, + error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg, PageTransHuge(page)); if (error) goto unacct; @@ -2291,7 +2291,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, __SetPageSwapBacked(page); __SetPageUptodate(page); - ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false); + ret = mem_cgroup_try_charge_delay(page, dst_mm, gfp, &memcg, false); if (ret) goto out_release; diff --git a/mm/swapfile.c b/mm/swapfile.c index 2cc2972eedaf..db4ec8ae1c8c 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3731,6 +3731,37 @@ static void free_swap_count_continuations(struct swap_info_struct *si) } } +#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) +void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node, + gfp_t gfp_mask) +{ + struct swap_info_struct *si, *next; + if (!(gfp_mask & __GFP_IO) || !memcg) + return; + + if (!blk_cgroup_congested()) + return; + + /* + * We've already scheduled a throttle, avoid taking the global swap + * lock. + */ + if (current->throttle_queue) + return; + + spin_lock(&swap_avail_lock); + plist_for_each_entry_safe(si, next, &swap_avail_heads[node], + avail_lists[node]) { + if (si->bdev) { + blkcg_schedule_throttle(bdev_get_queue(si->bdev), + true); + break; + } + } + spin_unlock(&swap_avail_lock); +} +#endif + static int __init swapfile_init(void) { int nid;