diff --git a/include/linux/swap.h b/include/linux/swap.h index ad220359f1b0..0af2bb2028fd 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -316,6 +316,7 @@ extern void lru_cache_add_active_or_unevictable(struct page *page, struct vm_area_struct *vma); /* linux/mm/vmscan.c */ +extern unsigned long zone_reclaimable_pages(struct zone *zone); extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); extern int __isolate_lru_page(struct page *page, isolate_mode_t mode); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8bcc10616fab..fa39efc3a692 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3386,6 +3386,77 @@ static inline bool is_thp_gfp_mask(gfp_t gfp_mask) return (gfp_mask & (GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM)) == GFP_TRANSHUGE; } +/* + * Maximum number of reclaim retries without any progress before OOM killer + * is consider as the only way to move forward. + */ +#define MAX_RECLAIM_RETRIES 16 + +/* + * Checks whether it makes sense to retry the reclaim to make a forward progress + * for the given allocation request. + * The reclaim feedback represented by did_some_progress (any progress during + * the last reclaim round), pages_reclaimed (cumulative number of reclaimed + * pages) and no_progress_loops (number of reclaim rounds without any progress + * in a row) is considered as well as the reclaimable pages on the applicable + * zone list (with a backoff mechanism which is a function of no_progress_loops). + * + * Returns true if a retry is viable or false to enter the oom path. + */ +static inline bool +should_reclaim_retry(gfp_t gfp_mask, unsigned order, + struct alloc_context *ac, int alloc_flags, + bool did_some_progress, unsigned long pages_reclaimed, + int no_progress_loops) +{ + struct zone *zone; + struct zoneref *z; + + /* + * Make sure we converge to OOM if we cannot make any progress + * several times in the row. + */ + if (no_progress_loops > MAX_RECLAIM_RETRIES) + return false; + + if (order > PAGE_ALLOC_COSTLY_ORDER) { + if (pages_reclaimed >= (1<zonelist, ac->high_zoneidx, + ac->nodemask) { + unsigned long available; + + available = zone_reclaimable_pages(zone); + available -= DIV_ROUND_UP(no_progress_loops * available, + MAX_RECLAIM_RETRIES); + available += zone_page_state_snapshot(zone, NR_FREE_PAGES); + + /* + * Would the allocation succeed if we reclaimed the whole + * available? + */ + if (__zone_watermark_ok(zone, order, min_wmark_pages(zone), + ac->high_zoneidx, alloc_flags, available)) { + /* Wait for some write requests to complete then retry */ + wait_iff_congested(zone, BLK_RW_ASYNC, HZ/50); + return true; + } + } + + return false; +} + static inline struct page * __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct alloc_context *ac) @@ -3397,6 +3468,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, unsigned long did_some_progress; enum migrate_mode migration_mode = MIGRATE_ASYNC; enum compact_result compact_result; + int no_progress_loops = 0; /* * In the slowpath, we sanity check order to avoid ever trying to @@ -3525,23 +3597,35 @@ retry: if (gfp_mask & __GFP_NORETRY) goto noretry; - /* Keep reclaiming pages as long as there is reasonable progress */ - pages_reclaimed += did_some_progress; - if ((did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER) || - ((gfp_mask & __GFP_REPEAT) && pages_reclaimed < (1 << order))) { - /* Wait for some write requests to complete then retry */ - wait_iff_congested(ac->preferred_zoneref->zone, BLK_RW_ASYNC, HZ/50); - goto retry; + /* + * Do not retry costly high order allocations unless they are + * __GFP_REPEAT + */ + if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT)) + goto noretry; + + if (did_some_progress) { + no_progress_loops = 0; + pages_reclaimed += did_some_progress; + } else { + no_progress_loops++; } + if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags, + did_some_progress > 0, pages_reclaimed, + no_progress_loops)) + goto retry; + /* Reclaim has failed us, start killing things */ page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress); if (page) goto got_pg; /* Retry as long as the OOM killer is making progress */ - if (did_some_progress) + if (did_some_progress) { + no_progress_loops = 0; goto retry; + } noretry: /* diff --git a/mm/vmscan.c b/mm/vmscan.c index a386454c015a..c4a2f4512fca 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -191,7 +191,7 @@ static bool sane_reclaim(struct scan_control *sc) } #endif -static unsigned long zone_reclaimable_pages(struct zone *zone) +unsigned long zone_reclaimable_pages(struct zone *zone) { unsigned long nr; @@ -2507,10 +2507,8 @@ static inline bool compaction_ready(struct zone *zone, int order, int classzone_ * * If a zone is deemed to be full of pinned pages then just give it a light * scan then give up on it. - * - * Returns true if a zone was reclaimable. */ -static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) +static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) { struct zoneref *z; struct zone *zone; @@ -2518,7 +2516,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) unsigned long nr_soft_scanned; gfp_t orig_mask; enum zone_type requested_highidx = gfp_zone(sc->gfp_mask); - bool reclaimable = false; /* * If the number of buffer_heads in the machine exceeds the maximum @@ -2583,17 +2580,10 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) &nr_soft_scanned); sc->nr_reclaimed += nr_soft_reclaimed; sc->nr_scanned += nr_soft_scanned; - if (nr_soft_reclaimed) - reclaimable = true; /* need some check for avoid more shrink_zone() */ } - if (shrink_zone(zone, sc, zone_idx(zone) == classzone_idx)) - reclaimable = true; - - if (global_reclaim(sc) && - !reclaimable && zone_reclaimable(zone)) - reclaimable = true; + shrink_zone(zone, sc, zone_idx(zone) == classzone_idx); } /* @@ -2601,8 +2591,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) * promoted it to __GFP_HIGHMEM. */ sc->gfp_mask = orig_mask; - - return reclaimable; } /* @@ -2627,7 +2615,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, int initial_priority = sc->priority; unsigned long total_scanned = 0; unsigned long writeback_threshold; - bool zones_reclaimable; retry: delayacct_freepages_start(); @@ -2638,7 +2625,7 @@ retry: vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup, sc->priority); sc->nr_scanned = 0; - zones_reclaimable = shrink_zones(zonelist, sc); + shrink_zones(zonelist, sc); total_scanned += sc->nr_scanned; if (sc->nr_reclaimed >= sc->nr_to_reclaim) @@ -2685,10 +2672,6 @@ retry: goto retry; } - /* Any of the zones still reclaimable? Don't OOM. */ - if (zones_reclaimable) - return 1; - return 0; }