diff --git a/mm/swapfile.c b/mm/swapfile.c index dac47c66055c..98e52e373bd8 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -370,7 +370,8 @@ static void dec_cluster_info_page(struct swap_info_struct *p, * instead of free it immediately. The cluster will be freed * after discard. */ - if (p->flags & SWP_PAGE_DISCARD) { + if ((p->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) == + (SWP_WRITEOK | SWP_PAGE_DISCARD)) { swap_cluster_schedule_discard(p, idx); return; } @@ -1288,7 +1289,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, else continue; } - count = si->swap_map[i]; + count = ACCESS_ONCE(si->swap_map[i]); if (count && swap_count(count) != SWAP_MAP_BAD) break; } @@ -1308,7 +1309,11 @@ int try_to_unuse(unsigned int type, bool frontswap, { struct swap_info_struct *si = swap_info[type]; struct mm_struct *start_mm; - unsigned char *swap_map; + volatile unsigned char *swap_map; /* swap_map is accessed without + * locking. Mark it as volatile + * to prevent compiler doing + * something odd. + */ unsigned char swcount; struct page *page; swp_entry_t entry; @@ -1359,7 +1364,15 @@ int try_to_unuse(unsigned int type, bool frontswap, * reused since sys_swapoff() already disabled * allocation from here, or alloc_page() failed. */ - if (!*swap_map) + swcount = *swap_map; + /* + * We don't hold lock here, so the swap entry could be + * SWAP_MAP_BAD (when the cluster is discarding). + * Instead of fail out, We can just skip the swap + * entry because swapoff will wait for discarding + * finish anyway. + */ + if (!swcount || swcount == SWAP_MAP_BAD) continue; retval = -ENOMEM; break; @@ -2543,6 +2556,16 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage) goto unlock_out; count = p->swap_map[offset]; + + /* + * swapin_readahead() doesn't check if a swap entry is valid, so the + * swap entry could be SWAP_MAP_BAD. Check here with lock held. + */ + if (unlikely(swap_count(count) == SWAP_MAP_BAD)) { + err = -ENOENT; + goto unlock_out; + } + has_cache = count & SWAP_HAS_CACHE; count &= ~SWAP_HAS_CACHE; err = 0;