Merge branch 'akpm' (patches from Andrew)
Merge fixes from Andrew Morton: "27 fixes. There are three patches that aren't actually fixes. They're simple function renamings which are nice-to-have in mainline as ongoing net development depends on them." * akpm: (27 commits) timerfd: export defines to userspace mm/hugetlb.c: fix reservation race when freeing surplus pages mm/slab.c: fix SLAB freelist randomization duplicate entries zram: support BDI_CAP_STABLE_WRITES zram: revalidate disk under init_lock mm: support anonymous stable page mm: add documentation for page fragment APIs mm: rename __page_frag functions to __page_frag_cache, drop order from drain mm: rename __alloc_page_frag to page_frag_alloc and __free_page_frag to page_frag_free mm, memcg: fix the active list aging for lowmem requests when memcg is enabled mm: don't dereference struct page fields of invalid pages mailmap: add codeaurora.org names for nameless email commits signal: protect SIGNAL_UNKILLABLE from unintentional clearing. mm: pmd dirty emulation in page fault handler ipc/sem.c: fix incorrect sem_lock pairing lib/Kconfig.debug: fix frv build failure mm: get rid of __GFP_OTHER_NODE mm: fix remote numa hits statistics mm: fix devm_memremap_pages crash, use mem_hotplug_{begin, done} ocfs2: fix crash caused by stale lvb with fsdlm plugin ...hifive-unleashed-5.1
commit
ba836a6f5a
4
.mailmap
4
.mailmap
|
@ -137,6 +137,7 @@ Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
|
||||||
Rudolf Marek <R.Marek@sh.cvut.cz>
|
Rudolf Marek <R.Marek@sh.cvut.cz>
|
||||||
Rui Saraiva <rmps@joel.ist.utl.pt>
|
Rui Saraiva <rmps@joel.ist.utl.pt>
|
||||||
Sachin P Sant <ssant@in.ibm.com>
|
Sachin P Sant <ssant@in.ibm.com>
|
||||||
|
Sarangdhar Joshi <spjoshi@codeaurora.org>
|
||||||
Sam Ravnborg <sam@mars.ravnborg.org>
|
Sam Ravnborg <sam@mars.ravnborg.org>
|
||||||
Santosh Shilimkar <ssantosh@kernel.org>
|
Santosh Shilimkar <ssantosh@kernel.org>
|
||||||
Santosh Shilimkar <santosh.shilimkar@oracle.org>
|
Santosh Shilimkar <santosh.shilimkar@oracle.org>
|
||||||
|
@ -150,10 +151,13 @@ Shuah Khan <shuah@kernel.org> <shuah.kh@samsung.com>
|
||||||
Simon Kelley <simon@thekelleys.org.uk>
|
Simon Kelley <simon@thekelleys.org.uk>
|
||||||
Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
|
Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
|
||||||
Stephen Hemminger <shemminger@osdl.org>
|
Stephen Hemminger <shemminger@osdl.org>
|
||||||
|
Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
|
||||||
|
Subhash Jadavani <subhashj@codeaurora.org>
|
||||||
Sudeep Holla <sudeep.holla@arm.com> Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
|
Sudeep Holla <sudeep.holla@arm.com> Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
|
||||||
Sumit Semwal <sumit.semwal@ti.com>
|
Sumit Semwal <sumit.semwal@ti.com>
|
||||||
Tejun Heo <htejun@gmail.com>
|
Tejun Heo <htejun@gmail.com>
|
||||||
Thomas Graf <tgraf@suug.ch>
|
Thomas Graf <tgraf@suug.ch>
|
||||||
|
Thomas Pedersen <twp@codeaurora.org>
|
||||||
Tony Luck <tony.luck@intel.com>
|
Tony Luck <tony.luck@intel.com>
|
||||||
Tsuneo Yoshioka <Tsuneo.Yoshioka@f-secure.com>
|
Tsuneo Yoshioka <Tsuneo.Yoshioka@f-secure.com>
|
||||||
Uwe Kleine-König <ukleinek@informatik.uni-freiburg.de>
|
Uwe Kleine-König <ukleinek@informatik.uni-freiburg.de>
|
||||||
|
|
|
@ -0,0 +1,42 @@
|
||||||
|
Page fragments
|
||||||
|
--------------
|
||||||
|
|
||||||
|
A page fragment is an arbitrary-length arbitrary-offset area of memory
|
||||||
|
which resides within a 0 or higher order compound page. Multiple
|
||||||
|
fragments within that page are individually refcounted, in the page's
|
||||||
|
reference counter.
|
||||||
|
|
||||||
|
The page_frag functions, page_frag_alloc and page_frag_free, provide a
|
||||||
|
simple allocation framework for page fragments. This is used by the
|
||||||
|
network stack and network device drivers to provide a backing region of
|
||||||
|
memory for use as either an sk_buff->head, or to be used in the "frags"
|
||||||
|
portion of skb_shared_info.
|
||||||
|
|
||||||
|
In order to make use of the page fragment APIs a backing page fragment
|
||||||
|
cache is needed. This provides a central point for the fragment allocation
|
||||||
|
and allows multiple calls to make use of a cached page. The
|
||||||
|
advantage to doing this is that multiple calls to get_page, which can be
|
||||||
|
expensive at allocation time, can be avoided. However, due to the nature of
|
||||||
|
this caching it is required that any calls to the cache be protected by
|
||||||
|
either a per-cpu limitation, or a per-cpu limitation and forcing interrupts
|
||||||
|
to be disabled when executing the fragment allocation.
|
||||||
|
|
||||||
|
The network stack uses two separate caches per CPU to handle fragment
|
||||||
|
allocation. The netdev_alloc_cache is used by callers making use of the
|
||||||
|
__netdev_alloc_frag and __netdev_alloc_skb calls. The napi_alloc_cache is
|
||||||
|
used by callers of the __napi_alloc_frag and __napi_alloc_skb calls. The
|
||||||
|
main difference between these two calls is the context in which they may be
|
||||||
|
called. The "netdev" prefixed functions are usable in any context as these
|
||||||
|
functions will disable interrupts, while the "napi" prefixed functions are
|
||||||
|
only usable within the softirq context.
|
||||||
|
|
||||||
|
Many network device drivers use a similar methodology for allocating page
|
||||||
|
fragments, but the page fragments are cached at the ring or descriptor
|
||||||
|
level. In order to enable these cases it is necessary to provide a generic
|
||||||
|
way of tearing down a page cache. For this reason __page_frag_cache_drain
|
||||||
|
was implemented. It allows for freeing multiple references from a single
|
||||||
|
page via a single call. The advantage to doing this is that it allows for
|
||||||
|
cleaning up the multiple references that were added to a page in order to
|
||||||
|
avoid calling get_page per allocation.
|
||||||
|
|
||||||
|
Alexander Duyck, Nov 29, 2016.
|
|
@ -81,7 +81,6 @@ Descriptions of section entries:
|
||||||
Q: Patchwork web based patch tracking system site
|
Q: Patchwork web based patch tracking system site
|
||||||
T: SCM tree type and location.
|
T: SCM tree type and location.
|
||||||
Type is one of: git, hg, quilt, stgit, topgit
|
Type is one of: git, hg, quilt, stgit, topgit
|
||||||
B: Bug tracking system location.
|
|
||||||
S: Status, one of the following:
|
S: Status, one of the following:
|
||||||
Supported: Someone is actually paid to look after this.
|
Supported: Someone is actually paid to look after this.
|
||||||
Maintained: Someone actually looks after it.
|
Maintained: Someone actually looks after it.
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
#include <linux/genhd.h>
|
#include <linux/genhd.h>
|
||||||
#include <linux/highmem.h>
|
#include <linux/highmem.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
|
#include <linux/backing-dev.h>
|
||||||
#include <linux/string.h>
|
#include <linux/string.h>
|
||||||
#include <linux/vmalloc.h>
|
#include <linux/vmalloc.h>
|
||||||
#include <linux/err.h>
|
#include <linux/err.h>
|
||||||
|
@ -112,6 +113,14 @@ static inline bool is_partial_io(struct bio_vec *bvec)
|
||||||
return bvec->bv_len != PAGE_SIZE;
|
return bvec->bv_len != PAGE_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void zram_revalidate_disk(struct zram *zram)
|
||||||
|
{
|
||||||
|
revalidate_disk(zram->disk);
|
||||||
|
/* revalidate_disk() resets BDI_CAP_STABLE_WRITES, so set it again */
|
||||||
|
zram->disk->queue->backing_dev_info.capabilities |=
|
||||||
|
BDI_CAP_STABLE_WRITES;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if request is within bounds and aligned on zram logical blocks.
|
* Check if request is within bounds and aligned on zram logical blocks.
|
||||||
*/
|
*/
|
||||||
|
@ -1095,15 +1104,9 @@ static ssize_t disksize_store(struct device *dev,
|
||||||
zram->comp = comp;
|
zram->comp = comp;
|
||||||
zram->disksize = disksize;
|
zram->disksize = disksize;
|
||||||
set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
|
set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
|
||||||
|
zram_revalidate_disk(zram);
|
||||||
up_write(&zram->init_lock);
|
up_write(&zram->init_lock);
|
||||||
|
|
||||||
/*
|
|
||||||
* Revalidate disk out of the init_lock to avoid lockdep splat.
|
|
||||||
* It's okay because disk's capacity is protected by init_lock
|
|
||||||
* so that revalidate_disk always sees up-to-date capacity.
|
|
||||||
*/
|
|
||||||
revalidate_disk(zram->disk);
|
|
||||||
|
|
||||||
return len;
|
return len;
|
||||||
|
|
||||||
out_destroy_comp:
|
out_destroy_comp:
|
||||||
|
@ -1149,7 +1152,7 @@ static ssize_t reset_store(struct device *dev,
|
||||||
/* Make sure all the pending I/O are finished */
|
/* Make sure all the pending I/O are finished */
|
||||||
fsync_bdev(bdev);
|
fsync_bdev(bdev);
|
||||||
zram_reset_device(zram);
|
zram_reset_device(zram);
|
||||||
revalidate_disk(zram->disk);
|
zram_revalidate_disk(zram);
|
||||||
bdput(bdev);
|
bdput(bdev);
|
||||||
|
|
||||||
mutex_lock(&bdev->bd_mutex);
|
mutex_lock(&bdev->bd_mutex);
|
||||||
|
|
|
@ -3962,7 +3962,7 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring)
|
||||||
PAGE_SIZE,
|
PAGE_SIZE,
|
||||||
DMA_FROM_DEVICE,
|
DMA_FROM_DEVICE,
|
||||||
DMA_ATTR_SKIP_CPU_SYNC);
|
DMA_ATTR_SKIP_CPU_SYNC);
|
||||||
__page_frag_drain(buffer_info->page, 0,
|
__page_frag_cache_drain(buffer_info->page,
|
||||||
buffer_info->pagecnt_bias);
|
buffer_info->pagecnt_bias);
|
||||||
|
|
||||||
buffer_info->page = NULL;
|
buffer_info->page = NULL;
|
||||||
|
@ -6991,7 +6991,7 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
|
||||||
dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
|
dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
|
||||||
PAGE_SIZE, DMA_FROM_DEVICE,
|
PAGE_SIZE, DMA_FROM_DEVICE,
|
||||||
DMA_ATTR_SKIP_CPU_SYNC);
|
DMA_ATTR_SKIP_CPU_SYNC);
|
||||||
__page_frag_drain(page, 0, rx_buffer->pagecnt_bias);
|
__page_frag_cache_drain(page, rx_buffer->pagecnt_bias);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* clear contents of rx_buffer */
|
/* clear contents of rx_buffer */
|
||||||
|
|
33
fs/dax.c
33
fs/dax.c
|
@ -691,8 +691,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
|
||||||
pgoff_t index, unsigned long pfn)
|
pgoff_t index, unsigned long pfn)
|
||||||
{
|
{
|
||||||
struct vm_area_struct *vma;
|
struct vm_area_struct *vma;
|
||||||
pte_t *ptep;
|
pte_t pte, *ptep = NULL;
|
||||||
pte_t pte;
|
pmd_t *pmdp = NULL;
|
||||||
spinlock_t *ptl;
|
spinlock_t *ptl;
|
||||||
bool changed;
|
bool changed;
|
||||||
|
|
||||||
|
@ -707,12 +707,32 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
|
||||||
|
|
||||||
address = pgoff_address(index, vma);
|
address = pgoff_address(index, vma);
|
||||||
changed = false;
|
changed = false;
|
||||||
if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
|
if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if (pmdp) {
|
||||||
|
#ifdef CONFIG_FS_DAX_PMD
|
||||||
|
pmd_t pmd;
|
||||||
|
|
||||||
|
if (pfn != pmd_pfn(*pmdp))
|
||||||
|
goto unlock_pmd;
|
||||||
|
if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
|
||||||
|
goto unlock_pmd;
|
||||||
|
|
||||||
|
flush_cache_page(vma, address, pfn);
|
||||||
|
pmd = pmdp_huge_clear_flush(vma, address, pmdp);
|
||||||
|
pmd = pmd_wrprotect(pmd);
|
||||||
|
pmd = pmd_mkclean(pmd);
|
||||||
|
set_pmd_at(vma->vm_mm, address, pmdp, pmd);
|
||||||
|
changed = true;
|
||||||
|
unlock_pmd:
|
||||||
|
spin_unlock(ptl);
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
if (pfn != pte_pfn(*ptep))
|
if (pfn != pte_pfn(*ptep))
|
||||||
goto unlock;
|
goto unlock_pte;
|
||||||
if (!pte_dirty(*ptep) && !pte_write(*ptep))
|
if (!pte_dirty(*ptep) && !pte_write(*ptep))
|
||||||
goto unlock;
|
goto unlock_pte;
|
||||||
|
|
||||||
flush_cache_page(vma, address, pfn);
|
flush_cache_page(vma, address, pfn);
|
||||||
pte = ptep_clear_flush(vma, address, ptep);
|
pte = ptep_clear_flush(vma, address, ptep);
|
||||||
|
@ -720,8 +740,9 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
|
||||||
pte = pte_mkclean(pte);
|
pte = pte_mkclean(pte);
|
||||||
set_pte_at(vma->vm_mm, address, ptep, pte);
|
set_pte_at(vma->vm_mm, address, ptep, pte);
|
||||||
changed = true;
|
changed = true;
|
||||||
unlock:
|
unlock_pte:
|
||||||
pte_unmap_unlock(ptep, ptl);
|
pte_unmap_unlock(ptep, ptl);
|
||||||
|
}
|
||||||
|
|
||||||
if (changed)
|
if (changed)
|
||||||
mmu_notifier_invalidate_page(vma->vm_mm, address);
|
mmu_notifier_invalidate_page(vma->vm_mm, address);
|
||||||
|
|
|
@ -3303,6 +3303,16 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
|
||||||
mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
|
mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
|
||||||
lockres->l_level, new_level);
|
lockres->l_level, new_level);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On DLM_LKF_VALBLK, fsdlm behaves differently from o2cb. It always
|
||||||
|
* expects DLM_LKF_VALBLK being set if the LKB has LVB, so that
|
||||||
|
* we can recover correctly from node failure. Otherwise, we may get
|
||||||
|
* invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
|
||||||
|
*/
|
||||||
|
if (!ocfs2_is_o2cb_active() &&
|
||||||
|
lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
|
||||||
|
lvb = 1;
|
||||||
|
|
||||||
if (lvb)
|
if (lvb)
|
||||||
dlm_flags |= DLM_LKF_VALBLK;
|
dlm_flags |= DLM_LKF_VALBLK;
|
||||||
|
|
||||||
|
|
|
@ -48,6 +48,12 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
|
||||||
*/
|
*/
|
||||||
static struct ocfs2_stack_plugin *active_stack;
|
static struct ocfs2_stack_plugin *active_stack;
|
||||||
|
|
||||||
|
inline int ocfs2_is_o2cb_active(void)
|
||||||
|
{
|
||||||
|
return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active);
|
||||||
|
|
||||||
static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
|
static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
|
||||||
{
|
{
|
||||||
struct ocfs2_stack_plugin *p;
|
struct ocfs2_stack_plugin *p;
|
||||||
|
|
|
@ -298,6 +298,9 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p
|
||||||
int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
|
int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
|
||||||
void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
|
void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
|
||||||
|
|
||||||
|
/* In ocfs2_downconvert_lock(), we need to know which stack we are using */
|
||||||
|
int ocfs2_is_o2cb_active(void);
|
||||||
|
|
||||||
extern struct kset *ocfs2_kset;
|
extern struct kset *ocfs2_kset;
|
||||||
|
|
||||||
#endif /* STACKGLUE_H */
|
#endif /* STACKGLUE_H */
|
||||||
|
|
|
@ -38,9 +38,8 @@ struct vm_area_struct;
|
||||||
#define ___GFP_ACCOUNT 0x100000u
|
#define ___GFP_ACCOUNT 0x100000u
|
||||||
#define ___GFP_NOTRACK 0x200000u
|
#define ___GFP_NOTRACK 0x200000u
|
||||||
#define ___GFP_DIRECT_RECLAIM 0x400000u
|
#define ___GFP_DIRECT_RECLAIM 0x400000u
|
||||||
#define ___GFP_OTHER_NODE 0x800000u
|
#define ___GFP_WRITE 0x800000u
|
||||||
#define ___GFP_WRITE 0x1000000u
|
#define ___GFP_KSWAPD_RECLAIM 0x1000000u
|
||||||
#define ___GFP_KSWAPD_RECLAIM 0x2000000u
|
|
||||||
/* If the above are modified, __GFP_BITS_SHIFT may need updating */
|
/* If the above are modified, __GFP_BITS_SHIFT may need updating */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -172,11 +171,6 @@ struct vm_area_struct;
|
||||||
* __GFP_NOTRACK_FALSE_POSITIVE is an alias of __GFP_NOTRACK. It's a means of
|
* __GFP_NOTRACK_FALSE_POSITIVE is an alias of __GFP_NOTRACK. It's a means of
|
||||||
* distinguishing in the source between false positives and allocations that
|
* distinguishing in the source between false positives and allocations that
|
||||||
* cannot be supported (e.g. page tables).
|
* cannot be supported (e.g. page tables).
|
||||||
*
|
|
||||||
* __GFP_OTHER_NODE is for allocations that are on a remote node but that
|
|
||||||
* should not be accounted for as a remote allocation in vmstat. A
|
|
||||||
* typical user would be khugepaged collapsing a huge page on a remote
|
|
||||||
* node.
|
|
||||||
*/
|
*/
|
||||||
#define __GFP_COLD ((__force gfp_t)___GFP_COLD)
|
#define __GFP_COLD ((__force gfp_t)___GFP_COLD)
|
||||||
#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN)
|
#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN)
|
||||||
|
@ -184,10 +178,9 @@ struct vm_area_struct;
|
||||||
#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO)
|
#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO)
|
||||||
#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK)
|
#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK)
|
||||||
#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
|
#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
|
||||||
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE)
|
|
||||||
|
|
||||||
/* Room for N __GFP_FOO bits */
|
/* Room for N __GFP_FOO bits */
|
||||||
#define __GFP_BITS_SHIFT 26
|
#define __GFP_BITS_SHIFT 25
|
||||||
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
|
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -506,11 +499,10 @@ extern void free_hot_cold_page(struct page *page, bool cold);
|
||||||
extern void free_hot_cold_page_list(struct list_head *list, bool cold);
|
extern void free_hot_cold_page_list(struct list_head *list, bool cold);
|
||||||
|
|
||||||
struct page_frag_cache;
|
struct page_frag_cache;
|
||||||
extern void __page_frag_drain(struct page *page, unsigned int order,
|
extern void __page_frag_cache_drain(struct page *page, unsigned int count);
|
||||||
unsigned int count);
|
extern void *page_frag_alloc(struct page_frag_cache *nc,
|
||||||
extern void *__alloc_page_frag(struct page_frag_cache *nc,
|
|
||||||
unsigned int fragsz, gfp_t gfp_mask);
|
unsigned int fragsz, gfp_t gfp_mask);
|
||||||
extern void __free_page_frag(void *addr);
|
extern void page_frag_free(void *addr);
|
||||||
|
|
||||||
#define __free_page(page) __free_pages((page), 0)
|
#define __free_page(page) __free_pages((page), 0)
|
||||||
#define free_page(addr) free_pages((addr), 0)
|
#define free_page(addr) free_pages((addr), 0)
|
||||||
|
|
|
@ -120,7 +120,7 @@ struct mem_cgroup_reclaim_iter {
|
||||||
*/
|
*/
|
||||||
struct mem_cgroup_per_node {
|
struct mem_cgroup_per_node {
|
||||||
struct lruvec lruvec;
|
struct lruvec lruvec;
|
||||||
unsigned long lru_size[NR_LRU_LISTS];
|
unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
|
||||||
|
|
||||||
struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1];
|
struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1];
|
||||||
|
|
||||||
|
@ -432,7 +432,7 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
|
||||||
int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
|
int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
|
||||||
|
|
||||||
void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
|
void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
|
||||||
int nr_pages);
|
int zid, int nr_pages);
|
||||||
|
|
||||||
unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
|
unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
|
||||||
int nid, unsigned int lru_mask);
|
int nid, unsigned int lru_mask);
|
||||||
|
@ -441,9 +441,23 @@ static inline
|
||||||
unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
|
unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
|
||||||
{
|
{
|
||||||
struct mem_cgroup_per_node *mz;
|
struct mem_cgroup_per_node *mz;
|
||||||
|
unsigned long nr_pages = 0;
|
||||||
|
int zid;
|
||||||
|
|
||||||
mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
|
mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
|
||||||
return mz->lru_size[lru];
|
for (zid = 0; zid < MAX_NR_ZONES; zid++)
|
||||||
|
nr_pages += mz->lru_zone_size[zid][lru];
|
||||||
|
return nr_pages;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline
|
||||||
|
unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
|
||||||
|
enum lru_list lru, int zone_idx)
|
||||||
|
{
|
||||||
|
struct mem_cgroup_per_node *mz;
|
||||||
|
|
||||||
|
mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
|
||||||
|
return mz->lru_zone_size[zone_idx][lru];
|
||||||
}
|
}
|
||||||
|
|
||||||
void mem_cgroup_handle_over_high(void);
|
void mem_cgroup_handle_over_high(void);
|
||||||
|
@ -671,6 +685,12 @@ mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
static inline
|
||||||
|
unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
|
||||||
|
enum lru_list lru, int zone_idx)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static inline unsigned long
|
static inline unsigned long
|
||||||
mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
|
mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
|
||||||
|
|
|
@ -1210,8 +1210,8 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
|
||||||
struct vm_area_struct *vma);
|
struct vm_area_struct *vma);
|
||||||
void unmap_mapping_range(struct address_space *mapping,
|
void unmap_mapping_range(struct address_space *mapping,
|
||||||
loff_t const holebegin, loff_t const holelen, int even_cows);
|
loff_t const holebegin, loff_t const holelen, int even_cows);
|
||||||
int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp,
|
int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
|
||||||
spinlock_t **ptlp);
|
pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
|
||||||
int follow_pfn(struct vm_area_struct *vma, unsigned long address,
|
int follow_pfn(struct vm_area_struct *vma, unsigned long address,
|
||||||
unsigned long *pfn);
|
unsigned long *pfn);
|
||||||
int follow_phys(struct vm_area_struct *vma, unsigned long address,
|
int follow_phys(struct vm_area_struct *vma, unsigned long address,
|
||||||
|
|
|
@ -39,7 +39,7 @@ static __always_inline void update_lru_size(struct lruvec *lruvec,
|
||||||
{
|
{
|
||||||
__update_lru_size(lruvec, lru, zid, nr_pages);
|
__update_lru_size(lruvec, lru, zid, nr_pages);
|
||||||
#ifdef CONFIG_MEMCG
|
#ifdef CONFIG_MEMCG
|
||||||
mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
|
mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -854,6 +854,16 @@ struct signal_struct {
|
||||||
|
|
||||||
#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */
|
#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */
|
||||||
|
|
||||||
|
#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \
|
||||||
|
SIGNAL_STOP_CONTINUED)
|
||||||
|
|
||||||
|
static inline void signal_set_stop_flags(struct signal_struct *sig,
|
||||||
|
unsigned int flags)
|
||||||
|
{
|
||||||
|
WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP));
|
||||||
|
sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags;
|
||||||
|
}
|
||||||
|
|
||||||
/* If true, all threads except ->group_exit_task have pending SIGKILL */
|
/* If true, all threads except ->group_exit_task have pending SIGKILL */
|
||||||
static inline int signal_group_exit(const struct signal_struct *sig)
|
static inline int signal_group_exit(const struct signal_struct *sig)
|
||||||
{
|
{
|
||||||
|
|
|
@ -2480,7 +2480,7 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
|
||||||
|
|
||||||
static inline void skb_free_frag(void *addr)
|
static inline void skb_free_frag(void *addr)
|
||||||
{
|
{
|
||||||
__free_page_frag(addr);
|
page_frag_free(addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void *napi_alloc_frag(unsigned int fragsz);
|
void *napi_alloc_frag(unsigned int fragsz);
|
||||||
|
|
|
@ -226,7 +226,7 @@ static inline const char *__check_heap_object(const void *ptr,
|
||||||
* (PAGE_SIZE*2). Larger requests are passed to the page allocator.
|
* (PAGE_SIZE*2). Larger requests are passed to the page allocator.
|
||||||
*/
|
*/
|
||||||
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
|
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
|
||||||
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT)
|
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1)
|
||||||
#ifndef KMALLOC_SHIFT_LOW
|
#ifndef KMALLOC_SHIFT_LOW
|
||||||
#define KMALLOC_SHIFT_LOW 3
|
#define KMALLOC_SHIFT_LOW 3
|
||||||
#endif
|
#endif
|
||||||
|
@ -239,7 +239,7 @@ static inline const char *__check_heap_object(const void *ptr,
|
||||||
* be allocated from the same page.
|
* be allocated from the same page.
|
||||||
*/
|
*/
|
||||||
#define KMALLOC_SHIFT_HIGH PAGE_SHIFT
|
#define KMALLOC_SHIFT_HIGH PAGE_SHIFT
|
||||||
#define KMALLOC_SHIFT_MAX 30
|
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1)
|
||||||
#ifndef KMALLOC_SHIFT_LOW
|
#ifndef KMALLOC_SHIFT_LOW
|
||||||
#define KMALLOC_SHIFT_LOW 3
|
#define KMALLOC_SHIFT_LOW 3
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -150,8 +150,9 @@ enum {
|
||||||
SWP_FILE = (1 << 7), /* set after swap_activate success */
|
SWP_FILE = (1 << 7), /* set after swap_activate success */
|
||||||
SWP_AREA_DISCARD = (1 << 8), /* single-time swap area discards */
|
SWP_AREA_DISCARD = (1 << 8), /* single-time swap area discards */
|
||||||
SWP_PAGE_DISCARD = (1 << 9), /* freed swap page-cluster discards */
|
SWP_PAGE_DISCARD = (1 << 9), /* freed swap page-cluster discards */
|
||||||
|
SWP_STABLE_WRITES = (1 << 10), /* no overwrite PG_writeback pages */
|
||||||
/* add others here before... */
|
/* add others here before... */
|
||||||
SWP_SCANNING = (1 << 10), /* refcount in scan_swap_map */
|
SWP_SCANNING = (1 << 11), /* refcount in scan_swap_map */
|
||||||
};
|
};
|
||||||
|
|
||||||
#define SWAP_CLUSTER_MAX 32UL
|
#define SWAP_CLUSTER_MAX 32UL
|
||||||
|
|
|
@ -8,23 +8,7 @@
|
||||||
#ifndef _LINUX_TIMERFD_H
|
#ifndef _LINUX_TIMERFD_H
|
||||||
#define _LINUX_TIMERFD_H
|
#define _LINUX_TIMERFD_H
|
||||||
|
|
||||||
/* For O_CLOEXEC and O_NONBLOCK */
|
#include <uapi/linux/timerfd.h>
|
||||||
#include <linux/fcntl.h>
|
|
||||||
|
|
||||||
/* For _IO helpers */
|
|
||||||
#include <linux/ioctl.h>
|
|
||||||
|
|
||||||
/*
|
|
||||||
* CAREFUL: Check include/asm-generic/fcntl.h when defining
|
|
||||||
* new flags, since they might collide with O_* ones. We want
|
|
||||||
* to re-use O_* flags that couldn't possibly have a meaning
|
|
||||||
* from eventfd, in order to leave a free define-space for
|
|
||||||
* shared O_* flags.
|
|
||||||
*/
|
|
||||||
#define TFD_TIMER_ABSTIME (1 << 0)
|
|
||||||
#define TFD_TIMER_CANCEL_ON_SET (1 << 1)
|
|
||||||
#define TFD_CLOEXEC O_CLOEXEC
|
|
||||||
#define TFD_NONBLOCK O_NONBLOCK
|
|
||||||
|
|
||||||
#define TFD_SHARED_FCNTL_FLAGS (TFD_CLOEXEC | TFD_NONBLOCK)
|
#define TFD_SHARED_FCNTL_FLAGS (TFD_CLOEXEC | TFD_NONBLOCK)
|
||||||
/* Flags for timerfd_create. */
|
/* Flags for timerfd_create. */
|
||||||
|
@ -32,6 +16,4 @@
|
||||||
/* Flags for timerfd_settime. */
|
/* Flags for timerfd_settime. */
|
||||||
#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)
|
#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)
|
||||||
|
|
||||||
#define TFD_IOC_SET_TICKS _IOW('T', 0, u64)
|
|
||||||
|
|
||||||
#endif /* _LINUX_TIMERFD_H */
|
#endif /* _LINUX_TIMERFD_H */
|
||||||
|
|
|
@ -47,8 +47,7 @@
|
||||||
{(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \
|
{(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \
|
||||||
{(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \
|
{(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \
|
||||||
{(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\
|
{(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\
|
||||||
{(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\
|
{(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"}\
|
||||||
{(unsigned long)__GFP_OTHER_NODE, "__GFP_OTHER_NODE"} \
|
|
||||||
|
|
||||||
#define show_gfp_flags(flags) \
|
#define show_gfp_flags(flags) \
|
||||||
(flags) ? __print_flags(flags, "|", \
|
(flags) ? __print_flags(flags, "|", \
|
||||||
|
|
|
@ -414,6 +414,7 @@ header-y += telephony.h
|
||||||
header-y += termios.h
|
header-y += termios.h
|
||||||
header-y += thermal.h
|
header-y += thermal.h
|
||||||
header-y += time.h
|
header-y += time.h
|
||||||
|
header-y += timerfd.h
|
||||||
header-y += times.h
|
header-y += times.h
|
||||||
header-y += timex.h
|
header-y += timex.h
|
||||||
header-y += tiocl.h
|
header-y += tiocl.h
|
||||||
|
|
|
@ -0,0 +1,36 @@
|
||||||
|
/*
|
||||||
|
* include/linux/timerfd.h
|
||||||
|
*
|
||||||
|
* Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _UAPI_LINUX_TIMERFD_H
|
||||||
|
#define _UAPI_LINUX_TIMERFD_H
|
||||||
|
|
||||||
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
/* For O_CLOEXEC and O_NONBLOCK */
|
||||||
|
#include <linux/fcntl.h>
|
||||||
|
|
||||||
|
/* For _IO helpers */
|
||||||
|
#include <linux/ioctl.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* CAREFUL: Check include/asm-generic/fcntl.h when defining
|
||||||
|
* new flags, since they might collide with O_* ones. We want
|
||||||
|
* to re-use O_* flags that couldn't possibly have a meaning
|
||||||
|
* from eventfd, in order to leave a free define-space for
|
||||||
|
* shared O_* flags.
|
||||||
|
*
|
||||||
|
* Also make sure to update the masks in include/linux/timerfd.h
|
||||||
|
* when adding new flags.
|
||||||
|
*/
|
||||||
|
#define TFD_TIMER_ABSTIME (1 << 0)
|
||||||
|
#define TFD_TIMER_CANCEL_ON_SET (1 << 1)
|
||||||
|
#define TFD_CLOEXEC O_CLOEXEC
|
||||||
|
#define TFD_NONBLOCK O_NONBLOCK
|
||||||
|
|
||||||
|
#define TFD_IOC_SET_TICKS _IOW('T', 0, __u64)
|
||||||
|
|
||||||
|
#endif /* _UAPI_LINUX_TIMERFD_H */
|
|
@ -1977,7 +1977,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
|
||||||
}
|
}
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
sem_lock(sma, sops, nsops);
|
locknum = sem_lock(sma, sops, nsops);
|
||||||
|
|
||||||
if (!ipc_valid_object(&sma->sem_perm))
|
if (!ipc_valid_object(&sma->sem_perm))
|
||||||
goto out_unlock_free;
|
goto out_unlock_free;
|
||||||
|
|
|
@ -56,7 +56,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
|
||||||
attr->value_size == 0 || attr->map_flags)
|
attr->value_size == 0 || attr->map_flags)
|
||||||
return ERR_PTR(-EINVAL);
|
return ERR_PTR(-EINVAL);
|
||||||
|
|
||||||
if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1))
|
if (attr->value_size > KMALLOC_MAX_SIZE)
|
||||||
/* if value_size is bigger, the user space won't be able to
|
/* if value_size is bigger, the user space won't be able to
|
||||||
* access the elements.
|
* access the elements.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -274,7 +274,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
|
||||||
*/
|
*/
|
||||||
goto free_htab;
|
goto free_htab;
|
||||||
|
|
||||||
if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) -
|
if (htab->map.value_size >= KMALLOC_MAX_SIZE -
|
||||||
MAX_BPF_STACK - sizeof(struct htab_elem))
|
MAX_BPF_STACK - sizeof(struct htab_elem))
|
||||||
/* if value_size is bigger, the user space won't be able to
|
/* if value_size is bigger, the user space won't be able to
|
||||||
* access the elements via bpf syscall. This check also makes
|
* access the elements via bpf syscall. This check also makes
|
||||||
|
|
|
@ -246,7 +246,9 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
|
||||||
/* pages are dead and unused, undo the arch mapping */
|
/* pages are dead and unused, undo the arch mapping */
|
||||||
align_start = res->start & ~(SECTION_SIZE - 1);
|
align_start = res->start & ~(SECTION_SIZE - 1);
|
||||||
align_size = ALIGN(resource_size(res), SECTION_SIZE);
|
align_size = ALIGN(resource_size(res), SECTION_SIZE);
|
||||||
|
mem_hotplug_begin();
|
||||||
arch_remove_memory(align_start, align_size);
|
arch_remove_memory(align_start, align_size);
|
||||||
|
mem_hotplug_done();
|
||||||
untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
|
untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
|
||||||
pgmap_radix_release(res);
|
pgmap_radix_release(res);
|
||||||
dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
|
dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
|
||||||
|
@ -358,7 +360,9 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
|
||||||
if (error)
|
if (error)
|
||||||
goto err_pfn_remap;
|
goto err_pfn_remap;
|
||||||
|
|
||||||
|
mem_hotplug_begin();
|
||||||
error = arch_add_memory(nid, align_start, align_size, true);
|
error = arch_add_memory(nid, align_start, align_size, true);
|
||||||
|
mem_hotplug_done();
|
||||||
if (error)
|
if (error)
|
||||||
goto err_add_memory;
|
goto err_add_memory;
|
||||||
|
|
||||||
|
|
|
@ -346,7 +346,7 @@ static bool task_participate_group_stop(struct task_struct *task)
|
||||||
* fresh group stop. Read comment in do_signal_stop() for details.
|
* fresh group stop. Read comment in do_signal_stop() for details.
|
||||||
*/
|
*/
|
||||||
if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) {
|
if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) {
|
||||||
sig->flags = SIGNAL_STOP_STOPPED;
|
signal_set_stop_flags(sig, SIGNAL_STOP_STOPPED);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -843,7 +843,7 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
|
||||||
* will take ->siglock, notice SIGNAL_CLD_MASK, and
|
* will take ->siglock, notice SIGNAL_CLD_MASK, and
|
||||||
* notify its parent. See get_signal_to_deliver().
|
* notify its parent. See get_signal_to_deliver().
|
||||||
*/
|
*/
|
||||||
signal->flags = why | SIGNAL_STOP_CONTINUED;
|
signal_set_stop_flags(signal, why | SIGNAL_STOP_CONTINUED);
|
||||||
signal->group_stop_count = 0;
|
signal->group_stop_count = 0;
|
||||||
signal->group_exit_code = 0;
|
signal->group_exit_code = 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -164,7 +164,7 @@ config DEBUG_INFO_REDUCED
|
||||||
|
|
||||||
config DEBUG_INFO_SPLIT
|
config DEBUG_INFO_SPLIT
|
||||||
bool "Produce split debuginfo in .dwo files"
|
bool "Produce split debuginfo in .dwo files"
|
||||||
depends on DEBUG_INFO
|
depends on DEBUG_INFO && !FRV
|
||||||
help
|
help
|
||||||
Generate debug info into separate .dwo files. This significantly
|
Generate debug info into separate .dwo files. This significantly
|
||||||
reduces the build directory size for builds with DEBUG_INFO,
|
reduces the build directory size for builds with DEBUG_INFO,
|
||||||
|
|
|
@ -138,7 +138,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
|
||||||
dax_radix_locked_entry(0, RADIX_DAX_EMPTY));
|
dax_radix_locked_entry(0, RADIX_DAX_EMPTY));
|
||||||
/* Wakeup waiters for exceptional entry lock */
|
/* Wakeup waiters for exceptional entry lock */
|
||||||
dax_wake_mapping_entry_waiter(mapping, page->index, p,
|
dax_wake_mapping_entry_waiter(mapping, page->index, p,
|
||||||
false);
|
true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
__radix_tree_replace(&mapping->page_tree, node, slot, page,
|
__radix_tree_replace(&mapping->page_tree, node, slot, page,
|
||||||
|
|
|
@ -883,15 +883,17 @@ void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd)
|
||||||
{
|
{
|
||||||
pmd_t entry;
|
pmd_t entry;
|
||||||
unsigned long haddr;
|
unsigned long haddr;
|
||||||
|
bool write = vmf->flags & FAULT_FLAG_WRITE;
|
||||||
|
|
||||||
vmf->ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
|
vmf->ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
|
||||||
if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
|
if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
|
||||||
goto unlock;
|
goto unlock;
|
||||||
|
|
||||||
entry = pmd_mkyoung(orig_pmd);
|
entry = pmd_mkyoung(orig_pmd);
|
||||||
|
if (write)
|
||||||
|
entry = pmd_mkdirty(entry);
|
||||||
haddr = vmf->address & HPAGE_PMD_MASK;
|
haddr = vmf->address & HPAGE_PMD_MASK;
|
||||||
if (pmdp_set_access_flags(vmf->vma, haddr, vmf->pmd, entry,
|
if (pmdp_set_access_flags(vmf->vma, haddr, vmf->pmd, entry, write))
|
||||||
vmf->flags & FAULT_FLAG_WRITE))
|
|
||||||
update_mmu_cache_pmd(vmf->vma, vmf->address, vmf->pmd);
|
update_mmu_cache_pmd(vmf->vma, vmf->address, vmf->pmd);
|
||||||
|
|
||||||
unlock:
|
unlock:
|
||||||
|
@ -919,8 +921,7 @@ static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < HPAGE_PMD_NR; i++) {
|
for (i = 0; i < HPAGE_PMD_NR; i++) {
|
||||||
pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE |
|
pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE, vma,
|
||||||
__GFP_OTHER_NODE, vma,
|
|
||||||
vmf->address, page_to_nid(page));
|
vmf->address, page_to_nid(page));
|
||||||
if (unlikely(!pages[i] ||
|
if (unlikely(!pages[i] ||
|
||||||
mem_cgroup_try_charge(pages[i], vma->vm_mm,
|
mem_cgroup_try_charge(pages[i], vma->vm_mm,
|
||||||
|
|
37
mm/hugetlb.c
37
mm/hugetlb.c
|
@ -1773,23 +1773,32 @@ free:
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When releasing a hugetlb pool reservation, any surplus pages that were
|
* This routine has two main purposes:
|
||||||
* allocated to satisfy the reservation must be explicitly freed if they were
|
* 1) Decrement the reservation count (resv_huge_pages) by the value passed
|
||||||
* never used.
|
* in unused_resv_pages. This corresponds to the prior adjustments made
|
||||||
* Called with hugetlb_lock held.
|
* to the associated reservation map.
|
||||||
|
* 2) Free any unused surplus pages that may have been allocated to satisfy
|
||||||
|
* the reservation. As many as unused_resv_pages may be freed.
|
||||||
|
*
|
||||||
|
* Called with hugetlb_lock held. However, the lock could be dropped (and
|
||||||
|
* reacquired) during calls to cond_resched_lock. Whenever dropping the lock,
|
||||||
|
* we must make sure nobody else can claim pages we are in the process of
|
||||||
|
* freeing. Do this by ensuring resv_huge_page always is greater than the
|
||||||
|
* number of huge pages we plan to free when dropping the lock.
|
||||||
*/
|
*/
|
||||||
static void return_unused_surplus_pages(struct hstate *h,
|
static void return_unused_surplus_pages(struct hstate *h,
|
||||||
unsigned long unused_resv_pages)
|
unsigned long unused_resv_pages)
|
||||||
{
|
{
|
||||||
unsigned long nr_pages;
|
unsigned long nr_pages;
|
||||||
|
|
||||||
/* Uncommit the reservation */
|
|
||||||
h->resv_huge_pages -= unused_resv_pages;
|
|
||||||
|
|
||||||
/* Cannot return gigantic pages currently */
|
/* Cannot return gigantic pages currently */
|
||||||
if (hstate_is_gigantic(h))
|
if (hstate_is_gigantic(h))
|
||||||
return;
|
goto out;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Part (or even all) of the reservation could have been backed
|
||||||
|
* by pre-allocated pages. Only free surplus pages.
|
||||||
|
*/
|
||||||
nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
|
nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1799,12 +1808,22 @@ static void return_unused_surplus_pages(struct hstate *h,
|
||||||
* when the nodes with surplus pages have no free pages.
|
* when the nodes with surplus pages have no free pages.
|
||||||
* free_pool_huge_page() will balance the the freed pages across the
|
* free_pool_huge_page() will balance the the freed pages across the
|
||||||
* on-line nodes with memory and will handle the hstate accounting.
|
* on-line nodes with memory and will handle the hstate accounting.
|
||||||
|
*
|
||||||
|
* Note that we decrement resv_huge_pages as we free the pages. If
|
||||||
|
* we drop the lock, resv_huge_pages will still be sufficiently large
|
||||||
|
* to cover subsequent pages we may free.
|
||||||
*/
|
*/
|
||||||
while (nr_pages--) {
|
while (nr_pages--) {
|
||||||
|
h->resv_huge_pages--;
|
||||||
|
unused_resv_pages--;
|
||||||
if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1))
|
if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1))
|
||||||
break;
|
goto out;
|
||||||
cond_resched_lock(&hugetlb_lock);
|
cond_resched_lock(&hugetlb_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
/* Fully uncommit the reservation */
|
||||||
|
h->resv_huge_pages -= unused_resv_pages;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -943,7 +943,7 @@ static void collapse_huge_page(struct mm_struct *mm,
|
||||||
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
|
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
|
||||||
|
|
||||||
/* Only allocate from the target node */
|
/* Only allocate from the target node */
|
||||||
gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_OTHER_NODE | __GFP_THISNODE;
|
gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Before allocating the hugepage, release the mmap_sem read lock.
|
* Before allocating the hugepage, release the mmap_sem read lock.
|
||||||
|
@ -1242,7 +1242,6 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
|
||||||
struct vm_area_struct *vma;
|
struct vm_area_struct *vma;
|
||||||
unsigned long addr;
|
unsigned long addr;
|
||||||
pmd_t *pmd, _pmd;
|
pmd_t *pmd, _pmd;
|
||||||
bool deposited = false;
|
|
||||||
|
|
||||||
i_mmap_lock_write(mapping);
|
i_mmap_lock_write(mapping);
|
||||||
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
|
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
|
||||||
|
@ -1267,28 +1266,12 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
|
||||||
spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);
|
spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);
|
||||||
/* assume page table is clear */
|
/* assume page table is clear */
|
||||||
_pmd = pmdp_collapse_flush(vma, addr, pmd);
|
_pmd = pmdp_collapse_flush(vma, addr, pmd);
|
||||||
/*
|
|
||||||
* now deposit the pgtable for arch that need it
|
|
||||||
* otherwise free it.
|
|
||||||
*/
|
|
||||||
if (arch_needs_pgtable_deposit()) {
|
|
||||||
/*
|
|
||||||
* The deposit should be visibile only after
|
|
||||||
* collapse is seen by others.
|
|
||||||
*/
|
|
||||||
smp_wmb();
|
|
||||||
pgtable_trans_huge_deposit(vma->vm_mm, pmd,
|
|
||||||
pmd_pgtable(_pmd));
|
|
||||||
deposited = true;
|
|
||||||
}
|
|
||||||
spin_unlock(ptl);
|
spin_unlock(ptl);
|
||||||
up_write(&vma->vm_mm->mmap_sem);
|
up_write(&vma->vm_mm->mmap_sem);
|
||||||
if (!deposited) {
|
|
||||||
atomic_long_dec(&vma->vm_mm->nr_ptes);
|
atomic_long_dec(&vma->vm_mm->nr_ptes);
|
||||||
pte_free(vma->vm_mm, pmd_pgtable(_pmd));
|
pte_free(vma->vm_mm, pmd_pgtable(_pmd));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
i_mmap_unlock_write(mapping);
|
i_mmap_unlock_write(mapping);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1326,8 +1309,7 @@ static void collapse_shmem(struct mm_struct *mm,
|
||||||
VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
|
VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
|
||||||
|
|
||||||
/* Only allocate from the target node */
|
/* Only allocate from the target node */
|
||||||
gfp = alloc_hugepage_khugepaged_gfpmask() |
|
gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE;
|
||||||
__GFP_OTHER_NODE | __GFP_THISNODE;
|
|
||||||
|
|
||||||
new_page = khugepaged_alloc_page(hpage, gfp, node);
|
new_page = khugepaged_alloc_page(hpage, gfp, node);
|
||||||
if (!new_page) {
|
if (!new_page) {
|
||||||
|
|
|
@ -625,8 +625,8 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
|
||||||
unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
|
unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
|
||||||
int nid, unsigned int lru_mask)
|
int nid, unsigned int lru_mask)
|
||||||
{
|
{
|
||||||
|
struct lruvec *lruvec = mem_cgroup_lruvec(NODE_DATA(nid), memcg);
|
||||||
unsigned long nr = 0;
|
unsigned long nr = 0;
|
||||||
struct mem_cgroup_per_node *mz;
|
|
||||||
enum lru_list lru;
|
enum lru_list lru;
|
||||||
|
|
||||||
VM_BUG_ON((unsigned)nid >= nr_node_ids);
|
VM_BUG_ON((unsigned)nid >= nr_node_ids);
|
||||||
|
@ -634,8 +634,7 @@ unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
|
||||||
for_each_lru(lru) {
|
for_each_lru(lru) {
|
||||||
if (!(BIT(lru) & lru_mask))
|
if (!(BIT(lru) & lru_mask))
|
||||||
continue;
|
continue;
|
||||||
mz = mem_cgroup_nodeinfo(memcg, nid);
|
nr += mem_cgroup_get_lru_size(lruvec, lru);
|
||||||
nr += mz->lru_size[lru];
|
|
||||||
}
|
}
|
||||||
return nr;
|
return nr;
|
||||||
}
|
}
|
||||||
|
@ -1002,6 +1001,7 @@ out:
|
||||||
* mem_cgroup_update_lru_size - account for adding or removing an lru page
|
* mem_cgroup_update_lru_size - account for adding or removing an lru page
|
||||||
* @lruvec: mem_cgroup per zone lru vector
|
* @lruvec: mem_cgroup per zone lru vector
|
||||||
* @lru: index of lru list the page is sitting on
|
* @lru: index of lru list the page is sitting on
|
||||||
|
* @zid: zone id of the accounted pages
|
||||||
* @nr_pages: positive when adding or negative when removing
|
* @nr_pages: positive when adding or negative when removing
|
||||||
*
|
*
|
||||||
* This function must be called under lru_lock, just before a page is added
|
* This function must be called under lru_lock, just before a page is added
|
||||||
|
@ -1009,27 +1009,25 @@ out:
|
||||||
* so as to allow it to check that lru_size 0 is consistent with list_empty).
|
* so as to allow it to check that lru_size 0 is consistent with list_empty).
|
||||||
*/
|
*/
|
||||||
void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
|
void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
|
||||||
int nr_pages)
|
int zid, int nr_pages)
|
||||||
{
|
{
|
||||||
struct mem_cgroup_per_node *mz;
|
struct mem_cgroup_per_node *mz;
|
||||||
unsigned long *lru_size;
|
unsigned long *lru_size;
|
||||||
long size;
|
long size;
|
||||||
bool empty;
|
|
||||||
|
|
||||||
if (mem_cgroup_disabled())
|
if (mem_cgroup_disabled())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
|
mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
|
||||||
lru_size = mz->lru_size + lru;
|
lru_size = &mz->lru_zone_size[zid][lru];
|
||||||
empty = list_empty(lruvec->lists + lru);
|
|
||||||
|
|
||||||
if (nr_pages < 0)
|
if (nr_pages < 0)
|
||||||
*lru_size += nr_pages;
|
*lru_size += nr_pages;
|
||||||
|
|
||||||
size = *lru_size;
|
size = *lru_size;
|
||||||
if (WARN_ONCE(size < 0 || empty != !size,
|
if (WARN_ONCE(size < 0,
|
||||||
"%s(%p, %d, %d): lru_size %ld but %sempty\n",
|
"%s(%p, %d, %d): lru_size %ld\n",
|
||||||
__func__, lruvec, lru, nr_pages, size, empty ? "" : "not ")) {
|
__func__, lruvec, lru, nr_pages, size)) {
|
||||||
VM_BUG_ON(1);
|
VM_BUG_ON(1);
|
||||||
*lru_size = 0;
|
*lru_size = 0;
|
||||||
}
|
}
|
||||||
|
|
39
mm/memory.c
39
mm/memory.c
|
@ -3772,8 +3772,8 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
|
||||||
}
|
}
|
||||||
#endif /* __PAGETABLE_PMD_FOLDED */
|
#endif /* __PAGETABLE_PMD_FOLDED */
|
||||||
|
|
||||||
static int __follow_pte(struct mm_struct *mm, unsigned long address,
|
static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
|
||||||
pte_t **ptepp, spinlock_t **ptlp)
|
pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
|
||||||
{
|
{
|
||||||
pgd_t *pgd;
|
pgd_t *pgd;
|
||||||
pud_t *pud;
|
pud_t *pud;
|
||||||
|
@ -3790,11 +3790,20 @@ static int __follow_pte(struct mm_struct *mm, unsigned long address,
|
||||||
|
|
||||||
pmd = pmd_offset(pud, address);
|
pmd = pmd_offset(pud, address);
|
||||||
VM_BUG_ON(pmd_trans_huge(*pmd));
|
VM_BUG_ON(pmd_trans_huge(*pmd));
|
||||||
if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
|
|
||||||
|
if (pmd_huge(*pmd)) {
|
||||||
|
if (!pmdpp)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
/* We cannot handle huge page PFN maps. Luckily they don't exist. */
|
*ptlp = pmd_lock(mm, pmd);
|
||||||
if (pmd_huge(*pmd))
|
if (pmd_huge(*pmd)) {
|
||||||
|
*pmdpp = pmd;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
spin_unlock(*ptlp);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
|
ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
|
||||||
|
@ -3810,17 +3819,31 @@ out:
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp,
|
static inline int follow_pte(struct mm_struct *mm, unsigned long address,
|
||||||
spinlock_t **ptlp)
|
pte_t **ptepp, spinlock_t **ptlp)
|
||||||
{
|
{
|
||||||
int res;
|
int res;
|
||||||
|
|
||||||
/* (void) is needed to make gcc happy */
|
/* (void) is needed to make gcc happy */
|
||||||
(void) __cond_lock(*ptlp,
|
(void) __cond_lock(*ptlp,
|
||||||
!(res = __follow_pte(mm, address, ptepp, ptlp)));
|
!(res = __follow_pte_pmd(mm, address, ptepp, NULL,
|
||||||
|
ptlp)));
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
|
||||||
|
pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
|
||||||
|
{
|
||||||
|
int res;
|
||||||
|
|
||||||
|
/* (void) is needed to make gcc happy */
|
||||||
|
(void) __cond_lock(*ptlp,
|
||||||
|
!(res = __follow_pte_pmd(mm, address, ptepp, pmdpp,
|
||||||
|
ptlp)));
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(follow_pte_pmd);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* follow_pfn - look up PFN at a user virtual address
|
* follow_pfn - look up PFN at a user virtual address
|
||||||
* @vma: memory mapping
|
* @vma: memory mapping
|
||||||
|
|
|
@ -1864,14 +1864,14 @@ int move_freepages(struct zone *zone,
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (page = start_page; page <= end_page;) {
|
for (page = start_page; page <= end_page;) {
|
||||||
/* Make sure we are not inadvertently changing nodes */
|
|
||||||
VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
|
|
||||||
|
|
||||||
if (!pfn_valid_within(page_to_pfn(page))) {
|
if (!pfn_valid_within(page_to_pfn(page))) {
|
||||||
page++;
|
page++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Make sure we are not inadvertently changing nodes */
|
||||||
|
VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
|
||||||
|
|
||||||
if (!PageBuddy(page)) {
|
if (!PageBuddy(page)) {
|
||||||
page++;
|
page++;
|
||||||
continue;
|
continue;
|
||||||
|
@ -2583,30 +2583,22 @@ int __isolate_free_page(struct page *page, unsigned int order)
|
||||||
* Update NUMA hit/miss statistics
|
* Update NUMA hit/miss statistics
|
||||||
*
|
*
|
||||||
* Must be called with interrupts disabled.
|
* Must be called with interrupts disabled.
|
||||||
*
|
|
||||||
* When __GFP_OTHER_NODE is set assume the node of the preferred
|
|
||||||
* zone is the local node. This is useful for daemons who allocate
|
|
||||||
* memory on behalf of other processes.
|
|
||||||
*/
|
*/
|
||||||
static inline void zone_statistics(struct zone *preferred_zone, struct zone *z,
|
static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
|
||||||
gfp_t flags)
|
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_NUMA
|
#ifdef CONFIG_NUMA
|
||||||
int local_nid = numa_node_id();
|
|
||||||
enum zone_stat_item local_stat = NUMA_LOCAL;
|
enum zone_stat_item local_stat = NUMA_LOCAL;
|
||||||
|
|
||||||
if (unlikely(flags & __GFP_OTHER_NODE)) {
|
if (z->node != numa_node_id())
|
||||||
local_stat = NUMA_OTHER;
|
local_stat = NUMA_OTHER;
|
||||||
local_nid = preferred_zone->node;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (z->node == local_nid) {
|
if (z->node == preferred_zone->node)
|
||||||
__inc_zone_state(z, NUMA_HIT);
|
__inc_zone_state(z, NUMA_HIT);
|
||||||
__inc_zone_state(z, local_stat);
|
else {
|
||||||
} else {
|
|
||||||
__inc_zone_state(z, NUMA_MISS);
|
__inc_zone_state(z, NUMA_MISS);
|
||||||
__inc_zone_state(preferred_zone, NUMA_FOREIGN);
|
__inc_zone_state(preferred_zone, NUMA_FOREIGN);
|
||||||
}
|
}
|
||||||
|
__inc_zone_state(z, local_stat);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2674,7 +2666,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
|
||||||
}
|
}
|
||||||
|
|
||||||
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
|
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
|
||||||
zone_statistics(preferred_zone, zone, gfp_flags);
|
zone_statistics(preferred_zone, zone);
|
||||||
local_irq_restore(flags);
|
local_irq_restore(flags);
|
||||||
|
|
||||||
VM_BUG_ON_PAGE(bad_range(zone, page), page);
|
VM_BUG_ON_PAGE(bad_range(zone, page), page);
|
||||||
|
@ -3904,7 +3896,7 @@ EXPORT_SYMBOL(free_pages);
|
||||||
* drivers to provide a backing region of memory for use as either an
|
* drivers to provide a backing region of memory for use as either an
|
||||||
* sk_buff->head, or to be used in the "frags" portion of skb_shared_info.
|
* sk_buff->head, or to be used in the "frags" portion of skb_shared_info.
|
||||||
*/
|
*/
|
||||||
static struct page *__page_frag_refill(struct page_frag_cache *nc,
|
static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
|
||||||
gfp_t gfp_mask)
|
gfp_t gfp_mask)
|
||||||
{
|
{
|
||||||
struct page *page = NULL;
|
struct page *page = NULL;
|
||||||
|
@ -3925,21 +3917,22 @@ static struct page *__page_frag_refill(struct page_frag_cache *nc,
|
||||||
return page;
|
return page;
|
||||||
}
|
}
|
||||||
|
|
||||||
void __page_frag_drain(struct page *page, unsigned int order,
|
void __page_frag_cache_drain(struct page *page, unsigned int count)
|
||||||
unsigned int count)
|
|
||||||
{
|
{
|
||||||
VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
|
VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
|
||||||
|
|
||||||
if (page_ref_sub_and_test(page, count)) {
|
if (page_ref_sub_and_test(page, count)) {
|
||||||
|
unsigned int order = compound_order(page);
|
||||||
|
|
||||||
if (order == 0)
|
if (order == 0)
|
||||||
free_hot_cold_page(page, false);
|
free_hot_cold_page(page, false);
|
||||||
else
|
else
|
||||||
__free_pages_ok(page, order);
|
__free_pages_ok(page, order);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(__page_frag_drain);
|
EXPORT_SYMBOL(__page_frag_cache_drain);
|
||||||
|
|
||||||
void *__alloc_page_frag(struct page_frag_cache *nc,
|
void *page_frag_alloc(struct page_frag_cache *nc,
|
||||||
unsigned int fragsz, gfp_t gfp_mask)
|
unsigned int fragsz, gfp_t gfp_mask)
|
||||||
{
|
{
|
||||||
unsigned int size = PAGE_SIZE;
|
unsigned int size = PAGE_SIZE;
|
||||||
|
@ -3948,7 +3941,7 @@ void *__alloc_page_frag(struct page_frag_cache *nc,
|
||||||
|
|
||||||
if (unlikely(!nc->va)) {
|
if (unlikely(!nc->va)) {
|
||||||
refill:
|
refill:
|
||||||
page = __page_frag_refill(nc, gfp_mask);
|
page = __page_frag_cache_refill(nc, gfp_mask);
|
||||||
if (!page)
|
if (!page)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
@ -3991,19 +3984,19 @@ refill:
|
||||||
|
|
||||||
return nc->va + offset;
|
return nc->va + offset;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(__alloc_page_frag);
|
EXPORT_SYMBOL(page_frag_alloc);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Frees a page fragment allocated out of either a compound or order 0 page.
|
* Frees a page fragment allocated out of either a compound or order 0 page.
|
||||||
*/
|
*/
|
||||||
void __free_page_frag(void *addr)
|
void page_frag_free(void *addr)
|
||||||
{
|
{
|
||||||
struct page *page = virt_to_head_page(addr);
|
struct page *page = virt_to_head_page(addr);
|
||||||
|
|
||||||
if (unlikely(put_page_testzero(page)))
|
if (unlikely(put_page_testzero(page)))
|
||||||
__free_pages_ok(page, compound_order(page));
|
__free_pages_ok(page, compound_order(page));
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(__free_page_frag);
|
EXPORT_SYMBOL(page_frag_free);
|
||||||
|
|
||||||
static void *make_alloc_exact(unsigned long addr, unsigned int order,
|
static void *make_alloc_exact(unsigned long addr, unsigned int order,
|
||||||
size_t size)
|
size_t size)
|
||||||
|
|
|
@ -2457,7 +2457,6 @@ union freelist_init_state {
|
||||||
unsigned int pos;
|
unsigned int pos;
|
||||||
unsigned int *list;
|
unsigned int *list;
|
||||||
unsigned int count;
|
unsigned int count;
|
||||||
unsigned int rand;
|
|
||||||
};
|
};
|
||||||
struct rnd_state rnd_state;
|
struct rnd_state rnd_state;
|
||||||
};
|
};
|
||||||
|
@ -2483,8 +2482,7 @@ static bool freelist_state_initialize(union freelist_init_state *state,
|
||||||
} else {
|
} else {
|
||||||
state->list = cachep->random_seq;
|
state->list = cachep->random_seq;
|
||||||
state->count = count;
|
state->count = count;
|
||||||
state->pos = 0;
|
state->pos = rand % count;
|
||||||
state->rand = rand;
|
|
||||||
ret = true;
|
ret = true;
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -2493,7 +2491,9 @@ static bool freelist_state_initialize(union freelist_init_state *state,
|
||||||
/* Get the next entry on the list and randomize it using a random shift */
|
/* Get the next entry on the list and randomize it using a random shift */
|
||||||
static freelist_idx_t next_random_slot(union freelist_init_state *state)
|
static freelist_idx_t next_random_slot(union freelist_init_state *state)
|
||||||
{
|
{
|
||||||
return (state->list[state->pos++] + state->rand) % state->count;
|
if (state->pos >= state->count)
|
||||||
|
state->pos = 0;
|
||||||
|
return state->list[state->pos++];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Swap two freelist entries */
|
/* Swap two freelist entries */
|
||||||
|
|
|
@ -943,11 +943,25 @@ bool reuse_swap_page(struct page *page, int *total_mapcount)
|
||||||
count = page_trans_huge_mapcount(page, total_mapcount);
|
count = page_trans_huge_mapcount(page, total_mapcount);
|
||||||
if (count <= 1 && PageSwapCache(page)) {
|
if (count <= 1 && PageSwapCache(page)) {
|
||||||
count += page_swapcount(page);
|
count += page_swapcount(page);
|
||||||
if (count == 1 && !PageWriteback(page)) {
|
if (count != 1)
|
||||||
|
goto out;
|
||||||
|
if (!PageWriteback(page)) {
|
||||||
delete_from_swap_cache(page);
|
delete_from_swap_cache(page);
|
||||||
SetPageDirty(page);
|
SetPageDirty(page);
|
||||||
|
} else {
|
||||||
|
swp_entry_t entry;
|
||||||
|
struct swap_info_struct *p;
|
||||||
|
|
||||||
|
entry.val = page_private(page);
|
||||||
|
p = swap_info_get(entry);
|
||||||
|
if (p->flags & SWP_STABLE_WRITES) {
|
||||||
|
spin_unlock(&p->lock);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
spin_unlock(&p->lock);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
out:
|
||||||
return count <= 1;
|
return count <= 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2448,6 +2462,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
|
||||||
error = -ENOMEM;
|
error = -ENOMEM;
|
||||||
goto bad_swap;
|
goto bad_swap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (bdi_cap_stable_pages_required(inode_to_bdi(inode)))
|
||||||
|
p->flags |= SWP_STABLE_WRITES;
|
||||||
|
|
||||||
if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
|
if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
|
||||||
int cpu;
|
int cpu;
|
||||||
|
|
||||||
|
|
27
mm/vmscan.c
27
mm/vmscan.c
|
@ -242,6 +242,16 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru)
|
||||||
return node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
|
return node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned long lruvec_zone_lru_size(struct lruvec *lruvec, enum lru_list lru,
|
||||||
|
int zone_idx)
|
||||||
|
{
|
||||||
|
if (!mem_cgroup_disabled())
|
||||||
|
return mem_cgroup_get_zone_lru_size(lruvec, lru, zone_idx);
|
||||||
|
|
||||||
|
return zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zone_idx],
|
||||||
|
NR_ZONE_LRU_BASE + lru);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Add a shrinker callback to be called from the vm.
|
* Add a shrinker callback to be called from the vm.
|
||||||
*/
|
*/
|
||||||
|
@ -1382,8 +1392,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
|
||||||
* be complete before mem_cgroup_update_lru_size due to a santity check.
|
* be complete before mem_cgroup_update_lru_size due to a santity check.
|
||||||
*/
|
*/
|
||||||
static __always_inline void update_lru_sizes(struct lruvec *lruvec,
|
static __always_inline void update_lru_sizes(struct lruvec *lruvec,
|
||||||
enum lru_list lru, unsigned long *nr_zone_taken,
|
enum lru_list lru, unsigned long *nr_zone_taken)
|
||||||
unsigned long nr_taken)
|
|
||||||
{
|
{
|
||||||
int zid;
|
int zid;
|
||||||
|
|
||||||
|
@ -1392,11 +1401,11 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
__update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]);
|
__update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]);
|
||||||
|
#ifdef CONFIG_MEMCG
|
||||||
|
mem_cgroup_update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_MEMCG
|
|
||||||
mem_cgroup_update_lru_size(lruvec, lru, -nr_taken);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1501,7 +1510,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
|
||||||
*nr_scanned = scan;
|
*nr_scanned = scan;
|
||||||
trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, scan,
|
trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, scan,
|
||||||
nr_taken, mode, is_file_lru(lru));
|
nr_taken, mode, is_file_lru(lru));
|
||||||
update_lru_sizes(lruvec, lru, nr_zone_taken, nr_taken);
|
update_lru_sizes(lruvec, lru, nr_zone_taken);
|
||||||
return nr_taken;
|
return nr_taken;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2047,10 +2056,8 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
|
||||||
if (!managed_zone(zone))
|
if (!managed_zone(zone))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
inactive_zone = zone_page_state(zone,
|
inactive_zone = lruvec_zone_lru_size(lruvec, file * LRU_FILE, zid);
|
||||||
NR_ZONE_LRU_BASE + (file * LRU_FILE));
|
active_zone = lruvec_zone_lru_size(lruvec, (file * LRU_FILE) + LRU_ACTIVE, zid);
|
||||||
active_zone = zone_page_state(zone,
|
|
||||||
NR_ZONE_LRU_BASE + (file * LRU_FILE) + LRU_ACTIVE);
|
|
||||||
|
|
||||||
inactive -= min(inactive, inactive_zone);
|
inactive -= min(inactive, inactive_zone);
|
||||||
active -= min(active, active_zone);
|
active -= min(active, active_zone);
|
||||||
|
|
|
@ -369,7 +369,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
|
||||||
|
|
||||||
local_irq_save(flags);
|
local_irq_save(flags);
|
||||||
nc = this_cpu_ptr(&netdev_alloc_cache);
|
nc = this_cpu_ptr(&netdev_alloc_cache);
|
||||||
data = __alloc_page_frag(nc, fragsz, gfp_mask);
|
data = page_frag_alloc(nc, fragsz, gfp_mask);
|
||||||
local_irq_restore(flags);
|
local_irq_restore(flags);
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
@ -391,7 +391,7 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
|
||||||
{
|
{
|
||||||
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
|
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
|
||||||
|
|
||||||
return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
|
return page_frag_alloc(&nc->page, fragsz, gfp_mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
void *napi_alloc_frag(unsigned int fragsz)
|
void *napi_alloc_frag(unsigned int fragsz)
|
||||||
|
@ -441,7 +441,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
|
||||||
local_irq_save(flags);
|
local_irq_save(flags);
|
||||||
|
|
||||||
nc = this_cpu_ptr(&netdev_alloc_cache);
|
nc = this_cpu_ptr(&netdev_alloc_cache);
|
||||||
data = __alloc_page_frag(nc, len, gfp_mask);
|
data = page_frag_alloc(nc, len, gfp_mask);
|
||||||
pfmemalloc = nc->pfmemalloc;
|
pfmemalloc = nc->pfmemalloc;
|
||||||
|
|
||||||
local_irq_restore(flags);
|
local_irq_restore(flags);
|
||||||
|
@ -505,7 +505,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
|
||||||
if (sk_memalloc_socks())
|
if (sk_memalloc_socks())
|
||||||
gfp_mask |= __GFP_MEMALLOC;
|
gfp_mask |= __GFP_MEMALLOC;
|
||||||
|
|
||||||
data = __alloc_page_frag(&nc->page, len, gfp_mask);
|
data = page_frag_alloc(&nc->page, len, gfp_mask);
|
||||||
if (unlikely(!data))
|
if (unlikely(!data))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
|
|
@ -655,7 +655,6 @@ static const struct {
|
||||||
{ "__GFP_RECLAIM", "R" },
|
{ "__GFP_RECLAIM", "R" },
|
||||||
{ "__GFP_DIRECT_RECLAIM", "DR" },
|
{ "__GFP_DIRECT_RECLAIM", "DR" },
|
||||||
{ "__GFP_KSWAPD_RECLAIM", "KR" },
|
{ "__GFP_KSWAPD_RECLAIM", "KR" },
|
||||||
{ "__GFP_OTHER_NODE", "ON" },
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static size_t max_gfp_len;
|
static size_t max_gfp_len;
|
||||||
|
|
Loading…
Reference in New Issue