diff --git a/drivers/base/memory.c b/drivers/base/memory.c index b9f474c11393..4086718f6876 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -97,30 +97,13 @@ static ssize_t phys_index_show(struct device *dev, } /* - * Show whether the memory block is likely to be offlineable (or is already - * offline). Once offline, the memory block could be removed. The return - * value does, however, not indicate that there is a way to remove the - * memory block. + * Legacy interface that we cannot remove. Always indicate "removable" + * with CONFIG_MEMORY_HOTREMOVE - bad heuristic. */ static ssize_t removable_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct memory_block *mem = to_memory_block(dev); - unsigned long pfn; - int ret = 1, i; - - if (mem->state != MEM_ONLINE) - goto out; - - for (i = 0; i < sections_per_block; i++) { - if (!present_section_nr(mem->start_section_nr + i)) - continue; - pfn = section_nr_to_pfn(mem->start_section_nr + i); - ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION); - } - -out: - return sprintf(buf, "%d\n", ret); + return sprintf(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE)); } /* diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index a7a0a1a5c8d5..e9ba01336d4e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -695,6 +695,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val); +void mod_memcg_obj_state(void *p, int idx, int val); static inline void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) @@ -1123,6 +1124,10 @@ static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, __mod_node_page_state(page_pgdat(page), idx, val); } +static inline void mod_memcg_obj_state(void *p, int idx, int val) +{ +} + static inline unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, @@ -1427,6 +1432,8 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) return memcg ? memcg->kmemcg_id : -1; } +struct mem_cgroup *mem_cgroup_from_obj(void *p); + #else static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order) @@ -1468,6 +1475,11 @@ static inline void memcg_put_cache_ids(void) { } +static inline struct mem_cgroup *mem_cgroup_from_obj(void *p) +{ + return NULL; +} + #endif /* CONFIG_MEMCG_KMEM */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 86425305cd4a..d90af13431c7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -397,8 +397,8 @@ static void account_kernel_stack(struct task_struct *tsk, int account) mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB, THREAD_SIZE / 1024 * account); - mod_memcg_page_state(first_page, MEMCG_KERNEL_STACK_KB, - account * (THREAD_SIZE / 1024)); + mod_memcg_obj_state(stack, MEMCG_KERNEL_STACK_KB, + account * (THREAD_SIZE / 1024)); } } diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index e434b05416c6..5280bcf459af 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -240,8 +240,7 @@ again: if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages, &counter)) { ret = -ENOMEM; - hugetlb_event(hugetlb_cgroup_from_counter(counter, idx), idx, - HUGETLB_MAX); + hugetlb_event(h_cg, idx, HUGETLB_MAX); } css_put(&h_cg->css); done: diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7a4bd8b9adc2..7ddf91c4295f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -777,6 +777,17 @@ void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val) rcu_read_unlock(); } +void mod_memcg_obj_state(void *p, int idx, int val) +{ + struct mem_cgroup *memcg; + + rcu_read_lock(); + memcg = mem_cgroup_from_obj(p); + if (memcg) + mod_memcg_state(memcg, idx, val); + rcu_read_unlock(); +} + /** * __count_memcg_events - account VM events in a cgroup * @memcg: the memory cgroup @@ -2661,6 +2672,33 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg, } #ifdef CONFIG_MEMCG_KMEM +/* + * Returns a pointer to the memory cgroup to which the kernel object is charged. + * + * The caller must ensure the memcg lifetime, e.g. by taking rcu_read_lock(), + * cgroup_mutex, etc. + */ +struct mem_cgroup *mem_cgroup_from_obj(void *p) +{ + struct page *page; + + if (mem_cgroup_disabled()) + return NULL; + + page = virt_to_head_page(p); + + /* + * Slab pages don't have page->mem_cgroup set because corresponding + * kmem caches can be reparented during the lifetime. That's why + * memcg_from_slab_page() should be used instead. + */ + if (PageSlab(page)) + return memcg_from_slab_page(page); + + /* All other pages use page->mem_cgroup */ + return page->mem_cgroup; +} + static int memcg_alloc_cache_id(void) { int id, size; diff --git a/mm/sparse.c b/mm/sparse.c index aadb7298dcef..65599e8bd636 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -781,6 +781,12 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, ms->usage = NULL; } memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); + /* + * Mark the section invalid so that valid_section() + * return false. This prevents code from dereferencing + * ms->usage array. + */ + ms->section_mem_map &= ~SECTION_HAS_MEM_MAP; } if (section_is_early && memmap) diff --git a/mm/swapfile.c b/mm/swapfile.c index b2a2e45c9a36..be33e6176cd9 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2899,10 +2899,6 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) p->bdev = inode->i_sb->s_bdev; } - inode_lock(inode); - if (IS_SWAPFILE(inode)) - return -EBUSY; - return 0; } @@ -3157,36 +3153,41 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) mapping = swap_file->f_mapping; inode = mapping->host; - /* will take i_rwsem; */ error = claim_swapfile(p, inode); if (unlikely(error)) goto bad_swap; + inode_lock(inode); + if (IS_SWAPFILE(inode)) { + error = -EBUSY; + goto bad_swap_unlock_inode; + } + /* * Read the swap header. */ if (!mapping->a_ops->readpage) { error = -EINVAL; - goto bad_swap; + goto bad_swap_unlock_inode; } page = read_mapping_page(mapping, 0, swap_file); if (IS_ERR(page)) { error = PTR_ERR(page); - goto bad_swap; + goto bad_swap_unlock_inode; } swap_header = kmap(page); maxpages = read_swap_header(p, swap_header, inode); if (unlikely(!maxpages)) { error = -EINVAL; - goto bad_swap; + goto bad_swap_unlock_inode; } /* OK, set up the swap map and apply the bad block list */ swap_map = vzalloc(maxpages); if (!swap_map) { error = -ENOMEM; - goto bad_swap; + goto bad_swap_unlock_inode; } if (bdi_cap_stable_pages_required(inode_to_bdi(inode))) @@ -3211,7 +3212,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) GFP_KERNEL); if (!cluster_info) { error = -ENOMEM; - goto bad_swap; + goto bad_swap_unlock_inode; } for (ci = 0; ci < nr_cluster; ci++) @@ -3220,7 +3221,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) p->percpu_cluster = alloc_percpu(struct percpu_cluster); if (!p->percpu_cluster) { error = -ENOMEM; - goto bad_swap; + goto bad_swap_unlock_inode; } for_each_possible_cpu(cpu) { struct percpu_cluster *cluster; @@ -3234,13 +3235,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = swap_cgroup_swapon(p->type, maxpages); if (error) - goto bad_swap; + goto bad_swap_unlock_inode; nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map, cluster_info, maxpages, &span); if (unlikely(nr_extents < 0)) { error = nr_extents; - goto bad_swap; + goto bad_swap_unlock_inode; } /* frontswap enabled? set up bit-per-page map for frontswap */ if (IS_ENABLED(CONFIG_FRONTSWAP)) @@ -3280,7 +3281,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = init_swap_address_space(p->type, maxpages); if (error) - goto bad_swap; + goto bad_swap_unlock_inode; /* * Flush any pending IO and dirty mappings before we start using this @@ -3290,7 +3291,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = inode_drain_writes(inode); if (error) { inode->i_flags &= ~S_SWAPFILE; - goto bad_swap; + goto bad_swap_unlock_inode; } mutex_lock(&swapon_mutex); @@ -3315,6 +3316,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = 0; goto out; +bad_swap_unlock_inode: + inode_unlock(inode); bad_swap: free_percpu(p->percpu_cluster); p->percpu_cluster = NULL; @@ -3322,6 +3325,7 @@ bad_swap: set_blocksize(p->bdev, p->old_block_size); blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); } + inode = NULL; destroy_swap_extents(p); swap_cgroup_swapoff(p->type); spin_lock(&swap_lock); @@ -3333,13 +3337,8 @@ bad_swap: kvfree(frontswap_map); if (inced_nr_rotate_swap) atomic_dec(&nr_rotate_swap); - if (swap_file) { - if (inode) { - inode_unlock(inode); - inode = NULL; - } + if (swap_file) filp_close(swap_file, NULL); - } out: if (page && !IS_ERR(page)) { kunmap(page);