From bce73e4842390f7b7309c8e253e139db71288ac3 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 13 Jul 2018 16:58:52 -0700 Subject: [PATCH 01/11] mm: do not drop unused pages when userfaultd is running KVM guests on s390 can notify the host of unused pages. This can result in pte_unused callbacks to be true for KVM guest memory. If a page is unused (checked with pte_unused) we might drop this page instead of paging it. This can have side-effects on userfaultd, when the page in question was already migrated: The next access of that page will trigger a fault and a user fault instead of faulting in a new and empty zero page. As QEMU does not expect a userfault on an already migrated page this migration will fail. The most straightforward solution is to ignore the pte_unused hint if a userfault context is active for this VMA. Link: http://lkml.kernel.org/r/20180703171854.63981-1-borntraeger@de.ibm.com Signed-off-by: Christian Borntraeger Cc: Martin Schwidefsky Cc: Andrea Arcangeli Cc: Mike Rapoport Cc: Janosch Frank Cc: David Hildenbrand Cc: Cornelia Huck Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/rmap.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mm/rmap.c b/mm/rmap.c index 6db729dc4c50..eb477809a5c0 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -64,6 +64,7 @@ #include #include #include +#include #include @@ -1481,11 +1482,16 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, set_pte_at(mm, address, pvmw.pte, pteval); } - } else if (pte_unused(pteval)) { + } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) { /* * The guest indicated that the page content is of no * interest anymore. Simply discard the pte, vmscan * will take care of the rest. + * A future reference will then fault in a new zero + * page. When userfaultfd is active, we must not drop + * this page though, as its main user (postcopy + * migration) will not expect userfaults on already + * copied pages. */ dec_mm_counter(mm, mm_counter(page)); /* We have to invalidate as we cleared the pte */ From e70cc2bd579e8a9d6d153762f0fe294d0e652ff0 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 13 Jul 2018 16:58:56 -0700 Subject: [PATCH 02/11] fs/proc/task_mmu.c: fix Locked field in /proc/pid/smaps* Thomas reports: "While looking around in /proc on my v4.14.52 system I noticed that all processes got a lot of "Locked" memory in /proc/*/smaps. A lot more memory than a regular user can usually lock with mlock(). Commit 493b0e9d945f (in v4.14-rc1) seems to have changed the behavior of "Locked". Before that commit the code was like this. Notice the VM_LOCKED check. (vma->vm_flags & VM_LOCKED) ? (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); After that commit Locked is now the same as Pss: (unsigned long)(mss->pss >> (10 + PSS_SHIFT))); This looks like a mistake." Indeed, the commit has added mss->pss_locked with the correct value that depends on VM_LOCKED, but forgot to actually use it. Fix it. Link: http://lkml.kernel.org/r/ebf6c7fb-fec3-6a26-544f-710ed193c154@suse.cz Fixes: 493b0e9d945f ("mm: add /proc/pid/smaps_rollup") Signed-off-by: Vlastimil Babka Reported-by: Thomas Lindroth Cc: Alexey Dobriyan Cc: Daniel Colascione Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index e9679016271f..dfd73a4616ce 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -831,7 +831,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) SEQ_PUT_DEC(" kB\nSwap: ", mss->swap); SEQ_PUT_DEC(" kB\nSwapPss: ", mss->swap_pss >> PSS_SHIFT); - SEQ_PUT_DEC(" kB\nLocked: ", mss->pss >> PSS_SHIFT); + SEQ_PUT_DEC(" kB\nLocked: ", + mss->pss_locked >> PSS_SHIFT); seq_puts(m, " kB\n"); } if (!rollup_mode) { From 02f51d45937f7bc7f4dee21e9f85b2d5eac37104 Mon Sep 17 00:00:00 2001 From: Tomas Bortoli Date: Fri, 13 Jul 2018 16:58:59 -0700 Subject: [PATCH 03/11] autofs: fix slab out of bounds read in getname_kernel() The autofs subsystem does not check that the "path" parameter is present for all cases where it is required when it is passed in via the "param" struct. In particular it isn't checked for the AUTOFS_DEV_IOCTL_OPENMOUNT_CMD ioctl command. To solve it, modify validate_dev_ioctl(function to check that a path has been provided for ioctl commands that require it. Link: http://lkml.kernel.org/r/153060031527.26631.18306637892746301555.stgit@pluto.themaw.net Signed-off-by: Tomas Bortoli Signed-off-by: Ian Kent Reported-by: syzbot+60c837b428dc84e83a93@syzkaller.appspotmail.com Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs/dev-ioctl.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c index ea4ca1445ab7..86eafda4a652 100644 --- a/fs/autofs/dev-ioctl.c +++ b/fs/autofs/dev-ioctl.c @@ -135,6 +135,15 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) cmd); goto out; } + } else { + unsigned int inr = _IOC_NR(cmd); + + if (inr == AUTOFS_DEV_IOCTL_OPENMOUNT_CMD || + inr == AUTOFS_DEV_IOCTL_REQUESTER_CMD || + inr == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) { + err = -EINVAL; + goto out; + } } err = 0; @@ -271,7 +280,8 @@ static int autofs_dev_ioctl_openmount(struct file *fp, dev_t devid; int err, fd; - /* param->path has already been checked */ + /* param->path has been checked in validate_dev_ioctl() */ + if (!param->openmount.devid) return -EINVAL; @@ -433,10 +443,7 @@ static int autofs_dev_ioctl_requester(struct file *fp, dev_t devid; int err = -ENOENT; - if (param->size <= AUTOFS_DEV_IOCTL_SIZE) { - err = -EINVAL; - goto out; - } + /* param->path has been checked in validate_dev_ioctl() */ devid = sbi->sb->s_dev; @@ -521,10 +528,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, unsigned int devid, magic; int err = -ENOENT; - if (param->size <= AUTOFS_DEV_IOCTL_SIZE) { - err = -EINVAL; - goto out; - } + /* param->path has been checked in validate_dev_ioctl() */ name = param->path; type = param->ismountpoint.in.type; From a90744bac57c3c07d0d4422af62f3e44549ade30 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 13 Jul 2018 16:59:03 -0700 Subject: [PATCH 04/11] mm: allow arch to supply p??_free_tlb functions The mmu_gather APIs keep track of the invalidated address range including the span covered by invalidated page table pages. Ranges covered by page tables but not ptes (and therefore no TLBs) still need to be invalidated because some architectures (x86) can cache intermediate page table entries, and invalidate those with normal TLB invalidation instructions to be almost-backward-compatible. Architectures which don't cache intermediate page table entries, or which invalidate these caches separately from TLB invalidation, do not require TLB invalidation range expanded over page tables. Allow architectures to supply their own p??_free_tlb functions, which can avoid the __tlb_adjust_range. Link: http://lkml.kernel.org/r/20180703013131.2807-1-npiggin@gmail.com Signed-off-by: Nicholas Piggin Reviewed-by: Andrew Morton Cc: "Aneesh Kumar K. V" Cc: Minchan Kim Cc: Mel Gorman Cc: Nadav Amit Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/tlb.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index faddde44de8c..3063125197ad 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -265,33 +265,41 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, * For now w.r.t page table cache, mark the range_size as PAGE_SIZE */ +#ifndef pte_free_tlb #define pte_free_tlb(tlb, ptep, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ __pte_free_tlb(tlb, ptep, address); \ } while (0) +#endif +#ifndef pmd_free_tlb #define pmd_free_tlb(tlb, pmdp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ __pmd_free_tlb(tlb, pmdp, address); \ } while (0) +#endif #ifndef __ARCH_HAS_4LEVEL_HACK +#ifndef pud_free_tlb #define pud_free_tlb(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif +#endif #ifndef __ARCH_HAS_5LEVEL_HACK +#ifndef p4d_free_tlb #define p4d_free_tlb(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ __p4d_free_tlb(tlb, pudp, address); \ } while (0) #endif +#endif #define tlb_migrate_finish(mm) do {} while (0) From c290fba8c4ce6530cd941ea14db5a4ac2f77183f Mon Sep 17 00:00:00 2001 From: piaojun Date: Fri, 13 Jul 2018 16:59:06 -0700 Subject: [PATCH 05/11] net/9p/client.c: put refcount of trans_mod in error case in parse_opts() In my testing, the second mount will fail after umounting successfully. The reason is that we put refcount of trans_mod in the correct case rather than the error case in parse_opts() at last. That will cause the refcount decrease to -1, and when we try to get trans_mod again in try_module_get(), we could only increase refcount to 0 which will cause failure as follows: parse_opts v9fs_get_trans_by_name try_module_get : return NULL to caller which cause error So we should put refcount of trans_mod in error case. Link: http://lkml.kernel.org/r/5B3F39A0.2030509@huawei.com Fixes: 9421c3e64137ec ("net/9p/client.c: fix potential refcnt problem of trans module") Signed-off-by: Jun Piao Reviewed-by: Yiwen Jiang Reviewed-by: Greg Kurz Reviewed-by: Dominique Martinet Tested-by: Dominique Martinet Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/9p/client.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/9p/client.c b/net/9p/client.c index 18c5271910dc..5c1343195292 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -225,7 +225,8 @@ static int parse_opts(char *opts, struct p9_client *clnt) } free_and_return: - v9fs_put_trans(clnt->trans_mod); + if (ret) + v9fs_put_trans(clnt->trans_mod); kfree(tmp_options); return ret; } From fa8cbda88db12e632a8987c94b66f5caf25bcec4 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Fri, 13 Jul 2018 16:59:09 -0700 Subject: [PATCH 06/11] x86/purgatory: add missing FORCE to Makefile target - Build the kernel without the fix - Add some flag to the purgatories KBUILD_CFLAGS,I used -fno-asynchronous-unwind-tables - Re-build the kernel When you look at makes output you see that sha256.o is not re-build in the last step. Also readelf -S still shows the .eh_frame section for sha256.o. With the fix sha256.o is rebuilt in the last step. Without FORCE make does not detect changes only made to the command line options. So object files might not be re-built even when they should be. Fix this by adding FORCE where it is missing. Link: http://lkml.kernel.org/r/20180704110044.29279-2-prudo@linux.ibm.com Fixes: df6f2801f511 ("kernel/kexec_file.c: move purgatories sha256 to common code") Signed-off-by: Philipp Rudo Acked-by: Dave Young Cc: Ingo Molnar Cc: Thomas Gleixner Cc: [4.17+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/purgatory/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 2e9ee023e6bc..81a8e33115ad 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -6,7 +6,7 @@ purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string targets += $(purgatory-y) PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) -$(obj)/sha256.o: $(srctree)/lib/sha256.c +$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE $(call if_changed_rule,cc_o_c) LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib From 24962af7e1041b7e50c1bc71d8d10dc678c556b5 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Fri, 13 Jul 2018 16:59:13 -0700 Subject: [PATCH 07/11] fs, elf: make sure to page align bss in load_elf_library The current code does not make sure to page align bss before calling vm_brk(), and this can lead to a VM_BUG_ON() in __mm_populate() due to the requested lenght not being correctly aligned. Let us make sure to align it properly. Kees: only applicable to CONFIG_USELIB kernels: 32-bit and configured for libc5. Link: http://lkml.kernel.org/r/20180705145539.9627-1-osalvador@techadventures.net Signed-off-by: Oscar Salvador Reported-by: syzbot+5dcb560fe12aa5091c06@syzkaller.appspotmail.com Tested-by: Tetsuo Handa Acked-by: Kees Cook Cc: Michal Hocko Cc: Nicolas Pitre Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0ac456b52bdd..816cc921cf36 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1259,9 +1259,8 @@ static int load_elf_library(struct file *file) goto out_free_ph; } - len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + - ELF_MIN_ALIGN - 1); - bss = eppnt->p_memsz + eppnt->p_vaddr; + len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr); + bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr); if (bss > len) { error = vm_brk(len, bss - len); if (error) From e3d301cae0092062cbcd6b4e7ceebbab9d87e263 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 13 Jul 2018 16:59:16 -0700 Subject: [PATCH 08/11] mm/memblock.c: do not complain about top-down allocations for !MEMORY_HOTREMOVE Mike Rapoport is converting architectures from bootmem to nobootmem allocator. While doing so for m68k Geert has noticed that he gets a scary looking warning: WARNING: CPU: 0 PID: 0 at mm/memblock.c:230 memblock_find_in_range_node+0x11c/0x1be memblock: bottom-up allocation failed, memory hotunplug may be affected Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 4.18.0-rc3-atari-01343-gf2fb5f2e09a97a3c-dirty #7 Call Trace: __warn+0xa8/0xc2 kernel_pg_dir+0x0/0x1000 netdev_lower_get_next+0x2/0x22 warn_slowpath_fmt+0x2e/0x36 memblock_find_in_range_node+0x11c/0x1be memblock_find_in_range_node+0x11c/0x1be memblock_find_in_range_node+0x0/0x1be vprintk_func+0x66/0x6e memblock_virt_alloc_internal+0xd0/0x156 netdev_lower_get_next+0x2/0x22 netdev_lower_get_next+0x2/0x22 kernel_pg_dir+0x0/0x1000 memblock_virt_alloc_try_nid_nopanic+0x58/0x7a netdev_lower_get_next+0x2/0x22 kernel_pg_dir+0x0/0x1000 kernel_pg_dir+0x0/0x1000 EXPTBL+0x234/0x400 EXPTBL+0x234/0x400 alloc_node_mem_map+0x4a/0x66 netdev_lower_get_next+0x2/0x22 free_area_init_node+0xe2/0x29e EXPTBL+0x234/0x400 paging_init+0x430/0x462 kernel_pg_dir+0x0/0x1000 printk+0x0/0x1a EXPTBL+0x234/0x400 setup_arch+0x1b8/0x22c start_kernel+0x4a/0x40a _sinittext+0x344/0x9e8 The warning is basically saying that a top-down allocation can break memory hotremove because memblock allocation is not movable. But m68k doesn't even support MEMORY_HOTREMOVE so there is no point to warn about it. Make the warning conditional only to configurations that care. Link: http://lkml.kernel.org/r/20180706061750.GH32658@dhcp22.suse.cz Signed-off-by: Michal Hocko Reported-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Reviewed-by: Andrew Morton Cc: Vlastimil Babka Cc: Mike Rapoport Cc: Greg Ungerer Cc: Sam Creasey Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memblock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index 03d48d8835ba..11e46f83e1ad 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -227,7 +227,8 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, * so we use WARN_ONCE() here to see the stack trace if * fail happens. */ - WARN_ONCE(1, "memblock: bottom-up allocation failed, memory hotunplug may be affected\n"); + WARN_ONCE(IS_ENABLED(CONFIG_MEMORY_HOTREMOVE), + "memblock: bottom-up allocation failed, memory hotremove may be affected\n"); } return __memblock_find_range_top_down(start, end, size, align, nid, From bb177a732c4369bb58a1fe1df8f552b6f0f7db5f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 13 Jul 2018 16:59:20 -0700 Subject: [PATCH 09/11] mm: do not bug_on on incorrect length in __mm_populate() syzbot has noticed that a specially crafted library can easily hit VM_BUG_ON in __mm_populate kernel BUG at mm/gup.c:1242! invalid opcode: 0000 [#1] SMP CPU: 2 PID: 9667 Comm: a.out Not tainted 4.18.0-rc3 #644 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017 RIP: 0010:__mm_populate+0x1e2/0x1f0 Code: 55 d0 65 48 33 14 25 28 00 00 00 89 d8 75 21 48 83 c4 20 5b 41 5c 41 5d 41 5e 41 5f 5d c3 e8 75 18 f1 ff 0f 0b e8 6e 18 f1 ff <0f> 0b 31 db eb c9 e8 93 06 e0 ff 0f 1f 00 55 48 89 e5 53 48 89 fb Call Trace: vm_brk_flags+0xc3/0x100 vm_brk+0x1f/0x30 load_elf_library+0x281/0x2e0 __ia32_sys_uselib+0x170/0x1e0 do_fast_syscall_32+0xca/0x420 entry_SYSENTER_compat+0x70/0x7f The reason is that the length of the new brk is not page aligned when we try to populate the it. There is no reason to bug on that though. do_brk_flags already aligns the length properly so the mapping is expanded as it should. All we need is to tell mm_populate about it. Besides that there is absolutely no reason to to bug_on in the first place. The worst thing that could happen is that the last page wouldn't get populated and that is far from putting system into an inconsistent state. Fix the issue by moving the length sanitization code from do_brk_flags up to vm_brk_flags. The only other caller of do_brk_flags is brk syscall entry and it makes sure to provide the proper length so t here is no need for sanitation and so we can use do_brk_flags without it. Also remove the bogus BUG_ONs. [osalvador@techadventures.net: fix up vm_brk_flags s@request@len@] Link: http://lkml.kernel.org/r/20180706090217.GI32658@dhcp22.suse.cz Signed-off-by: Michal Hocko Reported-by: syzbot Tested-by: Tetsuo Handa Reviewed-by: Oscar Salvador Cc: Zi Yan Cc: "Aneesh Kumar K.V" Cc: Dan Williams Cc: "Kirill A. Shutemov" Cc: Michael S. Tsirkin Cc: Al Viro Cc: "Huang, Ying" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/gup.c | 2 -- mm/mmap.c | 29 ++++++++++++----------------- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index b70d7ba7cc13..fc5f98069f4e 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1238,8 +1238,6 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) int locked = 0; long ret = 0; - VM_BUG_ON(start & ~PAGE_MASK); - VM_BUG_ON(len != PAGE_ALIGN(len)); end = start + len; for (nstart = start; nstart < end; nstart = nend) { diff --git a/mm/mmap.c b/mm/mmap.c index d1eb87ef4b1a..5801b5f0a634 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -186,8 +186,8 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) return next; } -static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf); - +static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, + struct list_head *uf); SYSCALL_DEFINE1(brk, unsigned long, brk) { unsigned long retval; @@ -245,7 +245,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) goto out; /* Ok, looks good - let it rip. */ - if (do_brk(oldbrk, newbrk-oldbrk, &uf) < 0) + if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0) goto out; set_brk: @@ -2929,21 +2929,14 @@ static inline void verify_mm_writelocked(struct mm_struct *mm) * anonymous maps. eventually we may be able to do some * brk-specific accounting here. */ -static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, struct list_head *uf) +static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; - unsigned long len; struct rb_node **rb_link, *rb_parent; pgoff_t pgoff = addr >> PAGE_SHIFT; int error; - len = PAGE_ALIGN(request); - if (len < request) - return -ENOMEM; - if (!len) - return 0; - /* Until we need other flags, refuse anything except VM_EXEC. */ if ((flags & (~VM_EXEC)) != 0) return -EINVAL; @@ -3015,18 +3008,20 @@ out: return 0; } -static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf) -{ - return do_brk_flags(addr, len, 0, uf); -} - -int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags) +int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags) { struct mm_struct *mm = current->mm; + unsigned long len; int ret; bool populate; LIST_HEAD(uf); + len = PAGE_ALIGN(request); + if (len < request) + return -ENOMEM; + if (!len) + return 0; + if (down_write_killable(&mm->mmap_sem)) return -EINTR; From ffe075132af8b7967089c361e506d4fa747efd14 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 13 Jul 2018 16:59:23 -0700 Subject: [PATCH 10/11] checkpatch: fix duplicate invalid vsprintf pointer extension '%p' messages Multiline statements with invalid %p uses produce multiple warnings. Fix that. e.g.: $ cat t_block.c void foo(void) { MY_DEBUG(drv->foo, "%pk", foo->boo); } $ ./scripts/checkpatch.pl -f t_block.c WARNING: Missing or malformed SPDX-License-Identifier tag in line 1 #1: FILE: t_block.c:1: +void foo(void) WARNING: Invalid vsprintf pointer extension '%pk' #3: FILE: t_block.c:3: + MY_DEBUG(drv->foo, + "%pk", + foo->boo); WARNING: Invalid vsprintf pointer extension '%pk' #3: FILE: t_block.c:3: + MY_DEBUG(drv->foo, + "%pk", + foo->boo); total: 0 errors, 3 warnings, 6 lines checked NOTE: For some of the reported defects, checkpatch may be able to mechanically convert to the typical style using --fix or --fix-inplace. t_block.c has style problems, please review. NOTE: If any of the errors are false positives, please report them to the maintainer, see CHECKPATCH in MAINTAINERS. Link: http://lkml.kernel.org/r/9e8341bbe4c9877d159cb512bb701043cbfbb10b.camel@perches.com Signed-off-by: Joe Perches Cc: "Tobin C. Harding" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index a9c05506e325..447857ffaf6b 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5813,14 +5813,14 @@ sub process { defined $stat && $stat =~ /^\+(?![^\{]*\{\s*).*\b(\w+)\s*\(.*$String\s*,/s && $1 !~ /^_*volatile_*$/) { - my $specifier; - my $extension; - my $bad_specifier = ""; my $stat_real; my $lc = $stat =~ tr@\n@@; $lc = $lc + $linenr; for (my $count = $linenr; $count <= $lc; $count++) { + my $specifier; + my $extension; + my $bad_specifier = ""; my $fmt = get_quoted_string($lines[$count - 1], raw_line($count, 0)); $fmt =~ s/%%//g; From fe10e398e860955bac4d28ec031b701d358465e4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 13 Jul 2018 16:59:27 -0700 Subject: [PATCH 11/11] reiserfs: fix buffer overflow with long warning messages ReiserFS prepares log messages into a 1024-byte buffer with no bounds checks. Long messages, such as the "unknown mount option" warning when userspace passes a crafted mount options string, overflow this buffer. This causes KASAN to report a global-out-of-bounds write. Fix it by truncating messages to the buffer size. Link: http://lkml.kernel.org/r/20180707203621.30922-1-ebiggers3@gmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+b890b3335a4d8c608963@syzkaller.appspotmail.com Signed-off-by: Eric Biggers Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/prints.c | 141 +++++++++++++++++++++++++------------------ 1 file changed, 81 insertions(+), 60 deletions(-) diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 7e288d97adcb..9fed1c05f1f4 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -76,83 +76,99 @@ static char *le_type(struct reiserfs_key *key) } /* %k */ -static void sprintf_le_key(char *buf, struct reiserfs_key *key) +static int scnprintf_le_key(char *buf, size_t size, struct reiserfs_key *key) { if (key) - sprintf(buf, "[%d %d %s %s]", le32_to_cpu(key->k_dir_id), - le32_to_cpu(key->k_objectid), le_offset(key), - le_type(key)); + return scnprintf(buf, size, "[%d %d %s %s]", + le32_to_cpu(key->k_dir_id), + le32_to_cpu(key->k_objectid), le_offset(key), + le_type(key)); else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } /* %K */ -static void sprintf_cpu_key(char *buf, struct cpu_key *key) +static int scnprintf_cpu_key(char *buf, size_t size, struct cpu_key *key) { if (key) - sprintf(buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id, - key->on_disk_key.k_objectid, reiserfs_cpu_offset(key), - cpu_type(key)); + return scnprintf(buf, size, "[%d %d %s %s]", + key->on_disk_key.k_dir_id, + key->on_disk_key.k_objectid, + reiserfs_cpu_offset(key), cpu_type(key)); else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } -static void sprintf_de_head(char *buf, struct reiserfs_de_head *deh) +static int scnprintf_de_head(char *buf, size_t size, + struct reiserfs_de_head *deh) { if (deh) - sprintf(buf, - "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]", - deh_offset(deh), deh_dir_id(deh), deh_objectid(deh), - deh_location(deh), deh_state(deh)); + return scnprintf(buf, size, + "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]", + deh_offset(deh), deh_dir_id(deh), + deh_objectid(deh), deh_location(deh), + deh_state(deh)); else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } -static void sprintf_item_head(char *buf, struct item_head *ih) +static int scnprintf_item_head(char *buf, size_t size, struct item_head *ih) { if (ih) { - strcpy(buf, - (ih_version(ih) == KEY_FORMAT_3_6) ? "*3.6* " : "*3.5*"); - sprintf_le_key(buf + strlen(buf), &(ih->ih_key)); - sprintf(buf + strlen(buf), ", item_len %d, item_location %d, " - "free_space(entry_count) %d", - ih_item_len(ih), ih_location(ih), ih_free_space(ih)); + char *p = buf; + char * const end = buf + size; + + p += scnprintf(p, end - p, "%s", + (ih_version(ih) == KEY_FORMAT_3_6) ? + "*3.6* " : "*3.5*"); + + p += scnprintf_le_key(p, end - p, &ih->ih_key); + + p += scnprintf(p, end - p, + ", item_len %d, item_location %d, free_space(entry_count) %d", + ih_item_len(ih), ih_location(ih), + ih_free_space(ih)); + return p - buf; } else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } -static void sprintf_direntry(char *buf, struct reiserfs_dir_entry *de) +static int scnprintf_direntry(char *buf, size_t size, + struct reiserfs_dir_entry *de) { char name[20]; memcpy(name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen); name[de->de_namelen > 19 ? 19 : de->de_namelen] = 0; - sprintf(buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid); + return scnprintf(buf, size, "\"%s\"==>[%d %d]", + name, de->de_dir_id, de->de_objectid); } -static void sprintf_block_head(char *buf, struct buffer_head *bh) +static int scnprintf_block_head(char *buf, size_t size, struct buffer_head *bh) { - sprintf(buf, "level=%d, nr_items=%d, free_space=%d rdkey ", - B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh)); + return scnprintf(buf, size, + "level=%d, nr_items=%d, free_space=%d rdkey ", + B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh)); } -static void sprintf_buffer_head(char *buf, struct buffer_head *bh) +static int scnprintf_buffer_head(char *buf, size_t size, struct buffer_head *bh) { - sprintf(buf, - "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", - bh->b_bdev, bh->b_size, - (unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)), - bh->b_state, bh->b_page, - buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE", - buffer_dirty(bh) ? "DIRTY" : "CLEAN", - buffer_locked(bh) ? "LOCKED" : "UNLOCKED"); + return scnprintf(buf, size, + "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", + bh->b_bdev, bh->b_size, + (unsigned long long)bh->b_blocknr, + atomic_read(&(bh->b_count)), + bh->b_state, bh->b_page, + buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE", + buffer_dirty(bh) ? "DIRTY" : "CLEAN", + buffer_locked(bh) ? "LOCKED" : "UNLOCKED"); } -static void sprintf_disk_child(char *buf, struct disk_child *dc) +static int scnprintf_disk_child(char *buf, size_t size, struct disk_child *dc) { - sprintf(buf, "[dc_number=%d, dc_size=%u]", dc_block_number(dc), - dc_size(dc)); + return scnprintf(buf, size, "[dc_number=%d, dc_size=%u]", + dc_block_number(dc), dc_size(dc)); } static char *is_there_reiserfs_struct(char *fmt, int *what) @@ -189,55 +205,60 @@ static void prepare_error_buf(const char *fmt, va_list args) char *fmt1 = fmt_buf; char *k; char *p = error_buf; + char * const end = &error_buf[sizeof(error_buf)]; int what; spin_lock(&error_lock); - strcpy(fmt1, fmt); + if (WARN_ON(strscpy(fmt_buf, fmt, sizeof(fmt_buf)) < 0)) { + strscpy(error_buf, "format string too long", end - error_buf); + goto out_unlock; + } while ((k = is_there_reiserfs_struct(fmt1, &what)) != NULL) { *k = 0; - p += vsprintf(p, fmt1, args); + p += vscnprintf(p, end - p, fmt1, args); switch (what) { case 'k': - sprintf_le_key(p, va_arg(args, struct reiserfs_key *)); + p += scnprintf_le_key(p, end - p, + va_arg(args, struct reiserfs_key *)); break; case 'K': - sprintf_cpu_key(p, va_arg(args, struct cpu_key *)); + p += scnprintf_cpu_key(p, end - p, + va_arg(args, struct cpu_key *)); break; case 'h': - sprintf_item_head(p, va_arg(args, struct item_head *)); + p += scnprintf_item_head(p, end - p, + va_arg(args, struct item_head *)); break; case 't': - sprintf_direntry(p, - va_arg(args, - struct reiserfs_dir_entry *)); + p += scnprintf_direntry(p, end - p, + va_arg(args, struct reiserfs_dir_entry *)); break; case 'y': - sprintf_disk_child(p, - va_arg(args, struct disk_child *)); + p += scnprintf_disk_child(p, end - p, + va_arg(args, struct disk_child *)); break; case 'z': - sprintf_block_head(p, - va_arg(args, struct buffer_head *)); + p += scnprintf_block_head(p, end - p, + va_arg(args, struct buffer_head *)); break; case 'b': - sprintf_buffer_head(p, - va_arg(args, struct buffer_head *)); + p += scnprintf_buffer_head(p, end - p, + va_arg(args, struct buffer_head *)); break; case 'a': - sprintf_de_head(p, - va_arg(args, - struct reiserfs_de_head *)); + p += scnprintf_de_head(p, end - p, + va_arg(args, struct reiserfs_de_head *)); break; } - p += strlen(p); fmt1 = k + 2; } - vsprintf(p, fmt1, args); + p += vscnprintf(p, end - p, fmt1, args); +out_unlock: spin_unlock(&error_lock); }