diff --git a/arch/Kconfig b/arch/Kconfig
index d007b2a15b22..467dfa35bf96 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -245,6 +245,17 @@ config ARCH_TASK_STRUCT_ON_STACK
 config ARCH_TASK_STRUCT_ALLOCATOR
 	bool
 
+config HAVE_ARCH_THREAD_STRUCT_WHITELIST
+	bool
+	depends on !ARCH_TASK_STRUCT_ALLOCATOR
+	help
+	  An architecture should select this to provide hardened usercopy
+	  knowledge about what region of the thread_struct should be
+	  whitelisted for copying to userspace. Normally this is only the
+	  FPU registers. Specifically, arch_thread_struct_whitelist()
+	  should be implemented. Without this, the entire thread_struct
+	  field in task_struct will be left whitelisted.
+
 # Select if arch has its private alloc_thread_stack() function
 config ARCH_THREAD_STACK_ALLOCATOR
 	bool
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4daddc53e7bc..7e3d53575486 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -51,6 +51,7 @@ config ARM
 	select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
 	select HAVE_ARCH_MMAP_RND_BITS if MMU
 	select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
+	select HAVE_ARCH_THREAD_STRUCT_WHITELIST
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARM_SMCCC if CPU_V7
 	select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 338cbe0a18ef..1bf65b47808a 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -45,6 +45,16 @@ struct thread_struct {
 	struct debug_info	debug;
 };
 
+/*
+ * Everything usercopied to/from thread_struct is statically-sized, so
+ * no hardened usercopy whitelist is needed.
+ */
+static inline void arch_thread_struct_whitelist(unsigned long *offset,
+						unsigned long *size)
+{
+	*offset = *size = 0;
+}
+
 #define INIT_THREAD  {	}
 
 #define start_thread(regs,pc,sp)					\
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b2b95f79c746..53612879fe56 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -91,6 +91,7 @@ config ARM64
 	select HAVE_ARCH_MMAP_RND_BITS
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
 	select HAVE_ARCH_SECCOMP_FILTER
+	select HAVE_ARCH_THREAD_STRUCT_WHITELIST
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_ARCH_VMAP_STACK
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index cee4ae25a5d1..6db43ebd648d 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -113,6 +113,16 @@ struct thread_struct {
 	struct debug_info	debug;		/* debugging */
 };
 
+/*
+ * Everything usercopied to/from thread_struct is statically-sized, so
+ * no hardened usercopy whitelist is needed.
+ */
+static inline void arch_thread_struct_whitelist(unsigned long *offset,
+						unsigned long *size)
+{
+	*offset = *size = 0;
+}
+
 #ifdef CONFIG_COMPAT
 #define task_user_tls(t)						\
 ({									\
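An architecture that selects HAVE_ARCH_THREAD_STRUCT_WHITELIST pairs it with an
arch_thread_struct_whitelist() implementation. The arm and arm64 stubs above
whitelist nothing; a minimal sketch of the non-trivial case, assuming a
hypothetical architecture that keeps its usercopied FPU registers in a
thread_struct field named fpstate (the x86 conversion below does exactly this
with fpu.state):

/* Hypothetical example, not part of this patch: whitelist one field. */
static inline void arch_thread_struct_whitelist(unsigned long *offset,
						unsigned long *size)
{
	*offset = offsetof(struct thread_struct, fpstate);
	*size = sizeof_field(struct thread_struct, fpstate);
}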
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 72d5149bcaa1..b0771ceabb4b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -116,6 +116,7 @@ config X86
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS	if MMU && COMPAT
 	select HAVE_ARCH_COMPAT_MMAP_BASES	if MMU && COMPAT
 	select HAVE_ARCH_SECCOMP_FILTER
+	select HAVE_ARCH_THREAD_STRUCT_WHITELIST
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index efbde088a718..6207f2f740b6 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -507,6 +507,14 @@ struct thread_struct {
 	 */
 };
 
+/* Whitelist the FPU state from the task_struct for hardened usercopy. */
+static inline void arch_thread_struct_whitelist(unsigned long *offset,
+						unsigned long *size)
+{
+	*offset = offsetof(struct thread_struct, fpu.state);
+	*size = fpu_kernel_xstate_size;
+}
+
 /*
  * Thread-synchronous status.
  *
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c53298dfbf50..5040ff9de6ab 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4237,13 +4237,14 @@ set_identity_unlock:
 		mutex_unlock(&kvm->lock);
 		break;
 	case KVM_XEN_HVM_CONFIG: {
+		struct kvm_xen_hvm_config xhc;
 		r = -EFAULT;
-		if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
-				   sizeof(struct kvm_xen_hvm_config)))
+		if (copy_from_user(&xhc, argp, sizeof(xhc)))
 			goto out;
 		r = -EINVAL;
-		if (kvm->arch.xen_hvm_config.flags)
+		if (xhc.flags)
 			goto out;
+		memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc));
 		r = 0;
 		break;
 	}
diff --git a/drivers/misc/lkdtm.h b/drivers/misc/lkdtm.h
index 687a0dbbe199..9e513dcfd809 100644
--- a/drivers/misc/lkdtm.h
+++ b/drivers/misc/lkdtm.h
@@ -76,8 +76,8 @@ void __init lkdtm_usercopy_init(void);
 void __exit lkdtm_usercopy_exit(void);
 void lkdtm_USERCOPY_HEAP_SIZE_TO(void);
 void lkdtm_USERCOPY_HEAP_SIZE_FROM(void);
-void lkdtm_USERCOPY_HEAP_FLAG_TO(void);
-void lkdtm_USERCOPY_HEAP_FLAG_FROM(void);
+void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void);
+void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void);
 void lkdtm_USERCOPY_STACK_FRAME_TO(void);
 void lkdtm_USERCOPY_STACK_FRAME_FROM(void);
 void lkdtm_USERCOPY_STACK_BEYOND(void);
diff --git a/drivers/misc/lkdtm_core.c b/drivers/misc/lkdtm_core.c
index 4942da93d066..2154d1bfd18b 100644
--- a/drivers/misc/lkdtm_core.c
+++ b/drivers/misc/lkdtm_core.c
@@ -177,8 +177,8 @@ static const struct crashtype crashtypes[] = {
 	CRASHTYPE(ATOMIC_TIMING),
 	CRASHTYPE(USERCOPY_HEAP_SIZE_TO),
 	CRASHTYPE(USERCOPY_HEAP_SIZE_FROM),
-	CRASHTYPE(USERCOPY_HEAP_FLAG_TO),
-	CRASHTYPE(USERCOPY_HEAP_FLAG_FROM),
+	CRASHTYPE(USERCOPY_HEAP_WHITELIST_TO),
+	CRASHTYPE(USERCOPY_HEAP_WHITELIST_FROM),
 	CRASHTYPE(USERCOPY_STACK_FRAME_TO),
 	CRASHTYPE(USERCOPY_STACK_FRAME_FROM),
 	CRASHTYPE(USERCOPY_STACK_BEYOND),
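The renamed LKDTM tests are exercised through LKDTM's usual debugfs trigger. A
userspace sketch (the debugfs mount point and file path are the standard ones,
but are an assumption here; the test intentionally kills the writing process):

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* Assumed standard LKDTM trigger path. */
	int fd = open("/sys/kernel/debug/provoke-crash/DIRECT", O_WRONLY);

	if (fd < 0)
		return 1;
	/* Copy outside the whitelisted window; expect an Oops. */
	write(fd, "USERCOPY_HEAP_WHITELIST_TO", 26);
	close(fd);	/* Likely never reached. */
	return 0;
}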
diff --git a/drivers/misc/lkdtm_usercopy.c b/drivers/misc/lkdtm_usercopy.c
index a64372cc148d..9725aed305bb 100644
--- a/drivers/misc/lkdtm_usercopy.c
+++ b/drivers/misc/lkdtm_usercopy.c
@@ -20,7 +20,7 @@
  */
 static volatile size_t unconst = 0;
 static volatile size_t cache_size = 1024;
-static struct kmem_cache *bad_cache;
+static struct kmem_cache *whitelist_cache;
 
 static const unsigned char test_text[] = "This is a test.\n";
 
@@ -115,10 +115,16 @@ free_user:
 	vm_munmap(user_addr, PAGE_SIZE);
 }
 
+/*
+ * This checks for whole-object size validation with hardened usercopy,
+ * with or without usercopy whitelisting.
+ */
 static void do_usercopy_heap_size(bool to_user)
 {
 	unsigned long user_addr;
 	unsigned char *one, *two;
+	void __user *test_user_addr;
+	void *test_kern_addr;
 	size_t size = unconst + 1024;
 
 	one = kmalloc(size, GFP_KERNEL);
@@ -139,27 +145,30 @@ static void do_usercopy_heap_size(bool to_user)
 	memset(one, 'A', size);
 	memset(two, 'B', size);
 
+	test_user_addr = (void __user *)(user_addr + 16);
+	test_kern_addr = one + 16;
+
 	if (to_user) {
 		pr_info("attempting good copy_to_user of correct size\n");
-		if (copy_to_user((void __user *)user_addr, one, size)) {
+		if (copy_to_user(test_user_addr, test_kern_addr, size / 2)) {
 			pr_warn("copy_to_user failed unexpectedly?!\n");
 			goto free_user;
 		}
 
 		pr_info("attempting bad copy_to_user of too large size\n");
-		if (copy_to_user((void __user *)user_addr, one, 2 * size)) {
+		if (copy_to_user(test_user_addr, test_kern_addr, size)) {
 			pr_warn("copy_to_user failed, but lacked Oops\n");
 			goto free_user;
 		}
 	} else {
 		pr_info("attempting good copy_from_user of correct size\n");
-		if (copy_from_user(one, (void __user *)user_addr, size)) {
+		if (copy_from_user(test_kern_addr, test_user_addr, size / 2)) {
 			pr_warn("copy_from_user failed unexpectedly?!\n");
 			goto free_user;
 		}
 
 		pr_info("attempting bad copy_from_user of too large size\n");
-		if (copy_from_user(one, (void __user *)user_addr, 2 * size)) {
+		if (copy_from_user(test_kern_addr, test_user_addr, size)) {
 			pr_warn("copy_from_user failed, but lacked Oops\n");
 			goto free_user;
 		}
@@ -172,77 +181,79 @@ free_kernel:
 	kfree(two);
 }
 
-static void do_usercopy_heap_flag(bool to_user)
+/*
+ * This checks for the specific whitelist window within an object. If this
+ * test passes, then do_usercopy_heap_size() tests will pass too.
+ */
+static void do_usercopy_heap_whitelist(bool to_user)
 {
-	unsigned long user_addr;
-	unsigned char *good_buf = NULL;
-	unsigned char *bad_buf = NULL;
+	unsigned long user_alloc;
+	unsigned char *buf = NULL;
+	unsigned char __user *user_addr;
+	size_t offset, size;
 
 	/* Make sure cache was prepared. */
-	if (!bad_cache) {
+	if (!whitelist_cache) {
 		pr_warn("Failed to allocate kernel cache\n");
 		return;
 	}
 
 	/*
-	 * Allocate one buffer from each cache (kmalloc will have the
-	 * SLAB_USERCOPY flag already, but "bad_cache" won't).
+	 * Allocate a buffer with a whitelisted window in the buffer.
 	 */
-	good_buf = kmalloc(cache_size, GFP_KERNEL);
-	bad_buf = kmem_cache_alloc(bad_cache, GFP_KERNEL);
-	if (!good_buf || !bad_buf) {
-		pr_warn("Failed to allocate buffers from caches\n");
+	buf = kmem_cache_alloc(whitelist_cache, GFP_KERNEL);
+	if (!buf) {
+		pr_warn("Failed to allocate buffer from whitelist cache\n");
 		goto free_alloc;
 	}
 
 	/* Allocate user memory we'll poke at. */
-	user_addr = vm_mmap(NULL, 0, PAGE_SIZE,
+	user_alloc = vm_mmap(NULL, 0, PAGE_SIZE,
 			    PROT_READ | PROT_WRITE | PROT_EXEC,
 			    MAP_ANONYMOUS | MAP_PRIVATE, 0);
-	if (user_addr >= TASK_SIZE) {
+	if (user_alloc >= TASK_SIZE) {
 		pr_warn("Failed to allocate user memory\n");
 		goto free_alloc;
 	}
+	user_addr = (void __user *)user_alloc;
 
-	memset(good_buf, 'A', cache_size);
-	memset(bad_buf, 'B', cache_size);
+	memset(buf, 'B', cache_size);
+
+	/* Whitelisted window in buffer, from kmem_cache_create_usercopy. */
+	offset = (cache_size / 4) + unconst;
+	size = (cache_size / 16) + unconst;
 
 	if (to_user) {
-		pr_info("attempting good copy_to_user with SLAB_USERCOPY\n");
-		if (copy_to_user((void __user *)user_addr, good_buf,
-				 cache_size)) {
+		pr_info("attempting good copy_to_user inside whitelist\n");
+		if (copy_to_user(user_addr, buf + offset, size)) {
 			pr_warn("copy_to_user failed unexpectedly?!\n");
 			goto free_user;
 		}
 
-		pr_info("attempting bad copy_to_user w/o SLAB_USERCOPY\n");
-		if (copy_to_user((void __user *)user_addr, bad_buf,
-				 cache_size)) {
+		pr_info("attempting bad copy_to_user outside whitelist\n");
+		if (copy_to_user(user_addr, buf + offset - 1, size)) {
 			pr_warn("copy_to_user failed, but lacked Oops\n");
 			goto free_user;
 		}
 	} else {
-		pr_info("attempting good copy_from_user with SLAB_USERCOPY\n");
-		if (copy_from_user(good_buf, (void __user *)user_addr,
-				   cache_size)) {
+		pr_info("attempting good copy_from_user inside whitelist\n");
+		if (copy_from_user(buf + offset, user_addr, size)) {
 			pr_warn("copy_from_user failed unexpectedly?!\n");
 			goto free_user;
 		}
 
-		pr_info("attempting bad copy_from_user w/o SLAB_USERCOPY\n");
-		if (copy_from_user(bad_buf, (void __user *)user_addr,
-				   cache_size)) {
+		pr_info("attempting bad copy_from_user outside whitelist\n");
+		if (copy_from_user(buf + offset - 1, user_addr, size)) {
 			pr_warn("copy_from_user failed, but lacked Oops\n");
 			goto free_user;
 		}
 	}
 
 free_user:
-	vm_munmap(user_addr, PAGE_SIZE);
+	vm_munmap(user_alloc, PAGE_SIZE);
 free_alloc:
-	if (bad_buf)
-		kmem_cache_free(bad_cache, bad_buf);
-	kfree(good_buf);
+	if (buf)
+		kmem_cache_free(whitelist_cache, buf);
 }
 
 /* Callable tests. */
@@ -256,14 +267,14 @@ void lkdtm_USERCOPY_HEAP_SIZE_FROM(void)
 	do_usercopy_heap_size(false);
 }
 
-void lkdtm_USERCOPY_HEAP_FLAG_TO(void)
+void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void)
 {
-	do_usercopy_heap_flag(true);
+	do_usercopy_heap_whitelist(true);
 }
 
-void lkdtm_USERCOPY_HEAP_FLAG_FROM(void)
+void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void)
 {
-	do_usercopy_heap_flag(false);
+	do_usercopy_heap_whitelist(false);
 }
 
 void lkdtm_USERCOPY_STACK_FRAME_TO(void)
@@ -314,11 +325,15 @@ free_user:
 
 void __init lkdtm_usercopy_init(void)
 {
-	/* Prepare cache that lacks SLAB_USERCOPY flag. */
-	bad_cache = kmem_cache_create("lkdtm-no-usercopy", cache_size, 0,
-				      0, NULL);
+	/* Prepare cache with a whitelisted usercopy window. */
+	whitelist_cache =
+		kmem_cache_create_usercopy("lkdtm-usercopy", cache_size,
+					   0, 0,
+					   cache_size / 4,
+					   cache_size / 16,
+					   NULL);
 }
 
 void __exit lkdtm_usercopy_exit(void)
 {
-	kmem_cache_destroy(bad_cache);
+	kmem_cache_destroy(whitelist_cache);
 }
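With the default cache_size of 1024, the cache created in lkdtm_usercopy_init()
whitelists the 64-byte window starting at offset 256 of each object.
Illustration only (not part of the patch) of why the second copy in each test
trips hardened usercopy:

/* Illustration: the whitelist window is [256, 320) per object. */
static void whitelist_window_example(unsigned char *buf,
				     void __user *user_addr)
{
	size_t offset = 1024 / 4;	/* 256, matches useroffset */
	size_t size = 1024 / 16;	/* 64, matches usersize */

	/* Good: [256, 320) lies entirely inside the whitelist. */
	if (copy_to_user(user_addr, buf + offset, size))
		return;
	/* Bad: [255, 319) starts one byte before the whitelist. */
	copy_to_user(user_addr, buf + offset - 1, size);
}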
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 976c936029cb..8efe4731ed89 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -79,14 +79,15 @@ int scsi_init_sense_cache(struct Scsi_Host *shost)
 	if (shost->unchecked_isa_dma) {
 		scsi_sense_isadma_cache =
 			kmem_cache_create("scsi_sense_cache(DMA)",
-			SCSI_SENSE_BUFFERSIZE, 0,
-			SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, NULL);
+				SCSI_SENSE_BUFFERSIZE, 0,
+				SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, NULL);
 		if (!scsi_sense_isadma_cache)
 			ret = -ENOMEM;
 	} else {
 		scsi_sense_cache =
-			kmem_cache_create("scsi_sense_cache",
-			SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN, NULL);
+			kmem_cache_create_usercopy("scsi_sense_cache",
+				SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN,
+				0, SCSI_SENSE_BUFFERSIZE, NULL);
 		if (!scsi_sense_cache)
 			ret = -ENOMEM;
 	}
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index ee236231cafa..af2832aaeec5 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -444,11 +444,15 @@ unacquire_none:
 static int __init
 befs_init_inodecache(void)
 {
-	befs_inode_cachep = kmem_cache_create("befs_inode_cache",
-					      sizeof (struct befs_inode_info),
-					      0, (SLAB_RECLAIM_ACCOUNT|
-						SLAB_MEM_SPREAD|SLAB_ACCOUNT),
-					      init_once);
+	befs_inode_cachep = kmem_cache_create_usercopy("befs_inode_cache",
+				sizeof(struct befs_inode_info), 0,
+				(SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
+					SLAB_ACCOUNT),
+				offsetof(struct befs_inode_info,
+					i_data.symlink),
+				sizeof_field(struct befs_inode_info,
+					i_data.symlink),
+				init_once);
 	if (befs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a7be591d8e18..32cdea67bbfd 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1239,9 +1239,11 @@ cifs_init_request_bufs(void)
 	cifs_dbg(VFS, "CIFSMaxBufSize %d 0x%x\n",
 		 CIFSMaxBufSize, CIFSMaxBufSize);
 */
-	cifs_req_cachep = kmem_cache_create("cifs_request",
+	cifs_req_cachep = kmem_cache_create_usercopy("cifs_request",
 					    CIFSMaxBufSize + max_hdr_size, 0,
-					    SLAB_HWCACHE_ALIGN, NULL);
+					    SLAB_HWCACHE_ALIGN, 0,
+					    CIFSMaxBufSize + max_hdr_size,
+					    NULL);
 	if (cifs_req_cachep == NULL)
 		return -ENOMEM;
 
@@ -1267,9 +1269,9 @@ cifs_init_request_bufs(void)
 	more SMBs to use small buffer alloc and is still much more
 	efficient to alloc 1 per page off the slab compared to 17K (5page)
 	alloc of large cifs buffers even when page debugging is on */
-	cifs_sm_req_cachep = kmem_cache_create("cifs_small_rq",
+	cifs_sm_req_cachep = kmem_cache_create_usercopy("cifs_small_rq",
 			MAX_CIFS_SMALL_BUFFER_SIZE, 0, SLAB_HWCACHE_ALIGN,
-			NULL);
+			0, MAX_CIFS_SMALL_BUFFER_SIZE, NULL);
 	if (cifs_sm_req_cachep == NULL) {
 		mempool_destroy(cifs_req_poolp);
 		kmem_cache_destroy(cifs_req_cachep);
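The SCSI sense buffer and the cifs request buffers are copied to userspace in
their entirety, so these conversions whitelist the whole object (useroffset 0,
usersize equal to the object size) rather than a single field. A hedged sketch
of that pattern for a hypothetical driver (all mydrv names are invented):

/* Hypothetical example: every byte of the buffer may be usercopied. */
#define MYDRV_BUF_SIZE	512

static struct kmem_cache *mydrv_buf_cache;

static int __init mydrv_cache_init(void)
{
	mydrv_buf_cache = kmem_cache_create_usercopy("mydrv_buf",
				MYDRV_BUF_SIZE, 0, SLAB_HWCACHE_ALIGN,
				0, MYDRV_BUF_SIZE, NULL);
	return mydrv_buf_cache ? 0 : -ENOMEM;
}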
diff --git a/fs/dcache.c b/fs/dcache.c
index 5b207e0de459..cca2b377ff0a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3602,8 +3602,9 @@ static void __init dcache_init(void)
 	 * but it is probably not worth it because of the cache nature
 	 * of the dcache.
 	 */
-	dentry_cache = KMEM_CACHE(dentry,
-		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT);
+	dentry_cache = KMEM_CACHE_USERCOPY(dentry,
+		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
+		d_iname);
 
 	/* Hash may have been set up in dcache_init_early */
 	if (!hashdist)
@@ -3641,8 +3642,8 @@ void __init vfs_caches_init_early(void)
 
 void __init vfs_caches_init(void)
 {
-	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+	names_cachep = kmem_cache_create_usercopy("names_cache", PATH_MAX, 0,
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, PATH_MAX, NULL);
 
 	dcache_init();
 	inode_init();
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 7e244093c0e5..179cd5c2f52a 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -193,10 +193,13 @@ static void exofs_init_once(void *foo)
  */
 static int init_inodecache(void)
 {
-	exofs_inode_cachep = kmem_cache_create("exofs_inode_cache",
+	exofs_inode_cachep = kmem_cache_create_usercopy("exofs_inode_cache",
 				sizeof(struct exofs_i_info), 0,
 				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD |
-				SLAB_ACCOUNT, exofs_init_once);
+				SLAB_ACCOUNT,
+				offsetof(struct exofs_i_info, i_data),
+				sizeof_field(struct exofs_i_info, i_data),
+				exofs_init_once);
 	if (exofs_inode_cachep == NULL)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 554c98b8a93a..db5f9daa7780 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -221,11 +221,13 @@ static void init_once(void *foo)
 
 static int __init init_inodecache(void)
 {
-	ext2_inode_cachep = kmem_cache_create("ext2_inode_cache",
-					     sizeof(struct ext2_inode_info),
-					     0, (SLAB_RECLAIM_ACCOUNT|
-						SLAB_MEM_SPREAD|SLAB_ACCOUNT),
-					     init_once);
+	ext2_inode_cachep = kmem_cache_create_usercopy("ext2_inode_cache",
+				sizeof(struct ext2_inode_info), 0,
+				(SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
+					SLAB_ACCOUNT),
+				offsetof(struct ext2_inode_info, i_data),
+				sizeof_field(struct ext2_inode_info, i_data),
+				init_once);
 	if (ext2_inode_cachep == NULL)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 82eead1c8b84..899e6faf3381 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1038,11 +1038,13 @@ static void init_once(void *foo)
 
 static int __init init_inodecache(void)
 {
-	ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
-					     sizeof(struct ext4_inode_info),
-					     0, (SLAB_RECLAIM_ACCOUNT|
-						SLAB_MEM_SPREAD|SLAB_ACCOUNT),
-					     init_once);
+	ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
+				sizeof(struct ext4_inode_info), 0,
+				(SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
+					SLAB_ACCOUNT),
+				offsetof(struct ext4_inode_info, i_data),
+				sizeof_field(struct ext4_inode_info, i_data),
+				init_once);
 	if (ext4_inode_cachep == NULL)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 0ace128f5d23..0ee727485615 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -69,8 +69,7 @@ static long do_sys_name_to_handle(struct path *path,
 	} else
 		retval = 0;
 	/* copy the mount id */
-	if (copy_to_user(mnt_id, &real_mount(path->mnt)->mnt_id,
-			 sizeof(*mnt_id)) ||
+	if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) ||
 	    copy_to_user(ufh, handle,
 			 sizeof(struct file_handle) + handle_bytes))
 		retval = -EFAULT;
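Each filesystem conversion above whitelists only the inline data/symlink area
of its in-memory inode, since that is the only part ever handed to
copy_to_user() (via readlink() and friends). The same pattern for a
hypothetical filesystem, with made-up names:

/* Hypothetical example mirroring the ext2/ext4/ufs conversions. */
struct myfs_inode_info {
	__le32		i_data[15];	/* inline block map or symlink */
	struct inode	vfs_inode;
};

static struct kmem_cache *myfs_inode_cachep;

static int __init myfs_init_inodecache(void)
{
	myfs_inode_cachep = kmem_cache_create_usercopy("myfs_inode_cache",
				sizeof(struct myfs_inode_info), 0,
				SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
				offsetof(struct myfs_inode_info, i_data),
				sizeof_field(struct myfs_inode_info, i_data),
				NULL);
	return myfs_inode_cachep ? 0 : -ENOMEM;
}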
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index f989efa051a0..48b24bb50d02 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -332,9 +332,13 @@ vxfs_init(void)
 {
 	int rv;
 
-	vxfs_inode_cachep = kmem_cache_create("vxfs_inode",
+	vxfs_inode_cachep = kmem_cache_create_usercopy("vxfs_inode",
 			sizeof(struct vxfs_inode_info), 0,
-			SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
+			SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
+			offsetof(struct vxfs_inode_info, vii_immed.vi_immed),
+			sizeof_field(struct vxfs_inode_info,
+				vii_immed.vi_immed),
+			NULL);
 	if (!vxfs_inode_cachep)
 		return -ENOMEM;
 	rv = register_filesystem(&vxfs_fs_type);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 90373aebfdca..1b9264fd54b6 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -965,9 +965,11 @@ static int __init init_jfs_fs(void)
 	int rc;
 
 	jfs_inode_cachep =
-	    kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0,
-			    SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
-			    init_once);
+	    kmem_cache_create_usercopy("jfs_ip", sizeof(struct jfs_inode_info),
+			0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
+			offsetof(struct jfs_inode_info, i_inline),
+			sizeof_field(struct jfs_inode_info, i_inline),
+			init_once);
 	if (jfs_inode_cachep == NULL)
 		return -ENOMEM;
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index 36f1390b5ed7..62d49e53061c 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -610,11 +610,16 @@ void orangefs_kill_sb(struct super_block *sb)
 
 int orangefs_inode_cache_initialize(void)
 {
-	orangefs_inode_cache = kmem_cache_create("orangefs_inode_cache",
-						 sizeof(struct orangefs_inode_s),
-						 0,
-						 ORANGEFS_CACHE_CREATE_FLAGS,
-						 orangefs_inode_cache_ctor);
+	orangefs_inode_cache = kmem_cache_create_usercopy(
+					"orangefs_inode_cache",
+					sizeof(struct orangefs_inode_s),
+					0,
+					ORANGEFS_CACHE_CREATE_FLAGS,
+					offsetof(struct orangefs_inode_s,
+						link_target),
+					sizeof_field(struct orangefs_inode_s,
+						link_target),
+					orangefs_inode_cache_ctor);
 
 	if (!orangefs_inode_cache) {
 		gossip_err("Cannot create orangefs_inode_cache\n");
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index b6ba80e05bff..8254b8b3690f 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1467,11 +1467,14 @@ static void init_once(void *foo)
 
 static int __init init_inodecache(void)
 {
-	ufs_inode_cachep = kmem_cache_create("ufs_inode_cache",
-					     sizeof(struct ufs_inode_info),
-					     0, (SLAB_RECLAIM_ACCOUNT|
-						SLAB_MEM_SPREAD|SLAB_ACCOUNT),
-					     init_once);
+	ufs_inode_cachep = kmem_cache_create_usercopy("ufs_inode_cache",
+				sizeof(struct ufs_inode_info), 0,
+				(SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
+					SLAB_ACCOUNT),
+				offsetof(struct ufs_inode_info, i_u1.i_symlink),
+				sizeof_field(struct ufs_inode_info,
+					i_u1.i_symlink),
+				init_once);
 	if (ufs_inode_cachep == NULL)
 		return -ENOMEM;
 	return 0;
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 05b8650f06f5..5be31eb7b266 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -104,6 +104,20 @@ extern int arch_task_struct_size __read_mostly;
 # define arch_task_struct_size (sizeof(struct task_struct))
 #endif
 
+#ifndef CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST
+/*
+ * If an architecture has not declared a thread_struct whitelist, we
+ * must assume something there may need to be copied to userspace.
+ */
+static inline void arch_thread_struct_whitelist(unsigned long *offset,
+						unsigned long *size)
+{
+	*offset = 0;
+	/* Handle dynamically sized thread_struct. */
+	*size = arch_task_struct_size - offsetof(struct task_struct, thread);
+}
+#endif
+
 #ifdef CONFIG_VMAP_STACK
 static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
 {
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 50697a1d6621..231abc8976c5 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -135,9 +135,15 @@ struct mem_cgroup;
 void __init kmem_cache_init(void);
 bool slab_is_available(void);
 
-struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
-			slab_flags_t,
-			void (*)(void *));
+extern bool usercopy_fallback;
+
+struct kmem_cache *kmem_cache_create(const char *name, size_t size,
+			size_t align, slab_flags_t flags,
+			void (*ctor)(void *));
+struct kmem_cache *kmem_cache_create_usercopy(const char *name,
+			size_t size, size_t align, slab_flags_t flags,
+			size_t useroffset, size_t usersize,
+			void (*ctor)(void *));
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
 
@@ -153,9 +159,20 @@ void memcg_destroy_kmem_caches(struct mem_cgroup *);
 * f.e. add ____cacheline_aligned_in_smp to the struct declaration
 * then the objects will be properly aligned in SMP configurations.
 */
-#define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\
-		sizeof(struct __struct), __alignof__(struct __struct),\
-		(__flags), NULL)
+#define KMEM_CACHE(__struct, __flags)					\
+		kmem_cache_create(#__struct, sizeof(struct __struct),	\
+			__alignof__(struct __struct), (__flags), NULL)
+
+/*
+ * To whitelist a single field for copying to/from userspace, use this
+ * macro instead of KMEM_CACHE() above.
+ */
+#define KMEM_CACHE_USERCOPY(__struct, __flags, __field)			\
+		kmem_cache_create_usercopy(#__struct,			\
+			sizeof(struct __struct),			\
+			__alignof__(struct __struct), (__flags),	\
+			offsetof(struct __struct, __field),		\
+			sizeof_field(struct __struct, __field), NULL)
 
 /*
 * Common kmalloc functions provided by all allocators
@@ -167,15 +184,11 @@ void kzfree(const void *);
 size_t ksize(const void *);
 
 #ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
-const char *__check_heap_object(const void *ptr, unsigned long n,
-				struct page *page);
+void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
+			 bool to_user);
 #else
-static inline const char *__check_heap_object(const void *ptr,
-					      unsigned long n,
-					      struct page *page)
-{
-	return NULL;
-}
+static inline void __check_heap_object(const void *ptr, unsigned long n,
+				       struct page *page, bool to_user) { }
 #endif
 
 /*
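KMEM_CACHE_USERCOPY() is the one-field convenience form; fs/dcache.c above uses
it to whitelist only d_iname. A minimal sketch with a hypothetical structure
(myobj is invented):

/* Hypothetical example: only 'name' is ever copied to userspace. */
struct myobj {
	refcount_t	ref;
	char		name[32];
};

static struct kmem_cache *myobj_cache;

static int __init myobj_cache_init(void)
{
	myobj_cache = KMEM_CACHE_USERCOPY(myobj, 0, name);
	return myobj_cache ? 0 : -ENOMEM;
}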
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 072e46e9e1d5..7385547c04b1 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -85,6 +85,9 @@ struct kmem_cache {
 	unsigned int *random_seq;
 #endif
 
+	size_t useroffset;		/* Usercopy region offset */
+	size_t usersize;		/* Usercopy region size */
+
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 0adae162dc8f..8ad99c47b19c 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -135,6 +135,9 @@ struct kmem_cache {
 	struct kasan_cache kasan_info;
 #endif
 
+	size_t useroffset;		/* Usercopy region offset */
+	size_t usersize;		/* Usercopy region size */
+
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
diff --git a/include/linux/stddef.h b/include/linux/stddef.h
index 2181719fd907..998a4ba28eba 100644
--- a/include/linux/stddef.h
+++ b/include/linux/stddef.h
@@ -19,6 +19,14 @@ enum {
 #define offsetof(TYPE, MEMBER)	((size_t)&((TYPE *)0)->MEMBER)
 #endif
 
+/**
+ * sizeof_field(TYPE, MEMBER)
+ *
+ * @TYPE: The structure containing the field of interest
+ * @MEMBER: The field to return the size of
+ */
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+
 /**
  * offsetofend(TYPE, MEMBER)
  *
@@ -26,6 +34,6 @@ enum {
  * @MEMBER: The member within the structure to get the end offset of
  */
 #define offsetofend(TYPE, MEMBER) \
-	(offsetof(TYPE, MEMBER)	+ sizeof(((TYPE *)0)->MEMBER))
+	(offsetof(TYPE, MEMBER)	+ sizeof_field(TYPE, MEMBER))
 
 #endif
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 251e655d407f..efe79c1cdd47 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -273,4 +273,12 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
 #define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0)
 #endif
 
+#ifdef CONFIG_HARDENED_USERCOPY
+void usercopy_warn(const char *name, const char *detail, bool to_user,
+		   unsigned long offset, unsigned long len);
+void __noreturn usercopy_abort(const char *name, const char *detail,
+			       bool to_user, unsigned long offset,
+			       unsigned long len);
+#endif
+
 #endif		/* __LINUX_UACCESS_H__ */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 02369e379d35..03e92dda1813 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -203,12 +203,17 @@ struct sctp_sock {
 	/* Flags controlling Heartbeat, SACK delay, and Path MTU Discovery. */
 	__u32 param_flags;
 
-	struct sctp_initmsg initmsg;
 	struct sctp_rtoinfo rtoinfo;
 	struct sctp_paddrparams paddrparam;
-	struct sctp_event_subscribe subscribe;
 	struct sctp_assocparams assocparams;
 
+	/*
+	 * These two structures must be grouped together for the usercopy
+	 * whitelist region.
+	 */
+	struct sctp_event_subscribe subscribe;
+	struct sctp_initmsg initmsg;
+
 	int user_frag;
 
 	__u32 autoclose;
diff --git a/include/net/sock.h b/include/net/sock.h
index 63731289186a..169c92afcafa 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1109,6 +1109,8 @@ struct proto {
 	struct kmem_cache	*slab;
 	unsigned int		obj_size;
 	slab_flags_t		slab_flags;
+	size_t			useroffset;	/* Usercopy region offset */
+	size_t			usersize;	/* Usercopy region size */
 
 	struct percpu_counter	*orphan_count;
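Protocols opt in declaratively: the new useroffset/usersize fields of struct
proto are forwarded to kmem_cache_create_usercopy() by proto_register()
(changed in net/core/sock.c below). A sketch for a hypothetical protocol whose
socket carries a user-settable filter; all myproto names are invented:

/* Hypothetical example: whitelist one field of a protocol socket. */
struct my_filter {
	__u32 mask;
	__u32 value;
};

struct myproto_sock {
	struct sock	sk;		/* must come first */
	struct my_filter filter;	/* set/read via [gs]etsockopt() */
};

static struct proto myproto_prot = {
	.name		= "MYPROTO",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct myproto_sock),
	.useroffset	= offsetof(struct myproto_sock, filter),
	.usersize	= sizeof_field(struct myproto_sock, filter),
};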
+ */ + if (unlikely(*size == 0)) + *offset = 0; + else + *offset += offsetof(struct task_struct, thread); +} + void __init fork_init(void) { int i; @@ -701,11 +717,14 @@ void __init fork_init(void) #define ARCH_MIN_TASKALIGN 0 #endif int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN); + unsigned long useroffset, usersize; /* create a slab on which task_structs can be allocated */ - task_struct_cachep = kmem_cache_create("task_struct", + task_struct_whitelist(&useroffset, &usersize); + task_struct_cachep = kmem_cache_create_usercopy("task_struct", arch_task_struct_size, align, - SLAB_PANIC|SLAB_ACCOUNT, NULL); + SLAB_PANIC|SLAB_ACCOUNT, + useroffset, usersize, NULL); #endif /* do the arch specific task caches init */ @@ -2248,9 +2267,11 @@ void __init proc_caches_init(void) * maximum number of CPU's we can ever have. The cpumask_allocation * is at the end of the structure, exactly for that reason. */ - mm_cachep = kmem_cache_create("mm_struct", + mm_cachep = kmem_cache_create_usercopy("mm_struct", sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, + offsetof(struct mm_struct, saved_auxv), + sizeof_field(struct mm_struct, saved_auxv), NULL); vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); mmap_init(); diff --git a/mm/slab.c b/mm/slab.c index 226906294183..cd86f15071ad 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1281,7 +1281,7 @@ void __init kmem_cache_init(void) create_boot_cache(kmem_cache, "kmem_cache", offsetof(struct kmem_cache, node) + nr_node_ids * sizeof(struct kmem_cache_node *), - SLAB_HWCACHE_ALIGN); + SLAB_HWCACHE_ALIGN, 0, 0); list_add(&kmem_cache->list, &slab_caches); slab_state = PARTIAL; @@ -1291,7 +1291,8 @@ void __init kmem_cache_init(void) */ kmalloc_caches[INDEX_NODE] = create_kmalloc_cache( kmalloc_info[INDEX_NODE].name, - kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS); + kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS, + 0, kmalloc_size(INDEX_NODE)); slab_state = PARTIAL_NODE; setup_kmalloc_cache_index_table(); @@ -4385,13 +4386,15 @@ module_init(slab_proc_init); #ifdef CONFIG_HARDENED_USERCOPY /* - * Rejects objects that are incorrectly sized. + * Rejects incorrectly sized objects and objects that are to be copied + * to/from userspace but do not fall entirely within the containing slab + * cache's usercopy region. * * Returns NULL if check passes, otherwise const char * to name of cache * to indicate an error. */ -const char *__check_heap_object(const void *ptr, unsigned long n, - struct page *page) +void __check_heap_object(const void *ptr, unsigned long n, struct page *page, + bool to_user) { struct kmem_cache *cachep; unsigned int objnr; @@ -4405,11 +4408,26 @@ const char *__check_heap_object(const void *ptr, unsigned long n, /* Find offset within object. */ offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep); - /* Allow address range falling entirely within object size. */ - if (offset <= cachep->object_size && n <= cachep->object_size - offset) - return NULL; + /* Allow address range falling entirely within usercopy region. */ + if (offset >= cachep->useroffset && + offset - cachep->useroffset <= cachep->usersize && + n <= cachep->useroffset - offset + cachep->usersize) + return; - return cachep->name; + /* + * If the copy is still within the allocated object, produce + * a warning instead of rejecting the copy. This is intended + * to be a temporary method to find any missing usercopy + * whitelists. 
+ */ + if (usercopy_fallback && + offset <= cachep->object_size && + n <= cachep->object_size - offset) { + usercopy_warn("SLAB object", cachep->name, to_user, offset, n); + return; + } + + usercopy_abort("SLAB object", cachep->name, to_user, offset, n); } #endif /* CONFIG_HARDENED_USERCOPY */ diff --git a/mm/slab.h b/mm/slab.h index e8e2095a6185..51813236e773 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -22,6 +22,8 @@ struct kmem_cache { unsigned int size; /* The aligned/padded/added on size */ unsigned int align; /* Alignment as calculated */ slab_flags_t flags; /* Active flags on the slab */ + size_t useroffset; /* Usercopy region offset */ + size_t usersize; /* Usercopy region size */ const char *name; /* Slab name for sysfs */ int refcount; /* Use counter */ void (*ctor)(void *); /* Called on object slot creation */ @@ -92,9 +94,11 @@ struct kmem_cache *kmalloc_slab(size_t, gfp_t); int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags); extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size, - slab_flags_t flags); + slab_flags_t flags, size_t useroffset, + size_t usersize); extern void create_boot_cache(struct kmem_cache *, const char *name, - size_t size, slab_flags_t flags); + size_t size, slab_flags_t flags, size_t useroffset, + size_t usersize); int slab_unmergeable(struct kmem_cache *s); struct kmem_cache *find_mergeable(size_t size, size_t align, diff --git a/mm/slab_common.c b/mm/slab_common.c index deeddf95cdcf..10f127b2de7c 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -31,6 +31,14 @@ LIST_HEAD(slab_caches); DEFINE_MUTEX(slab_mutex); struct kmem_cache *kmem_cache; +#ifdef CONFIG_HARDENED_USERCOPY +bool usercopy_fallback __ro_after_init = + IS_ENABLED(CONFIG_HARDENED_USERCOPY_FALLBACK); +module_param(usercopy_fallback, bool, 0400); +MODULE_PARM_DESC(usercopy_fallback, + "WARN instead of reject usercopy whitelist violations"); +#endif + static LIST_HEAD(slab_caches_to_rcu_destroy); static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work); static DECLARE_WORK(slab_caches_to_rcu_destroy_work, @@ -310,6 +318,9 @@ int slab_unmergeable(struct kmem_cache *s) if (s->ctor) return 1; + if (s->usersize) + return 1; + /* * We may have set a slab to be unmergeable during bootstrap. */ @@ -368,12 +379,16 @@ struct kmem_cache *find_mergeable(size_t size, size_t align, static struct kmem_cache *create_cache(const char *name, size_t object_size, size_t size, size_t align, - slab_flags_t flags, void (*ctor)(void *), + slab_flags_t flags, size_t useroffset, + size_t usersize, void (*ctor)(void *), struct mem_cgroup *memcg, struct kmem_cache *root_cache) { struct kmem_cache *s; int err; + if (WARN_ON(useroffset + usersize > object_size)) + useroffset = usersize = 0; + err = -ENOMEM; s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL); if (!s) @@ -384,6 +399,8 @@ static struct kmem_cache *create_cache(const char *name, s->size = size; s->align = align; s->ctor = ctor; + s->useroffset = useroffset; + s->usersize = usersize; err = init_memcg_params(s, memcg, root_cache); if (err) @@ -408,11 +425,13 @@ out_free_cache: } /* - * kmem_cache_create - Create a cache. + * kmem_cache_create_usercopy - Create a cache. * @name: A string which is used in /proc/slabinfo to identify this cache. * @size: The size of objects to be created in this cache. * @align: The required alignment for the objects. * @flags: SLAB flags + * @useroffset: Usercopy region offset + * @usersize: Usercopy region size * @ctor: A constructor for the objects. 
diff --git a/mm/slab.h b/mm/slab.h
index e8e2095a6185..51813236e773 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -22,6 +22,8 @@ struct kmem_cache {
 	unsigned int size;	/* The aligned/padded/added on size  */
 	unsigned int align;	/* Alignment as calculated */
 	slab_flags_t flags;	/* Active flags on the slab */
+	size_t useroffset;	/* Usercopy region offset */
+	size_t usersize;	/* Usercopy region size */
 	const char *name;	/* Slab name for sysfs */
 	int refcount;		/* Use counter */
 	void (*ctor)(void *);	/* Called on object slot creation */
@@ -92,9 +94,11 @@ struct kmem_cache *kmalloc_slab(size_t, gfp_t);
 int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
 
 extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size,
-			slab_flags_t flags);
+			slab_flags_t flags, size_t useroffset,
+			size_t usersize);
 extern void create_boot_cache(struct kmem_cache *, const char *name,
-			size_t size, slab_flags_t flags);
+			size_t size, slab_flags_t flags, size_t useroffset,
+			size_t usersize);
 
 int slab_unmergeable(struct kmem_cache *s);
 struct kmem_cache *find_mergeable(size_t size, size_t align,
diff --git a/mm/slab_common.c b/mm/slab_common.c
index deeddf95cdcf..10f127b2de7c 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -31,6 +31,14 @@ LIST_HEAD(slab_caches);
 DEFINE_MUTEX(slab_mutex);
 struct kmem_cache *kmem_cache;
 
+#ifdef CONFIG_HARDENED_USERCOPY
+bool usercopy_fallback __ro_after_init =
+		IS_ENABLED(CONFIG_HARDENED_USERCOPY_FALLBACK);
+module_param(usercopy_fallback, bool, 0400);
+MODULE_PARM_DESC(usercopy_fallback,
+		"WARN instead of reject usercopy whitelist violations");
+#endif
+
 static LIST_HEAD(slab_caches_to_rcu_destroy);
 static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work);
 static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
@@ -310,6 +318,9 @@ int slab_unmergeable(struct kmem_cache *s)
 	if (s->ctor)
 		return 1;
 
+	if (s->usersize)
+		return 1;
+
 	/*
 	 * We may have set a slab to be unmergeable during bootstrap.
 	 */
@@ -368,12 +379,16 @@ struct kmem_cache *find_mergeable(size_t size, size_t align,
 
 static struct kmem_cache *create_cache(const char *name,
 		size_t object_size, size_t size, size_t align,
-		slab_flags_t flags, void (*ctor)(void *),
+		slab_flags_t flags, size_t useroffset,
+		size_t usersize, void (*ctor)(void *),
 		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
 {
 	struct kmem_cache *s;
 	int err;
 
+	if (WARN_ON(useroffset + usersize > object_size))
+		useroffset = usersize = 0;
+
 	err = -ENOMEM;
 	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
 	if (!s)
@@ -384,6 +399,8 @@ static struct kmem_cache *create_cache(const char *name,
 	s->size = size;
 	s->align = align;
 	s->ctor = ctor;
+	s->useroffset = useroffset;
+	s->usersize = usersize;
 
 	err = init_memcg_params(s, memcg, root_cache);
 	if (err)
@@ -408,11 +425,13 @@ out_free_cache:
 }
 
 /*
- * kmem_cache_create - Create a cache.
+ * kmem_cache_create_usercopy - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
+ * @useroffset: Usercopy region offset
+ * @usersize: Usercopy region size
 * @ctor: A constructor for the objects.
 *
 * Returns a ptr to the cache on success, NULL on failure.
@@ -432,8 +451,9 @@ out_free_cache:
 * as davem.
 */
 struct kmem_cache *
-kmem_cache_create(const char *name, size_t size, size_t align,
-		slab_flags_t flags, void (*ctor)(void *))
+kmem_cache_create_usercopy(const char *name, size_t size, size_t align,
+		slab_flags_t flags, size_t useroffset, size_t usersize,
+		void (*ctor)(void *))
 {
 	struct kmem_cache *s = NULL;
 	const char *cache_name;
@@ -464,7 +484,13 @@ kmem_cache_create(const char *name, size_t size, size_t align,
 	 */
 	flags &= CACHE_CREATE_MASK;
 
-	s = __kmem_cache_alias(name, size, align, flags, ctor);
+	/* Fail closed on bad usersize or useroffset values. */
+	if (WARN_ON(!usersize && useroffset) ||
+	    WARN_ON(size < usersize || size - usersize < useroffset))
+		usersize = useroffset = 0;
+
+	if (!usersize)
+		s = __kmem_cache_alias(name, size, align, flags, ctor);
 	if (s)
 		goto out_unlock;
 
@@ -476,7 +502,7 @@ kmem_cache_create(const char *name, size_t size, size_t align,
 
 	s = create_cache(cache_name, size, size,
 			 calculate_alignment(flags, align, size),
-			 flags, ctor, NULL, NULL);
+			 flags, useroffset, usersize, ctor, NULL, NULL);
 	if (IS_ERR(s)) {
 		err = PTR_ERR(s);
 		kfree_const(cache_name);
@@ -502,6 +528,15 @@ out_unlock:
 	}
 	return s;
 }
+EXPORT_SYMBOL(kmem_cache_create_usercopy);
+
+struct kmem_cache *
+kmem_cache_create(const char *name, size_t size, size_t align,
+		slab_flags_t flags, void (*ctor)(void *))
+{
+	return kmem_cache_create_usercopy(name, size, align, flags, 0, 0,
+					  ctor);
+}
 EXPORT_SYMBOL(kmem_cache_create);
 
 static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
@@ -614,6 +649,7 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
 	s = create_cache(cache_name, root_cache->object_size,
 			 root_cache->size, root_cache->align,
 			 root_cache->flags & CACHE_CREATE_MASK,
+			 root_cache->useroffset, root_cache->usersize,
 			 root_cache->ctor, memcg, root_cache);
 	/*
 	 * If we could not create a memcg cache, do not complain, because
@@ -881,13 +917,15 @@ bool slab_is_available(void)
 #ifndef CONFIG_SLOB
 /* Create a cache during boot when no slab services are available yet */
 void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
-		slab_flags_t flags)
+		slab_flags_t flags, size_t useroffset, size_t usersize)
 {
 	int err;
 
 	s->name = name;
 	s->size = s->object_size = size;
 	s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
+	s->useroffset = useroffset;
+	s->usersize = usersize;
 
 	slab_init_memcg_params(s);
 
@@ -901,14 +939,15 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t siz
 }
 
 struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
-				slab_flags_t flags)
+				slab_flags_t flags, size_t useroffset,
+				size_t usersize)
 {
 	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
 
 	if (!s)
 		panic("Out of memory when creating slab %s\n", name);
 
-	create_boot_cache(s, name, size, flags);
+	create_boot_cache(s, name, size, flags, useroffset, usersize);
 	list_add(&s->list, &slab_caches);
 	memcg_link_cache(s);
 	s->refcount = 1;
@@ -1062,7 +1101,8 @@ void __init setup_kmalloc_cache_index_table(void)
 static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
 {
 	kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
-					kmalloc_info[idx].size, flags);
+					kmalloc_info[idx].size, flags, 0,
+					kmalloc_info[idx].size);
 }
 
 /*
@@ -1103,7 +1143,7 @@ void __init create_kmalloc_caches(slab_flags_t flags)
 			BUG_ON(!n);
 			kmalloc_dma_caches[i] = create_kmalloc_cache(n,
-				size, SLAB_CACHE_DMA | flags);
+				size, SLAB_CACHE_DMA | flags, 0, 0);
 		}
 	}
 #endif
diff --git a/mm/slub.c b/mm/slub.c
index 693b7074bc53..cc71176c6eef 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3813,13 +3813,15 @@ EXPORT_SYMBOL(__kmalloc_node);
 
 #ifdef CONFIG_HARDENED_USERCOPY
 /*
- * Rejects objects that are incorrectly sized.
+ * Rejects incorrectly sized objects and objects that are to be copied
+ * to/from userspace but do not fall entirely within the containing slab
+ * cache's usercopy region.
 *
- * Returns NULL if check passes, otherwise const char * to name of cache
- * to indicate an error.
+ * Returns nothing on success; otherwise it either warns about or
+ * rejects the offending copy.
 */
-const char *__check_heap_object(const void *ptr, unsigned long n,
-				struct page *page)
+void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
+			 bool to_user)
 {
 	struct kmem_cache *s;
 	unsigned long offset;
@@ -3827,11 +3829,11 @@ const char *__check_heap_object(const void *ptr, unsigned long n,
 
 	/* Find object and usable object size. */
 	s = page->slab_cache;
-	object_size = slab_ksize(s);
 
 	/* Reject impossible pointers. */
 	if (ptr < page_address(page))
-		return s->name;
+		usercopy_abort("SLUB object not in SLUB page?!", NULL,
+			       to_user, 0, n);
 
 	/* Find offset within object. */
 	offset = (ptr - page_address(page)) % s->size;
@@ -3839,15 +3841,31 @@ const char *__check_heap_object(const void *ptr, unsigned long n,
 
 	/* Adjust for redzone and reject if within the redzone. */
 	if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
 		if (offset < s->red_left_pad)
-			return s->name;
+			usercopy_abort("SLUB object in left red zone",
+				       s->name, to_user, offset, n);
 		offset -= s->red_left_pad;
 	}
 
-	/* Allow address range falling entirely within object size. */
-	if (offset <= object_size && n <= object_size - offset)
-		return NULL;
+	/* Allow address range falling entirely within usercopy region. */
+	if (offset >= s->useroffset &&
+	    offset - s->useroffset <= s->usersize &&
+	    n <= s->useroffset - offset + s->usersize)
+		return;
 
-	return s->name;
+	/*
+	 * If the copy is still within the allocated object, produce
+	 * a warning instead of rejecting the copy. This is intended
+	 * to be a temporary method to find any missing usercopy
+	 * whitelists.
+	 */
+	object_size = slab_ksize(s);
+	if (usercopy_fallback &&
+	    offset <= object_size && n <= object_size - offset) {
+		usercopy_warn("SLUB object", s->name, to_user, offset, n);
+		return;
+	}
+
+	usercopy_abort("SLUB object", s->name, to_user, offset, n);
 }
 #endif /* CONFIG_HARDENED_USERCOPY */
 
@@ -4181,7 +4199,7 @@ void __init kmem_cache_init(void)
 	kmem_cache = &boot_kmem_cache;
 
 	create_boot_cache(kmem_cache_node, "kmem_cache_node",
-		sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN);
+		sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
 
 	register_hotmemory_notifier(&slab_memory_callback_nb);
 
@@ -4191,7 +4209,7 @@ void __init kmem_cache_init(void)
 	create_boot_cache(kmem_cache, "kmem_cache",
 			offsetof(struct kmem_cache, node) +
 				nr_node_ids * sizeof(struct kmem_cache_node *),
-		       SLAB_HWCACHE_ALIGN);
+		       SLAB_HWCACHE_ALIGN, 0, 0);
 
 	kmem_cache = bootstrap(&boot_kmem_cache);
 
@@ -5061,6 +5079,12 @@ static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
 SLAB_ATTR_RO(cache_dma);
 #endif
 
+static ssize_t usersize_show(struct kmem_cache *s, char *buf)
+{
+	return sprintf(buf, "%zu\n", s->usersize);
+}
+SLAB_ATTR_RO(usersize);
+
 static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
 {
 	return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
@@ -5435,6 +5459,7 @@ static struct attribute *slab_attrs[] = {
 #ifdef CONFIG_FAILSLAB
 	&failslab_attr.attr,
 #endif
+	&usersize_attr.attr,
 	NULL
 };
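The new read-only SLUB attribute makes a cache's whitelist size visible at
runtime. A userspace sketch (assumes SLUB and the standard /sys/kernel/slab
layout; 'dentry' is just an example cache name):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/slab/dentry/usersize", "r");
	unsigned long usersize;

	if (!f || fscanf(f, "%lu", &usersize) != 1)
		return 1;
	printf("dentry usersize: %lu bytes\n", usersize);
	fclose(f);
	return 0;
}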
+ */ + object_size = slab_ksize(s); + if (usercopy_fallback && + offset <= object_size && n <= object_size - offset) { + usercopy_warn("SLUB object", s->name, to_user, offset, n); + return; + } + + usercopy_abort("SLUB object", s->name, to_user, offset, n); } #endif /* CONFIG_HARDENED_USERCOPY */ @@ -4181,7 +4199,7 @@ void __init kmem_cache_init(void) kmem_cache = &boot_kmem_cache; create_boot_cache(kmem_cache_node, "kmem_cache_node", - sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN); + sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0); register_hotmemory_notifier(&slab_memory_callback_nb); @@ -4191,7 +4209,7 @@ void __init kmem_cache_init(void) create_boot_cache(kmem_cache, "kmem_cache", offsetof(struct kmem_cache, node) + nr_node_ids * sizeof(struct kmem_cache_node *), - SLAB_HWCACHE_ALIGN); + SLAB_HWCACHE_ALIGN, 0, 0); kmem_cache = bootstrap(&boot_kmem_cache); @@ -5061,6 +5079,12 @@ static ssize_t cache_dma_show(struct kmem_cache *s, char *buf) SLAB_ATTR_RO(cache_dma); #endif +static ssize_t usersize_show(struct kmem_cache *s, char *buf) +{ + return sprintf(buf, "%zu\n", s->usersize); +} +SLAB_ATTR_RO(usersize); + static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) { return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU)); @@ -5435,6 +5459,7 @@ static struct attribute *slab_attrs[] = { #ifdef CONFIG_FAILSLAB &failslab_attr.attr, #endif + &usersize_attr.attr, NULL }; diff --git a/mm/usercopy.c b/mm/usercopy.c index a9852b24715d..e9e9325f7638 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -58,12 +58,40 @@ static noinline int check_stack_object(const void *obj, unsigned long len) return GOOD_STACK; } -static void report_usercopy(const void *ptr, unsigned long len, - bool to_user, const char *type) +/* + * If these functions are reached, then CONFIG_HARDENED_USERCOPY has found + * an unexpected state during a copy_from_user() or copy_to_user() call. + * There are several checks being performed on the buffer by the + * __check_object_size() function. Normal stack buffer usage should never + * trip the checks, and kernel text addressing will always trip the check. + * For cache objects, it is checking that only the whitelisted range of + * bytes for a given cache is being accessed (via the cache's usersize and + * useroffset fields). To adjust a cache whitelist, use the usercopy-aware + * kmem_cache_create_usercopy() function to create the cache (and + * carefully audit the whitelist range). + */ +void usercopy_warn(const char *name, const char *detail, bool to_user, + unsigned long offset, unsigned long len) { - pr_emerg("kernel memory %s attempt detected %s %p (%s) (%lu bytes)\n", - to_user ? "exposure" : "overwrite", - to_user ? "from" : "to", ptr, type ? : "unknown", len); + WARN_ONCE(1, "Bad or missing usercopy whitelist? Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n", + to_user ? "exposure" : "overwrite", + to_user ? "from" : "to", + name ? : "unknown?!", + detail ? " '" : "", detail ? : "", detail ? "'" : "", + offset, len); +} + +void __noreturn usercopy_abort(const char *name, const char *detail, + bool to_user, unsigned long offset, + unsigned long len) +{ + pr_emerg("Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n", + to_user ? "exposure" : "overwrite", + to_user ? "from" : "to", + name ? : "unknown?!", + detail ? " '" : "", detail ? : "", detail ? 
"'" : "", + offset, len); + /* * For greater effect, it would be nice to do do_group_exit(), * but BUG() actually hooks all the lock-breaking and per-arch @@ -73,10 +101,10 @@ static void report_usercopy(const void *ptr, unsigned long len, } /* Returns true if any portion of [ptr,ptr+n) over laps with [low,high). */ -static bool overlaps(const void *ptr, unsigned long n, unsigned long low, - unsigned long high) +static bool overlaps(const unsigned long ptr, unsigned long n, + unsigned long low, unsigned long high) { - unsigned long check_low = (uintptr_t)ptr; + const unsigned long check_low = ptr; unsigned long check_high = check_low + n; /* Does not overlap if entirely above or entirely below. */ @@ -87,15 +115,15 @@ static bool overlaps(const void *ptr, unsigned long n, unsigned long low, } /* Is this address range in the kernel text area? */ -static inline const char *check_kernel_text_object(const void *ptr, - unsigned long n) +static inline void check_kernel_text_object(const unsigned long ptr, + unsigned long n, bool to_user) { unsigned long textlow = (unsigned long)_stext; unsigned long texthigh = (unsigned long)_etext; unsigned long textlow_linear, texthigh_linear; if (overlaps(ptr, n, textlow, texthigh)) - return ""; + usercopy_abort("kernel text", NULL, to_user, ptr - textlow, n); /* * Some architectures have virtual memory mappings with a secondary @@ -108,32 +136,30 @@ static inline const char *check_kernel_text_object(const void *ptr, textlow_linear = (unsigned long)lm_alias(textlow); /* No different mapping: we're done. */ if (textlow_linear == textlow) - return NULL; + return; /* Check the secondary mapping... */ texthigh_linear = (unsigned long)lm_alias(texthigh); if (overlaps(ptr, n, textlow_linear, texthigh_linear)) - return ""; - - return NULL; + usercopy_abort("linear kernel text", NULL, to_user, + ptr - textlow_linear, n); } -static inline const char *check_bogus_address(const void *ptr, unsigned long n) +static inline void check_bogus_address(const unsigned long ptr, unsigned long n, + bool to_user) { /* Reject if object wraps past end of memory. */ - if ((unsigned long)ptr + n < (unsigned long)ptr) - return ""; + if (ptr + n < ptr) + usercopy_abort("wrapped address", NULL, to_user, 0, ptr + n); /* Reject if NULL or ZERO-allocation. */ if (ZERO_OR_NULL_PTR(ptr)) - return ""; - - return NULL; + usercopy_abort("null address", NULL, to_user, ptr, n); } /* Checks for allocs that are marked in some way as spanning multiple pages. */ -static inline const char *check_page_span(const void *ptr, unsigned long n, - struct page *page, bool to_user) +static inline void check_page_span(const void *ptr, unsigned long n, + struct page *page, bool to_user) { #ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN const void *end = ptr + n - 1; @@ -150,28 +176,28 @@ static inline const char *check_page_span(const void *ptr, unsigned long n, if (ptr >= (const void *)__start_rodata && end <= (const void *)__end_rodata) { if (!to_user) - return ""; - return NULL; + usercopy_abort("rodata", NULL, to_user, 0, n); + return; } /* Allow kernel data region (if not marked as Reserved). */ if (ptr >= (const void *)_sdata && end <= (const void *)_edata) - return NULL; + return; /* Allow kernel bss region (if not marked as Reserved). */ if (ptr >= (const void *)__bss_start && end <= (const void *)__bss_stop) - return NULL; + return; /* Is the object wholly within one base page? 
@@ -181,36 +207,37 @@ static inline const char *check_page_span(const void *ptr, unsigned long n,
 	 */
 	is_reserved = PageReserved(page);
 	is_cma = is_migrate_cma_page(page);
 	if (!is_reserved && !is_cma)
-		return "<spans multiple pages>";
+		usercopy_abort("spans multiple pages", NULL, to_user, 0, n);
 
 	for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) {
 		page = virt_to_head_page(ptr);
 		if (is_reserved && !PageReserved(page))
-			return "<spans Reserved and non-Reserved pages>";
+			usercopy_abort("spans Reserved and non-Reserved pages",
+				       NULL, to_user, 0, n);
 		if (is_cma && !is_migrate_cma_page(page))
-			return "<spans CMA and non-CMA pages>";
+			usercopy_abort("spans CMA and non-CMA pages", NULL,
+				       to_user, 0, n);
 	}
 #endif
-
-	return NULL;
 }
 
-static inline const char *check_heap_object(const void *ptr, unsigned long n,
-					    bool to_user)
+static inline void check_heap_object(const void *ptr, unsigned long n,
+				     bool to_user)
 {
 	struct page *page;
 
 	if (!virt_addr_valid(ptr))
-		return NULL;
+		return;
 
 	page = virt_to_head_page(ptr);
 
-	/* Check slab allocator for flags and size. */
-	if (PageSlab(page))
-		return __check_heap_object(ptr, n, page);
-
-	/* Verify object does not incorrectly span multiple pages. */
-	return check_page_span(ptr, n, page, to_user);
+	if (PageSlab(page)) {
+		/* Check slab allocator for flags and size. */
+		__check_heap_object(ptr, n, page, to_user);
+	} else {
+		/* Verify object does not incorrectly span multiple pages. */
+		check_page_span(ptr, n, page, to_user);
+	}
 }
 
 /*
@@ -221,21 +248,15 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n,
 */
 void __check_object_size(const void *ptr, unsigned long n, bool to_user)
 {
-	const char *err;
-
 	/* Skip all tests if size is zero. */
 	if (!n)
 		return;
 
 	/* Check for invalid addresses. */
-	err = check_bogus_address(ptr, n);
-	if (err)
-		goto report;
+	check_bogus_address((const unsigned long)ptr, n, to_user);
 
 	/* Check for bad heap object. */
-	err = check_heap_object(ptr, n, to_user);
-	if (err)
-		goto report;
+	check_heap_object(ptr, n, to_user);
 
 	/* Check for bad stack object. */
 	switch (check_stack_object(ptr, n)) {
@@ -251,16 +272,10 @@ void __check_object_size(const void *ptr, unsigned long n, bool to_user)
 	 */
 		return;
 	default:
-		err = "<process stack>";
-		goto report;
+		usercopy_abort("process stack", NULL, to_user, 0, n);
 	}
 
 	/* Check for object in kernel to avoid text exposure. */
-	err = check_kernel_text_object(ptr, n);
-	if (!err)
-		return;
-
-report:
-	report_usercopy(ptr, n, to_user, err);
+	check_kernel_text_object((const unsigned long)ptr, n, to_user);
 }
 EXPORT_SYMBOL(__check_object_size);
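Assembled from the usercopy_abort() format string above, a whitelist rejection
reads like this (the cache name and numbers are examples, matching the lkdtm
test):

    Kernel memory exposure attempt detected from SLUB object
    'lkdtm-usercopy' (offset 255, size 64)!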
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 64048cec41e0..b109445a1df9 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1032,6 +1032,8 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
 	static struct proto prot = {.name = "PF_CAIF",
 		.owner = THIS_MODULE,
 		.obj_size = sizeof(struct caifsock),
+		.useroffset = offsetof(struct caifsock, conn_req.param),
+		.usersize = sizeof_field(struct caifsock, conn_req.param)
 	};
 
 	if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN))
diff --git a/net/core/sock.c b/net/core/sock.c
index e50e7b3f2223..b026e1717df4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3191,8 +3191,10 @@ static int req_prot_init(const struct proto *prot)
 int proto_register(struct proto *prot, int alloc_slab)
 {
 	if (alloc_slab) {
-		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
+		prot->slab = kmem_cache_create_usercopy(prot->name,
+					prot->obj_size, 0,
 					SLAB_HWCACHE_ALIGN | prot->slab_flags,
+					prot->useroffset, prot->usersize,
 					NULL);
 
 		if (prot->slab == NULL) {
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 7c509697ebc7..9b367fc48d7d 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -990,6 +990,8 @@ struct proto raw_prot = {
 	.hash		   = raw_hash_sk,
 	.unhash		   = raw_unhash_sk,
 	.obj_size	   = sizeof(struct raw_sock),
+	.useroffset	   = offsetof(struct raw_sock, filter),
+	.usersize	   = sizeof_field(struct raw_sock, filter),
 	.h.raw_hash	   = &raw_v4_hashinfo,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_raw_setsockopt,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ddda7eb3c623..4c25339b1984 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1272,6 +1272,8 @@ struct proto rawv6_prot = {
 	.hash		   = raw_hash_sk,
 	.unhash		   = raw_unhash_sk,
 	.obj_size	   = sizeof(struct raw6_sock),
+	.useroffset	   = offsetof(struct raw6_sock, filter),
+	.usersize	   = sizeof_field(struct raw6_sock, filter),
 	.h.raw_hash	   = &raw_v6_hashinfo,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_rawv6_setsockopt,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 356e387f82e7..ebb8cb9eb0bd 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5053,7 +5053,7 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv
 	len = sizeof(int);
 	if (put_user(len, optlen))
 		return -EFAULT;
-	if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len))
+	if (put_user(sctp_sk(sk)->autoclose, (int __user *)optval))
 		return -EFAULT;
 	return 0;
 }
@@ -8552,6 +8552,10 @@ struct proto sctp_prot = {
 	.unhash		=	sctp_unhash,
 	.get_port	=	sctp_get_port,
 	.obj_size	=	sizeof(struct sctp_sock),
+	.useroffset	=	offsetof(struct sctp_sock, subscribe),
+	.usersize	=	offsetof(struct sctp_sock, initmsg) -
+				offsetof(struct sctp_sock, subscribe) +
+				sizeof_field(struct sctp_sock, initmsg),
 	.sysctl_mem	=	sysctl_sctp_mem,
 	.sysctl_rmem	=	sysctl_sctp_rmem,
 	.sysctl_wmem	=	sysctl_sctp_wmem,
@@ -8591,6 +8595,10 @@ struct proto sctpv6_prot = {
 	.unhash		=	sctp_unhash,
 	.get_port	=	sctp_get_port,
 	.obj_size	=	sizeof(struct sctp6_sock),
+	.useroffset	=	offsetof(struct sctp6_sock, sctp.subscribe),
+	.usersize	=	offsetof(struct sctp6_sock, sctp.initmsg) -
+				offsetof(struct sctp6_sock, sctp.subscribe) +
+				sizeof_field(struct sctp6_sock, sctp.initmsg),
 	.sysctl_mem	=	sysctl_sctp_mem,
 	.sysctl_rmem	=	sysctl_sctp_rmem,
 	.sysctl_wmem	=	sysctl_sctp_wmem,
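The sctp usersize spans from subscribe through the end of initmsg, which is
exactly why structs.h above moves the two fields next to each other: everything
between useroffset and useroffset + usersize becomes copyable. The same value,
written with offsetofend() for illustration:

/* Illustration: equivalent formulation of the sctp usersize. */
static size_t sctp_whitelist_size(void)
{
	return offsetofend(struct sctp_sock, initmsg) -
	       offsetof(struct sctp_sock, subscribe);
}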
diff --git a/security/Kconfig b/security/Kconfig
index 3709db95027f..c4302067a3ad 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -164,6 +164,20 @@ config HARDENED_USERCOPY
 	  or are part of the kernel text. This kills entire classes
 	  of heap overflow exploits and similar kernel memory exposures.
 
+config HARDENED_USERCOPY_FALLBACK
+	bool "Allow usercopy whitelist violations to fallback to object size"
+	depends on HARDENED_USERCOPY
+	default y
+	help
+	  This is a temporary option that allows missing usercopy whitelists
+	  to be discovered via a WARN() to the kernel log, instead of
+	  rejecting the copy, falling back to non-whitelisted hardened
+	  usercopy that checks the slab allocation size instead of the
+	  whitelist size. This option will be removed once it seems like
+	  all missing usercopy whitelists have been identified and fixed.
+	  Booting with "slab_common.usercopy_fallback=Y/N" can change
+	  this setting.
+
 config HARDENED_USERCOPY_PAGESPAN
 	bool "Refuse to copy allocations that span multiple pages"
 	depends on HARDENED_USERCOPY
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index f40d46e24bcc..524becf88e04 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -138,6 +138,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
 		"__reiserfs_panic",
 		"lbug_with_loc",
 		"fortify_panic",
+		"usercopy_abort",
 	};
 
 	if (func->bind == STB_WEAK)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 35db929f92f0..001085b611ad 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4019,8 +4019,12 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 	/* A kmem cache lets us meet the alignment requirements of fx_save. */
 	if (!vcpu_align)
 		vcpu_align = __alignof__(struct kvm_vcpu);
-	kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
-					   SLAB_ACCOUNT, NULL);
+	kvm_vcpu_cache =
+		kmem_cache_create_usercopy("kvm_vcpu", vcpu_size, vcpu_align,
+					   SLAB_ACCOUNT,
+					   offsetof(struct kvm_vcpu, arch),
+					   sizeof_field(struct kvm_vcpu, arch),
+					   NULL);
 	if (!kvm_vcpu_cache) {
 		r = -ENOMEM;
 		goto out_free_3;