diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index 940dfb406591..04abdec7f496 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -283,7 +283,7 @@ long arch_ptrace(struct task_struct *child, long request, /* When I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ case PTRACE_PEEKDATA: - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), + copied = ptrace_access_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE); ret = -EIO; if (copied != sizeof(tmp)) diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c index 8d79286ee4e8..360d99645163 100644 --- a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -270,7 +270,7 @@ long arch_ptrace(struct task_struct *child, long request, switch (bfin_mem_access_type(addr, to_copy)) { case BFIN_MEM_ACCESS_CORE: case BFIN_MEM_ACCESS_CORE_ONLY: - copied = access_process_vm(child, addr, &tmp, + copied = ptrace_access_vm(child, addr, &tmp, to_copy, FOLL_FORCE); if (copied) break; @@ -323,7 +323,7 @@ long arch_ptrace(struct task_struct *child, long request, switch (bfin_mem_access_type(addr, to_copy)) { case BFIN_MEM_ACCESS_CORE: case BFIN_MEM_ACCESS_CORE_ONLY: - copied = access_process_vm(child, addr, &data, + copied = ptrace_access_vm(child, addr, &data, to_copy, FOLL_FORCE | FOLL_WRITE); break; diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c index f0df654ac6fc..fe1f9cf7b391 100644 --- a/arch/cris/arch-v32/kernel/ptrace.c +++ b/arch/cris/arch-v32/kernel/ptrace.c @@ -147,7 +147,7 @@ long arch_ptrace(struct task_struct *child, long request, /* The trampoline page is globally mapped, no page table to traverse.*/ tmp = *(unsigned long*)addr; } else { - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE); + copied = ptrace_access_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 31aa8c0f68e1..36f660da8124 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1159,7 +1159,7 @@ arch_ptrace (struct task_struct *child, long request, case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: /* read word at location addr */ - if (access_process_vm(child, addr, &data, sizeof(data), + if (ptrace_access_vm(child, addr, &data, sizeof(data), FOLL_FORCE) != sizeof(data)) return -EIO; diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 7e71a4e0281b..5fcbdcd7abd0 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -69,7 +69,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *) (unsigned long) addr) != 0) break; - copied = access_process_vm(child, (u64)addrOthers, &tmp, + copied = ptrace_access_vm(child, (u64)addrOthers, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; @@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *) (unsigned long) addr) != 0) break; ret = 0; - if (access_process_vm(child, (u64)addrOthers, &data, + if (ptrace_access_vm(child, (u64)addrOthers, &data, sizeof(data), FOLL_FORCE | FOLL_WRITE) == sizeof(data)) break; diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 010b7b310237..1e887f3a61a6 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -73,7 +73,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) break; - copied = access_process_vm(child, (u64)addrOthers, &tmp, + copied = ptrace_access_vm(child, (u64)addrOthers, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; @@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) break; ret = 0; - if (access_process_vm(child, (u64)addrOthers, &tmp, + if (ptrace_access_vm(child, (u64)addrOthers, &tmp, sizeof(tmp), FOLL_FORCE | FOLL_WRITE) == sizeof(tmp)) break; diff --git a/fs/exec.c b/fs/exec.c index 923c57d96899..88b5e1efdbd6 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1277,8 +1277,22 @@ EXPORT_SYMBOL(flush_old_exec); void would_dump(struct linux_binprm *bprm, struct file *file) { - if (inode_permission(file_inode(file), MAY_READ) < 0) + struct inode *inode = file_inode(file); + if (inode_permission(inode, MAY_READ) < 0) { + struct user_namespace *old, *user_ns; bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; + + /* Ensure mm->user_ns contains the executable */ + user_ns = old = bprm->mm->user_ns; + while ((user_ns != &init_user_ns) && + !privileged_wrt_inode_uidgid(user_ns, inode)) + user_ns = user_ns->parent; + + if (old != user_ns) { + bprm->mm->user_ns = get_user_ns(user_ns); + put_user_ns(old); + } + } } EXPORT_SYMBOL(would_dump); @@ -1308,7 +1322,6 @@ void setup_new_exec(struct linux_binprm * bprm) !gid_eq(bprm->cred->gid, current_egid())) { current->pdeath_signal = 0; } else { - would_dump(bprm, bprm->file); if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) set_dumpable(current->mm, suid_dumpable); } @@ -1408,7 +1421,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm) unsigned n_fs; if (p->ptrace) { - if (p->ptrace & PT_PTRACE_CAP) + if (ptracer_capable(p, current_user_ns())) bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP; else bprm->unsafe |= LSM_UNSAFE_PTRACE; @@ -1743,6 +1756,8 @@ static int do_execveat_common(int fd, struct filename *filename, if (retval < 0) goto out; + would_dump(bprm, bprm->file); + retval = exec_binprm(bprm); if (retval < 0) goto out; diff --git a/include/linux/capability.h b/include/linux/capability.h index dbc21c719ce6..6ffb67e10c06 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -240,8 +240,10 @@ static inline bool ns_capable_noaudit(struct user_namespace *ns, int cap) return true; } #endif /* CONFIG_MULTIUSER */ +extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode); extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap); extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); +extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); diff --git a/include/linux/mm.h b/include/linux/mm.h index a92c8d73aeaf..0b5b2e4df14e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1270,6 +1270,8 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void * unsigned int gup_flags); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); +extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, + unsigned long addr, void *buf, int len, unsigned int gup_flags); long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4a8acedf4b7d..08d947fc4c59 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -473,6 +473,7 @@ struct mm_struct { */ struct task_struct __rcu *owner; #endif + struct user_namespace *user_ns; /* store ref to file /proc//exe symlink points to */ struct file __rcu *exe_file; diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 504c98a278d4..e0e539321ab9 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -8,6 +8,9 @@ #include /* For task_active_pid_ns. */ #include +extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, + void *buf, int len, unsigned int gup_flags); + /* * Ptrace flags * @@ -19,7 +22,6 @@ #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */ #define PT_PTRACED 0x00000001 #define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */ -#define PT_PTRACE_CAP 0x00000004 /* ptracer can follow suid-exec */ #define PT_OPT_FLAG_SHIFT 3 /* PT_TRACE_* event enable flags */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 5ccbbfe41345..a440cf178191 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1685,6 +1685,7 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ + const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ const struct cred __rcu *real_cred; /* objective and real subjective task * credentials (COW) */ const struct cred __rcu *cred; /* effective (overridable) subjective task diff --git a/kernel/capability.c b/kernel/capability.c index 00411c82dac5..4984e1f552eb 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -456,6 +456,19 @@ bool file_ns_capable(const struct file *file, struct user_namespace *ns, } EXPORT_SYMBOL(file_ns_capable); +/** + * privileged_wrt_inode_uidgid - Do capabilities in the namespace work over the inode? + * @ns: The user namespace in question + * @inode: The inode in question + * + * Return true if the inode uid and gid are within the namespace. + */ +bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode) +{ + return kuid_has_mapping(ns, inode->i_uid) && + kgid_has_mapping(ns, inode->i_gid); +} + /** * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped * @inode: The inode in question @@ -469,7 +482,26 @@ bool capable_wrt_inode_uidgid(const struct inode *inode, int cap) { struct user_namespace *ns = current_user_ns(); - return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) && - kgid_has_mapping(ns, inode->i_gid); + return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode); } EXPORT_SYMBOL(capable_wrt_inode_uidgid); + +/** + * ptracer_capable - Determine if the ptracer holds CAP_SYS_PTRACE in the namespace + * @tsk: The task that may be ptraced + * @ns: The user namespace to search for CAP_SYS_PTRACE in + * + * Return true if the task that is ptracing the current task had CAP_SYS_PTRACE + * in the specified user namespace. + */ +bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns) +{ + int ret = 0; /* An absent tracer adds no restrictions */ + const struct cred *cred; + rcu_read_lock(); + cred = rcu_dereference(tsk->ptracer_cred); + if (cred) + ret = security_capable_noaudit(cred, ns, CAP_SYS_PTRACE); + rcu_read_unlock(); + return (ret == 0); +} diff --git a/kernel/fork.c b/kernel/fork.c index a439ac429669..869b8ccc00bf 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -747,7 +747,8 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) #endif } -static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) +static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, + struct user_namespace *user_ns) { mm->mmap = NULL; mm->mm_rb = RB_ROOT; @@ -787,6 +788,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) if (init_new_context(p, mm)) goto fail_nocontext; + mm->user_ns = get_user_ns(user_ns); return mm; fail_nocontext: @@ -832,7 +834,7 @@ struct mm_struct *mm_alloc(void) return NULL; memset(mm, 0, sizeof(*mm)); - return mm_init(mm, current); + return mm_init(mm, current, current_user_ns()); } /* @@ -847,6 +849,7 @@ void __mmdrop(struct mm_struct *mm) destroy_context(mm); mmu_notifier_mm_destroy(mm); check_mm(mm); + put_user_ns(mm->user_ns); free_mm(mm); } EXPORT_SYMBOL_GPL(__mmdrop); @@ -1128,7 +1131,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk) memcpy(mm, oldmm, sizeof(*mm)); - if (!mm_init(mm, tsk)) + if (!mm_init(mm, tsk, mm->user_ns)) goto fail_nomem; err = dup_mmap(mm, oldmm); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index e6474f7272ec..49ba7c1ade9d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -27,6 +27,35 @@ #include #include +/* + * Access another process' address space via ptrace. + * Source/target buffer must be kernel space, + * Do not walk the page table directly, use get_user_pages + */ +int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, + void *buf, int len, unsigned int gup_flags) +{ + struct mm_struct *mm; + int ret; + + mm = get_task_mm(tsk); + if (!mm) + return 0; + + if (!tsk->ptrace || + (current != tsk->parent) || + ((get_dumpable(mm) != SUID_DUMP_USER) && + !ptracer_capable(tsk, mm->user_ns))) { + mmput(mm); + return 0; + } + + ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags); + mmput(mm); + + return ret; +} + /* * ptrace a task: make the debugger its new parent and @@ -39,6 +68,9 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) BUG_ON(!list_empty(&child->ptrace_entry)); list_add(&child->ptrace_entry, &new_parent->ptraced); child->parent = new_parent; + rcu_read_lock(); + child->ptracer_cred = get_cred(__task_cred(new_parent)); + rcu_read_unlock(); } /** @@ -71,12 +103,16 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) */ void __ptrace_unlink(struct task_struct *child) { + const struct cred *old_cred; BUG_ON(!child->ptrace); clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); child->parent = child->real_parent; list_del_init(&child->ptrace_entry); + old_cred = child->ptracer_cred; + child->ptracer_cred = NULL; + put_cred(old_cred); spin_lock(&child->sighand->siglock); child->ptrace = 0; @@ -220,7 +256,7 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) static int __ptrace_may_access(struct task_struct *task, unsigned int mode) { const struct cred *cred = current_cred(), *tcred; - int dumpable = 0; + struct mm_struct *mm; kuid_t caller_uid; kgid_t caller_gid; @@ -271,16 +307,11 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) return -EPERM; ok: rcu_read_unlock(); - smp_rmb(); - if (task->mm) - dumpable = get_dumpable(task->mm); - rcu_read_lock(); - if (dumpable != SUID_DUMP_USER && - !ptrace_has_cap(__task_cred(task)->user_ns, mode)) { - rcu_read_unlock(); - return -EPERM; - } - rcu_read_unlock(); + mm = task->mm; + if (mm && + ((get_dumpable(mm) != SUID_DUMP_USER) && + !ptrace_has_cap(mm->user_ns, mode))) + return -EPERM; return security_ptrace_access_check(task, mode); } @@ -344,10 +375,6 @@ static int ptrace_attach(struct task_struct *task, long request, if (seize) flags |= PT_SEIZED; - rcu_read_lock(); - if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE)) - flags |= PT_PTRACE_CAP; - rcu_read_unlock(); task->ptrace = flags; __ptrace_link(task, current); @@ -537,7 +564,8 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst int this_len, retval; this_len = (len > sizeof(buf)) ? sizeof(buf) : len; - retval = access_process_vm(tsk, src, buf, this_len, FOLL_FORCE); + retval = ptrace_access_vm(tsk, src, buf, this_len, FOLL_FORCE); + if (!retval) { if (copied) break; @@ -564,7 +592,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds this_len = (len > sizeof(buf)) ? sizeof(buf) : len; if (copy_from_user(buf, src, this_len)) return -EFAULT; - retval = access_process_vm(tsk, dst, buf, this_len, + retval = ptrace_access_vm(tsk, dst, buf, this_len, FOLL_FORCE | FOLL_WRITE); if (!retval) { if (copied) @@ -1128,7 +1156,7 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, unsigned long tmp; int copied; - copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE); + copied = ptrace_access_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) return -EIO; return put_user(tmp, (unsigned long __user *)data); @@ -1139,7 +1167,7 @@ int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, { int copied; - copied = access_process_vm(tsk, addr, &data, sizeof(data), + copied = ptrace_access_vm(tsk, addr, &data, sizeof(data), FOLL_FORCE | FOLL_WRITE); return (copied == sizeof(data)) ? 0 : -EIO; } @@ -1157,7 +1185,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, switch (request) { case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: - ret = access_process_vm(child, addr, &word, sizeof(word), + ret = ptrace_access_vm(child, addr, &word, sizeof(word), FOLL_FORCE); if (ret != sizeof(word)) ret = -EIO; @@ -1167,7 +1195,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, case PTRACE_POKETEXT: case PTRACE_POKEDATA: - ret = access_process_vm(child, addr, &data, sizeof(data), + ret = ptrace_access_vm(child, addr, &data, sizeof(data), FOLL_FORCE | FOLL_WRITE); ret = (ret != sizeof(data) ? -EIO : 0); break; diff --git a/mm/init-mm.c b/mm/init-mm.c index a56a851908d2..975e49f00f34 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -21,5 +22,6 @@ struct mm_struct init_mm = { .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(init_mm.mmlist), + .user_ns = &init_user_ns, INIT_MM_CONTEXT(init_mm) }; diff --git a/mm/memory.c b/mm/memory.c index c264f7cd3e47..08d8da39de28 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3904,7 +3904,7 @@ EXPORT_SYMBOL_GPL(generic_access_phys); * Access another process' address space as given in mm. If non-NULL, use the * given task for page fault accounting. */ -static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, +int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; diff --git a/mm/nommu.c b/mm/nommu.c index 9720e0bab029..27bc543128e5 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1808,7 +1808,7 @@ void filemap_map_pages(struct fault_env *fe, } EXPORT_SYMBOL(filemap_map_pages); -static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, +int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index bf663915412e..d7f282d75cc1 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -151,8 +151,16 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode, memset(&hmac_misc, 0, sizeof(hmac_misc)); hmac_misc.ino = inode->i_ino; hmac_misc.generation = inode->i_generation; - hmac_misc.uid = from_kuid(inode->i_sb->s_user_ns, inode->i_uid); - hmac_misc.gid = from_kgid(inode->i_sb->s_user_ns, inode->i_gid); + /* The hmac uid and gid must be encoded in the initial user + * namespace (not the filesystems user namespace) as encoding + * them in the filesystems user namespace allows an attack + * where first they are written in an unprivileged fuse mount + * of a filesystem and then the system is tricked to mount the + * filesystem for real on next boot and trust it because + * everything is signed. + */ + hmac_misc.uid = from_kuid(&init_user_ns, inode->i_uid); + hmac_misc.gid = from_kgid(&init_user_ns, inode->i_gid); hmac_misc.mode = inode->i_mode; crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc)); if (evm_hmac_attrs & EVM_ATTR_FSUUID)