diff --git a/fs/xattr.c b/fs/xattr.c index 464c94bf65f9..7b03df6b8be2 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -441,6 +441,12 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value, if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) posix_acl_fix_xattr_from_user(kvalue, size); + else if (strcmp(kname, XATTR_NAME_CAPS) == 0) { + error = cap_convert_nscap(d, &kvalue, size); + if (error < 0) + goto out; + size = error; + } } error = vfs_setxattr(d, kname, kvalue, size, flags); diff --git a/include/linux/capability.h b/include/linux/capability.h index 6ffb67e10c06..b52e278e4744 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -248,4 +248,6 @@ extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); +extern int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size); + #endif /* !_LINUX_CAPABILITY_H */ diff --git a/include/linux/security.h b/include/linux/security.h index b6ea1dc9cc9d..6fff8c924718 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -91,6 +91,8 @@ extern int cap_inode_setxattr(struct dentry *dentry, const char *name, extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); +extern int cap_inode_getsecurity(struct inode *inode, const char *name, + void **buffer, bool alloc); extern int cap_mmap_addr(unsigned long addr); extern int cap_mmap_file(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags); diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 6fe14d001f68..230e05d35191 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -60,9 +60,13 @@ typedef struct __user_cap_data_struct { #define VFS_CAP_U32_2 2 #define XATTR_CAPS_SZ_2 (sizeof(__le32)*(1 + 2*VFS_CAP_U32_2)) -#define XATTR_CAPS_SZ XATTR_CAPS_SZ_2 -#define VFS_CAP_U32 VFS_CAP_U32_2 -#define VFS_CAP_REVISION VFS_CAP_REVISION_2 +#define VFS_CAP_REVISION_3 0x03000000 +#define VFS_CAP_U32_3 2 +#define XATTR_CAPS_SZ_3 (sizeof(__le32)*(2 + 2*VFS_CAP_U32_3)) + +#define XATTR_CAPS_SZ XATTR_CAPS_SZ_3 +#define VFS_CAP_U32 VFS_CAP_U32_3 +#define VFS_CAP_REVISION VFS_CAP_REVISION_3 struct vfs_cap_data { __le32 magic_etc; /* Little endian */ @@ -72,6 +76,18 @@ struct vfs_cap_data { } data[VFS_CAP_U32]; }; +/* + * same as vfs_cap_data but with a rootid at the end + */ +struct vfs_ns_cap_data { + __le32 magic_etc; + struct { + __le32 permitted; /* Little endian */ + __le32 inheritable; /* Little endian */ + } data[VFS_CAP_U32]; + __le32 rootid; +}; + #ifndef __KERNEL__ /* diff --git a/security/commoncap.c b/security/commoncap.c index d59320282294..c37d27dd1e2c 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -335,6 +335,209 @@ int cap_inode_killpriv(struct dentry *dentry) return error; } +static bool rootid_owns_currentns(kuid_t kroot) +{ + struct user_namespace *ns; + + if (!uid_valid(kroot)) + return false; + + for (ns = current_user_ns(); ; ns = ns->parent) { + if (from_kuid(ns, kroot) == 0) + return true; + if (ns == &init_user_ns) + break; + } + + return false; +} + +static __u32 sansflags(__u32 m) +{ + return m & ~VFS_CAP_FLAGS_EFFECTIVE; +} + +static bool is_v2header(size_t size, __le32 magic) +{ + __u32 m = le32_to_cpu(magic); + if (size != XATTR_CAPS_SZ_2) + return false; + return sansflags(m) == VFS_CAP_REVISION_2; +} + +static bool is_v3header(size_t size, __le32 magic) +{ + __u32 m = le32_to_cpu(magic); + + if (size != XATTR_CAPS_SZ_3) + return false; + return sansflags(m) == VFS_CAP_REVISION_3; +} + +/* + * getsecurity: We are called for security.* before any attempt to read the + * xattr from the inode itself. + * + * This gives us a chance to read the on-disk value and convert it. If we + * return -EOPNOTSUPP, then vfs_getxattr() will call the i_op handler. + * + * Note we are not called by vfs_getxattr_alloc(), but that is only called + * by the integrity subsystem, which really wants the unconverted values - + * so that's good. + */ +int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, + bool alloc) +{ + int size, ret; + kuid_t kroot; + uid_t root, mappedroot; + char *tmpbuf = NULL; + struct vfs_cap_data *cap; + struct vfs_ns_cap_data *nscap; + struct dentry *dentry; + struct user_namespace *fs_ns; + + if (strcmp(name, "capability") != 0) + return -EOPNOTSUPP; + + dentry = d_find_alias(inode); + if (!dentry) + return -EINVAL; + + size = sizeof(struct vfs_ns_cap_data); + ret = (int) vfs_getxattr_alloc(dentry, XATTR_NAME_CAPS, + &tmpbuf, size, GFP_NOFS); + dput(dentry); + + if (ret < 0) + return ret; + + fs_ns = inode->i_sb->s_user_ns; + cap = (struct vfs_cap_data *) tmpbuf; + if (is_v2header((size_t) ret, cap->magic_etc)) { + /* If this is sizeof(vfs_cap_data) then we're ok with the + * on-disk value, so return that. */ + if (alloc) + *buffer = tmpbuf; + else + kfree(tmpbuf); + return ret; + } else if (!is_v3header((size_t) ret, cap->magic_etc)) { + kfree(tmpbuf); + return -EINVAL; + } + + nscap = (struct vfs_ns_cap_data *) tmpbuf; + root = le32_to_cpu(nscap->rootid); + kroot = make_kuid(fs_ns, root); + + /* If the root kuid maps to a valid uid in current ns, then return + * this as a nscap. */ + mappedroot = from_kuid(current_user_ns(), kroot); + if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) { + if (alloc) { + *buffer = tmpbuf; + nscap->rootid = cpu_to_le32(mappedroot); + } else + kfree(tmpbuf); + return size; + } + + if (!rootid_owns_currentns(kroot)) { + kfree(tmpbuf); + return -EOPNOTSUPP; + } + + /* This comes from a parent namespace. Return as a v2 capability */ + size = sizeof(struct vfs_cap_data); + if (alloc) { + *buffer = kmalloc(size, GFP_ATOMIC); + if (*buffer) { + struct vfs_cap_data *cap = *buffer; + __le32 nsmagic, magic; + magic = VFS_CAP_REVISION_2; + nsmagic = le32_to_cpu(nscap->magic_etc); + if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE) + magic |= VFS_CAP_FLAGS_EFFECTIVE; + memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32); + cap->magic_etc = cpu_to_le32(magic); + } + } + kfree(tmpbuf); + return size; +} + +static kuid_t rootid_from_xattr(const void *value, size_t size, + struct user_namespace *task_ns) +{ + const struct vfs_ns_cap_data *nscap = value; + uid_t rootid = 0; + + if (size == XATTR_CAPS_SZ_3) + rootid = le32_to_cpu(nscap->rootid); + + return make_kuid(task_ns, rootid); +} + +static bool validheader(size_t size, __le32 magic) +{ + return is_v2header(size, magic) || is_v3header(size, magic); +} + +/* + * User requested a write of security.capability. If needed, update the + * xattr to change from v2 to v3, or to fixup the v3 rootid. + * + * If all is ok, we return the new size, on error return < 0. + */ +int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size) +{ + struct vfs_ns_cap_data *nscap; + uid_t nsrootid; + const struct vfs_cap_data *cap = *ivalue; + __u32 magic, nsmagic; + struct inode *inode = d_backing_inode(dentry); + struct user_namespace *task_ns = current_user_ns(), + *fs_ns = inode->i_sb->s_user_ns; + kuid_t rootid; + size_t newsize; + + if (!*ivalue) + return -EINVAL; + if (!validheader(size, cap->magic_etc)) + return -EINVAL; + if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP)) + return -EPERM; + if (size == XATTR_CAPS_SZ_2) + if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP)) + /* user is privileged, just write the v2 */ + return size; + + rootid = rootid_from_xattr(*ivalue, size, task_ns); + if (!uid_valid(rootid)) + return -EINVAL; + + nsrootid = from_kuid(fs_ns, rootid); + if (nsrootid == -1) + return -EINVAL; + + newsize = sizeof(struct vfs_ns_cap_data); + nscap = kmalloc(newsize, GFP_ATOMIC); + if (!nscap) + return -ENOMEM; + nscap->rootid = cpu_to_le32(nsrootid); + nsmagic = VFS_CAP_REVISION_3; + magic = le32_to_cpu(cap->magic_etc); + if (magic & VFS_CAP_FLAGS_EFFECTIVE) + nsmagic |= VFS_CAP_FLAGS_EFFECTIVE; + nscap->magic_etc = cpu_to_le32(nsmagic); + memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32); + + kvfree(*ivalue); + *ivalue = nscap; + return newsize; +} + /* * Calculate the new process capability sets from the capability sets attached * to a file. @@ -388,7 +591,10 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data __u32 magic_etc; unsigned tocopy, i; int size; - struct vfs_cap_data caps; + struct vfs_ns_cap_data data, *nscaps = &data; + struct vfs_cap_data *caps = (struct vfs_cap_data *) &data; + kuid_t rootkuid; + struct user_namespace *fs_ns = inode->i_sb->s_user_ns; memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data)); @@ -396,18 +602,20 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data return -ENODATA; size = __vfs_getxattr((struct dentry *)dentry, inode, - XATTR_NAME_CAPS, &caps, XATTR_CAPS_SZ); + XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ); if (size == -ENODATA || size == -EOPNOTSUPP) /* no data, that's ok */ return -ENODATA; + if (size < 0) return size; if (size < sizeof(magic_etc)) return -EINVAL; - cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps.magic_etc); + cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc); + rootkuid = make_kuid(fs_ns, 0); switch (magic_etc & VFS_CAP_REVISION_MASK) { case VFS_CAP_REVISION_1: if (size != XATTR_CAPS_SZ_1) @@ -419,15 +627,27 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data return -EINVAL; tocopy = VFS_CAP_U32_2; break; + case VFS_CAP_REVISION_3: + if (size != XATTR_CAPS_SZ_3) + return -EINVAL; + tocopy = VFS_CAP_U32_3; + rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid)); + break; + default: return -EINVAL; } + /* Limit the caps to the mounter of the filesystem + * or the more limited uid specified in the xattr. + */ + if (!rootid_owns_currentns(rootkuid)) + return -ENODATA; CAP_FOR_EACH_U32(i) { if (i >= tocopy) break; - cpu_caps->permitted.cap[i] = le32_to_cpu(caps.data[i].permitted); - cpu_caps->inheritable.cap[i] = le32_to_cpu(caps.data[i].inheritable); + cpu_caps->permitted.cap[i] = le32_to_cpu(caps->data[i].permitted); + cpu_caps->inheritable.cap[i] = le32_to_cpu(caps->data[i].inheritable); } cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; @@ -465,8 +685,8 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps); if (rc < 0) { if (rc == -EINVAL) - printk(KERN_NOTICE "%s: get_vfs_caps_from_disk returned %d for %s\n", - __func__, rc, bprm->filename); + printk(KERN_NOTICE "Invalid argument reading file caps for %s\n", + bprm->filename); else if (rc == -ENODATA) rc = 0; goto out; @@ -663,15 +883,19 @@ int cap_bprm_secureexec(struct linux_binprm *bprm) int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { - if (!strcmp(name, XATTR_NAME_CAPS)) { - if (!capable(CAP_SETFCAP)) - return -EPERM; + /* Ignore non-security xattrs */ + if (strncmp(name, XATTR_SECURITY_PREFIX, + sizeof(XATTR_SECURITY_PREFIX) - 1) != 0) return 0; - } - if (!strncmp(name, XATTR_SECURITY_PREFIX, - sizeof(XATTR_SECURITY_PREFIX) - 1) && - !capable(CAP_SYS_ADMIN)) + /* + * For XATTR_NAME_CAPS the check will be done in + * cap_convert_nscap(), called by setxattr() + */ + if (strcmp(name, XATTR_NAME_CAPS) == 0) + return 0; + + if (!capable(CAP_SYS_ADMIN)) return -EPERM; return 0; } @@ -689,15 +913,22 @@ int cap_inode_setxattr(struct dentry *dentry, const char *name, */ int cap_inode_removexattr(struct dentry *dentry, const char *name) { - if (!strcmp(name, XATTR_NAME_CAPS)) { - if (!capable(CAP_SETFCAP)) + /* Ignore non-security xattrs */ + if (strncmp(name, XATTR_SECURITY_PREFIX, + sizeof(XATTR_SECURITY_PREFIX) - 1) != 0) + return 0; + + if (strcmp(name, XATTR_NAME_CAPS) == 0) { + /* security.capability gets namespaced */ + struct inode *inode = d_backing_inode(dentry); + if (!inode) + return -EINVAL; + if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP)) return -EPERM; return 0; } - if (!strncmp(name, XATTR_SECURITY_PREFIX, - sizeof(XATTR_SECURITY_PREFIX) - 1) && - !capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) return -EPERM; return 0; } @@ -1085,6 +1316,7 @@ struct security_hook_list capability_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(bprm_secureexec, cap_bprm_secureexec), LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv), LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv), + LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity), LSM_HOOK_INIT(mmap_addr, cap_mmap_addr), LSM_HOOK_INIT(mmap_file, cap_mmap_file), LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid),