alistair23-linux/fs/proc/proc_sysctl.c
Christoph Hellwig 85fe4025c6 fs: do not assign default i_ino in new_inode
Instead of always assigning an increasing inode number in new_inode
move the call to assign it into those callers that actually need it.
For now callers that need it is estimated conservatively, that is
the call is added to all filesystems that do not assign an i_ino
by themselves.  For a few more filesystems we can avoid assigning
any inode number given that they aren't user visible, and for others
it could be done lazily when an inode number is actually needed,
but that's left for later patches.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2010-10-25 21:26:11 -04:00

427 lines
9.8 KiB
C

/*
* /proc/sys support
*/
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/security.h>
#include "internal.h"
static const struct dentry_operations proc_sys_dentry_operations;
static const struct file_operations proc_sys_file_operations;
static const struct inode_operations proc_sys_inode_operations;
static const struct file_operations proc_sys_dir_file_operations;
static const struct inode_operations proc_sys_dir_operations;
static struct inode *proc_sys_make_inode(struct super_block *sb,
struct ctl_table_header *head, struct ctl_table *table)
{
struct inode *inode;
struct proc_inode *ei;
inode = new_inode(sb);
if (!inode)
goto out;
inode->i_ino = get_next_ino();
sysctl_head_get(head);
ei = PROC_I(inode);
ei->sysctl = head;
ei->sysctl_entry = table;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
inode->i_mode = table->mode;
if (!table->child) {
inode->i_mode |= S_IFREG;
inode->i_op = &proc_sys_inode_operations;
inode->i_fop = &proc_sys_file_operations;
} else {
inode->i_mode |= S_IFDIR;
inode->i_nlink = 0;
inode->i_op = &proc_sys_dir_operations;
inode->i_fop = &proc_sys_dir_file_operations;
}
out:
return inode;
}
static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name)
{
int len;
for ( ; p->procname; p++) {
if (!p->procname)
continue;
len = strlen(p->procname);
if (len != name->len)
continue;
if (memcmp(p->procname, name->name, len) != 0)
continue;
/* I have a match */
return p;
}
return NULL;
}
static struct ctl_table_header *grab_header(struct inode *inode)
{
if (PROC_I(inode)->sysctl)
return sysctl_head_grab(PROC_I(inode)->sysctl);
else
return sysctl_head_next(NULL);
}
static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nd)
{
struct ctl_table_header *head = grab_header(dir);
struct ctl_table *table = PROC_I(dir)->sysctl_entry;
struct ctl_table_header *h = NULL;
struct qstr *name = &dentry->d_name;
struct ctl_table *p;
struct inode *inode;
struct dentry *err = ERR_PTR(-ENOENT);
if (IS_ERR(head))
return ERR_CAST(head);
if (table && !table->child) {
WARN_ON(1);
goto out;
}
table = table ? table->child : head->ctl_table;
p = find_in_table(table, name);
if (!p) {
for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
if (h->attached_to != table)
continue;
p = find_in_table(h->attached_by, name);
if (p)
break;
}
}
if (!p)
goto out;
err = ERR_PTR(-ENOMEM);
inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
if (h)
sysctl_head_finish(h);
if (!inode)
goto out;
err = NULL;
dentry->d_op = &proc_sys_dentry_operations;
d_add(dentry, inode);
out:
sysctl_head_finish(head);
return err;
}
static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
size_t count, loff_t *ppos, int write)
{
struct inode *inode = filp->f_path.dentry->d_inode;
struct ctl_table_header *head = grab_header(inode);
struct ctl_table *table = PROC_I(inode)->sysctl_entry;
ssize_t error;
size_t res;
if (IS_ERR(head))
return PTR_ERR(head);
/*
* At this point we know that the sysctl was not unregistered
* and won't be until we finish.
*/
error = -EPERM;
if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ))
goto out;
/* if that can happen at all, it should be -EINVAL, not -EISDIR */
error = -EINVAL;
if (!table->proc_handler)
goto out;
/* careful: calling conventions are nasty here */
res = count;
error = table->proc_handler(table, write, buf, &res, ppos);
if (!error)
error = res;
out:
sysctl_head_finish(head);
return error;
}
static ssize_t proc_sys_read(struct file *filp, char __user *buf,
size_t count, loff_t *ppos)
{
return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0);
}
static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
size_t count, loff_t *ppos)
{
return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
}
static int proc_sys_fill_cache(struct file *filp, void *dirent,
filldir_t filldir,
struct ctl_table_header *head,
struct ctl_table *table)
{
struct dentry *child, *dir = filp->f_path.dentry;
struct inode *inode;
struct qstr qname;
ino_t ino = 0;
unsigned type = DT_UNKNOWN;
qname.name = table->procname;
qname.len = strlen(table->procname);
qname.hash = full_name_hash(qname.name, qname.len);
child = d_lookup(dir, &qname);
if (!child) {
child = d_alloc(dir, &qname);
if (child) {
inode = proc_sys_make_inode(dir->d_sb, head, table);
if (!inode) {
dput(child);
return -ENOMEM;
} else {
child->d_op = &proc_sys_dentry_operations;
d_add(child, inode);
}
} else {
return -ENOMEM;
}
}
inode = child->d_inode;
ino = inode->i_ino;
type = inode->i_mode >> 12;
dput(child);
return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
}
static int scan(struct ctl_table_header *head, ctl_table *table,
unsigned long *pos, struct file *file,
void *dirent, filldir_t filldir)
{
for (; table->procname; table++, (*pos)++) {
int res;
/* Can't do anything without a proc name */
if (!table->procname)
continue;
if (*pos < file->f_pos)
continue;
res = proc_sys_fill_cache(file, dirent, filldir, head, table);
if (res)
return res;
file->f_pos = *pos + 1;
}
return 0;
}
static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
struct dentry *dentry = filp->f_path.dentry;
struct inode *inode = dentry->d_inode;
struct ctl_table_header *head = grab_header(inode);
struct ctl_table *table = PROC_I(inode)->sysctl_entry;
struct ctl_table_header *h = NULL;
unsigned long pos;
int ret = -EINVAL;
if (IS_ERR(head))
return PTR_ERR(head);
if (table && !table->child) {
WARN_ON(1);
goto out;
}
table = table ? table->child : head->ctl_table;
ret = 0;
/* Avoid a switch here: arm builds fail with missing __cmpdi2 */
if (filp->f_pos == 0) {
if (filldir(dirent, ".", 1, filp->f_pos,
inode->i_ino, DT_DIR) < 0)
goto out;
filp->f_pos++;
}
if (filp->f_pos == 1) {
if (filldir(dirent, "..", 2, filp->f_pos,
parent_ino(dentry), DT_DIR) < 0)
goto out;
filp->f_pos++;
}
pos = 2;
ret = scan(head, table, &pos, filp, dirent, filldir);
if (ret)
goto out;
for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
if (h->attached_to != table)
continue;
ret = scan(h, h->attached_by, &pos, filp, dirent, filldir);
if (ret) {
sysctl_head_finish(h);
break;
}
}
ret = 1;
out:
sysctl_head_finish(head);
return ret;
}
static int proc_sys_permission(struct inode *inode, int mask)
{
/*
* sysctl entries that are not writeable,
* are _NOT_ writeable, capabilities or not.
*/
struct ctl_table_header *head;
struct ctl_table *table;
int error;
/* Executable files are not allowed under /proc/sys/ */
if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
return -EACCES;
head = grab_header(inode);
if (IS_ERR(head))
return PTR_ERR(head);
table = PROC_I(inode)->sysctl_entry;
if (!table) /* global root - r-xr-xr-x */
error = mask & MAY_WRITE ? -EACCES : 0;
else /* Use the permissions on the sysctl table entry */
error = sysctl_perm(head->root, table, mask);
sysctl_head_finish(head);
return error;
}
static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
int error;
if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
return -EPERM;
error = inode_change_ok(inode, attr);
if (error)
return error;
if ((attr->ia_valid & ATTR_SIZE) &&
attr->ia_size != i_size_read(inode)) {
error = vmtruncate(inode, attr->ia_size);
if (error)
return error;
}
setattr_copy(inode, attr);
mark_inode_dirty(inode);
return 0;
}
static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
struct inode *inode = dentry->d_inode;
struct ctl_table_header *head = grab_header(inode);
struct ctl_table *table = PROC_I(inode)->sysctl_entry;
if (IS_ERR(head))
return PTR_ERR(head);
generic_fillattr(inode, stat);
if (table)
stat->mode = (stat->mode & S_IFMT) | table->mode;
sysctl_head_finish(head);
return 0;
}
static const struct file_operations proc_sys_file_operations = {
.read = proc_sys_read,
.write = proc_sys_write,
.llseek = default_llseek,
};
static const struct file_operations proc_sys_dir_file_operations = {
.readdir = proc_sys_readdir,
.llseek = generic_file_llseek,
};
static const struct inode_operations proc_sys_inode_operations = {
.permission = proc_sys_permission,
.setattr = proc_sys_setattr,
.getattr = proc_sys_getattr,
};
static const struct inode_operations proc_sys_dir_operations = {
.lookup = proc_sys_lookup,
.permission = proc_sys_permission,
.setattr = proc_sys_setattr,
.getattr = proc_sys_getattr,
};
static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
{
return !PROC_I(dentry->d_inode)->sysctl->unregistering;
}
static int proc_sys_delete(struct dentry *dentry)
{
return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
}
static int proc_sys_compare(struct dentry *dir, struct qstr *qstr,
struct qstr *name)
{
struct dentry *dentry = container_of(qstr, struct dentry, d_name);
if (qstr->len != name->len)
return 1;
if (memcmp(qstr->name, name->name, name->len))
return 1;
return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl);
}
static const struct dentry_operations proc_sys_dentry_operations = {
.d_revalidate = proc_sys_revalidate,
.d_delete = proc_sys_delete,
.d_compare = proc_sys_compare,
};
int __init proc_sys_init(void)
{
struct proc_dir_entry *proc_sys_root;
proc_sys_root = proc_mkdir("sys", NULL);
proc_sys_root->proc_iops = &proc_sys_dir_operations;
proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
proc_sys_root->nlink = 0;
return 0;
}