alistair23-linux/ipc/ipc_sysctl.c
Vasiliy Kulikov b34a6b1da3 ipc: introduce shm_rmid_forced sysctl
Add support for the shm_rmid_forced sysctl.  If set to 1, all shared
memory objects in current ipc namespace will be automatically forced to
use IPC_RMID.

The POSIX way of handling shmem allows one to create shm objects and
call shmdt(), leaving shm object associated with no process, thus
consuming memory not counted via rlimits.

With shm_rmid_forced=1 the shared memory object is counted at least for
one process, so OOM killer may effectively kill the fat process holding
the shared memory.

It obviously breaks POSIX - some programs relying on the feature would
stop working.  So set shm_rmid_forced=1 only if you're sure nobody uses
"orphaned" memory.  Use shm_rmid_forced=0 by default for compatability
reasons.

The feature was previously impemented in -ow as a configure option.

[akpm@linux-foundation.org: fix documentation, per Randy]
[akpm@linux-foundation.org: fix warning]
[akpm@linux-foundation.org: readability/conventionality tweaks]
[akpm@linux-foundation.org: fix shm_rmid_forced/shm_forced_rmid confusion, use standard comment layout]
Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Serge E. Hallyn" <serge.hallyn@canonical.com>
Cc: Daniel Lezcano <daniel.lezcano@free.fr>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Solar Designer <solar@openwall.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-07-26 16:49:44 -07:00

249 lines
6 KiB
C

/*
* Copyright (C) 2007
*
* Author: Eric Biederman <ebiederm@xmision.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, version 2 of the
* License.
*/
#include <linux/module.h>
#include <linux/ipc.h>
#include <linux/nsproxy.h>
#include <linux/sysctl.h>
#include <linux/uaccess.h>
#include <linux/ipc_namespace.h>
#include <linux/msg.h>
#include "util.h"
static void *get_ipc(ctl_table *table)
{
char *which = table->data;
struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns;
return which;
}
#ifdef CONFIG_PROC_SYSCTL
static int proc_ipc_dointvec(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);
return proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
}
static int proc_ipc_dointvec_minmax(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);
return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
}
static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ipc_namespace *ns = current->nsproxy->ipc_ns;
int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (err < 0)
return err;
if (ns->shm_rmid_forced)
shm_destroy_orphaned(ns);
return err;
}
static int proc_ipc_callback_dointvec(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
size_t lenp_bef = *lenp;
int rc;
memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);
rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
if (write && !rc && lenp_bef == *lenp)
/*
* Tunable has successfully been changed by hand. Disable its
* automatic adjustment. This simply requires unregistering
* the notifiers that trigger recalculation.
*/
unregister_ipcns_notifier(current->nsproxy->ipc_ns);
return rc;
}
static int proc_ipc_doulongvec_minmax(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);
return proc_doulongvec_minmax(&ipc_table, write, buffer,
lenp, ppos);
}
/*
* Routine that is called when the file "auto_msgmni" has successfully been
* written.
* Two values are allowed:
* 0: unregister msgmni's callback routine from the ipc namespace notifier
* chain. This means that msgmni won't be recomputed anymore upon memory
* add/remove or ipc namespace creation/removal.
* 1: register back the callback routine.
*/
static void ipc_auto_callback(int val)
{
if (!val)
unregister_ipcns_notifier(current->nsproxy->ipc_ns);
else {
/*
* Re-enable automatic recomputing only if not already
* enabled.
*/
recompute_msgmni(current->nsproxy->ipc_ns);
cond_register_ipcns_notifier(current->nsproxy->ipc_ns);
}
}
static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table ipc_table;
size_t lenp_bef = *lenp;
int oldval;
int rc;
memcpy(&ipc_table, table, sizeof(ipc_table));
ipc_table.data = get_ipc(table);
oldval = *((int *)(ipc_table.data));
rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
if (write && !rc && lenp_bef == *lenp) {
int newval = *((int *)(ipc_table.data));
/*
* The file "auto_msgmni" has correctly been set.
* React by (un)registering the corresponding tunable, if the
* value has changed.
*/
if (newval != oldval)
ipc_auto_callback(newval);
}
return rc;
}
#else
#define proc_ipc_doulongvec_minmax NULL
#define proc_ipc_dointvec NULL
#define proc_ipc_dointvec_minmax NULL
#define proc_ipc_dointvec_minmax_orphans NULL
#define proc_ipc_callback_dointvec NULL
#define proc_ipcauto_dointvec_minmax NULL
#endif
static int zero;
static int one = 1;
static struct ctl_table ipc_kern_table[] = {
{
.procname = "shmmax",
.data = &init_ipc_ns.shm_ctlmax,
.maxlen = sizeof (init_ipc_ns.shm_ctlmax),
.mode = 0644,
.proc_handler = proc_ipc_doulongvec_minmax,
},
{
.procname = "shmall",
.data = &init_ipc_ns.shm_ctlall,
.maxlen = sizeof (init_ipc_ns.shm_ctlall),
.mode = 0644,
.proc_handler = proc_ipc_doulongvec_minmax,
},
{
.procname = "shmmni",
.data = &init_ipc_ns.shm_ctlmni,
.maxlen = sizeof (init_ipc_ns.shm_ctlmni),
.mode = 0644,
.proc_handler = proc_ipc_dointvec,
},
{
.procname = "shm_rmid_forced",
.data = &init_ipc_ns.shm_rmid_forced,
.maxlen = sizeof(init_ipc_ns.shm_rmid_forced),
.mode = 0644,
.proc_handler = proc_ipc_dointvec_minmax_orphans,
.extra1 = &zero,
.extra2 = &one,
},
{
.procname = "msgmax",
.data = &init_ipc_ns.msg_ctlmax,
.maxlen = sizeof (init_ipc_ns.msg_ctlmax),
.mode = 0644,
.proc_handler = proc_ipc_dointvec,
},
{
.procname = "msgmni",
.data = &init_ipc_ns.msg_ctlmni,
.maxlen = sizeof (init_ipc_ns.msg_ctlmni),
.mode = 0644,
.proc_handler = proc_ipc_callback_dointvec,
},
{
.procname = "msgmnb",
.data = &init_ipc_ns.msg_ctlmnb,
.maxlen = sizeof (init_ipc_ns.msg_ctlmnb),
.mode = 0644,
.proc_handler = proc_ipc_dointvec,
},
{
.procname = "sem",
.data = &init_ipc_ns.sem_ctls,
.maxlen = 4*sizeof (int),
.mode = 0644,
.proc_handler = proc_ipc_dointvec,
},
{
.procname = "auto_msgmni",
.data = &init_ipc_ns.auto_msgmni,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_ipcauto_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
{}
};
static struct ctl_table ipc_root_table[] = {
{
.procname = "kernel",
.mode = 0555,
.child = ipc_kern_table,
},
{}
};
static int __init ipc_sysctl_init(void)
{
register_sysctl_table(ipc_root_table);
return 0;
}
__initcall(ipc_sysctl_init);