remarkable-linux/include/linux/mempolicy.h
David Rientjes 028fec414d mempolicy: support optional mode flags
With the evolution of mempolicies, it is necessary to support mempolicy mode
flags that specify how the policy shall behave in certain circumstances.  The
most immediate need for mode flag support is to suppress remapping the
nodemask of a policy at the time of rebind.

Both the mempolicy mode and flags are passed by the user in the 'int policy'
formal of either the set_mempolicy() or mbind() syscall.  A new constant,
MPOL_MODE_FLAGS, represents the union of legal optional flags that may be
passed as part of this int.  Mempolicies that include illegal flags as part of
their policy are rejected as invalid.

An additional member to struct mempolicy is added to support the mode flags:

	struct mempolicy {
		...
		unsigned short policy;
		unsigned short flags;
	}

The splitting of the 'int' actual passed by the user is done in
sys_set_mempolicy() and sys_mbind() for their respective syscalls.  This is
done by intersecting the actual with MPOL_MODE_FLAGS, rejecting the syscall of
there are additional flags, and storing it in the new 'flags' member of struct
mempolicy.  The intersection of the actual with ~MPOL_MODE_FLAGS is stored in
the 'policy' member of the struct and all current users of pol->policy remain
unchanged.

The union of the policy mode and optional mode flags is passed back to the
user in get_mempolicy().

This combination of mode and flags within the same actual does not break
userspace code that relies on get_mempolicy(&policy, ...) and either

	switch (policy) {
	case MPOL_BIND:
		...
	case MPOL_INTERLEAVE:
		...
	};

statements or

	if (policy == MPOL_INTERLEAVE) {
		...
	}

statements.  Such applications would need to use optional mode flags when
calling set_mempolicy() or mbind() for these previously implemented statements
to stop working.  If an application does start using optional mode flags, it
will need to mask the optional flags off the policy in switch and conditional
statements that only test mode.

An additional member is also added to struct shmem_sb_info to store the
optional mode flags.

[hugh@veritas.com: shmem mpol: fix build warning]
Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-28 08:58:19 -07:00

281 lines
7.1 KiB
C

#ifndef _LINUX_MEMPOLICY_H
#define _LINUX_MEMPOLICY_H 1
#include <linux/errno.h>
/*
* NUMA memory policies for Linux.
* Copyright 2003,2004 Andi Kleen SuSE Labs
*/
/*
* Both the MPOL_* mempolicy mode and the MPOL_F_* optional mode flags are
* passed by the user to either set_mempolicy() or mbind() in an 'int' actual.
* The MPOL_MODE_FLAGS macro determines the legal set of optional mode flags.
*/
/* Policies */
enum {
MPOL_DEFAULT,
MPOL_PREFERRED,
MPOL_BIND,
MPOL_INTERLEAVE,
MPOL_MAX, /* always last member of enum */
};
/* Flags for set_mempolicy */
/*
* MPOL_MODE_FLAGS is the union of all possible optional mode flags passed to
* either set_mempolicy() or mbind().
*/
#define MPOL_MODE_FLAGS (0)
/* Flags for get_mempolicy */
#define MPOL_F_NODE (1<<0) /* return next IL mode instead of node mask */
#define MPOL_F_ADDR (1<<1) /* look up vma using address */
#define MPOL_F_MEMS_ALLOWED (1<<2) /* return allowed memories */
/* Flags for mbind */
#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */
#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */
#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */
#define MPOL_MF_INTERNAL (1<<3) /* Internal flags start here */
#ifdef __KERNEL__
#include <linux/mmzone.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/nodemask.h>
struct vm_area_struct;
struct mm_struct;
#ifdef CONFIG_NUMA
/*
* Describe a memory policy.
*
* A mempolicy can be either associated with a process or with a VMA.
* For VMA related allocations the VMA policy is preferred, otherwise
* the process policy is used. Interrupts ignore the memory policy
* of the current process.
*
* Locking policy for interlave:
* In process context there is no locking because only the process accesses
* its own state. All vma manipulation is somewhat protected by a down_read on
* mmap_sem.
*
* Freeing policy:
* Mempolicy objects are reference counted. A mempolicy will be freed when
* mpol_free() decrements the reference count to zero.
*
* Copying policy objects:
* mpol_copy() allocates a new mempolicy and copies the specified mempolicy
* to the new storage. The reference count of the new object is initialized
* to 1, representing the caller of mpol_copy().
*/
struct mempolicy {
atomic_t refcnt;
unsigned short policy; /* See MPOL_* above */
unsigned short flags; /* See set_mempolicy() MPOL_F_* above */
union {
short preferred_node; /* preferred */
nodemask_t nodes; /* interleave/bind */
/* undefined for default */
} v;
nodemask_t cpuset_mems_allowed; /* mempolicy relative to these nodes */
};
/*
* Support for managing mempolicy data objects (clone, copy, destroy)
* The default fast path of a NULL MPOL_DEFAULT policy is always inlined.
*/
extern void __mpol_free(struct mempolicy *pol);
static inline void mpol_free(struct mempolicy *pol)
{
if (pol)
__mpol_free(pol);
}
extern struct mempolicy *__mpol_copy(struct mempolicy *pol);
static inline struct mempolicy *mpol_copy(struct mempolicy *pol)
{
if (pol)
pol = __mpol_copy(pol);
return pol;
}
#define vma_policy(vma) ((vma)->vm_policy)
#define vma_set_policy(vma, pol) ((vma)->vm_policy = (pol))
static inline void mpol_get(struct mempolicy *pol)
{
if (pol)
atomic_inc(&pol->refcnt);
}
extern int __mpol_equal(struct mempolicy *a, struct mempolicy *b);
static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
{
if (a == b)
return 1;
return __mpol_equal(a, b);
}
#define vma_mpol_equal(a,b) mpol_equal(vma_policy(a), vma_policy(b))
/* Could later add inheritance of the process policy here. */
#define mpol_set_vma_default(vma) ((vma)->vm_policy = NULL)
/*
* Tree of shared policies for a shared memory region.
* Maintain the policies in a pseudo mm that contains vmas. The vmas
* carry the policy. As a special twist the pseudo mm is indexed in pages, not
* bytes, so that we can work with shared memory segments bigger than
* unsigned long.
*/
struct sp_node {
struct rb_node nd;
unsigned long start, end;
struct mempolicy *policy;
};
struct shared_policy {
struct rb_root root;
spinlock_t lock;
};
void mpol_shared_policy_init(struct shared_policy *info, unsigned short policy,
unsigned short flags, nodemask_t *nodes);
int mpol_set_shared_policy(struct shared_policy *info,
struct vm_area_struct *vma,
struct mempolicy *new);
void mpol_free_shared_policy(struct shared_policy *p);
struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
unsigned long idx);
extern void numa_default_policy(void);
extern void numa_policy_init(void);
extern void mpol_rebind_task(struct task_struct *tsk,
const nodemask_t *new);
extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
extern void mpol_fix_fork_child_flag(struct task_struct *p);
extern struct mempolicy default_policy;
extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
unsigned long addr, gfp_t gfp_flags,
struct mempolicy **mpol, nodemask_t **nodemask);
extern unsigned slab_node(struct mempolicy *policy);
extern enum zone_type policy_zone;
static inline void check_highest_zone(enum zone_type k)
{
if (k > policy_zone && k != ZONE_MOVABLE)
policy_zone = k;
}
int do_migrate_pages(struct mm_struct *mm,
const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags);
#else
struct mempolicy {};
static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
{
return 1;
}
#define vma_mpol_equal(a,b) 1
#define mpol_set_vma_default(vma) do {} while(0)
static inline void mpol_free(struct mempolicy *p)
{
}
static inline void mpol_get(struct mempolicy *pol)
{
}
static inline struct mempolicy *mpol_copy(struct mempolicy *old)
{
return NULL;
}
struct shared_policy {};
static inline int mpol_set_shared_policy(struct shared_policy *info,
struct vm_area_struct *vma,
struct mempolicy *new)
{
return -EINVAL;
}
static inline void mpol_shared_policy_init(struct shared_policy *info,
unsigned short policy, unsigned short flags, nodemask_t *nodes)
{
}
static inline void mpol_free_shared_policy(struct shared_policy *p)
{
}
static inline struct mempolicy *
mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
{
return NULL;
}
#define vma_policy(vma) NULL
#define vma_set_policy(vma, pol) do {} while(0)
static inline void numa_policy_init(void)
{
}
static inline void numa_default_policy(void)
{
}
static inline void mpol_rebind_task(struct task_struct *tsk,
const nodemask_t *new)
{
}
static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
{
}
static inline void mpol_fix_fork_child_flag(struct task_struct *p)
{
}
static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
unsigned long addr, gfp_t gfp_flags,
struct mempolicy **mpol, nodemask_t **nodemask)
{
*mpol = NULL;
*nodemask = NULL;
return node_zonelist(0, gfp_flags);
}
static inline int do_migrate_pages(struct mm_struct *mm,
const nodemask_t *from_nodes,
const nodemask_t *to_nodes, int flags)
{
return 0;
}
static inline void check_highest_zone(int k)
{
}
#endif /* CONFIG_NUMA */
#endif /* __KERNEL__ */
#endif