mempolicy: add MPOL_F_STATIC_NODES flag

Add an optional mode flag, MPOL_F_STATIC_NODES, accepted through
MPOL_MODE_FLAGS.  When it is set, mpol_new() keeps the nodemask passed
by the user in pol->w.user_nodemask, and mpol_rebind_policy() intersects
that mask with the new allowed nodes instead of remapping the policy's
current nodes.  Without the flag the policy keeps remapping relative to
pol->w.cpuset_mems_allowed as before.

mpol_check_policy() is removed; its checks move into mpol_new(), which
now restricts the user nodemask to cpuset_current_mems_allowed itself.

Review fixes folded into this version:
 - MPOL_PREFERRED with an empty nodemask is accepted again as local
   allocation (preferred_node = -1) unless MPOL_F_STATIC_NODES is set;
   a non-empty nodemask with no allowed node still fails with -EINVAL.
 - mpol_to_str() checks that "=static" and its NUL fit before writing
   and bounds the write to the remaining buffer.
---
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index f2bab4d2fc40..07350d7b8d96 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -24,11 +24,13 @@ enum {
 };
 
 /* Flags for set_mempolicy */
+#define MPOL_F_STATIC_NODES	(1 << 15)
+
 /*
  * MPOL_MODE_FLAGS is the union of all possible optional mode flags passed to
  * either set_mempolicy() or mbind().
  */
-#define MPOL_MODE_FLAGS	(0)
+#define MPOL_MODE_FLAGS	(MPOL_F_STATIC_NODES)
 
 /* Flags for get_mempolicy */
 #define MPOL_F_NODE	(1<<0)	/* return next IL mode instead of node mask */
@@ -85,7 +87,10 @@ struct mempolicy {
 		nodemask_t	 nodes;		/* interleave/bind */
 		/* undefined for default */
 	} v;
-	nodemask_t cpuset_mems_allowed;	/* mempolicy relative to these nodes */
+	union {
+		nodemask_t cpuset_mems_allowed;	/* relative to these nodes */
+		nodemask_t user_nodemask;	/* nodemask passed by user */
+	} w;
 };
 
 /*
@@ -124,7 +129,6 @@ static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
 		return 1;
 	return __mpol_equal(a, b);
 }
-#define vma_mpol_equal(a,b) mpol_equal(vma_policy(a), vma_policy(b))
 
 /* Could later add inheritance of the process policy here. */
 
@@ -190,7 +194,6 @@ static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
 {
 	return 1;
 }
-#define vma_mpol_equal(a,b) 1
 
 #define mpol_set_vma_default(vma) do {} while(0)
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 1f6ff9c1bbc3..d59b1e766aee 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -113,58 +113,6 @@ struct mempolicy default_policy = {
 static void mpol_rebind_policy(struct mempolicy *pol,
 			       const nodemask_t *newmask);
 
-/* Do sanity checking on a policy */
-static int mpol_check_policy(unsigned short mode, nodemask_t *nodes)
-{
-	int was_empty, is_empty;
-
-	if (!nodes)
-		return 0;
-
-	/*
-	 * "Contextualize" the in-coming nodemast for cpusets:
-	 * Remember whether in-coming nodemask was empty, If not,
-	 * restrict the nodes to the allowed nodes in the cpuset.
-	 * This is guaranteed to be a subset of nodes with memory.
-	 */
-	cpuset_update_task_memory_state();
-	is_empty = was_empty = nodes_empty(*nodes);
-	if (!was_empty) {
-		nodes_and(*nodes, *nodes, cpuset_current_mems_allowed);
-		is_empty = nodes_empty(*nodes);	/* after "contextualization" */
-	}
-
-	switch (mode) {
-	case MPOL_DEFAULT:
-		/*
-		 * require caller to specify an empty nodemask
-		 * before "contextualization"
-		 */
-		if (!was_empty)
-			return -EINVAL;
-		break;
-	case MPOL_BIND:
-	case MPOL_INTERLEAVE:
-		/*
-		 * require at least 1 valid node after "contextualization"
-		 */
-		if (is_empty)
-			return -EINVAL;
-		break;
-	case MPOL_PREFERRED:
-		/*
-		 * Did caller specify invalid nodes?
-		 * Don't silently accept this as "local allocation".
-		 */
-		if (!was_empty && is_empty)
-			return -EINVAL;
-		break;
-	default:
-		BUG();
-	}
-	return 0;
-}
-
 /* Check that the nodemask contains at least one populated zone */
 static int is_valid_nodemask(nodemask_t *nodemask)
 {
@@ -186,48 +134,69 @@ static int is_valid_nodemask(nodemask_t *nodemask)
 	return 0;
 }
 
+static inline int mpol_store_user_nodemask(const struct mempolicy *pol)
+{
+	return pol->flags & MPOL_F_STATIC_NODES;
+}
+
 /* Create a new policy */
 static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
 				  nodemask_t *nodes)
 {
 	struct mempolicy *policy;
+	nodemask_t cpuset_context_nmask;
 
 	pr_debug("setting mode %d flags %d nodes[0] %lx\n",
 		 mode, flags, nodes ? nodes_addr(*nodes)[0] : -1);
 
 	if (mode == MPOL_DEFAULT)
-		return NULL;
+		return (nodes && nodes_weight(*nodes)) ? ERR_PTR(-EINVAL) :
+							 NULL;
 	policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
 	if (!policy)
 		return ERR_PTR(-ENOMEM);
 	atomic_set(&policy->refcnt, 1);
+	cpuset_update_task_memory_state();
+	nodes_and(cpuset_context_nmask, *nodes, cpuset_current_mems_allowed);
 	switch (mode) {
 	case MPOL_INTERLEAVE:
-		policy->v.nodes = *nodes;
-		if (nodes_weight(policy->v.nodes) == 0) {
-			kmem_cache_free(policy_cache, policy);
-			return ERR_PTR(-EINVAL);
-		}
+		if (nodes_empty(*nodes) || nodes_empty(cpuset_context_nmask))
+			goto free;
+		policy->v.nodes = cpuset_context_nmask;
 		break;
 	case MPOL_PREFERRED:
-		policy->v.preferred_node = first_node(*nodes);
-		if (policy->v.preferred_node >= MAX_NUMNODES)
-			policy->v.preferred_node = -1;
+		policy->v.preferred_node = first_node(cpuset_context_nmask);
+		if (policy->v.preferred_node >= MAX_NUMNODES) {
+			/*
+			 * An empty user nodemask requests local allocation.
+			 * Reject it with MPOL_F_STATIC_NODES, whose rebind
+			 * path needs a node in the user nodemask.
+			 */
+			if (!nodes_empty(*nodes) ||
+			    (flags & MPOL_F_STATIC_NODES))
+				goto free;
+			policy->v.preferred_node = -1;
+		}
 		break;
 	case MPOL_BIND:
-		if (!is_valid_nodemask(nodes)) {
-			kmem_cache_free(policy_cache, policy);
-			return ERR_PTR(-EINVAL);
-		}
-		policy->v.nodes = *nodes;
+		if (!is_valid_nodemask(&cpuset_context_nmask))
+			goto free;
+		policy->v.nodes = cpuset_context_nmask;
 		break;
 	default:
 		BUG();
 	}
 	policy->policy = mode;
 	policy->flags = flags;
-	policy->cpuset_mems_allowed = cpuset_mems_allowed(current);
+	if (mpol_store_user_nodemask(policy))
+		policy->w.user_nodemask = *nodes;
+	else
+		policy->w.cpuset_mems_allowed = cpuset_mems_allowed(current);
 	return policy;
+
+free:
+	kmem_cache_free(policy_cache, policy);
+	return ERR_PTR(-EINVAL);
 }
 
 static void gather_stats(struct page *, void *, int pte_dirty);
@@ -473,15 +442,14 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
 {
 	struct mempolicy *new;
 
-	if (mpol_check_policy(mode, nodes))
-		return -EINVAL;
 	new = mpol_new(mode, flags, nodes);
 	if (IS_ERR(new))
 		return PTR_ERR(new);
 	mpol_free(current->mempolicy);
 	current->mempolicy = new;
 	mpol_set_task_struct_flag();
-	if (new && new->policy == MPOL_INTERLEAVE)
+	if (new && new->policy == MPOL_INTERLEAVE &&
+	    nodes_weight(new->v.nodes))
 		current->il_next = first_node(new->v.nodes);
 	return 0;
 }
@@ -796,9 +764,6 @@ static long do_mbind(unsigned long start, unsigned long len,
 	if (end == start)
 		return 0;
 
-	if (mpol_check_policy(mode, nmask))
-		return -EINVAL;
-
 	new = mpol_new(mode, mode_flags, nmask);
 	if (IS_ERR(new))
 		return PTR_ERR(new);
@@ -1206,7 +1171,8 @@ static unsigned interleave_nodes(struct mempolicy *policy)
 	next = next_node(nid, policy->v.nodes);
 	if (next >= MAX_NUMNODES)
 		next = first_node(policy->v.nodes);
-	me->il_next = next;
+	if (next < MAX_NUMNODES)
+		me->il_next = next;
 	return nid;
 }
 
@@ -1252,10 +1218,13 @@ static unsigned offset_il_node(struct mempolicy *pol,
 		struct vm_area_struct *vma, unsigned long off)
 {
 	unsigned nnodes = nodes_weight(pol->v.nodes);
-	unsigned target = (unsigned)off % nnodes;
+	unsigned target;
 	int c;
 	int nid = -1;
 
+	if (!nnodes)
+		return numa_node_id();
+	target = (unsigned int)off % nnodes;
 	c = 0;
 	do {
 		nid = next_node(nid, pol->v.nodes);
@@ -1465,6 +1434,16 @@ struct mempolicy *__mpol_copy(struct mempolicy *old)
 	return new;
 }
 
+static int mpol_match_intent(const struct mempolicy *a,
+			     const struct mempolicy *b)
+{
+	if (a->flags != b->flags)
+		return 0;
+	if (!mpol_store_user_nodemask(a))
+		return 1;
+	return nodes_equal(a->w.user_nodemask, b->w.user_nodemask);
+}
+
 /* Slow path of a mempolicy comparison */
 int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 {
@@ -1472,6 +1451,8 @@ int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 		return 0;
 	if (a->policy != b->policy)
 		return 0;
+	if (a->policy != MPOL_DEFAULT && !mpol_match_intent(a, b))
+		return 0;
 	switch (a->policy) {
 	case MPOL_DEFAULT:
 		return 1;
@@ -1771,13 +1752,14 @@ void numa_default_policy(void)
 static void mpol_rebind_policy(struct mempolicy *pol,
 			       const nodemask_t *newmask)
 {
-	nodemask_t *mpolmask;
 	nodemask_t tmp;
+	int static_nodes;
 
 	if (!pol)
 		return;
-	mpolmask = &pol->cpuset_mems_allowed;
-	if (nodes_equal(*mpolmask, *newmask))
+	static_nodes = pol->flags & MPOL_F_STATIC_NODES;
+	if (!mpol_store_user_nodemask(pol) &&
+	    nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
 		return;
 
 	switch (pol->policy) {
@@ -1786,16 +1768,35 @@ static void mpol_rebind_policy(struct mempolicy *pol,
 	case MPOL_BIND:
 		/* Fall through */
 	case MPOL_INTERLEAVE:
-		nodes_remap(tmp, pol->v.nodes, *mpolmask, *newmask);
+		if (static_nodes)
+			nodes_and(tmp, pol->w.user_nodemask, *newmask);
+		else {
+			nodes_remap(tmp, pol->v.nodes,
+				    pol->w.cpuset_mems_allowed, *newmask);
+			pol->w.cpuset_mems_allowed = *newmask;
+		}
 		pol->v.nodes = tmp;
-		*mpolmask = *newmask;
-		current->il_next = node_remap(current->il_next,
-						*mpolmask, *newmask);
+		if (!node_isset(current->il_next, tmp)) {
+			current->il_next = next_node(current->il_next, tmp);
+			if (current->il_next >= MAX_NUMNODES)
+				current->il_next = first_node(tmp);
+			if (current->il_next >= MAX_NUMNODES)
+				current->il_next = numa_node_id();
+		}
 		break;
 	case MPOL_PREFERRED:
-		pol->v.preferred_node = node_remap(pol->v.preferred_node,
-						*mpolmask, *newmask);
-		*mpolmask = *newmask;
+		if (static_nodes) {
+			int node = first_node(pol->w.user_nodemask);
+
+			if (node_isset(node, *newmask))
+				pol->v.preferred_node = node;
+			else
+				pol->v.preferred_node = -1;
+		} else {
+			pol->v.preferred_node = node_remap(pol->v.preferred_node,
+					pol->w.cpuset_mems_allowed, *newmask);
+			pol->w.cpuset_mems_allowed = *newmask;
+		}
 		break;
 	default:
 		BUG();
@@ -1847,6 +1848,7 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 	int l;
 	nodemask_t nodes;
 	unsigned short mode = pol ? pol->policy : MPOL_DEFAULT;
+	unsigned short flags = pol ? pol->flags : 0;
 
 	switch (mode) {
 	case MPOL_DEFAULT:
@@ -1876,6 +1878,19 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 	strcpy(p, policy_types[mode]);
 	p += l;
 
+	if (flags) {
+		int need_bar = 0;
+
+		/* need room for '=', the flag name and the trailing NUL */
+		if (buffer + maxlen < p + sizeof("=static"))
+			return -ENOSPC;
+		*p++ = '=';
+
+		if (flags & MPOL_F_STATIC_NODES)
+			p += scnprintf(p, buffer + maxlen - p, "%sstatic",
+				       need_bar++ ? "|" : "");
+	}
+
 	if (!nodes_empty(nodes)) {
 		if (buffer + maxlen < p + 2)
 			return -ENOSPC;
diff --git a/mm/shmem.c b/mm/shmem.c
index 1ccf794fbe61..3e9fda0ca470 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1126,6 +1126,8 @@ static int shmem_parse_mpol(char *value, unsigned short *policy,
 		err = 0;
 	}
 	if (flags) {
+		if (!strcmp(flags, "static"))
+			*mode_flags |= MPOL_F_STATIC_NODES;
 	}
 out:
 	/* Restore string for error message */