alistair23-linux/include/linux/mman.h
Dan Williams 1c97259740 mm: introduce MAP_SHARED_VALIDATE, a mechanism to safely define new mmap flags
The mmap(2) syscall suffers from the ABI anti-pattern of not validating
unknown flags. However, proposals like MAP_SYNC need a mechanism to
define new behavior that is known to fail on older kernels without the
support. Define a new MAP_SHARED_VALIDATE flag pattern that is
guaranteed to fail on all legacy mmap implementations.

It is worth noting that the original proposal was for a standalone
MAP_VALIDATE flag. However, when that  could not be supported by all
archs Linus observed:

    I see why you *think* you want a bitmap. You think you want
    a bitmap because you want to make MAP_VALIDATE be part of MAP_SYNC
    etc, so that people can do

    ret = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED
		    | MAP_SYNC, fd, 0);

    and "know" that MAP_SYNC actually takes.

    And I'm saying that whole wish is bogus. You're fundamentally
    depending on special semantics, just make it explicit. It's already
    not portable, so don't try to make it so.

    Rename that MAP_VALIDATE as MAP_SHARED_VALIDATE, make it have a value
    of 0x3, and make people do

    ret = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED_VALIDATE
		    | MAP_SYNC, fd, 0);

    and then the kernel side is easier too (none of that random garbage
    playing games with looking at the "MAP_VALIDATE bit", but just another
    case statement in that map type thing.

    Boom. Done.

Similar to ->fallocate() we also want the ability to validate the
support for new flags on a per ->mmap() 'struct file_operations'
instance basis.  Towards that end arrange for flags to be generically
validated against a mmap_supported_flags exported by 'struct
file_operations'. By default all existing flags are implicitly
supported, but new flags require MAP_SHARED_VALIDATE and
per-instance-opt-in.

Cc: Jan Kara <jack@suse.cz>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Suggested-by: Christoph Hellwig <hch@lst.de>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2017-11-03 06:26:22 -07:00

134 lines
3.1 KiB
C

#ifndef _LINUX_MMAN_H
#define _LINUX_MMAN_H
#include <linux/mm.h>
#include <linux/percpu_counter.h>
#include <linux/atomic.h>
#include <uapi/linux/mman.h>
/*
* Arrange for legacy / undefined architecture specific flags to be
* ignored by default in LEGACY_MAP_MASK.
*/
#ifndef MAP_32BIT
#define MAP_32BIT 0
#endif
#ifndef MAP_HUGE_2MB
#define MAP_HUGE_2MB 0
#endif
#ifndef MAP_HUGE_1GB
#define MAP_HUGE_1GB 0
#endif
#ifndef MAP_UNINITIALIZED
#define MAP_UNINITIALIZED 0
#endif
/*
* The historical set of flags that all mmap implementations implicitly
* support when a ->mmap_validate() op is not provided in file_operations.
*/
#define LEGACY_MAP_MASK (MAP_SHARED \
| MAP_PRIVATE \
| MAP_FIXED \
| MAP_ANONYMOUS \
| MAP_DENYWRITE \
| MAP_EXECUTABLE \
| MAP_UNINITIALIZED \
| MAP_GROWSDOWN \
| MAP_LOCKED \
| MAP_NORESERVE \
| MAP_POPULATE \
| MAP_NONBLOCK \
| MAP_STACK \
| MAP_HUGETLB \
| MAP_32BIT \
| MAP_HUGE_2MB \
| MAP_HUGE_1GB)
extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
extern unsigned long sysctl_overcommit_kbytes;
extern struct percpu_counter vm_committed_as;
#ifdef CONFIG_SMP
extern s32 vm_committed_as_batch;
#else
#define vm_committed_as_batch 0
#endif
unsigned long vm_memory_committed(void);
static inline void vm_acct_memory(long pages)
{
percpu_counter_add_batch(&vm_committed_as, pages, vm_committed_as_batch);
}
static inline void vm_unacct_memory(long pages)
{
vm_acct_memory(-pages);
}
/*
* Allow architectures to handle additional protection bits
*/
#ifndef arch_calc_vm_prot_bits
#define arch_calc_vm_prot_bits(prot, pkey) 0
#endif
#ifndef arch_vm_get_page_prot
#define arch_vm_get_page_prot(vm_flags) __pgprot(0)
#endif
#ifndef arch_validate_prot
/*
* This is called from mprotect(). PROT_GROWSDOWN and PROT_GROWSUP have
* already been masked out.
*
* Returns true if the prot flags are valid
*/
static inline bool arch_validate_prot(unsigned long prot)
{
return (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM)) == 0;
}
#define arch_validate_prot arch_validate_prot
#endif
/*
* Optimisation macro. It is equivalent to:
* (x & bit1) ? bit2 : 0
* but this version is faster.
* ("bit1" and "bit2" must be single bits)
*/
#define _calc_vm_trans(x, bit1, bit2) \
((!(bit1) || !(bit2)) ? 0 : \
((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \
: ((x) & (bit1)) / ((bit1) / (bit2))))
/*
* Combine the mmap "prot" argument into "vm_flags" used internally.
*/
static inline unsigned long
calc_vm_prot_bits(unsigned long prot, unsigned long pkey)
{
return _calc_vm_trans(prot, PROT_READ, VM_READ ) |
_calc_vm_trans(prot, PROT_WRITE, VM_WRITE) |
_calc_vm_trans(prot, PROT_EXEC, VM_EXEC) |
arch_calc_vm_prot_bits(prot, pkey);
}
/*
* Combine the mmap "flags" argument into "vm_flags" used internally.
*/
static inline unsigned long
calc_vm_flag_bits(unsigned long flags)
{
return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
_calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) |
_calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED );
}
unsigned long vm_commit_limit(void);
#endif /* _LINUX_MMAN_H */