1
0
Fork 0
alistair23-linux/drivers/media/v4l2-core/videobuf-dma-sg.c

688 lines
16 KiB
C
Raw Permalink Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* helper functions for SG DMA video4linux capture buffers
*
* The functions expect the hardware being able to scatter gather
* (i.e. the buffers are not linear in physical memory, but fragmented
* into PAGE_SIZE chunks). They also assume the driver does not need
* to touch the video data.
*
* (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org>
*
* Highly based on video-buf written originally by:
* (c) 2001,02 Gerd Knorr <kraxel@bytesex.org>
* (c) 2006 Mauro Carvalho Chehab, <mchehab@kernel.org>
* (c) 2006 Ted Walther and John Sokol
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/scatterlist.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <media/videobuf-dma-sg.h>
#define MAGIC_DMABUF 0x19721112
#define MAGIC_SG_MEM 0x17890714
#define MAGIC_CHECK(is, should) \
if (unlikely((is) != (should))) { \
printk(KERN_ERR "magic mismatch: %x (expected %x)\n", \
is, should); \
BUG(); \
}
static int debug;
module_param(debug, int, 0644);
MODULE_DESCRIPTION("helper module to manage video4linux dma sg buffers");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>");
MODULE_LICENSE("GPL");
#define dprintk(level, fmt, arg...) \
if (debug >= level) \
printk(KERN_DEBUG "vbuf-sg: " fmt , ## arg)
/* --------------------------------------------------------------------- */
/*
* Return a scatterlist for some page-aligned vmalloc()'ed memory
* block (NULL on errors). Memory for the scatterlist is allocated
* using kmalloc. The caller must free the memory.
*/
static struct scatterlist *videobuf_vmalloc_to_sg(unsigned char *virt,
int nr_pages)
{
struct scatterlist *sglist;
struct page *pg;
int i;
treewide: Use array_size() in vzalloc() The vzalloc() function has no 2-factor argument form, so multiplication factors need to be wrapped in array_size(). This patch replaces cases of: vzalloc(a * b) with: vzalloc(array_size(a, b)) as well as handling cases of: vzalloc(a * b * c) with: vzalloc(array3_size(a, b, c)) This does, however, attempt to ignore constant size factors like: vzalloc(4 * 1024) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ type TYPE; expression THING, E; @@ ( vzalloc( - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | vzalloc( - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression COUNT; typedef u8; typedef __u8; @@ ( vzalloc( - sizeof(u8) * (COUNT) + COUNT , ...) | vzalloc( - sizeof(__u8) * (COUNT) + COUNT , ...) | vzalloc( - sizeof(char) * (COUNT) + COUNT , ...) | vzalloc( - sizeof(unsigned char) * (COUNT) + COUNT , ...) | vzalloc( - sizeof(u8) * COUNT + COUNT , ...) | vzalloc( - sizeof(__u8) * COUNT + COUNT , ...) | vzalloc( - sizeof(char) * COUNT + COUNT , ...) | vzalloc( - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( vzalloc( - sizeof(TYPE) * (COUNT_ID) + array_size(COUNT_ID, sizeof(TYPE)) , ...) | vzalloc( - sizeof(TYPE) * COUNT_ID + array_size(COUNT_ID, sizeof(TYPE)) , ...) | vzalloc( - sizeof(TYPE) * (COUNT_CONST) + array_size(COUNT_CONST, sizeof(TYPE)) , ...) | vzalloc( - sizeof(TYPE) * COUNT_CONST + array_size(COUNT_CONST, sizeof(TYPE)) , ...) | vzalloc( - sizeof(THING) * (COUNT_ID) + array_size(COUNT_ID, sizeof(THING)) , ...) | vzalloc( - sizeof(THING) * COUNT_ID + array_size(COUNT_ID, sizeof(THING)) , ...) | vzalloc( - sizeof(THING) * (COUNT_CONST) + array_size(COUNT_CONST, sizeof(THING)) , ...) | vzalloc( - sizeof(THING) * COUNT_CONST + array_size(COUNT_CONST, sizeof(THING)) , ...) ) // 2-factor product, only identifiers. @@ identifier SIZE, COUNT; @@ vzalloc( - SIZE * COUNT + array_size(COUNT, SIZE) , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( vzalloc( - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vzalloc( - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vzalloc( - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vzalloc( - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vzalloc( - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | vzalloc( - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | vzalloc( - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | vzalloc( - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( vzalloc( - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | vzalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | vzalloc( - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | vzalloc( - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | vzalloc( - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | vzalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ identifier STRIDE, SIZE, COUNT; @@ ( vzalloc( - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | vzalloc( - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | vzalloc( - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vzalloc( - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | vzalloc( - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vzalloc( - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vzalloc( - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vzalloc( - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products // when they're not all constants... @@ expression E1, E2, E3; constant C1, C2, C3; @@ ( vzalloc(C1 * C2 * C3, ...) | vzalloc( - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants. @@ expression E1, E2; constant C1, C2; @@ ( vzalloc(C1 * C2, ...) | vzalloc( - E1 * E2 + array_size(E1, E2) , ...) ) Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 15:27:37 -06:00
sglist = vzalloc(array_size(nr_pages, sizeof(*sglist)));
if (NULL == sglist)
return NULL;
sg_init_table(sglist, nr_pages);
for (i = 0; i < nr_pages; i++, virt += PAGE_SIZE) {
pg = vmalloc_to_page(virt);
if (NULL == pg)
goto err;
BUG_ON(PageHighMem(pg));
sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
}
return sglist;
err:
vfree(sglist);
return NULL;
}
/*
* Return a scatterlist for a an array of userpages (NULL on errors).
* Memory for the scatterlist is allocated using kmalloc. The caller
* must free the memory.
*/
static struct scatterlist *videobuf_pages_to_sg(struct page **pages,
int nr_pages, int offset, size_t size)
{
struct scatterlist *sglist;
int i;
if (NULL == pages[0])
return NULL;
treewide: Use array_size() in vmalloc() The vmalloc() function has no 2-factor argument form, so multiplication factors need to be wrapped in array_size(). This patch replaces cases of: vmalloc(a * b) with: vmalloc(array_size(a, b)) as well as handling cases of: vmalloc(a * b * c) with: vmalloc(array3_size(a, b, c)) This does, however, attempt to ignore constant size factors like: vmalloc(4 * 1024) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ type TYPE; expression THING, E; @@ ( vmalloc( - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | vmalloc( - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression COUNT; typedef u8; typedef __u8; @@ ( vmalloc( - sizeof(u8) * (COUNT) + COUNT , ...) | vmalloc( - sizeof(__u8) * (COUNT) + COUNT , ...) | vmalloc( - sizeof(char) * (COUNT) + COUNT , ...) | vmalloc( - sizeof(unsigned char) * (COUNT) + COUNT , ...) | vmalloc( - sizeof(u8) * COUNT + COUNT , ...) | vmalloc( - sizeof(__u8) * COUNT + COUNT , ...) | vmalloc( - sizeof(char) * COUNT + COUNT , ...) | vmalloc( - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( vmalloc( - sizeof(TYPE) * (COUNT_ID) + array_size(COUNT_ID, sizeof(TYPE)) , ...) | vmalloc( - sizeof(TYPE) * COUNT_ID + array_size(COUNT_ID, sizeof(TYPE)) , ...) | vmalloc( - sizeof(TYPE) * (COUNT_CONST) + array_size(COUNT_CONST, sizeof(TYPE)) , ...) | vmalloc( - sizeof(TYPE) * COUNT_CONST + array_size(COUNT_CONST, sizeof(TYPE)) , ...) | vmalloc( - sizeof(THING) * (COUNT_ID) + array_size(COUNT_ID, sizeof(THING)) , ...) | vmalloc( - sizeof(THING) * COUNT_ID + array_size(COUNT_ID, sizeof(THING)) , ...) | vmalloc( - sizeof(THING) * (COUNT_CONST) + array_size(COUNT_CONST, sizeof(THING)) , ...) | vmalloc( - sizeof(THING) * COUNT_CONST + array_size(COUNT_CONST, sizeof(THING)) , ...) ) // 2-factor product, only identifiers. @@ identifier SIZE, COUNT; @@ vmalloc( - SIZE * COUNT + array_size(COUNT, SIZE) , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( vmalloc( - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vmalloc( - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vmalloc( - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vmalloc( - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vmalloc( - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | vmalloc( - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | vmalloc( - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | vmalloc( - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( vmalloc( - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | vmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | vmalloc( - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | vmalloc( - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | vmalloc( - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | vmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ identifier STRIDE, SIZE, COUNT; @@ ( vmalloc( - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products // when they're not all constants... @@ expression E1, E2, E3; constant C1, C2, C3; @@ ( vmalloc(C1 * C2 * C3, ...) | vmalloc( - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants. @@ expression E1, E2; constant C1, C2; @@ ( vmalloc(C1 * C2, ...) | vmalloc( - E1 * E2 + array_size(E1, E2) , ...) ) Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 15:27:11 -06:00
sglist = vmalloc(array_size(nr_pages, sizeof(*sglist)));
if (NULL == sglist)
return NULL;
sg_init_table(sglist, nr_pages);
if (PageHighMem(pages[0]))
/* DMA to highmem pages might not work */
goto highmem;
sg_set_page(&sglist[0], pages[0],
min_t(size_t, PAGE_SIZE - offset, size), offset);
size -= min_t(size_t, PAGE_SIZE - offset, size);
for (i = 1; i < nr_pages; i++) {
if (NULL == pages[i])
goto nopage;
if (PageHighMem(pages[i]))
goto highmem;
sg_set_page(&sglist[i], pages[i], min_t(size_t, PAGE_SIZE, size), 0);
size -= min_t(size_t, PAGE_SIZE, size);
}
return sglist;
nopage:
dprintk(2, "sgl: oops - no page\n");
vfree(sglist);
return NULL;
highmem:
dprintk(2, "sgl: oops - highmem page\n");
vfree(sglist);
return NULL;
}
/* --------------------------------------------------------------------- */
struct videobuf_dmabuf *videobuf_to_dma(struct videobuf_buffer *buf)
{
struct videobuf_dma_sg_memory *mem = buf->priv;
BUG_ON(!mem);
MAGIC_CHECK(mem->magic, MAGIC_SG_MEM);
return &mem->dma;
}
EXPORT_SYMBOL_GPL(videobuf_to_dma);
static void videobuf_dma_init(struct videobuf_dmabuf *dma)
{
memset(dma, 0, sizeof(*dma));
dma->magic = MAGIC_DMABUF;
}
static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma,
int direction, unsigned long data, unsigned long size)
{
unsigned long first, last;
int err, rw = 0;
unsigned int flags = FOLL_FORCE;
dma->direction = direction;
switch (dma->direction) {
case DMA_FROM_DEVICE:
rw = READ;
break;
case DMA_TO_DEVICE:
rw = WRITE;
break;
default:
BUG();
}
first = (data & PAGE_MASK) >> PAGE_SHIFT;
last = ((data+size-1) & PAGE_MASK) >> PAGE_SHIFT;
dma->offset = data & ~PAGE_MASK;
dma->size = size;
dma->nr_pages = last-first+1;
treewide: kmalloc() -> kmalloc_array() The kmalloc() function has a 2-factor argument form, kmalloc_array(). This patch replaces cases of: kmalloc(a * b, gfp) with: kmalloc_array(a * b, gfp) as well as handling cases of: kmalloc(a * b * c, gfp) with: kmalloc(array3_size(a, b, c), gfp) as it's slightly less ugly than: kmalloc_array(array_size(a, b), c, gfp) This does, however, attempt to ignore constant size factors like: kmalloc(4 * 1024, gfp) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The tools/ directory was manually excluded, since it has its own implementation of kmalloc(). The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ type TYPE; expression THING, E; @@ ( kmalloc( - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | kmalloc( - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression COUNT; typedef u8; typedef __u8; @@ ( kmalloc( - sizeof(u8) * (COUNT) + COUNT , ...) | kmalloc( - sizeof(__u8) * (COUNT) + COUNT , ...) | kmalloc( - sizeof(char) * (COUNT) + COUNT , ...) | kmalloc( - sizeof(unsigned char) * (COUNT) + COUNT , ...) | kmalloc( - sizeof(u8) * COUNT + COUNT , ...) | kmalloc( - sizeof(__u8) * COUNT + COUNT , ...) | kmalloc( - sizeof(char) * COUNT + COUNT , ...) | kmalloc( - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( - kmalloc + kmalloc_array ( - sizeof(TYPE) * (COUNT_ID) + COUNT_ID, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * COUNT_ID + COUNT_ID, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * (COUNT_CONST) + COUNT_CONST, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * COUNT_CONST + COUNT_CONST, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * (COUNT_ID) + COUNT_ID, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * COUNT_ID + COUNT_ID, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * (COUNT_CONST) + COUNT_CONST, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * COUNT_CONST + COUNT_CONST, sizeof(THING) , ...) ) // 2-factor product, only identifiers. @@ identifier SIZE, COUNT; @@ - kmalloc + kmalloc_array ( - SIZE * COUNT + COUNT, SIZE , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( kmalloc( - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kmalloc( - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kmalloc( - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kmalloc( - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kmalloc( - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kmalloc( - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kmalloc( - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kmalloc( - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( kmalloc( - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kmalloc( - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kmalloc( - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kmalloc( - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | kmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ identifier STRIDE, SIZE, COUNT; @@ ( kmalloc( - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kmalloc( - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products, // when they're not all constants... @@ expression E1, E2, E3; constant C1, C2, C3; @@ ( kmalloc(C1 * C2 * C3, ...) | kmalloc( - (E1) * E2 * E3 + array3_size(E1, E2, E3) , ...) | kmalloc( - (E1) * (E2) * E3 + array3_size(E1, E2, E3) , ...) | kmalloc( - (E1) * (E2) * (E3) + array3_size(E1, E2, E3) , ...) | kmalloc( - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants, // keeping sizeof() as the second factor argument. @@ expression THING, E1, E2; type TYPE; constant C1, C2, C3; @@ ( kmalloc(sizeof(THING) * C2, ...) | kmalloc(sizeof(TYPE) * C2, ...) | kmalloc(C1 * C2 * C3, ...) | kmalloc(C1 * C2, ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * (E2) + E2, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(TYPE) * E2 + E2, sizeof(TYPE) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * (E2) + E2, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - sizeof(THING) * E2 + E2, sizeof(THING) , ...) | - kmalloc + kmalloc_array ( - (E1) * E2 + E1, E2 , ...) | - kmalloc + kmalloc_array ( - (E1) * (E2) + E1, E2 , ...) | - kmalloc + kmalloc_array ( - E1 * E2 + E1, E2 , ...) ) Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 14:55:00 -06:00
dma->pages = kmalloc_array(dma->nr_pages, sizeof(struct page *),
GFP_KERNEL);
if (NULL == dma->pages)
return -ENOMEM;
if (rw == READ)
flags |= FOLL_WRITE;
dprintk(1, "init user [0x%lx+0x%lx => %d pages]\n",
data, size, dma->nr_pages);
mm/gup: replace get_user_pages_longterm() with FOLL_LONGTERM Pach series "Add FOLL_LONGTERM to GUP fast and use it". HFI1, qib, and mthca, use get_user_pages_fast() due to its performance advantages. These pages can be held for a significant time. But get_user_pages_fast() does not protect against mapping FS DAX pages. Introduce FOLL_LONGTERM and use this flag in get_user_pages_fast() which retains the performance while also adding the FS DAX checks. XDP has also shown interest in using this functionality.[1] In addition we change get_user_pages() to use the new FOLL_LONGTERM flag and remove the specialized get_user_pages_longterm call. [1] https://lkml.org/lkml/2019/3/19/939 "longterm" is a relative thing and at this point is probably a misnomer. This is really flagging a pin which is going to be given to hardware and can't move. I've thought of a couple of alternative names but I think we have to settle on if we are going to use FL_LAYOUT or something else to solve the "longterm" problem. Then I think we can change the flag to a better name. Secondly, it depends on how often you are registering memory. I have spoken with some RDMA users who consider MR in the performance path... For the overall application performance. I don't have the numbers as the tests for HFI1 were done a long time ago. But there was a significant advantage. Some of which is probably due to the fact that you don't have to hold mmap_sem. Finally, architecturally I think it would be good for everyone to use *_fast. There are patches submitted to the RDMA list which would allow the use of *_fast (they reworking the use of mmap_sem) and as soon as they are accepted I'll submit a patch to convert the RDMA core as well. Also to this point others are looking to use *_fast. As an aside, Jasons pointed out in my previous submission that *_fast and *_unlocked look very much the same. I agree and I think further cleanup will be coming. But I'm focused on getting the final solution for DAX at the moment. This patch (of 7): This patch starts a series which aims to support FOLL_LONGTERM in get_user_pages_fast(). Some callers who would like to do a longterm (user controlled pin) of pages with the fast variant of GUP for performance purposes. Rather than have a separate get_user_pages_longterm() call, introduce FOLL_LONGTERM and change the longterm callers to use it. This patch does not change any functionality. In the short term "longterm" or user controlled pins are unsafe for Filesystems and FS DAX in particular has been blocked. However, callers of get_user_pages_fast() were not "protected". FOLL_LONGTERM can _only_ be supported with get_user_pages[_fast]() as it requires vmas to determine if DAX is in use. NOTE: In merging with the CMA changes we opt to change the get_user_pages() call in check_and_migrate_cma_pages() to a call of __get_user_pages_locked() on the newly migrated pages. This makes the code read better in that we are calling __get_user_pages_locked() on the pages before and after a potential migration. As a side affect some of the interfaces are cleaned up but this is not the primary purpose of the series. In review[1] it was asked: <quote> > This I don't get - if you do lock down long term mappings performance > of the actual get_user_pages call shouldn't matter to start with. > > What do I miss? A couple of points. First "longterm" is a relative thing and at this point is probably a misnomer. This is really flagging a pin which is going to be given to hardware and can't move. I've thought of a couple of alternative names but I think we have to settle on if we are going to use FL_LAYOUT or something else to solve the "longterm" problem. Then I think we can change the flag to a better name. Second, It depends on how often you are registering memory. I have spoken with some RDMA users who consider MR in the performance path... For the overall application performance. I don't have the numbers as the tests for HFI1 were done a long time ago. But there was a significant advantage. Some of which is probably due to the fact that you don't have to hold mmap_sem. Finally, architecturally I think it would be good for everyone to use *_fast. There are patches submitted to the RDMA list which would allow the use of *_fast (they reworking the use of mmap_sem) and as soon as they are accepted I'll submit a patch to convert the RDMA core as well. Also to this point others are looking to use *_fast. As an asside, Jasons pointed out in my previous submission that *_fast and *_unlocked look very much the same. I agree and I think further cleanup will be coming. But I'm focused on getting the final solution for DAX at the moment. </quote> [1] https://lore.kernel.org/lkml/20190220180255.GA12020@iweiny-DESK2.sc.intel.com/T/#md6abad2569f3bf6c1f03686c8097ab6563e94965 [ira.weiny@intel.com: v3] Link: http://lkml.kernel.org/r/20190328084422.29911-2-ira.weiny@intel.com Link: http://lkml.kernel.org/r/20190328084422.29911-2-ira.weiny@intel.com Link: http://lkml.kernel.org/r/20190317183438.2057-2-ira.weiny@intel.com Signed-off-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: John Hubbard <jhubbard@nvidia.com> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Rich Felker <dalias@libc.org> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: James Hogan <jhogan@kernel.org> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Mike Marshall <hubcap@omnibond.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-05-13 18:17:03 -06:00
err = get_user_pages(data & PAGE_MASK, dma->nr_pages,
flags | FOLL_LONGTERM, dma->pages, NULL);
if (err != dma->nr_pages) {
dma->nr_pages = (err >= 0) ? err : 0;
mm/gup: replace get_user_pages_longterm() with FOLL_LONGTERM Pach series "Add FOLL_LONGTERM to GUP fast and use it". HFI1, qib, and mthca, use get_user_pages_fast() due to its performance advantages. These pages can be held for a significant time. But get_user_pages_fast() does not protect against mapping FS DAX pages. Introduce FOLL_LONGTERM and use this flag in get_user_pages_fast() which retains the performance while also adding the FS DAX checks. XDP has also shown interest in using this functionality.[1] In addition we change get_user_pages() to use the new FOLL_LONGTERM flag and remove the specialized get_user_pages_longterm call. [1] https://lkml.org/lkml/2019/3/19/939 "longterm" is a relative thing and at this point is probably a misnomer. This is really flagging a pin which is going to be given to hardware and can't move. I've thought of a couple of alternative names but I think we have to settle on if we are going to use FL_LAYOUT or something else to solve the "longterm" problem. Then I think we can change the flag to a better name. Secondly, it depends on how often you are registering memory. I have spoken with some RDMA users who consider MR in the performance path... For the overall application performance. I don't have the numbers as the tests for HFI1 were done a long time ago. But there was a significant advantage. Some of which is probably due to the fact that you don't have to hold mmap_sem. Finally, architecturally I think it would be good for everyone to use *_fast. There are patches submitted to the RDMA list which would allow the use of *_fast (they reworking the use of mmap_sem) and as soon as they are accepted I'll submit a patch to convert the RDMA core as well. Also to this point others are looking to use *_fast. As an aside, Jasons pointed out in my previous submission that *_fast and *_unlocked look very much the same. I agree and I think further cleanup will be coming. But I'm focused on getting the final solution for DAX at the moment. This patch (of 7): This patch starts a series which aims to support FOLL_LONGTERM in get_user_pages_fast(). Some callers who would like to do a longterm (user controlled pin) of pages with the fast variant of GUP for performance purposes. Rather than have a separate get_user_pages_longterm() call, introduce FOLL_LONGTERM and change the longterm callers to use it. This patch does not change any functionality. In the short term "longterm" or user controlled pins are unsafe for Filesystems and FS DAX in particular has been blocked. However, callers of get_user_pages_fast() were not "protected". FOLL_LONGTERM can _only_ be supported with get_user_pages[_fast]() as it requires vmas to determine if DAX is in use. NOTE: In merging with the CMA changes we opt to change the get_user_pages() call in check_and_migrate_cma_pages() to a call of __get_user_pages_locked() on the newly migrated pages. This makes the code read better in that we are calling __get_user_pages_locked() on the pages before and after a potential migration. As a side affect some of the interfaces are cleaned up but this is not the primary purpose of the series. In review[1] it was asked: <quote> > This I don't get - if you do lock down long term mappings performance > of the actual get_user_pages call shouldn't matter to start with. > > What do I miss? A couple of points. First "longterm" is a relative thing and at this point is probably a misnomer. This is really flagging a pin which is going to be given to hardware and can't move. I've thought of a couple of alternative names but I think we have to settle on if we are going to use FL_LAYOUT or something else to solve the "longterm" problem. Then I think we can change the flag to a better name. Second, It depends on how often you are registering memory. I have spoken with some RDMA users who consider MR in the performance path... For the overall application performance. I don't have the numbers as the tests for HFI1 were done a long time ago. But there was a significant advantage. Some of which is probably due to the fact that you don't have to hold mmap_sem. Finally, architecturally I think it would be good for everyone to use *_fast. There are patches submitted to the RDMA list which would allow the use of *_fast (they reworking the use of mmap_sem) and as soon as they are accepted I'll submit a patch to convert the RDMA core as well. Also to this point others are looking to use *_fast. As an asside, Jasons pointed out in my previous submission that *_fast and *_unlocked look very much the same. I agree and I think further cleanup will be coming. But I'm focused on getting the final solution for DAX at the moment. </quote> [1] https://lore.kernel.org/lkml/20190220180255.GA12020@iweiny-DESK2.sc.intel.com/T/#md6abad2569f3bf6c1f03686c8097ab6563e94965 [ira.weiny@intel.com: v3] Link: http://lkml.kernel.org/r/20190328084422.29911-2-ira.weiny@intel.com Link: http://lkml.kernel.org/r/20190328084422.29911-2-ira.weiny@intel.com Link: http://lkml.kernel.org/r/20190317183438.2057-2-ira.weiny@intel.com Signed-off-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: John Hubbard <jhubbard@nvidia.com> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Rich Felker <dalias@libc.org> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: James Hogan <jhogan@kernel.org> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Mike Marshall <hubcap@omnibond.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-05-13 18:17:03 -06:00
dprintk(1, "get_user_pages: err=%d [%d]\n", err,
v4l2: disable filesystem-dax mapping support V4L2 memory registrations are incompatible with filesystem-dax that needs the ability to revoke dma access to a mapping at will, or otherwise allow the kernel to wait for completion of DMA. The filesystem-dax implementation breaks the traditional solution of truncate of active file backed mappings since there is no page-cache page we can orphan to sustain ongoing DMA. If v4l2 wants to support long lived DMA mappings it needs to arrange to hold a file lease or use some other mechanism so that the kernel can coordinate revoking DMA access when the filesystem needs to truncate mappings. Link: http://lkml.kernel.org/r/151068940499.7446.12846708245365671207.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reported-by: Jan Kara <jack@suse.cz> Reviewed-by: Jan Kara <jack@suse.cz> Cc: Mauro Carvalho Chehab <mchehab@kernel.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Doug Ledford <dledford@redhat.com> Cc: Hal Rosenstock <hal.rosenstock@gmail.com> Cc: Inki Dae <inki.dae@samsung.com> Cc: Jason Gunthorpe <jgg@mellanox.com> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Joonyoung Shim <jy0922.shim@samsung.com> Cc: Kyungmin Park <kyungmin.park@samsung.com> Cc: Mel Gorman <mgorman@suse.de> Cc: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: Sean Hefty <sean.hefty@intel.com> Cc: Seung-Woo Kim <sw0312.kim@samsung.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-11-29 17:10:43 -07:00
dma->nr_pages);
return err < 0 ? err : -EINVAL;
}
return 0;
}
static int videobuf_dma_init_user(struct videobuf_dmabuf *dma, int direction,
unsigned long data, unsigned long size)
{
int ret;
down_read(&current->mm->mmap_sem);
ret = videobuf_dma_init_user_locked(dma, direction, data, size);
up_read(&current->mm->mmap_sem);
return ret;
}
static int videobuf_dma_init_kernel(struct videobuf_dmabuf *dma, int direction,
int nr_pages)
{
int i;
dprintk(1, "init kernel [%d pages]\n", nr_pages);
dma->direction = direction;
dma->vaddr_pages = kcalloc(nr_pages, sizeof(*dma->vaddr_pages),
GFP_KERNEL);
if (!dma->vaddr_pages)
return -ENOMEM;
dma->dma_addr = kcalloc(nr_pages, sizeof(*dma->dma_addr), GFP_KERNEL);
if (!dma->dma_addr) {
kfree(dma->vaddr_pages);
return -ENOMEM;
}
for (i = 0; i < nr_pages; i++) {
void *addr;
addr = dma_alloc_coherent(dma->dev, PAGE_SIZE,
&(dma->dma_addr[i]), GFP_KERNEL);
if (addr == NULL)
goto out_free_pages;
dma->vaddr_pages[i] = virt_to_page(addr);
}
dma->vaddr = vmap(dma->vaddr_pages, nr_pages, VM_MAP | VM_IOREMAP,
PAGE_KERNEL);
if (NULL == dma->vaddr) {
dprintk(1, "vmalloc_32(%d pages) failed\n", nr_pages);
goto out_free_pages;
}
dprintk(1, "vmalloc is at addr %p, size=%d\n",
dma->vaddr, nr_pages << PAGE_SHIFT);
memset(dma->vaddr, 0, nr_pages << PAGE_SHIFT);
dma->nr_pages = nr_pages;
return 0;
out_free_pages:
while (i > 0) {
void *addr;
i--;
addr = page_address(dma->vaddr_pages[i]);
dma_free_coherent(dma->dev, PAGE_SIZE, addr, dma->dma_addr[i]);
}
kfree(dma->dma_addr);
dma->dma_addr = NULL;
kfree(dma->vaddr_pages);
dma->vaddr_pages = NULL;
return -ENOMEM;
}
static int videobuf_dma_init_overlay(struct videobuf_dmabuf *dma, int direction,
dma_addr_t addr, int nr_pages)
{
dprintk(1, "init overlay [%d pages @ bus 0x%lx]\n",
nr_pages, (unsigned long)addr);
dma->direction = direction;
if (0 == addr)
return -EINVAL;
dma->bus_addr = addr;
dma->nr_pages = nr_pages;
return 0;
}
static int videobuf_dma_map(struct device *dev, struct videobuf_dmabuf *dma)
{
MAGIC_CHECK(dma->magic, MAGIC_DMABUF);
BUG_ON(0 == dma->nr_pages);
if (dma->pages) {
dma->sglist = videobuf_pages_to_sg(dma->pages, dma->nr_pages,
dma->offset, dma->size);
}
if (dma->vaddr) {
dma->sglist = videobuf_vmalloc_to_sg(dma->vaddr,
dma->nr_pages);
}
if (dma->bus_addr) {
dma->sglist = vmalloc(sizeof(*dma->sglist));
if (NULL != dma->sglist) {
dma->sglen = 1;
sg_dma_address(&dma->sglist[0]) = dma->bus_addr
& PAGE_MASK;
dma->sglist[0].offset = dma->bus_addr & ~PAGE_MASK;
sg_dma_len(&dma->sglist[0]) = dma->nr_pages * PAGE_SIZE;
}
}
if (NULL == dma->sglist) {
dprintk(1, "scatterlist is NULL\n");
return -ENOMEM;
}
if (!dma->bus_addr) {
dma->sglen = dma_map_sg(dev, dma->sglist,
dma->nr_pages, dma->direction);
if (0 == dma->sglen) {
printk(KERN_WARNING
"%s: videobuf_map_sg failed\n", __func__);
vfree(dma->sglist);
dma->sglist = NULL;
dma->sglen = 0;
return -ENOMEM;
}
}
return 0;
}
int videobuf_dma_unmap(struct device *dev, struct videobuf_dmabuf *dma)
{
MAGIC_CHECK(dma->magic, MAGIC_DMABUF);
if (!dma->sglen)
return 0;
dma_unmap_sg(dev, dma->sglist, dma->nr_pages, dma->direction);
vfree(dma->sglist);
dma->sglist = NULL;
dma->sglen = 0;
return 0;
}
EXPORT_SYMBOL_GPL(videobuf_dma_unmap);
int videobuf_dma_free(struct videobuf_dmabuf *dma)
{
int i;
MAGIC_CHECK(dma->magic, MAGIC_DMABUF);
BUG_ON(dma->sglen);
if (dma->pages) {
media/v4l2-core: set pages dirty upon releasing DMA buffers commit 3c7470b6f68434acae459482ab920d1e3fabd1c7 upstream. After DMA is complete, and the device and CPU caches are synchronized, it's still required to mark the CPU pages as dirty, if the data was coming from the device. However, this driver was just issuing a bare put_page() call, without any set_page_dirty*() call. Fix the problem, by calling set_page_dirty_lock() if the CPU pages were potentially receiving data from the device. Link: http://lkml.kernel.org/r/20200107224558.2362728-11-jhubbard@nvidia.com Signed-off-by: John Hubbard <jhubbard@nvidia.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Acked-by: Hans Verkuil <hverkuil-cisco@xs4all.nl> Cc: Mauro Carvalho Chehab <mchehab@kernel.org> Cc: <stable@vger.kernel.org> Cc: Alex Williamson <alex.williamson@redhat.com> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Cc: Björn Töpel <bjorn.topel@intel.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Ira Weiny <ira.weiny@intel.com> Cc: Jan Kara <jack@suse.cz> Cc: Jason Gunthorpe <jgg@mellanox.com> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Jens Axboe <axboe@kernel.dk> Cc: Jerome Glisse <jglisse@redhat.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Kirill A. Shutemov <kirill@shutemov.name> Cc: Leon Romanovsky <leonro@mellanox.com> Cc: Mike Rapoport <rppt@linux.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2020-01-30 23:12:50 -07:00
for (i = 0; i < dma->nr_pages; i++) {
if (dma->direction == DMA_FROM_DEVICE)
set_page_dirty_lock(dma->pages[i]);
mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time ago with promise that one day it will be possible to implement page cache with bigger chunks than PAGE_SIZE. This promise never materialized. And unlikely will. We have many places where PAGE_CACHE_SIZE assumed to be equal to PAGE_SIZE. And it's constant source of confusion on whether PAGE_CACHE_* or PAGE_* constant should be used in a particular case, especially on the border between fs and mm. Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much breakage to be doable. Let's stop pretending that pages in page cache are special. They are not. The changes are pretty straight-forward: - <foo> << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>; - <foo> >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> <foo>; - PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN}; - page_cache_get() -> get_page(); - page_cache_release() -> put_page(); This patch contains automated changes generated with coccinelle using script below. For some reason, coccinelle doesn't patch header files. I've called spatch for them manually. The only adjustment after coccinelle is revert of changes to PAGE_CAHCE_ALIGN definition: we are going to drop it later. There are few places in the code where coccinelle didn't reach. I'll fix them manually in a separate patch. Comments and documentation also will be addressed with the separate patch. virtual patch @@ expression E; @@ - E << (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ expression E; @@ - E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ @@ - PAGE_CACHE_SHIFT + PAGE_SHIFT @@ @@ - PAGE_CACHE_SIZE + PAGE_SIZE @@ @@ - PAGE_CACHE_MASK + PAGE_MASK @@ expression E; @@ - PAGE_CACHE_ALIGN(E) + PAGE_ALIGN(E) @@ expression E; @@ - page_cache_get(E) + get_page(E) @@ expression E; @@ - page_cache_release(E) + put_page(E) Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Acked-by: Michal Hocko <mhocko@suse.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-04-01 06:29:47 -06:00
put_page(dma->pages[i]);
media/v4l2-core: set pages dirty upon releasing DMA buffers commit 3c7470b6f68434acae459482ab920d1e3fabd1c7 upstream. After DMA is complete, and the device and CPU caches are synchronized, it's still required to mark the CPU pages as dirty, if the data was coming from the device. However, this driver was just issuing a bare put_page() call, without any set_page_dirty*() call. Fix the problem, by calling set_page_dirty_lock() if the CPU pages were potentially receiving data from the device. Link: http://lkml.kernel.org/r/20200107224558.2362728-11-jhubbard@nvidia.com Signed-off-by: John Hubbard <jhubbard@nvidia.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Acked-by: Hans Verkuil <hverkuil-cisco@xs4all.nl> Cc: Mauro Carvalho Chehab <mchehab@kernel.org> Cc: <stable@vger.kernel.org> Cc: Alex Williamson <alex.williamson@redhat.com> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Cc: Björn Töpel <bjorn.topel@intel.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Ira Weiny <ira.weiny@intel.com> Cc: Jan Kara <jack@suse.cz> Cc: Jason Gunthorpe <jgg@mellanox.com> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Jens Axboe <axboe@kernel.dk> Cc: Jerome Glisse <jglisse@redhat.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Kirill A. Shutemov <kirill@shutemov.name> Cc: Leon Romanovsky <leonro@mellanox.com> Cc: Mike Rapoport <rppt@linux.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2020-01-30 23:12:50 -07:00
}
kfree(dma->pages);
dma->pages = NULL;
}
if (dma->dma_addr) {
for (i = 0; i < dma->nr_pages; i++) {
void *addr;
addr = page_address(dma->vaddr_pages[i]);
dma_free_coherent(dma->dev, PAGE_SIZE, addr,
dma->dma_addr[i]);
}
kfree(dma->dma_addr);
dma->dma_addr = NULL;
kfree(dma->vaddr_pages);
dma->vaddr_pages = NULL;
vunmap(dma->vaddr);
dma->vaddr = NULL;
}
if (dma->bus_addr)
dma->bus_addr = 0;
dma->direction = DMA_NONE;
return 0;
}
EXPORT_SYMBOL_GPL(videobuf_dma_free);
/* --------------------------------------------------------------------- */
static void videobuf_vm_open(struct vm_area_struct *vma)
{
struct videobuf_mapping *map = vma->vm_private_data;
dprintk(2, "vm_open %p [count=%d,vma=%08lx-%08lx]\n", map,
map->count, vma->vm_start, vma->vm_end);
map->count++;
}
static void videobuf_vm_close(struct vm_area_struct *vma)
{
struct videobuf_mapping *map = vma->vm_private_data;
struct videobuf_queue *q = map->q;
struct videobuf_dma_sg_memory *mem;
int i;
dprintk(2, "vm_close %p [count=%d,vma=%08lx-%08lx]\n", map,
map->count, vma->vm_start, vma->vm_end);
map->count--;
if (0 == map->count) {
dprintk(1, "munmap %p q=%p\n", map, q);
videobuf_queue_lock(q);
for (i = 0; i < VIDEO_MAX_FRAME; i++) {
if (NULL == q->bufs[i])
continue;
mem = q->bufs[i]->priv;
if (!mem)
continue;
MAGIC_CHECK(mem->magic, MAGIC_SG_MEM);
if (q->bufs[i]->map != map)
continue;
q->bufs[i]->map = NULL;
q->bufs[i]->baddr = 0;
q->ops->buf_release(q, q->bufs[i]);
}
videobuf_queue_unlock(q);
kfree(map);
}
return;
}
/*
* Get a anonymous page for the mapping. Make sure we can DMA to that
* memory location with 32bit PCI devices (i.e. don't use highmem for
* now ...). Bounce buffers don't work very well for the data rates
* video capture has.
*/
static vm_fault_t videobuf_vm_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct page *page;
dprintk(3, "fault: fault @ %08lx [vma %08lx-%08lx]\n",
vmf->address, vma->vm_start, vma->vm_end);
page = alloc_page(GFP_USER | __GFP_DMA32);
if (!page)
return VM_FAULT_OOM;
clear_user_highpage(page, vmf->address);
vmf->page = page;
return 0;
}
static const struct vm_operations_struct videobuf_vm_ops = {
.open = videobuf_vm_open,
.close = videobuf_vm_close,
.fault = videobuf_vm_fault,
};
/* ---------------------------------------------------------------------
* SG handlers for the generic methods
*/
/* Allocated area consists on 3 parts:
struct video_buffer
struct <driver>_buffer (cx88_buffer, saa7134_buf, ...)
struct videobuf_dma_sg_memory
*/
static struct videobuf_buffer *__videobuf_alloc_vb(size_t size)
{
struct videobuf_dma_sg_memory *mem;
struct videobuf_buffer *vb;
vb = kzalloc(size + sizeof(*mem), GFP_KERNEL);
if (!vb)
return vb;
mem = vb->priv = ((char *)vb) + size;
mem->magic = MAGIC_SG_MEM;
videobuf_dma_init(&mem->dma);
dprintk(1, "%s: allocated at %p(%ld+%ld) & %p(%ld)\n",
__func__, vb, (long)sizeof(*vb), (long)size - sizeof(*vb),
mem, (long)sizeof(*mem));
return vb;
}
static void *__videobuf_to_vaddr(struct videobuf_buffer *buf)
{
struct videobuf_dma_sg_memory *mem = buf->priv;
BUG_ON(!mem);
MAGIC_CHECK(mem->magic, MAGIC_SG_MEM);
return mem->dma.vaddr;
}
static int __videobuf_iolock(struct videobuf_queue *q,
struct videobuf_buffer *vb,
struct v4l2_framebuffer *fbuf)
{
int err, pages;
dma_addr_t bus;
struct videobuf_dma_sg_memory *mem = vb->priv;
BUG_ON(!mem);
MAGIC_CHECK(mem->magic, MAGIC_SG_MEM);
if (!mem->dma.dev)
mem->dma.dev = q->dev;
else
WARN_ON(mem->dma.dev != q->dev);
switch (vb->memory) {
case V4L2_MEMORY_MMAP:
case V4L2_MEMORY_USERPTR:
if (0 == vb->baddr) {
/* no userspace addr -- kernel bounce buffer */
pages = PAGE_ALIGN(vb->size) >> PAGE_SHIFT;
err = videobuf_dma_init_kernel(&mem->dma,
DMA_FROM_DEVICE,
pages);
if (0 != err)
return err;
} else if (vb->memory == V4L2_MEMORY_USERPTR) {
/* dma directly to userspace */
err = videobuf_dma_init_user(&mem->dma,
DMA_FROM_DEVICE,
vb->baddr, vb->bsize);
if (0 != err)
return err;
} else {
/* NOTE: HACK: videobuf_iolock on V4L2_MEMORY_MMAP
buffers can only be called from videobuf_qbuf
we take current->mm->mmap_sem there, to prevent
locking inversion, so don't take it here */
err = videobuf_dma_init_user_locked(&mem->dma,
DMA_FROM_DEVICE,
vb->baddr, vb->bsize);
if (0 != err)
return err;
}
break;
case V4L2_MEMORY_OVERLAY:
if (NULL == fbuf)
return -EINVAL;
/* FIXME: need sanity checks for vb->boff */
/*
* Using a double cast to avoid compiler warnings when
* building for PAE. Compiler doesn't like direct casting
* of a 32 bit ptr to 64 bit integer.
*/
bus = (dma_addr_t)(unsigned long)fbuf->base + vb->boff;
pages = PAGE_ALIGN(vb->size) >> PAGE_SHIFT;
err = videobuf_dma_init_overlay(&mem->dma, DMA_FROM_DEVICE,
bus, pages);
if (0 != err)
return err;
break;
default:
BUG();
}
err = videobuf_dma_map(q->dev, &mem->dma);
if (0 != err)
return err;
return 0;
}
static int __videobuf_sync(struct videobuf_queue *q,
struct videobuf_buffer *buf)
{
struct videobuf_dma_sg_memory *mem = buf->priv;
BUG_ON(!mem || !mem->dma.sglen);
MAGIC_CHECK(mem->magic, MAGIC_SG_MEM);
MAGIC_CHECK(mem->dma.magic, MAGIC_DMABUF);
dma_sync_sg_for_cpu(q->dev, mem->dma.sglist,
mem->dma.nr_pages, mem->dma.direction);
return 0;
}
static int __videobuf_mmap_mapper(struct videobuf_queue *q,
struct videobuf_buffer *buf,
struct vm_area_struct *vma)
{
struct videobuf_dma_sg_memory *mem = buf->priv;
struct videobuf_mapping *map;
unsigned int first, last, size = 0, i;
int retval;
retval = -EINVAL;
BUG_ON(!mem);
MAGIC_CHECK(mem->magic, MAGIC_SG_MEM);
/* look for first buffer to map */
for (first = 0; first < VIDEO_MAX_FRAME; first++) {
if (buf == q->bufs[first]) {
size = PAGE_ALIGN(q->bufs[first]->bsize);
break;
}
}
/* paranoia, should never happen since buf is always valid. */
if (!size) {
dprintk(1, "mmap app bug: offset invalid [offset=0x%lx]\n",
(vma->vm_pgoff << PAGE_SHIFT));
goto done;
}
last = first;
/* create mapping + update buffer list */
retval = -ENOMEM;
map = kmalloc(sizeof(struct videobuf_mapping), GFP_KERNEL);
if (NULL == map)
goto done;
size = 0;
for (i = first; i <= last; i++) {
if (NULL == q->bufs[i])
continue;
q->bufs[i]->map = map;
q->bufs[i]->baddr = vma->vm_start + size;
size += PAGE_ALIGN(q->bufs[i]->bsize);
}
map->count = 1;
map->q = q;
vma->vm_ops = &videobuf_vm_ops;
mm: kill vma flag VM_RESERVED and mm->reserved_vm counter A long time ago, in v2.4, VM_RESERVED kept swapout process off VMA, currently it lost original meaning but still has some effects: | effect | alternative flags -+------------------------+--------------------------------------------- 1| account as reserved_vm | VM_IO 2| skip in core dump | VM_IO, VM_DONTDUMP 3| do not merge or expand | VM_IO, VM_DONTEXPAND, VM_HUGETLB, VM_PFNMAP 4| do not mlock | VM_IO, VM_DONTEXPAND, VM_HUGETLB, VM_PFNMAP This patch removes reserved_vm counter from mm_struct. Seems like nobody cares about it, it does not exported into userspace directly, it only reduces total_vm showed in proc. Thus VM_RESERVED can be replaced with VM_IO or pair VM_DONTEXPAND | VM_DONTDUMP. remap_pfn_range() and io_remap_pfn_range() set VM_IO|VM_DONTEXPAND|VM_DONTDUMP. remap_vmalloc_range() set VM_DONTEXPAND | VM_DONTDUMP. [akpm@linux-foundation.org: drivers/vfio/pci/vfio_pci.c fixup] Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Carsten Otte <cotte@de.ibm.com> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: Cyrill Gorcunov <gorcunov@openvz.org> Cc: Eric Paris <eparis@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Hugh Dickins <hughd@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Morris <james.l.morris@oracle.com> Cc: Jason Baron <jbaron@redhat.com> Cc: Kentaro Takeda <takedakn@nttdata.co.jp> Cc: Matt Helsley <matthltc@us.ibm.com> Cc: Nick Piggin <npiggin@kernel.dk> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Robert Richter <robert.richter@amd.com> Cc: Suresh Siddha <suresh.b.siddha@intel.com> Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Cc: Venkatesh Pallipadi <venki@google.com> Acked-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-10-08 17:29:02 -06:00
vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
vma->vm_flags &= ~VM_IO; /* using shared anonymous pages */
vma->vm_private_data = map;
dprintk(1, "mmap %p: q=%p %08lx-%08lx pgoff %08lx bufs %d-%d\n",
map, q, vma->vm_start, vma->vm_end, vma->vm_pgoff, first, last);
retval = 0;
done:
return retval;
}
static struct videobuf_qtype_ops sg_ops = {
.magic = MAGIC_QTYPE_OPS,
.alloc_vb = __videobuf_alloc_vb,
.iolock = __videobuf_iolock,
.sync = __videobuf_sync,
.mmap_mapper = __videobuf_mmap_mapper,
.vaddr = __videobuf_to_vaddr,
};
void *videobuf_sg_alloc(size_t size)
{
struct videobuf_queue q;
/* Required to make generic handler to call __videobuf_alloc */
q.int_ops = &sg_ops;
q.msize = size;
return videobuf_alloc_vb(&q);
}
EXPORT_SYMBOL_GPL(videobuf_sg_alloc);
void videobuf_queue_sg_init(struct videobuf_queue *q,
const struct videobuf_queue_ops *ops,
struct device *dev,
spinlock_t *irqlock,
enum v4l2_buf_type type,
enum v4l2_field field,
unsigned int msize,
void *priv,
struct mutex *ext_lock)
{
videobuf_queue_core_init(q, ops, dev, irqlock, type, field, msize,
priv, &sg_ops, ext_lock);
}
EXPORT_SYMBOL_GPL(videobuf_queue_sg_init);