diff --git a/MAINTAINERS b/MAINTAINERS index 2abf6d28db64..d6600715a662 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5191,6 +5191,7 @@ T: git git://people.freedesktop.org/~thomash/linux S: Supported F: drivers/gpu/drm/vmwgfx/ F: include/uapi/drm/vmwgfx_drm.h +F: mm/as_dirty_helpers.c DRM DRIVERS M: David Airlie diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index c7de667d482a..6953dd264172 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1739,6 +1739,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev, mutex_lock(&ttm_global_mutex); list_add_tail(&bdev->device_list, &glob->device_list); mutex_unlock(&ttm_global_mutex); + bdev->vm_ops = &ttm_bo_vm_ops; return 0; out_no_sys: diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 6dacff49c1cc..0c4576cbafcf 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -42,8 +42,6 @@ #include #include -#define TTM_BO_VM_NUM_PREFAULT 16 - static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, struct vm_fault *vmf) { @@ -106,25 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo, + page_offset; } -static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) +/** + * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback + * @bo: The buffer object + * @vmf: The fault structure handed to the callback + * + * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped + * during long waits, and after the wait the callback will be restarted. This + * is to allow other threads using the same virtual memory space concurrent + * access to map(), unmap() completely unrelated buffer objects. TTM buffer + * object reservations sometimes wait for GPU and should therefore be + * considered long waits. This function reserves the buffer object interruptibly + * taking this into account. Starvation is avoided by the vm system not + * allowing too many repeated restarts. + * This function is intended to be used in customized fault() and _mkwrite() + * handlers. + * + * Return: + * 0 on success and the bo was reserved. + * VM_FAULT_RETRY if blocking wait. + * VM_FAULT_NOPAGE if blocking wait and retrying was not allowed. + */ +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, + struct vm_fault *vmf) { - struct vm_area_struct *vma = vmf->vma; - struct ttm_buffer_object *bo = (struct ttm_buffer_object *) - vma->vm_private_data; - struct ttm_bo_device *bdev = bo->bdev; - unsigned long page_offset; - unsigned long page_last; - unsigned long pfn; - struct ttm_tt *ttm = NULL; - struct page *page; - int err; - int i; - vm_fault_t ret = VM_FAULT_NOPAGE; - unsigned long address = vmf->address; - struct ttm_mem_type_manager *man = - &bdev->man[bo->mem.mem_type]; - struct vm_area_struct cvma; - /* * Work around locking order reversal in fault / nopfn * between mmap_sem and bo_reserve: Perform a trylock operation @@ -151,14 +154,55 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) return VM_FAULT_NOPAGE; } + return 0; +} +EXPORT_SYMBOL(ttm_bo_vm_reserve); + +/** + * ttm_bo_vm_fault_reserved - TTM fault helper + * @vmf: The struct vm_fault given as argument to the fault callback + * @prot: The page protection to be used for this memory area. + * @num_prefault: Maximum number of prefault pages. The caller may want to + * specify this based on madvice settings and the size of the GPU object + * backed by the memory. + * + * This function inserts one or more page table entries pointing to the + * memory backing the buffer object, and then returns a return code + * instructing the caller to retry the page access. + * + * Return: + * VM_FAULT_NOPAGE on success or pending signal + * VM_FAULT_SIGBUS on unspecified error + * VM_FAULT_OOM on out-of-memory + * VM_FAULT_RETRY if retryable wait + */ +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, + pgprot_t prot, + pgoff_t num_prefault) +{ + struct vm_area_struct *vma = vmf->vma; + struct vm_area_struct cvma = *vma; + struct ttm_buffer_object *bo = (struct ttm_buffer_object *) + vma->vm_private_data; + struct ttm_bo_device *bdev = bo->bdev; + unsigned long page_offset; + unsigned long page_last; + unsigned long pfn; + struct ttm_tt *ttm = NULL; + struct page *page; + int err; + pgoff_t i; + vm_fault_t ret = VM_FAULT_NOPAGE; + unsigned long address = vmf->address; + struct ttm_mem_type_manager *man = + &bdev->man[bo->mem.mem_type]; + /* * Refuse to fault imported pages. This should be handled * (if at all) by redirecting mmap to the exporter. */ - if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) { - ret = VM_FAULT_SIGBUS; - goto out_unlock; - } + if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) + return VM_FAULT_SIGBUS; if (bdev->driver->fault_reserve_notify) { struct dma_fence *moving = dma_fence_get(bo->moving); @@ -169,11 +213,9 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) break; case -EBUSY: case -ERESTARTSYS: - ret = VM_FAULT_NOPAGE; - goto out_unlock; + return VM_FAULT_NOPAGE; default: - ret = VM_FAULT_SIGBUS; - goto out_unlock; + return VM_FAULT_SIGBUS; } if (bo->moving != moving) { @@ -189,21 +231,12 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) * move. */ ret = ttm_bo_vm_fault_idle(bo, vmf); - if (unlikely(ret != 0)) { - if (ret == VM_FAULT_RETRY && - !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { - /* The BO has already been unreserved. */ - return ret; - } - - goto out_unlock; - } + if (unlikely(ret != 0)) + return ret; err = ttm_mem_io_lock(man, true); - if (unlikely(err != 0)) { - ret = VM_FAULT_NOPAGE; - goto out_unlock; - } + if (unlikely(err != 0)) + return VM_FAULT_NOPAGE; err = ttm_mem_io_reserve_vm(bo); if (unlikely(err != 0)) { ret = VM_FAULT_SIGBUS; @@ -220,18 +253,8 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) goto out_io_unlock; } - /* - * Make a local vma copy to modify the page_prot member - * and vm_flags if necessary. The vma parameter is protected - * by mmap_sem in write mode. - */ - cvma = *vma; - cvma.vm_page_prot = vm_get_page_prot(cvma.vm_flags); - - if (bo->mem.bus.is_iomem) { - cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, - cvma.vm_page_prot); - } else { + cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, prot); + if (!bo->mem.bus.is_iomem) { struct ttm_operation_ctx ctx = { .interruptible = false, .no_wait_gpu = false, @@ -240,24 +263,21 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) }; ttm = bo->ttm; - cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, - cvma.vm_page_prot); - - /* Allocate all page at once, most common usage */ - if (ttm_tt_populate(ttm, &ctx)) { + if (ttm_tt_populate(bo->ttm, &ctx)) { ret = VM_FAULT_OOM; goto out_io_unlock; } + } else { + /* Iomem should not be marked encrypted */ + cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot); } /* * Speculatively prefault a number of pages. Only error on * first page. */ - for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) { + for (i = 0; i < num_prefault; ++i) { if (bo->mem.bus.is_iomem) { - /* Iomem should not be marked encrypted */ - cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot); pfn = ttm_bo_io_mem_pfn(bo, page_offset); } else { page = ttm->pages[page_offset]; @@ -295,7 +315,26 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) ret = VM_FAULT_NOPAGE; out_io_unlock: ttm_mem_io_unlock(man); -out_unlock: + return ret; +} +EXPORT_SYMBOL(ttm_bo_vm_fault_reserved); + +static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + pgprot_t prot; + struct ttm_buffer_object *bo = vma->vm_private_data; + vm_fault_t ret; + + ret = ttm_bo_vm_reserve(bo, vmf); + if (ret) + return ret; + + prot = vm_get_page_prot(vma->vm_flags); + ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT); + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) + return ret; + reservation_object_unlock(bo->resv); return ret; } @@ -395,7 +434,7 @@ static int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, return ret; } -static const struct vm_operations_struct ttm_bo_vm_ops = { +const struct vm_operations_struct ttm_bo_vm_ops = { .fault = ttm_bo_vm_fault, .open = ttm_bo_vm_open, .close = ttm_bo_vm_close, @@ -448,7 +487,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma, if (unlikely(ret != 0)) goto out_unref; - vma->vm_ops = &ttm_bo_vm_ops; + vma->vm_ops = bdev->vm_ops; /* * Note: We're transferring the bo reference to @@ -480,7 +519,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo) ttm_bo_get(bo); - vma->vm_ops = &ttm_bo_vm_ops; + vma->vm_ops = bo->bdev->vm_ops; vma->vm_private_data = bo; vma->vm_flags |= VM_MIXEDMAP; vma->vm_flags |= VM_IO | VM_DONTEXPAND; diff --git a/drivers/gpu/drm/vmwgfx/Kconfig b/drivers/gpu/drm/vmwgfx/Kconfig index 6b28a326f8bb..d5fd81a521f6 100644 --- a/drivers/gpu/drm/vmwgfx/Kconfig +++ b/drivers/gpu/drm/vmwgfx/Kconfig @@ -8,6 +8,7 @@ config DRM_VMWGFX select FB_CFB_IMAGEBLIT select DRM_TTM select FB + select AS_DIRTY_HELPERS # Only needed for the transitional use of drm_crtc_init - can be removed # again once vmwgfx sets up the primary plane itself. select DRM_KMS_HELPER diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile index 8841bd30e1e5..c877a21a0739 100644 --- a/drivers/gpu/drm/vmwgfx/Makefile +++ b/drivers/gpu/drm/vmwgfx/Makefile @@ -8,7 +8,7 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \ vmwgfx_cmdbuf_res.o vmwgfx_cmdbuf.o vmwgfx_stdu.o \ vmwgfx_cotable.o vmwgfx_so.o vmwgfx_binding.o vmwgfx_msg.o \ vmwgfx_simple_resource.o vmwgfx_va.o vmwgfx_blit.o \ - vmwgfx_validation.o \ + vmwgfx_validation.o vmwgfx_page_dirty.o \ ttm_object.o ttm_lock.o obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h index f2bfd3d80598..61414f105c67 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h @@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format, return offset; } - static inline u32 svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format, surf_size_struct baseLevelSize, @@ -1375,4 +1374,236 @@ svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format) return svga3dsurface_is_dx_screen_target_format(format); } +/** + * struct svga3dsurface_mip - Mimpmap level information + * @bytes: Bytes required in the backing store of this mipmap level. + * @img_stride: Byte stride per image. + * @row_stride: Byte stride per block row. + * @size: The size of the mipmap. + */ +struct svga3dsurface_mip { + size_t bytes; + size_t img_stride; + size_t row_stride; + struct drm_vmw_size size; + +}; + +/** + * struct svga3dsurface_cache - Cached surface information + * @desc: Pointer to the surface descriptor + * @mip: Array of mipmap level information. Valid size is @num_mip_levels. + * @mip_chain_bytes: Bytes required in the backing store for the whole chain + * of mip levels. + * @sheet_bytes: Bytes required in the backing store for a sheet + * representing a single sample. + * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in + * a chain. + * @num_layers: Number of slices in an array texture or number of faces in + * a cubemap texture. + */ +struct svga3dsurface_cache { + const struct svga3d_surface_desc *desc; + struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS]; + size_t mip_chain_bytes; + size_t sheet_bytes; + u32 num_mip_levels; + u32 num_layers; +}; + +/** + * struct svga3dsurface_loc - Surface location + * @sub_resource: Surface subresource. Defined as layer * num_mip_levels + + * mip_level. + * @x: X coordinate. + * @y: Y coordinate. + * @z: Z coordinate. + */ +struct svga3dsurface_loc { + u32 sub_resource; + u32 x, y, z; +}; + +/** + * svga3dsurface_subres - Compute the subresource from layer and mipmap. + * @cache: Surface layout data. + * @mip_level: The mipmap level. + * @layer: The surface layer (face or array slice). + * + * Return: The subresource. + */ +static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache, + u32 mip_level, u32 layer) +{ + return cache->num_mip_levels * layer + mip_level; +} + +/** + * svga3dsurface_setup_cache - Build a surface cache entry + * @size: The surface base level dimensions. + * @format: The surface format. + * @num_mip_levels: Number of mipmap levels. + * @num_layers: Number of layers. + * @cache: Pointer to a struct svga3dsurface_cach object to be filled in. + * + * Return: Zero on success, -EINVAL on invalid surface layout. + */ +static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size, + SVGA3dSurfaceFormat format, + u32 num_mip_levels, + u32 num_layers, + u32 num_samples, + struct svga3dsurface_cache *cache) +{ + const struct svga3d_surface_desc *desc; + u32 i; + + memset(cache, 0, sizeof(*cache)); + cache->desc = desc = svga3dsurface_get_desc(format); + cache->num_mip_levels = num_mip_levels; + cache->num_layers = num_layers; + for (i = 0; i < cache->num_mip_levels; i++) { + struct svga3dsurface_mip *mip = &cache->mip[i]; + + mip->size = svga3dsurface_get_mip_size(*size, i); + mip->bytes = svga3dsurface_get_image_buffer_size + (desc, &mip->size, 0); + mip->row_stride = + __KERNEL_DIV_ROUND_UP(mip->size.width, + desc->block_size.width) * + desc->bytes_per_block * num_samples; + if (!mip->row_stride) + goto invalid_dim; + + mip->img_stride = + __KERNEL_DIV_ROUND_UP(mip->size.height, + desc->block_size.height) * + mip->row_stride; + if (!mip->img_stride) + goto invalid_dim; + + cache->mip_chain_bytes += mip->bytes; + } + cache->sheet_bytes = cache->mip_chain_bytes * num_layers; + if (!cache->sheet_bytes) + goto invalid_dim; + + return 0; + +invalid_dim: + VMW_DEBUG_USER("Invalid surface layout for dirty tracking.\n"); + return -EINVAL; +} + +/** + * svga3dsurface_get_loc - Get a surface location from an offset into the + * backing store + * @cache: Surface layout data. + * @loc: Pointer to a struct svga3dsurface_loc to be filled in. + * @offset: Offset into the surface backing store. + */ +static inline void +svga3dsurface_get_loc(const struct svga3dsurface_cache *cache, + struct svga3dsurface_loc *loc, + size_t offset) +{ + const struct svga3dsurface_mip *mip = &cache->mip[0]; + const struct svga3d_surface_desc *desc = cache->desc; + u32 layer; + int i; + + if (offset >= cache->sheet_bytes) + offset %= cache->sheet_bytes; + + layer = offset / cache->mip_chain_bytes; + offset -= layer * cache->mip_chain_bytes; + for (i = 0; i < cache->num_mip_levels; ++i, ++mip) { + if (mip->bytes > offset) + break; + offset -= mip->bytes; + } + + loc->sub_resource = svga3dsurface_subres(cache, i, layer); + loc->z = offset / mip->img_stride; + offset -= loc->z * mip->img_stride; + loc->z *= desc->block_size.depth; + loc->y = offset / mip->row_stride; + offset -= loc->y * mip->row_stride; + loc->y *= desc->block_size.height; + loc->x = offset / desc->bytes_per_block; + loc->x *= desc->block_size.width; +} + +/** + * svga3dsurface_inc_loc - Clamp increment a surface location with one block + * size + * in each dimension. + * @loc: Pointer to a struct svga3dsurface_loc to be incremented. + * + * When computing the size of a range as size = end - start, the range does not + * include the end element. However a location representing the last byte + * of a touched region in the backing store *is* included in the range. + * This function modifies such a location to match the end definition + * given as start + size which is the one used in a SVGA3dBox. + */ +static inline void +svga3dsurface_inc_loc(const struct svga3dsurface_cache *cache, + struct svga3dsurface_loc *loc) +{ + const struct svga3d_surface_desc *desc = cache->desc; + u32 mip = loc->sub_resource % cache->num_mip_levels; + const struct drm_vmw_size *size = &cache->mip[mip].size; + + loc->sub_resource++; + loc->x += desc->block_size.width; + if (loc->x > size->width) + loc->x = size->width; + loc->y += desc->block_size.height; + if (loc->y > size->height) + loc->y = size->height; + loc->z += desc->block_size.depth; + if (loc->z > size->depth) + loc->z = size->depth; +} + +/** + * svga3dsurface_min_loc - The start location in a subresource + * @cache: Surface layout data. + * @sub_resource: The subresource. + * @loc: Pointer to a struct svga3dsurface_loc to be filled in. + */ +static inline void +svga3dsurface_min_loc(const struct svga3dsurface_cache *cache, + u32 sub_resource, + struct svga3dsurface_loc *loc) +{ + loc->sub_resource = sub_resource; + loc->x = loc->y = loc->z = 0; +} + +/** + * svga3dsurface_min_loc - The end location in a subresource + * @cache: Surface layout data. + * @sub_resource: The subresource. + * @loc: Pointer to a struct svga3dsurface_loc to be filled in. + * + * Following the end definition given in svga3dsurface_inc_loc(), + * Compute the end location of a surface subresource. + */ +static inline void +svga3dsurface_max_loc(const struct svga3dsurface_cache *cache, + u32 sub_resource, + struct svga3dsurface_loc *loc) +{ + const struct drm_vmw_size *size; + u32 mip; + + loc->sub_resource = sub_resource + 1; + mip = sub_resource % cache->num_mip_levels; + size = &cache->mip[mip].size; + loc->x = size->width; + loc->y = size->height; + loc->z = size->depth; +} + #endif /* _SVGA3D_SURFACEDEFS_H_ */ diff --git a/drivers/gpu/drm/vmwgfx/ttm_lock.c b/drivers/gpu/drm/vmwgfx/ttm_lock.c index 16b2083cb9d4..5971c72e6d10 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_lock.c +++ b/drivers/gpu/drm/vmwgfx/ttm_lock.c @@ -29,7 +29,6 @@ * Authors: Thomas Hellstrom */ -#include #include #include #include @@ -49,8 +48,6 @@ void ttm_lock_init(struct ttm_lock *lock) init_waitqueue_head(&lock->queue); lock->rw = 0; lock->flags = 0; - lock->kill_takers = false; - lock->signal = SIGKILL; } void ttm_read_unlock(struct ttm_lock *lock) @@ -66,11 +63,6 @@ static bool __ttm_read_lock(struct ttm_lock *lock) bool locked = false; spin_lock(&lock->lock); - if (unlikely(lock->kill_takers)) { - send_sig(lock->signal, current, 0); - spin_unlock(&lock->lock); - return false; - } if (lock->rw >= 0 && lock->flags == 0) { ++lock->rw; locked = true; @@ -98,11 +90,6 @@ static bool __ttm_read_trylock(struct ttm_lock *lock, bool *locked) *locked = false; spin_lock(&lock->lock); - if (unlikely(lock->kill_takers)) { - send_sig(lock->signal, current, 0); - spin_unlock(&lock->lock); - return false; - } if (lock->rw >= 0 && lock->flags == 0) { ++lock->rw; block = false; @@ -147,11 +134,6 @@ static bool __ttm_write_lock(struct ttm_lock *lock) bool locked = false; spin_lock(&lock->lock); - if (unlikely(lock->kill_takers)) { - send_sig(lock->signal, current, 0); - spin_unlock(&lock->lock); - return false; - } if (lock->rw == 0 && ((lock->flags & ~TTM_WRITE_LOCK_PENDING) == 0)) { lock->rw = -1; lock->flags &= ~TTM_WRITE_LOCK_PENDING; @@ -182,88 +164,6 @@ int ttm_write_lock(struct ttm_lock *lock, bool interruptible) return ret; } -static int __ttm_vt_unlock(struct ttm_lock *lock) -{ - int ret = 0; - - spin_lock(&lock->lock); - if (unlikely(!(lock->flags & TTM_VT_LOCK))) - ret = -EINVAL; - lock->flags &= ~TTM_VT_LOCK; - wake_up_all(&lock->queue); - spin_unlock(&lock->lock); - - return ret; -} - -static void ttm_vt_lock_remove(struct ttm_base_object **p_base) -{ - struct ttm_base_object *base = *p_base; - struct ttm_lock *lock = container_of(base, struct ttm_lock, base); - int ret; - - *p_base = NULL; - ret = __ttm_vt_unlock(lock); - BUG_ON(ret != 0); -} - -static bool __ttm_vt_lock(struct ttm_lock *lock) -{ - bool locked = false; - - spin_lock(&lock->lock); - if (lock->rw == 0) { - lock->flags &= ~TTM_VT_LOCK_PENDING; - lock->flags |= TTM_VT_LOCK; - locked = true; - } else { - lock->flags |= TTM_VT_LOCK_PENDING; - } - spin_unlock(&lock->lock); - return locked; -} - -int ttm_vt_lock(struct ttm_lock *lock, - bool interruptible, - struct ttm_object_file *tfile) -{ - int ret = 0; - - if (interruptible) { - ret = wait_event_interruptible(lock->queue, - __ttm_vt_lock(lock)); - if (unlikely(ret != 0)) { - spin_lock(&lock->lock); - lock->flags &= ~TTM_VT_LOCK_PENDING; - wake_up_all(&lock->queue); - spin_unlock(&lock->lock); - return ret; - } - } else - wait_event(lock->queue, __ttm_vt_lock(lock)); - - /* - * Add a base-object, the destructor of which will - * make sure the lock is released if the client dies - * while holding it. - */ - - ret = ttm_base_object_init(tfile, &lock->base, false, - ttm_lock_type, &ttm_vt_lock_remove, NULL); - if (ret) - (void)__ttm_vt_unlock(lock); - else - lock->vt_holder = tfile; - - return ret; -} - -int ttm_vt_unlock(struct ttm_lock *lock) -{ - return ttm_ref_object_base_unref(lock->vt_holder, - lock->base.handle, TTM_REF_USAGE); -} - void ttm_suspend_unlock(struct ttm_lock *lock) { spin_lock(&lock->lock); diff --git a/drivers/gpu/drm/vmwgfx/ttm_lock.h b/drivers/gpu/drm/vmwgfx/ttm_lock.h index 0c3af9836863..3d454e8b491f 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_lock.h +++ b/drivers/gpu/drm/vmwgfx/ttm_lock.h @@ -63,8 +63,6 @@ * @lock: Spinlock protecting some lock members. * @rw: Read-write lock counter. Protected by @lock. * @flags: Lock state. Protected by @lock. - * @kill_takers: Boolean whether to kill takers of the lock. - * @signal: Signal to send when kill_takers is true. */ struct ttm_lock { @@ -73,9 +71,6 @@ struct ttm_lock { spinlock_t lock; int32_t rw; uint32_t flags; - bool kill_takers; - int signal; - struct ttm_object_file *vt_holder; }; @@ -220,29 +215,4 @@ extern void ttm_write_unlock(struct ttm_lock *lock); */ extern int ttm_write_lock(struct ttm_lock *lock, bool interruptible); -/** - * ttm_lock_set_kill - * - * @lock: Pointer to a struct ttm_lock - * @val: Boolean whether to kill processes taking the lock. - * @signal: Signal to send to the process taking the lock. - * - * The kill-when-taking-lock functionality is used to kill processes that keep - * on using the TTM functionality when its resources has been taken down, for - * example when the X server exits. A typical sequence would look like this: - * - X server takes lock in write mode. - * - ttm_lock_set_kill() is called with @val set to true. - * - As part of X server exit, TTM resources are taken down. - * - X server releases the lock on file release. - * - Another dri client wants to render, takes the lock and is killed. - * - */ -static inline void ttm_lock_set_kill(struct ttm_lock *lock, bool val, - int signal) -{ - lock->kill_takers = val; - if (val) - lock->signal = signal; -} - #endif diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 5d5c2bce01f3..e8bc7a7ac031 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -463,6 +463,8 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo) { struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo); + WARN_ON(vmw_bo->dirty); + WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree)); vmw_bo_unmap(vmw_bo); kfree(vmw_bo); } @@ -476,8 +478,11 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo) static void vmw_user_bo_destroy(struct ttm_buffer_object *bo) { struct vmw_user_buffer_object *vmw_user_bo = vmw_user_buffer_object(bo); + struct vmw_buffer_object *vbo = &vmw_user_bo->vbo; - vmw_bo_unmap(&vmw_user_bo->vbo); + WARN_ON(vbo->dirty); + WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree)); + vmw_bo_unmap(vbo); ttm_prime_object_kfree(vmw_user_bo, prime); } @@ -510,8 +515,9 @@ int vmw_bo_init(struct vmw_private *dev_priv, acc_size = vmw_bo_acc_size(dev_priv, size, user); memset(vmw_bo, 0, sizeof(*vmw_bo)); - - INIT_LIST_HEAD(&vmw_bo->res_list); + BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3); + vmw_bo->base.priority = 3; + vmw_bo->res_tree = RB_ROOT; ret = ttm_bo_init(bdev, &vmw_bo->base, size, ttm_bo_type_device, placement, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c index 63f111068a44..a56c9d802382 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c @@ -88,6 +88,8 @@ static const struct vmw_res_func vmw_gb_context_func = { .res_type = vmw_res_context, .needs_backup = true, .may_evict = true, + .prio = 3, + .dirty_prio = 3, .type_name = "guest backed contexts", .backup_placement = &vmw_mob_placement, .create = vmw_gb_context_create, @@ -100,6 +102,8 @@ static const struct vmw_res_func vmw_dx_context_func = { .res_type = vmw_res_dx_context, .needs_backup = true, .may_evict = true, + .prio = 3, + .dirty_prio = 3, .type_name = "dx contexts", .backup_placement = &vmw_mob_placement, .create = vmw_dx_context_create, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c index b4f6e1217c9d..8c699cb2565b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c @@ -116,6 +116,8 @@ static const struct vmw_res_func vmw_cotable_func = { .res_type = vmw_res_cotable, .needs_backup = true, .may_evict = true, + .prio = 3, + .dirty_prio = 3, .type_name = "context guest backed object tables", .backup_placement = &vmw_mob_placement, .create = vmw_cotable_create, @@ -307,7 +309,7 @@ static int vmw_cotable_unbind(struct vmw_resource *res, struct ttm_buffer_object *bo = val_buf->bo; struct vmw_fence_obj *fence; - if (list_empty(&res->mob_head)) + if (!vmw_resource_mob_attached(res)) return 0; WARN_ON_ONCE(bo->mem.mem_type != VMW_PL_MOB); @@ -453,6 +455,7 @@ static int vmw_cotable_resize(struct vmw_resource *res, size_t new_size) goto out_wait; } + vmw_resource_mob_detach(res); res->backup = buf; res->backup_size = new_size; vcotbl->size_read_back = cur_size_read_back; @@ -467,12 +470,12 @@ static int vmw_cotable_resize(struct vmw_resource *res, size_t new_size) res->backup = old_buf; res->backup_size = old_size; vcotbl->size_read_back = old_size_read_back; + vmw_resource_mob_attach(res); goto out_wait; } + vmw_resource_mob_attach(res); /* Let go of the old mob. */ - list_del(&res->mob_head); - list_add_tail(&res->mob_head, &buf->res_list); vmw_bo_unreference(&old_buf); res->id = vcotbl->type; @@ -496,7 +499,7 @@ out_wait: * is called before bind() in the validation sequence is instead used for two * things. * 1) Unscrub the cotable if it is scrubbed and still attached to a backup - * buffer, that is, if @res->mob_head is non-empty. + * buffer. * 2) Resize the cotable if needed. */ static int vmw_cotable_create(struct vmw_resource *res) @@ -512,7 +515,7 @@ static int vmw_cotable_create(struct vmw_resource *res) new_size *= 2; if (likely(new_size <= res->backup_size)) { - if (vcotbl->scrubbed && !list_empty(&res->mob_head)) { + if (vcotbl->scrubbed && vmw_resource_mob_attached(res)) { ret = vmw_cotable_unscrub(res); if (ret) return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 4ff11a0077e1..8349a6cc126f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -254,7 +254,6 @@ static int vmw_restrict_dma_mask; static int vmw_assume_16bpp; static int vmw_probe(struct pci_dev *, const struct pci_device_id *); -static void vmw_master_init(struct vmw_master *); static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val, void *ptr); @@ -762,10 +761,6 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) DRM_INFO("MMIO at 0x%08x size is %u kiB\n", dev_priv->mmio_start, dev_priv->mmio_size / 1024); - vmw_master_init(&dev_priv->fbdev_master); - ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM); - dev_priv->active_master = &dev_priv->fbdev_master; - dev_priv->mmio_virt = memremap(dev_priv->mmio_start, dev_priv->mmio_size, MEMREMAP_WB); @@ -833,6 +828,11 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) DRM_ERROR("Failed initializing TTM buffer object driver.\n"); goto out_no_bdev; } + dev_priv->vm_ops = *dev_priv->bdev.vm_ops; + dev_priv->vm_ops.fault = vmw_bo_vm_fault; + dev_priv->vm_ops.pfn_mkwrite = vmw_bo_vm_mkwrite; + dev_priv->vm_ops.page_mkwrite = vmw_bo_vm_mkwrite; + dev_priv->bdev.vm_ops = &dev_priv->vm_ops; /* * Enable VRAM, but initially don't use it until SVGA is enabled and @@ -1007,18 +1007,7 @@ static void vmw_driver_unload(struct drm_device *dev) static void vmw_postclose(struct drm_device *dev, struct drm_file *file_priv) { - struct vmw_fpriv *vmw_fp; - - vmw_fp = vmw_fpriv(file_priv); - - if (vmw_fp->locked_master) { - struct vmw_master *vmaster = - vmw_master(vmw_fp->locked_master); - - ttm_lock_set_kill(&vmaster->lock, true, SIGTERM); - ttm_vt_unlock(&vmaster->lock); - drm_master_put(&vmw_fp->locked_master); - } + struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); ttm_object_file_release(&vmw_fp->tfile); kfree(vmw_fp); @@ -1047,55 +1036,6 @@ out_no_tfile: return ret; } -static struct vmw_master *vmw_master_check(struct drm_device *dev, - struct drm_file *file_priv, - unsigned int flags) -{ - int ret; - struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); - struct vmw_master *vmaster; - - if (!drm_is_primary_client(file_priv) || !(flags & DRM_AUTH)) - return NULL; - - ret = mutex_lock_interruptible(&dev->master_mutex); - if (unlikely(ret != 0)) - return ERR_PTR(-ERESTARTSYS); - - if (drm_is_current_master(file_priv)) { - mutex_unlock(&dev->master_mutex); - return NULL; - } - - /* - * Check if we were previously master, but now dropped. In that - * case, allow at least render node functionality. - */ - if (vmw_fp->locked_master) { - mutex_unlock(&dev->master_mutex); - - if (flags & DRM_RENDER_ALLOW) - return NULL; - - DRM_ERROR("Dropped master trying to access ioctl that " - "requires authentication.\n"); - return ERR_PTR(-EACCES); - } - mutex_unlock(&dev->master_mutex); - - /* - * Take the TTM lock. Possibly sleep waiting for the authenticating - * master to become master again, or for a SIGTERM if the - * authenticating master exits. - */ - vmaster = vmw_master(file_priv->master); - ret = ttm_read_lock(&vmaster->lock, true); - if (unlikely(ret != 0)) - vmaster = ERR_PTR(ret); - - return vmaster; -} - static long vmw_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg, long (*ioctl_func)(struct file *, unsigned int, @@ -1104,7 +1044,6 @@ static long vmw_generic_ioctl(struct file *filp, unsigned int cmd, struct drm_file *file_priv = filp->private_data; struct drm_device *dev = file_priv->minor->dev; unsigned int nr = DRM_IOCTL_NR(cmd); - struct vmw_master *vmaster; unsigned int flags; long ret; @@ -1140,21 +1079,7 @@ static long vmw_generic_ioctl(struct file *filp, unsigned int cmd, } else if (!drm_ioctl_flags(nr, &flags)) return -EINVAL; - vmaster = vmw_master_check(dev, file_priv, flags); - if (IS_ERR(vmaster)) { - ret = PTR_ERR(vmaster); - - if (ret != -ERESTARTSYS) - DRM_INFO("IOCTL ERROR Command %d, Error %ld.\n", - nr, ret); - return ret; - } - - ret = ioctl_func(filp, cmd, arg); - if (vmaster) - ttm_read_unlock(&vmaster->lock); - - return ret; + return ioctl_func(filp, cmd, arg); out_io_encoding: DRM_ERROR("Invalid command format, ioctl %d\n", @@ -1181,65 +1106,10 @@ static void vmw_lastclose(struct drm_device *dev) { } -static void vmw_master_init(struct vmw_master *vmaster) -{ - ttm_lock_init(&vmaster->lock); -} - -static int vmw_master_create(struct drm_device *dev, - struct drm_master *master) -{ - struct vmw_master *vmaster; - - vmaster = kzalloc(sizeof(*vmaster), GFP_KERNEL); - if (unlikely(!vmaster)) - return -ENOMEM; - - vmw_master_init(vmaster); - ttm_lock_set_kill(&vmaster->lock, true, SIGTERM); - master->driver_priv = vmaster; - - return 0; -} - -static void vmw_master_destroy(struct drm_device *dev, - struct drm_master *master) -{ - struct vmw_master *vmaster = vmw_master(master); - - master->driver_priv = NULL; - kfree(vmaster); -} - static int vmw_master_set(struct drm_device *dev, struct drm_file *file_priv, bool from_open) { - struct vmw_private *dev_priv = vmw_priv(dev); - struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); - struct vmw_master *active = dev_priv->active_master; - struct vmw_master *vmaster = vmw_master(file_priv->master); - int ret = 0; - - if (active) { - BUG_ON(active != &dev_priv->fbdev_master); - ret = ttm_vt_lock(&active->lock, false, vmw_fp->tfile); - if (unlikely(ret != 0)) - return ret; - - ttm_lock_set_kill(&active->lock, true, SIGTERM); - dev_priv->active_master = NULL; - } - - ttm_lock_set_kill(&vmaster->lock, false, SIGTERM); - if (!from_open) { - ttm_vt_unlock(&vmaster->lock); - BUG_ON(vmw_fp->locked_master != file_priv->master); - drm_master_put(&vmw_fp->locked_master); - } - - dev_priv->active_master = vmaster; - /* * Inform a new master that the layout may have changed while * it was gone. @@ -1254,31 +1124,10 @@ static void vmw_master_drop(struct drm_device *dev, struct drm_file *file_priv) { struct vmw_private *dev_priv = vmw_priv(dev); - struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); - struct vmw_master *vmaster = vmw_master(file_priv->master); - int ret; - /** - * Make sure the master doesn't disappear while we have - * it locked. - */ - - vmw_fp->locked_master = drm_master_get(file_priv->master); - ret = ttm_vt_lock(&vmaster->lock, false, vmw_fp->tfile); vmw_kms_legacy_hotspot_clear(dev_priv); - if (unlikely((ret != 0))) { - DRM_ERROR("Unable to lock TTM at VT switch.\n"); - drm_master_put(&vmw_fp->locked_master); - } - - ttm_lock_set_kill(&vmaster->lock, false, SIGTERM); - if (!dev_priv->enable_fb) vmw_svga_disable(dev_priv); - - dev_priv->active_master = &dev_priv->fbdev_master; - ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM); - ttm_vt_unlock(&dev_priv->fbdev_master.lock); } /** @@ -1557,8 +1406,6 @@ static struct drm_driver driver = { .disable_vblank = vmw_disable_vblank, .ioctls = vmw_ioctls, .num_ioctls = ARRAY_SIZE(vmw_ioctls), - .master_create = vmw_master_create, - .master_destroy = vmw_master_destroy, .master_set = vmw_master_set, .master_drop = vmw_master_drop, .open = vmw_driver_open, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 366dcfc1f9bb..3a358a5495e4 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -44,9 +44,9 @@ #include #define VMWGFX_DRIVER_NAME "vmwgfx" -#define VMWGFX_DRIVER_DATE "20180704" +#define VMWGFX_DRIVER_DATE "20190328" #define VMWGFX_DRIVER_MAJOR 2 -#define VMWGFX_DRIVER_MINOR 15 +#define VMWGFX_DRIVER_MINOR 16 #define VMWGFX_DRIVER_PATCHLEVEL 0 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024) #define VMWGFX_MAX_RELOCATIONS 2048 @@ -81,19 +81,30 @@ #define VMW_RES_SHADER ttm_driver_type4 struct vmw_fpriv { - struct drm_master *locked_master; struct ttm_object_file *tfile; bool gb_aware; /* user-space is guest-backed aware */ }; +/** + * struct vmw_buffer_object - TTM buffer object with vmwgfx additions + * @base: The TTM buffer object + * @res_tree: RB tree of resources using this buffer object as a backing MOB + * @pin_count: pin depth + * @dx_query_ctx: DX context if this buffer object is used as a DX query MOB + * @map: Kmap object for semi-persistent mappings + * @res_prios: Eviction priority counts for attached resources + * @dirty: structure for user-space dirty-tracking + */ struct vmw_buffer_object { struct ttm_buffer_object base; - struct list_head res_list; + struct rb_root res_tree; s32 pin_count; /* Not ref-counted. Protected by binding_mutex */ struct vmw_resource *dx_query_ctx; /* Protected by reservation */ struct ttm_bo_kmap_obj map; + u32 res_prios[TTM_MAX_BO_PRIORITY]; + struct vmw_bo_dirty *dirty; }; /** @@ -124,7 +135,8 @@ struct vmw_res_func; * @res_dirty: Resource contains data not yet in the backup buffer. Protected * by resource reserved. * @backup_dirty: Backup buffer contains data not yet in the HW resource. - * Protecte by resource reserved. + * Protected by resource reserved. + * @coherent: Emulate coherency by tracking vm accesses. * @backup: The backup buffer if any. Protected by resource reserved. * @backup_offset: Offset into the backup buffer if any. Protected by resource * reserved. Note that only a few resource types can have a @backup_offset @@ -133,28 +145,32 @@ struct vmw_res_func; * pin-count greater than zero. It is not on the resource LRU lists and its * backup buffer is pinned. Hence it can't be evicted. * @func: Method vtable for this resource. Immutable. + * @mob_node; Node for the MOB backup rbtree. Protected by @backup reserved. * @lru_head: List head for the LRU list. Protected by @dev_priv::resource_lock. - * @mob_head: List head for the MOB backup list. Protected by @backup reserved. * @binding_head: List head for the context binding list. Protected by * the @dev_priv::binding_mutex * @res_free: The resource destructor. * @hw_destroy: Callback to destroy the resource on the device, as part of * resource destruction. */ +struct vmw_resource_dirty; struct vmw_resource { struct kref kref; struct vmw_private *dev_priv; int id; + u32 used_prio; unsigned long backup_size; - bool res_dirty; - bool backup_dirty; + u32 res_dirty : 1; + u32 backup_dirty : 1; + u32 coherent : 1; struct vmw_buffer_object *backup; unsigned long backup_offset; unsigned long pin_count; const struct vmw_res_func *func; + struct rb_node mob_node; struct list_head lru_head; - struct list_head mob_head; struct list_head binding_head; + struct vmw_resource_dirty *dirty; void (*res_free) (struct vmw_resource *res); void (*hw_destroy) (struct vmw_resource *res); }; @@ -376,10 +392,6 @@ struct vmw_sw_context{ struct vmw_legacy_display; struct vmw_overlay; -struct vmw_master { - struct ttm_lock lock; -}; - struct vmw_vga_topology_state { uint32_t width; uint32_t height; @@ -542,11 +554,8 @@ struct vmw_private { spinlock_t svga_lock; /** - * Master management. + * PM management. */ - - struct vmw_master *active_master; - struct vmw_master fbdev_master; struct notifier_block pm_nb; bool refuse_hibernation; bool suspend_locked; @@ -595,6 +604,9 @@ struct vmw_private { /* Validation memory reservation */ struct vmw_validation_mem vvm; + + /* VM operations */ + struct vm_operations_struct vm_ops; }; static inline struct vmw_surface *vmw_res_to_srf(struct vmw_resource *res) @@ -612,11 +624,6 @@ static inline struct vmw_fpriv *vmw_fpriv(struct drm_file *file_priv) return (struct vmw_fpriv *)file_priv->driver_priv; } -static inline struct vmw_master *vmw_master(struct drm_master *master) -{ - return (struct vmw_master *) master->driver_priv; -} - /* * The locking here is fine-grained, so that it is performed once * for every read- and write operation. This is of course costly, but we @@ -669,7 +676,8 @@ extern void vmw_resource_unreference(struct vmw_resource **p_res); extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res); extern struct vmw_resource * vmw_resource_reference_unless_doomed(struct vmw_resource *res); -extern int vmw_resource_validate(struct vmw_resource *res, bool intr); +extern int vmw_resource_validate(struct vmw_resource *res, bool intr, + bool dirtying); extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible, bool no_backup); extern bool vmw_resource_needs_backup(const struct vmw_resource *res); @@ -709,6 +717,23 @@ extern void vmw_query_move_notify(struct ttm_buffer_object *bo, extern int vmw_query_readback_all(struct vmw_buffer_object *dx_query_mob); extern void vmw_resource_evict_all(struct vmw_private *dev_priv); extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo); +void vmw_resource_mob_attach(struct vmw_resource *res); +void vmw_resource_mob_detach(struct vmw_resource *res); +void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, + pgoff_t end); +int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start, + pgoff_t end, pgoff_t *num_prefault); + +/** + * vmw_resource_mob_attached - Whether a resource currently has a mob attached + * @res: The resource + * + * Return: true if the resource has a mob attached, false otherwise. + */ +static inline bool vmw_resource_mob_attached(const struct vmw_resource *res) +{ + return !RB_EMPTY_NODE(&res->mob_node); +} /** * vmw_user_resource_noref_release - release a user resource pointer looked up @@ -787,6 +812,54 @@ static inline void vmw_user_bo_noref_release(void) ttm_base_object_noref_release(); } +/** + * vmw_bo_adjust_prio - Adjust the buffer object eviction priority + * according to attached resources + * @vbo: The struct vmw_buffer_object + */ +static inline void vmw_bo_prio_adjust(struct vmw_buffer_object *vbo) +{ + int i = ARRAY_SIZE(vbo->res_prios); + + while (i--) { + if (vbo->res_prios[i]) { + vbo->base.priority = i; + return; + } + } + + vbo->base.priority = 3; +} + +/** + * vmw_bo_prio_add - Notify a buffer object of a newly attached resource + * eviction priority + * @vbo: The struct vmw_buffer_object + * @prio: The resource priority + * + * After being notified, the code assigns the highest resource eviction priority + * to the backing buffer object (mob). + */ +static inline void vmw_bo_prio_add(struct vmw_buffer_object *vbo, int prio) +{ + if (vbo->res_prios[prio]++ == 0) + vmw_bo_prio_adjust(vbo); +} + +/** + * vmw_bo_prio_del - Notify a buffer object of a resource with a certain + * priority being removed + * @vbo: The struct vmw_buffer_object + * @prio: The resource priority + * + * After being notified, the code assigns the highest resource eviction priority + * to the backing buffer object (mob). + */ +static inline void vmw_bo_prio_del(struct vmw_buffer_object *vbo, int prio) +{ + if (--vbo->res_prios[prio] == 0) + vmw_bo_prio_adjust(vbo); +} /** * Misc Ioctl functionality - vmwgfx_ioctl.c @@ -1016,7 +1089,6 @@ void vmw_kms_cursor_snoop(struct vmw_surface *srf, int vmw_kms_write_svga(struct vmw_private *vmw_priv, unsigned width, unsigned height, unsigned pitch, unsigned bpp, unsigned depth); -void vmw_kms_idle_workqueues(struct vmw_master *vmaster); bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv, uint32_t pitch, uint32_t height); @@ -1338,6 +1410,25 @@ int vmw_host_log(const char *log); #define VMW_DEBUG_USER(fmt, ...) \ DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) +/** + * VMW_DEBUG_KMS - Debug output for kernel mode-setting + * + * This macro is for debugging vmwgfx mode-setting code. + */ +#define VMW_DEBUG_KMS(fmt, ...) \ + DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) + +/* Resource dirtying - vmwgfx_page_dirty.c */ +void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo); +int vmw_bo_dirty_add(struct vmw_buffer_object *vbo); +void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res); +void vmw_bo_dirty_clear_res(struct vmw_resource *res); +void vmw_bo_dirty_release(struct vmw_buffer_object *vbo); +void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo, + pgoff_t start, pgoff_t end); +vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf); +vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf); + /** * Inline helper functions */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 33533d126277..319c1ca35663 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -2560,7 +2560,6 @@ static int vmw_cmd_dx_check_subresource(struct vmw_private *dev_priv, offsetof(typeof(*cmd), sid)); cmd = container_of(header, typeof(*cmd), header); - return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, VMW_RES_DIRTY_NONE, user_surface_converter, &cmd->sid, NULL); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index b97bc8e5944b..e7222fa2cfdf 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1462,7 +1462,7 @@ static int vmw_kms_check_display_memory(struct drm_device *dev, if (dev_priv->active_display_unit == vmw_du_screen_target && (drm_rect_width(&rects[i]) > dev_priv->stdu_max_width || drm_rect_height(&rects[i]) > dev_priv->stdu_max_height)) { - DRM_ERROR("Screen size not supported.\n"); + VMW_DEBUG_KMS("Screen size not supported.\n"); return -EINVAL; } @@ -1486,7 +1486,7 @@ static int vmw_kms_check_display_memory(struct drm_device *dev, * limit on primary bounding box */ if (pixel_mem > dev_priv->prim_bb_mem) { - DRM_ERROR("Combined output size too large.\n"); + VMW_DEBUG_KMS("Combined output size too large.\n"); return -EINVAL; } @@ -1496,7 +1496,7 @@ static int vmw_kms_check_display_memory(struct drm_device *dev, bb_mem = (u64) bounding_box.x2 * bounding_box.y2 * 4; if (bb_mem > dev_priv->prim_bb_mem) { - DRM_ERROR("Topology is beyond supported limits.\n"); + VMW_DEBUG_KMS("Topology is beyond supported limits.\n"); return -EINVAL; } } @@ -1645,6 +1645,7 @@ static int vmw_kms_check_topology(struct drm_device *dev, struct vmw_connector_state *vmw_conn_state; if (!du->pref_active && new_crtc_state->enable) { + VMW_DEBUG_KMS("Enabling a disabled display unit\n"); ret = -EINVAL; goto clean; } @@ -1701,8 +1702,10 @@ vmw_kms_atomic_check_modeset(struct drm_device *dev, return ret; ret = vmw_kms_check_implicit(dev, state); - if (ret) + if (ret) { + VMW_DEBUG_KMS("Invalid implicit state\n"); return ret; + } if (!state->allow_modeset) return ret; @@ -2347,6 +2350,9 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data, if (!arg->num_outputs) { struct drm_rect def_rect = {0, 0, 800, 600}; + VMW_DEBUG_KMS("Default layout x1 = %d y1 = %d x2 = %d y2 = %d\n", + def_rect.x1, def_rect.y1, + def_rect.x2, def_rect.y2); vmw_du_update_layout(dev_priv, 1, &def_rect); return 0; } @@ -2367,6 +2373,7 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data, drm_rects = (struct drm_rect *)rects; + VMW_DEBUG_KMS("Layout count = %u\n", arg->num_outputs); for (i = 0; i < arg->num_outputs; i++) { struct drm_vmw_rect curr_rect; @@ -2383,6 +2390,10 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data, drm_rects[i].x2 = curr_rect.x + curr_rect.w; drm_rects[i].y2 = curr_rect.y + curr_rect.h; + VMW_DEBUG_KMS(" x1 = %d y1 = %d x2 = %d y2 = %d\n", + drm_rects[i].x1, drm_rects[i].y1, + drm_rects[i].x2, drm_rects[i].y2); + /* * Currently this check is limiting the topology within * mode_config->max (which actually is max texture size @@ -2393,7 +2404,9 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data, if (drm_rects[i].x1 < 0 || drm_rects[i].y1 < 0 || drm_rects[i].x2 > mode_config->max_width || drm_rects[i].y2 > mode_config->max_height) { - DRM_ERROR("Invalid GUI layout.\n"); + VMW_DEBUG_KMS("Invalid layout %d %d %d %d\n", + drm_rects[i].x1, drm_rects[i].y1, + drm_rects[i].x2, drm_rects[i].y2); ret = -EINVAL; goto out_free; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c new file mode 100644 index 000000000000..730c51e397dd --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -0,0 +1,472 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/************************************************************************** + * + * Copyright 2019 VMware, Inc., Palo Alto, CA., USA + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +#include "vmwgfx_drv.h" + +/* + * Different methods for tracking dirty: + * VMW_BO_DIRTY_PAGETABLE - Scan the pagetable for hardware dirty bits + * VMW_BO_DIRTY_MKWRITE - Write-protect page table entries and record write- + * accesses in the VM mkwrite() callback + */ +enum vmw_bo_dirty_method { + VMW_BO_DIRTY_PAGETABLE, + VMW_BO_DIRTY_MKWRITE, +}; + +/* + * No dirtied pages at scan trigger a transition to the _MKWRITE method, + * similarly a certain percentage of dirty pages trigger a transition to + * the _PAGETABLE method. How many triggers should we wait for before + * changing method? + */ +#define VMW_DIRTY_NUM_CHANGE_TRIGGERS 2 + +/* Percentage to trigger a transition to the _PAGETABLE method */ +#define VMW_DIRTY_PERCENTAGE 10 + +/** + * struct vmw_bo_dirty - Dirty information for buffer objects + * @start: First currently dirty bit + * @end: Last currently dirty bit + 1 + * @method: The currently used dirty method + * @change_count: Number of consecutive method change triggers + * @ref_count: Reference count for this structure + * @bitmap_size: The size of the bitmap in bits. Typically equal to the + * nuber of pages in the bo. + * @size: The accounting size for this struct. + * @bitmap: A bitmap where each bit represents a page. A set bit means a + * dirty page. + */ +struct vmw_bo_dirty { + unsigned long start; + unsigned long end; + enum vmw_bo_dirty_method method; + unsigned int change_count; + unsigned int ref_count; + unsigned long bitmap_size; + size_t size; + unsigned long bitmap[0]; +}; + +/** + * vmw_bo_dirty_scan_pagetable - Perform a pagetable scan for dirty bits + * @vbo: The buffer object to scan + * + * Scans the pagetable for dirty bits. Clear those bits and modify the + * dirty structure with the results. This function may change the + * dirty-tracking method. + */ +static void vmw_bo_dirty_scan_pagetable(struct vmw_buffer_object *vbo) +{ + struct vmw_bo_dirty *dirty = vbo->dirty; + pgoff_t offset = drm_vma_node_start(&vbo->base.vma_node); + struct address_space *mapping = vbo->base.bdev->dev_mapping; + pgoff_t num_marked; + + num_marked = apply_as_clean(mapping, + offset, dirty->bitmap_size, + offset, &dirty->bitmap[0], + &dirty->start, &dirty->end); + if (num_marked == 0) + dirty->change_count++; + else + dirty->change_count = 0; + + if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) { + dirty->change_count = 0; + dirty->method = VMW_BO_DIRTY_MKWRITE; + apply_as_wrprotect(mapping, + offset, dirty->bitmap_size); + apply_as_clean(mapping, + offset, dirty->bitmap_size, + offset, &dirty->bitmap[0], + &dirty->start, &dirty->end); + } +} + +/** + * vmw_bo_dirty_scan_mkwrite - Reset the mkwrite dirty-tracking method + * @vbo: The buffer object to scan + * + * Write-protect pages written to so that consecutive write accesses will + * trigger a call to mkwrite. + * + * This function may change the dirty-tracking method. + */ +static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo) +{ + struct vmw_bo_dirty *dirty = vbo->dirty; + unsigned long offset = drm_vma_node_start(&vbo->base.vma_node); + struct address_space *mapping = vbo->base.bdev->dev_mapping; + pgoff_t num_marked; + + if (dirty->end <= dirty->start) + return; + + num_marked = apply_as_wrprotect(vbo->base.bdev->dev_mapping, + dirty->start + offset, + dirty->end - dirty->start); + + if (100UL * num_marked / dirty->bitmap_size > + VMW_DIRTY_PERCENTAGE) { + dirty->change_count++; + } else { + dirty->change_count = 0; + } + + if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) { + pgoff_t start = 0; + pgoff_t end = dirty->bitmap_size; + + dirty->method = VMW_BO_DIRTY_PAGETABLE; + apply_as_clean(mapping, offset, end, offset, &dirty->bitmap[0], + &start, &end); + bitmap_clear(&dirty->bitmap[0], 0, dirty->bitmap_size); + if (dirty->start < dirty->end) + bitmap_set(&dirty->bitmap[0], dirty->start, + dirty->end - dirty->start); + dirty->change_count = 0; + } +} + +/** + * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty + * tracking structure + * @vbo: The buffer object to scan + * + * This function may change the dirty tracking method. + */ +void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo) +{ + struct vmw_bo_dirty *dirty = vbo->dirty; + + if (dirty->method == VMW_BO_DIRTY_PAGETABLE) + vmw_bo_dirty_scan_pagetable(vbo); + else + vmw_bo_dirty_scan_mkwrite(vbo); +} + +/** + * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before + * an unmap_mapping_range operation. + * @vbo: The buffer object, + * @start: First page of the range within the buffer object. + * @end: Last page of the range within the buffer object + 1. + * + * If we're using the _PAGETABLE scan method, we may leak dirty pages + * when calling unmap_mapping_range(). This function makes sure we pick + * up all dirty pages. + */ +static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo, + pgoff_t start, pgoff_t end) +{ + struct vmw_bo_dirty *dirty = vbo->dirty; + unsigned long offset = drm_vma_node_start(&vbo->base.vma_node); + struct address_space *mapping = vbo->base.bdev->dev_mapping; + + if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end) + return; + + apply_as_wrprotect(mapping, start + offset, end - start); + apply_as_clean(mapping, start + offset, end - start, offset, + &dirty->bitmap[0], &dirty->start, &dirty->end); +} + +/** + * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo + * @vbo: The buffer object, + * @start: First page of the range within the buffer object. + * @end: Last page of the range within the buffer object + 1. + * + * This is similar to ttm_bo_unmap_virtual_locked() except it takes a subrange. + */ +void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo, + pgoff_t start, pgoff_t end) +{ + unsigned long offset = drm_vma_node_start(&vbo->base.vma_node); + struct address_space *mapping = vbo->base.bdev->dev_mapping; + + vmw_bo_dirty_pre_unmap(vbo, start, end); + unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT, + (loff_t) (end - start) << PAGE_SHIFT); +} + +/** + * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object + * @vbo: The buffer object + * + * This function registers a dirty-tracking user to a buffer object. + * A user can be for example a resource or a vma in a special user-space + * mapping. + * + * Return: Zero on success, -ENOMEM on memory allocation failure. + */ +int vmw_bo_dirty_add(struct vmw_buffer_object *vbo) +{ + struct vmw_bo_dirty *dirty = vbo->dirty; + pgoff_t num_pages = vbo->base.num_pages; + size_t size, acc_size; + int ret; + static struct ttm_operation_ctx ctx = { + .interruptible = false, + .no_wait_gpu = false + }; + + if (dirty) { + dirty->ref_count++; + return 0; + } + + size = sizeof(*dirty) + BITS_TO_LONGS(num_pages) * sizeof(long); + acc_size = ttm_round_pot(size); + ret = ttm_mem_global_alloc(&ttm_mem_glob, acc_size, &ctx); + if (ret) { + VMW_DEBUG_USER("Out of graphics memory for buffer object " + "dirty tracker.\n"); + return ret; + } + dirty = kvzalloc(size, GFP_KERNEL); + if (!dirty) { + ret = -ENOMEM; + goto out_no_dirty; + } + + dirty->size = acc_size; + dirty->bitmap_size = num_pages; + dirty->start = dirty->bitmap_size; + dirty->end = 0; + dirty->ref_count = 1; + if (num_pages < PAGE_SIZE / sizeof(pte_t)) { + dirty->method = VMW_BO_DIRTY_PAGETABLE; + } else { + struct address_space *mapping = vbo->base.bdev->dev_mapping; + pgoff_t offset = drm_vma_node_start(&vbo->base.vma_node); + + dirty->method = VMW_BO_DIRTY_MKWRITE; + + /* Write-protect and then pick up already dirty bits */ + apply_as_wrprotect(mapping, offset, num_pages); + apply_as_clean(mapping, offset, num_pages, offset, + &dirty->bitmap[0], &dirty->start, &dirty->end); + } + + vbo->dirty = dirty; + + return 0; + +out_no_dirty: + ttm_mem_global_free(&ttm_mem_glob, acc_size); + return ret; +} + +/** + * vmw_bo_dirty_release - Release a dirty-tracking user from a buffer object + * @vbo: The buffer object + * + * This function releases a dirty-tracking user from a buffer object. + * If the reference count reaches zero, then the dirty-tracking object is + * freed and the pointer to it cleared. + * + * Return: Zero on success, -ENOMEM on memory allocation failure. + */ +void vmw_bo_dirty_release(struct vmw_buffer_object *vbo) +{ + struct vmw_bo_dirty *dirty = vbo->dirty; + + if (dirty && --dirty->ref_count == 0) { + size_t acc_size = dirty->size; + + kvfree(dirty); + ttm_mem_global_free(&ttm_mem_glob, acc_size); + vbo->dirty = NULL; + } +} + +/** + * vmw_bo_dirty_transfer_to_res - Pick up a resource's dirty region from + * its backing mob. + * @res: The resource + * + * This function will pick up all dirty ranges affecting the resource from + * it's backup mob, and call vmw_resource_dirty_update() once for each + * range. The transferred ranges will be cleared from the backing mob's + * dirty tracking. + */ +void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res) +{ + struct vmw_buffer_object *vbo = res->backup; + struct vmw_bo_dirty *dirty = vbo->dirty; + pgoff_t start, cur, end; + unsigned long res_start = res->backup_offset; + unsigned long res_end = res->backup_offset + res->backup_size; + + WARN_ON_ONCE(res_start & ~PAGE_MASK); + res_start >>= PAGE_SHIFT; + res_end = DIV_ROUND_UP(res_end, PAGE_SIZE); + + if (res_start >= dirty->end || res_end <= dirty->start) + return; + + cur = max(res_start, dirty->start); + res_end = max(res_end, dirty->end); + while (cur < res_end) { + unsigned long num; + + start = find_next_bit(&dirty->bitmap[0], res_end, cur); + if (start >= res_end) + break; + + end = find_next_zero_bit(&dirty->bitmap[0], res_end, start + 1); + cur = end + 1; + num = end - start; + bitmap_clear(&dirty->bitmap[0], start, num); + vmw_resource_dirty_update(res, start, end); + } + + if (res_start <= dirty->start && res_end > dirty->start) + dirty->start = res_end; + if (res_start < dirty->end && res_end >= dirty->end) + dirty->end = res_start; +} + +/** + * vmw_bo_dirty_clear_res - Clear a resource's dirty region from + * its backing mob. + * @res: The resource + * + * This function will clear all dirty ranges affecting the resource from + * it's backup mob's dirty tracking. + */ +void vmw_bo_dirty_clear_res(struct vmw_resource *res) +{ + unsigned long res_start = res->backup_offset; + unsigned long res_end = res->backup_offset + res->backup_size; + struct vmw_buffer_object *vbo = res->backup; + struct vmw_bo_dirty *dirty = vbo->dirty; + + res_start >>= PAGE_SHIFT; + res_end = DIV_ROUND_UP(res_end, PAGE_SIZE); + + if (res_start >= dirty->end || res_end <= dirty->start) + return; + + res_start = max(res_start, dirty->start); + res_end = min(res_end, dirty->end); + bitmap_clear(&dirty->bitmap[0], res_start, res_end - res_start); + + if (res_start <= dirty->start && res_end > dirty->start) + dirty->start = res_end; + if (res_start < dirty->end && res_end >= dirty->end) + dirty->end = res_start; +} + +vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct ttm_buffer_object *bo = (struct ttm_buffer_object *) + vma->vm_private_data; + vm_fault_t ret; + unsigned long page_offset; + struct vmw_buffer_object *vbo = + container_of(bo, typeof(*vbo), base); + + ret = ttm_bo_vm_reserve(bo, vmf); + if (ret) + return ret; + + page_offset = vmf->pgoff - drm_vma_node_start(&bo->vma_node); + if (unlikely(page_offset >= bo->num_pages)) { + ret = VM_FAULT_SIGBUS; + goto out_unlock; + } + + if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE && + !test_bit(page_offset, &vbo->dirty->bitmap[0])) { + struct vmw_bo_dirty *dirty = vbo->dirty; + + __set_bit(page_offset, &dirty->bitmap[0]); + dirty->start = min(dirty->start, page_offset); + dirty->end = max(dirty->end, page_offset + 1); + } + +out_unlock: + reservation_object_unlock(bo->resv); + return ret; +} + +vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct ttm_buffer_object *bo = (struct ttm_buffer_object *) + vma->vm_private_data; + struct vmw_buffer_object *vbo = + container_of(bo, struct vmw_buffer_object, base); + pgoff_t num_prefault; + pgprot_t prot; + vm_fault_t ret; + + ret = ttm_bo_vm_reserve(bo, vmf); + if (ret) + return ret; + + num_prefault = (vma->vm_flags & VM_RAND_READ) ? 1 : + TTM_BO_VM_NUM_PREFAULT; + + if (vbo->dirty) { + pgoff_t allowed_prefault; + unsigned long page_offset; + + page_offset = vmf->pgoff - drm_vma_node_start(&bo->vma_node); + if (page_offset >= bo->num_pages || + vmw_resources_clean(vbo, page_offset, + page_offset + PAGE_SIZE, + &allowed_prefault)) { + ret = VM_FAULT_SIGBUS; + goto out_unlock; + } + + num_prefault = min(num_prefault, allowed_prefault); + } + + /* + * If we don't track dirty using the MKWRITE method, make sure + * sure the page protection is write-enabled so we don't get + * a lot of unnecessary write faults. + */ + if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE) + prot = vma->vm_page_prot; + else + prot = vm_get_page_prot(vma->vm_flags); + + ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault); + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) + return ret; + +out_unlock: + reservation_object_unlock(bo->resv); + return ret; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 1d38a8b2f2ec..d70ee0df5c13 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -34,6 +34,51 @@ #define VMW_RES_EVICT_ERR_COUNT 10 +/** + * vmw_resource_mob_attach - Mark a resource as attached to its backing mob + * @res: The resource + */ +void vmw_resource_mob_attach(struct vmw_resource *res) +{ + struct vmw_buffer_object *backup = res->backup; + struct rb_node **new = &backup->res_tree.rb_node, *parent = NULL; + + lockdep_assert_held(&backup->base.resv->lock.base); + res->used_prio = (res->res_dirty) ? res->func->dirty_prio : + res->func->prio; + + while (*new) { + struct vmw_resource *this = + container_of(*new, struct vmw_resource, mob_node); + + parent = *new; + new = (res->backup_offset < this->backup_offset) ? + &((*new)->rb_left) : &((*new)->rb_right); + } + + rb_link_node(&res->mob_node, parent, new); + rb_insert_color(&res->mob_node, &backup->res_tree); + + vmw_bo_prio_add(backup, res->used_prio); +} + +/** + * vmw_resource_mob_detach - Mark a resource as detached from its backing mob + * @res: The resource + */ +void vmw_resource_mob_detach(struct vmw_resource *res) +{ + struct vmw_buffer_object *backup = res->backup; + + lockdep_assert_held(&backup->base.resv->lock.base); + if (vmw_resource_mob_attached(res)) { + rb_erase(&res->mob_node, &backup->res_tree); + RB_CLEAR_NODE(&res->mob_node); + vmw_bo_prio_del(backup, res->used_prio); + } +} + + struct vmw_resource *vmw_resource_reference(struct vmw_resource *res) { kref_get(&res->kref); @@ -80,7 +125,7 @@ static void vmw_resource_release(struct kref *kref) struct ttm_buffer_object *bo = &res->backup->base; ttm_bo_reserve(bo, false, false, NULL); - if (!list_empty(&res->mob_head) && + if (vmw_resource_mob_attached(res) && res->func->unbind != NULL) { struct ttm_validate_buffer val_buf; @@ -89,7 +134,11 @@ static void vmw_resource_release(struct kref *kref) res->func->unbind(res, false, &val_buf); } res->backup_dirty = false; - list_del_init(&res->mob_head); + vmw_resource_mob_detach(res); + if (res->dirty) + res->func->dirty_free(res); + if (res->coherent) + vmw_bo_dirty_release(res->backup); ttm_bo_unreserve(bo); vmw_bo_unreference(&res->backup); } @@ -171,14 +220,17 @@ int vmw_resource_init(struct vmw_private *dev_priv, struct vmw_resource *res, res->res_free = res_free; res->dev_priv = dev_priv; res->func = func; + RB_CLEAR_NODE(&res->mob_node); INIT_LIST_HEAD(&res->lru_head); - INIT_LIST_HEAD(&res->mob_head); INIT_LIST_HEAD(&res->binding_head); res->id = -1; res->backup = NULL; res->backup_offset = 0; res->backup_dirty = false; res->res_dirty = false; + res->coherent = false; + res->used_prio = 3; + res->dirty = NULL; if (delay_id) return 0; else @@ -343,7 +395,8 @@ out_no_bo: * should be retried once resources have been freed up. */ static int vmw_resource_do_validate(struct vmw_resource *res, - struct ttm_validate_buffer *val_buf) + struct ttm_validate_buffer *val_buf, + bool dirtying) { int ret = 0; const struct vmw_res_func *func = res->func; @@ -355,14 +408,47 @@ static int vmw_resource_do_validate(struct vmw_resource *res, } if (func->bind && - ((func->needs_backup && list_empty(&res->mob_head) && + ((func->needs_backup && !vmw_resource_mob_attached(res) && val_buf->bo != NULL) || (!func->needs_backup && val_buf->bo != NULL))) { ret = func->bind(res, val_buf); if (unlikely(ret != 0)) goto out_bind_failed; if (func->needs_backup) - list_add_tail(&res->mob_head, &res->backup->res_list); + vmw_resource_mob_attach(res); + } + + /* + * Handle the case where the backup mob is marked coherent but + * the resource isn't. + */ + if (func->dirty_alloc && vmw_resource_mob_attached(res) && + !res->coherent) { + if (res->backup->dirty && !res->dirty) { + ret = func->dirty_alloc(res); + if (ret) + return ret; + } else if (!res->backup->dirty && res->dirty) { + func->dirty_free(res); + } + } + + /* + * Transfer the dirty regions to the resource and update + * the resource. + */ + if (res->dirty) { + if (dirtying && !res->res_dirty) { + pgoff_t start = res->backup_offset >> PAGE_SHIFT; + pgoff_t end = __KERNEL_DIV_ROUND_UP + (res->backup_offset + res->backup_size, + PAGE_SIZE); + + vmw_bo_dirty_unmap(res->backup, start, end); + } + + vmw_bo_dirty_transfer_to_res(res); + return func->dirty_sync(res); } return 0; @@ -402,19 +488,29 @@ void vmw_resource_unreserve(struct vmw_resource *res, if (switch_backup && new_backup != res->backup) { if (res->backup) { - lockdep_assert_held(&res->backup->base.resv->lock.base); - list_del_init(&res->mob_head); + vmw_resource_mob_detach(res); + if (res->coherent) + vmw_bo_dirty_release(res->backup); vmw_bo_unreference(&res->backup); } if (new_backup) { res->backup = vmw_bo_reference(new_backup); - lockdep_assert_held(&new_backup->base.resv->lock.base); - list_add_tail(&res->mob_head, &new_backup->res_list); + + /* + * The validation code should already have added a + * dirty tracker here. + */ + WARN_ON(res->coherent && !new_backup->dirty); + + vmw_resource_mob_attach(res); } else { res->backup = NULL; } + } else if (switch_backup && res->coherent) { + vmw_bo_dirty_release(res->backup); } + if (switch_backup) res->backup_offset = new_backup_offset; @@ -469,7 +565,7 @@ vmw_resource_check_buffer(struct ww_acquire_ctx *ticket, if (unlikely(ret != 0)) goto out_no_reserve; - if (res->func->needs_backup && list_empty(&res->mob_head)) + if (res->func->needs_backup && !vmw_resource_mob_attached(res)) return 0; backup_dirty = res->backup_dirty; @@ -574,11 +670,11 @@ static int vmw_resource_do_evict(struct ww_acquire_ctx *ticket, return ret; if (unlikely(func->unbind != NULL && - (!func->needs_backup || !list_empty(&res->mob_head)))) { + (!func->needs_backup || vmw_resource_mob_attached(res)))) { ret = func->unbind(res, res->res_dirty, &val_buf); if (unlikely(ret != 0)) goto out_no_unbind; - list_del_init(&res->mob_head); + vmw_resource_mob_detach(res); } ret = func->destroy(res); res->backup_dirty = true; @@ -595,6 +691,7 @@ out_no_unbind: * to the device. * @res: The resource to make visible to the device. * @intr: Perform waits interruptible if possible. + * @dirtying: Pending GPU operation will dirty the resource * * On succesful return, any backup DMA buffer pointed to by @res->backup will * be reserved and validated. @@ -604,7 +701,8 @@ out_no_unbind: * Return: Zero on success, -ERESTARTSYS if interrupted, negative error code * on failure. */ -int vmw_resource_validate(struct vmw_resource *res, bool intr) +int vmw_resource_validate(struct vmw_resource *res, bool intr, + bool dirtying) { int ret; struct vmw_resource *evict_res; @@ -621,7 +719,7 @@ int vmw_resource_validate(struct vmw_resource *res, bool intr) if (res->backup) val_buf.bo = &res->backup->base; do { - ret = vmw_resource_do_validate(res, &val_buf); + ret = vmw_resource_do_validate(res, &val_buf, dirtying); if (likely(ret != -EBUSY)) break; @@ -660,7 +758,7 @@ int vmw_resource_validate(struct vmw_resource *res, bool intr) if (unlikely(ret != 0)) goto out_no_validate; else if (!res->func->needs_backup && res->backup) { - list_del_init(&res->mob_head); + WARN_ON_ONCE(vmw_resource_mob_attached(res)); vmw_bo_unreference(&res->backup); } @@ -684,22 +782,23 @@ out_no_validate: */ void vmw_resource_unbind_list(struct vmw_buffer_object *vbo) { - - struct vmw_resource *res, *next; struct ttm_validate_buffer val_buf = { .bo = &vbo->base, .num_shared = 0 }; lockdep_assert_held(&vbo->base.resv->lock.base); - list_for_each_entry_safe(res, next, &vbo->res_list, mob_head) { - if (!res->func->unbind) - continue; + while (!RB_EMPTY_ROOT(&vbo->res_tree)) { + struct rb_node *node = vbo->res_tree.rb_node; + struct vmw_resource *res = + container_of(node, struct vmw_resource, mob_node); + + if (!WARN_ON_ONCE(!res->func->unbind)) + (void) res->func->unbind(res, res->res_dirty, &val_buf); - (void) res->func->unbind(res, res->res_dirty, &val_buf); res->backup_dirty = true; res->res_dirty = false; - list_del_init(&res->mob_head); + vmw_resource_mob_detach(res); } (void) ttm_bo_wait(&vbo->base, false, false); @@ -920,7 +1019,7 @@ int vmw_resource_pin(struct vmw_resource *res, bool interruptible) /* Do we really need to pin the MOB as well? */ vmw_bo_pin_reserved(vbo, true); } - ret = vmw_resource_validate(res, interruptible); + ret = vmw_resource_validate(res, interruptible, true); if (vbo) ttm_bo_unreserve(&vbo->base); if (ret) @@ -980,3 +1079,101 @@ enum vmw_res_type vmw_res_type(const struct vmw_resource *res) { return res->func->res_type; } + +/** + * vmw_resource_update_dirty - Update a resource's dirty tracker with a + * sequential range of touched backing store memory. + * @res: The resource. + * @start: The first page touched. + * @end: The last page touched + 1. + */ +void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, + pgoff_t end) +{ + if (res->dirty) + res->func->dirty_range_add(res, start << PAGE_SHIFT, + end << PAGE_SHIFT); +} + +/** + * vmw_resources_clean - Clean resources intersecting a mob range + * @vbo: The mob buffer object + * @start: The mob page offset starting the range + * @end: The mob page offset ending the range + * @num_prefault: Returns how many pages including the first have been + * cleaned and are ok to prefault + */ +int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start, + pgoff_t end, pgoff_t *num_prefault) +{ + struct rb_node *cur = vbo->res_tree.rb_node; + struct vmw_resource *found = NULL; + unsigned long res_start = start << PAGE_SHIFT; + unsigned long res_end = end << PAGE_SHIFT; + unsigned long last_cleaned = 0; + + /* + * Find the resource with lowest backup_offset that intersects the + * range. + */ + while (cur) { + struct vmw_resource *cur_res = + container_of(cur, struct vmw_resource, mob_node); + + if (cur_res->backup_offset >= res_end) { + cur = cur->rb_left; + } else if (cur_res->backup_offset + cur_res->backup_size <= + res_start) { + cur = cur->rb_right; + } else { + found = cur_res; + cur = cur->rb_left; + /* Continue to look for resources with lower offsets */ + } + } + + /* + * In order of increasing backup_offset, clean dirty resorces + * intersecting the range. + */ + while (found) { + if (found->res_dirty) { + int ret; + + if (!found->func->clean) + return -EINVAL; + + ret = found->func->clean(found); + if (ret) + return ret; + + found->res_dirty = false; + } + last_cleaned = found->backup_offset + found->backup_size; + cur = rb_next(&found->mob_node); + if (!cur) + break; + + found = container_of(cur, struct vmw_resource, mob_node); + if (found->backup_offset >= res_end) + break; + } + + /* + * Set number of pages allowed prefaulting and fence the buffer object + */ + *num_prefault = 1; + if (last_cleaned > res_start) { + struct ttm_buffer_object *bo = &vbo->base; + + *num_prefault = __KERNEL_DIV_ROUND_UP(last_cleaned - res_start, + PAGE_SIZE); + vmw_bo_fence_single(bo, NULL); + if (bo->moving) + dma_fence_put(bo->moving); + bo->moving = dma_fence_get + (reservation_object_get_excl(bo->resv)); + } + + return 0; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h index 7e19eba0b0b8..3b7438b2d289 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h @@ -71,6 +71,13 @@ struct vmw_user_resource_conv { * @commit_notify: If the resource is a command buffer managed resource, * callback to notify that a define or remove command * has been committed to the device. + * @dirty_alloc: Allocate a dirty tracker. NULL if dirty-tracking is not + * supported. + * @dirty_free: Free the dirty tracker. + * @dirty_sync: Upload the dirty mob contents to the resource. + * @dirty_add_range: Add a sequential dirty range to the resource + * dirty tracker. + * @clean: Clean the resource. */ struct vmw_res_func { enum vmw_res_type res_type; @@ -78,6 +85,8 @@ struct vmw_res_func { const char *type_name; struct ttm_placement *backup_placement; bool may_evict; + u32 prio; + u32 dirty_prio; int (*create) (struct vmw_resource *res); int (*destroy) (struct vmw_resource *res); @@ -88,6 +97,12 @@ struct vmw_res_func { struct ttm_validate_buffer *val_buf); void (*commit_notify)(struct vmw_resource *res, enum vmw_cmdbuf_res_state state); + int (*dirty_alloc)(struct vmw_resource *res); + void (*dirty_free)(struct vmw_resource *res); + int (*dirty_sync)(struct vmw_resource *res); + void (*dirty_range_add)(struct vmw_resource *res, size_t start, + size_t end); + int (*clean)(struct vmw_resource *res); }; /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index d310d21f0d54..e139fdfd1635 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -95,6 +95,8 @@ static const struct vmw_res_func vmw_gb_shader_func = { .res_type = vmw_res_shader, .needs_backup = true, .may_evict = true, + .prio = 3, + .dirty_prio = 3, .type_name = "guest backed shaders", .backup_placement = &vmw_mob_placement, .create = vmw_gb_shader_create, @@ -106,7 +108,9 @@ static const struct vmw_res_func vmw_gb_shader_func = { static const struct vmw_res_func vmw_dx_shader_func = { .res_type = vmw_res_shader, .needs_backup = true, - .may_evict = false, + .may_evict = true, + .prio = 3, + .dirty_prio = 3, .type_name = "dx shaders", .backup_placement = &vmw_mob_placement, .create = vmw_dx_shader_create, @@ -423,7 +427,7 @@ static int vmw_dx_shader_create(struct vmw_resource *res) WARN_ON_ONCE(!shader->committed); - if (!list_empty(&res->mob_head)) { + if (vmw_resource_mob_attached(res)) { mutex_lock(&dev_priv->binding_mutex); ret = vmw_dx_shader_unscrub(res); mutex_unlock(&dev_priv->binding_mutex); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 219471903bc1..862ca44680ca 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -68,6 +68,20 @@ struct vmw_surface_offset { uint32_t bo_offset; }; +/** + * vmw_surface_dirty - Surface dirty-tracker + * @cache: Cached layout information of the surface. + * @size: Accounting size for the struct vmw_surface_dirty. + * @num_subres: Number of subresources. + * @boxes: Array of SVGA3dBoxes indicating dirty regions. One per subresource. + */ +struct vmw_surface_dirty { + struct svga3dsurface_cache cache; + size_t size; + u32 num_subres; + SVGA3dBox boxes[0]; +}; + static void vmw_user_surface_free(struct vmw_resource *res); static struct vmw_resource * vmw_user_surface_base_to_res(struct ttm_base_object *base); @@ -96,6 +110,13 @@ vmw_gb_surface_reference_internal(struct drm_device *dev, struct drm_vmw_gb_surface_ref_ext_rep *rep, struct drm_file *file_priv); +static void vmw_surface_dirty_free(struct vmw_resource *res); +static int vmw_surface_dirty_alloc(struct vmw_resource *res); +static int vmw_surface_dirty_sync(struct vmw_resource *res); +static void vmw_surface_dirty_range_add(struct vmw_resource *res, size_t start, + size_t end); +static int vmw_surface_clean(struct vmw_resource *res); + static const struct vmw_user_resource_conv user_surface_conv = { .object_type = VMW_RES_SURFACE, .base_obj_to_res = vmw_user_surface_base_to_res, @@ -112,6 +133,8 @@ static const struct vmw_res_func vmw_legacy_surface_func = { .res_type = vmw_res_surface, .needs_backup = false, .may_evict = true, + .prio = 1, + .dirty_prio = 1, .type_name = "legacy surfaces", .backup_placement = &vmw_srf_placement, .create = &vmw_legacy_srf_create, @@ -124,12 +147,19 @@ static const struct vmw_res_func vmw_gb_surface_func = { .res_type = vmw_res_surface, .needs_backup = true, .may_evict = true, + .prio = 1, + .dirty_prio = 2, .type_name = "guest backed surfaces", .backup_placement = &vmw_mob_placement, .create = vmw_gb_surface_create, .destroy = vmw_gb_surface_destroy, .bind = vmw_gb_surface_bind, - .unbind = vmw_gb_surface_unbind + .unbind = vmw_gb_surface_unbind, + .dirty_alloc = vmw_surface_dirty_alloc, + .dirty_free = vmw_surface_dirty_free, + .dirty_sync = vmw_surface_dirty_sync, + .dirty_range_add = vmw_surface_dirty_range_add, + .clean = vmw_surface_clean, }; /** @@ -637,6 +667,7 @@ static void vmw_user_surface_free(struct vmw_resource *res) struct vmw_private *dev_priv = srf->res.dev_priv; uint32_t size = user_srf->size; + WARN_ON_ONCE(res->dirty); if (user_srf->master) drm_master_put(&user_srf->master); kfree(srf->offsets); @@ -915,12 +946,6 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, if (unlikely(drm_is_render_client(file_priv))) require_exist = true; - if (READ_ONCE(vmw_fpriv(file_priv)->locked_master)) { - DRM_ERROR("Locked master refused legacy " - "surface reference.\n"); - return -EACCES; - } - handle = u_handle; } @@ -1170,10 +1195,16 @@ static int vmw_gb_surface_bind(struct vmw_resource *res, cmd2->header.id = SVGA_3D_CMD_UPDATE_GB_SURFACE; cmd2->header.size = sizeof(cmd2->body); cmd2->body.sid = res->id; - res->backup_dirty = false; } vmw_fifo_commit(dev_priv, submit_size); + if (res->backup->dirty && res->backup_dirty) { + /* We've just made a full upload. Cear dirty regions. */ + vmw_bo_dirty_clear_res(res); + } + + res->backup_dirty = false; + return 0; } @@ -1638,7 +1669,8 @@ vmw_gb_surface_define_internal(struct drm_device *dev, } } } else if (req->base.drm_surface_flags & - drm_vmw_surface_flag_create_buffer) + (drm_vmw_surface_flag_create_buffer | + drm_vmw_surface_flag_coherent)) ret = vmw_user_bo_alloc(dev_priv, tfile, res->backup_size, req->base.drm_surface_flags & @@ -1652,6 +1684,26 @@ vmw_gb_surface_define_internal(struct drm_device *dev, goto out_unlock; } + if (req->base.drm_surface_flags & drm_vmw_surface_flag_coherent) { + struct vmw_buffer_object *backup = res->backup; + + ttm_bo_reserve(&backup->base, false, false, NULL); + if (!res->func->dirty_alloc) + ret = -EINVAL; + if (!ret) + ret = vmw_bo_dirty_add(backup); + if (!ret) { + res->coherent = true; + ret = res->func->dirty_alloc(res); + } + ttm_bo_unreserve(&backup->base); + if (ret) { + vmw_resource_unreference(&res); + goto out_unlock; + } + + } + tmp = vmw_resource_reference(res); ret = ttm_prime_object_init(tfile, res->backup_size, &user_srf->prime, req->base.drm_surface_flags & @@ -1760,3 +1812,338 @@ out_bad_resource: return ret; } + +/** + * vmw_subres_dirty_add - Add a dirty region to a subresource + * @dirty: The surfaces's dirty tracker. + * @loc_start: The location corresponding to the start of the region. + * @loc_end: The location corresponding to the end of the region. + * + * As we are assuming that @loc_start and @loc_end represent a sequential + * range of backing store memory, if the region spans multiple lines then + * regardless of the x coordinate, the full lines are dirtied. + * Correspondingly if the region spans multiple z slices, then full rather + * than partial z slices are dirtied. + */ +static void vmw_subres_dirty_add(struct vmw_surface_dirty *dirty, + const struct svga3dsurface_loc *loc_start, + const struct svga3dsurface_loc *loc_end) +{ + const struct svga3dsurface_cache *cache = &dirty->cache; + SVGA3dBox *box = &dirty->boxes[loc_start->sub_resource]; + u32 mip = loc_start->sub_resource % cache->num_mip_levels; + const struct drm_vmw_size *size = &cache->mip[mip].size; + u32 box_c2 = box->z + box->d; + + if (WARN_ON(loc_start->sub_resource >= dirty->num_subres)) + return; + + if (box->d == 0 || box->z > loc_start->z) + box->z = loc_start->z; + if (box_c2 < loc_end->z) + box->d = loc_end->z - box->z; + + if (loc_start->z + 1 == loc_end->z) { + box_c2 = box->y + box->h; + if (box->h == 0 || box->y > loc_start->y) + box->y = loc_start->y; + if (box_c2 < loc_end->y) + box->h = loc_end->y - box->y; + + if (loc_start->y + 1 == loc_end->y) { + box_c2 = box->x + box->w; + if (box->w == 0 || box->x > loc_start->x) + box->x = loc_start->x; + if (box_c2 < loc_end->x) + box->w = loc_end->x - box->x; + } else { + box->x = 0; + box->w = size->width; + } + } else { + box->y = 0; + box->h = size->height; + box->x = 0; + box->w = size->width; + } +} + +/** + * vmw_subres_dirty_full - Mark a full subresource as dirty + * @dirty: The surface's dirty tracker. + * @subres: The subresource + */ +static void vmw_subres_dirty_full(struct vmw_surface_dirty *dirty, u32 subres) +{ + const struct svga3dsurface_cache *cache = &dirty->cache; + u32 mip = subres % cache->num_mip_levels; + const struct drm_vmw_size *size = &cache->mip[mip].size; + SVGA3dBox *box = &dirty->boxes[subres]; + + box->x = 0; + box->y = 0; + box->z = 0; + box->w = size->width; + box->h = size->height; + box->d = size->depth; +} + +/* + * vmw_surface_tex_dirty_add_range - The dirty_add_range callback for texture + * surfaces. + */ +static void vmw_surface_tex_dirty_range_add(struct vmw_resource *res, + size_t start, size_t end) +{ + struct vmw_surface_dirty *dirty = + (struct vmw_surface_dirty *) res->dirty; + size_t backup_end = res->backup_offset + res->backup_size; + struct svga3dsurface_loc loc1, loc2; + const struct svga3dsurface_cache *cache; + + start = max_t(size_t, start, res->backup_offset) - res->backup_offset; + end = min(end, backup_end) - res->backup_offset; + cache = &dirty->cache; + svga3dsurface_get_loc(cache, &loc1, start); + svga3dsurface_get_loc(cache, &loc2, end - 1); + svga3dsurface_inc_loc(cache, &loc2); + + if (loc1.sub_resource + 1 == loc2.sub_resource) { + /* Dirty range covers a single sub-resource */ + vmw_subres_dirty_add(dirty, &loc1, &loc2); + } else { + /* Dirty range covers multiple sub-resources */ + struct svga3dsurface_loc loc_min, loc_max; + u32 sub_res = loc1.sub_resource; + + svga3dsurface_max_loc(cache, loc1.sub_resource, &loc_max); + vmw_subres_dirty_add(dirty, &loc1, &loc_max); + svga3dsurface_min_loc(cache, loc2.sub_resource - 1, &loc_min); + vmw_subres_dirty_add(dirty, &loc_min, &loc2); + for (sub_res = loc1.sub_resource + 1; + sub_res < loc2.sub_resource - 1; ++sub_res) + vmw_subres_dirty_full(dirty, sub_res); + } +} + +/* + * vmw_surface_tex_dirty_add_range - The dirty_add_range callback for buffer + * surfaces. + */ +static void vmw_surface_buf_dirty_range_add(struct vmw_resource *res, + size_t start, size_t end) +{ + struct vmw_surface_dirty *dirty = + (struct vmw_surface_dirty *) res->dirty; + const struct svga3dsurface_cache *cache = &dirty->cache; + size_t backup_end = res->backup_offset + cache->mip_chain_bytes; + SVGA3dBox *box = &dirty->boxes[0]; + u32 box_c2; + + box->h = box->d = 1; + start = max_t(size_t, start, res->backup_offset) - res->backup_offset; + end = min(end, backup_end) - res->backup_offset; + box_c2 = box->x + box->w; + if (box->w == 0 || box->x > start) + box->x = start; + if (box_c2 < end) + box->w = end - box->x; +} + +/* + * vmw_surface_tex_dirty_add_range - The dirty_add_range callback for surfaces + */ +static void vmw_surface_dirty_range_add(struct vmw_resource *res, size_t start, + size_t end) +{ + struct vmw_surface *srf = vmw_res_to_srf(res); + + if (WARN_ON(end <= res->backup_offset || + start >= res->backup_offset + res->backup_size)) + return; + + if (srf->format == SVGA3D_BUFFER) + vmw_surface_buf_dirty_range_add(res, start, end); + else + vmw_surface_tex_dirty_range_add(res, start, end); +} + +/* + * vmw_surface_dirty_sync - The surface's dirty_sync callback. + */ +static int vmw_surface_dirty_sync(struct vmw_resource *res) +{ + struct vmw_private *dev_priv = res->dev_priv; + bool has_dx = 0; + u32 i, num_dirty; + struct vmw_surface_dirty *dirty = + (struct vmw_surface_dirty *) res->dirty; + size_t alloc_size; + const struct svga3dsurface_cache *cache = &dirty->cache; + struct { + SVGA3dCmdHeader header; + SVGA3dCmdDXUpdateSubResource body; + } *cmd1; + struct { + SVGA3dCmdHeader header; + SVGA3dCmdUpdateGBImage body; + } *cmd2; + void *cmd; + + num_dirty = 0; + for (i = 0; i < dirty->num_subres; ++i) { + const SVGA3dBox *box = &dirty->boxes[i]; + + if (box->d) + num_dirty++; + } + + if (!num_dirty) + goto out; + + alloc_size = num_dirty * ((has_dx) ? sizeof(*cmd1) : sizeof(*cmd2)); + cmd = VMW_FIFO_RESERVE(dev_priv, alloc_size); + if (!cmd) + return -ENOMEM; + + cmd1 = cmd; + cmd2 = cmd; + + for (i = 0; i < dirty->num_subres; ++i) { + const SVGA3dBox *box = &dirty->boxes[i]; + + if (!box->d) + continue; + + /* + * DX_UPDATE_SUBRESOURCE is aware of array surfaces. + * UPDATE_GB_IMAGE is not. + */ + if (has_dx) { + cmd1->header.id = SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE; + cmd1->header.size = sizeof(cmd1->body); + cmd1->body.sid = res->id; + cmd1->body.subResource = i; + cmd1->body.box = *box; + cmd1++; + } else { + cmd2->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE; + cmd2->header.size = sizeof(cmd2->body); + cmd2->body.image.sid = res->id; + cmd2->body.image.face = i / cache->num_mip_levels; + cmd2->body.image.mipmap = i - + (cache->num_mip_levels * cmd2->body.image.face); + cmd2->body.box = *box; + cmd2++; + } + + } + vmw_fifo_commit(dev_priv, alloc_size); + out: + memset(&dirty->boxes[0], 0, sizeof(dirty->boxes[0]) * + dirty->num_subres); + + return 0; +} + +/* + * vmw_surface_dirty_alloc - The surface's dirty_alloc callback. + */ +static int vmw_surface_dirty_alloc(struct vmw_resource *res) +{ + struct vmw_surface *srf = vmw_res_to_srf(res); + struct vmw_surface_dirty *dirty; + u32 num_layers = 1; + u32 num_mip; + u32 num_subres; + u32 num_samples; + size_t dirty_size, acc_size; + static struct ttm_operation_ctx ctx = { + .interruptible = false, + .no_wait_gpu = false + }; + int ret; + + if (srf->array_size) + num_layers = srf->array_size; + else if (srf->flags & SVGA3D_SURFACE_CUBEMAP) + num_layers *= SVGA3D_MAX_SURFACE_FACES; + + num_mip = srf->mip_levels[0]; + if (!num_mip) + num_mip = 1; + + num_subres = num_layers * num_mip; + dirty_size = sizeof(*dirty) + num_subres * sizeof(dirty->boxes[0]); + acc_size = ttm_round_pot(dirty_size); + ret = ttm_mem_global_alloc(vmw_mem_glob(res->dev_priv), + acc_size, &ctx); + if (ret) { + VMW_DEBUG_USER("Out of graphics memory for surface " + "dirty tracker.\n"); + return ret; + } + + dirty = kvzalloc(dirty_size, GFP_KERNEL); + if (!dirty) { + ret = -ENOMEM; + goto out_no_dirty; + } + + num_samples = max_t(u32, 1, srf->multisample_count); + ret = svga3dsurface_setup_cache(&srf->base_size, srf->format, num_mip, + num_layers, num_samples, &dirty->cache); + if (ret) + goto out_no_cache; + + dirty->num_subres = num_subres; + dirty->size = acc_size; + res->dirty = (struct vmw_resource_dirty *) dirty; + + return 0; + +out_no_cache: + kvfree(dirty); +out_no_dirty: + ttm_mem_global_free(vmw_mem_glob(res->dev_priv), acc_size); + return ret; +} + +/* + * vmw_surface_dirty_free - The surface's dirty_free callback + */ +static void vmw_surface_dirty_free(struct vmw_resource *res) +{ + struct vmw_surface_dirty *dirty = + (struct vmw_surface_dirty *) res->dirty; + size_t acc_size = dirty->size; + + kvfree(dirty); + ttm_mem_global_free(vmw_mem_glob(res->dev_priv), acc_size); + res->dirty = NULL; +} + +/* + * vmw_surface_clean - The surface's clean callback + */ +static int vmw_surface_clean(struct vmw_resource *res) +{ + struct vmw_private *dev_priv = res->dev_priv; + size_t alloc_size; + struct { + SVGA3dCmdHeader header; + SVGA3dCmdReadbackGBSurface body; + } *cmd; + + alloc_size = sizeof(*cmd); + cmd = VMW_FIFO_RESERVE(dev_priv, alloc_size); + if (!cmd) + return -ENOMEM; + + cmd->header.id = SVGA_3D_CMD_READBACK_GB_SURFACE; + cmd->header.size = sizeof(cmd->body); + cmd->body.sid = res->id; + vmw_fifo_commit(dev_priv, alloc_size); + + return 0; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c index f611b2290a1b..9aaf807ed73c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c @@ -33,6 +33,8 @@ * struct vmw_validation_bo_node - Buffer object validation metadata. * @base: Metadata used for TTM reservation- and validation. * @hash: A hash entry used for the duplicate detection hash table. + * @coherent_count: If switching backup buffers, number of new coherent + * resources that will have this buffer as a backup buffer. * @as_mob: Validate as mob. * @cpu_blit: Validate for cpu blit access. * @@ -42,6 +44,7 @@ struct vmw_validation_bo_node { struct ttm_validate_buffer base; struct drm_hash_item hash; + unsigned int coherent_count; u32 as_mob : 1; u32 cpu_blit : 1; }; @@ -459,6 +462,19 @@ int vmw_validation_res_reserve(struct vmw_validation_context *ctx, if (ret) goto out_unreserve; } + + if (val->switching_backup && val->new_backup && + res->coherent) { + struct vmw_validation_bo_node *bo_node = + vmw_validation_find_bo_dup(ctx, + val->new_backup); + + if (WARN_ON(!bo_node)) { + ret = -EINVAL; + goto out_unreserve; + } + bo_node->coherent_count++; + } } return 0; @@ -562,6 +578,9 @@ int vmw_validation_bo_validate(struct vmw_validation_context *ctx, bool intr) int ret; list_for_each_entry(entry, &ctx->bo_list, base.head) { + struct vmw_buffer_object *vbo = + container_of(entry->base.bo, typeof(*vbo), base); + if (entry->cpu_blit) { struct ttm_operation_ctx ctx = { .interruptible = intr, @@ -576,6 +595,27 @@ int vmw_validation_bo_validate(struct vmw_validation_context *ctx, bool intr) } if (ret) return ret; + + /* + * Rather than having the resource code allocating the bo + * dirty tracker in resource_unreserve() where we can't fail, + * Do it here when validating the buffer object. + */ + if (entry->coherent_count) { + unsigned int coherent_count = entry->coherent_count; + + while (coherent_count) { + ret = vmw_bo_dirty_add(vbo); + if (ret) + return ret; + + coherent_count--; + } + entry->coherent_count -= coherent_count; + } + + if (vbo->dirty) + vmw_bo_dirty_scan(vbo); } return 0; } @@ -601,7 +641,8 @@ int vmw_validation_res_validate(struct vmw_validation_context *ctx, bool intr) struct vmw_resource *res = val->res; struct vmw_buffer_object *backup = res->backup; - ret = vmw_resource_validate(res, intr); + ret = vmw_resource_validate(res, intr, val->dirty_set && + val->dirty); if (ret) { if (ret != -ERESTARTSYS) DRM_ERROR("Failed to validate resource.\n"); @@ -828,3 +869,34 @@ int vmw_validation_preload_res(struct vmw_validation_context *ctx, ctx->mem_size_left += size; return 0; } + +/** + * vmw_validation_bo_backoff - Unreserve buffer objects registered with a + * validation context + * @ctx: The validation context + * + * This function unreserves the buffer objects previously reserved using + * vmw_validation_bo_reserve. It's typically used as part of an error path + */ +void vmw_validation_bo_backoff(struct vmw_validation_context *ctx) +{ + struct vmw_validation_bo_node *entry; + + /* + * Switching coherent resource backup buffers failed. + * Release corresponding buffer object dirty trackers. + */ + list_for_each_entry(entry, &ctx->bo_list, base.head) { + if (entry->coherent_count) { + unsigned int coherent_count = entry->coherent_count; + struct vmw_buffer_object *vbo = + container_of(entry->base.bo, typeof(*vbo), + base); + + while (coherent_count--) + vmw_bo_dirty_release(vbo); + } + } + + ttm_eu_backoff_reservation(&ctx->ticket, &ctx->bo_list); +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h index 1d2322ad6fd5..fd83e017c2a5 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h @@ -172,20 +172,6 @@ vmw_validation_bo_reserve(struct vmw_validation_context *ctx, NULL, true); } -/** - * vmw_validation_bo_backoff - Unreserve buffer objects registered with a - * validation context - * @ctx: The validation context - * - * This function unreserves the buffer objects previously reserved using - * vmw_validation_bo_reserve. It's typically used as part of an error path - */ -static inline void -vmw_validation_bo_backoff(struct vmw_validation_context *ctx) -{ - ttm_eu_backoff_reservation(&ctx->ticket, &ctx->bo_list); -} - /** * vmw_validation_bo_fence - Unreserve and fence buffer objects registered * with a validation context @@ -268,4 +254,6 @@ int vmw_validation_preload_res(struct vmw_validation_context *ctx, unsigned int size); void vmw_validation_res_set_dirty(struct vmw_validation_context *ctx, void *val_private, u32 dirty); +void vmw_validation_bo_backoff(struct vmw_validation_context *ctx); + #endif diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 49d9cdfc58f2..435d02f719a8 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -768,4 +768,14 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx); void ttm_bo_swapout_all(struct ttm_bo_device *bdev); int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo); + +/* Default number of pre-faulted pages in the TTM fault handler */ +#define TTM_BO_VM_NUM_PREFAULT 16 + +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, + struct vm_fault *vmf); + +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, + pgprot_t prot, + pgoff_t num_prefault); #endif diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index c9b8ba492f24..a2d810a2504d 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -442,6 +442,9 @@ extern struct ttm_bo_global { * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. * @man: An array of mem_type_managers. * @vma_manager: Address space manager + * @vm_ops: Pointer to the struct vm_operations_struct used for this + * device's VM operations. The driver may override this before the first + * mmap() call. * lru_lock: Spinlock that protects the buffer+device lru lists and * ddestroy lists. * @dev_mapping: A pointer to the struct address_space representing the @@ -460,6 +463,7 @@ struct ttm_bo_device { struct ttm_bo_global *glob; struct ttm_bo_driver *driver; struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; + const struct vm_operations_struct *vm_ops; /* * Protected by internal locks. @@ -488,6 +492,8 @@ struct ttm_bo_device { bool no_retry; }; +extern const struct vm_operations_struct ttm_bo_vm_ops; + /** * struct ttm_lru_bulk_move_pos * diff --git a/include/linux/mm.h b/include/linux/mm.h index dd0b5f4e1e45..798cdda9560e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2686,7 +2686,24 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); - +struct pfn_range_apply; +typedef int (*pter_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, + struct pfn_range_apply *closure); +struct pfn_range_apply { + struct mm_struct *mm; + pter_fn_t ptefn; + unsigned int alloc; +}; +extern int apply_to_pfn_range(struct pfn_range_apply *closure, + unsigned long address, unsigned long size); +unsigned long apply_as_wrprotect(struct address_space *mapping, + pgoff_t first_index, pgoff_t nr); +unsigned long apply_as_clean(struct address_space *mapping, + pgoff_t first_index, pgoff_t nr, + pgoff_t bitmap_pgoff, + unsigned long *bitmap, + pgoff_t *start, + pgoff_t *end); #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); extern void kernel_poison_pages(struct page *page, int numpages, int enable); diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h index 399f58317cff..02cab33f2f25 100644 --- a/include/uapi/drm/vmwgfx_drm.h +++ b/include/uapi/drm/vmwgfx_drm.h @@ -891,11 +891,13 @@ struct drm_vmw_shader_arg { * surface. * @drm_vmw_surface_flag_create_buffer: Create a backup buffer if none is * given. + * @drm_vmw_surface_flag_coherent: Back surface with coherent memory. */ enum drm_vmw_surface_flags { drm_vmw_surface_flag_shareable = (1 << 0), drm_vmw_surface_flag_scanout = (1 << 1), - drm_vmw_surface_flag_create_buffer = (1 << 2) + drm_vmw_surface_flag_create_buffer = (1 << 2), + drm_vmw_surface_flag_coherent = (1 << 3), }; /** diff --git a/mm/Kconfig b/mm/Kconfig index f0c76ba47695..5006d0e6a5c7 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -765,4 +765,7 @@ config GUP_BENCHMARK config ARCH_HAS_PTE_SPECIAL bool +config AS_DIRTY_HELPERS + bool + endmenu diff --git a/mm/Makefile b/mm/Makefile index ac5e5ba78874..f5d412bbc2f7 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -104,3 +104,4 @@ obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o obj-$(CONFIG_HMM) += hmm.o obj-$(CONFIG_MEMFD_CREATE) += memfd.o +obj-$(CONFIG_AS_DIRTY_HELPERS) += as_dirty_helpers.o diff --git a/mm/as_dirty_helpers.c b/mm/as_dirty_helpers.c new file mode 100644 index 000000000000..f600e31534fb --- /dev/null +++ b/mm/as_dirty_helpers.c @@ -0,0 +1,300 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include + +/** + * struct apply_as - Closure structure for apply_as_range + * @base: struct pfn_range_apply we derive from + * @start: Address of first modified pte + * @end: Address of last modified pte + 1 + * @total: Total number of modified ptes + * @vma: Pointer to the struct vm_area_struct we're currently operating on + */ +struct apply_as { + struct pfn_range_apply base; + unsigned long start; + unsigned long end; + unsigned long total; + struct vm_area_struct *vma; +}; + +/** + * apply_pt_wrprotect - Leaf pte callback to write-protect a pte + * @pte: Pointer to the pte + * @token: Page table token, see apply_to_pfn_range() + * @addr: The virtual page address + * @closure: Pointer to a struct pfn_range_apply embedded in a + * struct apply_as + * + * The function write-protects a pte and records the range in + * virtual address space of touched ptes for efficient range TLB flushes. + * + * Return: Always zero. + */ +static int apply_pt_wrprotect(pte_t *pte, pgtable_t token, + unsigned long addr, + struct pfn_range_apply *closure) +{ + struct apply_as *aas = container_of(closure, typeof(*aas), base); + pte_t ptent = *pte; + + if (pte_write(ptent)) { + pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte); + + ptent = pte_wrprotect(old_pte); + ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent); + aas->total++; + aas->start = min(aas->start, addr); + aas->end = max(aas->end, addr + PAGE_SIZE); + } + + return 0; +} + +/** + * struct apply_as_clean - Closure structure for apply_as_clean + * @base: struct apply_as we derive from + * @bitmap_pgoff: Address_space Page offset of the first bit in @bitmap + * @bitmap: Bitmap with one bit for each page offset in the address_space range + * covered. + * @start: Address_space page offset of first modified pte relative + * to @bitmap_pgoff + * @end: Address_space page offset of last modified pte relative + * to @bitmap_pgoff + */ +struct apply_as_clean { + struct apply_as base; + pgoff_t bitmap_pgoff; + unsigned long *bitmap; + pgoff_t start; + pgoff_t end; +}; + +/** + * apply_pt_clean - Leaf pte callback to clean a pte + * @pte: Pointer to the pte + * @token: Page table token, see apply_to_pfn_range() + * @addr: The virtual page address + * @closure: Pointer to a struct pfn_range_apply embedded in a + * struct apply_as_clean + * + * The function cleans a pte and records the range in + * virtual address space of touched ptes for efficient TLB flushes. + * It also records dirty ptes in a bitmap representing page offsets + * in the address_space, as well as the first and last of the bits + * touched. + * + * Return: Always zero. + */ +static int apply_pt_clean(pte_t *pte, pgtable_t token, + unsigned long addr, + struct pfn_range_apply *closure) +{ + struct apply_as *aas = container_of(closure, typeof(*aas), base); + struct apply_as_clean *clean = container_of(aas, typeof(*clean), base); + pte_t ptent = *pte; + + if (pte_dirty(ptent)) { + pgoff_t pgoff = ((addr - aas->vma->vm_start) >> PAGE_SHIFT) + + aas->vma->vm_pgoff - clean->bitmap_pgoff; + pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte); + + ptent = pte_mkclean(old_pte); + ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent); + + aas->total++; + aas->start = min(aas->start, addr); + aas->end = max(aas->end, addr + PAGE_SIZE); + + __set_bit(pgoff, clean->bitmap); + clean->start = min(clean->start, pgoff); + clean->end = max(clean->end, pgoff + 1); + } + + return 0; +} + +/** + * apply_as_range - Apply a pte callback to all PTEs pointing into a range + * of an address_space. + * @mapping: Pointer to the struct address_space + * @aas: Closure structure + * @first_index: First page offset in the address_space + * @nr: Number of incremental page offsets to cover + * + * Return: Number of ptes touched. Note that this number might be larger + * than @nr if there are overlapping vmas + */ +static unsigned long apply_as_range(struct address_space *mapping, + struct apply_as *aas, + pgoff_t first_index, pgoff_t nr) +{ + struct vm_area_struct *vma; + pgoff_t vba, vea, cba, cea; + unsigned long start_addr, end_addr; + struct mmu_notifier_range range; + + i_mmap_lock_read(mapping); + vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index, + first_index + nr - 1) { + unsigned long vm_flags = READ_ONCE(vma->vm_flags); + + /* + * We can only do advisory flag tests below, since we can't + * require the vm's mmap_sem to be held to protect the flags. + * Therefore, callers that strictly depend on specific mmap + * flags to remain constant throughout the operation must + * either ensure those flags are immutable for all relevant + * vmas or can't use this function. Fixing this properly would + * require the vma::vm_flags to be protected by a separate + * lock taken after the i_mmap_lock + */ + + /* Skip non-applicable VMAs */ + if ((vm_flags & (VM_SHARED | VM_WRITE)) != + (VM_SHARED | VM_WRITE)) + continue; + + /* Warn on and skip VMAs whose flags indicate illegal usage */ + if (WARN_ON((vm_flags & (VM_HUGETLB | VM_IO)) != VM_IO)) + continue; + + /* Clip to the vma */ + vba = vma->vm_pgoff; + vea = vba + vma_pages(vma); + cba = first_index; + cba = max(cba, vba); + cea = first_index + nr; + cea = min(cea, vea); + + /* Translate to virtual address */ + start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start; + end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start; + if (start_addr >= end_addr) + continue; + + aas->base.mm = vma->vm_mm; + aas->vma = vma; + aas->start = end_addr; + aas->end = start_addr; + + mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 0, + vma, vma->vm_mm, start_addr, end_addr); + mmu_notifier_invalidate_range_start(&range); + + /* Needed when we only change protection? */ + flush_cache_range(vma, start_addr, end_addr); + + /* + * We're not using tlb_gather_mmu() since typically + * only a small subrange of PTEs are affected. + */ + inc_tlb_flush_pending(vma->vm_mm); + + /* Should not error since aas->base.alloc == 0 */ + WARN_ON(apply_to_pfn_range(&aas->base, start_addr, + end_addr - start_addr)); + if (aas->end > aas->start) + flush_tlb_range(vma, aas->start, aas->end); + + mmu_notifier_invalidate_range_end(&range); + dec_tlb_flush_pending(vma->vm_mm); + } + i_mmap_unlock_read(mapping); + + return aas->total; +} + +/** + * apply_as_wrprotect - Write-protect all ptes in an address_space range + * @mapping: The address_space we want to write protect + * @first_index: The first page offset in the range + * @nr: Number of incremental page offsets to cover + * + * WARNING: This function should only be used for address spaces whose + * vmas are marked VM_IO and that do not contain huge pages. + * To avoid interference with COW'd pages, vmas not marked VM_SHARED are + * simply skipped. + * + * Return: The number of ptes actually write-protected. Note that + * already write-protected ptes are not counted. + */ +unsigned long apply_as_wrprotect(struct address_space *mapping, + pgoff_t first_index, pgoff_t nr) +{ + struct apply_as aas = { + .base = { + .alloc = 0, + .ptefn = apply_pt_wrprotect, + }, + .total = 0, + }; + + return apply_as_range(mapping, &aas, first_index, nr); +} +EXPORT_SYMBOL_GPL(apply_as_wrprotect); + +/** + * apply_as_clean - Clean all ptes in an address_space range + * @mapping: The address_space we want to clean + * @first_index: The first page offset in the range + * @nr: Number of incremental page offsets to cover + * @bitmap_pgoff: The page offset of the first bit in @bitmap + * @bitmap: Pointer to a bitmap of at least @nr bits. The bitmap needs to + * cover the whole range @first_index..@first_index + @nr. + * @start: Pointer to number of the first set bit in @bitmap. + * is modified as new bits are set by the function. + * @end: Pointer to the number of the last set bit in @bitmap. + * none set. The value is modified as new bits are set by the function. + * + * Note: When this function returns there is no guarantee that a CPU has + * not already dirtied new ptes. However it will not clean any ptes not + * reported in the bitmap. + * + * If a caller needs to make sure all dirty ptes are picked up and none + * additional are added, it first needs to write-protect the address-space + * range and make sure new writers are blocked in page_mkwrite() or + * pfn_mkwrite(). And then after a TLB flush following the write-protection + * pick up all dirty bits. + * + * WARNING: This function should only be used for address spaces whose + * vmas are marked VM_IO and that do not contain huge pages. + * To avoid interference with COW'd pages, vmas not marked VM_SHARED are + * simply skipped. + * + * Return: The number of dirty ptes actually cleaned. + */ +unsigned long apply_as_clean(struct address_space *mapping, + pgoff_t first_index, pgoff_t nr, + pgoff_t bitmap_pgoff, + unsigned long *bitmap, + pgoff_t *start, + pgoff_t *end) +{ + bool none_set = (*start >= *end); + struct apply_as_clean clean = { + .base = { + .base = { + .alloc = 0, + .ptefn = apply_pt_clean, + }, + .total = 0, + }, + .bitmap_pgoff = bitmap_pgoff, + .bitmap = bitmap, + .start = none_set ? nr : *start, + .end = none_set ? 0 : *end, + }; + unsigned long ret = apply_as_range(mapping, &clean.base, first_index, + nr); + + *start = clean.start; + *end = clean.end; + return ret; +} +EXPORT_SYMBOL_GPL(apply_as_clean); diff --git a/mm/memory.c b/mm/memory.c index ddf20bd0c317..462aa47f8878 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2032,18 +2032,17 @@ int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long } EXPORT_SYMBOL(vm_iomap_memory); -static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, - unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) +static int apply_to_pte_range(struct pfn_range_apply *closure, pmd_t *pmd, + unsigned long addr, unsigned long end) { pte_t *pte; int err; pgtable_t token; spinlock_t *uninitialized_var(ptl); - pte = (mm == &init_mm) ? + pte = (closure->mm == &init_mm) ? pte_alloc_kernel(pmd, addr) : - pte_alloc_map_lock(mm, pmd, addr, &ptl); + pte_alloc_map_lock(closure->mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; @@ -2054,86 +2053,109 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, token = pmd_pgtable(*pmd); do { - err = fn(pte++, token, addr, data); + err = closure->ptefn(pte++, token, addr, closure); if (err) break; } while (addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); - if (mm != &init_mm) + if (closure->mm != &init_mm) pte_unmap_unlock(pte-1, ptl); return err; } -static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, - unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) +static int apply_to_pmd_range(struct pfn_range_apply *closure, pud_t *pud, + unsigned long addr, unsigned long end) { pmd_t *pmd; unsigned long next; - int err; + int err = 0; BUG_ON(pud_huge(*pud)); - pmd = pmd_alloc(mm, pud, addr); + pmd = pmd_alloc(closure->mm, pud, addr); if (!pmd) return -ENOMEM; + do { next = pmd_addr_end(addr, end); - err = apply_to_pte_range(mm, pmd, addr, next, fn, data); + if (!closure->alloc && pmd_none_or_clear_bad(pmd)) + continue; + err = apply_to_pte_range(closure, pmd, addr, next); if (err) break; } while (pmd++, addr = next, addr != end); return err; } -static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, - unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) +static int apply_to_pud_range(struct pfn_range_apply *closure, p4d_t *p4d, + unsigned long addr, unsigned long end) { pud_t *pud; unsigned long next; - int err; + int err = 0; - pud = pud_alloc(mm, p4d, addr); + pud = pud_alloc(closure->mm, p4d, addr); if (!pud) return -ENOMEM; + do { next = pud_addr_end(addr, end); - err = apply_to_pmd_range(mm, pud, addr, next, fn, data); + if (!closure->alloc && pud_none_or_clear_bad(pud)) + continue; + err = apply_to_pmd_range(closure, pud, addr, next); if (err) break; } while (pud++, addr = next, addr != end); return err; } -static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, - unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) +static int apply_to_p4d_range(struct pfn_range_apply *closure, pgd_t *pgd, + unsigned long addr, unsigned long end) { p4d_t *p4d; unsigned long next; - int err; + int err = 0; - p4d = p4d_alloc(mm, pgd, addr); + p4d = p4d_alloc(closure->mm, pgd, addr); if (!p4d) return -ENOMEM; + do { next = p4d_addr_end(addr, end); - err = apply_to_pud_range(mm, p4d, addr, next, fn, data); + if (!closure->alloc && p4d_none_or_clear_bad(p4d)) + continue; + err = apply_to_pud_range(closure, p4d, addr, next); if (err) break; } while (p4d++, addr = next, addr != end); return err; } -/* - * Scan a region of virtual memory, filling in page tables as necessary - * and calling a provided function on each leaf page table. +/** + * apply_to_pfn_range - Scan a region of virtual memory, calling a provided + * function on each leaf page table entry + * @closure: Details about how to scan and what function to apply + * @addr: Start virtual address + * @size: Size of the region + * + * If @closure->alloc is set to 1, the function will fill in the page table + * as necessary. Otherwise it will skip non-present parts. + * Note: The caller must ensure that the range does not contain huge pages. + * The caller must also assure that the proper mmu_notifier functions are + * called before and after the call to apply_to_pfn_range. + * + * WARNING: Do not use this function unless you know exactly what you are + * doing. It is lacking support for huge pages and transparent huge pages. + * + * Return: Zero on success. If the provided function returns a non-zero status, + * the page table walk will terminate and that status will be returned. + * If @closure->alloc is set to 1, then this function may also return memory + * allocation errors arising from allocating page table memory. */ -int apply_to_page_range(struct mm_struct *mm, unsigned long addr, - unsigned long size, pte_fn_t fn, void *data) +int apply_to_pfn_range(struct pfn_range_apply *closure, + unsigned long addr, unsigned long size) { pgd_t *pgd; unsigned long next; @@ -2143,16 +2165,65 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, if (WARN_ON(addr >= end)) return -EINVAL; - pgd = pgd_offset(mm, addr); + pgd = pgd_offset(closure->mm, addr); do { next = pgd_addr_end(addr, end); - err = apply_to_p4d_range(mm, pgd, addr, next, fn, data); + if (!closure->alloc && pgd_none_or_clear_bad(pgd)) + continue; + err = apply_to_p4d_range(closure, pgd, addr, next); if (err) break; } while (pgd++, addr = next, addr != end); return err; } + +/** + * struct page_range_apply - Closure structure for apply_to_page_range() + * @pter: The base closure structure we derive from + * @fn: The leaf pte function to call + * @data: The leaf pte function closure + */ +struct page_range_apply { + struct pfn_range_apply pter; + pte_fn_t fn; + void *data; +}; + +/* + * Callback wrapper to enable use of apply_to_pfn_range for + * the apply_to_page_range interface + */ +static int apply_to_page_range_wrapper(pte_t *pte, pgtable_t token, + unsigned long addr, + struct pfn_range_apply *pter) +{ + struct page_range_apply *pra = + container_of(pter, typeof(*pra), pter); + + return pra->fn(pte, token, addr, pra->data); +} + +/* + * Scan a region of virtual memory, filling in page tables as necessary + * and calling a provided function on each leaf page table. + * + * WARNING: Do not use this function unless you know exactly what you are + * doing. It is lacking support for huge pages and transparent huge pages. + */ +int apply_to_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, void *data) +{ + struct page_range_apply pra = { + .pter = {.mm = mm, + .alloc = 1, + .ptefn = apply_to_page_range_wrapper }, + .fn = fn, + .data = data + }; + + return apply_to_pfn_range(&pra.pter, addr, size); +} EXPORT_SYMBOL_GPL(apply_to_page_range); /* @@ -2238,7 +2309,7 @@ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf) ret = vmf->vma->vm_ops->page_mkwrite(vmf); /* Restore original flags so that caller is not surprised */ vmf->flags = old_flags; - if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) + if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; if (unlikely(!(ret & VM_FAULT_LOCKED))) { lock_page(page); @@ -2515,7 +2586,7 @@ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf) pte_unmap_unlock(vmf->pte, vmf->ptl); vmf->flags |= FAULT_FLAG_MKWRITE; ret = vma->vm_ops->pfn_mkwrite(vmf); - if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)) + if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)) return ret; return finish_mkwrite_fault(vmf); } @@ -2536,7 +2607,8 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf) pte_unmap_unlock(vmf->pte, vmf->ptl); tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || (tmp & - (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { + (VM_FAULT_ERROR | VM_FAULT_NOPAGE | + VM_FAULT_RETRY)))) { put_page(vmf->page); return tmp; } @@ -3601,7 +3673,8 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf) unlock_page(vmf->page); tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || - (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { + (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | + VM_FAULT_RETRY)))) { put_page(vmf->page); return tmp; }