diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index efcd610d4fca..bccd4146d55c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -446,6 +446,9 @@ struct drm_i915_gem_object { uint32_t tiling_mode; uint32_t stride; + /** Record of address bit 17 of each page at last unbind. */ + long *bit_17; + /** AGP mapping type (AGP_USER_MEMORY or AGP_USER_CACHED_MEMORY */ uint32_t agp_type; @@ -640,6 +643,8 @@ void i915_gem_object_put_pages(struct drm_gem_object *obj); /* i915_gem_tiling.c */ void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); +void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj); +void i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj); /* i915_gem_debug.c */ void i915_gem_dump_object(struct drm_gem_object *obj, int len, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3a1189d94a9a..6dca9fc7c1db 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -155,6 +155,15 @@ fast_shmem_read(struct page **pages, return 0; } +static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj) +{ + drm_i915_private_t *dev_priv = obj->dev->dev_private; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + + return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && + obj_priv->tiling_mode != I915_TILING_NONE; +} + static inline int slow_shmem_copy(struct page *dst_page, int dst_offset, @@ -182,6 +191,64 @@ slow_shmem_copy(struct page *dst_page, return 0; } +static inline int +slow_shmem_bit17_copy(struct page *gpu_page, + int gpu_offset, + struct page *cpu_page, + int cpu_offset, + int length, + int is_read) +{ + char *gpu_vaddr, *cpu_vaddr; + + /* Use the unswizzled path if this page isn't affected. */ + if ((page_to_phys(gpu_page) & (1 << 17)) == 0) { + if (is_read) + return slow_shmem_copy(cpu_page, cpu_offset, + gpu_page, gpu_offset, length); + else + return slow_shmem_copy(gpu_page, gpu_offset, + cpu_page, cpu_offset, length); + } + + gpu_vaddr = kmap_atomic(gpu_page, KM_USER0); + if (gpu_vaddr == NULL) + return -ENOMEM; + + cpu_vaddr = kmap_atomic(cpu_page, KM_USER1); + if (cpu_vaddr == NULL) { + kunmap_atomic(gpu_vaddr, KM_USER0); + return -ENOMEM; + } + + /* Copy the data, XORing A6 with A17 (1). The user already knows he's + * XORing with the other bits (A9 for Y, A9 and A10 for X) + */ + while (length > 0) { + int cacheline_end = ALIGN(gpu_offset + 1, 64); + int this_length = min(cacheline_end - gpu_offset, length); + int swizzled_gpu_offset = gpu_offset ^ 64; + + if (is_read) { + memcpy(cpu_vaddr + cpu_offset, + gpu_vaddr + swizzled_gpu_offset, + this_length); + } else { + memcpy(gpu_vaddr + swizzled_gpu_offset, + cpu_vaddr + cpu_offset, + this_length); + } + cpu_offset += this_length; + gpu_offset += this_length; + length -= this_length; + } + + kunmap_atomic(cpu_vaddr, KM_USER1); + kunmap_atomic(gpu_vaddr, KM_USER0); + + return 0; +} + /** * This is the fast shmem pread path, which attempts to copy_from_user directly * from the backing pages of the object to the user's address space. On a @@ -270,6 +337,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, int page_length; int ret; uint64_t data_ptr = args->data_ptr; + int do_bit17_swizzling; remain = args->size; @@ -294,6 +362,8 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, goto fail_put_user_pages; } + do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + mutex_lock(&dev->struct_mutex); ret = i915_gem_object_get_pages(obj); @@ -328,11 +398,20 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, if ((data_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - data_page_offset; - ret = slow_shmem_copy(user_pages[data_page_index], - data_page_offset, - obj_priv->pages[shmem_page_index], - shmem_page_offset, - page_length); + if (do_bit17_swizzling) { + ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index], + shmem_page_offset, + user_pages[data_page_index], + data_page_offset, + page_length, + 1); + } else { + ret = slow_shmem_copy(user_pages[data_page_index], + data_page_offset, + obj_priv->pages[shmem_page_index], + shmem_page_offset, + page_length); + } if (ret) goto fail_put_pages; @@ -384,9 +463,14 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv); - if (ret != 0) + if (i915_gem_object_needs_bit17_swizzle(obj)) { ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv); + } else { + ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv); + if (ret != 0) + ret = i915_gem_shmem_pread_slow(dev, obj, args, + file_priv); + } drm_gem_object_unreference(obj); @@ -728,6 +812,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, int page_length; int ret; uint64_t data_ptr = args->data_ptr; + int do_bit17_swizzling; remain = args->size; @@ -752,6 +837,8 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, goto fail_put_user_pages; } + do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + mutex_lock(&dev->struct_mutex); ret = i915_gem_object_get_pages(obj); @@ -786,11 +873,20 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, if ((data_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - data_page_offset; - ret = slow_shmem_copy(obj_priv->pages[shmem_page_index], - shmem_page_offset, - user_pages[data_page_index], - data_page_offset, - page_length); + if (do_bit17_swizzling) { + ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index], + shmem_page_offset, + user_pages[data_page_index], + data_page_offset, + page_length, + 0); + } else { + ret = slow_shmem_copy(obj_priv->pages[shmem_page_index], + shmem_page_offset, + user_pages[data_page_index], + data_page_offset, + page_length); + } if (ret) goto fail_put_pages; @@ -855,6 +951,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file_priv); } + } else if (i915_gem_object_needs_bit17_swizzle(obj)) { + ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv); } else { ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv); if (ret == -EFAULT) { @@ -1298,6 +1396,9 @@ i915_gem_object_put_pages(struct drm_gem_object *obj) if (--obj_priv->pages_refcount != 0) return; + if (obj_priv->tiling_mode != I915_TILING_NONE) + i915_gem_object_save_bit_17_swizzle(obj); + for (i = 0; i < page_count; i++) if (obj_priv->pages[i] != NULL) { if (obj_priv->dirty) @@ -1923,6 +2024,10 @@ i915_gem_object_get_pages(struct drm_gem_object *obj) } obj_priv->pages[i] = page; } + + if (obj_priv->tiling_mode != I915_TILING_NONE) + i915_gem_object_do_bit_17_swizzle(obj); + return 0; } @@ -3601,6 +3706,7 @@ void i915_gem_free_object(struct drm_gem_object *obj) i915_gem_free_mmap_offset(obj); drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER); + kfree(obj_priv->bit_17); drm_free(obj->driver_private, 1, DRM_MEM_DRIVER); } diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 6be3f927c86a..f27e523c764f 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -25,6 +25,8 @@ * */ +#include "linux/string.h" +#include "linux/bitops.h" #include "drmP.h" #include "drm.h" #include "i915_drm.h" @@ -127,8 +129,8 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) swizzle_y = I915_BIT_6_SWIZZLE_9_11; } else { /* Bit 17 swizzling by the CPU in addition. */ - swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; - swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_x = I915_BIT_6_SWIZZLE_9_10_17; + swizzle_y = I915_BIT_6_SWIZZLE_9_17; } break; } @@ -288,6 +290,19 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; else args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; + + /* Hide bit 17 swizzling from the user. This prevents old Mesa + * from aborting the application on sw fallbacks to bit 17, + * and we use the pread/pwrite bit17 paths to swizzle for it. + * If there was a user that was relying on the swizzle + * information for drm_intel_bo_map()ed reads/writes this would + * break it, but we don't have any of those. + */ + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9; + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; + /* If we can't handle the swizzling, make it untiled. */ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) { args->tiling_mode = I915_TILING_NONE; @@ -354,8 +369,100 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, DRM_ERROR("unknown tiling mode\n"); } + /* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */ + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9; + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; + drm_gem_object_unreference(obj); mutex_unlock(&dev->struct_mutex); return 0; } + +/** + * Swap every 64 bytes of this page around, to account for it having a new + * bit 17 of its physical address and therefore being interpreted differently + * by the GPU. + */ +static int +i915_gem_swizzle_page(struct page *page) +{ + char *vaddr; + int i; + char temp[64]; + + vaddr = kmap(page); + if (vaddr == NULL) + return -ENOMEM; + + for (i = 0; i < PAGE_SIZE; i += 128) { + memcpy(temp, &vaddr[i], 64); + memcpy(&vaddr[i], &vaddr[i + 64], 64); + memcpy(&vaddr[i + 64], temp, 64); + } + + kunmap(page); + + return 0; +} + +void +i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + int page_count = obj->size >> PAGE_SHIFT; + int i; + + if (dev_priv->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17) + return; + + if (obj_priv->bit_17 == NULL) + return; + + for (i = 0; i < page_count; i++) { + char new_bit_17 = page_to_phys(obj_priv->pages[i]) >> 17; + if ((new_bit_17 & 0x1) != + (test_bit(i, obj_priv->bit_17) != 0)) { + int ret = i915_gem_swizzle_page(obj_priv->pages[i]); + if (ret != 0) { + DRM_ERROR("Failed to swizzle page\n"); + return; + } + set_page_dirty(obj_priv->pages[i]); + } + } +} + +void +i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + int page_count = obj->size >> PAGE_SHIFT; + int i; + + if (dev_priv->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17) + return; + + if (obj_priv->bit_17 == NULL) { + obj_priv->bit_17 = kmalloc(BITS_TO_LONGS(page_count) * + sizeof(long), GFP_KERNEL); + if (obj_priv->bit_17 == NULL) { + DRM_ERROR("Failed to allocate memory for bit 17 " + "record\n"); + return; + } + } + + for (i = 0; i < page_count; i++) { + if (page_to_phys(obj_priv->pages[i]) & (1 << 17)) + __set_bit(i, obj_priv->bit_17); + else + __clear_bit(i, obj_priv->bit_17); + } +} diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 67e3353a56d6..95962fa8398a 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -594,6 +594,9 @@ struct drm_i915_gem_busy { #define I915_BIT_6_SWIZZLE_9_10_11 4 /* Not seen by userland */ #define I915_BIT_6_SWIZZLE_UNKNOWN 5 +/* Seen by userland. */ +#define I915_BIT_6_SWIZZLE_9_17 6 +#define I915_BIT_6_SWIZZLE_9_10_17 7 struct drm_i915_gem_set_tiling { /** Handle of the buffer to have its tiling state updated */