diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 20c39b7e21ee..43042739ae63 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -4198,9 +4198,21 @@ int num_ioctls; !Idrivers/gpu/drm/i915/i915_gem_gtt.c - Global GTT Fence Handling -!Pdrivers/gpu/drm/i915/i915_gem_fence.c fence register handling + GTT Fences and Swizzling !Idrivers/gpu/drm/i915/i915_gem_fence.c + + Global GTT Fence Handling +!Pdrivers/gpu/drm/i915/i915_gem_fence.c fence register handling + + + Hardware Tiling and Swizzling Details +!Pdrivers/gpu/drm/i915/i915_gem_fence.c tiling swizzling details + + + + Object Tiling IOCTLs +!Idrivers/gpu/drm/i915/i915_gem_tiling.c +!Pdrivers/gpu/drm/i915/i915_gem_tiling.c buffer object tiling Buffer Object Eviction diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index c643260a90c5..af1f8c461060 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -497,8 +497,7 @@ void i915_gem_restore_fences(struct drm_device *dev) } /** - * - * Support for managing tiling state of buffer objects. + * DOC: tiling swizzling details * * The idea behind tiling is to increase cache hit rates by rearranging * pixel data so that a group of pixel accesses are in the same cacheline. @@ -546,6 +545,9 @@ void i915_gem_restore_fences(struct drm_device *dev) */ /** + * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern + * @dev: DRM device + * * Detects bit 6 swizzling of address lookup between IGD access and CPU * access through main memory. */ @@ -692,7 +694,7 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) dev_priv->mm.bit_6_swizzle_y = swizzle_y; } -/** +/* * Swap every 64 bytes of this page around, to account for it having a new * bit 17 of its physical address and therefore being interpreted differently * by the GPU. @@ -715,6 +717,18 @@ i915_gem_swizzle_page(struct page *page) kunmap(page); } +/** + * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling + * @obj: i915 GEM buffer object + * + * This function fixes up the swizzling in case any page frame number for this + * object has changed in bit 17 since that state has been saved with + * i915_gem_object_save_bit_17_swizzle(). + * + * This is called when pinning backing storage again, since the kernel is free + * to move unpinned backing storage around (either by directly moving pages or + * by swapping them out and back in again). + */ void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj) { @@ -737,6 +751,14 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj) } } +/** + * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling + * @obj: i915 GEM buffer object + * + * This function saves the bit 17 of each page frame number so that swizzling + * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must + * be called before the backing storage can be unpinned. + */ void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index fa7a8d7a24e0..ac3eb566c9d2 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -31,53 +31,31 @@ #include #include "i915_drv.h" -/** @file i915_gem_tiling.c +/** + * DOC: buffer object tiling * - * Support for managing tiling state of buffer objects. + * i915_gem_set_tiling() and i915_gem_get_tiling() is the userspace interface to + * declare fence register requirements. * - * The idea behind tiling is to increase cache hit rates by rearranging - * pixel data so that a group of pixel accesses are in the same cacheline. - * Performance improvement from doing this on the back/depth buffer are on - * the order of 30%. + * In principle GEM doesn't care at all about the internal data layout of an + * object, and hence it also doesn't care about tiling or swizzling. There's two + * exceptions: * - * Intel architectures make this somewhat more complicated, though, by - * adjustments made to addressing of data when the memory is in interleaved - * mode (matched pairs of DIMMS) to improve memory bandwidth. - * For interleaved memory, the CPU sends every sequential 64 bytes - * to an alternate memory channel so it can get the bandwidth from both. + * - For X and Y tiling the hardware provides detilers for CPU access, so called + * fences. Since there's only a limited amount of them the kernel must manage + * these, and therefore userspace must tell the kernel the object tiling if it + * wants to use fences for detiling. + * - On gen3 and gen4 platforms have a swizzling pattern for tiled objects which + * depends upon the physical page frame number. When swapping such objects the + * page frame number might change and the kernel must be able to fix this up + * and hence now the tiling. Note that on a subset of platforms with + * asymmetric memory channel population the swizzling pattern changes in an + * unknown way, and for those the kernel simply forbids swapping completely. * - * The GPU also rearranges its accesses for increased bandwidth to interleaved - * memory, and it matches what the CPU does for non-tiled. However, when tiled - * it does it a little differently, since one walks addresses not just in the - * X direction but also Y. So, along with alternating channels when bit - * 6 of the address flips, it also alternates when other bits flip -- Bits 9 - * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines) - * are common to both the 915 and 965-class hardware. - * - * The CPU also sometimes XORs in higher bits as well, to improve - * bandwidth doing strided access like we do so frequently in graphics. This - * is called "Channel XOR Randomization" in the MCH documentation. The result - * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address - * decode. - * - * All of this bit 6 XORing has an effect on our memory management, - * as we need to make sure that the 3d driver can correctly address object - * contents. - * - * If we don't have interleaved memory, all tiling is safe and no swizzling is - * required. - * - * When bit 17 is XORed in, we simply refuse to tile at all. Bit - * 17 is not just a page offset, so as we page an objet out and back in, - * individual pages in it will have different bit 17 addresses, resulting in - * each 64 bytes being swapped with its neighbor! - * - * Otherwise, if interleaved, we have to tell the 3d driver what the address - * swizzling it needs to do is, since it's writing with the CPU to the pages - * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the - * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling - * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order - * to match what the GPU expects. + * Since neither of this applies for new tiling layouts on modern platforms like + * W, Ys and Yf tiling GEM only allows object tiling to be set to X or Y tiled. + * Anything else can be handled in userspace entirely without the kernel's + * invovlement. */ /* Check pitch constriants for all chips & tiling formats */ @@ -166,8 +144,18 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode) } /** + * i915_gem_set_tiling - IOCTL handler to set tiling mode + * @dev: DRM device + * @data: data pointer for the ioctl + * @file: DRM file for the ioctl call + * * Sets the tiling mode of an object, returning the required swizzling of * bit 6 of addresses in the object. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. */ int i915_gem_set_tiling(struct drm_device *dev, void *data, @@ -285,7 +273,17 @@ err: } /** + * i915_gem_get_tiling - IOCTL handler to get tiling mode + * @dev: DRM device + * @data: data pointer for the ioctl + * @file: DRM file for the ioctl call + * * Returns the current tiling mode and required bit 6 swizzling for the object. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. */ int i915_gem_get_tiling(struct drm_device *dev, void *data,