alistair23-linux/drivers/gpu/drm/i915/i915_gem_debug.c

/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Keith Packard <keithp@keithp.com>
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"

#if WATCH_LISTS
int
i915_verify_lists(struct drm_device *dev)
{
	static int warned;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *ring;
	int err = 0;
	int i;

	if (warned)
		return 0;

	for_each_ring(ring, dev_priv, i) {
		list_for_each_entry(obj, &ring->active_list, ring_list[ring->id]) {
			if (obj->base.dev != dev ||
			    !atomic_read(&obj->base.refcount.refcount)) {
				DRM_ERROR("%s: freed active obj %p\n",
					  ring->name, obj);
				err++;
				break;
			} else if (!obj->active ||
				   obj->last_read_req[ring->id] == NULL) {
				DRM_ERROR("%s: invalid active obj %p\n",
					  ring->name, obj);
				err++;
			} else if (obj->base.write_domain) {
				DRM_ERROR("%s: invalid write obj %p (w %x)\n",
					  ring->name,
					  obj, obj->base.write_domain);
				err++;
			}
		}
	}

	return warned = err;
}
#endif /* WATCH_LIST */
drm: Add GEM ("graphics execution manager") to i915 driver. GEM allows the creation of persistent buffer objects accessible by the graphics device through new ioctls for managing execution of commands on the device. The userland API is almost entirely driver-specific to ensure that any driver building on this model can easily map the interface to individual driver requirements. GEM is used by the 2d driver for managing its internal state allocations and will be used for pixmap storage to reduce memory consumption and enable zero-copy GLX_EXT_texture_from_pixmap, and in the 3d driver is used to enable GL_EXT_framebuffer_object and GL_ARB_pixel_buffer_object. Signed-off-by: Eric Anholt <eric@anholt.net> Signed-off-by: Dave Airlie <airlied@redhat.com> 2008-07-30 13:06:12 -06:00			`/*`
			`* Copyright © 2008 Intel Corporation`
			`*`
			`* Permission is hereby granted, free of charge, to any person obtaining a`
			`* copy of this software and associated documentation files (the "Software"),`
			`* to deal in the Software without restriction, including without limitation`
			`* the rights to use, copy, modify, merge, publish, distribute, sublicense,`
			`* and/or sell copies of the Software, and to permit persons to whom the`
			`* Software is furnished to do so, subject to the following conditions:`
			`*`
			`* The above copyright notice and this permission notice (including the next`
			`* paragraph) shall be included in all copies or substantial portions of the`
			`* Software.`
			`*`
			`* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR`
			`* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,`
			`* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL`
			`* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER`
			`* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING`
			`* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS`
			`* IN THE SOFTWARE.`
			`*`
			`* Authors:`
			`* Keith Packard <keithp@keithp.com>`
			`*`
			`*/`

UAPI: (Scripted) Convert #include "..." to #include <path/...> in drivers/gpu/ Convert #include "..." to #include <path/...> in drivers/gpu/. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Arnd Bergmann <arnd@arndb.de> Acked-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Acked-by: Dave Jones <davej@redhat.com> 2012-10-02 11:01:07 -06:00			`#include <drm/drmP.h>`
			`#include <drm/i915_drm.h>`
drm: Add GEM ("graphics execution manager") to i915 driver. GEM allows the creation of persistent buffer objects accessible by the graphics device through new ioctls for managing execution of commands on the device. The userland API is almost entirely driver-specific to ensure that any driver building on this model can easily map the interface to individual driver requirements. GEM is used by the 2d driver for managing its internal state allocations and will be used for pixmap storage to reduce memory consumption and enable zero-copy GLX_EXT_texture_from_pixmap, and in the 3d driver is used to enable GL_EXT_framebuffer_object and GL_ARB_pixel_buffer_object. Signed-off-by: Eric Anholt <eric@anholt.net> Signed-off-by: Dave Airlie <airlied@redhat.com> 2008-07-30 13:06:12 -06:00			`#include "i915_drv.h"`

drm/i915/debug: Convert i915_verify_active() to scan all lists ... and check more regularly. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> 2010-09-29 09:10:57 -06:00			`#if WATCH_LISTS`
			`int`
			`i915_verify_lists(struct drm_device *dev)`
drm: Add GEM ("graphics execution manager") to i915 driver. GEM allows the creation of persistent buffer objects accessible by the graphics device through new ioctls for managing execution of commands on the device. The userland API is almost entirely driver-specific to ensure that any driver building on this model can easily map the interface to individual driver requirements. GEM is used by the 2d driver for managing its internal state allocations and will be used for pixmap storage to reduce memory consumption and enable zero-copy GLX_EXT_texture_from_pixmap, and in the 3d driver is used to enable GL_EXT_framebuffer_object and GL_ARB_pixel_buffer_object. Signed-off-by: Eric Anholt <eric@anholt.net> Signed-off-by: Dave Airlie <airlied@redhat.com> 2008-07-30 13:06:12 -06:00			`{`
drm/i915/debug: Convert i915_verify_active() to scan all lists ... and check more regularly. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> 2010-09-29 09:10:57 -06:00			`static int warned;`
drm/i915: Implement inter-engine read-read optimisations Currently, we only track the last request globally across all engines. This prevents us from issuing concurrent read requests on e.g. the RCS and BCS engines (or more likely the render and media engines). Without semaphores, we incur costly stalls as we synchronise between rings - greatly impacting the current performance of Broadwell versus Haswell in certain workloads (like video decode). With the introduction of reference counted requests, it is much easier to track the last request per ring, as well as the last global write request so that we can optimise inter-engine read read requests (as well as better optimise certain CPU waits). v2: Fix inverted readonly condition for nonblocking waits. v3: Handle non-continguous engine array after waits v4: Rebase, tidy, rewrite ring list debugging v5: Use obj->active as a bitfield, it looks cool v6: Micro-optimise, mostly involving moving code around v7: Fix retire-requests-upto for execlists (and multiple rq->ringbuf) v8: Rebase v9: Refactor i915_gem_object_sync() to allow the compiler to better optimise it. Benchmark: igt/gem_read_read_speed hsw:gt3e (with semaphores): Before: Time to read-read 1024k: 275.794µs After: Time to read-read 1024k: 123.260µs hsw:gt3e (w/o semaphores): Before: Time to read-read 1024k: 230.433µs After: Time to read-read 1024k: 124.593µs bdw-u (w/o semaphores): Before After Time to read-read 1x1: 26.274µs 10.350µs Time to read-read 128x128: 40.097µs 21.366µs Time to read-read 256x256: 77.087µs 42.608µs Time to read-read 512x512: 281.999µs 181.155µs Time to read-read 1024x1024: 1196.141µs 1118.223µs Time to read-read 2048x2048: 5639.072µs 5225.837µs Time to read-read 4096x4096: 22401.662µs 21137.067µs Time to read-read 8192x8192: 89617.735µs 85637.681µs Testcase: igt/gem_concurrent_blit (read-read and friends) Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> [v8] [danvet: s/\<rq\>/req/g] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> 2015-04-27 06:41:17 -06:00			`struct drm_i915_private *dev_priv = to_i915(dev);`
drm/i915/debug: Convert i915_verify_active() to scan all lists ... and check more regularly. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> 2010-09-29 09:10:57 -06:00			`struct drm_i915_gem_object *obj;`
drm/i915: Implement inter-engine read-read optimisations Currently, we only track the last request globally across all engines. This prevents us from issuing concurrent read requests on e.g. the RCS and BCS engines (or more likely the render and media engines). Without semaphores, we incur costly stalls as we synchronise between rings - greatly impacting the current performance of Broadwell versus Haswell in certain workloads (like video decode). With the introduction of reference counted requests, it is much easier to track the last request per ring, as well as the last global write request so that we can optimise inter-engine read read requests (as well as better optimise certain CPU waits). v2: Fix inverted readonly condition for nonblocking waits. v3: Handle non-continguous engine array after waits v4: Rebase, tidy, rewrite ring list debugging v5: Use obj->active as a bitfield, it looks cool v6: Micro-optimise, mostly involving moving code around v7: Fix retire-requests-upto for execlists (and multiple rq->ringbuf) v8: Rebase v9: Refactor i915_gem_object_sync() to allow the compiler to better optimise it. Benchmark: igt/gem_read_read_speed hsw:gt3e (with semaphores): Before: Time to read-read 1024k: 275.794µs After: Time to read-read 1024k: 123.260µs hsw:gt3e (w/o semaphores): Before: Time to read-read 1024k: 230.433µs After: Time to read-read 1024k: 124.593µs bdw-u (w/o semaphores): Before After Time to read-read 1x1: 26.274µs 10.350µs Time to read-read 128x128: 40.097µs 21.366µs Time to read-read 256x256: 77.087µs 42.608µs Time to read-read 512x512: 281.999µs 181.155µs Time to read-read 1024x1024: 1196.141µs 1118.223µs Time to read-read 2048x2048: 5639.072µs 5225.837µs Time to read-read 4096x4096: 22401.662µs 21137.067µs Time to read-read 8192x8192: 89617.735µs 85637.681µs Testcase: igt/gem_concurrent_blit (read-read and friends) Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> [v8] [danvet: s/\<rq\>/req/g] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> 2015-04-27 06:41:17 -06:00			`struct intel_engine_cs *ring;`
drm/i915/debug: Convert i915_verify_active() to scan all lists ... and check more regularly. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> 2010-09-29 09:10:57 -06:00			`int err = 0;`
drm/i915: Implement inter-engine read-read optimisations Currently, we only track the last request globally across all engines. This prevents us from issuing concurrent read requests on e.g. the RCS and BCS engines (or more likely the render and media engines). Without semaphores, we incur costly stalls as we synchronise between rings - greatly impacting the current performance of Broadwell versus Haswell in certain workloads (like video decode). With the introduction of reference counted requests, it is much easier to track the last request per ring, as well as the last global write request so that we can optimise inter-engine read read requests (as well as better optimise certain CPU waits). v2: Fix inverted readonly condition for nonblocking waits. v3: Handle non-continguous engine array after waits v4: Rebase, tidy, rewrite ring list debugging v5: Use obj->active as a bitfield, it looks cool v6: Micro-optimise, mostly involving moving code around v7: Fix retire-requests-upto for execlists (and multiple rq->ringbuf) v8: Rebase v9: Refactor i915_gem_object_sync() to allow the compiler to better optimise it. Benchmark: igt/gem_read_read_speed hsw:gt3e (with semaphores): Before: Time to read-read 1024k: 275.794µs After: Time to read-read 1024k: 123.260µs hsw:gt3e (w/o semaphores): Before: Time to read-read 1024k: 230.433µs After: Time to read-read 1024k: 124.593µs bdw-u (w/o semaphores): Before After Time to read-read 1x1: 26.274µs 10.350µs Time to read-read 128x128: 40.097µs 21.366µs Time to read-read 256x256: 77.087µs 42.608µs Time to read-read 512x512: 281.999µs 181.155µs Time to read-read 1024x1024: 1196.141µs 1118.223µs Time to read-read 2048x2048: 5639.072µs 5225.837µs Time to read-read 4096x4096: 22401.662µs 21137.067µs Time to read-read 8192x8192: 89617.735µs 85637.681µs Testcase: igt/gem_concurrent_blit (read-read and friends) Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> [v8] [danvet: s/\<rq\>/req/g] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> 2015-04-27 06:41:17 -06:00			`int i;`
drm/i915/debug: Convert i915_verify_active() to scan all lists ... and check more regularly. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> 2010-09-29 09:10:57 -06:00
			`if (warned)`
			`return 0;`

drm/i915: Implement inter-engine read-read optimisations Currently, we only track the last request globally across all engines. This prevents us from issuing concurrent read requests on e.g. the RCS and BCS engines (or more likely the render and media engines). Without semaphores, we incur costly stalls as we synchronise between rings - greatly impacting the current performance of Broadwell versus Haswell in certain workloads (like video decode). With the introduction of reference counted requests, it is much easier to track the last request per ring, as well as the last global write request so that we can optimise inter-engine read read requests (as well as better optimise certain CPU waits). v2: Fix inverted readonly condition for nonblocking waits. v3: Handle non-continguous engine array after waits v4: Rebase, tidy, rewrite ring list debugging v5: Use obj->active as a bitfield, it looks cool v6: Micro-optimise, mostly involving moving code around v7: Fix retire-requests-upto for execlists (and multiple rq->ringbuf) v8: Rebase v9: Refactor i915_gem_object_sync() to allow the compiler to better optimise it. Benchmark: igt/gem_read_read_speed hsw:gt3e (with semaphores): Before: Time to read-read 1024k: 275.794µs After: Time to read-read 1024k: 123.260µs hsw:gt3e (w/o semaphores): Before: Time to read-read 1024k: 230.433µs After: Time to read-read 1024k: 124.593µs bdw-u (w/o semaphores): Before After Time to read-read 1x1: 26.274µs 10.350µs Time to read-read 128x128: 40.097µs 21.366µs Time to read-read 256x256: 77.087µs 42.608µs Time to read-read 512x512: 281.999µs 181.155µs Time to read-read 1024x1024: 1196.141µs 1118.223µs Time to read-read 2048x2048: 5639.072µs 5225.837µs Time to read-read 4096x4096: 22401.662µs 21137.067µs Time to read-read 8192x8192: 89617.735µs 85637.681µs Testcase: igt/gem_concurrent_blit (read-read and friends) Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> [v8] [danvet: s/\<rq\>/req/g] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> 2015-04-27 06:41:17 -06:00			`for_each_ring(ring, dev_priv, i) {`
			`list_for_each_entry(obj, &ring->active_list, ring_list[ring->id]) {`
			`if (obj->base.dev != dev \|\|`
			`!atomic_read(&obj->base.refcount.refcount)) {`
			`DRM_ERROR("%s: freed active obj %p\n",`
			`ring->name, obj);`
			`err++;`
			`break;`
			`} else if (!obj->active \|\|`
			`obj->last_read_req[ring->id] == NULL) {`
			`DRM_ERROR("%s: invalid active obj %p\n",`
			`ring->name, obj);`
			`err++;`
			`} else if (obj->base.write_domain) {`
			`DRM_ERROR("%s: invalid write obj %p (w %x)\n",`
			`ring->name,`
			`obj, obj->base.write_domain);`
			`err++;`
			`}`
drm/i915/debug: Convert i915_verify_active() to scan all lists ... and check more regularly. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> 2010-09-29 09:10:57 -06:00			`}`
drm: Add GEM ("graphics execution manager") to i915 driver. GEM allows the creation of persistent buffer objects accessible by the graphics device through new ioctls for managing execution of commands on the device. The userland API is almost entirely driver-specific to ensure that any driver building on this model can easily map the interface to individual driver requirements. GEM is used by the 2d driver for managing its internal state allocations and will be used for pixmap storage to reduce memory consumption and enable zero-copy GLX_EXT_texture_from_pixmap, and in the 3d driver is used to enable GL_EXT_framebuffer_object and GL_ARB_pixel_buffer_object. Signed-off-by: Eric Anholt <eric@anholt.net> Signed-off-by: Dave Airlie <airlied@redhat.com> 2008-07-30 13:06:12 -06:00			`}`
drm/i915/debug: Convert i915_verify_active() to scan all lists ... and check more regularly. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> 2010-09-29 09:10:57 -06:00
			`return warned = err;`
drm: Add GEM ("graphics execution manager") to i915 driver. GEM allows the creation of persistent buffer objects accessible by the graphics device through new ioctls for managing execution of commands on the device. The userland API is almost entirely driver-specific to ensure that any driver building on this model can easily map the interface to individual driver requirements. GEM is used by the 2d driver for managing its internal state allocations and will be used for pixmap storage to reduce memory consumption and enable zero-copy GLX_EXT_texture_from_pixmap, and in the 3d driver is used to enable GL_EXT_framebuffer_object and GL_ARB_pixel_buffer_object. Signed-off-by: Eric Anholt <eric@anholt.net> Signed-off-by: Dave Airlie <airlied@redhat.com> 2008-07-30 13:06:12 -06:00			`}`
drm/i915: Fix #endif comment Did you say OCD? Signed-off-by: Damien Lespiau <damien.lespiau@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> 2013-08-08 15:28:55 -06:00			`#endif /* WATCH_LIST */`